问题
Im trying to use Genetic Algorithims using "GA" Package but faced a problem in making the fitness function, im using GA to simulate my data and get the most fitted values for constants in my model.
My data is from observations for a car speed and other parameters, so let's say i've a car and it made a 2 trips, and i want to make a model for it. Each trip have multiple columns ( speed, delta velocity with the opposite car, and Range between the two cars ), so i've to take the first row of each trip and pass it to the equations in fitness function, then the equations will generate new results for the speed,delta velocity and the range, then i've to use the new values and generate others, then compare the simulated distance with the old range i've in my data which is the observed one and get the lowest difference by the GA .
First: here's my data. https://drive.google.com/open?id=1923Jl6pDnQa_tGAluANUfIWCcyf85YVq
Second: here's my fitness function and the GA
Fitness_Function <- function(data, M_Acc, D_Speed, Beta, Com_Acc, Gap_J, D_Time){
Trips_IDs <- sort(unique(data$FileName))
# Trip=1;ROW=1
Calibrated_DF <- data.frame()
for (Trip in 1:2) {
Trip_Data <- data%>%filter(FileName==Trips_IDs[Trip])
attach(Trip_Data, warn.conflicts=F)
for (ROW in 1:(nrow(Trip_Data)-1)) {
if (ROW==1) {
speed <- Filling_Speed[1]
Delta_V <- Filling_DeltaVelocity[1]
Dist <- Filling_Range[1]
# M_Acc = 0.8418 ;D_Speed =29.2 ;Beta = 3.52
# Com_Acc = 0.8150 ;Gap_J = 1.554 ;D_Time = 0.878
Distance <- speed*D_Time - (speed*Delta_V)/(2*sqrt(M_Acc*Com_Acc))
if (Distance < 0 ) {
Distance <- 0
}
D_Gap <- Gap_J + Distance
Acceleration <- M_Acc*(1-(speed/D_Speed)^Beta-(D_Gap/Dist)^2)
}else{
speed <- speed_C
Delta_V <- Delta_V_C
Dist <- Dist_c
Distance <- speed*D_Time - (speed*Delta_V)/(2*sqrt(M_Acc*Com_Acc))
if (is.na(Distance)) {
}
Distance = 0
if (Distance < 0 ) {
Distance <- 0
}
D_Gap <- Gap_J + Distance
Acceleration <- M_Acc*(1-(speed/D_Speed)^Beta-(D_Gap/Dist)^2)
}
Lead_Veh_Speed_F <- Filling_Speed[ROW+1]+Filling_DeltaVelocity[ROW+1]
speed_C <- speed + Acceleration*0.1
Delta_V_C <- Lead_Veh_Speed_F-speed_C
Dist_c <- Dist+(Delta_V_C+Delta_V)/2*0.1
Calibrated_DF <- rbind(Calibrated_DF,c(Dist_c,ROW+1,Trips_IDs[Trip],Trip_Data$Filling_Range[ROW+1]))
}
detach(Trip_Data)
}
colnames(Calibrated_DF) <- c("C_Distance","row","Trip","Actual_Distance")
Calibrated_DF$Dif <- (Calibrated_DF$C_Distance-Calibrated_DF$Actual_Distance)^2
RMSPE <- sqrt(sum(Calibrated_DF$Dif)/sum(Calibrated_DF$Actual_Distance^2))
return(RMSPE)
# return(Calibrated_DF)
}
GA_Test <- ga(type='real-valued', lower=c( 0.1 , 1 , 1 , 0.1 , 0.1 , 0.1 ),
upper=c( 5 , 40 , 40 , 5 , 10 , 5 ), popSize=300, maxiter=300,run = 100,
keepBest=T, names = c("M_Acc", "D_Speed", "Beta", "Com_Acc", "Gap_J", "D_Time"),
fitness = function(b) -Fitness_Function(data, b[1],b[2], b[3],b[4],b[5],b[6]))
my problem is that: the code is very large, and it's veeeery slow to do even one iteration, i tried to use dplyr instead of using for loops but it's impossible to do that with dplyr, because i've to calculate the distance then acceleration then speed, then calculate them again for the other rows and i couldn't find away to do that with dplyr. I'll post my beta code of using Dplyr here but it's not complete because i can't complete it.
So help please.
data <- data%>%group_by(Driver,FileName)%>%
mutate(Distance_Term = ifelse(row_number()==1,Speed_C*D_Time - (Speed_C*Delta_V_C)/(2*sqrt(M_Acc*Com_Acc)),0))
data <- data%>%mutate(Distance_Term = ifelse(Distance_Term < 0 , 0, Distance_Term))%>%
mutate(D_Gap = Gap_J + Distance_Term,Acceleration_C = M_Acc*(1-(Speed_C/D_Speed)^Beta-(D_Gap/Distance)^2))
Note: the FileName column in the trip ID also my PC has good qualifications, so the problem isn't in my PC
回答1:
I've changed the for loop with accumulate2
function in purrr
so it's more faster and more efficient, i got this answer from this question Calculate variables using equations then use the generated values to generate new one
Objective_Function <- function(data, M_Acc, D_Speed, Beta, Com_Acc, Gap_J, D_Time){
myfun <- function(list, lcs,lcs2){
ds <- lcs - list[[1]]
Distance <- list[[1]]*D_Time - (list[[1]] * ds) / (2*sqrt(M_Acc*Com_Acc))
if (Distance < 0|is.na(Distance)) {Distance <- 0}
gap <- Gap_J + Distance
acc <- M_Acc * (1 - (list[[1]] / D_Speed)^Beta - (gap / list[[2]])^2)
fcs_new <- list[[1]] + acc * 0.1
ds_new <- lcs2- fcs_new
di_new <- list[[2]]+(ds_new+ds)/2*0.1
return(list(Speed = fcs_new,Distance = di_new))
}
Generated_Data <- data %>%group_by(Driver,FileName)%>%
mutate(Speed_Distance_Calibrated = accumulate2( .init = list(Filling_Speed[1],
Filling_Range[1]),.x = Lead_Veh_Speed_F,.y = Lead_Veh_Speed_F2, myfun)[-1])%>%ungroup()
Generated_Data <- Generated_Data %>% group_by(Driver,FileName)%>%
mutate(Speed_Distance_Calibrated = append(list(list(Speed = Filling_Speed[1],Distance = Filling_Range[1])),Speed_Distance_Calibrated[-length(Speed_Distance_Calibrated)]))%>%ungroup()
Dif <- map_df(Generated_Data$Speed_Distance_Calibrated, `[`, 2)
Generated_Data <- Generated_Data %>% mutate(Dif_sq = (Dif$Distance - Generated_Data$Filling_Range)^2)
RMSPE <- sqrt(sum(Generated_Data$Dif_sq)/sum(Generated_Data$Filling_Range^2))
return(RMSPE)
}
GA_Test <- ga(type='real-valued', lower=c( 0.1 , 1 , 1 , 0.1 , 0.1 , 0.1 ),
upper=c( 5 , 40 , 40 , 5 , 10 , 5 ), popSize=300, maxiter=300,run = 100,
keepBest=T, names = c("M_Acc", "D_Speed", "Beta", "Com_Acc", "Gap_J", "D_Time"),
fitness = function(b) -Objective_Function(data, b[1],b[2], b[3],b[4],b[5],b[6]),parallel = TRUE)
Summary <- summary(GA_Test)
来源:https://stackoverflow.com/questions/57029872/using-nested-for-loops-in-fitness-function-in-genetic-algroithims-makes-it-too-s