问题
I have the following data:
library(data.table)
# Training data for the endogenous series: a data.table literal with 65 rows
# and three integer columns named "1", "2" and "3" (one column per product).
modelling_dt_train <- structure(list(`1` = c(54593L, 74481L, 85566L, 97637L, 101081L,
184089L, 158895L, 153780L, 153681L, 157188L, 142216L, 136437L,
135501L, 111264L, 123259L, 110397L, 146034L, 162900L, 132499L,
121516L, 119651L, 114045L, 112551L, 123209L, 134930L, 132147L,
151327L, 155666L, 158538L, 205766L, 200407L, 219588L, 231954L,
179884L, 159121L, 156148L, 136191L, 132956L, 202086L, 141047L,
118490L, 116595L, 127620L, 135962L, 137419L, 127334L, 158804L,
139142L, 181773L, 228278L, 272373L, 186666L, 148791L, 143608L,
169634L, 188149L, 239867L, 332543L, 253463L, 240574L, 237245L,
275466L, 262755L, 241538L, 303377L),
`2` = c(148181L, 186894L,
243357L, 227298L, 195640L, 412137L, 363152L, 355169L, 296208L,
328993L, 281652L, 308027L, 316254L, 249293L, 320821L, 220521L,
284411L, 263807L, 258093L, 261060L, 320153L, 311547L, 279734L,
258453L, 269697L, 313700L, 255285L, 232495L, 305346L, 393256L,
390655L, 527039L, 529056L, 450689L, 425190L, 372144L, 303765L,
324658L, 365035L, 285178L, 230985L, 251308L, 290378L, 279595L,
294676L, 391377L, 445682L, 364056L, 441207L, 516852L, 673401L,
415677L, 304000L, 266365L, 311924L, 314192L, 407313L, 664519L,
456920L, 384978L, 351644L, 432627L, 409624L, 386330L, 487679L
),
`3` = c(60217L, 66492L, 66675L, 76400L, 117252L, 264527L,
256384L, 241815L, 187115L, 193106L, 177620L, 140833L, 188291L,
110069L, 163581L, 107650L, 118319L, 118821L, 122383L, 117267L,
134962L, 121227L, 124952L, 111740L, 137493L, 163895L, 60653L,
69311L, 88810L, 128620L, 132077L, 153399L, 162989L, 151866L,
127325L, 122813L, 115284L, 103765L, 113185L, 101607L, 92379L,
98646L, 94376L, 98069L, 98972L, 103074L, 142199L, 123497L, 141823L,
205582L, 251187L, 109603L, 80711L, 80799L, 84175L, 104965L, 181221L,
245377L, 201378L, 235504L, 188925L, 214614L, 220312L, 191591L,
203292L)),
.Names = c("1", "2", "3"), class = c("data.table",
"data.frame"), row.names = c(NA, -65L))
# Training data for the exogenous variable: a data.table literal with 65 rows
# and three integer columns named "1", "2" and "3" -- the same column names as
# the endogenous table modelling_dt_train.
modelling_x_train <- structure(list(`1` = c(1982134L, 1968327L, 2019222L, 2025126L,
2033065L, 2188202L, 2066808L, 2070103L, 2041154L, 2201142L, 2105848L,
2067669L, 2005707L, 2239632L, 2435928L, 2363759L, 2444016L, 2556139L,
2807283L, 2674632L, 2687984L, 2889011L, 2839239L, 2712064L, 2928420L,
2889533L, 3106868L, 2746471L, 2953436L, 3225171L, 2926874L, 2914124L,
3210355L, 2847523L, 2890636L, 3268445L, 2941468L, 2931027L, 2906610L,
3222324L, 2833093L, 2978953L, 3196315L, 3055240L, 3210672L, 3368890L,
3046191L, 2960181L, 3341146L, 3227672L, 3062702L, 3197227L, 3445476L,
3441273L, 3651232L, 3566179L, 3619685L, 3716756L, 3600666L, 3732533L,
3695464L, 3857145L, 3700072L, 3608183L, 3904237L),
`2` = c(4082316L,
4644387L, 5230567L, 5115720L, 4729153L, 5658227L, 5492034L, 5443022L,
5094415L, 5939637L, 5354626L, 5509783L, 5438960L, 4912936L, 5736293L,
5167632L, 5244341L, 5580274L, 5750346L, 5358527L, 5916955L, 6129790L,
5245982L, 5801479L, 5683117L, 5721551L, 6972176L, 7072498L, 7979325L,
8324202L, 7434885L, 8189438L, 8062609L, 7658496L, 8066643L, 8528136L,
7515745L, 8276800L, 8227022L, 6523804L, 5780869L, 6481060L, 6912797L,
6276934L, 6592158L, 6908732L, 6067945L, 6459707L, 6910377L, 6645470L,
6538196L, 6694136L, 7484290L, 7299620L, 8532078L, 7713988L, 7256825L,
8237839L, 7834919L, 7725377L, 7291804L, 8224205L, 7784470L, 7514557L,
8164590L),
`3` = c(3181556L, 3232260L, 3272852L, 3233534L, 2876956L,
2979204L, 3275916L, 3345278L, 2951867L, 2976889L, 3289397L, 2955148L,
3306653L, 1861934L, 2239827L, 2207356L, 2335514L, 2387791L, 2592206L,
2371527L, 2586856L, 2447660L, 2322218L, 2342827L, 2666258L, 2627928L,
2525534L, 2521129L, 2573991L, 2752528L, 2538251L, 2676848L, 2802139L,
2702108L, 2630417L, 2778233L, 2725544L, 2723849L, 2795745L, 1954820L,
1842684L, 2132844L, 2182141L, 2041725L, 2355857L, 2414334L, 2350885L,
2367547L, 2436918L, 2328244L, 2390647L, 2460700L, 3081623L, 2877487L,
3025104L, 3108909L, 3172441L, 3267766L, 3354357L, 3273165L, 3322516L,
3342817L, 3413854L, 3217624L, 2736617L)),
.Names = c("1", "2",
"3"), class = c("data.table", "data.frame"), row.names = c(NA,
-65L))
where `modelling_dt_train` contains the time series of 3 products and `modelling_x_train` contains an exogenous variable (which is also a time series) for the same products.
I am estimating a `VAR` model using the following code:
library(vars)
# Map infinite entries of a column to 0 and leave every other value untouched.
zero_infinite <- function(col) {
  ifelse(is.infinite(col), 0, col)
}

# Log-transform the exogenous series, then neutralise any infinite values.
modelling_x_train <- log(modelling_x_train)[, lapply(.SD, zero_infinite)]

# Same treatment for the endogenous series.
x <- log(modelling_dt_train)[, lapply(.SD, zero_infinite)]

# The model is fitted on a plain matrix; both names are kept for later use.
dx <- x_mat <- as.matrix(x)

# Fit the VAR with the exogenous regressors and season = 18.
# `p` is not defined in this excerpt and must already exist in the session.
var <- VAR(dx, p = p, exogen = modelling_x_train, season = 18)
So far so good, but when I want to predict the values for 12 periods using
# Forecast 12 steps ahead from the fitted model `var`, supplying the future
# exogenous values through `dumvar`. This is the call that raises the error
# quoted below.
# NOTE(review): predict.varest() does not appear to declare a `newdata`
# argument, so it is presumably absorbed by `...` -- confirm against the vars
# documentation.
predict(var, newdata = modelling_dt_test, dumvar = modelling_x_test, n.ahead = 12)
I get an error:
Error in predict.varest(var, newdata = modelling_dt_test, dumvar = modelling_x_test, :
Column names of dumvar do not coincide with exogen.
The `newdata` and the `dumvar` that I am using are future values for the same products, i.e. observations from later in time:
# Future values of the exogenous variable, passed as `dumvar` to predict():
# a data.table literal with 13 rows and columns named "1", "2" and "3".
# NOTE(review): the predict() call in this post uses n.ahead = 12 while this
# table has 13 rows, and no log transform is applied to it in this excerpt
# (modelling_x_train is log-transformed before fitting) -- verify both.
modelling_x_test <- structure(list(`1` = c(4447896L, 4779229L, 4628391L, 4737933L,
5102152L, 4838918L, 4955183L, 5258605L, 5084001L, 4798945L, 5204015L,
5129690L, 5101568L),
`2` = c(6108187L, 6733956L, 7065148L, 7111155L,
6513151L, 7622806L, 7062042L, 7206067L, 7144091L, 7412266L, 6752614L,
7705255L, 7487054L),
`3` = c(1716975L, 2022198L, 2122109L, 2155489L,
2428639L, 2433860L, 2717315L, 2471655L, 2795100L, 2908946L, 2581813L,
2633578L, 2666302L)),
.Names = c("1", "2", "3"), class = c("data.table",
"data.frame"), row.names = c(NA, -13L))
# Future values of the three product series, passed as `newdata` to predict():
# a data.table literal with 13 rows and columns named "1", "2" and "3".
modelling_dt_test <- structure(list(`1` = c(244876L, 275993L, 256180L, 321256L, 316042L,
275097L, 250842L, 245543L, 233386L, 218958L, 254270L, 238804L,
234079L),
`2` = c(375278L, 429496L, 478816L, 532311L, 442922L,
485787L, 460750L, 501956L, 454178L, 425800L, 413112L, 434328L,
446069L),
`3` = c(119577L, 139870L, 127951L, 125017L, 138176L,
114517L, 129880L, 120941L, 159176L, 157890L, 149554L, 144210L,
165979L)),
.Names = c("1", "2", "3"), class = c("data.table",
"data.frame"), row.names = c(NA, -13L))
EDIT
In the source code here, at line 58, there is this check. What that line checks is whether `colnames(data.all)`
(which are "X1" "X2" "X3" "X1.l1" "X2.l1" "X3.l1" "const" "sd1" "sd2" "sd3" "sd4" "sd5" "sd6" "sd7" "sd8" "sd9" "sd10" "sd11" "sd12" "sd13" "sd14" "sd15" "sd16" "sd17" "X1.1" "X2.1" "X3.1"
-- these look to me like the names of the coefficients) are equal to `colnames(modelling_x_test)`,
which are "1" "2" "3"
(the products). How can these ever be equal?
来源:https://stackoverflow.com/questions/51764339/how-to-predict-using-var-with-exogenous-variables-in-r