问题
I have a data frame which I then split into three (or any number of) data frames.
What I’m trying to do is to automatically process each column in each dataframe and add lagged versions of existing variables.
For example, if there were three variables in each data frame (v1, v2, v3), I would like to automatically (without hardcoding) add v1.lag, v2.lag and v3.lag.
Here is what I have so far, but I’m stuck now.
Any help would be highly appreciated.
## Example data: 72 rows holding three random numeric columns (v1-v3) and two
## grouping columns. dim1 has three levels of 24 rows each (A, B, C); dim2 has
## two levels of 36 rows each (J, K).
dd <- data.frame(
  matrix(rnorm(216), nrow = 72, ncol = 3),
  rep(c("A", "B", "C"), each = 24),
  rep(c("J", "K"), each = 36)
)
colnames(dd) <- c("v1", "v2", "v3", "dim1", "dim2")
dd
## One data frame per level of dim1, collected in a named list
dds <- split(dd, dd$dim1)
dds
# Missing step 1: Automatically create v1.lag, v2.lag, v3.lag, etc (if required)
Finally I would like to merge the three data frames into one big dataframe which will include newly created variables.
# Missing step 2: Merge data frames into single data frame
Any help would be highly appreciated.
EDIT: In the comments section I asked about moving averages instead of lags. Here is the solution:
## Trailing moving average of a numeric vector.
##
## x: numeric vector to smooth.
## f: filter weights; the window width is length(f). The weighted sum is
##    divided by length(f), so the default c(1, 1, 1) is a plain 3-point mean.
##
## Returns a plain numeric vector of length(x). The first length(f) - 1
## positions are NA because the one-sided window is incomplete there.
ma <- function(x, f = c(1, 1, 1)) {
  ## stats::filter() raises an error when the filter is longer than the
  ## series, so an input shorter than the window yields all NA instead.
  if (length(x) < length(f)) {
    return(rep(NA_real_, length(x)))
  }
  ## Namespace-qualified so that an attached dplyr cannot mask filter().
  as.numeric(stats::filter(x, f, sides = 1) / length(f))
}
## Append a moving-average column for every numeric column of a data frame.
##
## df: data frame to extend.
## f:  filter weights handed to ma(); length(f) is also used in the new names.
##
## Returns df with one extra column per numeric column, named
## "<column>.ma.<length(f)>". A data frame without numeric columns is
## returned unchanged.
foo <- function(df, f = c(1, 1, 1)) {
  ## vapply() always gives a logical vector, even for a zero-column frame
  nums <- vapply(df, is.numeric, logical(1))
  ## Without this guard paste() would recycle the empty name vector into the
  ## single bogus column name ".ma.<k>".
  if (!any(nums)) {
    return(df)
  }
  nams <- paste(names(df)[nums], "ma", length(f), sep = ".")
  df[nams] <- lapply(df[nums], function(col) ma(col, f = f))
  df
}
回答1:
Here is one option:
## Shift the values of a vector down by n positions, padding the front with NA.
## Adapted from @Andrie's clag(); stats::lag() is not used because it shifts
## the time base of a series rather than the values themselves.
##
## x: vector to lag.
## n: number of positions to shift (non-negative).
##
## Returns a vector of exactly length(x): n = 0 gives the values back
## unshifted, and n >= length(x) gives all NA.
clag <- function(x, n = 1) {
  len <- length(x)
  ## Clamp the shift: head(x, -0) would return an empty vector, and padding
  ## with more than len NAs would make the result longer than x.
  shift <- min(n, len)
  c(rep(NA, shift), head(x, len - shift))
}
## Append a lagged copy of every numeric column of a single data frame.
##
## df: data frame to extend.
## n:  number of rows to lag by, passed on to clag().
##
## Returns df with one extra column per numeric column, named "<column>.lag".
## A data frame without numeric columns is returned unchanged.
foo <- function(df, n = 1) {
  ## vapply() always gives a logical vector, even for a zero-column frame
  nums <- vapply(df, is.numeric, logical(1))
  ## Without this guard paste() would recycle the empty name vector into the
  ## single bogus column name ".lag".
  if (!any(nums)) {
    return(df)
  }
  nams <- paste(names(df)[nums], "lag", sep = ".")
  df[nams] <- lapply(df[nums], function(col) clag(col, n = n))
  df
}
## Run foo() on each per-group data frame in dds; the result is a list with one
## augmented data frame per element (printed here, not stored).
lapply(dds, foo)
Which gives:
> lapply(dds, foo)
$A
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
1 -1.15107343 1.47671548 -0.146501739 A J NA NA NA
2 -1.61068272 -0.85397093 -1.240187604 A J -1.15107343 1.47671548 -0.146501739
3 -1.23470282 -0.26194027 1.938344030 A J -1.61068272 -0.85397093 -1.240187604
4 -0.57874043 -0.44600138 0.326069423 A J -1.23470282 -0.26194027 1.938344030
5 0.16139066 -1.95804742 -0.744678169 A J -0.57874043 -0.44600138 0.326069423
6 -1.01497027 0.36850034 1.532640065 A J 0.16139066 -1.95804742 -0.744678169
7 0.72288058 -0.40115543 -0.686450596 A J -1.01497027 0.36850034 1.532640065
8 -0.51300447 0.19686310 0.441649595 A J 0.72288058 -0.40115543 -0.686450596
9 0.95439966 -2.03513002 -0.897784897 A J -0.51300447 0.19686310 0.441649595
10 -1.36736081 -0.41040962 -0.459403176 A J 0.95439966 -2.03513002 -0.897784897
11 0.59503846 0.28925760 -0.003095389 A J -1.36736081 -0.41040962 -0.459403176
12 -0.37951869 0.49551357 0.269412108 A J 0.59503846 0.28925760 -0.003095389
13 -0.52953401 -0.28433351 1.125505917 A J -0.37951869 0.49551357 0.269412108
14 -1.73466020 0.25442637 -1.094139749 A J -0.52953401 -0.28433351 1.125505917
15 0.08479137 -0.11688894 -1.034378216 A J -1.73466020 0.25442637 -1.094139749
16 -2.45854464 0.15806266 -2.275995527 A J 0.08479137 -0.11688894 -1.034378216
17 1.10663502 1.28587230 0.070334868 A J -2.45854464 0.15806266 -2.275995527
18 -0.01945585 1.63659116 -0.137040232 A J 1.10663502 1.28587230 0.070334868
19 0.59026606 -1.95724134 -0.480014930 A J -0.01945585 1.63659116 -0.137040232
20 -0.32245933 1.35372005 1.348717525 A J 0.59026606 -1.95724134 -0.480014930
21 -0.42560327 -1.30145328 2.020609480 A J -0.32245933 1.35372005 1.348717525
22 1.19550777 0.18417336 0.099232994 A J -0.42560327 -1.30145328 2.020609480
23 1.20198621 0.05926023 -0.171505810 A J 1.19550777 0.18417336 0.099232994
24 -1.00667141 1.32441782 0.056696824 A J 1.20198621 0.05926023 -0.171505810
$B
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
25 0.7878614 0.10354576 -0.69308980 B J NA NA NA
26 0.5824551 0.42319616 0.42734938 B J 0.7878614 0.10354576 -0.69308980
27 -0.2769730 1.51559382 -0.64106570 B J 0.5824551 0.42319616 0.42734938
28 -0.5736416 -1.58745816 -1.13274631 B J -0.2769730 1.51559382 -0.64106570
29 -1.9082145 -0.26148604 -0.04699411 B J -0.5736416 -1.58745816 -1.13274631
30 -1.6254549 0.39390814 -1.79993619 B J -1.9082145 -0.26148604 -0.04699411
31 0.3963274 1.79667985 0.92873142 B J -1.6254549 0.39390814 -1.79993619
32 -0.5889415 -0.04690351 1.43394978 B J 0.3963274 1.79667985 0.92873142
33 0.4683819 -1.34023029 0.18749782 B J -0.5889415 -0.04690351 1.43394978
34 0.7373052 -0.93470320 -1.14528378 B J 0.4683819 -1.34023029 0.18749782
35 -0.7751348 -1.26533917 0.11246728 B J 0.7373052 -0.93470320 -1.14528378
36 1.7786627 -0.19757164 0.14150980 B J -0.7751348 -1.26533917 0.11246728
37 1.8570412 -2.15174901 1.07751105 B K 1.7786627 -0.19757164 0.14150980
38 0.5128697 0.40112948 -0.94826274 B K 1.8570412 -2.15174901 1.07751105
39 0.8710264 -0.59978467 0.54462858 B K 0.5128697 0.40112948 -0.94826274
40 -0.3711512 -0.15632337 0.15832543 B K 0.8710264 -0.59978467 0.54462858
41 1.4505624 0.20915835 2.59369653 B K -0.3711512 -0.15632337 0.15832543
42 0.0871329 0.25440471 0.30096063 B K 1.4505624 0.20915835 2.59369653
43 -0.7398342 -1.72678544 0.45534941 B K 0.0871329 0.25440471 0.30096063
44 0.1953264 -0.60560630 -0.36884626 B K -0.7398342 -1.72678544 0.45534941
45 -0.2702493 0.50747209 -0.50699830 B K 0.1953264 -0.60560630 -0.36884626
46 0.2987449 0.46347722 1.20725190 B K -0.2702493 0.50747209 -0.50699830
47 -0.5682779 -0.71470625 -0.07865078 B K 0.2987449 0.46347722 1.20725190
48 -1.5291983 1.80092050 -1.73317395 B K -0.5682779 -0.71470625 -0.07865078
$C
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
49 0.06095825 -0.518263220 0.510999371 C K NA NA NA
50 0.40077713 0.477989115 0.855752036 C K 0.06095825 -0.518263220 0.510999371
51 0.06763037 0.802110426 -0.102536186 C K 0.40077713 0.477989115 0.855752036
52 -0.90530986 -0.005452101 -0.089703589 C K 0.06763037 0.802110426 -0.102536186
53 -0.79360209 0.299844218 -0.765164525 C K -0.90530986 -0.005452101 -0.089703589
54 1.34050298 -1.093705314 -0.955952912 C K -0.79360209 0.299844218 -0.765164525
55 0.45377712 0.054978470 0.382874895 C K 1.34050298 -1.093705314 -0.955952912
56 0.95283101 -0.564193352 1.458002944 C K 0.45377712 0.054978470 0.382874895
57 1.09157807 -1.351894599 -1.366084414 C K 0.95283101 -0.564193352 1.458002944
58 2.71993062 -1.126272793 1.374046159 C K 1.09157807 -1.351894599 -1.366084414
59 -0.04685281 0.423085481 -0.455903151 C K 2.71993062 -1.126272793 1.374046159
60 -0.31055449 0.818291875 0.400386018 C K -0.04685281 0.423085481 -0.455903151
61 -0.54904545 1.542272313 0.648135340 C K -0.31055449 0.818291875 0.400386018
62 -0.72914142 1.495482707 -0.212135011 C K -0.54904545 1.542272313 0.648135340
63 -0.27374611 -1.309254707 -0.005125047 C K -0.72914142 1.495482707 -0.212135011
64 0.87439910 -2.666588138 1.043778597 C K -0.27374611 -1.309254707 -0.005125047
65 1.07142042 0.446233778 -0.286784683 C K 0.87439910 -2.666588138 1.043778597
66 -0.10431808 0.510820156 0.405309569 C K 1.07142042 0.446233778 -0.286784683
67 -1.04006019 -0.041327622 1.202855549 C K -0.10431808 0.510820156 0.405309569
68 0.41084794 -0.376796559 -1.147032471 C K -1.04006019 -0.041327622 1.202855549
69 0.88329788 -0.344611311 1.862998306 C K 0.41084794 -0.376796559 -1.147032471
70 -0.67916248 1.396061431 0.697517685 C K 0.88329788 -0.344611311 1.862998306
71 3.55359528 -0.207825480 -0.949834845 C K -0.67916248 1.396061431 0.697517685
72 0.11329113 0.294747300 -0.955891419 C K 3.55359528 -0.207825480 -0.949834845
For the last bit, the combine step, save the above:
## Replace each element of dds with the data frame returned by foo()
dds <- lapply(dds, foo)
then use do.call() to rbind() the individual data frames together, as in:
## Call rbind() with the elements of dds as its arguments, stacking them
## row-wise; the row names are prefixed with the list names (A.1, A.2, ...).
df2 <- do.call(rbind, dds)
which gives:
> head(df2)
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
A.1 -1.1510734 1.4767155 -0.1465017 A J NA NA NA
A.2 -1.6106827 -0.8539709 -1.2401876 A J -1.1510734 1.4767155 -0.1465017
A.3 -1.2347028 -0.2619403 1.9383440 A J -1.6106827 -0.8539709 -1.2401876
A.4 -0.5787404 -0.4460014 0.3260694 A J -1.2347028 -0.2619403 1.9383440
A.5 0.1613907 -1.9580474 -0.7446782 A J -0.5787404 -0.4460014 0.3260694
A.6 -1.0149703 0.3685003 1.5326401 A J 0.1613907 -1.9580474 -0.7446782
回答2:
Use the plyr package to do all of this in one step:
library(plyr)
## Shift the values of a vector down by n positions, padding the front with NA.
##
## x: vector to lag.
## n: number of positions to shift (non-negative).
##
## Returns a vector of exactly length(x): n = 0 gives the values back
## unshifted, and n >= length(x) gives all NA.
clag <- function(x, n = 1) {
  len <- length(x)
  ## Clamp the shift: head(x, -0) would return an empty vector, and padding
  ## with more than len NAs would make the result longer than x.
  shift <- min(n, len)
  c(rep(NA, shift), head(x, len - shift))
}
## Split dd by dim1, add the lagged columns within each group via transform(),
## and let ddply() bind the groups back into a single data frame.
x <- ddply(
  .data = dd,
  .variables = .(dim1),
  .fun = transform,
  v1.lag = clag(v1),
  v2.lag = clag(v2),
  v3.lag = clag(v3)
)
head(x)
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
1 0.4465910 -0.2564334 -0.9122640 A J NA NA NA
2 -0.3748563 -0.9461061 0.1641274 A J 0.4465910 -0.2564334 -0.9122640
3 -0.5010834 -0.4413026 -0.7509968 A J -0.3748563 -0.9461061 0.1641274
4 -0.5278584 -0.6377017 0.5528831 A J -0.5010834 -0.4413026 -0.7509968
5 -0.4290586 0.4687849 0.6885102 A J -0.5278584 -0.6377017 0.5528831
6 0.1179935 -0.2742456 -0.1945482 A J -0.4290586 0.4687849 0.6885102
来源:https://stackoverflow.com/questions/14141198/processing-the-list-of-data-frames-with-apply-family-of-functions