问题
I'm having trouble applying the rle function to a data.frame (the same function works great on another data set). This call:
fgroup <- aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), function(x) rle(x)$values)
Which yields the error:
Error in rle(x) : 'x' must be an atomic vector
Sample data:
> dput(fevents2[1:20,])
structure(list(weeks = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), A1M.Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("2012-05-09", "2012-05-10", "2012-05-11",
"2012-05-14", "2012-05-15", "2012-05-17", "2012-05-18", "2012-05-21",
"2012-05-22", "2012-05-24", "2012-05-25", "2012-05-28", "2012-05-29",
"2012-05-30", "2012-05-31", "2012-06-04", "2012-06-05", "2012-06-07",
"2012-06-08", "2012-06-11", "2012-06-12", "2012-06-14", "2012-06-15",
"2012-06-18", "2012-06-19", "2012-06-21", "2012-06-22"), class = "factor"),
vv = structure(c(8L, 8L, 8L, 20L, 24L, 24L, 24L, 1L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 24L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "nil"), class = "factor"),
rv = structure(c(25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
10L, 10L, 22L, 22L, 22L, 25L, 10L, 22L, 22L, 22L, 22L, 25L
), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
"C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
"G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
"G VR", "G VV", "nil"), class = "factor"), ja = structure(c(12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 25L, 25L,
12L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("C AA", "C AJ",
"C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
"G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
aa = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), bv = structure(c(25L, 11L, 11L, 11L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
23L, 23L, 23L, 23L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
"G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
aj = structure(c(7L, 7L, 7L, 25L, 25L, 25L, 25L, 25L, 9L,
9L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 25L, 25L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), vb = structure(c(1L, 1L, 1L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 2L,
25L, 2L, 2L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
"G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
rj = structure(c(5L, 5L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JR", "G RJ",
"G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
rr = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), vr = structure(c(5L, 5L, 5L, 25L, 25L,
7L, 7L, 7L, 7L, 7L, 25L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
"C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
"G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
"G VR", "G VV", "nil"), class = "factor"), bb = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G RJ", "G RR", "G RV",
"G VB", "G VR", "G VV", "nil"), class = "factor"), jr = structure(c(25L,
25L, 10L, 10L, 22L, 22L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 5L, 5L, 5L, 5L), .Label = c("C AA", "C AJ",
"C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
"G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor")),
.Names = c("weeks",
"A1M.Date", "vv", "rv", "ja", "aa", "bv", "aj", "vb", "rj", "rr",
"vr", "bb", "jr"), row.names = c(NA, 20L), class = "data.frame")
Structure of data:
str(fevents2)
'data.frame': 1430 obs. of 14 variables:
$ weeks : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
$ A1M.Date: Factor w/ 27 levels "2012-05-09","2012-05-10",..: 1 1 1 1 1 1 1 1 1 1 ...
$ vv : Factor w/ 24 levels "C AA","C AJ",..: 8 8 8 20 24 24 24 1 13 13 ..
$ rv : Factor w/ 25 levels "C AA","C AJ",..: 25 25 25 25 25 25 25 25 10 10 ...
$ ja : Factor w/ 25 levels "C AA","C AJ",..: 12 12 12 12 12 12 12 12 12 12 ...
$ aa : Factor w/ 25 levels "C AA","C AJ",..: 2 2 2 2 2 2 2 2 25 25 ...
$ bv : Factor w/ 25 levels "C AA","C AJ",..: 25 11 11 11 23 23 23 23 23 23 ...
$ aj : Factor w/ 25 levels "C AA","C AJ",..: 7 7 7 25 25 25 25 25 9 9 ...
$ vb : Factor w/ 25 levels "C AA","C AJ",..: 1 1 1 25 25 25 25 25 25 25 ...
$ rj : Factor w/ 23 levels "C AA","C AJ",..: 5 5 16 16 16 16 16 16 16 16 ...
$ rr : Factor w/ 25 levels "C AA","C AJ",..: 3 3 3 3 3 3 3 3 3 3 ...
$ vr : Factor w/ 25 levels "C AA","C AJ",..: 5 5 5 25 25 7 7 7 7 7 ...
$ bb : Factor w/ 24 levels "C AA","C AJ",..: 4 4 4 4 4 4 4 4 4 4 ...
$ jr : Factor w/ 25 levels "C AA","C AJ",..: 25 25 10 10 22 22 25 25 25 25 ...
NULL
I understand that I have factors, but converting the factors to numeric with
as.numeric(as.character(fevents2))
or:
sapply(fevents2, function(x) as.numeric(as.character(x)))
doesn't solve my issue:
Error in fevents3[, 3:14] : incorrect number of dimensions
In addition: Warning message:
In eval.with.vis(expr, envir, enclos) : NAs introduced by coercion
Here's a sample data.frame on which the rle function works:
dput(fevents[1:20,]
structure(list(weeks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), A1M.Date = c("2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09"), vv = c("C RR", "C RR",
"C RR", "G RR", "nil", "nil", "nil", "C AA", "G AA", "G AA",
"G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA",
"G AA", "nil"), rv = c("nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "C VB", "C VB", "G VB", "G VB", "G VB", "nil",
"G VB", "G VB", "G VB", "G VB", "G VB", "nil"), ja = c("C VV",
"C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV",
"C VV", "C VV", "nil", "nil", "G VV", "G VV", "G VV", "G VV",
"G VV", "G VV", "G VV"), aa = c("C AJ", "C AJ", "C AJ", "C AJ",
"C AJ", "C AJ", "C AJ", "C AJ", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "nil"), bv = c("nil",
"C VR", "C VR", "C VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR"), aj = c("C RJ", "C RJ", "C RJ", "nil",
"nil", "nil", "nil", "nil", "C RV", "C RV", "G RV", "G RV", "G RV",
"G RV", "G RV", "G RV", "G RV", "G RV", "nil", "nil"), vb = c("C AA",
"C AA", "C AA", "nil", "nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "C AJ", "nil", "C AJ",
"C AJ"), rj = c("C JR", "C JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR"), rr = c("C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB"), vr = c("C JA", "C JA", "C JA", "nil",
"nil", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "nil", "C RJ",
"C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ"
), bb = c("C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV"), jr = c("nil", "nil",
"C VB", "C VB", "G VB", "G VB", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "C JA", "C JA", "C JA", "C JA"
)), .Names = c("weeks", "A1M.Date", "vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), row.names = c(NA,
20L), class = "data.frame")
str(fevents)
'data.frame': 1430 obs. of 14 variables:
$ weeks : num 1 1 1 1 1 1 1 1 1 1 ...
$ A1M.Date: chr "2012-05-09" "2012-05-09" "2012-05-09" "2012-05-09" ...
$ vv : chr "C RR" "C RR" "C RR" "G RR" ...
$ rv : chr "nil" "nil" "nil" "nil" ...
$ ja : chr "C VV" "C VV" "C VV" "C VV" ...
$ aa : chr "C AJ" "C AJ" "C AJ" "C AJ" ...
$ bv : chr "nil" "C VR" "C VR" "C VR" ...
$ aj : chr "C RJ" "C RJ" "C RJ" "nil" ...
$ vb : chr "C AA" "C AA" "C AA" "nil" ...
$ rj : chr "C JR" "C JR" "G JR" "G JR" ...
$ rr : chr "C BB" "C BB" "C BB" "C BB" ...
$ vr : chr "C JA" "C JA" "C JA" "nil" ...
$ bb : chr "C BV" "C BV" "C BV" "C BV" ...
$ jr : chr "nil" "nil" "C VB" "C VB" ...
I found a really "not elegant" workaround: writing the data.frame to a file as CSV and importing it again with stringsAsFactors = FALSE. This is not what I want to write in my code... There must be a simpler way to rearrange the structure of the data.frame to please rle?
回答1:
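The problem is that, as far as rle() is concerned, a factor is *not* an atomic vector, as the error says: rle() tests its input with is.vector(), which is FALSE for a factor because a factor carries "levels" and "class" attributes. Either convert all the factors to characters first (and not by coercing them to numeric!) or do the conversion inside the anonymous function you are applying.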
So this, which implements the second idea, works:
aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
function(x) rle(as.character(x))$values)
after a fashion:
> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
+ function(x) rle(as.character(x))$values)
weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1
1 1 C RR G RR nil C AA G AA nil nil C VB G VB nil C VB G VB nil C VV
ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2
1 nil C VV G VV C AJ nil nil C VR G VR C RJ nil C RV G RV nil C AA nil
vb.3 vb.4 vb.5 rj.1 rj.2 rr vr.1 vr.2 vr.3 vr.4 vr.5 bb jr.1 jr.2 jr.3
1 C AJ nil C AJ C JR G JR C BB C JA nil C RJ nil C RJ C BV nil C VB G VB
jr.4 jr.5
1 nil C JA
though I am not sure what you expected to get - there is only one week here, and aggregate and rle have stuck all the values together. Did you want separate $values for each of the variables in fevents2 that you are aggregating over?
Another thing:
as.numeric(as.character(fevents2))
can't possibly work, as the data are not numeric! Also, you can't apply those functions to a whole data frame and get anything like what you intended - if they work at all.
The sapply() approach should work. Here is a version that checks whether each variable is a factor and, if it is, coerces it to character:
fevents3 <- sapply(fevents2,
function(x) if(is.factor(x)) { as.character(x) } else { x })
But note that sapply() simplifies its result to a matrix, which will change the aggregate() method that is dispatched:
> class(fevents3)
[1] "matrix"
Instead perhaps
fevents3 <- lapply(fevents2,
function(x) if(is.factor(x)) { as.character(x) } else { x })
fevents3 <- data.frame(fevents3, stringsAsFactors = FALSE)
Now, if you wanted to apply rle() to each column of the split-up data and keep the results separate, how about
spl <- split(fevents3, list(weeks = fevents3[, 1]))
res <- lapply(spl, function(x) lapply(x[, 3:14], function(y) rle(y)$values))
which gives
> res
$`1`
$`1`$vv
[1] "C RR" "G RR" "nil" "C AA" "G AA" "nil"
$`1`$rv
[1] "nil" "C VB" "G VB" "nil" "C VB" "G VB" "nil"
$`1`$ja
[1] "C VV" "nil" "C VV" "G VV"
$`1`$aa
[1] "C AJ" "nil"
$`1`$bv
[1] "nil" "C VR" "G VR"
$`1`$aj
[1] "C RJ" "nil" "C RV" "G RV" "nil"
$`1`$vb
[1] "C AA" "nil" "C AJ" "nil" "C AJ"
$`1`$rj
[1] "C JR" "G JR"
$`1`$rr
[1] "C BB"
$`1`$vr
[1] "C JA" "nil" "C RJ" "nil" "C RJ"
$`1`$bb
[1] "C BV"
$`1`$jr
[1] "nil" "C VB" "G VB" "nil" "C JA"
Which is the same answer as that for aggregate() above, but with each rle() output kept separate:
> unlist(res)
1.vv1 1.vv2 1.vv3 1.vv4 1.vv5 1.vv6 1.rv1 1.rv2 1.rv3 1.rv4 1.rv5
"C RR" "G RR" "nil" "C AA" "G AA" "nil" "nil" "C VB" "G VB" "nil" "C VB"
1.rv6 1.rv7 1.ja1 1.ja2 1.ja3 1.ja4 1.aa1 1.aa2 1.bv1 1.bv2 1.bv3
"G VB" "nil" "C VV" "nil" "C VV" "G VV" "C AJ" "nil" "nil" "C VR" "G VR"
1.aj1 1.aj2 1.aj3 1.aj4 1.aj5 1.vb1 1.vb2 1.vb3 1.vb4 1.vb5 1.rj1
"C RJ" "nil" "C RV" "G RV" "nil" "C AA" "nil" "C AJ" "nil" "C AJ" "C JR"
1.rj2 1.rr 1.vr1 1.vr2 1.vr3 1.vr4 1.vr5 1.bb 1.jr1 1.jr2 1.jr3
"G JR" "C BB" "C JA" "nil" "C RJ" "nil" "C RJ" "C BV" "nil" "C VB" "G VB"
1.jr4 1.jr5
"nil" "C JA"
> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
+ function(x) rle(as.character(x))$values)
weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1
1 1 C RR G RR nil C AA G AA nil nil C VB G VB nil C VB G VB nil C VV
ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2
1 nil C VV G VV C AJ nil nil C VR G VR C RJ nil C RV G RV nil C AA nil
vb.3 vb.4 vb.5 rj.1 rj.2 rr vr.1 vr.2 vr.3 vr.4 vr.5 bb jr.1 jr.2 jr.3
1 C AJ nil C AJ C JR G JR C BB C JA nil C RJ nil C RJ C BV nil C VB G VB
jr.4 jr.5
1 nil C JA
[Note: This is only true here because the data snippet you show has just one week. I can't recall how unlist(res) will look if there is more than one week.]
来源:https://stackoverflow.com/questions/13311457/why-doesnt-rle-accept-a-factor-as-input