问题
I have a data set that is as follows:
library(data.table)
test <- data.table(structure(list(Issue.Date = structure(c(16041, 16056, 16042,15990, 15996, 16001, 15995, 15981, 15986, 15996, 15996, 16002,16015, 16020, 16025, 16032, 16023, 16084, 16077, 16102, 16104,16107, 16112, 16113, 16115, 16121, 16125, 16128, 16104, 16132,16133, 16135, 16139, 16146, 16151), class = "Date"),
Complaint = structure(c(1L,4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,5L, 3L, 1L, 3L, 1L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 3L,3L, 3L), .Label = c("A", "B", "C", "D", "E"), class = "factor"),
yr = c("2013", "2013", "2013", "2013", "2013", "2013", "2013","2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013","2013", "2013", "2014", "2014", "2014", "2014", "2014", "2014","2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014","2014", "2014", "2014", "2014"),
Month = c("2013-12", "2013-12","2013-12", "2013-10", "2013-10", "2013-10", "2013-10", "2013-10","2013-10", "2013-10", "2013-10", "2013-10", "2013-11", "2013-11","2013-11", "2013-11", "2013-11", "2014-01", "2014-01", "2014-02","2014-02", "2014-02", "2014-02", "2014-02", "2014-02", "2014-02","2014-02", "2014-02", "2014-02", "2014-03", "2014-03", "2014-03","2014-03", "2014-03", "2014-03"),
da = c("02", "17", "03","12", "18", "23", "17", "03", "08", "18", "18", "24", "06","11", "16", "23", "14", "14", "07", "01", "03", "06", "11","12", "14", "20", "24", "27", "03", "03", "04", "06", "10","17", "22")),
.Names = c("Issue.Date", "Complaint", "yr","Month", "da"), class = c("data.table", "data.frame"), row.names = c(NA,-35L)))
Basically what I would like to do is use data.table
to create a frequency table that has Complaint
and Count
by Month
. The trick is that I need it to show a Count
of zero if there are no Complaints
of that type for that Month
. I know how to do it without showing the zeros, but I want to know how to include them.
test[ , count := .N, by = "Month,Complaint"]
回答1:
To directly get the counts for each group:
setkey(test, Month, Complaint)
# may need to also add allow.cartesian, depending on actual data
test[CJ(Month, Complaint, unique = TRUE), .N, by = .EACHI]
# Month Complaint N
# 1: 2013-10 A 0
# 2: 2013-10 B 0
# 3: 2013-10 C 5
# 4: 2013-10 D 4
# 5: 2013-10 E 0
# 6: 2013-11 A 1
# 7: 2013-11 B 0
# 8: 2013-11 C 4
# 9: 2013-11 D 0
#10: 2013-11 E 0
#11: 2013-12 A 1
#12: 2013-12 B 0
#13: 2013-12 C 0
#14: 2013-12 D 2
#15: 2013-12 E 0
#16: 2014-01 A 0
#17: 2014-01 B 0
#18: 2014-01 C 1
#19: 2014-01 D 0
#20: 2014-01 E 1
#21: 2014-02 A 2
#22: 2014-02 B 0
#23: 2014-02 C 6
#24: 2014-02 D 2
#25: 2014-02 E 0
#26: 2014-03 A 1
#27: 2014-03 B 2
#28: 2014-03 C 3
#29: 2014-03 D 0
#30: 2014-03 E 0
# Month Complaint N
See first revision of the answer if you want to have the counts in the full data.table
instead of summarizing.
回答2:
It sounds like you perhaps need to use expand.grid
to "fill in" your data.table
:
EG <- data.table(expand.grid(Complaint = unique(test$Complaint),
Month = unique(test$Month)),
key = "Complaint,Month")
Then, you can merge
:
setkey(test, Complaint, Month)
Full <- merge(test, EG, all.y = TRUE)
And count like this:
Full[ , list(sum(!is.na(Issue.Date))), by = "Month,Complaint"]
# Month Complaint V1
# 1: 2013-11 A 1
# 2: 2013-12 A 1
# 3: 2014-02 A 2
# 4: 2014-03 A 1
# 5: 2013-10 A 0
# 6: 2014-01 A 0
# 7: 2013-11 B 0
# 8: 2013-12 B 0
# ::: S N I P :::
# 24: 2014-01 D 0
# 25: 2013-11 E 0
# 26: 2013-12 E 0
# 27: 2014-02 E 0
# 28: 2014-03 E 0
# 29: 2013-10 E 0
# 30: 2014-01 E 1
# Month Complaint V1
Alternatively, just use table
(???)
data.table(table(test[, c("Month", "Complaint"), with = FALSE]))
# Month Complaint N
# 1: 2013-10 A 0
# 2: 2013-11 A 1
# 3: 2013-12 A 1
# 4: 2014-01 A 0
# 5: 2014-02 A 2
# 6: 2014-03 A 1
# 7: 2013-10 B 0
# ::: S N I P :::
# 28: 2014-01 E 1
# 29: 2014-02 E 0
# 30: 2014-03 E 0
# Month Complaint N
来源:https://stackoverflow.com/questions/23547200/frequency-table-including-zeros-for-unused-values-on-a-data-table