问题
data=data.frame("Student"=c(1,1,1,2,2,2,3,3,3,4,4,4,5,5,5),
"Grade"=c(5,6,7,3,4,5,4,5,6,8,9,10,2,3,4),
"Pass"=c(NA,0,1,0,1,1,0,1,0,0,NA,NA,0,0,0),
"NEWPass"=c(0,0,1,0,1,1,0,1,1,0,0,0,0,0,0),
"GradeNEWPass"=c(7,7,7,4,4,4,5,5,5,10,10,10,4,4,4),
"GradeBeforeNEWPass"=c(6,6,6,3,3,3,4,4,4,10,10,10,4,4,4))
I have a data.frame called data. It has column names Student, Grade and Pass. I wish to do this:
NEWPass: Take Pass and for every Student fill in NA values with the previous value. If the first value is 'NA' than put a zero. Then this should be a running maximum.
GradeNEWPass: Take the lowest value of Grade that a Student got a one in NEWPass. If a Student did not get a one in NEWPass, this equals to the maximum grade.
GradeBeforeNEWPass: Take the value of Grade BEFORE a Student got a one in NEWPass. If a Student did not get a one in NEWPass, this equals to the maximum grade.
__ Attempts:
setDT(data)[, NEWPassTry := cummax(Pass), by = Student]
data$GradeNEWPass = data$NEWPassTry * data$Grade
data[, GradeNEWPass := min(GradeNEWPass), by = Student]
回答1:
We can use data.table
methods. Grouped by 'Student', create an index ('i1') where the 'Pass' is 1 and not an NA, then get the first position of 1 with which
and head
('i2'), while calculating the max
of 'Grade' ('mx'), then create the three columns based on the indexes ('v1' - get the cumulative maximum of the binary, 'v2' - if
there are any
1s, then subset the 'Grade' with the index 'i2' or else
return 'mx', similarly 'v3'- the index is subtracted 1 to get the 'Grade' value
library(data.table)
setDT(data)[, c('NEWPass1', 'GradeNEWPass1', 'GradeBeforeNEWPass1') :={
i1 <- Pass == 1 & !is.na(Pass)
i2 <- head(which(i1), 1)
mx <- max(Grade, na.rm = TRUE)
v1 <- cummax(+(i1))
v2 <- if(any(i1)) Grade[i2] else mx
v3 <- if(any(i1)) Grade[max(1, i2-1)] else mx
.(v1, v2, v3)}, Student]
data
# Student Grade Pass NEWPass GradeNEWPass GradeBeforeNEWPass NEWPass1 GradeNEWPass1 GradeBeforeNEWPass1
# 1: 1 5 NA 0 7 6 0 7 6
# 2: 1 6 0 0 7 6 0 7 6
# 3: 1 7 1 1 7 6 1 7 6
# 4: 2 3 0 0 4 3 0 4 3
# 5: 2 4 1 1 4 3 1 4 3
# 6: 2 5 1 1 4 3 1 4 3
# 7: 3 4 0 0 5 4 0 5 4
# 8: 3 5 1 1 5 4 1 5 4
# 9: 3 6 0 1 5 4 1 5 4
#10: 4 8 0 0 10 10 0 10 10
#11: 4 9 NA 0 10 10 0 10 10
#12: 4 10 NA 0 10 10 0 10 10
#13: 5 2 0 0 4 4 0 4 4
#14: 5 3 0 0 4 4 0 4 4
#15: 5 4 0 0 4 4 0 4 4
回答2:
Not pretty, admittedly, but your logic includes words like "if any ... for a student", so it's a group-wise conditional, not a row-wise conditional.
library(magrittr) # just for %>% for breakout, not required
mydata %>%
.[, NEWPass2 :=
cummax(fifelse(seq_len(.N) == 1 & is.na(Pass), 0,
zoo::na.locf(Pass, na.rm = FALSE))), by = .(Student) ] %>%
.[, GradeNEWPass2 :=
if (any(NEWPass2 > 0)) min(Grade[ NEWPass2 > 0 ]) else max(Grade),
by = .(Student) ] %>%
.[, GradeBeforeNEWPass2 :=
if (NEWPass2[1] == 0 && any(NEWPass2 > 0)) Grade[ which(NEWPass2 > 0)[1] - 1 ] else max(Grade),
by = .(Student) ]
# Student Grade Pass NEWPass GradeNEWPass GradeBeforeNEWPass NEWPass2 GradeNEWPass2 GradeBeforeNEWPass2
# 1: 1 5 NA 0 7 6 0 7 6
# 2: 1 6 0 0 7 6 0 7 6
# 3: 1 7 1 1 7 6 1 7 6
# 4: 2 3 0 0 4 3 0 4 3
# 5: 2 4 1 1 4 3 1 4 3
# 6: 2 5 1 1 4 3 1 4 3
# 7: 3 4 0 0 5 4 0 5 4
# 8: 3 5 1 1 5 4 1 5 4
# 9: 3 6 0 1 5 4 1 5 4
# 10: 4 8 0 0 10 10 0 10 10
# 11: 4 9 NA 0 10 10 0 10 10
# 12: 4 10 NA 0 10 10 0 10 10
# 13: 5 2 0 0 4 4 0 4 4
# 14: 5 3 0 0 4 4 0 4 4
# 15: 5 4 0 0 4 4 0 4 4
I'm using magrittr::%>%
solely to break it out into stages of computation, it is not required.
来源:https://stackoverflow.com/questions/61583036/r-configure-data-with-data-table