问题
How can I replace all 0's with the last non-zero value per ID in R?
Example:
Input:
df <- data.frame(ID = c(1,1,1,1,1,1,1,2,2,2,2),
Var1 = c(0,10, 30, 0, 0,50,80,0, 0, 57, 0))
Output:
df <- data.frame(ID = c(1,1,1,1,1,1,1,2,2,2,2),
Var1 = c(0,10, 30, 0, 0,50,80,0, 0, 57, 0),
res = c(0,10,30,30,30,50,80,0,0,57,57))
Is there an easy way with lag function?
回答1:
Here's a tidyverse approach:
library(tidyverse)
df %>%
group_by(ID) %>%
mutate(x = replace(Var1, cumsum(Var1 !=0) > 0 & Var1 == 0, NA)) %>%
fill(x)
# # A tibble: 11 x 4
# # Groups: ID [2]
# ID Var1 res x
# <dbl> <dbl> <dbl> <dbl>
# 1 1. 0. 0. 0.
# 2 1. 10. 10. 10.
# 3 1. 30. 30. 30.
# 4 1. 0. 30. 30.
# 5 1. 0. 30. 30.
# 6 1. 50. 50. 50.
# 7 1. 80. 80. 80.
# 8 2. 0. 0. 0.
# 9 2. 0. 0. 0.
# 10 2. 57. 57. 57.
# 11 2. 0. 57. 57.
In the mutate step, we replace 0's with NA except for those that are at the beginning of each ID-run because in those cases we have no values to replace the NAs afterwards.
If you have multiple columns to adjust, you can use:
df %>%
group_by(ID) %>%
mutate_at(vars(starts_with("Var")),
funs(replace(., cumsum(. !=0) > 0 & . == 0, NA))) %>%
fill(starts_with("Var"))
where df could be:
df <- data.frame(ID = c(1,1,1,1,1,1,1,2,2,2,2),
Var1 = c(0,10, 30, 0, 0,50,80,0, 0, 57, 0),
Var2 = c(4,0, 30, 0, 0,50,0,16, 0, 57, 0))
回答2:
Without using any package, just using the loops
:
df <- data.frame(ID = c(1,1,1,1,1,1,1,2,2,2,2),
Var1 = c(0,10, 30, 0, 0,50,80,0, 0, 57, 0))
for(i in 1:nrow(df)){
if(i!=1){
if(df$ID[i-1]==df$ID[i] && df$Var1[i]==0){ # if value is zero and value of current and previous rows ID are same
if(df$Var1[i-1]!=0){ # If previous value is not zero then store it
df$res[i]=df$Var1[i-1] # Use previous value of var1
a=0
a=df$Var1[i-1]
}else{
df$res[i]=a # Use previous value var1
a=0
}
}else{
df$res[i]=df$Var1[i] # Use the current value of var1
}
}else{
df$res[i]=df$Var1[i] # Set the first point as it is
}
}
Output:
> df
ID Var1 res
1 1 0 0
2 1 10 10
3 1 30 30
4 1 0 30
5 1 0 30
6 1 50 50
7 1 80 80
8 2 0 0
9 2 0 0
10 2 57 57
11 2 0 57
来源:https://stackoverflow.com/questions/50966647/replace-0s-with-previous-non-zero-value-per-id-lag