Extend an irregular sequence and add zeros to missing values

前端未结

关注

 9  2230

I have a data frame with a sequence in 'col1' and values in 'col2':

I want to exp

相关标签:

9条回答

醉梦人生

2020-12-19 08:57

I didn't see a simple merge solution, so here is one:

res <- merge(data.frame(col1 = 1:max(df$col1)), df, by = "col1", all.x = TRUE)
res$col2 <- ifelse(is.na(res$col2), 0, res$col2)

The second line is replacing the NA's from the merge (left outer join) with zeros. As @Axeman points out, this can also be accomplished by:

res$col2[is.na(res$col2)] <- 0

The result is:

res
##   col1 col2
##1     1 0.00
##2     2 0.02
##3     3 0.00
##4     4 0.00
##5     5 0.12
##6     6 0.00
##7     7 0.00
##8     8 0.00
##9     9 0.91
##10   10 0.00
##11   11 0.00
##12   12 0.00
##13   13 1.13

0 讨论(0)

一个人的身影

2020-12-19 08:59

We can use base R with merge and replace

# Left-join df onto the full 1..13 index, then overwrite the NA col2 values
# produced for the missing indices with 0.
transform(
  merge(data.frame(col1 = 1:13), df, all.x = TRUE),
  col2 = replace(col2, is.na(col2), 0)
)
#    col1 col2
#1     1 0.00
#2     2 0.02
#3     3 0.00  
#4     4 0.00
#5     5 0.12
#6     6 0.00
#7     7 0.00
#8     8 0.00
#9     9 0.91
#10   10 0.00
#11   11 0.00
#12   12 0.00
#13   13 1.13

0 讨论(0)

走了就别回头了

2020-12-19 09:00

There are already some interesting answers here.

Just to hop in, we can create a sequence of numbers from 1 to max(col1) and then get the respective value of col2 using match

# Every index from 1 up to the largest col1; match() yields NA where df has no
# row for that index, so the looked-up col2 is NA in the gaps.
col1 <- seq(1, max(df$col1))
data.frame(col1 = col1, col2 = df$col2[match(col1, df$col1)])

#   col1 col2
#1     1   NA
#2     2 0.02
#3     3   NA
#4     4   NA
#5     5 0.12
#6     6   NA
#7     7   NA
#8     8   NA
#9     9 0.91
#10   10   NA
#11   11   NA
#12   12   NA 
#13   13 1.13

This will give NAs instead of 0. If we need 0's,

# Take col2 from df where the index exists there, and 0 everywhere else.
data.frame(
  col1 = col1,
  col2 = ifelse(col1 %in% df$col1, df$col2[match(col1, df$col1)], 0)
)

#   col1 col2
#1     1 0.00
#2     2 0.02
#3     3 0.00
#4     4 0.00
#5     5 0.12
#6     6 0.00
#7     7 0.00
#8     8 0.00
#9     9 0.91
#10   10 0.00
#11   11 0.00
#12   12 0.00
#13   13 1.13

0 讨论(0)

长情又很酷

2020-12-19 09:03

library(tidyr)

complete(d, col1 = 1:13, fill = list(col2 = 0))

# Same completion, with the upper bound taken from the data rather than
# hard-coded (the stray closing parenthesis after seq(...) is removed).
complete(d, col1 = seq(max(col1)), fill = list(col2 = 0))

# A tibble: 13 × 2
    col1  col2
   <int> <dbl>
1      1  0.00
2      2  0.02
3      3  0.00
4      4  0.00
5      5  0.12
6      6  0.00
7      7  0.00
8      8  0.00
9      9  0.91
10    10  0.00
11    11  0.00
12    12  0.00
13    13  1.13

library(dplyr)

# Join d onto the full 1..max(col1) index (the stray closing parenthesis after
# data.frame(...) is removed so the call parses).
left_join(data.frame(col1 = seq(max(d$col1))), d)

But this will leave NAs instead of zeros.

0 讨论(0)

长发绾君心

2020-12-19 09:10

Here is a function that uses expandRows from the splitstackshape package,

# Expand `x` so that col1 covers every integer from 1 to its largest value,
# setting col2 to 0 on the rows whose index was not present in `x`.
#
# x: data frame with columns col1 and col2.
#    NOTE(review): the diff()-based counts presumably require col1 to be
#    increasing positive integers -- confirm against callers.
# Returns the expanded data frame with row names reset.
expand_seq <- function(x) {
  # Per-row repeat count: the first index, then the gap to the previous index.
  x$new <- c(x$col1[1], diff(x$col1))
  expanded <- splitstackshape::expandRows(x, 'new')
  # Renumber the repeated rows consecutively.
  expanded$col1 <- seq(max(expanded$col1))
  # Rows whose index did not exist in the input are fillers; zero them out.
  is_filler <- !(expanded$col1 %in% x$col1)
  expanded$col2[is_filler] <- 0
  rownames(expanded) <- NULL
  expanded
}

expand_seq(df)
#   col1 col2
#1     1 0.00
#2     2 0.02
#3     3 0.00
#4     4 0.00
#5     5 0.12
#6     6 0.00
#7     7 0.00
#8     8 0.00
#9     9 0.91
#10   10 0.00
#11   11 0.00
#12   12 0.00
#13   13 1.13

0 讨论(0)

無奈伤痛

2020-12-19 09:10

Just to add a different point of view, consider that what you have can be seen as a sparse vector, i.e. a vector in which only the non-zero values are stored. Sparse vectors are implemented by the Matrix package in R. If df is your initial data.frame, try:

# library() stops right away if Matrix is not installed; require() would only
# warn and return FALSE, deferring the failure to the sparseVector() call.
library(Matrix)
# Values df$col2 placed at positions df$col1 of a sparse vector of length
# max(df$col1); as.vector() turns it into a dense vector.
data.frame(col1=seq_len(max(df$col1)),
      col2=as.vector(sparseVector(df$col2,df$col1,max(df$col1))))
#   col1 col2
#1     1 0.00
#2     2 0.02
#3     3 0.00
#4     4 0.00
#5     5 0.12
#6     6 0.00
#7     7 0.00
#8     8 0.00
#9     9 0.91
#10   10 0.00
#11   11 0.00
#12   12 0.00
#13   13 1.13

The same result in a one-liner base R:

# Start from an all-zero vector of length max(col1) and overwrite the positions
# listed in df$col1 with the corresponding df$col2 values.
data.frame(
  col1 = seq_len(max(df$col1)),
  col2 = replace(numeric(max(df$col1)), df$col1, df$col2)
)

0 讨论(0)

1 2 下一页