Duplicate the rows based on some criteria in SQL or R

大憨熊 提交于 2019-12-04 19:20:33

Another option is to use CROSS APPLY with a VALUES table constructor.

Example

-- Fan every source row out into four candidate rows, each keeping exactly one
-- of C1..C4 and zero in the other three positions, then drop the candidates
-- whose kept value is 0.
SELECT
    src.Name,
    v.*
FROM YourTable AS src
CROSS APPLY (
    VALUES
        (C1, 0, 0, 0),
        (0, C2, 0, 0),
        (0, 0, C3, 0),
        (0, 0, 0, C4)
) AS v (C1, C2, C3, C4)
-- Only one column per candidate row can be non-zero, so the sum equals it.
WHERE v.C1 + v.C2 + v.C3 + v.C4 <> 0

Returns one row per non-zero value, with the other three value columns set to 0.

You can do this with a few tidyverse functions. First, we enter your sample data

# Attach the tidyverse; the pipeline that uses `dd` relies on its verbs.
library(tidyverse)

# Sample data: one row per name with four numeric value columns (c1..c4).
dd <- tibble(
  name = c("Tom", "Shane", "Daniel", "Akira", "Jack", "Zoe"),
  c1 = c(1, 2, 3, 0, 5, 0),
  c2 = c(0, 3, 5, 0, 4, 0),
  c3 = c(0, 0, 1, 0, 0, 3),
  c4 = c(0, 0, 0, 1, 0, 0)
)

Then we gather, filter, and spread to get the rows you want. By adding in a row id, we keep the different values on different rows.

# Reshape to long form (the "gather" step), keep the positive cells, then
# reshape back to wide form (the "spread" step). pivot_longer()/pivot_wider()
# are the current tidyr replacements for the superseded gather()/spread().
dd %>%
  pivot_longer(-name, names_to = "var", values_to = "val") %>%
  # A unique id per long row keeps every kept value on its own output row.
  rowid_to_column() %>%
  filter(val > 0) %>%
  # pivot_wider() emits rows in order of first appearance; sorting by column
  # first reproduces the column-by-column order shown below.
  arrange(var, rowid) %>%
  pivot_wider(
    names_from = var,
    values_from = val,
    values_fill = list(val = 0)
  ) %>%
  select(-rowid)
# A tibble: 10 x 5
#      name    c1    c2    c3    c4
#  *  <chr> <dbl> <dbl> <dbl> <dbl>
#  1    Tom     1     0     0     0
#  2  Shane     2     0     0     0
#  3 Daniel     3     0     0     0
#  4   Jack     5     0     0     0
#  5  Shane     0     3     0     0
#  6 Daniel     0     5     0     0
#  7   Jack     0     4     0     0
#  8 Daniel     0     0     1     0
#  9    Zoe     0     0     3     0
# 10  Akira     0     0     0     1

One more option using union all.

-- One SELECT per value column: each keeps that column, puts 0 in the other
-- three positions, and returns only the rows where the kept value is positive.
-- Output column names come from the first SELECT.
SELECT name, c1, 0 AS c2, 0 AS c3, 0 AS c4
FROM tbl
WHERE c1 > 0
UNION ALL
SELECT name, 0, c2, 0, 0
FROM tbl
WHERE c2 > 0
UNION ALL
SELECT name, 0, 0, c3, 0
FROM tbl
WHERE c3 > 0
UNION ALL
SELECT name, 0, 0, 0, c4
FROM tbl
WHERE c4 > 0
# Sample data: one row per name with four numeric value columns (c1..c4).
df1 <- data.frame(
  name = c("Tom", "Shane", "Daniel", "Akira", "Jack", "Zoe"),
  c1 = c(1, 2, 3, 0, 5, 0),
  c2 = c(0, 3, 5, 0, 4, 0),
  c3 = c(0, 0, 1, 0, 0, 3),
  c4 = c(0, 0, 0, 1, 0, 0)
)

# Every column except `name` holds values to be spread over separate rows.
value_cols <- setdiff(names(df1), "name")

# Locate each positive cell as a (row, col) pair. Only the numeric columns are
# converted to a matrix, so the comparison stays numeric; apply(df1, 1, ...)
# would coerce the whole data frame, `name` included, to character first.
hits <- which(as.matrix(df1[value_cols]) > 0, arr.ind = TRUE)
# which() walks the matrix column by column; reorder so that the rows that
# belong to one source row stay together, in column order.
hits <- hits[order(hits[, "row"], hits[, "col"]), , drop = FALSE]

# One copy of the source row per positive cell (row names become 2, 2.1, ...).
df2 <- df1[hits[, "row"], , drop = FALSE]

# In each copy keep only the value of the cell it stands for and zero the
# rest. Using the recorded column of every hit (instead of the copy's running
# number) also handles rows whose positive columns are not the leading ones.
df3 <- df2
df3[value_cols] <- lapply(seq_along(value_cols), function(j) {
  kept <- numeric(nrow(df2))
  is_hit <- hits[, "col"] == j
  kept[is_hit] <- df2[[value_cols[j]]][is_hit]
  kept
})
df3
#      name c1 c2 c3 c4
#1      Tom  1  0  0  0
#2    Shane  2  0  0  0
#2.1  Shane  0  3  0  0
#3   Daniel  3  0  0  0
#3.1 Daniel  0  5  0  0
#3.2 Daniel  0  0  1  0
#4    Akira  0  0  0  1
#5     Jack  5  0  0  0
#5.1   Jack  0  4  0  0
#6      Zoe  0  0  3  0

Consider base R with by(): it builds a zero-padded data frame for each distinct name and then row-binds all of those data frames into the final one, similar to a UNION in SQL:

# NOTE(review): `df` is not defined in this snippet; it is presumably the
# input data frame with columns name, c1, c2, c3 and c4 -- confirm.
#
# For every distinct name, build a 4-row frame that carries the maximum of
# c1..c4 on the diagonal and 0 elsewhere, then keep the rows that sum to
# something other than 0.
df_list <- by(df, df$name, FUN = function(d) {
  padded <- data.frame(
    name = d$name[1],
    c1 = replace(numeric(4), 1, max(d$c1)),
    c2 = replace(numeric(4), 2, max(d$c2)),
    c3 = replace(numeric(4), 3, max(d$c3)),
    c4 = replace(numeric(4), 4, max(d$c4))
  )

  # Drop the placeholder rows, i.e. those whose value columns sum to 0.
  has_value <- rowSums(padded[-1]) != 0
  padded <- padded[has_value, ]
  # Reset row names so the stacked result is not labelled per group.
  row.names(padded) <- NULL
  padded
})

# Stack the per-name frames into a single data frame and print it.
final_df <- do.call(rbind, unname(df_list))
final_df
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!