rbind dataframes with a different column name

前端 未结 5 492
伪装坚强ぢ
伪装坚强ぢ 2020-11-27 20:28

I've 12 data frames, each one contains 6 columns: 5 have the same name, 1 is different. Then when I call rbind() I get:

Error in match.names(clabs, names(xi)) : names do not match previous names


        
相关标签:
5条回答
  • 2020-11-27 20:34

    My favourite use of mapply:

    Example Data

    a <- data.frame(a=runif(5), b=runif(5))
    > a
              a         b
    1 0.8403348 0.1579255
    2 0.4759767 0.8182902
    3 0.8091875 0.1080651
    4 0.9846333 0.7035959
    5 0.2153991 0.8744136
    

    and b

    b <- data.frame(c=runif(5), d=runif(5))
    > b
              c         d
    1 0.7604137 0.9753853
    2 0.7553924 0.1210260
    3 0.7315970 0.6196829
    4 0.5619395 0.1120331
    5 0.5711995 0.7252631
    

    Solution

    Using mapply:

    > mapply(c, a,b)    #or as.data.frame(mapply(c, a,b)) for a data.frame
                  a         b
     [1,] 0.8403348 0.1579255
     [2,] 0.4759767 0.8182902
     [3,] 0.8091875 0.1080651
     [4,] 0.9846333 0.7035959
     [5,] 0.2153991 0.8744136
     [6,] 0.7604137 0.9753853
     [7,] 0.7553924 0.1210260
     [8,] 0.7315970 0.6196829
     [9,] 0.5619395 0.1120331
    [10,] 0.5711995 0.7252631
    

    And based on @Marat's comment below:

    You can also do data.frame(mapply(c, a, b, SIMPLIFY=FALSE)) or, alternatively, data.frame(Map(c,a,b)) to avoid double data.frame-matrix conversion

    0 讨论(0)
  • 2020-11-27 20:38

    You could use rbindlist which takes different column names. Using @LyzandeR's data

    library(data.table) #data.table_1.9.5
    rbindlist(list(a,b))
    #            a         b
    # 1: 0.8403348 0.1579255
    # 2: 0.4759767 0.8182902
    # 3: 0.8091875 0.1080651
    # 4: 0.9846333 0.7035959
    # 5: 0.2153991 0.8744136
    # 6: 0.7604137 0.9753853
    # 7: 0.7553924 0.1210260
    # 8: 0.7315970 0.6196829
    # 9: 0.5619395 0.1120331
    #10: 0.5711995 0.7252631
    

    Update

    Based on the object names of the 12 datasets (i.e. 'Goal1_Costo', 'Goal2_Costo',..., 'Goal12_Costo'),

     nm1 <- paste(paste0('Goal', 1:12), 'Costo', sep="_")
     #or using `sprintf`
     #nm1 <- sprintf('%s%d_%s', 'Goal', 1:12, 'Costo')
     rbindlist(mget(nm1))
    
    0 讨论(0)
  • 2020-11-27 20:44

    Another base R approach if you have data.frames with different column names:

    # Create a list of data frames
    df_list <- list()
    df_list[[1]] <- data.frame(x = 1, y = paste0("y1", 1:3))
    df_list[[2]] <- data.frame(x = 2, y = paste0("y2", 1:4))
    df_list[[3]] <- data.frame(x = 3, y = paste0("y3", 1:5), z = "z3")
    df_list
    #> [[1]]
    #>   x   y
    #> 1 1 y11
    #> 2 1 y12
    #> 3 1 y13
    #> 
    #> [[2]]
    #>   x   y
    #> 1 2 y21
    #> 2 2 y22
    #> 3 2 y23
    #> 4 2 y24
    #> 
    #> [[3]]
    #>   x   y  z
    #> 1 3 y31 z3
    #> 2 3 y32 z3
    #> 3 3 y33 z3
    #> 4 3 y34 z3
    #> 5 3 y35 z3
    
    # This works when the column names are the same
    do.call(rbind, df_list[1:2])
    #>   x   y
    #> 1 1 y11
    #> 2 1 y12
    #> 3 1 y13
    #> 4 2 y21
    #> 5 2 y22
    #> 6 2 y23
    #> 7 2 y24
    
    # but fails when the data frames do not all have the same columns
    do.call(rbind, df_list)
    #> Error in rbind(deparse.level, ...): numbers of columns of arguments do not match
    
    # This can fill the unmatched columns with NA's without 
    # depending on other packages:
    # Union of every column name found in any of the data frames. It is the
    # same for all of them, so compute it once instead of once per data frame.
    all_cols <- unique(unlist(lapply(df_list, colnames)))
    # Add an NA column to each data frame for every name it lacks, then stack
    # the padded data frames in a single rbind() call.
    do.call(rbind, Map(function(x) {
      x[, setdiff(all_cols, names(x))] <- NA
      x
    },
    df_list))
    #>    x   y    z
    #> 1  1 y11 <NA>
    #> 2  1 y12 <NA>
    #> 3  1 y13 <NA>
    #> 4  2 y21 <NA>
    #> 5  2 y22 <NA>
    #> 6  2 y23 <NA>
    #> 7  2 y24 <NA>
    #> 8  3 y31   z3
    #> 9  3 y32   z3
    #> 10 3 y33   z3
    #> 11 3 y34   z3
    #> 12 3 y35   z3
    
    0 讨论(0)
  • 2020-11-27 20:45

    I would rename the columns. This is very easy with names() if the columns are in the same order.

    df1 <- data.frame(one=1:10,two=11:20,three=21:30)
    
    df2 <- data.frame(four=31:40,five=41:50,six=51:60)
    
    names(df2)<-names(df1)
    
    rbind(df1,df2)
    

    or

    df1 <- data.frame(one=1:10,two=11:20,three=21:30)
    
    df2 <- data.frame(four=31:40,five=41:50,six=51:60)
    
    # setNames() (stats package, attached by default) returns a copy of df2
    # carrying df1's column names, so rbind() sees matching names and df2
    # itself is left unchanged. The lower-case setnames() belongs to data.table
    # and is not available unless that package is loaded.
    rbind(df1,setNames(df2,names(df1)))
    

    Result:

       one two three
    1    1  11    21
    2    2  12    22
    3    3  13    23
    4    4  14    24
    5    5  15    25
    6    6  16    26
    7    7  17    27
    8    8  18    28
    9    9  19    29
    10  10  20    30
    11  31  41    51
    12  32  42    52
    13  33  43    53
    14  34  44    54
    15  35  45    55
    16  36  46    56
    17  37  47    57
    18  38  48    58
    19  39  49    59
    20  40  50    60
    
    0 讨论(0)
  • 2020-11-27 20:55

    Here is a possible tidyverse solution. I created 3 example dataframes based on your description of your dataframes.

    df1 <- read.table(text ="date,source,medium,campaign,goal1Completions,ad.cost,Goal
    2014-10-01,(direct),(none),(notset),1,0.0000,Vida
    2014-10-01,Master,email,CAFRE,2,0.0000,Vida
    2014-10-01,apeseg,referral,(not set),3,0.0000,vida",sep = ",",header=TRUE) 
    
    df2 <- read.table(text ="date,source,medium,campaign,goal2Completions,ad.cost,Goal
    2014-10-01,(direct),(none),(notset),4,0.0000,Vida
    2014-10-01,Master,email,CAFRE,5,0.0000,Vida
    2014-10-01,apeseg,referral,(not set),6,0.0000,vida",sep = ",",header=TRUE) 
    
    df3 <- read.table(text ="date,source,medium,campaign,goal3Completions,ad.cost,Goal
    2014-10-01,(direct),(none),(notset),7,0.0000,Vida
    2014-10-01,Master,email,CAFRE,8,0.0000,Vida
    2014-10-01,apeseg,referral,(not set),9,0.0000,vida",sep = ",",header=TRUE) 
    
    > df1
            date   source   medium  campaign goal1Completions ad.cost Goal
    1 2014-10-01 (direct)   (none)  (notset)                1       0 Vida
    2 2014-10-01   Master    email     CAFRE                2       0 Vida
    3 2014-10-01   apeseg referral (not set)                3       0 vida
    > df2
            date   source   medium  campaign goal2Completions ad.cost Goal
    1 2014-10-01 (direct)   (none)  (notset)                4       0 Vida
    2 2014-10-01   Master    email     CAFRE                5       0 Vida
    3 2014-10-01   apeseg referral (not set)                6       0 vida
    > df3
            date   source   medium  campaign goal3Completions ad.cost Goal
    1 2014-10-01 (direct)   (none)  (notset)                7       0 Vida
    2 2014-10-01   Master    email     CAFRE                8       0 Vida
    3 2014-10-01   apeseg referral (not set)                9       0 vida
    
    library(dplyr)
    library(tidyselect)
    library(purrr)
    
    # Stack the three data frames. Each one has its own goalNCompletions
    # column, so the stacked result holds all three of those columns.
    bind_rows(df1,df2,df3) %>%
       # Merge the goalNCompletions columns into one: coalesce() keeps, per row,
       # the first non-missing value across them.
       mutate(goalCompletions = reduce(select(., matches("goal[[:digit:]]+Completions")), coalesce)) %>%
       # Drop the per-goal columns now that they are merged.
       select(-matches("goal[[:digit:]]+Completions"))
    
            date   source   medium  campaign ad.cost Goal goalCompletions
    1 2014-10-01 (direct)   (none)  (notset)       0 Vida               1
    2 2014-10-01   Master    email     CAFRE       0 Vida               2
    3 2014-10-01   apeseg referral (not set)       0 vida               3
    4 2014-10-01 (direct)   (none)  (notset)       0 Vida               4
    5 2014-10-01   Master    email     CAFRE       0 Vida               5
    6 2014-10-01   apeseg referral (not set)       0 vida               6
    7 2014-10-01 (direct)   (none)  (notset)       0 Vida               7
    8 2014-10-01   Master    email     CAFRE       0 Vida               8
    9 2014-10-01   apeseg referral (not set)       0 vida               9
    
    0 讨论(0)
提交回复
热议问题