rbind dataframes with a different column name

前端未结

关注

 5  500

I have 12 data frames, each containing 6 columns: 5 have the same names and 1 is different. When I call rbind() on them I get:

Error in match.names(clabs, names(xi)) : names do not match previous names


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  情书的邮戳        
                
              
                            
                2020-11-27 20:34
              
            
            
                                                                       
My favourite use of mapply:

Example Data

a <- data.frame(a=runif(5), b=runif(5))
> a
          a         b
1 0.8403348 0.1579255
2 0.4759767 0.8182902
3 0.8091875 0.1080651
4 0.9846333 0.7035959
5 0.2153991 0.8744136


and b

b <- data.frame(c=runif(5), d=runif(5))
> b
          c         d
1 0.7604137 0.9753853
2 0.7553924 0.1210260
3 0.7315970 0.6196829
4 0.5619395 0.1120331
5 0.5711995 0.7252631


Solution

Using mapply:

> mapply(c, a,b)    #or as.data.frame(mapply(c, a,b)) for a data.frame
              a         b
 [1,] 0.8403348 0.1579255
 [2,] 0.4759767 0.8182902
 [3,] 0.8091875 0.1080651
 [4,] 0.9846333 0.7035959
 [5,] 0.2153991 0.8744136
 [6,] 0.7604137 0.9753853
 [7,] 0.7553924 0.1210260
 [8,] 0.7315970 0.6196829
 [9,] 0.5619395 0.1120331
[10,] 0.5711995 0.7252631


And based on @Marat's comment below:

You can also do data.frame(mapply(c, a, b, SIMPLIFY=FALSE)) or, alternatively, data.frame(Map(c,a,b))  to avoid double data.frame-matrix conversion
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  情深已故        
                
              
                            
                2020-11-27 20:38
              
            
            
                                                                       
You could use rbindlist which takes different column names.  Using @LyzandeR's data

library(data.table) # originally run with data.table 1.9.5

# Stack `a` and `b` strictly by column position. The column names of the
# first table are kept and the names of later tables are ignored.
# `use.names = FALSE` is spelled out because the default for this argument
# has changed between data.table releases; being explicit keeps the
# positional behaviour (and the output below) stable.
rbindlist(list(a, b), use.names = FALSE)
#            a         b
# 1: 0.8403348 0.1579255
# 2: 0.4759767 0.8182902
# 3: 0.8091875 0.1080651
# 4: 0.9846333 0.7035959
# 5: 0.2153991 0.8744136
# 6: 0.7604137 0.9753853
# 7: 0.7553924 0.1210260
# 8: 0.7315970 0.6196829
# 9: 0.5619395 0.1120331
#10: 0.5711995 0.7252631


Update

Based on the object names of the 12 datasets (i.e. 'Goal1_Costo', 'Goal2_Costo',..., 'Goal12_Costo'),

 # Names of the twelve objects to stack: Goal1_Costo, ..., Goal12_Costo.
 nm1 <- sprintf('%s%d_%s', 'Goal', 1:12, 'Costo')
 # equivalent construction with `paste`:
 #nm1 <- paste(paste0('Goal', 1:12), 'Costo', sep="_")
 # Look the objects up by name and bind them into a single table.
 goal_tables <- mget(nm1)
 rbindlist(goal_tables)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  轻奢々        
                
              
                            
                2020-11-27 20:44
              
            
            
                                                                       
Another base R approach if you have data.frames with different column names:
# Create a list of data frames; the third one carries an extra column `z`.
df_list <- list(
  data.frame(x = 1, y = paste0("y1", 1:3)),
  data.frame(x = 2, y = paste0("y2", 1:4)),
  data.frame(x = 3, y = paste0("y3", 1:5), z = "z3")
)
df_list
#> [[1]]
#>   x   y
#> 1 1 y11
#> 2 1 y12
#> 3 1 y13
#> 
#> [[2]]
#>   x   y
#> 1 2 y21
#> 2 2 y22
#> 3 2 y23
#> 4 2 y24
#> 
#> [[3]]
#>   x   y  z
#> 1 3 y31 z3
#> 2 3 y32 z3
#> 3 3 y33 z3
#> 4 3 y34 z3
#> 5 3 y35 z3

# This works when the column names are the same
do.call(rbind, df_list[1:2])
#>   x   y
#> 1 1 y11
#> 2 1 y12
#> 3 1 y13
#> 4 2 y21
#> 5 2 y22
#> 6 2 y23
#> 7 2 y24

# but fails when the column names differ.
# Wrapped in try() so the error is still reported but the script keeps
# running and reaches the solution below when it is sourced as a whole.
try(do.call(rbind, df_list))
#> Error in rbind(deparse.level, ...): numbers of columns of arguments do not match

# This can fill the unmatched columns with NA's without
# depending on other packages.
# The union of all column names is computed once, outside the loop.
all_cols <- unique(unlist(lapply(df_list, names)))

# Add every column a data frame lacks, filled with NA, so that all
# elements end up with the same set of column names.
padded <- lapply(df_list, function(df) {
  missing_cols <- setdiff(all_cols, names(df))
  if (length(missing_cols) > 0) {
    df[missing_cols] <- NA
  }
  df
})

# One rbind over the whole list (columns are matched by name) instead of
# growing the result pairwise with Reduce().
combined <- do.call(rbind, padded)
combined
#>    x   y    z
#> 1  1 y11 <NA>
#> 2  1 y12 <NA>
#> 3  1 y13 <NA>
#> 4  2 y21 <NA>
#> 5  2 y22 <NA>
#> 6  2 y23 <NA>
#> 7  2 y24 <NA>
#> 8  3 y31   z3
#> 9  3 y32   z3
#> 10 3 y33   z3
#> 11 3 y34   z3
#> 12 3 y35   z3

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  北海茫月        
                
              
                            
                2020-11-27 20:45
              
            
            
                                                                       
I would rename the columns.  This is very easy with names() if the columns are in the same order.

# Two frames with the same shape but unrelated column names.
df1 <- data.frame(one = 1:10, two = 11:20, three = 21:30)
df2 <- data.frame(four = 31:40, five = 41:50, six = 51:60)

# Positional rename: overwrite df2's header with df1's, then stack.
colnames(df2) <- colnames(df1)
rbind(df1, df2)


or

df1 <- data.frame(one = 1:10, two = 11:20, three = 21:30)

df2 <- data.frame(four = 31:40, five = 41:50, six = 51:60)

# setNames() is the base R helper (stats package, attached by default): it
# returns a renamed copy, so df2 itself keeps its original column names.
# The lower-case `setnames()` belongs to data.table and is not available
# here, because no package is loaded.
rbind(df1, setNames(df2, names(df1)))


Result:

   one two three
1    1  11    21
2    2  12    22
3    3  13    23
4    4  14    24
5    5  15    25
6    6  16    26
7    7  17    27
8    8  18    28
9    9  19    29
10  10  20    30
11  31  41    51
12  32  42    52
13  33  43    53
14  34  44    54
15  35  45    55
16  36  46    56
17  37  47    57
18  38  48    58
19  39  49    59
20  40  50    60

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  伪装坚强ぢ        
                
              
                            
                2020-11-27 20:55
              
            
            
                                                                       
Here is a possible tidyverse solution. I created 3 example dataframes based on your description of your dataframes.

# Shared reader for the inline samples: comma-separated text whose first
# line is the header row.
parse_goal_table <- function(csv_text) {
  read.table(text = csv_text, header = TRUE, sep = ",")
}

# Three example tables that differ only in the name of the
# goal<N>Completions column (and its values).
df1 <- parse_goal_table("date,source,medium,campaign,goal1Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),1,0.0000,Vida
2014-10-01,Master,email,CAFRE,2,0.0000,Vida
2014-10-01,apeseg,referral,(not set),3,0.0000,vida")

df2 <- parse_goal_table("date,source,medium,campaign,goal2Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),4,0.0000,Vida
2014-10-01,Master,email,CAFRE,5,0.0000,Vida
2014-10-01,apeseg,referral,(not set),6,0.0000,vida")

df3 <- parse_goal_table("date,source,medium,campaign,goal3Completions,ad.cost,Goal
2014-10-01,(direct),(none),(notset),7,0.0000,Vida
2014-10-01,Master,email,CAFRE,8,0.0000,Vida
2014-10-01,apeseg,referral,(not set),9,0.0000,vida")

> df1
        date   source   medium  campaign goal1Completions ad.cost Goal
1 2014-10-01 (direct)   (none)  (notset)                1       0 Vida
2 2014-10-01   Master    email     CAFRE                2       0 Vida
3 2014-10-01   apeseg referral (not set)                3       0 vida
> df2
        date   source   medium  campaign goal2Completions ad.cost Goal
1 2014-10-01 (direct)   (none)  (notset)                4       0 Vida
2 2014-10-01   Master    email     CAFRE                5       0 Vida
3 2014-10-01   apeseg referral (not set)                6       0 vida
> df3
        date   source   medium  campaign goal3Completions ad.cost Goal
1 2014-10-01 (direct)   (none)  (notset)                7       0 Vida
2 2014-10-01   Master    email     CAFRE                8       0 Vida
3 2014-10-01   apeseg referral (not set)                9       0 vida

library(dplyr)
library(tidyselect)
library(purrr)

# Regex for the per-table goal columns (goal1Completions, goal2Completions,
# ...). At least one digit is required, so the merged `goalCompletions`
# column created below is not matched by it.
goal_pattern <- "goal[[:digit:]]+Completions"

# bind_rows() stacks the tables and fills columns a table lacks with NA.
# The goal<N>Completions columns are then collapsed into one column by
# taking, per row, the first non-missing value, and finally dropped.
# select(matches()) replaces the superseded select_at(vars(matches())).
bind_rows(df1, df2, df3) %>%
  mutate(
    goalCompletions = reduce(select(., matches(goal_pattern)), coalesce)
  ) %>%
  select(-matches(goal_pattern))

        date   source   medium  campaign ad.cost Goal goalCompletions
1 2014-10-01 (direct)   (none)  (notset)       0 Vida               1
2 2014-10-01   Master    email     CAFRE       0 Vida               2
3 2014-10-01   apeseg referral (not set)       0 vida               3
4 2014-10-01 (direct)   (none)  (notset)       0 Vida               4
5 2014-10-01   Master    email     CAFRE       0 Vida               5
6 2014-10-01   apeseg referral (not set)       0 vida               6
7 2014-10-01 (direct)   (none)  (notset)       0 Vida               7
8 2014-10-01   Master    email     CAFRE       0 Vida               8
9 2014-10-01   apeseg referral (not set)       0 vida               9

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复