Fill missing values from another dataframe with the same columns

前端 未结 1 1862
既然无缘
既然无缘 2021-01-06 23:46

I searched various join questions and none seemed to quite answer this. I have two dataframes which each have an ID column and several information columns.

d         


        
相关标签:
1条回答
  • 2021-01-07 00:31

    If you want to join only on the id column, remove `phase` from the `on` clause in the code below.

    Also, the data in your question has discrepancies; they are corrected in the data posted in this answer.

    library(data.table)

    # Convert both data frames to data.tables by reference (no copy is made).
    setDT(df1)
    setDT(df2)

    # Update join: for every row of df1 whose (id, phase) pair matches a row of
    # df2, overwrite df1's color with df2's color (`i.color` is df2's column).
    # Rows of df1 without a match in df2 are left unchanged. Matched rows are
    # overwritten whether or not their color was NA.
    df1[df2, color := i.color, on = .(id, phase)]

    tail(df1)
    #     id color phase   rand.col
    # 1:  95 green   gas  1.5868335
    # 2:  96 green   gas  0.5584864
    # 3:  97 green   gas -1.2765922
    # 4:  98 green   gas -0.5732654
    # 5:  99 green   gas -1.2246126
    # 6: 100 green   gas -0.4734006
    

    one-liner:

    # Same update join as a single expression: the result of setDT(df1) is
    # subset directly, so conversion and join happen in one statement.
    setDT(df1)[df2, on = .(id, phase), color := i.color]
    

    Data:

    # Reproducible example data.
    set.seed(1L)

    # df1: 100 rows; color is known for ids 1-50 and missing (NA) for ids 51-100.
    df1 <- data.frame(
      id = 1:100,
      color = rep(c("blue", "red", NA), times = c(25L, 25L, 50L)),
      phase = rep(c("liquid", "gas"), each = 50L),
      rand.col = rnorm(100)
    )

    # df2: provides the color for ids 51-100, all in the "gas" phase.
    df2 <- data.frame(
      id = 51:100,
      color = rep("green", times = 50L),
      phase = rep("gas", times = 50L)
    )
    

    EDIT: based on new data posted in the question

    Data:

    # Example data for the edited question.
    # df1 has 100 ids but only 50 values are drawn per column, so every wq column
    # holds the same 50 draws twice (ids 1-50 and ids 51-100). The repetition is
    # written out with rep(); data.frame() would otherwise recycle silently.
    set.seed(1L)
    df1 <- data.frame(
      id = 1:100,
      wq2 = rep(rnorm(50), times = 2L),
      wq3 = rep(rnorm(50), times = 2L),
      wq4 = rep(rnorm(50), times = 2L),
      wq5 = rep(rnorm(50), times = 2L)
    )

    # df2 covers ids 51-100 with values drawn under a different seed.
    set.seed(2423L)
    df2 <- data.frame(
      id = 51:100,
      wq2 = rnorm(50),
      wq3 = rnorm(50),
      wq4 = rnorm(50),
      wq5 = rnorm(50)
    )
    

    Code:

    library(data.table)

    # Row for id 52 in df1 before the update (setDT converts df1 by reference).
    setDT(df1)
    df1[id == 52]
    #    id       wq2        wq3        wq4         wq5
    # 1: 52 0.1836433 -0.6120264 0.04211587 -0.01855983

    # Row for id 52 in df2, whose values will replace those in df1.
    setDT(df2)
    df2[id == 52]
    #    id       wq2       wq3        wq4       wq5
    # 1: 52 0.3917297 -1.007601 -0.6820783 0.3153687

    # Update join on id: in every df1 row that has a match in df2, overwrite
    # wq2..wq5 with df2's values (the `i.`-prefixed names refer to df2's columns).
    df1[df2, on = .(id),
        c("wq2", "wq3", "wq4", "wq5") := list(i.wq2, i.wq3, i.wq4, i.wq5)]

    # After the update, df1's row for id 52 carries df2's values.
    df1[id == 52]
    #    id       wq2       wq3        wq4       wq5
    # 1: 52 0.3917297 -1.007601 -0.6820783 0.3153687
    
    0 讨论(0)
提交回复
热议问题