I searched various join questions and none seemed to quite answer this. I have two dataframes which each have an ID column and several information columns.
d
If you want to join only by the `id` column, you can remove `phase` from the `on` clause of the code below.
Also, the data in your question has discrepancies; they are corrected in the data posted in this answer.
library(data.table)

# Convert both data frames to data.tables by reference.
setDT(df1)
setDT(df2)

# Update join: for every row of df1 whose (id, phase) pair matches a row of
# df2, overwrite df1's color with df2's color. The `i.` prefix refers to
# columns of the table passed as `i` (here df2).
df1[df2, on = c("id", "phase"), color := i.color]

tail(df1)
#     id color phase   rand.col
# 1:  95 green   gas  1.5868335
# 2:  96 green   gas  0.5584864
# 3:  97 green   gas -1.2765922
# 4:  98 green   gas -0.5732654
# 5:  99 green   gas -1.2246126
# 6: 100 green   gas -0.4734006
The same update as a one-liner:
# Convert df1 in place and run the update join in a single expression.
setDT(df1)[df2, on = c("id", "phase"), color := i.color]
Data:
# Reproducible example data.
set.seed(1L)

# df1: 100 rows; color is set for ids 1-50 and missing (NA) for ids 51-100.
df1 <- data.frame(
  id = 1:100,
  color = rep(c("blue", "red", NA), times = c(25, 25, 50)),
  phase = rep(c("liquid", "gas"), each = 50),
  rand.col = rnorm(100)
)

# df2: carries the color for ids 51-100, all in the "gas" phase.
df2 <- data.frame(
  id = 51:100,
  color = rep("green", 50),
  phase = rep("gas", 50)
)
EDIT: based on new data posted in the question
Data:
# df1: ids 1-100. Each wq column draws only 50 values, which are repeated
# twice to fill the 100 rows, so row k and row k + 50 hold the same values.
set.seed(1L)
df1 <- data.frame(
  id = 1:100,
  wq2 = rep(rnorm(50), times = 2),
  wq3 = rep(rnorm(50), times = 2),
  wq4 = rep(rnorm(50), times = 2),
  wq5 = rep(rnorm(50), times = 2)
)

# df2: replacement values for ids 51-100, drawn from a different seed.
set.seed(2423L)
df2 <- data.frame(
  id = 51:100,
  wq2 = rnorm(50),
  wq3 = rnorm(50),
  wq4 = rnorm(50),
  wq5 = rnorm(50)
)
Code:
library(data.table)

# Convert both data frames to data.tables by reference.
setDT(df1)
setDT(df2)

# Values for id 52 before the update, in df1 ...
df1[id == 52]
# id wq2 wq3 wq4 wq5
# 1: 52 0.1836433 -0.6120264 0.04211587 -0.01855983

# ... and in df2.
df2[id == 52]
# id wq2 wq3 wq4 wq5
# 1: 52 0.3917297 -1.007601 -0.6820783 0.3153687

# Update join on id: rows of df1 that match a row of df2 take df2's values
# for all four wq columns (`i.` refers to columns of df2).
df1[
  df2,
  on = "id",
  c("wq2", "wq3", "wq4", "wq5") := list(i.wq2, i.wq3, i.wq4, i.wq5)
]

# After the update, id 52 in df1 carries df2's values.
df1[id == 52]
# id wq2 wq3 wq4 wq5
# 1: 52 0.3917297 -1.007601 -0.6820783 0.3153687