I have the following data frame:
╔══════╦═════════╗
║ Code ║ Airline ║
╠══════╬═════════╣
║ 1 ║ AF ║
║ 1 ║ KL ║
║ 8 ║ AR ║
║ 8 ║ A
Several options using the data.table
package:
1) Using strsplit
, paste
& operate by row:
library(data.table)
# Convert `dat` to a data.table by reference and build `sharedwith` in three
# chained steps:
#   1. trim leading/trailing whitespace so airline codes compare exactly,
#   2. per Code, collapse every airline on that code into one string,
#   3. per row, drop the row's own airline from that string.
setDT(dat)[, Airline := trimws(Airline)
  ][, sharedwith := paste(Airline, collapse = ","), by = Code
  ][, sharedwith := {
      # Split once and reuse the pieces instead of calling strsplit() twice.
      parts <- unlist(strsplit(sharedwith, ",", fixed = TRUE))
      paste(parts[!parts %in% Airline], collapse = ",")
    },
    # seq_len() yields an empty grouping vector for a zero-row table,
    # whereas 1:nrow(dat) would yield c(1, 0).
    by = seq_len(nrow(dat))]
which gives:
> dat
Code Airline sharedwith
1: 1 AF KL
2: 1 KL AF
3: 8 AR AZ,DL
4: 8 AZ AR,DL
5: 8 DL AR,AZ
2) Using strsplit
& paste
with mapply
instead of by = 1:nrow(dat)
:
# Same idea as the by-row variant, but the per-row clean-up is done with
# mapply() over the (sharedwith, Airline) pairs instead of grouping by row.
setDT(dat)[, Airline := trimws(Airline)
  ][, sharedwith := paste(Airline, collapse = ","), by = Code
  ][, sharedwith := mapply(
      function(s, a) {
        # Airlines on this row's code, minus the row's own airline.
        parts <- unlist(strsplit(s, ","))
        keep <- !(parts %in% a)
        paste(parts[keep], collapse = ",")
      },
      sharedwith, Airline)][]
which will give you the same result.
3) Or by using the CJ
function with paste
(inspired by the expand.grid
solution of @zx8754):
library(data.table)
# Per Code: cross-join the airlines with themselves, drop the self-pairs and
# collapse the remaining partners for each airline.
# Both CJ() columns are named explicitly. Newer data.table releases name
# unnamed CJ() arguments after the expression passed in rather than V1/V2, so
# referring to an auto-generated `V2` is not portable across versions.
setDT(dat)[, Airline := trimws(Airline)
  ][, CJ(air = Airline, other = Airline, unique = TRUE)[air != other
      ][, .(shared = paste(other, collapse = ",")), by = air],
    by = Code]
which gives:
Code air shared
1: 1 AF KL
2: 1 KL AF
3: 8 AR AZ,DL
4: 8 AZ AR,DL
5: 8 DL AR,AZ
A solution with dplyr
& tidyr
to get the desired solution (inspired by @jaimedash):
# dplyr supplies the pipeline verbs; tidyr supplies nest()/unnest().
library(dplyr)
library(tidyr)
# Coerce Airline to character and strip surrounding whitespace so that the
# Airline != SharedWith comparison further down compares clean strings.
dat <- dat %>% mutate(Airline = trimws(as.character(Airline)))
dat %>%
# Copy Airline into a SharedWith column that will be nested per Code.
mutate(SharedWith = Airline) %>%
group_by(Code) %>%
# NOTE(review): nest(-Code, -Airline, .key = ...) and the argument-less
# unnest() below look like the pre-1.0 tidyr interface. Presumably this
# yields one row per Code with a SharedWith list-column; confirm against the
# installed tidyr version.
nest(-Code, -Airline, .key = SharedWith) %>%
# Join the nested table back onto the original rows by Code.
left_join(dat, ., by = 'Code') %>%
unnest() %>%
# Drop the rows that pair an airline with itself.
filter(Airline != SharedWith) %>%
group_by(Code, Airline) %>%
# toString() joins the remaining partners with ", ".
summarise(SharedWith = toString(SharedWith))
which gives:
Code Airline SharedWith
(int) (chr) (chr)
1 1 AF KL
2 1 KL AF
3 8 AR AZ, DL
4 8 AZ AR, DL
5 8 DL AR, AZ
split
helps. Here's a fully reproducible solution that works without any additional packages. It works with the OP's data.frame; I changed it after the OP added a reproducible dataset.
# Remove every blank from the airline names.
dat$Airline <- gsub(" ", "", dat$Airline)
# One data.frame per distinct Code.
li <- split(dat, factor(dat$Code))
# For each group, report the first row's second column as the airline and
# the remaining airlines of the group, comma-joined, as SharedWith. The
# per-group results are then stacked into one data.frame.
do.call("rbind", lapply(li, function(grp) {
  partners <- paste(grp$Airline[-1], collapse = ",")
  data.frame(Airline = grp[1, 2], SharedWith = partners)
}))
An igraph
approach
library(igraph)
# Build a graph from dat, which graph_from_data_frame() reads as an edge list.
g <- graph_from_data_frame(dat)
# For each airline vertex, collect the vertices at distance exactly two
# (order = 2 with mindist = 2), then label each result with its airline.
ne <- ego(g, order = 2, nodes = as.character(dat$Airline), mindist = 2)
names(ne) <- dat$Airline
ne
#$`AF `
#+ 1/7 vertex, named:
#[1] KL
#$`KL `
#+ 1/7 vertex, named:
#[1] AF
---
---
# Get final format: one row per airline, with the names of its neighbouring
# vertices collapsed into a comma-separated string.
# vapply() pins the result to a character vector, so the Shared column type
# does not depend on the shape of `ne` (sapply() returns a list for empty
# input).
data.frame(Airline = names(ne),
           Shared = vapply(ne,
                           function(x) paste(V(g)$name[x], collapse = ","),
                           character(1)))
# Airline Shared
# 1 AF KL
# 2 KL AF
# 3 AR AZ,DL
# 4 AZ AR,DL
# 5 DL AR,AZ
Using expand.grid and aggregate:
# For every Code, pair each airline with all other airlines on that code and
# collapse the partners into one comma-separated string per airline.
do.call(rbind,
        lapply(split(dat, dat$Code), function(i) {
          # De-duplicate so that repeated rows cannot create repeated partners.
          airlines <- unique(i$Airline)
          # An empty group (e.g. an unused factor level) contributes nothing;
          # rbind() ignores NULL.
          if (length(airlines) == 0L) {
            return(NULL)
          }
          # A code served by one airline has no partners. aggregate() errors
          # on zero rows, so return the row with an empty SharedWith instead.
          if (length(airlines) == 1L) {
            return(data.frame(Code = i$Code[1], Airline = airlines,
                              SharedWith = "", stringsAsFactors = FALSE))
          }
          # All ordered pairs of airlines, minus the self-pairs.
          x <- expand.grid(airlines, airlines)
          x <- x[x$Var1 != x$Var2, ]
          x <- aggregate(x$Var2, list(x$Var1), paste, collapse = ",")
          colnames(x) <- c("Airline", "SharedWith")
          # Every row of the group has the same Code; a single value is
          # recycled to however many airlines the group has.
          cbind(Code = i$Code[1], x)
        }))
# output
# Code Airline SharedWith
# 1.1 1 AF KL
# 1.2 1 KL AF
# 8.1 8 AR AZ,DL
# 8.2 8 AZ AR,DL
# 8.3 8 DL AR,AZ
You can try something like this in dplyr
library(dplyr)
# For each airline, list the other airlines that share its Code.
# Uses the `dat` data frame and its `Code` column, matching the rest of the
# thread.
dat %>%
  group_by(Code) %>%
  mutate(
    # Within the group, drop the row's own airline before collapsing;
    # otherwise every airline would be listed as shared with itself.
    SharedWith = vapply(
      as.character(Airline),
      function(a) paste(sort(setdiff(as.character(Airline), a)), collapse = ", "),
      character(1),
      USE.NAMES = FALSE
    )
  ) %>%
  ungroup() %>%
  select(Airline, SharedWith)
Take the following as a comment that is posted as an answer just because this allows more convenient formatting.
for each code
lookup all rows in the table where the value = code
ummm... sorry, I don't get how this pseudocode is related to your desired output
+--------------------+
| Airline SharedWith |
+--------------------+
| AF "KL" |
| KL "AF" |
| AR "AZ","DL" |
+--------------------+
The result of this pseudocode should rather be:
+---------------------+
+ Code + Airlines +
+---------------------+
+ 1 + AF, KL +
+ 8 + AR, AZ, DL +
+---------------------+
That is,
# One row per distinct Code, with all airlines on that code comma-joined.
codes <- unique(dat$Code)
data.frame(
  Code = codes,
  # vapply() guarantees a character vector even when `codes` is empty,
  # where sapply() would return a list.
  Airlines = vapply(
    codes,
    function(x) paste(dat$Airline[dat$Code %in% x], collapse = ","),
    character(1)
  )
)