“recursive” self join in data.table

倖福魔咒の 提交于 2019-12-04 11:16:54
Cole

Here's my attempt using your dataset.

It uses a while loop that checks whether there are any components that also appear in the prodName field. The loop always needs to have the same fields, so instead of adding a column for each recursive multiplier (i.e., 5*8*7 at the end), the multipliers are folded together at each iteration. That is, 5*8*7 becomes 5*56 at the end.

library(data.table)

# `a` is not defined in this snippet; the code below reads its prodName,
# component and qty columns.
# Add a multiplier column (by reference) starting at 1, then take a copy so
# the loop can keep reassigning `b` while `a` is only read from.
a[, qty_multiplier := 1]
b <- copy(a)

# Repeat while at least one component in `b` also occurs in `b`'s prodName
# column.
# NOTE(review): presumably relies on the data having no cycles to terminate -
# confirm.
while (b[component %in% prodName, .N] > 0) {
  # Join with `b` as x and `a` as i: for each row of `a`, pick up the rows of
  # `b` whose prodName equals that row's component. Rows of `a` without a match
  # have NA in the x.* columns, which the is.na() tests below handle:
  #   component      - b's component when matched, otherwise a's own component
  #   qty            - always a's qty
  #   qty_multiplier - 1 when unmatched, otherwise b's qty times qty_multiplier
  #                    (the unprefixed qty_multiplier presumably resolves to
  #                    b's column - confirm against data.table scoping rules)
  b <- b[a
         , on = .(prodName = component)
         , .(prodName = i.prodName
             , component = ifelse(is.na(x.component), i.component, x.component)
             , qty = i.qty
             , qty_multiplier = ifelse(is.na(x.qty), 1, x.qty * qty_multiplier)
         )
         ]
}

# Keep the rows whose prodName matches the pattern 'prod' and total
# qty * qty_multiplier for each (prodName, component) pair.
b[prodName %like% 'prod', .(qty = sum(qty * qty_multiplier)), by = .(prodName, component)] 

   prodName component qty
1:    prod1         a  13
2:    prod1         b  14
3:    prod2         b   3
4:    prod3         b 284
5:    prod3         a 240
6:    prod3         d  45
Joris Chau

Essentially, your data represents a weighted edgelist in a directed graph. The code below uses the igraph library to compute, for each raw component -> final product pair, the sum over all simple paths of the product of the edge weights along each path:

library(igraph)

## transform edgelist into graph
## (assumes column 2 of `a` is the component, column 1 the product and
## column 3 the quantity, so that edges run component -> product)
graph <- graph_from_edgelist(as.matrix(a[, c(2, 1)])) %>%
  set_edge_attr("weight", value = unlist(a[, 3]))

## combinations raw components -> final products
out <- expand.grid(
  prodname = c("prod1", "prod2", "prod3"),
  component = c("a", "b", "d"),
  stringsAsFactors = FALSE
)

## calculate quantities
out$qty <- mapply(function(component, prodname) {

  ## all simple paths from component -> prodname
  all_paths <- all_simple_paths(graph, from = component, to = prodname)

  ## product of the edge weights along each path; vapply() guarantees a
  ## numeric vector (numeric(0) when there are no paths), unlike sapply()
  path_products <- vapply(
    all_paths,
    function(path) prod(E(graph, path = path)$weight),
    numeric(1)
  )

  ## sum over all paths; sum(numeric(0)) is 0, so the "no path" case needs
  ## no separate branch
  sum(path_products)

}, out$component, out$prodname)

out
#>   prodname component qty
#> 1    prod1         a  13
#> 2    prod2         a   0
#> 3    prod3         a 240
#> 4    prod1         b  14
#> 5    prod2         b   3
#> 6    prod3         b 284
#> 7    prod1         d   0
#> 8    prod2         d   0
#> 9    prod3         d  45

I think you are better off representing the information in a set of adjacency matrices that tell you "how much of this is made of that". You need 4 matrices, corresponding to all the possible relationships. For example you put the relationship between final product and intermediate in a matrix with 3 rows and 2 columns like this:

# Final products (rows) x intermediate products (columns): units of the
# intermediate needed to make one unit of the final product.
QPI <- matrix(
  0,
  nrow = 3,
  ncol = 2,
  dimnames = list(c("p1", "p2", "p3"), c("i1", "i2"))
)

QPI["p1", "i1"] <- 2
QPI["p3", "i2"] <- 5

   i1 i2
p1  2  0
p2  0  0
p3  0  5

this tells you that it takes 2 units of intermediate product i1 to make one unit of final product p1.

Similarly you define the other matrices:

# Final products (rows) x raw materials (columns): units of raw material
# used directly in one unit of the final product.
QPR <- matrix(
  0,
  nrow = 3,
  ncol = 3,
  dimnames = list(c("p1", "p2", "p3"), c("a", "b", "d"))
)

QPR["p1", "a"] <- 1
QPR["p2", "b"] <- 3
QPR["p3", "b"] <- 4

# Intermediate products (rows) x raw materials (columns): units of raw
# material needed to make one unit of the intermediate.
QIR <- matrix(
  0,
  nrow = 2,
  ncol = 3,
  dimnames = list(c("i1", "i2"), c("a", "b", "d"))
)

QIR["i1", "a"] <- 6
QIR["i1", "b"] <- 7
QIR["i2", "d"] <- 9

# Intermediate products (rows) x intermediate products (columns): units of
# the column intermediate needed to make one unit of the row intermediate.
QII <- matrix(0, 2, 2)
row.names(QII) <- colnames(QII) <- c("i1", "i2")

# One unit of i2 is built from 8 units of i1. Without this entry the
# QPI %*% QII %*% QIR term is all zeros and p3 comes out as 0 / 4 / 45
# instead of the 240 / 284 / 45 shown in the result.
QII["i2", "i1"] <- 8

For example looking at QIR we see it takes 6 units of raw material a to make one unit of intermediate product i1. Once you have it in this way you sum over all possible ways of going from raw material to final product using matrix multiplication.

You have 3 terms: you can go directly from raw to final [QPR], or go from raw to intermediate to final [QPI%*%QIR], or go from raw to intermediate to another intermediate to final [QPI%*%QII%*%QIR].

Your result is then given by the matrix

# Raw material per final product: routes through one intermediate, plus
# routes through two intermediates, plus direct use.
result <- (QPI %*% QIR) + (QPI %*% QII %*% QIR) + QPR

I put all the code together below. If you run it you will see that the result looks like this:

     a   b  d
p1  13  14  0
p2   0   3  0
p3 240 284 45

which says exactly the same thing as

prodName  |component  |qty
prod1     |a          |1+2*6 = 13
prod1     |b          |0+2*7 = 14
prod2     |b          |3
prod3     |b          |4+5*8*7 = 284
prod3     |a          |0+5*8*6 = 240
prod3     |d          |0+5*9 = 45

hope this helps


# Complete example: raw-material requirements per final product, computed
# from "units of column item per unit of row item" matrices.

# Final products x intermediates.
QPI <- matrix(
  0,
  nrow = 3,
  ncol = 2,
  dimnames = list(c("p1", "p2", "p3"), c("i1", "i2"))
)
QPI["p1", "i1"] <- 2
QPI["p3", "i2"] <- 5

# Final products x raw materials (direct use).
QPR <- matrix(
  0,
  nrow = 3,
  ncol = 3,
  dimnames = list(c("p1", "p2", "p3"), c("a", "b", "d"))
)
QPR["p1", "a"] <- 1
QPR["p2", "b"] <- 3
QPR["p3", "b"] <- 4

# Intermediates x raw materials.
QIR <- matrix(
  0,
  nrow = 2,
  ncol = 3,
  dimnames = list(c("i1", "i2"), c("a", "b", "d"))
)
QIR["i1", "a"] <- 6
QIR["i1", "b"] <- 7
QIR["i2", "d"] <- 9

# Intermediates x intermediates: one unit of i2 takes 8 units of i1.
QII <- matrix(
  0,
  nrow = 2,
  ncol = 2,
  dimnames = list(c("i1", "i2"), c("i1", "i2"))
)
QII["i2", "i1"] <- 8

# Sum the three routes from raw material to final product: via one
# intermediate, via two intermediates, and direct.
result <- (QPI %*% QIR) + (QPI %*% QII %*% QIR) + QPR
print(result)
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!