Diagonal product multiplication

后端 未结 1 1912
感情败类
感情败类 2021-01-15 20:00

I want the diagonal multiplication of each group dates.

main Data Set:

 date      Bucket                 D            
1/31/2013   bkt 0                     


        
相关标签:
1条回答
  • 2021-01-15 20:08
    # Change to date format so can be ordered
    d$date = as.Date(d$date, format="%m/%d/%Y")
    # Form matrix so easier to find diagonals
    # [-1, -2] removes rows & columns of all NA
    r <- reshape2::dcast(data=d, Bucket ~ date, value.var="D")[-1, -2]
    # convert to matrix to allow row and col functions & remove non-numeric grouping
    mat <- as.matrix(r[-1]) 
    
    # Multiply diagonals
    myD <- col(mat) - row(mat)
    rg <- range(myD)
    out <- sapply(seq(rg[1], rg[2]), function(x) 
              `length<-`(rev(cumprod(rev(mat[myD==x]))), nrow(mat)))[,1:ncol(mat)]
    # remove if not needed: ensures four values in product
    # not sure if needed: done to match expected outcome
    out[, colSums(is.na(out)) > 0] <- NA 
    
    # reshape
    colnames(out) <- colnames(mat) # add dates as headers
    out <- reshape2::melt(cbind(r[1], out))
    
    # merge with original data
    out <- merge(d, out, by.x=c("date", "Bucket"), by.y=c("variable", "Bucket"), all=TRUE)
    

    Which gives

    out[21:35,]
    
             date       Bucket      D     value
    21 2013-05-30        bkt 0     NA        NA
    22 2013-05-30 bkt 1(10-20)   2.34  13608.00
    23 2013-05-30 bkt 2(20-30)   4.10   4536.00
    24 2013-05-30 bkt 3(30-40) 107.00  11340.00
    25 2013-05-30   bkt 4(40+) 108.00    108.00
    26 2013-06-30        bkt 0     NA        NA
    27 2013-06-30 bkt 1(10-20)   4.00  23628.28
    28 2013-06-30 bkt 2(20-30)   5.00  42960.50
    29 2013-06-30 bkt 3(30-40) 109.00  11770.00
    30 2013-06-30   bkt 4(40+) 110.00    110.00
    31 2013-07-30        bkt 0     NA        NA
    32 2013-07-30 bkt 1(10-20)   8.00 212724.40
    33 2013-07-30 bkt 2(20-30)   7.00  50052.80
    34 2013-07-30 bkt 3(30-40) 111.00  12208.00
    35 2013-07-30   bkt 4(40+) 112.00    112.00
    

    Data (*corrected April and June dates)

    d <- 
        structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
    5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L), .Label = c("1/31/2013", 
    "2/28/2013", "3/30/2013", "4/30/2013", "5/30/2013", "6/30/2013", 
    "7/30/2013"), class = "factor"), Bucket = structure(c(1L, 2L, 
    3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 
    4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 
    5L), .Label = c("bkt 0", "bkt 1(10-20)", "bkt 2(20-30)", "bkt 3(30-40)", 
    "bkt 4(40+)"), class = "factor"), D = c(NA, NA, NA, NA, NA, NA, 
    3, 3.63, 101, 102, NA, 0.55, 0.4, 103, 104, NA, 4.25, 3.65, 105, 
    106, NA, 2.34, 4.1, 107, 108, NA, 4, 5, 109, 110, NA, 8, 7, 111, 
    112)), .Names = c("date", "Bucket", "D"), class = "data.frame", row.names = c(NA, 
    -35L))
    
    0 讨论(0)
提交回复
热议问题