This is what my data frame looks like:
library(data.table)
df <- fread(\'
Name EventType Date SalesAmount RunningTotal Runningt
Using the newer non-equi join feature in data.table:
# 365-day rolling total per Name via a non-equi self-join: every row supplies a
# window [Date - 365, Date] and is matched against the rows of the same Name
# whose Date falls inside that window.
df1 <- df[
  .(iName = Name, start = Date - 365L, end = Date),
  on = .(Name = iName, Date >= start, Date <= end),
  nomatch = 0L,
  allow.cartesian = TRUE
][
  ,
  .(MyTotal = sum(SalesAmount)),
  # Date.1 is the column produced for the second Date condition (Date <= end).
  by = .(Name, Date = Date.1)
]
# Join the per-row totals back to df on Name and Date.
df[df1, on = .(Name, Date)]
Here's an approach using the `foverlaps()` function from the data.table package:
# Rolling 365-day sales total per Name, computed with an interval self-join.
# library() (rather than require()) stops with an error if data.table is missing.
library(data.table)

# Give every row the interval [start, end] = [event date - 365, event date].
setDT(df)[, end := as.Date(EventDate, format = "%d/%m/%Y")][
  , start := end - 365L]
# Key on Name plus the interval columns (interval columns last) for foverlaps().
setkey(df, Name, start, end)

# which = TRUE returns the row-index pairs (xid, yid) of overlapping intervals
# rather than the joined rows themselves.
olaps <- foverlaps(df, df, nomatch = 0L, which = TRUE)
# Keep only pairs whose matched row sits at or before the current row in key
# order, then sum the matched rows' sales for each current row. The sales are
# looked up in df itself, indexed by yid.
olaps <- olaps[xid >= yid, .(ans = sum(df$SalesAmount[yid])), by = xid]
# Write each total back onto its row by reference.
df[olaps$xid, Runningtotal := olaps$ans]
You can remove the `start` and `end` columns, if necessary, by doing:
# Delete the helper interval columns by reference (assigning NULL removes a column).
df[, `:=`(start = NULL, end = NULL)]
It would be nice to know how fast or slow this is.
Give this a try:
# Example data: one row per event for two names (John and Tom).
# header = TRUE takes the column names from the first line of the text; the
# printed result further down shows the last one as Runningtotal.prior365Days.
# EventDate is not a Date yet at this point; it is converted with as.Date()
# after the table has been read.
DF <- read.table(text = "Name EventType EventDate SalesAmount RunningTotal Runningtotal(prior365Days)
John Email 1/1/2014 0 0 0
John Sale 2/1/2014 10 10 10
John Sale 7/1/2014 20 30 30
John Sale 4/1/2015 30 60 50
John Webinar 5/1/2015 0 60 50
Tom Email 1/1/2014 0 0 0
Tom Sale 2/1/2014 15 15 15
Tom Sale 7/1/2014 10 25 25
Tom Sale 4/1/2015 25 50 35
Tom Webinar 5/1/2015 0 50 35", header = TRUE)
# Trailing-window sum: for each position j, add up x[i] over every i <= j whose
# date lies within `thresh` of date[j].
#   x       values to sum
#   date    dates aligned with x; "earlier" is decided by position, so the
#           input is expected to be sorted in ascending date order
#   thresh  window width, in the units of the differences between dates
# Returns one total per element of x.
fun <- function(x, date, thresh) {
  # Pairwise date gaps, reduced to "is this pair inside the window?".
  in_window <- as.matrix(dist(date)) <= thresh
  # Keep the diagonal and upper triangle only, so column j sees rows i <= j.
  eligible <- in_window & upper.tri(in_window, diag = TRUE)
  # x recycles down the columns: cell [i, j] is x[i] when eligible, else 0.
  colSums(eligible * x)
}
library(data.table)
# Convert DF to a data.table by reference and parse EventDate as month/day/year.
setDT(DF)[, EventDate := as.Date(EventDate, format = "%m/%d/%Y")]
# Sort by Name, then EventDate: fun() decides "earlier" by row position.
setkeyv(DF, c("Name", "EventDate"))
# Within each Name, total the sales of the trailing 365 days.
DF[
  ,
  RT365 := fun(x = SalesAmount, date = EventDate, thresh = 365),
  by = "Name"
]
# Name EventType EventDate SalesAmount RunningTotal Runningtotal.prior365Days. RT365
# 1: John Email 2014-01-01 0 0 0 0
# 2: John Sale 2014-02-01 10 10 10 10
# 3: John Sale 2014-07-01 20 30 30 30
# 4: John Sale 2015-04-01 30 60 50 50
# 5: John Webinar 2015-05-01 0 60 50 50
# 6: Tom Email 2014-01-01 0 0 0 0
# 7: Tom Sale 2014-02-01 15 15 15 15
# 8: Tom Sale 2014-07-01 10 25 25 25
# 9: Tom Sale 2015-04-01 25 50 35 35
#10: Tom Webinar 2015-05-01 0 50 35 35