Having a data frame which provides a specific timestamp
dframe1 <- structure(list(id = c(1L, 1L, 1L, 2L, 2L), name = c(\"Google\",
\"Yahoo\", \"Amazon\",
library(dplyr)
# Convert the `date` column of both inputs to class Date so the two columns can
# be subtracted from each other later on.
# NOTE(review): presumably `date` is stored as character/factor in the original
# structure() definitions -- confirm against the full data.
dframe1 <- dframe1 %>%
  mutate(date = as.Date(date))
dframe2 <- dframe2 %>%
  mutate(date = as.Date(date))
Paste together `text_sth` inside each `id`, `date` group in dframe2. They'll appear together anyway in the output.
df2 <-
# One row per (id, date): all text_sth values of that day are collapsed into a
# single space-separated string.
summarise(
  group_by(dframe2, id, date),
  text_sth = paste(text_sth, collapse = " ")
)
# Print the aggregated table.
df2
#> # A tibble: 10 x 3
#> # Groups: id [2]
#> id date text_sth
#> <int> <date> <chr>
#> 1 1 2008-10-31 another text other
#> 2 1 2008-11-01 test text_sth
#> 3 1 2008-11-02 another one test text_sth another text
#> 4 1 2008-11-03 other
#> 5 1 2008-11-04 text here
#> 6 1 2008-11-05 text here
#> 7 2 2008-10-31 etc
#> 8 2 2008-11-01 test text_sth
#> 9 2 2008-11-02 text here another text
#> 10 2 2008-11-03 text here
Join by `id`, keep only rows where the difference between the date from the first df and the date from the second df is either 1 or -1. Depending on the sign, populate the `label` variable.
left_join(dframe1, df2, by = "id") %>%
# date.x comes from dframe1 and date.y from df2 (join suffixes); date_diff is
# the signed gap in days, and label marks which side of the dframe1 date the
# df2 row falls on.
mutate(
  date_diff = as.numeric(date.y - date.x),
  label = ifelse(date_diff == -1, "before", "after")
) %>%
# Keep only the df2 rows dated exactly one day before or after; rows with a
# missing gap are dropped by filter() as well.
filter(date_diff %in% c(-1, 1)) %>%
select(id, name, label, text_sth)
#> id name label text_sth
#> 1 1 Google before another text other
#> 2 1 Google after another one test text_sth another text
#> 3 1 Yahoo before another text other
#> 4 1 Yahoo after another one test text_sth another text
#> 5 1 Amazon before other
#> 6 1 Amazon after text here
#> 7 2 Amazon before etc
#> 8 2 Amazon after text here another text
#> 9 2 Google before test text_sth
#> 10 2 Google after text here