I have
household person time mode
1 1 07:45:00 non-car
1 1 09:05:00 car
Here's a dplyr
approach that produces those matches.
library(dplyr); library(hms)
df %>%
# Connect the table to itself, linking by household.
# So every row gets linked to every row (including itself)
# with the same household. The original data with end .x and
# the joined data will end .y, so we can compare then below.
left_join(df, by = c("household")) %>%
# Find the difference in time, in seconds
mutate(time_dif = abs(time.y - time.x)) %>%
filter(time_dif < 3600, # Keep if <1hr difference
person.x != person.y, # Keep if different person
mode.x != mode.y) %>% # Keep if different mode
# We have the answers now, everything below is for formatting
# Rename and hide some variables we don't need any more
select(household, person = person.x, time = time.x,
mode = mode.x, other = person.y) %>%
# Combine each person's overlaps into one row
group_by(household, person, time) %>%
summarise(overlaps = paste(other, collapse =","), times = length(other)) %>%
# Add back all original rows, even if no overlaps
right_join(df) %>%
ungroup()
## A tibble: 7 x 6
# household person time overlaps times mode
# <int> <int> <time> <chr> <int> <chr>
#1 1 1 07:45 4 1 non-car
#2 1 1 09:05 2 1 car
#3 1 2 08:10 1,4 2 non-car
#4 1 3 22:45 NA NA non-car
#5 1 4 08:30 1,2 2 car
#6 2 1 07:45 NA NA non-car
#7 2 2 16:45 NA NA car