structure(list(group = c(17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L,
19L, 19L, 20L, 20L, 20L, 21L, 21L, 22L, 23L, 24L, 25L, 25L, 25L,
26L, 27L, 27L, 27L, 28L), var = c
library(dplyr)

# Drop any stale `first` column so the flag is always rebuilt from scratch.
df$first <- NULL

# Flag (1/0) the first row within each `group` where `var == 1`.
df %>%
  group_by(group) %>%
  # match(1, var) is the position of the first `var == 1` inside the group,
  # or NA when the group has none. Unlike min() over an empty selection it
  # does not warn, and %in% never yields NA, so groups without a hit (or with
  # missing `var` values) simply get 0 everywhere.
  mutate(first = as.numeric(row_number() %in% match(1, var))) %>%
  ungroup()
# # A tibble: 26 x 3
# group var first
#
# 1 17 74 0
# 2 17 49 0
# 3 17 1 1
# 4 18 74 0
# 5 18 1 1
# 6 18 49 0
# 7 18 61 0
# 8 19 49 0
# 9 19 1 1
# 10 19 5 0
# # ... with 16 more rows
The idea is to flag, within each group, the first row (i.e. the minimum row
number) where `var == 1`.
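Computing that position as `min(row_number()[var == 1])` emits warnings, because some groups have no `var == 1`
rows and `min()` of an empty vector returns `Inf` (with a warning); the flags are still correct, since no row number equals `Inf`.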
Another option would be this:
library(dplyr)

# Drop any stale `first` column so the flag is always rebuilt from scratch.
df$first <- NULL

# Work on a keyed copy: the temporary row id is needed to join the flag back,
# but it should never be left behind on `df` itself.
df_keyed <- df %>%
  mutate(id = seq_along(group))

# One row per group: the first row where `var == 1`, reduced to its id + flag.
first_hits <- df_keyed %>%
  filter(var == 1) %>%                        # keep cases where var == 1
  distinct(group, .keep_all = TRUE) %>%       # first such row in each group
  select(id) %>%
  mutate(first = 1)

# Join from the full data (left side) so the original row order is preserved;
# rows without a match get NA, which is then turned into 0.
df_keyed %>%
  left_join(first_hits, by = "id") %>%
  mutate(first = coalesce(first, 0)) %>%      # replace NAs with 0
  select(-id)                                 # remove the temporary row id
# # A tibble: 26 x 3
# group var first
#
# 1 17 74 0
# 2 17 49 0
# 3 17 1 1
# 4 18 74 0
# 5 18 1 1
# 6 18 49 0
# 7 18 61 0
# 8 19 49 0
# 9 19 1 1
#10 19 5 0
# # ... with 16 more rows