I\'ve searched on SO trying to find a solution to no avail. So here it is. I have a data frame with many columns, some of which are numerical and should be non-negative. I w
This is a very awkward use of dplyr
, but might be true to the spirit
> df %>% mutate(m = do.call(pmin, select(df, ends_with("_num"))))
id sth1 tg1_num sth2 tg2_num others m
1 1 dave 2 ca 35 new 2
2 2 tom 5 tn -3 old -3
3 3 jane -3 al 0 new -3
4 4 leroy 0 az 25 old 0
5 5 jerry 4 mi 55 old 4
From there you can add a filter(m >= 0)
to get the answer you want. If there were a rowMins
analogous to rowMeans
then that would simplify this significantly.
> rowMins <- function(df) { do.call(pmin, df) }
> df %>% mutate(m = rowMins(select(df, ends_with("_num"))))
id sth1 tg1_num sth2 tg2_num others m
1 1 dave 2 ca 35 new 2
2 2 tom 5 tn -3 old -3
3 3 jane -3 al 0 new -3
4 4 leroy 0 az 25 old 0
5 5 jerry 4 mi 55 old 4
I don't know how efficient this is, though. And nesting the select
seems real ugly.
EDIT3: Using ideas cribbed from other solutions/comments (h/t to @Vlo) I can speed mine up a lot (unfortunately, a similar optimization speeds up @Vlo's solution even more (EDIT4: Whoops, misread the chart, I am the fastest, ok, no more on this))
df %>% select(ends_with("_num")) %>% rowMins %>% {df[. >= 0,]}
EDIT: out of curiosity, did some microbenchmarking on some of the solutions (EDIT2: Added more solutions)
microbenchmark(rowmins(df), rowmins2(df), reducer(df), sapplyer(df), grepapply(df), tchotchke(df), withrowsums(df), reducer2(df))
Unit: microseconds
expr min lq mean median uq max
rowmins(df) 1373.452 1431.9700 1732.188 1576.043 1729.410 5147.847
rowmins2(df) 836.885 875.9900 1015.364 913.285 1038.729 2510.339
reducer(df) 990.096 1058.6645 1217.264 1201.159 1297.997 3103.809
sapplyer(df) 14119.236 14939.8755 16820.701 15952.057 16612.709 66023.721
grepapply(df) 12907.657 13686.2325 14517.140 14485.520 15146.294 17291.779
tchotchke(df) 2770.818 2939.6425 3114.233 3036.926 3172.325 4098.161
withrowsums(df) 1526.227 1627.8185 1819.220 1722.430 1876.360 3025.095
reducer2(df) 900.524 943.1265 1087.025 1003.820 1109.188 3869.993
And here are the definitions I used
rowmins <- function(df) {
df %>%
mutate(m = rowMins(select(df, ends_with("_num")))) %>%
filter(m >= 0) %>%
select(-m)
}
rowmins2 <- function(df) {
df %>% select(ends_with("_num")) %>% rowMins %>% {df[. >= 0,]}
}
reducer <- function(df) {
df %>%
select(matches("_num$")) %>%
lapply(">=", 0) %>%
Reduce(f = "&", .) %>%
which %>%
slice(.data = df)
}
reducer2 <- function(df) {
df %>%
select(matches("_num$")) %>%
lapply(">=", 0) %>%
Reduce(f = "&", .) %>%
{df[.,]}
}
sapplyer <- function(df) {
nums <- sapply(df, is.numeric)
df[apply(df[, nums], MARGIN=1, function(x) all(x >= 0)), ]
}
grepapply <- function(df) {
cond <- df[, grepl("_num$", colnames(df))] >= 0
df[apply(cond, 1, function(x) {prod(x) == 1}), ]
}
tchotchke <- function(df) {
pattern <- "_num$"
ind <- grep(pattern, colnames(df))
target_columns <- colnames(df)[ind]
desired_rows <- sapply(target_columns, function(x) which(df[,x]<0), simplify=TRUE)
as.vector(unique(unlist(desired_rows)))
}
withrowsums <- function(df) {
df %>% mutate(m=rowSums(select(df, ends_with("_num"))>0)) %>% filter(m==2) %>% select(-m)
}
df <- data.frame(id=1:10000, sth1=sample(LETTERS, 10000, replace=T), tg1_num=runif(10000,-1,1), tg2_num=runif(10000,-1, 1))