I've got a data frame with lots of columns. For each row of the data frame, I'd like to get a count of how many columns are NA. The problem is that I'm only interested in
Another `dplyr` solution:
library(dplyr)    # select(), starts_with() and %>% are provided by dplyr
library(stringr)
## Define the counting helper.
## NOTE: despite its name, count_na() returns the number of NON-missing
## values in `x` (it sums !is.na(x)).
count_na <- function(x) sum(!is.na(x))
## For every row, count the non-missing entries across the columns whose
## names start with "word", and store the result in a new column.
## apply() coerces the selected columns to a matrix before iterating over
## rows (MARGIN = 1); the result is one integer per row.
df$count_na <- df %>%
  select(starts_with("word")) %>%
  apply(., 1, count_na)
## A tibble: 10 × 6
id name word1 word2 word3 n_words
<int> <chr> <chr> <chr> <chr> <int>
1 1 apple a actual <NA> 2
2 2 apricot able add <NA> 2
3 3 avocado about address <NA> 2
4 4 banana absolute admit agree 3
5 5 bell pepper accept advertise <NA> 2
6 6 bilberry <NA> affect <NA> 1
7 7 blackberry achieve afford alright 3
8 8 blackcurrant across after <NA> 2
9 9 blood orange act afternoon <NA> 2
10 10 blueberry active again awful 3
You can use `is.na()` over the selected columns, then `rowSums()` the result:
library(dplyr)    # provides tibble()
library(stringr)  # provides the `fruit` and `words` example vectors

# Example data: ten rows with an id, a fruit name and three "word" columns,
# two of which (word1, word3) contain missing values.
# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(
  id = 1:10,
  name = fruit[1:10],
  word1 = c(words[1:5], NA, words[7:10]),
  word2 = words[11:20],
  word3 = c(NA, NA, NA, words[25], NA, NA, words[32], NA, NA, words[65])
)

# Columns 3:5 are word1..word3. is.na() yields a logical matrix, `!` flips it
# to "value present", and rowSums() counts the TRUEs in each row.
df$word_count <- rowSums(!is.na(df[, 3:5]))
df
id name word1 word2 word3 n_words
<int> <chr> <chr> <chr> <chr> <dbl>
1 1 apple a actual <NA> 2
2 2 apricot able add <NA> 2
3 3 avocado about address <NA> 2
4 4 banana absolute admit agree 3
5 5 bell pepper accept advertise <NA> 2
6 6 bilberry <NA> affect <NA> 1
7 7 blackberry achieve afford alright 3
8 8 blackcurrant across after <NA> 2
9 9 blood orange act afternoon <NA> 2
10 10 blueberry active again awful 3
Using `dplyr` you could do this:
# Per-row count of non-missing values in columns 3 to 5:
# pick the columns, flag the missing cells, negate the flags so TRUE means
# "value present", then sum across each row.
rowSums(!is.na(select(df, 3:5)))
library(dplyr)
library(stringr)
# Example data: ten rows with an id, a fruit name and three "word" columns,
# two of which (word1, word3) contain missing values. `fruit` and `words`
# are the example character vectors shipped with stringr.
# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(
  id = 1:10,
  name = fruit[1:10],
  word1 = c(words[1:5], NA, words[7:10]),
  word2 = words[11:20],
  word3 = c(NA, NA, NA, words[25], NA, NA, words[32], NA, NA, words[65])
)
library(purrr)
# by_row() is no longer exported by purrr (it was moved to the purrrlyr
# package), so the row-wise counts are computed with vectorised rowSums()
# inside mutate() instead. The result keeps the shape by_row(...,
# .collate = "cols") produced: the input columns plus an integer `.out`
# column. Inside the pipe, `.` is the data frame being piped in.

# Rowwise sum of NAs (across all columns)
df %>%
  mutate(.out = as.integer(rowSums(is.na(.))))

# Rowwise sum of non-NAs for word columns
df %>%
  select(starts_with("word")) %>%
  mutate(.out = as.integer(rowSums(!is.na(.))))