I want to calculate the percentage change of Profit by YEAR, which is a fairly simple task, but somehow I am getting NA. I have checked the same questi[…]
The problem lies in the fact that each group has only one observation: there is one unique year per VERTICAL. What is the lag of a single observation? It is always NA. Additionally, since the years are in descending order, I believe you need `lead` rather than `lag`.
library(tidyverse)
# Percentage change in Profit versus the previous year, within each VERTICAL.
# Rows are assumed to be sorted by descending YEAR inside each group, so the
# previous year sits on the *next* row and lead() is used rather than lag().
z %>%
  group_by(VERTICAL) %>%
  # The oldest year in each group has no predecessor, so it yields NA.
  # A previous-year Profit of 0 yields Inf (or NaN when the current Profit
  # is 0 as well), because the ratio divides by zero.
  mutate(pct_change = (Profit / lead(Profit) - 1) * 100) %>%
  # Drop the grouping so later verbs do not silently operate per VERTICAL.
  ungroup()
#output
YEAR VERTICAL Profit pct_change
1 2017 AGRICULTURE 0 -100
2 2016 AGRICULTURE 2053358 Inf
3 2015 AGRICULTURE 0 -100
4 2014 AGRICULTURE 2370747 - 41.7
5 2013 AGRICULTURE 4066693 NA
6 2017 COMMUNICATION 0 -100
7 2016 COMMUNICATION 1680074 27.0
8 2015 COMMUNICATION 1322470 - 9.43
9 2014 COMMUNICATION 1460133 - 4.56
10 2013 COMMUNICATION 1529863 NA
This solution assumes the years are already arranged in the correct order within each group. To make sure they are, sort explicitly before computing the change:
# Percentage change in Profit versus the previous year, within each VERTICAL,
# sorting explicitly instead of relying on the row order of the input.
z %>%
  group_by(VERTICAL) %>%
  # Ascending YEAR inside each VERTICAL puts the previous year on the
  # preceding row, so lag() is the matching offset.
  arrange(YEAR, .by_group = TRUE) %>%
  # The first year in each group yields NA; a previous-year Profit of 0
  # yields Inf (or NaN when the current Profit is 0 as well).
  mutate(pct_change = (Profit / lag(Profit) - 1) * 100) %>%
  # Drop the grouping so later verbs do not silently operate per VERTICAL.
  ungroup()
#output
YEAR VERTICAL Profit pct_change
1 2013 AGRICULTURE 4066693 NA
2 2014 AGRICULTURE 2370747 - 41.7
3 2015 AGRICULTURE 0 -100
4 2016 AGRICULTURE 2053358 Inf
5 2017 AGRICULTURE 0 -100
6 2013 COMMUNICATION 1529863 NA
7 2014 COMMUNICATION 1460133 - 4.56
8 2015 COMMUNICATION 1322470 - 9.43
9 2016 COMMUNICATION 1680074 27.0
10 2017 COMMUNICATION 0 -100
Alternatively, sort each group in descending order with
arrange(desc(YEAR), .by_group = TRUE)
and use `lead` instead of `lag`.
The example data `z` is:
# Literal definition of the example data frame `z` (this looks like dput()
# output -- confirm against the original post).
#   YEAR     integer, 2017 down to 2013 within each VERTICAL
#   VERTICAL factor with 12 levels, 5 consecutive rows per level
#   Profit   integer; many entries are 0, which is what produces the
#            Inf / NaN percentage changes when used as a denominator
# 60 rows in total. The row names are not contiguous (21-25 and 51-60 are
# absent), presumably because `z` is a subset of a larger data frame.
structure(list(YEAR = c(2017L, 2016L, 2015L, 2014L, 2013L, 2017L,
2016L, 2015L, 2014L, 2013L, 2017L, 2016L, 2015L, 2014L, 2013L,
2017L, 2016L, 2015L, 2014L, 2013L, 2017L, 2016L, 2015L, 2014L,
2013L, 2017L, 2016L, 2015L, 2014L, 2013L, 2017L, 2016L, 2015L,
2014L, 2013L, 2017L, 2016L, 2015L, 2014L, 2013L, 2017L, 2016L,
2015L, 2014L, 2013L, 2017L, 2016L, 2015L, 2014L, 2013L, 2017L,
2016L, 2015L, 2014L, 2013L, 2017L, 2016L, 2015L, 2014L, 2013L
), VERTICAL = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 12L, 12L, 12L, 12L, 12L), .Label = c("AGRICULTURE", "COMMUNICATION",
"CONSTRUCTION", "EDUCATION", "HEALTHCARE", "HOSPITALITY", "MANUFACTURING",
"MINING", "OTHER", "SERVICE", "TRANSPORTATION", "UTILITY"), class = "factor"),
Profit = c(0L, 2053358L, 0L, 2370747L, 4066693L, 0L, 1680074L,
1322470L, 1460133L, 1529863L, 0L, 0L, 0L, 8250149L, 0L, 0L,
12497015L, 13437356L, 10856685L, 13881127L, 0L, 0L, 0L, 4554364L,
5078130L, 0L, 4445512L, 5499419L, 9060639L, 4391522L, 0L,
0L, 0L, 0L, 27466974L, 0L, 4359251L, 4163201L, 6272530L,
6668191L, 0L, 0L, 0L, 5935199L, 3585969L, 0L, 0L, 0L, 0L,
28018522L, 0L, 0L, 0L, 0L, 8430244L, 0L, 3551989L, 6535248L,
3995486L, 4477617L)), .Names = c("YEAR", "VERTICAL", "Profit"
), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36",
"37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47",
"48", "49", "50", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75"), class = "data.frame")