问题
I have the following dataframe in R showing several attributes for some community districts (field CD) in two different years:
# Example data with a single attribute (numbldgs)
# Toy data set: one attribute (numbldgs) for six community districts (CD),
# each observed in 2007 and again in 2018 -- 12 rows in total.
x <- structure(
  list(
    numbldgs = c(
      195, 845, 3621, 3214, 10738, 793, # 2007
      223, 957, 4248, 3456, 11576, 803  # 2018
    ),
    Year = rep(c("2007", "2018"), each = 6),
    CD = rep(c("103", "111", "210", "313", "414", "501"), times = 2)
  ),
  row.names = c(NA, -12L),
  class = c("tbl_df", "tbl", "data.frame")
)
Thanks to input from folks here, I have been able to calculate the percentage increase per CD over time, using:
# Percentage change in the number of buildings for each community district
# (CD), relative to the previous year's row of the same CD.
#
# Rows are sorted by Year *within* each CD because lag() simply looks at the
# row above: sorting by CD alone would only give correct results if the input
# already happened to be in chronological order.
#
# The earliest year of each CD has no predecessor; its lagged value defaults
# to the value itself, so rel_inc is 0 there (not NA).
# The result is still grouped by CD.
x2 <- x %>%
  arrange(CD, Year) %>%
  group_by(CD) %>%
  mutate(
    rel_inc = 100 * (numbldgs - lag(numbldgs, default = first(numbldgs))) /
      lag(numbldgs, default = first(numbldgs))
  )
Now I am trying to apply the same function to several fields of a larger dataset, like the one below.
# Larger example: 17 columns for six community districts (CD), each observed
# in 2007 and in 2018 (12 rows). Within every column the first six values
# belong to 2007 and the last six to 2018, in the same CD order.
x <- structure(
  list(
    Year = rep(c("2007", "2018"), each = 6),
    SFHA_effective = rep("yes", 12),
    CD = rep(c("103", "111", "210", "313", "414", "501"), times = 2),
    Abb = rep(c("LES", "EH", "CI/Co", "CI", "RA", "SP"), times = 2),
    numbldgs = c(
      195, 845, 3621, 3214, 10738, 793,
      223, 957, 4248, 3456, 11576, 803
    ),
    resunits = c(
      15174, 18475, 20115, 24705, 29052, 1891,
      16454, 21256, 20375, 24932, 26868, 2047
    ),
    resarea = c(
      14858870, 14749327, 24253820, 23481299, 23925402, 2340073,
      15931286, 17683557, 23350742, 24030181, 26324026, 1817435
    ),
    factryarea = c(
      4532, 275223, 164443, 172154, 149287, 2006414,
      4532, 54760, 301298, 258694, 202702, 1052597
    ),
    commarea = c(
      1681049, 4976927, 4074631, 5551895, 3627362, 6017128,
      1824355, 6283092, 5890039, 6682742, 4507698, 5911856
    ),
    officearea = c(
      129729, 204418, 462429, 1912298, 231134, 530517,
      129919, 215750, 692181, 1959893, 298734, 599141
    ),
    TotalLandValue = c(
      676413625, 889697539, 485997234, 904462580, 1193787898, 326146766,
      1216611479, 1568389461, 988172432, 1473477767, 1486619757, 461667391
    ),
    TotPop_p = c(
      50189, 61958, 48320, 69280, 93751, 41346,
      46009, 64686, 51867, 73806, 104715, 44024
    ),
    TotPop_se = c(
      1544.96479999394, 1932.91122991911, 1477.67713526889,
      1621.11087850568, 2060.0971658131, 1349.79788954655,
      1463.73132791495, 1811.36511978071, 2126.19689193412,
      1625.67922346675, 2428.71826421752, 1460.56966131475
    ),
    POC_p = c(
      0.66801490366415, 0.768504470770522, 0.516804635761589,
      0.337759815242494, 0.527898369084063, 0.575630048855996,
      0.701362776848008, 0.741968895897103, 0.586230165615902,
      0.37544373086199, 0.515962374062933, 0.534185898600763
    ),
    POC_se = c(
      0.0214554297500083, 0.0197231495256417, 0.0212115659870603,
      0.0180918607533261, 0.0160111622396801, 0.0254703302968768,
      0.0249330451532724, 0.0205125632282244, 0.031076574811459,
      0.0161455000463029, 0.0143444833627841, 0.025013791232592
    ),
    White_p = c(
      0.576898518414303, 0.56703233256351, 0.372785800348877,
      0.3746093375004, 0.419207632046567, 0.276616843225463,
      0.565697382933922, 0.562515242663198, 0.350836117682446,
      0.352671537029079, 0.382279196038988, 0.240164455751408
    ),
    White_se = c(
      0.00675552182469777, 0.00812057705141857, 0.00750732072656769,
      0.0075838608821734, 0.0070183352536629, 0.00975357052715687,
      0.00744338106611418, 0.00899123586507761, 0.00853192425238527,
      0.00870241005563739, 0.00554259946464601, 0.00817859134095142
    )
  ),
  row.names = c(NA, -12L),
  class = c("tbl_df", "tbl", "data.frame")
)
Suppose I wanted to calculate the % increment for the columns `numbldgs`, `resarea` and `resunits`, generating 3 new columns that can be differentiated from each other, e.g. `inc_bldgs`, `inc_resarea`, `inc_resunits`.
Based on this question, it seems like `mutate_at` might be the way to go, but I am unsure how to create the new fields with different names and how to define the generic function I need.
回答1:
Is this what you are looking for?
Passing the function in a named list, `list(mod = transform)`, adds the list name as a suffix (`_mod`) to the new columns created.
# Percentage change of every element of `x` relative to the element before it.
#
# x: a numeric vector, ordered so that "previous element" is meaningful
#    (e.g. one group's values sorted by year).
# Returns a numeric vector of the same length as `x`. The first element has
# no predecessor, so it is compared with itself and yields 0 rather than NA.
# A previous value of 0 gives Inf or NaN, as usual for division by zero.
#
# The shifted vector is built once with base R instead of calling an
# unqualified lag(): without dplyr attached, `lag` resolves to stats::lag(),
# which does not shift the values and would silently give wrong results.
#
# NOTE: this name masks base::transform() wherever it is defined; the name is
# kept so that existing calls keep working.
transform <- function(x) {
  # First value repeated at the front, last value dropped: c(x1, x1, ..., x[n-1]).
  previous <- c(head(x, 1L), head(x, -1L))
  100 * (x - previous) / previous
}
# Append a percentage-change column for each selected field, computed per
# community district (CD). The new columns are named <column>_mod.
x %>%
  # Sort by Year inside each CD: transform() compares a row with the row above
  # it, so years must be in chronological order within a district.
  arrange(CD, Year) %>%
  group_by(CD) %>%
  mutate(
    # across() supersedes mutate_at(). The name used in the list (`mod`) is
    # appended to each source column name: numbldgs_mod, resarea_mod,
    # resunits_mod.
    across(
      all_of(c("numbldgs", "resarea", "resunits")),
      list(mod = transform)
    )
  )
# Output (excerpt: first three CDs, showing only CD and the new *_mod columns):
# A tibble: 6 x 4
# Groups: CD [3]
# CD numbldgs_mod resarea_mod resunits_mod
# <chr> <dbl> <dbl> <dbl>
# 1 103 0 0 0
# 2 103 14.4 7.22 8.44
# 3 111 0 0 0
# 4 111 13.3 19.9 15.1
# 5 210 0 0 0
# 6 210 17.3 -3.72 1.29
来源:https://stackoverflow.com/questions/61416762/mutate-df-to-generate-several-new-columns-using-a-function-on-several-original-f