My goal is generating a Los Angles zipcode map filled by another variable, say number of crime events using ggplot. As the filling variable that is originally not stored in
You may need to install ggplot2
from GitHub for this. Probably not, but I'm always on the dev version of it and can't really downgrade for this example.
Switch to sf
:
library(sf)
library(tidyverse)
sf::st_read(
path.expand("~/Data/cb_2017_us_zcta510_500k/cb_2017_us_zcta510_500k.shp"),
stringsAsFactors = FALSE
) -> ztca
## Reading layer `cb_2017_us_zcta510_500k' from data source `/Users/hrbrmstr/Data/cb_2017_us_zcta510_500k/cb_2017_us_zcta510_500k.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33144 features and 5 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34122
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
ztca
## Simple feature collection with 33144 features and 5 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34122
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
## ZCTA5CE10 AFFGEOID10 GEOID10 ALAND10 AWATER10 geometry
## 1 35442 8600000US35442 35442 610213891 10838694 MULTIPOLYGON (((-88.25262 3...
## 2 85365 8600000US85365 85365 3545016067 9766486 MULTIPOLYGON (((-114.6847 3...
## 3 71973 8600000US71973 71973 204670474 1264709 MULTIPOLYGON (((-94.46643 3...
## 4 95445 8600000US95445 95445 221559097 7363179 MULTIPOLYGON (((-123.6431 3...
## 5 06870 8600000US06870 06870 5945321 3841130 MULTIPOLYGON (((-73.58766 4...
## 6 19964 8600000US19964 19964 24601672 124382 MULTIPOLYGON (((-75.74767 3...
## 7 32233 8600000US32233 32233 26427514 8163517 MULTIPOLYGON (((-81.45931 3...
## 8 90401 8600000US90401 90401 2199165 439378 MULTIPOLYGON (((-118.5028 3...
## 9 30817 8600000US30817 30817 498860155 105853904 MULTIPOLYGON (((-82.5951 33...
## 10 30458 8600000US30458 30458 378422575 12351325 MULTIPOLYGON (((-81.73032 3...
It should have read that in much faster.
Now we need your data. First the zips:
c(
91306, 90008, 90014, 90094, 91326, 90212, 90001, 90006, 90301, 91401, 90095,
90021, 90089, 90057, 90029, 90502, 90036, 90745, 91330, 91411, 91601, 91607,
90302, 90073, 91340, 91602, 90071, 90039, 90011, 90248, 91303, 90020, 90732,
90405, 91342, 90077, 90061, 90015, 90002, 90041, 90211, 90059, 90066, 90210,
91343, 90062, 91606, 91403, 90063, 90012, 91307, 90293, 91402, 91364, 90007,
91331, 90005, 91040, 90031, 91324, 90043, 91316, 91352, 90069, 91325, 91344,
90019, 90056, 90032, 90291, 90027, 90247, 90232, 90013, 90042, 90035, 90024,
90026, 90034, 90016, 90018, 90004, 90067, 90010, 90025, 90047, 90037, 91605,
90049, 90744, 90028, 90272, 90275, 90731, 90710, 91042, 91345, 90003, 91335,
91304, 91311, 91214, 91604, 91608, 91405, 91367, 91406, 91436, 91504, 91423,
91505, 91356, 90292, 90402, 90065, 90501, 90230, 90048, 90046, 90038, 90044,
90810, 90023, 90068, 90045, 90717, 90064, 90058, 90033, 90017, 90090
) %>% as.character() -> zip_interest
The ZCTA5CE10
field is the zip (IIRC) so we filter those out:
la_df <- filter(ztca, ZCTA5CE10 %in% zip_interest)
And, then we make your zip-fill data:
data_frame(
zipcode = c(
90001, 90002, 90003, 90004, 90005, 90006,
90007, 90008, 90010, 90011, 90012, 90013, 90014, 90015, 90016,
90017, 90018, 90019, 90020, 90021, 90023, 90024, 90025, 90026,
90027, 90028, 90029, 90031, 90032, 90033, 90034, 90035, 90036,
90037, 90038, 90039, 90041, 90042, 90043, 90044, 90045, 90046,
90047, 90048, 90049, 90056, 90057, 90058, 90059, 90061, 90062,
90063, 90064, 90065, 90066, 90067, 90068, 90069, 90071, 90073,
90077, 90089, 90090, 90094, 90095, 90210, 90211, 90212, 90230,
90232, 90247, 90248, 90272, 90275, 90291, 90292, 90293, 90301,
90302, 90402, 90405, 90501, 90502, 90710, 90717, 90731, 90732,
90744, 90745, 90810, 91040, 91042, 91214, 91303, 91304, 91306,
91307, 91311, 91316, 91324, 91325, 91326, 91330, 91331, 91335,
91340, 91342, 91343, 91344, 91345, 91352, 91356, 91364, 91367,
91401, 91402, 91403, 91405, 91406, 91411, 91423, 91436, 91504,
91505, 91601, 91602, 91604, 91605, 91606, 91607, 91608) %>%
as.character(),
fillvar = c(
7086,
20731, 45814, 24883, 15295, 23099, 22163, 24461, 6351, 40047,
16028, 22822, 9500, 22859, 23201, 19959, 21000, 25806, 12917,
11767, 12791, 9221, 14967, 26850, 21314, 36956, 14556, 12233,
12986, 19864, 17239, 10843, 17399, 35476, 13786, 10083, 9214,
18613, 22267, 46012, 26956, 14627, 24949, 10958, 10238, 294,
21961, 3037, 15441, 12432, 20470, 3673, 12165, 14869, 14516,
2299, 10013, 2458, 1862, 473, 1755, 3772, 691, 1418, 200, 1689,
813, 181, 3553, 912, 3807, 2693, 5017, 558, 18841, 5872, 3890,
573, 322, 592, 331, 5795, 1119, 7323, 667, 29879, 5318, 22950,
110, 144, 5919, 7465, 135, 17871, 14904, 16814, 6811, 14521,
9806, 17144, 9857, 7175, 647, 32723, 25985, 3182, 29040, 23947,
18500, 7194, 19031, 12226, 11075, 16189, 17455, 26382, 9818,
22774, 22124, 12000, 14122, 4965, 566, 522, 20601, 7204, 14051,
24116, 18162, 11174, 478)
) -> zip_df
Now, it's just a matter of joining that data to the sf
object and plotting it:
left_join(zip_df, by=c("ZCTA5CE10"="zipcode")) %>%
ggplot() +
geom_sf(aes(fill=fillvar), size=0.125, color="#b2b2b2") + # thinner lines
coord_sf(datum = NA) + # this gets rid of the graticule
viridis::scale_fill_viridis(name="Better legend name:", labels=scales::comma) + # color-blind friendly color map
ggthemes::theme_map() + # clean map
theme(legend.position="top") + # legend on top
theme(legend.key.width = unit(3, "lines")) # wider legend