Ever since Joshua Katz published these dialect maps that you can find all over the web using harvard\'s dialect survey, I have been trying to copy and generalize his methods.. b
I'm not sure how much of a help I can be with spatial smoothing as it's a task I have little experience with, but I've spent some time making maps in R so I hope what I add below will help with the that part of your question.
I've started editing your code at # # # shapefile read-in # # #
; you'll notice that I kept the map in the SpatialPolygonsDataFrame
class and I relied on the raster
and gstat
packages to build the grid and run the spatial smoothing. The spatial smoothing model is the part I'm least comfortable with, yet the process allowed me to make a raster and demonstrate how to mask, project and plot it.
library(rgdal)
library(raster)
library(gstat)
# read in a base map
m <- getData("GADM", country="United States", level=1)
m <- m[!m$NAME_1 %in% c("Alaska","Hawaii"),]
# specify the tiger file to download
tiger <- "ftp://ftp2.census.gov/geo/tiger/TIGER2010/CBSA/2010/tl_2010_us_cbsa10.zip"
# create a temporary file and a temporary directory
tf <- tempfile() ; td <- tempdir()
# download the tiger file to the local disk
download.file( tiger , tf , mode = 'wb' )
# unzip the tiger file into the temporary directory
z <- unzip( tf , exdir = td )
# isolate the file that ends with ".shp"
shapefile <- z[ grep( 'shp$' , z ) ]
# read the shapefile into working memory
cbsa.map <- readOGR( shapefile, layer="tl_2010_us_cbsa10" )
# remove CBSAs ending with alaska, hawaii, and puerto rico
cbsa.map <- cbsa.map[ !grepl( "AK$|HI$|PR$" , cbsa.map$NAME10 ) , ]
# cbsa.map$NAME10 now has a length of 933
length( cbsa.map$NAME10 )
# extract centroid for each CBSA
cbsa.centroids <- data.frame(coordinates(cbsa.map), cbsa.map$GEOID10)
names(cbsa.centroids) <- c("lon","lat","GEOID10")
# add lat lon to popualtion data
nrow(x)
x <- merge(x, cbsa.centroids, by="GEOID10")
nrow(x) # centroids could not be assigned to all records for some reason
# create a raster object
r <- raster(nrow=500, ncol=500,
xmn=bbox(m)["x","min"], xmx=bbox(m)["x","max"],
ymn=bbox(m)["y","min"], ymx=bbox(m)["y","max"],
crs=proj4string(m))
# run inverse distance weighted model - modified code from ?interpolate...needs more research
model <- gstat(id = "trinary", formula = trinary~1, weights = "weight", locations = ~lon+lat, data = x,
nmax = 7, set=list(idp = 0.5))
r <- interpolate(r, model, xyNames=c("lon","lat"))
r <- mask(r, m) # discard interpolated values outside the states
# project map for plotting (optional)
# North America Lambert Conformal Conic
nalcc <- CRS("+proj=lcc +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")
m <- spTransform(m, nalcc)
r <- projectRaster(r, crs=nalcc)
# plot map
par(mar=c(0,0,0,0), bty="n")
cols <- c(rgb(0.9,0.8,0.8), rgb(0.9,0.4,0.3),
rgb(0.8,0.8,0.9), rgb(0.4,0.6,0.9),
rgb(0.8,0.9,0.8), rgb(0.4,0.9,0.6))
col.ramp <- colorRampPalette(cols) # custom colour ramp
plot(r, axes=FALSE, legend=FALSE, col=col.ramp(100))
plot(m, add=TRUE) # overlay base map
legend("right", pch=22, pt.bg=cols[c(2,4,6)], legend=c(0,1,2), bty="n")