I need to calculate the centroids of a set of spatial zones based on a separate population grid dataset. Grateful for a steer on how to achieve this for the example below.<
My own less elegant solution below. Gives exactly the same results as Spacedman and Josh.
# raster to pixels
p = rasterToPoints(dat) %>% as.data.frame()
coordinates(p) = ~ x + y
crs(p) = crs(polys)
# overlay pixels on polygons
ol = over(p, polys) %>% mutate(pop = p$layer) %>% cbind(coordinates(p)) %>%
filter(COLUMBUS_ %in% polys$COLUMBUS_) %>% # i.e. a unique identifier
dplyr::select(x, y, pop, COLUMBUS_) %>% as_data_frame()
# weighted means of x/y values, by pop
pwcs = split(ol, ol$COLUMBUS_) %>% lapply(function(g){
data.frame(x = weighted.mean(g$x, g$pop), y = weighted.mean(g$y, g$pop))
}) %>% bind_rows() %>% as_data_frame()
Another alternative.
I like it for its compactness, but it will likely only make sense if you're fairly familiar with the full family of raster functions:
## Convert polygons to a raster layer
z <- rasterize(polys, dat)
## Compute weighted x and y coordinates within each rasterized region
xx <- zonal(init(dat, v="x")*dat, z) / zonal(dat,z)
yy <- zonal(init(dat, v="y")*dat, z) / zonal(dat,z)
## Combine results in a matrix
res <- cbind(xx[,2],yy[,2])
head(res)
# [,1] [,2]
# [1,] 8.816277 14.35309
# [2,] 8.327463 14.02354
# [3,] 8.993655 13.82518
# [4,] 8.467312 13.71929
# [5,] 9.011808 13.28719
# [6,] 9.745000 13.47444
The answers by Spacedman and Josh are really great, but I'd like to share two other alternatives which are relatively fast and simple.
library(data.table)
library(spatialEco)
library(raster)
library(rgdal)
data.table
approach:# get centroids of raster data
data_points <- rasterToPoints(dat, spatial=TRUE)
# intersect with polygons
grid_centroids <- point.in.poly(data_points, polys)
# calculate weighted centroids
grid_centroids <- as.data.frame(grid_centroids)
w.centroids <- setDT(grid_centroids)[, lapply(.SD, weighted.mean, w=layer), by=POLYID, .SDcols=c('x','y')]
wt.centroid{spatialEco}
: # get a list of the ids from each polygon
poly_ids <- unique(grid_centroids@data$POLYID)
# use lapply to calculate the weighted centroids of each individual polygon
w.centroids.list <- lapply(poly_ids, function(i){wt.centroid( subset(grid_centroids, grid_centroids@data$POLYID ==i)
, 'layer', sp = TRUE)} )
Three steps:
First, find all the cells in each polygon, return a list of 2-column matrices with the cell number and the value:
require(plyr) # for llply, laply in a bit...
cell_value = extract(dat, polys,cellnumbers=TRUE)
head(cell_value[[1]])
cell value
[1,] 31 108
[2,] 32 108
[3,] 33 110
[4,] 92 110
[5,] 93 110
[6,] 94 111
Second, turn into a list of similar matrices but add the x and y coords:
cell_value_xy = llply(cell_value, function(x)cbind(x,xyFromCell(dat,x[,"cell"])))
head(cell_value_xy[[1]])
cell value x y
[1,] 31 108 8.581164 14.71973
[2,] 32 108 8.669893 14.71973
[3,] 33 110 8.758623 14.71973
[4,] 92 110 8.581164 14.67428
[5,] 93 110 8.669893 14.67428
[6,] 94 111 8.758623 14.67428
Third, compute the weighted mean coordinate. This neglects any edge effects and assumes all grid cells are the same size:
centr = laply(cell_value_xy, function(m){c(weighted.mean(m[,3],m[,2]), weighted.mean(m[,4],m[,2]))})
head(centr)
1 2
[1,] 8.816277 14.35309
[2,] 8.327463 14.02354
[3,] 8.993655 13.82518
[4,] 8.467312 13.71929
[5,] 9.011808 13.28719
[6,] 9.745000 13.47444
Now centr
is a 2-column matrix. In your example its very close to coordinates(polys)
so I'd make a contrived example with some extreme weights to make sure its working as expected.