I have a data frame named A which has lat and long in separate columns. Sample Data
ID Lat Long
a 10.773046 76.6392061
b 10
Here is a solution using the geosphere library, which calculates the distance in meters (the script then converts it to km). If your dataset is a reasonable size (i.e. < 50,000), performance is acceptable.
# Query points: one row per location, with latitude and longitude in degrees.
a <- data.frame(
  ID = letters[1:4],
  Lat = c(10.773046, 10.7751978, 12.954027, 12.9608638),
  Long = c(76.6392061, 76.6392061, 78.988818, 77.521573)
)
# Candidate stores that each query point is matched against.
b <- data.frame(
  Store = letters[1:5],
  Lat = c(21.244769, 9.919337, 10.053961, 13.829922, 23.849729),
  Long = c(81.63861, 78.14844, 76.32757, 77.49369, 77.93647)
)
library(geosphere)
# Pairwise distances between every row of a and every row of b, in metres.
# distm() takes points as (longitude, latitude), hence the column order.
distmatrix <- distm(a[, c("Long", "Lat")], b[, c("Long", "Lat")])
# For each row of a, the column index of the smallest distance (nearest store).
closest <- apply(distmatrix, 1, which.min)
a$store <- as.character(b$Store[closest])
# Pick each row's minimum straight out of the matrix and convert to kilometres.
a$distance <- distmatrix[cbind(seq_len(nrow(distmatrix)), closest)] / 1000
a
ID Lat Long store distance
1 a 10.77305 76.63921 c 86.54914
2 b 10.77520 76.63921 c 86.76789
3 c 12.95403 78.98882 d 188.71751
4 d 12.96086 77.52157 d 96.19473
Solution is based on a similar question: Is there an efficient way to group nearby locations based on longitude and latitude?
You can also check out this alternative, which uses the tidyverse and a hand-written haversine function:
# Query points: one row per location, with latitude and longitude in degrees.
a <- data.frame(
  ID = letters[1:4],
  Lat = c(10.773046, 10.7751978, 12.954027, 12.9608638),
  Long = c(76.6392061, 76.6392061, 78.988818, 77.521573)
)
# Candidate stores that each query point is matched against.
b <- data.frame(
  Store = letters[1:5],
  Lat = c(21.244769, 9.919337, 10.053961, 13.829922, 23.849729),
  Long = c(81.63861, 78.14844, 76.32757, 77.49369, 77.93647)
)
library(tidyverse)
# Great-circle distance between two points on a sphere (haversine formula).
#
# Arguments are vectorised and recycled in the usual R way.
#   long1, lat1  longitude and latitude of the first point(s), in degrees
#   long2, lat2  longitude and latitude of the second point(s), in degrees
#   radius       sphere radius; the result is in the same unit. The default,
#                6378.145, gives kilometres on an Earth-sized sphere.
#
# NOTE: longitude comes BEFORE latitude in the argument list; pass arguments
# by name if in doubt.
#
# Returns a numeric vector of distances.
earth.dist <- function(long1, lat1, long2, lat2, radius = 6378.145) {
  deg2rad <- pi / 180
  phi1 <- lat1 * deg2rad
  lambda1 <- long1 * deg2rad
  phi2 <- lat2 * deg2rad
  lambda2 <- long2 * deg2rad
  dlon <- lambda2 - lambda1
  dlat <- phi2 - phi1
  hav <- sin(dlat / 2)^2 + cos(phi1) * cos(phi2) * sin(dlon / 2)^2
  # Rounding can push the haversine term marginally outside [0, 1] for
  # (near-)antipodal points, which would make sqrt(1 - hav) return NaN.
  hav <- pmin(1, pmax(0, hav))
  central_angle <- 2 * atan2(sqrt(hav), sqrt(1 - hav))
  radius * central_angle
}
# For every point in a, find the nearest store in b and report how far it is.
#
# earth.dist() expects longitude before latitude, so arguments are passed by
# name to make the order explicit. The nearest store is chosen by great-circle
# distance rather than by differences in raw degrees, because a degree of
# longitude covers less ground the further a point is from the equator.
#
# Columns added:
#   closest   row index into b of the nearest store
#   Store     identifier of that store
#   Distance  straight-line separation in degrees (kept for compatibility)
#   distKm    great-circle distance from earth.dist()
a1 <- a %>%
  group_by(ID, Lat, Long) %>%
  summarise(closest = which.min(earth.dist(long1 = Long, lat1 = Lat,
                                           long2 = b$Long, lat2 = b$Lat))) %>%
  # summarise() drops only the last grouping level; remove the rest so later
  # steps work on the whole table rather than per group.
  ungroup() %>%
  mutate(Store = b$Store[closest],
         Distance = sqrt((Lat - b$Lat[closest])^2 + (Long - b$Long[closest])^2),
         distKm = earth.dist(long1 = Long, lat1 = Lat,
                             long2 = b$Long[closest], lat2 = b$Lat[closest]))
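Which results in the output below. Note that the distKm values shown were produced with latitude and longitude passed to earth.dist in swapped positions (the function expects longitude first), which is why they disagree with the geosphere distances above: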
a1
ID Lat Long closest Store Distance distKm
<fct> <dbl> <dbl> <int> <fct> <dbl> <dbl>
1 a 10.8 76.6 3 c 0.784 39.4
2 b 10.8 76.6 3 c 0.786 39.4
3 c 13.0 79.0 4 d 1.73 168.
4 d 13.0 77.5 4 d 0.870 21.2