问题
I have geocode output from the googleway package (ggmap's geocode wouldn't work with an API key) stored in a list, each element of which is itself a list with two elements (results and status). However, for addresses for which no result was found, the structure of the element is different, frustrating my attempts to convert the list to a data frame.
The structure of a "non-missing" result (created with dput()) is as follows (ignore the gibberish, RStudio doesn't display Cyrillic correctly in the console):
structure(list(results = structure(list(address_components = list(
structure(list(long_name = c("11À", "óëèöà Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã",
"Çåëåíîãðàä", "Ìîñêâà", "Ìîñêâà", "Ðîññèÿ", "124575"), short_name = c("11À",
"óë. Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã", "Çåëåíîãðàä",
"Ìîñêâà", "Ìîñêâà", "RU", "124575"), types = list("street_number",
"route", c("political", "sublocality", "sublocality_level_1"
), c("locality", "political"), c("administrative_area_level_2",
"political"), c("administrative_area_level_1", "political"
), c("country", "political"), "postal_code")), .Names = c("long_name",
"short_name", "types"), class = "data.frame", row.names = c(NA,
8L))), formatted_address = "óë. Ãîãîëÿ, 11À, Çåëåíîãðàä, Ìîñêâà, Ðîññèÿ, 124575",
geometry = structure(list(location = structure(list(lat = 55.987567,
lng = 37.17152), .Names = c("lat", "lng"), class = "data.frame", row.names = 1L),
location_type = "ROOFTOP", viewport = structure(list(
northeast = structure(list(lat = 55.9889159802915,
lng = 37.1728689802915), .Names = c("lat", "lng"
), class = "data.frame", row.names = 1L), southwest = structure(list(
lat = 55.9862180197085, lng = 37.1701710197085), .Names = c("lat",
"lng"), class = "data.frame", row.names = 1L)), .Names = c("northeast",
"southwest"), class = "data.frame", row.names = 1L)), .Names = c("location",
"location_type", "viewport"), class = "data.frame", row.names = 1L),
place_id = "ChIJzXSgUeQUtUYREIohzQOG--A", types = list("street_address")), .Names = c("address_components",
"formatted_address", "geometry", "place_id", "types"), class = "data.frame", row.names = 1L),
status = "OK"), .Names = c("results", "status"))
The structure of a "missing" result is as follows:
structure(list(results = list(), status = "ZERO_RESULTS"), .Names = c("results",
"status"))
Basically, the issue appears to be that when the function doesn't get a result from the Google API, it creates an empty list, rather than a list with the same elements as the "non-missing" list with NA as values. This causes an error when you pass these lists to data.frame(), because it cannot create a data frame from nothing.
I have tried the solution here after extracting the results sublists into a list of their own: Converting nested list (unequal length) to data frame. It is supposed to fill in NAs and create equal length lists, enabling a conversion to a data frame:
# Collect the `results` element of each of the first 100 geocode responses
# into its own list.
first100geocode.results.l <- vector("list", 100)
for(i in 1:length(first100geocode.results.l)){
first100geocode.results.l[[i]] <- first100geocode[[i]]$results
}
# Length of each extracted `results` element (0 for an empty list()).
indx <- sapply(first100geocode.results.l, length)
# Pad every element to the longest length via `length<-`, row-bind the padded
# elements, and convert to a data frame.
# NOTE: this is the statement that raises the rbind() error reported below.
res <- as.data.frame(do.call(rbind,lapply(first100geocode.results.l,
`length<-`, max(indx))))
# Take the column names from the (first) longest element.
colnames(res) <- names(first100geocode.results.l[[which.max(indx)]])
However, the line in which the "res" object is created throws an error: 'Error in rbind(deparse.level, ...) : invalid list argument: all variables should have the same length'.
Is there some other way to fill in NAs for the missing results, so that I can convert this to a data frame?
(Note: I can't just simply remove the missing results, I need to bind this back to the original list of addresses).
回答1:
We'll let jsonlite::flatten() do most of the work:
Put your two example results in one list (hopefully this is faithful to your actual data structure):
# Two-element example list built from the question's dput() output: one
# response with a result (status "OK") and one without (status "ZERO_RESULTS").
# The mis-encoded strings are reproduced exactly as given in the question.
first100geocode <- list(
# Element 1: `results` is a one-row data frame with nested data-frame columns.
structure(list(results = structure(list(address_components = list(
structure(list(long_name = c(
"11À", "óëèöà Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã",
"Çåëåíîãðàä", "Ìîñêâà", "Ìîñêâà", "Ðîññèÿ", "124575"), short_name = c(
"11À", "óë. Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã", "Çåëåíîãðàä",
"Ìîñêâà", "Ìîñêâà", "RU", "124575"), types = list(
"street_number", "route", c("political", "sublocality", "sublocality_level_1"
), c("locality", "political"), c(
"administrative_area_level_2",
"political"), c("administrative_area_level_1", "political"
), c("country", "political"), "postal_code")), .Names = c(
"long_name",
"short_name", "types"), class = "data.frame", row.names = c(NA, 8L))),
formatted_address = "óë. Ãîãîëÿ, 11À, Çåëåíîãðàä, Ìîñêâà, Ðîññèÿ, 124575",
geometry = structure(list(location = structure(
list(lat = 55.987567, lng = 37.17152),
.Names = c("lat", "lng"), class = "data.frame", row.names = 1L),
location_type = "ROOFTOP", viewport = structure(list(
northeast = structure(list(
lat = 55.9889159802915, lng = 37.1728689802915), .Names = c("lat", "lng"
), class = "data.frame", row.names = 1L), southwest = structure(list(
lat = 55.9862180197085, lng = 37.1701710197085), .Names = c("lat", "lng"),
class = "data.frame", row.names = 1L)), .Names = c("northeast", "southwest"),
class = "data.frame", row.names = 1L)),
.Names = c("location", "location_type", "viewport"),
class = "data.frame", row.names = 1L),
place_id = "ChIJzXSgUeQUtUYREIohzQOG--A", types = list("street_address")),
.Names = c("address_components",
"formatted_address", "geometry", "place_id", "types"),
class = "data.frame", row.names = 1L),
status = "OK"), .Names = c("results", "status")),
# Element 2: no match, so `results` is an empty list() rather than a data frame.
structure(list(results = list(), status = "ZERO_RESULTS"),
.Names = c("results", "status"))
)
Do the actual flattening (and filter out address_components and types, which are a bit trickier and probably of no interest to you):
#' Flatten one googleway `results` data frame into plain columns.
#'
#' Nested data-frame columns are expanded by `jsonlite::flatten()`; the
#' `address_components` and `types` columns are then removed.
#'
#' @param df A data frame, e.g. the `results` element of one geocode response.
#' @return A data frame holding the remaining columns. The result is always a
#'   data frame, even when only a single column is left.
flatten_googleway <- function(df) {
  res <- jsonlite::flatten(df)
  keep <- !names(res) %in% c("address_components", "types")
  # drop = FALSE: without it, a single surviving column would collapse to a
  # bare vector, which callers expecting a data frame cannot row-bind.
  res[, keep, drop = FALSE]
}
Prepare an empty template data frame (same columns, zero rows) to stand in for "missing" results, then row-bind everything, substituting one all-NA template row for each missing result:
# Build the zero-row template from the first response that actually has
# results, rather than assuming element 1 is a hit: the empty list() carried
# by a ZERO_RESULTS response is not a data frame and cannot be flattened.
has_result <- vapply(
  first100geocode,
  function(x) length(x$results) > 0,
  logical(1)
)
if (!any(has_result)) {
  stop("no geocode response contains any results", call. = FALSE)
}
template_res <- flatten_googleway(
  first100geocode[[which(has_result)[1]]]$results
)[FALSE, ]
# Indexing the zero-row template with [1, ] yields a single all-NA row, which
# stands in for each response without results.
# NOTE(review): a response whose `results` has several rows contributes several
# rows here; confirm that is acceptable before binding back to the addresses.
do.call(rbind, lapply(first100geocode, function(x) {
  if (length(x$results) == 0) template_res[1, ] else flatten_googleway(x$results)
}))
# formatted_address place_id
# 1 óë. Ãîãîëÿ, 11À, Çåëåíîãðàä, Ìîñêâà, Ðîññèÿ, 124575 ChIJzXSgUeQUtUYREIohzQOG--A
# NA <NA> <NA>
# geometry.location_type geometry.location.lat geometry.location.lng
# 1 ROOFTOP 55.98757 37.17152
# NA <NA> NA NA
# geometry.viewport.northeast.lat geometry.viewport.northeast.lng
# 1 55.98892 37.17287
# NA NA NA
# geometry.viewport.southwest.lat geometry.viewport.southwest.lng
# 1 55.98622 37.17017
# NA NA NA
来源:https://stackoverflow.com/questions/44463968/converting-nested-list-with-missing-values-to-data-frame-in-r