问题
I have over 800 dbf files which I need to import and merge in R. I have been able to bring in all of the files using this code:
# Load the package providing read.dbf() and move to the data folder.
library(foreign)
setwd("c:/temp/help/")

# Read every file whose name ends in ".dbf" into a list of data frames.
files <- list.files(pattern = "\\.dbf$")
all.the.data <- lapply(files, function(path) read.dbf(path, as.is = FALSE))

# Stack all data frames row-wise into one data frame.
DATA <- do.call("rbind", args = all.the.data)
However, these dbf files have different numbers of columns, and even when two files have the same number of columns, their headers may differ. Here are four of the dbf files as an example:
# Example inputs: four one-row data frames written as structure() literals.
# Each has a PLOTBUFFER factor column plus a varying set of VALUE_* columns.

# file01: PLOTBUFFER "1002_2km"; VALUE_ columns 11, 31, 42, 43, 90.
file01 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1002_2km", class = "factor"),
VALUE_11 = 11443500, VALUE_31 = 13500, VALUE_42 = 928800,
VALUE_43 = 162000, VALUE_90 = 18900), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_31", "VALUE_42", "VALUE_43", "VALUE_90"), row.names = c(NA,
-1L), class = "data.frame", data_types = c("C", "F", "F", "F",
"F", "F"))
# file02: PLOTBUFFER "1002_5km"; VALUE_ columns 11, 21, 31, 41, 42, 43, 52, 90.
file02 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1002_5km", class = "factor"),
VALUE_11 = 66254400, VALUE_21 = 125100, VALUE_31 = 80100,
VALUE_41 = 4234500, VALUE_42 = 3199500, VALUE_43 = 4194000,
VALUE_52 = 376200, VALUE_90 = 72000), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_21", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43",
"VALUE_52", "VALUE_90"), row.names = c(NA, -1L), class = "data.frame", data_types = c("C",
"F", "F", "F", "F", "F", "F", "F", "F"))
# file03: PLOTBUFFER "1003_2km"; VALUE_ columns 11, 31, 41, 42, 43, 52, 90, 95.
file03 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1003_2km", class = "factor"),
VALUE_11 = 1972800, VALUE_31 = 125100, VALUE_41 = 5316300,
VALUE_42 = 990900, VALUE_43 = 1995300, VALUE_52 = 740700,
VALUE_90 = 1396800, VALUE_95 = 25200), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43", "VALUE_52",
"VALUE_90", "VALUE_95"), row.names = c(NA, -1L), class = "data.frame", data_types = c("C",
"F", "F", "F", "F", "F", "F", "F", "F"))
# file04: PLOTBUFFER "1003_5km"; same column set as file03.
file04 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1003_5km", class = "factor"),
VALUE_11 = 43950600, VALUE_31 = 270000, VALUE_41 = 12969900,
VALUE_42 = 5105700, VALUE_43 = 12614400, VALUE_52 = 1491300,
VALUE_90 = 2055600, VALUE_95 = 70200), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43", "VALUE_52",
"VALUE_90", "VALUE_95"), row.names = c(NA, -1L), class = "data.frame", data_types = c("C",
"F", "F", "F", "F", "F", "F", "F", "F"))
I would like the dataframe to match this:
# Desired result, shown here for file01 and file02 only: one row per
# PLOTBUFFER, the union of both files' VALUE_* columns, and 0 where a file
# lacks a column (VALUE_21, VALUE_41 and VALUE_52 for "1002_2km").
merged <- structure(list(PLOTBUFFER = structure(1:2, .Label = c("1002_2km",
"1002_5km"), class = "factor"), VALUE_11 = c(11443500, 66254400
), VALUE_21 = c(0, 125100), VALUE_31 = c(13500, 80100), VALUE_41 = c(0,
4234500), VALUE_42 = c(928800, 3199500), VALUE_43 = c(162000,
4194000), VALUE_52 = c(0, 376200), VALUE_90 = c(18900, 72000)), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_21", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43",
"VALUE_52", "VALUE_90"), class = "data.frame", row.names = c(NA,
-2L))
That is, if a column is missing from one dataset, it should simply be filled in with a zero or NULL.
Thanks
-al
The suggestion by @infominer worked for the 4 files I included as an example but when I tried to use merge_recurse on the large list of 802 elements, I received an error.
# Read all .dbf files in the working directory into a list of data frames.
files <- list.files(pattern = "\\.dbf$")
all.the.data <- lapply(files, function(path) read.dbf(path, as.is = FALSE))

# Merging the full list with merge_recurse() is the call that raises the
# error shown below.
merged <- merge_recurse(all.the.data)
Error: evaluation nested too deeply: infinite recursion / options(expressions=)? Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)?
回答1:
Use the package reshape
# Merge the four example data frames from the question into one data frame.
library(reshape)
merged.files <- merge_recurse(
  list(file01, file02, file03, file04)
)
Edit:
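Try this code instead (thanks to Ramnath). It folds merge() over the list with Reduce(), which iterates rather than recursing, so it avoids the "evaluation nested too deeply" error on long lists: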
# Merge the list pairwise, left to right. Reduce() iterates instead of
# recursing, so it copes with long lists. all = TRUE keeps every row and
# column; a column missing from one input is filled with NA.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
merged <- Reduce(function(x, y) merge(x, y, all = TRUE), all.the.data)

# The question asks for 0 rather than NA, so zero-fill the numeric columns
# only (the PLOTBUFFER factor column is left untouched).
num_cols <- vapply(merged, is.numeric, logical(1))
merged[num_cols][is.na(merged[num_cols])] <- 0

# Print the result, as the original one-liner did.
merged
adapted from https://stackoverflow.com/a/6947326/2747709
来源:https://stackoverflow.com/questions/22331853/merge-multiple-dbf-files-with-non-matching-headers-in-r