问题
I am using RStudio (running R 4.0.1) and Stata 12 for Windows and have got a large number of folders with Stata 16 .dta files (and other types of files not relevant to this question). I want to create an automated process of converting all Stata 16 .dta files into Stata 12 format (keeping all labels) to then analyze. Ideally, I want to keep the names of the original folders and files but save the converted versions into a new location.
This is what I have got so far:
setwd("C:/FilesLocation")
#vector with name of files to be converted
all_files <- list.files(pattern="*.dta",full.names = TRUE)
for (i in all_files){
#Load file to be converted into STATA12 version
data <- read_dta("filename.dta",
encoding = NULL,
col_select = NULL,
skip = 0,
n_max = Inf,
.name_repair = "unique")
#Write as .dta
write_dta(data,"c:/directory/filename.dta", version = 12, label = attr(data, "label"))
}
Not sure this is the best approach. I know the commands inside the loop are working for a single file but not really being able to automate for all files.
回答1:
Your code only needs some very minor modifications. I've indicated the changes (along with comments explaining them) in the snippet below.
library(haven)
mypath <- "C:/FilesLocation"
all_files <- list.files(path = mypath, pattern = "*.dta", full.names = TRUE)
for (i in 1:length(all_files)){
#(Above) iterations need the length of the vector to be specified
#Load file to be converted into STATA12 version
data <- read_dta(all_files[i], #You want to read the ith element in all_files
encoding = NULL,
col_select = NULL,
skip = 0,
n_max = Inf,
.name_repair = "unique")
#Add a _v12 to the filename to
#specify that is is version 12 now
new_fname <- paste0(unlist(strsplit(basename(all_files[i]), "\\."))[1],
"_v12.", unlist(strsplit(basename(all_files[i]), "\\."))[2])
#Write as .dta
#with this new filename
write_dta(data, path = paste0(mypath, "/", new_fname),
version = 12, label = attr(data, "label"))
}
I tried this out with some .sta
files from here, and the script ran without throwing up errors. I haven't tested this on Windows but in theory it should work fine.
Edit: here is a more complete solution with read_dta
and write_dta
wrapped into a single function dtavconv
. This function also allows the user to convert version numbers to arbitrary values (default is 12).
#----
#.dta file version conversion function
dtavconv <- function(mypath = NULL, myfile = NULL, myver = 12){
#Function to convert .dta file versions
#Default version files are converted to is v12
#Default directory is whatever is specified by getwd()
if(is.null(mypath)) mypath <- getwd()
#Main code block wrapped in a tryCatch()
myres <- tryCatch(
{
#Load file to be converted into STATA12 version
data <- haven::read_dta(paste0(mypath, "/", myfile),
encoding = NULL,
col_select = NULL,
skip = 0,
n_max = Inf,
.name_repair = "unique")
#Add a _v12 to the filename to
#specify that is is version 12 now
new_fname <- paste0(unlist(strsplit(basename(myfile), "\\."))[1],
"_v", myver, ".", unlist(strsplit(basename(myfile), "\\."))[2])
#Write as .dta
#with this new filename
haven::write_dta(data, path = paste0(mypath, "/", new_fname),
version = myver, label = attr(data, "label"))
message("\nSuccessfully converted ", myfile, " to ", new_fname, "\n")
},
error = function(cond){
#message("Unable to write file", myfile, " as ", new_fname)
message("\n", cond, "\n")
return(NA)
}
)
return(myres)
}
#----
The function can then be run on as many files as desired by invoking it via lapply
or a for
loop, as the example below illustrates:
#----
#Example run
library(haven)
#Set your path here below
mypath <- paste0(getwd(), "/", "dta")
#Check to see if this directory exists
#if not, create it
if(!dir.exists(mypath)) dir.create(mypath)
list.files(mypath)
# character(0)
#----
#Downloading some valid example files
myurl <- c("http://www.principlesofeconometrics.com/stata/airline.dta",
"http://www.principlesofeconometrics.com/stata/cola.dta")
lapply(myurl, function(x){ download.file (url = x, destfile = paste0(mypath, "/", basename(x)))})
#Also creating a negative test case
file.create(paste0(mypath, "/", "anegcase.dta"))
list.files(mypath)
# [1] "airline.dta" "anegcase.dta" "cola.dta"
#----
#Getting list of files in the directory
all_files <- list.files(path = mypath, pattern = "*.dta")
#Converting files using dtavconv via lapply
res <- lapply(all_files, dtavconv, mypath = mypath)
#
# Successfully converted airline.dta to airline_v12.dta
#
#
# Error in df_parse_dta_file(spec, encoding, cols_skip, n_max, skip,
# name_repair = .name_repair): Failed to parse /my/path/
# /dta/anegcase.dta: Unable to read from file.
#
#
#
# Successfully converted cola.dta to cola_v12.dta
#
list.files(mypath)
# [1] "airline_v12.dta" "airline.dta" "anegcase.dta" "cola_v12.dta"
# "cola.dta"
#Example for converting to version 14
res <- lapply(all_files, dtavconv, mypath = mypath, myver = 14)
#
# Successfully converted airline.dta to airline_v14.dta
#
#
# Error in df_parse_dta_file(spec, encoding, cols_skip, n_max, skip,
# name_repair = .name_repair): Failed to parse /my/path
# /dta/anegcase.dta: Unable to read from file.
#
#
#
# Successfully converted cola.dta to cola_v14.dta
#
list.files(mypath)
# [1] "airline_v12.dta" "airline_v14.dta" "airline.dta" "anegcase.dta"
# "cola_v12.dta" "cola_v14.dta" "cola.dta"
#----
来源:https://stackoverflow.com/questions/62399350/convert-stata-16-files-to-stata-12-files-using-r