Convert Stata 16 files to Stata 12 files using R

两盒软妹~` 提交于 2021-01-28 09:51:00


I am using RStudio (running R 4.0.1) and Stata 12 for Windows and have got a large number of folders with Stata 16 .dta files (and other types of files not relevant to this question). I want to create an automated process of converting all Stata 16 .dta files into Stata 12 format (keeping all labels) to then analyze. Ideally, I want to keep the names of the original folders and files but save the converted versions into a new location.

This is what I have got so far:

# Vector with the names of the files to be converted
all_files <- list.files(pattern="*.dta",full.names = TRUE)
# NOTE(review): the loop variable `i` is not used anywhere in the body shown
# below, and the closing brace of this loop does not appear in the snippet.
for (i in all_files){

# Load the file to be converted into Stata 12 format.
# NOTE(review): a literal file name is read here, so every iteration would
# load the same file instead of the current element of all_files.
data <- read_dta("filename.dta",
                 encoding = NULL,
                 col_select = NULL,
                 skip = 0,
                 n_max = Inf,
                 .name_repair = "unique")

# Write as .dta
# NOTE(review): the output path is also a literal, so every iteration would
# write to the same file.
write_dta(data,"c:/directory/filename.dta", version = 12, label = attr(data, "label"))

I am not sure this is the best approach. I know the commands inside the loop work for a single file, but I have not been able to automate them for all files.


Your code only needs some very minor modifications. I've indicated the changes (along with comments explaining them) in the snippet below.


# Folder that holds the Stata 16 files; the converted copies are written to
# the same folder under a new name.
mypath <- "C:/FilesLocation"

# `pattern` is a regular expression, not a shell glob: anchor it so that only
# names ending in ".dta" are returned.
all_files <- list.files(path = mypath, pattern = "\\.dta$", full.names = TRUE)

# seq_along() gives an empty sequence when no files were found, whereas
# 1:length(all_files) would iterate over c(1, 0).
for (i in seq_along(all_files)) {

  # Load the file to be converted into Stata 12 format
  # (read the ith element of all_files)
  data <- haven::read_dta(all_files[i],
                          encoding = NULL,
                          col_select = NULL,
                          skip = 0,
                          n_max = Inf,
                          .name_repair = "unique")

  # Add a _v12 to the filename to specify that it is version 12 now.
  # Only the final extension is split off, so names that contain additional
  # dots (e.g. "survey.2020.dta") keep their full stem.
  old_fname <- basename(all_files[i])
  new_fname <- paste0(tools::file_path_sans_ext(old_fname),
                      "_v12.", tools::file_ext(old_fname))

  # Write as .dta with this new filename
  haven::write_dta(data, path = file.path(mypath, new_fname),
                   version = 12, label = attr(data, "label"))
}


I tried this out with some .dta files from here, and the script ran without throwing any errors. I haven't tested this on Windows, but in theory it should work fine.

Edit: here is a more complete solution with read_dta and write_dta wrapped into a single function, dtavconv. This function also lets the user choose which Stata version the files are converted to (the default is 12).

# .dta file version conversion function
#
# Reads the Stata file `myfile` located in `mypath` with haven::read_dta() and
# writes a copy in the requested format to the same directory. The copy gets
# "_v<myver>" inserted before its extension (airline.dta -> airline_v12.dta).
#
# Arguments:
#   mypath  Directory containing `myfile`. Defaults to getwd() when NULL.
#   myfile  Name of the file to convert (a single character string).
#   myver   Stata version passed to haven::write_dta(). Default is 12.
#
# Returns (invisibly) the path of the file that was written, or NULL when the
# conversion failed. Failures are reported with message() rather than raised,
# so one unreadable file does not abort an lapply() or for loop over many.
dtavconv <- function(mypath = NULL, myfile = NULL, myver = 12) {

  # Default directory is whatever is specified by getwd()
  if (is.null(mypath)) mypath <- getwd()

  # Main code block wrapped in a tryCatch(); the braces are required because
  # the expression consists of several statements.
  myres <- tryCatch(
    {
      # Without this check a NULL `myfile` would silently turn into the
      # path "<mypath>/" and fail with a confusing read error.
      if (!is.character(myfile) || length(myfile) != 1L) {
        stop("`myfile` must be a single file name", call. = FALSE)
      }

      # Load the file to be converted
      data <- haven::read_dta(file.path(mypath, myfile),
                              encoding = NULL,
                              col_select = NULL,
                              skip = 0,
                              n_max = Inf,
                              .name_repair = "unique")

      # Add a _v<myver> to the filename to specify the new version.
      # Only the final extension is split off, so stems containing dots
      # survive intact; a file without extension gets no trailing dot.
      old_fname <- basename(myfile)
      ext <- tools::file_ext(old_fname)
      new_fname <- paste0(tools::file_path_sans_ext(old_fname),
                          "_v", myver,
                          if (nzchar(ext)) paste0(".", ext) else "")
      new_path <- file.path(mypath, new_fname)

      # Write as .dta with this new filename
      haven::write_dta(data, path = new_path,
                       version = myver, label = attr(data, "label"))

      message("\nSuccessfully converted ", myfile, " to ", new_fname, "\n")

      new_path
    },
    error = function(cond) {
      # Report the problem and carry on; NULL marks the failed conversion.
      message("\n", cond, "\n")
      NULL
    }
  )

  invisible(myres)
}



The function can then be run on as many files as desired by invoking it via lapply or a for loop, as the example below illustrates:


# Example run

# Set your path here below
mypath <- paste0(getwd(), "/", "dta")

# Check to see if this directory exists;
# if not, create it
if(!dir.exists(mypath)) dir.create(mypath)
# character(0)
# (presumably the printed listing of the still-empty directory; the command
# that produced this output is not shown)

# Downloading some valid example files
# NOTE(review): the URL vector below is incomplete as shown (unclosed c() call
# with a single empty string); the original links appear to have been lost and
# must be restored before this section can run.
myurl <- c("", 
lapply(myurl, function(x){ download.file (url = x, destfile = paste0(mypath, "/", basename(x)))})

# Also creating a negative test case: an empty file with a .dta extension
file.create(paste0(mypath, "/", "anegcase.dta"))

# [1] "airline.dta"  "anegcase.dta" "cola.dta" 
# (presumably the directory listing after the downloads; the command that
# produced this output is not shown)

# Getting the list of .dta files in the directory.
# `pattern` is a regular expression, not a shell glob, so it is anchored to
# match only names that end in ".dta".
all_files <- list.files(path = mypath, pattern = "\\.dta$")

# Converting files using dtavconv via lapply; each element of all_files is
# passed as `myfile` because `mypath` is supplied by name.
res <- lapply(all_files, dtavconv, mypath = mypath)
# Successfully converted airline.dta to airline_v12.dta
# Error in df_parse_dta_file(spec, encoding, cols_skip, n_max, skip, 
# name_repair = .name_repair): Failed to parse /my/path/
# /dta/anegcase.dta: Unable to read from file.
# Successfully converted cola.dta to cola_v12.dta

# Directory contents after the first conversion
list.files(mypath)
# [1] "airline_v12.dta" "airline.dta"     "anegcase.dta"    "cola_v12.dta"    
# "cola.dta" 

# Example for converting to version 14.
# all_files is reused from above, so the _v12 copies are not converted again.
res <- lapply(all_files, dtavconv, mypath = mypath, myver = 14)
# Successfully converted airline.dta to airline_v14.dta
# Error in df_parse_dta_file(spec, encoding, cols_skip, n_max, skip, 
# name_repair = .name_repair): Failed to parse /my/path
# /dta/anegcase.dta: Unable to read from file.
# Successfully converted cola.dta to cola_v14.dta

# Directory contents after the second conversion
list.files(mypath)
# [1] "airline_v12.dta" "airline_v14.dta" "airline.dta"     "anegcase.dta"    
# "cola_v12.dta"    "cola_v14.dta"    "cola.dta" 


