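I have this data frame, in which `Location.1` holds both coordinates as a single string (output truncated after the first row):

   CC.Number       Date Time Accident.Type              Location.1
1  12T008826 07/01/2012 1630            PD (39.26699, -76.560642)
2  ...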
`separate()` from tidyr also works:
library(tidyr)
# Strip the parentheses and any surrounding whitespace, leaving "lat, long".
df$Location.1 <- trimws(gsub("[()]", "", df$Location.1))
# Split on the comma *and* the whitespace that follows it; splitting on a bare
# "," would leave a leading space in every `long` value.
# The new columns stay character; add `convert = TRUE` to get numeric columns.
separate(df, col = Location.1, into = c("lat", "long"), sep = ",\\s*")
# CC.Number Date Time Accident.Type lat long
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
We can use `extract()` from tidyr, capturing the two coordinates as separate groups that keep only their numeric part and discarding the rest of 'Location.1':
library(tidyr)
# Capture the two coordinates of "(a, b)" as separate groups.
# - Each group accepts one optional leading minus sign, so negative values in
#   either position match (rows that do not match the pattern become NA).
# - Whitespace after the comma is optional.
# - The closing parenthesis is matched literally rather than with a wildcard.
df1 %>%
  extract(
    Location.1,
    into = c("alt", "lng"),
    regex = "\\((-?[0-9.]+),\\s*(-?[0-9.]+)\\)"
  )
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
In base R you can use `trimws()` to remove the `()` and `read.table()` to split at the `,`.
# Base R: trim spaces and parentheses from both ends of each Location.1
# string, let read.table() split the remainder at the comma and parse the two
# fields, then bind the parsed columns to the first four columns of `md`.
cbind(
  md[1:4],
  read.table(
    text = trimws(md[["Location.1"]], whitespace = "[ ()]"),
    sep = ",",
    col.names = c("alt", "lng")
  )
)
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.56064
#2 12L005385 07/02/2012 1229 PD 39.00055 -76.39931
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.39927
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.24086 -76.59902
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
Data:
# Example data: six records with five columns. The Date, Accident.Type and
# Location.1 values carry a leading space, and Location.1 stores each
# coordinate pair as a single "(lat, long)" string.
md <- data.frame(
  CC.Number = c(
    "12T008826", "12L005385", "12L005388",
    "12T008851", "12T008858", "12T008860"
  ),
  Date = c(
    " 07/01/2012", " 07/02/2012", " 07/02/2012",
    " 07/02/2012", " 07/02/2012", " 07/02/2012"
  ),
  Time = c(1630L, 1229L, 1229L, 445L, 802L, 832L),
  Accident.Type = c(" PD", " PD", " PD", " PI", " PD", " PD"),
  Location.1 = c(
    " (39.26699, -76.560642)",
    " (39.000549, -76.399312)",
    " (39.00058, -76.399267)",
    " (39.26367, -76.56648)",
    " (39.240862, -76.599017)",
    " (39.27022, -76.63926)"
  ),
  stringsAsFactors = FALSE
)
You can also do it like this. Assuming `dat1` is the name of your original dataset, we can use `strsplit()` and `gsub()`: first we remove the commas and parentheses with `gsub()`, and then we use `strsplit()` to split the values at the spaces:
# Build a two-column data frame (Lat, Long) from dat1$Location.1.
# Parentheses and commas are removed and the outer whitespace is trimmed, so a
# leading space cannot produce an empty extra field; the remainder is split at
# runs of whitespace rather than at exactly one space.
# The resulting columns are character; wrap them in as.numeric() if numbers
# are needed.
df1 <- setNames(
  data.frame(
    do.call(
      "rbind",
      strsplit(trimws(gsub("[(),]", "", dat1$Location.1)), split = "\\s+")
    ),
    stringsAsFactors = FALSE
  ),
  c("Lat", "Long")
)
# Combine the new columns with every column of dat1 except Location.1. The
# column is dropped by name, so this no longer relies on Location.1 being the
# last column and still returns a data frame when only one column remains.
df2 <- data.frame(dat1[setdiff(names(dat1), "Location.1")], df1)
# CC.Number Date Time Accident.Type Lat Long
# 1 12T008826 07/01/12 1630 PD 39.26699 -76.560642
# 2 12L005385 07/02/12 1229 PD 39.000549 -76.399312
# 3 12L005388 07/02/12 1229 PD 39.00058 -76.399267
# 4 12T008851 07/02/12 445 PI 39.26367 -76.56648
# 5 12T008858 07/02/12 802 PD 39.240862 -76.599017
# 6 12T008860 07/02/12 832 PD 39.27022 -76.63926