Split a column into 2 in R

后端 未结 4 1756
灰色年华
灰色年华 2020-12-22 03:07
 I have this dataframe 

      CC.Number       Date Time Accident.Type              Location.1
    1 12T008826 07/01/2012 1630            PD  (39.26699, -76.560642)
    2          


        
相关标签:
4条回答
  • 2020-12-22 03:27

    separate from tidyr also works

    library(tidyr)
    # Strip the parentheses and any whitespace so the split pieces come out
    # clean: splitting "a, b" on "," alone leaves a leading space in "b".
    df$Location.1 <- gsub("[()[:space:]]", "", df$Location.1)

    # Split on the comma. lat and long are still character columns here;
    # pass convert = TRUE if numeric columns are wanted.
    separate(df, col = Location.1, into = c("lat", "long"), sep = ",")
    #  CC.Number       Date Time Accident.Type       lat        long
    #1 12T008826 07/01/2012 1630            PD  39.26699  -76.560642
    #2 12L005385 07/02/2012 1229            PD 39.000549  -76.399312
    #3 12L005388 07/02/2012 1229            PD  39.00058  -76.399267
    #4 12T008851 07/02/2012  445            PI  39.26367   -76.56648
    #5 12T008858 07/02/2012  802            PD 39.240862  -76.599017
    #6 12T008860 07/02/2012  832            PD  39.27022   -76.63926
    
    0 讨论(0)
  • 2020-12-22 03:29

    We can use extract from tidyr to capture the two numeric parts of 'Location.1' as separate groups and discard the rest.

    library(tidyr)
    # Capture both coordinates; everything outside the two groups is dropped.
    #   \\(\\s*        literal opening parenthesis, optional padding
    #   (-?[0-9.]+)    first number, optional leading minus sign
    #   ,\\s*          comma plus optional whitespace
    #   (-?[0-9.]+)    second number, optional leading minus sign
    #   \\s*\\)        optional padding, literal closing parenthesis
    # NOTE(review): 'alt' is presumably a typo for 'lat' -- kept so the column
    # names match the printed output below; confirm before renaming.
    df1 %>%
      extract(
        Location.1,
        into = c("alt", "lng"),
        regex = "\\(\\s*(-?[0-9.]+),\\s*(-?[0-9.]+)\\s*\\)"
      )
    # CC.Number       Date Time Accident.Type       alt        lng
    #1 12T008826 07/01/2012 1630            PD  39.26699 -76.560642
    #2 12L005385 07/02/2012 1229            PD 39.000549 -76.399312
    #3 12L005388 07/02/2012 1229            PD  39.00058 -76.399267
    #4 12T008851 07/02/2012  445            PI  39.26367  -76.56648
    #5 12T008858 07/02/2012  802            PD 39.240862 -76.599017
    #6 12T008860 07/02/2012  832            PD  39.27022  -76.63926
    
    0 讨论(0)
  • 2020-12-22 03:31

    In base you can use trimws to remove () and read.table to split at ,.

    # Trim spaces and parentheses from both ends of each coordinate string,
    # leaving text of the form "39.26699, -76.560642".
    coord_text <- trimws(md$Location.1, whitespace = "[ ()]")
    # Parse that text as comma-separated data with two named columns.
    coords <- read.table(text = coord_text, sep = ",", col.names = c("alt", "lng"))
    # Bind the parsed coordinates onto the first four columns of md.
    cbind(md[1:4], coords)
    #  CC.Number        Date Time  Accident.Type      alt       lng
    #1 12T008826  07/01/2012 1630             PD 39.26699 -76.56064
    #2 12L005385  07/02/2012 1229             PD 39.00055 -76.39931
    #3 12L005388  07/02/2012 1229             PD 39.00058 -76.39927
    #4 12T008851  07/02/2012  445             PI 39.26367 -76.56648
    #5 12T008858  07/02/2012  802             PD 39.24086 -76.59902
    #6 12T008860  07/02/2012  832             PD 39.27022 -76.63926
    

    Data:

    # Sample data for the answer above. The leading padding inside the Date,
    # Accident.Type and Location.1 strings is part of the data and is kept.
    md <- data.frame(
      CC.Number = c(
        "12T008826", "12L005385", "12L005388",
        "12T008851", "12T008858", "12T008860"
      ),
      Date = c(
        " 07/01/2012", " 07/02/2012", " 07/02/2012",
        " 07/02/2012", " 07/02/2012", " 07/02/2012"
      ),
      Time = c(1630L, 1229L, 1229L, 445L, 802L, 832L),
      Accident.Type = c(
        "            PD", "            PD", "            PD",
        "            PI", "            PD", "            PD"
      ),
      Location.1 = c(
        "  (39.26699, -76.560642)",
        " (39.000549, -76.399312)",
        "  (39.00058, -76.399267)",
        "   (39.26367, -76.56648)",
        " (39.240862, -76.599017)",
        "   (39.27022, -76.63926)"
      ),
      stringsAsFactors = FALSE
    )
    
    0 讨论(0)
  • 2020-12-22 03:45

    You can also do it like this: assuming dat1 is the name of your original dataset, we can use gsub and strsplit. First we replace the commas and parentheses with nothing using gsub, and then we use strsplit to split the values on spaces:

    # Remove parentheses and commas, then trim the ends so that padded values
    # such as "  (39.26699, -76.560642)" do not produce empty leading tokens.
    coord_text <- trimws(gsub("[(),]", "", dat1$Location.1))
    # Split on runs of whitespace (not a single space) so every row yields
    # exactly two pieces, then stack the pieces into a two-column matrix.
    coord_parts <- do.call(rbind, strsplit(coord_text, split = "\\s+"))
    # Lat and Long stay character; wrap in as.numeric() if numbers are needed.
    df1 <- setNames(
      data.frame(coord_parts, stringsAsFactors = FALSE),
      c("Lat", "Long")
    )
    # Drop Location.1 by name rather than by position, so the result does not
    # depend on it being the last column of dat1.
    df2 <- cbind(dat1[setdiff(names(dat1), "Location.1")], df1)
    
    # CC.Number     Date Time Accident.Type       Lat       Long
    # 1 12T008826 07/01/12 1630            PD  39.26699 -76.560642
    # 2 12L005385 07/02/12 1229            PD 39.000549 -76.399312
    # 3 12L005388 07/02/12 1229            PD  39.00058 -76.399267
    # 4 12T008851 07/02/12  445            PI  39.26367  -76.56648
    # 5 12T008858 07/02/12  802            PD 39.240862 -76.599017
    # 6 12T008860 07/02/12  832            PD  39.27022  -76.63926
    
    0 讨论(0)
提交回复
热议问题