Convert string to date, format: “dd.mm.yyyy”

前端 未结 3 746
难免孤独
难免孤独 2020-12-17 20:38
D <- "06.12.1948"                 # which is dd.mm.yyyy
as.Date(D, "%d.%m.%y")            # convert to date
[1] "2019-12-06"                  # ????    


        
相关标签:
3条回答
  • 2020-12-17 21:18

    To avoid having to remember the date format codes, we can use packaged solutions.

    1) With lubridate

    lubridate::dmy(D)
    #[1] "1948-12-06"
    

    2) Using anytime

    anytime::anydate(D)
    #[1] "1948-06-12"
    
    0 讨论(0)
  • 2020-12-17 21:22

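    The format is case-sensitive: "%Y" reads a four-digit year, whereas "%y" reads only two digits, so "1948" is cut to "19" and becomes 2019 ("%y" is ambiguous and system dependent, I believe):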

    as.Date(D, "%d.%m.%Y")
    [1] "1948-12-06"
    

    The help topic ?strptime has details:

     ‘%y’ Year without century (00-99).  On input, values 00 to 68 are
          prefixed by 20 and 69 to 99 by 19 - that is the behaviour
          specified by the 2004 and 2008 POSIX standards, but they do
          also say ‘it is expected that in a future version the default
          century inferred from a 2-digit year will change’.
    
    0 讨论(0)
  • 2020-12-17 21:39

    This might be helpful for someone. I found this function in the tutorial "Handling date-times in R" by Cole Beck. The function identifies the format of your data.

    #' Guess the strptime-style format of character dates or date-times
    #'
    #' Each entry is split on a single space. The piece containing a colon is
    #' treated as the time part and the remaining piece as the date part. The
    #' digits of the date part are then tried against a fixed list of layouts:
    #' year-month-day, month-day-year and finally day-month-year. The first
    #' layout that parses every entry wins, so ambiguous input such as
    #' "06.12.1948" resolves to month-day-year.
    #'
    #' @param x Vector of dates/date-times, normally character. Blank strings
    #'   and `NA`s are ignored while guessing.
    #' @param returnDates If `TRUE`, return `x` converted with `as.POSIXlt()`
    #'   using the guessed format rather than the format string itself.
    #' @param tzone Time zone handed to `as.POSIXlt()` when `returnDates = TRUE`.
    #' @return The guessed format string (e.g. `"%m/%d/%Y %H:%M"`), the
    #'   converted dates when `returnDates = TRUE`, or `NA` when `x` holds no
    #'   non-blank entries. Stops with an error when no layout fits.
    guessDateFormat <- function(x, returnDates = FALSE, tzone = "") {
      x1 <- x
      # Work on text from the start so the blank check and strsplit() below
      # always see strings; format() renders a date-time vector uniformly.
      if (inherits(x1, "POSIXt")) {
        x1 <- format(x1)
      } else if (!is.character(x1)) {
        x1 <- as.character(x1)
      }
      # drop blanks and missing values
      x1 <- x1[!is.na(x1) & x1 != ""]
      if (length(x1) == 0) {
        return(NA)
      }

      # one row per entry, one column per space-separated piece
      dateTimes <- do.call(rbind, strsplit(x1, " "))
      # literal "NA" strings count as missing, in every column
      dateTimes[dateTimes %in% "NA"] <- NA

      # assume the time part can be found with a colon
      hasColon <- apply(dateTimes, MARGIN = 2, FUN = function(piece) {
        any(grepl(":", piece))
      })
      timePart <- which(hasColon)
      # everything not in the timePart should be in the datePart
      datePart <- setdiff(seq_len(ncol(dateTimes)), timePart)
      # should have 0 or 1 timeParts and exactly one datePart
      if (length(timePart) > 1 || length(datePart) != 1) {
        stop("cannot parse your time variable")
      }

      timeFormat <- NA
      if (length(timePart) > 0) {
        timeStrings <- dateTimes[, timePart]
        timeStrings <- timeStrings[!is.na(timeStrings)]
        # the maximum number of colons decides whether seconds are present
        ncolons <- max(nchar(gsub("[^:]", "", timeStrings)))
        if (ncolons == 1) {
          timeFormat <- "%H:%M"
        } else if (ncolons == 2) {
          timeFormat <- "%H:%M:%S"
        } else {
          stop("timePart should have 1 or 2 colons")
        }
      }

      dateStrings <- dateTimes[, datePart]
      dateStrings <- dateStrings[!is.na(dateStrings)]
      # digits only, used for trial parsing
      dates <- gsub("[^0-9]", "", dateStrings)
      # sep is the first non-numeric character found, hopefully / or - or .
      sep <- unique(substr(gsub("[0-9]", "", dateStrings), 1, 1))
      if (length(sep) > 1) {
        stop("too many seperators in datePart")
      }

      # maximum number of digits found in the date part
      dlen <- max(nchar(dates))
      if (dlen == 6) {
        # six digits: the century is omitted
        lengthLabel <- "six"
        candidates <- list(
          c("%y", "%m", "%d"),
          c("%m", "%d", "%y"),
          c("%d", "%m", "%y")
        )
      } else if (dlen == 8) {
        lengthLabel <- "eight"
        candidates <- list(
          c("%Y", "%m", "%d"),
          c("%m", "%d", "%Y"),
          c("%d", "%m", "%Y")
        )
      } else {
        stop(sprintf("datePart has unusual length: %s", dlen))
      }

      # The first layout that parses every entry wins. Day-first comes last so
      # that input which already resolved keeps resolving the same way.
      dateFormat <- NA
      for (parts in candidates) {
        parsed <- as.Date(dates, format = paste(parts, collapse = ""))
        if (!anyNA(parsed)) {
          dateFormat <- paste(parts, collapse = sep)
          break
        }
      }
      if (is.na(dateFormat)) {
        stop(sprintf("datePart format [%s characters] is inconsistent",
                     lengthLabel))
      }

      # put the date and time formats back in the order they appeared
      if (is.na(timeFormat)) {
        fmt <- dateFormat
      } else if (timePart == 1) {
        fmt <- paste(timeFormat, dateFormat)
      } else if (timePart == 2) {
        fmt <- paste(dateFormat, timeFormat)
      } else {
        stop("cannot parse your time variable")
      }

      if (returnDates) {
        return(as.POSIXlt(x, format = fmt, tz = tzone))
      }
      fmt
    }
    
    # generate some dates: 20 random offsets (in seconds, within 365 days)
    # from 2011-01-01 UTC, rendered as "mm.dd.yyyy HH:MM"
    mydates <- format(
      as.POSIXct(sample(31536000, 20), origin = "2011-01-01", tz = "UTC"),
      "%m.%d.%Y %H:%M"
    )
    
    mydates
    # sample() is random, so the values below are illustrative only
    ## [1] "02.07.2011 06:51" "11.21.2011 17:03" "09.17.2011 22:42" "02.16.2011 13:45"
    ## [5] "12.14.2011 19:11" "09.08.2011 09:22" "12.06.2011 14:06" "02.02.2011 11:00"
    ## [9] "03.27.2011 06:12" "01.05.2011 15:09" "04.15.2011 04:17" "10.20.2011 14:20"
    ## [13] "11.13.2011 21:46" "02.26.2011 03:24" "12.29.2011 11:02" "03.17.2011 02:24"
    ## [17] "02.27.2011 13:51" "06.27.2011 08:36" "03.14.2011 10:54" "01.28.2011 14:14"
    guessDateFormat(mydates)
    

    [1] "%m.%d.%Y %H:%M"

    0 讨论(0)
提交回复
热议问题