问题
I have a netcdf file with a timeseries and the time variable has the following typical metadata:
double time(time) ;
time:standard_name = "time" ;
time:bounds = "time_bnds" ;
time:units = "days since 1979-1-1 00:00:00" ;
time:calendar = "standard" ;
time:axis = "T" ;
Inside R I want to convert the time into an R date object. I achieve this at the moment in a hardwired way by reading the units attribute and splitting the string and using the third entry as my origin (thus assuming the spacing is "days" and the time is 00:00 etc):
require("ncdf4")
f1<-nc_open("file.nc")
time<-ncvar_get(f1,"time")
tunits<-ncatt_get(f1,"time",attname="units")
tustr<-strsplit(tunits$value, " ")
dates<-as.Date(time,origin=unlist(tustr)[3])
This hardwired solution works for my specific example, but I was hoping that there might be a package in R that nicely handles the UNIDATA netcdf data conventions for time units and convert them safely to an R date object?
回答1:
There is not, that I know of. I have this handy function using lubridate
, which is basically identical to yours.
getNcTime <- function(nc) {
require(lubridate)
ncdims <- names(nc$dim) #get netcdf dimensions
timevar <- ncdims[which(ncdims %in% c("time", "Time", "datetime", "Datetime", "date", "Date"))[1]] #find time variable
times <- ncvar_get(nc, timevar)
if (length(timevar)==0) stop("ERROR! Could not identify the correct time variable")
timeatt <- ncatt_get(nc, timevar) #get attributes
timedef <- strsplit(timeatt$units, " ")[[1]]
timeunit <- timedef[1]
tz <- timedef[5]
timestart <- strsplit(timedef[4], ":")[[1]]
if (length(timestart) != 3 || timestart[1] > 24 || timestart[2] > 60 || timestart[3] > 60 || any(timestart < 0)) {
cat("Warning:", timestart, "not a valid start time. Assuming 00:00:00\n")
warning(paste("Warning:", timestart, "not a valid start time. Assuming 00:00:00\n"))
timedef[4] <- "00:00:00"
}
if (! tz %in% OlsonNames()) {
cat("Warning:", tz, "not a valid timezone. Assuming UTC\n")
warning(paste("Warning:", timestart, "not a valid start time. Assuming 00:00:00\n"))
tz <- "UTC"
}
timestart <- ymd_hms(paste(timedef[3], timedef[4]), tz=tz)
f <- switch(tolower(timeunit), #Find the correct lubridate time function based on the unit
seconds=seconds, second=seconds, sec=seconds,
minutes=minutes, minute=minutes, min=minutes,
hours=hours, hour=hours, h=hours,
days=days, day=days, d=days,
months=months, month=months, m=months,
years=years, year=years, yr=years,
NA
)
suppressWarnings(if (is.na(f)) stop("Could not understand the time unit format"))
timestart + f(times)
}
EDIT: One might also want to take a look at ncdf4.helpers::nc.get.time.series
EDIT2: note that the newly-proposed and currently in developement awesome stars
package will handle dates automatically, see the first blog post for an example.
EDIT3: another way is to use the units
package directly, which is what stars
uses. One could do something like this: (still not handling the calendar correctly, I'm not sure units
can)
getNcTime <- function(nc) { ##NEW VERSION, with the units package
require(units)
require(ncdf4)
options(warn=1) #show warnings by default
if (is.character(nc)) nc <- nc_open(nc)
ncdims <- names(nc$dim) #get netcdf dimensions
timevar <- ncdims[which(ncdims %in% c("time", "Time", "datetime", "Datetime", "date", "Date"))] #find (first) time variable
if (length(timevar) > 1) {
warning(paste("Found more than one time var. Using the first:", timevar[1]))
timevar <- timevar[1]
}
if (length(timevar)!=1) stop("ERROR! Could not identify the correct time variable")
times <- ncvar_get(nc, timevar) #get time data
timeatt <- ncatt_get(nc, timevar) #get attributes
timeunit <- timeatt$units
units(times) <- make_unit(timeunit)
as.POSIXct(time)
}
回答2:
I couldn't get @AF7's function to work with my files so I wrote my own. The function below creates a POSIXct vector of dates, for which the start date, time interval, unit and length are read from the nc file. It works with nc files of many (but probably not every...) shapes or forms.
ncdate <- function(nc) {
ncdims <- names(nc$dim) #Extract dimension names
timevar <- ncdims[which(ncdims %in% c("time", "Time", "datetime", "Datetime",
"date", "Date"))[1]] # Pick the time dimension
ntstep <-nc$dim[[timevar]]$len
tm <- ncvar_get(nc, timevar) # Extract the timestep count
tunits <- ncatt_get(nc, timevar, "units") # Extract the long name of units
tspace <- tm[2] - tm[1] # Calculate time period between two timesteps, for the "by" argument
tstr <- strsplit(tunits$value, " ") # Extract string components of the time unit
a<-unlist(tstr[1]) # Isolate the unit .i.e. seconds, hours, days etc.
uname <- a[which(a %in% c("seconds","hours","days"))[1]] # Check unit
startd <- as.POSIXct(gsub(paste(uname,'since '),'',tunits$value),format="%Y-%m-%d %H:%M:%S") ## Extract the start / origin date
tmulti <- 3600 # Declare hourly multiplier for date
if (uname == "days") tmulti =86400 # Declare daily multiplier for date
## Rename "seconds" to "secs" for "by" argument and change the multiplier.
if (uname == "seconds") {
uname <- "secs"
tmulti <- 1 }
byt <- paste(tspace,uname) # Define the "by" argument
if (byt == "0.0416666679084301 days") { ## If the unit is "days" but the "by" interval is in hours
byt= "1 hour" ## R won't understand "by < 1" so change by and unit to hour.
uname = "hours"}
datev <- seq(from=as.POSIXct(startd+tm[1]*tmulti),by= byt, units=uname,length=ntstep)
}
Edit
To address the flaw highlighted by @AF7's comment that the above code would only work for regularly spaced files, datev
could be calculated as
datev <- as.POSIXct(tm*tmulti,origin=startd)
回答3:
I have just discovered that there is a package called ncdf.tools which has the function:
convertDateNcdf2R
which
converts a time vector from a netCDF file or a vector of Julian days (or seconds, minutes, hours) since a specified origin into a POSIXct R vector.
which is helpful. More details available here: https://rdrr.io/cran/ncdf.tools/man/convertDateNcdf2R.html
来源:https://stackoverflow.com/questions/46001573/convert-a-netcdf-time-variable-to-an-r-date-object