Better way to optimize my code for getting NOAA climate data

孤街醉人 提交于 2020-01-16 01:17:35

问题


So I've been working on grabbing climate data (specifically temperature and precipitation) from NOAA's network of GHCN weather stations. I've managed to get a list of the stations pertinent to my area (~200) and have built a loop that gets a given climate variable for every station on that list, for every day between a specified minimum and maximum date. Ultimately I need ~10 years' worth of data. However, my simple loop is taking forever to get this data, and I was wondering whether there's a better way to optimize it. Also, I would really prefer monthly data rather than daily, but rnoaa doesn't seem to have an option for GHCN monthly data: the only function I can find is ghcnd_search(), which returns daily records. If anyone knows how to retrieve monthly rather than daily data, that would be appreciated.

Station list:

# GHCN station identifiers for the study area (200 stations), kept in their
# original order and laid out in groups by ID prefix.
df <- c(
  # IDs with the "US1" prefix
  "US1FLAL0048", "US1FLBK0003", "US1FLBV0002", "US1FLBV0006",
  "US1FLBV0023", "US1FLBV0040", "US1FLBW0099", "US1FLCT0012", "US1FLDV0051",
  "US1FLFR0006", "US1FLHL0003", "US1FLHN0009", "US1FLLB0001", "US1FLLE0005",
  "US1FLLK0012", "US1FLLN0004", "US1FLLN0018", "US1FLMN0013", "US1FLMR0012",
  "US1FLMR0033", "US1FLOK0017", "US1FLOR0028", "US1FLPS0002", "US1FLPS0018",
  "US1FLPT0007", "US1FLSJ0012", "US1FLSM0008", "US1FLSS0044", "US1FLST0014",
  "US1FLSW0008", "US1FLVL0035", "US1FLWK0001",
  # IDs with the "USC" prefix
  "USC00080228", "USC00080236",
  "USC00080369", "USC00080414", "USC00080478", "USC00080598", "USC00080737",
  "USC00080945", "USC00080992", "USC00081163", "USC00081276", "USC00081306",
  "USC00081544", "USC00081641", "USC00081651", "USC00081978", "USC00082008",
  "USC00082046", "USC00082150", "USC00082229", "USC00082288", "USC00082298",
  "USC00082391", "USC00082418", "USC00082441", "USC00082850", "USC00082915",
  "USC00082944", "USC00083020", "USC00083153", "USC00083163", "USC00083168",
  "USC00083207", "USC00083209", "USC00083470", "USC00083874", "USC00083909",
  "USC00083956", "USC00083986", "USC00084050", "USC00084095", "USC00084210",
  "USC00084289", "USC00084320", "USC00084366", "USC00084394", "USC00084412",
  "USC00084461", "USC00084625", "USC00084662", "USC00084731", "USC00084802",
  "USC00085076", "USC00085099", "USC00085184", "USC00085275", "USC00085359",
  "USC00085377", "USC00085539", "USC00085612", "USC00085667", "USC00085879",
  "USC00085895", "USC00085973", "USC00086065", "USC00086078", "USC00086129",
  "USC00086240", "USC00086315", "USC00086406", "USC00086414", "USC00086618",
  "USC00086657", "USC00086764", "USC00086767", "USC00086828", "USC00086842",
  "USC00086999", "USC00087020", "USC00087025", "USC00087205", "USC00087228",
  "USC00087261", "USC00087304", "USC00087397", "USC00087429", "USC00087760",
  "USC00087826", "USC00087851", "USC00087869", "USC00087886", "USC00087982",
  "USC00088368", "USC00088529", "USC00088620", "USC00088756", "USC00088782",
  "USC00088824", "USC00088942", "USC00089120", "USC00089176", "USC00089219",
  "USC00089401", "USC00089430", "USC00089566", "USC00089640", "USC00089795",
  # IDs with the "USR" prefix
  "USR0000FBLO", "USR0000FCAC", "USR0000FCEN", "USR0000FCHE", "USR0000FLSU",
  "USR0000FMER", "USR0000FMIL", "USR0000FNAV", "USR0000FOAS", "USR0000FOCH",
  "USR0000FOLU", "USR0000FRAC", "USR0000FSAN", "USR0000FSTM", "USR0000FSUM",
  "USR0000FWIL",
  # IDs with the "USW" prefix
  "USW00003818", "USW00003853", "USW00012812", "USW00012815",
  "USW00012816", "USW00012818", "USW00012819", "USW00012832", "USW00012833",
  "USW00012834", "USW00012835", "USW00012836", "USW00012838", "USW00012839",
  "USW00012841", "USW00012842", "USW00012843", "USW00012844", "USW00012849",
  "USW00012850", "USW00012854", "USW00012871", "USW00012873", "USW00012876",
  "USW00012882", "USW00012885", "USW00012888", "USW00012894", "USW00012895",
  "USW00012896", "USW00012897", "USW00013884", "USW00013889", "USW00013899",
  "USW00053847", "USW00053853", "USW00053860", "USW00092805", "USW00092806",
  "USW00092809", "USW00092811", "USW00092821", "USW00093805", "USW00093837",
  "USW00093841"
)

Code:

library(rnoaa)
options(noaakey = "your api key")

# Download daily TMAX records for every station ID in `df` and stack them
# into a single table named `data`.
#
# Fixes relative to the original loop:
#   * each request now uses the current station ID (the loop previously
#     passed df[1] on every iteration, so only the first station was fetched);
#   * results are collected in a list and row-bound once, instead of calling
#     rbind() on an ever-growing object inside the loop;
#   * lapply() over `df` is safe for an empty vector, unlike 1:length(df);
#   * a failing station emits a warning and is skipped, so one bad request
#     does not throw away the stations already downloaded.
#
# `data` is NULL when no station yields any TMAX rows.
data <- do.call(
  rbind,
  lapply(df, function(station_id) {
    tryCatch(
      ghcnd_search(
        stationid = station_id,
        var = "TMAX",
        date_min = "2010-1-30",
        date_max = "2015-12-31"
      )[["tmax"]],
      error = function(e) {
        warning(
          "Skipping station ", station_id, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL  # rbind() ignores NULL entries
      }
    )
  })
)

回答1:


Assuming the station IDs are stored in a character vector called dat, we can use functions from the purrr package to download the data for each station and combine the results into a single data frame.

# Packages: rnoaa provides ghcnd_search(); purrr provides map_dfr().
library(rnoaa)
library(purrr)

# For each station ID in `dat`, request the TMAX records for the date window
# and keep the "tmax" element of the result; map_dfr() row-binds the
# per-station tables into one data frame.
dat_df <- map_dfr(dat, function(station_id) {
  station_result <- ghcnd_search(
    station_id,
    var = "TMAX",
    date_min = "2010-1-30",
    date_max = "2015-12-31"
  )
  station_result[["tmax"]]
})

DATA

# Station identifiers used by the answer's example (200 stations), kept in
# their original order and laid out in groups by ID prefix.
dat <- c(
  # IDs with the "US1" prefix
  "US1FLAL0048", "US1FLBK0003", "US1FLBV0002", "US1FLBV0006",
  "US1FLBV0023", "US1FLBV0040", "US1FLBW0099", "US1FLCT0012", "US1FLDV0051",
  "US1FLFR0006", "US1FLHL0003", "US1FLHN0009", "US1FLLB0001", "US1FLLE0005",
  "US1FLLK0012", "US1FLLN0004", "US1FLLN0018", "US1FLMN0013", "US1FLMR0012",
  "US1FLMR0033", "US1FLOK0017", "US1FLOR0028", "US1FLPS0002", "US1FLPS0018",
  "US1FLPT0007", "US1FLSJ0012", "US1FLSM0008", "US1FLSS0044", "US1FLST0014",
  "US1FLSW0008", "US1FLVL0035", "US1FLWK0001",
  # IDs with the "USC" prefix
  "USC00080228", "USC00080236",
  "USC00080369", "USC00080414", "USC00080478", "USC00080598", "USC00080737",
  "USC00080945", "USC00080992", "USC00081163", "USC00081276", "USC00081306",
  "USC00081544", "USC00081641", "USC00081651", "USC00081978", "USC00082008",
  "USC00082046", "USC00082150", "USC00082229", "USC00082288", "USC00082298",
  "USC00082391", "USC00082418", "USC00082441", "USC00082850", "USC00082915",
  "USC00082944", "USC00083020", "USC00083153", "USC00083163", "USC00083168",
  "USC00083207", "USC00083209", "USC00083470", "USC00083874", "USC00083909",
  "USC00083956", "USC00083986", "USC00084050", "USC00084095", "USC00084210",
  "USC00084289", "USC00084320", "USC00084366", "USC00084394", "USC00084412",
  "USC00084461", "USC00084625", "USC00084662", "USC00084731", "USC00084802",
  "USC00085076", "USC00085099", "USC00085184", "USC00085275", "USC00085359",
  "USC00085377", "USC00085539", "USC00085612", "USC00085667", "USC00085879",
  "USC00085895", "USC00085973", "USC00086065", "USC00086078", "USC00086129",
  "USC00086240", "USC00086315", "USC00086406", "USC00086414", "USC00086618",
  "USC00086657", "USC00086764", "USC00086767", "USC00086828", "USC00086842",
  "USC00086999", "USC00087020", "USC00087025", "USC00087205", "USC00087228",
  "USC00087261", "USC00087304", "USC00087397", "USC00087429", "USC00087760",
  "USC00087826", "USC00087851", "USC00087869", "USC00087886", "USC00087982",
  "USC00088368", "USC00088529", "USC00088620", "USC00088756", "USC00088782",
  "USC00088824", "USC00088942", "USC00089120", "USC00089176", "USC00089219",
  "USC00089401", "USC00089430", "USC00089566", "USC00089640", "USC00089795",
  # IDs with the "USR" prefix
  "USR0000FBLO", "USR0000FCAC", "USR0000FCEN", "USR0000FCHE", "USR0000FLSU",
  "USR0000FMER", "USR0000FMIL", "USR0000FNAV", "USR0000FOAS", "USR0000FOCH",
  "USR0000FOLU", "USR0000FRAC", "USR0000FSAN", "USR0000FSTM", "USR0000FSUM",
  "USR0000FWIL",
  # IDs with the "USW" prefix
  "USW00003818", "USW00003853", "USW00012812", "USW00012815",
  "USW00012816", "USW00012818", "USW00012819", "USW00012832", "USW00012833",
  "USW00012834", "USW00012835", "USW00012836", "USW00012838", "USW00012839",
  "USW00012841", "USW00012842", "USW00012843", "USW00012844", "USW00012849",
  "USW00012850", "USW00012854", "USW00012871", "USW00012873", "USW00012876",
  "USW00012882", "USW00012885", "USW00012888", "USW00012894", "USW00012895",
  "USW00012896", "USW00012897", "USW00013884", "USW00013889", "USW00013899",
  "USW00053847", "USW00053853", "USW00053860", "USW00092805", "USW00092806",
  "USW00092809", "USW00092811", "USW00092821", "USW00093805", "USW00093837",
  "USW00093841"
)


来源:https://stackoverflow.com/questions/49743774/better-way-to-optimize-my-code-for-getting-noaa-climate-data

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!