Why do I get an error message pointing to Inf values when trying to plot counts over time in R?

后端未结

关注

 2  633

I am using the code given in this answer to generate this plot

library(rvest)

# Name of the cache directory; create it if it does not exist yet.
cachedir <- "cache"
if (!dir.exists(cachedir)) {
  dir.create(cachedir)
}

URL <


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  滥情空心        
                
              
                            
                2021-01-25 23:58
              
            
            
                                                                       
# Install packages if they are not already installed:
necessary_packages <- c("rvest", "tidyverse")

# Work out which packages are missing. requireNamespace() checks one package
# at a time and returns FALSE when it cannot be loaded, which avoids building
# the full installed.packages() matrix: new_packages => character vector
is_installed <- vapply(necessary_packages, requireNamespace,
                       logical(1), quietly = TRUE)
new_packages <- necessary_packages[!is_installed]

# If any packages are missing, install them (and their dependencies):
if (length(new_packages) > 0) {
  install.packages(new_packages, dependencies = TRUE)
}

# Attach the packages to the session. library() stops with an error when a
# package is unavailable, whereas require() would only return FALSE and let
# the script fail later at the first missing function:
invisible(lapply(necessary_packages, library, character.only = TRUE))

# GitHub page listing the daily report csvs: URL => character scalar
URL <-
  "https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports"

# Directory in which the csv files are stored locally.
# Enter your desired path here!
covid_19_csv_dir_path <- "C:/Users/.../Documents/covid_19_csvs"

# Create the directory if it does not exist yet. recursive = TRUE also creates
# any missing parent directories; without it dir.create() only warns and the
# later write.csv() calls would fail:
if (!dir.exists(covid_19_csv_dir_path)) {
  dir.create(covid_19_csv_dir_path, recursive = TRUE)
}

# Collect the href of every anchor on the page and keep those ending in
# "csv": csv_hrefs => character vector
page_hrefs <- html_attr(html_nodes(read_html(URL), "a"), "href")
csv_hrefs <- grep("csv$", page_hrefs, value = TRUE)

# Turn the hrefs into raw.githubusercontent.com links by prefixing the host
# and dropping "/blob": csvlinks => character vector
# paste0() recycles a zero-length argument to "", so an empty scrape must be
# handled explicitly or it would produce one bogus link made of the prefix.
csvlinks <- if (length(csv_hrefs) > 0) {
  gsub("/blob", "", paste0("https://raw.githubusercontent.com", csv_hrefs))
} else {
  character(0)
}

# File name of each csv (everything after the last "/"): csv_names => vector
csv_names <- basename(csvlinks)

# csv files already present in the directory. The pattern keeps unrelated
# files in the folder from being treated as stored csvs:
# csvs_stored_locally => vector
csvs_stored_locally <- list.files(covid_19_csv_dir_path, pattern = "\\.csv$")

# Links whose file is not stored locally yet and therefore needs
# downloading: csvs_to_be_stored => vector
csvs_to_be_stored <- csvlinks[!(csv_names %in% csvs_stored_locally)]

# Conditionally execute the next segment if there are csvs to store:
if (length(csvs_to_be_stored) > 0) {
  # File names of the csvs being downloaded, aligned element by element with
  # csvs_to_be_stored (same filter): pending_csv_names => character vector
  pending_csv_names <- csv_names[!(csv_names %in% csvs_stored_locally)]

  # Date of each version, parsed from the "mm-dd-YYYY.csv" file name:
  # version_dates => Date vector
  version_dates <- as.Date(gsub("\\.csv", "", pending_csv_names), "%m-%d-%Y")

  # Name for each dataframe in the list: df_names => character vector
  df_names <- paste0("x_", gsub("[[:punct:]]", "_", version_dates))

  # Download each csv into a dataframe: df_list => list
  df_list <- lapply(csvs_to_be_stored, function(csv_url) {
    read.csv(csv_url, sep = ",")
  })

  # Clean the column names of each dataframe (strip punctuation, whitespace
  # and anything up to a "."), then name the list: df_list => named list
  df_list <- setNames(
    lapply(df_list, function(x) {
      names(x) <- gsub("[[:punct:]]|\\s+|.*\\.", "",
                       trimws(names(x), "both"))
      x
    }),
    df_names
  )

  # Prepend the version date as a column of each dataframe. Map() takes the
  # result names from its first argument, so the list keeps the df_names set
  # above: df_list => named list
  df_list <- Map(
    function(x, version_date) cbind("version_date" = version_date, x),
    df_list,
    version_dates
  )

  # Store each dataframe in the directory under its own file name. The name
  # must come from pending_csv_names: indexing the full csv_names vector would
  # write new data under the names of files that are already stored.
  invisible(lapply(seq_along(df_list), function(i) {
    write.csv(df_list[[i]],
              file.path(covid_19_csv_dir_path, pending_csv_names[i]),
              row.names = FALSE)
  }))
}

# If there are files stored in the directory read them in:
if (length(csvs_stored_locally) > 0) {
  # Read each stored csv into a dataframe: ls_csvs => list
  ls_csvs <- lapply(csvs_stored_locally, function(csv_file) {
    stored_df <- read.csv(file.path(covid_19_csv_dir_path, csv_file))

    # read.csv() does not restore the Date class, so version_date comes back
    # as text. Convert it so the column has the same type as in the freshly
    # downloaded dataframes before the two sets are row-bound together.
    if ("version_date" %in% names(stored_df)) {
      stored_df$version_date <-
        as.Date(as.character(stored_df$version_date), "%Y-%m-%d")
    }
    stored_df
  })

  # Name each dataframe after its file: "x_" followed by the file name
  # without ".csv" and with punctuation replaced by "_": ls_csvs => named list
  names(ls_csvs) <- paste0(
    "x_",
    gsub("[[:punct:]]", "_", gsub("\\.csv", "", csvs_stored_locally))
  )
}

# Build combined_df_list from whichever of the two lists exist.
# && is used because an if() condition must be a single TRUE/FALSE value.
if (exists("df_list") && exists("ls_csvs")) {

  # Both stored and downloaded dataframes: combined_df_list => list
  combined_df_list <- c(ls_csvs, df_list)

  # Remove the df_list and ls_csvs variables from the global environment:
  rm(df_list, ls_csvs)

} else if (exists("ls_csvs")) {

  # Only stored dataframes (nothing was downloaded): combined_df_list => list
  combined_df_list <- ls_csvs

  # Remove the ls_csvs variable from the global environment:
  rm(ls_csvs)

} else if (exists("df_list")) {

  # Only downloaded dataframes: combined_df_list => list
  combined_df_list <- df_list

} else {

  # Neither list exists; fail here with a clear message instead of an
  # "object not found" error further down.
  stop("No csv data available: nothing was downloaded or stored locally.",
       call. = FALSE)

}

# Run the garbage collector to release the memory of the removed objects:
gc()

# Row-bind two dataframes whose columns may differ. Each dataframe first
# receives an NA-filled column for every column it lacks, so both share the
# same set of names when bound: rbind_all_columns => function
rbind_all_columns <- function(x, y) {
  only_in_y <- as.character(setdiff(colnames(y), colnames(x)))
  only_in_x <- as.character(setdiff(colnames(x), colnames(y)))

  x[, only_in_y] <- NA
  y[, only_in_x] <- NA

  rbind(x, y)
}

# Fold the list into a single dataframe by row-binding one dataframe at a
# time: df => data.frame
df <- Reduce(rbind_all_columns, combined_df_list)

# Keep only the New York and Washington rows: washington_vs_ny => data.frame
washington_vs_ny <- df[df$State %in% c("Washington", "New York"), ]

# Order by version date, replace missing counts with 0 and keep only the
# columns needed for charting: washington_vs_nyordered => data.frame
washington_vs_nyordered <- transform(
  washington_vs_ny[order(washington_vs_ny$version_date), ],
  Confirmed = ifelse(is.na(Confirmed), 0, Confirmed),
  Deaths = ifelse(is.na(Deaths), 0, Deaths),
  Recovered = ifelse(is.na(Recovered), 0, Recovered)
)[, c("version_date", "State", "Confirmed", "Deaths", "Recovered")]

# Count columns that are stacked into long format: measure_cols => vector
measure_cols <- c("Confirmed", "Deaths", "Recovered")

# Reshape the data for charting: chart_data => data.frame
# After reshaping, "vals" holds the measure name (Confirmed / Deaths /
# Recovered) and "vars" holds the corresponding count. seq_len() is used for
# the row names because 1:n yields c(1, 0) when there are no rows.
chart_data <- within(
  reshape(
    washington_vs_nyordered,
    direction = "long",
    varying = measure_cols,
    v.names = "vars",
    idvar = c("version_date", "State"),
    timevar = "vals",
    times = measure_cols,
    new.row.names = seq_len(length(measure_cols) *
                              nrow(washington_vs_nyordered))
  ),
  {
    version_date <- as.Date(as.character(version_date), "%Y-%m-%d")
  }
)

# Chart the data: one line per measure over time, one panel per state.
ggplot(chart_data, aes(x = version_date, y = vars, colour = vals)) +
  geom_line() +
  facet_wrap(. ~ State)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  再見小時候        
                
              
                            
                2021-01-25 23:59
              
            
            
                                                                       
With this line, you are calling the base R `plot()` function:

plot("Confirmed", "Last_update", Atlantic, xaxt='n')


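This plots one character string against another, which is not going to work: the strings cannot be converted to numbers, so there are no finite values from which to compute the axis limits, and that is where the message about Inf values comes from. You most likely need something like this: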

# Plot Confirmed against Last_Update, resolving both names inside the data
# frame, and suppress the default x axis (xaxt = "n"):
with(
  as.data.frame(Atlantic),
  plot(Last_Update, Confirmed, xaxt = "n")
)

# Draw the x axis by hand: a tick at every Last_Update value, labelled as
# yy-mm-dd, with the labels perpendicular to the axis (las = 2):
axis.POSIXct(
  side = 1,
  at = Atlantic$Last_Update,
  labels = format(Atlantic$Last_Update, "%y-%m-%d"),
  las = 2
)



                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复