Difficulty Plotting Time Series in R

半腔热情 提交于 2020-12-15 05:40:50

问题


I have been trying to plot time series data in R. I consulted several different sources online, but I am still having problems creating these plots. I have simulated some data below that represents daily information a fictitious company received from 2014 to 2016:

#create data
# Simulate one record per day for a fictitious company, 2014-01-01 to 2016-01-01.
date_decision_made <- seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by = "day")

# Derive the row count from the date sequence instead of hard-coding 731.
n_days <- length(date_decision_made)

# The dates are stored as "YYYY/MM/DD" text, so they must be parsed back with
# as.Date(x, "%Y/%m/%d") before they can be used on a date axis.
date_decision_made <- format(date_decision_made, "%Y/%m/%d")

# Daily damage amounts (random draws kept in the original order).
property_damages_in_dollars <- rnorm(n_days, mean = 100, sd = 10)
car_damages_in_dollars <- rnorm(n_days, mean = 105, sd = 8)
other_damages_in_dollars <- rnorm(n_days, mean = 104, sd = 9)

# Categorical attributes of each record.
location <- sample(
  c("canada", "usa"), n_days,
  replace = TRUE, prob = c(0.3, 0.7)
)
type_of_house <- sample(
  c("single", "townhome", "rental"), n_days,
  replace = TRUE, prob = c(0.5, 0.3, 0.2)
)
response_variable <- sample(
  c("claim_approved", "claim_rejected"), n_days,
  replace = TRUE, prob = c(0.4, 0.6)
)

# Build the data frame directly. cbind() would first create a character matrix,
# turning every number into text that then has to be parsed back (losing
# precision, or yielding factor codes where strings become factors).
final_dataset <- data.frame(
  date_decision_made,
  property_damages_in_dollars,
  car_damages_in_dollars,
  other_damages_in_dollars,
  location,
  type_of_house,
  response_variable,
  stringsAsFactors = FALSE
)

# Per-series subsets: the date column plus one damage column each.
prop_damage <- final_dataset[, c("date_decision_made", "property_damages_in_dollars")]
car_damage <- final_dataset[, c("date_decision_made", "car_damages_in_dollars")]
other_damage <- final_dataset[, c("date_decision_made", "other_damages_in_dollars")]

# The date column plus all three damage columns.
new <- final_dataset[, c(
  "date_decision_made",
  "property_damages_in_dollars",
  "car_damages_in_dollars",
  "other_damages_in_dollars"
)]

Based on this data, I have tried to plot this data as a time series in R. I tried several methods and all of them are producing errors. I have tried to resolve these problems but I just can't seem to figure them out. Could someone please help me?

#first way (error)
# Why this fails as written:
#  * The two library() calls that share one line below have no newline or `;`
#    between them, which is a parse error.
#  * Several lines of the plot START with `+`. R treats the line before each of
#    them as a complete expression, so the leading `+` has no left-hand operand.
#    Keep each `+` at the END of the previous line instead.
#  * date_decision_made was turned into text by format() and cbind() when the
#    data was created, but scale_x_date() needs a Date column
#    (convert with as.Date(x, "%Y/%m/%d")).
#  * date_breaks = "days" requests one axis label per day across two years.
library(ggplot2)
library(reshape2) library(dplyr)

ggplot() + geom_line(data = prop_damage, aes(x = date_decision_made, y = property_damages_in_dollars, group = 1), color = "red") 
+ scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+ 
geom_line(data = car_damage, aes(x = date_decision_made, y = car_damages_in_dollars, group =1 ), color = "blue") 
+ geom_line(data = other_damage, aes(x = date_decision_made, y = other_damages_in_dollars, group =1), color = "green") 
+ xlab('data_date') + ylab('percent.change')

#second way (error)
# Why this does not give the expected plot:
#  * date_decision_made is still text here, so x is a discrete variable. With no
#    group aesthetic, each date presumably becomes its own one-point group and
#    geom_line() has nothing to connect. Convert the column with
#    as.Date(x, "%Y/%m/%d") first.
#  * limits = c(0, 10000) is far wider than the simulated values (means of
#    roughly 100-105), so any lines would be squashed against the bottom.

ggplot(data = new, aes(x = date_decision_made)) +
  geom_line(aes(y = property_damages_in_dollars, colour = "property_damages_in_dollars")) +
  geom_line(aes(y = car_damages_in_dollars, colour = "car_damages_in_dollars")) +
  geom_line(aes(y =other_damages_in_dollars, colour = "other_damages_in_dollars")) +

  scale_colour_manual("", 
                      breaks = c("property_damages_in_dollars", "car_damages_in_dollars", "other_damages_in_dollars"),
                      values = c("red", "green", "blue")) +
  xlab(" ") +
  scale_y_continuous("Dollars", limits = c(0,10000)) + 
  labs(title="demo graph")

#3rd way error
# Why this fails:
#  * After melt() the date column is still text, but scale_x_date() needs a
#    Date column (convert with as.Date(x, "%Y/%m/%d") before plotting).
#  * group=1 puts all three damage variables into a single group, so they would
#    be joined into one line instead of one line per variable.
#  * date_breaks = "days" requests one axis label per day across two years.

##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))


ggplot(dd) + geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1)) + scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+  scale_colour_manual(values=c("red","green","blue"))


#4th error
# Why this fails:
#  * `new` still carries the text date column as its first column, and ts()
#    expects numeric data. NOTE(review): the text column is presumably coerced
#    to codes and treated as a fourth series; drop it first, e.g. new[, -1].
#  * frequency = 1 with start = c(2014, 1) declares one observation per time
#    unit starting at 2014, so the daily rows are indexed as successive years.
#  * No package that supplies an autoplot() method for ts objects is loaded in
#    the code shown (presumably forecast or ggfortify is needed; confirm).

mymts = ts(new,
           frequency = 1,
           start = c(2014, 1))

autoplot(mymts) +
  ggtitle("Time Series Plot") +
  theme(plot.title = element_text(hjust = 0.5))

#5th Method error
# Why this fails:
#  * ts.plot() is called with `x` and `y`, which are never defined in the code
#    shown; the series created just above are named x1 and x2.
#  * x1 and x2 are built from two-column data frames that still include the
#    text date column, not from the numeric damage column alone.
#  * The last call passes date_decision_made, a vector of date strings, rather
#    than a numeric series.

x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
ts.plot(x, y, gpars = list(col = c("black", "red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))



#6th method error
# Why this does not give the expected plot:
#  * The melted date column is still text, so x is discrete and the lines
#    presumably cannot be connected across dates. Convert it with
#    as.Date(x, "%Y/%m/%d") before plotting.
#  * NOTE(review): qplot() is deprecated in recent ggplot2 releases (confirm);
#    ggplot() + geom_line() is the documented replacement.


##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))

qplot(date_decision_made,value,data=dd,geom='line',color=variable)

#7th way error
# Why this does not give the expected plot:
#  * x1 and x2 are each built from a two-column data frame that still includes
#    the text date column, so cbind() combines four columns, two of which are
#    dates rather than damage amounts. Pass only the numeric column to ts().
#  * frequency = 1 with start = c(2014,1) indexes the daily rows as successive
#    years starting at 2014.
#  * plot.type = "single" draws every column on one shared axis.

x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))

comb_ts <- cbind(x1, x2) 
plot.ts(comb_ts, plot.type = "single")

Could someone please show me what I am doing wrong in this code? Thanks


回答1:


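Try this approach, and be careful with the dates: convert them to class Date before plotting, and use monthly axis breaks, because two years of daily data contain far too many dates to label individually: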

#Data
## Reshape to long format: one row per date and damage type.
## dd_sub is the data frame from the question (date column plus the three
## damage columns).
library(reshape2)
library(ggplot2)
dd <- melt(dd_sub, id.vars = "date_decision_made")
## The dates are stored as "YYYY/MM/DD" text; scale_x_date() needs class Date.
dd$date_decision_made <- as.Date(
  as.character(dd$date_decision_made),
  format = "%Y/%m/%d"
)
#Plot
## No group = 1 here: once x is a real Date, the colour mapping already gives
## one line per damage type, whereas group = 1 would join all three series
## into a single path.
## date_breaks alone sets monthly ticks; the labels include the year because
## the data span more than one.
ggplot(dd) +
  geom_line(aes(x = date_decision_made, y = value, colour = variable)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  scale_colour_manual(values = c("red", "green", "blue")) +
  theme(axis.text.x = element_text(angle = 90))

Output:




回答2:


Another option is to use pivot_longer from tidyr

library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)

# dd_sub is the data frame from the question (date column plus the three
# damage columns).
dd_sub %>%
  # Long format: one row per date and damage type (columns `name` and `value`).
  pivot_longer(cols = -date_decision_made) %>%
  # Parse the "YYYY/MM/DD" text into class Date, which scale_x_date() needs.
  mutate(date_decision_made = ymd(date_decision_made)) %>%
  ggplot() +
  # No group = 1: the colour mapping already gives one line per damage type,
  # whereas group = 1 would join all three series into a single path.
  geom_line(aes(x = date_decision_made, y = value, colour = name)) +
  # date_breaks alone sets monthly ticks; the labels include the year because
  # the data span more than one.
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  scale_colour_manual(values = c("red", "green", "blue")) +
  theme(axis.text.x = element_text(angle = 90))
    


来源:https://stackoverflow.com/questions/64580526/difficulty-plotting-time-series-in-r

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!