问题
I am using the R programming language. I am trying to learn how to overlay points on a graph and then visualize them.
Using the following code, I can generate some time series data, aggregate them by month, taking the average/min/max, and plot the following graph:
library(xts)
library(ggplot2)
library(dplyr)
library(plotly)
library(lubridate)
# Fix the RNG seed so the simulated series is reproducible.
set.seed(123)

# Simulated daily time series ----
date_decision_made <- seq(
  as.Date("2014/1/1"),
  as.Date("2016/1/1"),
  by = "day"
)
property_damages_in_dollars <- rnorm(731, 100, 10)
final_data <- data.frame(date_decision_made, property_damages_in_dollars)

# Monthly aggregation ----
# Label every day with its "YYYY-MM" month and store the label as a factor.
final_data$year_month <- as.factor(
  format(as.Date(final_data$date_decision_made), "%Y-%m")
)

# One row per month holding the maximum, mean and minimum damages.
f <- final_data %>%
  group_by(year_month) %>%
  summarise(
    max_value = max(property_damages_in_dollars),
    mean_value = mean(property_damages_in_dollars),
    min_value = min(property_damages_in_dollars)
  )

# Plot ----
# Upper edge of the band: the monthly maximum, drawn with a transparent line.
fig <- plot_ly(
  f,
  x = ~year_month,
  y = ~max_value,
  type = "scatter",
  mode = "lines",
  line = list(color = "transparent"),
  showlegend = FALSE,
  name = "max_value"
)

# Lower edge of the band: the monthly minimum. fill = "tonexty" shades the
# area between this trace and the previously added one.
fig <- fig %>%
  add_trace(
    y = ~min_value,
    type = "scatter",
    mode = "lines",
    fill = "tonexty",
    fillcolor = "rgba(0,100,80,0.2)",
    line = list(color = "transparent"),
    showlegend = FALSE,
    name = "min_value"
  )

# Monthly mean drawn as a visible line on top of the band.
fig <- fig %>%
  add_trace(
    x = ~year_month,
    y = ~mean_value,
    type = "scatter",
    mode = "lines",
    line = list(color = "rgb(0,100,80)"),
    name = "Average"
  )

# Title, background colours and identical grid/tick styling for both axes.
fig <- fig %>%
  layout(
    title = "Average Property Damages",
    paper_bgcolor = "rgb(255,255,255)",
    plot_bgcolor = "rgb(229,229,229)",
    xaxis = list(
      title = "Months",
      gridcolor = "rgb(255,255,255)",
      showgrid = TRUE,
      showline = FALSE,
      showticklabels = TRUE,
      tickcolor = "rgb(127,127,127)",
      ticks = "outside",
      zeroline = FALSE
    ),
    yaxis = list(
      title = "Dollars",
      gridcolor = "rgb(255,255,255)",
      showgrid = TRUE,
      showline = FALSE,
      showticklabels = TRUE,
      tickcolor = "rgb(127,127,127)",
      ticks = "outside",
      zeroline = FALSE
    )
  )

# Print the figure.
fig
Now (on the same plot "fig"), for each month, I am trying to plot all the observations for that month in a vertical fashion. I am trying to create something like this:
With a bit of data manipulation, the following code can produce the graph below : plot( final_data$year_month, final_data$property_damages_in_dollars)
Can someone please show me how to extend this solution for a plotly diagram (i.e. enhance the "fig" object)?
Thanks
回答1:
I usually find it simpler to build the plot with ggplot2 and then convert it to plotly with the ggplotly() function. Hopefully this helps you.
library(xts)
library(ggplot2)
library(dplyr)
library(plotly)
library(lubridate)
# Fix the RNG seed so the simulated series is reproducible.
set.seed(123)

# Simulated daily time series ----
date_decision_made <- seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by = "day")
property_damages_in_dollars <- rnorm(731, 100, 10)
final_data <- data.frame(date_decision_made, property_damages_in_dollars)

# Monthly aggregation ----
# Keep one row per day, attach that month's mean / max / min to every row, and
# compute `month_end` (the day before the next month starts) so that all
# observations of a month share a single x position.
dat <- final_data %>%
  mutate(
    month = month(date_decision_made),
    year = year(date_decision_made),
    month_end = ceiling_date(date_decision_made, unit = "month") - 1
  ) %>%
  group_by(month, year) %>%
  mutate(
    mean_val = mean(property_damages_in_dollars, na.rm = TRUE),
    max_val = max(property_damages_in_dollars, na.rm = TRUE),
    min_val = min(property_damages_in_dollars, na.rm = TRUE)
  ) %>%
  ungroup() # drop the grouping so later operations on `dat` are not per-group

# Plot ----
# Ribbon = monthly min-max band, points = raw daily observations stacked
# vertically at the month end, line = monthly mean.
# NOTE: `size` sets the line thickness here; ggplot2 >= 3.4.0 prefers
# `linewidth` for lines and warns about `size`. It is kept so the code also
# runs on older ggplot2 versions.
p <- ggplot(data = dat) +
  geom_ribbon(
    aes(x = month_end, ymin = min_val, ymax = max_val),
    alpha = 0.2
  ) +
  geom_point(
    aes(x = month_end, y = property_damages_in_dollars),
    alpha = 0.3
  ) +
  geom_line(
    aes(x = month_end, y = mean_val),
    size = 1.25
  ) +
  labs(y = "Dollars", x = "Months") +
  theme_minimal()

# Convert the ggplot object into an interactive plotly figure.
ggplotly(p)
回答2:
To have full flexibility with regard to formatting your markers, you can use add_trace() with subsets of your data frame final_data, using the following addition to your code:
# Split the daily observations by month and add one marker trace per month,
# so that each month's points can be styled independently.
date_split <- split(final_data, final_data$year_month)
# seq_along() is safe for an empty list, unlike 1:length(), which would
# iterate over c(1, 0).
for (i in seq_along(date_split)) {
  fig <- fig %>%
    add_trace(
      y = date_split[[i]]$property_damages_in_dollars,
      x = date_split[[i]]$year_month,
      mode = "markers"
    )
}
Result 1:
If you'd like only black markers, you can add the following to add_trace():
# Fully opaque black markers (alpha = 1).
marker = list(color = 'rgba(0,0,0, 1)')
Result 2:
And if you'd like to adjust the transparency of your markers, you can do so directly through the last argument of rgba(), for example:
# Black markers; the last rgba() component is the alpha (opacity) value.
marker = list(color = "rgba(0,0,0, 0.2)")
Result 3:
Complete code:
library(xts)
library(ggplot2)
library(dplyr)
library(plotly)
library(lubridate)
# Fix the RNG seed so the simulated series is reproducible.
set.seed(123)

# Simulated daily time series ----
date_decision_made <- seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by = "day")
property_damages_in_dollars <- rnorm(731, 100, 10)
final_data <- data.frame(date_decision_made, property_damages_in_dollars)

# Monthly aggregation ----
# Label every day with its "YYYY-MM" month and store the label as a factor.
final_data$year_month <- format(as.Date(final_data$date_decision_made), "%Y-%m")
final_data$year_month <- as.factor(final_data$year_month)

# One row per month holding the maximum, mean and minimum damages.
f <- final_data %>%
  group_by(year_month) %>%
  summarise(
    max_value = max(property_damages_in_dollars),
    mean_value = mean(property_damages_in_dollars),
    min_value = min(property_damages_in_dollars)
  )

# Plot ----
# Upper edge of the band: the monthly maximum, drawn with a transparent line.
fig <- plot_ly(
  f,
  x = ~year_month, y = ~max_value, type = "scatter", mode = "lines",
  line = list(color = "transparent"),
  showlegend = FALSE, name = "max_value"
)

# Lower edge of the band: the monthly minimum. fill = "tonexty" shades the
# area between this trace and the previously added one.
fig <- fig %>%
  add_trace(
    y = ~min_value, type = "scatter", mode = "lines",
    fill = "tonexty", fillcolor = "rgba(0,100,80,0.2)",
    line = list(color = "transparent"),
    showlegend = FALSE, name = "min_value"
  )

# Monthly mean drawn as a visible line on top of the band.
fig <- fig %>%
  add_trace(
    x = ~year_month, y = ~mean_value, type = "scatter", mode = "lines",
    line = list(color = "rgb(0,100,80)"),
    name = "Average"
  )

# Title, background colours and identical grid/tick styling for both axes.
fig <- fig %>%
  layout(
    title = "Average Property Damages",
    paper_bgcolor = "rgb(255,255,255)", plot_bgcolor = "rgb(229,229,229)",
    xaxis = list(
      title = "Months",
      gridcolor = "rgb(255,255,255)",
      showgrid = TRUE,
      showline = FALSE,
      showticklabels = TRUE,
      tickcolor = "rgb(127,127,127)",
      ticks = "outside",
      zeroline = FALSE
    ),
    yaxis = list(
      title = "Dollars",
      gridcolor = "rgb(255,255,255)",
      showgrid = TRUE,
      showline = FALSE,
      showticklabels = TRUE,
      tickcolor = "rgb(127,127,127)",
      ticks = "outside",
      zeroline = FALSE
    )
  )

# Overlay the raw observations ----
# One marker trace per month, so each month's points can be styled on its own.
date_split <- split(final_data, final_data$year_month)
# seq_along() is safe for an empty list, unlike 1:length(), which would
# iterate over c(1, 0).
for (i in seq_along(date_split)) {
  fig <- fig %>%
    add_trace(
      y = date_split[[i]]$property_damages_in_dollars,
      x = date_split[[i]]$year_month,
      mode = "markers",
      # Semi-transparent black; use 'rgba(0,0,0, 1)' for fully opaque markers.
      marker = list(color = "rgba(0,0,0, 0.2)")
    )
}

# Print the figure.
fig
回答3:
The following addition to your last line of code:
# Overlay every raw observation from `final_data` as semi-transparent markers
# on top of the existing `fig`; the result is printed, not assigned.
fig %>%
  add_trace(
    data = final_data,
    x = ~year_month,
    y = ~property_damages_in_dollars,
    mode = "markers",
    name = "Property Damage in Dollars",
    marker = list(color = " rgba(46, 49, 49, 1)", opacity = 0.2)
  )
produces the following plot, where the arguments color and opacity can be adjusted to your preferred styling. We used the data.frame final_data, since that is where the points are. The variable year_month was already set by yourself, so there is no additional data wrangling required. To actually generate the dots, be sure to set mode = "markers" in the add_trace() function.
来源:https://stackoverflow.com/questions/65650991/r-overlaying-points-on-a-graph