I\'d like to output a chart similar to the one represented on this page (on the right) using R and any package that would make it look good:
http://processtrends.com/pg_
An awful piece of R coding here by me, but it might give you some ideas on how to do it:
This was the data I used taken from the excel file on the linked site:
> dput(redata)
structure(c(1093L, 1182L, 1299L, 1372L, 1319L, 1362L, 1239L,
1162L, 1059L, 921L, 815L, 720L, 835L, 853L, 1034L, 1030L, 1240L,
1388L, 1429L, 1319L, 1231L, 1184L, 1076L, 825L, 991L, 1093L,
854L, 808L, 1079L, 1092L, 1220L, 1251L, 1130L, 1131L, 1052L,
951L, 950L, 1006L, 1112L, 1119L, 1250L, 1322L, 1347L, 1310L,
1215L, 1128L, 1035L, 992L, 1079L, 1018L, 1112L, 1224L, 1323L,
1344L, 1326L, 1267L, 1171L, 1075L, 916L, 932L, 888L, 904L, 939L,
1018L, 1140L, 1174L, 1285L, 1311L, 1298L, 1231L, 1091L, 1088L,
991L, 1028L, 1177L, 1322L, 1322L, 1398L, 1389L, 1174L, 1196L,
1115L, 756L, 496L, 693L, 673L, 748L, 777L, 820L, 948L, 966L,
1027L, 960L, 865L, 767L, 675L, 765L, 732L, 613L, 632L, 659L,
705L, 684L, 734L, 715L, 626L, 551L, 487L, 500L, 536L, 575L, 595L,
736L, 798L, 832L, 797L, 792L, 726L, 650L, 584L, 567L, 524L, 574L,
571L, 591L, 657L, 699L, 756L, 867L, 795L, 760L, 685L, 609L, 588L,
521L, 581L, 614L, 623L, 668L, 702L, 777L, 697L, 647L, 562L, 523L,
508L, 493L, 504L, 534L, 586L, 621L, 620L, 636L, 600L, 549L, 557L
), .Dim = 12:13, .Dimnames = list(c("Jan", "Feb", "Mar", "Apr",
"May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), c("X1993",
"X1994", "X1995", "X1996", "X1997", "X1998", "X1999", "X2000",
"X2001", "X2002", "X2003", "X2004", "X2005")))
And here's my woeful coding... so much cleanup is possible here, but it is a quick test of the possibilities.
monthnames <- c(
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec"
)
# size of window
windows(w=6,h=3)
# margins
par(
mar=c(5.1,5.1,2.1,2.1),
cex.axis=0.7
)
# set up plot with the number of categories and the y limits
# yaxs="i" sets the yaxis as having no separation from the corner point
ylimlp <- c(0,max(redata))*1.06
plot(1:156, type="n", xaxt="n", ylim=ylimlp, ann=FALSE, yaxs="i", xaxs="i", bty="l", las="1")
abline(v=seq(13,156,13),lty=1,col="grey")
title(xlab="Month", col.lab=rgb(0,0,0), font.lab=2, cex.lab=0.75)
title(ylab="Listings", col.lab=rgb(0,0,0), font.lab=2, cex.lab=0.75)
lines(redata[1,],type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*1),redata[2,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*2),redata[3,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*3),redata[4,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*4),redata[5,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*5),redata[6,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*6),redata[7,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*7),redata[8,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*8),redata[9,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*9),redata[10,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*10),redata[11,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*11),redata[12,]),type="l",pch=NA,lwd=1,col="grey")
redatamonthmean <- apply(redata,1,mean)
lines(rep(redatamonthmean[1],13),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*1),rep(redatamonthmean[2],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*2),rep(redatamonthmean[3],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*3),rep(redatamonthmean[4],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*4),rep(redatamonthmean[5],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*5),rep(redatamonthmean[6],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*6),rep(redatamonthmean[7],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*7),rep(redatamonthmean[8],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*8),rep(redatamonthmean[9],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*9),rep(redatamonthmean[10],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*10),rep(redatamonthmean[11],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*11),rep(redatamonthmean[12],13)),type="l",pch=NA,lwd=1,col="black")
mtext(monthnames[1], side=1, cex=0.7, at=6.5)
mtext(monthnames[2], side=1, cex=0.7, at=6.5*3)
mtext(monthnames[3], side=1, cex=0.7, at=6.5*5)
mtext(monthnames[4], side=1, cex=0.7, at=6.5*7)
mtext(monthnames[5], side=1, cex=0.7, at=6.5*9)
mtext(monthnames[6], side=1, cex=0.7, at=6.5*11)
mtext(monthnames[7], side=1, cex=0.7, at=6.5*13)
mtext(monthnames[8], side=1, cex=0.7, at=6.5*15)
mtext(monthnames[9], side=1, cex=0.7, at=6.5*17)
mtext(monthnames[10], side=1, cex=0.7, at=6.5*19)
mtext(monthnames[11], side=1, cex=0.7, at=6.5*21)
mtext(monthnames[12], side=1, cex=0.7, at=6.5*23)
And the example image
The stats
package in R base already has a function to do this. Here is my one-liner and the output that it produces
monthplot(redata, labels = month.abb, ylab = 'Listings')
Building on this an example of using monthplot for a weekly cycle plot is here (gives full R code and source data): http://figshare.com/figures/index.php/OpenURL_Router_Data:_Total_Requests_by_Weekday
monthplot(ts(sdf$values, frequency = 7, start=c(12,5)), labels = dow, ylab = "No. requests / day", xlab = "Weekday")
which gives this weekly cycle plot: Example of monthplot to create a graph showing a weekly cycle http://figshare.com/figures/images/a/a7/Total_requests_by_weekday_01_Apr_to_31_Jul_2011.jpeg
Of course no graphical challenge will be complete without a ggplot
solution. The tricky bit is to use ddply
to summarise the monthly averages, and to pass this as data to a separate layer to ggplot
.
library(lubridate)
library(plyr)
library(ggplot2)
df$month <- factor(month(df$dates), levels=1:12, labels=month.abb, ordered=TRUE)
df$year <- year(df$dates)
hline.data <- ddply(df, .(month), summarize, avgvalue=mean(values))
ggplot() +
geom_line(aes(x=year, y=values, group=month), data=df, colour="blue") +
geom_hline(aes(yintercept=avgvalue), data=hline.data, colour="blue", size=2) +
facet_grid(~month) +
opts(axis.text.x = theme_blank()) +
xlab("")
The data:
df <- structure(list(dates = structure(c(8415, 8446, 8474, 8505, 8535,
8566, 8596, 8627, 8658, 8688, 8719, 8749, 8780, 8811, 8839, 8870,
8900, 8931, 8961, 8992, 9023, 9053, 9084, 9114, 9145, 9176, 9204,
9235, 9265, 9296, 9326, 9357, 9388, 9418, 9449, 9479, 9510, 9541,
9570, 9601, 9631, 9662, 9692, 9723, 9754, 9784, 9815, 9845, 9876,
9907, 9935, 9966, 9996, 10027, 10057, 10088, 10119, 10149, 10180,
10210, 10241, 10272, 10300, 10331, 10361, 10392, 10422, 10453,
10484, 10514, 10545, 10575, 10606, 10637, 10665, 10696, 10726,
10757, 10787, 10818, 10849, 10879, 10910, 10940, 10971, 11002,
11031, 11062, 11092, 11123, 11153, 11184, 11215, 11245, 11276,
11306, 11337, 11368, 11396, 11427, 11457, 11488, 11518, 11549,
11580, 11610, 11641, 11671, 11702, 11733, 11761, 11792, 11822,
11853, 11883, 11914, 11945, 11975, 12006, 12036, 12067, 12098,
12126, 12157, 12187, 12218, 12248, 12279, 12310, 12340, 12371,
12401, 12432, 12463, 12492, 12523, 12553, 12584, 12614, 12645,
12676, 12706, 12737, 12767, 12798, 12829, 12857, 12888, 12918,
12949, 12979, 13010, 13041, 13071, 13102, 13132), class = "Date"),
values = c(1093, 1182, 1299, 1372, 1319, 1362, 1239, 1162,
1059, 921, 815, 720, 835, 853, 1034, 1030, 1240, 1388, 1429,
1319, 1231, 1184, 1076, 825, 991, 1093, 854, 808, 1079, 1092,
1220, 1251, 1130, 1131, 1052, 951, 950, 1006, 1112, 1119,
1250, 1322, 1347, 1310, 1215, 1128, 1035, 992, 1079, 1018,
1112, 1224, 1323, 1344, 1326, 1267, 1171, 1075, 916, 932,
888, 904, 939, 1018, 1140, 1174, 1285, 1311, 1298, 1231,
1091, 1088, 991, 1028, 1177, 1322, 1322, 1398, 1389, 1174,
1196, 1115, 756, 496, 693, 673, 748, 777, 820, 948, 966,
1027, 960, 865, 767, 675, 765, 732, 613, 632, 659, 705, 684,
734, 715, 626, 551, 487, 500, 536, 575, 595, 736, 798, 832,
797, 792, 726, 650, 584, 567, 524, 574, 571, 591, 657, 699,
756, 867, 795, 760, 685, 609, 588, 521, 581, 614, 623, 668,
702, 777, 697, 647, 562, 523, 508, 493, 504, 534, 586, 621,
620, 636, 600, 549, 557)), .Names = c("dates", "values"), row.names = c(NA,
-156L), class = "data.frame")