问题
I have a function that works as expected until I subset it. The function, plotCalendar() is my attempt at a Calendar Heat Map using ggplot2 with facets. The y-axis order is important because it is for the "WeekOfMonth" - when the order is reversed the data viz does not look like a calendar.
The code is below, first the calling code, then the function to generate some data - generateData(), then the plot function - plotCalendar()
The code works as expected when I used df for the data but when I used df2, the subsetted data, the order of the WeekOfMonth is reversed along the y-axis.
library(ggplot2)
library(ProgGUIinR)
library(chron)
df <- generateData()
plotCalendar(df, dateFieldName = "dates", numericFieldName = "counts", yLab = "Month of Year")
df2 <- df[df$filterField == 42, ]
plotCalendar(df2, dateFieldName = "dates", numericFieldName = "counts", yLab = "Month of Year")
The two functions, one to generate test data, the other to plot the Calendar
generateData <- function()
{
set.seed(42)
dates <- seq(as.Date("2012/01/01"), as.Date("2012/6/30"), by = "1 day")
counts <- 1:length(dates)
filterField <- sample(1:42,length(dates),replace=T)
df <- data.frame(dates, counts, filterField)
return(df)
}
plotCalendar <- function(data, dateFieldName, numericFieldName, title = "Title", yLab = "Y Label", fillLab = "Fill Label", lowColor = "moccasin", highColor = "dodgerblue")
{
agg <- aggregate(as.formula(paste(numericFieldName, "~", dateFieldName)), data, sum)
names(agg)[names(agg) == dateFieldName] <- "DateField"
names(agg)[names(agg) == numericFieldName] <- "NumericField"
minMonth <- as.POSIXlt(min(agg$DateField))$mon + 1
maxMonth <- as.POSIXlt(max(agg$DateField))$mon + 1
minYear <- as.POSIXlt(min(agg$DateField))$year + 1900
maxYear <- as.POSIXlt(max(agg$DateField))$year + 1900
minDate <- ISOdate(minYear, minMonth, 1)
maxDate <- ISOdate(maxYear, maxMonth, 1)
maxDateEndMonth <- as.POSIXlt(as.Date(seq(maxDate, length = 2, by = "1 month")[2]))
daySeq <- seq(minDate, maxDateEndMonth, by = "1 day")
daySeq <- as.data.frame(daySeq)
names(daySeq) <- c("DateField")
daySeq$DateField <- as.Date(daySeq$DateField)
agg$DateField <- as.Date(agg$DateField)
agg <- merge(daySeq, agg, by = "DateField", all.x = T)
agg$Day <- as.numeric(days(agg$DateField))
agg$Weekday <- weekdays(agg$DateField)
agg$Weekday <- factor(agg$Weekday, levels = rev(c("Saturday", "Friday", "Thursday", "Wednesday", "Tuesday", "Monday", "Sunday")))
agg$Month <- months(agg$DateField)
agg$Month <- factor(agg$Month, levels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))
agg$MonthNumber <- as.POSIXlt(agg$DateField)$mon + 1
agg$Year <- as.POSIXlt(agg$DateField)$year + 1900
agg$WeekOfMonth <- 1 + week.of.month(agg$Year, agg$MonthNumber, agg$Day)
agg$WeekOfMonth <- factor(agg$WeekOfMonth, levels = 6:1)
#makeSpreadsheet(gActs, "Group Activities - Member Participation")
View(agg)
p <- ggplot(agg)
p <- p + aes(Year, WeekOfMonth, fill = NumericField)
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0)
{
p <- p + geom_tile(data = noData, color = "gray", fill = "white")
}
p <- p + geom_text(aes(label = paste(paste(rep(" ", 5), collapse = ""), Day)), vjust = 0, size = 3, colour = "black")
p <- p + geom_text(data = subset(agg, !is.na(NumericField)), aes(label = NumericField), size = 4, vjust = 0.5, hjust = 1, color = 'black', fontface = "bold")
p <- p + facet_grid(Month ~ Weekday) + scale_fill_gradient(low = lowColor, high = highColor)
p <- p + labs(title = paste(title, "\n"), y = paste(yLab, "\n"), fill = fillLab)
p <- p + theme(plot.title = element_text(size = 20, face="bold"),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 11),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.text = element_text(size = 14, face = "bold"))
plot(p)
}
Thanks,
Paul
回答1:
If you reverse the order of the to tile layers, it works.
Current:
p <- ggplot(agg, aes(Year, WeekOfMonth, fill = NumericField))
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0) p <- p + geom_tile(data = noData, color = "gray", fill = "white")
New:
p <- ggplot(agg,aes(Year, WeekOfMonth, fill = NumericField))
noData <- subset(agg, is.na(agg$NumericField))
if(nrow(noData) > 0) p <- p + geom_tile(data = noData, color = "gray", fill = "white")
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
I think the problem is to do with ggplot's treatment of factors,e.g., agg$WeekOfMonth
, that have missing levels. One way around this is to avoid making agg$WeekOfMonth
a factor.
agg$WeekOfMonth <- 1 + week.of.month(agg$Year, agg$MonthNumber, agg$Day)
p <- ggplot(agg)
p <- p + aes(Year, -WeekOfMonth, fill = NumericField)
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0)p <- p + geom_tile(data = noData, color = "gray", fill = "white")
To avoid negative y-axis labels, you have to add:
p <- p + scale_y_continuous(label=abs)
to the ggplot
layer definitions. This produces the same plot as above, and does not require reversing the order of the tile layers.
EDIT Found a much better way to do this.
By using the na.value-...
argument to scale_fill_continuous(...)
you can avoid multiple datasets completely.
p <- ggplot(agg)
p <- p + aes(Year, WeekOfMonth, fill = NumericField)
p <- p + geom_tile(aes(fill = NumericField), color = "gray")
p <- p + scale_fill_gradient(low = lowColor, high = highColor, na.value="white")
This avoids the need for noData
altogether.
Finally, I suppose you have a reason for displaying the calendars this way, but IMO here is a more intuitive calendar view.
gg.calendar <- function(df) {
require(ggplot2)
require(lubridate)
wom <- function(date) { # week-of-month
first <- wday(as.Date(paste(year(date),month(date),1,sep="-")))
return((mday(date)+(first-2)) %/% 7+1)
}
df$month <- month(df$dates)
df$day <- mday(df$dates)
rng <- range(df$dates)
rng <- as.Date(paste(year(rng),month(rng),1,sep="-"))
start <- rng[1]
end <- rng[2]
month(end) <- month(end)+1
day(end) <- day(end) -1
cal <- data.frame(dates=seq(start,end,by="day"))
cal$year <- year(cal$dates)
cal$month <- month(cal$dates)
cal$cmonth<- month(cal$dates,label=T)
cal$day <- mday(cal$dates)
cal$cdow <- wday(cal$dates,label=T)
cal$dow <- wday(cal$dates)
cal$week <- wom(cal$dates)
cal <- merge(cal,df[,c("dates","counts")],all.x=T)
ggplot(cal, aes(x=cdow,y=-week))+
geom_tile(aes(fill=counts,colour="grey50"))+
geom_text(aes(label=day),size=3,colour="grey20")+
facet_wrap(~cmonth, ncol=3)+
scale_fill_gradient(low = "moccasin", high = "dodgerblue", na.value="white")+
scale_color_manual(guide=F,values="grey50")+
scale_x_discrete(labels=c("S","M","T","W","Th","F","S"))+
theme(axis.text.y=element_blank(),axis.ticks.y=element_blank())+
theme(panel.grid=element_blank())+
labs(x="",y="")+
coord_fixed()
}
gg.calendar(df)
gg.calendar(df2)
来源:https://stackoverflow.com/questions/23478497/ggplot2-y-axis-order-changes-after-subsetting