ggplot2 y-axis order changes after subsetting

谁都会走 提交于 2020-01-03 03:02:08

问题


I have a function that works as expected until I subset it. The function, plotCalendar() is my attempt at a Calendar Heat Map using ggplot2 with facets. The y-axis order is important because it is for the "WeekOfMonth" - when the order is reversed the data viz does not look like a calendar.

The code is below, first the calling code, then the function to generate some data - generateData(), then the plot function - plotCalendar()

The code works as expected when I used df for the data but when I used df2, the subsetted data, the order of the WeekOfMonth is reversed along the y-axis.

library(ggplot2)
library(ProgGUIinR)
library(chron)

df <- generateData()
plotCalendar(df, dateFieldName = "dates", numericFieldName = "counts", yLab = "Month of Year")
df2 <- df[df$filterField == 42, ]
plotCalendar(df2, dateFieldName = "dates", numericFieldName = "counts", yLab = "Month of Year")

The two functions, one to generate test data, the other to plot the Calendar

generateData <- function()
{
      set.seed(42)
      dates <- seq(as.Date("2012/01/01"), as.Date("2012/6/30"), by = "1 day")
      counts <- 1:length(dates)
      filterField <- sample(1:42,length(dates),replace=T)
      df <- data.frame(dates, counts, filterField)

      return(df)
}


plotCalendar <- function(data, dateFieldName, numericFieldName, title = "Title", yLab = "Y Label", fillLab = "Fill Label", lowColor = "moccasin", highColor = "dodgerblue")
{

      agg <- aggregate(as.formula(paste(numericFieldName, "~", dateFieldName)), data, sum)

      names(agg)[names(agg) == dateFieldName] <- "DateField"
      names(agg)[names(agg) == numericFieldName] <- "NumericField"

      minMonth <- as.POSIXlt(min(agg$DateField))$mon + 1
      maxMonth <- as.POSIXlt(max(agg$DateField))$mon + 1

      minYear <- as.POSIXlt(min(agg$DateField))$year + 1900
      maxYear <- as.POSIXlt(max(agg$DateField))$year + 1900 

      minDate <- ISOdate(minYear, minMonth, 1)
      maxDate <- ISOdate(maxYear, maxMonth, 1)
      maxDateEndMonth <- as.POSIXlt(as.Date(seq(maxDate, length = 2, by = "1 month")[2]))
      daySeq <- seq(minDate, maxDateEndMonth, by = "1 day")

      daySeq <- as.data.frame(daySeq)
      names(daySeq) <- c("DateField")
      daySeq$DateField <- as.Date(daySeq$DateField)
      agg$DateField <- as.Date(agg$DateField)

      agg <- merge(daySeq, agg, by = "DateField", all.x = T)

      agg$Day <- as.numeric(days(agg$DateField))

      agg$Weekday <- weekdays(agg$DateField)
      agg$Weekday <- factor(agg$Weekday, levels = rev(c("Saturday", "Friday", "Thursday", "Wednesday", "Tuesday", "Monday", "Sunday")))

      agg$Month <- months(agg$DateField)
      agg$Month <- factor(agg$Month, levels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))

      agg$MonthNumber <- as.POSIXlt(agg$DateField)$mon + 1

      agg$Year <-  as.POSIXlt(agg$DateField)$year + 1900

      agg$WeekOfMonth <- 1 + week.of.month(agg$Year, agg$MonthNumber, agg$Day)
      agg$WeekOfMonth <- factor(agg$WeekOfMonth, levels = 6:1)

      #makeSpreadsheet(gActs, "Group Activities - Member Participation")

      View(agg)
      p <- ggplot(agg)
      p <- p + aes(Year, WeekOfMonth, fill = NumericField)

      noData <- subset(agg, is.na(agg$NumericField))

      p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
      if(nrow(noData) > 0)
      {
        p <- p + geom_tile(data = noData, color = "gray", fill = "white")
      }

      p <- p + geom_text(aes(label = paste(paste(rep(" ", 5), collapse = ""), Day)), vjust = 0, size = 3, colour = "black")
      p <- p + geom_text(data = subset(agg, !is.na(NumericField)), aes(label = NumericField), size = 4, vjust = 0.5, hjust = 1, color = 'black', fontface = "bold")
      p <- p + facet_grid(Month ~ Weekday) + scale_fill_gradient(low = lowColor, high = highColor)
      p <- p + labs(title = paste(title, "\n"), y = paste(yLab, "\n"), fill = fillLab)
      p <- p + theme(plot.title = element_text(size = 20, face="bold"),  
                     axis.title.x = element_blank(), 
                     axis.ticks.x = element_blank(),
                     axis.text.x = element_blank(),
                     axis.title.y = element_text(size = 16, face = "bold"), 
                     legend.title = element_text(size = 14, face = "bold"), 
                     legend.text = element_text(size = 11),
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     strip.text = element_text(size = 14, face = "bold"))
      plot(p)
}

Thanks,

Paul


回答1:


If you reverse the order of the to tile layers, it works.

Current:

p <- ggplot(agg, aes(Year, WeekOfMonth, fill = NumericField))
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0) p <- p + geom_tile(data = noData, color = "gray", fill = "white")

New:

p <- ggplot(agg,aes(Year, WeekOfMonth, fill = NumericField))  
noData <- subset(agg, is.na(agg$NumericField)) 
if(nrow(noData) > 0) p <- p + geom_tile(data = noData, color = "gray", fill = "white")
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")

I think the problem is to do with ggplot's treatment of factors,e.g., agg$WeekOfMonth, that have missing levels. One way around this is to avoid making agg$WeekOfMonth a factor.

agg$WeekOfMonth <- 1 + week.of.month(agg$Year, agg$MonthNumber, agg$Day)
p <- ggplot(agg)
p <- p + aes(Year, -WeekOfMonth, fill = NumericField)  
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0)p <- p + geom_tile(data = noData, color = "gray", fill = "white")

To avoid negative y-axis labels, you have to add:

p <- p + scale_y_continuous(label=abs)

to the ggplot layer definitions. This produces the same plot as above, and does not require reversing the order of the tile layers.

EDIT Found a much better way to do this.

By using the na.value-... argument to scale_fill_continuous(...) you can avoid multiple datasets completely.

p <- ggplot(agg)
p <- p + aes(Year, WeekOfMonth, fill = NumericField)
p <- p + geom_tile(aes(fill = NumericField), color = "gray")
p <- p + scale_fill_gradient(low = lowColor, high = highColor, na.value="white")

This avoids the need for noData altogether.

Finally, I suppose you have a reason for displaying the calendars this way, but IMO here is a more intuitive calendar view.

gg.calendar <- function(df) {
  require(ggplot2)
  require(lubridate)
  wom <- function(date) { # week-of-month
    first <- wday(as.Date(paste(year(date),month(date),1,sep="-")))
    return((mday(date)+(first-2)) %/% 7+1)
  }
  df$month <- month(df$dates)
  df$day   <- mday(df$dates)

  rng   <- range(df$dates)
  rng   <- as.Date(paste(year(rng),month(rng),1,sep="-"))
  start <- rng[1]
  end   <- rng[2]
  month(end) <- month(end)+1
  day(end)   <- day(end)  -1

  cal <- data.frame(dates=seq(start,end,by="day"))
  cal$year  <- year(cal$dates)
  cal$month <- month(cal$dates)
  cal$cmonth<- month(cal$dates,label=T)
  cal$day   <- mday(cal$dates)
  cal$cdow  <- wday(cal$dates,label=T)
  cal$dow   <- wday(cal$dates)
  cal$week  <- wom(cal$dates)

  cal        <- merge(cal,df[,c("dates","counts")],all.x=T)

  ggplot(cal, aes(x=cdow,y=-week))+
    geom_tile(aes(fill=counts,colour="grey50"))+
    geom_text(aes(label=day),size=3,colour="grey20")+
    facet_wrap(~cmonth, ncol=3)+
    scale_fill_gradient(low = "moccasin", high = "dodgerblue", na.value="white")+
    scale_color_manual(guide=F,values="grey50")+
    scale_x_discrete(labels=c("S","M","T","W","Th","F","S"))+
    theme(axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    theme(panel.grid=element_blank())+
    labs(x="",y="")+
    coord_fixed()
}
gg.calendar(df)
gg.calendar(df2)


来源:https://stackoverflow.com/questions/23478497/ggplot2-y-axis-order-changes-after-subsetting

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!