问题
I believe my question is very similar to this post. The only difference is that my aes fill is a factor with multiple levels. This is what I am after
and this is how far I have gotten:
# Simulate 100 loans, each with a status and a Prosper score, then compute
# the share of every loan status within each score.
set.seed(123)  # fixed seed so the simulated sample is reproducible
n <- 100

# Both draws use equal probabilities (the default when `prob` is not given).
LoanStatus <- sample(
  c("Chargedoff", "Completed", "Current", "Defaulted", "PastDue"),
  n,
  replace = TRUE
)
ProsperScore <- sample(1:11, n, replace = TRUE)

# Build the data frame once; naming the factor column explicitly avoids the
# auto-generated column name that an unnamed `factor(LoanStatus)` would get.
df <- data.frame(ProsperScore, LoanStatus = factor(LoanStatus))

# Cross-tabulate, normalise each ProsperScore row to sum to 1 (margin 1), and
# flatten to long format with columns ProsperScore, LoanStatus and Freq.
probs <- data.frame(prop.table(table(df), 1))
回答1:
Code for the stacked bar plot could look something like this:
library(ggplot2)

# Quarter-step tick positions for the proportion axis.
brks <- seq(0, 1, by = 0.25)

# One stacked column per ProsperScore; segment heights are the precomputed
# proportions in Freq, filled by LoanStatus.
ggplot(probs, aes(ProsperScore, Freq, fill = LoanStatus)) +
  geom_col() +
  scale_x_discrete(breaks = c(3, 6, 9)) +
  scale_y_continuous(breaks = brks, labels = scales::percent(brks))
More complete code, demonstrating how you would go about adding percentages to the plot, is here:
library(ggplot2)
# dplyr (not plyr) provides `%>%` and the verbs used below; plyr exports no
# pipe operator, so the pipeline cannot run with plyr alone.
library(dplyr)

brks <- c(0, 0.25, 0.5, 0.75, 1)

# Reverse the fill levels (as in the original recipe); this sets the legend
# and stacking order of the loan statuses.
probs$LoanStatus <- factor(
  probs$LoanStatus,
  levels = rev(levels(probs$LoanStatus))
)

# Compute the vertical midpoint of every bar segment for its label.
# ggplot2 stacks groups in reverse level order (last level at the bottom), so
# sort each score's rows from last level to first before accumulating. Doing
# this explicitly keeps labels aligned with their segments regardless of the
# incoming row order or of how often this snippet is rerun.
probs <- probs %>%
  arrange(ProsperScore, desc(LoanStatus)) %>%
  group_by(ProsperScore) %>%
  mutate(
    pos = cumsum(Freq) - 0.5 * Freq,
    # Empty segments get no label.
    pos = if_else(Freq > 0, pos, NA_real_)
  ) %>%
  ungroup()  # do not leave a grouped tibble behind

ggplot(probs, aes(x = ProsperScore, y = Freq, fill = LoanStatus)) +
  geom_col() +
  scale_y_continuous(breaks = brks, labels = scales::percent(brks)) +
  # ProsperScore is a factor, so breaks are given as level labels.
  scale_x_discrete(breaks = c("3", "6", "9")) +
  geom_text(
    aes(y = pos, label = paste0(round(100 * Freq), "%")),
    size = 2,
    na.rm = TRUE  # rows with NA pos are intentionally unlabelled
  )
To only show the percentages in the first column of the graph, add %>%
dplyr::mutate(pos=ifelse(ProsperScore==1,pos,NA))
to the end of the dplyr pipeline that computes pos.
来源:https://stackoverflow.com/questions/45603370/r-stacked-frequency-histogram-with-percentage-of-aggregated-data-based-on