问题
Here is the code I have for what I thought was a simple line graph
# NOTE: `years` and `calculations` are not columns of the data frame; they are
# standalone vectors created near the end of the script ("New Components"), so
# ggplot cannot line them up with the rows of the data (see the error below).
ggplot(data=top15andAllDatasummary.df, aes(x=years, y=calculations, group=1)) +
geom_line() +
geom_point()
And I got this error:
Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group
I have data in a data frame in R. My x-axis was going to be years and the y-axis was going to be some calculations (16 of them) I constructed per year.
Edited to add
# dput()-style sample of the summary data frame: one column per year
# (2001-2015) and one row per calculated statistic (named via row.names).
structure(list(`2001` = c(349.315750645518, 217.47436370343,
5.17963850977499, 126.661748432313, 57, 39), `2002` = c(703.26693877551,
429.92, 9.32897959183673, 264.017959183673, 161, 108), `2003` = c(314.897774687065,
193.792420027816, 4.08936022253129, 117.015994436718, 54, 37),
`2004` = c(305.988451086957, 190.680027173913, 3.87839673913043,
111.430027173913, 55, 38), `2005` = c(118.528015659408, 74.3175923660387,
1.50942011255199, 42.7010031808172, 10, 8), `2006` = c(120.531992244304,
73.8279205041202, 1.54362578768783, 45.1604459524964, 10,
8), `2007` = c(113.973899988451, 69.7619817530893, 1.44693382607691,
42.7649844092851, 10, 8), `2008` = c(110.676242590059, 67.3693570451436,
1.36285909712722, 41.9440264477884, 9, 7), `2009` = c(101.965558714192,
63.1446534003936, 1.22982724688388, 37.5910780669145, 9,
7), `2010` = c(93.9744360902256, 59.8894736842105, 1.14199785177229,
32.9429645542427, 9, 7), `2011` = c(91.8911316298046, 58.5660296328108,
1.15675327464033, 32.1683487223534, 9, 7), `2012` = c(91.2302181013592,
58.598356337583, 1.16773785691708, 31.4641239068591, 8, 6
), `2013` = c(87.1390443392165, 55.0509040034438, 1.10277658200603,
30.9853637537667, 8, 6), `2014` = c(85.7812132234942, 56.0456831068792,
1.09725045469134, 28.6382796619236, 8, 6), `2015` = c(88.331452900479,
58.526237360298, 1.22362959020756, 28.5815859499734, 8, 6
)), .Names = c("2001", "2002", "2003", "2004", "2005", "2006",
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014",
"2015"), row.names = c("AllDataMeanByYear", "AllDataMeanAggAssault",
"AllDataMeanMurderManSlaughter", "AllDataMeanRobbery", "AllDataMedianByYear",
"AllDataMedianAggAssault"), class = "data.frame")
All Code:
## Total
# Per-row sum of the three crime counts.
lwdata$total <- with(lwdata, murdermanslaughter + Robbery + Aggravated_assault)
## Data Calculations Top 15
# Order rows by Year, then total (both descending), take the 15th total in
# that ordering as the cutoff, and keep every row whose total reaches it.
top15 <- local({
  ranked <- order(lwdata$Year, lwdata$total, decreasing = TRUE)
  cutoff <- lwdata$total[ranked][15]
  lwdata[lwdata$total >= cutoff, ]
})
## Top 15 Means
# Each statistic is a vector with one entry per distinct Year.
Top15MeanByYear <- tapply(X = top15$total, INDEX = top15$Year, FUN = mean)
Top15MeanAggAssault <- tapply(
  X = top15$Aggravated_assault, INDEX = top15$Year, FUN = mean
)
Top15MeanMurderManSlaughter <- tapply(
  X = top15$murdermanslaughter, INDEX = top15$Year, FUN = mean
)
Top15MeanRob <- tapply(X = top15$Robbery, INDEX = top15$Year, FUN = mean)
## All Data Means
AllDataMeanByYear <- tapply(X = lwdata$total, INDEX = lwdata$Year, FUN = mean)
AllDataMeanAggAssault <- tapply(
  X = lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = mean
)
AllDataMeanMurderManSlaughter <- tapply(
  X = lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = mean
)
AllDataMeanRobbery <- tapply(X = lwdata$Robbery, INDEX = lwdata$Year, FUN = mean)
## Top 15 Medians
Top15MedianByYear <- tapply(X = top15$total, INDEX = top15$Year, FUN = median)
Top15MedianAggAssault <- tapply(
  X = top15$Aggravated_assault, INDEX = top15$Year, FUN = median
)
Top15MedianMurderManSlaughter <- tapply(
  X = top15$murdermanslaughter, INDEX = top15$Year, FUN = median
)
Top15MedianRob <- tapply(X = top15$Robbery, INDEX = top15$Year, FUN = median)
## All Data Medians
AllDataMedianByYear <- tapply(
  X = lwdata$total, INDEX = lwdata$Year, FUN = median
)
AllDataMedianAggAssault <- tapply(
  X = lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = median
)
AllDataMedianMurderManSlaughter <- tapply(
  X = lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = median
)
AllDataMedianRobbery <- tapply(
  X = lwdata$Robbery, INDEX = lwdata$Year, FUN = median
)
## Rounding Data To Two Decimal Points
# Every per-year statistic is rounded in place to two decimals.
Top15MeanByYear <- round(Top15MeanByYear, digits = 2)
Top15MeanAggAssault <- round(Top15MeanAggAssault, digits = 2)
Top15MeanMurderManSlaughter <- round(Top15MeanMurderManSlaughter, digits = 2)
Top15MeanRob <- round(Top15MeanRob, digits = 2)
AllDataMeanByYear <- round(AllDataMeanByYear, digits = 2)
AllDataMeanAggAssault <- round(AllDataMeanAggAssault, digits = 2)
# Previously AllDataMeanAggAssault was rounded twice and this one was skipped.
AllDataMeanMurderManSlaughter <- round(AllDataMeanMurderManSlaughter, digits = 2)
AllDataMeanRobbery <- round(AllDataMeanRobbery, digits = 2)
Top15MedianByYear <- round(Top15MedianByYear, digits = 2)
Top15MedianAggAssault <- round(Top15MedianAggAssault, digits = 2)
Top15MedianMurderManSlaughter <- round(Top15MedianMurderManSlaughter, digits = 2)
Top15MedianRob <- round(Top15MedianRob, digits = 2)
AllDataMedianByYear <- round(AllDataMedianByYear, digits = 2)
AllDataMedianAggAssault <- round(AllDataMedianAggAssault, digits = 2)
AllDataMedianMurderManSlaughter <- round(AllDataMedianMurderManSlaughter, digits = 2)
AllDataMedianRobbery <- round(AllDataMedianRobbery, digits = 2)
## Summaries
# Stack the per-year statistics into matrices: one row per statistic,
# one column per year.
AllDataSummary <- rbind(
  AllDataMeanByYear, AllDataMeanAggAssault, AllDataMeanMurderManSlaughter,
  AllDataMeanRobbery, AllDataMedianByYear, AllDataMedianAggAssault,
  AllDataMedianMurderManSlaughter, AllDataMedianRobbery
)
Top15Summary <- rbind(
  Top15MeanByYear, Top15MeanAggAssault, Top15MeanMurderManSlaughter,
  Top15MeanRob, Top15MedianByYear, Top15MedianAggAssault,
  Top15MedianMurderManSlaughter, Top15MedianRob
)
Top15andAllDatasummary <- rbind(AllDataSummary, Top15Summary)
## Class of New Items
class(AllDataSummary)
class(Top15Summary)
# R is case-sensitive: the combined object is `Top15andAllDatasummary`
# (capital T); the lowercase spelling raised "object not found".
class(Top15andAllDatasummary)
## Converting Matrices to Data Frames
AllDataSummary.df <- as.data.frame(AllDataSummary)
Top15Summary.df <- as.data.frame(Top15Summary)
Top15andAllDatasummary.df <- as.data.frame(Top15andAllDatasummary)
## Checking of New Classes
class(AllDataSummary.df)
class(Top15Summary.df)
class(Top15andAllDatasummary.df)
## Verifications for Names of New Components
colnames(Top15andAllDatasummary.df)
rownames(Top15andAllDatasummary.df)
## New Components
# Column names hold the years.
years <- colnames(Top15andAllDatasummary.df)
# The calculated statistics are the rows, so take rownames() here; the
# original used colnames() again and merely duplicated `years`.
calculations <- rownames(Top15andAllDatasummary.df)
## Chicago
# Keep only the top15 rows whose City is "Chicago" (rows with NA City drop out).
Chicago <- top15[top15$City %in% "Chicago", ]
## Basic Plots
# Line chart of Chicago's total per year.
plot(
  x = Chicago$Year,
  y = Chicago$total,
  type = "l",
  col = "blue",
  main = "Chicago-Specific Data",
  xlab = "Year",
  ylab = "Total Violent Crime (minus rape)"
)
## Data Types for Chicago
str(Chicago)
link to full >100K set of data is here
回答1:
Your data frame (let's call it df) has a column for each year, and rownames for each of your calculated variables. This is "wide" data, where each row has multiple data values. ggplot is meant to work with "long" data, in which each row has a single column containing a data value, and other columns that tell us things about that data point (i.e., what variable the data point represents and what year it's from).
The tidyverse library of packages, by Hadley Wickham (who also wrote ggplot), makes it easy to transform data from wide to long and back again.
library(tidyverse)
# Copy the row names into a real column, then stack every year column into
# (year, value) pairs so that each row holds a single data value.
df.new <- df %>%
  mutate(variable = rownames(df)) %>%
  gather(key = year, value = value, -variable)
variable year value
1 AllDataMeanByYear 2001 349.315751
2 AllDataMeanAggAssault 2001 217.474364
3 AllDataMeanMurderManSlaughter 2001 5.179639
4 AllDataMeanRobbery 2001 126.661748
5 AllDataMedianByYear 2001 57.000000
6 AllDataMedianAggAssault 2001 39.000000
7 AllDataMeanByYear 2002 703.266939
8 AllDataMeanAggAssault 2002 429.920000
9 AllDataMeanMurderManSlaughter 2002 9.328980
10 AllDataMeanRobbery 2002 264.017959
11 AllDataMedianByYear 2002 161.000000
12 AllDataMedianAggAssault 2002 108.000000
13 AllDataMeanByYear 2003 314.897775
14 AllDataMeanAggAssault 2003 193.792420
15 AllDataMeanMurderManSlaughter 2003 4.089360
16 AllDataMeanRobbery 2003 117.015994
17 AllDataMedianByYear 2003 54.000000
18 AllDataMedianAggAssault 2003 37.000000
19 AllDataMeanByYear 2004 305.988451
20 AllDataMeanAggAssault 2004 190.680027
... and 70 more rows
This long data can then be sent to ggplot. Note that your original attempt used a variable called "years", which did not exist in the data frame. R (and ggplot) have no way of knowing that your column names (2001:2015) somehow magically represent years.
# One line per variable: year on the x-axis, value on the y-axis.
plot.years <- ggplot(
  df.new,
  aes(year, value, colour = variable, group = variable)
) +
  geom_line()
print(plot.years)
回答2:
Based on your data, I would do this:
library(tidyr)
# Keep the row names as an ordinary column so they survive the reshape.
top15andAllDatasummary.df[["variable"]] <- rownames(top15andAllDatasummary.df)
# Stack all year columns into `years` / `calculations` pairs, one row each.
df.long <- gather(
  top15andAllDatasummary.df,
  key = years, value = calculations,
  -variable
)
The point of this gather call is to restructure your data into this form:
# Preview the first rows of the reshaped (long) data; expected output follows.
head(df.long)
# variable years calculations
# 1 AllDataMeanByYear 2001 349.315751
# 2 AllDataMeanAggAssault 2001 217.474364
# 3 AllDataMeanMurderManSlaughter 2001 5.179639
# 4 AllDataMeanRobbery 2001 126.661748
# 5 AllDataMedianByYear 2001 57.000000
# 6 AllDataMedianAggAssault 2001 39.000000
Having done that, we can proceed to plotting:
# Lines plus points, one series (and colour) per variable.
ggplot(
  df.long,
  aes(years, calculations, colour = variable, group = variable)
) +
  geom_line() +
  geom_point()
Is this your desired result?
来源:https://stackoverflow.com/questions/39946535/ggplot2-error-aesthetics-must-be-either-length-1-or-the-same-as-the-data-16