Adding grid lines to a ggplot independent of axis breaks

吃可爱长大的小学妹 提交于 2019-12-11 23:50:35

问题


I have data on abundance over a sequence of nucleotides (ordered character sequence of A, T, C, and G) by two different categories. I want to use ggplot to show the coverage as a line plot over the sequence. I've gotten reasonably far and have just a few more hurdles to jump. Sample data:

> dput(so.sample)
structure(list(`1_mes_wt` = c(0, 0, 4.25897346349789, 10.0666645500859, 
10.0666645500859, 48.3974257215669, 78.2102399660521, 250.698665237717, 
250.698665237717, 268.702507606139, 271.79994285232, 301.225577691032, 
301.225577691032, 301.225577691032, 301.225577691032, 311.292242241118, 
311.292242241118, 311.292242241118, 311.292242241118, 311.292242241118, 
311.292242241118, 311.292242241118, 311.292242241118, 311.292242241118, 
311.292242241118, 311.292242241118, 311.292242241118, 321.746086196977, 
321.746086196977, 321.746086196977, 319.616599465228, 308.969165806483, 
285.931991163017, 204.817905653671, 195.332010212244, 170.165348837029, 
170.165348837029, 170.165348837029, 170.165348837029, 37.3628126570497, 
40.8474273090025, 22.456405534807, 22.456405534807, 17.8102526655366, 
17.8102526655366, 17.8102526655366, 17.8102526655366, 17.8102526655366, 
17.8102526655366, 17.8102526655366, 17.8102526655366, 15.6807659337877, 
15.6807659337877, 15.6807659337877, 15.6807659337877, 15.6807659337877, 
15.6807659337877, 15.6807659337877, 15.6807659337877, 15.6807659337877, 
15.6807659337877, 15.6807659337877, 15.6807659337877, 15.6807659337877, 
15.6807659337877, 15.6807659337877, 5.22692197792922, 5.22692197792922, 
5.22692197792922, 5.22692197792922, 5.22692197792922, 5.22692197792922, 
5.22692197792922, 5.22692197792922, 5.22692197792922, 5.22692197792922, 
5.22692197792922, 1.74230732597641, 1.93589702886268, 1.93589702886268, 
4.25897346349789, 2.71025584040775, 2.71025584040775, 68.5307548217387, 
89.6320324363419, 90.0192118421144, 93.310236791181, 93.1166470882947, 
93.5038264940673, 93.8910058998398, 93.8910058998398, 93.8910058998398, 
93.8910058998398, 93.8910058998398, 93.8910058998398, 93.8910058998398, 
93.8910058998398, 93.8910058998398, 93.8910058998398, 93.8910058998398, 
93.8910058998398, 93.8910058998398, 93.8910058998398, 93.8910058998398, 
93.8910058998398, 93.6974161969535, 93.6974161969535, 93.6974161969535, 
93.6974161969535, 93.6974161969535, 93.6974161969535, 93.6974161969535, 
93.6974161969535, 93.6974161969535, 89.2448530305694, 77.2422914516208, 
1.16153821731761, 0, 0, 0, 0), `2_mes_wt` = c(0, 0, 13.3249362857652, 
41.0267775114349, 41.3774337294814, 100.988990797378, 100.988990797378, 
341.188500159198, 343.643093685523, 359.422623497613, 381.163309016493, 
450.242583971645, 450.242583971645, 450.242583971645, 450.242583971645, 
450.242583971645, 450.242583971645, 450.242583971645, 450.242583971645, 
450.242583971645, 450.242583971645, 450.242583971645, 450.242583971645, 
450.242583971645, 450.242583971645, 450.242583971645, 450.242583971645, 
450.242583971645, 450.242583971645, 450.242583971645, 450.242583971645, 
426.047304926439, 366.085091640496, 308.928128098924, 201.977981594756, 
201.977981594756, 205.133887557174, 205.133887557174, 205.133887557174, 
85.5601172033343, 98.8850534890995, 17.1821546842762, 17.1821546842762, 
17.1821546842762, 17.1821546842762, 17.1821546842762, 17.1821546842762, 
16.8314984662297, 16.8314984662297, 16.8314984662297, 16.8314984662297, 
50.8451516167356, 52.5984327069678, 52.5984327069678, 52.5984327069678, 
52.5984327069678, 52.5984327069678, 52.5984327069678, 52.5984327069678, 
52.5984327069678, 52.5984327069678, 52.5984327069678, 52.5984327069678, 
52.5984327069678, 52.5984327069678, 52.5984327069678, 52.5984327069678, 
52.5984327069678, 52.5984327069678, 52.5984327069678, 52.5984327069678, 
52.5984327069678, 52.5984327069678, 52.5984327069678, 52.5984327069678, 
52.5984327069678, 52.5984327069678, 39.2734964212026, 37.5202153309704, 
42.0787461655743, 54.7023700152465, 54.7023700152465, 54.7023700152465, 
113.261958429004, 114.313927083143, 114.313927083143, 116.067208173376, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 82.0535550228698, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 82.0535550228698, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 82.0535550228698, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 82.0535550228698, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 82.0535550228698, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 82.0535550228698, 
82.0535550228698, 82.0535550228698, 82.0535550228698, 74.3391182258479, 
63.4687754664078, 0, 0, 0, 0, 0), nucl = 47738064:47738184, base = c("T", 
"C", "A", "A", "A", "A", "A", "A", "G", "A", "C", "T", "A", "G", 
"T", "C", "A", "A", "G", "T", "G", "C", "A", "G", "T", "A", "G", 
"T", "G", "A", "G", "A", "A", "G", "G", "G", "G", "G", "G", "A", 
"A", "A", "G", "T", "G", "T", "A", "G", "A", "A", "C", "A", "G", 
"G", "A", "G", "T", "T", "C", "A", "A", "T", "C", "T", "G", "T", 
"A", "A", "C", "T", "G", "A", "C", "T", "G", "T", "G", "A", "A", 
"C", "A", "A", "T", "C", "A", "A", "T", "T", "G", "A", "G", "A", 
"T", "A", "A", "C", "T", "C", "A", "C", "T", "A", "C", "C", "T", 
"T", "C", "G", "G", "A", "C", "C", "A", "G", "C", "C", "A", "A", 
"T", "G", "C")), .Names = c("1_mes_wt", "2_mes_wt", "nucl", "base"
), row.names = c(NA, -121L), class = "data.frame")

To show both the sequence (base) and the sequence positions (nucl) I plot the position as factors and then use a text geom to label the axis with the sequence. It works reasonably well:

pdf("so_sample.pdf",width=15,height=7)
ggplot(melt(so.sample,id.vars=c("nucl","base")), aes(factor(nucl), 
       value, group=variable, colour=variable)) + 
  geom_line() + geom_text(y=-5,size=3,aes(label=base))
dev.off()

However, plotting these large numbers as factors causes major overlapping and I don't need to see each position label anyway. So I remove some of the breaks.

pdf("so_sample2.pdf",width=15,height=7)
ggplot(melt(so.sample,id.vars=c("nucl","base")), aes(factor(nucl), 
       value, group=variable, colour=variable)) + 
  geom_line() + geom_text(y=-5,size=3,aes(label=base)) + 
  scale_x_discrete(breaks=seq(min(so.sample$nucl),
                   max(so.sample$nucl),10))
dev.off()

I am almost there, but in removing the breaks I have also removed the minor grid lines that are particularly useful alongside the character sequence. Is there any way of adding minor grid lines independent of the breaks specified in scale_x_discrete?

And one side question: The sequence labeling works because it is plotting the same characters in the exact same position for each grouping, and the final colour of these labels is determined by the order of plotting the groups. I would prefer the sequence to be a neutral colour, but I have no idea if it is even possible to give geom_text these values independent of the groupings.


回答1:


Leaving nucl as is without wrapping it in call to factor allows you to use scale_x_continuous, which unlike scale_x_discrete, has a minor_breaks argument.



来源:https://stackoverflow.com/questions/12370175/adding-grid-lines-to-a-ggplot-independent-of-axis-breaks

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!