Tidyverse course: ggplot Code
clp
29 June, 2020
前言
本教程包含tidyverse
课程的ggplot
部分幻灯片中显示的所有代码。并提供了课程中使用的练习的答案。
加载必要的包及内置数据集
library("tidyverse")
#> Warning: package 'ggplot2' was built under R version 3.6.2
#> Warning: package 'tibble' was built under R version 3.6.2
#> Warning: package 'tidyr' was built under R version 3.6.2
#> Warning: package 'purrr' was built under R version 3.6.2
#> Warning: package 'dplyr' was built under R version 3.6.2
library("ggplot2")
msleep
#> # A tibble: 83 x 11
#> name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Chee… Acin… carni Carn… lc 12.1 NA NA 11.9
#> 2 Owl … Aotus omni Prim… <NA> 17 1.8 NA 7
#> 3 Moun… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
#> 4 Grea… Blar… omni Sori… lc 14.9 2.3 0.133 9.1
#> 5 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
#> 6 Thre… Brad… herbi Pilo… <NA> 14.4 2.2 0.767 9.6
#> 7 Nort… Call… carni Carn… vu 8.7 1.4 0.383 15.3
#> 8 Vesp… Calo… <NA> Rode… <NA> 7 NA NA 17
#> 9 Dog Canis carni Carn… domesticated 10.1 2.9 0.333 13.9
#> 10 Roe … Capr… herbi Arti… lc 3 NA NA 21
#> # … with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
class(msleep)
#> [1] "tbl_df" "tbl" "data.frame"
清洗数据
从msleep
中删除NA
值
msleep %>% filter(!is.na(vore)) -> msleep.clean
msleep.clean
#> # A tibble: 76 x 11
#> name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Chee… Acin… carni Carn… lc 12.1 NA NA 11.9
#> 2 Owl … Aotus omni Prim… <NA> 17 1.8 NA 7
#> 3 Moun… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
#> 4 Grea… Blar… omni Sori… lc 14.9 2.3 0.133 9.1
#> 5 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
#> 6 Thre… Brad… herbi Pilo… <NA> 14.4 2.2 0.767 9.6
#> 7 Nort… Call… carni Carn… vu 8.7 1.4 0.383 15.3
#> 8 Dog Canis carni Carn… domesticated 10.1 2.9 0.333 13.9
#> 9 Roe … Capr… herbi Arti… lc 3 NA NA 21
#> 10 Goat Capri herbi Arti… lc 5.3 0.6 NA 18.7
#> # … with 66 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
绘制散点图(Scatterplot)
基础图形
ggplot(
msleep.clean,
aes(x=bodywt, y=sleep_total)
)+geom_point() -> scatterplot
scatterplot
其实,我们可以直接从过滤通过管道符%>%
进入ggplot
,而不需要保存中间数据直接出图(这一波操作很秀!)。
msleep %>%
filter(!is.na(vore)) %>%
ggplot(
aes(x=bodywt, y=sleep_total)
)+geom_point()
然后是各种美化策略
加点颜色
ggplot(
msleep.clean,
aes(x=bodywt, y=sleep_total, colour=vore)
)+geom_point()
另一种加颜色的方法
ggplot(
msleep.clean,
aes(x=bodywt, y=sleep_total)
)+geom_point(aes(colour=vore))
看上去这个数据比较集中在X轴的2000以内,现在这个显示不出更更多信息,要做到一图胜千言,那就要对X轴进行数据转换,常用的方法有取对数值(with log: log axis)
对X轴取取对数展示
ggplot(
msleep.clean,
aes(x=bodywt, y=sleep_total, colour=vore)
)+geom_point() -> scatterplot
scatterplot+scale_x_log10()
一个字:秀!
另一个展示方法
ggplot(
msleep.clean,
aes(x=log(bodywt), y=sleep_total,colour=vore)
)+geom_point()
展示更大的点、轴和图形标题
ggplot(
msleep.clean,
aes(x=log(bodywt), y=sleep_total,colour=vore)
) +
geom_point(size=4) +
xlab("Log Body Weight") +
ylab("Total Hours Sleep") +
ggtitle("Some Sleep Data") -> scatterplot
scatterplot
更换显示主题
theme_set(theme_bw(base_size=18))
scatterplot+theme(plot.title = element_text(hjust = 0.5)) -> scatterplot
scatterplot
更改x轴和y轴上的配色方案和刻度,并改进图例 这将添加到先前的图形中,而不是重新创建它。
scatterplot +
scale_colour_brewer(
palette="Set1",
name="Trophic levels",
labels=c("Carnivore", "Herbivore", "Insectivore", "Omnivore")
) +
scale_x_continuous(breaks=-5:10) +
scale_y_continuous(breaks=seq(0,20, 2)) -> scatterplot
scatterplot
手动更改颜色
scatterplot +
scale_color_manual(
values=c("chocolate3", "chartreuse3", "darkorchid2","cyan3"),
name="Trophic levels",
labels=c("Carnivore", "Herbivore", "Insectivore", "Omnivore")
) -> scatterplot
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
scatterplot
超喜欢这个“Insectivore”颜色
接下来就是练习题了
练习1
文件up_down_expression sion.txt
包含一个表达矩阵,该数据集带有一个额外的列,该列将行分类为3组(上调、下调或不变)。 加载up_down_expression sion.txt
检查下文件的结构并绘制散点图geom_point()
:红色表示上调,蓝色下调灰色不变, - 主标题:表达数据
- 颜色图例:下调,不变,上调
- 轴标签:条件1
和条件2
expression <- read_tsv("up_down_expression.txt")
expression
#> # A tibble: 5,196 x 4
#> Gene Condition1 Condition2 State
#> <chr> <dbl> <dbl> <chr>
#> 1 A4GNT -3.68 -3.44 unchanging
#> 2 AAAS 4.55 4.39 unchanging
#> 3 AASDH 3.72 3.48 unchanging
#> 4 AATF 5.08 5.02 unchanging
#> 5 AATK 0.471 0.560 unchanging
#> 6 AB015752.4 -3.68 -3.59 unchanging
#> 7 ABCA7 3.45 3.83 unchanging
#> 8 ABCA9-AS1 -3.68 -3.59 unchanging
#> 9 ABCC11 -3.53 -1.86 unchanging
#> 10 ABCC3 0.931 3.26 up
#> # … with 5,186 more rows
expression.scatter<-ggplot(expression, aes(Condition1, Condition2, colour=State))+
geom_point()+
scale_colour_manual(values=c("blue", "grey", "red"),
name="State",
labels=c("Down", "Unchanging", "Up"))+
xlab("Condition 1") +
ylab("Condition 2") +
ggtitle("Expression data")+
theme(plot.title = element_text(hjust = 0.5))
expression.scatter
现在,让我们尝试另一种类型的图:条形图。它类似于散点图,但x变量本质上是定性的或绝对的。
Stripchart
ggplot(
msleep.clean,
aes(vore, sleep_total)
)+geom_point()
抖动,变大,上色 jitter, bigger points and colours
ggplot(
msleep.clean,
aes(vore,sleep_total, colour=vore)
) + geom_point(size=4,position="jitter")
调节抖动的范围
ggplot(
msleep.clean,
aes(vore, sleep_total, colour=vore)
) +
geom_jitter(
width = .2,
size=4
) -> stripchart
stripchart
为平均值添加一条线,并为y轴添加标题
stripchart +
stat_summary(
fun.y="mean",
geom='errorbar',
aes(ymin=..y.., ymax=..y..),
width=0.6,
size=1.5,
colour="grey25"
) -> stripchart
#> Warning: `fun.y` is deprecated. Use `fun` instead.
stripchart
箱式图的雏形
一小段计算平均值(mean)和标准误(SEm)的tidyverse
式的代码
msleep.clean %>%
group_by(vore) %>%
summarise(sleep=mean(sleep_total), sem=sd(sleep_total)/sqrt(n()))
#> # A tibble: 4 x 3
#> vore sleep sem
#> <chr> <dbl> <dbl>
#> 1 carni 10.4 1.07
#> 2 herbi 9.51 0.862
#> 3 insecti 14.9 2.65
#> 4 omni 10.9 0.659
继续美化
stripchart +
ylab("Total Hours Sleep") +
xlab("Trophic Levels") +
ggtitle("Some Sleep Data") +
scale_y_continuous(breaks=seq(0, 20, 2)) +
scale_x_discrete(labels=c("Carnivore", "Herbivore", "Insectivore", "Omnivore")) +
theme(legend.position = "none") -> stripchart
stripchart
同前
stripchart +
scale_colour_brewer(palette="Dark2")+
scale_x_discrete(
limit=c("insecti","omni","carni", "herbi"),
labels=c("Insectivore", "Herbivore", "Carnivore", "Omnivore"))+
theme(plot.title = element_text(hjust = 0.5)
) -> stripchart
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
stripchart
library("ggthemes")
## Warning: package 'ggthemes' was built under R version 3.5.3
stripchart+
theme_wsj()+
scale_colour_wsj("colors6")+
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5))
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
现在,让我们尝试一些其他的数据。DownloadFestival数据记录了为期三天的音乐节期间810名音乐会观众的hygiene scores(0-5)。
读入数据进行预处理
read_csv("DownloadFestival.csv") -> festival.data
festival.data
#> # A tibble: 810 x 5
#> ticknumb gender day1 day2 day3
#> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 2111 Male 2.64 1.35 1.61
#> 2 2229 Female 0.97 1.41 0.290
#> 3 2338 Male 0.84 NA NA
#> 4 2384 Female 3.03 NA NA
#> 5 2401 Female 0.88 0.08 NA
#> 6 2405 Male 0.85 NA NA
#> 7 2467 Female 1.56 NA NA
#> 8 2478 Female 3.02 NA NA
#> 9 2490 Male 2.29 NA NA
#> 10 2504 Female 1.11 0.44 0.55
#> # … with 800 more rows
max(festival.data$day1)
#> [1] 3.69
Histogram
ggplot(
festival.data,
aes(day1)
)+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
稍美化一下
ggplot(
festival.data,
aes(day1)
)+geom_histogram(binwidth=0.3)
进一步美化
ggplot(
festival.data,
aes(day1)
)+geom_histogram(binwidth=0.3, color="black", fill="yellow")+
labs(x="Score", y="Counts")+
theme(plot.title = element_text(hjust = 0.5))+
ggtitle("Hygiene at Day 1") -> Day1Histogram
Day1Histogram
现在我们想要画出所有3天每个性别情况。所以对数据进行清洗(reshape)。我们还将删除NAs
。
festival.data %>%
gather(day,score,-ticknumb,-gender) -> festival.data.stack
festival.data.stack %>% filter(!is.na(score)) -> festival.data.stack
festival.data.stack
#> # A tibble: 1,197 x 4
#> ticknumb gender day score
#> <dbl> <chr> <chr> <dbl>
#> 1 2111 Male day1 2.64
#> 2 2229 Female day1 0.97
#> 3 2338 Male day1 0.84
#> 4 2384 Female day1 3.03
#> 5 2401 Female day1 0.88
#> 6 2405 Male day1 0.85
#> 7 2467 Female day1 1.56
#> 8 2478 Female day1 3.02
#> 9 2490 Male day1 2.29
#> 10 2504 Female day1 1.11
#> # … with 1,187 more rows
ggplot(festival.data.stack,aes(score))+
geom_histogram(binwidth=0.3, color="black", fill="yellow")+
labs(x="Hygiene score", y="Counts")+
facet_grid(gender~day) -> histogram.3days
histogram.3days
可以修改小分面(facets)的标签。下面是一些示例。
histogram.3days<-ggplot(festival.data.stack,aes(score))+
geom_histogram(binwidth=0.3, color="black", fill="yellow")+
labs(x="Hygiene score", y="Counts")+
facet_grid(gender~day)+
theme(strip.text.x = element_text(size = 16, colour = "purple", face="bold"),
strip.text.y = element_text(size=12, face="bold"))
histogram.3days
密度图
density.3days<-ggplot(festival.data.stack, aes(score))+
geom_density(aes(group=day, fill=day), alpha=0.5)+
facet_grid(~gender)
density.3days
练习 2: Plot a stripchart representing all 3 days and each gender
stripchart <-ggplot(festival.data.stack, aes(gender, score, colour=gender))+
facet_grid(~day)+
geom_point(position="jitter")+
scale_colour_manual(values=c("darkorange", "darkorchid4"))+
stat_summary(geom='errorbar',fun.y=mean, aes(ymin=..y.., ymax=..y..),
colour="black", width=0.8, size=1.5)+
labs(x="Gender", y="Score")+
theme(legend.position = "none")
#> Warning: `fun.y` is deprecated. Use `fun` instead.
stripchart
从条形图中,我们可以为平均值或任何其他描述性地理位置添加一条线作为统计汇总。
具体操作分2步:
stripchart<-ggplot(festival.data.stack, aes(gender, score,colour=gender))+facet_grid(~day)+
geom_point(position="jitter")+
scale_colour_manual(values=c("darkorange", "darkorchid4"))+
labs(x="Gender", y="Score")+
theme(legend.position = "none")
stripchart
加均值线
stripchart+
stat_summary(fun.y="mean",geom="errorbar", aes(ymin=..y.., ymax=..y..), width=0.8, colour="black", size = 1.3)
#> Warning: `fun.y` is deprecated. Use `fun` instead.
加个框框
stripchart+
geom_boxplot(alpha=0, colour="black")
进一步美化
stripchart+
geom_boxplot(aes(gender, score, fill=gender), alpha=0.5, colour="black")+
scale_fill_manual(values=c("darkorange", "darkorchid4"))
说到把图表做得更漂亮,我们可以改进箱式图(boxplot)。
如果需要,我们可以更改x轴上的顺序:boxplot+scale_x_discrete(limits=c(“Male”,“Female”))
boxplot<-ggplot(festival.data.stack, aes(gender,score))+
geom_boxplot()+
facet_grid(~day)
boxplot
boxplot <-ggplot(festival.data.stack, aes(gender,score, fill=gender))+
facet_grid(~day)+
stat_boxplot(geom="errorbar", width=0.5)+
geom_boxplot(outlier.shape=8)+
theme(legend.position = "none")+
scale_fill_manual(values=c("sienna1","darkorchid3 "))+
labs(x="Gender", y="Score")
boxplot
Violinplot (beanplot) 小提琴图
stripchart+
geom_violin(alpha=0, colour="black")
基础款
violinplot<-ggplot(festival.data.stack, aes(gender,score))+geom_violin()+facet_grid(~day)
violinplot
美化版
violinplot<-ggplot(festival.data.stack, aes(gender,score,fill=gender))+
facet_grid(~day)+
geom_violin(trim = FALSE)+
scale_fill_manual(values=c("goldenrod2","darkgrey"))+
theme(legend.position="none")+
stat_summary(fun.y=median, geom="point", size=2, color="black")+
labs(x="Gender", y="Hygiene scores")
#> Warning: `fun.y` is deprecated. Use `fun` instead.
violinplot
叠加箱式图
violinplot+geom_boxplot(width=0.3)
violinplot+geom_jitter(width=0.1,size=1, shape=1)
Barchart 柱状图:首先,我们要计算平均值和sem,并将这些值存储在一个文件中。
festival.data.stack %>%
group_by(gender,day) %>%
summarise(mean=mean(score), sem=sd(score)/sqrt(n())) -> score.sem
score.sem
#> # A tibble: 6 x 4
#> # Groups: gender [2]
#> gender day mean sem
#> <chr> <chr> <dbl> <dbl>
#> 1 Female day1 1.88 0.0316
#> 2 Female day2 1.08 0.0608
#> 3 Female day3 1.10 0.0990
#> 4 Male day1 1.60 0.0362
#> 5 Male day2 0.773 0.0585
#> 6 Male day3 0.829 0.0721
barchart<-ggplot(score.sem, aes(day,mean, fill=gender))+
geom_bar(stat="identity")
barchart
加误差线
barchart<-ggplot(score.sem, aes(day,mean, fill=gender))+
geom_bar(stat="identity", position="dodge")+
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem), position="dodge")
barchart
barchart<-ggplot(score.sem, aes(day,mean, fill=gender))+
geom_bar(position="dodge", stat="identity")+
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem), position="dodge")
barchart
美化
barchart<-ggplot(score.sem, aes(day,mean, fill=gender))+
geom_bar(position="dodge", colour="black",stat="identity",size=1)+
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem), width=.5, position=position_dodge(0.9), size=1)+
ylab("Mean scores")+
ggtitle("Levels of hygiene over 3 days of concert")+
theme(plot.title = element_text(hjust = 0.5))+
theme(plot.title = element_text(size = 19))+
theme(axis.title.x=element_blank())+
scale_fill_manual(values=c("darkorange3", "darkorchid4"), name="Gender")
barchart
Linegraph 折线图
linegraph<-ggplot(score.sem, aes(day, mean, group=gender))+
geom_line()+
geom_point()+
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem))
linegraph
美化
linegraph<-ggplot(score.sem, aes(day,mean, colour=gender, group=gender))+
geom_line(size=1.5)+
geom_point(size=4)+
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem), width=.2, size=1.5)
linegraph
进一步美化
linegraph<-ggplot(score.sem, aes(day,mean, colour=gender, group=gender))+
geom_line(size=1.5)+
geom_point(size=5)+
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem), width=.2, size=1.5)+
labs(x="", y="Mean scores")+
scale_y_continuous(breaks=seq(0, 2, 0.2))+
ggtitle("Levels of hygiene over 3 days of concert")+
theme(plot.title = element_text(hjust = 0.5))+
scale_colour_manual(values=c("purple","darkorange3"), name="")+
theme(legend.position = c(0.85, 0.9))+
theme(legend.text=element_text(size=14))+
theme(legend.background = element_rect(fill = "transparent"))
linegraph
练习 3 该文件包含3个不同数据集(一个WT和两个mutants)的positional count数据。 绘制一个图,显示同一图下的所有3个数据集 先读Chrometic_position_data.txt
检查文件的结构, 用gather()
将文件从宽格式重新构造为长格式, 重命名列:Gentype
和Value
绘制基本折线图.
chromosome<-read_tsv("chromosome_position_data.txt")
chromosome
#> # A tibble: 184 x 4
#> Position Mut1 Mut2 WT
#> <dbl> <dbl> <dbl> <dbl>
#> 1 91757273 2.71 1.34 1.25
#> 2 91757323 2.71 1.3 1.25
#> 3 91757373 5.41 1.14 1.25
#> 4 91757423 2.71 1.58 1.88
#> 5 91757473 2.71 1.19 1.25
#> 6 91757523 2.71 2.82 2.5
#> 7 91757573 2.71 3.15 3.75
#> 8 91757623 0 4.05 3.75
#> 9 91757673 0 2.94 3.12
#> 10 91757723 0 2.69 3.12
#> # … with 174 more rows
chromosome %>%
gather(Genotype, Value,-Position) -> chromosome.long
chromosome.long
#> # A tibble: 552 x 3
#> Position Genotype Value
#> <dbl> <chr> <dbl>
#> 1 91757273 Mut1 2.71
#> 2 91757323 Mut1 2.71
#> 3 91757373 Mut1 5.41
#> 4 91757423 Mut1 2.71
#> 5 91757473 Mut1 2.71
#> 6 91757523 Mut1 2.71
#> 7 91757573 Mut1 2.71
#> 8 91757623 Mut1 0
#> 9 91757673 Mut1 0
#> 10 91757723 Mut1 0
#> # … with 542 more rows
chromosome.linegraph<-ggplot(chromosome.long, aes(x=Position, y=Value, group=Genotype, colour=Genotype))+
geom_line(size=2)
chromosome.linegraph
画一张图表,显示一个典型婴儿在出生后9个月的年龄和体重之间的关系。 读入数据weight_chart.txt
检查文件结构 绘制基本折线图绘制更漂亮的版本:更改点的大小和颜色更改线的粗细和颜色 更改y轴:比例从2 kg更改为10 kg 更改x轴:比例从0 t 10个月更改两个轴上的标签 向图表添加标题
weight<-read_tsv("weight_chart.txt")
weight
#> # A tibble: 10 x 2
#> Age Weight
#> <dbl> <dbl>
#> 1 0 3.6
#> 2 1 4.4
#> 3 2 5.2
#> 4 3 6
#> 5 4 6.6
#> 6 5 7.2
#> 7 6 7.8
#> 8 7 8.4
#> 9 8 8.8
#> 10 9 9.2
#基础
weight.linegraph<-ggplot(weight, aes(Age, Weight))+
geom_line()+
geom_point()
weight.linegraph
#美化
weight.linegraph<-ggplot(weight, aes(Age, Weight))+
geom_line(size=1, colour="lightblue2")+
geom_point(shape=16, size=3, colour="darkorchid1")+
scale_y_continuous(breaks=2:10, limits = c(2, 10))+
scale_x_continuous(breaks=0:10, limits = c(0, 10))+
labs(x="Age (months)", y="Weight (kg)")+
ggtitle("Relation between age and weight")+
theme(plot.title = element_text(hjust = 0.5))
weight.linegraph
练习 5 文件brain_bodyweight.txt
包含一系列物种的log10大脑和体重数据,以及每个点的SEM测量结果。 将这些数据绘制在带有误差条的散点图上,显示每个点下的平均值+/-SEM和数据集的名称。 读入brain_bodyweitt.txt
检查文件的结构,绘制一个基本的图形。绘制一个更漂亮的版本,对于水平误差线需要更改:geom_barh()
,对于标签需要更改:geom_text()
。
由于从教程中获取的.txt文档有点问题,所以费了点时间
options(stringsAsFactors = F)
brain.bodyweight<- read.csv("brain_bodyweight.txt",sep=',',header = T)
brain.bodyweight
#> Species Bodyweight Brainweight Bodyweight.SEM Brainweight.SEM X
#> 1 Cow 2.670 2.630 0.12500 0.12700 NA
#> 2 Goat 1.440 2.060 0.01930 0.14200 NA
#> 3 Guinea Pig 0.017 0.740 0.00163 0.03590 NA
#> 4 Diplodocus 4.070 1.700 0.10100 0.13400 NA
#> 5 Horse 2.720 2.820 0.00367 0.16900 NA
#> 6 Cat 0.519 1.410 0.02920 0.10200 NA
#> 7 Gorilla 2.320 2.610 0.02680 0.24300 NA
#> 8 Human 1.790 3.120 0.03610 0.23100 NA
#> 9 African Elephant 3.820 3.760 0.11200 0.32900 NA
#> 10 Rhesus Monkey 0.833 2.250 0.04650 0.14600 NA
#> 11 Kangaroo 1.540 1.750 0.01750 0.05870 NA
#> 12 Hamster -0.921 0.100 0.02740 0.00981 NA
#> 13 Mouse -1.640 -0.398 0.06860 0.02340 NA
#> 14 Rabbit 0.398 1.080 0.01150 0.06690 NA
#> 15 Sheep 1.740 2.240 0.04100 0.13600 NA
#> 16 Chimpanzee 1.720 2.640 0.01640 0.23800 NA
#> 17 Brachiosaurus 4.940 2.190 0.26900 0.11200 NA
#> 18 Rat -0.553 0.279 0.03370 0.00188 NA
#> 19 Mole -0.914 0.477 0.06170 0.04770 NA
#> 20 Pig 2.280 2.260 0.01330 0.16000 NA
brain.bodyweight=brain.bodyweight[,-6]
brain.bodyweight
#> Species Bodyweight Brainweight Bodyweight.SEM Brainweight.SEM
#> 1 Cow 2.670 2.630 0.12500 0.12700
#> 2 Goat 1.440 2.060 0.01930 0.14200
#> 3 Guinea Pig 0.017 0.740 0.00163 0.03590
#> 4 Diplodocus 4.070 1.700 0.10100 0.13400
#> 5 Horse 2.720 2.820 0.00367 0.16900
#> 6 Cat 0.519 1.410 0.02920 0.10200
#> 7 Gorilla 2.320 2.610 0.02680 0.24300
#> 8 Human 1.790 3.120 0.03610 0.23100
#> 9 African Elephant 3.820 3.760 0.11200 0.32900
#> 10 Rhesus Monkey 0.833 2.250 0.04650 0.14600
#> 11 Kangaroo 1.540 1.750 0.01750 0.05870
#> 12 Hamster -0.921 0.100 0.02740 0.00981
#> 13 Mouse -1.640 -0.398 0.06860 0.02340
#> 14 Rabbit 0.398 1.080 0.01150 0.06690
#> 15 Sheep 1.740 2.240 0.04100 0.13600
#> 16 Chimpanzee 1.720 2.640 0.01640 0.23800
#> 17 Brachiosaurus 4.940 2.190 0.26900 0.11200
#> 18 Rat -0.553 0.279 0.03370 0.00188
#> 19 Mole -0.914 0.477 0.06170 0.04770
#> 20 Pig 2.280 2.260 0.01330 0.16000
brain.bodyweight.graph<-ggplot(brain.bodyweight, aes(x=Bodyweight, y=Brainweight))+
geom_point()+
geom_errorbar(aes(ymin=Brainweight-Brainweight.SEM, ymax=Brainweight+Brainweight.SEM))+
geom_errorbarh(aes(xmin=Bodyweight-Bodyweight.SEM, xmax=Bodyweight+Bodyweight.SEM))+
geom_text(aes(label=Species), hjust = 1.05, vjust = -0.6, size=2.7)
brain.bodyweight.graph
brain.bodyweight.graph<-ggplot(brain.bodyweight, aes(x=Bodyweight, y=Brainweight))+
geom_point()+
geom_errorbar(aes(ymin=Brainweight-Brainweight.SEM, ymax=Brainweight+Brainweight.SEM), width=.1, size=1, colour="tomato3")+
geom_errorbarh(aes(xmin=Bodyweight-Bodyweight.SEM, xmax=Bodyweight+Bodyweight.SEM), height=.1, size=1, colour="tomato3")+
geom_point(size=2)+
geom_text(aes(label=Species), hjust = 1.1, vjust = -0.6, size=2.7)
brain.bodyweight.graph
进一步美化
library("ggrepel")
## Warning: package 'ggrepel' was built under R version 3.5.3
ggplot(brain.bodyweight, aes(x=Bodyweight, y=Brainweight))+
geom_errorbar(aes(ymin=Brainweight-Brainweight.SEM, ymax=Brainweight+Brainweight.SEM),
width=.1, size=0.5, colour="grey28")+
geom_errorbarh(aes(xmin=Bodyweight-Bodyweight.SEM, xmax=Bodyweight+Bodyweight.SEM),
height=.1, size=0.5, colour="grey28")+
geom_point(shape=21, size=3, colour="black", fill="maroon3")+
geom_label_repel(aes(label = Species), box.padding=0.6, point.padding =0.5,
fill="mintcream", segment.colour="grey", size=3) -> brain.bodyweight.graph
brain.bodyweight.graph
Stacked bar: categorical data 堆叠柱状图
Changing<-read_csv("Changing.csv")
Changing
#> # A tibble: 60 x 3
#> Type.of.Behaviour Sample.Size Stage.of.Change
#> <chr> <dbl> <chr>
#> 1 Smoking cessation 108 Precontemplation
#> 2 Smoking cessation 187 Contemplation
#> 3 Smoking cessation 0 Preparation
#> 4 Smoking cessation 134 Action
#> 5 Smoking cessation 247 Maintenance
#> 6 Quitting cocaine 8 Precontemplation
#> 7 Quitting cocaine 15 Contemplation
#> 8 Quitting cocaine 0 Preparation
#> 9 Quitting cocaine 71 Action
#> 10 Quitting cocaine 62 Maintenance
#> # … with 50 more rows
stackedBar<-ggplot(Changing, aes(Type.of.Behaviour, Sample.Size, fill=Stage.of.Change))+
geom_bar(stat="identity")
stackedBar
更改比较的顺序:factor(variable name, levels = c(“”, “” .)) 。 旋转图表以读取x轴标签:coord_flip()
Changing$Stage.of.Change <- factor(Changing$Stage.of.Change, levels = c("Maintenance","Action","Preparation","Contemplation","Precontemplation"))
stackedBar<-ggplot(Changing, aes(Type.of.Behaviour, Sample.Size, fill = Stage.of.Change))+
geom_bar(stat="identity", colour="black")+
coord_flip()
stackedBar
进一步美化
stackedBar<-stackedBar+
labs(title="Stages for Each of the 12 Problem Behaviours", y="Sample Size", fill="Stages of Change")+
theme(plot.title = element_text(hjust = 0.5, size=12, face="bold"))+
theme(axis.title.y=element_blank())+
scale_fill_brewer(palette = 4)+
theme(axis.text.x = element_text(size=10), axis.text.y = element_text(size=9))+
theme(legend.text=element_text(size=8), legend.title=element_text(size=10, face="bold"))+
theme(axis.title.x = element_text(size=10))
stackedBar
提高对比度
stackedBar+scale_fill_brewer(palette="RdYlGn", direction=-1)
练习 6 让我们将相同的数据绘制为百分比,将变化的数据绘制为百分比。 将文件格式更改为应急xtabs()
计算百分比prop.table()
将格式更改为dataframe as.data.frame()
检查前几行head()
像以前一样使用不同的调色板绘制数据
contingency.table100<-prop.table(xtabs(Sample.Size~Type.of.Behaviour+Stage.of.Change, Changing),1)*100
contingency.table100
#> Stage.of.Change
#> Type.of.Behaviour Maintenance Action Preparation Contemplation
#> Adolescent delinquency 25.786164 27.044025 0.000000 28.930818
#> Condom use 35.294118 6.191950 0.000000 17.956656
#> Exercise acquisition 19.386332 14.086471 25.383543 33.751743
#> High fat diet 56.666667 2.777778 0.000000 17.777778
#> Mammography screening 42.553191 18.439716 0.000000 17.021277
#> Physicians'practices 49.629630 1.481481 2.222222 14.814815
#> Quitting cocaine 39.743590 45.512821 0.000000 9.615385
#> Radon gas exposure 0.000000 8.166189 0.000000 17.335244
#> Safer sex 0.000000 47.887324 0.000000 7.981221
#> Smoking cessation 36.538462 19.822485 0.000000 27.662722
#> Sunscreen use 35.242291 4.405286 0.000000 7.929515
#> Weight control 14.634146 17.886179 0.000000 52.845528
#> Stage.of.Change
#> Type.of.Behaviour Precontemplation
#> Adolescent delinquency 18.238994
#> Condom use 40.557276
#> Exercise acquisition 7.391911
#> High fat diet 22.777778
#> Mammography screening 21.985816
#> Physicians'practices 31.851852
#> Quitting cocaine 5.128205
#> Radon gas exposure 74.498567
#> Safer sex 44.131455
#> Smoking cessation 15.976331
#> Sunscreen use 52.422907
#> Weight control 14.634146
Changing.percent<-as.data.frame(contingency.table100)
Changing.percent
#> Type.of.Behaviour Stage.of.Change Freq
#> 1 Adolescent delinquency Maintenance 25.786164
#> 2 Condom use Maintenance 35.294118
#> 3 Exercise acquisition Maintenance 19.386332
#> 4 High fat diet Maintenance 56.666667
#> 5 Mammography screening Maintenance 42.553191
#> 6 Physicians'practices Maintenance 49.629630
#> 7 Quitting cocaine Maintenance 39.743590
#> 8 Radon gas exposure Maintenance 0.000000
#> 9 Safer sex Maintenance 0.000000
#> 10 Smoking cessation Maintenance 36.538462
#> 11 Sunscreen use Maintenance 35.242291
#> 12 Weight control Maintenance 14.634146
#> 13 Adolescent delinquency Action 27.044025
#> 14 Condom use Action 6.191950
#> 15 Exercise acquisition Action 14.086471
#> 16 High fat diet Action 2.777778
#> 17 Mammography screening Action 18.439716
#> 18 Physicians'practices Action 1.481481
#> 19 Quitting cocaine Action 45.512821
#> 20 Radon gas exposure Action 8.166189
#> 21 Safer sex Action 47.887324
#> 22 Smoking cessation Action 19.822485
#> 23 Sunscreen use Action 4.405286
#> 24 Weight control Action 17.886179
#> 25 Adolescent delinquency Preparation 0.000000
#> 26 Condom use Preparation 0.000000
#> 27 Exercise acquisition Preparation 25.383543
#> 28 High fat diet Preparation 0.000000
#> 29 Mammography screening Preparation 0.000000
#> 30 Physicians'practices Preparation 2.222222
#> 31 Quitting cocaine Preparation 0.000000
#> 32 Radon gas exposure Preparation 0.000000
#> 33 Safer sex Preparation 0.000000
#> 34 Smoking cessation Preparation 0.000000
#> 35 Sunscreen use Preparation 0.000000
#> 36 Weight control Preparation 0.000000
#> 37 Adolescent delinquency Contemplation 28.930818
#> 38 Condom use Contemplation 17.956656
#> 39 Exercise acquisition Contemplation 33.751743
#> 40 High fat diet Contemplation 17.777778
#> 41 Mammography screening Contemplation 17.021277
#> 42 Physicians'practices Contemplation 14.814815
#> 43 Quitting cocaine Contemplation 9.615385
#> 44 Radon gas exposure Contemplation 17.335244
#> 45 Safer sex Contemplation 7.981221
#> 46 Smoking cessation Contemplation 27.662722
#> 47 Sunscreen use Contemplation 7.929515
#> 48 Weight control Contemplation 52.845528
#> 49 Adolescent delinquency Precontemplation 18.238994
#> 50 Condom use Precontemplation 40.557276
#> 51 Exercise acquisition Precontemplation 7.391911
#> 52 High fat diet Precontemplation 22.777778
#> 53 Mammography screening Precontemplation 21.985816
#> 54 Physicians'practices Precontemplation 31.851852
#> 55 Quitting cocaine Precontemplation 5.128205
#> 56 Radon gas exposure Precontemplation 74.498567
#> 57 Safer sex Precontemplation 44.131455
#> 58 Smoking cessation Precontemplation 15.976331
#> 59 Sunscreen use Precontemplation 52.422907
#> 60 Weight control Precontemplation 14.634146
绘制百分比图
stackedBar.percent<-ggplot(Changing.percent,aes(Type.of.Behaviour, Freq, fill = Stage.of.Change))+
geom_bar(stat="identity",colour="black")+
coord_flip()+
scale_fill_brewer(palette = "Spectral", direction=-1)+
labs(title="Stages for Each of the 12 Problem Behaviours", y="Frequencies")+
theme(axis.title.y=element_blank())+
theme(plot.title = element_text(hjust = 0.5, size=12, face="bold"))+
theme(axis.text.x = element_text(size=10), axis.text.y = element_text(size=9))+
theme(legend.text=element_text(size=8), legend.title=element_text(size=10, face="bold"))+
theme(axis.title.x = element_text(size=10))
stackedBar.percent
保存图片
stackedBar_percent <-ggsave(stackedBar.percent, file="stackedBar_percent.png")
本教程所用数据部分来自http://www.bioinformatics.babraham.ac.uk/training.html