数据的分析和可视化工作没有止境!
1. alpha多样性数据导入和整理
数据来自公司提供的alpha多样性指数数据表格,主要包括Observe、Chao1、ACE、Shannon和Simpson五种指数。
#载入包
library(tidyverse)
library(ggsignif) #统计分析并标记显著性
# Import the alpha-diversity table in wide format (one row per sample, one
# column per diversity index). check.names = FALSE keeps the column headers
# exactly as they appear in the file.
alpha_data <- read.csv("alpha_wide.csv",
                       stringsAsFactors = FALSE, check.names = FALSE)
# Wide -> long: every column except `sample` becomes one
# (alpha_index, value) row.
alpha_tidy_data <- alpha_data %>%
  pivot_longer(-sample, names_to = "alpha_index", values_to = "value")
# The grouping columns (`treatment`, `time`) are added to this CSV by hand
# after it is written. Never overwrite an existing copy, otherwise re-running
# the script would silently destroy the manual annotations.
tidy_path <- "alpha_tidy_data.csv"
if (file.exists(tidy_path)) {
  message("'", tidy_path, "' already exists; keeping it. ",
          "Delete the file to regenerate it from 'alpha_wide.csv'.")
} else {
  write.csv(alpha_tidy_data, file = tidy_path)
}
# Re-import the long table. row.names = 1 consumes the row-number column
# that write.csv() emits by default.
alpha_data <- read.csv(tidy_path, row.names = 1, header = TRUE,
                       stringsAsFactors = FALSE, check.names = FALSE)
# Fail early, with an actionable message, if the hand-added grouping columns
# used by the plots below are not there yet.
missing_cols <- setdiff(c("alpha_index", "value", "treatment", "time"),
                        names(alpha_data))
if (length(missing_cols) > 0) {
  stop("'", tidy_path, "' is missing column(s): ",
       paste(missing_cols, collapse = ", "),
       ". Add the grouping information to the file and re-run.",
       call. = FALSE)
}
head(alpha_data)
使用tidyverse包将宽数据转换成了长数据;但由于我用R进一步整理数据表格的能力有限,长数据中的分组信息(即后面作图用到的treatment和time两列)是手动添加的。
2. 在不同时间点上对照组和处理组间的多样性指标的差异
# Turn alpha_index into an ordered factor so that facets follow this
# explicit index order instead of alphabetical order.
index_order <- c("Observe", "Chao1", "ACE", "Shannon", "Simpson")
alpha_data[["alpha_index"]] <- ordered(alpha_data[["alpha_index"]],
                                       levels = index_order)
#ggplot2画图
library(scales)
library(facetscales) #facetscales包可以控制分面后Y轴刻度
# Per-facet Y axes for facet_grid_sc(): one continuous scale per diversity
# index, keyed by the index name.
# make_y_scale() builds a scale limited to [lower, upper] with breaks every
# `step` units from `lower` up to `break_to`.
make_y_scale <- function(lower, upper, step, break_to = upper) {
  scale_y_continuous(limits = c(lower, upper),
                     breaks = seq(lower, break_to, step))
}
scales_y <- list(
  ACE     = make_y_scale(50, 162, 30),
  Chao1   = make_y_scale(50, 160, 30),
  Observe = make_y_scale(50, 155, 30),
  Shannon = make_y_scale(1.7, 2.5, 0.25, break_to = 2.6),
  Simpson = make_y_scale(0.10, 0.40, 0.1)
)
# Facet strip labels. The same labeller is applied to both facet dimensions
# and looks labels up by name, so it must contain an entry for every time
# point AND every diversity index; a value without an entry would be
# rendered as NA in its strip.
to_string <- as_labeller(c(
  `1` = "1DPE", `7` = "7DPE", `14` = "14DPE",
  Observe = "Observe", Chao1 = "Chao1", ACE = "ACE",
  Shannon = "Shannon", Simpson = "Simpson"
))
# Violin + box plots of every alpha-diversity index (facet rows) at every
# sampling time (facet columns), comparing the treatment groups, with a
# significance bracket between the UN and IR groups in each panel.
p_alpha <- ggplot(data = alpha_data, mapping = aes(x = treatment, y = value)) +
  geom_violin(mapping = aes(fill = treatment), width = 0.8, size = 0.2) +
  geom_boxplot(width = 0.1, linetype = 1, size = 0.2, outlier.size = 0.7) +
  # Two-way faceting: rows = diversity index, columns = time point.
  # Bare column names are used (not alpha_data$...) so the facet variables
  # are resolved inside the plot data.
  facet_grid_sc(alpha_index ~ time,
                scales = list(y = scales_y),
                labeller = to_string) +
  scale_fill_manual(values = c("#2874C5", "#EABF00")) +
  # theme_bw() is a complete theme: it replaces every theme element set
  # before it. It therefore has to come BEFORE the theme() tweaks below,
  # otherwise all of them are silently discarded.
  theme_bw() +
  theme(legend.position = "none",
        plot.margin = unit(c(2, 2, 2, 0), "mm"),
        axis.ticks.y = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(colour = NA, fill = "grey"),
        axis.title.x = element_text(size = 16, vjust = 0.5,
                                    hjust = 0.5),
        axis.title.y = element_text(size = 18, vjust = 0.5,
                                    hjust = 0.5),
        axis.text.x = element_text(angle = 0, size = 10,
                                   vjust = 0.5, hjust = 0.5),
        axis.text.y = element_text(size = 15, vjust = 0.5,
                                   hjust = 0.5)) +
  labs(x = "", y = "", fill = "Treatment") +
  # map_signif_level = TRUE prints significance stars instead of raw
  # p-values above the bracket.
  geom_signif(comparisons = list(c("UN", "IR")),
              map_signif_level = TRUE,
              textsize = 3, size = 0.3, vjust = 0)
p_alpha
3. 随着时间变化多样性指标的变化趋势
通过分组拟合数据点展示变化趋势
# Smoothed trend of every alpha-diversity index over sampling time: one
# fitted curve (with confidence band) per treatment group, one facet row
# per index.
centred_text <- function(pt) element_text(size = pt, hjust = 0.5, vjust = 0.5)
group_colours <- c("#2874C5", "#EABF00")
p_alpha_fit <- ggplot(
  data = alpha_data,
  mapping = aes(
    x = time, y = value,
    colour = treatment,  # colour of the fitted line
    fill = treatment,    # colour of the confidence band
    group = treatment    # one fit per treatment group
  )
) +
  geom_smooth(size = 1.2, level = 0.95, alpha = 0.3) +
  facet_grid_sc(rows = vars(alpha_index),
                scales = list(y = scales_y)) +
  # Label the X axis ticks with the sampling time points.
  scale_x_continuous(breaks = c(1, 7, 14),
                     labels = c("1DPE", "7DPE", "14DPE")) +
  scale_color_manual(values = group_colours) +
  scale_fill_manual(values = group_colours) +
  labs(x = "", y = "", color = "Treatment", title = "") +
  guides(fill = "none", color = "none") +
  theme_bw() +
  theme(
    plot.margin = unit(c(2, 2, 2, 0), "mm"),
    axis.ticks.y = element_blank(),
    plot.title = centred_text(12),
    axis.title.x = centred_text(12),
    axis.title.y = centred_text(12),
    axis.text.x = centred_text(8),
    axis.text.y = centred_text(8)
  )
p_alpha_fit
4. 使用patchwork包拼图
之前使用过cowplot包来拼图,最近发现patchwork包拼图更优秀:它使用“+”、“/”、“()”等简单的符号进行拼图,并且不限于ggplot系列的图片。
# Combine p_alpha and p_alpha_fit side by side with patchwork.
library(patchwork)
# widths = c(3, 1): the left panel is three times as wide as the right one.
# guides = "collect": gather any legends into a single shared guide area.
p_alpha + p_alpha_fit +
  plot_layout(widths = c(3, 1), guides = "collect")
还需要进一步学习R中的数据清洗:《R for Data Science》这本书还得多看几遍,争取熟练使用tidyverse中的dplyr和tidyr包,强迫自己在R中完成所有的数据处理和转换工作。