引言
在对样本的微生物群落组成多样性分析过程中,除了Alpha多样性与Beta多样性的分析,最重要的一个部分就是对样本中微生物具体的群落结构组成情况进行分析。一般我们从门水平及属水平上计算优势物种的相对丰度并通过表格或者柱状堆积图进行展示。
正文—代码
设置工作目录并加载包
rm(list=ls())#clear Global Environment
setwd('D:\\桌面\\物种丰度计算及可视化')#设置工作路径
#安装包
install.packages("reshape2")
install.packages("ggplot2")
install.packages("ggprism")
install.packages("plyr")
#加载R包
library (reshape2)
library(ggplot2)
library(ggprism)
library(plyr)
门水平的物种丰度计算及可视化
1、读取数据
df1 <- read.table(file="Phylum.txt",sep="\t",header=T,check.names=FALSE)
2、对数据进行处理
##利用循环将其中的重复门数据进行求和
data<-aggregate(E ~ Tax,data=df1,sum)
colnames(data)[2]<-"example"
for (i in colnames(df1)[2:length(colnames(df1))]){
data1<-aggregate(df1[,i]~Tax,data=df1,sum)
colnames(data1)[2]<-i
data<-merge(data,data1,by="Tax")
}
df2<-data[,-2]
rownames(df2)=df2$Tax
df3=df2[,-1]
3、计算物种总丰度并进行降序排列
df3$rowsum <- apply(df3,1,sum)
df4 <- df3[order (df3$rowsum,decreasing=TRUE),]
df5 = df4[,-6]
#求物种相对丰度
df6 <- apply(df5,2,function(x) x/sum(x))
#导出数据
write.table (df9, file ="phylum.csv",sep =",", quote =FALSE)
#变量格式转换,宽数据转化为长数据,方便后续作图
df_phylum <- melt(df6)
names(df_phylum)[1:2] <- c("Taxonomy","sample")
4、绘图
p1 <- ggplot(df_phylum, aes( x = sample,y=100 * value,fill = Taxonomy))+
geom_col(position = 'stack', width = 0.6)+#geom_bar(position = "stack", stat = "identity", width = 0.6)
scale_y_continuous(expand = c(0,0))+#
labs(x="Samples",y="Relative Abundance(%)",
fill="Taxonomy")+
guides(fill=guide_legend(keywidth = 1, keyheight = 1)) +
theme_prism(palette = "candy_bright",
base_fontface = "plain",
base_family = "serif",
base_size = 16,
base_line_size = 0.8,
axis_text_angle = 45)+
scale_fill_prism(palette = "candy_bright")
p1
属水平的物种丰度计算及可视化
1、加载数据
df1 <- read.table(file="Genus.txt",sep="\t",header=T,check.names=FALSE)
2、对数据进行处理
data<-aggregate(E ~ Tax,data=df1,sum)
colnames(data)[2]<-"example"
for (i in colnames(df1)[2:length(colnames(df1))]){
data1<-aggregate(df1[,i]~Tax,data=df1,sum)
colnames(data1)[2]<-i
data<-merge(data,data1,by="Tax")
}
df2<-data[,-2]
rownames(df2)=df2$Tax
df3=df2[,-1]
3、计算物种丰度并降序排列
df3$rowsum <- apply(df3,1,sum)
df4 <- df3[order (df3$rowsum,decreasing=TRUE),]
df5 = df4[,-6]
#求物种相对丰度
df6 <- apply(df5,2,function(x) x/sum(x))
#由于之间已经按照每行的和进行过升序排列,所以可以直接取前10行
df7 <- df6[1:10,]
df8 <- 1-apply(df7, 2, sum) #计算剩下物种的总丰度
#合并数据
df9 <- rbind(df7,df8)
row.names(df9)[11]="Others"
#导出数据
write.table (df9, file ="genus.csv",sep =",", quote =FALSE)
#变量格式转换,宽数据转化为长数据,方便后续作图
df_genus <- melt(df9)
names(df_genus)[1:2] <- c("Taxonomy","sample")
4、绘图
p2 <- ggplot(df_genus, aes( x = sample,y=100 * value,fill = Taxonomy))+
geom_col(position = 'stack', width = 0.6)+#geom_bar(position = "stack", stat = "identity", width = 0.6)
scale_y_continuous(expand = c(0,0))+
labs(x="Samples",y="Relative Abundance(%)",
fill="Taxonomy")+
guides(fill=guide_legend(keywidth = 1, keyheight = 1)) +
theme_prism(palette = "candy_bright",
base_fontface = "plain",
base_family = "serif",
base_size = 16,
base_line_size = 0.8,
axis_text_angle = 45)+
scale_fill_prism(palette = "colors")
p2
拼图
library("cowplot")
plot_grid(p1,p2, labels=c('A','B'), ncol=1, nrow=2)
作图数据及源码请在微信公众号后台回复Re_abundance获取!