前面介绍了微生物多样性如何整合数据并进行了基础可视化分析,现在让我们开始进行更加深入的相关性分析和差异分析(相关性热图,RDA,CCA,Lefse)
1. 相关性热图
一般我们主要绘制属水平物种与理化因子的相关性热图
ps
phyloseq-class experiment-level object
otu_table() OTU Table: [ 3365 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 2 sample variables ]
tax_table() Taxonomy Table: [ 3365 taxa by 7 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 3365 tips and 3363 internal nodes ]
依然使用我们转化为phyloseq
的对象的数据集,首先让我们导出属水平物种组成表:
pacman::p_load(tidyverse,phyloseq,MicrobiotaProcess,magrittr)
phytax <- get_taxadf(ps, taxlevel=6)
phytax
p <- phyloseq::otu_table(phytax) %>% as.data.frame()
write.table (p,file ="genus.xls", sep ="\t", row.names = T)
导出数据之后我们要对属水平物种组成表进行过滤,把丰度占比低于1%的归入others类
让我们自定义一个函数对其进行过滤
computed_persent <- function(path) {
data <- path %>%
read.delim(check.names = FALSE, row.names = 1)
data2 <- data %>%
mutate(sum = rowSums(.), persent = sum / sum(sum) * 100, sum = NULL,) %>%
rbind(filter(., persent < 1) %>% colSums()) %>%
mutate(OTU_ID = c(data %>% rownames(), "others"))
filter(data2[1:(nrow(data2) - 1),], persent > 1) %>%
rbind(data2[nrow(data2),]) %>%
select(ncol(.), 1:(ncol(.) - 2)) %>%
set_rownames(seq_len(nrow(.))) %>%
return()
}
path <- "genus.xls" #此处写入的是文件路径
ALL_genus <- computed_persent(path)
write.table (ALL_genus,file ="ALL_genus.xls", sep ="\t", row.names =F)
经过上面的步骤我们完成了对数据的过滤,接下来让我们用属水平的物种组成表结合理化因子数据绘制相关性热图:
2.绘制相关性热图:
library(pacman)
pacman::p_load(tidyverse,reshape,psych,ggtree,aplot)
#注:2张表的行名,名称与顺序必须一致
table1 <- read.table("data1.xls",
sep="\t",header = T,
row.names = 1,check.names = F)
table2 <- read.table("data2.xls",
sep="\t",header = T,
check.names = F,row.names = 1)
pp <- corr.test(table1,table2,method="pearson",adjust = "fdr")
#抽提出相关性系数与P值
cor <- pp$r
pvalue <- pp$p
#根据P值用循环将P值做成*
display <- pvalue
l1 <- nrow(display);l2 <- ncol(display)
for(i in 1:l1){
for(k in 1:l2){
a <- as.numeric(display[i,k])
if(a <= 0.001){
a <- ""
}
if( 0.001 < a && a <= 0.01){
a <- "**"
}
if(0.01 < a && a < 0.05){
a <- "*"
}
if(a >= 0.05){
a <- ""
}
display[i,k] <- a
}
}
#对数据进行格式转换适用于ggplot2绘图
heatmap <- melt(cor)
#对数据进行格式转换适用于ggplot2绘图
heatmap <- melt(cor)
heatmap[,4] <- melt(pvalue)[,3]
heatmap[,5] <- melt(display)[,3]
names(heatmap) <- c("sample","gene","cor","pvalue","display")
#导出数据
write.table (heatmap,file ="heatmap.xls", sep ="\t", row.names = F)
#根据相关性系数矩阵,用ggtree绘制行聚类与列聚类树
phr <- hclust(dist(cor)) %>%
ggtree(layout="rectangular", branch.length="none")
phc <- hclust(dist(t(cor))) %>% ggtree() + layout_dendrogram()
#ggplot2绘制热图
pp <- ggplot(heatmap,aes(gene,sample,fill=cor)) +
geom_tile()+
theme_minimal()+scale_fill_viridis_c()+
geom_text(aes(label=display),size=5,color="white")+
scale_y_discrete(position="right")+xlab(NULL) + ylab(NULL)+
theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.6))+
theme(axis.text.x=element_text(family = "Times",
face = "italic",colour = "black",size=10))+
theme(axis.text.y=element_text(family = "Times",
face = "italic",colour = "black",size=10))+
theme(legend.text=element_text(face="plain",family = "Times",
colour = "black",size = 10))+
labs(fill = "expr")
#通过aplot包将聚类树与热图拼接
pp %>% insert_left(phr, width=.1) %>%
insert_top(phc, height=.1)
经过上面两步我们完成了相关性热图的分析及可视化,接下来进行RDA,CCA的分析