轻轻松松的完成数据分析,高高兴兴的发论文,一个完美的PCA分析流程呈现给各位,喜欢请关注,点赞;后面的内容更精彩!!!
pacman::p_load(tidyverse,ggrepel,FactoMineR,magrittr)
da <- function(da) {
(da / (da %>% colSums() %>% as.numeric() %>% rep(each = nrow(da)) %>%
matrix(ncol = ncol(da), nrow = nrow(da)))) %>% t() %>% return()
}
da <- "data.xls" %>%
read.delim(header = T, check.names = F, row.names = 1, sep = "\t") %>% da()
groups <- "group2.txt" %>% read.delim(header = T, sep = "\t")
pca <- da %>%
PCA(scale.unit = F, graph = F)
pca %$% write.csv(ind$coord, file = "pca.xls")
group <- NULL
for (i in seq_along(groups$Sample)) {
group[i] <- groups$Group[rownames(pca$ind$coord)[i] %>%
grep(groups$Sample)]
}
if (ncol(groups) == 2) {
pcadata <- data.frame(rownames(pca$ind$coord),
pca$ind$coord[, 1],pca$ind$coord[, 2], group) %>%
set_colnames(c("sample", "PC1", "PC2", "group"))
}
ggplot(pcadata, aes(PC1,PC2)) +
geom_point(aes(fill = group, color = group), size = 4) +
geom_vline(aes(xintercept = 0), linetype = "dotted") +
geom_hline(aes(yintercept = 0), linetype = "dotted") +
labs(title = "PCA Plot",
x = (floor(pca$eig[1,2] * 100) / 100) %>%
paste0("PC1 ( ", ., "%", " )"),
y = (floor(pca$eig[2,2] * 100) / 100) %>%
paste0("PC2 ( ", ., "%", " )")) +
theme(text = element_text(family = "Times", size = 12)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
axis.title.x = element_text(colour="black",size = 14),
axis.title.y = element_text(colour="black",size = 14),
axis.text = element_text(colour="black", size = 12),
legend.title = element_text(colour="black",size = 12,
face = "bold"),
legend.key = element_blank(),
plot.title = element_text(size = 12, colour = "black",
hjust = 0.5,face = "bold")) +
theme(legend.position = c(0.9, 0.9)) +
theme(legend.title = element_blank())
PCOA分析
rm(list=ls())
pacman::p_load(tidyverse,ggrepel,vegan,ape)
data <- read.csv("data.xls", header = T,check.names = F,
sep="\t",row.names = 1) %>% t()
data[is.na(data)] <- 0
pcoa <- vegdist(data,method = "bray") %>% pcoa(correction = "none", rn = NULL)
groups <- read.table("group2.txt",sep = "\t",header = T) %>% as.list()
PC1 = pcoa$vectors[,1]
PC2 = pcoa$vectors[,2]
pcoadata <- data.frame(rownames(pcoa$vectors),PC1,PC2,groups$Group)
colnames(pcoadata) <-c("sample","PC1","PC2","group")
ggplot(pcoadata, aes(PC1, PC2)) +
geom_point(aes(colour=group,fill=group),size=4)+
labs(title="PCoA",
x=(floor(pcoa$values$Relative_eig[1]*100)) %>%
paste0("PC1 ( ", ., "%", " )"),
y=(floor(pcoa$values$Relative_eig[2]*100)) %>%
paste0("PC2 ( ", ., "%", " )")) +
theme(text=element_text(size=10))+
geom_vline(aes(xintercept = 0),linetype="dotted")+
geom_hline(aes(yintercept = 0),linetype="dotted")+
theme(panel.background = element_rect(fill='white', colour='black'),
axis.title.x=element_text(colour='black', size=12),
axis.title.y=element_text(colour='black', size=12),
axis.text=element_text(colour='black',size=12),
legend.title=element_blank(),
legend.key.height=unit(0.6,"cm"))
可以通过以下代码添加置信区间与标签
stat_ellipse(aes(fill = group),geom = "polygon",
level = 0.95,alpha = 0.3)+
geom_label_repel(aes(PC1,PC2,label = sample),
fill = "white",color = "black",
box.padding = unit(0.6,"lines"),segment.colour = "grey50",
label.padding = unit(0.2,"lines"))
NMDS分析
Stress小于0.2时,表示NMDS分析具有一定的可靠性;
小于0.05则认为结果很好;
小于0.01认为结果极好。
pacman::p_load(tidyverse,vegan)
nmds <- read.delim("data.xls",header=T,row.names=1,
check.names = F,sep="\t") %>%
t() %>% metaMDS(distance="bray")
#capture.output(nmds,file = "Stress.txt")
scores <- scores(nmds, choices = c(1,2))
write.table(scores,file="NMDS_scores.txt")
scores <- read.table("NMDS_scores.txt",header=T)
groups <- read.table("group2.txt", header=T,sep = "\t")
plotdata <- data.frame(rownames(scores),scores$NMDS1,
scores$NMDS2,groups$Group)
colnames(plotdata)=c("sample","MDS1","MDS2","group")
plotdata$sample <- factor(plotdata$sample)
plotdata$MDS1 <- as.numeric(as.vector(plotdata$MDS1))
plotdata$MDS2 <- as.numeric(as.vector(plotdata$MDS2))
ggplot(plotdata, aes(MDS1, MDS2)) +
geom_point(aes(colour=group,fill=group),size=3)+
labs(title="NMDS Plot") + xlab(paste("MDS1")) +
ylab(paste("MDS2"))+
theme(text=element_text(size=14))+
geom_vline(aes(xintercept = 0),linetype="dotted")+
geom_hline(aes(yintercept = 0),linetype="dotted")+
geom_text(aes(x=max(MDS1),y=max(MDS2)),hjust=3,vjust=0.5,
size=5,label=paste("Stress = ",round(nmds$stress,3),sep = ""),
colour="black")+
theme(panel.background = element_rect(fill='white', colour='black'),
panel.grid=element_blank(),
axis.title = element_text(color='black',size=14),
axis.ticks.length = unit(0.4,"lines"),
axis.ticks = element_line(color='black'),
axis.line = element_line(colour = "black"),
axis.title.x=element_text(colour='black', size=14),
axis.title.y=element_text(colour='black', size=14),
axis.text=element_text(colour='black',size=14),
legend.title=element_blank(),
legend.text=element_text(family="Times", size=14),
legend.key=element_blank())+
theme(plot.title = element_text(size=14,colour = "black",face = "plain"))
示例文件:
链接: https://pan.baidu.com/s/1UatWUnVUGoIvxzWAKKwA9w 提取码: x485