ClusterGVis是一个用于基因表达数据可视化的R包,由中国药科大学Jun Zhang博士开发,可以一次性绘制"聚类热图+分组表达趋势折线图+功能注释"的组合图。通过这一张图,可以了解差异基因能划分成几个cluster、每个cluster的表达量随时间如何变化,以及各cluster中的基因经GO或KEGG功能注释后所对应的功能。
如下根据其github以及网上教程整理一下,可以放心食用。
此例中用的数据为酿酒酵母基因表达数据。
#devtools::install_github( "jokergoo/ComplexHeatmap")
#devtools::install_github( "junjunlab/ClusterGVis")
#2024.2.20更新:qlcMatrix包已从cran下架,需要单独安装,地址为https://cran.r-project.org/src/contrib/Archive/qlcMatrix/
library(ClusterGVis)
library(ComplexHeatmap)
library(org.Sc.sgd.db)
library(cols4all)
library(clusterProfiler)
# Gene SYMBOLs are used as row names because gene names are labelled in the plots
dt <- read.csv(
  "FPKM-SYMBOL.csv",
  row.names = 1
)
# head(dt)
# getClusters(exp = dt)  # the elbow point can guide the choice of cluster number
# Cluster the expression matrix into 8 groups with the mfuzz method
cm <- clusterData(
  exp = dt,
  cluster.method = "mfuzz",
  cluster.num = 8,
  seed = 123
)
# str(cm)
# ck <- clusterData(exp = dt, cluster.method = "kmeans", cluster.num = 8, seed = 123)
# str(ck)
- 表达趋势折线图绘制
# Expression trend line plot for the mfuzz clustering result
visCluster(object = cm, plot.type = "line", add.mline = TRUE)
# No active statement in this script creates `ck`, so build the k-means
# clustering here (same settings as for `cm`) unless it already exists;
# otherwise the call below fails with "object 'ck' not found".
if (!exists("ck")) {
  ck <- clusterData(
    exp = dt,
    cluster.method = "kmeans",
    cluster.num = 8,
    seed = 123
  )
}
# Expression trend line plot for the k-means clustering result
visCluster(object = ck, plot.type = "line", add.mline = TRUE)
- 简单热图绘制
# 8 colours for the cluster annotation (the data are split into 8 modules)
mycol <- c4a("light", 8)
# 28 randomly picked genes are labelled here; in real use, replace them with
# genes of interest or the top-membership genes of each module.
set.seed(123) # fixed seed so the same genes are labelled on every run
# Cap the sample size so a matrix with fewer than 28 genes does not error
mark <- sample(rownames(dt), size = min(28, nrow(dt)), replace = FALSE)
# Write the clustered heatmap with the labelled genes to heatmap.pdf
pdf("heatmap.pdf", height = 8, width = 7, onefile = FALSE)
visCluster(
  object = cm,
  plot.type = "heatmap",
  column_names_rot = 60,
  show_row_dend = FALSE,
  # ht.col = c("#0da9ce", "white", "#e74a32"),  # raised an error when enabled
  ctAnno.col = mycol,
  markGenes = mark
)
dev.off()
- GO富集分析
俊俊大神的ClusterGVis用的是enrichCluster做的GO和KEGG富集分析,我在跑的时候有报错且一直没有解决,所以改用clusterProfiler包做GO和KEGG富集,然后把注释信息提取出来作图。
注意:enrichGO用ENTREZID,enrichKEGG用ENSEMBL,cm用COMMON
# Gene ID mapping table; the code below reads its COMMON, ENTREZID and
# ENSEMBL columns.
gene_list <- read.csv("genelist.csv", header = TRUE)
# Attach the ID columns to the clustering result: `gene` in cm$wide.res is
# matched against `COMMON` in gene_list (merge() keeps only matching genes).
gl <- merge(cm$wide.res, gene_list, by.x = "gene", by.y = "COMMON")
8个cluster循环做GO注释,并把注释结果拼接起来
# Run GO (BP) enrichment for each of the 8 clusters and collect the first five
# enriched terms per cluster as rows of (id, term, pval) for visCluster().
go_top_terms <- lapply(seq_len(8), function(i) {
  res <- enrichGO(
    gene = gl$ENTREZID[which(gl$cluster == i)],
    OrgDb = org.Sc.sgd.db,
    keyType = "ENTREZID",
    ont = "BP",
    pAdjustMethod = "BH",
    pvalueCutoff = 0.05,
    qvalueCutoff = 0.2
  )
  # Skip clusters for which no enrichment result object is returned
  if (is.null(res)) {
    return(NULL)
  }
  tab <- as.data.frame(res)
  # Skip clusters without any enriched term instead of failing in data.frame()
  if (nrow(tab) == 0) {
    return(NULL)
  }
  # head() keeps at most five rows, so clusters with fewer terms are not
  # padded with all-NA rows.
  tab <- head(tab, 5)
  # Select columns by name: the positions of Description / pvalue differ
  # between clusterProfiler versions.
  data.frame(
    id = paste0("C", i),
    term = tab$Description,
    pval = tab$pvalue
  )
})
# Bind once at the end; the leading empty data.frame keeps the result a
# data.frame even when no cluster produced a term.
enrich_go <- do.call(rbind, c(list(data.frame()), go_top_terms))
# Write the combined plot (plot.type = "both") with the GO terms supplied as
# cluster annotation to GO.pdf.
pdf("GO.pdf", height = 10, width = 10)
visCluster(
  object = cm,
  plot.type = "both",
  column_names_rot = 45,
  annoTerm.data = enrich_go,
  line.side = "left",
  markGenes = mark,
  show_row_dend = FALSE
)
dev.off()
- KEGG富集分析
同理,8个cluster循环做KEGG注释,并把注释结果拼接起来
# Run KEGG enrichment for each of the 8 clusters and collect the first five
# enriched pathways per cluster as rows of (id, term, pval) for visCluster().
kegg_top_terms <- lapply(seq_len(8), function(i) {
  res <- enrichKEGG(
    # NOTE(review): the ENSEMBL column is passed as KEGG gene ids
    # (keyType = "kegg"); confirm the two id systems coincide for 'sce'.
    gene = gl$ENSEMBL[which(gl$cluster == i)],
    keyType = "kegg",
    organism = "sce",
    pAdjustMethod = "fdr",
    pvalueCutoff = 0.05,
    qvalueCutoff = 0.2
  )
  # Skip clusters for which no enrichment result object is returned
  if (is.null(res)) {
    return(NULL)
  }
  tab <- as.data.frame(res)
  # Skip clusters without any enriched pathway instead of failing in data.frame()
  if (nrow(tab) == 0) {
    return(NULL)
  }
  # head() keeps at most five rows, so clusters with fewer pathways are not
  # padded with all-NA rows.
  tab <- head(tab, 5)
  # Select columns by name: the positions of Description / pvalue differ
  # between clusterProfiler versions.
  data.frame(
    id = paste0("C", i),
    term = tab$Description,
    pval = tab$pvalue
  )
})
# Bind once at the end; the leading empty data.frame keeps the result a
# data.frame even when no cluster produced a pathway.
enrich_KEGG <- do.call(rbind, c(list(data.frame()), kegg_top_terms))
# The CSV round trip allows redundant text in the annotation to be removed by
# hand between writing and reading the file.
# Fields stay quoted because pathway names may contain commas, and row names
# are dropped so the file read back has only the id, term and pval columns.
write.csv(enrich_KEGG, "enKEGG.csv", row.names = FALSE)
enKEGG <- read.csv("enKEGG.csv", header = TRUE)
# Write the combined plot (plot.type = "both") with the KEGG pathways supplied
# as cluster annotation to KEGG.pdf.
pdf("KEGG.pdf", height = 12, width = 12)
visCluster(
  object = cm,
  plot.type = "both",
  column_names_rot = 45,
  annoTerm.data = enKEGG,
  line.side = "left",
  markGenes = mark,
  show_row_dend = FALSE
)
dev.off()
Citation
Jun Zhang (2022). ClusterGVis: One-step to Cluster and Visualize Gene Expression Matrix. https://github.com/junjunlab/ClusterGVis