WGCNA分析代码

黑暗网址https://zhuanlan.zhihu.com/p/42970323

1、分析流程

image.png

2、过程

#1第1步:输入数据的准备

9、代码部分

#读入原始数据
#原始数据包含64个样本,9904个lncRNA表达量,其中第一列是lncRNA_ID,第66列是Annotation。
setwd("G:/My_exercise/WGCNA/")
lncRNAexpr <- read.csv("All_sample_LncRNA_exp_RPKM.csv",sep=",",header = T)
#重命名数据列表,行名和列名
fpkm <- lncRNAexpr[,-66]##去掉Annotation这列
rownames(fpkm)=fpkm[,1]
fpkm=fpkm[,-1] 
#准备表型信息
subname=sapply(colnames(fpkm),function(x) strsplit(x,"_")[[1]][1])
datTraits = data.frame(gsm=names(fpkm),
                       subtype=subname)
rownames(datTraits)=datTraits[,1]
head(datTraits)
#下载并载入WGCNA包
source("http://bioconductor.org/biocLite.R")
#biocLite(c("AnnotationDbi", "impute", "GO.db", "preprocessCore"))
#如果已经下载过了,这里就不用下载
biocLite("WGCNA")
library(WGCNA)
#行列转置
#WGCNA针对的是基因进行聚类,而一般我们的聚类是针对样本
#用hclust即可,也就是说要转置为行名是样本,列名是基因。
RNAseq_voom <- fpkm 
WGCNA_matrix = t(RNAseq_voom[order(apply(RNAseq_voom,1,mad), decreasing = T)[1:5000],])
datExpr <- WGCNA_matrix  ## top 5000 mad genes
datExpr[1:4,1:4]
#确定临床表型与样本名字
sampleNames = rownames(datExpr);
traitRows = match(sampleNames, datTraits$gsm)  
rownames(datTraits) = datTraits[traitRows, 1]

#STEP2:确定最佳soft-thresholding power
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="green")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
#选择beta值
best_beta=sft$powerEstimate
#> best_beta
[1] 3

#STEP3: 一步法构建共表达矩阵
net = blockwiseModules(datExpr, power = sft$powerEstimate,
                       maxBlockSize = 6000,TOMType = "unsigned", 
                       minModuleSize = 30,reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = "AS-green-FPKM-TOM",
                       verbose = 3)

#STEP4:模块鉴定及可视化
table(net$colors)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
table(mergedColors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
## assign all of the gene to their corresponding module 
## hclust for the genes.
#明确样本数和基因数
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#首先针对样本做个系统聚类树
datExpr_tree<-hclust(dist(datExpr), method = "average")
par(mar = c(0,5,2,0))
plot(datExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, 
     cex.axis = 1, cex.main = 1,cex.lab=1)
## 如果这个时候样本是有性状,或者临床表型的,可以加进去看看是否聚类合理
#针对前面构造的样品矩阵添加对应颜色
sample_colors <- numbers2colors(as.numeric(factor(datTraits$subtype)), 
                                colors = c("grey","blue","red","green"),signed = FALSE)
## 这个给样品添加对应颜色的代码需要自行修改以适应自己的数据分析项目。
#  sample_colors <- numbers2colors( datTraits ,signed = FALSE)
## 如果样品有多种分类情况,而且 datTraits 里面都是分类信息,那么可以直接用上面代码,
##当然,这样给的颜色不明显,意义不大。
#构造10个样品的系统聚类树及性状热图
par(mar = c(1,4,3,1),cex=0.8)
plotDendroAndColors(datExpr_tree, sample_colors,
                    groupLabels = colnames(sample),
                    cex.dendroLabels = 0.8,
                    marAll = c(1, 4, 3, 1),
                    cex.rowText = 0.01,
                    main = "Sample dendrogram and trait heatmap")

#STEP5:模块和性状的关系
design=model.matrix(~0+ datTraits$subtype)
colnames(design)=levels(datTraits$subtype)
moduleColors <- labels2colors(net$colors)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0); ##不同颜色的模块的ME值矩阵(样本vs模块)
moduleTraitCor = cor(MEs, design , use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                   signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(design),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = greenWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))

#STEP6:感兴趣性状的模块的具体基因分析
##首先计算模块与基因的相关性矩阵
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
## 算出每个模块跟基因的皮尔森相关系数矩阵
## MEs是每个模块在每个样本里面的值
## datExpr是每个基因在每个样本的表达量
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
##再计算性状与基因的相关性矩阵
## 只有连续型性状才能只有计算
## 这里把是否属于 CB 表型这个变量用0,1进行数值化。
CB = as.data.frame(design[,2]);
names(CB) = "CB"
geneTraitSignificance = as.data.frame(cor(datExpr, CB, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(CB), sep="");
names(GSPvalue) = paste("p.GS.", names(CB), sep="")
最后把两个相关性矩阵联合起来,指定感兴趣模块进行分析
module = "turquoise"
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7);
par(mfrow = c(1,1));
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Gene significance for CB",
                   main = paste("Module membership vs. gene significance\n"),
                   cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)


#STEP7:网络的可视化
#首先针对所有基因画热图
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
geneTree = net$dendrograms[[1]]; 
dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = sft$powerEstimate); # 设置power=sft$powerEstimate,最佳beta值,此处是3
plotTOM = dissTOM^7; 
diag(plotTOM) = NA; 
#TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

#然后随机选取部分基因作图
nSelect = 400
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];
# Open a graphical window
sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
#最后画模块和性状的关系
# Recalculate module eigengenes
MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
## 只有连续型性状才能只有计算
## 这里把是否属于 Luminal 表型这个变量用0,1进行数值化。
CB = as.data.frame(design[,2]);
names(CB) = "CB"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, CB))
# Plot the relationships among the eigengenes and the trait
sizeGrWindow(5,7.5);
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
                      = 90)
# Plot the dendrogram
sizeGrWindow(6,6);
par(cex = 1.0)
## 模块的聚类图
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
                      plotHeatmaps = FALSE)
# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
## 性状与模块热图
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
                      plotDendrograms = FALSE, xLabelsAngle = 90)
#STEP8:提取指定模块的基因名
#提取基因信息,进行下游分析包括GO/KEGG等功能数据库的注释
# Select module
module = "turquoise";
# Select module probes
probes = colnames(datExpr) ## 我们例子里面的probe就是基因名
inModule = (moduleColors==module);
modProbes = probes[inModule];
#GO分析
#STEP9: 模块的导出
# Recalculate topological overlap
TOM = TOMsimilarityFromExpr(datExpr, power = sft$powerEstimate); 
# Select module
module = "turquoise";
# Select module probes
probes = colnames(datExpr) ## 我们例子里面的probe就是基因名
inModule = (moduleColors==module);
modProbes = probes[inModule]; 
## 也是提取指定模块的基因名
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
#模块对应的基因关系矩阵
#首先是导出到VisANT
vis = exportNetworkToVisANT(modTOM,
                            file = paste("VisANTInput-", module, ".txt", sep=""),
                            weighted = TRUE,
                            threshold = 0)
然后是导出到cytoscape
cyt = exportNetworkToCytoscape(
  modTOM,
  edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""),
  nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""),
  weighted = TRUE,
  threshold = 0.02,
  nodeNames = modProbes, 
  nodeAttr = moduleColors[inModule]
);

#STEP10: 模块内的分析—— 提取hub genes
#hub genes指模块中连通性(connectivity)较高的基因,如设定排名
#前30或前10%)。
#高连通性的Hub基因通常为调控因子(调控网络中处于偏上游的位
#置),而低连通性的基因通常为调控网络中偏下游的基因(例如,转运蛋白、催化酶等)。
#HubGene: |kME| >=阈值(0.8)
#(1)计算连通性
### Intramodular connectivity, module membership, and screening for intramodular hub genes

# (1) Intramodular connectivity

# moduleColors <- labels2colors(net$colors)
connet=abs(cor(datExpr,use="p"))^6
Alldegrees1=intramodularConnectivity(connet, moduleColors)
head(Alldegrees1)
#(2)模块内的连通性与gene显著性的关系
# (2) Relationship between gene significance and intramodular connectivity
which.module="black"
EB= as.data.frame(design[,2]); # change specific 
names(EB) = "EB"
GS1 = as.numeric(cor(EB,datExpr, use="p"))
GeneSignificance=abs(GS1)
colorlevels=unique(moduleColors)
sizeGrWindow(9,6)
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
par(mar = c(4,5,3,1))
for (i in c(1:length(colorlevels)))
{
  whichmodule=colorlevels[[i]];
  restrict1 = (moduleColors==whichmodule);
  verboseScatterplot(Alldegrees1$kWithin[restrict1],
                     GeneSignificance[restrict1], col=moduleColors[restrict1],
                     main=whichmodule,
                     xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
}
#(3)计算模块内所有基因的连通性, 筛选hub genes
#abs(GS1)> .9 可以根据实际情况调整参数
#abs(datKME$MM.black)>.8 至少大于 >0.8
###(3) Generalizing intramodular connectivity for all genes on the array
datKME=signedKME(datExpr, MEs, outputColumnName="MM.")
# Display the first few rows of the data frame
head(datKME)
##Finding genes with high gene significance and high intramodular connectivity in
# interesting modules
# abs(GS1)> .9 可以根据实际情况调整参数
# abs(datKME$MM.black)>.8 至少大于 >0.8
FilterGenes= abs(GS1)> .9 & abs(datKME$MM.black)>.8
table(FilterGenes)

#STEP11: 其他分析
#(1) another plot for realtionship between module eigengenes
plotMEpairs(MEs,y=datTraits$cellType)
(2) Diagnostics: heatmap plots of module expression
sizeGrWindow(8,9)
#par(mfrow=c(3,1), mar=c(1, 2, 4, 1))
# for black module
which.module="blue";
plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),nrgcols=30,rlabels=T,
        clabels=T,rcols=which.module,
        title=which.module )
#(3) Diagnostics: displaying module heatmap and the eigengene
sizeGrWindow(8,7);
which.module="blue"
ME=MEs[, paste("ME",which.module, sep="")]
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),
        nrgcols=30,rlabels=F,rcols=which.module,
        main=which.module, cex.main=2)
par(mar=c(5, 4.2, 0, 0.7))
barplot(ME, col=which.module, main="", cex.main=2,
        ylab="eigengene expression",xlab="MPP")










其他实验代码

其他代码
source("http://bioconductor.org/biocLite.R")
#biocLite()
biocLite("affy")
biocLite("hgu133plus2cdf")
setwd("C:/Users/tvtZz/Downloads/GSE36272_RAW")
library(affy)
files =list.files("C:/Users/tvtZz/Downloads/GSE36272_RAW", full.names = TRUE)
affy.data = ReadAffy(filenames = files)
eset.mas5 = mas5(affy.data)
exprSet.nologs = exprs(eset.mas5)
#exprSet.nologs = preprocessCore::normalize.quntiles(eset.mas5)
colnames(exprSet.nologs)
colnames(exprSet.nologs)=c("GSM885697_25-Nip-Pxo99-1.CEL","GSM885698_26-Nip-Pxo99-2.CEL","GSM885699_27-Nip-Pxo99-3.CEL","GSM885700_31-Nip-ME7-1.CEL","GSM885701_32-Nip-ME7-2.CEL","GSM885702_33-Nip-ME7-3.CEL","GSM885703_28-Nip-ME5-1.CEL","GSM885704_29-Nip-ME5-2.CEL","GSM885705_30-Nip-ME5-3.CEL","GSM885706_43-Nip-H2O-1.CEL","GSM885707_44-Nip-H2O-2.CEL","GSM885708_45-Nip-H2O-3.CEL","GSM885709_46-Nip-NI-1.CEL","GSM885710_48-Nip-NI-3.CEL","GSM885711_37-Nip-T7174-1.CEL""GSM885712_38-Nip-T7174-2.CEL","GSM885713_39-Nip-T7174-3.CEL","GSM885714_34-Nip-PXO86-1.CEL","GSM885715_35-Nip-PXO86-2.CEL","GSM885716_36-Nip-PXO86-3.CEL","GSM885717_40-Nip-BLS303-1.CEL","GSM885718_41-Nip-BLS303-2.CEL","GSM885719_42-Nip-BLS303-3.CEL","GSM885720_Nip-ME1-1_7.CEL","GSM885721_Nip-ME1-2_8.CEL","GSM885722_Nip-ME1-3_9.CEL","GSM885723_Nip-ME2-1_4.CEL","GSM885724_Nip-ME2-2_5.CEL","GSM885725_Nip-ME2-3_6.CEL","GSM885726_Nip-99-1b_1.CEL","GSM885727_Nip-99-2b_2.CEL","GSM885728_Nip-99-3b_3.CEL")
exprSet = log(exprSet.nologs, 2)
data.mas5calls = mas5calls(affy.data)
data.mas5calls.calls = exprs(data.mas5calls)
resmean=apply(exprSet[,c("GSM885697_25-Nip-Pxo99-1.CEL","GSM885698_26-Nip-Pxo99-2.CEL","GSM885699_27-Nip-Pxo99-3.CEL","GSM885700_31-Nip-ME7-1.CEL","GSM885701_32-Nip-ME7-2.CEL","GSM885702_33-Nip-ME7-3.CEL","GSM885703_28-Nip-ME5-1.CEL","GSM885704_29-Nip-ME5-2.CEL","GSM885705_30-Nip-ME5-3.CEL","GSM885706_43-Nip-H2O-1.CEL","GSM885707_44-Nip-H2O-2.CEL","GSM885708_45-Nip-H2O-3.CEL","GSM885709_46-Nip-NI-1.CEL","GSM885710_48-Nip-NI-3.CEL","GSM885711_37-Nip-T7174-1.CEL","GSM885712_38-Nip-T7174-2.CEL","GSM885713_39-Nip-T7174-3.CEL","GSM885714_34-Nip-PXO86-1.CEL","GSM885715_35-Nip-PXO86-2.CEL","GSM885716_36-Nip-PXO86-3.CEL","GSM885717_40-Nip-BLS303-1.CEL","GSM885718_41-Nip-BLS303-2.CEL","GSM885719_42-Nip-BLS303-3.CEL","GSM885720_Nip-ME1-1_7.CEL","GSM885721_Nip-ME1-2_8.CEL","GSM885722_Nip-ME1-3_9.CEL","GSM885723_Nip-ME2-1_4.CEL","GSM885724_Nip-ME2-2_5.CEL","GSM885725_Nip-ME2-3_6.CEL","GSM885726_Nip-99-1b_1.CEL","GSM885727_Nip-99-2b_2.CEL","GSM885728_Nip-99-3b_3.CEL")],1,mean)
negmean=apply(exprSet[,c("GSM885729_1-IR24-99-1.CEL","GSM885730_2-IR24-99-2.CEL","GSM885731_3-IR24-99-3.CEL","GSM885732_7-IR24-ME7-1.CEL","GSM885733_8-IR24-ME7-2.CEL","GSM885734_9-IR-24-ME7-3.CEL","GSM885735_4-IR24-ME5-1.CEL","GSM885739_20-IR-24-H20-2.CEL","GSM885740_21-IR-24-H20-3.CEL","GSM885741_22-IR-24-NI-1.CEL","GSM885742_23-IR-24-NI-2.CEL","GSM885743_24-IR-24-NI-3.CEL","GSM885744_13-IR-24-T-7174-1.CEL","GSM885745_14-IR-24-T-7174-2.CEL","GSM885746_15-IR-24-T-7174-3.CEL","GSM885747_10-IR-24-PXO-86-1.CEL","GSM885748_11-IR-24-PXO-86-2.CEL","GSM885749_12-IR-24-PXO-86-3.CEL","GSM885750_16-IR-24-BLS30J-1.CEL","GSM885751_17-IR-24-BLS3O3-2.CEL","GSM885752_18-IR-24-BLS3O3-3.CEL","GSM885753_IR24-ME1-1_16.CEL","GSM885754_IR24-ME1-2_17.CEL","GSM885755_IR24-ME1-3_18.CEL","GSM885756_IR24-ME2-1_15.CEL","GSM885757_IR24-ME2-2_13.CEL","GSM885758_IR24-ME2-3_14.CEL","GSM885759_IR24-99-1b_10.CEL","GSM885760_IR24-99-2b_11.CEL","GSM885761_IR24-99-3b_12.CEL" )],1,mean)
diff=resmean-negmean
pvalue=apply(exprSet, 1, function(x) {
    t.test(x[1:32], x[33:65])$p.value
})
pvalue=p.adjust(pvalue,"fdr")
AP = apply(data.mas5calls.calls, 1, paste, collapse = "")
#biocLite("stringr")
library(stringr)
genes.present = names(AP[str_count(AP, pattern = "A")<20])
exprSet.present = exprSet[genes.present, ]
-
exprSet.significant = exprSet[intersect(genes.present,names(pvalue[pvalue<0.01])),]
write.csv(exprSet.significant,"exprSet.significant.csv",sep=",")
heatmap(exprSet.significant, main = "transcriptional responses of two susceptible rice cultivars to strains of Xanthomonas oryzae")
ɾ³ýaffy»­Í¼
x<-read.csv(file = "exprSet.significant_2.csv",header=T,row.names = 1)
exprSet.significant<-as.matrix(x)
heatmap(exprSet.significant, main = "transcriptional responses of two susceptible rice cultivars to strains of Xanthomonas oryzae")

setwd("/home/student3/wgcna-zzy/")
 getwd()
library(WGCNA);
enableWGCNAThreads(2)
#memory.limit(6000)
options(stringsAsFactors = FALSE);#²»ÒªÈûùÒòºÅ×÷ΪÒòËØ
#datTraits=read.csv("exprSet.significant.csv")
dat0=read.csv("exprSet.significant.csv",header=TRUE)    #sep='\t',use read.table for txt file,colClasses="numeric" when data is integer,storage.mode(datExpr) = "double" for REAL() can only be applied to a 'numeric' error suggests none-num in the last column or some whereelse
datSummary=dat0[,1:1];dim(dat0)

datExpr = t(dat0[,2: ncol(dat0)])
no.samples = dim(datExpr)[[1]]
dim(datExpr)
ArrayName= names(data.frame(dat0[,-1]))
GeneName= dat0$GeneID
datExpr=data.frame(t(dat0[,-1]))
names(datExpr)=dat0[,1]
#dimnames(datExpr)[[1]]=names(data.frame(dat0[,-1]))
#table(dimnames(datExpr)[[1]]==datTraits$sample)
#y=datTraits$gender
#z=datTraits$age
#sizeGrWindow(9, 5)
#pdf("ClusterTreeSamples.pdf")
#plotClusterTreeSamples(datExpr=datExpr, y=y)
#dev.off()
#rm(dat0);
#gc()

memory.size=100000
powers=c(seq(1,10,by=1),seq(12,14,by=2))#¸øpowers¼¸¸öÖµÊÔ
sft=pickSoftThreshold(datExpr, powerVector=powers,networkType = "signed") ÕÒ×îºÃµÄprower ,¾ØÕóÖÐR.sqÖУ¬ÖµÔ½¸ßÔ½ºÃ£¨0.8-0.9£©£¬µ«Ò²Òª×¢ÒâpowerÖµ²»Òª¹ý¸ß

RpowerTable=sft[[2]]
sizeGrWindow(9, 5);pdf('choosing power.pdf');par(mfrow = c(1,2));cex1 = 0.9;
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red");
abline(h=0.90,col="red");dev.off()
# Mean connectivity as a function of the soft-thresholding power
sizeGrWindow(9, 5);pdf('mean connectivity.pdf');plot(sft$fitIndices[,1], sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red");dev.off()

softPower =14
Connectivity=softConnectivity(datExpr,corFnc = "cor", corOptions = "use ='p'",power=softPower)  #Ƥ¶ûÉ­Ïà¹ØϵÊý cor
pdf("scale-free.pdf");scaleFreePlot(Connectivity,nBreaks = 10,truncated = FALSE,removeFirst = FALSE, main = "");dev.off()
adjacency = adjacency(datExpr,corFnc = "cor", corOptions = "use ='p'",type = "signed", power = softPower)
TOM = TOMsimilarity(adjacency,TOMType="signed");
dissTOM = 1-TOM   #1-ÏàËƶȣ¬Ò»Ñù£¬Öظ´¾àÀëΪ0
#method="complete"  ?
geneTree = hclust(as.dist(dissTOM), method = "average")#¸ß°æ±¾ÒѾ­ÓÃhclust   h ²ã´Î¾ÛÀà
minModuleSize =30;#Éè×îС¾ÛÀ࣬ʹ½á¹û¸ü¿ÉÐÅ
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,deepSplit = 0, pamRespectsDendro = FALSE,minClusterSize = minModuleSize,cutHeight=0.99);#¶¯Ì¬ÇÐ¸î £¬deepsplitÈ¡Öµ0-4 Ô½¸ßÔ½Ãô¸Ð£¬ÏàËƶȸ߿ÉÉè¸ßÒ»µã
table(dynamicMods) #table½øÐÐ×Ü½á  ÓÐ0 µÄÊÇ»ùÒòÊÇ
dynamicColors = labels2colors(dynamicMods) #°Ñ1 2 3±êÇ©¸Ä³ÉÑÕÉ«
table(dynamicColors)

#the following codes, dynamicColors was changed to dynamicMods for num labeling than Color
MEList = moduleEigengenes(datExpr, colors = dynamicMods) #ÒÔÄ£¿éΪµ¥Î»¼ÆËã»ùÒò±í´ï PCA½µÎ¬
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs); #ËãÄ£¿éÓëÄ£¿éÖ®¼äµÄ¾àÀë
METree = hclust(as.dist(MEDiss), method = "average");#
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",xlab = "", sub = "")
MEDissThres = 0.2
abline(h=MEDissThres, col = "red")
merge = mergeCloseModules(datExpr, dynamicMods, cutHeight = MEDissThres, verbose = 3);#ºÏ²¢½üµÄÄ£¿é
mergedColors = merge$colors;#¸üÐÂÄ£¿éÑÕÉ«
mergedMEs = merge$newMEs;
sizeGrWindow(12, 9)
pdf("DendroAndColors.pdf")
plotDendroAndColors(geneTree, cbind(dynamicMods, mergedColors),c("Dynamic Tree Cut", "Merged dynamic"),dendroLabels = FALSE, hang = 0.03,addGuide = TRUE, guideHang = 0.05)
dev.off()

moduleColors = mergedColors
colorOrder = c("grey", standardColors(unique(moduleColors)));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;
MEDiss = 1-cor(MEs);
METree = hclust(as.dist(MEDiss), method = "average");#
pdf("METree.pdf")
plot(METree, main = "Clustering of module eigengenes",xlab = "", sub = "")
dev.off()


MEList = moduleEigengenes(datExpr, colors = dynamicMods)
nSamples=nrow(datExpr)
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
MMPvalue = cbind.data.frame(datSummary,corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
write.table(data.frame(ArrayName,MEs),"MEs.csv",row.name=F) 
kMEdat=data.frame(geneModuleMembership,MMPvalue)
write.table(data.frame(datSummary,kMEdat),"kME-MMPvalue.csv",row.names=FALSE)
datout=data.frame(datSummary, colorNEW=moduleColors, ConnectivityNew=Connectivity)
write.table(datout, file="OutputCancerNetwork.csv", sep=",", row.names=F)
k.in=intramodularConnectivity(adjacency(datExpr,,corFnc = "cor", corOptions = "use ='p'", type = "signed", power = softPower), moduleColors,scaleByMax = FALSE)
write.table(data.frame(datSummary,k.in),"intraK.csv",sep=',',row.names=FALSE)
hubs    = chooseTopHubInEachModule(datExpr, moduleColors)
write.csv(data.frame(unique(moduleColors)[!unique(moduleColors) %in% 0)],labels2colors(unique(moduleColors)[!(unique(moduleColors) %in% 0)]),hubs[unique(moduleColors)]),"num2color.csv",row.names=F)

#×Ô¼ºµÄ

 data.frame(unique(moduleColors)[!unique(moduleColors) %in% 0],labels2colors(unique(moduleColors)[!unique(moduleColors) %in% 0]),hubs[!is.na(hubs[unique(moduleColors)])])
 
 write.table(data.frame(unique(moduleColors)[!unique(moduleColors) %in% 0],labels2colors(unique(moduleColors)[!unique(moduleColors) %in% 0])),"num2color.csv",row.names=F)

#»­TOMÈÈͼ£º¿É²»×ö
pdf("TOM.pdf")
TOMplot(dissTOM , geneTree, moduleColors)
dev.off()
#»­MDSͼ£º¿É²»×ö
pdf("MDS.pdf")
cmd1=cmdscale(as.dist(dissTOM),2)
par(mfrow=c(1,1))#par ²ÎÊýµÄËõд£¬ÉèÖû­Í¼²ÎÊý   °Ñ¶àάµÄÊý¾ÝÓ³Éäµ½¶þά£¬TSNA
plot(cmd1, col=as.character(moduleColors),  main="MDS plot",xlab="Scaling Dimension 1",ylab="Scaling Dimension 2")
dev.off()
#±í´ïÈÈͼ,restConnectivityCutÓÃgene¸öÊý´úÌ棺¿É²»×ö
ConnectivityCut = 5000 #ãÐÖµÉèÖµ Ëæ±ã¸øÒ»¸ö²ÎÊý
ConnectivityRank = rank(-Connectivity)  #¸øÁÐÏòÁ¿ÅÅÐò -ºÅÉýÐò
restConnectivity = ConnectivityRank <= ConnectivityCut #Ñ¡Ç°500¸ö×ö±í´ïÈÈͼ
par(mfrow=c(2,1), mar=c(1, 2, 4, 1))
ClusterSamples=hclust(dist(datExpr[,restConnectivity] ),method="average") #
# for the first (turquoise) module we use
which.module="turquoise"
pdf("Heat.pdf")
plotMat(t(scale(datExpr[ClusterSamples$order,restConnectivity][,moduleColors==which.module ]) ),nrgcols=30,rlabels=T, clabels=T,rcols=which.module,main=which.module )
dev.off()
# for the second (blue) module we use
which.module="brown"
plot.mat(t(scale(datExpr[ClusterSamples$order,restConnectivity][,moduleColors==which.module ]) ),nrgcols=30,rlabels=T, clabels=T,rcols=which.module,
main=which.module )



#displaying module heatmap and the eigengene
pdf("modheatmap.pdf")
sizeGrWindow(8,7);
which.module="green"
ME=MEs[, paste("ME",which.module, sep="")]
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
plotmat(t(scale(datExpr[,moduleColors==which.module ])),nrgcols=30,rlabels=F,rcols=which.module,main=which.module, cex.main=2)
dev.off()
par(mar=c(5, 4.2, 0, 0.7))
pdf("eigenebar.pdf")
barplot(ME, col=which.module, main="", cex.main=2,ylab="eigengene expression",xlab="array sample")
dev.off()

#Correlate the module eigengenes with the trait
signif(cor(y,MEs, use="p"),2)
p.values = corPvalueStudent(cor(y,MEs, use="p"), nSamples = length(y))
#Measure of module signicance as average gene signicance
GS1=as.numeric(cor(y,datExpr, use="p"))
GeneSignificance=abs(GS1)
# Next module significance is defined as average gene significance.
ModuleSignificance=tapply(GeneSignificance, moduleColors, mean, na.rm=T)
sizeGrWindow(8,7)
par(mfrow = c(1,1))
plotModuleSignificance(GeneSignificance,moduleColors)

#plot Gene signicance (y-axis) vs. intramodular connectivity (x-axis) »­Í¼gene sigºÍÁ¬½Ó¶ÈÉ¢µã
colorlevels=unique(moduleColors)
sizeGrWindow(9,6)
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
par(mar = c(4,5,3,1))
for (i in c(1:length(colorlevels)))
{
whichmodule=colorlevels[[i]];
restrict1 = (moduleColors==whichmodule);
verboseScatterplot(k.in$kWithin[restrict1],
GeneSignificance[restrict1], col=moduleColors[restrict1],
main=whichmodule,
xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
}

#Generalizing intramodular connectivity for all genes on the array
datKME=signedKME(datExpr, MEs, outputColumnName="MM.")
# Display the first few rows of the data frame
head(datKME)

#Output of the results of network screening analysis
NS1=networkScreening(y=y, datME=MEs, datExpr=datExpr,oddPower=3, blockSize=10000, minimumSampleSize=4,addMEy=TRUE, removeDiag=FALSE, weightESy=0.5)
GeneResultsNetworkScreening=data.frame(GeneName=row.names(NS1), NS1)
#write.table(GeneResultsNetworkScreening, file="GeneResultsNetworkScreening.csv",row.names=F,sep=",")
MEsy = data.frame(y, MEs)
eigengeneSignificance = cor(MEsy, y);
eigengeneSignificance[1,1] = (1+max(eigengeneSignificance[-1, 1]))/2
eigengeneSignificance.pvalue = corPvalueStudent(eigengeneSignificance, nSamples = length(y))
namesME=names(MEsy)
# Form a summary data frame
out1=data.frame(t(data.frame(eigengeneSignificance,eigengeneSignificance.pvalue, namesME, t(MEsy))))
# Set appropriate row names
dimnames(out1)[[1]][1]="EigengeneSignificance"
dimnames(out1)[[1]][2]="EigengeneSignificancePvalue"
dimnames(out1)[[1]][3]="ModuleEigengeneName"
dimnames(out1)[[1]][-c(1:3)]=dimnames(datExpr)[[1]]
# Write the data frame into a file
write.table(out1, file="MEResultsNetworkScreening.csv", row.names=TRUE, col.names = TRUE, sep=",")
GeneName=dimnames(datExpr)[[2]]
GeneSummary=data.frame(GeneName, moduleColors, NS1)
write.table(GeneSummary, file="GeneSummaryTutorial.csv", row.names=F,sep=",")
datTraits=data.frame(ArrayName, MEsy)
dimnames(datTraits)[[2]][2:length(namesME)]=paste("Trait",dimnames(datTraits)[[2]][2:length(namesME)],sep=".")
write.table(datTraits, file="TraitsTutorial.csv", row.names=F,sep=",")

#Relationships among the top 30 most signicant genes,correlation heatmaps for signed network:
sizeGrWindow(7,7)
NS1=networkScreening(y=y, datME=MEs, datExpr=datExpr,oddPower=3, blockSize=10000, minimumSampleSize=4,addMEy=TRUE, removeDiag=FALSE, weightESy=0.5)
topList=rank(NS1$p.Weighted,ties.method="first")<=30
gene.names= names(datExpr)[topList]
# The following shows the correlations between the top genes
plotNetworkHeatmap(datExpr, plotGenes = gene.names,networkType="signed", useTOM=FALSE,power=1, main="signed correlations")

#Exporting to Cytoscape£»¿É²Î¿¼cytoscapeCode.txt    ÌáÈ¡Ä£¿éÐÅÏ¢
# Recalculate topological overlap if needed
#TOM = TOMsimilarityFromExpr(datExpr, power = 6,TOMType="signed");
# Read in the annotation file
#annot = read.csv(file = "HGU1332-annot.csv");
# Select modules
modules = c("black","blue","brown","cyan","green","greenyellow","magenta","midnightblue","pink","purple","red","salmon","turquoise","yellow");
# Select module probes
probes = dat0[,1]
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
#modGenes = annot$To[match(modProbes, annot$From)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,threshold = 0.02,nodeNames = modProbes ,nodeAttr = moduleColors[inModule]);
#automatic finish the Cytoscape mods
n=length(unique(moduleColors))  #Ä£¿éÊý
pb <- txtProgressBar(min = 0, max = n, style = 3) #½ø¶ÈÌõ
for (p in 1:n){ modules=unique(moduleColors)[p]
inModule = is.finite(match(moduleColors,modules));
modProbes = probes[inModule]; #¼ÆËãÏà¹Ø¶È Ìáȡ̽Õë
modTOM = TOM[inModule, inModule];#̽Õë¶ÔÓ¦TOMÖµ
dimnames(modTOM) = list(modProbes, modProbes)
cyt = exportNetworkToCytoscape(modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
#nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,threshold = 0.02,nodeNames = modProbes ,nodeAttr = moduleColors[inModule]);  #threshold can be replaced by quantile(abs(modTOM),probs=0.8)
setTxtProgressBar(pb, p)}
close(pb)

#¶à¸ötraitºÍmoduleÁªÏµÆðÀ´
allTraits = read.csv("Trait.csv");#¶ÁÈ¡ÐÔ×´µÄ¹Øϵ
femaleSamples=rownames(datExpr);
traitRows = match(femaleSamples, allTraits$arrayname);
datTraits = allTraits[traitRows, -1];
nGenes = ncol(datExpr);
nSamples = nrow(datExpr);
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
#¿ÉÒÔÊä³öÈÈͼÖеÄÊý¾Ý£¬textMatrix¿ÉÒÔɾȥ
out2=data.frame(data.frame(moduleTraitCor,moduleTraitPvalue))
write.table(out2, file="trait-module relationship.csv", row.names=TRUE, col.names = TRUE, sep=",")
#corº¯Êý¿ÉÒÔÓÐuseºÍmethod²ÎÊýµÄÉèÖà : Missing values present in input data. Consider using use = 'pairwise.complete.obs'.
×¢Ò⣺´óСдÊDz»Í¬µÄ£¬ÌرðÊÇÎļþÃûµÈ£¡
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 199,830评论 5 468
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 83,992评论 2 376
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 146,875评论 0 331
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 53,837评论 1 271
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 62,734评论 5 360
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,091评论 1 277
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,550评论 3 390
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,217评论 0 254
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,368评论 1 294
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,298评论 2 317
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,350评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,027评论 3 315
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,623评论 3 303
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,706评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 30,940评论 1 255
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 42,349评论 2 346
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 41,936评论 2 341

推荐阅读更多精彩内容

  • 简介 本文是一个静态代码分析工具的清单,但是为公司产品,需要付费使用。共有37个公司,有些公司包含多个工具。其中2...
    烟花诗人阅读 3,468评论 0 0
  • 从百度文库下载下来的,这里保存一份 别人的原代码程序员怎样阅读 源码就是指编写的最原始程序的代码。 运行的软件是要...
    Albert陈凯阅读 3,366评论 0 15
  • 目录faster rcnn论文备注caffe代码框架简介faster rcnn代码分析后记 faster rcnn...
    db24cc阅读 9,591评论 2 12
  • ¥开启¥ 【直接拨打指定电话】 〖2017-08-21 11:28:06〗 《sit(a, "action","a...
    小菜c阅读 37,968评论 2 26
  • 每到年头岁尾的时候,总要回望过去一年,展望新的一年。今天是2017年的最后一天,辞旧迎新,心生感慨,回头望望...
    丫头168阅读 250评论 0 0