先保存,下次有时间再整理
rm(list = ls())
load(file = 'step2_select.Rdata')
# 每次都要检测数据
dat<- as.data.frame(t(M_expr))
dat[1:4,1:4]
library(hgu133plus2.db)
ls("package:hgu133plus2.db")
ids=toTable(hgu133plus2SYMBOL)
head(ids)
ids$symbol
dat<- dat[rownames(dat)%in%ids$symbol,]
table(rownames(dat) %in% ids$symbol)
ids<- ids[ids$symbol%in%rownames(dat),]
ids<- ids[!duplicated(ids$symbol),]
ids$median=apply(dat,1,median)
#
# ids=ids[order(ids$symbol,ids$median,decreasing = T),]
# ids=ids[!duplicated(ids$symbol),]
# dat=dat[ids$symbol,]
# rownames(dat)=ids$symbol
# dat[1:4,1:4]
Kmeans_results$group_list[Kmeans_results$`km.res$cluster`==1]<- c('cluster1')
Kmeans_results$group_list[Kmeans_results$`km.res$cluster`==2]<- c('cluster2')
group_list<- Kmeans_results$group_list
X=dat
##############################################################################################
X<- as.matrix(X)
library(GSVA)
options(stringsAsFactors = F)
gene_Set <- read.delim("C:/Users/chenyuqiao/Desktop/LN RNAseq/Step GSVA/395gene GeneSet.txt", header=FALSE, row.names=1)
gene_Set<- as.data.frame(t(gene_Set))
gene_list<- as.list(gene_Set)
es.max <- gsva(X, gene_list, mx.diff=FALSE, verbose=FALSE, parallel.sz=1)
adjPvalueCutoff <- 0.05 #################这里要调整
logFCcutoff <- log2(2)
table(group_list)
dim(es.max)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
rownames(design)=colnames(es.max)
design
library(limma)
contrast.matrix<-makeContrasts(paste0(unique(group_list),collapse = "-"),
levels = design)
# contrast.matrix<-makeContrasts("TNBC-noTNBC",
# levels = design)
contrast.matrix ##这个矩阵声明,我们要把progres.组跟stable进行差异分析比较
##step1
fit <- lmFit(es.max,design)
##step2
fit2 <- contrasts.fit(fit, contrast.matrix)
##这一步很重要,大家可以自行看看效果
fit2 <- eBayes(fit2) ## default no trend !!!
##eBayes() with trend=TRUE
##step3
res <- decideTests(fit2, p.value=adjPvalueCutoff)
summary(res)
tempOutput = topTable(fit2, coef=1, n=Inf)
nrDEG = na.omit(tempOutput)
#write.csv(nrDEG2,"limma_notrend.results.csv",quote = F)
head(nrDEG)
data_gene_set<- as.data.frame(es.max)
nrDEG=nrDEG[nrDEG$P.Value<0.5 & abs(nrDEG$logFC) > 0.1,] #######在这里调阈值,保证输出
print(dim(nrDEG))
n=rownames(nrDEG)
match(n,rownames(nrDEG))
data_gene_set=data_gene_set[match(n,rownames(data_gene_set)),] ######n=rownames(nrDEG),筛选表达矩阵数据
ac=data.frame(g=group_list) ########对应后面的图中的annotation_col
rownames(ac)=colnames(data_gene_set)
# rownames(nrDEG)=substring(rownames(nrDEG),1,50)
library(pheatmap)
pheatmap(data_gene_set)
dev.off
pheatmap(data_gene_set)
pheatmap::pheatmap(data_gene_set,
fontsize_row = 8,height = 11,
annotation_col = ac,show_colnames = F,filename = '11111111111.pdf')
# pheatmap::pheatmap(nrDEG,
# fontsize_row = 8,height = 11,
# annotation_col = ac,show_colnames = F,
# filename = '11111111111.pdf')
#