不同的时空以及不同的条件下差异基因分析是RNAseq分析的重要目标。
差异表达分析方法包括:
基于Read数目:DESeq、limma和edgeR;
基于组装技术:Cuffdif和Ballgown;
基于免比对的定量方法(kallisto、Salmon、Salfish):sleuth。
安装packages
if(!requireNamespace("BiocManager",quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
BiocManager::install("edgeR")
BiocManager::install("limma")
BiocManager::install("Rgraphviz") 好像没用到这个包诶
整理矩阵
用的是rsem的count文件
data<-read.table("~/rnaseq/rsem_out/input_data",sep="\t",header=TRUE,row.names=1,col.names=c("name","exp1","exp2","exp3","exp4","exp5","exp6","exp7","exp8","exp9","exp10","ctl1","ctl2","ctl3","ctl4","ctl5","ctl6","ctl7"))
data<-round(data,digits=0)
data<-as.matrix(data)
exp=data
group_list<-factor(c(rep("exp",10),rep("ctl",7)))
coldata<-data.frame(row.name=colnames(exp),condition=group_list)
差异分析
DESeq2
library(DESeq2)
dds<- DESeqDataSetFromMatrix(countData=data,colData=coldata,design=~condition)
dds<-DESeq(dds)
res <- results(dds,contrast = c("condition",rev(levels(group_list))))
resOrdered<-res[order(res$pvalue),]
DEG<-as.data.frame(resOrdered)
head(DEG)
#DEG<-na.omit(DGE) 这里不确定要不要去掉NA值
#添加change列标记基因上调下调
logFC_cutoff <- with(DEG,mean(abs(log2FoldChange)) + 2*sd(abs(log2FoldChange)) )
logFC_cutoff =1
k1 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange < - logFC_cutoff)
k2 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
head(DEG)
DESeq2_DEG <- DEG
write.csv(DESeq2_DEG,file = "~/rnaseq/deseq_out/DESeq2_DEG.csv")
edgeR
library(edgeR)
dge <- DGEList(counts=exp,group=group_list)
dge$samples$lib.size <- colSums(dge$counts)
dge <- calcNormFactors(dge)
design <- model.matrix(~0+group_list)
rownames(design)<-colnames(dge)
colnames(design)<-levels(group_list)
dge <- estimateGLMCommonDisp(dge,design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)
fit <- glmFit(dge, design)
fit2 <- glmLRT(fit, contrast=c(-1,1))
DEG=topTags(fit2, n=nrow(exp))
DEG=as.data.frame(DEG)
logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )
logFC_cutoff =1
k1 = (DEG$PValue < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$PValue < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
head(DEG)
table(DEG$change)
edgeR_DEG <- DEG
write.csv(edgeR_DEG,file = "~/rnaseq/edgeR_out/edgeR_DEG.csv")
limma
library(limma)
design <- model.matrix(~0+group_list)
colnames(design)=levels(group_list)
rownames(design)=colnames(exp)
dge <- DGEList(counts=exp)
dge <- calcNormFactors(dge)
v <- voom(dge,design, normalize="quantile")
fit <- lmFit(v, design)
constrasts = paste(rev(levels(group_list)),collapse = "-")
cont.matrix <- makeContrasts(contrasts=constrasts,levels = design)
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)
DEG = topTable(fit2, coef=constrasts, n=Inf)
DEG = na.omit(DEG)
logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )
logFC_cutoff=1
k1 = (DEG$P.Value < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$P.Value < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
head(DEG)
limma_voom_DEG <- DEG
write.csv(limma_voom_DEG,file = "~/rnaseq/limma_out/limma_voom_DEG.csv")
分别得到三款软件计算的上调、下调以及既不上调也不下调的基因数
tj = data.frame(deseq2 = as.integer(table(DESeq2_DEG$change)),
edgeR = as.integer(table(edgeR_DEG$change)),
limma_voom = as.integer(table(limma_voom_DEG$change)),
row.names = c("down","not","up")
);
绘制韦恩图
UP=function(df){
rownames(df)[df$change=="UP"]
}
DOWN=function(df){
rownames(df)[df$change=="DOWN"]
}
up = intersect(intersect(UP(DESeq2_DEG),UP(edgeR_DEG)),UP(limma_voom_DEG))
write.csv(up,file="./up.csv")
down= intersect(intersect(DOWN(DESeq2_DEG),DOWN(edgeR_DEG)),DOWN(limma_voom_DEG))
write.csv(down,file="./down.csv")
上调基因画维恩图
up_DESeq2<-UP(DESeq2_DEG)
write.csv(up_DESeq2,file="./up_DESeq2.csv")
up_edgeR<-UP(edgeR_DEG)
write.csv(up_edgeR,file="./up_edgeR.csv")
up_limma_voom<-UP(limma_voom_DEG)
write.csv(up_limma_voom,file="./up_limma_voom.csv")
提前手动删除以下csv文件的序号列
df1<-read.csv("up_limma_voom.csv",header = T,stringsAsFactors = F)
df2<-read.csv("up_edgeR.csv",header = T,stringsAsFactors = F)
df3<-read.csv("up_DESeq2.csv",header = T,stringsAsFactors = F)
library(ggvenn)
x<-list(limma_voom=df1$x,edgeR=df2$x,DESeq2=df3$x)
ggvenn(x,c("limma_voom","edgeR","DESeq2"),set_name_size = 3,fill_alpha = 1,text_size = 3)
下调基因画韦恩图
down_DESeq2<-DOWN(DESeq2_DEG)
write.csv(down_DESeq2,file="./down_DESeq2.csv")
down_edgeR<-DOWN(edgeR_DEG)
write.csv(down_edgeR,file="./down_edgeR.csv")
down_limma_voom<-DOWN(limma_voom_DEG)
write.csv(down_limma_voom,file="./down_limma_voom.csv")
提前手动删除以下csv文件的序号列
df4<-read.csv("down_limma_voom.csv",header = T,stringsAsFactors = F)
df5<-read.csv("down_edgeR.csv",header = T,stringsAsFactors = F)
df6<-read.csv("down_DESeq2.csv",header = T,stringsAsFactors = F)
library(ggvenn)
x<-list(limma_voom=df4$x,edgeR=df5$x,DESeq2=df6$x)
ggvenn(x,c("limma_voom","edgeR","DESeq2"),set_name_size = 3,fill_alpha = 1,text_size = 3)