参考https://zhuanlan.zhihu.com/p/375756362
从原始counts表达矩阵导入到差异基因导出。
1. 安装加载DEseq2包
rm(list = ls())
# BiocManager::install('DESeq2')
library('DESeq2')
2. 载入数据
count <- read.csv(file = "../gene_sample_count_with_symbol.csv",header = T,row.names = 1)
head(count)
> head(count)
GFP3 GFP4 NLS_10 NLS_5
ENSMUSG00000028180 2572 2916 2484 3173
ENSMUSG00000028182 26 23 13 20
ENSMUSG00000028185 5 0 0 0
ENSMUSG00000028184 613 463 646 879
ENSMUSG00000028187 973 765 815 1067
ENSMUSG00000028186 191 5 7 11
3.构建分组矩阵
group <- read.csv(file = "../group.csv",header = T,row.names = 1)
as.factor(group$Group)
> group
Group
GFP3 AAVGFP
GFP4 AAVGFP
NLS_10 AAVNLS
NLS_5 AAVNLS
4.制作dds对象,构建差异基因分析所需的数据格式
dds <- DESeqDataSetFromMatrix(countData=count, colData=group, design=~Group)
5.进行差异分析及ID转换
dds <- DESeq(dds)
result <- as.data.frame(results(dds)) # results()从DESeq分析中提取出结果表
result$ENSEMBL <- rownames(result)
ens <- rownames(result)
#加载clusterProfiler
library(clusterProfiler)
ens2s = bitr(ens, fromType="ENSEMBL", toType="SYMBOL", OrgDb="org.Mm.eg.db")
ens2s
result <- merge(result,ens2s,by="ENSEMBL")
6.增加上调和下调阈值
result$type= ifelse(result$pvalue < 0.05 & result$log2FoldChange > 0.59,"up",
(ifelse(result$pvalue < 0.05 & result$log2FoldChange < -0.59,"down","not")))
write.csv(result,'result1.csv',row.names = TRUE)
7.提取显著差异表达基因的矩阵
DGE <-subset(result,pvalue < 0.05 & (log2FoldChange > 0.59 | log2FoldChange < -0.59))
DEG <- DGE[order(DGE$log2FoldChange),]
write.csv(DEG,'DEG.csv',row.names = TRUE)