芯片数据差异分析,常规用limma进行差异分析,而对于RNA-seq数据,常用edgeR、DEseq2和limma包数据分析,但数据数据必须是counts数据。如果是FPKM和RPKM数据呢?
FPKM或RPKM数据(未经log转换),直接用t检验或非参数检验
,下面就大致总结下这个过程,以备不时只需:
rt <- as.matrix(rt)
rt <- na.omit(rt)
rownames(rt) <- trimws(rt[,1]) #去掉行名前后的空格
exp <- rt[,2:ncol(rt)]
dimnames <- list(rownames(exp),colnames(exp))
data <- matrix(as.numeric(as.matrix(exp)),nrow = nrow(exp),dimnames = dimnames)
data <- avereps(data)
data <- normalizeBetweenArrays(data)
data <- data[,group_list$id]
data <- data[colSums(data)>0.1*ncol(data),]
data <- log2(data+1)
newGeneLists=c()
outTab=data.frame()
conNum=4
treatNum=18
grade=c(rep(1,conNum),rep(2,conNum))
#差异分析
for(i in row.names(data)){
rt=rbind(expression=data[i,],grade=grade)
rt=as.matrix(t(rt))
wilcoxTest<-wilcox.test(expression ~ grade, data=rt)
conGeneMeans=mean(data[i,1:conNum])
treatGeneMeans=mean(data[i,(conNum+1):ncol(data)])
logFC=log2(treatGeneMeans)-log2(conGeneMeans)
pvalue=wilcoxTest$p.value
conMed=median(data[i,1:conNum])
treatMed=median(data[i,(conNum+1):ncol(data)])
diffMed=treatMed-conMed
outTab=rbind(outTab,cbind(gene=i,conMean=conGeneMeans,treatMean=treatGeneMeans,logFC=logFC,pValue=pvalue))
newGeneLists=c(newGeneLists,i)
}
outTab <- outTab %>%
arrange(logFC)