library(ballgown)
library(RSkittleBrewer)
library(genefilter)
library(dplyr)
library(devtools)
patient_data = read.csv("D:/AAA-台式机-LY-WorkSpace/Patient_data-10samples.csv")
ten_sample = ballgown(dataDir = "D:/AAA-台式机-LY-WorkSpace/ballgown-10samples", samplePattern = "sample", pData=patient_data)
#dataDir告知数据路径, samplePattern则依据样本的名字来,pheno_data则指明了样本数据的关系,
#这个里面第一列样本名需要和ballgown下面的文件夹的样本名一样,不然会报错
ten_sample_filt = subset(ten_sample, "rowVars(texpr(ten_sample))>1", genomesubset=TRUE)
#滤掉低丰度的基因,这里选择过滤掉样本间差异少于一个转录本的数据
transcript_fpkm_filt = texpr(ten_sample_filt, 'FPKM')
gene_expression_filt = gexpr(ten_sample_filt)
full_table_filt <- texpr(ten_sample_filt , 'all')
#you can get a table with all the information in your ballgown object (bg) , doing:
#full_table <- texpr(bg , 'all')
#The table will include: transcript id, chromosome, strand, start positon, end position, number of exons, length, gene id, gene name, and the FPKM values for each sample.
#Btw, I have noticed that sometimes the same gene id (MSTRG.xxxx) can correspond to more than one gene name, so don't fully rely on the gene_id as a unique identifier.
# 赋予调色板五个指定颜色
tropical= c('darkorange', 'dodgerblue','hotpink', 'limegreen', 'yellow')
palette(tropical)
# 当然rainbow()函数也可以完成这个任务
palette(rainbow(5))
# 提取FPKM值
fpkm = texpr(ten_sample,meas="FPKM")
#方便作图将其log转换,+1是为了避免出现log2(0)的情况
fpkm = log2(fpkm+1)
# 作图
boxplot(fpkm,las=2,ylab='log2(FPKM+1)')
# plotTranscripts函数可以根据指定基因的id画出在特定区段的转录本
# 可以通过sample函数指定看在某个样本中的表达情况,这里选用id=6, sample=105TRA
plotTranscripts(ballgown::geneIDs(ten_sample)[6], ten_sample, main=c('Gene MSTRG.455 in sample 105TRA'), sample=c('105TRA'))
write.csv(full_table_filt,file = "D:/AAA-台式机-LY-WorkSpace/full_table_filt.CSV")