signafiture
0.R包
rm(list=ls())
options(stringsAsFactors = F)
project='TCGA_KIRC'
library(maftools)
library(dplyr)
library(pheatmap)
library(ComplexHeatmap)
library(stringr)
library(deconstructSigs)
library(BSgenome)
library(BSgenome.Hsapiens.UCSC.hg38)
1.读取突变数据maf
是和上一篇一样的数据,从gdc下载的maf文件和临床信息
laml = read.maf(maf = "TCGA.KIRC.mutect.2a8f2c83-8b5e-4987-8dbf-01f7ee24dc26.DR-10.0.somatic.maf.gz")
#> -Reading
#> -Validating
#> -Silent variants: 8383
#> -Summarizing
#> --Mutiple centers found
#> BI;BCM--Possible FLAGS among top ten genes:
#> TTN
#> MUC16
#> HMCN1
#> -Processing clinical data
#> --Missing clinical data
#> -Finished in 3.345s elapsed (3.050s cpu)
laml
#> An object of class MAF
#> ID summary Mean Median
#> 1: NCBI_Build GRCh38 NA NA
#> 2: Center BI;BCM NA NA
#> 3: Samples 336 NA NA
#> 4: nGenes 9444 NA NA
#> 5: Frame_Shift_Del 1732 5.155 4
#> 6: Frame_Shift_Ins 1201 3.574 1
#> 7: In_Frame_Del 238 0.708 0
#> 8: In_Frame_Ins 350 1.042 0
#> 9: Missense_Mutation 12997 38.682 36
#> 10: Nonsense_Mutation 1259 3.747 2
#> 11: Nonstop_Mutation 18 0.054 0
#> 12: Splice_Site 490 1.458 1
#> 13: Translation_Start_Site 25 0.074 0
#> 14: total 18310 54.494 47
mut=laml@data
mut[1:4,1:2]
#> Hugo_Symbol Entrez_Gene_Id
#> 1: CTPS1 1503
#> 2: TRIM33 51592
#> 3: NUP133 55746
#> 4: GRHL1 29841
mut=mut[mut$Variant_Type=='SNP',]#只要单个位点的突变
#每行记录了一个突变,所以统计样本列的行数即为样本的突变数量
plot(table(mut[,16]),las = 2)
2.制作signatures的输入数据(96频谱)
关于什么是mutation
signature:http://www.bio-info-trainee.com/1619.html
关于96频谱:http://www.biotrainee.com/thread-832-1-1.html
a = mut[,c(16,5,6,12,13)]
colnames(a)=c( "Sample","chr", "pos","ref", "alt")
a$Sample=as.character(a$Sample)
sigs.input <- mut.to.sigs.input(mut.ref = a,
sample.id = "Sample",
chr = "chr",
pos = "pos",
ref = "ref",
alt = "alt",
bsg = BSgenome.Hsapiens.UCSC.hg38)
#> Warning in mut.to.sigs.input(mut.ref = a, sample.id = "Sample", chr = "chr", : Some samples have fewer than 50 mutations:
#> TCGA-A3-3383-01A-01D-0966-08, TCGA-CJ-4908-01A-01D-1429-08, TCGA-CZ-5469-01A-01D-1501-10, TCGA-CZ-5986-01A-11D-1669-08, TCGA-A3-3365-01A-01D-0966-08, TCGA-CJ-4903-01A-01D-1429-08, TCGA-CJ-6032-01A-11D-1669-08, TCGA-B0-5116-01A-02D-1421-08, TCGA-G6-A8L7-01A-11D-A36X-10, TCGA-BP-5191-01A-01D-1429-08, TCGA-B8-5158-01A-01D-1421-08, TCGA-AK-3443-01A-01D-0966-08, TCGA-BP-4988-01A-01D-1462-08, TCGA-CZ-5984-01A-11D-1669-08, TCGA-DV-5566-01A-01D-1534-10, TCGA-BP-5202-01A-02D-1429-08, TCGA-DV-5568-01A-01D-1534-10, TCGA-CW-5589-01A-01D-1534-10, TCGA-B0-5400-01A-01D-1501-10, TCGA-BP-5184-01A-01D-1429-08, TCGA-BP-5181-01A-01D-1429-08, TCGA-DV-5575-01A-01D-1534-10, TCGA-BP-4965-01A-01D-1462-08, TCGA-BP-5009-01A-01D-1462-08, TCGA-B8-A54J-01A-11D-A33K-10, TCGA-BP-5187-01A-01D-1429-08, TCGA-CZ-5466-01A-01D-1501-10, TCGA-B8-5545-01A-01D-1669-08, TCGA-B8-5165-01A-01D-1421-08, TCGA-B8-A54K-01A-11D-A33K-10, TCGA-CW-5581-01A-02D-1534-10, TCGA-B0-5709-01A-11D-1534-10, TCGA-CW-6097-01A-11D-1669-08, TCGA-MM-A563-01A-11D-A25V-10, TCGA-T7-A92I-01A-11D-A36X-10, TCGA-B8-A8YJ-01A-13D-A38X-10, TCGA-CZ-5452-01A-01D-1501-10, TCGA-B8-4153-01B-11D-1669-08, TCGA-BP-5196-01A-01D-1429-08, TCGA-BP-5008-01A-01D-1462-08, TCGA-DV-5576-01A-01D-1534-10, TCGA-A3-3323-01A-01D-0966-08, TCGA-BP-5169-01A-01D-1429-08, TCGA-CZ-5470-01A-01D-1501-10, TCGA-BP-4968-01A-01D-1462-08, TCGA-B0-5081-01A-01D-1462-08, TCGA-BP-4801-01A-02D-1421-08, TCGA-DV-A4W0-01A-11D-A25V-10, TCGA-B8-A54I-01A-21D-A33K-10, TCGA-B2-5636-01A-02D-1534-10, TCGA-DV-5573-01A-01D-1534-10, TCGA-BP-5180-01A-01D-1429-08, TCGA-A3-3376-01A-02D-1421-08, TCGA-B0-5113-01A-01D-1421-08, TCGA-CJ-5676-01A-11D-1534-10, TCGA-B0-5110-01A-01D-1421-08, TCGA-BP-4991-01A-01D-1462-08, TCGA-BP-5186-01A-01D-1429-08, TCGA-CW-6087-01A-11D-1669-08, TCGA-B0-5711-01A-11D-1669-08, TCGA-B4-5832-01A-11D-1669-08, TCGA-CJ-4923-01A-01D-1429-08, TCGA-B4-5836-01A-11D-1669-08, TCGA-B2-4102-01A-02D-1386-10, TCGA-B0-5700-01A-11D-1534-10, TCGA-B0-5108-01A-01D-1421-08, TCGA-CJ-5671-01A-11D-1534-10, TCGA-BP-4967-01A-01D-1462-08, TCGA-B8-5549-01A-01D-1534-10, TCGA-CZ-5988-01A-11D-1669-08, TCGA-CJ-4904-01A-02D-1429-08, TCGA-CJ-4916-01A-01D-1429-08, TCGA-B0-4842-01A-02D-1421-08, TCGA-A3-3319-01A-01D-0966-08, TCGA-6D-AA2E-01A-11D-A36X-10, TCGA-CJ-4913-01A-01D-1429-08, TCGA-B8-A54G-01A-11D-A25V-10, TCGA-CJ-5675-01A-11D-1534-10, TCGA-B0-5693-01A-11D-1534-10, TCGA-CJ-4907-01A-01D-1429-08, TCGA-B8-4148-01A-02D-1386-10, TCGA-CW-5583-01A-02D-1534-10, TCGA-A3-3385-01A-02D-1421-08, TCGA-A3-A6NL-01A-11D-A33K-10, TCGA-AK-3440-01A-01D-0966-08, TCGA-BP-5175-01A-01D-1429-08, TCGA-BP-4975-01A-01D-1462-08, TCGA-BP-4987-01A-01D-1462-08, TCGA-B4-5834-01A-11D-1669-08, TCGA-AS-3778-01A-01D-0966-08, TCGA-B2-5633-01A-01D-A270-10, TCGA-DV-5565-01A-01D-1534-10, TCGA-DV-5569-01A-01D-1534-10, TCGA-B0-5084-01A-01D-1462-08, TCGA-B8-5159-01A-01D-1421-08, TCGA-CZ-5455-01A-01D-1501-10, TCGA-AK-3455-01A-01D-0966-08, TCGA-BP-5200-01A-01D-1429-08, TCGA-BP-5170-01A-01D-1429-08, TCGA-CJ-5678-01A-11D-1534-10, TCGA-EU-5907-01A-11D-1669-08, TCGA-A3-3380-01A-01D-0966-08, TCGA-BP-4998-01A-01D-1462-08, TCGA-CJ-6028-01A-11D-1669-08, TCGA-CJ-4899-01A-01D-1462-08, TCGA-CJ-4902-01A-01D-1429-08, TCGA-A3-A8OX-01A-11D-A36X-10, TCGA-B8-5553-01A-01D-1534-10, TCGA-BP-4177-01A-02D-1421-08, TCGA-EU-5905-01A-11D-1669-08, TCGA-CZ-5985-01A-11D-1669-08, TCGA-CW-5588-01A-01D-1534-10, TCGA-B0-5107-01A-01D-1421-08, TCGA-AK-3465-01A-01D-0966-08, TCGA-B0-5102-01A-01D-1421-08, TCGA-B0-5694-01A-11D-1534-10, TCGA-BP-4960-01A-01D-1462-08, TCGA-CZ-5982-01A-11D-1669-08, TCGA-A3-3374-01A-01D-0966-08, TCGA-CJ-4900-01A-01D-1462-08, TCGA-CZ-5987-01A-11D-1669-08, TCGA-B0-5696-01A-11D-1534-10, TCGA-B0-5083-01A-02D-1421-08, TCGA-BP-4981-01A-01D-1462-08, TCGA-B4-5844-01A-11D-1669-08, TCGA-BP-4961-01A-01D-1462-08, TCGA-B8-5162-01A-01D-1421-08, TCGA-B0-5104-01A-01D-1421-08, TCGA-BP-5201-01A-01D-1429-08, TCGA-B0-5120-01A-01D-1421-08, TCGA-B0-5402-01A-01D-1501-10, TCGA-B2-5635-01A-01D-A270-10, TCGA-B8-A7U6-01A-12D-A36X-10, TCGA-BP-5189-01A-02D-1429-08, TCGA-CZ-5460-01A-01D-1501-10, TCGA-B2-4101-01A-02D-1458-08, TCGA-B8-A54E-01A-11D-A25V-10, TCGA-AK-3453-01A-01D-0966-08, TCGA-BP-5010-01A-02D-1421-08, TCGA-DV-A4VZ-01A-11D-A25V-10, TCGA-B4-5843-01A-11D-1669-08, TCGA-B2-A4SR-01A-11D-A25V-10, TCGA-A3-A8CQ-01A-11D-A36X-10, TCGA-BP-5000-01A-01D-1462-08, TCGA-BP-4962-01A-01D-1462-08, TCGA-CZ-4863-01A-01D-1501-10, TCGA-BP-4982-01A-01D-1462-08, TCGA-B8-4151-01A-01D-1806-10, TCGA-CW-5591-01A-01D-1534-10, TCGA-BP-5006-01A-01D-1462-08, TCGA-A3-3317-01A-01D-0966-08, TCGA-B0-5077-01A-01D-1462-08, TCGA-CZ-5456-01A-01D-1501-10, TCGA-A3-3316-01A-01D-0966-08, TCGA-CZ-5989-01A-11D-1669-08, TCGA-CZ-5462-01A-01D-1501-10, TCGA-A3-3326-01A-01D-0966-08, TCGA-A3-3378-01A-01D-0966-08, TCGA-B0-4700-01A-02D-1534-10, TCGA-BP-5001-01A-01D-1462-08, TCGA-CJ-4905-01A-02D-1429-08, TCGA-BP-5004-01A-01D-1462-08, TCGA-A3-3373-01A-02D-1421-08, TCGA-B0-5691-01A-11D-1534-10, TCGA-A3-3367-01A-02D-1421-08, TCGA-B8-5163-01A-01D-1421-08, TCGA-BP-4971-01A-01D-1462-08, TCGA-BP-5178-01A-01D-1429-08, TCGA-AK-3427-01A-01D-0966-08, TCGA-BP-4992-01A-01D-1462-08, TCGA-BP-4986-01A-01D-1462-08, TCGA-BP-5194-01A-02D-1429-08, TCGA-BP-5192-01A-01D-1429-08, TCGA-BP-4970-01A-01D-1462-08, TCGA-CJ-6031-01A-11D-1669-08, TCGA-B0-5695-01A-11D-1534-10, TCGA-A3-3363-01A-01D-0966-08, TCGA-B8-5551-01A-01D-1534-10, TCGA-BP-4760-01A-02D-1421-08, TCGA-B0-4945-01A-01D-1421-08, TCGA-B0-5121-01A-02D-1421-08, TCGA-BP-4977-01A-01D-1462-08, TCGA-B0-5697-01A-11D-1534-10, TCGA-BP-5190-01A-01D-1429-08, TCGA-B0-5097-01A-01D-1421-08, TCGA-B8-A54F-01A-11D-A25V-10, TCGA-B0-5812-01A-11D-1669-08, TCGA-MW-A4EC-01A-11D-A25V-10, TCGA-B8-4146-01B-11D-1669-08, TCGA-B8-4622-01A-02D-1553-08, TCGA-B0-5707-01A-11D-1534-10, TCGA-B0-5699-01A-11D-1534-10, TCGA-CZ-5458-01A-01D-1501-10, TCGA-CJ-5683-01A-11D-1534-10, TCGA-BP-4972-01A-01D-1462-08, TCGA-CZ-5454-01A-01D-1501-10, TCGA-BP-4995-01A-01D-1462-08, TCGA-B0-5100-01A-01D-1421-08, TCGA-B8-A54D-01A-21D-A25V-10, TCGA-B2-5639-01A-01D-1534-10, TCGA-DV-5567-01A-01D-1534-10, TCGA-B0-5710-01A-11D-1669-08, TCGA-BP-4795-01A-02D-1421-08, TCGA-BP-4973-01A-01D-1462-08, TCGA-CW-5585-01A-01D-1534-10, TCGA-B8-5552-01B-11D-1669-08, TCGA-CJ-5684-01A-11D-1534-10, TCGA-A3-A8OW-01A-11D-A36X-10, TCGA-BP-5177-01A-01D-1429-08, TCGA-CZ-5467-01A-01D-1501-10, TCGA-B4-5377-01A-01D-1501-10, TCGA-A3-3322-01A-01D-0966-08, TCGA-AS-3777-01A-01D-0966-08, TCGA-A3-A6NI-01A-11D-A33K-10, TCGA-BP-4999-01A-01D-1462-08, TCGA-A3-3370-01A-02D-1421-08, TCGA-CZ-5463-01A-01D-1501-10, TCGA-B0-5109-01A-02D-1421-08, TCGA-EU-5906-01A-11D-1669-08, TCGA-BP-5007-01A-01D-1462-08, TCGA-B0-5399-01A-01D-1501-10, TCGA-B0-5692-01A-11D-1534-10, TCGA-CJ-4901-01A-01D-1429-08, TCGA-A3-3311-01A-01D-0966-08, TCGA-A3-A6NJ-01A-12D-A33K-10, TCGA-G6-A8L6-01A-11D-A36X-10, TCGA-G6-A5PC-01A-11D-A33K-10, TCGA-B8-5546-01A-01D-1534-10, TCGA-BP-5183-01A-01D-1429-08, TCGA-CJ-5680-01A-11D-1534-10, TCGA-DV-5574-01A-01D-1534-10, TCGA-EU-5904-01A-11D-1669-08, TCGA-B0-5706-01A-11D-1534-10, TCGA-BP-4974-01A-01D-1462-08, TCGA-B0-5115-01A-01D-1421-08, TCGA-CW-5587-01A-01D-1534-10, TCGA-BP-5174-01A-01D-1429-08, TCGA-CJ-5681-01A-11D-1534-10, TCGA-BP-4993-01A-02D-1421-08, TCGA-CJ-5686-01A-11D-1669-08, TCGA-B0-5117-01A-01D-1421-08
class(sigs.input)
#> [1] "data.frame"
#第一个样本的突变绘图
barplot(as.numeric(sigs.input[1,])~colnames(sigs.input))
sigs.input[1:4,1:4]
#> A[C>A]A A[C>A]C A[C>A]G A[C>A]T
#> TCGA-A3-3383-01A-01D-0966-08 2 0 0 0
#> TCGA-CJ-4920-01A-01D-1429-08 0 1 0 2
#> TCGA-CJ-4908-01A-01D-1429-08 0 2 0 0
#> TCGA-CZ-5469-01A-01D-1501-10 2 0 1 0
一顿操作猛如虎,生成signature与样本对应关系的矩阵
if(!file.exists(paste0(project,"w.Rdata"))){
w=lapply(unique(a$Sample), function(i){
## signatures.cosmic signatures.nature2013
sample_1 = whichSignatures(tumor.ref = sigs.input,
signatures.ref = signatures.cosmic,
sample.id = i,
contexts.needed = TRUE,
tri.counts.method = 'default')
print(c(i,which(unique(a$Sample)==i)))
return(sample_1$weights)
})
w=do.call(rbind,w)
save(w,file = paste0(project,"w.Rdata"))
}
load(paste0(project,"w.Rdata"))
mat = t(w)
3.可视化
矩阵的可视化–热图
3.1 简单版本的热图
pheatmap::pheatmap(mat,
cluster_rows = F,
cluster_cols = T,
show_colnames = F)
3.2 好看一点的热图
看到了这个图,氮素不知道用什么包画的,反正闲的没事干,不如自己模仿一波。如果你知道用的什么包,可以告诉我~感谢
热图上面放直方图,直方图内容是每个样本的突变数量。想到了ComplexHeatmap,可以将直方图作为注释添加上去。
难点是调整顺序,R语言神技能加持。
捋一捋:
直方图横纵坐标是样本和突变数量 热图输入数据mat横纵坐标是样本和signature 注释栏来源于clinical数据,与样本一一对应。 所以需要调整样本顺序,三者统一。 注释栏简化一下,用stage、生死和性别;先排一下序,再按照排好的顺序给mat和直方图输入数据排序。
load("KIRC_pd.Rdata")
head(pd)
#> Tumor_Sample_Barcode stage_event gender vital_status
#> 482 TCGA-A3-3383-01A-01D-0966-08 I MALE Alive
#> 438 TCGA-CJ-4920-01A-01D-1429-08 I FEMALE Dead
#> 142 TCGA-CJ-4908-01A-01D-1429-08 I MALE Alive
#> 311 TCGA-CZ-5469-01A-01D-1501-10 II MALE Dead
#> 234 TCGA-CZ-5986-01A-11D-1669-08 I MALE Alive
#> 202 TCGA-A3-3365-01A-01D-0966-08 I MALE Alive
#调整pd的行顺序
pd = arrange(pd,stage_event,gender,vital_status)
#mat与pd一一对应
mat = mat[,match(pd$Tumor_Sample_Barcode,colnames(mat))]
#只取一部分signiture来画
s = head(names(sort(rowSums(mat),decreasing = T)),8)
mat = mat[s,]
#顶部直方图
for_bar = table(mut$Tumor_Sample_Barcode)
for_bar = for_bar[match(colnames(mat),names(for_bar))]
for_bar = as.integer(for_bar)
annotation = HeatmapAnnotation(
mut = anno_barplot(for_bar),
df = pd[,-1])
ht_list = Heatmap(mat, name = "mat",
cluster_rows = F,
cluster_columns = F,
show_column_names = F,
column_split = pd$stage_event,
top_annotation = annotation)
draw(ht_list, heatmap_legend_side = "left", annotation_legend_side = "bottom")
*生信技能树课程笔记