目的
通过celltalker找出小鼠的单细胞转录组数据的受体配体互作信息,并绘制类似这样的圈图
具体用法
看如下网站:
- https://www.jianshu.com/p/177bbf7e9d67
- https://arc85.github.io/celltalker/articles/celltalker.html#celltalker
问题解决
与GSVA或者CellPhoneDB的情况相同
解决方案:像我之前构建GSVA的小鼠gmt文件的方法相同https://www.jianshu.com/p/2d659a4ec167,将 ramilowski_pairs
表示的人的受体配体的关系文件,转换成小鼠的同源基因表示形式
实际操作
数据导入
#Robin 2020/03/04
#For celltalker analysis mouse scRNA-seq Data
library(Seurat)
library(ggplot2)
library(celltalker)
library(dplyr)
setwd('/share/nas1/Data/Users/luohb/20200303_Ligand_interaction')
#Load data
mouse<-readRDS("../20200221/scaled_merge_seurat_rna.rds")
ser.obj<-mouse
用Python转换成小鼠的同源基因的受体配体关系表
在R中:
write.table(ramilowski_pairs, file='./ramilowski_pairs.xls', sep='\t', quote=F)
在Python中:
#!/usr/bin/python
#Author:Robin 20200220
#For transfer human to mouse
human_set = set()
h2m = dict()
m2m = dict()
# human gene ID to mouse Entrez ID
with open('./mart_export.txt') as f1:
for line in f1:
line = line.strip('\n')
lst = line.split('\t')
if lst[3]:
if lst[3] in human_set:
h2m[lst[3]].append(lst[0])
else:
h2m[lst[3]] = [ lst[0] ]
human_set.add(lst[3])
# mouse Entrez ID to mouse symbol ID
with open('./features.tsv') as f2:
for line in f2:
line = line.strip('\n')
lst = line.split('\t')
m2m[lst[0]] = lst[1]
# 遍历ramilowski_pairs文件每行,进行替换
with open('./ramilowski_pairs.xls') as f3:
with open('./mm.ramilowski_pairs.xls', 'w') as f4:
for line in f3:
line = line.strip('\n')
lst = line.split('\t')
ligand = lst[0]
receptor = lst[1]
mouse_pair = lst[2]
mouse_entrez_ligand_lst = h2m.get(ligand, "")
mouse_entrez_receptor_lst = h2m.get(receptor, "")
for mouse_entrez_ligand in mouse_entrez_ligand_lst:
for mouse_entrez_receptor in mouse_entrez_receptor_lst:
mouse_symbol_ligand = m2m[mouse_entrez_ligand]
mouse_symbol_receptor = m2m[mouse_entrez_receptor]
mouse_pair = mouse_symbol_ligand + "_" + mouse_symbol_receptor
line = [mouse_symbol_ligand, mouse_symbol_receptor, mouse_pair]
line = '\t'.join(line)
line = line + '\n'
print(line)
f4.write(line)
回到R中:
# 读入已经构建好的ramilowski_pairs文件
ramilowski_pairs<-read.table('./mm.ramilowski_pairs.xls', header=F, sep='\t', col.names=c('ligand','receptor','pair'))
head(ramilowski_pairs)
dim(ramilowski_pairs) #该数据集中有2,557个特异的配体/受体对
View(ramilowski_pairs)
#获取小鼠与人的同源基因的对应关系,然后用人的同源基因替换掉原本小鼠对应的基因
mart_export<-read.csv('./mart_export.txt', sep='\t', header=T)
barcode_feature<-read.csv('./features.tsv', sep='\t', header=F, col.names=c('mus_entrezID', 'mus_symbol', "type"))
expr.mat <- GetAssayData(ser.obj,slot="counts")
#在我们的数据集中识别配体和受体
ligs <- as.character(unique(ramilowski_pairs$ligand))
recs <- as.character(unique(ramilowski_pairs$receptor))
gene_list<-rownames(ser.obj)
ligs.present <- gene_list[gene_list %in% ligs]
recs.present <- gene_list[gene_list %in% recs]
genes.to.use <- union(ligs.present, recs.present) # 取并集
# 使用之前已经使用FindAllMarkers跑好了的所有cluster差异表达基因,用于区分组之间差异表达的配体和受体
Idents(ser.obj) <- ser.obj@meta.data$seurat_clusters
markers <- read.table('/share/nas1/Data/Users/yinl/Project/personality/20191227/markgene/all_sig_markgene.xls',
header=T)
marker_gene<-markers$gene
ligs.recs.use <- unique(marker_gene)
length(ligs.recs.use) #产生7648个配体和受体以进行评估
#过滤ramilowski配受对
interactions.forward1 <- ramilowski_pairs[as.character(ramilowski_pairs$ligand) %in% ligs.recs.use,]
interactions.forward2 <- ramilowski_pairs[as.character(ramilowski_pairs$receptor) %in% ligs.recs.use,]
interact.for <- rbind(interactions.forward1, interactions.forward2)
dim(interact.for)
构建celltalker
的输入文件:
#生成celltalker的输入数据
#Create data for celltalker
row_expr.mat <- GetAssayData(ser.obj,slot="counts")
expr.mat<-row_expr.mat
rownames(expr.mat)<-rownames(expr.mat)
rownames(expr.mat)<-gsub('\\.[0-9]*', '', rownames(expr.mat)) #去除版本号
defined.clusters <- ser.obj@meta.data$seurat_clusters
names(defined.clusters)<-colnames(ser.obj)
defined.groups <- ser.obj@meta.data$group
names(defined.groups)<-colnames(ser.obj)
defined.replicates <- ser.obj@meta.data$orig.ident
names(defined.replicates)<-colnames(ser.obj)
reshaped.matrices <- reshape_matrices(count.matrix=expr.mat,
clusters=defined.clusters,
groups=defined.groups,
replicates=defined.replicates,
ligands.and.receptors=interact.for)
unnest(reshaped.matrices,cols="samples")
names(pull(unnest(reshaped.matrices,cols="samples"))[[1]])
#在此初始步骤中,我们要做的是将我们的整体表达矩阵中给每个样本中分离出单独的表达矩阵。
#接下来,使用create_lig_rec_tib函数为每个组创建一组一致表达的配体和受体。
# cells.reqd=10:每个cluster中至少有10个细胞表达了配体/受体
# freq.pos.reqd=0.5:至少有50%重复个体中表达的配体/受体
consistent.lig.recs <- create_lig_rec_tib(exp.tib=reshaped.matrices,
clusters=defined.clusters,
groups=defined.groups,
replicates=defined.replicates,
cells.reqd=300,
freq.pos.reqd=0.5,
ligands.and.receptors=interact.for)
unnest(consistent.lig.recs, cols="lig.rec.exp")
pull(unnest(consistent.lig.recs[1,2],cols="lig.rec.exp")[1,2])[[1]]
#从每个实验组的每个cluster中获得了一致表达的配体和受体的列表。
生成Celltalker
对象,并绘制circos
圈图
#Determine putative ligand/receptor pairs
# freq.group.in.cluster: 只对包含细胞数大于总细胞数5%的簇进行互作分析
put.int<-putative_interactions(ligand.receptor.tibble=consistent.lig.recs,
clusters=defined.clusters,
groups=defined.groups,
freq.group.in.cluster=0.05,
ligands.and.receptors=interact.for)
# Identifying and visualizing unique ligand/receptor pairs in a group
###Identify unique ligand/receptor interactions present in each sample
unique.ints <- unique_interactions(put.int,group1="Fat",group2="Health",interact.for)
#Get data to plot circos for Fat
fat.to.plot <- pull(unique.ints[1,2])[[1]]
for.circos.fat <- pull(put.int[1,2])[[1]][fat.to.plot]
par(mar=c(1, 1, 1, 1))
p_fat<-circos_plot(interactions=for.circos.fat, clusters=defined.clusters)
#Get data to plot circos for Health
health.to.plot <- pull(unique.ints[2,2])[[1]]
for.circos.health <- pull(put.int[2,2])[[1]][health.to.plot]
p2<-circos_plot(interactions=for.circos.health, clusters=defined.clusters)
#all
to.plot <- pull(unique.ints[2])[[1]]
for.circos <- pull(put.int[2])[[1]][to.plot]
p3<-circos_plot(interactions=for.circos, clusters=defined.clusters)