单细胞转录组之拟时序分析

1.数据的准备

1.1选择想要进行时间轨迹分析的seurat亚群,，并将亚群提取出来

#走一遍seurat标准流程后，得到数据，提取亚群
table( Idents(sce ))
table(sce@meta.data$seurat_clusters) 
table(sce@meta.data$orig.ident) 

# 取子集
levels(Idents(sce))
sce = sce[, Idents(sce) %in% c( "FCGR3A+ Mono", "CD14+ Mono"  )] 
#sce[基因,细胞]
levels(Idents(sce))

markers_df <- FindMarkers(object = sce, ident.1 = 'FCGR3A+ Mono',ident.2 = 'CD14+ Mono', #logfc.threshold = 0,min.pct = 0.25)
head(markers_df)
挑选差异基因
cg_markers_df=markers_df[abs(markers_df$avg_log2FC) >1,]

1.2 创建CellDataSet对象及monocle标准流程

详见此链接：https://www.jianshu.com/p/34c23dbd9dc1

创建CellDataSet对象

library(monocle)

sample_ann <-  sce@meta.data  

sample_ann$celltype=Idents(sce)

#准备newCellDataSet函数的输入文件
gene_ann <- data.frame(gene_short_name = rownames(sce@assays$RNA,  row.names =  rownames(sce@assays$RNA))
pd <- new("AnnotatedDataFrame",
          data=sample_ann)
fd <- new("AnnotatedDataFrame",
          data=gene_ann)
ct=as.data.frame(sce@assays$RNA@counts)

sc_cds <- newCellDataSet(as.matrix(ct), phenoData = pd,featureData =fd,expressionFamily = negbinomial.size(),lowerDetectionLimit=1)

monocle标准流程

sc_cds <- detectGenes(sc_cds, min_expr = 1) 

sc_cds <- sc_cds[fData(sc_cds$num_cells_expressed > 10, ]

cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds) 

disp_table <- dispersionTable(cds)

unsup_clustering_genes <- subset(disp_table,mean_expression >= 0.1)

cds <- setOrderingFilter(cds,unsup_clustering_genes$gene_id)

cds <- reduceDimension(cds, max_components = 2, num_dim = 6,reduction_method = 'tSNE', verbose = T)

cds <- clusterCells(cds, num_clusters = 6)

2.查找差异基因及时间轨迹分析

2.1 查找差异基因

根据自己生物学意图，选择查看哪个性状的轨迹

将monocle的分群改为seurat分群
pData(cds)$Cluster=pData(cds)$celltype
table(pData(cds1)$Cluster)

#测试每个基因作为伪时间函数或根据指定的其他协变量的差异表达。fullModelFormulaStr 选择按照什么找差异
diff_test_res <- differentialGeneTest(cds,fullModelFormulaStr = "~Cluster")

# 选择 FDR < 10% 的基因作为差异基因
sig_genes <- subset(diff_test_res, qval < 0.1)

sig_genes<-sig_genes[order(sig_genes$pval),]

head(sig_genes[,c("gene_short_name", "pval", "qval")] ) 

cg=as.character(head(sig_genes$gene_short_name)) 
#  挑选差异最显著的基因可视化，将一个或多个基因的表达绘制点图

plot_genes_jitter(cds[cg,], grouping = "Cluster", color_by = "Cluster",nrow= 3,ncol = NULL )

cg2=as.character(tail(sig_genes$gene_short_name)) 
plot_genes_jitter(cds[cg2,],grouping = "Cluster",color_by = "Cluster",nrow= 3,ncol = NULL )

2.2 时间轨迹分析

第一步: 挑选合适的基因. 有多个方法，例如提供已知的基因集，这里选取统计学显著的差异基因列表。

ordering_genes <- row.names (subse(diff_test_res, qval < 0.01))

ordering_genes

#准备聚类基因名单
cds <- setOrderingFilter(cds, ordering_genes)
plot_ordering_genes

第二步: 降维。降维的目的是为了更好的展示数据。函数里提供了很多种方法,不同方法的最后展示的图都不太一样, 其中“DDRTree”是Monocle2使用的默认方法。

cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')

第三步: 对细胞进行排序学习描述细胞正在经历的生物过程的“轨迹”，并计算每个细胞在该轨迹内的位置。

cds <- orderCells(cds)

最后: 可视化
注意，伪时序的解读需要结合生物学意义

plot_cell_trajectory(cds, color_by = "Cluster")  

#绘制一个或多个基因的表达作为伪时序。
plot_genes_in_pseudotime(cds[cg,],color_by = "Cluster")

前面根据差异基因，推断好了拟时序，也就是说把差异基因动态化了，后面就可以具体推断哪些基因随着拟时序如何的变化

my_cds_subset=cds
# 拟时序数据和细胞位置在pData 中
head(pData(my_cds_subset))

# 这个differentialGeneTest会比较耗费时间，测试每个基因的拟时序表达
my_pseudotime_de <- differentialGeneTest(my_cds_subset,fullModelFormulaStr = "~sm.ns(Pseudotime)",cores = 4 )#cores调用的核心数

head(my_pseudotime_de)

3.分析结果的精致可视化

library(Seurat)
library(gplots)
library(ggplot2)
library(monocle)
library(dplyr)

cds=my_cds_subset
phe=pData(cds)
colnames(phe)
library(ggsci)

#绘制最小生成树
p1=plot_cell_trajectory(cds, color_by = "Cluster")  + scale_color_nejm() 
p1
ggsave('trajectory_by_cluster.pdf')
plot_cell_trajectory(cds, color_by = "celltype")  

p2=plot_cell_trajectory(cds, color_by = "Pseudotime")  
p2
ggsave('trajectory_by_Pseudotime.pdf')

p3=plot_cell_trajectory(cds, color_by = "State")  + scale_color_npg()
p3
ggsave('trajectory_by_State.pdf')
library(patchwork)#拼图
p1+p2/p3

以qval前六的基因做图

phe=pData(cds)
head(phe)
table(phe$State,phe$Cluster) 

library(dplyr)  
#%>% 管道函数 把左边的值发送给右边的表达式，并作为右件表达式函数的第一个参数
my_pseudotime_de %>% arrange(qval) %>% head() 

# 保存前六的基因
my_pseudotime_de %>% arrange(qval) %>% head() %>% select(gene_short_name) -> my_pseudotime_gene
my_pseudotime_gene=my_pseudotime_gene[,1]
my_pseudotime_gene

#绘制一个或多个基因的拟时序
plot_genes_in_pseudotime(my_cds_subset[my_pseudotime_gene,])+ scale_color_npg()
ggsave('monocle_top6_pseudotime_by_state.pdf')

将一个或多个基因的表达绘制点图

plot_genes_jitter(my_cds_subset[my_pseudotime_gene,],grouping = "Cluster",color_by = "Cluster", nrow= 3,ncol = NULL )+ scale_color_nejm()
ggsave('monocle_top6_pseudotime_by_cluster.pdf')

将前50个随拟时序变化的基因做聚类热图

# cluster the top 50 genes that vary as a function of pseudotime
my_pseudotime_de %>% arrange(qval) %>% head(50) %>% select(gene_short_name) -> gene_to_cluster
gene_to_cluster <- gene_to_cluster[,1]
gene_to_cluster
colnames(pData(my_cds_subset))
table(pData(my_cds_subset)$Cluster,pData(my_cds_subset)$State) 
ac=pData(my_cds_subset)[c('celltype','State','Pseudotime')]
head(ac)

# 这个热图绘制的并不是纯粹的细胞基因表达量矩阵，而是被 Pseudotime 好了的100列，50行的矩阵

my_pseudotime_cluster <- plot_pseudotime_heatmap(my_cds_subset[gene_to_cluster,],# num_clusters = 2, # add_annotation_col = ac,show_rownames = TRUE,
return_heatmap = TRUE)

my_pseudotime_cluster

pdf('monocle_top50_heatmap.pdf')
print(my_pseudotime_cluster)
dev.off()

分支表达分析建模识别具有分支依赖性表达的基因。

#计算建模分支节点
BEAM_branch1 <- BEAM(my_cds_subset, branch_point = 1, cores = 4)

head(BEAM_branch1)
colnames(BEAM_branch1)

BEAM_branch1 <- BEAM_branch1[order(BEAM_branch1$qval),]

BEAM_branch1 <- BEAM_branch1[,c("gene_short_name", "pval", "qval")]
head(BEAM_branch1) 
  
BEAM_branch2 <- BEAM(my_cds_subset, branch_point = 2, cores = 20)

BEAM_branch2 <- BEAM_branch2[order(BEAM_branch2$qval),]
BEAM_branch2 <- BEAM_branch2[,c("gene_short_name", "pval", "qval")]
head(BEAM_branch2)

使用全部的基因进行绘图创建一个热图来展示基因表达沿两个分支的分叉

BEAM_res = BEAM_branch1
my_branched_heatmap <- plot_genes_branched_heatmap( my_cds_subset[row.names(subset(BEAM_res, qval < 1e-4)),], branch_point = 1,num_clusters = 4, e_short_name = TRUE,show_rownames = F,return_heatmap = TRUE)

pdf('monocle_BEAM_branch1_heatmap.pdf')
print(my_branched_heatmap$ph)
dev.off()

BEAM_res = BEAM_branch2

my_branched_heatmap <- plot_genes_branched_heatmap(my_cds_subset[row.names(subset(BEAM_res, qval < 1e-4)),],branch_point = 1,num_clusters = 4, use_gene_short_name = TRUE,show_rownames = F,return_heatmap = TRUE)

pdf('monocle_BEAM_branch2_heatmap.pdf')
print(my_branched_heatmap$ph)
dev.off()

#将所做热图的基因和cluster提取出来
head(my_branched_heatmap$annotation_row)
table(my_branched_heatmap$annotation_row$Cluster) 
my_row <- my_branched_heatmap$annotation_row
my_row <- data.frame(cluster = my_row$Cluster,
                     gene = row.names(my_row),
                     stringsAsFactors = FALSE)

head(my_row[my_row$cluster == 3,'gene']) 

my_gene <- row.names(subset(fData(my_cds_subset),
                            gene_short_name %in% head(my_row[my_row$cluster == 2,'gene'])))
my_gene

# 绘制分支处的基因拟时序轨迹
#分支1
plot_genes_branched_pseudotime(my_cds_subset[my_gene,],
                               branch_point = 1,
                               ncol = 1)
#分支2
plot_genes_branched_pseudotime(my_cds_subset[my_gene,],
                               branch_point = 2,
                               ncol = 1)

分支1

分支2

做热图查看拟时序基因在两个亚群的表达

cds=my_cds_subset
phe=pData(cds)
colnames(phe)
plot_cell_trajectory(cds)
counts = Biobase::exprs(cds)
dim(counts)

library(dplyr) 
my_pseudotime_de %>% arrange(qval) %>% head(100) %>% select(gene_short_name) -> my_pseudotime_gene
my_pseudotime_gene=my_pseudotime_gene[,1]
my_pseudotime_gene


library(pheatmap)
#数据中心化和归一化
n=t(scale(t( counts[my_pseudotime_gene,] ))) # 'scale'可以对log-ratio数值进行归一化
n[n>2]=2 
n[n< -2]= -2
n[1:4,1:4]
pheatmap(n,show_colnames =F,show_rownames = F)
ac=phe[,c(10,16,17)]
head(ac)
rownames(ac)=colnames(n)
dim(n)
n[1:4,1:4]
pheatmap(n,show_colnames =F,
         show_rownames = F,
         annotation_col=ac)
od=order(ac$Pseudotime)
pheatmap(n[,od],show_colnames =F,
         show_rownames = F,cluster_cols = F,
         annotation_col=ac[od,])

参考来源

#section 3已更新#「生信技能树」单细胞公开课2021_哔哩哔哩_bilibili

致谢

I thank Dr.Jianming Zeng(University of Macau), and all the members of his bioinformatics team, biotrainee, for generously sharing their experience and codes.

THE END

最后编辑于：2022.07.10 10:20:52

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 204,684评论 6赞 478
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 87,143评论 2赞 381
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 151,214评论 0赞 337
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 54,788评论 1赞 277
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 63,796评论 5赞 368
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 48,665评论 1赞 281
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 38,027评论 3赞 399
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 36,679评论 0赞 258
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 41,346评论 1赞 299
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 35,664评论 2赞 321
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 37,766评论 1赞 331
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 33,412评论 4赞 321
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 39,015评论 3赞 307
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 29,974评论 0赞 19
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 31,203评论 1赞 260
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 45,073评论 2赞 350
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 42,501评论 2赞 343