1.导入数据
与单样本流程类似,输入数据是已用 Seurat 完成降维、聚类、分群和注释的数据。
# NOTE: start from a fresh R session instead of calling rm(list = ls());
# that call only removes global variables and leaves attached packages and
# modified options in place, so it does not give a clean state.
library(Seurat)
library(monocle)
library(dplyr)

# Restore the saved objects. The code below expects this file to provide a
# Seurat object named `scRNA` -- TODO confirm against how the file was saved.
load("scRNA.Rdata")

# Sanity checks: cell counts per active identity and per orig.ident value,
# plus a peek at the per-cell metadata.
table(Idents(scRNA))
table(scRNA$orig.ident)
head(scRNA@meta.data)

# Labelled dimension-reduction plot with the legend hidden.
DimPlot(scRNA, label = TRUE) + NoLegend()

# Store the active identities as a metadata column; later steps refer to
# `celltype` in model formulas and plot colouring.
scRNA$celltype <- Idents(scRNA)
但是table的时候每种细胞数目不一样
但是每个样本的细胞数和metadata是一样的
UMAP长这样
2.创建CellDataSet对象
# Count matrix of the RNA assay (the original note says the official
# recommendation is to use counts).
ct <- scRNA@assays$RNA$counts

# Gene annotation: one row per gene, with the gene id used both as the row
# name and as the gene_short_name column.
gene_ann <- data.frame(gene_short_name = rownames(ct), row.names = rownames(ct))
fd <- new("AnnotatedDataFrame", data = gene_ann)

# Per-cell annotation, taken as-is from the Seurat metadata table.
pd <- new("AnnotatedDataFrame", data = scRNA@meta.data)

# Assemble the CellDataSet from the counts and both annotation tables.
sc_cds <- newCellDataSet(
  ct,
  phenoData = pd,
  featureData = fd,
  expressionFamily = negbinomial.size(),
  lowerDetectionLimit = 1
)

# Print a summary of the new object.
sc_cds
3.构建细胞发育轨迹
# Estimate size factors and dispersions before running the differential test.
sc_cds <- estimateSizeFactors(sc_cds)
sc_cds <- estimateDispersions(sc_cds)

# The differential test result is cached on disk: it is computed and saved
# only when the cache file is missing, then always loaded from that file.
fdif <- "diff_test_res2.Rdata"
if (!file.exists(fdif)) {
  # Full model adds celltype on top of orig.ident; the reduced model keeps
  # only orig.ident.
  diff_test_res <- differentialGeneTest(
    sc_cds,
    fullModelFormulaStr = " ~ celltype + orig.ident",
    reducedModelFormulaStr = " ~ orig.ident",
    cores = 8
  )
  save(diff_test_res, file = fdif)
}
load(fdif)

# Genes with qval below 0.01 (rows with a missing qval are dropped).
ordering_genes <- rownames(diff_test_res)[which(diff_test_res$qval < 0.01)]

# Inspect the selected genes, then register them as the ordering genes.
head(ordering_genes)
length(ordering_genes)
sc_cds <- setOrderingFilter(sc_cds, ordering_genes)

# Plot the selected ordering genes.
plot_ordering_genes(sc_cds)

# Dimension reduction, passing orig.ident as the residual model formula.
sc_cds <- reduceDimension(sc_cds, residualModelFormulaStr = "~orig.ident")

# Order the cells along the trajectory.
sc_cds <- orderCells(sc_cds)
这一步筛出来的基因也不完全一样
4.绘图展示
4.1 发育轨迹图
library(ggsci)
library(patchwork)

# Three views of the same trajectory: default colouring with the NEJM
# palette, colouring by Pseudotime, and colouring by celltype with the NPG
# palette.
p1 <- plot_cell_trajectory(sc_cds) + scale_color_nejm()
p2 <- plot_cell_trajectory(sc_cds, color_by = "Pseudotime")
p3 <- plot_cell_trajectory(sc_cds, color_by = "celltype") + scale_color_npg()

# `/` binds tighter than `+`, so p1 is stacked over p3 and that stack is
# combined with p2; the parentheses only make this explicit.
p2 + (p1 / p3)
按 orig.ident 着色:不同样本中的细胞基本均匀分布在轨迹上,说明前面的代码很好地去除了样本间的批次效应。
# Colour the trajectory by orig.ident to see how each sample's cells are
# distributed along it.
plot_cell_trajectory(
  sc_cds,
  color_by = "orig.ident"
)
4.2 经典的拟时序热图
# Take the 50 genes with the smallest qval. as.character() guards against
# gene_short_name being a factor (data.frame() default before R 4.0):
# indexing by a factor would select rows by integer code, not by gene name.
gene_to_cluster <- diff_test_res %>%
  arrange(qval) %>%
  head(50) %>%
  pull(gene_short_name) %>%
  as.character()
head(gene_to_cluster)
## [1] "GNLY" "GZMB" "FGFBP2" "KLRD1" "FCGR3A" "KLRF1"

# Pseudotime heatmap of those genes; the number of gene clusters is set to
# the number of identity levels in the Seurat object.
plot_pseudotime_heatmap(
  sc_cds[gene_to_cluster, ],
  num_clusters = nlevels(Idents(scRNA)),
  show_rownames = TRUE,
  cores = 4,
  return_heatmap = TRUE,
  hmcols = colorRampPalette(c("navy", "white", "firebrick3"))(100)
)
4.3 基因轨迹图
# First genes of the ranked list (head() returns at most six), shown on the
# trajectory with a colour gradient. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
gs <- head(gene_to_cluster)
plot_cell_trajectory(
  sc_cds,
  markers = gs,
  use_color_gradient = TRUE
)
4.4 基因拟时序点图
# Expression of the selected genes along pseudotime, coloured by celltype.
# The row count is derived from the number of genes (two panels per row)
# instead of being hard-coded; for six genes this still gives 3 rows.
plot_genes_in_pseudotime(
  sc_cds[gs, ],
  color_by = "celltype",
  nrow = ceiling(length(gs) / 2),
  ncol = NULL
)
最后还是没搞清楚哪里出了问题。