1.准备数据:从seurat对象中提取metadata(R中运行)
# 提取每个细胞的UMAP或TSNE坐标
write.csv(Embeddings(seurat_obj, reduction = "umap"), file = "cell_embeddings.csv")
# 提取每个细胞的barcode
write.csv(Cells(seurat_obj), file = "cellID_obs.csv", row.names = FALSE)
# 提取每个细胞的celltype信息
write.csv(seurat_obj@meta.data[, 'celltype', drop = FALSE], file = "cell_celltype.csv")
下面都在python中运行,根据一致的barcode,整合loom文件和metadata:
# 整合loom文件和metadata
import scvelo as scv
import pandas as pd
import numpy as np
import os
# 整合多个loom文件
files=['./F1.loom','./F2.loom',
'./F3.loom','./T1.loom',
'./T2.loom','./T3.loom']
output_filename='./combined.loom'
loompy.combine(files, output_filename, key="Accession")
loom_data = scv.read('./combined.loom', cache=False)
loom_data.obs
# barcode名字去重后缀x,与seurat导出的barcode名称一致
loom_data.obs = loom_data.obs.rename(index = lambda x: x.replace(':', '-').replace('x', ''))
loom_data.obs
# 读取seurat中的metadata
meta_path = "/your/path"
sample_obs = pd.read_csv(os.path.join(meta_path, "cellID_obs.csv"))
cell_umap= pd.read_csv(os.path.join(meta_path, "cell_embeddings.csv"),
header=0, names=["Cell ID", "UMAP_1", "UMAP_2"])
cell_celltype = pd.read_csv(os.path.join(meta_path, "cell_celltype.csv"),
header=0, names=["Cell ID", "celltype"])
# 对细胞文件和RNA剪切速率文件取交集,保留关注的细胞类型
sample_one = loom_data[np.isin(loom_data.obs.index, sample_obs)]
sample_one.obs.head()
# 构建umap, cluster, celltype数据框
sample_one_index = pd.DataFrame(sample_one.obs.index)
sample_one_index.head()
sample_one_index.columns = ['Cell ID']
umap_ordered = sample_one_index.merge(cell_umap, on = "Cell ID")
umap_ordered.head()
celltype_ordered = sample_one_index.merge(cell_celltype, on = "Cell ID")
celltype_ordered.head()
# 将umap、cluster信息加入sample_one
umap_ordered = umap_ordered.iloc[:,1:]
celltype_ordered = celltype_ordered.iloc[:,1:]
sample_one.obsm['X_umap'] = umap_ordered.values
sample_one.obs['celltype'] = celltype_ordered.values
# 保存
adata = sample_one
adata.var_names_make_unique()
adata.write('celltype_dynamicModel.h5ad', compression = 'gzip')
adata = scv.read('celltype_dynamicModel.h5ad')
2.运行RNA速率并可视化
scv.pp.filter_and_normalize(adata)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
scv.tl.velocity(adata)
scv.tl.velocity_graph(adata)
显示剪接/未剪接计数的比例:
scv.pl.proportions(adata)
单细胞水平上显示单个细胞的运动方向:
scv.pl.velocity_embedding_stream(adata, basis='X_umap',color = "celltype")
检查标记基因:
scv.pl.velocity(adata, ['Cpe', 'Gnao1', 'Ins2', 'Adk'], ncols=2)
鉴定重要基因:
scv.tl.rank_velocity_genes(adata, groupby='celltype', min_corr=.3)
df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])
df.head()
展示伪时序:
scv.pl.velocity_graph(adata, threshold=.1)
scv.tl.velocity_pseudotime(adata)
scv.pl.scatter(adata, color='velocity_pseudotime', cmap='gnuplot')