1.VlnPlot
feats <- c("percent_mito", "percent_ribo", "percent_hb")
p2=VlnPlot(sce.all, group.by = "orig.ident", features = feats, pt.size = 0.01, ncol = 3, same.y.lims=T) +
scale_y_continuous(breaks=seq(0, 100, 5)) +
NoLegend()
p2
2.过滤
> #根据上述指标,过滤低质量细胞/基因
> #过滤指标1:最少表达基因数的细胞&最少表达细胞数的基因
> selected_c <- WhichCells(sce.all, expression = nFeature_RNA > 300)
> selected_f <- rownames(sce.all)[Matrix::rowSums(sce.all@assays$RNA@counts > 0 ) > 3]
> sce.all.filt <- subset(sce.all, features = selected_f, cells = selected_c)
> dim(sce.all)
[1] 20117 7479
> dim(sce.all.filt)
[1] 17648 7479
> table(sce.all@meta.data$orig.ident)
1E17-5.csv 2E17-5.csv 3E17-5.csv
2682 2140 2657
> table(sce.all.filt@meta.data$orig.ident)
1E17-5.csv 2E17-5.csv 3E17-5.csv
2682 2140 2657
# 可以看到,主要是过滤了基因,其次才是细胞
矩阵随机抽样
C=C[,sample(1:ncol(C),1000)]
求比例
求每个基因在细胞中的比例
C=Matrix::t(Matrix::t(C)/Matrix::colSums(C)) * 100
top50高表达的基因
most_expressed <- order(apply(C, 1, median), decreasing = T)[50:1]
挑选颜色
col = (scales::hue_pal())(50)[50:1]
多个图放一张
p1.compare=plot_grid(ncol = 3,
DimPlot(sce.all, reduction = "pca", group.by = "orig.ident")+NoAxes()+ggtitle("PCA raw_data"),
DimPlot(sce.all, reduction = "tsne", group.by = "orig.ident")+NoAxes()+ggtitle("tSNE raw_data"),
DimPlot(sce.all, reduction = "umap", group.by = "orig.ident")+NoAxes()+ggtitle("UMAP raw_data"),
DimPlot(sce.all.int, reduction = "pca", group.by = "orig.ident")+NoAxes()+ggtitle("PCA integrated"),
DimPlot(sce.all.int, reduction = "tsne", group.by = "orig.ident")+NoAxes()+ggtitle("tSNE integrated"),
DimPlot(sce.all.int, reduction = "umap", group.by = "orig.ident")+NoAxes()+ggtitle("UMAP integrated")
)
多个分辨率之间关系
p2_tree=clustree(sce.all@meta.data, prefix = "CCA_snn_res.")
查看数据框
DT::datatable(sce.markers)
细胞类型的注释
celltype=data.frame(ClusterID=0:15,
celltype='na')
celltype[celltype$ClusterID %in% c( 2,3,14,15 ),2]='epi'
celltype[celltype$ClusterID %in% c( 1,6,8,11,12 ),2]='myeloid'
celltype[celltype$ClusterID %in% c( 0,5,13,10),2]='fibo'
celltype[celltype$ClusterID %in% c( 4,7,9 ),2]='endo'
head(celltype)
celltype
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
sce.all@meta.data[which(sce.all@meta.data$CCA_snn_res.0.8 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)
cluster<-readRDS('/data1/nyy/HGPS_scRNAseq/seurat/integrate/cell_tyle/cluster.Rdata')
library("plyr")
#cell_type<-read.delim('/data1/nyy/HGPS_scRNAseq/all_tissue_cell_type_annation.txt')
cell_type<-read.delim('/data1/nyy/HGPS_scRNAseq/seurat/integrate/cell_tyle/cell_sub_type.txt')
library(tidyr)
cell_type <- cell_type %>% separate_rows(cluster, sep = ",")
add_celltype <- join(cluster,cell_type)
#还原到integrate的seurat对象的metedata中
table(add_celltype$cell.type)
table(add_celltype$cluster)
load('/data1/nyy/HGPS_scRNAseq/seurat/integrate/cell_tyle/diff/LS.sample.FindClusters.cluster.group.RData')
sample.integrated <- AddMetaData(sample.integrated,add_celltype$cell.type,col.name = "sub.cell.type")