输入文件数据
# Label the input fragment files with their sample names (assigned by position).
names(inputFiles) <- c(
  "AS1_ATAC",
  "ERA_pbmc_1",
  "ERA_SF_1_ATAC",
  "ERA_SF_2_ATAC",
  "load_AS_scATAC",
  "load_Healthy_scATAC"
)
构建Arrow对象
# Build the Arrow files from the input fragment files, one entry per sample
# name. The result is stored in `ArrowFiles`, which the doublet-scoring and
# project-creation steps take as input.
# NOTE(review): newer ArchR releases may name the two QC cut-offs
# minTSS / minFrags instead of filterTSS / filterFrags -- confirm against the
# installed version.
ArrowFiles <- createArrowFiles(
  inputFiles = inputFiles,
  sampleNames = names(inputFiles),
  filterTSS = 4, # Don't set this too high because you can always increase later
  filterFrags = 1000,
  addTileMat = TRUE,
  addGeneScoreMat = TRUE
)
计算双细胞的分数(DoubletScores)
# Compute doublet scores for the Arrow files, using a UMAP embedding for the
# nearest-neighbour search.
doubScores <- addDoubletScores(
  input = ArrowFiles,
  LSIMethod = 1,
  # Embedding to use for nearest neighbor search with doublet projection.
  knnMethod = "UMAP",
  # How many cells near a "pseudo-doublet" to count.
  k = 10
)
R^2用于描述样本内细胞的异质性。如果该数值很小(例如小于0.9),说明该样本中的细胞都非常相似,此时用模拟的方法去鉴定doublet就不太合适了。这很好理解:如果所有细胞都只表达同一个基因,且表达量都是1,那么模拟出的doublet也只表达这一个基因,表达量(均值)同样是1,与真实细胞无法区分,结果就是所有细胞都会被判为doublet。在这种情况下,我们推荐跳过doublet预测这一步;或者可以尝试设置knnMethod = "LSI"、force = TRUE,在LSI子空间中进行投影(相当于提高分辨率)。
# Doublet scoring with the nearest-neighbour search done in the LSI subspace;
# force = TRUE is passed along with knnMethod = "LSI". The result is assigned
# to `doubScores`, replacing any earlier value of that variable.
doubScores <- addDoubletScores(
  input = ArrowFiles,
  LSIMethod = 1,
  knnMethod = "LSI",
  force = TRUE,
  # How many cells near a "pseudo-doublet" to count.
  k = 10
)
在你的QualityControl目录下存在结果文件:
创建ArchRProject
# Create the ArchR project from the Arrow files, with "ArchROutput" as its
# output directory and the currently configured gene/genome annotations and
# thread count.
proj <- ArchRProject(
  ArrowFiles = ArrowFiles,
  outputDirectory = "ArchROutput",
  # Spelled out as FALSE: `F` is an ordinary variable and can be reassigned.
  copyArrows = FALSE,
  geneAnnotation = getGeneAnnotation(),
  genomeAnnotation = getGenomeAnnotation(),
  showLogo = TRUE,
  threads = getArchRThreads()
)
质控:
# Summary statistics BEFORE any cells are removed.
cellcoldata_proj <- as.data.frame(proj@cellColData)
# Number of cells per sample.
table(cellcoldata_proj$Sample)
# Quantiles of TSS enrichment over the unfiltered cells. This reads from
# `proj`, not `proj_5`: the filtered project is only created in the later
# filtering step, so referencing it here would fail.
quantile(proj$TSSEnrichment)
结果:
# Keep only the cells with TSSEnrichment >= 5, DoubletScore == 0 and
# DoubletEnrichment <= 0.5, and subset the project to them.
idxPass <- which(
  proj$TSSEnrichment >= 5 &
    proj$DoubletScore == 0 &
    proj$DoubletEnrichment <= 0.5
)
cellsPass <- proj$cellNames[idxPass]
proj_5 <- proj[cellsPass, ]
# Number of cells per sample in the filtered project.
cellcoldata_proj_5 <- as.data.frame(proj_5@cellColData)
table(cellcoldata_proj_5[["Sample"]])
结果:
考虑是否将load_AS_scATAC样本去除,因为其通过质控的细胞数过少。
暂时先保留,继续后续分析。
# Pull the "log10(nFrags)" and "TSSEnrichment" columns for the filtered cells.
df <- getCellColData(proj_5, select = c("log10(nFrags)", "TSSEnrichment"))
df
# Density-coloured scatter plot of the two columns, with dashed reference
# lines at y = 8 and x = 3. Axis upper limits are the 99th percentiles.
p <- ggPoint(
  x = df[, 1],
  y = df[, 2],
  colorDensity = TRUE, # TRUE rather than the reassignable `T`
  continuousSet = "sambaNight",
  xlabel = "Log10 Unique Fragments",
  ylabel = "TSS Enrichment",
  xlim = c(log10(500), quantile(df[, 1], probs = 0.99)),
  ylim = c(5, quantile(df[, 2], probs = 0.99))
) +
  geom_hline(yintercept = 8, lty = "dashed") +
  geom_vline(xintercept = 3, lty = "dashed")
# Save the plot as a PDF associated with the filtered project.
plotPDF(p, name = "TSS-vs-FRAG.pdf", ArchRProj = proj_5, addDOC = TRUE)
参考内容: 徐洲更 [果子学生信] 如果单细胞不是单细胞,那还做什么单细胞?