此文演示了一些与 Seurat 对象交互的功能。为了演示,我们将使用在第一个教程中创建的 2,700 个 PBMC 对象。为了模拟我们有两个复制的情景,我们将随机分配每个集群中一半的细胞自"rep1",另一半来自"rep2"。
加载数据
library(Seurat)
library(SeuratData)
InstallData("pbmc3k")
pbmc <- LoadData("pbmc3k", type = "pbmc3k.final")
# pretend that cells were originally assigned to one of two replicates (we assign randomly here)
# if your cells do belong to multiple replicates, and you want to add this info to the Seurat
# object create a data frame with this information (similar to replicate.info below)
set.seed(42)
pbmc$replicate <- sample(c("rep1", "rep2"), size = ncol(pbmc), replace = TRUE)
在cluster ID 和replicate之间切换身份类型
# Plot UMAP, coloring cells by cell type (currently stored in object@ident)
DimPlot(pbmc, reduction = "umap")
# How do I create a UMAP plot where cells are colored by replicate? First, store the current
# identities in a new column of meta.data called CellType
pbmc$CellType <- Idents(pbmc)
# Next, switch the identity class of all cells to reflect replicate ID
Idents(pbmc) <- "replicate"
DimPlot(pbmc, reduction = "umap")
# alternately : DimPlot(pbmc, reduction = 'umap', group.by = 'replicate') you can pass the
# shape.by to label points by both replicate and cell type
# Switch back to cell type labels
Idents(pbmc) <- "CellType"
通过cluster ID, replicate或两者兼有来列表展示细胞
# How many cells are in each cluster
table(Idents(pbmc))
##
## Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T FCGR3A+ Mono
## 697 483 480 344 271 162
## NK DC Platelet
## 155 32 14
# How many cells are in each replicate?
table(pbmc$replicate)
##
## rep1 rep2
## 1348 1290
# What proportion of cells are in each cluster?
prop.table(table(Idents(pbmc)))
##
## Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T FCGR3A+ Mono
## 0.264215315 0.183093252 0.181956027 0.130401820 0.102729340 0.061410159
## NK DC Platelet
## 0.058756634 0.012130402 0.005307051
# How does cluster membership vary by replicate?
table(Idents(pbmc), pbmc$replicate)
##
## rep1 rep2
## Naive CD4 T 354 343
## Memory CD4 T 249 234
## CD14+ Mono 232 248
## B 173 171
## CD8 T 154 117
## FCGR3A+ Mono 81 81
## NK 81 74
## DC 18 14
## Platelet 6 8
prop.table(table(Idents(pbmc), pbmc$replicate), margin = 2)
##
## rep1 rep2
## Naive CD4 T 0.262611276 0.265891473
## Memory CD4 T 0.184718101 0.181395349
## CD14+ Mono 0.172106825 0.192248062
## B 0.128338279 0.132558140
## CD8 T 0.114243323 0.090697674
## FCGR3A+ Mono 0.060089021 0.062790698
## NK 0.060089021 0.057364341
## DC 0.013353116 0.010852713
## Platelet 0.004451039 0.006201550
选择特定的细胞类型取子集
# What are the cell names of all NK cells?
WhichCells(pbmc, idents = "NK")
## [1] "AAACCGTGTATGCG" "AAATTCGATTCTCA" "AACCTTACGCGAGA" "AACGCCCTCGTACA"
## [5] "AACGTCGAGTATCG" "AAGATTACCTCAAG" "AAGCAAGAGCTTAG" "AAGCAAGAGGTGTT"
## [9] "AAGTAGGATACAGC" "AATACTGAATTGGC" "AATCCTTGGTGAGG" "AATCTCTGCTTTAC"
## [13] "ACAAATTGTTGCGA" "ACAACCGAGGGATG" "ACAATTGATGACTG" "ACACCCTGGTGTTG"
## [17] "ACAGGTACTGGTGT" "ACCTGGCTAAGTAG" "ACGAACACCTTGTT" "ACGATCGAGGACTT"
## [21] "ACGCAATGGTTCAG" "ACGCTGCTGTTCTT" "ACGGAACTCAGATC" "ACGTGATGTGACAC"
## [25] "ACGTTGGAGCCAAT" "ACTGCCACTCCGTC" "ACTGGCCTTCAGTG" "ACTTCAACGTAGGG"
## [29] "AGAACAGAAATGCC" "AGATATACCCGTAA" "AGATTCCTGTTCAG" "AGCCTCTGCCAATG"
## [33] "AGCGATTGAGATCC" "AGGATGCTTTAGGC" "AGGGACGAGTCAAC" "AGTAATACATCACG"
## [37] "AGTCACGATGAGCT" "AGTTTGCTACTGGT" "ATACCACTGCCAAT" "ATACTCTGGTATGC"
## [41] "ATCCCGTGCAGTCA" "ATCTTTCTTGTCCC" "ATGAAGGACTTGCC" "ATGATAACTTCACT"
## [45] "ATGATATGGTGCTA" "ATGGACACGCATCA" "ATGGGTACATCGGT" "ATTAACGATGAGAA"
## [49] "ATTCCAACTTAGGC" "CAAGGTTGTCTGGA" "CAATCTACTGACTG" "CACCACTGGCGAAG"
## [53] "CACGGGTGGAGGAC" "CAGATGACATTCTC" "CAGCAATGGAGGGT" "CAGCGGACCTTTAC"
## [57] "CAGCTCTGTGTGGT" "CAGTTTACACACGT" "CATCAGGACTTCCG" "CATCAGGATAGCCA"
## [61] "CATGAGACGTTGAC" "CATTACACCAACTG" "CATTTCGAGATACC" "CCTCGAACACTTTC"
## [65] "CGACCACTAAAGTG" "CGACCACTGCCAAT" "CGAGGCTGACGCTA" "CGCCGAGAGCTTAG"
## [69] "CGGCGAACGACAAA" "CGGCGAACTACTTC" "CGGGCATGTCTCTA" "CGTACCTGGCATCA"
## [73] "CGTGTAGACGATAC" "CGTGTAGAGTTACG" "CGTGTAGATTCGGA" "CTAAACCTCTGACA"
## [77] "CTAACGGAACCGAT" "CTACGCACTGGTCA" "CTACTCCTATGTCG" "CTAGTTACGAAACA"
## [81] "CTATACTGCTACGA" "CTATACTGTCTCAT" "CTCGACTGGTTGAC" "CTGAGAACGTAAAG"
## [85] "CTTTAGTGACGGGA" "GAACCAACTTCCGC" "GAAGTGCTAAACGA" "GAATGCACCTTCGC"
## [89] "GAATTAACGTCGTA" "GACGGCACACGGGA" "GAGCGCTGAAGATG" "GAGGTACTGACACT"
## [93] "GAGGTGGATCCTCG" "GATAGAGAAGGGTG" "GATCCCTGACCTTT" "GCACACCTGTGCTA"
## [97] "GCACCACTTCCTTA" "GCACTAGAGTCGTA" "GCAGGGCTATCGAC" "GCCGGAACGTTCTT"
## [101] "GCCTACACAGTTCG" "GCGCATCTTGCTCC" "GCGCGATGGTGCAT" "GGAAGGTGGCGAGA"
## [105] "GGACGCTGTCCTCG" "GGAGGCCTCGTTGA" "GGCAAGGAAAAAGC" "GGCATATGCTTATC"
## [109] "GGCCGAACTCTAGG" "GGCTAAACACCTGA" "GGGTTAACGTGCAT" "GGTGGAGAAACGGG"
## [113] "GTAGTGTGAGCGGA" "GTCGACCTGAATGA" "GTGATTCTGGTTCA" "GTGTATCTAGTAGA"
## [117] "GTTAAAACCGAGAG" "GTTCAACTGGGACA" "GTTGACGATATCGG" "TAACTCACTCTACT"
## [121] "TAAGAGGACTTGTT" "TAATGCCTCGTCTC" "TACGGCCTGGGACA" "TACTACTGATGTCG"
## [125] "TACTCTGAATCGAC" "TACTGTTGAGGCGA" "TAGCATCTCAGCTA" "TAGCCCACAGCTAC"
## [129] "TAGGGACTGAACTC" "TAGTGGTGAAGTGA" "TAGTTAGAACCACA" "TATGAATGGAGGAC"
## [133] "TATGGGTGCATCAG" "TATTTCCTGGAGGT" "TCAACACTGTTTGG" "TCAGACGACGTTAG"
## [137] "TCCCGAACACAGTC" "TCCTAAACCGCATA" "TCGATTTGCAGCTA" "TCTAACACCAGTTG"
## [141] "TGATAAACTCCGTC" "TGCACAGACGACAT" "TGCCACTGCGATAC" "TGCTGAGAGAGCAG"
## [145] "TGGAACACAAACAG" "TGGTAGACCCTCAC" "TGTAATGACACAAC" "TGTAATGAGGTAAA"
## [149] "TTACTCGATCTACT" "TTAGTCTGCCAACA" "TTCCAAACTCCCAC" "TTCCCACTTGAGGG"
## [153] "TTCTAGTGGAGAGC" "TTCTGATGGAGACG" "TTGTCATGGACGGA"
# How can I extract expression matrix for all NK cells (perhaps, to load into another package)
nk.raw.data <- as.matrix(GetAssayData(pbmc, slot = "counts")[, WhichCells(pbmc, ident = "NK")])
# Can I create a Seurat object based on expression of a feature or value in object metadata?
subset(pbmc, subset = MS4A1 > 1)
## An object of class Seurat
## 13714 features across 414 samples within 1 assay
## Active assay: RNA (13714 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, umap
subset(pbmc, subset = replicate == "rep2")
## An object of class Seurat
## 13714 features across 1290 samples within 1 assay
## Active assay: RNA (13714 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, umap
# Can I create a Seurat object of just the NK cells and B cells?
subset(pbmc, idents = c("NK", "B"))
## An object of class Seurat
## 13714 features across 499 samples within 1 assay
## Active assay: RNA (13714 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, umap
# Can I create a Seurat object of all cells except the NK cells and B cells?
subset(pbmc, idents = c("NK", "B"), invert = TRUE)
## An object of class Seurat
## 13714 features across 2139 samples within 1 assay
## Active assay: RNA (13714 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, umap
# note that if you wish to perform additional rounds of clustering after subsetting we recommend
# re-running FindVariableFeatures() and ScaleData()
计算群内的平均基因表达
# How can I calculate the average expression of all cells within a cluster?
cluster.averages <- AverageExpression(pbmc)
head(cluster.averages[["RNA"]][, 1:5])
## Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T
## AL627309.1 0.006128664 0.005927264 0.04854338 0.00000000 0.02054586
## AP006222.2 0.000000000 0.008206078 0.01088471 0.00000000 0.01191488
## RP11-206L10.2 0.007453092 0.000000000 0.00000000 0.02065031 0.00000000
## RP11-206L10.9 0.000000000 0.000000000 0.01050116 0.00000000 0.00000000
## LINC00115 0.019118933 0.024690483 0.03753737 0.03888541 0.01948277
## NOC2L 0.497463190 0.359811462 0.27253750 0.58653489 0.55704897
# Return this information as a Seurat object (enables downstream plotting and analysis) First,
# replace spaces with underscores '_' so ggplot2 doesn't fail
orig.levels <- levels(pbmc)
Idents(pbmc) <- gsub(pattern = " ", replacement = "_", x = Idents(pbmc))
orig.levels <- gsub(pattern = " ", replacement = "_", x = orig.levels)
levels(pbmc) <- orig.levels
cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE)
cluster.averages
## An object of class Seurat
## 13714 features across 9 samples within 1 assay
## Active assay: RNA (13714 features, 0 variable features)
# How can I plot the average expression of NK cells vs. CD8 T cells? Pass do.hover = T for an
# interactive plot to identify gene outliers
CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8_T")
# How can I calculate expression averages separately for each replicate?
cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE, add.ident = "replicate")
CellScatter(cluster.averages, cell1 = "CD8_T_rep1", cell2 = "CD8_T_rep2")
# You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between
# replicates
DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[["pca"]], balanced = TRUE)), size = 3,
draw.lines = FALSE)