Find markers enriched in each cluster Use integrated data from previous stem
library(Seurat)
library(ggplot2)
library(here)
library(presto)
library(dplyr)
set.seed(9)
load(file = here("data/integrated.seurat.rda"))
integrated.seurat
An object of class Seurat
34285 features across 26254 samples within 2 assays
Active assay: RNA (32285 features, 3000 variable features)
9 layers present: counts.KZ1, counts.KZ2, counts.KZ3, counts.KZ4, data.KZ1, data.KZ2, data.KZ3, data.KZ4, scale.data
1 other assay present: mnn.reconstructed
10 dimensional reductions calculated: pca, umap.unintegrated, cca, umap.cca, rpca, umap.rpca, harmony, umap.harmony, mnn, umap.mnn
# play around with the cluster resolution to merge/split clusters. lower number = fewer cluster & larger number = more clusters
integrated.seurat <- FindNeighbors(integrated.seurat, reduction = "harmony", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
integrated.seurat <- FindClusters(integrated.seurat, resolution = 0.43, cluster.name = "harmony.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 26254
Number of edges: 979792
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9623
Number of communities: 28
Elapsed time: 3 seconds
integrated.seurat <- RunUMAP(integrated.seurat, reduction = "harmony", dims = 1:30, reduction.name = "umap.harmony")
17:35:45 UMAP embedding parameters a = 0.9922 b = 1.112
17:35:45 Read 26254 rows and found 30 numeric columns
17:35:45 Using Annoy for neighbor search, n_neighbors = 30
17:35:45 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:35:46 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e25b0ba00
17:35:46 Searching Annoy index using 1 thread, search_k = 3000
17:35:50 Annoy recall = 100%
17:35:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
17:35:51 Initializing from normalized Laplacian + noise (using RSpectra)
17:35:55 Commencing optimization for 200 epochs, with 1126620 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:36:02 Optimization finished
DimPlot(integrated.seurat, reduction = "umap.harmony", group.by = "harmony.clusters", label=TRUE)
save(integrated.seurat, file = here("data/integrated.seurat.rds"))
Prefer roc, but wilcox is faster.
# before we can find the markers the layers need to be joined.
join.seurat <- JoinLayers(integrated.seurat)
Idents(join.seurat) <- join.seurat$harmony.clusters
cluster.markers <- FindAllMarkers(join.seurat,
logfc.threshold = 0.5,
test.use = "wilcox",
min.pct = 0.1,
only.pos = TRUE)
Calculating cluster 0
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
Calculating cluster 14
Calculating cluster 15
Calculating cluster 16
Calculating cluster 17
Calculating cluster 18
Calculating cluster 19
Calculating cluster 20
Calculating cluster 21
Calculating cluster 22
Calculating cluster 23
Calculating cluster 24
Calculating cluster 25
Calculating cluster 26
Calculating cluster 27
table(cluster.markers$cluster)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
1419 1742 1813 2488 1997 1195 1771 1329 1654 1755 1561 3030 2695 1239 1289 1125 3216 1625 2601 1901 1113 1650 570 1559 2091 1743 2841 1621
write.csv(cluster.markers, file = here("plots/4.annotation/wilcox.cluster.markers.csv"))
tops <- cluster.markers |> group_by(cluster) |> slice_max(avg_log2FC, n = 3) |> select(gene)
Adding missing grouping variables: `cluster`
join.seurat <- ScaleData(join.seurat, features = tops$gene)
Centering and scaling data matrix
|
| | 0%
|
|==================================================================================================================================================| 100%
Warning: Different features in new layer data than already exists for scale.data
# I hate the color scheme of the default heatmap
DoHeatmap(join.seurat, feature = tops$gene)
# black/white version
DoHeatmap(join.seurat, feature = tops$gene) + scale_fill_gradientn(colors = c("white", "white", "black"))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
ggsave(here("plots/4.annotation/ClusterHeatmap.jpg"), width = 10, height = 25)
## change the "GOI" to some of the promising clusters markers
GOI <- c("C1qa", "Foxp3", "Col26a1", "Meg3", "Pax5", "Apoc2")
FeaturePlot(join.seurat, features = GOI, reduction ="umap.harmony", order = TRUE)
ggsave(here("plots/4.annotation/ClusterFeaturePlot.jpg"), width = 5, height = 7)
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Chicago
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] dplyr_1.1.4 presto_1.0.0 data.table_1.17.0 Rcpp_1.0.14 SeuratWrappers_0.3.5
[6] future_1.49.0 here_1.0.1 scDblFinder_1.18.0 SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
[11] Biobase_2.64.0 GenomicRanges_1.56.2 GenomeInfoDb_1.40.1 IRanges_2.38.1 S4Vectors_0.42.1
[16] BiocGenerics_0.50.0 MatrixGenerics_1.16.0 matrixStats_1.5.0 ggplot2_3.5.2 Seurat_5.2.1
[21] SeuratObject_5.0.2 sp_2.1-4
loaded via a namespace (and not attached):
[1] spatstat.sparse_3.1-0 bitops_1.0-9 httr_1.4.7 RColorBrewer_1.1-3 tools_4.4.1
[6] sctransform_0.4.1 utf8_1.2.4 R6_2.6.1 ResidualMatrix_1.14.1 lazyeval_0.2.2
[11] uwot_0.2.2 withr_3.0.2 gridExtra_2.3 progressr_0.15.1 cli_3.6.5
[16] textshaping_1.0.0 spatstat.explore_3.3-4 fastDummies_1.7.5 labeling_0.4.3 sass_0.4.10
[21] spatstat.data_3.1-4 ggridges_0.5.6 pbapply_1.7-2 Rsamtools_2.20.0 systemfonts_1.2.1
[26] R.utils_2.12.3 harmony_1.2.3 scater_1.32.1 parallelly_1.45.0 limma_3.60.6
[31] rstudioapi_0.17.1 generics_0.1.3 BiocIO_1.14.0 ica_1.0-3 spatstat.random_3.3-2
[36] Matrix_1.7-2 ggbeeswarm_0.7.2 abind_1.4-8 R.methodsS3_1.8.2 lifecycle_1.0.4
[41] yaml_2.3.10 edgeR_4.2.2 SparseArray_1.4.8 Rtsne_0.17 grid_4.4.1
[46] promises_1.3.3 dqrng_0.4.1 crayon_1.5.3 miniUI_0.1.1.1 lattice_0.22-6
[51] beachmat_2.20.0 cowplot_1.1.3 pillar_1.10.1 knitr_1.50 metapod_1.12.0
[56] rjson_0.2.23 xgboost_1.7.8.1 future.apply_1.11.3 codetools_0.2-20 glue_1.8.0
[61] spatstat.univar_3.1-1 remotes_2.5.0 vctrs_0.6.5 png_0.1-8 spam_2.11-1
[66] gtable_0.3.6 cachem_1.1.0 xfun_0.52 S4Arrays_1.4.1 mime_0.13
[71] survival_3.8-3 statmod_1.5.0 bluster_1.14.0 fitdistrplus_1.2-2 ROCR_1.0-11
[76] nlme_3.1-167 RcppAnnoy_0.0.22 rprojroot_2.0.4 bslib_0.9.0 irlba_2.3.5.1
[81] vipor_0.4.7 KernSmooth_2.23-26 colorspace_2.1-1 ggrastr_1.0.2 tidyselect_1.2.1
[86] compiler_4.4.1 curl_6.2.2 BiocNeighbors_1.22.0 DelayedArray_0.30.1 plotly_4.10.4
[91] rtracklayer_1.64.0 scales_1.3.0 lmtest_0.9-40 stringr_1.5.1 digest_0.6.37
[96] goftest_1.2-3 spatstat.utils_3.1-2 rmarkdown_2.29 XVector_0.44.0 RhpcBLASctl_0.23-42
[101] htmltools_0.5.8.1 pkgconfig_2.0.3 sparseMatrixStats_1.16.0 fastmap_1.2.0 rlang_1.1.6
[106] htmlwidgets_1.6.4 UCSC.utils_1.0.0 shiny_1.10.0 DelayedMatrixStats_1.26.0 farver_2.1.2
[111] jquerylib_0.1.4 zoo_1.8-12 jsonlite_2.0.0 BiocParallel_1.38.0 R.oo_1.27.0
[116] BiocSingular_1.20.0 RCurl_1.98-1.16 magrittr_2.0.3 scuttle_1.14.0 GenomeInfoDbData_1.2.12
[121] dotCall64_1.2 patchwork_1.3.0 munsell_0.5.1 viridis_0.6.5 reticulate_1.42.0
[126] stringi_1.8.4 zlibbioc_1.50.0 MASS_7.3-64 plyr_1.8.9 parallel_4.4.1
[131] listenv_0.9.1 ggrepel_0.9.6 deldir_2.0-4 Biostrings_2.72.1 splines_4.4.1
[136] tensor_1.5 locfit_1.5-9.10 igraph_2.1.4 spatstat.geom_3.3-5 RcppHNSW_0.6.0
[141] reshape2_1.4.4 ScaledMatrix_1.12.0 XML_3.99-0.18 evaluate_1.0.4 scran_1.32.0
[146] BiocManager_1.30.25 httpuv_1.6.16 batchelor_1.20.0 RANN_2.6.2 tidyr_1.3.1
[151] purrr_1.0.4 polyclip_1.10-7 scattermore_1.2 rsvd_1.0.5 xtable_1.8-4
[156] restfulr_0.0.15 RSpectra_0.16-2 later_1.4.2 viridisLite_0.4.2 ragg_1.3.3
[161] tibble_3.2.1 beeswarm_0.4.0 GenomicAlignments_1.40.0 cluster_2.1.8 globals_0.18.0