Find markers enriched in each cluster Use integrated data from previous stem

Load Libraries and set a seed

library(Seurat)
library(ggplot2)
library(here)
library(presto)
library(dplyr)
set.seed(9)

Load data and tidy

load(file = here("data/integrated.seurat.rda"))
integrated.seurat
An object of class Seurat 
34285 features across 26254 samples within 2 assays 
Active assay: RNA (32285 features, 3000 variable features)
 9 layers present: counts.KZ1, counts.KZ2, counts.KZ3, counts.KZ4, data.KZ1, data.KZ2, data.KZ3, data.KZ4, scale.data
 1 other assay present: mnn.reconstructed
 10 dimensional reductions calculated: pca, umap.unintegrated, cca, umap.cca, rpca, umap.rpca, harmony, umap.harmony, mnn, umap.mnn

Lets use the harmony integration and make sure our clusters and map match

# play around with the cluster resolution to merge/split clusters. lower number = fewer cluster & larger number = more clusters
integrated.seurat <- FindNeighbors(integrated.seurat, reduction = "harmony", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
integrated.seurat <- FindClusters(integrated.seurat, resolution = 0.43, cluster.name = "harmony.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 26254
Number of edges: 979792

Running Louvain algorithm...
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9623
Number of communities: 28
Elapsed time: 3 seconds
integrated.seurat <- RunUMAP(integrated.seurat, reduction = "harmony", dims = 1:30, reduction.name = "umap.harmony")
17:35:45 UMAP embedding parameters a = 0.9922 b = 1.112
17:35:45 Read 26254 rows and found 30 numeric columns
17:35:45 Using Annoy for neighbor search, n_neighbors = 30
17:35:45 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:35:46 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e25b0ba00
17:35:46 Searching Annoy index using 1 thread, search_k = 3000
17:35:50 Annoy recall = 100%
17:35:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
17:35:51 Initializing from normalized Laplacian + noise (using RSpectra)
17:35:55 Commencing optimization for 200 epochs, with 1126620 positive edges
Using method 'umap'
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:36:02 Optimization finished
DimPlot(integrated.seurat, reduction = "umap.harmony", group.by = "harmony.clusters", label=TRUE)


save(integrated.seurat, file = here("data/integrated.seurat.rds"))

Find cluster markers

Prefer roc, but wilcox is faster.

# before we can find the markers the layers need to be joined. 
join.seurat <- JoinLayers(integrated.seurat)
Idents(join.seurat) <- join.seurat$harmony.clusters
cluster.markers <- FindAllMarkers(join.seurat,
                                  logfc.threshold = 0.5,
                                  test.use = "wilcox",
                                  min.pct = 0.1,
                                  only.pos = TRUE)
Calculating cluster 0
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
Calculating cluster 14
Calculating cluster 15
Calculating cluster 16
Calculating cluster 17
Calculating cluster 18
Calculating cluster 19
Calculating cluster 20
Calculating cluster 21
Calculating cluster 22
Calculating cluster 23
Calculating cluster 24
Calculating cluster 25
Calculating cluster 26
Calculating cluster 27
table(cluster.markers$cluster)

   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22   23   24   25   26   27 
1419 1742 1813 2488 1997 1195 1771 1329 1654 1755 1561 3030 2695 1239 1289 1125 3216 1625 2601 1901 1113 1650  570 1559 2091 1743 2841 1621 
write.csv(cluster.markers, file = here("plots/4.annotation/wilcox.cluster.markers.csv"))

plot heatmap of top cluster markers

tops <- cluster.markers |> group_by(cluster) |> slice_max(avg_log2FC, n = 3) |> select(gene)
Adding missing grouping variables: `cluster`
join.seurat <- ScaleData(join.seurat, features = tops$gene)
Centering and scaling data matrix

  |                                                                                                                                                        
  |                                                                                                                                                  |   0%
  |                                                                                                                                                        
  |==================================================================================================================================================| 100%
Warning: Different features in new layer data than already exists for scale.data
# I hate the color scheme of the default heatmap
DoHeatmap(join.seurat, feature = tops$gene)


# black/white version
DoHeatmap(join.seurat, feature = tops$gene) + scale_fill_gradientn(colors = c("white", "white", "black"))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
ggsave(here("plots/4.annotation/ClusterHeatmap.jpg"), width = 10, height = 25)

## change the "GOI" to some of the promising clusters markers
GOI <- c("C1qa", "Foxp3", "Col26a1", "Meg3", "Pax5", "Apoc2")
FeaturePlot(join.seurat, features = GOI, reduction ="umap.harmony", order = TRUE)

ggsave(here("plots/4.annotation/ClusterFeaturePlot.jpg"), width = 5, height = 7)
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Chicago
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] dplyr_1.1.4                 presto_1.0.0                data.table_1.17.0           Rcpp_1.0.14                 SeuratWrappers_0.3.5       
 [6] future_1.49.0               here_1.0.1                  scDblFinder_1.18.0          SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
[11] Biobase_2.64.0              GenomicRanges_1.56.2        GenomeInfoDb_1.40.1         IRanges_2.38.1              S4Vectors_0.42.1           
[16] BiocGenerics_0.50.0         MatrixGenerics_1.16.0       matrixStats_1.5.0           ggplot2_3.5.2               Seurat_5.2.1               
[21] SeuratObject_5.0.2          sp_2.1-4                   

loaded via a namespace (and not attached):
  [1] spatstat.sparse_3.1-0     bitops_1.0-9              httr_1.4.7                RColorBrewer_1.1-3        tools_4.4.1              
  [6] sctransform_0.4.1         utf8_1.2.4                R6_2.6.1                  ResidualMatrix_1.14.1     lazyeval_0.2.2           
 [11] uwot_0.2.2                withr_3.0.2               gridExtra_2.3             progressr_0.15.1          cli_3.6.5                
 [16] textshaping_1.0.0         spatstat.explore_3.3-4    fastDummies_1.7.5         labeling_0.4.3            sass_0.4.10              
 [21] spatstat.data_3.1-4       ggridges_0.5.6            pbapply_1.7-2             Rsamtools_2.20.0          systemfonts_1.2.1        
 [26] R.utils_2.12.3            harmony_1.2.3             scater_1.32.1             parallelly_1.45.0         limma_3.60.6             
 [31] rstudioapi_0.17.1         generics_0.1.3            BiocIO_1.14.0             ica_1.0-3                 spatstat.random_3.3-2    
 [36] Matrix_1.7-2              ggbeeswarm_0.7.2          abind_1.4-8               R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.10               edgeR_4.2.2               SparseArray_1.4.8         Rtsne_0.17                grid_4.4.1               
 [46] promises_1.3.3            dqrng_0.4.1               crayon_1.5.3              miniUI_0.1.1.1            lattice_0.22-6           
 [51] beachmat_2.20.0           cowplot_1.1.3             pillar_1.10.1             knitr_1.50                metapod_1.12.0           
 [56] rjson_0.2.23              xgboost_1.7.8.1           future.apply_1.11.3       codetools_0.2-20          glue_1.8.0               
 [61] spatstat.univar_3.1-1     remotes_2.5.0             vctrs_0.6.5               png_0.1-8                 spam_2.11-1              
 [66] gtable_0.3.6              cachem_1.1.0              xfun_0.52                 S4Arrays_1.4.1            mime_0.13                
 [71] survival_3.8-3            statmod_1.5.0             bluster_1.14.0            fitdistrplus_1.2-2        ROCR_1.0-11              
 [76] nlme_3.1-167              RcppAnnoy_0.0.22          rprojroot_2.0.4           bslib_0.9.0               irlba_2.3.5.1            
 [81] vipor_0.4.7               KernSmooth_2.23-26        colorspace_2.1-1          ggrastr_1.0.2             tidyselect_1.2.1         
 [86] compiler_4.4.1            curl_6.2.2                BiocNeighbors_1.22.0      DelayedArray_0.30.1       plotly_4.10.4            
 [91] rtracklayer_1.64.0        scales_1.3.0              lmtest_0.9-40             stringr_1.5.1             digest_0.6.37            
 [96] goftest_1.2-3             spatstat.utils_3.1-2      rmarkdown_2.29            XVector_0.44.0            RhpcBLASctl_0.23-42      
[101] htmltools_0.5.8.1         pkgconfig_2.0.3           sparseMatrixStats_1.16.0  fastmap_1.2.0             rlang_1.1.6              
[106] htmlwidgets_1.6.4         UCSC.utils_1.0.0          shiny_1.10.0              DelayedMatrixStats_1.26.0 farver_2.1.2             
[111] jquerylib_0.1.4           zoo_1.8-12                jsonlite_2.0.0            BiocParallel_1.38.0       R.oo_1.27.0              
[116] BiocSingular_1.20.0       RCurl_1.98-1.16           magrittr_2.0.3            scuttle_1.14.0            GenomeInfoDbData_1.2.12  
[121] dotCall64_1.2             patchwork_1.3.0           munsell_0.5.1             viridis_0.6.5             reticulate_1.42.0        
[126] stringi_1.8.4             zlibbioc_1.50.0           MASS_7.3-64               plyr_1.8.9                parallel_4.4.1           
[131] listenv_0.9.1             ggrepel_0.9.6             deldir_2.0-4              Biostrings_2.72.1         splines_4.4.1            
[136] tensor_1.5                locfit_1.5-9.10           igraph_2.1.4              spatstat.geom_3.3-5       RcppHNSW_0.6.0           
[141] reshape2_1.4.4            ScaledMatrix_1.12.0       XML_3.99-0.18             evaluate_1.0.4            scran_1.32.0             
[146] BiocManager_1.30.25       httpuv_1.6.16             batchelor_1.20.0          RANN_2.6.2                tidyr_1.3.1              
[151] purrr_1.0.4               polyclip_1.10-7           scattermore_1.2           rsvd_1.0.5                xtable_1.8-4             
[156] restfulr_0.0.15           RSpectra_0.16-2           later_1.4.2               viridisLite_0.4.2         ragg_1.3.3               
[161] tibble_3.2.1              beeswarm_0.4.0            GenomicAlignments_1.40.0  cluster_2.1.8             globals_0.18.0           
LS0tCnRpdGxlOiAiRDIgLy8gQnJlYWtvdXQgU2Vzc2lvbiA0IC8vIENsdXN0ZXIgTWFya2VycyIKb3V0cHV0OiBodG1sX25vdGVib29rCmF1dGhvcjogIkxpbmRzYXkgTi4gSGF5ZXMiCmRhdGU6ICIxMi0xMC0yMDI1IgotLS0KCgpGaW5kIG1hcmtlcnMgZW5yaWNoZWQgaW4gZWFjaCBjbHVzdGVyClVzZSBpbnRlZ3JhdGVkIGRhdGEgZnJvbSBwcmV2aW91cyBzdGVtCgojIyBMb2FkIExpYnJhcmllcyBhbmQgc2V0IGEgc2VlZApgYGB7ciwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShoZXJlKQpsaWJyYXJ5KHByZXN0bykKbGlicmFyeShkcGx5cikKc2V0LnNlZWQoOSkKYGBgCgoKIyMgTG9hZCBkYXRhIGFuZCB0aWR5CmBgYHtyfQpsb2FkKGZpbGUgPSBoZXJlKCJkYXRhL2ludGVncmF0ZWQuc2V1cmF0LnJkcyIpKQppbnRlZ3JhdGVkLnNldXJhdApgYGAKCiMjIExldHMgdXNlIHRoZSBoYXJtb255IGludGVncmF0aW9uIGFuZCBtYWtlIHN1cmUgb3VyIGNsdXN0ZXJzIGFuZCBtYXAgbWF0Y2gKYGBge3J9CiMgcGxheSBhcm91bmQgd2l0aCB0aGUgY2x1c3RlciByZXNvbHV0aW9uIHRvIG1lcmdlL3NwbGl0IGNsdXN0ZXJzLiBsb3dlciBudW1iZXIgPSBmZXdlciBjbHVzdGVyICYgbGFyZ2VyIG51bWJlciA9IG1vcmUgY2x1c3RlcnMKaW50ZWdyYXRlZC5zZXVyYXQgPC0gRmluZE5laWdoYm9ycyhpbnRlZ3JhdGVkLnNldXJhdCwgcmVkdWN0aW9uID0gImhhcm1vbnkiLCBkaW1zID0gMTozMCkKaW50ZWdyYXRlZC5zZXVyYXQgPC0gRmluZENsdXN0ZXJzKGludGVncmF0ZWQuc2V1cmF0LCByZXNvbHV0aW9uID0gMC40MywgY2x1c3Rlci5uYW1lID0gImhhcm1vbnkuY2x1c3RlcnMiKQppbnRlZ3JhdGVkLnNldXJhdCA8LSBSdW5VTUFQKGludGVncmF0ZWQuc2V1cmF0LCByZWR1Y3Rpb24gPSAiaGFybW9ueSIsIGRpbXMgPSAxOjMwLCByZWR1Y3Rpb24ubmFtZSA9ICJ1bWFwLmhhcm1vbnkiKQpEaW1QbG90KGludGVncmF0ZWQuc2V1cmF0LCByZWR1Y3Rpb24gPSAidW1hcC5oYXJtb255IiwgZ3JvdXAuYnkgPSAiaGFybW9ueS5jbHVzdGVycyIsIGxhYmVsPVRSVUUpCgpzYXZlKGludGVncmF0ZWQuc2V1cmF0LCBmaWxlID0gaGVyZSgiZGF0YS9pbnRlZ3JhdGVkLnNldXJhdC5yZHMiKSkKYGBgCgojIyBGaW5kIGNsdXN0ZXIgbWFya2VycwpQcmVmZXIgcm9jLCBidXQgd2lsY294IGlzIGZhc3Rlci4KYGBge3J9CiMgYmVmb3JlIHdlIGNhbiBmaW5kIHRoZSBtYXJrZXJzIHRoZSBsYXllcnMgbmVlZCB0byBiZSBqb2luZWQuIApqb2luLnNldXJhdCA8LSBKb2luTGF5ZXJzKGludGVncmF0ZWQuc2V1cmF0KQpJZGVudHMoam9pbi5zZXVyYXQpIDwtIGpvaW4uc2V1cmF0JGhhcm1vbnkuY2x1c3RlcnMKY2x1c3Rlci5tYXJrZXJzIDwtIEZpbmRBbGxNYXJrZXJzKGpvaW4uc2V1cmF0LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbG9nZmMudGhyZXNob2xkID0gMC41LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGVzdC51c2UgPSAid2lsY294IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1pbi5wY3QgPSAwLjEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBvbmx5LnBvcyA9IFRSVUUpCgp0YWJsZShjbHVzdGVyLm1hcmtlcnMkY2x1c3RlcikKd3JpdGUuY3N2KGNsdXN0ZXIubWFya2VycywgZmlsZSA9IGhlcmUoInBsb3RzLzQuYW5ub3RhdGlvbi93aWxjb3guY2x1c3Rlci5tYXJrZXJzLmNzdiIpKQpgYGAKCiMjIHBsb3QgaGVhdG1hcCBvZiB0b3AgY2x1c3RlciBtYXJrZXJzCmBgYHtyfQp0b3BzIDwtIGNsdXN0ZXIubWFya2VycyB8PiBncm91cF9ieShjbHVzdGVyKSB8PiBzbGljZV9tYXgoYXZnX2xvZzJGQywgbiA9IDMpIHw+IHNlbGVjdChnZW5lKQoKam9pbi5zZXVyYXQgPC0gU2NhbGVEYXRhKGpvaW4uc2V1cmF0LCBmZWF0dXJlcyA9IHRvcHMkZ2VuZSkKCiMgSSBoYXRlIHRoZSBjb2xvciBzY2hlbWUgb2YgdGhlIGRlZmF1bHQgaGVhdG1hcApEb0hlYXRtYXAoam9pbi5zZXVyYXQsIGZlYXR1cmUgPSB0b3BzJGdlbmUpCgojIGJsYWNrL3doaXRlIHZlcnNpb24KRG9IZWF0bWFwKGpvaW4uc2V1cmF0LCBmZWF0dXJlID0gdG9wcyRnZW5lKSArIHNjYWxlX2ZpbGxfZ3JhZGllbnRuKGNvbG9ycyA9IGMoIndoaXRlIiwgIndoaXRlIiwgImJsYWNrIikpCmdnc2F2ZShoZXJlKCJwbG90cy80LmFubm90YXRpb24vQ2x1c3RlckhlYXRtYXAuanBnIiksIHdpZHRoID0gMTAsIGhlaWdodCA9IDI1KQpgYGAKCgpgYGB7ciwgZmlnLmhlaWdodD02LCBmaWcud2lkdGg9NX0KIyMgY2hhbmdlIHRoZSAiR09JIiB0byBzb21lIG9mIHRoZSBwcm9taXNpbmcgY2x1c3RlcnMgbWFya2VycwpHT0kgPC0gYygiQzFxYSIsICJGb3hwMyIsICJDb2wyNmExIiwgIk1lZzMiLCAiUGF4NSIsICJBcG9jMiIpCkZlYXR1cmVQbG90KGpvaW4uc2V1cmF0LCBmZWF0dXJlcyA9IEdPSSwgcmVkdWN0aW9uID0idW1hcC5oYXJtb255Iiwgb3JkZXIgPSBUUlVFKQpnZ3NhdmUoaGVyZSgicGxvdHMvNC5hbm5vdGF0aW9uL0NsdXN0ZXJGZWF0dXJlUGxvdC5qcGciKSwgd2lkdGggPSA1LCBoZWlnaHQgPSA3KQpgYGAKCgpgYGB7cn0Kc2Vzc2lvbkluZm8oKQpgYGAK