Find markers enriched in each cluster Use integrated data from previous stem

Load Libraries and set a seed

library(Seurat)
library(ggplot2)
library(here)
library(SingleR) # bioconductor install
library(pheatmap)
library(scater)
set.seed(9)

Load data and reference

load(file = here("data/integrated.seurat.rds"))
load(file = here("data/ref.se.rds"))

integrated.seurat
An object of class Seurat 
34285 features across 26254 samples within 2 assays 
Active assay: RNA (32285 features, 3000 variable features)
 9 layers present: counts.KZ1, counts.KZ2, counts.KZ3, counts.KZ4, data.KZ1, data.KZ2, data.KZ3, data.KZ4, scale.data
 1 other assay present: mnn.reconstructed
 10 dimensional reductions calculated: pca, umap.unintegrated, cca, umap.cca, rpca, umap.rpca, harmony, umap.harmony, mnn, umap.mnn
DimPlot(integrated.seurat, reduction = "umap.harmony", group.by = "harmony.clusters", label=TRUE)

# build a sce object from the seurat object
join.seurat <- JoinLayers(integrated.seurat)
sce <- SingleCellExperiment(assays = list(counts = join.seurat[["RNA"]]$counts))
sce$harmony.clusters <- integrated.seurat$harmony.clusters
sce <- logNormCounts(sce)
sce

pred <- SingleR(test = sce, ref = ref.se, labels = ref.se$coarse_anno) # ~ 30 m
save(pred, file = here("data/pred.rds"))
head(pred$scores)
     arteriole     B cells capillary dendritic cell fibroblasts       ILC2s Intercalated cells      limb  macrophage Mixed S1_S2    monocyte Neutrophils
[1,] 0.3840381  0.50692617 0.3873603     0.54248919   0.3253819  0.47561354          0.3043483 0.3534321  0.59473205   0.3599607  0.57534175  0.44999182
[2,] 0.3785673  0.50240422 0.3782312     0.54910030   0.3189176  0.49826652          0.3074463 0.3529678  0.59482783   0.3605026  0.57513946  0.44270288
[3,] 0.1279269 -0.01606203 0.1256420     0.09315129   0.2197587 -0.05783673          0.2626452 0.3562973 -0.01076363   0.2526534 -0.08442247 -0.05710663
[4,] 0.4041674  0.60784002 0.3983539     0.50921725   0.3321245  0.52331207          0.2783260 0.3541139  0.48178791   0.3409455  0.46743655  0.40858385
[5,] 0.3165781  0.12093887 0.3261246     0.16029629   0.2393430  0.06909369          0.1721440 0.1662770  0.10092668   0.1047828  0.03802315  0.02920402
[6,] 0.4860897  0.36172475 0.5173907     0.29818473   0.3165531  0.34818150          0.2459560 0.2844097  0.30971294   0.2647431  0.29724008  0.26762791
        NK cells  Pericytes Podocytes Principal cells      T cell    tubule
[1,]  0.44340624 0.39785164 0.3712859       0.3536552  0.45496464 0.3109168
[2,]  0.47207369 0.38338956 0.3648400       0.3597624  0.48372363 0.3101903
[3,] -0.07735930 0.06906101 0.2099821       0.2119036 -0.04284216 0.4258736
[4,]  0.50020947 0.41455939 0.3926174       0.3533299  0.52512596 0.2935508
[5,]  0.05761137 0.16078442 0.1655019       0.1595536  0.08930727 0.1401068
[6,]  0.30562019 0.37433889 0.3344069       0.2824906  0.33336318 0.2358037
table(pred$labels)

         arteriole            B cells          capillary     dendritic cell        fibroblasts              ILC2s Intercalated cells               limb 
              2073               3806               3101               2326                 69               1719                602                117 
        macrophage        Mixed S1_S2           monocyte        Neutrophils           NK cells          Pericytes          Podocytes    Principal cells 
              2709               2866                863                561                392               1148                157                741 
            T cell             tubule 
              1998               1006 
plotScoreHeatmap(pred)

table(colLabels(sce))
< table of extent 0 >
tab <- table(Assigned=pred$labels, Cluster=sce$harmony.clusters)
head(tab)
                Cluster
Assigned            0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22   23   24   25   26   27
  arteriole         0  955    0    0   29    0    0    0 1055    0    0    0    0    0    0    0    0    0    0    7    0    0    0   17    2    8    0    0
  B cells           1    8  638   24    0 1561    1  812    9   13   35    0   24   89   91    6   16   35   10   40   30    2  303    9    6   39    1    3
  capillary         0 1197    0    0 1695    0    0    0   99    0    0    0    1    0    0    0    0    0    0    7    2    0    0    5    1   93    0    1
  dendritic cell  589    0    3    0    0    9  271   31    1    0    0    0    1  555   15    0    0  530    0  318    0    0    2    0    0    0    1    0
  fibroblasts       0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0   41   25    0    0    1
  ILC2s             0    0  540  237    0    0    0  147    0   14  313    0    1    4   85   23   87    6   90   18   33    0   16    0    4   28   73    0
pheatmap(log2(tab+10), color=colorRampPalette(c("white", "blue"))(101))

sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Chicago
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] scater_1.32.1               scuttle_1.14.0              SingleCellExperiment_1.26.0 pheatmap_1.0.12             SingleR_2.6.0              
 [6] SummarizedExperiment_1.34.0 Biobase_2.64.0              GenomicRanges_1.56.2        GenomeInfoDb_1.40.1         IRanges_2.38.1             
[11] S4Vectors_0.42.1            BiocGenerics_0.50.0         MatrixGenerics_1.16.0       matrixStats_1.5.0           here_1.0.1                 
[16] ggplot2_3.5.2               Seurat_5.2.1                SeuratObject_5.0.2          sp_2.1-4                   

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3        rstudioapi_0.17.1         jsonlite_2.0.0            magrittr_2.0.3            ggbeeswarm_0.7.2          spatstat.utils_3.1-2     
  [7] rmarkdown_2.29            farver_2.1.2              zlibbioc_1.50.0           vctrs_0.6.5               ROCR_1.0-11               DelayedMatrixStats_1.26.0
 [13] spatstat.explore_3.3-4    htmltools_0.5.8.1         S4Arrays_1.4.1            BiocNeighbors_1.22.0      SparseArray_1.4.8         sass_0.4.10              
 [19] sctransform_0.4.1         parallelly_1.45.0         bslib_0.9.0               KernSmooth_2.23-26        htmlwidgets_1.6.4         ica_1.0-3                
 [25] plyr_1.8.9                cachem_1.1.0              plotly_4.10.4             zoo_1.8-12                igraph_2.1.4              mime_0.13                
 [31] lifecycle_1.0.4           pkgconfig_2.0.3           rsvd_1.0.5                Matrix_1.7-2              R6_2.6.1                  fastmap_1.2.0            
 [37] GenomeInfoDbData_1.2.12   fitdistrplus_1.2-2        future_1.49.0             shiny_1.10.0              digest_0.6.37             colorspace_2.1-1         
 [43] patchwork_1.3.0           rprojroot_2.0.4           tensor_1.5                RSpectra_0.16-2           irlba_2.3.5.1             beachmat_2.20.0          
 [49] labeling_0.4.3            progressr_0.15.1          spatstat.sparse_3.1-0     httr_1.4.7                polyclip_1.10-7           abind_1.4-8              
 [55] compiler_4.4.1            withr_3.0.2               BiocParallel_1.38.0       viridis_0.6.5             fastDummies_1.7.5         MASS_7.3-64              
 [61] DelayedArray_0.30.1       tools_4.4.1               vipor_0.4.7               lmtest_0.9-40             beeswarm_0.4.0            httpuv_1.6.16            
 [67] future.apply_1.11.3       goftest_1.2-3             glue_1.8.0                nlme_3.1-167              promises_1.3.3            grid_4.4.1               
 [73] Rtsne_0.17                cluster_2.1.8             reshape2_1.4.4            generics_0.1.3            gtable_0.3.6              spatstat.data_3.1-4      
 [79] tidyr_1.3.1               data.table_1.17.0         ScaledMatrix_1.12.0       BiocSingular_1.20.0       XVector_0.44.0            spatstat.geom_3.3-5      
 [85] RcppAnnoy_0.0.22          ggrepel_0.9.6             RANN_2.6.2                pillar_1.10.1             stringr_1.5.1             spam_2.11-1              
 [91] RcppHNSW_0.6.0            later_1.4.2               splines_4.4.1             dplyr_1.1.4               lattice_0.22-6            survival_3.8-3           
 [97] deldir_2.0-4              tidyselect_1.2.1          miniUI_0.1.1.1            pbapply_1.7-2             knitr_1.50                gridExtra_2.3            
[103] scattermore_1.2           xfun_0.52                 stringi_1.8.4             UCSC.utils_1.0.0          yaml_2.3.10               lazyeval_0.2.2           
[109] evaluate_1.0.4            codetools_0.2-20          tibble_3.2.1              cli_3.6.5                 uwot_0.2.2                xtable_1.8-4             
[115] reticulate_1.42.0         jquerylib_0.1.4           munsell_0.5.1             Rcpp_1.0.14               globals_0.18.0            spatstat.random_3.3-2    
[121] png_0.1-8                 spatstat.univar_3.1-1     parallel_4.4.1            dotCall64_1.2             sparseMatrixStats_1.16.0  listenv_0.9.1            
[127] viridisLite_0.4.2         scales_1.3.0              ggridges_0.5.6            crayon_1.5.3              purrr_1.0.4               rlang_1.1.6              
[133] cowplot_1.1.3            
LS0tCnRpdGxlOiAiRDIgLy8gQnJlYWtvdXQgU2Vzc2lvbiA0IC8vIENsdXN0ZXIgTWFya2VycyIKb3V0cHV0OiBodG1sX25vdGVib29rCmF1dGhvcjogIkxpbmRzYXkgTi4gSGF5ZXMiCmRhdGU6ICIxMi0xMC0yMDI1IgplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCgpGaW5kIG1hcmtlcnMgZW5yaWNoZWQgaW4gZWFjaCBjbHVzdGVyClVzZSBpbnRlZ3JhdGVkIGRhdGEgZnJvbSBwcmV2aW91cyBzdGVtCgojIyBMb2FkIExpYnJhcmllcyBhbmQgc2V0IGEgc2VlZApgYGB7ciwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShoZXJlKQpsaWJyYXJ5KFNpbmdsZVIpICMgYmlvY29uZHVjdG9yIGluc3RhbGwKbGlicmFyeShwaGVhdG1hcCkKbGlicmFyeShzY2F0ZXIpCnNldC5zZWVkKDkpCmBgYAoKCiMjIExvYWQgZGF0YSBhbmQgcmVmZXJlbmNlCmBgYHtyfQpsb2FkKGZpbGUgPSBoZXJlKCJkYXRhL2ludGVncmF0ZWQuc2V1cmF0LnJkcyIpKQpsb2FkKGZpbGUgPSBoZXJlKCJkYXRhL3JlZi5zZS5yZHMiKSkKCmludGVncmF0ZWQuc2V1cmF0CkRpbVBsb3QoaW50ZWdyYXRlZC5zZXVyYXQsIHJlZHVjdGlvbiA9ICJ1bWFwLmhhcm1vbnkiLCBncm91cC5ieSA9ICJoYXJtb255LmNsdXN0ZXJzIiwgbGFiZWw9VFJVRSkKYGBgCgoKCgpgYGB7cn0KIyBidWlsZCBhIHNjZSBvYmplY3QgZnJvbSB0aGUgc2V1cmF0IG9iamVjdApqb2luLnNldXJhdCA8LSBKb2luTGF5ZXJzKGludGVncmF0ZWQuc2V1cmF0KQpzY2UgPC0gU2luZ2xlQ2VsbEV4cGVyaW1lbnQoYXNzYXlzID0gbGlzdChjb3VudHMgPSBqb2luLnNldXJhdFtbIlJOQSJdXSRjb3VudHMpKQpzY2UkaGFybW9ueS5jbHVzdGVycyA8LSBpbnRlZ3JhdGVkLnNldXJhdCRoYXJtb255LmNsdXN0ZXJzCnNjZSA8LSBsb2dOb3JtQ291bnRzKHNjZSkKc2NlCgpwcmVkIDwtIFNpbmdsZVIodGVzdCA9IHNjZSwgcmVmID0gcmVmLnNlLCBsYWJlbHMgPSByZWYuc2UkY29hcnNlX2Fubm8pICMgfiAzMCBtCnNhdmUocHJlZCwgZmlsZSA9IGhlcmUoImRhdGEvcHJlZC5yZHMiKSkKYGBgCmBgYHtyfQpoZWFkKHByZWQkc2NvcmVzKQp0YWJsZShwcmVkJGxhYmVscykKcGxvdFNjb3JlSGVhdG1hcChwcmVkKQp0YWJsZShjb2xMYWJlbHMoc2NlKSkKdGFiIDwtIHRhYmxlKEFzc2lnbmVkPXByZWQkbGFiZWxzLCBDbHVzdGVyPXNjZSRoYXJtb255LmNsdXN0ZXJzKQpoZWFkKHRhYikKcGhlYXRtYXAobG9nMih0YWIrMTApLCBjb2xvcj1jb2xvclJhbXBQYWxldHRlKGMoIndoaXRlIiwgImJsdWUiKSkoMTAxKSkKYGBgCgoKCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoKCgoK