Perform dimension reduction on the KZ sample from Session 1 Data use saved data from prior session
library(Seurat)
library(ggplot2)
library(here)
set.seed(9)
## load the data (or skip if you pick up from Session 1)
load( file = here("data/KZ2_object.rds"))
## check the object, how many cells and features?
seurat.filt
An object of class Seurat
32285 features across 6175 samples within 1 assay
Active assay: RNA (32285 features, 3000 variable features)
3 layers present: counts, data, scale.data
seurat.filt <- RunPCA(seurat.filt)
PC_ 1
Positive: Esrrg, Ank3, Pkhd1, Kcnj16, Tinag, Ptprd, Glis3, Bicc1, Slc25a21, Ptprk
Magi2, Ghr, Atp6v0a4, Igfbp7, Ndrg1, P3h2, Bnc2, Osbpl6, Fhod3, Fut9
Pter, Egfr, Col4a3, Trabd2b, Cyp2j5, Nhs, Slc4a4, Slc47a1, Pde4d, Phyhipl
Negative: Ctss, Epsti1, Cd74, Tyrobp, Fcer1g, H2-Aa, Spi1, H2-Eb1, Ly86, Lsp1
Cybb, Rgs10, H2-Ab1, Ctsc, Crip1, Ms4a6c, Pid1, Lyz2, Bcl2a1b, Lst1
Hck, Plbd1, H2-DMa, Cd300c2, Alox5ap, Ifi27l2a, Apobec1, March1, Cd68, Plek
PC_ 2
Positive: Meis2, Ldb2, Ptprb, Emcn, Plpp1, Flt1, Ptprm, Adgrl4, Egfl7, Prex2
Cyyr1, Adgrf5, Eng, Sparc, Fbxl7, Samd12, Tek, Cdh5, Dysf, Plpp3
Ly6c1, Pbx1, Rapgef4, Ptprg, Cald1, Tm4sf1, Cd34, Fgd5, Exoc3l2, Podxl
Negative: Ctss, Maf, Tyrobp, Cd74, Aoah, Fcer1g, Ly86, Spi1, Fbp1, Cybb
H2-Aa, H2-Eb1, Gatm, Epsti1, Pdzk1, Ctsc, Ass1, Gsta2, March1, Rgs10
Keg1, Plbd1, Gm2a, Cd300c2, Cyp2j5, Aldob, Fcgr2b, H2-DMa, Gk, Csf1r
PC_ 3
Positive: Fmo1, Cyp4b1, Abcg2, Aldob, Cyp2j5, Fbp1, Miox, Meis2, Spink1, Slc34a1
Gpx3, Ptprb, Pdzk1, Flt1, Emcn, Keg1, Gsta2, Lrp2, Akr1c21, Pakap.1
Ldb2, Pter, Adgrl4, Cda, Ass1, Cyyr1, Pdzd2, Egfl7, Lims2, Acsm2
Negative: Naaladl2, Slit2, Mal2, Epcam, Sim1, Rgs6, Tacstd2, Efna5, Tmem45b, Krt7
Scnn1b, Tmem213, Sgpp2, Cdh3, Cldn7, Aif1l, Kcnj1, Rhcg, Defb1, Tspan1
Sh2d4a, Cldn8, Scnn1g, Hsd11b2, Cdh1, Tfap2b, Efemp1, Krt18, Prdm16, Fras1
PC_ 4
Positive: Fcer1g, Pid1, Csf1r, Tyrobp, Cst3, Lyz2, Cd300c2, Adgre1, C1qa, C1qb
C1qc, Mpeg1, Cd68, Fcgr3, Spi1, Aif1, H2-DMb1, Ctsc, Cx3cr1, Lst1
Mafb, Ly86, Apobec1, Slc8a1, Mrc1, Clec4a3, Ms4a7, Psap, H2-Ab1, Ifi207
Negative: Skap1, Cd3g, Tnik, Trbc2, Cd247, Itk, Ms4a4b, Tox, Cd3d, Themis
Gm2682, Ikzf3, Trac, Ltb, Camk4, Thy1, Il7r, Prkca, Cd5, Trbc1
Cd226, Cd6, Icos, Cxcr6, Ctla4, Ikzf2, Sh2d1a, Ctsw, Nkg7, AW112010
PC_ 5
Positive: Ror1, Lhfp, Gucy1a2, Gpc6, Gucy1a1, Prkca, Pdgfrb, Fhl2, Pde3a, Pawr
Cacna2d1, Unc5c, Fign, 4921539H07Rik, Bnc2, Nhs, Auts2, Arhgap10, Plcl1, Pdzrn3
Lama2, Col8a1, Serping1, Agtr1a, Magi2, B130024G19Rik, Gm37245, Glis1, Pdgfra, Skap1
Negative: Serpina1f, Napsa, Aadat, Acot1, Cbr1, Kcnk1, Slc7a12, Guca2b, Acy3, Ttc36
Mep1b, Hao2, Mep1a, Defb29, Slc5a10, Slc22a13, Mpv17l, Cyp2a4, Cth, Acadm
Tmigd1, Rida, Chchd10, Serpina1d, Khk, Eci3, Ggt1, Cyp4a10, Bphl, Mettl7a2
## check the object can you see where the pca is stored?
seurat.filt
An object of class Seurat
32285 features across 6175 samples within 1 assay
Active assay: RNA (32285 features, 3000 variable features)
3 layers present: counts, data, scale.data
1 dimensional reduction calculated: pca
will take 5-10 mins
DimPlot(seurat.filt, reduction="pca")
ggsave(here("plots/2.DimRed/PCA.jpg"), width = 5, height = 5)
ElbowPlot(seurat.filt, ndims = 50, reduction = "pca")
ggsave(here("plots/2.DimRed/ElbowPlot.jpg"), width = 5, height = 5)
seurat.filt <- JackStraw(seurat.filt, dims = 50) # 8 min
| | 0 % ~calculating
|+ | 1 % ~05m 13s
|+ | 2 % ~05m 05s
|++ | 3 % ~04m 57s
|++ | 4 % ~04m 56s
|+++ | 5 % ~04m 47s
|+++ | 6 % ~04m 46s
|++++ | 7 % ~04m 46s
|++++ | 8 % ~04m 43s
|+++++ | 9 % ~04m 41s
|+++++ | 10% ~04m 39s
|++++++ | 11% ~04m 37s
|++++++ | 12% ~04m 35s
|+++++++ | 13% ~04m 30s
|+++++++ | 14% ~04m 27s
|++++++++ | 15% ~04m 24s
|++++++++ | 16% ~04m 20s
|+++++++++ | 17% ~04m 17s
|+++++++++ | 18% ~04m 13s
|++++++++++ | 19% ~04m 10s
|++++++++++ | 20% ~04m 08s
|+++++++++++ | 21% ~04m 04s
|+++++++++++ | 22% ~04m 01s
|++++++++++++ | 23% ~03m 58s
|++++++++++++ | 24% ~03m 55s
|+++++++++++++ | 25% ~03m 52s
|+++++++++++++ | 26% ~03m 49s
|++++++++++++++ | 27% ~03m 46s
|++++++++++++++ | 28% ~03m 43s
|+++++++++++++++ | 29% ~03m 40s
|+++++++++++++++ | 30% ~03m 37s
|++++++++++++++++ | 31% ~03m 34s
|++++++++++++++++ | 32% ~03m 31s
|+++++++++++++++++ | 33% ~03m 28s
|+++++++++++++++++ | 34% ~03m 25s
|++++++++++++++++++ | 35% ~03m 22s
|++++++++++++++++++ | 36% ~03m 19s
|+++++++++++++++++++ | 37% ~03m 16s
|+++++++++++++++++++ | 38% ~03m 12s
|++++++++++++++++++++ | 39% ~03m 09s
|++++++++++++++++++++ | 40% ~03m 07s
|+++++++++++++++++++++ | 41% ~03m 04s
|+++++++++++++++++++++ | 42% ~03m 00s
|++++++++++++++++++++++ | 43% ~02m 57s
|++++++++++++++++++++++ | 44% ~02m 54s
|+++++++++++++++++++++++ | 45% ~02m 51s
|+++++++++++++++++++++++ | 46% ~02m 48s
|++++++++++++++++++++++++ | 47% ~02m 45s
|++++++++++++++++++++++++ | 48% ~02m 42s
|+++++++++++++++++++++++++ | 49% ~02m 39s
|+++++++++++++++++++++++++ | 50% ~02m 36s
|++++++++++++++++++++++++++ | 51% ~02m 33s
|++++++++++++++++++++++++++ | 52% ~02m 30s
|+++++++++++++++++++++++++++ | 53% ~02m 27s
|+++++++++++++++++++++++++++ | 54% ~02m 24s
|++++++++++++++++++++++++++++ | 55% ~02m 20s
|++++++++++++++++++++++++++++ | 56% ~02m 17s
|+++++++++++++++++++++++++++++ | 57% ~02m 14s
|+++++++++++++++++++++++++++++ | 58% ~02m 11s
|++++++++++++++++++++++++++++++ | 59% ~02m 08s
|++++++++++++++++++++++++++++++ | 60% ~02m 05s
|+++++++++++++++++++++++++++++++ | 61% ~02m 02s
|+++++++++++++++++++++++++++++++ | 62% ~01m 59s
|++++++++++++++++++++++++++++++++ | 63% ~01m 56s
|++++++++++++++++++++++++++++++++ | 64% ~01m 52s
|+++++++++++++++++++++++++++++++++ | 65% ~01m 49s
|+++++++++++++++++++++++++++++++++ | 66% ~01m 46s
|++++++++++++++++++++++++++++++++++ | 67% ~01m 43s
|++++++++++++++++++++++++++++++++++ | 68% ~01m 40s
|+++++++++++++++++++++++++++++++++++ | 69% ~01m 37s
|+++++++++++++++++++++++++++++++++++ | 70% ~01m 34s
|++++++++++++++++++++++++++++++++++++ | 71% ~01m 31s
|++++++++++++++++++++++++++++++++++++ | 72% ~01m 28s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01m 25s
|+++++++++++++++++++++++++++++++++++++ | 74% ~01m 22s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01m 18s
|++++++++++++++++++++++++++++++++++++++ | 76% ~01m 15s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01m 12s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~01m 09s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01m 06s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01m 03s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~60s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~57s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~53s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~50s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~47s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~44s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~41s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~38s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~35s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~31s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~28s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~25s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~22s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~19s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~16s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~13s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~09s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~06s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=05m 16s
| | 0 % ~calculating
|+ | 2 % ~01s
|++ | 4 % ~01s
|+++ | 6 % ~01s
|++++ | 8 % ~01s
|+++++ | 10% ~01s
|++++++ | 12% ~01s
|+++++++ | 14% ~01s
|++++++++ | 16% ~01s
|+++++++++ | 18% ~01s
|++++++++++ | 20% ~01s
|+++++++++++ | 22% ~01s
|++++++++++++ | 24% ~01s
|+++++++++++++ | 26% ~01s
|++++++++++++++ | 28% ~01s
|+++++++++++++++ | 30% ~01s
|++++++++++++++++ | 32% ~01s
|+++++++++++++++++ | 34% ~01s
|++++++++++++++++++ | 36% ~01s
|+++++++++++++++++++ | 38% ~01s
|++++++++++++++++++++ | 40% ~01s
|+++++++++++++++++++++ | 42% ~01s
|++++++++++++++++++++++ | 44% ~00s
|+++++++++++++++++++++++ | 46% ~00s
|++++++++++++++++++++++++ | 48% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++ | 52% ~00s
|+++++++++++++++++++++++++++ | 54% ~00s
|++++++++++++++++++++++++++++ | 56% ~00s
|+++++++++++++++++++++++++++++ | 58% ~00s
|++++++++++++++++++++++++++++++ | 60% ~00s
|+++++++++++++++++++++++++++++++ | 62% ~00s
|++++++++++++++++++++++++++++++++ | 64% ~00s
|+++++++++++++++++++++++++++++++++ | 66% ~00s
|++++++++++++++++++++++++++++++++++ | 68% ~00s
|+++++++++++++++++++++++++++++++++++ | 70% ~00s
|++++++++++++++++++++++++++++++++++++ | 72% ~00s
|+++++++++++++++++++++++++++++++++++++ | 74% ~00s
|++++++++++++++++++++++++++++++++++++++ | 76% ~00s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~00s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01s
seurat.filt <- ScoreJackStraw(seurat.filt, dims = 1:50)
JackStrawPlot(seurat.filt, dims = 1:50, reduction = "pca")
Warning: Removed 105585 rows containing missing values or values outside the scale range (`geom_point()`).
ggsave(here("plots/2.DimRed/JackStrawPlot.jpg"), width = 8, height = 5)
Warning: Removed 105585 rows containing missing values or values outside the scale range (`geom_point()`).
DimHeatmap(seurat.filt, dims = 1:18, cells = 100, balanced = TRUE, fast = FALSE)
ggsave(here("plots/2.DimRed/PCAHeatmap.jpg"), width = 10, height = 25)
## How many PCs are at the Elbow?
## are the dims being used reasonable?
## try out different resolution in the FindClusters to see how the clusters change
seurat.filt <- FindNeighbors(seurat.filt, dims = 1:30, reduction = "pca")
Computing nearest neighbor graph
Computing SNN
seurat.filt <- FindClusters(seurat.filt, resolution = 0.4, cluster.name = "clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6175
Number of edges: 202054
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9521
Number of communities: 19
Elapsed time: 0 seconds
seurat.filt <- RunUMAP(seurat.filt, dims = 1:30, reduction = "pca", reduction.name = "umap")
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
13:14:47 UMAP embedding parameters a = 0.9922 b = 1.112
13:14:47 Read 6175 rows and found 30 numeric columns
13:14:47 Using Annoy for neighbor search, n_neighbors = 30
13:14:47 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:14:48 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e456815c3
13:14:48 Searching Annoy index using 1 thread, search_k = 3000
13:14:48 Annoy recall = 100%
13:14:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
13:14:49 Initializing from normalized Laplacian + noise (using RSpectra)
13:14:50 Commencing optimization for 500 epochs, with 250554 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:14:54 Optimization finished
DimPlot(seurat.filt, reduction = "umap")
ggsave(here("plots/2.DimRed/UMAP.jpg"), width = 5, height = 5)
## change the "Gene of interest" to one of your HVGs
GOI <- "Cx3cr1"
FeaturePlot(seurat.filt, reduction ="umap", features = GOI, order = TRUE)
ggsave(here("plots/2.DimRed/Cx3cr1_umap.jpg"), width = 5, height = 5)
save(seurat.filt, file = here("data/KZ2_umap_object.rds"))
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Chicago
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] future_1.49.0 here_1.0.1 scDblFinder_1.18.0 SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
[6] Biobase_2.64.0 GenomicRanges_1.56.2 GenomeInfoDb_1.40.1 IRanges_2.38.1 S4Vectors_0.42.1
[11] BiocGenerics_0.50.0 MatrixGenerics_1.16.0 matrixStats_1.5.0 ggplot2_3.5.2 Seurat_5.2.1
[16] SeuratObject_5.0.2 sp_2.1-4
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.4.1 later_1.4.2 BiocIO_1.14.0 bitops_1.0-9
[6] R.oo_1.27.0 tibble_3.2.1 polyclip_1.10-7 XML_3.99-0.18 fastDummies_1.7.5
[11] lifecycle_1.0.4 rprojroot_2.0.4 edgeR_4.2.2 globals_0.18.0 lattice_0.22-6
[16] MASS_7.3-64 magrittr_2.0.3 sass_0.4.10 rmarkdown_2.29 limma_3.60.6
[21] plotly_4.10.4 jquerylib_0.1.4 yaml_2.3.10 metapod_1.12.0 httpuv_1.6.16
[26] sctransform_0.4.1 spam_2.11-1 spatstat.sparse_3.1-0 reticulate_1.42.0 cowplot_1.1.3
[31] pbapply_1.7-2 RColorBrewer_1.1-3 abind_1.4-8 zlibbioc_1.50.0 Rtsne_0.17
[36] R.utils_2.12.3 purrr_1.0.4 RCurl_1.98-1.16 GenomeInfoDbData_1.2.12 ggrepel_0.9.6
[41] irlba_2.3.5.1 listenv_0.9.1 spatstat.utils_3.1-2 goftest_1.2-3 RSpectra_0.16-2
[46] dqrng_0.4.1 spatstat.random_3.3-2 fitdistrplus_1.2-2 parallelly_1.45.0 DelayedMatrixStats_1.26.0
[51] codetools_0.2-20 DelayedArray_0.30.1 scuttle_1.14.0 tidyselect_1.2.1 UCSC.utils_1.0.0
[56] farver_2.1.2 viridis_0.6.5 ScaledMatrix_1.12.0 spatstat.explore_3.3-4 GenomicAlignments_1.40.0
[61] jsonlite_2.0.0 BiocNeighbors_1.22.0 progressr_0.15.1 ggridges_0.5.6 survival_3.8-3
[66] scater_1.32.1 systemfonts_1.2.1 tools_4.4.1 ragg_1.3.3 ica_1.0-3
[71] Rcpp_1.0.14 glue_1.8.0 gridExtra_2.3 SparseArray_1.4.8 xfun_0.52
[76] dplyr_1.1.4 withr_3.0.2 fastmap_1.2.0 bluster_1.14.0 digest_0.6.37
[81] rsvd_1.0.5 R6_2.6.1 mime_0.13 textshaping_1.0.0 colorspace_2.1-1
[86] scattermore_1.2 tensor_1.5 spatstat.data_3.1-4 R.methodsS3_1.8.2 tidyr_1.3.1
[91] generics_0.1.3 data.table_1.17.0 rtracklayer_1.64.0 httr_1.4.7 htmlwidgets_1.6.4
[96] S4Arrays_1.4.1 uwot_0.2.2 pkgconfig_2.0.3 gtable_0.3.6 lmtest_0.9-40
[101] XVector_0.44.0 htmltools_0.5.8.1 dotCall64_1.2 scales_1.3.0 png_0.1-8
[106] spatstat.univar_3.1-1 scran_1.32.0 knitr_1.50 rstudioapi_0.17.1 reshape2_1.4.4
[111] rjson_0.2.23 nlme_3.1-167 curl_6.2.2 cachem_1.1.0 zoo_1.8-12
[116] stringr_1.5.1 KernSmooth_2.23-26 parallel_4.4.1 miniUI_0.1.1.1 vipor_0.4.7
[121] ggrastr_1.0.2 restfulr_0.0.15 pillar_1.10.1 grid_4.4.1 vctrs_0.6.5
[126] RANN_2.6.2 promises_1.3.3 BiocSingular_1.20.0 beachmat_2.20.0 xtable_1.8-4
[131] cluster_2.1.8 beeswarm_0.4.0 evaluate_1.0.4 locfit_1.5-9.10 cli_3.6.5
[136] compiler_4.4.1 Rsamtools_2.20.0 rlang_1.1.6 crayon_1.5.3 future.apply_1.11.3
[141] labeling_0.4.3 plyr_1.8.9 ggbeeswarm_0.7.2 stringi_1.8.4 viridisLite_0.4.2
[146] deldir_2.0-4 BiocParallel_1.38.0 munsell_0.5.1 Biostrings_2.72.1 lazyeval_0.2.2
[151] spatstat.geom_3.3-5 Matrix_1.7-2 RcppHNSW_0.6.0 patchwork_1.3.0 sparseMatrixStats_1.16.0
[156] statmod_1.5.0 shiny_1.10.0 ROCR_1.0-11 igraph_2.1.4 bslib_0.9.0
[161] xgboost_1.7.8.1