Find markers enriched in each cluster Use integrated data from previous stem
load(file = here("data/annotated.seurat.rds"))
# checking your cells is always a good place to start
integrated.seurat
An object of class Seurat
34285 features across 26254 samples within 2 assays
Active assay: RNA (32285 features, 3000 variable features)
9 layers present: counts.KZ1, counts.KZ2, counts.KZ3, counts.KZ4, data.KZ1, data.KZ2, data.KZ3, data.KZ4, scale.data
1 other assay present: mnn.reconstructed
10 dimensional reductions calculated: pca, umap.unintegrated, cca, umap.cca, rpca, umap.rpca, harmony, umap.harmony, mnn, umap.mnn
DimPlot(integrated.seurat, reduction = "umap.harmony", group.by = "prediction", label=TRUE)
table(integrated.seurat$prediction, integrated.seurat$group)
CON PKD
arteriole 1271 802
B cells 1745 2061
capillary 1612 1489
dendritic cell 1019 1307
fibroblasts 50 19
ILC2s 974 745
Intercalated cells 387 215
limb 82 35
macrophage 1381 1328
Mixed S1_S2 1472 1394
monocyte 469 394
Neutrophils 343 218
NK cells 188 204
Pericytes 674 474
Podocytes 99 58
Principal cells 417 324
T cell 955 1043
tubule 482 524
tmp <-table(integrated.seurat$prediction, integrated.seurat$group)
round(prop.table(tmp, margin = 2)*100,1)
CON PKD
arteriole 9.3 6.3
B cells 12.8 16.3
capillary 11.8 11.8
dendritic cell 7.5 10.3
fibroblasts 0.4 0.2
ILC2s 7.2 5.9
Intercalated cells 2.8 1.7
limb 0.6 0.3
macrophage 10.1 10.5
Mixed S1_S2 10.8 11.0
monocyte 3.4 3.1
Neutrophils 2.5 1.7
NK cells 1.4 1.6
Pericytes 4.9 3.8
Podocytes 0.7 0.5
Principal cells 3.1 2.6
T cell 7.0 8.3
tubule 3.5 4.1
abundance <- as.data.frame(round(prop.table(tmp, margin = 2)*100,1))
colnames(abundance) <- c("celltype", "group", "Freq")
ggplot(abundance, aes(fill=celltype, y=Freq, x=group)) + geom_bar(position="stack", stat="identity") + theme_bw()
ggsave(here("plots/5.Abund/stackedbar.jpg"), width = 5, height = 6)
dat <- data.frame(
ID = integrated.seurat$orig.ident,
group = integrated.seurat$group,
celltype = as.character(integrated.seurat$prediction),
stringsAsFactors = FALSE)
abundance <- dat |>
count(ID, group, celltype) |>
group_by(group, ID) |>
mutate(prop = n / sum(n) * 100)
abundance |> filter(celltype == "B cells") |>
ggplot(aes(x = group, y = prop, fill = group)) + geom_boxplot() + geom_point(size = 2) +
theme_bw() + scale_fill_manual(values = c("grey", "orange2")) + ylim(0,20)
abundance |> filter(celltype == "B cells") %>% t.test(prop ~ group, data=.)
Welch Two Sample t-test
data: prop by group
t = -1.2643, df = 1.0716, p-value = 0.4153
alternative hypothesis: true difference in means between group CON and group PKD is not equal to 0
95 percent confidence interval:
-32.34535 25.60288
sample estimates:
mean in group CON mean in group PKD
12.92784 16.29907
table(integrated.seurat$harmony.clusters, integrated.seurat$group)
CON PKD
0 1424 1581
1 1154 1018
2 998 1107
3 1058 721
4 1017 707
5 695 875
6 629 569
7 549 647
8 634 545
9 608 570
10 537 582
11 439 462
12 439 438
13 430 364
14 323 306
15 385 223
16 397 189
17 233 340
18 305 217
19 223 293
20 160 176
21 176 155
22 192 137
23 178 123
24 158 76
25 127 75
26 80 68
27 72 70
tmp <-table(integrated.seurat$harmony.clusters, integrated.seurat$group)
round(prop.table(tmp, margin = 2)*100,1)
CON PKD
0 10.5 12.5
1 8.5 8.1
2 7.3 8.8
3 7.8 5.7
4 7.5 5.6
5 5.1 6.9
6 4.6 4.5
7 4.0 5.1
8 4.7 4.3
9 4.5 4.5
10 3.9 4.6
11 3.2 3.7
12 3.2 3.5
13 3.2 2.9
14 2.4 2.4
15 2.8 1.8
16 2.9 1.5
17 1.7 2.7
18 2.2 1.7
19 1.6 2.3
20 1.2 1.4
21 1.3 1.2
22 1.4 1.1
23 1.3 1.0
24 1.2 0.6
25 0.9 0.6
26 0.6 0.5
27 0.5 0.6
abundance <- as.data.frame(round(prop.table(tmp, margin = 2)*100,1))
colnames(abundance) <- c("cluster", "group", "Freq")
ggplot(abundance, aes(fill=cluster, y=Freq, x=group)) + geom_bar(position="stack", stat="identity") + theme_bw()
ggsave(here("plots/5.Abund/stackedbar_clusters.jpg"), width = 5, height = 6)
dat <- data.frame(
ID = integrated.seurat$orig.ident,
group = integrated.seurat$group,
cluster = as.character(integrated.seurat$harmony.clusters),
stringsAsFactors = FALSE)
abundance <- dat |>
count(ID, group, cluster) |>
group_by(group, ID) |>
mutate(prop = n / sum(n) * 100)
abundance$cluster <- as.numeric(abundance$cluster)
abundance |> filter(cluster == "3") |>
ggplot(aes(x = group, y = prop, fill = group)) + geom_boxplot() + geom_point(size = 2) +
theme_bw() + scale_fill_manual(values = c("grey", "gold")) + labs(subtitle = "cluster 3")
res <- data.frame(cluster = 0:27, pval = NA)
for (i in 0:27) {
test <- abundance |> filter(cluster == i) %>% t.test(prop ~ group, data=.)
res$pval[res$cluster == i] <- round(test$p.value,2)
}
res$adj.pval <- p.adjust(res$pval, method = "BH")
res
NA
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Chicago
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] dplyr_1.1.4 here_1.0.1 ggplot2_3.5.2 Seurat_5.2.1 SeuratObject_5.0.2
[6] sp_2.1-4 SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0 Biobase_2.64.0 GenomicRanges_1.56.2
[11] GenomeInfoDb_1.40.1 IRanges_2.38.1 MatrixGenerics_1.16.0 matrixStats_1.5.0 S4Vectors_0.42.1
[16] BiocGenerics_0.50.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0 magrittr_2.0.3 spatstat.utils_3.1-2 rmarkdown_2.29
[7] farver_2.1.2 ragg_1.3.3 zlibbioc_1.50.0 vctrs_0.6.5 ROCR_1.0-11 spatstat.explore_3.3-4
[13] htmltools_0.5.8.1 S4Arrays_1.4.1 SparseArray_1.4.8 sass_0.4.10 sctransform_0.4.1 parallelly_1.45.0
[19] bslib_0.9.0 KernSmooth_2.23-26 htmlwidgets_1.6.4 ica_1.0-3 plyr_1.8.9 cachem_1.1.0
[25] plotly_4.10.4 zoo_1.8-12 igraph_2.1.4 mime_0.13 lifecycle_1.0.4 pkgconfig_2.0.3
[31] Matrix_1.7-2 R6_2.6.1 fastmap_1.2.0 GenomeInfoDbData_1.2.12 fitdistrplus_1.2-2 future_1.49.0
[37] shiny_1.10.0 digest_0.6.37 colorspace_2.1-1 patchwork_1.3.0 rprojroot_2.0.4 tensor_1.5
[43] RSpectra_0.16-2 irlba_2.3.5.1 textshaping_1.0.0 labeling_0.4.3 progressr_0.15.1 spatstat.sparse_3.1-0
[49] httr_1.4.7 polyclip_1.10-7 abind_1.4-8 compiler_4.4.1 withr_3.0.2 fastDummies_1.7.5
[55] MASS_7.3-64 DelayedArray_0.30.1 tools_4.4.1 lmtest_0.9-40 httpuv_1.6.16 future.apply_1.11.3
[61] goftest_1.2-3 glue_1.8.0 nlme_3.1-167 promises_1.3.3 grid_4.4.1 Rtsne_0.17
[67] cluster_2.1.8 reshape2_1.4.4 generics_0.1.3 gtable_0.3.6 spatstat.data_3.1-4 tidyr_1.3.1
[73] data.table_1.17.0 utf8_1.2.4 XVector_0.44.0 spatstat.geom_3.3-5 RcppAnnoy_0.0.22 ggrepel_0.9.6
[79] RANN_2.6.2 pillar_1.10.1 stringr_1.5.1 spam_2.11-1 RcppHNSW_0.6.0 later_1.4.2
[85] splines_4.4.1 lattice_0.22-6 survival_3.8-3 deldir_2.0-4 tidyselect_1.2.1 miniUI_0.1.1.1
[91] pbapply_1.7-2 knitr_1.50 gridExtra_2.3 scattermore_1.2 xfun_0.52 stringi_1.8.4
[97] UCSC.utils_1.0.0 yaml_2.3.10 lazyeval_0.2.2 evaluate_1.0.4 codetools_0.2-20 tibble_3.2.1
[103] cli_3.6.5 uwot_0.2.2 systemfonts_1.2.1 xtable_1.8-4 reticulate_1.42.0 jquerylib_0.1.4
[109] munsell_0.5.1 Rcpp_1.0.14 globals_0.18.0 spatstat.random_3.3-2 png_0.1-8 spatstat.univar_3.1-1
[115] parallel_4.4.1 dotCall64_1.2 listenv_0.9.1 viridisLite_0.4.2 scales_1.3.0 ggridges_0.5.6
[121] purrr_1.0.4 crayon_1.5.3 rlang_1.1.6 cowplot_1.1.3