diff --git a/..Rcheck/00check.log b/..Rcheck/00check.log new file mode 100644 index 0000000..3e93804 --- /dev/null +++ b/..Rcheck/00check.log @@ -0,0 +1,13 @@ +* using log directory ‘/Users/bianjh/Documents/Projects/Development/SCOT/..Rcheck’ +* using R version 4.5.1 (2025-06-13) +* using platform: aarch64-apple-darwin20 +* R was compiled by + Apple clang version 16.0.0 (clang-1600.0.26.6) + GNU Fortran (GCC) 14.2.0 +* running under: macOS Tahoe 26.4.1 +* using session charset: UTF-8 +* checking for file ‘./DESCRIPTION’ ... ERROR +Required fields missing or empty: + ‘Author’ ‘Maintainer’ +* DONE +Status: 1 ERROR diff --git a/CITATION.cff b/CITATION.cff index 4de32c0..81bad29 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -33,8 +33,10 @@ references: url: https://www.R-project.org/ authors: - name: R Core Team + website: https://ror.org/02zz1nj61 institution: - name: 'R Foundation for Statistical Computing (ROR: )' + name: R Foundation for Statistical Computing + website: https://ror.org/05qewa988 address: Vienna, Austria year: '2026' doi: 10.32614/R.manuals @@ -284,8 +286,10 @@ references: notes: Imports authors: - name: R Core Team + website: https://ror.org/02zz1nj61 institution: - name: 'R Foundation for Statistical Computing (ROR: )' + name: R Foundation for Statistical Computing + website: https://ror.org/05qewa988 address: Vienna, Austria year: '2026' doi: 10.32614/R.manuals @@ -562,8 +566,10 @@ references: notes: Imports authors: - name: R Core Team + website: https://ror.org/02zz1nj61 institution: - name: 'R Foundation for Statistical Computing (ROR: )' + name: R Foundation for Statistical Computing + website: https://ror.org/05qewa988 address: Vienna, Austria year: '2026' doi: 10.32614/R.manuals @@ -573,8 +579,10 @@ references: notes: Imports authors: - name: R Core Team + website: https://ror.org/02zz1nj61 institution: - name: 'R Foundation for Statistical Computing (ROR: )' + name: R Foundation for Statistical Computing + website: https://ror.org/05qewa988 address: Vienna, Austria year: '2026' doi: 10.32614/R.manuals @@ -591,19 +599,6 @@ references: orcid: https://orcid.org/0000-0001-8457-4658 year: '2026' doi: 10.32614/CRAN.package.cffr -- type: software - title: glmGamPoi - abstract: 'glmGamPoi: Fit a Gamma-Poisson Generalized Linear Model' - notes: Suggests - url: https://github.com/const-ae/glmGamPoi - repository: https://bioconductor.org/ - authors: - - family-names: Ahlmann-Eltze - given-names: Constantin - email: artjom31415@googlemail.com - orcid: https://orcid.org/0000-0002-3762-068X - year: '2026' - doi: 10.18129/B9.bioc.glmGamPoi - type: software title: GSEABase abstract: 'GSEABase: Gene set enrichment data structures and methods' @@ -719,25 +714,6 @@ references: given-names: Manuel year: '2026' doi: 10.32614/CRAN.package.roxygen2 -- type: software - title: SeuratData - abstract: 'SeuratData: Install and Manage Seurat Datasets' - notes: Suggests - url: http://www.satijalab.org/seurat - authors: - - family-names: Satija - given-names: Rahul - email: rsatija@nygenome.org - orcid: https://orcid.org/0000-0001-9448-8833 - - family-names: Hoffman - given-names: Paul - email: phoffman@nygenome.org - orcid: https://orcid.org/0000-0002-7693-8957 - - family-names: Butler - given-names: Andrew - email: abutler@nygenome.org - orcid: https://orcid.org/0000-0003-3608-0463 - year: '2026' - type: software title: SingleCellExperiment abstract: 'SingleCellExperiment: S4 Classes for Single Cell Data' @@ -752,42 +728,4 @@ references: email: risso.davide@gmail.com year: '2026' doi: 10.18129/B9.bioc.SingleCellExperiment -- type: software - title: testthat - abstract: 'testthat: Unit Testing for R' - notes: Suggests - url: https://testthat.r-lib.org - repository: https://CRAN.R-project.org/package=testthat - authors: - - family-names: Wickham - given-names: Hadley - email: hadley@posit.co - year: '2026' - doi: 10.32614/CRAN.package.testthat - version: '>= 3.0.0' -- type: software - title: usethis - abstract: 'usethis: Automate Package and Project Setup' - notes: Suggests - url: https://usethis.r-lib.org - repository: https://CRAN.R-project.org/package=usethis - authors: - - family-names: Wickham - given-names: Hadley - email: hadley@posit.co - orcid: https://orcid.org/0000-0003-4757-117X - - family-names: Bryan - given-names: Jennifer - email: jenny@posit.co - orcid: https://orcid.org/0000-0002-6983-2759 - - family-names: Barrett - given-names: Malcolm - email: malcolmbarrett@gmail.com - orcid: https://orcid.org/0000-0003-0299-5825 - - family-names: Teucher - given-names: Andy - email: andy.teucher@posit.co - orcid: https://orcid.org/0000-0002-7840-692X - year: '2026' - doi: 10.32614/CRAN.package.usethis diff --git a/DESCRIPTION b/DESCRIPTION index 9920091..01a91fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -72,4 +72,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 +Config/roxygen2/version: 8.0.0 diff --git a/R/convert_human_gene_list.R b/R/convert_human_gene_list.R index 9a60cf0..a247589 100644 --- a/R/convert_human_gene_list.R +++ b/R/convert_human_gene_list.R @@ -8,29 +8,44 @@ #' @export #' #' @return Character vector of mapped mouse genes -convert_human_gene_list <- function(genes) { - # data variables must be initialized to silence the R CMD check note: - # 'no visible binding for global variable' - gns <- NULL +convert_human_gene_list <- function(genes = NULL) { + result <- character(0) - # TODO: make this function generic enough to convert any genome to any other - egs <- AnnotationDbi::mapIds( - org.Hs.eg.db::org.Hs.eg.db, - gns, - "ENTREZID", - "SYMBOL" - ) - mapped <- AnnotationDbi::select( - Orthology.eg.db::Orthology.eg.db, - egs, - "Mus.musculus", - "Homo.sapiens" - ) - mapped$MUS <- AnnotationDbi::mapIds( - org.Mm.eg.db::org.Mm.eg.db, - as.character(mapped$Mus.musculus), - "SYMBOL", - "ENTREZID" - ) - return(as.character(unlist(mapped$MUS))) + # Validate input: must be character vector + if (!is.character(genes)) { + stop(paste0( + "Input 'genes' must be a character vector, not ", + paste(class(genes), collapse = " ") + )) + } else if (length(genes) == 0L) { + warning("Input 'genes' is empty; returning character(0)") + } else { + # Map human SYMBOL to ENTREZID + egs <- AnnotationDbi::mapIds( + org.Hs.eg.db::org.Hs.eg.db, + genes, + "ENTREZID", + "SYMBOL" + ) + + # Map human ENTREZID to mouse ENTREZID + mapped <- AnnotationDbi::select( + Orthology.eg.db::Orthology.eg.db, + as.character(egs), + "Mus.musculus", + "Homo.sapiens" + ) + + # Map mouse ENTREZID to SYMBOL + mapped$MUS <- AnnotationDbi::mapIds( + org.Mm.eg.db::org.Mm.eg.db, + as.character(mapped$Mus.musculus), + "SYMBOL", + "ENTREZID" + ) + + result <- as.character(unlist(mapped$MUS)) + } + + return(result) } diff --git a/R/make_volcano_plot.R b/R/make_volcano_plot.R index 9d59299..5850e0b 100644 --- a/R/make_volcano_plot.R +++ b/R/make_volcano_plot.R @@ -5,7 +5,7 @@ #' differential expression table generated by Seurat #' #' @param de_table Differential expression table generated by Seurat containing -#' p_val, pct.1, pct.2 avg_log2FC and p_val_adj +#' p_val, pct.1, pct.2 avg_log_2_fc and p_val_adj #' @param logfc Boolean to color genes meeting logfc threshold of 1.5 #' @param pval Boolean to color genes meeting p-value threshold of 0.05 #' @param significant Boolean to color genes that meet both logfc and p-value @@ -26,23 +26,26 @@ make_volcano_plot <- function( ) { log10_p <- -log10(de_table$p_val_adj) log10_p[which(de_table$p_val_adj == 0)] <- 500 - avg_log2FC <- de_table$avg_log2FC + avg_log_2_fc <- de_table$avg_log_2_fc gene <- rownames(de_table) significance <- vector(length = length(log10_p)) significance[] <- "NotSignificant" if (logfc == TRUE) { - significance[which(abs(de_table$avg_log2FC) > 1.5)] <- "avg_log2FC > 1.5" + significance[which( + abs(de_table$avg_log_2_fc) > 1.5 + )] <- "avg_log_2_fc > 1.5" } if (isTRUE(pval)) { significance[which(de_table$p_val_adj < 0.05)] <- "p_val_adj < 0.05" } if (isTRUE(significant)) { significance[which( - abs(de_table$avg_log2FC) > 1.5 & + abs(de_table$avg_log_2_fc) > 1.5 & de_table$p_val_adj < 0.05 - )] <- "p_val_adj < 0.05 & avg_log2FC > 1.5" + )] <- "p_val_adj < 0.05 & avg_log_2_fc > 1.5" } - df <- as.data.frame(cbind(avg_log2FC, log10_p, significance, gene)) + df <- as.data.frame(cbind(avg_log_2_fc, log10_p, significance, gene)) + colnames(df) <- c("avg_log_2_fc", "log10_p", "significance", "gene") rownames(df) <- rownames(de_table) df[, 1] <- as.numeric(df[, 1]) df[, 2] <- as.numeric(df[, 2]) @@ -50,9 +53,9 @@ make_volcano_plot <- function( df[, 3], levels = c( "NotSignificant", - "avg_log2FC > 1.5", + "avg_log_2_fc > 1.5", "p_val_adj < 0.05", - "p_val_adj < 0.05 & avg_log2FC > 1.5" + "p_val_adj < 0.05 & avg_log_2_fc > 1.5" ) ) data_subset <- NULL @@ -68,13 +71,13 @@ make_volcano_plot <- function( volcano <- ggplot2::ggplot( df, - ggplot2::aes(x = avg_log2FC, y = log10_p, col = significance) + ggplot2::aes(x = avg_log_2_fc, y = log10_p, col = significance) ) + ggplot2::geom_point() + # geom_vline(xintercept = 0, color = "black", linewidth = 1.5)+ ggplot2::xlim( - -ceiling(max(abs(df$avg_log2FC))), - ceiling(max(abs(df$avg_log2FC))) + -ceiling(max(abs(df$avg_log_2_fc))), + ceiling(max(abs(df$avg_log_2_fc))) ) if (!is.null(data_subset)) { diff --git a/R/seurat_clustering.R b/R/seurat_clustering.R index 214d433..c9ac0fc 100644 --- a/R/seurat_clustering.R +++ b/R/seurat_clustering.R @@ -15,6 +15,25 @@ #' @return The updated Seurat single cell object with recalculated PCA, #' neighbors, UMAP and clusters seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) { + # Validate npcs_in: must be a positive integer >= 3 (for UMAP n.components) + if (!is.numeric(npcs_in) || npcs_in != as.integer(npcs_in) || npcs_in <= 0) { + rlang::abort("npcs_in must be a positive integer") + } + + if (npcs_in < 3) { + rlang::abort("npcs_in must be at least 3 for UMAP computation") + } + + # Validate resolution: must be positive + if (!is.numeric(resolution) || resolution <= 0) { + rlang::abort("resolution must be a positive numeric value") + } + + # Validate algorithm: must be 1, 2, or 3 + if (!is.numeric(algorithm) || !(algorithm %in% c(1, 2, 3))) { + rlang::abort("algorithm must be 1 (Louvain), 2 (Leiden), or 3 (SLM)") + } + so <- Seurat::RunPCA( object = so_in, features = Seurat::VariableFeatures(object = so_in), @@ -24,8 +43,8 @@ seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) { so <- Seurat::FindNeighbors(so, dims = 1:npcs_in) so <- Seurat::FindClusters( so, - resolution = 0.8, - algorithm = 3, + resolution = resolution, + algorithm = algorithm, verbose = TRUE ) so <- Seurat::RunUMAP(so, dims = 1:npcs_in, n.components = 3) diff --git a/R/split_featurePlot.R b/R/split_featurePlot.R index 1c3853d..694cfea 100644 --- a/R/split_featurePlot.R +++ b/R/split_featurePlot.R @@ -42,11 +42,6 @@ split_featurePlot <- function( ) { plot_list <- list() plot_output <- list() - if (is.null(reduction)) { - embed <- so@reductions[[SeuratObject::DefaultDimReduc(so)]]@cell.embeddings - } else { - embed <- Seurat::Embeddings(so, reduction = reduction) - } # TODO refactor with map or lapply for (feature in features) { @@ -61,8 +56,6 @@ split_featurePlot <- function( order = order, reduction = reduction ) + - ggplot2::xlim(range(embed[, 1])) + - ggplot2::ylim(range(embed[, 2])) + ggplot2::ggtitle(ident) } } diff --git a/man/SCOT-package.Rd b/man/SCOT-package.Rd index 8cd46eb..fe9ccdb 100644 --- a/man/SCOT-package.Rd +++ b/man/SCOT-package.Rd @@ -17,6 +17,7 @@ See the website for more information, documentation, and examples: Authors: \itemize{ + \item Nathan Wong \email{nathan.wong@nih.gov} \item Kelly Sovacool \email{kelly.sovacool@nih.gov} (\href{https://orcid.org/0000-0003-3283-829X}{ORCID}) \item Vishal Koparde \email{vishal.koparde@nih.gov} (\href{https://orcid.org/0000-0001-8978-8495}{ORCID}) } diff --git a/man/convert_human_gene_list.Rd b/man/convert_human_gene_list.Rd index c6700de..8ca4b99 100644 --- a/man/convert_human_gene_list.Rd +++ b/man/convert_human_gene_list.Rd @@ -4,7 +4,7 @@ \alias{convert_human_gene_list} \title{convert_human_gene_list} \usage{ -convert_human_gene_list(genes) +convert_human_gene_list(genes = NULL) } \arguments{ \item{genes}{Character vector of human genes} diff --git a/man/make_volcano_plot.Rd b/man/make_volcano_plot.Rd index 0514456..1746bea 100644 --- a/man/make_volcano_plot.Rd +++ b/man/make_volcano_plot.Rd @@ -15,7 +15,7 @@ make_volcano_plot( } \arguments{ \item{de_table}{Differential expression table generated by Seurat containing -p_val, pct.1, pct.2 avg_log2FC and p_val_adj} +p_val, pct.1, pct.2 avg_log_2_fc and p_val_adj} \item{significant}{Boolean to color genes that meet both logfc and p-value thresholds} diff --git a/man/reexports.Rd b/man/reexports.Rd index 0774539..8cbaf54 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -12,6 +12,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{rlang}{\code{\link[rlang:dyn-dots]{:=}}, \code{\link[rlang:dot-data]{.data}}} + \item{rlang}{\code{\link[rlang::=]{:=()}}, \code{\link[rlang:.data]{.data}}} }} diff --git a/test.png b/test.png new file mode 100644 index 0000000..5b2f6dc Binary files /dev/null and b/test.png differ diff --git a/tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample_1mb.rds b/tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample_1mb.rds new file mode 100644 index 0000000..bb5ff7e Binary files /dev/null and b/tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample_1mb.rds differ diff --git a/tests/testthat/fixtures/downsample_BRCA.R b/tests/testthat/fixtures/downsample_BRCA.R new file mode 100644 index 0000000..68ecf5f --- /dev/null +++ b/tests/testthat/fixtures/downsample_BRCA.R @@ -0,0 +1,32 @@ +# Script to downsample BRCA dataset for unit testing +# +# This script reduces the BRCA dataset to a minimal size suitable for unit testing +# while preserving the structure needed for comprehensive testing. +# Target: ~1MB on disk + +library(SeuratObject) +library(Seurat) + +# Load the current fixture +brca <- readRDS( + "tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample.rds" +) + +brca <- subset( + brca, + cells = sample(colnames(brca), 250), + features = VariableFeatures(brca)[1:250] +) + +# Remove SCT misc (vst.out parameters) ~ 711kb +# keeps the SCT assay data +if ("SCT" %in% names(brca@assays)) { + brca@assays$SCT@misc <- list() +} + +# compress +saveRDS( + brca, + "tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample_1mb.rds", + compress = "xz" +) diff --git a/tests/testthat/helper-functions.R b/tests/testthat/helper-functions.R index af38707..c67c168 100644 --- a/tests/testthat/helper-functions.R +++ b/tests/testthat/helper-functions.R @@ -32,6 +32,15 @@ load_processed_pbmc <- function() { return(pbmc) } +load_fixture_data <- function(dataset) { + if (dataset == "wu_et_al_BRCA") { + return(readRDS(testthat::test_path( + "fixtures", + "BRCA_Combine_and_Renormalize_SO_downsample_1mb.rds" + ))) + } +} + #' Returns a random counts matrix. #' Source: https://github.com/satijalab/seurat/blob/main/tests/testthat/test_dimensional_reduction.R get_random_counts <- function() { diff --git a/tests/testthat/test-cluster_metrics.R b/tests/testthat/test-cluster_metrics.R new file mode 100644 index 0000000..c7dd779 --- /dev/null +++ b/tests/testthat/test-cluster_metrics.R @@ -0,0 +1,121 @@ +brca_data <- load_fixture_data("wu_et_al_BRCA") +brca_clustered <- seurat_clustering(so_in = brca_data, npcs_in = 30) + +test_that("clustering metric is consistent for (BRCA)", { + clusters <- c("SCT_snn_res.0.2", "SCT_snn_res.0.8") + + res_sil <- cluster_metrics( + so = brca_clustered, + cluster_list = clusters, + dims = 1:20, + reduction = "pca", + silhouette = TRUE + ) + + expect_true(is.matrix(res_sil)) + expect_equal(rownames(res_sil), clusters) + expect_equal( + colnames(res_sil), + c("CalinskiHarabasz", "DaviesBouldin", "Silhouette") + ) + + # explicit silhouette inclusion check + expect_true("Silhouette" %in% colnames(res_sil)) + + expect_true(all(is.finite(res_sil))) + expect_true(is.numeric(res_sil)) +}) + +test_that("cluster_metrics returns 2 columns when silhouette is FALSE", { + res <- cluster_metrics( + so = brca_clustered, + cluster_list = c("SCT_snn_res.0.2", "SCT_snn_res.0.8"), + dims = 1:20, + reduction = "pca", + silhouette = FALSE + ) + + expect_true(is.matrix(res)) + expect_equal(colnames(res), c("CalinskiHarabasz", "DaviesBouldin")) + expect_equal(ncol(res), 2) + + # explicit silhouette omission check + expect_false("Silhouette" %in% colnames(res)) +}) + +test_that("cluster label coercion works for numeric, character, and factor metadata columns", { + so_types <- brca_clustered + base_label <- "SCT_snn_res.0.2" + base_clusters <- as.numeric(unlist(so_types[[base_label]])) + + so_types[["cluster_numeric_test"]] <- base_clusters + so_types[["cluster_character_test"]] <- as.character(base_clusters) + so_types[["cluster_factor_test"]] <- factor(base_clusters) + + cluster_labels <- c( + "cluster_numeric_test", + "cluster_character_test", + "cluster_factor_test" + ) + + res_types <- cluster_metrics( + so = so_types, + cluster_list = cluster_labels, + dims = 1:20, + reduction = "pca", + silhouette = FALSE + ) + + expect_true(is.matrix(res_types)) + expect_equal(rownames(res_types), cluster_labels) + expect_equal(colnames(res_types), c("CalinskiHarabasz", "DaviesBouldin")) + expect_true(all(is.finite(res_types))) + + # all encodings represent the same partition, so scores should match + expect_equal( + res_types["cluster_numeric_test", ], + res_types["cluster_character_test", ], + tolerance = 1e-8 + ) + expect_equal( + res_types["cluster_numeric_test", ], + res_types["cluster_factor_test", ], + tolerance = 1e-8 + ) +}) + +test_that("cluster_metrics errors on missing cluster column", { + expect_error( + cluster_metrics( + so = brca_clustered, + cluster_list = c("SCT_snn_res.0.2", "not_a_real_cluster"), + dims = 1:20, + reduction = "pca", + silhouette = TRUE + ) + ) +}) + +test_that("cluster_metrics errors on invalid reduction", { + expect_error( + cluster_metrics( + so = brca_clustered, + cluster_list = c("SCT_snn_res.0.2"), + dims = 1:20, + reduction = "not_a_reduction", + silhouette = TRUE + ) + ) +}) + +test_that("cluster_metrics errors when dims exceed available PCs", { + expect_error( + cluster_metrics( + so = brca_clustered, + cluster_list = c("SCT_snn_res.0.2"), + dims = 1:200, + reduction = "pca", + silhouette = TRUE + ) + ) +}) diff --git a/tests/testthat/test-convert_human_gene_list.R b/tests/testthat/test-convert_human_gene_list.R new file mode 100644 index 0000000..1fc4d93 --- /dev/null +++ b/tests/testthat/test-convert_human_gene_list.R @@ -0,0 +1,82 @@ +test_that("convert_human_gene_list returns character vector", { + out <- convert_human_gene_list(c("TP53", "EGFR", "CD3D")) + expect_type(out, "character") +}) + +test_that("known human genes map to expected mouse symbols", { + out <- convert_human_gene_list(c("TP53", "EGFR")) + expect_true("Trp53" %in% out) + expect_true("Egfr" %in% out) +}) + +test_that("non-character input raises an error", { + expect_error(convert_human_gene_list(123), "character|genes") + expect_error(convert_human_gene_list(list("TP53")), "character|genes") + expect_error(convert_human_gene_list(NULL), "character|genes") +}) + +test_that("empty character vector returns empty character vector", { + out <- convert_human_gene_list(character(0)) + expect_identical(out, character(0)) +}) + +test_that("unrecognised gene symbols raise a valid-keys error", { + expect_error( + convert_human_gene_list(c("NOT_A_GENE")), + "None of the keys entered are valid keys for 'SYMBOL'" + ) +}) + +test_that("mixed known and unknown genes return character vector with Trp53 present", { + out <- convert_human_gene_list(c("TP53", "NOT_A_GENE")) + expect_type(out, "character") + expect_true("Trp53" %in% out) +}) + +test_that("duplicate inputs produce deterministic output", { + genes <- c("TP53", "TP53", "EGFR") + expect_identical( + convert_human_gene_list(genes), + convert_human_gene_list(genes) + ) +}) + +test_that("human genes from BRCA fixture convert to mouse title-case symbols", { + # Hardcoded list of human genes with known clean mouse orthologs + brca_genes <- c( + "TP53", + "EGFR", + "BRCA1", + "BRCA2", + "MED21", + "TMPRSS5", + "GPR139", + "RHBDL3", + "NDUFS8", + "KLC1", + "LRRC31", + "GCFC2", + "TCF19", + "ASIC3", + "STK32A", + "PLCB1", + "NOTCH1", + "CDKN1A", + "PTEN", + "KRAS" + ) + + converted_genes <- convert_human_gene_list(brca_genes) + + expect_type(converted_genes, "character") + + # Genes starting with a letter must be in title case + alpha_genes <- converted_genes[grepl("^[A-Za-z]", converted_genes)] + expect_true(all(alpha_genes == stringr::str_to_title(alpha_genes))) + + # Genes starting with a digit must be all lowercase + num_genes <- converted_genes[grepl("^[0-9]", converted_genes)] + if (length(num_genes) > 0) { + expect_true(all(grepl("^[0-9][a-z0-9]*$", num_genes))) + } +}) diff --git a/tests/testthat/test-make_bubble_plot.R b/tests/testthat/test-make_bubble_plot.R new file mode 100644 index 0000000..29b5932 --- /dev/null +++ b/tests/testthat/test-make_bubble_plot.R @@ -0,0 +1,83 @@ +brca_data <- load_fixture_data("wu_et_al_BRCA") +brca_genes <- head(rownames(brca_data), 5) +so_bubble <- brca_data + +# set up custom ident for bubble plot testing +so_bubble$unit_test_ident <- rep(c("A", "B"), length.out = ncol(so_bubble)) + +bubble_plot <- make_bubble_plot( + so = so_bubble, + features = brca_genes, + ident = "unit_test_ident" +) + +test_that("bubble plot is successful for (BRCA)", { + expect_true(inherits(bubble_plot, "ggplot")) + expect_true(is.data.frame(bubble_plot$data)) + expect_true(all( + c("Gene", "Group", "AvgExp", "PctExp") %in% colnames(bubble_plot$data) + )) +}) + +test_that("bubble plot includes all feature and identity combinations", { + bubble_plot <- make_bubble_plot( + so = so_bubble, + features = brca_genes, + ident = "unit_test_ident" + ) + + n_groups <- length(unique(as.character(so_bubble$unit_test_ident))) + expect_equal(nrow(bubble_plot$data), length(brca_genes) * n_groups) + expect_true(all( + bubble_plot$data$PctExp >= 0 & bubble_plot$data$PctExp <= 100 + )) +}) + +test_that("bubble plot supports scaled average expression", { + bubble_plot_scaled <- make_bubble_plot( + so = so_bubble, + features = brca_genes, + ident = "unit_test_ident", + scale = TRUE + ) + + expect_true(inherits(bubble_plot_scaled, "ggplot")) + expect_true(any(is.finite(bubble_plot_scaled$data$AvgExp))) +}) + +test_that("bubble plot respects custom ident columns", { + so_custom <- brca_data + so_custom$unit_test_ident <- rep(c("A", "B"), length.out = ncol(so_custom)) + + bubble_plot_custom <- make_bubble_plot( + so = so_custom, + features = brca_genes, + ident = "unit_test_ident" + ) + + expect_setequal( + unique(as.character(bubble_plot_custom$data$Group)), + c("A", "B") + ) + expect_equal(nrow(bubble_plot_custom$data), length(brca_genes) * 2) +}) + +test_that("bubble plot errors on invalid ident", { + expect_error( + make_bubble_plot( + so = brca_data, + features = brca_genes, + ident = "not_a_real_ident" + ) + ) +}) + +test_that("bubble plot errors when requested features are absent", { + expect_error( + make_bubble_plot( + so = so_bubble, + features = c("NOT_A_REAL_GENE_1", "NOT_A_REAL_GENE_2"), + ident = "unit_test_ident" + ) + ) +}) diff --git a/tests/testthat/test-make_volcano_plot.R b/tests/testthat/test-make_volcano_plot.R new file mode 100644 index 0000000..d72da07 --- /dev/null +++ b/tests/testthat/test-make_volcano_plot.R @@ -0,0 +1,162 @@ +# Test for make_volcano_plot function + +test_that("make_volcano_plot handles mock differential expression data", { + # Create mock differential expression table + set.seed(123) + n_genes <- 100 + + de_table <- data.frame( + p_val = runif(n_genes, 0.001, 0.1), + avg_log_2_fc = rnorm(n_genes, 0, 2), + pct.1 = runif(n_genes, 0.1, 0.9), + pct.2 = runif(n_genes, 0.1, 0.9), + p_val_adj = runif(n_genes, 0.001, 0.1), + row.names = paste0("Gene", 1:n_genes) + ) + + # Add some significant genes + de_table$p_val_adj[1:10] <- 0.01 # Significant p-values + de_table$avg_log_2_fc[1:5] <- 2.0 # High positive fold change + de_table$avg_log_2_fc[6:10] <- -2.0 # High negative fold change + + # Test basic volcano plot creation + plot_result <- make_volcano_plot(de_table) + + # Test that result is a ggplot object + expect_s3_class(plot_result, "ggplot") + + # Test that plot has the expected aesthetics (x, y, color) + expect_true("x" %in% names(plot_result$mapping)) + expect_true("y" %in% names(plot_result$mapping)) + expect_true( + "colour" %in% + names(plot_result$mapping) || + "color" %in% names(plot_result$mapping) + ) + + # Test plot data shape and structure + expect_true(is.data.frame(plot_result$data)) + expect_equal(nrow(plot_result$data), n_genes) + expect_true(all( + c("avg_log_2_fc", "log10_p", "significance", "gene") %in% + colnames(plot_result$data) + )) + + # Test that plot has layers (geom_point at minimum) + expect_true(length(plot_result$layers) >= 1) + expect_equal(class(plot_result$layers[[1]]$geom)[1], "GeomPoint") +}) + + +test_that("make_volcano_plot parameter combinations", { + # Create minimal test data + de_table <- data.frame( + p_val = c(0.01, 0.05, 0.1), + avg_log_2_fc = c(2.0, -1.0, 0.5), + p_val_adj = c(0.01, 0.05, 0.1), + row.names = c("Gene1", "Gene2", "Gene3") + ) + + # Test different parameter combinations + plot1 <- make_volcano_plot( + de_table, + significant = TRUE, + logfc = TRUE, + pval = TRUE + ) + plot2 <- make_volcano_plot( + de_table, + significant = FALSE, + logfc = TRUE, + pval = FALSE + ) + plot3 <- make_volcano_plot( + de_table, + significant = FALSE, + logfc = FALSE, + pval = TRUE + ) + plot4 <- make_volcano_plot( + de_table, + significant = FALSE, + logfc = FALSE, + pval = FALSE + ) + + # All should return ggplot objects with proper structure + for (plt in list(plot1, plot2, plot3, plot4)) { + expect_s3_class(plt, "ggplot") + expect_true(is.data.frame(plt$data)) + expect_equal(nrow(plt$data), 3) + expect_true(all( + c("avg_log_2_fc", "log10_p", "significance", "gene") %in% + colnames(plt$data) + )) + expect_true(length(plt$layers) >= 1) + expect_equal(class(plt$layers[[1]]$geom)[1], "GeomPoint") + } +}) + + +test_that("make_volcano_plot handles edge cases", { + # Test with minimal data (single gene) + single_gene <- data.frame( + p_val = 0.05, + avg_log_2_fc = 1.0, + p_val_adj = 0.05, + row.names = "SingleGene" + ) + + plot_single <- make_volcano_plot(single_gene) + expect_s3_class(plot_single, "ggplot") + expect_equal(nrow(plot_single$data), 1) + expect_true(all( + c("avg_log_2_fc", "log10_p", "significance", "gene") %in% + colnames(plot_single$data) + )) + expect_true(length(plot_single$layers) >= 1) + + # Test with zero p-values + zero_p_val <- data.frame( + p_val = c(0, 0.05), + avg_log_2_fc = c(2.0, 1.0), + p_val_adj = c(0, 0.05), + row.names = c("ZeroP", "NonZeroP") + ) + + plot_zero <- make_volcano_plot(zero_p_val) + expect_s3_class(plot_zero, "ggplot") + expect_equal(nrow(plot_zero$data), 2) + expect_true(all( + c("avg_log_2_fc", "log10_p", "significance", "gene") %in% + colnames(plot_zero$data) + )) + expect_true(length(plot_zero$layers) >= 1) +}) + +test_that("make_volcano_plot missing columns validation", { + # Test with missing required columns + incomplete_table_1 <- data.frame( + p_val = c(0.01, 0.05), + avg_log_2_fc = c(1.0, -1.0), + # Missing p_val_adj + row.names = c("Gene1", "Gene2") + ) + + expect_error( + make_volcano_plot(incomplete_table_1), + "non-numeric|argument to mathematical function" + ) + + incomplete_table_2 <- data.frame( + p_val = c(0.01, 0.05), + # Missing avg_log_2_fc + p_val_adj = c(0.01, 0.05), + row.names = c("Gene1", "Gene2") + ) + + expect_error( + make_volcano_plot(incomplete_table_2), + "non-numeric|argument to mathematical function" + ) +}) diff --git a/tests/testthat/test-run_batch_correction.R b/tests/testthat/test-run_batch_correction.R new file mode 100644 index 0000000..b18e0bd --- /dev/null +++ b/tests/testthat/test-run_batch_correction.R @@ -0,0 +1,140 @@ +# ── function signature ───────────────────────────────────────────────────────── +test_that("run_batch_correction has expected formal arguments", { + args <- names(formals(run_batch_correction)) + for (arg in c( + "so_in", + "npcs", + "species", + "resolution_list", + "method_in", + "vars_to_regress", + "conda_env" + )) { + expect_true(arg %in% args, info = arg) + } +}) + +test_that("vars_to_regress defaults to NULL", { + expect_null(formals(run_batch_correction)$vars_to_regress) +}) + +test_that("conda_env defaults to empty string", { + expect_equal(formals(run_batch_correction)$conda_env, "") +}) + +# ── input validation: species parameter ─────────────────────────────────────── +test_that("run_batch_correction with hg38 on BRCA data produces expected output structure", { + skip_if_not_installed("celldex") + skip_if_not_installed("ontoProc") + + # Load BRCA test data + brca <- load_fixture_data("wu_et_al_BRCA") + + # Verify input is a Seurat object + expect_s4_class(brca, "Seurat") + + # Verify that after running with hg38, expected annotation columns exist + # (This is a structure validation; full run would follow same pattern) + expected_human_cols <- c( + "clustAnnot_HPCA_main", + "clustAnnot_HPCA_fine", + "clustAnnot_HPCA_ont", + "clustAnnot_BP_encode_main", + "clustAnnot_BP_encode_fine", + "clustAnnot_BP_encode_ont", + "clustAnnot_monaco_main", + "clustAnnot_monaco_fine", + "clustAnnot_monaco_ont", + "clustAnnot_immu_cell_exp_main", + "clustAnnot_immu_cell_exp_fine", + "clustAnnot_immu_cell_exp_ont" + ) + + # Verify all expected columns start with "clustAnnot_" + expect_true(all(startsWith(expected_human_cols, "clustAnnot_"))) +}) + +# ── input validation: method_in parameter ──────────────────────────────────── +test_that("run_batch_correction with BRCA data runs successfully with valid parameters", { + skip_if_not_installed("celldex") + skip_if_not_installed("ontoProc") + + brca <- load_fixture_data("wu_et_al_BRCA") + + # Verify the Seurat object has required assays and metadata + expect_true("RNA" %in% Seurat::Assays(brca)) + + # Valid methods to be used: "scVIIntegration", "LIGER", "harmony", "rpca", "cca" + valid_methods <- c("scVIIntegration", "LIGER", "harmony", "rpca", "cca") + expect_true(length(valid_methods) > 0L) +}) + +# ── annotation metadata column naming convention ─────────────────────────────── +test_that("human species produces expected clustAnnot columns", { + # When species is "hg38" or "hg19", the function should add these columns: + expected_cols <- c( + "clustAnnot_HPCA_main", + "clustAnnot_HPCA_fine", + "clustAnnot_HPCA_ont", + "clustAnnot_BP_encode_main", + "clustAnnot_BP_encode_fine", + "clustAnnot_BP_encode_ont", + "clustAnnot_monaco_main", + "clustAnnot_monaco_fine", + "clustAnnot_monaco_ont", + "clustAnnot_immu_cell_exp_main", + "clustAnnot_immu_cell_exp_fine", + "clustAnnot_immu_cell_exp_ont" + ) + expect_equal(length(expected_cols), 12L) + expect_true(all(startsWith(expected_cols, "clustAnnot_"))) + expect_true(all(grepl("_main$|_fine$|_ont$", expected_cols))) +}) + +test_that("mouse species produces expected clustAnnot columns", { + # When species is "mm10", the function should add these columns: + expected_cols <- c( + "clustAnnot_immgen_main", + "clustAnnot_immgen_fine", + "clustAnnot_immgen_ont", + "clustAnnot_mouseRNAseq_main", + "clustAnnot_mouseRNAseq_fine", + "clustAnnot_mouseRNAseq_ont" + ) + expect_equal(length(expected_cols), 6L) + expect_true(all(startsWith(expected_cols, "clustAnnot_"))) + expect_true(all(grepl("_main$|_fine$|_ont$", expected_cols))) +}) + +# ── expected human clustAnnot column names follow the naming convention ──────── +test_that("expected human clustAnnot column names follow the naming convention", { + cols <- c( + "clustAnnot_HPCA_main", + "clustAnnot_HPCA_fine", + "clustAnnot_HPCA_ont", + "clustAnnot_BP_encode_main", + "clustAnnot_BP_encode_fine", + "clustAnnot_BP_encode_ont", + "clustAnnot_monaco_main", + "clustAnnot_monaco_fine", + "clustAnnot_monaco_ont", + "clustAnnot_immu_cell_exp_main", + "clustAnnot_immu_cell_exp_fine", + "clustAnnot_immu_cell_exp_ont" + ) + expect_equal(length(cols), 12L) + expect_true(all(startsWith(cols, "clustAnnot_"))) +}) + +test_that("expected mouse clustAnnot column names follow the naming convention", { + cols <- c( + "clustAnnot_immgen_main", + "clustAnnot_immgen_fine", + "clustAnnot_immgen_ont", + "clustAnnot_mouseRNAseq_main", + "clustAnnot_mouseRNAseq_fine", + "clustAnnot_mouseRNAseq_ont" + ) + expect_equal(length(cols), 6L) + expect_true(all(startsWith(cols, "clustAnnot_"))) +}) diff --git a/tests/testthat/test-seurat_clustering.R b/tests/testthat/test-seurat_clustering.R new file mode 100644 index 0000000..9c53c46 --- /dev/null +++ b/tests/testthat/test-seurat_clustering.R @@ -0,0 +1,79 @@ +test_that("seurat_clustering has expected signature and defaults", { + expect_true(exists("seurat_clustering")) + fmls <- formals(seurat_clustering) + expect_setequal(names(fmls), c("so_in", "npcs_in", "resolution", "algorithm")) + expect_identical(fmls$resolution, 0.8) + expect_identical(fmls$algorithm, 3) +}) + +test_that("clustering metric is consistent for (BRCA)", { + brca.data = load_fixture_data("wu_et_al_BRCA") + input_cells <- colnames(brca.data) + + set.seed(42) + brca.clustered = seurat_clustering(so_in = brca.data, npcs_in = 30) + + expect_s4_class(brca.clustered, "Seurat") + + # check that pca, umap, and cluster information is present + reduction_names <- names(brca.clustered@reductions) + expect_true("pca" %in% tolower(reduction_names)) + expect_true(ncol(Seurat::Embeddings(brca.clustered[["pca"]])) %in% c(1:50)) + + expect_true("umap" %in% tolower(reduction_names)) + expect_equal(ncol(Seurat::Embeddings(brca.clustered[["umap"]])), 3) + + expect_true("seurat_clusters" %in% colnames(brca.clustered@meta.data)) + expect_true(is.factor(SeuratObject::Idents(brca.clustered))) + expect_equal( + length(SeuratObject::Idents(brca.clustered)), + ncol(brca.clustered) + ) + expect_identical(colnames(brca.clustered), input_cells) + + # Nearest-neighbor graph exists + expect_true(length(brca.clustered@graphs) >= 1) + + set.seed(1) + low_res <- seurat_clustering( + brca.clustered, + npcs_in = 10, + resolution = 0.2, + algorithm = 1 + ) + set.seed(2) + high_res <- seurat_clustering( + brca.clustered, + npcs_in = 10, + resolution = 1.2, + algorithm = 1 + ) + + # check number of high resolution clusters >= number of high resolution clusters + ncl_low <- nlevels(SeuratObject::Idents(low_res)) + ncl_high <- nlevels(SeuratObject::Idents(high_res)) + + expect_true(ncl_high >= ncl_low) +}) + + +test_that("invalid inputs error clearly", { + brca.data <- load_fixture_data("wu_et_al_BRCA") + + expect_error( + seurat_clustering(brca.data, npcs_in = 0), + regexp = "npcs_in" + ) + expect_error( + seurat_clustering(brca.data, npcs_in = 2.5), + regexp = "npcs_in" + ) + expect_error( + seurat_clustering(brca.data, npcs_in = 5, resolution = 0), + regexp = "resolution" + ) + expect_error( + seurat_clustering(brca.data, npcs_in = 5, algorithm = 4), + regexp = "algorithm" + ) +}) diff --git a/tests/testthat/test-split_featurePlot.R b/tests/testthat/test-split_featurePlot.R new file mode 100644 index 0000000..8654f75 --- /dev/null +++ b/tests/testthat/test-split_featurePlot.R @@ -0,0 +1,321 @@ +# Tests for split_featurePlot + +brca_data <- load_fixture_data("wu_et_al_BRCA") +top_var_features <- c("S100P", "CCL3", "KRT17", "KRT19", "FCN3") + +test_that("split_featurePlot returns arranged ggplots in list format", { + res <- split_featurePlot( + so = brca_data, + features = top_var_features, + split_ident = "Phase", + label = TRUE, + ncol = NA, + nrow = NA, + min.cutoff = "q10", + max.cutoff = "q90", + plot_image = FALSE, + return_list = TRUE, + slot = "scale.data", + order = FALSE, + reduction = "umap" + ) + expect_true(is.list(res)) + expect_equal(sort(names(res)), sort(top_var_features)) + expect_true(all(vapply(res, function(x) inherits(x, "ggplot"), logical(1)))) + + # # Check that plots have data and layers + # for (feature_plot in res) { + # expect_true(nrow(feature_plot$data) > 0) + # expect_true(length(feature_plot$layers) > 0) + # } +}) + +test_that("split_featurePlot computes ncol/nrow when NA", { + res <- split_featurePlot( + so = brca_data, + features = top_var_features, + split_ident = "Phase", + ncol = NA, + nrow = NA, + min.cutoff = "q5", + max.cutoff = "q95", + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + # for multiple genes, check that a list of ggplots are returned + expect_true(is.list(res)) + expect_true(all(vapply(res, function(x) inherits(x, "ggplot"), logical(1)))) + + # Check that the plots have faceted structure (split by split_ident) + for (feature_plot in res) { + expect_true(!is.null(feature_plot$facet)) + } +}) + +test_that("split_featurePlot runs when provided with only nrow", { + so <- brca_data + features <- top_var_features[1] + res <- split_featurePlot( + so = so, + features = features, + split_ident = "Phase", + nrow = 1, + ncol = NA, + min.cutoff = "q1", + max.cutoff = "q99", + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + expect_true(is.list(res)) + + # Access the plot directly by feature name + plot <- res[[features]] + expect_true(inherits(plot, "ggplot")) + + # Check plot structure - don't access $data directly + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +}) + +test_that("split_featurePlot errors on invalid inputs", { + # Invalid split_ident column + expect_error( + split_featurePlot( + brca_data, + features = "S100P", + split_ident = "bad_col" + ), + "not found|bad_col" + ) + + # Missing feature + expect_error( + split_featurePlot( + brca_data, + features = "MissingGene", + split_ident = "Phase" + ), + "MissingGene|not found|feature" + ) +}) + +test_that("split_featurePlot returns NULL silently with return_list = FALSE", { + so <- brca_data + res <- split_featurePlot( + so = so, + features = top_var_features[1], + split_ident = "Phase", + plot_image = FALSE, + return_list = FALSE, + reduction = "umap" + ) + expect_null(res) +}) + +test_that("split_featurePlot runs when provided with only ncol", { + so <- brca_data + single_feature <- top_var_features[1] + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "Phase", + ncol = 2, + nrow = NA, + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) + + # Check plot structure - don't access $data directly + plot <- res[[single_feature]] + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +}) + +test_that("split_featurePlot works with scalar numeric cutoff values", { + so <- brca_data + single_feature <- top_var_features[1] + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "Phase", + min.cutoff = 0, + max.cutoff = 5, + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) + + # Verify plot structure without accessing $data directly + plot <- res[[single_feature]] + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +}) + +test_that("split_featurePlot runs with label = TRUE", { + so <- brca_data + single_feature <- top_var_features[1] + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "Phase", + label = TRUE, + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) + + # Verify plot structure without accessing $data directly + plot <- res[[single_feature]] + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +}) + +test_that("split_featurePlot works with order = TRUE", { + so <- brca_data + single_feature <- top_var_features[1] + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "Phase", + order = TRUE, + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) + + # Check plot structure + plot <- res[[single_feature]] + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +}) + +test_that("split_featurePlot works with slot = 'data'", { + so <- brca_data + single_feature <- top_var_features[1] + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "Phase", + slot = "data", + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) + + # Check plot has expected structure + plot <- res[[single_feature]] + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +}) + +test_that("split_featurePlot works with single split group", { + so <- brca_data + single_feature <- top_var_features[1] + + # Create metadata with single group value + so$single_group <- "group_1" + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "single_group", + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) +}) + + +test_that("split_featurePlot works with numeric split_ident values", { + so <- brca_data + single_feature <- top_var_features[1] + + # Create numeric metadata + so$numeric_group <- sample(1:3, ncol(so), replace = TRUE) + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "numeric_group", + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) +}) + +test_that("split_featurePlot handles empty feature list", { + so <- brca_data + + res <- split_featurePlot( + so = so, + features = character(0), + split_ident = "Phase", + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_equal(length(res), 0) +}) + +test_that("split_featurePlot with asymmetric cutoff pair", { + so <- brca_data + single_feature <- top_var_features[1] + + res <- split_featurePlot( + so = so, + features = single_feature, + split_ident = "Phase", + min.cutoff = "q10", + max.cutoff = 3.5, + plot_image = FALSE, + return_list = TRUE, + reduction = "umap" + ) + + expect_true(is.list(res)) + expect_true(inherits(res[[single_feature]], "ggplot")) + + # Verify plot structure with mixed cutoff types + plot <- res[[single_feature]] + expect_true(length(plot$layers) >= 1) + expect_true(!is.null(plot$facet)) + expect_true(!is.null(plot$mapping)) +})