diff --git a/DESCRIPTION b/DESCRIPTION index be8ab16..0a3395d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,13 @@ Package: datadiff -Title: Data Validation Based on YAML Rules -Version: 0.4.6 +Title: Data Validation Based on 'YAML' Rules +Version: 0.4.7 Authors@R: c( person("Vincent", "Guyader", , "vincent@thinkr.fr", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-0671-9270")), person("ThinkR", role = "cph"), person("Agence technique de l'information sur l'hospitalisation", role = "spn") ) Description: A comprehensive data validation package that allows comparing - datasets using configurable validation rules defined in YAML files. + datasets using configurable validation rules defined in 'YAML' files. Built on top of the 'pointblank' package for robust data validation, it supports exact matching, tolerance-based numeric comparisons, text normalization, and row count validation. diff --git a/NEWS.md b/NEWS.md index a0721b9..729cf36 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,28 @@ +# datadiff 0.4.7 + +## Bug fixes + +* The lazy HTML report now keeps the genuine failure detail. Printing + `result$reponse` (or calling `datadiff_report_html()`) on a failing + comparison again shows, for each failing column, the number of failing rows, + the offending cells, and the downloadable CSV extract - while still listing + every check performed, with the `col_exists` existence checks and the + `col_vals_equal` value checks presented as distinct steps. The report is now + built on top of the real interrogated agent (which holds the extracts) rather + than a counts-only synthesis, so nothing is lost. + +* `build_report_agent()` threads `warn_at` / `stop_at` so the report's + warn/stop colouring follows the supplied thresholds instead of hard-coded + values. + +## Documentation + +* Vignette and README document `coverage` / `summary` and the lazy report, and + are normalised to ASCII. +* DESCRIPTION quotes software names ('pointblank', 'YAML') per CRAN convention, + clearing the "Possibly misspelled words" NOTE. +* Internal helpers are marked `@noRd` (documentation kept in source, no `.Rd`). + # datadiff 0.4.6 ## New features diff --git a/R/coverage.R b/R/coverage.R index 8666d63..b656024 100644 --- a/R/coverage.R +++ b/R/coverage.R @@ -47,6 +47,12 @@ build_coverage <- function(tbl, tol_cols, eq_cols, n_failed <<- c(n_failed, as.integer(nf)) } + # Existence checks: every common (non-type-mismatch) column gets a col_exists + # check, distinct from its value check. These pass (the column is present in + # both datasets by construction), mirroring a full per-column pointblank run. + for (c in c(tol_cols, eq_cols)) { + add(c, "col_exists", 1L, 0L) + } for (c in tol_cols) { cnt <- tol_col_counts(tbl, col = c) add(c, "tolerance", cnt$n, cnt$n_failed) diff --git a/R/report.R b/R/report.R index 19aa611..cfbc271 100644 --- a/R/report.R +++ b/R/report.R @@ -8,11 +8,119 @@ # per-column interrogation. The build cost is paid only when the report is # actually displayed, and memoized so repeated prints are instant. -# Build a pointblank agent whose report mirrors the coverage table, populating -# the interrogation result columns directly (no interrogate() scan). +# Coverage column name that a real validation step maps to (undo the internal +# __ok / __eq / dummy-column naming used when the agent was built). +report_underlying_col <- function(col) { + if (identical(col, "row_count_ok")) { + return("") + } + if (startsWith(col, "__missing_col_")) { + return(sub("^__missing_col_", "", col)) + } + if (startsWith(col, "__type_mismatch_")) { + return(sub("^__type_mismatch_", "", col)) + } + sub("__(ok|eq)$", "", col) +} + +# Build a pointblank agent whose report mirrors the coverage table. +# +# When `real_agent` (the genuinely interrogated agent from the comparison) is +# supplied, the report is built ON TOP of it so that failing columns keep their +# REAL data extract (failing rows + CSV download) and the agent stays marked as +# interrogated. Its validation set is rebuilt to list every coverage row in +# order: the real step is kept (and relabelled) for columns it validated, and a +# synthetic passing row is added for the rest. Without `real_agent`, a purely +# synthetic count-only agent is produced (no extracts). build_report_agent <- function(coverage, label, lang = "fr", locale = "fr_FR", - warn_at = 1e-14, stop_at = 1e-14) { + warn_at = 1e-14, stop_at = 1e-14, + real_agent = NULL) { n <- nrow(coverage) + + # Augment the real interrogated agent only when it has genuine failures + # (real extracts to preserve). On an all-pass comparison the real agent is the + # minimal placeholder (a single col_exists step); cloning its template would + # mislabel every value check as col_exists, so fall through to the synthetic + # col_vals_equal build instead. + if (!is.null(real_agent) && + any(real_agent$validation_set$n_failed > 0, na.rm = TRUE)) { + rvs <- real_agent$validation_set + real_cols <- vapply(seq_len(nrow(rvs)), function(j) { + report_underlying_col(rvs$column[[j]][1]) + }, character(1)) + + # Extracts (the failing-row data + CSV) live in `agent$extracts`, a list + # keyed by step index `i`. Reindexing the steps means remapping those keys. + old_extracts <- real_agent$extracts %||% list() + template <- rvs[1, , drop = FALSE] + rows <- vector("list", n) + new_extracts <- list() + for (i in seq_len(n)) { + col <- coverage$column[i] + # Only value checks (tolerance / equality) map to a real step: that is + # where the genuine row-level extract matters. col_exists and the + # structural checks (missing_column, type_mismatch, row_count) are + # synthesized from coverage - mapping them to a real step would pull in + # per-row dummy results (e.g. n_failed = nrow) that contradict coverage. + j <- if (coverage$check[i] %in% c("tolerance", "equality")) { + match(col, real_cols) + } else { + NA_integer_ + } + if (!is.na(j)) { + # Genuine interrogated step: keep it, and carry its real extract over to + # the new step index. + row <- rvs[j, , drop = FALSE] + old_key <- as.character(rvs$i[j]) + if (!is.null(old_extracts[[old_key]])) { + new_extracts[[as.character(i)]] <- old_extracts[[old_key]] + } + } else { + # Column the targeted agent did not validate (it passed): synthesise a + # passing row from the real-row template. + row <- template + row$eval_error <- FALSE + row$eval_warning <- FALSE + row$n <- as.numeric(coverage$n[i]) + row$n_passed <- as.numeric(coverage$n[i] - coverage$n_failed[i]) + row$n_failed <- as.numeric(coverage$n_failed[i]) + row$f_passed <- if (coverage$n[i] > 0) row$n_passed / coverage$n[i] else 1 + row$f_failed <- if (coverage$n[i] > 0) coverage$n_failed[i] / coverage$n[i] else 0 + row$all_passed <- coverage$n_failed[i] == 0L + row$warn <- FALSE + row$stop <- FALSE + row$notify <- FALSE + } + row$column <- list(coverage$column[i]) + row$label <- coverage$check[i] + rows[[i]] <- row + } + new_vs <- dplyr::bind_rows(rows) + new_vs$i <- seq_len(nrow(new_vs)) + new_vs$assertion_type <- ifelse(coverage$check == "col_exists", + "col_exists", "col_vals_equal") + # Make every result column authoritative from coverage so the report is + # consistent with res$coverage / res$summary and warn_at/stop_at apply + # uniformly. The genuine row-level extracts are preserved separately in + # new_extracts (keyed by the new step index). + np <- coverage$n - coverage$n_failed + new_vs$n <- as.numeric(coverage$n) + new_vs$n_passed <- as.numeric(np) + new_vs$n_failed <- as.numeric(coverage$n_failed) + new_vs$f_passed <- ifelse(coverage$n > 0, np / coverage$n, 1) + new_vs$f_failed <- ifelse(coverage$n > 0, coverage$n_failed / coverage$n, 0) + new_vs$all_passed <- coverage$n_failed == 0L + new_vs$warn <- coverage$n_failed > 0L & new_vs$f_failed >= warn_at + new_vs$stop <- coverage$n_failed > 0L & new_vs$f_failed >= stop_at + new_vs$notify <- rep(FALSE, nrow(new_vs)) + new_vs$eval_error <- rep(FALSE, nrow(new_vs)) + new_vs$eval_warning <- rep(FALSE, nrow(new_vs)) + real_agent$validation_set <- new_vs + real_agent$extracts <- new_extracts + return(real_agent) + } + + # No real agent: synthetic count-only report. dummy_ncol <- max(n, 1L) dummy <- as.data.frame( matrix(TRUE, nrow = 1L, ncol = dummy_ncol) @@ -46,6 +154,8 @@ build_report_agent <- function(coverage, label, lang = "fr", locale = "fr_FR", vs$warn <- coverage$n_failed > 0L & vs$f_failed >= warn_at vs$stop <- coverage$n_failed > 0L & vs$f_failed >= stop_at vs$notify <- rep(FALSE, n) + vs$assertion_type <- ifelse(coverage$check == "col_exists", + "col_exists", "col_vals_equal") agent$validation_set <- vs agent } @@ -80,7 +190,8 @@ datadiff_render_report <- function(x) { lang = attr(x, "datadiff_lang") %||% "fr", locale = attr(x, "datadiff_locale") %||% "fr_FR", warn_at = attr(x, "datadiff_warn_at") %||% 1e-14, - stop_at = attr(x, "datadiff_stop_at") %||% 1e-14 + stop_at = attr(x, "datadiff_stop_at") %||% 1e-14, + real_agent = x ) ) } @@ -130,7 +241,8 @@ datadiff_report_html <- function(res, file = NULL) { lang = attr(reponse, "datadiff_lang") %||% "fr", locale = attr(reponse, "datadiff_locale") %||% "fr_FR", warn_at = attr(reponse, "datadiff_warn_at") %||% 1e-14, - stop_at = attr(reponse, "datadiff_stop_at") %||% 1e-14 + stop_at = attr(reponse, "datadiff_stop_at") %||% 1e-14, + real_agent = reponse ) report <- pointblank::get_agent_report(agent) if (!is.null(file)) { diff --git a/tests/testthat/test-coverage.R b/tests/testthat/test-coverage.R index 072b0c3..1b2a938 100644 --- a/tests/testthat/test-coverage.R +++ b/tests/testthat/test-coverage.R @@ -17,11 +17,14 @@ test_that("tolerance columns appear once each with correct counts", { row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, ref_suffix = "__reference", na_equal = TRUE ) - expect_equal(cov$column, c("a", "b")) - expect_equal(cov$check, c("tolerance", "tolerance")) - expect_equal(cov$n, c(3L, 3L)) - expect_equal(cov$n_failed, c(0L, 1L)) - expect_equal(cov$status, c("PASS", "FAIL")) + tol <- cov[cov$check == "tolerance", ] + expect_equal(tol$column, c("a", "b")) + expect_equal(tol$n, c(3L, 3L)) + expect_equal(tol$n_failed, c(0L, 1L)) + expect_equal(tol$status, c("PASS", "FAIL")) + # each column also gets a (passing) col_exists check, distinct from the value check + expect_setequal(cov$column[cov$check == "col_exists"], c("a", "b")) + expect_true(all(cov$status[cov$check == "col_exists"] == "PASS")) }) # --- build_coverage: equality, lazy (__eq) and local (recompute) ------------ @@ -34,10 +37,11 @@ test_that("equality columns use pre-computed __eq when present (lazy path)", { row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, ref_suffix = "__reference", na_equal = TRUE ) - expect_equal(cov$check, "equality") - expect_equal(cov$n, 2L) - expect_equal(cov$n_failed, 1L) - expect_equal(cov$status, "FAIL") + eq <- cov[cov$check == "equality", ] + expect_equal(eq$n, 2L) + expect_equal(eq$n_failed, 1L) + expect_equal(eq$status, "FAIL") + expect_equal(cov$column[cov$check == "col_exists"], "txt") }) test_that("equality columns recompute from raw values when no __eq (local path)", { @@ -52,9 +56,10 @@ test_that("equality columns recompute from raw values when no __eq (local path)" row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, ref_suffix = "__reference", na_equal = TRUE ) - expect_equal(cov$n, 3L) - expect_equal(cov$n_failed, 1L) - expect_equal(cov$status, "FAIL") + eq <- cov[cov$check == "equality", ] + expect_equal(eq$n, 3L) + expect_equal(eq$n_failed, 1L) + expect_equal(eq$status, "FAIL") }) test_that("equality NA semantics follow na_equal (local path)", { @@ -67,8 +72,10 @@ test_that("equality NA semantics follow na_equal (local path)", { list(check_count = FALSE), TRUE, "__reference", TRUE) cov_f <- build_coverage(tbl, character(0), "txt", character(0), character(0), list(check_count = FALSE), TRUE, "__reference", FALSE) - expect_equal(cov_t$n_failed, 0L) # NA both sides + na_equal TRUE -> pass - expect_equal(cov_f$n_failed, 1L) # na_equal FALSE -> the NA row fails + eq_t <- cov_t[cov_t$check == "equality", ] + eq_f <- cov_f[cov_f$check == "equality", ] + expect_equal(eq_t$n_failed, 0L) # NA both sides + na_equal TRUE -> pass + expect_equal(eq_f$n_failed, 1L) # na_equal FALSE -> the NA row fails }) # --- build_coverage: structural checks -------------------------------------- @@ -115,7 +122,7 @@ test_that("row_count appears only when check_count is enabled, with right status # --- exhaustivity & degenerate ----------------------------------------------- -test_that("every column appears exactly once across check types", { +test_that("every column is covered: common columns get col_exists + a value check", { tbl <- data.frame(a__ok = c(TRUE, TRUE), b__ok = c(TRUE, FALSE), t__eq = c(TRUE, TRUE)) cov <- build_coverage( @@ -124,9 +131,13 @@ test_that("every column appears exactly once across check types", { row_validation_info = list(check_count = TRUE), row_count_ok = TRUE, ref_suffix = "__reference", na_equal = TRUE ) - value_and_struct <- cov[cov$check != "row_count", "column"] - expect_setequal(value_and_struct, c("a", "b", "t", "m", "z")) - expect_equal(anyDuplicated(value_and_struct), 0L) + # common columns each appear as a col_exists AND a value (tolerance/equality) check + expect_setequal(cov$column[cov$check == "col_exists"], c("a", "b", "t")) + value <- cov[cov$check %in% c("tolerance", "equality"), "column"] + expect_setequal(value, c("a", "b", "t")) + # structural checks appear once each + expect_equal(cov$column[cov$check == "missing_column"], "m") + expect_equal(cov$column[cov$check == "type_mismatch"], "z") }) test_that("empty inputs yield a 0-row coverage with the right columns", { @@ -150,9 +161,10 @@ test_that("summarize_coverage aggregates checks and pass/fail counts", { row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, ref_suffix = "__reference", na_equal = TRUE ) + # col_exists(a) + col_exists(b) + tolerance(a FAIL) + tolerance(b PASS) s <- summarize_coverage(cov) - expect_equal(s$n_checks, 2L) - expect_equal(s$n_pass, 1L) + expect_equal(s$n_checks, 4L) + expect_equal(s$n_pass, 3L) expect_equal(s$n_fail, 1L) expect_false(s$all_passed) }) @@ -170,9 +182,10 @@ cov_from <- function(tbl, tol_cols, eq_cols, missing = character(0), test_that("print shows an all-PASS roll-up when everything passes", { cov <- cov_from(data.frame(a__ok = c(TRUE, TRUE), b__ok = c(TRUE, TRUE)), c("a", "b"), character(0)) + # 2 col_exists + 2 tolerance, all passing out <- paste(capture.output(print(cov)), collapse = "\n") - expect_match(out, "2 checks") - expect_match(out, "2 PASS") + expect_match(out, "4 checks") + expect_match(out, "4 PASS") expect_match(out, "0 FAIL") }) @@ -188,7 +201,8 @@ test_that("print lists failing checks first when there are failures", { test_that("print handles an all-fail table", { cov <- cov_from(data.frame(a__ok = c(FALSE, FALSE)), "a", character(0)) out <- paste(capture.output(print(cov)), collapse = "\n") - expect_match(out, "1 checks - 0 PASS, 1 FAIL") + # col_exists(a) passes, tolerance(a) fails + expect_match(out, "2 checks - 1 PASS, 1 FAIL") }) test_that("print handles an empty coverage table without error", { @@ -233,5 +247,7 @@ test_that("coverage summary verdict matches all_passed on a failing comparison", ) expect_false(res$all_passed) expect_equal(res$summary$all_passed, res$all_passed) - expect_equal(res$coverage[res$coverage$column == "a", "status"], "FAIL") + cov <- res$coverage + expect_equal(cov[cov$column == "a" & cov$check == "tolerance", "status"], "FAIL") + expect_equal(cov[cov$column == "a" & cov$check == "col_exists", "status"], "PASS") }) diff --git a/tests/testthat/test-internal-coverage.R b/tests/testthat/test-internal-coverage.R new file mode 100644 index 0000000..d2b6fa9 --- /dev/null +++ b/tests/testthat/test-internal-coverage.R @@ -0,0 +1,55 @@ +# Targeted tests for internal branches that the main suite did not exercise. + +test_that("report_underlying_col maps every internal naming scheme", { + expect_equal(report_underlying_col("a__ok"), "a") + expect_equal(report_underlying_col("a__eq"), "a") + expect_equal(report_underlying_col("row_count_ok"), "") + expect_equal(report_underlying_col("__missing_col_b"), "b") + expect_equal(report_underlying_col("__type_mismatch_c"), "c") + expect_equal(report_underlying_col("plain"), "plain") +}) + +test_that("build_report_agent returns an agent for empty coverage (no real agent)", { + empty <- build_coverage( + tbl = data.frame(), tol_cols = character(0), eq_cols = character(0), + missing_in_candidate = character(0), type_mismatch_cols = character(0), + row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, + ref_suffix = "__reference", na_equal = TRUE + ) + ag <- build_report_agent(empty, label = "L", lang = "en", locale = "en_US") + expect_s3_class(ag, "ptblank_agent") +}) + +test_that("datadiff_report_html errors when the result has no coverage", { + expect_error(datadiff_report_html(list(reponse = NULL)), "coverage") +}) + +test_that("datadiff_render_report tolerates a missing cache environment", { + ref <- data.frame(id = 1:3, .row = 1L, a = c(1, 2, 3) * 1.0) + tmp <- tempfile(fileext = ".yml") + on.exit(unlink(tmp), add = TRUE) + write_rules_template(ref, key = c("id", ".row"), numeric_abs = 0.101, + integer_abs = 0L, path = tmp) + res <- suppressMessages( + compare_datasets_from_yaml(ref, ref, key = c("id", ".row"), path = tmp) + ) + x <- res$reponse + attr(x, "datadiff_render") <- NULL # force the fallback branch + rep <- datadiff_render_report(x) + expect_s3_class(rep, "gt_tbl") +}) + +test_that("print.datadiff_coverage truncates beyond 50 checks", { + p <- 60L + tbl <- as.data.frame(matrix(TRUE, nrow = 1L, ncol = p)) + names(tbl) <- paste0("c", seq_len(p), "__ok") + cov <- build_coverage( + tbl = tbl, tol_cols = paste0("c", seq_len(p)), eq_cols = character(0), + missing_in_candidate = character(0), type_mismatch_cols = character(0), + row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, + ref_suffix = "__reference", na_equal = TRUE + ) + expect_gt(nrow(cov), 50L) + out <- paste(capture.output(print(cov)), collapse = "\n") + expect_match(out, "more checks") +}) diff --git a/tests/testthat/test-report.R b/tests/testthat/test-report.R index 89e6821..42c044a 100644 --- a/tests/testthat/test-report.R +++ b/tests/testthat/test-report.R @@ -51,7 +51,7 @@ test_that("build_report_agent warn/stop honour the supplied thresholds", { row_validation_info = list(check_count = FALSE), row_count_ok = TRUE, ref_suffix = "__reference", na_equal = TRUE ) - b_row <- which(vapply(cov$column, identical, logical(1), "b")) + b_row <- which(cov$column == "b" & cov$check == "tolerance") # Thresholds ABOVE the fraction: column b should stay green. ag_loose <- build_report_agent( @@ -70,6 +70,131 @@ test_that("build_report_agent warn/stop honour the supplied thresholds", { expect_true(ag_tight$validation_set$stop[b_row]) }) +test_that("failing report merges the real extract and lists every column", { + ref <- data.frame(id = 1:5, .row = 1L, a = c(1, 2, 3, 4, 5) * 1.0, + b = c(10, 20, 30, 40, 50) * 1.0) + cand <- ref + cand$a[2] <- 999 # 2 failing rows on column a; b passes + cand$a[4] <- 888 + tmp <- tempfile(fileext = ".yml") + on.exit(unlink(tmp), add = TRUE) + write_rules_template(ref, key = c("id", ".row"), numeric_abs = 0.101, + integer_abs = 0L, path = tmp) + res <- suppressMessages( + compare_datasets_from_yaml(ref, cand, key = c("id", ".row"), path = tmp) + ) + expect_false(res$all_passed) + + ag <- build_report_agent(res$coverage, label = "L", lang = "en", + locale = "en_US", real_agent = res$reponse) + vs <- ag$validation_set + + # every coverage column is represented (full "X tests" overview) + expect_setequal(unlist(vs$column), unique(res$coverage$column)) + + # the failing column 'a' VALUE check keeps the REAL extract (2 failing rows), + # carried over to the new step index in agent$extracts + is_a <- vapply(vs$column, identical, logical(1), "a") + i_a <- which(is_a & vs$assertion_type == "col_vals_equal") + expect_length(i_a, 1L) + extract_a <- ag$extracts[[as.character(vs$i[i_a])]] + expect_false(is.null(extract_a)) + expect_equal(nrow(as.data.frame(extract_a)), 2L) + expect_equal(vs$n_failed[i_a], 2) + + # passing column 'b' value check has no extract + is_b <- vapply(vs$column, identical, logical(1), "b") + i_b <- which(is_b & vs$assertion_type == "col_vals_equal") + expect_null(ag$extracts[[as.character(vs$i[i_b])]]) + + expect_s3_class(pointblank::get_agent_report(ag), "gt_tbl") +}) + +test_that("col_exists rows stay PASS even when the column's value check fails", { + ref <- mk_ref(); cand <- ref + cand$a[2] <- 999 # value check for 'a' fails + res <- run(ref, cand) + expect_false(res$all_passed) + ag <- build_report_agent(res$coverage, label = "L", lang = "en", + locale = "en_US", real_agent = res$reponse) + vs <- ag$validation_set + is_a <- vapply(vs$column, identical, logical(1), "a") + + # the col_exists check for 'a' must remain PASS, with no extract + ce <- which(is_a & vs$assertion_type == "col_exists") + expect_length(ce, 1L) + expect_equal(vs$n_failed[ce], 0) + expect_null(ag$extracts[[as.character(vs$i[ce])]]) + + # the value check for 'a' still fails and keeps its extract + cv <- which(is_a & vs$assertion_type == "col_vals_equal") + expect_equal(vs$n_failed[cv], 1) + expect_false(is.null(ag$extracts[[as.character(vs$i[cv])]])) +}) + +test_that("report counts match coverage for structural checks (augment path)", { + ref <- data.frame(id = 1:5, .row = 1L, a = c(1, 2, 3, 4, 5) * 1.0, + b = c(1, 2, 3, 4, 5) * 1.0) + cand <- ref + cand$b <- NULL # column b missing (structural) + cand$a[2] <- 999 # value check on a fails (keeps the augment path active) + res <- run(ref, cand) + expect_false(res$all_passed) + + ag <- build_report_agent(res$coverage, label = "L", lang = "en", + locale = "en_US", real_agent = res$reponse) + vs <- ag$validation_set + + # the missing_column row must report coverage's counts (1/1), not nrow + cov_b <- res$coverage[res$coverage$column == "b" & + res$coverage$check == "missing_column", ] + i_b <- which(vapply(vs$column, identical, logical(1), "b")) + expect_equal(vs$n[i_b], cov_b$n) + expect_equal(vs$n_failed[i_b], cov_b$n_failed) + + # every report row's counts line up with the coverage row in the same position + expect_equal(vs$n_failed, res$coverage$n_failed) + expect_equal(vs$n, res$coverage$n) + + # the failing value column 'a' still keeps its real extract + is_a <- vapply(vs$column, identical, logical(1), "a") + i_a <- which(is_a & vs$assertion_type == "col_vals_equal") + expect_false(is.null(ag$extracts[[as.character(vs$i[i_a])]])) +}) + +test_that("datadiff_report_html on a failing comparison contains the failing values", { + ref <- data.frame(id = 1:5, .row = 1L, a = c(1, 2, 3, 4, 5) * 1.0) + cand <- ref + cand$a[2] <- 999 + tmp <- tempfile(fileext = ".yml") + on.exit(unlink(tmp), add = TRUE) + write_rules_template(ref, key = c("id", ".row"), numeric_abs = 0.101, + integer_abs = 0L, path = tmp) + res <- suppressMessages( + compare_datasets_from_yaml(ref, cand, key = c("id", ".row"), path = tmp) + ) + out <- tempfile(fileext = ".html") + on.exit(unlink(out), add = TRUE) + datadiff_report_html(res, file = out) + html <- paste(readLines(out, warn = FALSE), collapse = "\n") + expect_true(grepl("999", html)) # the failing value is in the report + expect_true(grepl("CSV", html)) # the extract is downloadable +}) + +test_that("report presents col_exists AND col_vals_equal as distinct checks", { + res <- run(mk_ref(), mk_ref()) # all green + expect_true(res$all_passed) + ag <- build_report_agent(res$coverage, label = "L", lang = "en", + locale = "en_US", real_agent = res$reponse) + types <- ag$validation_set$assertion_type + # both kinds of check must appear, not collapsed into one + expect_true("col_exists" %in% types) + expect_true("col_vals_equal" %in% types) + # the existence checks line up with the coverage col_exists rows + expect_equal(sum(types == "col_exists"), + sum(res$coverage$check == "col_exists")) +}) + test_that("get_agent_report renders the injected agent without error", { cov <- build_coverage( tbl = data.frame(a__ok = c(TRUE, TRUE), b__ok = c(TRUE, FALSE)),