diff --git a/CHANGELOG.md b/CHANGELOG.md index bab5d636..0938c80b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,44 @@ follow semantic versioning; release dates are ISO 8601. Open cycle — bug-fix / housekeeping. Entries land here as they merge. +### Performance + +- **Text wrapping stops re-measuring the growing line prefix.** The greedy line + wrapper in `TextFlowSupport` now keeps a running line width and measures each + token once, instead of re-measuring the whole accumulated line on every token. + This removes O(line-length × tokens) measured-character work — and the + per-glyph sanitize/encode it triggered — from paragraph layout. **Output is + byte-identical: all layout and visual-regression snapshots pass unchanged.** + The effect is workload-dependent and concentrated in long-text documents; + measured locally (same-session A/B, full profile) a long multi-page proposal + rendered markedly faster, and a measurement-count probe showed ~9× fewer + measured characters on a long paragraph. No public API or behaviour change. + +- **Long-token line breaking is no longer quadratic.** `TextFlowSupport.fitCharacters` + now binary-searches the break point instead of re-measuring every growing prefix + one character at a time. For an unbreakable run (long URL/ID, no-space CJK, or a + very narrow column) this cuts measurement calls and measured characters by + ~80–85% (probe: 652 → 97 width calls, 36k → 7k measured chars on a 600-char + token). **Output is byte-identical** — the fit predicate is monotonic, so the + search returns the same break index. No public API or behaviour change. + +### Tests / tooling + +- **Benchmark regression gate and measurement probe (benchmarks module, not part + of the published library).** `BenchmarkVerdictTool` compares a current-speed run + to the committed baseline (`baselines/current-speed-full.json`) and reports + improved / neutral / regressed. 
The hard gate fails only on an **average-latency** + regression beyond the noise band; peak heap is **advisory** (the `peakHeapMb` + used-heap delta is GC-timing noisy — use the probe's per-compile allocation + bytes for deterministic heap). A single run is advisory; the hard gate needs a + median (`-Repeat` >= 2). + `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture + deterministic measurement-call counts and per-compile allocation bytes for + proving algorithmic / allocation changes (the probe warms up the JVM before its + allocation window, so `Alloc KB` reflects steady state, not one-time + class-load / JIT cold-start). `scripts/run-benchmarks.ps1` gains the + `11-verdict-current-speed` step (skippable via `-SkipVerdict`). + ## v1.7.0 — 2026-06-07 Canonical DSL primitives — additive only, zero breaking changes. Adding public diff --git a/baselines/current-speed-full.json b/baselines/current-speed-full.json new file mode 100644 index 00000000..d5e81180 --- /dev/null +++ b/baselines/current-speed-full.json @@ -0,0 +1,88 @@ +{ + "timestamp" : "2026-06-08 12:07:23", + "profile" : "full", + "warmupIterations" : 12, + "measurementIterations" : 40, + "docsPerThread" : 12, + "threadCounts" : [ 1, 2, 4, 8 ], + "latency" : [ { + "scenario" : "cv-template", + "description" : "Compose-first CV template", + "avgMillis" : 4.28, + "p50Millis" : 3.93, + "p95Millis" : 5.83, + "maxMillis" : 7.15, + "docsPerSecond" : 233.52, + "avgKilobytes" : 2.29, + "peakHeapMb" : 33.08 + }, { + "scenario" : "engine-simple", + "description" : "One-page engine composition", + "avgMillis" : 3.17, + "p50Millis" : 2.96, + "p95Millis" : 5.01, + "maxMillis" : 5.9, + "docsPerSecond" : 315.87, + "avgKilobytes" : 1.08, + "peakHeapMb" : 12.0 + }, { + "scenario" : "feature-rich", + "description" : "QR, barcode, watermark, header/footer, page break", + "avgMillis" : 45.37, + "p50Millis" : 37.09, + "p95Millis" : 60.65, + "maxMillis" : 69.62, + "docsPerSecond" : 22.04, + "avgKilobytes" : 
6.37, + "peakHeapMb" : 86.14 + }, { + "scenario" : "invoice-template", + "description" : "Compose-first invoice template", + "avgMillis" : 19.42, + "p50Millis" : 18.75, + "p95Millis" : 27.88, + "maxMillis" : 34.26, + "docsPerSecond" : 51.5, + "avgKilobytes" : 9.72, + "peakHeapMb" : 85.09 + }, { + "scenario" : "proposal-template", + "description" : "Long multi-page proposal template", + "avgMillis" : 14.41, + "p50Millis" : 13.71, + "p95Millis" : 19.18, + "maxMillis" : 19.93, + "docsPerSecond" : 69.38, + "avgKilobytes" : 7.72, + "peakHeapMb" : 97.52 + } ], + "throughput" : [ { + "scenario" : "invoice-template", + "threads" : 1, + "totalDocs" : 12, + "docsPerSecond" : 81.22, + "avgMillisPerDoc" : 12.31 + }, { + "scenario" : "invoice-template", + "threads" : 2, + "totalDocs" : 24, + "docsPerSecond" : 158.68, + "avgMillisPerDoc" : 6.3 + }, { + "scenario" : "invoice-template", + "threads" : 4, + "totalDocs" : 48, + "docsPerSecond" : 265.11, + "avgMillisPerDoc" : 3.77 + }, { + "scenario" : "invoice-template", + "threads" : 8, + "totalDocs" : 96, + "docsPerSecond" : 356.61, + "avgMillisPerDoc" : 2.8 + } ], + "totalBytes" : 2905520, + "aggregation" : "median", + "sourceCount" : 7, + "sourceRuns" : [ "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120624.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120635.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120645.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120655.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120704.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120713.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120722.json" ] +} \ No 
newline at end of file diff --git a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java new file mode 100644 index 00000000..b231265f --- /dev/null +++ b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java @@ -0,0 +1,379 @@ +package com.demcha.compose; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * Compares a candidate {@code current-speed} benchmark report against a + * committed baseline and emits a per-scenario verdict + * ({@code IMPROVED} / {@code NEUTRAL} / {@code REGRESSED}). + * + *

This is the regression gate of the per-change performance workflow + * described in {@code docs/operations/perf-change-workflow.md}. Unlike + * {@link BenchmarkDiffTool}, which only prints signed deltas between two + * arbitrary runs, this tool classifies each delta against a noise band and + * fails the build (non-zero exit) when a scenario regresses beyond the band on + * the gate metric (average latency). Peak heap is reported as an + * advisory only: the {@code peakHeapMb} field is a used-heap delta + * sampled via {@code Runtime}, which is GC-timing dependent and very noisy + * run-to-run, so it must not fail the build. The deterministic heap signal is + * {@code MeasurementCountBenchmark}'s per-compile allocation bytes + * (ThreadMXBean). It is meant to be pointed at a stable, committed baseline (see + * {@code baselines/}) rather than at the previous ephemeral run under + * {@code target/}.

+ * + *

Usage:

+ * + * + *

Both reports must share the same {@code current-speed} profile + * ({@code smoke} or {@code full}); a {@code smoke} report and a {@code full} + * report are different experiments and are rejected.

+ * + *

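Thresholds and gate behaviour are configurable via system properties:
+ * {@code graphcompose.benchmark.verdict.avgBandPct} (average-latency noise band, percent, default 10),
+ * {@code graphcompose.benchmark.verdict.heapBandPct} (advisory peak-heap band, percent, default 15) and
+ * {@code graphcompose.benchmark.verdict.gate} (boolean, default {@code true}; when not {@code true} the
+ * verdict is still reported but a regression does not fail the build).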

+ * + * + *

Exit codes: {@code 0} when the gate passes (or is disabled), {@code 1} + * when the gate is enabled and at least one scenario regressed, {@code 2} on + * usage or profile-compatibility errors.

+ *
+ * @author Artem Demchyshyn
+ */
+public final class BenchmarkVerdictTool {
+
+    private static final ObjectMapper JSON = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
+    private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+    private static final String AVG_BAND_PROPERTY = "graphcompose.benchmark.verdict.avgBandPct";
+    private static final String HEAP_BAND_PROPERTY = "graphcompose.benchmark.verdict.heapBandPct";
+    private static final String GATE_PROPERTY = "graphcompose.benchmark.verdict.gate";
+
+    private static final double DEFAULT_AVG_BAND_PCT = 10.0;
+    private static final double DEFAULT_HEAP_BAND_PCT = 15.0;
+
+    private BenchmarkVerdictTool() {
+    }
+
+    /**
+     * CLI entry point. Reads the baseline and candidate reports, prints the
+     * verdict table, writes JSON/CSV verdict artifacts under
+     * {@code target/benchmarks/verdicts/current-speed/}, and exits non-zero
+     * when the regression gate is enabled and at least one scenario regressed.
+     *
+     * <p>Scenarios that exist only in the candidate are listed as a warning,
+     * because {@code evaluate} walks the baseline scenarios and would otherwise
+     * drop them silently. When the gate is enabled and the two reports share no
+     * scenario at all, the run is rejected with exit code {@code 2} instead of
+     * passing with nothing compared.</p>
+     *
+     * @param args {@code <baseline.json> <candidate.json>}
+     * @throws Exception if a report cannot be read or written
+     */
+    public static void main(String[] args) throws Exception {
+        BenchmarkSupport.configureQuietLogging();
+        if (args.length != 2) {
+            System.err.println("""
+                    Usage:
+                      java ... com.demcha.compose.BenchmarkVerdictTool <baseline.json> <candidate.json>
+                    """);
+            System.exit(2);
+            return;
+        }
+
+        Path baselinePath = Path.of(args[0]);
+        Path candidatePath = Path.of(args[1]);
+        JsonNode baseline = JSON.readTree(Files.readAllBytes(baselinePath));
+        JsonNode candidate = JSON.readTree(Files.readAllBytes(candidatePath));
+
+        if (!isCurrentSpeed(baseline) || !isCurrentSpeed(candidate)) {
+            System.err.println("BenchmarkVerdictTool only supports current-speed reports (latency + throughput).");
+            System.exit(2);
+            return;
+        }
+
+        String baselineProfile = baseline.path("profile").asText("");
+        String candidateProfile = candidate.path("profile").asText("");
+        if (!baselineProfile.equals(candidateProfile)) {
+            System.err.println("Profiles do not match: baseline='" + baselineProfile
+                    + "', candidate='" + candidateProfile + "'. Compare runs from the same profile only.");
+            System.exit(2);
+            return;
+        }
+
+        Thresholds thresholds = Thresholds.fromSystemProperties();
+        VerdictReport report = evaluate(baselinePath.toString(), candidatePath.toString(), baseline, candidate, thresholds);
+
+        print(report);
+
+        // evaluate() only walks baseline scenarios, so a scenario that was added
+        // to the benchmark after the baseline was committed would never show up.
+        Map<String, JsonNode> baselineScenarios = indexBy(baseline.path("latency"));
+        List<String> candidateOnly = new ArrayList<>();
+        for (String scenario : indexBy(candidate.path("latency")).keySet()) {
+            if (!baselineScenarios.containsKey(scenario)) {
+                candidateOnly.add(scenario);
+            }
+        }
+        if (!candidateOnly.isEmpty()) {
+            System.out.println();
+            System.out.println("WARNING: candidate scenarios missing from baseline (not gated): "
+                    + String.join(", ", candidateOnly));
+        }
+
+        write(report);
+
+        if (thresholds.gateEnabled() && report.scenarios().isEmpty()) {
+            // Nothing was compared, so "no regression" would be a vacuous pass.
+            System.err.println("No scenario is present in both reports; the gate has nothing to evaluate.");
+            System.exit(2);
+            return;
+        }
+
+        if (thresholds.gateEnabled() && report.regressed()) {
+            System.out.println();
+            System.out.println("PERFORMANCE GATE FAILED: at least one scenario regressed beyond the noise band.");
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Pure, side-effect-free evaluation core used by both {@link #main(String[])}
+     * and the unit test. Computes the per-scenario verdict for every scenario
+     * present in both reports and the overall verdict.
+     *
+     * <p>Baseline scenarios absent from the candidate are collected in
+     * {@code missingScenarios} and do not influence the verdict. Scenarios
+     * present only in the candidate are not visited here.</p>
+     *
+     * @param baselinePath  display path of the baseline report
+     * @param candidatePath display path of the candidate report
+     * @param baseline      parsed baseline current-speed report
+     * @param candidate     parsed candidate current-speed report
+     * @param thresholds    noise bands and gate flag
+     * @return the computed verdict report
+     */
+    static VerdictReport evaluate(String baselinePath,
+                                  String candidatePath,
+                                  JsonNode baseline,
+                                  JsonNode candidate,
+                                  Thresholds thresholds) {
+        Map<String, JsonNode> baselineByScenario = indexBy(baseline.path("latency"));
+        Map<String, JsonNode> candidateByScenario = indexBy(candidate.path("latency"));
+
+        List<ScenarioVerdict> scenarios = new ArrayList<>();
+        List<String> missingScenarios = new ArrayList<>();
+        boolean anyRegressed = false;
+        boolean anyImproved = false;
+
+        for (Map.Entry<String, JsonNode> entry : baselineByScenario.entrySet()) {
+            String scenario = entry.getKey();
+            JsonNode before = entry.getValue();
+            JsonNode after = candidateByScenario.get(scenario);
+            if (after == null) {
+                missingScenarios.add(scenario);
+                continue;
+            }
+
+            double baselineAvg = before.path("avgMillis").asDouble();
+            double candidateAvg = after.path("avgMillis").asDouble();
+            double avgDeltaPct = percentDelta(baselineAvg, candidateAvg);
+            double p95DeltaPct = percentDelta(before.path("p95Millis").asDouble(), after.path("p95Millis").asDouble());
+            double docsDeltaPct = percentDelta(before.path("docsPerSecond").asDouble(), after.path("docsPerSecond").asDouble());
+            double baselineHeap = before.path("peakHeapMb").asDouble();
+            double candidateHeap = after.path("peakHeapMb").asDouble();
+            double heapDeltaPct = percentDelta(baselineHeap, candidateHeap);
+
+            // Hard gate metric: average latency only. peakHeapMb is a used-heap
+            // delta sampled via Runtime — GC-timing dependent and very noisy
+            // run-to-run (observed 48..170 MB across repeats of identical code),
+            // so it is reported as ADVISORY, never gated. The deterministic heap
+            // signal is MeasurementCountBenchmark's per-compile allocation bytes.
+            boolean heapAdvisory = heapDeltaPct > thresholds.heapBandPct();
+            Verdict verdict;
+            if (avgDeltaPct > thresholds.avgBandPct()) {
+                verdict = Verdict.REGRESSED;
+                anyRegressed = true;
+            } else if (avgDeltaPct < -thresholds.avgBandPct()) {
+                verdict = Verdict.IMPROVED;
+                anyImproved = true;
+            } else {
+                verdict = Verdict.NEUTRAL;
+            }
+
+            scenarios.add(new ScenarioVerdict(
+                    scenario,
+                    before.path("description").asText(after.path("description").asText("")),
+                    baselineAvg,
+                    candidateAvg,
+                    avgDeltaPct,
+                    p95DeltaPct,
+                    docsDeltaPct,
+                    baselineHeap,
+                    candidateHeap,
+                    heapDeltaPct,
+                    heapAdvisory,
+                    verdict.name()));
+        }
+
+        // A single regression outweighs any number of improvements.
+        Verdict overall = anyRegressed
+                ? Verdict.REGRESSED
+                : (anyImproved ? Verdict.IMPROVED : Verdict.NEUTRAL);
+
+        return new VerdictReport(
+                baselinePath,
+                candidatePath,
+                candidate.path("profile").asText(""),
+                baseline.path("timestamp").asText(""),
+                candidate.path("timestamp").asText(""),
+                thresholds.avgBandPct(),
+                thresholds.heapBandPct(),
+                thresholds.gateEnabled(),
+                overall.name(),
+                anyRegressed,
+                scenarios,
+                missingScenarios);
+    }
+
+    /** Prints the human-readable verdict table, advisories and warnings to standard output. */
+    private static void print(VerdictReport report) {
+        System.out.println("Benchmark verdict (vs committed baseline)");
+        System.out.println("Timestamp: " + LocalDateTime.now().format(TIMESTAMP_FORMAT));
+        System.out.println("Profile: " + report.profile());
+        System.out.println("Baseline: " + report.baselinePath() + " (" + report.baselineTimestamp() + ")");
+        System.out.println("Candidate: " + report.candidatePath() + " (" + report.candidateTimestamp() + ")");
+        System.out.println("Gate: avg latency +/-" + format(report.avgBandPct())
+                + "% (HARD). peakHeap +/-" + format(report.heapBandPct())
+                + "% = ADVISORY only (GC-timing noisy, not gated). gate: "
+                + (report.gateEnabled() ? "enabled" : "disabled"));
+        System.out.println();
+        System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n",
+                "Scenario", "Avg pct", "p95 pct", "Docs/s pct", "Heap pct", "Verdict");
+        System.out.println("-".repeat(82));
+        for (ScenarioVerdict row : report.scenarios()) {
+            System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n",
+                    row.scenario(),
+                    signedPercent(row.avgDeltaPct()),
+                    signedPercent(row.p95DeltaPct()),
+                    signedPercent(row.docsPerSecondDeltaPct()),
+                    signedPercent(row.peakHeapDeltaPct()),
+                    row.verdict());
+        }
+        List<String> heapAdvisories = report.scenarios().stream()
+                .filter(ScenarioVerdict::heapAdvisory)
+                .map(row -> row.scenario() + " (" + signedPercent(row.peakHeapDeltaPct()) + ")")
+                .toList();
+        if (!heapAdvisories.isEmpty()) {
+            System.out.println();
+            System.out.println("ADVISORY (not gated) - peakHeapMb over band: " + String.join(", ", heapAdvisories)
+                    + ". peakHeapMb is GC-timing noisy; use MeasurementCountBenchmark for the deterministic allocation signal.");
+        }
+        if (!report.missingScenarios().isEmpty()) {
+            System.out.println();
+            System.out.println("WARNING: baseline scenarios missing from candidate (not gated): "
+                    + String.join(", ", report.missingScenarios()));
+        }
+        System.out.println();
+        System.out.println("Overall verdict: " + report.overallVerdict() + " (hard gate: average latency)");
+    }
+
+    /** Writes the verdict report as JSON and as a per-scenario CSV through {@code BenchmarkReportWriter}. */
+    private static void write(VerdictReport report) throws Exception {
+        BenchmarkReportWriter.BenchmarkArtifacts artifacts = BenchmarkReportWriter.prepare("verdicts/current-speed");
+        Path jsonPath = artifacts.writeJson(report);
+        Path csvPath = artifacts.writeCsv(
+                "verdict",
+                List.of("scenario", "baseline_avg_ms", "candidate_avg_ms", "avg_delta_pct",
+                        "p95_delta_pct", "docs_per_sec_delta_pct",
+                        "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct",
+                        "heap_advisory", "verdict"),
+                report.scenarios().stream()
+                        .map(row -> List.of(
+                                row.scenario(),
+                                format(row.baselineAvgMs()),
+                                format(row.candidateAvgMs()),
+                                format(row.avgDeltaPct()),
+                                format(row.p95DeltaPct()),
+                                format(row.docsPerSecondDeltaPct()),
+                                format(row.baselinePeakHeapMb()),
+                                format(row.candidatePeakHeapMb()),
+                                format(row.peakHeapDeltaPct()),
+                                Boolean.toString(row.heapAdvisory()),
+                                row.verdict()))
+                        .toList());
+        System.out.println("Saved JSON verdict report to " + jsonPath);
+        System.out.println("Saved CSV verdict report to " + csvPath);
+    }
+
+    /** A report counts as current-speed when it carries both a {@code latency} and a {@code throughput} section. */
+    private static boolean isCurrentSpeed(JsonNode node) {
+        return node.has("latency") && node.has("throughput");
+    }
+
+    /** Indexes the rows of a {@code latency} array by their {@code scenario} name, sorted by name. */
+    private static Map<String, JsonNode> indexBy(JsonNode latencyArray) {
+        Map<String, JsonNode> result = new TreeMap<>();
+        latencyArray.forEach(item -> result.put(item.path("scenario").asText(), item));
+        return result;
+    }
+
+    /**
+     * Signed relative change of {@code candidate} against {@code baseline}, in percent.
+     * A zero baseline yields {@code 0} when the candidate is also zero and a flat
+     * {@code 100} otherwise, because the true ratio is undefined.
+     */
+    private static double percentDelta(double baseline, double candidate) {
+        if (Double.compare(baseline, 0.0) == 0) {
+            return candidate == 0.0 ? 0.0 : 100.0;
+        }
+        return ((candidate - baseline) / baseline) * 100.0;
+    }
+
+    /** Formats a percentage with an explicit sign; locale-neutral so output matches on every machine. */
+    private static String signedPercent(double value) {
+        return String.format(java.util.Locale.ROOT, "%+.2f%%", value);
+    }
+
+    /**
+     * Formats a number with two decimals. Locale-neutral on purpose: the value is
+     * written into CSV cells, where a locale decimal comma would corrupt the row.
+     */
+    private static String format(double value) {
+        return String.format(java.util.Locale.ROOT, "%.2f", value);
+    }
+
+    /**
+     * Noise bands (percent) and the gate flag for a verdict evaluation.
+     *
+     * @param avgBandPct  band for average latency, in percent; a candidate slower
+     *                    than the baseline by more than this percentage regresses
+     * @param heapBandPct band for peak heap delta, in percent (advisory only)
+     * @param gateEnabled whether a regression should fail the build (non-zero exit)
+     */
+    record Thresholds(double avgBandPct, double heapBandPct, boolean gateEnabled) {
+
+        /** Reads both bands and the gate flag from system properties, falling back to the defaults. */
+        static Thresholds fromSystemProperties() {
+            return new Thresholds(
+                    doubleProperty(AVG_BAND_PROPERTY, DEFAULT_AVG_BAND_PCT),
+                    doubleProperty(HEAP_BAND_PROPERTY, DEFAULT_HEAP_BAND_PCT),
+                    Boolean.parseBoolean(System.getProperty(GATE_PROPERTY, "true")));
+        }
+
+        /**
+         * Reads a non-negative, finite band from a system property.
+         * Missing, blank, unparseable, negative, infinite or NaN values fall back to
+         * {@code fallback}: a NaN band makes every comparison false (the gate could
+         * never trip) and a negative band would flag unchanged scenarios as regressed.
+         */
+        private static double doubleProperty(String key, double fallback) {
+            String raw = System.getProperty(key);
+            if (raw == null || raw.isBlank()) {
+                return fallback;
+            }
+            try {
+                double parsed = Double.parseDouble(raw.trim());
+                return Double.isFinite(parsed) && parsed >= 0.0 ? parsed : fallback;
+            } catch (NumberFormatException ex) {
+                return fallback;
+            }
+        }
+    }
+
+    /** Verdict classification for one scenario or for the report as a whole. */
+    enum Verdict {
+        IMPROVED,
+        NEUTRAL,
+        REGRESSED
+    }
+
+    /** Per-scenario verdict row. */
+    record ScenarioVerdict(String scenario,
+                           String description,
+                           double baselineAvgMs,
+                           double candidateAvgMs,
+                           double avgDeltaPct,
+                           double p95DeltaPct,
+                           double docsPerSecondDeltaPct,
+                           double baselinePeakHeapMb,
+                           double candidatePeakHeapMb,
+                           double peakHeapDeltaPct,
+                           boolean heapAdvisory,
+                           String verdict) {
+    }
+
+    /** Full verdict report, serialized to JSON/CSV. */
+    record VerdictReport(String baselinePath,
+                         String candidatePath,
+                         String profile,
+                         String baselineTimestamp,
+                         String candidateTimestamp,
+                         double avgBandPct,
+                         double heapBandPct,
+                         boolean gateEnabled,
+                         String overallVerdict,
+                         boolean regressed,
+                         List<ScenarioVerdict> scenarios,
+                         List<String> missingScenarios) {
+    }
+}
diff --git a/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java b/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java
new file mode 100644
index 00000000..70fd665e
--- /dev/null
+++ b/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java
@@ -0,0 +1,152 @@
+package com.demcha.compose;
+
+import com.demcha.compose.engine.components.content.text.TextStyle;
+import com.demcha.compose.engine.components.geometry.ContentSize;
+import com.demcha.compose.engine.measurement.TextMeasurementSystem;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * A {@link TextMeasurementSystem} decorator that forwards every call to a real
+ * delegate while counting how the layout engine asks for text measurements.
+ *
+ *

It exists to make the algorithmic findings of the performance audit + * (F1 greedy wrap re-measuring growing prefixes, F2 quadratic long-token + * breaking, F3 table re-measurement) deterministically observable. + * Wall-clock timing hides these under JIT/GC noise; measurement-request counts + * and summed argument characters do not.

+ * + *

The decorator records, per pass:

+ *
    + *
  • the number of width-bearing requests ({@code textWidth} + {@code measure})
  • + *
  • the number of distinct {@code (style, text)} requests — the + * caller-side proxy for how well the delegate's width cache can hit; + * a low repeat rate means the layout keeps asking for one-shot strings + * (the F1/F2 smell)
  • + *
  • the summed and maximum argument length in characters — the proxy for + * the {@code O(chars)} work each uncached measurement performs
  • + *
  • {@code lineMetrics}/{@code lineHeight} call counts (style-only, no text)
  • + *
+ * + *

Not thread-safe: drive it from a single layout pass, like the real + * measurement system.

+ *
+ * @author Artem Demchyshyn
+ */
+public final class CountingTextMeasurementSystem implements TextMeasurementSystem {
+
+    private final TextMeasurementSystem delegate;
+
+    private long textWidthCalls;
+    private long measureCalls;
+    private long lineMetricsCalls;
+    private long lineHeightCalls;
+    private long summedRequestChars;
+    private long maxRequestChars;
+    // One entry per distinct (style, text) pair seen by measure/textWidth.
+    private final Set<RequestKey> distinctRequests = new HashSet<>();
+
+    /**
+     * Wraps a real measurement system.
+     *
+     * @param delegate the measurement system to forward to (e.g. the session's
+     *                 {@code FontLibraryTextMeasurementSystem})
+     * @throws NullPointerException if {@code delegate} is {@code null}
+     */
+    public CountingTextMeasurementSystem(TextMeasurementSystem delegate) {
+        this.delegate = Objects.requireNonNull(delegate, "delegate");
+    }
+
+    /** Counts the call as a width-bearing request, then forwards it to the delegate. */
+    @Override
+    public ContentSize measure(TextStyle style, String text) {
+        measureCalls++;
+        record(style, text);
+        return delegate.measure(style, text);
+    }
+
+    /** Counts the call as a width-bearing request, then forwards it to the delegate. */
+    @Override
+    public double textWidth(TextStyle style, String text) {
+        textWidthCalls++;
+        record(style, text);
+        return delegate.textWidth(style, text);
+    }
+
+    /** Counts the style-only call, then forwards it to the delegate. */
+    @Override
+    public LineMetrics lineMetrics(TextStyle style) {
+        lineMetricsCalls++;
+        return delegate.lineMetrics(style);
+    }
+
+    /** Counts the style-only call, then forwards it to the delegate. */
+    @Override
+    public double lineHeight(TextStyle style) {
+        lineHeightCalls++;
+        return delegate.lineHeight(style);
+    }
+
+    /** Clears the delegate's caches; the counters kept by this decorator are left as they are. */
+    @Override
+    public void clearCaches() {
+        delegate.clearCaches();
+    }
+
+    /** Accumulates the length statistics and the distinct-request set; a {@code null} text counts as empty. */
+    private void record(TextStyle style, String text) {
+        String safe = text == null ? "" : text;
+        int length = safe.length();
+        summedRequestChars += length;
+        if (length > maxRequestChars) {
+            maxRequestChars = length;
+        }
+        distinctRequests.add(new RequestKey(style, safe));
+    }
+
+    /**
+     * Captures the counts accumulated so far.
+     *
+     * @return an immutable snapshot of the measurement-request counters
+     */
+    public Counts snapshot() {
+        long widthRequests = textWidthCalls + measureCalls;
+        long distinct = distinctRequests.size();
+        // No requests yet: report 0% rather than dividing by zero.
+        double repeatRatePct = widthRequests == 0
+                ? 0.0
+                : (1.0 - ((double) distinct / (double) widthRequests)) * 100.0;
+        return new Counts(
+                textWidthCalls,
+                measureCalls,
+                widthRequests,
+                distinct,
+                repeatRatePct,
+                summedRequestChars,
+                maxRequestChars,
+                lineMetricsCalls,
+                lineHeightCalls);
+    }
+
+    /**
+     * Immutable snapshot of measurement-request counters.
+     *
+     * @param textWidthCalls        direct {@code textWidth(style, text)} calls
+     * @param measureCalls          {@code measure(style, text)} calls
+     * @param widthRequests         {@code textWidthCalls + measureCalls}
+     * @param distinctWidthRequests distinct {@code (style, text)} requests
+     * @param repeatRatePct         {@code (1 - distinct/total) * 100}; higher
+     *                              means more cache-friendly (fewer one-shot
+     *                              strings)
+     * @param summedRequestChars    total characters across all width requests
+     * @param maxRequestChars       longest single argument measured
+     * @param lineMetricsCalls      {@code lineMetrics(style)} calls
+     * @param lineHeightCalls       {@code lineHeight(style)} calls
+     */
+    public record Counts(long textWidthCalls,
+                         long measureCalls,
+                         long widthRequests,
+                         long distinctWidthRequests,
+                         double repeatRatePct,
+                         long summedRequestChars,
+                         long maxRequestChars,
+                         long lineMetricsCalls,
+                         long lineHeightCalls) {
+    }
+
+    private record RequestKey(TextStyle style, String text) {
+    }
+}
diff --git a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java
index d96dfc93..2858d64a 100644
--- a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java
+++ b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java
@@ -112,7 +112,8 @@ private void run() throws Exception {
                 new Scenario("invoice-template", "Compose-first invoice template", this::renderInvoiceTemplateDocument),
                 new Scenario("cv-template", "Compose-first CV template", this::renderCvTemplateDocument),
                 new Scenario("proposal-template", "Long multi-page proposal template", this::renderProposalTemplateDocument),
- 
new Scenario("feature-rich", "QR, barcode, watermark, header/footer, page break", this::renderFeatureRichDocument) + new Scenario("feature-rich", "QR, barcode, watermark, header/footer, page break", this::renderFeatureRichDocument), + new Scenario("long-token", "Long unbreakable tokens (URLs/IDs) forcing character-level wrap", this::renderLongTokenDocument) ); System.out.println("Latency benchmark"); @@ -551,6 +552,31 @@ private byte[] renderProposalTemplateDocument() throws Exception { } } + private byte[] renderLongTokenDocument() throws Exception { + // Worst-case for character-level wrapping: many long unbreakable tokens + // (long URLs/IDs/no-space runs) that overflow the line and force + // splitLongToken -> fitCharacters. Exercises audit finding F2. + try (DocumentSession document = GraphCompose.document() + .pageSize(com.demcha.compose.document.api.DocumentPageSize.A4) + .margin(22, 22, 22, 22) + .create()) { + var root = document.dsl() + .pageFlow() + .name("BenchmarkLongTokenRoot") + .spacing(8); + for (int i = 1; i <= 40; i++) { + final int index = i; + root.addParagraph(paragraph -> paragraph + .name("BenchmarkLongToken" + index) + .text("Reference " + index + ": https://example.com/" + "a".repeat(500) + + " trailing words to wrap normally after the long token.") + .textStyle(BODY_STYLE)); + } + root.build(); + return document.toPdfBytes(); + } + } + private byte[] renderFeatureRichDocument() throws Exception { PdfFixedLayoutBackend backend = PdfFixedLayoutBackend.builder() .metadata(PdfMetadataOptions.builder() diff --git a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java new file mode 100644 index 00000000..b4b585d5 --- /dev/null +++ b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java @@ -0,0 +1,273 @@ +package com.demcha.compose; + +import com.demcha.compose.document.api.DocumentPageSize; +import 
com.demcha.compose.document.api.DocumentSession; +import com.demcha.compose.document.backend.fixed.pdf.PdfMeasurementResources; +import com.demcha.compose.document.dsl.PageFlowBuilder; +import com.demcha.compose.document.layout.DocumentGraph; +import com.demcha.compose.document.layout.DocumentLayoutPassContext; +import com.demcha.compose.document.layout.LayoutCanvas; +import com.demcha.compose.document.layout.LayoutCompiler; +import com.demcha.compose.document.layout.LayoutGraph; +import com.demcha.compose.document.layout.NodeRegistry; +import com.demcha.compose.document.node.DocumentNode; +import com.demcha.compose.document.style.DocumentColor; +import com.demcha.compose.document.style.DocumentTextDecoration; +import com.demcha.compose.document.style.DocumentTextStyle; + +import java.awt.Color; +import java.lang.management.ManagementFactory; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; + +/** + * Deterministic measurement-count and allocation probe for the canonical layout + * pipeline. + * + *

For each scenario this harness authors a document through the public DSL, + * then compiles its node graph through a {@link LayoutCompiler} whose + * {@code TextMeasurementSystem} is wrapped in a + * {@link CountingTextMeasurementSystem}. It reports, deterministically and + * independent of wall-clock / GC-timing noise:

+ * + *
    + *
  • measurement requests — how the layout asks the measurement + * system for widths (proves F1/F2/F3); and
  • + *
  • compile allocation bytes — bytes allocated by the layout + * {@code compile} pass, via + * {@link com.sun.management.ThreadMXBean#getCurrentThreadAllocatedBytes()}. + * Unlike the {@code peakHeapMb} sampled by {@code CurrentSpeedBenchmark} + * (a GC-timing-dependent used-heap delta), allocated-bytes is the + * deterministic memory signal for the allocation findings (F7 style/inset + * churn, F8 box recomputation, fragment re-copy, per-cell table lists).
  • + *
+ * + *

The allocation window wraps only {@code compile(...)}; font loading and DSL + * authoring happen outside it, so the number reflects layout allocation — the + * thing the optimizations move. Needs no {@code src/main} changes.

+ */
+public final class MeasurementCountBenchmark {
+
+    private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+    // Null when the platform bean is not the com.sun.management variant. An
+    // unconditional cast here would fail class initialization on such JVMs and
+    // make the "Alloc KB = n/a" fallback below unreachable.
+    private static final com.sun.management.ThreadMXBean THREAD_MX =
+            ManagementFactory.getThreadMXBean() instanceof com.sun.management.ThreadMXBean bean ? bean : null;
+
+    private static final DocumentTextStyle BODY_STYLE = DocumentTextStyle.builder()
+            .size(9.5)
+            .decoration(DocumentTextDecoration.DEFAULT)
+            .color(DocumentColor.of(new Color(58, 69, 84)))
+            .build();
+
+    private static final String LONG_PARAGRAPH =
+            ("GraphCompose lays out structured business documents efficiently across many pages "
+                    + "while keeping header and footer placement stable. ").repeat(120);
+
+    private static final String LONG_TOKEN_PARAGRAPH =
+            "Prefix text before an unbreakable token " + "x".repeat(600)
+                    + " and several trailing words that must still wrap onto the following lines here.";
+
+    /**
+     * CLI entry point: runs the probe once and writes the counter report.
+     *
+     * @param args ignored
+     * @throws Exception if a scenario cannot be laid out or the report cannot be written
+     */
+    public static void main(String[] args) throws Exception {
+        BenchmarkSupport.configureQuietLogging();
+        new MeasurementCountBenchmark().run();
+    }
+
+    /** Warms up, measures the three scenarios, prints the table and writes the report. */
+    private void run() throws Exception {
+        enableAllocationMeasurement();
+
+        System.out.println("GraphCompose Measurement-Count + Allocation Probe");
+        System.out.println("Timestamp: " + LocalDateTime.now().format(TIMESTAMP_FORMAT));
+        System.out.println("Thread allocation measurement: " + (allocationSupported() ? "enabled" : "UNAVAILABLE (Alloc KB = n/a)"));
+        System.out.println();
+
+        Consumer<PageFlowBuilder> longText = flow ->
+                flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE));
+        Consumer<PageFlowBuilder> longToken = flow ->
+                flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE));
+        Consumer<PageFlowBuilder> largeTable = MeasurementCountBenchmark::authorLargeTable;
+
+        // Warm up the JVM (class loading + JIT) BEFORE the allocation window so the
+        // "Alloc KB" column reflects steady-state per-document layout allocation, not
+        // one-time cold-start cost. Without this the FIRST scenario measured carried
+        // ~36 MB of class-load / JIT / static-init allocation — a JVM artifact, not a
+        // layout cost (verified: cold first compile 36.6 MB vs warm 0.65 MB for the
+        // same long-text document). The measurement-COUNT columns are exact either way.
+        for (int warmup = 0; warmup < 5; warmup++) {
+            measureScenario("warmup", longText);
+            measureScenario("warmup", longToken);
+            measureScenario("warmup", largeTable);
+        }
+
+        List<Result> results = new ArrayList<>();
+        results.add(measureScenario("long-text", longText));
+        results.add(measureScenario("long-token", longToken));
+        results.add(measureScenario("large-table", largeTable));
+
+        System.out.printf("%-14s | %11s | %9s | %9s | %11s | %8s | %11s | %10s | %6s%n",
+                "Scenario", "WidthReqs", "Distinct", "Repeat %", "Sum chars", "Max arg", "LineMetrics", "Alloc KB", "Pages");
+        System.out.println("-".repeat(108));
+        for (Result result : results) {
+            CountingTextMeasurementSystem.Counts c = result.counts();
+            System.out.printf("%-14s | %11d | %9d | %8.1f%% | %11d | %8d | %11d | %10s | %6d%n",
+                    result.scenario(),
+                    c.widthRequests(),
+                    c.distinctWidthRequests(),
+                    c.repeatRatePct(),
+                    c.summedRequestChars(),
+                    c.maxRequestChars(),
+                    c.lineMetricsCalls(),
+                    formatAllocKb(result.compileAllocBytes()),
+                    result.pages());
+        }
+
+        writeReport(results);
+    }
+
+    /**
+     * Authors one document with {@code author}, compiles its layout through a
+     * counting measurement system and returns the counters for that compile.
+     *
+     * @param scenario label stored in the result
+     * @param author   callback that fills the page flow
+     * @return counters, page/fragment totals and compile allocation bytes
+     *         ({@code -1} when allocation measurement is unavailable)
+     */
+    private Result measureScenario(String scenario, Consumer<PageFlowBuilder> author) throws Exception {
+        try (DocumentSession session = GraphCompose.document()
+                .pageSize(DocumentPageSize.A4)
+                .margin(24, 24, 24, 24)
+                .create()) {
+            session.pageFlow(author);
+            List<DocumentNode> roots = session.roots();
+            LayoutCanvas canvas = session.canvas();
+            NodeRegistry registry = session.registry();
+
+            try (PdfMeasurementResources resources = PdfMeasurementResources.open(List.of())) {
+                CountingTextMeasurementSystem counter =
+                        new CountingTextMeasurementSystem(resources.textMeasurementSystem());
+                DocumentLayoutPassContext context = new DocumentLayoutPassContext(
+                        registry, canvas, resources.fontLibrary(), counter, false);
+                LayoutCompiler compiler = new LayoutCompiler(registry);
+                DocumentGraph graph = new DocumentGraph(roots);
+
+                // Measure allocation around the layout compile only — font
+                // loading and authoring are already done, so this is the
+                // layout pass's own allocation footprint.
+                long allocBefore = currentThreadAllocatedBytes();
+                LayoutGraph layout = compiler.compile(graph, context, context);
+                long allocBytes = allocBefore < 0 ? -1 : currentThreadAllocatedBytes() - allocBefore;
+
+                return new Result(scenario, counter.snapshot(), layout.totalPages(), layout.fragments().size(), allocBytes);
+            }
+        }
+    }
+
+    /** Authors a six-column table with a header row and 200 body rows. */
+    private static void authorLargeTable(PageFlowBuilder flow) {
+        flow.addTable(table -> {
+            table.autoColumns(6).header("Item", "Qty", "Unit", "Price", "Tax", "Total");
+            for (int row = 1; row <= 200; row++) {
+                table.row("Line item " + row, "3", "ea", "12.50", "1.25", "38.75");
+            }
+        });
+    }
+
+    /** Switches per-thread allocation accounting on when the JVM supports it and it is not on already. */
+    private static void enableAllocationMeasurement() {
+        if (THREAD_MX == null) {
+            return;
+        }
+        try {
+            if (THREAD_MX.isThreadAllocatedMemorySupported() && !THREAD_MX.isThreadAllocatedMemoryEnabled()) {
+                THREAD_MX.setThreadAllocatedMemoryEnabled(true);
+            }
+        } catch (UnsupportedOperationException ignored) {
+            // Allocation measurement unsupported on this JVM; Alloc KB reports n/a.
+        }
+    }
+
+    /** Whether per-thread allocation accounting is both supported and currently enabled. */
+    private static boolean allocationSupported() {
+        if (THREAD_MX == null) {
+            return false;
+        }
+        try {
+            return THREAD_MX.isThreadAllocatedMemorySupported() && THREAD_MX.isThreadAllocatedMemoryEnabled();
+        } catch (UnsupportedOperationException ex) {
+            return false;
+        }
+    }
+
+    /** Bytes allocated so far by the calling thread, or {@code -1} when measurement is unavailable. */
+    private static long currentThreadAllocatedBytes() {
+        if (!allocationSupported()) {
+            return -1;
+        }
+        return THREAD_MX.getCurrentThreadAllocatedBytes();
+    }
+
+    /** Console rendering of an allocation figure in KB; negative values print as {@code n/a}. */
+    private static String formatAllocKb(long bytes) {
+        return bytes < 0 ? "n/a" : "%.1f".formatted(bytes / 1024.0);
+    }
+
+    /** Writes the counters as JSON and CSV through {@code BenchmarkReportWriter} and prints both paths. */
+    private void writeReport(List<Result> results) throws Exception {
+        CounterReport report = new CounterReport(
+                LocalDateTime.now().format(TIMESTAMP_FORMAT),
+                results.stream().map(Result::toScenarioCounts).toList());
+
+        BenchmarkReportWriter.BenchmarkArtifacts artifacts = BenchmarkReportWriter.prepare("counters");
+        var jsonPath = artifacts.writeJson(report);
+        var csvPath = artifacts.writeCsv(
+                "counters",
+                List.of("scenario", "width_requests", "distinct_width_requests", "repeat_rate_pct",
+                        "summed_request_chars", "max_request_chars", "text_width_calls", "measure_calls",
+                        "line_metrics_calls", "compile_alloc_bytes", "pages", "fragments"),
+                results.stream()
+                        .map(result -> {
+                            CountingTextMeasurementSystem.Counts c = result.counts();
+                            return List.of(
+                                    result.scenario(),
+                                    Long.toString(c.widthRequests()),
+                                    Long.toString(c.distinctWidthRequests()),
+                                    // Locale-neutral: a decimal comma would corrupt the CSV cell.
+                                    String.format(java.util.Locale.ROOT, "%.2f", c.repeatRatePct()),
+                                    Long.toString(c.summedRequestChars()),
+                                    Long.toString(c.maxRequestChars()),
+                                    Long.toString(c.textWidthCalls()),
+                                    Long.toString(c.measureCalls()),
+                                    Long.toString(c.lineMetricsCalls()),
+                                    Long.toString(result.compileAllocBytes()),
+                                    Integer.toString(result.pages()),
+                                    Integer.toString(result.fragments()));
+                        })
+                        .toList());
+
+        System.out.println();
+        System.out.println("Saved JSON counter report to " + jsonPath);
+        System.out.println("Saved CSV counter report to " + csvPath);
+    }
+
+    /** Outcome of one measured scenario. */
+    private record Result(String scenario,
+                          CountingTextMeasurementSystem.Counts counts,
+                          int pages,
+                          int fragments,
+                          long compileAllocBytes) {
+        /** Flattens this result into the row type used by the serialized report. */
+        ScenarioCounts toScenarioCounts() {
+            return new ScenarioCounts(
+                    scenario,
+                    counts.widthRequests(),
+                    counts.distinctWidthRequests(),
+                    counts.repeatRatePct(),
+                    counts.summedRequestChars(),
+                    counts.maxRequestChars(),
+                    counts.textWidthCalls(),
+                    counts.measureCalls(),
+                    counts.lineMetricsCalls(),
+                    counts.lineHeightCalls(),
+                    compileAllocBytes,
+                    pages,
+                    fragments);
+        }
+    }
+
+    private record 
ScenarioCounts(String scenario, + long widthRequests, + long distinctWidthRequests, + double repeatRatePct, + long summedRequestChars, + long maxRequestChars, + long textWidthCalls, + long measureCalls, + long lineMetricsCalls, + long lineHeightCalls, + long compileAllocBytes, + int pages, + int fragments) { + } + + private record CounterReport(String timestamp, List scenarios) { + } +} diff --git a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java new file mode 100644 index 00000000..75996c54 --- /dev/null +++ b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java @@ -0,0 +1,149 @@ +package com.demcha.compose; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for the pure {@link BenchmarkVerdictTool#evaluate} core. These + * drive synthetic current-speed reports so the verdict classification and the + * hard-gate {@code regressed} flag are validated deterministically, without + * running real benchmarks or invoking {@code System.exit}. 
+ */ +class BenchmarkVerdictToolTest { + + private static final ObjectMapper JSON = new ObjectMapper(); + private static final BenchmarkVerdictTool.Thresholds GATE = + new BenchmarkVerdictTool.Thresholds(10.0, 15.0, true); + + @Test + void flagsAverageLatencyRegressionBeyondBand() throws Exception { + JsonNode baseline = report(scenario("invoice-template", 10.0, 10.0, 30.0, 100.0)); + JsonNode candidate = report(scenario("invoice-template", 12.0, 11.0, 28.0, 100.0)); // +20% avg + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isTrue(); + assertThat(report.overallVerdict()).isEqualTo("REGRESSED"); + assertThat(report.scenarios()).singleElement() + .satisfies(row -> assertThat(row.verdict()).isEqualTo("REGRESSED")); + } + + @Test + void peakHeapOverBandIsAdvisoryNotGated() throws Exception { + JsonNode baseline = report(scenario("cv-template", 10.0, 10.0, 40.0, 100.0)); + JsonNode candidate = report(scenario("cv-template", 10.3, 10.0, 40.0, 120.0)); // +3% avg, +20% heap + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + // Heap over band must NOT fail the gate — peakHeapMb is advisory only + // (GC-timing noisy). The hard gate metric is average latency. 
+ assertThat(report.regressed()).isFalse(); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("NEUTRAL"); + assertThat(report.scenarios().get(0).heapAdvisory()).isTrue(); + } + + @Test + void marksClearSpeedupAsImproved() throws Exception { + JsonNode baseline = report(scenario("proposal-template", 10.0, 12.0, 28.0, 150.0)); + JsonNode candidate = report(scenario("proposal-template", 8.0, 9.0, 36.0, 150.0)); // -20% avg + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isFalse(); + assertThat(report.overallVerdict()).isEqualTo("IMPROVED"); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("IMPROVED"); + } + + @Test + void treatsWithinBandChangesAsNeutral() throws Exception { + JsonNode baseline = report(scenario("engine-simple", 5.0, 6.0, 170.0, 40.0)); + JsonNode candidate = report(scenario("engine-simple", 5.2, 6.1, 168.0, 43.0)); // +4% avg, +7.5% heap + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isFalse(); + assertThat(report.overallVerdict()).isEqualTo("NEUTRAL"); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("NEUTRAL"); + } + + @Test + void overallIsRegressedWhenAnyScenarioRegresses() throws Exception { + JsonNode baseline = report( + scenario("engine-simple", 5.0, 6.0, 170.0, 40.0), + scenario("invoice-template", 10.0, 11.0, 28.0, 100.0)); + JsonNode candidate = report( + scenario("engine-simple", 5.1, 6.1, 168.0, 41.0), // neutral + scenario("invoice-template", 13.0, 14.0, 22.0, 100.0)); // +30% avg -> regressed + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isTrue(); + assertThat(report.overallVerdict()).isEqualTo("REGRESSED"); + } + + @Test + void 
reportsMissingScenariosWithoutGating() throws Exception { + JsonNode baseline = report( + scenario("engine-simple", 5.0, 6.0, 170.0, 40.0), + scenario("invoice-template", 10.0, 11.0, 28.0, 100.0)); + JsonNode candidate = report(scenario("engine-simple", 5.1, 6.1, 168.0, 41.0)); // invoice dropped + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.missingScenarios()).containsExactly("invoice-template"); + assertThat(report.scenarios()).hasSize(1); + assertThat(report.regressed()).isFalse(); + } + + @Test + void regressedFlagReflectsStateIndependentOfGateFlag() throws Exception { + JsonNode baseline = report(scenario("invoice-template", 10.0, 10.0, 30.0, 100.0)); + JsonNode candidate = report(scenario("invoice-template", 12.0, 11.0, 28.0, 100.0)); // +20% avg + + BenchmarkVerdictTool.Thresholds gateOff = new BenchmarkVerdictTool.Thresholds(10.0, 15.0, false); + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, gateOff); + + // The state is still "regressed"; only the build-failing decision (exit code) is gated. + assertThat(report.regressed()).isTrue(); + assertThat(report.gateEnabled()).isFalse(); + } + + private static JsonNode report(String... 
latencyRows) throws Exception { + String latency = String.join(",", latencyRows); + String json = """ + { + "timestamp": "2026-06-08 12:00:00", + "profile": "full", + "latency": [%s], + "throughput": [] + } + """.formatted(latency); + return JSON.readTree(json); + } + + private static String scenario(String name, double avgMs, double p95Ms, double docsPerSec, double peakHeapMb) { + return """ + { + "scenario": "%s", + "description": "%s", + "avgMillis": %s, + "p50Millis": %s, + "p95Millis": %s, + "maxMillis": %s, + "docsPerSecond": %s, + "avgKilobytes": 1.0, + "peakHeapMb": %s + } + """.formatted(name, name, avgMs, avgMs, p95Ms, p95Ms, docsPerSec, peakHeapMb); + } +} diff --git a/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java b/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java new file mode 100644 index 00000000..ebd7397c --- /dev/null +++ b/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java @@ -0,0 +1,81 @@ +package com.demcha.compose; + +import com.demcha.compose.engine.components.content.text.TextStyle; +import com.demcha.compose.engine.components.geometry.ContentSize; +import com.demcha.compose.engine.measurement.TextMeasurementSystem; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for {@link CountingTextMeasurementSystem}. They use a trivial fake + * delegate (no PDFBox) so the counting/forwarding contract is verified + * deterministically and fast. 
+ */ +class CountingTextMeasurementSystemTest { + + private static final TextStyle STYLE = TextStyle.DEFAULT_STYLE; + + @Test + void countsWidthRequestsDistinctKeysAndCharacters() { + CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement()); + + double abWidth = counter.textWidth(STYLE, "ab"); + counter.textWidth(STYLE, "ab"); // repeat -> same key + counter.textWidth(STYLE, "abc"); + counter.measure(STYLE, "ab"); // measure shares the "ab" key + counter.lineMetrics(STYLE); + counter.lineHeight(STYLE); + + CountingTextMeasurementSystem.Counts counts = counter.snapshot(); + + assertThat(abWidth).isEqualTo(2.0); // delegate pass-through (fake width == length) + assertThat(counts.textWidthCalls()).isEqualTo(3); + assertThat(counts.measureCalls()).isEqualTo(1); + assertThat(counts.widthRequests()).isEqualTo(4); + assertThat(counts.distinctWidthRequests()).isEqualTo(2); // "ab", "abc" + assertThat(counts.summedRequestChars()).isEqualTo(9); // 2 + 2 + 3 + 2 + assertThat(counts.maxRequestChars()).isEqualTo(3); + assertThat(counts.repeatRatePct()).isEqualTo(50.0); // 1 - 2/4 + assertThat(counts.lineMetricsCalls()).isEqualTo(1); + assertThat(counts.lineHeightCalls()).isEqualTo(1); + } + + @Test + void emptySnapshotHasNoRequests() { + CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement()); + + CountingTextMeasurementSystem.Counts counts = counter.snapshot(); + + assertThat(counts.widthRequests()).isZero(); + assertThat(counts.distinctWidthRequests()).isZero(); + assertThat(counts.repeatRatePct()).isZero(); + assertThat(counts.summedRequestChars()).isZero(); + } + + @Test + void treatsNullTextAsEmptyWithoutFailing() { + CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement()); + + counter.textWidth(STYLE, null); + + CountingTextMeasurementSystem.Counts counts = counter.snapshot(); + assertThat(counts.widthRequests()).isEqualTo(1); + 
assertThat(counts.summedRequestChars()).isZero(); + assertThat(counts.distinctWidthRequests()).isEqualTo(1); + } + + /** Minimal delegate: width == text length, fixed line metrics. */ + private static final class FakeMeasurement implements TextMeasurementSystem { + @Override + public ContentSize measure(TextStyle style, String text) { + int length = text == null ? 0 : text.length(); + return new ContentSize(length, 10.0); + } + + @Override + public LineMetrics lineMetrics(TextStyle style) { + return new LineMetrics(8.0, 2.0, 0.0); + } + } +} diff --git a/scripts/run-benchmarks.ps1 b/scripts/run-benchmarks.ps1 index f816915e..dbe162c0 100644 --- a/scripts/run-benchmarks.ps1 +++ b/scripts/run-benchmarks.ps1 @@ -14,11 +14,17 @@ diff gracefully when no compatible historical pair exists yet. Use `-Repeat` to generate repeated current-speed/comparative runs and median aggregates for more stable local comparisons. + +Step 11 (`11-verdict-current-speed`) compares the current-speed result against +the committed baseline (`baselines/current-speed-.json`) and fails the +run when a canonical scenario regresses beyond the noise band. Use `-SkipVerdict` +to skip that gate while exploring. See `docs/operations/perf-change-workflow.md`. 
#> param( [switch]$IncludeEndurance, [switch]$OpenResults, [switch]$SkipDiff, + [switch]$SkipVerdict, [ValidateSet("full", "smoke")] [string]$CurrentSpeedProfile = "full", [ValidateRange(1, 10)] @@ -448,6 +454,40 @@ try { } Add-SummaryLine(("- Benchmarks folder: ``{0}``" -f (Join-Path $repoRoot "target\benchmarks"))) + if (-not $SkipVerdict) { + $verdictBaseline = Join-Path $repoRoot ("baselines\current-speed-{0}.json" -f $CurrentSpeedProfile) + if ($Repeat -gt 1) { + $verdictCandidate = Get-IfExists (Join-Path $repoRoot ("target\benchmarks\{0}\latest.json" -f $currentSpeedAggregateSuite)) + } else { + $verdictCandidate = $currentSpeedLatest + } + + if (-not (Test-Path $verdictBaseline)) { + Add-SummaryLine("- ``11-verdict-current-speed``: skipped") + Add-SummaryLine((" - Reason: no committed baseline at ``{0}`` (see docs/operations/perf-change-workflow.md)" -f $verdictBaseline)) + } elseif (-not $verdictCandidate) { + Add-SummaryLine("- ``11-verdict-current-speed``: skipped") + Add-SummaryLine(" - Reason: no candidate current-speed report was produced this run") + } else { + # Hard gate only for medians (Repeat >= 2): a single run is too noisy + # to gate against a median baseline, so Repeat 1 runs the verdict as + # advisory (gate disabled) — it prints the table but never fails the + # run. Use -Repeat 5 for the hard gate. The hard gate metric is + # average latency; peakHeapMb is advisory inside the tool. When the + # gate is on, BenchmarkVerdictTool exits non-zero on a regression, + # which makes Invoke-LoggedCommand throw and fail the whole run. 
+ $verdictProperties = @() + if ($Repeat -le 1) { + $verdictProperties += "-Dgraphcompose.benchmark.verdict.gate=false" + Add-SummaryLine("- ``11-verdict-current-speed``: advisory (single run; use -Repeat 5 for the hard gate)") + } + Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -SystemProperties $verdictProperties -Arguments @($verdictBaseline, $verdictCandidate) + } + } else { + Add-SummaryLine("- ``11-verdict-current-speed``: skipped") + Add-SummaryLine(" - Reason: ``-SkipVerdict`` was provided") + } + Write-Section "Benchmark run completed" Write-Host "Summary: $summaryPath" -ForegroundColor Green Write-Host "Benchmarks: $(Join-Path $repoRoot 'target\benchmarks')" -ForegroundColor Green diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java index 01349737..0da49991 100644 --- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java +++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java @@ -819,7 +819,23 @@ private static List wrapParagraph(List logicalLines, List tokens = tokenize(logicalLine); String currentPrefix = initialPrefix; - String currentLine = initialPrefix; + // currentLine is assembled in a reused StringBuilder: appending a + // token is amortised O(1), whereas concatenating Strings re-copied + // the whole growing line on every token (O(chars^2) char copies plus + // a fresh throwaway String each step). The character sequence is + // identical to the old `+` assembly, so wrapping stays byte-for-byte + // the same; we only materialise a String via toString() when a line + // is emitted (which the result list needs anyway). + StringBuilder currentLine = new StringBuilder(initialPrefix); + // Running width of currentLine. 
The greedy fit only needs the width + // of the line built so far plus the next token, not a fresh + // measurement of the whole growing prefix on every token (which made + // wrapping O(chars per line x tokens) measured characters). PDFBox + // glyph advances are additive here (no kerning), so accumulating + // per-token widths matches measuring the full string to well within + // the EPS the fit test already tolerates; each new line re-measures + // its (short) start to pin any floating-point drift. + double currentWidth = measurement.textWidth(style, initialPrefix); boolean hasContent = false; for (String token : tokens) { @@ -828,22 +844,28 @@ private static List wrapParagraph(List logicalLines, continue; } - String candidate = currentLine + nextToken; - if (!hasContent || measurement.textWidth(style, candidate) <= maxWidth + EPS) { - currentLine = candidate; + double nextTokenWidth = measurement.textWidth(style, nextToken); + if (!hasContent || currentWidth + nextTokenWidth <= maxWidth + EPS) { + currentLine.append(nextToken); + currentWidth += nextTokenWidth; hasContent = true; continue; } - result.add(trimTrailingSpaces(currentLine)); + result.add(trimTrailingSpaces(currentLine.toString())); currentPrefix = continuationPrefix; - currentLine = continuationPrefix; + currentLine.setLength(0); + currentLine.append(continuationPrefix); + currentWidth = measurement.textWidth(style, continuationPrefix); hasContent = false; double availableWidth = availableWidthForPrefix(maxWidth, currentPrefix, style, measurement); String strippedToken = nextToken.stripLeading(); - if (measurement.textWidth(style, currentPrefix + strippedToken) <= maxWidth + EPS) { - currentLine = currentPrefix + strippedToken; + double strippedTokenWidth = measurement.textWidth(style, strippedToken); + if (currentWidth + strippedTokenWidth <= maxWidth + EPS) { + currentLine.setLength(0); + currentLine.append(currentPrefix).append(strippedToken); + currentWidth += strippedTokenWidth; hasContent = 
true; continue; } @@ -857,11 +879,13 @@ private static List wrapParagraph(List logicalLines, result.add(currentPrefix + chunks.get(index)); currentPrefix = continuationPrefix; } - currentLine = currentPrefix + chunks.get(chunks.size() - 1); + currentLine.setLength(0); + currentLine.append(currentPrefix).append(chunks.get(chunks.size() - 1)); + currentWidth = measurement.textWidth(style, currentLine.toString()); hasContent = true; } - result.add(trimTrailingSpaces(currentLine)); + result.add(trimTrailingSpaces(currentLine.toString())); } return List.copyOf(result); @@ -1503,13 +1527,23 @@ private static int fitCharacters(String text, TextStyle style, double maxWidth, TextMeasurementSystem measurement) { + // Largest prefix length whose width fits. The fit predicate + // width(substring(0,n)) <= maxWidth is monotonic in n (each added char + // contributes a non-negative glyph advance), so the fitting lengths form + // a prefix [1..lastFitting] and a binary search finds the SAME boundary + // as the old linear scan — but in O(log n) width calls instead of + // measuring every growing prefix (which was O(n) calls and O(n^2) + // measured characters for a long unbreakable token). int lastFitting = 0; - for (int index = 1; index <= text.length(); index++) { - String candidate = text.substring(0, index); - if (measurement.textWidth(style, candidate) <= maxWidth + EPS) { - lastFitting = index; + int low = 1; + int high = text.length(); + while (low <= high) { + int mid = (low + high) >>> 1; + if (measurement.textWidth(style, text.substring(0, mid)) <= maxWidth + EPS) { + lastFitting = mid; + low = mid + 1; } else { - break; + high = mid - 1; } } return lastFitting == 0 ? Math.min(1, text.length()) : lastFitting;