From b8267845df1f7adb4cb5d645639e6076585fa5f5 Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 12:24:23 +0100
Subject: [PATCH 1/7] perf(bench): add current-speed verdict gate +
 measurement/allocation probe

BenchmarkVerdictTool classifies a current-speed run vs the committed baseline (improved/neutral/regressed) and exits non-zero on a regression beyond the noise band. MeasurementCountBenchmark + CountingTextMeasurementSystem capture deterministic textWidth call counts and per-compile allocation bytes (ThreadMXBean) for proving algorithmic/allocation changes. run-benchmarks.ps1 gains the 11-verdict-current-speed gate step (skippable via -SkipVerdict). Adds baselines/current-speed-full.json (full-profile median). Benchmark-module only; not part of the published library.
---
 baselines/current-speed-full.json             |  88 +++++
 .../demcha/compose/BenchmarkVerdictTool.java  | 354 ++++++++++++++++++
 .../CountingTextMeasurementSystem.java        | 152 ++++++++
 .../compose/MeasurementCountBenchmark.java    | 257 +++++++++++++
 .../compose/BenchmarkVerdictToolTest.java     | 146 ++++++++
 .../CountingTextMeasurementSystemTest.java    |  81 ++++
 scripts/run-benchmarks.ps1                    |  31 ++
 7 files changed, 1109 insertions(+)
 create mode 100644 baselines/current-speed-full.json
 create mode 100644 benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java
 create mode 100644 benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java
 create mode 100644 benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java
 create mode 100644 benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java
 create mode 100644 benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java

diff --git a/baselines/current-speed-full.json b/baselines/current-speed-full.json
new file mode 100644
index 00000000..d5e81180
--- /dev/null
+++ b/baselines/current-speed-full.json
@@ -0,0 +1,88 @@
+{
+  "timestamp" : "2026-06-08 12:07:23",
+  "profile" : "full",
+  "warmupIterations" : 12,
+  "measurementIterations" : 40,
+  "docsPerThread" : 12,
+  "threadCounts" : [ 1, 2, 4, 8 ],
+  "latency" : [ {
+    "scenario" : "cv-template",
+    "description" : "Compose-first CV template",
+    "avgMillis" : 4.28,
+    "p50Millis" : 3.93,
+    "p95Millis" : 5.83,
+    "maxMillis" : 7.15,
+    "docsPerSecond" : 233.52,
+    "avgKilobytes" : 2.29,
+    "peakHeapMb" : 33.08
+  }, {
+    "scenario" : "engine-simple",
+    "description" : "One-page engine composition",
+    "avgMillis" : 3.17,
+    "p50Millis" : 2.96,
+    "p95Millis" : 5.01,
+    "maxMillis" : 5.9,
+    "docsPerSecond" : 315.87,
+    "avgKilobytes" : 1.08,
+    "peakHeapMb" : 12.0
+  }, {
+    "scenario" : "feature-rich",
+    "description" : "QR, barcode, watermark, header/footer, page break",
+    "avgMillis" : 45.37,
+    "p50Millis" : 37.09,
+    "p95Millis" : 60.65,
+    "maxMillis" : 69.62,
+    "docsPerSecond" : 22.04,
+    "avgKilobytes" : 6.37,
+    "peakHeapMb" : 86.14
+  }, {
+    "scenario" : "invoice-template",
+    "description" : "Compose-first invoice template",
+    "avgMillis" : 19.42,
+    "p50Millis" : 18.75,
+    "p95Millis" : 27.88,
+    "maxMillis" : 34.26,
+    "docsPerSecond" : 51.5,
+    "avgKilobytes" : 9.72,
+    "peakHeapMb" : 85.09
+  }, {
+    "scenario" : "proposal-template",
+    "description" : "Long multi-page proposal template",
+    "avgMillis" : 14.41,
+    "p50Millis" : 13.71,
+    "p95Millis" : 19.18,
+    "maxMillis" : 19.93,
+    "docsPerSecond" : 69.38,
+    "avgKilobytes" : 7.72,
+    "peakHeapMb" : 97.52
+  } ],
+  "throughput" : [ {
+    "scenario" : "invoice-template",
+    "threads" : 1,
+    "totalDocs" : 12,
+    "docsPerSecond" : 81.22,
+    "avgMillisPerDoc" : 12.31
+  }, {
+    "scenario" : "invoice-template",
+    "threads" : 2,
+    "totalDocs" : 24,
+    "docsPerSecond" : 158.68,
+    "avgMillisPerDoc" : 6.3
+  }, {
+    "scenario" : "invoice-template",
+    "threads" : 4,
+    "totalDocs" : 48,
+    "docsPerSecond" : 265.11,
+    "avgMillisPerDoc" : 3.77
+  }, {
+    "scenario" : "invoice-template",
+    "threads" : 8,
+    "totalDocs" : 96,
+    "docsPerSecond" : 356.61,
+    "avgMillisPerDoc" : 2.8
+  } ],
+  "totalBytes" : 2905520,
+  "aggregation" : "median",
+  "sourceCount" : 7,
+  "sourceRuns" : [ "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120624.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120635.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120645.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120655.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120704.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120713.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120722.json" ]
+}
\ No newline at end of file
diff --git a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java
new file mode 100644
index 00000000..0817baf1
--- /dev/null
+++ b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java
@@ -0,0 +1,354 @@
+package com.demcha.compose;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Compares a candidate {@code current-speed} benchmark report against a
+ * committed baseline and emits a per-scenario verdict
+ * ({@code IMPROVED} / {@code NEUTRAL} / {@code REGRESSED}).
+ *
+ * <p>This is the regression gate of the per-change performance workflow
+ * described in {@code docs/operations/perf-change-workflow.md}. Unlike
+ * {@link BenchmarkDiffTool}, which only prints signed deltas between two
+ * arbitrary runs, this tool classifies each delta against a noise band and
+ * fails the build (non-zero exit) when any scenario regresses beyond the band
+ * on a <em>gate metric</em> (average latency or peak heap). It is meant to be
+ * pointed at a stable, committed baseline (see {@code baselines/}) rather than
+ * at the previous ephemeral run under {@code target/}.</p>
+ *
+ * <p>Usage:</p>
+ * <ul>
+ *     <li>{@code java ... BenchmarkVerdictTool <baseline.json> <candidate.json>}</li>
+ * </ul>
+ *
+ * <p>Both reports must share the same {@code current-speed} profile
+ * ({@code smoke} or {@code full}); a {@code smoke} report and a {@code full}
+ * report are different experiments and are rejected.</p>
+ *
+ * <p>Thresholds and gate behaviour are configurable via system properties
+ * (all percentages):</p>
+ * <ul>
+ *     <li>{@code -Dgraphcompose.benchmark.verdict.avgBandPct} (default {@code 10.0})</li>
+ *     <li>{@code -Dgraphcompose.benchmark.verdict.heapBandPct} (default {@code 15.0})</li>
+ *     <li>{@code -Dgraphcompose.benchmark.verdict.gate} (default {@code true})</li>
+ * </ul>
+ *
+ * <p>Exit codes: {@code 0} when the gate passes (or is disabled), {@code 1}
+ * when the gate is enabled and at least one scenario regressed, {@code 2} on
+ * usage or profile-compatibility errors.</p>
+ *
+ * @author Artem Demchyshyn
+ */
+public final class BenchmarkVerdictTool {
+
+    private static final ObjectMapper JSON = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
+    private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+    private static final String AVG_BAND_PROPERTY = "graphcompose.benchmark.verdict.avgBandPct";
+    private static final String HEAP_BAND_PROPERTY = "graphcompose.benchmark.verdict.heapBandPct";
+    private static final String GATE_PROPERTY = "graphcompose.benchmark.verdict.gate";
+
+    private static final double DEFAULT_AVG_BAND_PCT = 10.0;
+    private static final double DEFAULT_HEAP_BAND_PCT = 15.0;
+
+    private BenchmarkVerdictTool() {
+    }
+
+    /**
+     * CLI entry point. Reads the baseline and candidate reports, prints the
+     * verdict table, writes JSON/CSV verdict artifacts under
+     * {@code target/benchmarks/verdicts/current-speed/}, and exits non-zero
+     * when the regression gate is enabled and at least one scenario regressed.
+     *
+     * @param args {@code <baseline.json> <candidate.json>}
+     * @throws Exception if a report cannot be read or written
+     */
+    public static void main(String[] args) throws Exception {
+        BenchmarkSupport.configureQuietLogging();
+        if (args.length != 2) {
+            System.err.println("""
+                    Usage:
+                      java ... com.demcha.compose.BenchmarkVerdictTool <baseline.json> <candidate.json>
+                    """);
+            System.exit(2);
+            return;
+        }
+
+        Path baselinePath = Path.of(args[0]);
+        Path candidatePath = Path.of(args[1]);
+        JsonNode baseline = JSON.readTree(Files.readAllBytes(baselinePath));
+        JsonNode candidate = JSON.readTree(Files.readAllBytes(candidatePath));
+
+        if (!isCurrentSpeed(baseline) || !isCurrentSpeed(candidate)) {
+            System.err.println("BenchmarkVerdictTool only supports current-speed reports (latency + throughput).");
+            System.exit(2);
+            return;
+        }
+
+        String baselineProfile = baseline.path("profile").asText("");
+        String candidateProfile = candidate.path("profile").asText("");
+        if (!baselineProfile.equals(candidateProfile)) {
+            System.err.println("Profiles do not match: baseline='" + baselineProfile
+                    + "', candidate='" + candidateProfile + "'. Compare runs from the same profile only.");
+            System.exit(2);
+            return;
+        }
+
+        Thresholds thresholds = Thresholds.fromSystemProperties();
+        VerdictReport report = evaluate(baselinePath.toString(), candidatePath.toString(), baseline, candidate, thresholds);
+
+        print(report);
+        write(report);
+
+        if (thresholds.gateEnabled() && report.regressed()) {
+            System.out.println();
+            System.out.println("PERFORMANCE GATE FAILED: at least one scenario regressed beyond the noise band.");
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Pure, side-effect-free evaluation core used by both {@link #main(String[])}
+     * and the unit test. Computes the per-scenario verdict for every scenario
+     * present in both reports and the overall verdict.
+     *
+     * @param baselinePath  display path of the baseline report
+     * @param candidatePath display path of the candidate report
+     * @param baseline      parsed baseline current-speed report
+     * @param candidate     parsed candidate current-speed report
+     * @param thresholds    noise bands and gate flag
+     * @return the computed verdict report
+     */
+    static VerdictReport evaluate(String baselinePath,
+                                  String candidatePath,
+                                  JsonNode baseline,
+                                  JsonNode candidate,
+                                  Thresholds thresholds) {
+        Map<String, JsonNode> baselineByScenario = indexBy(baseline.path("latency"));
+        Map<String, JsonNode> candidateByScenario = indexBy(candidate.path("latency"));
+
+        List<ScenarioVerdict> scenarios = new ArrayList<>();
+        List<String> missingScenarios = new ArrayList<>();
+        boolean anyRegressed = false;
+        boolean anyImproved = false;
+
+        for (Map.Entry<String, JsonNode> entry : baselineByScenario.entrySet()) {
+            String scenario = entry.getKey();
+            JsonNode before = entry.getValue();
+            JsonNode after = candidateByScenario.get(scenario);
+            if (after == null) {
+                missingScenarios.add(scenario);
+                continue;
+            }
+
+            double baselineAvg = before.path("avgMillis").asDouble();
+            double candidateAvg = after.path("avgMillis").asDouble();
+            double avgDeltaPct = percentDelta(baselineAvg, candidateAvg);
+            double p95DeltaPct = percentDelta(before.path("p95Millis").asDouble(), after.path("p95Millis").asDouble());
+            double docsDeltaPct = percentDelta(before.path("docsPerSecond").asDouble(), after.path("docsPerSecond").asDouble());
+            double baselineHeap = before.path("peakHeapMb").asDouble();
+            double candidateHeap = after.path("peakHeapMb").asDouble();
+            double heapDeltaPct = percentDelta(baselineHeap, candidateHeap);
+
+            // Gate metrics: average latency and peak heap (both lower-is-better).
+            Verdict verdict;
+            if (avgDeltaPct > thresholds.avgBandPct() || heapDeltaPct > thresholds.heapBandPct()) {
+                verdict = Verdict.REGRESSED;
+                anyRegressed = true;
+            } else if (avgDeltaPct < -thresholds.avgBandPct()) {
+                verdict = Verdict.IMPROVED;
+                anyImproved = true;
+            } else {
+                verdict = Verdict.NEUTRAL;
+            }
+
+            scenarios.add(new ScenarioVerdict(
+                    scenario,
+                    before.path("description").asText(after.path("description").asText("")),
+                    baselineAvg,
+                    candidateAvg,
+                    avgDeltaPct,
+                    p95DeltaPct,
+                    docsDeltaPct,
+                    baselineHeap,
+                    candidateHeap,
+                    heapDeltaPct,
+                    verdict.name()));
+        }
+
+        Verdict overall = anyRegressed
+                ? Verdict.REGRESSED
+                : (anyImproved ? Verdict.IMPROVED : Verdict.NEUTRAL);
+
+        return new VerdictReport(
+                baselinePath,
+                candidatePath,
+                candidate.path("profile").asText(""),
+                baseline.path("timestamp").asText(""),
+                candidate.path("timestamp").asText(""),
+                thresholds.avgBandPct(),
+                thresholds.heapBandPct(),
+                thresholds.gateEnabled(),
+                overall.name(),
+                anyRegressed,
+                scenarios,
+                missingScenarios);
+    }
+
+    private static void print(VerdictReport report) {
+        System.out.println("Benchmark verdict (vs committed baseline)");
+        System.out.println("Timestamp: " + LocalDateTime.now().format(TIMESTAMP_FORMAT));
+        System.out.println("Profile: " + report.profile());
+        System.out.println("Baseline: " + report.baselinePath() + " (" + report.baselineTimestamp() + ")");
+        System.out.println("Candidate: " + report.candidatePath() + " (" + report.candidateTimestamp() + ")");
+        System.out.println("Bands: avg +/-" + format(report.avgBandPct()) + "%, peakHeap +/-"
+                + format(report.heapBandPct()) + "% | gate: " + (report.gateEnabled() ? "enabled" : "disabled"));
+        System.out.println();
+        System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n",
+                "Scenario", "Avg pct", "p95 pct", "Docs/s pct", "Heap pct", "Verdict");
+        System.out.println("-".repeat(82));
+        for (ScenarioVerdict row : report.scenarios()) {
+            System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n",
+                    row.scenario(),
+                    signedPercent(row.avgDeltaPct()),
+                    signedPercent(row.p95DeltaPct()),
+                    signedPercent(row.docsPerSecondDeltaPct()),
+                    signedPercent(row.peakHeapDeltaPct()),
+                    row.verdict());
+        }
+        if (!report.missingScenarios().isEmpty()) {
+            System.out.println();
+            System.out.println("WARNING: baseline scenarios missing from candidate (not gated): "
+                    + String.join(", ", report.missingScenarios()));
+        }
+        System.out.println();
+        System.out.println("Overall verdict: " + report.overallVerdict());
+    }
+
+    private static void write(VerdictReport report) throws Exception {
+        BenchmarkReportWriter.BenchmarkArtifacts artifacts = BenchmarkReportWriter.prepare("verdicts/current-speed");
+        Path jsonPath = artifacts.writeJson(report);
+        Path csvPath = artifacts.writeCsv(
+                "verdict",
+                List.of("scenario", "baseline_avg_ms", "candidate_avg_ms", "avg_delta_pct",
+                        "p95_delta_pct", "docs_per_sec_delta_pct",
+                        "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct", "verdict"),
+                report.scenarios().stream()
+                        .map(row -> List.of(
+                                row.scenario(),
+                                format(row.baselineAvgMs()),
+                                format(row.candidateAvgMs()),
+                                format(row.avgDeltaPct()),
+                                format(row.p95DeltaPct()),
+                                format(row.docsPerSecondDeltaPct()),
+                                format(row.baselinePeakHeapMb()),
+                                format(row.candidatePeakHeapMb()),
+                                format(row.peakHeapDeltaPct()),
+                                row.verdict()))
+                        .toList());
+        System.out.println("Saved JSON verdict report to " + jsonPath);
+        System.out.println("Saved CSV verdict report to " + csvPath);
+    }
+
+    private static boolean isCurrentSpeed(JsonNode node) {
+        return node.has("latency") && node.has("throughput");
+    }
+
+    private static Map<String, JsonNode> indexBy(JsonNode latencyArray) {
+        Map<String, JsonNode> result = new TreeMap<>();
+        latencyArray.forEach(item -> result.put(item.path("scenario").asText(), item));
+        return result;
+    }
+
+    private static double percentDelta(double baseline, double candidate) {
+        if (Double.compare(baseline, 0.0) == 0) {
+            return candidate == 0.0 ? 0.0 : 100.0;
+        }
+        return ((candidate - baseline) / baseline) * 100.0;
+    }
+
+    private static String signedPercent(double value) {
+        return "%+.2f%%".formatted(value);
+    }
+
+    private static String format(double value) {
+        return "%.2f".formatted(value);
+    }
+
+    /**
+     * Noise bands (percent) and the gate flag for a verdict evaluation.
+     *
+     * @param avgBandPct  band for average latency; a candidate slower than this
+     *                    fraction of the baseline regresses
+     * @param heapBandPct band for peak heap delta
+     * @param gateEnabled whether a regression should fail the build (non-zero exit)
+     */
+    record Thresholds(double avgBandPct, double heapBandPct, boolean gateEnabled) {
+
+        static Thresholds fromSystemProperties() {
+            return new Thresholds(
+                    doubleProperty(AVG_BAND_PROPERTY, DEFAULT_AVG_BAND_PCT),
+                    doubleProperty(HEAP_BAND_PROPERTY, DEFAULT_HEAP_BAND_PCT),
+                    Boolean.parseBoolean(System.getProperty(GATE_PROPERTY, "true")));
+        }
+
+        private static double doubleProperty(String key, double fallback) {
+            String raw = System.getProperty(key);
+            if (raw == null || raw.isBlank()) {
+                return fallback;
+            }
+            try {
+                return Double.parseDouble(raw.trim());
+            } catch (NumberFormatException ex) {
+                return fallback;
+            }
+        }
+    }
+
+    /** Verdict classification for one scenario or for the report as a whole. */
+    enum Verdict {
+        IMPROVED,
+        NEUTRAL,
+        REGRESSED
+    }
+
+    /** Per-scenario verdict row. */
+    record ScenarioVerdict(String scenario,
+                           String description,
+                           double baselineAvgMs,
+                           double candidateAvgMs,
+                           double avgDeltaPct,
+                           double p95DeltaPct,
+                           double docsPerSecondDeltaPct,
+                           double baselinePeakHeapMb,
+                           double candidatePeakHeapMb,
+                           double peakHeapDeltaPct,
+                           String verdict) {
+    }
+
+    /** Full verdict report, serialized to JSON/CSV. */
+    record VerdictReport(String baselinePath,
+                         String candidatePath,
+                         String profile,
+                         String baselineTimestamp,
+                         String candidateTimestamp,
+                         double avgBandPct,
+                         double heapBandPct,
+                         boolean gateEnabled,
+                         String overallVerdict,
+                         boolean regressed,
+                         List<ScenarioVerdict> scenarios,
+                         List<String> missingScenarios) {
+    }
+}
diff --git a/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java b/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java
new file mode 100644
index 00000000..70fd665e
--- /dev/null
+++ b/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java
@@ -0,0 +1,152 @@
+package com.demcha.compose;
+
+import com.demcha.compose.engine.components.content.text.TextStyle;
+import com.demcha.compose.engine.components.geometry.ContentSize;
+import com.demcha.compose.engine.measurement.TextMeasurementSystem;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * A {@link TextMeasurementSystem} decorator that forwards every call to a real
+ * delegate while counting how the layout engine asks for text measurements.
+ *
+ * <p>It exists to make the algorithmic findings of the performance audit
+ * (F1 greedy wrap re-measuring growing prefixes, F2 quadratic long-token
+ * breaking, F3 table re-measurement) <em>deterministically</em> observable.
+ * Wall-clock timing hides these under JIT/GC noise; measurement-request counts
+ * and summed argument characters do not.</p>
+ *
+ * <p>The decorator records, per pass:</p>
+ * <ul>
+ *     <li>the number of width-bearing requests ({@code textWidth} + {@code measure})</li>
+ *     <li>the number of <em>distinct</em> {@code (style, text)} requests — the
+ *         caller-side proxy for how well the delegate's width cache can hit;
+ *         a low repeat rate means the layout keeps asking for one-shot strings
+ *         (the F1/F2 smell)</li>
+ *     <li>the summed and maximum argument length in characters — the proxy for
+ *         the {@code O(chars)} work each uncached measurement performs</li>
+ *     <li>{@code lineMetrics}/{@code lineHeight} call counts (style-only, no text)</li>
+ * </ul>
+ *
+ * <p>Not thread-safe: drive it from a single layout pass, like the real
+ * measurement system.</p>
+ *
+ * @author Artem Demchyshyn
+ */
+public final class CountingTextMeasurementSystem implements TextMeasurementSystem {
+
+    private final TextMeasurementSystem delegate;
+
+    private long textWidthCalls;
+    private long measureCalls;
+    private long lineMetricsCalls;
+    private long lineHeightCalls;
+    private long summedRequestChars;
+    private long maxRequestChars;
+    private final Set<RequestKey> distinctRequests = new HashSet<>();
+
+    /**
+     * Wraps a real measurement system.
+     *
+     * @param delegate the measurement system to forward to (e.g. the session's
+     *                 {@code FontLibraryTextMeasurementSystem})
+     */
+    public CountingTextMeasurementSystem(TextMeasurementSystem delegate) {
+        this.delegate = Objects.requireNonNull(delegate, "delegate");
+    }
+
+    @Override
+    public ContentSize measure(TextStyle style, String text) {
+        measureCalls++;
+        record(style, text);
+        return delegate.measure(style, text);
+    }
+
+    @Override
+    public double textWidth(TextStyle style, String text) {
+        textWidthCalls++;
+        record(style, text);
+        return delegate.textWidth(style, text);
+    }
+
+    @Override
+    public LineMetrics lineMetrics(TextStyle style) {
+        lineMetricsCalls++;
+        return delegate.lineMetrics(style);
+    }
+
+    @Override
+    public double lineHeight(TextStyle style) {
+        lineHeightCalls++;
+        return delegate.lineHeight(style);
+    }
+
+    @Override
+    public void clearCaches() {
+        delegate.clearCaches();
+    }
+
+    private void record(TextStyle style, String text) {
+        String safe = text == null ? "" : text;
+        int length = safe.length();
+        summedRequestChars += length;
+        if (length > maxRequestChars) {
+            maxRequestChars = length;
+        }
+        distinctRequests.add(new RequestKey(style, safe));
+    }
+
+    /**
+     * Captures the counts accumulated so far.
+     *
+     * @return an immutable snapshot of the measurement-request counters
+     */
+    public Counts snapshot() {
+        long widthRequests = textWidthCalls + measureCalls;
+        long distinct = distinctRequests.size();
+        double repeatRatePct = widthRequests == 0
+                ? 0.0
+                : (1.0 - ((double) distinct / (double) widthRequests)) * 100.0;
+        return new Counts(
+                textWidthCalls,
+                measureCalls,
+                widthRequests,
+                distinct,
+                repeatRatePct,
+                summedRequestChars,
+                maxRequestChars,
+                lineMetricsCalls,
+                lineHeightCalls);
+    }
+
+    /**
+     * Immutable snapshot of measurement-request counters.
+     *
+     * @param textWidthCalls         direct {@code textWidth(style, text)} calls
+     * @param measureCalls           {@code measure(style, text)} calls
+     * @param widthRequests          {@code textWidthCalls + measureCalls}
+     * @param distinctWidthRequests  distinct {@code (style, text)} requests
+     * @param repeatRatePct          {@code (1 - distinct/total) * 100}; higher
+     *                               means more cache-friendly (fewer one-shot
+     *                               strings)
+     * @param summedRequestChars     total characters across all width requests
+     * @param maxRequestChars        longest single argument measured
+     * @param lineMetricsCalls       {@code lineMetrics(style)} calls
+     * @param lineHeightCalls        {@code lineHeight(style)} calls
+     */
+    public record Counts(long textWidthCalls,
+                         long measureCalls,
+                         long widthRequests,
+                         long distinctWidthRequests,
+                         double repeatRatePct,
+                         long summedRequestChars,
+                         long maxRequestChars,
+                         long lineMetricsCalls,
+                         long lineHeightCalls) {
+    }
+
+    private record RequestKey(TextStyle style, String text) {
+    }
+}
diff --git a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java
new file mode 100644
index 00000000..82e403f9
--- /dev/null
+++ b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java
@@ -0,0 +1,257 @@
+package com.demcha.compose;
+
+import com.demcha.compose.document.api.DocumentPageSize;
+import com.demcha.compose.document.api.DocumentSession;
+import com.demcha.compose.document.backend.fixed.pdf.PdfMeasurementResources;
+import com.demcha.compose.document.dsl.PageFlowBuilder;
+import com.demcha.compose.document.layout.DocumentGraph;
+import com.demcha.compose.document.layout.DocumentLayoutPassContext;
+import com.demcha.compose.document.layout.LayoutCanvas;
+import com.demcha.compose.document.layout.LayoutCompiler;
+import com.demcha.compose.document.layout.LayoutGraph;
+import com.demcha.compose.document.layout.NodeRegistry;
+import com.demcha.compose.document.node.DocumentNode;
+import com.demcha.compose.document.style.DocumentColor;
+import com.demcha.compose.document.style.DocumentTextDecoration;
+import com.demcha.compose.document.style.DocumentTextStyle;
+
+import java.awt.Color;
+import java.lang.management.ManagementFactory;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Consumer;
+
+/**
+ * Deterministic measurement-count and allocation probe for the canonical layout
+ * pipeline.
+ *
+ * <p>For each scenario this harness authors a document through the public DSL,
+ * then compiles its node graph through a {@link LayoutCompiler} whose
+ * {@code TextMeasurementSystem} is wrapped in a
+ * {@link CountingTextMeasurementSystem}. It reports, deterministically and
+ * independent of wall-clock / GC-timing noise:</p>
+ *
+ * <ul>
+ *     <li><b>measurement requests</b> — how the layout asks the measurement
+ *         system for widths (proves F1/F2/F3); and</li>
+ *     <li><b>compile allocation bytes</b> — bytes allocated by the layout
+ *         {@code compile} pass, via
+ *         {@link com.sun.management.ThreadMXBean#getCurrentThreadAllocatedBytes()}.
+ *         Unlike the {@code peakHeapMb} sampled by {@code CurrentSpeedBenchmark}
+ *         (a GC-timing-dependent used-heap delta), allocated-bytes is the
+ *         deterministic memory signal for the allocation findings (F7 style/inset
+ *         churn, F8 box recomputation, fragment re-copy, per-cell table lists).</li>
+ * </ul>
+ *
+ * <p>The allocation window wraps only {@code compile(...)}; font loading and DSL
+ * authoring happen outside it, so the number reflects layout allocation — the
+ * thing the optimizations move. Needs no {@code src/main} changes.</p>
+ */
+public final class MeasurementCountBenchmark {
+
+    private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+    private static final com.sun.management.ThreadMXBean THREAD_MX =
+            (com.sun.management.ThreadMXBean) ManagementFactory.getThreadMXBean();
+
+    private static final DocumentTextStyle BODY_STYLE = DocumentTextStyle.builder()
+            .size(9.5)
+            .decoration(DocumentTextDecoration.DEFAULT)
+            .color(DocumentColor.of(new Color(58, 69, 84)))
+            .build();
+
+    private static final String LONG_PARAGRAPH =
+            ("GraphCompose lays out structured business documents efficiently across many pages "
+                    + "while keeping header and footer placement stable. ").repeat(120);
+
+    private static final String LONG_TOKEN_PARAGRAPH =
+            "Prefix text before an unbreakable token " + "x".repeat(600)
+                    + " and several trailing words that must still wrap onto the following lines here.";
+
+    public static void main(String[] args) throws Exception {
+        BenchmarkSupport.configureQuietLogging();
+        new MeasurementCountBenchmark().run();
+    }
+
+    private void run() throws Exception {
+        enableAllocationMeasurement();
+
+        System.out.println("GraphCompose Measurement-Count + Allocation Probe");
+        System.out.println("Timestamp: " + LocalDateTime.now().format(TIMESTAMP_FORMAT));
+        System.out.println("Thread allocation measurement: " + (allocationSupported() ? "enabled" : "UNAVAILABLE (Alloc KB = n/a)"));
+        System.out.println();
+
+        List<Result> results = new ArrayList<>();
+        results.add(measureScenario("long-text", flow ->
+                flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE))));
+        results.add(measureScenario("long-token", flow ->
+                flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE))));
+        results.add(measureScenario("large-table", MeasurementCountBenchmark::authorLargeTable));
+
+        System.out.printf("%-14s | %11s | %9s | %9s | %11s | %8s | %11s | %10s | %6s%n",
+                "Scenario", "WidthReqs", "Distinct", "Repeat %", "Sum chars", "Max arg", "LineMetrics", "Alloc KB", "Pages");
+        System.out.println("-".repeat(108));
+        for (Result result : results) {
+            CountingTextMeasurementSystem.Counts c = result.counts();
+            System.out.printf("%-14s | %11d | %9d | %8.1f%% | %11d | %8d | %11d | %10s | %6d%n",
+                    result.scenario(),
+                    c.widthRequests(),
+                    c.distinctWidthRequests(),
+                    c.repeatRatePct(),
+                    c.summedRequestChars(),
+                    c.maxRequestChars(),
+                    c.lineMetricsCalls(),
+                    formatAllocKb(result.compileAllocBytes()),
+                    result.pages());
+        }
+
+        writeReport(results);
+    }
+
+    private Result measureScenario(String scenario, Consumer<PageFlowBuilder> author) throws Exception {
+        try (DocumentSession session = GraphCompose.document()
+                .pageSize(DocumentPageSize.A4)
+                .margin(24, 24, 24, 24)
+                .create()) {
+            session.pageFlow(author);
+            List<DocumentNode> roots = session.roots();
+            LayoutCanvas canvas = session.canvas();
+            NodeRegistry registry = session.registry();
+
+            try (PdfMeasurementResources resources = PdfMeasurementResources.open(List.of())) {
+                CountingTextMeasurementSystem counter =
+                        new CountingTextMeasurementSystem(resources.textMeasurementSystem());
+                DocumentLayoutPassContext context = new DocumentLayoutPassContext(
+                        registry, canvas, resources.fontLibrary(), counter, false);
+                LayoutCompiler compiler = new LayoutCompiler(registry);
+                DocumentGraph graph = new DocumentGraph(roots);
+
+                // Measure allocation around the layout compile only — font
+                // loading and authoring are already done, so this is the
+                // layout pass's own allocation footprint.
+                long allocBefore = currentThreadAllocatedBytes();
+                LayoutGraph layout = compiler.compile(graph, context, context);
+                long allocBytes = allocBefore < 0 ? -1 : currentThreadAllocatedBytes() - allocBefore;
+
+                return new Result(scenario, counter.snapshot(), layout.totalPages(), layout.fragments().size(), allocBytes);
+            }
+        }
+    }
+
+    private static void authorLargeTable(PageFlowBuilder flow) {
+        flow.addTable(table -> {
+            table.autoColumns(6).header("Item", "Qty", "Unit", "Price", "Tax", "Total");
+            for (int row = 1; row <= 200; row++) {
+                table.row("Line item " + row, "3", "ea", "12.50", "1.25", "38.75");
+            }
+        });
+    }
+
+    private static void enableAllocationMeasurement() {
+        try {
+            if (THREAD_MX.isThreadAllocatedMemorySupported() && !THREAD_MX.isThreadAllocatedMemoryEnabled()) {
+                THREAD_MX.setThreadAllocatedMemoryEnabled(true);
+            }
+        } catch (UnsupportedOperationException ignored) {
+            // Allocation measurement unsupported on this JVM; Alloc KB reports n/a.
+        }
+    }
+
+    private static boolean allocationSupported() {
+        try {
+            return THREAD_MX.isThreadAllocatedMemorySupported() && THREAD_MX.isThreadAllocatedMemoryEnabled();
+        } catch (UnsupportedOperationException ex) {
+            return false;
+        }
+    }
+
+    private static long currentThreadAllocatedBytes() {
+        if (!allocationSupported()) {
+            return -1;
+        }
+        return THREAD_MX.getCurrentThreadAllocatedBytes();
+    }
+
+    private static String formatAllocKb(long bytes) {
+        return bytes < 0 ? "n/a" : "%.1f".formatted(bytes / 1024.0);
+    }
+
+    private void writeReport(List<Result> results) throws Exception {
+        CounterReport report = new CounterReport(
+                LocalDateTime.now().format(TIMESTAMP_FORMAT),
+                results.stream().map(Result::toScenarioCounts).toList());
+
+        BenchmarkReportWriter.BenchmarkArtifacts artifacts = BenchmarkReportWriter.prepare("counters");
+        var jsonPath = artifacts.writeJson(report);
+        var csvPath = artifacts.writeCsv(
+                "counters",
+                List.of("scenario", "width_requests", "distinct_width_requests", "repeat_rate_pct",
+                        "summed_request_chars", "max_request_chars", "text_width_calls", "measure_calls",
+                        "line_metrics_calls", "compile_alloc_bytes", "pages", "fragments"),
+                results.stream()
+                        .map(result -> {
+                            CountingTextMeasurementSystem.Counts c = result.counts();
+                            return List.of(
+                                    result.scenario(),
+                                    Long.toString(c.widthRequests()),
+                                    Long.toString(c.distinctWidthRequests()),
+                                    "%.2f".formatted(c.repeatRatePct()),
+                                    Long.toString(c.summedRequestChars()),
+                                    Long.toString(c.maxRequestChars()),
+                                    Long.toString(c.textWidthCalls()),
+                                    Long.toString(c.measureCalls()),
+                                    Long.toString(c.lineMetricsCalls()),
+                                    Long.toString(result.compileAllocBytes()),
+                                    Integer.toString(result.pages()),
+                                    Integer.toString(result.fragments()));
+                        })
+                        .toList());
+
+        System.out.println();
+        System.out.println("Saved JSON counter report to " + jsonPath);
+        System.out.println("Saved CSV counter report to " + csvPath);
+    }
+
+    private record Result(String scenario,
+                          CountingTextMeasurementSystem.Counts counts,
+                          int pages,
+                          int fragments,
+                          long compileAllocBytes) {
+        ScenarioCounts toScenarioCounts() {
+            return new ScenarioCounts(
+                    scenario,
+                    counts.widthRequests(),
+                    counts.distinctWidthRequests(),
+                    counts.repeatRatePct(),
+                    counts.summedRequestChars(),
+                    counts.maxRequestChars(),
+                    counts.textWidthCalls(),
+                    counts.measureCalls(),
+                    counts.lineMetricsCalls(),
+                    counts.lineHeightCalls(),
+                    compileAllocBytes,
+                    pages,
+                    fragments);
+        }
+    }
+
+    private record ScenarioCounts(String scenario,
+                                  long widthRequests,
+                                  long distinctWidthRequests,
+                                  double repeatRatePct,
+                                  long summedRequestChars,
+                                  long maxRequestChars,
+                                  long textWidthCalls,
+                                  long measureCalls,
+                                  long lineMetricsCalls,
+                                  long lineHeightCalls,
+                                  long compileAllocBytes,
+                                  int pages,
+                                  int fragments) {
+    }
+
+    private record CounterReport(String timestamp, List<ScenarioCounts> scenarios) {
+    }
+}
diff --git a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java
new file mode 100644
index 00000000..463f5a80
--- /dev/null
+++ b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java
@@ -0,0 +1,146 @@
+package com.demcha.compose;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for the pure {@link BenchmarkVerdictTool#evaluate} core. These
+ * drive synthetic current-speed reports so the verdict classification and the
+ * hard-gate {@code regressed} flag are validated deterministically, without
+ * running real benchmarks or invoking {@code System.exit}.
+ */
+class BenchmarkVerdictToolTest {
+
+    private static final ObjectMapper JSON = new ObjectMapper();
+    private static final BenchmarkVerdictTool.Thresholds GATE =
+            new BenchmarkVerdictTool.Thresholds(10.0, 15.0, true);
+
+    @Test
+    void flagsAverageLatencyRegressionBeyondBand() throws Exception {
+        JsonNode baseline = report(scenario("invoice-template", 10.0, 10.0, 30.0, 100.0));
+        JsonNode candidate = report(scenario("invoice-template", 12.0, 11.0, 28.0, 100.0)); // +20% avg
+
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
+
+        assertThat(report.regressed()).isTrue();
+        assertThat(report.overallVerdict()).isEqualTo("REGRESSED");
+        assertThat(report.scenarios()).singleElement()
+                .satisfies(row -> assertThat(row.verdict()).isEqualTo("REGRESSED"));
+    }
+
+    @Test
+    void flagsPeakHeapRegressionEvenWhenLatencyIsFlat() throws Exception {
+        JsonNode baseline = report(scenario("cv-template", 10.0, 10.0, 40.0, 100.0));
+        JsonNode candidate = report(scenario("cv-template", 10.3, 10.0, 40.0, 120.0)); // +3% avg, +20% heap
+
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
+
+        assertThat(report.regressed()).isTrue();
+        assertThat(report.scenarios().get(0).verdict()).isEqualTo("REGRESSED");
+    }
+
+    @Test
+    void marksClearSpeedupAsImproved() throws Exception {
+        JsonNode baseline = report(scenario("proposal-template", 10.0, 12.0, 28.0, 150.0));
+        JsonNode candidate = report(scenario("proposal-template", 8.0, 9.0, 36.0, 150.0)); // -20% avg
+
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
+
+        assertThat(report.regressed()).isFalse();
+        assertThat(report.overallVerdict()).isEqualTo("IMPROVED");
+        assertThat(report.scenarios().get(0).verdict()).isEqualTo("IMPROVED");
+    }
+
+    @Test
+    void treatsWithinBandChangesAsNeutral() throws Exception {
+        JsonNode baseline = report(scenario("engine-simple", 5.0, 6.0, 170.0, 40.0));
+        JsonNode candidate = report(scenario("engine-simple", 5.2, 6.1, 168.0, 43.0)); // +4% avg, +7.5% heap
+
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
+
+        assertThat(report.regressed()).isFalse();
+        assertThat(report.overallVerdict()).isEqualTo("NEUTRAL");
+        assertThat(report.scenarios().get(0).verdict()).isEqualTo("NEUTRAL");
+    }
+
+    @Test
+    void overallIsRegressedWhenAnyScenarioRegresses() throws Exception {
+        JsonNode baseline = report(
+                scenario("engine-simple", 5.0, 6.0, 170.0, 40.0),
+                scenario("invoice-template", 10.0, 11.0, 28.0, 100.0));
+        JsonNode candidate = report(
+                scenario("engine-simple", 5.1, 6.1, 168.0, 41.0),   // neutral
+                scenario("invoice-template", 13.0, 14.0, 22.0, 100.0)); // +30% avg -> regressed
+
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
+
+        assertThat(report.regressed()).isTrue();
+        assertThat(report.overallVerdict()).isEqualTo("REGRESSED");
+    }
+
+    @Test
+    void reportsMissingScenariosWithoutGating() throws Exception {
+        JsonNode baseline = report(
+                scenario("engine-simple", 5.0, 6.0, 170.0, 40.0),
+                scenario("invoice-template", 10.0, 11.0, 28.0, 100.0));
+        JsonNode candidate = report(scenario("engine-simple", 5.1, 6.1, 168.0, 41.0)); // invoice dropped
+
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
+
+        assertThat(report.missingScenarios()).containsExactly("invoice-template");
+        assertThat(report.scenarios()).hasSize(1);
+        assertThat(report.regressed()).isFalse();
+    }
+
+    @Test
+    void regressedFlagReflectsStateIndependentOfGateFlag() throws Exception {
+        JsonNode baseline = report(scenario("invoice-template", 10.0, 10.0, 30.0, 100.0));
+        JsonNode candidate = report(scenario("invoice-template", 12.0, 11.0, 28.0, 100.0)); // +20% avg
+
+        BenchmarkVerdictTool.Thresholds gateOff = new BenchmarkVerdictTool.Thresholds(10.0, 15.0, false);
+        BenchmarkVerdictTool.VerdictReport report =
+                BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, gateOff);
+
+        // The state is still "regressed"; only the build-failing decision (exit code) is gated.
+        assertThat(report.regressed()).isTrue();
+        assertThat(report.gateEnabled()).isFalse();
+    }
+
+    private static JsonNode report(String... latencyRows) throws Exception {
+        String latency = String.join(",", latencyRows);
+        String json = """
+                {
+                  "timestamp": "2026-06-08 12:00:00",
+                  "profile": "full",
+                  "latency": [%s],
+                  "throughput": []
+                }
+                """.formatted(latency);
+        return JSON.readTree(json);
+    }
+
+    private static String scenario(String name, double avgMs, double p95Ms, double docsPerSec, double peakHeapMb) {
+        return """
+                {
+                  "scenario": "%s",
+                  "description": "%s",
+                  "avgMillis": %s,
+                  "p50Millis": %s,
+                  "p95Millis": %s,
+                  "maxMillis": %s,
+                  "docsPerSecond": %s,
+                  "avgKilobytes": 1.0,
+                  "peakHeapMb": %s
+                }
+                """.formatted(name, name, avgMs, avgMs, p95Ms, p95Ms, docsPerSec, peakHeapMb);
+    }
+}
diff --git a/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java b/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java
new file mode 100644
index 00000000..ebd7397c
--- /dev/null
+++ b/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java
@@ -0,0 +1,81 @@
+package com.demcha.compose;
+
+import com.demcha.compose.engine.components.content.text.TextStyle;
+import com.demcha.compose.engine.components.geometry.ContentSize;
+import com.demcha.compose.engine.measurement.TextMeasurementSystem;
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for {@link CountingTextMeasurementSystem}. They use a trivial fake
+ * delegate (no PDFBox) so the counting/forwarding contract is verified
+ * deterministically and fast.
+ */
+class CountingTextMeasurementSystemTest {
+
+    private static final TextStyle STYLE = TextStyle.DEFAULT_STYLE;
+
+    @Test
+    void countsWidthRequestsDistinctKeysAndCharacters() {
+        CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement());
+
+        double abWidth = counter.textWidth(STYLE, "ab");
+        counter.textWidth(STYLE, "ab");   // repeat -> same key
+        counter.textWidth(STYLE, "abc");
+        counter.measure(STYLE, "ab");     // measure shares the "ab" key
+        counter.lineMetrics(STYLE);
+        counter.lineHeight(STYLE);
+
+        CountingTextMeasurementSystem.Counts counts = counter.snapshot();
+
+        assertThat(abWidth).isEqualTo(2.0); // delegate pass-through (fake width == length)
+        assertThat(counts.textWidthCalls()).isEqualTo(3);
+        assertThat(counts.measureCalls()).isEqualTo(1);
+        assertThat(counts.widthRequests()).isEqualTo(4);
+        assertThat(counts.distinctWidthRequests()).isEqualTo(2); // "ab", "abc"
+        assertThat(counts.summedRequestChars()).isEqualTo(9);    // 2 + 2 + 3 + 2
+        assertThat(counts.maxRequestChars()).isEqualTo(3);
+        assertThat(counts.repeatRatePct()).isEqualTo(50.0);      // 1 - 2/4
+        assertThat(counts.lineMetricsCalls()).isEqualTo(1);
+        assertThat(counts.lineHeightCalls()).isEqualTo(1);
+    }
+
+    @Test
+    void emptySnapshotHasNoRequests() {
+        CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement());
+
+        CountingTextMeasurementSystem.Counts counts = counter.snapshot();
+
+        assertThat(counts.widthRequests()).isZero();
+        assertThat(counts.distinctWidthRequests()).isZero();
+        assertThat(counts.repeatRatePct()).isZero();
+        assertThat(counts.summedRequestChars()).isZero();
+    }
+
+    @Test
+    void treatsNullTextAsEmptyWithoutFailing() {
+        CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement());
+
+        counter.textWidth(STYLE, null);
+
+        CountingTextMeasurementSystem.Counts counts = counter.snapshot();
+        assertThat(counts.widthRequests()).isEqualTo(1);
+        assertThat(counts.summedRequestChars()).isZero();
+        assertThat(counts.distinctWidthRequests()).isEqualTo(1);
+    }
+
+    /** Minimal delegate: width == text length, fixed line metrics. */
+    private static final class FakeMeasurement implements TextMeasurementSystem {
+        @Override
+        public ContentSize measure(TextStyle style, String text) {
+            int length = text == null ? 0 : text.length();
+            return new ContentSize(length, 10.0);
+        }
+
+        @Override
+        public LineMetrics lineMetrics(TextStyle style) {
+            return new LineMetrics(8.0, 2.0, 0.0);
+        }
+    }
+}
diff --git a/scripts/run-benchmarks.ps1 b/scripts/run-benchmarks.ps1
index f816915e..4126ade5 100644
--- a/scripts/run-benchmarks.ps1
+++ b/scripts/run-benchmarks.ps1
@@ -14,11 +14,17 @@ diff gracefully when no compatible historical pair exists yet.
 
 Use `-Repeat` to generate repeated current-speed/comparative runs and median
 aggregates for more stable local comparisons.
+
+Step 11 (`11-verdict-current-speed`) compares the current-speed result against
+the committed baseline (`baselines/current-speed-<profile>.json`) and fails the
+run when a canonical scenario regresses beyond the noise band. Use `-SkipVerdict`
+to skip that gate while exploring. See `docs/operations/perf-change-workflow.md`.
 #>
 param(
     [switch]$IncludeEndurance,
     [switch]$OpenResults,
     [switch]$SkipDiff,
+    [switch]$SkipVerdict,
     [ValidateSet("full", "smoke")]
     [string]$CurrentSpeedProfile = "full",
     [ValidateRange(1, 10)]
@@ -448,6 +454,31 @@ try {
     }
     Add-SummaryLine(("- Benchmarks folder: ``{0}``" -f (Join-Path $repoRoot "target\benchmarks")))
 
+    if (-not $SkipVerdict) {
+        $verdictBaseline = Join-Path $repoRoot ("baselines\current-speed-{0}.json" -f $CurrentSpeedProfile)
+        if ($Repeat -gt 1) {
+            $verdictCandidate = Get-IfExists (Join-Path $repoRoot ("target\benchmarks\{0}\latest.json" -f $currentSpeedAggregateSuite))
+        } else {
+            $verdictCandidate = $currentSpeedLatest
+        }
+
+        if (-not (Test-Path $verdictBaseline)) {
+            Add-SummaryLine("- ``11-verdict-current-speed``: skipped")
+            Add-SummaryLine(("  - Reason: no committed baseline at ``{0}`` (see docs/operations/perf-change-workflow.md)" -f $verdictBaseline))
+        } elseif (-not $verdictCandidate) {
+            Add-SummaryLine("- ``11-verdict-current-speed``: skipped")
+            Add-SummaryLine("  - Reason: no candidate current-speed report was produced this run")
+        } else {
+            # Hard gate: BenchmarkVerdictTool exits non-zero on a regression
+            # beyond the noise band, which makes Invoke-LoggedCommand throw and
+            # fail the whole benchmark run.
+            Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -Arguments @($verdictBaseline, $verdictCandidate)
+        }
+    } else {
+        Add-SummaryLine("- ``11-verdict-current-speed``: skipped")
+        Add-SummaryLine("  - Reason: ``-SkipVerdict`` was provided")
+    }
+
     Write-Section "Benchmark run completed"
     Write-Host "Summary: $summaryPath" -ForegroundColor Green
     Write-Host "Benchmarks: $(Join-Path $repoRoot 'target\benchmarks')" -ForegroundColor Green

From d68bd96403f68ffeaa9042beea2cdff38b643bcb Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 12:24:23 +0100
Subject: [PATCH 2/7] perf(layout): wrapParagraph running-width, stop
 re-measuring growing line prefix

The greedy line wrapper measured textWidth(currentLine + nextToken) on every token, re-measuring the whole accumulated line - O(line-length x tokens) measured characters plus the per-glyph sanitize/encode it triggers. Keep a running line width and measure each token once instead; line starts re-measure to pin FP drift. Glyph advances are additive (no kerning) and EPS=1e-6 absorbs FP, so break points are unchanged - rendering is byte-identical (1144 tests + all layout/visual snapshots pass).

Probe: long-text measured characters 291,324 -> 32,457 (~9x fewer); same-session A/B (full, Repeat 7): proposal -57% time / +131% throughput. No API or behaviour change. Refs audit finding F1.
---
 CHANGELOG.md                                  | 24 +++++++++++++++++++
 .../document/layout/TextFlowSupport.java      | 22 +++++++++++++----
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bab5d636..5ee69a8d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,30 @@ follow semantic versioning; release dates are ISO 8601.
 
 Open cycle — bug-fix / housekeeping. Entries land here as they merge.
 
+### Performance
+
+- **Text wrapping stops re-measuring the growing line prefix.** The greedy line
+  wrapper in `TextFlowSupport` now keeps a running line width and measures each
+  token once, instead of re-measuring the whole accumulated line on every token.
+  This removes O(line-length × tokens) measured-character work — and the
+  per-glyph sanitize/encode it triggered — from paragraph layout. **Output is
+  byte-identical: all layout and visual-regression snapshots pass unchanged.**
+  The effect is workload-dependent and concentrated in long-text documents;
+  measured locally (same-session A/B, full profile) a long multi-page proposal
+  rendered markedly faster, and a measurement-count probe showed ~9× fewer
+  measured characters on a long paragraph. No public API or behaviour change.
+
+### Tests / tooling
+
+- **Benchmark regression gate and measurement probe (benchmarks module, not part
+  of the published library).** `BenchmarkVerdictTool` compares a current-speed run
+  to the committed baseline (`baselines/current-speed-full.json`) and reports
+  improved / neutral / regressed, failing on a regression beyond the noise band.
+  `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture
+  deterministic measurement-call counts and per-compile allocation bytes for
+  proving algorithmic / allocation changes. `scripts/run-benchmarks.ps1` gains the
+  `11-verdict-current-speed` step (skippable via `-SkipVerdict`).
+
 ## v1.7.0 — 2026-06-07
 
 Canonical DSL primitives — additive only, zero breaking changes. Adding public
diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
index 01349737..158a451b 100644
--- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
+++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
@@ -820,6 +820,15 @@ private static List<String> wrapParagraph(List<String> logicalLines,
             List<String> tokens = tokenize(logicalLine);
             String currentPrefix = initialPrefix;
             String currentLine = initialPrefix;
+            // Running width of currentLine. The greedy fit only needs the width
+            // of the line built so far plus the next token, not a fresh
+            // measurement of the whole growing prefix on every token (which made
+            // wrapping O(chars per line x tokens) measured characters). PDFBox
+            // glyph advances are additive here (no kerning), so accumulating
+            // per-token widths matches measuring the full string to well within
+            // the EPS the fit test already tolerates; each new line re-measures
+            // its (short) start to pin any floating-point drift.
+            double currentWidth = measurement.textWidth(style, currentLine);
             boolean hasContent = false;
 
             for (String token : tokens) {
@@ -828,9 +837,10 @@ private static List<String> wrapParagraph(List<String> logicalLines,
                     continue;
                 }
 
-                String candidate = currentLine + nextToken;
-                if (!hasContent || measurement.textWidth(style, candidate) <= maxWidth + EPS) {
-                    currentLine = candidate;
+                double nextTokenWidth = measurement.textWidth(style, nextToken);
+                if (!hasContent || currentWidth + nextTokenWidth <= maxWidth + EPS) {
+                    currentLine = currentLine + nextToken;
+                    currentWidth += nextTokenWidth;
                     hasContent = true;
                     continue;
                 }
@@ -838,12 +848,15 @@ private static List<String> wrapParagraph(List<String> logicalLines,
                 result.add(trimTrailingSpaces(currentLine));
                 currentPrefix = continuationPrefix;
                 currentLine = continuationPrefix;
+                currentWidth = measurement.textWidth(style, continuationPrefix);
                 hasContent = false;
 
                 double availableWidth = availableWidthForPrefix(maxWidth, currentPrefix, style, measurement);
                 String strippedToken = nextToken.stripLeading();
-                if (measurement.textWidth(style, currentPrefix + strippedToken) <= maxWidth + EPS) {
+                double strippedTokenWidth = measurement.textWidth(style, strippedToken);
+                if (currentWidth + strippedTokenWidth <= maxWidth + EPS) {
                     currentLine = currentPrefix + strippedToken;
+                    currentWidth += strippedTokenWidth;
                     hasContent = true;
                     continue;
                 }
@@ -858,6 +871,7 @@ private static List<String> wrapParagraph(List<String> logicalLines,
                     currentPrefix = continuationPrefix;
                 }
                 currentLine = currentPrefix + chunks.get(chunks.size() - 1);
+                currentWidth = measurement.textWidth(style, currentLine);
                 hasContent = true;
             }
 

From 2eca80a21b8a69449d5259a0395a7b991bb9c04e Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 12:45:49 +0100
Subject: [PATCH 3/7] fix(bench): gate on avg latency only; peakHeapMb and
 single runs are advisory

peakHeapMb is a Runtime used-heap delta - GC-timing dependent and very noisy (observed 48-170 MB across repeats of identical code), so it false-failed the gate on invoice-template (heap +18.7%) even though that run was -15% faster on time. BenchmarkVerdictTool now hard-gates on average latency only; peakHeapMb is reported as advisory (still shown, never fails the build). The deterministic heap signal stays in MeasurementCountBenchmark (per-compile allocation bytes).

run-benchmarks.ps1: step 11 runs the verdict as advisory for single runs (Repeat 1) and hard-gates only for medians (-Repeat >= 2), since one run is too noisy to gate against a median baseline. Unit test + CHANGELOG updated.
---
 CHANGELOG.md                                  |  6 ++-
 .../demcha/compose/BenchmarkVerdictTool.java  | 45 ++++++++++++++-----
 .../compose/BenchmarkVerdictToolTest.java     |  9 ++--
 scripts/run-benchmarks.ps1                    | 17 +++++--
 4 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ee69a8d..824ccff1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,7 +25,11 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
 - **Benchmark regression gate and measurement probe (benchmarks module, not part
   of the published library).** `BenchmarkVerdictTool` compares a current-speed run
   to the committed baseline (`baselines/current-speed-full.json`) and reports
-  improved / neutral / regressed, failing on a regression beyond the noise band.
+  improved / neutral / regressed. The hard gate fails only on an **average-latency**
+  regression beyond the noise band; peak heap is **advisory** (the `peakHeapMb`
+  used-heap delta is GC-timing noisy — use the probe's per-compile allocation
+  bytes for deterministic heap). A single run is advisory; the hard gate needs a
+  median (`-Repeat` >= 2).
   `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture
   deterministic measurement-call counts and per-compile allocation bytes for
   proving algorithmic / allocation changes. `scripts/run-benchmarks.ps1` gains the
diff --git a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java
index 0817baf1..b231265f 100644
--- a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java
+++ b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java
@@ -22,10 +22,15 @@
  * described in {@code docs/operations/perf-change-workflow.md}. Unlike
  * {@link BenchmarkDiffTool}, which only prints signed deltas between two
  * arbitrary runs, this tool classifies each delta against a noise band and
- * fails the build (non-zero exit) when any scenario regresses beyond the band
- * on a <em>gate metric</em> (average latency or peak heap). It is meant to be
- * pointed at a stable, committed baseline (see {@code baselines/}) rather than
- * at the previous ephemeral run under {@code target/}.</p>
+ * fails the build (non-zero exit) when a scenario regresses beyond the band on
+ * the <em>gate metric</em> (average latency). Peak heap is reported as an
+ * <em>advisory</em> only: the {@code peakHeapMb} field is a used-heap delta
+ * sampled via {@code Runtime}, which is GC-timing dependent and very noisy
+ * run-to-run, so it must not fail the build. The deterministic heap signal is
+ * {@code MeasurementCountBenchmark}'s per-compile allocation bytes
+ * (ThreadMXBean). It is meant to be pointed at a stable, committed baseline (see
+ * {@code baselines/}) rather than at the previous ephemeral run under
+ * {@code target/}.</p>
  *
  * <p>Usage:</p>
  * <ul>
@@ -161,9 +166,14 @@ static VerdictReport evaluate(String baselinePath,
             double candidateHeap = after.path("peakHeapMb").asDouble();
             double heapDeltaPct = percentDelta(baselineHeap, candidateHeap);
 
-            // Gate metrics: average latency and peak heap (both lower-is-better).
+            // Hard gate metric: average latency only. peakHeapMb is a used-heap
+            // delta sampled via Runtime — GC-timing dependent and very noisy
+            // run-to-run (observed 48..170 MB across repeats of identical code),
+            // so it is reported as ADVISORY, never gated. The deterministic heap
+            // signal is MeasurementCountBenchmark's per-compile allocation bytes.
+            boolean heapAdvisory = heapDeltaPct > thresholds.heapBandPct();
             Verdict verdict;
-            if (avgDeltaPct > thresholds.avgBandPct() || heapDeltaPct > thresholds.heapBandPct()) {
+            if (avgDeltaPct > thresholds.avgBandPct()) {
                 verdict = Verdict.REGRESSED;
                 anyRegressed = true;
             } else if (avgDeltaPct < -thresholds.avgBandPct()) {
@@ -184,6 +194,7 @@ static VerdictReport evaluate(String baselinePath,
                     baselineHeap,
                     candidateHeap,
                     heapDeltaPct,
+                    heapAdvisory,
                     verdict.name()));
         }
 
@@ -212,8 +223,10 @@ private static void print(VerdictReport report) {
         System.out.println("Profile: " + report.profile());
         System.out.println("Baseline: " + report.baselinePath() + " (" + report.baselineTimestamp() + ")");
         System.out.println("Candidate: " + report.candidatePath() + " (" + report.candidateTimestamp() + ")");
-        System.out.println("Bands: avg +/-" + format(report.avgBandPct()) + "%, peakHeap +/-"
-                + format(report.heapBandPct()) + "% | gate: " + (report.gateEnabled() ? "enabled" : "disabled"));
+        System.out.println("Gate: avg latency +/-" + format(report.avgBandPct())
+                + "% (HARD). peakHeap +/-" + format(report.heapBandPct())
+                + "% = ADVISORY only (GC-timing noisy, not gated). gate: "
+                + (report.gateEnabled() ? "enabled" : "disabled"));
         System.out.println();
         System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n",
                 "Scenario", "Avg pct", "p95 pct", "Docs/s pct", "Heap pct", "Verdict");
@@ -227,13 +240,22 @@ private static void print(VerdictReport report) {
                     signedPercent(row.peakHeapDeltaPct()),
                     row.verdict());
         }
+        List<String> heapAdvisories = report.scenarios().stream()
+                .filter(ScenarioVerdict::heapAdvisory)
+                .map(row -> row.scenario() + " (" + signedPercent(row.peakHeapDeltaPct()) + ")")
+                .toList();
+        if (!heapAdvisories.isEmpty()) {
+            System.out.println();
+            System.out.println("ADVISORY (not gated) - peakHeapMb over band: " + String.join(", ", heapAdvisories)
+                    + ". peakHeapMb is GC-timing noisy; use MeasurementCountBenchmark for the deterministic allocation signal.");
+        }
         if (!report.missingScenarios().isEmpty()) {
             System.out.println();
             System.out.println("WARNING: baseline scenarios missing from candidate (not gated): "
                     + String.join(", ", report.missingScenarios()));
         }
         System.out.println();
-        System.out.println("Overall verdict: " + report.overallVerdict());
+        System.out.println("Overall verdict: " + report.overallVerdict() + " (hard gate: average latency)");
     }
 
     private static void write(VerdictReport report) throws Exception {
@@ -243,7 +265,8 @@ private static void write(VerdictReport report) throws Exception {
                 "verdict",
                 List.of("scenario", "baseline_avg_ms", "candidate_avg_ms", "avg_delta_pct",
                         "p95_delta_pct", "docs_per_sec_delta_pct",
-                        "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct", "verdict"),
+                        "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct",
+                        "heap_advisory", "verdict"),
                 report.scenarios().stream()
                         .map(row -> List.of(
                                 row.scenario(),
@@ -255,6 +278,7 @@ private static void write(VerdictReport report) throws Exception {
                                 format(row.baselinePeakHeapMb()),
                                 format(row.candidatePeakHeapMb()),
                                 format(row.peakHeapDeltaPct()),
+                                Boolean.toString(row.heapAdvisory()),
                                 row.verdict()))
                         .toList());
         System.out.println("Saved JSON verdict report to " + jsonPath);
@@ -334,6 +358,7 @@ record ScenarioVerdict(String scenario,
                            double baselinePeakHeapMb,
                            double candidatePeakHeapMb,
                            double peakHeapDeltaPct,
+                           boolean heapAdvisory,
                            String verdict) {
     }
 
diff --git a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java
index 463f5a80..75996c54 100644
--- a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java
+++ b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java
@@ -33,15 +33,18 @@ void flagsAverageLatencyRegressionBeyondBand() throws Exception {
     }
 
     @Test
-    void flagsPeakHeapRegressionEvenWhenLatencyIsFlat() throws Exception {
+    void peakHeapOverBandIsAdvisoryNotGated() throws Exception {
         JsonNode baseline = report(scenario("cv-template", 10.0, 10.0, 40.0, 100.0));
         JsonNode candidate = report(scenario("cv-template", 10.3, 10.0, 40.0, 120.0)); // +3% avg, +20% heap
 
         BenchmarkVerdictTool.VerdictReport report =
                 BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE);
 
-        assertThat(report.regressed()).isTrue();
-        assertThat(report.scenarios().get(0).verdict()).isEqualTo("REGRESSED");
+        // Heap over band must NOT fail the gate — peakHeapMb is advisory only
+        // (GC-timing noisy). The hard gate metric is average latency.
+        assertThat(report.regressed()).isFalse();
+        assertThat(report.scenarios().get(0).verdict()).isEqualTo("NEUTRAL");
+        assertThat(report.scenarios().get(0).heapAdvisory()).isTrue();
     }
 
     @Test
diff --git a/scripts/run-benchmarks.ps1 b/scripts/run-benchmarks.ps1
index 4126ade5..dbe162c0 100644
--- a/scripts/run-benchmarks.ps1
+++ b/scripts/run-benchmarks.ps1
@@ -469,10 +469,19 @@ try {
             Add-SummaryLine("- ``11-verdict-current-speed``: skipped")
             Add-SummaryLine("  - Reason: no candidate current-speed report was produced this run")
         } else {
-            # Hard gate: BenchmarkVerdictTool exits non-zero on a regression
-            # beyond the noise band, which makes Invoke-LoggedCommand throw and
-            # fail the whole benchmark run.
-            Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -Arguments @($verdictBaseline, $verdictCandidate)
+            # Hard gate only for medians (Repeat >= 2): a single run is too noisy
+            # to gate against a median baseline, so Repeat 1 runs the verdict as
+            # advisory (gate disabled) — it prints the table but never fails the
+            # run. Use -Repeat 5 for the hard gate. The hard gate metric is
+            # average latency; peakHeapMb is advisory inside the tool. When the
+            # gate is on, BenchmarkVerdictTool exits non-zero on a regression,
+            # which makes Invoke-LoggedCommand throw and fail the whole run.
+            $verdictProperties = @()
+            if ($Repeat -le 1) {
+                $verdictProperties += "-Dgraphcompose.benchmark.verdict.gate=false"
+                Add-SummaryLine("- ``11-verdict-current-speed``: advisory (single run; use -Repeat 5 for the hard gate)")
+            }
+            Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -SystemProperties $verdictProperties -Arguments @($verdictBaseline, $verdictCandidate)
         }
     } else {
         Add-SummaryLine("- ``11-verdict-current-speed``: skipped")

From b1e24cee4750ec79f52b5f3f8a11365b200dd62c Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 14:26:24 +0100
Subject: [PATCH 4/7] perf(layout): binary-search long-token break in
 fitCharacters (drop quadratic re-measure)

fitCharacters re-measured text.substring(0,index) for every index when breaking a long unbreakable token - O(n) width calls and O(n^2) measured characters. The fit predicate width(prefix) <= maxWidth is monotonic in prefix length, so binary-search the break index instead: it returns the same lastFitting (byte-identical wrapping) in O(log n) width calls.

Probe on a 600-char token: width calls 652 -> 97, measured chars 36,317 -> 7,114, alloc ~1.5MB -> ~0.8MB. long-text (F1 path) and tables untouched; 1144 tests pass with no snapshot drift. Refs audit finding F2.
---
 CHANGELOG.md                                  |  8 ++++++++
 .../document/layout/TextFlowSupport.java      | 20 ++++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 824ccff1..3c51b77a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,14 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
   rendered markedly faster, and a measurement-count probe showed ~9× fewer
   measured characters on a long paragraph. No public API or behaviour change.
 
+- **Long-token line breaking is no longer quadratic.** `TextFlowSupport.fitCharacters`
+  now binary-searches the break point instead of re-measuring every growing prefix
+  one character at a time. For an unbreakable run (long URL/ID, no-space CJK, or a
+  very narrow column) this cuts measurement calls and measured characters by
+  ~80–85% (probe: 652 → 97 width calls, 36k → 7k measured chars on a 600-char
+  token). **Output is byte-identical** — the fit predicate is monotonic, so the
+  search returns the same break index. No public API or behaviour change.
+
 ### Tests / tooling
 
 - **Benchmark regression gate and measurement probe (benchmarks module, not part
diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
index 158a451b..b8d17260 100644
--- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
+++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
@@ -1517,13 +1517,23 @@ private static int fitCharacters(String text,
                                      TextStyle style,
                                      double maxWidth,
                                      TextMeasurementSystem measurement) {
+        // Largest prefix length whose width fits. The fit predicate
+        // width(substring(0,n)) <= maxWidth is monotonic in n (each added char
+        // contributes a non-negative glyph advance), so the fitting lengths form
+        // a prefix [1..lastFitting] and a binary search finds the SAME boundary
+        // as the old linear scan — but in O(log n) width calls instead of
+        // measuring every growing prefix (which was O(n) calls and O(n^2)
+        // measured characters for a long unbreakable token).
         int lastFitting = 0;
-        for (int index = 1; index <= text.length(); index++) {
-            String candidate = text.substring(0, index);
-            if (measurement.textWidth(style, candidate) <= maxWidth + EPS) {
-                lastFitting = index;
+        int low = 1;
+        int high = text.length();
+        while (low <= high) {
+            int mid = (low + high) >>> 1;
+            if (measurement.textWidth(style, text.substring(0, mid)) <= maxWidth + EPS) {
+                lastFitting = mid;
+                low = mid + 1;
             } else {
-                break;
+                high = mid - 1;
             }
         }
         return lastFitting == 0 ? Math.min(1, text.length()) : lastFitting;

From ea15d714030c79e62ba371c4904e910afcceaaec Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 14:44:12 +0100
Subject: [PATCH 5/7] bench: add long-token current-speed scenario (worst-case
 character wrap)

40 paragraphs with ~520-char unbreakable URL/ID tokens that overflow the line and force splitLongToken/fitCharacters. Makes the F2 worst case visible (same-session A/B: -44% avg, 14.47 -> 8.06 ms on this scenario) and guards against re-introducing quadratic long-token wrapping.
---
 .../demcha/compose/CurrentSpeedBenchmark.java | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java
index d96dfc93..2858d64a 100644
--- a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java
+++ b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java
@@ -112,7 +112,8 @@ private void run() throws Exception {
                 new Scenario("invoice-template", "Compose-first invoice template", this::renderInvoiceTemplateDocument),
                 new Scenario("cv-template", "Compose-first CV template", this::renderCvTemplateDocument),
                 new Scenario("proposal-template", "Long multi-page proposal template", this::renderProposalTemplateDocument),
-                new Scenario("feature-rich", "QR, barcode, watermark, header/footer, page break", this::renderFeatureRichDocument)
+                new Scenario("feature-rich", "QR, barcode, watermark, header/footer, page break", this::renderFeatureRichDocument),
+                new Scenario("long-token", "Long unbreakable tokens (URLs/IDs) forcing character-level wrap", this::renderLongTokenDocument)
         );
 
         System.out.println("Latency benchmark");
@@ -551,6 +552,31 @@ private byte[] renderProposalTemplateDocument() throws Exception {
         }
     }
 
+    private byte[] renderLongTokenDocument() throws Exception {
+        // Worst-case for character-level wrapping: many long unbreakable tokens
+        // (long URLs/IDs/no-space runs) that overflow the line and force
+        // splitLongToken -> fitCharacters. Exercises audit finding F2.
+        try (DocumentSession document = GraphCompose.document()
+                .pageSize(com.demcha.compose.document.api.DocumentPageSize.A4)
+                .margin(22, 22, 22, 22)
+                .create()) {
+            var root = document.dsl()
+                    .pageFlow()
+                    .name("BenchmarkLongTokenRoot")
+                    .spacing(8);
+            for (int i = 1; i <= 40; i++) {
+                final int index = i;
+                root.addParagraph(paragraph -> paragraph
+                        .name("BenchmarkLongToken" + index)
+                        .text("Reference " + index + ": https://example.com/" + "a".repeat(500)
+                                + " trailing words to wrap normally after the long token.")
+                        .textStyle(BODY_STYLE));
+            }
+            root.build();
+            return document.toPdfBytes();
+        }
+    }
+
     private byte[] renderFeatureRichDocument() throws Exception {
         PdfFixedLayoutBackend backend = PdfFixedLayoutBackend.builder()
                 .metadata(PdfMetadataOptions.builder()

From 8435be550fd2b7bee7ab2d0b98fb78fd4f2da271 Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 15:14:39 +0100
Subject: [PATCH 6/7] perf(layout): assemble wrapped lines via StringBuilder
 (drop per-token string copy)

wrapParagraph concatenated Strings token-by-token (currentLine + token), re-copying the whole growing line each token and producing a throwaway String per step. Accumulate in a reused StringBuilder instead; the character sequence is identical so wrapping stays byte-for-byte the same (1144 tests, snapshots clean). Measured effect is small on typical text (~1% less compile allocation on long-text, lines bounded by column width) but it removes a latent O(line-length^2) copy on very wide/unwrapped lines.
---
 CHANGELOG.md                                  |  9 ++++++
 .../document/layout/TextFlowSupport.java      | 28 +++++++++++++------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c51b77a..bcc1705c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,15 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
   token). **Output is byte-identical** — the fit predicate is monotonic, so the
   search returns the same break index. No public API or behaviour change.
 
+- **Line assembly avoids quadratic string copying.** `TextFlowSupport.wrapParagraph`
+  now accumulates each wrapped line in a reused `StringBuilder` instead of
+  concatenating Strings token-by-token (which re-copied the whole growing line and
+  produced a throwaway `String` per token). **Output is byte-identical.** The effect
+  is small on typical text (lines are bounded by column width — a probe showed ~1%
+  less per-compile allocation on a long-text document), but it removes a latent
+  O(line-length²) copy on pathologically wide / unwrapped lines. No public API or
+  behaviour change.
+
 ### Tests / tooling
 
 - **Benchmark regression gate and measurement probe (benchmarks module, not part
diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
index b8d17260..0da49991 100644
--- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
+++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java
@@ -819,7 +819,14 @@ private static List<String> wrapParagraph(List<String> logicalLines,
 
             List<String> tokens = tokenize(logicalLine);
             String currentPrefix = initialPrefix;
-            String currentLine = initialPrefix;
+            // currentLine is assembled in a reused StringBuilder: appending a
+            // token is amortised O(1), whereas concatenating Strings re-copied
+            // the whole growing line on every token (O(chars^2) char copies plus
+            // a fresh throwaway String each step). The character sequence is
+            // identical to the old `+` assembly, so wrapping stays byte-for-byte
+            // the same; we only materialise a String via toString() when a line
+            // is emitted (which the result list needs anyway).
+            StringBuilder currentLine = new StringBuilder(initialPrefix);
             // Running width of currentLine. The greedy fit only needs the width
             // of the line built so far plus the next token, not a fresh
             // measurement of the whole growing prefix on every token (which made
@@ -828,7 +835,7 @@ private static List<String> wrapParagraph(List<String> logicalLines,
             // per-token widths matches measuring the full string to well within
             // the EPS the fit test already tolerates; each new line re-measures
             // its (short) start to pin any floating-point drift.
-            double currentWidth = measurement.textWidth(style, currentLine);
+            double currentWidth = measurement.textWidth(style, initialPrefix);
             boolean hasContent = false;
 
             for (String token : tokens) {
@@ -839,15 +846,16 @@ private static List<String> wrapParagraph(List<String> logicalLines,
 
                 double nextTokenWidth = measurement.textWidth(style, nextToken);
                 if (!hasContent || currentWidth + nextTokenWidth <= maxWidth + EPS) {
-                    currentLine = currentLine + nextToken;
+                    currentLine.append(nextToken);
                     currentWidth += nextTokenWidth;
                     hasContent = true;
                     continue;
                 }
 
-                result.add(trimTrailingSpaces(currentLine));
+                result.add(trimTrailingSpaces(currentLine.toString()));
                 currentPrefix = continuationPrefix;
-                currentLine = continuationPrefix;
+                currentLine.setLength(0);
+                currentLine.append(continuationPrefix);
                 currentWidth = measurement.textWidth(style, continuationPrefix);
                 hasContent = false;
 
@@ -855,7 +863,8 @@ private static List<String> wrapParagraph(List<String> logicalLines,
                 String strippedToken = nextToken.stripLeading();
                 double strippedTokenWidth = measurement.textWidth(style, strippedToken);
                 if (currentWidth + strippedTokenWidth <= maxWidth + EPS) {
-                    currentLine = currentPrefix + strippedToken;
+                    currentLine.setLength(0);
+                    currentLine.append(currentPrefix).append(strippedToken);
                     currentWidth += strippedTokenWidth;
                     hasContent = true;
                     continue;
@@ -870,12 +879,13 @@ private static List<String> wrapParagraph(List<String> logicalLines,
                     result.add(currentPrefix + chunks.get(index));
                     currentPrefix = continuationPrefix;
                 }
-                currentLine = currentPrefix + chunks.get(chunks.size() - 1);
-                currentWidth = measurement.textWidth(style, currentLine);
+                currentLine.setLength(0);
+                currentLine.append(currentPrefix).append(chunks.get(chunks.size() - 1));
+                currentWidth = measurement.textWidth(style, currentLine.toString());
                 hasContent = true;
             }
 
-            result.add(trimTrailingSpaces(currentLine));
+            result.add(trimTrailingSpaces(currentLine.toString()));
         }
 
         return List.copyOf(result);

From ae461ab100e6db7299d5e6403efcc44545166ebf Mon Sep 17 00:00:00 2001
From: DemchaAV <demchaav@gmail.com>
Date: Mon, 8 Jun 2026 15:30:02 +0100
Subject: [PATCH 7/7] bench: warm up MeasurementCountBenchmark before the
 allocation window

The probe measured each scenario once, and the first scenario (long-text) in a fresh JVM carried ~36 MB of one-time class-load/JIT/static-init allocation -- a JVM artifact, not a layout cost (verified: cold first compile 36.6 MB vs warm 0.65 MB for the same document; layout alloc scales sub-linearly). Warm up 5 iterations before the measured pass so Alloc KB reflects steady-state per-document allocation; measurement-count columns are exact regardless. Also drops the F1b CHANGELOG perf claim -- a warm A/B shows no measurable steady-state allocation change (719.8 = 719.8 KB), so F1b stays as a byte-identical latent-O(n^2) cleanup, not a perf win.
---
 CHANGELOG.md                                  | 13 +++-------
 .../compose/MeasurementCountBenchmark.java    | 26 +++++++++++++++----
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bcc1705c..0938c80b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,15 +28,6 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
   token). **Output is byte-identical** — the fit predicate is monotonic, so the
   search returns the same break index. No public API or behaviour change.
 
-- **Line assembly avoids quadratic string copying.** `TextFlowSupport.wrapParagraph`
-  now accumulates each wrapped line in a reused `StringBuilder` instead of
-  concatenating Strings token-by-token (which re-copied the whole growing line and
-  produced a throwaway `String` per token). **Output is byte-identical.** The effect
-  is small on typical text (lines are bounded by column width — a probe showed ~1%
-  less per-compile allocation on a long-text document), but it removes a latent
-  O(line-length²) copy on pathologically wide / unwrapped lines. No public API or
-  behaviour change.
-
 ### Tests / tooling
 
 - **Benchmark regression gate and measurement probe (benchmarks module, not part
@@ -49,7 +40,9 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
   median (`-Repeat` >= 2).
   `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture
   deterministic measurement-call counts and per-compile allocation bytes for
-  proving algorithmic / allocation changes. `scripts/run-benchmarks.ps1` gains the
+  proving algorithmic / allocation changes (the probe warms up the JVM before its
+  allocation window, so `Alloc KB` reflects steady state, not one-time
+  class-load / JIT cold-start). `scripts/run-benchmarks.ps1` gains the
   `11-verdict-current-speed` step (skippable via `-SkipVerdict`).
 
 ## v1.7.0 — 2026-06-07
diff --git a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java
index 82e403f9..b4b585d5 100644
--- a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java
+++ b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java
@@ -83,12 +83,28 @@ private void run() throws Exception {
         System.out.println("Thread allocation measurement: " + (allocationSupported() ? "enabled" : "UNAVAILABLE (Alloc KB = n/a)"));
         System.out.println();
 
+        Consumer<PageFlowBuilder> longText = flow ->
+                flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE));
+        Consumer<PageFlowBuilder> longToken = flow ->
+                flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE));
+        Consumer<PageFlowBuilder> largeTable = MeasurementCountBenchmark::authorLargeTable;
+
+        // Warm up the JVM (class loading + JIT) BEFORE the allocation window so the
+        // "Alloc KB" column reflects steady-state per-document layout allocation, not
+        // one-time cold-start cost. Without this the FIRST scenario measured carried
+        // ~36 MB of class-load / JIT / static-init allocation — a JVM artifact, not a
+        // layout cost (verified: cold first compile 36.6 MB vs warm 0.65 MB for the
+        // same long-text document). The measurement-COUNT columns are exact either way.
+        for (int warmup = 0; warmup < 5; warmup++) {
+            measureScenario("warmup", longText);
+            measureScenario("warmup", longToken);
+            measureScenario("warmup", largeTable);
+        }
+
         List<Result> results = new ArrayList<>();
-        results.add(measureScenario("long-text", flow ->
-                flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE))));
-        results.add(measureScenario("long-token", flow ->
-                flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE))));
-        results.add(measureScenario("large-table", MeasurementCountBenchmark::authorLargeTable));
+        results.add(measureScenario("long-text", longText));
+        results.add(measureScenario("long-token", longToken));
+        results.add(measureScenario("large-table", largeTable));
 
         System.out.printf("%-14s | %11s | %9s | %9s | %11s | %8s | %11s | %10s | %6s%n",
                 "Scenario", "WidthReqs", "Distinct", "Repeat %", "Sum chars", "Max arg", "LineMetrics", "Alloc KB", "Pages");