From b8267845df1f7adb4cb5d645639e6076585fa5f5 Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 12:24:23 +0100 Subject: [PATCH 1/7] perf(bench): add current-speed verdict gate + measurement/allocation probe BenchmarkVerdictTool classifies a current-speed run vs the committed baseline (improved/neutral/regressed) and exits non-zero on a regression beyond the noise band. MeasurementCountBenchmark + CountingTextMeasurementSystem capture deterministic textWidth call counts and per-compile allocation bytes (ThreadMXBean) for proving algorithmic/allocation changes. run-benchmarks.ps1 gains the 11-verdict-current-speed gate step (skippable via -SkipVerdict). Adds baselines/current-speed-full.json (full-profile median). Benchmark-module only; not part of the published library. --- baselines/current-speed-full.json | 88 +++++ .../demcha/compose/BenchmarkVerdictTool.java | 354 ++++++++++++++++++ .../CountingTextMeasurementSystem.java | 152 ++++++++ .../compose/MeasurementCountBenchmark.java | 257 +++++++++++++ .../compose/BenchmarkVerdictToolTest.java | 146 ++++++++ .../CountingTextMeasurementSystemTest.java | 81 ++++ scripts/run-benchmarks.ps1 | 31 ++ 7 files changed, 1109 insertions(+) create mode 100644 baselines/current-speed-full.json create mode 100644 benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java create mode 100644 benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java create mode 100644 benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java create mode 100644 benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java create mode 100644 benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java diff --git a/baselines/current-speed-full.json b/baselines/current-speed-full.json new file mode 100644 index 00000000..d5e81180 --- /dev/null +++ b/baselines/current-speed-full.json @@ -0,0 +1,88 @@ +{ + "timestamp" : "2026-06-08 12:07:23", + "profile" : 
"full", + "warmupIterations" : 12, + "measurementIterations" : 40, + "docsPerThread" : 12, + "threadCounts" : [ 1, 2, 4, 8 ], + "latency" : [ { + "scenario" : "cv-template", + "description" : "Compose-first CV template", + "avgMillis" : 4.28, + "p50Millis" : 3.93, + "p95Millis" : 5.83, + "maxMillis" : 7.15, + "docsPerSecond" : 233.52, + "avgKilobytes" : 2.29, + "peakHeapMb" : 33.08 + }, { + "scenario" : "engine-simple", + "description" : "One-page engine composition", + "avgMillis" : 3.17, + "p50Millis" : 2.96, + "p95Millis" : 5.01, + "maxMillis" : 5.9, + "docsPerSecond" : 315.87, + "avgKilobytes" : 1.08, + "peakHeapMb" : 12.0 + }, { + "scenario" : "feature-rich", + "description" : "QR, barcode, watermark, header/footer, page break", + "avgMillis" : 45.37, + "p50Millis" : 37.09, + "p95Millis" : 60.65, + "maxMillis" : 69.62, + "docsPerSecond" : 22.04, + "avgKilobytes" : 6.37, + "peakHeapMb" : 86.14 + }, { + "scenario" : "invoice-template", + "description" : "Compose-first invoice template", + "avgMillis" : 19.42, + "p50Millis" : 18.75, + "p95Millis" : 27.88, + "maxMillis" : 34.26, + "docsPerSecond" : 51.5, + "avgKilobytes" : 9.72, + "peakHeapMb" : 85.09 + }, { + "scenario" : "proposal-template", + "description" : "Long multi-page proposal template", + "avgMillis" : 14.41, + "p50Millis" : 13.71, + "p95Millis" : 19.18, + "maxMillis" : 19.93, + "docsPerSecond" : 69.38, + "avgKilobytes" : 7.72, + "peakHeapMb" : 97.52 + } ], + "throughput" : [ { + "scenario" : "invoice-template", + "threads" : 1, + "totalDocs" : 12, + "docsPerSecond" : 81.22, + "avgMillisPerDoc" : 12.31 + }, { + "scenario" : "invoice-template", + "threads" : 2, + "totalDocs" : 24, + "docsPerSecond" : 158.68, + "avgMillisPerDoc" : 6.3 + }, { + "scenario" : "invoice-template", + "threads" : 4, + "totalDocs" : 48, + "docsPerSecond" : 265.11, + "avgMillisPerDoc" : 3.77 + }, { + "scenario" : "invoice-template", + "threads" : 8, + "totalDocs" : 96, + "docsPerSecond" : 356.61, + "avgMillisPerDoc" : 2.8 + } ], + 
"totalBytes" : 2905520, + "aggregation" : "median", + "sourceCount" : 7, + "sourceRuns" : [ "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120624.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120635.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120645.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120655.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120704.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120713.json", "C:\\Users\\Demch\\OneDrive\\Java\\GraphCompose\\target\\benchmarks\\current-speed\\run-20260608-120722.json" ] +} \ No newline at end of file diff --git a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java new file mode 100644 index 00000000..0817baf1 --- /dev/null +++ b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java @@ -0,0 +1,354 @@ +package com.demcha.compose; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * Compares a candidate {@code current-speed} benchmark report against a + * committed baseline and emits a per-scenario verdict + * ({@code IMPROVED} / {@code NEUTRAL} / {@code REGRESSED}). + * + *

This is the regression gate of the per-change performance workflow + * described in {@code docs/operations/perf-change-workflow.md}. Unlike + * {@link BenchmarkDiffTool}, which only prints signed deltas between two + * arbitrary runs, this tool classifies each delta against a noise band and + * fails the build (non-zero exit) when any scenario regresses beyond the band + * on a gate metric (average latency or peak heap). It is meant to be + * pointed at a stable, committed baseline (see {@code baselines/}) rather than + * at the previous ephemeral run under {@code target/}.

+ * + *

Usage:

+ * + * + *

Both reports must share the same {@code current-speed} profile + * ({@code smoke} or {@code full}); a {@code smoke} report and a {@code full} + * report are different experiments and are rejected.

+ * + *

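Thresholds and gate behaviour are configurable via system properties: + * {@code graphcompose.benchmark.verdict.avgBandPct} (default 10) and + * {@code graphcompose.benchmark.verdict.heapBandPct} (default 15) are noise + * bands in percent, and a blank or unparsable value falls back to the default; + * {@code graphcompose.benchmark.verdict.gate} (default {@code true}) is a + * boolean that decides whether a regression produces the failing exit code.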

+ * + * + *

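Exit codes: {@code 0} when the gate passes (or is disabled), {@code 1} + * when the gate is enabled and at least one scenario regressed, {@code 2} on + * usage errors, when either input is not a current-speed report, or when the + * two reports come from different profiles. Unreadable or malformed reports + * are not mapped to an exit code; the I/O or parse exception propagates out + * of {@code main}.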

+ * + * @author Artem Demchyshyn + */ +public final class BenchmarkVerdictTool { + + private static final ObjectMapper JSON = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT); + private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + private static final String AVG_BAND_PROPERTY = "graphcompose.benchmark.verdict.avgBandPct"; + private static final String HEAP_BAND_PROPERTY = "graphcompose.benchmark.verdict.heapBandPct"; + private static final String GATE_PROPERTY = "graphcompose.benchmark.verdict.gate"; + + private static final double DEFAULT_AVG_BAND_PCT = 10.0; + private static final double DEFAULT_HEAP_BAND_PCT = 15.0; + + private BenchmarkVerdictTool() { + } + + /** + * CLI entry point. Reads the baseline and candidate reports, prints the + * verdict table, writes JSON/CSV verdict artifacts under + * {@code target/benchmarks/verdicts/current-speed/}, and exits non-zero + * when the regression gate is enabled and at least one scenario regressed. + * + * @param args {@code } + * @throws Exception if a report cannot be read or written + */ + public static void main(String[] args) throws Exception { + BenchmarkSupport.configureQuietLogging(); + if (args.length != 2) { + System.err.println(""" + Usage: + java ... 
com.demcha.compose.BenchmarkVerdictTool + """); + System.exit(2); + return; + } + + Path baselinePath = Path.of(args[0]); + Path candidatePath = Path.of(args[1]); + JsonNode baseline = JSON.readTree(Files.readAllBytes(baselinePath)); + JsonNode candidate = JSON.readTree(Files.readAllBytes(candidatePath)); + + if (!isCurrentSpeed(baseline) || !isCurrentSpeed(candidate)) { + System.err.println("BenchmarkVerdictTool only supports current-speed reports (latency + throughput)."); + System.exit(2); + return; + } + + String baselineProfile = baseline.path("profile").asText(""); + String candidateProfile = candidate.path("profile").asText(""); + if (!baselineProfile.equals(candidateProfile)) { + System.err.println("Profiles do not match: baseline='" + baselineProfile + + "', candidate='" + candidateProfile + "'. Compare runs from the same profile only."); + System.exit(2); + return; + } + + Thresholds thresholds = Thresholds.fromSystemProperties(); + VerdictReport report = evaluate(baselinePath.toString(), candidatePath.toString(), baseline, candidate, thresholds); + + print(report); + write(report); + + if (thresholds.gateEnabled() && report.regressed()) { + System.out.println(); + System.out.println("PERFORMANCE GATE FAILED: at least one scenario regressed beyond the noise band."); + System.exit(1); + } + } + + /** + * Pure, side-effect-free evaluation core used by both {@link #main(String[])} + * and the unit test. Computes the per-scenario verdict for every scenario + * present in both reports and the overall verdict. 
+ * + * @param baselinePath display path of the baseline report + * @param candidatePath display path of the candidate report + * @param baseline parsed baseline current-speed report + * @param candidate parsed candidate current-speed report + * @param thresholds noise bands and gate flag + * @return the computed verdict report + */ + static VerdictReport evaluate(String baselinePath, + String candidatePath, + JsonNode baseline, + JsonNode candidate, + Thresholds thresholds) { + Map baselineByScenario = indexBy(baseline.path("latency")); + Map candidateByScenario = indexBy(candidate.path("latency")); + + List scenarios = new ArrayList<>(); + List missingScenarios = new ArrayList<>(); + boolean anyRegressed = false; + boolean anyImproved = false; + + for (Map.Entry entry : baselineByScenario.entrySet()) { + String scenario = entry.getKey(); + JsonNode before = entry.getValue(); + JsonNode after = candidateByScenario.get(scenario); + if (after == null) { + missingScenarios.add(scenario); + continue; + } + + double baselineAvg = before.path("avgMillis").asDouble(); + double candidateAvg = after.path("avgMillis").asDouble(); + double avgDeltaPct = percentDelta(baselineAvg, candidateAvg); + double p95DeltaPct = percentDelta(before.path("p95Millis").asDouble(), after.path("p95Millis").asDouble()); + double docsDeltaPct = percentDelta(before.path("docsPerSecond").asDouble(), after.path("docsPerSecond").asDouble()); + double baselineHeap = before.path("peakHeapMb").asDouble(); + double candidateHeap = after.path("peakHeapMb").asDouble(); + double heapDeltaPct = percentDelta(baselineHeap, candidateHeap); + + // Gate metrics: average latency and peak heap (both lower-is-better). 
+ Verdict verdict; + if (avgDeltaPct > thresholds.avgBandPct() || heapDeltaPct > thresholds.heapBandPct()) { + verdict = Verdict.REGRESSED; + anyRegressed = true; + } else if (avgDeltaPct < -thresholds.avgBandPct()) { + verdict = Verdict.IMPROVED; + anyImproved = true; + } else { + verdict = Verdict.NEUTRAL; + } + + scenarios.add(new ScenarioVerdict( + scenario, + before.path("description").asText(after.path("description").asText("")), + baselineAvg, + candidateAvg, + avgDeltaPct, + p95DeltaPct, + docsDeltaPct, + baselineHeap, + candidateHeap, + heapDeltaPct, + verdict.name())); + } + + Verdict overall = anyRegressed + ? Verdict.REGRESSED + : (anyImproved ? Verdict.IMPROVED : Verdict.NEUTRAL); + + return new VerdictReport( + baselinePath, + candidatePath, + candidate.path("profile").asText(""), + baseline.path("timestamp").asText(""), + candidate.path("timestamp").asText(""), + thresholds.avgBandPct(), + thresholds.heapBandPct(), + thresholds.gateEnabled(), + overall.name(), + anyRegressed, + scenarios, + missingScenarios); + } + + private static void print(VerdictReport report) { + System.out.println("Benchmark verdict (vs committed baseline)"); + System.out.println("Timestamp: " + LocalDateTime.now().format(TIMESTAMP_FORMAT)); + System.out.println("Profile: " + report.profile()); + System.out.println("Baseline: " + report.baselinePath() + " (" + report.baselineTimestamp() + ")"); + System.out.println("Candidate: " + report.candidatePath() + " (" + report.candidateTimestamp() + ")"); + System.out.println("Bands: avg +/-" + format(report.avgBandPct()) + "%, peakHeap +/-" + + format(report.heapBandPct()) + "% | gate: " + (report.gateEnabled() ? 
"enabled" : "disabled")); + System.out.println(); + System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n", + "Scenario", "Avg pct", "p95 pct", "Docs/s pct", "Heap pct", "Verdict"); + System.out.println("-".repeat(82)); + for (ScenarioVerdict row : report.scenarios()) { + System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n", + row.scenario(), + signedPercent(row.avgDeltaPct()), + signedPercent(row.p95DeltaPct()), + signedPercent(row.docsPerSecondDeltaPct()), + signedPercent(row.peakHeapDeltaPct()), + row.verdict()); + } + if (!report.missingScenarios().isEmpty()) { + System.out.println(); + System.out.println("WARNING: baseline scenarios missing from candidate (not gated): " + + String.join(", ", report.missingScenarios())); + } + System.out.println(); + System.out.println("Overall verdict: " + report.overallVerdict()); + } + + private static void write(VerdictReport report) throws Exception { + BenchmarkReportWriter.BenchmarkArtifacts artifacts = BenchmarkReportWriter.prepare("verdicts/current-speed"); + Path jsonPath = artifacts.writeJson(report); + Path csvPath = artifacts.writeCsv( + "verdict", + List.of("scenario", "baseline_avg_ms", "candidate_avg_ms", "avg_delta_pct", + "p95_delta_pct", "docs_per_sec_delta_pct", + "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct", "verdict"), + report.scenarios().stream() + .map(row -> List.of( + row.scenario(), + format(row.baselineAvgMs()), + format(row.candidateAvgMs()), + format(row.avgDeltaPct()), + format(row.p95DeltaPct()), + format(row.docsPerSecondDeltaPct()), + format(row.baselinePeakHeapMb()), + format(row.candidatePeakHeapMb()), + format(row.peakHeapDeltaPct()), + row.verdict())) + .toList()); + System.out.println("Saved JSON verdict report to " + jsonPath); + System.out.println("Saved CSV verdict report to " + csvPath); + } + + private static boolean isCurrentSpeed(JsonNode node) { + return node.has("latency") && node.has("throughput"); + } + + private static Map 
indexBy(JsonNode latencyArray) { + Map result = new TreeMap<>(); + latencyArray.forEach(item -> result.put(item.path("scenario").asText(), item)); + return result; + } + + private static double percentDelta(double baseline, double candidate) { + if (Double.compare(baseline, 0.0) == 0) { + return candidate == 0.0 ? 0.0 : 100.0; + } + return ((candidate - baseline) / baseline) * 100.0; + } + + private static String signedPercent(double value) { + return "%+.2f%%".formatted(value); + } + + private static String format(double value) { + return "%.2f".formatted(value); + } + + /** + * Noise bands (percent) and the gate flag for a verdict evaluation. + * + * @param avgBandPct band for average latency; a candidate slower than this + * fraction of the baseline regresses + * @param heapBandPct band for peak heap delta + * @param gateEnabled whether a regression should fail the build (non-zero exit) + */ + record Thresholds(double avgBandPct, double heapBandPct, boolean gateEnabled) { + + static Thresholds fromSystemProperties() { + return new Thresholds( + doubleProperty(AVG_BAND_PROPERTY, DEFAULT_AVG_BAND_PCT), + doubleProperty(HEAP_BAND_PROPERTY, DEFAULT_HEAP_BAND_PCT), + Boolean.parseBoolean(System.getProperty(GATE_PROPERTY, "true"))); + } + + private static double doubleProperty(String key, double fallback) { + String raw = System.getProperty(key); + if (raw == null || raw.isBlank()) { + return fallback; + } + try { + return Double.parseDouble(raw.trim()); + } catch (NumberFormatException ex) { + return fallback; + } + } + } + + /** Verdict classification for one scenario or for the report as a whole. */ + enum Verdict { + IMPROVED, + NEUTRAL, + REGRESSED + } + + /** Per-scenario verdict row. 
*/ + record ScenarioVerdict(String scenario, + String description, + double baselineAvgMs, + double candidateAvgMs, + double avgDeltaPct, + double p95DeltaPct, + double docsPerSecondDeltaPct, + double baselinePeakHeapMb, + double candidatePeakHeapMb, + double peakHeapDeltaPct, + String verdict) { + } + + /** Full verdict report, serialized to JSON/CSV. */ + record VerdictReport(String baselinePath, + String candidatePath, + String profile, + String baselineTimestamp, + String candidateTimestamp, + double avgBandPct, + double heapBandPct, + boolean gateEnabled, + String overallVerdict, + boolean regressed, + List scenarios, + List missingScenarios) { + } +} diff --git a/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java b/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java new file mode 100644 index 00000000..70fd665e --- /dev/null +++ b/benchmarks/src/main/java/com/demcha/compose/CountingTextMeasurementSystem.java @@ -0,0 +1,152 @@ +package com.demcha.compose; + +import com.demcha.compose.engine.components.content.text.TextStyle; +import com.demcha.compose.engine.components.geometry.ContentSize; +import com.demcha.compose.engine.measurement.TextMeasurementSystem; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +/** + * A {@link TextMeasurementSystem} decorator that forwards every call to a real + * delegate while counting how the layout engine asks for text measurements. + * + *

It exists to make the algorithmic findings of the performance audit + * (F1 greedy wrap re-measuring growing prefixes, F2 quadratic long-token + * breaking, F3 table re-measurement) deterministically observable. + * Wall-clock timing hides these under JIT/GC noise; measurement-request counts + * and summed argument characters do not.

+ * + *

The decorator records, per pass:

+ *
    + *
  • the number of width-bearing requests ({@code textWidth} + {@code measure})
  • + *
  • the number of distinct {@code (style, text)} requests — the + * caller-side proxy for how well the delegate's width cache can hit; + * a low repeat rate means the layout keeps asking for one-shot strings + * (the F1/F2 smell)
  • + *
  • the summed and maximum argument length in characters — the proxy for + * the {@code O(chars)} work each uncached measurement performs
  • + *
  • {@code lineMetrics}/{@code lineHeight} call counts (style-only, no text)
  • + *
+ * + *

Not thread-safe: drive it from a single layout pass, like the real + * measurement system.

+ * + * @author Artem Demchyshyn + */ +public final class CountingTextMeasurementSystem implements TextMeasurementSystem { + + private final TextMeasurementSystem delegate; + + private long textWidthCalls; + private long measureCalls; + private long lineMetricsCalls; + private long lineHeightCalls; + private long summedRequestChars; + private long maxRequestChars; + private final Set distinctRequests = new HashSet<>(); + + /** + * Wraps a real measurement system. + * + * @param delegate the measurement system to forward to (e.g. the session's + * {@code FontLibraryTextMeasurementSystem}) + */ + public CountingTextMeasurementSystem(TextMeasurementSystem delegate) { + this.delegate = Objects.requireNonNull(delegate, "delegate"); + } + + @Override + public ContentSize measure(TextStyle style, String text) { + measureCalls++; + record(style, text); + return delegate.measure(style, text); + } + + @Override + public double textWidth(TextStyle style, String text) { + textWidthCalls++; + record(style, text); + return delegate.textWidth(style, text); + } + + @Override + public LineMetrics lineMetrics(TextStyle style) { + lineMetricsCalls++; + return delegate.lineMetrics(style); + } + + @Override + public double lineHeight(TextStyle style) { + lineHeightCalls++; + return delegate.lineHeight(style); + } + + @Override + public void clearCaches() { + delegate.clearCaches(); + } + + private void record(TextStyle style, String text) { + String safe = text == null ? "" : text; + int length = safe.length(); + summedRequestChars += length; + if (length > maxRequestChars) { + maxRequestChars = length; + } + distinctRequests.add(new RequestKey(style, safe)); + } + + /** + * Captures the counts accumulated so far. + * + * @return an immutable snapshot of the measurement-request counters + */ + public Counts snapshot() { + long widthRequests = textWidthCalls + measureCalls; + long distinct = distinctRequests.size(); + double repeatRatePct = widthRequests == 0 + ? 
0.0 + : (1.0 - ((double) distinct / (double) widthRequests)) * 100.0; + return new Counts( + textWidthCalls, + measureCalls, + widthRequests, + distinct, + repeatRatePct, + summedRequestChars, + maxRequestChars, + lineMetricsCalls, + lineHeightCalls); + } + + /** + * Immutable snapshot of measurement-request counters. + * + * @param textWidthCalls direct {@code textWidth(style, text)} calls + * @param measureCalls {@code measure(style, text)} calls + * @param widthRequests {@code textWidthCalls + measureCalls} + * @param distinctWidthRequests distinct {@code (style, text)} requests + * @param repeatRatePct {@code (1 - distinct/total) * 100}; higher + * means more cache-friendly (fewer one-shot + * strings) + * @param summedRequestChars total characters across all width requests + * @param maxRequestChars longest single argument measured + * @param lineMetricsCalls {@code lineMetrics(style)} calls + * @param lineHeightCalls {@code lineHeight(style)} calls + */ + public record Counts(long textWidthCalls, + long measureCalls, + long widthRequests, + long distinctWidthRequests, + double repeatRatePct, + long summedRequestChars, + long maxRequestChars, + long lineMetricsCalls, + long lineHeightCalls) { + } + + private record RequestKey(TextStyle style, String text) { + } +} diff --git a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java new file mode 100644 index 00000000..82e403f9 --- /dev/null +++ b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java @@ -0,0 +1,257 @@ +package com.demcha.compose; + +import com.demcha.compose.document.api.DocumentPageSize; +import com.demcha.compose.document.api.DocumentSession; +import com.demcha.compose.document.backend.fixed.pdf.PdfMeasurementResources; +import com.demcha.compose.document.dsl.PageFlowBuilder; +import com.demcha.compose.document.layout.DocumentGraph; +import 
com.demcha.compose.document.layout.DocumentLayoutPassContext; +import com.demcha.compose.document.layout.LayoutCanvas; +import com.demcha.compose.document.layout.LayoutCompiler; +import com.demcha.compose.document.layout.LayoutGraph; +import com.demcha.compose.document.layout.NodeRegistry; +import com.demcha.compose.document.node.DocumentNode; +import com.demcha.compose.document.style.DocumentColor; +import com.demcha.compose.document.style.DocumentTextDecoration; +import com.demcha.compose.document.style.DocumentTextStyle; + +import java.awt.Color; +import java.lang.management.ManagementFactory; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; + +/** + * Deterministic measurement-count and allocation probe for the canonical layout + * pipeline. + * + *

For each scenario this harness authors a document through the public DSL, + * then compiles its node graph through a {@link LayoutCompiler} whose + * {@code TextMeasurementSystem} is wrapped in a + * {@link CountingTextMeasurementSystem}. It reports, deterministically and + * independent of wall-clock / GC-timing noise:

+ * + *
    + *
  • measurement requests — how the layout asks the measurement + * system for widths (proves F1/F2/F3); and
  • + *
  • compile allocation bytes — bytes allocated by the layout + * {@code compile} pass, via + * {@link com.sun.management.ThreadMXBean#getCurrentThreadAllocatedBytes()}. + * Unlike the {@code peakHeapMb} sampled by {@code CurrentSpeedBenchmark} + * (a GC-timing-dependent used-heap delta), allocated-bytes does not depend + * on when the collector runs, which makes it the repeatable memory signal + * for the allocation findings (F7 style/inset churn, F8 box recomputation, + * fragment re-copy, per-cell table lists). The JVM documents the counter as + * an approximation, so compare runs with a small tolerance rather than for + * exact equality.
  • + *
+ * + *

The allocation window wraps only {@code compile(...)}; font loading and DSL + * authoring happen outside it, so the number reflects layout allocation — the + * thing the optimizations move. Needs no {@code src/main} changes.

+ */ +public final class MeasurementCountBenchmark { + + private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + private static final com.sun.management.ThreadMXBean THREAD_MX = + (com.sun.management.ThreadMXBean) ManagementFactory.getThreadMXBean(); + + private static final DocumentTextStyle BODY_STYLE = DocumentTextStyle.builder() + .size(9.5) + .decoration(DocumentTextDecoration.DEFAULT) + .color(DocumentColor.of(new Color(58, 69, 84))) + .build(); + + private static final String LONG_PARAGRAPH = + ("GraphCompose lays out structured business documents efficiently across many pages " + + "while keeping header and footer placement stable. ").repeat(120); + + private static final String LONG_TOKEN_PARAGRAPH = + "Prefix text before an unbreakable token " + "x".repeat(600) + + " and several trailing words that must still wrap onto the following lines here."; + + public static void main(String[] args) throws Exception { + BenchmarkSupport.configureQuietLogging(); + new MeasurementCountBenchmark().run(); + } + + private void run() throws Exception { + enableAllocationMeasurement(); + + System.out.println("GraphCompose Measurement-Count + Allocation Probe"); + System.out.println("Timestamp: " + LocalDateTime.now().format(TIMESTAMP_FORMAT)); + System.out.println("Thread allocation measurement: " + (allocationSupported() ? 
"enabled" : "UNAVAILABLE (Alloc KB = n/a)")); + System.out.println(); + + List results = new ArrayList<>(); + results.add(measureScenario("long-text", flow -> + flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE)))); + results.add(measureScenario("long-token", flow -> + flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE)))); + results.add(measureScenario("large-table", MeasurementCountBenchmark::authorLargeTable)); + + System.out.printf("%-14s | %11s | %9s | %9s | %11s | %8s | %11s | %10s | %6s%n", + "Scenario", "WidthReqs", "Distinct", "Repeat %", "Sum chars", "Max arg", "LineMetrics", "Alloc KB", "Pages"); + System.out.println("-".repeat(108)); + for (Result result : results) { + CountingTextMeasurementSystem.Counts c = result.counts(); + System.out.printf("%-14s | %11d | %9d | %8.1f%% | %11d | %8d | %11d | %10s | %6d%n", + result.scenario(), + c.widthRequests(), + c.distinctWidthRequests(), + c.repeatRatePct(), + c.summedRequestChars(), + c.maxRequestChars(), + c.lineMetricsCalls(), + formatAllocKb(result.compileAllocBytes()), + result.pages()); + } + + writeReport(results); + } + + private Result measureScenario(String scenario, Consumer author) throws Exception { + try (DocumentSession session = GraphCompose.document() + .pageSize(DocumentPageSize.A4) + .margin(24, 24, 24, 24) + .create()) { + session.pageFlow(author); + List roots = session.roots(); + LayoutCanvas canvas = session.canvas(); + NodeRegistry registry = session.registry(); + + try (PdfMeasurementResources resources = PdfMeasurementResources.open(List.of())) { + CountingTextMeasurementSystem counter = + new CountingTextMeasurementSystem(resources.textMeasurementSystem()); + DocumentLayoutPassContext context = new DocumentLayoutPassContext( + registry, canvas, resources.fontLibrary(), counter, false); + LayoutCompiler compiler = new LayoutCompiler(registry); + DocumentGraph graph = new DocumentGraph(roots); + + // Measure allocation around the layout compile 
only — font + // loading and authoring are already done, so this is the + // layout pass's own allocation footprint. + long allocBefore = currentThreadAllocatedBytes(); + LayoutGraph layout = compiler.compile(graph, context, context); + long allocBytes = allocBefore < 0 ? -1 : currentThreadAllocatedBytes() - allocBefore; + + return new Result(scenario, counter.snapshot(), layout.totalPages(), layout.fragments().size(), allocBytes); + } + } + } + + private static void authorLargeTable(PageFlowBuilder flow) { + flow.addTable(table -> { + table.autoColumns(6).header("Item", "Qty", "Unit", "Price", "Tax", "Total"); + for (int row = 1; row <= 200; row++) { + table.row("Line item " + row, "3", "ea", "12.50", "1.25", "38.75"); + } + }); + } + + private static void enableAllocationMeasurement() { + try { + if (THREAD_MX.isThreadAllocatedMemorySupported() && !THREAD_MX.isThreadAllocatedMemoryEnabled()) { + THREAD_MX.setThreadAllocatedMemoryEnabled(true); + } + } catch (UnsupportedOperationException ignored) { + // Allocation measurement unsupported on this JVM; Alloc KB reports n/a. + } + } + + private static boolean allocationSupported() { + try { + return THREAD_MX.isThreadAllocatedMemorySupported() && THREAD_MX.isThreadAllocatedMemoryEnabled(); + } catch (UnsupportedOperationException ex) { + return false; + } + } + + private static long currentThreadAllocatedBytes() { + if (!allocationSupported()) { + return -1; + } + return THREAD_MX.getCurrentThreadAllocatedBytes(); + } + + private static String formatAllocKb(long bytes) { + return bytes < 0 ? 
"n/a" : "%.1f".formatted(bytes / 1024.0); + } + + private void writeReport(List results) throws Exception { + CounterReport report = new CounterReport( + LocalDateTime.now().format(TIMESTAMP_FORMAT), + results.stream().map(Result::toScenarioCounts).toList()); + + BenchmarkReportWriter.BenchmarkArtifacts artifacts = BenchmarkReportWriter.prepare("counters"); + var jsonPath = artifacts.writeJson(report); + var csvPath = artifacts.writeCsv( + "counters", + List.of("scenario", "width_requests", "distinct_width_requests", "repeat_rate_pct", + "summed_request_chars", "max_request_chars", "text_width_calls", "measure_calls", + "line_metrics_calls", "compile_alloc_bytes", "pages", "fragments"), + results.stream() + .map(result -> { + CountingTextMeasurementSystem.Counts c = result.counts(); + return List.of( + result.scenario(), + Long.toString(c.widthRequests()), + Long.toString(c.distinctWidthRequests()), + "%.2f".formatted(c.repeatRatePct()), + Long.toString(c.summedRequestChars()), + Long.toString(c.maxRequestChars()), + Long.toString(c.textWidthCalls()), + Long.toString(c.measureCalls()), + Long.toString(c.lineMetricsCalls()), + Long.toString(result.compileAllocBytes()), + Integer.toString(result.pages()), + Integer.toString(result.fragments())); + }) + .toList()); + + System.out.println(); + System.out.println("Saved JSON counter report to " + jsonPath); + System.out.println("Saved CSV counter report to " + csvPath); + } + + private record Result(String scenario, + CountingTextMeasurementSystem.Counts counts, + int pages, + int fragments, + long compileAllocBytes) { + ScenarioCounts toScenarioCounts() { + return new ScenarioCounts( + scenario, + counts.widthRequests(), + counts.distinctWidthRequests(), + counts.repeatRatePct(), + counts.summedRequestChars(), + counts.maxRequestChars(), + counts.textWidthCalls(), + counts.measureCalls(), + counts.lineMetricsCalls(), + counts.lineHeightCalls(), + compileAllocBytes, + pages, + fragments); + } + } + + private record 
ScenarioCounts(String scenario, + long widthRequests, + long distinctWidthRequests, + double repeatRatePct, + long summedRequestChars, + long maxRequestChars, + long textWidthCalls, + long measureCalls, + long lineMetricsCalls, + long lineHeightCalls, + long compileAllocBytes, + int pages, + int fragments) { + } + + private record CounterReport(String timestamp, List<ScenarioCounts> scenarios) { + } +} diff --git a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java new file mode 100644 index 00000000..463f5a80 --- /dev/null +++ b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java @@ -0,0 +1,146 @@ +package com.demcha.compose; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for the pure {@link BenchmarkVerdictTool#evaluate} core. These + * drive synthetic current-speed reports so the verdict classification and the + * hard-gate {@code regressed} flag are validated deterministically, without + * running real benchmarks or invoking {@code System.exit}. 
+ */ +class BenchmarkVerdictToolTest { + + private static final ObjectMapper JSON = new ObjectMapper(); + private static final BenchmarkVerdictTool.Thresholds GATE = + new BenchmarkVerdictTool.Thresholds(10.0, 15.0, true); + + @Test + void flagsAverageLatencyRegressionBeyondBand() throws Exception { + JsonNode baseline = report(scenario("invoice-template", 10.0, 10.0, 30.0, 100.0)); + JsonNode candidate = report(scenario("invoice-template", 12.0, 11.0, 28.0, 100.0)); // +20% avg + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isTrue(); + assertThat(report.overallVerdict()).isEqualTo("REGRESSED"); + assertThat(report.scenarios()).singleElement() + .satisfies(row -> assertThat(row.verdict()).isEqualTo("REGRESSED")); + } + + @Test + void flagsPeakHeapRegressionEvenWhenLatencyIsFlat() throws Exception { + JsonNode baseline = report(scenario("cv-template", 10.0, 10.0, 40.0, 100.0)); + JsonNode candidate = report(scenario("cv-template", 10.3, 10.0, 40.0, 120.0)); // +3% avg, +20% heap + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isTrue(); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("REGRESSED"); + } + + @Test + void marksClearSpeedupAsImproved() throws Exception { + JsonNode baseline = report(scenario("proposal-template", 10.0, 12.0, 28.0, 150.0)); + JsonNode candidate = report(scenario("proposal-template", 8.0, 9.0, 36.0, 150.0)); // -20% avg + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isFalse(); + assertThat(report.overallVerdict()).isEqualTo("IMPROVED"); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("IMPROVED"); + } + + @Test + void treatsWithinBandChangesAsNeutral() throws 
Exception { + JsonNode baseline = report(scenario("engine-simple", 5.0, 6.0, 170.0, 40.0)); + JsonNode candidate = report(scenario("engine-simple", 5.2, 6.1, 168.0, 43.0)); // +4% avg, +7.5% heap + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isFalse(); + assertThat(report.overallVerdict()).isEqualTo("NEUTRAL"); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("NEUTRAL"); + } + + @Test + void overallIsRegressedWhenAnyScenarioRegresses() throws Exception { + JsonNode baseline = report( + scenario("engine-simple", 5.0, 6.0, 170.0, 40.0), + scenario("invoice-template", 10.0, 11.0, 28.0, 100.0)); + JsonNode candidate = report( + scenario("engine-simple", 5.1, 6.1, 168.0, 41.0), // neutral + scenario("invoice-template", 13.0, 14.0, 22.0, 100.0)); // +30% avg -> regressed + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.regressed()).isTrue(); + assertThat(report.overallVerdict()).isEqualTo("REGRESSED"); + } + + @Test + void reportsMissingScenariosWithoutGating() throws Exception { + JsonNode baseline = report( + scenario("engine-simple", 5.0, 6.0, 170.0, 40.0), + scenario("invoice-template", 10.0, 11.0, 28.0, 100.0)); + JsonNode candidate = report(scenario("engine-simple", 5.1, 6.1, 168.0, 41.0)); // invoice dropped + + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); + + assertThat(report.missingScenarios()).containsExactly("invoice-template"); + assertThat(report.scenarios()).hasSize(1); + assertThat(report.regressed()).isFalse(); + } + + @Test + void regressedFlagReflectsStateIndependentOfGateFlag() throws Exception { + JsonNode baseline = report(scenario("invoice-template", 10.0, 10.0, 30.0, 100.0)); + JsonNode candidate = 
report(scenario("invoice-template", 12.0, 11.0, 28.0, 100.0)); // +20% avg + + BenchmarkVerdictTool.Thresholds gateOff = new BenchmarkVerdictTool.Thresholds(10.0, 15.0, false); + BenchmarkVerdictTool.VerdictReport report = + BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, gateOff); + + // The state is still "regressed"; only the build-failing decision (exit code) is gated. + assertThat(report.regressed()).isTrue(); + assertThat(report.gateEnabled()).isFalse(); + } + + private static JsonNode report(String... latencyRows) throws Exception { + String latency = String.join(",", latencyRows); + String json = """ + { + "timestamp": "2026-06-08 12:00:00", + "profile": "full", + "latency": [%s], + "throughput": [] + } + """.formatted(latency); + return JSON.readTree(json); + } + + private static String scenario(String name, double avgMs, double p95Ms, double docsPerSec, double peakHeapMb) { + return """ + { + "scenario": "%s", + "description": "%s", + "avgMillis": %s, + "p50Millis": %s, + "p95Millis": %s, + "maxMillis": %s, + "docsPerSecond": %s, + "avgKilobytes": 1.0, + "peakHeapMb": %s + } + """.formatted(name, name, avgMs, avgMs, p95Ms, p95Ms, docsPerSec, peakHeapMb); + } +} diff --git a/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java b/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java new file mode 100644 index 00000000..ebd7397c --- /dev/null +++ b/benchmarks/src/test/java/com/demcha/compose/CountingTextMeasurementSystemTest.java @@ -0,0 +1,81 @@ +package com.demcha.compose; + +import com.demcha.compose.engine.components.content.text.TextStyle; +import com.demcha.compose.engine.components.geometry.ContentSize; +import com.demcha.compose.engine.measurement.TextMeasurementSystem; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for {@link CountingTextMeasurementSystem}. 
They use a trivial fake + * delegate (no PDFBox) so the counting/forwarding contract is verified + * deterministically and fast. + */ +class CountingTextMeasurementSystemTest { + + private static final TextStyle STYLE = TextStyle.DEFAULT_STYLE; + + @Test + void countsWidthRequestsDistinctKeysAndCharacters() { + CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement()); + + double abWidth = counter.textWidth(STYLE, "ab"); + counter.textWidth(STYLE, "ab"); // repeat -> same key + counter.textWidth(STYLE, "abc"); + counter.measure(STYLE, "ab"); // measure shares the "ab" key + counter.lineMetrics(STYLE); + counter.lineHeight(STYLE); + + CountingTextMeasurementSystem.Counts counts = counter.snapshot(); + + assertThat(abWidth).isEqualTo(2.0); // delegate pass-through (fake width == length) + assertThat(counts.textWidthCalls()).isEqualTo(3); + assertThat(counts.measureCalls()).isEqualTo(1); + assertThat(counts.widthRequests()).isEqualTo(4); + assertThat(counts.distinctWidthRequests()).isEqualTo(2); // "ab", "abc" + assertThat(counts.summedRequestChars()).isEqualTo(9); // 2 + 2 + 3 + 2 + assertThat(counts.maxRequestChars()).isEqualTo(3); + assertThat(counts.repeatRatePct()).isEqualTo(50.0); // 1 - 2/4 + assertThat(counts.lineMetricsCalls()).isEqualTo(1); + assertThat(counts.lineHeightCalls()).isEqualTo(1); + } + + @Test + void emptySnapshotHasNoRequests() { + CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement()); + + CountingTextMeasurementSystem.Counts counts = counter.snapshot(); + + assertThat(counts.widthRequests()).isZero(); + assertThat(counts.distinctWidthRequests()).isZero(); + assertThat(counts.repeatRatePct()).isZero(); + assertThat(counts.summedRequestChars()).isZero(); + } + + @Test + void treatsNullTextAsEmptyWithoutFailing() { + CountingTextMeasurementSystem counter = new CountingTextMeasurementSystem(new FakeMeasurement()); + + counter.textWidth(STYLE, null); + + 
CountingTextMeasurementSystem.Counts counts = counter.snapshot(); + assertThat(counts.widthRequests()).isEqualTo(1); + assertThat(counts.summedRequestChars()).isZero(); + assertThat(counts.distinctWidthRequests()).isEqualTo(1); + } + + /** Minimal delegate: width == text length, fixed line metrics. */ + private static final class FakeMeasurement implements TextMeasurementSystem { + @Override + public ContentSize measure(TextStyle style, String text) { + int length = text == null ? 0 : text.length(); + return new ContentSize(length, 10.0); + } + + @Override + public LineMetrics lineMetrics(TextStyle style) { + return new LineMetrics(8.0, 2.0, 0.0); + } + } +} diff --git a/scripts/run-benchmarks.ps1 b/scripts/run-benchmarks.ps1 index f816915e..4126ade5 100644 --- a/scripts/run-benchmarks.ps1 +++ b/scripts/run-benchmarks.ps1 @@ -14,11 +14,17 @@ diff gracefully when no compatible historical pair exists yet. Use `-Repeat` to generate repeated current-speed/comparative runs and median aggregates for more stable local comparisons. + +Step 11 (`11-verdict-current-speed`) compares the current-speed result against +the committed baseline (`baselines/current-speed-<profile>.json`) and fails the +run when a canonical scenario regresses beyond the noise band. Use `-SkipVerdict` +to skip that gate while exploring. See `docs/operations/perf-change-workflow.md`. 
#> param( [switch]$IncludeEndurance, [switch]$OpenResults, [switch]$SkipDiff, + [switch]$SkipVerdict, [ValidateSet("full", "smoke")] [string]$CurrentSpeedProfile = "full", [ValidateRange(1, 10)] @@ -448,6 +454,31 @@ try { } Add-SummaryLine(("- Benchmarks folder: ``{0}``" -f (Join-Path $repoRoot "target\benchmarks"))) + if (-not $SkipVerdict) { + $verdictBaseline = Join-Path $repoRoot ("baselines\current-speed-{0}.json" -f $CurrentSpeedProfile) + if ($Repeat -gt 1) { + $verdictCandidate = Get-IfExists (Join-Path $repoRoot ("target\benchmarks\{0}\latest.json" -f $currentSpeedAggregateSuite)) + } else { + $verdictCandidate = $currentSpeedLatest + } + + if (-not (Test-Path $verdictBaseline)) { + Add-SummaryLine("- ``11-verdict-current-speed``: skipped") + Add-SummaryLine((" - Reason: no committed baseline at ``{0}`` (see docs/operations/perf-change-workflow.md)" -f $verdictBaseline)) + } elseif (-not $verdictCandidate) { + Add-SummaryLine("- ``11-verdict-current-speed``: skipped") + Add-SummaryLine(" - Reason: no candidate current-speed report was produced this run") + } else { + # Hard gate: BenchmarkVerdictTool exits non-zero on a regression + # beyond the noise band, which makes Invoke-LoggedCommand throw and + # fail the whole benchmark run. 
+ Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -Arguments @($verdictBaseline, $verdictCandidate) + } + } else { + Add-SummaryLine("- ``11-verdict-current-speed``: skipped") + Add-SummaryLine(" - Reason: ``-SkipVerdict`` was provided") + } + Write-Section "Benchmark run completed" Write-Host "Summary: $summaryPath" -ForegroundColor Green Write-Host "Benchmarks: $(Join-Path $repoRoot 'target\benchmarks')" -ForegroundColor Green From d68bd96403f68ffeaa9042beea2cdff38b643bcb Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 12:24:23 +0100 Subject: [PATCH 2/7] perf(layout): wrapParagraph running-width, stop re-measuring growing line prefix The greedy line wrapper measured textWidth(currentLine + nextToken) on every token, re-measuring the whole accumulated line - O(line-length x tokens) measured characters plus the per-glyph sanitize/encode it triggers. Keep a running line width and measure each token once instead; line starts re-measure to pin FP drift. Glyph advances are additive (no kerning) and EPS=1e-6 absorbs FP, so break points are unchanged - rendering is byte-identical (1144 tests + all layout/visual snapshots pass). Probe: long-text measured characters 291,324 -> 32,457 (~9x fewer); same-session A/B (full, Repeat 7): proposal -57% time / +131% throughput. No API or behaviour change. Refs audit finding F1. --- CHANGELOG.md | 24 +++++++++++++++++++ .../document/layout/TextFlowSupport.java | 22 +++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bab5d636..5ee69a8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,30 @@ follow semantic versioning; release dates are ISO 8601. Open cycle — bug-fix / housekeeping. Entries land here as they merge. 
+### Performance + +- **Text wrapping stops re-measuring the growing line prefix.** The greedy line + wrapper in `TextFlowSupport` now keeps a running line width and measures each + token once, instead of re-measuring the whole accumulated line on every token. + This removes O(line-length × tokens) measured-character work — and the + per-glyph sanitize/encode it triggered — from paragraph layout. **Output is + byte-identical: all layout and visual-regression snapshots pass unchanged.** + The effect is workload-dependent and concentrated in long-text documents; + measured locally (same-session A/B, full profile) a long multi-page proposal + rendered markedly faster, and a measurement-count probe showed ~9× fewer + measured characters on a long paragraph. No public API or behaviour change. + +### Tests / tooling + +- **Benchmark regression gate and measurement probe (benchmarks module, not part + of the published library).** `BenchmarkVerdictTool` compares a current-speed run + to the committed baseline (`baselines/current-speed-full.json`) and reports + improved / neutral / regressed, failing on a regression beyond the noise band. + `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture + deterministic measurement-call counts and per-compile allocation bytes for + proving algorithmic / allocation changes. `scripts/run-benchmarks.ps1` gains the + `11-verdict-current-speed` step (skippable via `-SkipVerdict`). + ## v1.7.0 — 2026-06-07 Canonical DSL primitives — additive only, zero breaking changes. 
Adding public diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java index 01349737..158a451b 100644 --- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java +++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java @@ -820,6 +820,15 @@ private static List wrapParagraph(List logicalLines, List tokens = tokenize(logicalLine); String currentPrefix = initialPrefix; String currentLine = initialPrefix; + // Running width of currentLine. The greedy fit only needs the width + // of the line built so far plus the next token, not a fresh + // measurement of the whole growing prefix on every token (which made + // wrapping O(chars per line x tokens) measured characters). PDFBox + // glyph advances are additive here (no kerning), so accumulating + // per-token widths matches measuring the full string to well within + // the EPS the fit test already tolerates; each new line re-measures + // its (short) start to pin any floating-point drift. 
+ double currentWidth = measurement.textWidth(style, currentLine); boolean hasContent = false; for (String token : tokens) { @@ -828,9 +837,10 @@ private static List wrapParagraph(List logicalLines, continue; } - String candidate = currentLine + nextToken; - if (!hasContent || measurement.textWidth(style, candidate) <= maxWidth + EPS) { - currentLine = candidate; + double nextTokenWidth = measurement.textWidth(style, nextToken); + if (!hasContent || currentWidth + nextTokenWidth <= maxWidth + EPS) { + currentLine = currentLine + nextToken; + currentWidth += nextTokenWidth; hasContent = true; continue; } @@ -838,12 +848,15 @@ private static List wrapParagraph(List logicalLines, result.add(trimTrailingSpaces(currentLine)); currentPrefix = continuationPrefix; currentLine = continuationPrefix; + currentWidth = measurement.textWidth(style, continuationPrefix); hasContent = false; double availableWidth = availableWidthForPrefix(maxWidth, currentPrefix, style, measurement); String strippedToken = nextToken.stripLeading(); - if (measurement.textWidth(style, currentPrefix + strippedToken) <= maxWidth + EPS) { + double strippedTokenWidth = measurement.textWidth(style, strippedToken); + if (currentWidth + strippedTokenWidth <= maxWidth + EPS) { currentLine = currentPrefix + strippedToken; + currentWidth += strippedTokenWidth; hasContent = true; continue; } @@ -858,6 +871,7 @@ private static List wrapParagraph(List logicalLines, currentPrefix = continuationPrefix; } currentLine = currentPrefix + chunks.get(chunks.size() - 1); + currentWidth = measurement.textWidth(style, currentLine); hasContent = true; } From 2eca80a21b8a69449d5259a0395a7b991bb9c04e Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 12:45:49 +0100 Subject: [PATCH 3/7] fix(bench): gate on avg latency only; peakHeapMb and single runs are advisory peakHeapMb is a Runtime used-heap delta - GC-timing dependent and very noisy (observed 48-170 MB across repeats of identical code), so it false-failed the 
gate on invoice-template (heap +18.7%) even though that run was -15% faster on time. BenchmarkVerdictTool now hard-gates on average latency only; peakHeapMb is reported as advisory (still shown, never fails the build). The deterministic heap signal stays in MeasurementCountBenchmark (per-compile allocation bytes). run-benchmarks.ps1: step 11 runs the verdict as advisory for single runs (Repeat 1) and hard-gates only for medians (-Repeat >= 2), since one run is too noisy to gate against a median baseline. Unit test + CHANGELOG updated. --- CHANGELOG.md | 6 ++- .../demcha/compose/BenchmarkVerdictTool.java | 45 ++++++++++++++----- .../compose/BenchmarkVerdictToolTest.java | 9 ++-- scripts/run-benchmarks.ps1 | 17 +++++-- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ee69a8d..824ccff1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,11 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. - **Benchmark regression gate and measurement probe (benchmarks module, not part of the published library).** `BenchmarkVerdictTool` compares a current-speed run to the committed baseline (`baselines/current-speed-full.json`) and reports - improved / neutral / regressed, failing on a regression beyond the noise band. + improved / neutral / regressed. The hard gate fails only on an **average-latency** + regression beyond the noise band; peak heap is **advisory** (the `peakHeapMb` + used-heap delta is GC-timing noisy — use the probe's per-compile allocation + bytes for deterministic heap). A single run is advisory; the hard gate needs a + median (`-Repeat` >= 2). `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture deterministic measurement-call counts and per-compile allocation bytes for proving algorithmic / allocation changes. 
`scripts/run-benchmarks.ps1` gains the diff --git a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java index 0817baf1..b231265f 100644 --- a/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java +++ b/benchmarks/src/main/java/com/demcha/compose/BenchmarkVerdictTool.java @@ -22,10 +22,15 @@ * described in {@code docs/operations/perf-change-workflow.md}. Unlike * {@link BenchmarkDiffTool}, which only prints signed deltas between two * arbitrary runs, this tool classifies each delta against a noise band and - * fails the build (non-zero exit) when any scenario regresses beyond the band - * on a gate metric (average latency or peak heap). It is meant to be - * pointed at a stable, committed baseline (see {@code baselines/}) rather than - * at the previous ephemeral run under {@code target/}.

+ * fails the build (non-zero exit) when a scenario regresses beyond the band on + * the gate metric (average latency). Peak heap is reported as an + * advisory only: the {@code peakHeapMb} field is a used-heap delta + * sampled via {@code Runtime}, which is GC-timing dependent and very noisy + * run-to-run, so it must not fail the build. The deterministic heap signal is + * {@code MeasurementCountBenchmark}'s per-compile allocation bytes + * (ThreadMXBean). It is meant to be pointed at a stable, committed baseline (see + * {@code baselines/}) rather than at the previous ephemeral run under + * {@code target/}.

* *

Usage:

*
    @@ -161,9 +166,14 @@ static VerdictReport evaluate(String baselinePath, double candidateHeap = after.path("peakHeapMb").asDouble(); double heapDeltaPct = percentDelta(baselineHeap, candidateHeap); - // Gate metrics: average latency and peak heap (both lower-is-better). + // Hard gate metric: average latency only. peakHeapMb is a used-heap + // delta sampled via Runtime — GC-timing dependent and very noisy + // run-to-run (observed 48..170 MB across repeats of identical code), + // so it is reported as ADVISORY, never gated. The deterministic heap + // signal is MeasurementCountBenchmark's per-compile allocation bytes. + boolean heapAdvisory = heapDeltaPct > thresholds.heapBandPct(); Verdict verdict; - if (avgDeltaPct > thresholds.avgBandPct() || heapDeltaPct > thresholds.heapBandPct()) { + if (avgDeltaPct > thresholds.avgBandPct()) { verdict = Verdict.REGRESSED; anyRegressed = true; } else if (avgDeltaPct < -thresholds.avgBandPct()) { @@ -184,6 +194,7 @@ static VerdictReport evaluate(String baselinePath, baselineHeap, candidateHeap, heapDeltaPct, + heapAdvisory, verdict.name())); } @@ -212,8 +223,10 @@ private static void print(VerdictReport report) { System.out.println("Profile: " + report.profile()); System.out.println("Baseline: " + report.baselinePath() + " (" + report.baselineTimestamp() + ")"); System.out.println("Candidate: " + report.candidatePath() + " (" + report.candidateTimestamp() + ")"); - System.out.println("Bands: avg +/-" + format(report.avgBandPct()) + "%, peakHeap +/-" - + format(report.heapBandPct()) + "% | gate: " + (report.gateEnabled() ? "enabled" : "disabled")); + System.out.println("Gate: avg latency +/-" + format(report.avgBandPct()) + + "% (HARD). peakHeap +/-" + format(report.heapBandPct()) + + "% = ADVISORY only (GC-timing noisy, not gated). gate: " + + (report.gateEnabled() ? 
"enabled" : "disabled")); System.out.println(); System.out.printf("%-18s | %10s | %10s | %10s | %10s | %-10s%n", "Scenario", "Avg pct", "p95 pct", "Docs/s pct", "Heap pct", "Verdict"); @@ -227,13 +240,22 @@ private static void print(VerdictReport report) { signedPercent(row.peakHeapDeltaPct()), row.verdict()); } + List<String> heapAdvisories = report.scenarios().stream() + .filter(ScenarioVerdict::heapAdvisory) + .map(row -> row.scenario() + " (" + signedPercent(row.peakHeapDeltaPct()) + ")") + .toList(); + if (!heapAdvisories.isEmpty()) { + System.out.println(); + System.out.println("ADVISORY (not gated) - peakHeapMb over band: " + String.join(", ", heapAdvisories) + + ". peakHeapMb is GC-timing noisy; use MeasurementCountBenchmark for the deterministic allocation signal."); + } if (!report.missingScenarios().isEmpty()) { System.out.println(); System.out.println("WARNING: baseline scenarios missing from candidate (not gated): " + String.join(", ", report.missingScenarios())); } System.out.println(); - System.out.println("Overall verdict: " + report.overallVerdict()); + System.out.println("Overall verdict: " + report.overallVerdict() + " (hard gate: average latency)"); } private static void write(VerdictReport report) throws Exception { @@ -243,7 +265,8 @@ private static void write(VerdictReport report) throws Exception { "verdict", List.of("scenario", "baseline_avg_ms", "candidate_avg_ms", "avg_delta_pct", "p95_delta_pct", "docs_per_sec_delta_pct", - "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct", "verdict"), + "baseline_peak_heap_mb", "candidate_peak_heap_mb", "peak_heap_delta_pct", + "heap_advisory", "verdict"), report.scenarios().stream() .map(row -> List.of( row.scenario(), @@ -255,6 +278,7 @@ private static void write(VerdictReport report) throws Exception { format(row.baselinePeakHeapMb()), format(row.candidatePeakHeapMb()), format(row.peakHeapDeltaPct()), + Boolean.toString(row.heapAdvisory()), row.verdict())) .toList()); 
System.out.println("Saved JSON verdict report to " + jsonPath); @@ -334,6 +358,7 @@ record ScenarioVerdict(String scenario, double baselinePeakHeapMb, double candidatePeakHeapMb, double peakHeapDeltaPct, + boolean heapAdvisory, String verdict) { } diff --git a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java index 463f5a80..75996c54 100644 --- a/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java +++ b/benchmarks/src/test/java/com/demcha/compose/BenchmarkVerdictToolTest.java @@ -33,15 +33,18 @@ void flagsAverageLatencyRegressionBeyondBand() throws Exception { } @Test - void flagsPeakHeapRegressionEvenWhenLatencyIsFlat() throws Exception { + void peakHeapOverBandIsAdvisoryNotGated() throws Exception { JsonNode baseline = report(scenario("cv-template", 10.0, 10.0, 40.0, 100.0)); JsonNode candidate = report(scenario("cv-template", 10.3, 10.0, 40.0, 120.0)); // +3% avg, +20% heap BenchmarkVerdictTool.VerdictReport report = BenchmarkVerdictTool.evaluate("base.json", "cand.json", baseline, candidate, GATE); - assertThat(report.regressed()).isTrue(); - assertThat(report.scenarios().get(0).verdict()).isEqualTo("REGRESSED"); + // Heap over band must NOT fail the gate — peakHeapMb is advisory only + // (GC-timing noisy). The hard gate metric is average latency. 
+ assertThat(report.regressed()).isFalse(); + assertThat(report.scenarios().get(0).verdict()).isEqualTo("NEUTRAL"); + assertThat(report.scenarios().get(0).heapAdvisory()).isTrue(); } @Test diff --git a/scripts/run-benchmarks.ps1 b/scripts/run-benchmarks.ps1 index 4126ade5..dbe162c0 100644 --- a/scripts/run-benchmarks.ps1 +++ b/scripts/run-benchmarks.ps1 @@ -469,10 +469,19 @@ try { Add-SummaryLine("- ``11-verdict-current-speed``: skipped") Add-SummaryLine(" - Reason: no candidate current-speed report was produced this run") } else { - # Hard gate: BenchmarkVerdictTool exits non-zero on a regression - # beyond the noise band, which makes Invoke-LoggedCommand throw and - # fail the whole benchmark run. - Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -Arguments @($verdictBaseline, $verdictCandidate) + # Hard gate only for medians (Repeat >= 2): a single run is too noisy + # to gate against a median baseline, so Repeat 1 runs the verdict as + # advisory (gate disabled) — it prints the table but never fails the + # run. Use -Repeat 5 for the hard gate. The hard gate metric is + # average latency; peakHeapMb is advisory inside the tool. When the + # gate is on, BenchmarkVerdictTool exits non-zero on a regression, + # which makes Invoke-LoggedCommand throw and fail the whole run. 
+ $verdictProperties = @() + if ($Repeat -le 1) { + $verdictProperties += "-Dgraphcompose.benchmark.verdict.gate=false" + Add-SummaryLine("- ``11-verdict-current-speed``: advisory (single run; use -Repeat 5 for the hard gate)") + } + Invoke-JavaMain -Name "11-verdict-current-speed" -Classpath $javaClasspath -MainClass "com.demcha.compose.BenchmarkVerdictTool" -SystemProperties $verdictProperties -Arguments @($verdictBaseline, $verdictCandidate) } } else { Add-SummaryLine("- ``11-verdict-current-speed``: skipped") From b1e24cee4750ec79f52b5f3f8a11365b200dd62c Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 14:26:24 +0100 Subject: [PATCH 4/7] perf(layout): binary-search long-token break in fitCharacters (drop quadratic re-measure) fitCharacters re-measured text.substring(0,index) for every index when breaking a long unbreakable token - O(n) width calls and O(n^2) measured characters. The fit predicate width(prefix) <= maxWidth is monotonic in prefix length, so binary-search the break index instead: it returns the same lastFitting (byte-identical wrapping) in O(log n) width calls. Probe on a 600-char token: width calls 652 -> 97, measured chars 36,317 -> 7,114, alloc ~1.5MB -> ~0.8MB. long-text (F1 path) and tables untouched; 1144 tests pass with no snapshot drift. Refs audit finding F2. --- CHANGELOG.md | 8 ++++++++ .../document/layout/TextFlowSupport.java | 20 ++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 824ccff1..3c51b77a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,14 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. rendered markedly faster, and a measurement-count probe showed ~9× fewer measured characters on a long paragraph. No public API or behaviour change. 
+- **Long-token line breaking is no longer quadratic.** `TextFlowSupport.fitCharacters` + now binary-searches the break point instead of re-measuring every growing prefix + one character at a time. For an unbreakable run (long URL/ID, no-space CJK, or a + very narrow column) this cuts measurement calls and measured characters by + ~80–85% (probe: 652 → 97 width calls, 36k → 7k measured chars on a 600-char + token). **Output is byte-identical** — the fit predicate is monotonic, so the + search returns the same break index. No public API or behaviour change. + ### Tests / tooling - **Benchmark regression gate and measurement probe (benchmarks module, not part diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java index 158a451b..b8d17260 100644 --- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java +++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java @@ -1517,13 +1517,23 @@ private static int fitCharacters(String text, TextStyle style, double maxWidth, TextMeasurementSystem measurement) { + // Largest prefix length whose width fits. The fit predicate + // width(substring(0,n)) <= maxWidth is monotonic in n (each added char + // contributes a non-negative glyph advance), so the fitting lengths form + // a prefix [1..lastFitting] and a binary search finds the SAME boundary + // as the old linear scan — but in O(log n) width calls instead of + // measuring every growing prefix (which was O(n) calls and O(n^2) + // measured characters for a long unbreakable token). 
int lastFitting = 0; - for (int index = 1; index <= text.length(); index++) { - String candidate = text.substring(0, index); - if (measurement.textWidth(style, candidate) <= maxWidth + EPS) { - lastFitting = index; + int low = 1; + int high = text.length(); + while (low <= high) { + int mid = (low + high) >>> 1; + if (measurement.textWidth(style, text.substring(0, mid)) <= maxWidth + EPS) { + lastFitting = mid; + low = mid + 1; } else { - break; + high = mid - 1; } } return lastFitting == 0 ? Math.min(1, text.length()) : lastFitting; From ea15d714030c79e62ba371c4904e910afcceaaec Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 14:44:12 +0100 Subject: [PATCH 5/7] bench: add long-token current-speed scenario (worst-case character wrap) 40 paragraphs with ~520-char unbreakable URL/ID tokens that overflow the line and force splitLongToken/fitCharacters. Makes the F2 worst case visible (same-session A/B: -44% avg, 14.47 -> 8.06 ms on this scenario) and guards against re-introducing quadratic long-token wrapping. 
--- .../demcha/compose/CurrentSpeedBenchmark.java | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java index d96dfc93..2858d64a 100644 --- a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java +++ b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java @@ -112,7 +112,8 @@ private void run() throws Exception { new Scenario("invoice-template", "Compose-first invoice template", this::renderInvoiceTemplateDocument), new Scenario("cv-template", "Compose-first CV template", this::renderCvTemplateDocument), new Scenario("proposal-template", "Long multi-page proposal template", this::renderProposalTemplateDocument), - new Scenario("feature-rich", "QR, barcode, watermark, header/footer, page break", this::renderFeatureRichDocument) + new Scenario("feature-rich", "QR, barcode, watermark, header/footer, page break", this::renderFeatureRichDocument), + new Scenario("long-token", "Long unbreakable tokens (URLs/IDs) forcing character-level wrap", this::renderLongTokenDocument) ); System.out.println("Latency benchmark"); @@ -551,6 +552,31 @@ private byte[] renderProposalTemplateDocument() throws Exception { } } + private byte[] renderLongTokenDocument() throws Exception { + // Worst-case for character-level wrapping: many long unbreakable tokens + // (long URLs/IDs/no-space runs) that overflow the line and force + // splitLongToken -> fitCharacters. Exercises audit finding F2. 
+ try (DocumentSession document = GraphCompose.document() + .pageSize(com.demcha.compose.document.api.DocumentPageSize.A4) + .margin(22, 22, 22, 22) + .create()) { + var root = document.dsl() + .pageFlow() + .name("BenchmarkLongTokenRoot") + .spacing(8); + for (int i = 1; i <= 40; i++) { + final int index = i; + root.addParagraph(paragraph -> paragraph + .name("BenchmarkLongToken" + index) + .text("Reference " + index + ": https://example.com/" + "a".repeat(500) + + " trailing words to wrap normally after the long token.") + .textStyle(BODY_STYLE)); + } + root.build(); + return document.toPdfBytes(); + } + } + private byte[] renderFeatureRichDocument() throws Exception { PdfFixedLayoutBackend backend = PdfFixedLayoutBackend.builder() .metadata(PdfMetadataOptions.builder() From 8435be550fd2b7bee7ab2d0b98fb78fd4f2da271 Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 15:14:39 +0100 Subject: [PATCH 6/7] perf(layout): assemble wrapped lines via StringBuilder (drop per-token string copy) wrapParagraph concatenated Strings token-by-token (currentLine + token), re-copying the whole growing line each token and producing a throwaway String per step. Accumulate in a reused StringBuilder instead; the character sequence is identical so wrapping stays byte-for-byte the same (1144 tests, snapshots clean). Measured effect is small on typical text (~1% less compile allocation on long-text, lines bounded by column width) but it removes a latent O(line-length^2) copy on very wide/unwrapped lines. --- CHANGELOG.md | 9 ++++++ .../document/layout/TextFlowSupport.java | 28 +++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c51b77a..bcc1705c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,15 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. token). **Output is byte-identical** — the fit predicate is monotonic, so the search returns the same break index. 
No public API or behaviour change. +- **Line assembly avoids quadratic string copying.** `TextFlowSupport.wrapParagraph` + now accumulates each wrapped line in a reused `StringBuilder` instead of + concatenating Strings token-by-token (which re-copied the whole growing line and + produced a throwaway `String` per token). **Output is byte-identical.** The effect + is small on typical text (lines are bounded by column width — a probe showed ~1% + less per-compile allocation on a long-text document), but it removes a latent + O(line-length²) copy on pathologically wide / unwrapped lines. No public API or + behaviour change. + ### Tests / tooling - **Benchmark regression gate and measurement probe (benchmarks module, not part diff --git a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java index b8d17260..0da49991 100644 --- a/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java +++ b/src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java @@ -819,7 +819,14 @@ private static List wrapParagraph(List logicalLines, List tokens = tokenize(logicalLine); String currentPrefix = initialPrefix; - String currentLine = initialPrefix; + // currentLine is assembled in a reused StringBuilder: appending a + // token is amortised O(1), whereas concatenating Strings re-copied + // the whole growing line on every token (O(chars^2) char copies plus + // a fresh throwaway String each step). The character sequence is + // identical to the old `+` assembly, so wrapping stays byte-for-byte + // the same; we only materialise a String via toString() when a line + // is emitted (which the result list needs anyway). + StringBuilder currentLine = new StringBuilder(initialPrefix); // Running width of currentLine. 
The greedy fit only needs the width // of the line built so far plus the next token, not a fresh // measurement of the whole growing prefix on every token (which made @@ -828,7 +835,7 @@ private static List wrapParagraph(List logicalLines, // per-token widths matches measuring the full string to well within // the EPS the fit test already tolerates; each new line re-measures // its (short) start to pin any floating-point drift. - double currentWidth = measurement.textWidth(style, currentLine); + double currentWidth = measurement.textWidth(style, initialPrefix); boolean hasContent = false; for (String token : tokens) { @@ -839,15 +846,16 @@ private static List wrapParagraph(List logicalLines, double nextTokenWidth = measurement.textWidth(style, nextToken); if (!hasContent || currentWidth + nextTokenWidth <= maxWidth + EPS) { - currentLine = currentLine + nextToken; + currentLine.append(nextToken); currentWidth += nextTokenWidth; hasContent = true; continue; } - result.add(trimTrailingSpaces(currentLine)); + result.add(trimTrailingSpaces(currentLine.toString())); currentPrefix = continuationPrefix; - currentLine = continuationPrefix; + currentLine.setLength(0); + currentLine.append(continuationPrefix); currentWidth = measurement.textWidth(style, continuationPrefix); hasContent = false; @@ -855,7 +863,8 @@ private static List wrapParagraph(List logicalLines, String strippedToken = nextToken.stripLeading(); double strippedTokenWidth = measurement.textWidth(style, strippedToken); if (currentWidth + strippedTokenWidth <= maxWidth + EPS) { - currentLine = currentPrefix + strippedToken; + currentLine.setLength(0); + currentLine.append(currentPrefix).append(strippedToken); currentWidth += strippedTokenWidth; hasContent = true; continue; @@ -870,12 +879,13 @@ private static List wrapParagraph(List logicalLines, result.add(currentPrefix + chunks.get(index)); currentPrefix = continuationPrefix; } - currentLine = currentPrefix + chunks.get(chunks.size() - 1); - currentWidth = 
measurement.textWidth(style, currentLine); + currentLine.setLength(0); + currentLine.append(currentPrefix).append(chunks.get(chunks.size() - 1)); + currentWidth = measurement.textWidth(style, currentLine.toString()); hasContent = true; } - result.add(trimTrailingSpaces(currentLine)); + result.add(trimTrailingSpaces(currentLine.toString())); } return List.copyOf(result); From ae461ab100e6db7299d5e6403efcc44545166ebf Mon Sep 17 00:00:00 2001 From: DemchaAV Date: Mon, 8 Jun 2026 15:30:02 +0100 Subject: [PATCH 7/7] bench: warm up MeasurementCountBenchmark before the allocation window The probe measured each scenario once, and the first scenario (long-text) in a fresh JVM carried ~36 MB of one-time class-load/JIT/static-init allocation -- a JVM artifact, not a layout cost (verified: cold first compile 36.6 MB vs warm 0.65 MB for the same document; layout alloc scales sub-linearly). Warm up 5 iterations before the measured pass so Alloc KB reflects steady-state per-document allocation; measurement-count columns are exact regardless. Also drops the F1b CHANGELOG perf claim -- a warm A/B shows no measurable steady-state allocation change (719.8 = 719.8 KB), so F1b stays as a byte-identical latent-O(n^2) cleanup, not a perf win. --- CHANGELOG.md | 13 +++------- .../compose/MeasurementCountBenchmark.java | 26 +++++++++++++++---- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcc1705c..0938c80b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,15 +28,6 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. token). **Output is byte-identical** — the fit predicate is monotonic, so the search returns the same break index. No public API or behaviour change. 
-- **Line assembly avoids quadratic string copying.** `TextFlowSupport.wrapParagraph` - now accumulates each wrapped line in a reused `StringBuilder` instead of - concatenating Strings token-by-token (which re-copied the whole growing line and - produced a throwaway `String` per token). **Output is byte-identical.** The effect - is small on typical text (lines are bounded by column width — a probe showed ~1% - less per-compile allocation on a long-text document), but it removes a latent - O(line-length²) copy on pathologically wide / unwrapped lines. No public API or - behaviour change. - ### Tests / tooling - **Benchmark regression gate and measurement probe (benchmarks module, not part @@ -49,7 +40,9 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge. median (`-Repeat` >= 2). `MeasurementCountBenchmark` + `CountingTextMeasurementSystem` capture deterministic measurement-call counts and per-compile allocation bytes for - proving algorithmic / allocation changes. `scripts/run-benchmarks.ps1` gains the + proving algorithmic / allocation changes (the probe warms up the JVM before its + allocation window, so `Alloc KB` reflects steady state, not one-time + class-load / JIT cold-start). `scripts/run-benchmarks.ps1` gains the `11-verdict-current-speed` step (skippable via `-SkipVerdict`). ## v1.7.0 — 2026-06-07 diff --git a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java index 82e403f9..b4b585d5 100644 --- a/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java +++ b/benchmarks/src/main/java/com/demcha/compose/MeasurementCountBenchmark.java @@ -83,12 +83,28 @@ private void run() throws Exception { System.out.println("Thread allocation measurement: " + (allocationSupported() ? 
"enabled" : "UNAVAILABLE (Alloc KB = n/a)")); System.out.println(); + Consumer longText = flow -> + flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE)); + Consumer longToken = flow -> + flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE)); + Consumer largeTable = MeasurementCountBenchmark::authorLargeTable; + + // Warm up the JVM (class loading + JIT) BEFORE the allocation window so the + // "Alloc KB" column reflects steady-state per-document layout allocation, not + // one-time cold-start cost. Without this the FIRST scenario measured carried + // ~36 MB of class-load / JIT / static-init allocation — a JVM artifact, not a + // layout cost (verified: cold first compile 36.6 MB vs warm 0.65 MB for the + // same long-text document). The measurement-COUNT columns are exact either way. + for (int warmup = 0; warmup < 5; warmup++) { + measureScenario("warmup", longText); + measureScenario("warmup", longToken); + measureScenario("warmup", largeTable); + } + List results = new ArrayList<>(); - results.add(measureScenario("long-text", flow -> - flow.addParagraph(p -> p.text(LONG_PARAGRAPH).textStyle(BODY_STYLE)))); - results.add(measureScenario("long-token", flow -> - flow.addParagraph(p -> p.text(LONG_TOKEN_PARAGRAPH).textStyle(BODY_STYLE)))); - results.add(measureScenario("large-table", MeasurementCountBenchmark::authorLargeTable)); + results.add(measureScenario("long-text", longText)); + results.add(measureScenario("long-token", longToken)); + results.add(measureScenario("large-table", largeTable)); System.out.printf("%-14s | %11s | %9s | %9s | %11s | %8s | %11s | %10s | %6s%n", "Scenario", "WidthReqs", "Distinct", "Repeat %", "Sum chars", "Max arg", "LineMetrics", "Alloc KB", "Pages");