PQCWorld · xMKx · May 26, 2026
diff --git a/src/forker/context-builder.ts b/src/forker/context-builder.ts
@@ -8,6 +8,10 @@ export class ContextBuilder {
   /**
    * Return the system prompt that instructs the LLM to use AskUserQuestion
    * for all meaningful decision points.
+   *
+   * Sized to clear Anthropic's 1024-token prompt-cache minimum on Sonnet so
+   * the provider can cache it across forks. Below that threshold the
+   * cache_control marker is silently ignored by the API.
    */
   static buildSystemPrompt(): string {
     return [
@@ -31,6 +35,90 @@ export class ContextBuilder {
       "If a decision has already been made earlier in the conversation history, do not ask about it again. Use the previously chosen option.",
       "",
       "Focus on meaningful architectural and design decisions. Do not ask about trivial matters like variable naming conventions, code formatting, or import ordering.",
+      "",
+      "## Examples of decisions worth asking about",
+      "",
+      "### Authentication strategy",
+      'Question: "Which authentication strategy should we use for this app?"',
+      "Options:",
+      "- JWT with refresh tokens — stateless, scales horizontally across replicas, harder to revoke an individual session",
+      "- Server-side sessions — easy revocation and audit, requires shared session storage (Redis or database)",
+      "- OAuth2 via identity provider — outsources credential management entirely, adds a vendor dependency and login redirect flow",
+      "",
+      "### Primary data store",
+      'Question: "Which data store should we use for the primary entity?"',
+      "Options:",
+      "- PostgreSQL — relational guarantees, mature ecosystem, JSONB for schema flexibility, strong tooling",
+      "- MongoDB — document-oriented, easier schema evolution at the cost of eventual-consistency tradeoffs",
+      "- SQLite — embedded, zero-ops, but single-writer constraint limits horizontal scaling",
+      "",
+      "### Pagination strategy",
+      'Question: "Which pagination strategy should the list endpoint use?"',
+      "Options:",
+      "- Offset/limit — simple, allows page jumping, but performance degrades sharply on deep pages",
+      "- Cursor-based — stable under concurrent inserts, no deep-page penalty, but no page jumping",
+      "- Keyset — efficient for sorted lists, requires the ORDER BY column to be indexed",
+      "",
+      "### Background job execution",
+      'Question: "How should background work be executed?"',
+      "Options:",
+      "- In-process queue (e.g. BullMQ, Sidekiq) — easy to operate, tightly coupled to the API process lifecycle",
+      "- External worker pool (e.g. Temporal, dedicated workers) — independent scaling, more infrastructure to manage",
+      "- Cron-driven scripts — simplest possible model, but no retries, no observability, no concurrency control",
+      "",
+      "### Caching layer",
+      'Question: "Where should expensive computed values be cached?"',
+      "Options:",
+      "- In-memory per-process (e.g. LRU) — fastest reads, lost on restart, inconsistent across replicas",
+      "- Redis / Memcached — shared across replicas, network hop on every read, additional infra to operate",
+      "- Materialised database view — consistency guarantees from the DB, refresh cost paid on write, no extra service",
+      "",
+      "### API error format",
+      'Question: "How should the API surface errors to clients?"',
+      "Options:",
+      "- RFC 7807 problem+json — standardised, machine-readable, requires client awareness of the spec",
+      "- Custom envelope { code, message, details } — simple, project-specific, no external spec to track",
+      "- HTTP status codes only — leanest, relies entirely on status semantics, loses room for error context",
+      "",
+      "## Examples of things to decide silently (do NOT ask the user)",
+      "",
+      "- Variable naming, function naming, file naming conventions",
+      "- Import ordering, code formatting, whitespace, brace style",
+      "- Type names for internal-only types",
+      "- Test file location (alongside source vs in a tests/ directory), unless the project has no precedent",
+      "- Whether to use let or const for a local variable",
+      "- Comment style for internal code",
+      "- Log message wording",
+      "- Internal helper function organisation",
+      "- The order of properties in an object literal",
+      "",
+      "## How to phrase options",
+      "",
+      "Each option should:",
+      "1. Have a SHORT label (1-4 words) suitable as a tag or filename",
+      "2. Have a 1-2 sentence description that captures the key tradeoff",
+      "3. Be genuinely distinct from the other options — not three flavours of the same approach",
+      "",
+      'Bad example: ["JWT", "JWT with refresh", "JWT (stateless)"] — three near-duplicates',
+      'Good example: ["JWT", "Sessions", "OAuth2"] — three genuinely different strategies',
+      "",
+      "## When prior decisions exist",
+      "",
+      'If the user has already answered a similar question earlier in the conversation history (or it appears in the "Prior decisions:" header of the user message), do NOT re-ask. Apply the previously chosen answer and continue implementing.',
+      "",
+      'If a candidate decision is downstream of a prior decision (e.g. "which JWT library" only matters if "JWT" was chosen), only surface it when your implementation actually reaches that decision point — not preemptively.',
+      "",
+      "## Anti-patterns to avoid when asking questions",
+      "",
+      'Do not ask the user to choose between options that are not genuinely different. "REST vs RESTful API" and "Postgres vs PostgreSQL" are not real choices — they are the same thing under different names. Resolve such ambiguities silently.',
+      "",
+      'Do not ask the user to choose between options where one is strictly worse than another for the stated requirements. If "plaintext password storage" is one of your options for an authentication question, you have not done the work of filtering options down to reasonable candidates.',
+      "",
+      'Do not chain multiple decisions into a single question. "Should we use JWT with RS256 stored in HttpOnly cookies, or sessions with Redis, or OAuth2 via Google?" is three decisions glued together. Ask them in sequence so each can be answered independently.',
+      "",
+      'Do not ask the user to make a decision that the codebase has already made implicitly. If the project uses Postgres throughout, do not ask "which database" when adding a new entity. Inherit the existing choice.',
+      "",
+      'Do not ask trivia questions disguised as decisions. "Should the function return a Promise or use async/await?" is not a decision — these are equivalent representations of the same thing.',
     ].join("\n");
   }
 

diff --git a/src/providers/anthropic-api.ts b/src/providers/anthropic-api.ts
@@ -40,7 +40,17 @@ export class AnthropicApiProvider implements ExecutionProvider {
       model,
       max_tokens: MAX_TOKENS,
       stream: true,
-      system: opts.systemPrompt ?? "",
+      // System prompt is sent as a structured block with cache_control so the
+      // API caches it across forks. The marker is silently ignored if the
+      // block falls below the per-model cacheable minimum (1024 tok on Sonnet,
+      // 4096 on Haiku). buildSystemPrompt() is sized to clear Sonnet's bar.
+      system: [
+        {
+          type: "text",
+          text: opts.systemPrompt ?? "",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
       messages: [{ role: "user", content: opts.prompt }],
     };
 

diff --git a/test/unit/forker/context-builder.test.ts b/test/unit/forker/context-builder.test.ts
@@ -24,6 +24,18 @@ describe("ContextBuilder", () => {
 
       expect(prompt).toContain("2-4");
     });
+
+    it("is large enough to be cacheable by the Anthropic API", () => {
+      // Anthropic's prompt cache requires the cacheable block to be at least
+      // 1024 tokens on Sonnet (4096 on Haiku). At the ~4.5 char/tok that
+      // cl100k averages for English, 5000 chars is only ~1100 tokens: a
+      // margin of under 10%, and cl100k is merely a proxy for the real
+      // Anthropic tokenizer. If this assertion starts failing, prompt edits
+      // have probably made the system prompt too short to cache, and the
+      // Anthropic provider will pay full input price on every fork.
+      const prompt = ContextBuilder.buildSystemPrompt();
+      expect(prompt.length).toBeGreaterThanOrEqual(5000);
+    });
   });
 
   describe("buildAnswerPrompt()", () => {