From e481c23ff58612264f22cb8bee75da5b98aff26d Mon Sep 17 00:00:00 2001 From: HeavyGee <133152184+heavygee@users.noreply.github.com> Date: Sun, 21 Jun 2026 06:21:11 +0100 Subject: [PATCH] feat(overseer): replay harness v0 + CI gate + one-boss invariant stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 2.75 of the Overseer build sequence (prioritization §6, ADR-001). - Captured-event-stream loader: parses a synthetic snapshot (sessions, events, event_links, baseline inbox items, dispatch envelopes, worker messages) and replays it into a sandbox :memory: Store - never touches the production DB. - Run-once promotion + prioritization entry point invokable against a snapshot (runPromotionPass). - 12 golden scenarios from the §6 table: routine-progress surfaces nothing, same-session collapse with merged source_event_ids, idempotent re-emission, blocked_by fan-in root-cause traversal, approval escalation, stale-item aging + EEMUA-191 KPIs (alarm-flood / stale / priority-distribution), completed+PR review item, completed-noise falls out, CI/worker contradiction surfaced-not-resolved, operator noise demotion. Plus the +1: hub-inferred stale silence is captured-only (locks fix/overseer-inbox-stale-noise), worker self-reported stalled promotes. - One-boss invariant (ADR-001): scans dispatched events, asserts the worker message is operator-attributed with no Overseer metadata or attribution boilerplate. Passes vacuously now (no dispatches); clean + leak fixtures prove the assertion shape activates at Step 4. - Dedicated CI gate (.github/workflows/overseer-replay.yml) path-filtered to Overseer logic / inbox scoring / event taxonomy / worker-emission contract; runs the harness on every matching PR. - Fixtures are synthetic (contracts §7), never real transcripts. 
Co-authored-by: Cursor --- .github/workflows/overseer-replay.yml | 50 ++ hub/src/overseer/oneBossInvariant.test.ts | 35 ++ hub/src/overseer/oneBossInvariant.ts | 164 +++++++ hub/src/overseer/replayHarness.test.ts | 182 +++++++ hub/src/overseer/replayHarness.ts | 454 ++++++++++++++++++ test/fixtures/overseer-replay/README.md | 17 + .../overseer-replay/aging-and-stale.json | 21 + .../fixtures/overseer-replay/alarm-flood.json | 21 + .../overseer-replay/approval-escalation.json | 10 + .../overseer-replay/blocked-by-fanin.json | 21 + .../overseer-replay/ci-contradiction.json | 11 + .../overseer-replay/completed-noise.json | 10 + .../overseer-replay/completed-review-pr.json | 10 + .../idempotent-reemission.json | 11 + .../overseer-replay/one-boss-clean.json | 16 + .../overseer-replay/one-boss-leak.json | 16 + .../operator-noise-demotion.json | 10 + .../routine-progress-flood.json | 41 ++ .../same-session-collapse.json | 11 + .../overseer-replay/stale-captured-only.json | 12 + 20 files changed, 1123 insertions(+) create mode 100644 .github/workflows/overseer-replay.yml create mode 100644 hub/src/overseer/oneBossInvariant.test.ts create mode 100644 hub/src/overseer/oneBossInvariant.ts create mode 100644 hub/src/overseer/replayHarness.test.ts create mode 100644 hub/src/overseer/replayHarness.ts create mode 100644 test/fixtures/overseer-replay/README.md create mode 100644 test/fixtures/overseer-replay/aging-and-stale.json create mode 100644 test/fixtures/overseer-replay/alarm-flood.json create mode 100644 test/fixtures/overseer-replay/approval-escalation.json create mode 100644 test/fixtures/overseer-replay/blocked-by-fanin.json create mode 100644 test/fixtures/overseer-replay/ci-contradiction.json create mode 100644 test/fixtures/overseer-replay/completed-noise.json create mode 100644 test/fixtures/overseer-replay/completed-review-pr.json create mode 100644 test/fixtures/overseer-replay/idempotent-reemission.json create mode 100644 
test/fixtures/overseer-replay/one-boss-clean.json create mode 100644 test/fixtures/overseer-replay/one-boss-leak.json create mode 100644 test/fixtures/overseer-replay/operator-noise-demotion.json create mode 100644 test/fixtures/overseer-replay/routine-progress-flood.json create mode 100644 test/fixtures/overseer-replay/same-session-collapse.json create mode 100644 test/fixtures/overseer-replay/stale-captured-only.json diff --git a/.github/workflows/overseer-replay.yml b/.github/workflows/overseer-replay.yml new file mode 100644 index 0000000000..64cf45195d --- /dev/null +++ b/.github/workflows/overseer-replay.yml @@ -0,0 +1,50 @@ +name: Overseer replay gate + +# Dedicated CI gate (build sequence Step 2.75 / prioritization §6): the replay +# harness + one-boss invariant (ADR-001) run on every PR that touches Overseer +# logic, inbox scoring, the event taxonomy, or the worker-emission contract. +# A failure here blocks merge — it is the single mechanical guard against +# salience regressions and one-boss attribution leaks. 
+ +on: + push: + paths: + - 'hub/src/overseer/**' + - 'hub/src/store/events.ts' + - 'hub/src/store/eventStore.ts' + - 'hub/src/store/inboxItems.ts' + - 'hub/src/store/inboxStore.ts' + - 'hub/src/sync/overseerEventRecorder.ts' + - 'shared/src/overseerEvents.ts' + - 'shared/src/overseerInbox.ts' + - 'test/fixtures/overseer-replay/**' + - '.github/workflows/overseer-replay.yml' + pull_request: + paths: + - 'hub/src/overseer/**' + - 'hub/src/store/events.ts' + - 'hub/src/store/eventStore.ts' + - 'hub/src/store/inboxItems.ts' + - 'hub/src/store/inboxStore.ts' + - 'hub/src/sync/overseerEventRecorder.ts' + - 'shared/src/overseerEvents.ts' + - 'shared/src/overseerInbox.ts' + - 'test/fixtures/overseer-replay/**' + - '.github/workflows/overseer-replay.yml' + +jobs: + overseer-replay: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.14 + - run: bun install + # Whole-repo typecheck (tsc compiles each package as one program) + # lives in test.yml; this gate runs the Overseer test surface so it + # stays green/red on Overseer correctness specifically. 
+ - name: Overseer taxonomy + inbox scoring tests (shared) + run: cd shared && bun test src/overseerEvents.test.ts src/overseerInbox.test.ts + - name: Replay harness + one-boss invariant (hub) + run: cd hub && bun test src/overseer src/store/inboxItems.test.ts src/sync/overseerEventRecorder.test.ts src/sync/overseerEventRecorder.injection.test.ts diff --git a/hub/src/overseer/oneBossInvariant.test.ts b/hub/src/overseer/oneBossInvariant.test.ts new file mode 100644 index 0000000000..2deebece5c --- /dev/null +++ b/hub/src/overseer/oneBossInvariant.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from 'bun:test' +import { join } from 'node:path' +import { loadAndReplay } from './replayHarness' +import { assertOneBossInvariant, checkOneBossInvariant } from './oneBossInvariant' + +const FIXTURE_DIR = join(import.meta.dir, '..', '..', '..', 'test', 'fixtures', 'overseer-replay') +const fixture = (name: string) => join(FIXTURE_DIR, `${name}.json`) + +describe('One-boss invariant (ADR-001)', () => { + it('passes vacuously when the stream has no dispatched events', () => { + const ctx = loadAndReplay(fixture('routine-progress-flood')) + const result = checkOneBossInvariant(ctx) + expect(result.checked).toBe(0) + expect(result.violations).toHaveLength(0) + expect(() => assertOneBossInvariant(ctx)).not.toThrow() + }) + + it('checks a clean operator-attributed dispatch and finds no violation', () => { + const ctx = loadAndReplay(fixture('one-boss-clean')) + const result = checkOneBossInvariant(ctx) + expect(result.checked).toBe(1) + expect(result.violations).toHaveLength(0) + expect(() => assertOneBossInvariant(ctx)).not.toThrow() + }) + + it('catches a leaking dispatch (attribution boilerplate + overseer metadata)', () => { + const ctx = loadAndReplay(fixture('one-boss-leak')) + const result = checkOneBossInvariant(ctx) + expect(result.checked).toBe(1) + const kinds = new Set(result.violations.map((v) => v.kind)) + expect(kinds.has('attribution-phrase')).toBe(true) + 
expect(kinds.has('metadata-key')).toBe(true) + expect(() => assertOneBossInvariant(ctx)).toThrow(/one-boss/) + }) +}) diff --git a/hub/src/overseer/oneBossInvariant.ts b/hub/src/overseer/oneBossInvariant.ts new file mode 100644 index 0000000000..eb678de31b --- /dev/null +++ b/hub/src/overseer/oneBossInvariant.ts @@ -0,0 +1,164 @@ +/** + * One-boss invariant (ADR-001 §"Invariant test"). + * + * Workers never know about the Overseer: every dispatched instruction arrives + * operator-attributed. This module is the mechanical check that protects the + * decision from drift. + * + * At Step 2.75 there is no dispatch writer, so a replayed snapshot contains no + * `dispatched` events and the invariant passes VACUOUSLY (checked === 0). The + * assertion shape is wired so that when Step 4 lands the dispatch envelope + + * worker-message writer, replaying a Step-4-era snapshot (which carries + * `dispatched` events plus their envelopes/messages) activates real coverage + * with zero changes here. + * + * The check is intent-based, not lexical: it does NOT ban the word "overseer" + * from worker messages (operators legitimately reference the product by name). + * It bans GENERATED attribution boilerplate and envelope-metadata exposure. + */ +import type { Database } from 'bun:sqlite' +import type { ReplayContext, SnapshotDispatchEnvelope, SnapshotWorkerMessage } from './replayHarness' + +/** Curated forbidden-phrase list (ADR-001). Extend per persona archetypes. */ +export const FORBIDDEN_ATTRIBUTION_PATTERNS: RegExp[] = [ + /the\s+overseer\s+(says|suggests|asks|wants)/i, + /your\s+assistant\s+(says|suggests|asks|wants)/i, + /on\s+behalf\s+of\s+(the\s+)?overseer/i, + /(message|dispatch|request)\s+from\s+(the\s+)?overseer/i, + /(chief\s+of\s+staff|fleet\s+manager|fleet\s+coordinator)\s+(says|suggests|wants)/i +] + +/** Exact metadata keys that would leak Overseer provenance to a worker. 
/**
 * Metadata keys that must never reach a worker because they expose Overseer
 * provenance. Entries are written in snake_case; `metadataKeyViolation`
 * compares them ignoring case and `_` / `-` separators.
 */
export const FORBIDDEN_METADATA_KEYS = [
    'source',
    'origin',
    'dispatched_by',
    'envelope_id',
    'dispatch_envelope_id',
    'rationale',
    'related_event_ids',
    'confirmation_source',
    'idempotency_key'
] as const

/** One detected breach of the invariant, tied to the `dispatched` event that produced it. */
export type OneBossViolation = {
    eventId: number
    /** Worker message id, or null when no envelope could be resolved for the event. */
    messageId: string | null
    kind: 'role' | 'metadata-key' | 'attribution-phrase' | 'missing-message'
    detail: string
}

/** Scan outcome: `checked` counts the dispatched events examined; `violations` is empty on success. */
export type OneBossResult = {
    checked: number
    violations: OneBossViolation[]
}

/** Reduce snake_case, kebab-case and camelCase spellings of a key to one comparable form. */
function normalizeMetadataKey(key: string): string {
    return key.replace(/[_-]/g, '').toLowerCase()
}

/** Forbidden keys in normalized form, built once so each lookup is a set probe. */
const NORMALIZED_FORBIDDEN_METADATA_KEYS: ReadonlySet<string> = new Set(
    FORBIDDEN_METADATA_KEYS.map((key) => normalizeMetadataKey(key))
)

/**
 * Return the first metadata key that leaks Overseer provenance, or null.
 *
 * A key is a violation when its lowercased form starts with `overseer`, or when
 * its normalized form equals a normalized entry of `FORBIDDEN_METADATA_KEYS`
 * (so `relatedEventIds` and `Related-Event-Ids` are caught like
 * `related_event_ids`). Only keys are inspected, never values.
 */
function metadataKeyViolation(
    eventId: number,
    messageId: string,
    metadata: Record<string, unknown>
): OneBossViolation | null {
    for (const key of Object.keys(metadata)) {
        if (key.toLowerCase().startsWith('overseer')) {
            return { eventId, messageId, kind: 'metadata-key', detail: `overseer-prefixed metadata key '${key}'` }
        }
        if (NORMALIZED_FORBIDDEN_METADATA_KEYS.has(normalizeMetadataKey(key))) {
            return { eventId, messageId, kind: 'metadata-key', detail: `forbidden metadata key '${key}'` }
        }
    }
    return null
}

/**
 * Return a violation for the first forbidden attribution pattern that matches
 * the rendered instruction, or null when none match.
 */
function attributionViolation(
    eventId: number,
    messageId: string,
    rendered: string
): OneBossViolation | null {
    const matched = FORBIDDEN_ATTRIBUTION_PATTERNS.find((pattern) => pattern.test(rendered))
    if (!matched) return null
    return {
        eventId,
        messageId,
        kind: 'attribution-phrase',
        detail: `rendered instruction matches forbidden attribution ${matched}`
    }
}

/** Columns selected from `events` for each dispatched row. */
type DispatchedEventRow = { id: number; idempotency_key: string | null }

/**
 * Check the one-boss invariant over a replayed snapshot.
 *
 * Resolves each `dispatched` event to its dispatch envelope (by
 * idempotency_key) and the worker-facing message it produced, then asserts the
 * message is operator-attributed with no Overseer leakage. Envelopes/messages
 * are read from the snapshot today; Step 4 swaps the resolution to the real
 * `dispatch_envelopes` + `messages` tables.
 *
 * @param ctx Replay context; `ctx.db` is queried for dispatched events and
 *   `ctx.snapshot` supplies the envelopes and worker messages.
 * @returns `checked` equal to the number of dispatched events and every
 *   violation found (at most one per kind per event). Never throws on a
 *   violation; use `assertOneBossInvariant` for that.
 */
export function checkOneBossInvariant(ctx: ReplayContext): OneBossResult {
    const db: Database = ctx.db
    const dispatched = db.prepare(
        "SELECT id, idempotency_key FROM events WHERE event_type = 'dispatched'"
    ).all() as DispatchedEventRow[]

    const envelopeByKey = new Map<string, SnapshotDispatchEnvelope>()
    for (const envelope of ctx.snapshot.dispatchEnvelopes ?? []) {
        envelopeByKey.set(envelope.idempotencyKey, envelope)
    }
    const messageById = new Map<string, SnapshotWorkerMessage>()
    for (const message of ctx.snapshot.workerMessages ?? []) {
        messageById.set(message.id, message)
    }

    const violations: OneBossViolation[] = []

    for (const event of dispatched) {
        const envelope = event.idempotency_key ? envelopeByKey.get(event.idempotency_key) : undefined
        const message = envelope ? messageById.get(envelope.messageId) : undefined

        if (!envelope || !message) {
            // A dispatched event with no resolvable worker message is itself a
            // contract violation once dispatch exists (Step 4); flag it.
            violations.push({
                eventId: event.id,
                messageId: envelope?.messageId ?? null,
                kind: 'missing-message',
                detail: 'dispatched event has no resolvable worker message'
            })
            continue
        }

        if (message.role !== 'user') {
            violations.push({
                eventId: event.id,
                messageId: message.id,
                kind: 'role',
                detail: `worker message role is '${message.role}', expected 'user'`
            })
        }

        const metaViolation = metadataKeyViolation(event.id, message.id, message.metadata ?? {})
        if (metaViolation) violations.push(metaViolation)

        const attrViolation = attributionViolation(event.id, message.id, message.renderedInstruction)
        if (attrViolation) violations.push(attrViolation)
    }

    return { checked: dispatched.length, violations }
}

/** Throwing wrapper for use directly in assertions / CI gate scripts. */
*/ +export function assertOneBossInvariant(ctx: ReplayContext): OneBossResult { + const result = checkOneBossInvariant(ctx) + if (result.violations.length > 0) { + const summary = result.violations + .map((v) => `event #${v.eventId} [${v.kind}]: ${v.detail}`) + .join('; ') + throw new Error(`[one-boss] invariant violated (${result.violations.length}): ${summary}`) + } + return result +} diff --git a/hub/src/overseer/replayHarness.test.ts b/hub/src/overseer/replayHarness.test.ts new file mode 100644 index 0000000000..3b1e86434c --- /dev/null +++ b/hub/src/overseer/replayHarness.test.ts @@ -0,0 +1,182 @@ +import { describe, expect, it } from 'bun:test' +import { join } from 'node:path' +import { + computeEffectivePriority, + countStaleItems, + detectAlarmFlood, + detectContradictions, + findRootCauseEventId, + loadAndReplay, + parseSnapshot, + priorityDistribution, + runPromotionPass +} from './replayHarness' + +const FIXTURE_DIR = join(import.meta.dir, '..', '..', '..', 'test', 'fixtures', 'overseer-replay') + +function fixture(name: string): string { + return join(FIXTURE_DIR, `${name}.json`) +} + +/** Fixed reference epoch baked into the time-relative fixtures. 
*/ +const NOW = 1_700_000_000_000 + +describe('Overseer replay harness — golden scenarios (prioritization §6)', () => { + it('loader replays into a sandbox :memory: DB without a production path', () => { + const ctx = loadAndReplay(fixture('routine-progress-flood')) + expect((ctx.store as unknown as { _dbPath: string })._dbPath).toBe(':memory:') + expect(ctx.store.events.count()).toBe(30) + }) + + it('1: 30 routine progress events surface nothing', () => { + const ctx = loadAndReplay(fixture('routine-progress-flood')) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(0) + expect(ctx.store.events.count()).toBe(30) + }) + + it('2: alarm flood — 11 candidates in a 10-min window is detected', () => { + const ctx = loadAndReplay(fixture('alarm-flood')) + const flood = detectAlarmFlood(ctx.store.events.list({ limit: 200 })) + expect(flood.flood).toBe(true) + expect(flood.peakCount).toBe(11) + // routine flood has zero candidates -> no flood + const calm = loadAndReplay(fixture('routine-progress-flood')) + expect(detectAlarmFlood(calm.store.events.list({ limit: 200 })).flood).toBe(false) + }) + + it('3: same-session attention events collapse into one item with merged source_event_ids', () => { + const ctx = loadAndReplay(fixture('same-session-collapse')) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(1) + const merged = inbox[0]!.sourceEventIds.slice().sort((a, b) => a - b) + expect(merged).toEqual([ctx.eventIdBySid.get(1)!, ctx.eventIdBySid.get(2)!].sort((a, b) => a - b)) + expect(inbox[0]!.category).toBe('REVIEW') + }) + + it('4: idempotent re-emission is stored once and produces one item', () => { + const ctx = loadAndReplay(fixture('idempotent-reemission')) + expect(ctx.store.events.count()).toBe(1) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(1) + expect(inbox[0]!.category).toBe('QUESTION') + }) + + it('5: blocked_by fan-in — root-cause traversal returns the upstream, not the symptoms', () => { + const ctx = 
loadAndReplay(fixture('blocked-by-fanin')) + const upstream = ctx.eventIdBySid.get(100)! + for (const symptomSid of [1, 2, 3]) { + const symptom = ctx.eventIdBySid.get(symptomSid)! + expect(findRootCauseEventId(ctx.db, symptom)).toBe(upstream) + } + // substrate is present even though v0 promotion still surfaces per-session + const inbox = runPromotionPass(ctx) + expect(inbox.length).toBeGreaterThanOrEqual(3) + }) + + it('6: approval_requested escalates to the highest coarse priority tier', () => { + const ctx = loadAndReplay(fixture('approval-escalation')) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(1) + expect(inbox[0]!.category).toBe('APPROVAL') + expect(inbox[0]!.basePriority).toBe(10) + }) + + it('7: stale-item aging — a 24h+ item is detected and out-prioritizes a fresh higher tier', () => { + const ctx = loadAndReplay(fixture('aging-and-stale')) + const items = ctx.store.inbox.list({ activeOnly: true, limit: 200 }) + expect(countStaleItems(items, NOW)).toBe(1) + + const oldCompleted = computeEffectivePriority(50, 1699910000000, NOW) + const freshReview = computeEffectivePriority(40, 1699999700000, NOW) + expect(oldCompleted).toBeLessThan(freshReview) + + const dist = priorityDistribution(items) + expect(dist.low).toBeGreaterThan(dist.medium) + expect(dist.low).toBeGreaterThan(dist.high) + }) + + it('8: completed with operator action + PR artifact surfaces with the PR handle as title', () => { + const ctx = loadAndReplay(fixture('completed-review-pr')) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(1) + expect(inbox[0]!.title).toBe('feat(overseer): replay harness v0') + expect(inbox[0]!.category).toBe('FINALE') + }) + + it('9: completed with no action falls out of the queue but stays queryable', () => { + const ctx = loadAndReplay(fixture('completed-noise')) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(0) + expect(ctx.store.events.count()).toBe(1) + expect(ctx.store.events.list({ eventType: 
'completed' })).toHaveLength(1) + }) + + it('10 (+1): hub-inferred stale silence is captured-only; worker self-reported stalled promotes', () => { + const ctx = loadAndReplay(fixture('stale-captured-only')) + const inbox = runPromotionPass(ctx) + // both events recorded... + expect(ctx.store.events.count()).toBe(2) + // ...but only the explicit self-report promotes to the inbox + expect(inbox).toHaveLength(1) + const vocalSessionId = ctx.sessionIdByKey.get('vocal')! + expect(inbox[0]!.relatedSessionId).toBe(vocalSessionId) + }) + + it('11: CI/worker contradiction is surfaced, not resolved', () => { + const ctx = loadAndReplay(fixture('ci-contradiction')) + const contradictions = detectContradictions(ctx.db) + expect(contradictions).toHaveLength(1) + expect(contradictions[0]!.failingEventId).toBe(ctx.eventIdBySid.get(1)!) + expect(contradictions[0]!.passingEventId).toBe(ctx.eventIdBySid.get(2)!) + }) + + it('12: operator noise demotion is recorded as a training label and clears the queue', () => { + const ctx = loadAndReplay(fixture('operator-noise-demotion')) + const inbox = runPromotionPass(ctx) + expect(inbox).toHaveLength(1) + const dismissed = ctx.store.inbox.recordOperatorAction(inbox[0]!.id, 'dismiss', 'that was noise') + expect(dismissed!.status).toBe('obsoleted') + expect(ctx.store.inbox.list({ activeOnly: true })).toHaveLength(0) + + const actions = ctx.db.prepare( + 'SELECT action, status_after FROM inbox_operator_actions WHERE inbox_item_id = ?' 
+ ).all(inbox[0]!.id) as Array<{ action: string; status_after: string }> + expect(actions).toEqual([{ action: 'dismiss', status_after: 'obsoleted' }]) + }) +}) + +describe('Overseer replay harness — loader validation', () => { + const base = { + name: 'x', + description: 'x', + sessions: [{ key: 's' }], + events: [ + { sid: 1, ts: 1, sessionKey: 's', sourceKind: 'worker', eventType: 'blocked', attentionCandidate: 1, summary: 'b' } + ] + } + + it('rejects a dangling event link', () => { + const bad = JSON.stringify({ ...base, eventLinks: [{ fromSid: 1, toSid: 999, relationType: 'blocked_by' }] }) + expect(() => parseSnapshot(bad)).toThrow(/unknown toSid/) + }) + + it('rejects an event referencing an unknown session', () => { + const bad = JSON.stringify({ + ...base, + events: [{ sid: 1, ts: 1, sessionKey: 'ghost', sourceKind: 'worker', eventType: 'blocked', attentionCandidate: 1, summary: 'b' }] + }) + expect(() => parseSnapshot(bad)).toThrow(/unknown session/) + }) + + it('rejects a duplicate event sid', () => { + const bad = JSON.stringify({ + ...base, + events: [ + { sid: 1, ts: 1, sessionKey: 's', sourceKind: 'worker', eventType: 'blocked', attentionCandidate: 1, summary: 'b' }, + { sid: 1, ts: 2, sessionKey: 's', sourceKind: 'worker', eventType: 'blocked', attentionCandidate: 1, summary: 'c' } + ] + }) + expect(() => parseSnapshot(bad)).toThrow(/duplicate event sid/) + }) +}) diff --git a/hub/src/overseer/replayHarness.ts b/hub/src/overseer/replayHarness.ts new file mode 100644 index 0000000000..e6721021f5 --- /dev/null +++ b/hub/src/overseer/replayHarness.ts @@ -0,0 +1,454 @@ +/** + * Overseer replay / evaluation harness (Step 2.75). 
+ * + * The harness loads a captured event-stream snapshot from disk, replays it into + * a sandbox (`:memory:`) Store WITHOUT touching the production DB, runs the + * promotion + prioritization logic once, and exposes the analytic helpers the + * golden-scenario assertions need (alarm-flood / stale / priority-distribution + * KPIs from EEMUA 191, blocked_by root-cause traversal, contradiction + * detection, and the §5 effective-priority scoring sketch). + * + * See docs/plans/2026-06-03-overseer-prioritization.md §6 (replay harness) and + * the golden-scenario table, plus docs/plans/2026-06-03-overseer-build-sequence.md + * Step 2.75. Fixtures are synthetic (contracts §7) and live under + * test/fixtures/overseer-replay/. + */ +import { readFileSync } from 'node:fs' +import { Database } from 'bun:sqlite' +import { + buildOverseerSessionIdentity, + mergeEventPayloadWithSession, + type OverseerSessionIdentity +} from '@hapi/protocol' +import { Store } from '../store' +import type { StoredInboxItem } from '../store/inboxStore' +import type { StoredSystemEvent } from '../store/events' + +/** A single captured event in a snapshot. `sid` is a snapshot-local id used for linking. 
/**
 * One captured event in a snapshot. `sid` is a snapshot-local id used only to
 * wire up `eventLinks` / `sourceSids`; the real row id is assigned on insert.
 */
export type SnapshotEvent = {
    sid: number
    ts: number
    sessionKey?: string | null
    sourceKind: 'worker' | 'overseer' | 'operator' | 'system' | 'channel'
    sourceRef?: string | null
    sinkKind?: string | null
    sinkRef?: string | null
    eventType: string
    attentionCandidate: 0 | 1
    operatorActionRequired?: 0 | 1
    riskDetected?: 0 | 1
    summary: string
    artifactRefs?: unknown
    dedupeKey?: string | null
    idempotencyKey?: string | null
    expiresAt?: number | null
    provenance?: string | null
    confidence?: number | null
    severity?: number | null
    payload?: Record<string, unknown>
}

/** A session referenced by events through its snapshot-local `key`. */
export type SnapshotSession = {
    key: string
    tag?: string | null
    flavor?: string
    name?: string | null
    path?: string | null
}

/** Directed edge between two snapshot events, addressed by `sid`. */
export type SnapshotEventLink = {
    fromSid: number
    toSid: number
    relationType: string
    metadata?: unknown
}

/** Pre-seeded baseline inbox item (for aging / stale / distribution scenarios). */
export type SnapshotInboxItem = {
    status: string
    basePriority: number
    category: string
    title: string
    summary: string
    createdAt: number
    sessionKey?: string | null
    sourceSids?: number[]
    attentionClass?: string
}

/**
 * Worker-facing dispatch records for the one-boss invariant (ADR-001).
 * Empty in Step 2.75 fixtures (no dispatch writer yet) -> invariant passes
 * vacuously. Step 4 populates these from the real dispatch envelope + messages
 * tables and the same assertion shape activates automatically.
 */
export type SnapshotDispatchEnvelope = {
    idempotencyKey: string
    messageId: string
    origin?: string
    rationale?: string
    relatedEventIds?: number[]
    confirmationSource?: string
}

/** The message a worker actually received for a dispatch. */
export type SnapshotWorkerMessage = {
    id: string
    sessionKey?: string | null
    role: string
    renderedInstruction: string
    metadata?: Record<string, unknown>
}

/** On-disk shape of a replay fixture. */
export type ReplaySnapshot = {
    name: string
    description: string
    sessions: SnapshotSession[]
    events: SnapshotEvent[]
    eventLinks?: SnapshotEventLink[]
    inboxItems?: SnapshotInboxItem[]
    dispatchEnvelopes?: SnapshotDispatchEnvelope[]
    workerMessages?: SnapshotWorkerMessage[]
}

/** Everything a scenario needs after a snapshot has been replayed. */
export type ReplayContext = {
    store: Store
    db: Database
    snapshot: ReplaySnapshot
    /** snapshot-local event id (sid) -> real inserted event id. */
    eventIdBySid: Map<number, number>
    /** snapshot session key -> denormalized identity used in event payloads. */
    identityByKey: Map<string, OverseerSessionIdentity>
    /** snapshot session key -> real session id. */
    sessionIdByKey: Map<string, string>
}

/**
 * Harness-local invariant check. Named `assertSnapshot` rather than `assert` so
 * it cannot be confused with (or shadow) `node:assert`.
 *
 * @throws Error prefixed with `[overseer-replay] invalid snapshot:` when
 *   `condition` is falsy.
 */
function assertSnapshot(condition: unknown, message: string): asserts condition {
    if (!condition) {
        throw new Error(`[overseer-replay] invalid snapshot: ${message}`)
    }
}

/** True for a non-null, non-array object, the only shape a JSON record can take. */
function isJsonObject(value: unknown): value is object {
    return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/**
 * Parse + structurally validate a snapshot JSON document.
 *
 * Checks: root and every session/event/link entry is an object; `name` and
 * `description` are strings; session keys are unique and non-empty; event sids
 * are unique numbers with numeric `ts`, string `eventType` and a 0/1
 * `attentionCandidate`; every referenced session key and link sid exists.
 * Other fields are not validated here.
 *
 * @throws SyntaxError when `raw` is not valid JSON.
 * @throws Error (`[overseer-replay] invalid snapshot: ...`) on any failed check.
 */
export function parseSnapshot(raw: string): ReplaySnapshot {
    const parsed: unknown = JSON.parse(raw)
    // Guard the root first so `null` / arrays / scalars fail with the harness
    // error instead of a TypeError on the first property read.
    assertSnapshot(isJsonObject(parsed), 'root must be a JSON object')
    const snapshot = parsed as Partial<ReplaySnapshot>

    assertSnapshot(typeof snapshot.name === 'string', 'missing name')
    assertSnapshot(typeof snapshot.description === 'string', 'missing description')
    assertSnapshot(Array.isArray(snapshot.sessions), 'sessions must be an array')
    assertSnapshot(Array.isArray(snapshot.events), 'events must be an array')

    const sessionKeys = new Set<string>()
    for (const session of snapshot.sessions) {
        assertSnapshot(isJsonObject(session), 'session must be an object')
        assertSnapshot(typeof session.key === 'string' && session.key.length > 0, 'session.key required')
        assertSnapshot(!sessionKeys.has(session.key), `duplicate session key ${session.key}`)
        sessionKeys.add(session.key)
    }

    const sids = new Set<number>()
    for (const event of snapshot.events) {
        assertSnapshot(isJsonObject(event), 'event must be an object')
        assertSnapshot(typeof event.sid === 'number', 'event.sid required')
        assertSnapshot(!sids.has(event.sid), `duplicate event sid ${event.sid}`)
        sids.add(event.sid)
        assertSnapshot(typeof event.ts === 'number', `event ${event.sid} missing ts`)
        assertSnapshot(typeof event.eventType === 'string', `event ${event.sid} missing eventType`)
        assertSnapshot(
            event.attentionCandidate === 0 || event.attentionCandidate === 1,
            `event ${event.sid} attentionCandidate must be 0 or 1`
        )
        if (event.sessionKey != null) {
            assertSnapshot(
                sessionKeys.has(event.sessionKey),
                `event ${event.sid} references unknown session ${event.sessionKey}`
            )
        }
    }

    const links = snapshot.eventLinks ?? []
    assertSnapshot(Array.isArray(links), 'eventLinks must be an array')
    for (const link of links) {
        assertSnapshot(isJsonObject(link), 'link must be an object')
        assertSnapshot(sids.has(link.fromSid), `link references unknown fromSid ${link.fromSid}`)
        assertSnapshot(sids.has(link.toSid), `link references unknown toSid ${link.toSid}`)
        assertSnapshot(typeof link.relationType === 'string', 'link.relationType required')
    }

    return snapshot as ReplaySnapshot
}

/** Read a snapshot file as UTF-8 and validate it with `parseSnapshot`. */
export function loadSnapshot(path: string): ReplaySnapshot {
    return parseSnapshot(readFileSync(path, 'utf8'))
}

/** Null stays null, strings pass through untouched, anything else is JSON-encoded. */
function serializeArtifactRefs(refs: unknown): string | null {
    if (refs == null) return null
    return typeof refs === 'string' ? refs : JSON.stringify(refs)
}

/**
 * Replay a snapshot into a fresh sandbox Store. Never touches the production DB:
 * the Store is constructed with `:memory:`.
 *
 * Order of work: create sessions, insert events sorted by `ts` (array order
 * breaks ties), add event links, then seed baseline inbox rows directly via SQL.
 *
 * @throws Error (`[overseer-replay] invalid snapshot: ...`) when an event
 *   insert returns nothing or a link names a sid that was not replayed.
 */
export function replaySnapshot(snapshot: ReplaySnapshot): ReplayContext {
    const store = new Store(':memory:')
    // The Store does not expose its handle publicly; the harness reaches in so
    // scenarios can run raw SQL against the sandbox.
    const db = (store as unknown as { db: Database }).db

    const identityByKey = new Map<string, OverseerSessionIdentity>()
    const sessionIdByKey = new Map<string, string>()

    for (const session of snapshot.sessions) {
        const flavor = session.flavor ?? 'claude'
        const metadata = {
            flavor,
            name: session.name ?? undefined,
            path: session.path ?? undefined
        }
        const created = store.sessions.getOrCreateSession(
            session.tag ?? session.key,
            metadata,
            null,
            'default'
        )
        sessionIdByKey.set(session.key, created.id)
        identityByKey.set(
            session.key,
            buildOverseerSessionIdentity({
                id: created.id,
                flavor,
                tag: session.tag ?? null,
                metadata
            })
        )
    }

    // Replay in chronological (stream) order; preserve array order for ties.
    const ordered = snapshot.events
        .map((event, index) => ({ event, index }))
        .sort((a, b) => (a.event.ts - b.event.ts) || (a.index - b.index))

    const eventIdBySid = new Map<number, number>()

    for (const { event } of ordered) {
        const sessionId = event.sessionKey ? sessionIdByKey.get(event.sessionKey) ?? null : null
        const identity = event.sessionKey ? identityByKey.get(event.sessionKey) : undefined

        // Session-bound events always get a payload (identity merged in);
        // session-less events keep theirs only if one was supplied.
        let payloadJson: string | null = null
        if (identity) {
            payloadJson = mergeEventPayloadWithSession(event.payload ?? {}, identity)
        } else if (event.payload) {
            payloadJson = JSON.stringify(event.payload)
        }

        const stored = store.events.insert({
            ts: event.ts,
            sourceKind: event.sourceKind,
            sourceRef: event.sourceRef ?? null,
            sinkKind: event.sinkKind ?? null,
            sinkRef: event.sinkRef ?? null,
            eventType: event.eventType,
            attentionCandidate: event.attentionCandidate,
            operatorActionRequired: event.operatorActionRequired ?? 0,
            riskDetected: event.riskDetected ?? 0,
            summary: event.summary,
            payloadJson,
            artifactRefs: serializeArtifactRefs(event.artifactRefs),
            relatedSessionId: sessionId,
            dedupeKey: event.dedupeKey ?? null,
            expiresAt: event.expiresAt ?? null,
            provenance: event.provenance ?? null,
            idempotencyKey: event.idempotencyKey ?? null,
            confidence: event.confidence ?? null,
            severity: event.severity ?? null
        })
        assertSnapshot(stored, `event ${event.sid} failed to insert`)
        eventIdBySid.set(event.sid, stored.id)
    }

    for (const link of snapshot.eventLinks ?? []) {
        // Checked lookups: callers may hand in a snapshot that never went
        // through parseSnapshot, so a dangling sid must fail loudly here.
        const fromEventId = eventIdBySid.get(link.fromSid)
        const toEventId = eventIdBySid.get(link.toSid)
        assertSnapshot(fromEventId !== undefined, `link references unknown fromSid ${link.fromSid}`)
        assertSnapshot(toEventId !== undefined, `link references unknown toSid ${link.toSid}`)
        store.events.linkEvents({
            fromEventId,
            toEventId,
            relationType: link.relationType,
            // NOTE(review): wall-clock time makes link rows differ between runs.
            createdAt: Date.now(),
            metadataJson: link.metadata != null ? JSON.stringify(link.metadata) : null
        })
    }

    const inboxItems = snapshot.inboxItems ?? []
    if (inboxItems.length > 0) {
        // Prepared once and reused; `priority` is seeded equal to `base_priority`
        // and `updated_at` equal to `created_at`.
        const insertInboxItem = db.prepare(`
            INSERT INTO inbox_items (
                status, priority, base_priority, source_event_ids, related_inbox_ids,
                attention_class, created_at, updated_at, related_session_id, title, category, summary
            ) VALUES (?, ?, ?, ?, '[]', ?, ?, ?, ?, ?, ?, ?)
        `)
        for (const item of inboxItems) {
            const sessionId = item.sessionKey ? sessionIdByKey.get(item.sessionKey) ?? null : null
            // Unknown sids are dropped rather than rejected.
            const sourceEventIds = (item.sourceSids ?? [])
                .map((sid) => eventIdBySid.get(sid))
                .filter((id): id is number => typeof id === 'number')
            insertInboxItem.run(
                item.status,
                item.basePriority,
                item.basePriority,
                JSON.stringify(sourceEventIds),
                item.attentionClass ?? 'live',
                item.createdAt,
                item.createdAt,
                sessionId,
                item.title,
                item.category,
                item.summary
            )
        }
    }

    return { store, db, snapshot, eventIdBySid, identityByKey, sessionIdByKey }
}

/**
 * Run-once promotion + prioritization entry point. Promotes every replayed
 * attention-candidate event into the inbox (mirroring the recorder's
 * insert-time promotion) and returns the resulting active inbox queue.
 *
 * NOTE(review): both reads pass `limit: 200`, so a snapshot with more events
 * or active items than that is presumably truncated — confirm against Store.
 */
export function runPromotionPass(ctx: ReplayContext): StoredInboxItem[] {
    const events = ctx.store.events.list({ limit: 200 })
    // events.list returns newest-first; promote oldest-first to mimic the stream.
    const chronological = [...events].sort((a, b) => a.id - b.id)
    for (const event of chronological) {
        if (event.attentionCandidate === 1) {
            ctx.store.inbox.promoteAttentionEvent(event)
        }
    }
    return ctx.store.inbox.list({ activeOnly: true, limit: 200 })
}

/** Convenience: `loadSnapshot` followed by `replaySnapshot`. */
export function loadAndReplay(path: string): ReplayContext {
    return replaySnapshot(loadSnapshot(path))
}

// ---------------------------------------------------------------------------
// Analytic helpers — KPIs (EEMUA 191 / ISA-18.2) + §5 scoring sketch.
// ---------------------------------------------------------------------------

/** Width of the alarm-flood sliding window: 10 minutes. */
export const ALARM_FLOOD_WINDOW_MS = 10 * 60 * 1000
/** A window holding MORE than this many candidates counts as a flood. */
export const ALARM_FLOOD_THRESHOLD = 10
/** Age at which an active inbox item counts as stale: 24 hours. */
export const STALE_ITEM_THRESHOLD_MS = 24 * 60 * 60 * 1000

/**
 * §5 effective-priority sketch (v0). Lower number = more urgent.
 *
 * Aging raises urgency over time (classical OS aging / starvation prevention)
 * by subtracting `ageHours * agingSlopePerHour` from the coarse base, with the
 * subtraction capped at `maxAgingBump`. Only the bump is bounded: the result
 * itself is NOT floored at any tier boundary. With the defaults a base-50 item
 * aged 22.5h or more scores 5 and therefore outranks a fresh base-10 item, and
 * a small base can go negative.
 * NOTE(review): add an explicit floor if tier isolation is required.
 *
 * @param basePriority Coarse tier value (lower = more urgent).
 * @param createdAt Item creation time, epoch ms. A future time counts as age 0.
 * @param now Reference time, epoch ms.
 * @param agingSlopePerHour Priority points removed per hour of age.
 * @param maxAgingBump Upper bound on the total points removed.
 */
export function computeEffectivePriority(
    basePriority: number,
    createdAt: number,
    now: number = Date.now(),
    agingSlopePerHour = 2,
    maxAgingBump = 45
): number {
    const ageHours = Math.max(0, (now - createdAt) / 3_600_000)
    const bump = Math.min(maxAgingBump, ageHours * agingSlopePerHour)
    return basePriority - bump
}

/** Result of alarm-flood detection; `peakCount` is the busiest window's size. */
export type AlarmFloodResult = {
    flood: boolean
    peakCount: number
    windowMs: number
    threshold: number
}

/** Sliding-window surface-rate / alarm-flood detection over attention candidates. */
*/ +export function detectAlarmFlood( + events: StoredSystemEvent[], + windowMs: number = ALARM_FLOOD_WINDOW_MS, + threshold: number = ALARM_FLOOD_THRESHOLD +): AlarmFloodResult { + const candidates = events + .filter((e) => e.attentionCandidate === 1) + .map((e) => e.ts) + .sort((a, b) => a - b) + + let peak = 0 + let start = 0 + for (let end = 0; end < candidates.length; end += 1) { + while (candidates[end] - candidates[start] >= windowMs) start += 1 + peak = Math.max(peak, end - start + 1) + } + return { flood: peak > threshold, peakCount: peak, windowMs, threshold } +} + +export function countStaleItems( + items: StoredInboxItem[], + now: number = Date.now(), + thresholdMs: number = STALE_ITEM_THRESHOLD_MS +): number { + return items.filter( + (item) => isActiveStatus(item.status) && now - item.createdAt >= thresholdMs + ).length +} + +function isActiveStatus(status: string): boolean { + return status === 'new' || status === 'surfaced' || status === 'deferred' || status === 'snoozed' +} + +export type PriorityBucket = 'high' | 'medium' | 'low' + +export function priorityBucket(basePriority: number): PriorityBucket { + if (basePriority <= 20) return 'high' + if (basePriority <= 45) return 'medium' + return 'low' +} + +export function priorityDistribution(items: StoredInboxItem[]): Record { + const dist: Record = { high: 0, medium: 0, low: 0 } + for (const item of items) dist[priorityBucket(item.basePriority)] += 1 + return dist +} + +type EventLinkRow = { from_event_id: number; to_event_id: number } + +/** + * Walk `relation` edges (default `blocked_by`) from a symptom event up to the + * terminal upstream event — the root cause. Surfacing the root, not the + * symptoms, is the prioritization §6 requirement for the fan-in blocked case. + * Cycle-safe. 
+ */ +export function findRootCauseEventId( + db: Database, + fromEventId: number, + relation = 'blocked_by' +): number { + const stmt = db.prepare( + 'SELECT from_event_id, to_event_id FROM event_links WHERE from_event_id = ? AND relation_type = ?' + ) + const seen = new Set([fromEventId]) + let current = fromEventId + for (;;) { + const next = stmt.get(current, relation) as EventLinkRow | undefined + if (!next || seen.has(next.to_event_id)) break + seen.add(next.to_event_id) + current = next.to_event_id + } + return current +} + +export type Contradiction = { + sessionId: string + failingEventId: number + passingEventId: number + note: string +} + +/** + * Detect sessions that report both failure and success without resolution. + * The Overseer must SURFACE the contradiction, not silently pick a winner + * (prioritization §6). Returns descriptors only — no resolution. + */ +export function detectContradictions(db: Database): Contradiction[] { + const rows = db.prepare(` + SELECT f.related_session_id AS sid, f.id AS failing, c.id AS passing + FROM events f + JOIN events c + ON c.related_session_id = f.related_session_id + WHERE f.related_session_id IS NOT NULL + AND f.event_type IN ('failed', 'blocked') + AND c.event_type = 'completed' + AND c.ts >= f.ts + `).all() as Array<{ sid: string; failing: number; passing: number }> + + return rows.map((row) => ({ + sessionId: row.sid, + failingEventId: row.failing, + passingEventId: row.passing, + note: 'failure and completion reported for same session; surface both, resolve neither' + })) +} diff --git a/test/fixtures/overseer-replay/README.md b/test/fixtures/overseer-replay/README.md new file mode 100644 index 0000000000..19914fca3f --- /dev/null +++ b/test/fixtures/overseer-replay/README.md @@ -0,0 +1,17 @@ +# Overseer replay fixtures + +Synthetic captured-event-stream snapshots for the Step 2.75 replay harness +(`hub/src/overseer/replayHarness.ts`). 
Each file is a `ReplaySnapshot`: sessions ++ events (+ optional event links, baseline inbox items, dispatch envelopes, +worker messages). + +**These are synthetic and hand-authored. They are NOT production transcripts.** +The Overseer contracts doc §7 (transcript retention) forbids using real operator +transcripts as fixtures; the replay harness only ever runs against invented +streams shaped to exercise a specific golden scenario. + +Golden scenarios are drawn from the prioritization doc §6 table. Time-relative +fixtures (`aging-and-stale.json`) bake a fixed reference epoch into the data; the +test passes that same `now` explicitly so the scenario is deterministic rather +than wall-clock dependent. The reference epoch is `1700000000000` +(2023-11-14T22:13:20Z). diff --git a/test/fixtures/overseer-replay/aging-and-stale.json b/test/fixtures/overseer-replay/aging-and-stale.json new file mode 100644 index 0000000000..6bb0b51d0e --- /dev/null +++ b/test/fixtures/overseer-replay/aging-and-stale.json @@ -0,0 +1,21 @@ +{ + "name": "aging-and-stale", + "description": "Baseline inbox items at a fixed reference epoch (now = 1700000000000). One 25h-old low-tier item plus a fresh review item, and a mostly-low priority distribution. 
Expected: countStaleItems flags the 25h item; aging makes the old item out-prioritize the fresh higher-tier one; distribution is mostly low.", + "sessions": [ + { "key": "s1", "tag": "p1", "flavor": "claude", "name": "p1" }, + { "key": "s2", "tag": "p2", "flavor": "codex", "name": "p2" } + ], + "events": [], + "inboxItems": [ + { "status": "new", "basePriority": 50, "category": "FINALE", "title": "old completed needs review", "summary": "PR open 25h", "createdAt": 1699910000000, "sessionKey": "s1" }, + { "status": "new", "basePriority": 40, "category": "REVIEW", "title": "fresh review", "summary": "just queued", "createdAt": 1699999700000, "sessionKey": "s2" }, + { "status": "new", "basePriority": 10, "category": "APPROVAL", "title": "approval high", "summary": "approve push", "createdAt": 1699999800000, "sessionKey": "s1" }, + { "status": "new", "basePriority": 30, "category": "QUESTION", "title": "decision medium", "summary": "which option", "createdAt": 1699999800000, "sessionKey": "s2" }, + { "status": "new", "basePriority": 60, "category": "STALE", "title": "low 1", "summary": "low", "createdAt": 1699999800000, "sessionKey": "s1" }, + { "status": "new", "basePriority": 60, "category": "STALE", "title": "low 2", "summary": "low", "createdAt": 1699999800000, "sessionKey": "s2" }, + { "status": "new", "basePriority": 70, "category": "QUESTION", "title": "low 3", "summary": "low", "createdAt": 1699999800000, "sessionKey": "s1" }, + { "status": "new", "basePriority": 70, "category": "QUESTION", "title": "low 4", "summary": "low", "createdAt": 1699999800000, "sessionKey": "s2" }, + { "status": "new", "basePriority": 50, "category": "FINALE", "title": "low 5", "summary": "low", "createdAt": 1699999800000, "sessionKey": "s1" }, + { "status": "new", "basePriority": 50, "category": "FINALE", "title": "low 6", "summary": "low", "createdAt": 1699999800000, "sessionKey": "s2" } + ] +} diff --git a/test/fixtures/overseer-replay/alarm-flood.json 
b/test/fixtures/overseer-replay/alarm-flood.json new file mode 100644 index 0000000000..e72aa9bec7 --- /dev/null +++ b/test/fixtures/overseer-replay/alarm-flood.json @@ -0,0 +1,21 @@ +{ + "name": "alarm-flood", + "description": "11 attention-candidate events inside a 10-minute window. Expected: detectAlarmFlood reports flood=true (EEMUA 191 >10/10min).", + "sessions": [ + { "key": "a", "tag": "peer-a", "flavor": "claude", "name": "peer-a" }, + { "key": "b", "tag": "peer-b", "flavor": "codex", "name": "peer-b" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "a", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "blocked 1" }, + { "sid": 2, "ts": 1700000030000, "sessionKey": "b", "sourceKind": "worker", "eventType": "needs_decision", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "decision 1" }, + { "sid": 3, "ts": 1700000060000, "sessionKey": "a", "sourceKind": "worker", "eventType": "failed", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "failed 1" }, + { "sid": 4, "ts": 1700000090000, "sessionKey": "b", "sourceKind": "worker", "eventType": "needs_review", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "review 1" }, + { "sid": 5, "ts": 1700000120000, "sessionKey": "a", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "blocked 2" }, + { "sid": 6, "ts": 1700000150000, "sessionKey": "b", "sourceKind": "worker", "eventType": "needs_decision", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "decision 2" }, + { "sid": 7, "ts": 1700000180000, "sessionKey": "a", "sourceKind": "worker", "eventType": "failed", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "failed 2" }, + { "sid": 8, "ts": 1700000210000, "sessionKey": "b", "sourceKind": "worker", "eventType": "needs_review", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": 
"review 2" }, + { "sid": 9, "ts": 1700000240000, "sessionKey": "a", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "blocked 3" }, + { "sid": 10, "ts": 1700000270000, "sessionKey": "b", "sourceKind": "worker", "eventType": "needs_decision", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "decision 3" }, + { "sid": 11, "ts": 1700000300000, "sessionKey": "a", "sourceKind": "worker", "eventType": "failed", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "failed 3" } + ] +} diff --git a/test/fixtures/overseer-replay/approval-escalation.json b/test/fixtures/overseer-replay/approval-escalation.json new file mode 100644 index 0000000000..23c2300d6d --- /dev/null +++ b/test/fixtures/overseer-replay/approval-escalation.json @@ -0,0 +1,10 @@ +{ + "name": "approval-escalation", + "description": "Worker requests approval for a destructive action. Expected: surfaced as APPROVAL category at the highest coarse priority tier (lowest base_priority number).", + "sessions": [ + { "key": "s", "tag": "peer-z", "flavor": "claude", "name": "peer-z" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "system", "eventType": "approval_requested", "attentionCandidate": 1, "operatorActionRequired": 1, "riskDetected": 1, "summary": "Permission requested: Shell rm -rf node_modules", "payload": { "requestId": "req-1", "tool": "Shell" } } + ] +} diff --git a/test/fixtures/overseer-replay/blocked-by-fanin.json b/test/fixtures/overseer-replay/blocked-by-fanin.json new file mode 100644 index 0000000000..04bd301423 --- /dev/null +++ b/test/fixtures/overseer-replay/blocked-by-fanin.json @@ -0,0 +1,21 @@ +{ + "name": "blocked-by-fanin", + "description": "Three worker sessions blocked on one shared upstream dependency, linked via event_links(blocked_by). 
Expected: root-cause traversal from any symptom returns the upstream event (surface root, not symptoms).", + "sessions": [ + { "key": "infra", "tag": "infra", "flavor": "system", "name": "infra-dep" }, + { "key": "w1", "tag": "peer-1", "flavor": "claude", "name": "peer-1" }, + { "key": "w2", "tag": "peer-2", "flavor": "codex", "name": "peer-2" }, + { "key": "w3", "tag": "peer-3", "flavor": "claude", "name": "peer-3" } + ], + "events": [ + { "sid": 100, "ts": 1700000000000, "sessionKey": "infra", "sourceKind": "system", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "shared staging DB is down" }, + { "sid": 1, "ts": 1700000060000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "cannot run integration tests" }, + { "sid": 2, "ts": 1700000070000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "migration step times out" }, + { "sid": 3, "ts": 1700000080000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "seed script hangs" } + ], + "eventLinks": [ + { "fromSid": 1, "toSid": 100, "relationType": "blocked_by" }, + { "fromSid": 2, "toSid": 100, "relationType": "blocked_by" }, + { "fromSid": 3, "toSid": 100, "relationType": "blocked_by" } + ] +} diff --git a/test/fixtures/overseer-replay/ci-contradiction.json b/test/fixtures/overseer-replay/ci-contradiction.json new file mode 100644 index 0000000000..55485218bb --- /dev/null +++ b/test/fixtures/overseer-replay/ci-contradiction.json @@ -0,0 +1,11 @@ +{ + "name": "ci-contradiction", + "description": "CI reports failure while the worker self-reports a test pass / completion on the same session. 
Expected: the harness surfaces the contradiction and resolves neither side.", + "sessions": [ + { "key": "s", "tag": "peer-ci", "flavor": "claude", "name": "peer-ci" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "channel", "sourceRef": "github-actions", "eventType": "failed", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "CI: test job failed (3 tests red)" }, + { "sid": 2, "ts": 1700000060000, "sessionKey": "s", "sourceKind": "worker", "eventType": "completed", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "All tests pass locally, done" } + ] +} diff --git a/test/fixtures/overseer-replay/completed-noise.json b/test/fixtures/overseer-replay/completed-noise.json new file mode 100644 index 0000000000..d8858c2203 --- /dev/null +++ b/test/fixtures/overseer-replay/completed-noise.json @@ -0,0 +1,10 @@ +{ + "name": "completed-noise", + "description": "Worker completes with operator_action_required=false and risk_detected=false. Expected: falls out of the attention queue (no inbox item) but the event remains recorded and queryable.", + "sessions": [ + { "key": "s", "tag": "peer-done", "flavor": "codex", "name": "peer-done" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "worker", "eventType": "completed", "attentionCandidate": 0, "operatorActionRequired": 0, "riskDetected": 0, "summary": "Routine task finished, nothing to review" } + ] +} diff --git a/test/fixtures/overseer-replay/completed-review-pr.json b/test/fixtures/overseer-replay/completed-review-pr.json new file mode 100644 index 0000000000..0720a132c1 --- /dev/null +++ b/test/fixtures/overseer-replay/completed-review-pr.json @@ -0,0 +1,10 @@ +{ + "name": "completed-review-pr", + "description": "Worker completes with operator_action_required and a github_pr artifact. 
Expected: surfaced as a review-needed FINALE item whose title is the PR handle.", + "sessions": [ + { "key": "s", "tag": "peer-pr", "flavor": "claude", "name": "peer-pr" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "worker", "eventType": "completed", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "Implemented replay harness", "artifactRefs": [{ "kind": "github_pr", "title": "feat(overseer): replay harness v0", "url": "https://github.com/heavygee/hapi/pull/999" }], "payload": { "suggested_action": "review and merge PR #999" } } + ] +} diff --git a/test/fixtures/overseer-replay/idempotent-reemission.json b/test/fixtures/overseer-replay/idempotent-reemission.json new file mode 100644 index 0000000000..75a4f37018 --- /dev/null +++ b/test/fixtures/overseer-replay/idempotent-reemission.json @@ -0,0 +1,11 @@ +{ + "name": "idempotent-reemission", + "description": "Same event re-emitted with an identical idempotency_key. Expected: stored once (no double-count), one inbox item.", + "sessions": [ + { "key": "s", "tag": "peer-y", "flavor": "claude", "name": "peer-y" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "worker", "eventType": "needs_decision", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "Which migration strategy?", "idempotencyKey": "session:peer-y:message:m1:notify" }, + { "sid": 2, "ts": 1700000005000, "sessionKey": "s", "sourceKind": "worker", "eventType": "needs_decision", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "Which migration strategy?", "idempotencyKey": "session:peer-y:message:m1:notify" } + ] +} diff --git a/test/fixtures/overseer-replay/one-boss-clean.json b/test/fixtures/overseer-replay/one-boss-clean.json new file mode 100644 index 0000000000..b6f39d28b7 --- /dev/null +++ b/test/fixtures/overseer-replay/one-boss-clean.json @@ -0,0 +1,16 @@ +{ + "name": "one-boss-clean", + "description": "A future-shaped 
dispatched event whose worker message is correctly operator-attributed (role=user, no Overseer metadata, no attribution boilerplate). Expected: one-boss invariant checks 1 dispatch, finds 0 violations. Proves the assertion shape is wired and will activate at Step 4.", + "sessions": [ + { "key": "s", "tag": "peer-15", "flavor": "claude", "name": "peer-15" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "overseer", "eventType": "dispatched", "attentionCandidate": 0, "summary": "dispatched merge instruction to peer-15", "idempotencyKey": "dispatch:peer-15:abc123" } + ], + "dispatchEnvelopes": [ + { "idempotencyKey": "dispatch:peer-15:abc123", "messageId": "msg-1", "origin": "operator_confirmed", "rationale": "operator approved at breakpoint", "relatedEventIds": [42], "confirmationSource": "voice" } + ], + "workerMessages": [ + { "id": "msg-1", "sessionKey": "s", "role": "user", "renderedInstruction": "Rebase onto main and re-run the integration tests when green." } + ] +} diff --git a/test/fixtures/overseer-replay/one-boss-leak.json b/test/fixtures/overseer-replay/one-boss-leak.json new file mode 100644 index 0000000000..b34d183daf --- /dev/null +++ b/test/fixtures/overseer-replay/one-boss-leak.json @@ -0,0 +1,16 @@ +{ + "name": "one-boss-leak", + "description": "A dispatched event whose worker message LEAKS Overseer provenance: attribution boilerplate in the rendered instruction plus an overseer-origin metadata key. 
Expected: one-boss invariant catches the violations (proves it is not a no-op).", + "sessions": [ + { "key": "s", "tag": "peer-15", "flavor": "claude", "name": "peer-15" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "overseer", "eventType": "dispatched", "attentionCandidate": 0, "summary": "dispatched merge instruction to peer-15", "idempotencyKey": "dispatch:peer-15:leak1" } + ], + "dispatchEnvelopes": [ + { "idempotencyKey": "dispatch:peer-15:leak1", "messageId": "msg-1", "origin": "operator_confirmed", "rationale": "x", "relatedEventIds": [1], "confirmationSource": "voice" } + ], + "workerMessages": [ + { "id": "msg-1", "sessionKey": "s", "role": "user", "renderedInstruction": "The Overseer suggests you rebase onto main now.", "metadata": { "source": "overseer", "overseer_rationale": "fan-in blocker" } } + ] +} diff --git a/test/fixtures/overseer-replay/operator-noise-demotion.json b/test/fixtures/overseer-replay/operator-noise-demotion.json new file mode 100644 index 0000000000..19799497b4 --- /dev/null +++ b/test/fixtures/overseer-replay/operator-noise-demotion.json @@ -0,0 +1,10 @@ +{ + "name": "operator-noise-demotion", + "description": "An item the operator marks as noise. 
Expected: a dismiss operator-action moves the item out of the active queue (status obsoleted) and is recorded as a negative training label for future salience tuning.", + "sessions": [ + { "key": "s", "tag": "peer-noise", "flavor": "codex", "name": "peer-noise" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "worker", "eventType": "needs_review", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "linter warning batch (operator considers this noise)" } + ] +} diff --git a/test/fixtures/overseer-replay/routine-progress-flood.json b/test/fixtures/overseer-replay/routine-progress-flood.json new file mode 100644 index 0000000000..5f3559f685 --- /dev/null +++ b/test/fixtures/overseer-replay/routine-progress-flood.json @@ -0,0 +1,41 @@ +{ + "name": "routine-progress-flood", + "description": "30 routine progress events across 3 sessions. Expected: surface nothing (no attention candidates promoted).", + "sessions": [ + { "key": "w1", "tag": "peer-1", "flavor": "claude", "name": "peer-1" }, + { "key": "w2", "tag": "peer-2", "flavor": "codex", "name": "peer-2" }, + { "key": "w3", "tag": "peer-3", "flavor": "cursor", "name": "peer-3" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 2, "ts": 1700000010000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 3, "ts": 1700000020000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 4, "ts": 1700000030000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 5, "ts": 1700000040000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { 
"sid": 6, "ts": 1700000050000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 7, "ts": 1700000060000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 8, "ts": 1700000070000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 9, "ts": 1700000080000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 10, "ts": 1700000090000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 11, "ts": 1700000100000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 12, "ts": 1700000110000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 13, "ts": 1700000120000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 14, "ts": 1700000130000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 15, "ts": 1700000140000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 16, "ts": 1700000150000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 17, "ts": 1700000160000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 18, "ts": 1700000170000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 
0, "summary": "read docs" }, + { "sid": 19, "ts": 1700000180000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 20, "ts": 1700000190000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 21, "ts": 1700000200000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 22, "ts": 1700000210000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 23, "ts": 1700000220000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 24, "ts": 1700000230000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 25, "ts": 1700000240000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 26, "ts": 1700000250000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 27, "ts": 1700000260000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" }, + { "sid": 28, "ts": 1700000270000, "sessionKey": "w1", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "ran tests" }, + { "sid": 29, "ts": 1700000280000, "sessionKey": "w2", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "edited file" }, + { "sid": 30, "ts": 1700000290000, "sessionKey": "w3", "sourceKind": "worker", "eventType": "progress", "attentionCandidate": 0, "summary": "read docs" } + ] +} diff --git a/test/fixtures/overseer-replay/same-session-collapse.json 
b/test/fixtures/overseer-replay/same-session-collapse.json new file mode 100644 index 0000000000..d5a34d3fd5 --- /dev/null +++ b/test/fixtures/overseer-replay/same-session-collapse.json @@ -0,0 +1,11 @@ +{ + "name": "same-session-collapse", + "description": "Two attention events on the same session. Expected: collapse into ONE active inbox item with both source_event_ids merged.", + "sessions": [ + { "key": "s", "tag": "peer-x", "flavor": "codex", "name": "peer-x" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "s", "sourceKind": "worker", "eventType": "blocked", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "CI failed on push" }, + { "sid": 2, "ts": 1700000060000, "sessionKey": "s", "sourceKind": "worker", "eventType": "needs_review", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "PR ready for review" } + ] +} diff --git a/test/fixtures/overseer-replay/stale-captured-only.json b/test/fixtures/overseer-replay/stale-captured-only.json new file mode 100644 index 0000000000..83bc7d095a --- /dev/null +++ b/test/fixtures/overseer-replay/stale-captured-only.json @@ -0,0 +1,12 @@ +{ + "name": "stale-captured-only", + "description": "Hub-inferred stale silence (checkStaleSessions) is captured-only: recorded with attentionCandidate=0, NOT promoted to inbox. A worker that EXPLICITLY self-reports stalled keeps attentionCandidate=1 and DOES promote. 
Locks the fix/overseer-inbox-stale-noise behavior.", + "sessions": [ + { "key": "idle", "tag": "peer-idle", "flavor": "claude", "name": "peer-idle" }, + { "key": "vocal", "tag": "peer-vocal", "flavor": "codex", "name": "peer-vocal" } + ], + "events": [ + { "sid": 1, "ts": 1700000000000, "sessionKey": "idle", "sourceKind": "system", "eventType": "stale", "attentionCandidate": 0, "operatorActionRequired": 0, "summary": "No agent output for 35 minutes", "provenance": "hub-inferred from session silence threshold" }, + { "sid": 2, "ts": 1700000060000, "sessionKey": "vocal", "sourceKind": "worker", "eventType": "stale", "attentionCandidate": 1, "operatorActionRequired": 1, "summary": "I appear stuck waiting on an API key", "provenance": "AGENT_NOTIFY_SUMMARY" } + ] +}