Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions browse/src/domain-skills.ts
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,20 @@ export async function writeSkill(input: WriteSkillInput): Promise<DomainSkillRow
*
* Auto-promote logic:
* - increment use_count
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 → state:active
* - else stay quarantined with updated counter
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 AND L4 has scored
* the body (classifier_score > 0) → state:active
* - else stay quarantined with updated counter; user must run
* `domain-skill promote-to-global` manually
*
* The classifier_score > 0 gate is load-bearing: handleSave currently writes
* classifier_score=0 with the comment "L4 deferred to load-time / sidebar-agent
* fills this in on first prompt-injection load," but sidebar-agent was ripped
* (CLAUDE.md "Sidebar architecture") and nothing else updates the score, so
* skills authored via the production path never had their body scanned by L4.
* Without this gate, three benign uses promote any quarantined skill — including
* one written under the influence of a poisoned page — into the prompt context
* for every subsequent visit. The gate re-opens automatically the day L4 is
* rewired and writeSkill / recordSkillUse start receiving non-zero scores.
*/
export async function recordSkillUse(host: string, projectSlug: string, classifierFlagged: boolean): Promise<DomainSkillRow | null> {
const normalized = normalizeHost(host);
Expand All @@ -303,7 +315,12 @@ export async function recordSkillUse(host: string, projectSlug: string, classifi
const useCount = current.use_count + 1;
const flagCount = current.flag_count + (classifierFlagged ? 1 : 0);
let state: SkillState = current.state;
if (state === 'quarantined' && useCount >= PROMOTE_THRESHOLD && flagCount === 0) {
if (
state === 'quarantined' &&
useCount >= PROMOTE_THRESHOLD &&
flagCount === 0 &&
current.classifier_score > 0
) {
state = 'active';
}
const updated: DomainSkillRow = {
Expand Down
25 changes: 25 additions & 0 deletions browse/test/domain-skills-storage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,31 @@ describe('domain-skills: state machine (T6)', () => {
})
).rejects.toThrow(/classifier flagged/);
});

// domain-skill-commands.ts:140 (handleSave) writes classifier_score=0 with
// the comment "L4 deferred to load-time" — but sidebar-agent (the deferred
// scanner) was ripped per CLAUDE.md "Sidebar architecture." Without an
// explicit gate, three benign uses promote any quarantined skill, including
// one authored under a poisoned page, into prompt context permanently.
it('does NOT auto-promote when classifier_score is 0 (production handleSave shape)', async () => {
  const m = await freshImport();
  // Seed a skill whose body was never scored by the classifier.
  await m.writeSkill({
    host: 'linkedin.com',
    body: '# LinkedIn',
    projectSlug: 'test-slug',
    source: 'agent',
    classifierScore: 0, // matches domain-skill-commands.ts:140 production path
  });

  // Record three unflagged uses, keeping the first and last results so the
  // assertions below state exactly which call they are checking.
  const afterFirst = await m.recordSkillUse('linkedin.com', 'test-slug', false);
  await m.recordSkillUse('linkedin.com', 'test-slug', false);
  const afterThird = await m.recordSkillUse('linkedin.com', 'test-slug', false);

  // The skill stays quarantined both before and at the third use.
  expect(afterFirst?.state).toBe('quarantined');
  expect(afterFirst?.use_count).toBe(1);
  expect(afterThird?.state).toBe('quarantined');
  expect(afterThird?.use_count).toBe(3);

  // readSkill returns null for quarantined skills — they don't fire.
  const read = await m.readSkill('linkedin.com', 'test-slug');
  expect(read).toBeNull();
});
});

describe('domain-skills: scope shadowing (T4)', () => {
Expand Down
Loading