From d6328a1abbc180e466834849ed0d68875550c011 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sat, 30 May 2026 20:37:19 -0400
Subject: [PATCH 01/12] feat(providers): AWS Bedrock LLM provider with AWS
 credential chain + SSO

Add a `bedrock` provider so agentmemory can use Anthropic models hosted on
AWS Bedrock for compression/summarization/image description. Bedrock uses
SigV4 signing rather than an x-api-key header, so the stock Anthropic SDK
with a base-URL override cannot work; this wraps @anthropic-ai/bedrock-sdk.

- Credentials resolve via the AWS default provider chain (env / IAM role /
  SSO cache selected by AWS_PROFILE); no static keys required. Explicit
  static keys are used only when both AWS_ACCESS_KEY_ID and
  AWS_SECRET_ACCESS_KEY are set (CI escape hatch).
- Detection is gated strictly on AWS_BEDROCK=true and placed first, so it
  never fires for existing OpenAI/Ollama users.
- Default model is Claude Haiku 4.5 (bare on-demand ID); docs cover the
  us./eu.-prefixed cross-region inference-profile forms.
- Opaque Bedrock 4xx errors are wrapped with an actionable hint
  (model access / region / inference profile).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .env.example                  |  22 +++++-
 README.md                     |  16 ++++
 package.json                  |   1 +
 src/config.ts                 |  22 +++++-
 src/providers/bedrock.ts      | 133 ++++++++++++++++++++++++++++++++++
 src/providers/index.ts        |  10 +++
 src/types.ts                  |   2 +-
 test/bedrock-provider.test.ts | 100 +++++++++++++++++++++++++
 8 files changed, 302 insertions(+), 4 deletions(-)
 create mode 100644 src/providers/bedrock.ts
 create mode 100644 test/bedrock-provider.test.ts
diff --git a/.env.example b/.env.example
index 77ca0f3a3..92f65d8f6 100644
--- a/.env.example
+++ b/.env.example
@@ -23,8 +23,9 @@
 # Without a provider key, agentmemory runs in noop mode: observations are
 # indexed via zero-LLM synthetic compression, hybrid search still works,
 # but LLM-backed summarisation / reflection / consolidation are disabled.
-# The detection order is OPENAI_API_KEY → MINIMAX_API_KEY → ANTHROPIC_API_KEY
-# → GEMINI_API_KEY → OPENROUTER_API_KEY → noop.
+# The detection order is AWS_BEDROCK → OPENAI_API_KEY → MINIMAX_API_KEY →
+# ANTHROPIC_API_KEY → GEMINI_API_KEY → OPENROUTER_API_KEY → noop. Bedrock is
+# first but only fires on the explicit AWS_BEDROCK=true opt-in flag.
 
 # OPENAI_API_KEY=sk-...                          # Used for OpenAI-compatible embeddings today. PR #307 will extend this to chat completions (DeepSeek, SiliconFlow, vLLM, LM Studio, Ollama via `/v1`).
 # OPENAI_BASE_URL=https://api.openai.com         # Override for OpenAI-compatible providers
@@ -43,6 +44,23 @@
 # MINIMAX_API_KEY=...
 # MINIMAX_MODEL=MiniMax-M2.7
 
+# AWS Bedrock (Anthropic models on Bedrock). Opt in with AWS_BEDROCK=true; takes
+# precedence over the keys above when set. Credentials come from the standard AWS
+# provider chain — environment creds, IAM roles, or an SSO profile cached under
+# ~/.aws/sso/cache/ (select with AWS_PROFILE). NOTE: agentmemory reads the cached
+# SSO token but cannot perform the login — run `aws sso login --profile <name>`
+# first, and re-run it when the session expires.
+# AWS_BEDROCK=true
+# AWS_REGION=us-east-1                           # Required for Bedrock
+# AWS_PROFILE=my-sso-profile                     # Optional; consumed by the AWS SDK directly
+# AWS_BEDROCK_MODEL=anthropic.claude-haiku-4-5-20251001-v1:0   # Default: Claude Haiku 4.5 (bare on-demand ID)
+#   The bare ID above only works in Regions that offer the model on-demand AND
+#   where model access is enabled in the Bedrock console. In other Regions, use
+#   the geo-prefixed cross-region inference profile, e.g.:
+#   AWS_BEDROCK_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0   (or eu.…)
+# AWS_ACCESS_KEY_ID=...                          # Optional explicit static creds (CI escape hatch); both must be set
+# AWS_SECRET_ACCESS_KEY=...                      #   to take effect, else the provider chain is used
+
 # MAX_TOKENS=4096                                # Cap LLM completion tokens for compression / summarise calls
 
 # Outbound LLM / embedding timeout — shared across every raw-fetch provider
diff --git a/README.md b/README.md
index 862e540fe..e60e66ece 100644
--- a/README.md
+++ b/README.md
@@ -1153,6 +1153,7 @@ agentmemory auto-detects from your environment. By default, no LLM calls are mad
 |----------|--------|-------|
 | **No-op (default)** | No config needed | LLM-backed compress/summarize is DISABLED. Synthetic BM25 compression + recall still work. See `AGENTMEMORY_ALLOW_AGENT_SDK` below if you used to rely on the Claude-subscription fallback. |
 | Anthropic API | `ANTHROPIC_API_KEY` | Per-token billing |
+| AWS Bedrock | `AWS_BEDROCK=true` + `AWS_REGION` | Anthropic models on Bedrock. Opt-in flag, takes precedence when set. Creds from the AWS provider chain — env / IAM role / SSO cache (`AWS_PROFILE`). Default model Claude Haiku 4.5; see [AWS Bedrock](#aws-bedrock) below. |
 | MiniMax | `MINIMAX_API_KEY` | Anthropic-compatible |
 | Gemini | `GEMINI_API_KEY` | Also enables embeddings |
 | OpenRouter | `OPENROUTER_API_KEY` | Any model |
@@ -1160,6 +1161,21 @@ agentmemory auto-detects from your environment. By default, no LLM calls are mad
 | **Local (Ollama / LM Studio / vLLM / llama.cpp)** | `OPENAI_API_KEY=local` + `OPENAI_BASE_URL=http://localhost:11434/v1` (Ollama) or `http://localhost:1234/v1` (LM Studio) + `OPENAI_MODEL=<your model>` | Anything OpenAI-API-compatible. Zero cost, runs on your hardware. See [Local models](#local-models-ollama-lm-studio-vllm) below. |
 | Claude subscription fallback | `AGENTMEMORY_ALLOW_AGENT_SDK=true` | Opt-in only. Spawns `@anthropic-ai/claude-agent-sdk` sessions — used to cause unbounded Stop-hook recursion (#149 follow-up) so it is no longer the default. |
 
+### AWS Bedrock
+
+Run Anthropic models hosted on AWS Bedrock as the LLM provider. Opt in with `AWS_BEDROCK=true`; when set it takes precedence over the other provider keys.
+
+```bash
+AWS_BEDROCK=true
+AWS_REGION=us-east-1
+AWS_PROFILE=my-sso-profile                                  # optional
+AWS_BEDROCK_MODEL=anthropic.claude-haiku-4-5-20251001-v1:0      # optional; this is the default
+```
+
+- **Credentials** come from the standard AWS credential provider chain — environment credentials, IAM roles, or an SSO profile cached under `~/.aws/sso/cache/` (select the profile with `AWS_PROFILE`). No static keys are required. To force static keys (e.g. in CI), set **both** `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
+- **SSO** works out of the box, but agentmemory only *reads* the cached token — it cannot perform the login. Run `aws sso login --profile <name>` first, and again when the session expires. (A future release adds an auth-refresh hook to run this for you.)
+- **Model ID** defaults to Claude Haiku 4.5 (`anthropic.claude-haiku-4-5-20251001-v1:0`) — fast and cost-efficient for background compression. The bare on-demand ID only works in Regions that offer the model on-demand and where model access is enabled in the Bedrock console. In other Regions, set `AWS_BEDROCK_MODEL` to the geo-prefixed cross-region inference profile, e.g. `us.anthropic.claude-haiku-4-5-20251001-v1:0` (or `eu.…`).
+
 ### Local models (Ollama / LM Studio / vLLM)
 
 agentmemory talks to any OpenAI-API-compatible server, so anything that exposes `/v1/chat/completions` works without code changes. No paid keys, no cloud, no rate limits — runs entirely on your hardware.
diff --git a/package.json b/package.json
index db73aba83..705b74bdf 100644
--- a/package.json
+++ b/package.json
@@ -58,6 +58,7 @@
     "url": "https://github.com/rohitg00/agentmemory"
   },
   "dependencies": {
+    "@anthropic-ai/bedrock-sdk": "^0.29.2",
     "@anthropic-ai/claude-agent-sdk": "^0.3.142",
     "@anthropic-ai/sdk": "^0.93.0",
     "@clack/prompts": "^1.2.0",
diff --git a/src/config.ts b/src/config.ts
index 1fe704657..6f008c439 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -49,9 +49,28 @@ function hasRealValue(v: string | undefined): v is string {
   return typeof v === "string" && v.trim().length > 0;
 }
 
-function detectProvider(env: Record<string, string>): ProviderConfig {
+export function detectProvider(env: Record<string, string>): ProviderConfig {
   const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10);
 
+  // AWS Bedrock: explicit opt-in via AWS_BEDROCK=true. Placed first so a machine
+  // with both Ollama and Bedrock configured prefers Bedrock when opted in; the
+  // strict flag gate means it never fires for existing OpenAI/Ollama users.
+  // Credentials come from the AWS provider chain (env / IAM role / SSO cache),
+  // so we do NOT key detection on credential env vars — only the flag + region.
+  if (env["AWS_BEDROCK"] === "true") {
+    if (!hasRealValue(env["AWS_REGION"])) {
+      process.stderr.write(
+        "[agentmemory] AWS_BEDROCK=true but AWS_REGION is unset. " +
+          "Bedrock requires a region — set AWS_REGION in ~/.agentmemory/.env.\n",
+      );
+    }
+    return {
+      provider: "bedrock",
+      model: env["AWS_BEDROCK_MODEL"] || "anthropic.claude-haiku-4-5-20251001-v1:0",
+      maxTokens,
+    };
+  }
+
   // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio
   if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") {
     return {
@@ -191,6 +210,7 @@ export function isDropStaleIndexEnabled(): boolean {
 export function detectLlmProviderKind(): "llm" | "noop" {
   const env = getMergedEnv();
   if (
+    env["AWS_BEDROCK"] === "true" ||
     hasRealValue(env["ANTHROPIC_API_KEY"]) ||
     hasRealValue(env["GEMINI_API_KEY"]) ||
     hasRealValue(env["GOOGLE_API_KEY"]) ||
diff --git a/src/providers/bedrock.ts b/src/providers/bedrock.ts
new file mode 100644
index 000000000..50b199fb2
--- /dev/null
+++ b/src/providers/bedrock.ts
@@ -0,0 +1,133 @@
+import { AnthropicBedrock } from '@anthropic-ai/bedrock-sdk'
+import type { MemoryProvider } from '../types.js'
+import { getEnvVar } from '../config.js'
+
+/**
+ * AWS Bedrock LLM provider (Anthropic models on Bedrock).
+ *
+ * Wraps `@anthropic-ai/bedrock-sdk`, which speaks the same
+ * `messages.create(...)` surface as the first-party Anthropic SDK but
+ * authenticates with AWS SigV4 instead of an `x-api-key` header.
+ *
+ * Credentials: by default NO explicit keys are passed, so the AWS SDK v3
+ * default credential provider chain resolves them — environment creds, IAM
+ * roles, and crucially **SSO profiles** cached under `~/.aws/sso/cache/`
+ * (select with `AWS_PROFILE`). The SDK reads a cached SSO token; it cannot
+ * perform the interactive `aws sso login` itself, so the session must already
+ * be valid. Static keys (`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`) are an
+ * opt-in escape hatch for CI.
+ *
+ * Required env:
+ *   AWS_REGION  — Bedrock region (also consumed by the SDK directly).
+ *
+ * Optional:
+ *   AWS_BEDROCK_MODEL          — model / inference-profile ID (default below).
+ *   AWS_PROFILE            — SSO/credentials profile, consumed by the AWS SDK.
+ *   AWS_ACCESS_KEY_ID      — explicit static key (escape hatch / CI).
+ *   AWS_SECRET_ACCESS_KEY  — explicit static secret (escape hatch / CI).
+ *   AWS_SESSION_TOKEN      — explicit session token for temporary creds.
+ *
+ * Model IDs are Bedrock-style (e.g. `anthropic.claude-haiku-4-5-20251001-v1:0`),
+ * NOT the bare Anthropic model name. In Regions where the model is not offered
+ * on-demand it is reachable only via a cross-region inference profile, whose ID
+ * is geo-prefixed: `us.anthropic.claude-haiku-4-5-20251001-v1:0` (or `eu.`).
+ */
+export class BedrockProvider implements MemoryProvider {
+  name = 'bedrock'
+  private client: AnthropicBedrock
+  private model: string
+  private maxTokens: number
+
+  constructor(model: string, maxTokens: number, awsRegion: string) {
+    const awsAccessKey = getEnvVar('AWS_ACCESS_KEY_ID')
+    const awsSecretKey = getEnvVar('AWS_SECRET_ACCESS_KEY')
+    const awsSessionToken = getEnvVar('AWS_SESSION_TOKEN')
+
+    // Only pass explicit keys when BOTH are present — otherwise omit them so the
+    // AWS credential provider chain (env / IAM role / SSO cache) resolves creds.
+    this.client =
+      awsAccessKey && awsSecretKey
+        ? new AnthropicBedrock({
+            awsRegion,
+            awsAccessKey,
+            awsSecretKey,
+            ...(awsSessionToken ? { awsSessionToken } : {}),
+          })
+        : new AnthropicBedrock({ awsRegion })
+    this.model = model
+    this.maxTokens = maxTokens
+  }
+
+  async compress(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt)
+  }
+
+  async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt)
+  }
+
+  async describeImage(imageData: string, mimeType: string, prompt: string): Promise<string> {
+    try {
+      const response = await this.client.messages.create({
+        model: this.model,
+        max_tokens: this.maxTokens,
+        messages: [{
+          role: 'user',
+          content: [
+            {
+              type: 'image',
+              source: { type: 'base64', media_type: mimeType as 'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp', data: imageData },
+            },
+            { type: 'text', text: prompt },
+          ],
+        }],
+      })
+
+      const textBlock = response.content.find((b) => b.type === 'text')
+      return textBlock?.text ?? ''
+    } catch (err) {
+      throw this.explainError(err)
+    }
+  }
+
+  private async call(systemPrompt: string, userPrompt: string): Promise<string> {
+    try {
+      const response = await this.client.messages.create({
+        model: this.model,
+        max_tokens: this.maxTokens,
+        system: systemPrompt,
+        messages: [{ role: 'user', content: userPrompt }],
+      })
+
+      const textBlock = response.content.find((b) => b.type === 'text')
+      return textBlock?.text ?? ''
+    } catch (err) {
+      throw this.explainError(err)
+    }
+  }
+
+  /**
+   * Turn an opaque Bedrock model-access / validation 4xx into an actionable
+   * error. The bare on-demand model ID only works in Regions that offer the
+   * model on-demand; elsewhere callers must enable model access or switch to a
+   * `us.`/`eu.`-prefixed cross-region inference profile.
+   */
+  private explainError(err: unknown): unknown {
+    const status = (err as { status?: number })?.status
+    const message = err instanceof Error ? err.message : String(err)
+    if (
+      status === 403 ||
+      status === 400 ||
+      /access|not authorized|inference profile|on-demand|ValidationException|AccessDenied/i.test(message)
+    ) {
+      return new Error(
+        `Bedrock model "${this.model}" could not be invoked (${message}). ` +
+          `Check that: (1) model access is enabled for this account in the Bedrock console, ` +
+          `(2) AWS_REGION (${this.client.awsRegion}) offers this model, and ` +
+          `(3) for Regions without on-demand access, AWS_BEDROCK_MODEL is set to the ` +
+          `"us."/"eu."-prefixed cross-region inference profile ID.`,
+      )
+    }
+    return err
+  }
+}
diff --git a/src/providers/index.ts b/src/providers/index.ts
index 5de6807c7..a5265705a 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -5,6 +5,7 @@ import type {
 } from "../types.js";
 import { AgentSDKProvider } from "./agent-sdk.js";
 import { AnthropicProvider } from "./anthropic.js";
+import { BedrockProvider } from "./bedrock.js";
 import { MinimaxProvider } from "./minimax.js";
 import { NoopProvider } from "./noop.js";
 import { OpenAIProvider } from "./openai.js";
@@ -73,6 +74,15 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
         config.maxTokens,
         config.baseURL,
       );
+    case "bedrock":
+      // No requireEnvVar for a key: creds may come from the AWS credential
+      // provider chain (SSO cache / IAM role) with no env var set. A region is
+      // mandatory for Bedrock, though.
+      return new BedrockProvider(
+        config.model,
+        config.maxTokens,
+        requireEnvVar("AWS_REGION"),
+      );
     case "gemini": {
       const geminiKey =
         getEnvVar("GEMINI_API_KEY") || getEnvVar("GOOGLE_API_KEY");
diff --git a/src/types.ts b/src/types.ts
index b734a4d25..9d3df788b 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -147,7 +147,7 @@ export interface ProviderConfig {
   baseURL?: string;
 }
 
-export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "openai" | "noop";
+export type ProviderType = "agent-sdk" | "anthropic" | "bedrock" | "gemini" | "openrouter" | "minimax" | "openai" | "noop";
 
 export interface MemoryProvider {
   name: string;
diff --git a/test/bedrock-provider.test.ts b/test/bedrock-provider.test.ts
new file mode 100644
index 000000000..427da498e
--- /dev/null
+++ b/test/bedrock-provider.test.ts
@@ -0,0 +1,100 @@
+import { describe, expect, it, afterEach, beforeEach } from "vitest";
+import { BedrockProvider } from "../src/providers/bedrock.js";
+import { detectProvider } from "../src/config.js";
+
+// Env keys this suite mutates — saved/restored so tests don't leak into each
+// other or pick up the developer's real ~/.agentmemory/.env values.
+const ENV_KEYS = [
+  "AWS_BEDROCK",
+  "AWS_REGION",
+  "AWS_BEDROCK_MODEL",
+  "AWS_ACCESS_KEY_ID",
+  "AWS_SECRET_ACCESS_KEY",
+  "AWS_SESSION_TOKEN",
+  "OPENAI_API_KEY",
+  "OPENAI_API_KEY_FOR_LLM",
+] as const;
+
+describe("BedrockProvider", () => {
+  const saved: Record<string, string | undefined> = {};
+
+  beforeEach(() => {
+    for (const k of ENV_KEYS) {
+      saved[k] = process.env[k];
+      delete process.env[k];
+    }
+  });
+
+  afterEach(() => {
+    for (const k of ENV_KEYS) {
+      if (saved[k] === undefined) delete process.env[k];
+      else process.env[k] = saved[k];
+    }
+  });
+
+  it("constructs with only a region (no explicit keys) — relies on the credential chain", () => {
+    expect(
+      () => new BedrockProvider("anthropic.claude-haiku-4-5-20251001-v1:0", 800, "us-east-1"),
+    ).not.toThrow();
+  });
+
+  it("constructs with explicit static keys when present", () => {
+    process.env["AWS_ACCESS_KEY_ID"] = "AKIAEXAMPLE";
+    process.env["AWS_SECRET_ACCESS_KEY"] = "secret";
+    const provider = new BedrockProvider("model-id", 800, "eu-west-1");
+    const client = (provider as unknown as { client: { awsAccessKey: string | null } }).client;
+    expect(client.awsAccessKey).toBe("AKIAEXAMPLE");
+  });
+
+  it("ignores a lone access key (omits both, falls back to the credential chain)", () => {
+    // Only one of the pair set — must NOT pass it through (the SDK deprecates
+    // partial static creds); the provider chain handles it instead.
+    process.env["AWS_ACCESS_KEY_ID"] = "AKIAEXAMPLE";
+    const provider = new BedrockProvider("model-id", 800, "us-east-1");
+    const client = (provider as unknown as { client: { awsAccessKey: string | null } }).client;
+    expect(client.awsAccessKey).toBeNull();
+  });
+
+  it("threads the region through to the client", () => {
+    const provider = new BedrockProvider("model-id", 800, "ap-southeast-2");
+    const client = (provider as unknown as { client: { awsRegion: string } }).client;
+    expect(client.awsRegion).toBe("ap-southeast-2");
+  });
+});
+
+describe("detectProvider — bedrock branch", () => {
+  // Tests the pure detection function with explicit env maps, so they are
+  // independent of the developer's real ~/.agentmemory/.env.
+  it("selects bedrock when AWS_BEDROCK=true and AWS_REGION is set", () => {
+    const config = detectProvider({ AWS_BEDROCK: "true", AWS_REGION: "us-east-1" });
+    expect(config.provider).toBe("bedrock");
+  });
+
+  it("defaults the model to Claude Haiku 4.5 when AWS_BEDROCK_MODEL is unset", () => {
+    const config = detectProvider({ AWS_BEDROCK: "true", AWS_REGION: "us-east-1" });
+    expect(config.model).toBe("anthropic.claude-haiku-4-5-20251001-v1:0");
+  });
+
+  it("honors an explicit AWS_BEDROCK_MODEL (e.g. a us.-prefixed inference profile)", () => {
+    const config = detectProvider({
+      AWS_BEDROCK: "true",
+      AWS_REGION: "us-east-1",
+      AWS_BEDROCK_MODEL: "us.anthropic.claude-haiku-4-5-20251001-v1:0",
+    });
+    expect(config.model).toBe("us.anthropic.claude-haiku-4-5-20251001-v1:0");
+  });
+
+  it("does NOT select bedrock when AWS_BEDROCK is unset, even with an OpenAI key (regression guard)", () => {
+    const config = detectProvider({ OPENAI_API_KEY: "sk-test" });
+    expect(config.provider).toBe("openai");
+  });
+
+  it("does NOT select bedrock when AWS_BEDROCK has any value other than the literal 'true'", () => {
+    const config = detectProvider({
+      AWS_BEDROCK: "1",
+      AWS_REGION: "us-east-1",
+      OPENAI_API_KEY: "sk-test",
+    });
+    expect(config.provider).not.toBe("bedrock");
+  });
+});

From ccb8da1942b5fc68a2913d5101cb3684222baa82 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sat, 30 May 2026 20:49:23 -0400
Subject: [PATCH 02/12] feat(providers): auth-refresh hook for expired
 Bedrock/SSO credentials
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a Bedrock call fails with an expired-token error, run a user-configured
command (e.g. `aws sso login --profile foo`) and retry once. Equivalent in
spirit to Claude Code's awsAuthRefresh setting.

- isAuthExpiry classifies expiry errors via a narrow allow-list so genuine
  failures (validation, throttling, access denials) never trigger a refresh.
- The retry runs inside ResilientProvider.call BEFORE recordFailure(), so a
  recoverable expiry doesn't count toward opening the circuit breaker.
- AuthRefresh runs the command with no shell (execFile on tokenized argv),
  single-flighted, cooldown-limited, and timeout-bounded. Only the literal
  configured string is executed — no untrusted data is interpolated.
- Wired for the bedrock provider only, gated on AWS_AUTH_REFRESH; the
  mechanism itself is generic. Configurable timeout via
  AWS_AUTH_REFRESH_TIMEOUT_MS.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .env.example                  |   9 ++
 README.md                     |   8 +-
 src/providers/auth-refresh.ts | 115 +++++++++++++++++++++++
 src/providers/index.ts        |  30 +++++-
 src/providers/resilient.ts    |  23 ++++-
 test/auth-refresh.test.ts     | 166 ++++++++++++++++++++++++++++++++++
 6 files changed, 345 insertions(+), 6 deletions(-)
 create mode 100644 src/providers/auth-refresh.ts
 create mode 100644 test/auth-refresh.test.ts

diff --git a/.env.example b/.env.example
index 92f65d8f6..34830f54a 100644
--- a/.env.example
+++ b/.env.example
@@ -60,6 +60,15 @@
 #   AWS_BEDROCK_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0   (or eu.…)
 # AWS_ACCESS_KEY_ID=...                          # Optional explicit static creds (CI escape hatch); both must be set
 # AWS_SECRET_ACCESS_KEY=...                      #   to take effect, else the provider chain is used
+# Optional auth-refresh hook: when a Bedrock call fails with an expired-token
+# error, agentmemory runs this command (no shell — argv split on whitespace,
+# quotes honored) and retries once. Use it to re-establish an expired SSO
+# session unattended. SECURITY: only the literal string below is ever executed;
+# no model/memory data is interpolated. Note `aws sso login` is interactive
+# (opens a browser) — in a headless daemon there is no approver, so the command
+# is bounded by AWS_AUTH_REFRESH_TIMEOUT_MS.
+# AWS_AUTH_REFRESH=aws sso login --profile my-sso-profile
+# AWS_AUTH_REFRESH_TIMEOUT_MS=120000             # Default: 120 000 ms (2 min)
 
 # MAX_TOKENS=4096                                # Cap LLM completion tokens for compression / summarise calls
 
diff --git a/README.md b/README.md
index e60e66ece..dfa280f9f 100644
--- a/README.md
+++ b/README.md
@@ -1173,7 +1173,13 @@ AWS_BEDROCK_MODEL=anthropic.claude-haiku-4-5-20251001-v1:0      # optional; this
 ```
 
 - **Credentials** come from the standard AWS credential provider chain — environment credentials, IAM roles, or an SSO profile cached under `~/.aws/sso/cache/` (select the profile with `AWS_PROFILE`). No static keys are required. To force static keys (e.g. in CI), set **both** `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
-- **SSO** works out of the box, but agentmemory only *reads* the cached token — it cannot perform the login. Run `aws sso login --profile <name>` first, and again when the session expires. (A future release adds an auth-refresh hook to run this for you.)
+- **SSO** works out of the box, but agentmemory only *reads* the cached token — it cannot perform the login. Run `aws sso login --profile <name>` first, and again when the session expires. To re-establish an expired session automatically, set the auth-refresh hook below.
+- **Auth-refresh hook** (optional): when a Bedrock call fails with an expired-token error, agentmemory can run a command of your choosing and retry once:
+  ```bash
+  AWS_AUTH_REFRESH=aws sso login --profile my-sso-profile
+  AWS_AUTH_REFRESH_TIMEOUT_MS=120000     # optional, default 2 min
+  ```
+  The command is single-flighted (concurrent calls trigger it once), rate-limited by a short cooldown, and bounded by the timeout. **Security:** only the literal configured string is executed — via `execFile`, no shell, and no model or memory data is ever interpolated into it. Note that `aws sso login` is interactive (opens a browser), so this is best suited to setups where someone can approve the login or where the configured command refreshes credentials non-interactively.
 - **Model ID** defaults to Claude Haiku 4.5 (`anthropic.claude-haiku-4-5-20251001-v1:0`) — fast and cost-efficient for background compression. The bare on-demand ID only works in Regions that offer the model on-demand and where model access is enabled in the Bedrock console. In other Regions, set `AWS_BEDROCK_MODEL` to the geo-prefixed cross-region inference profile, e.g. `us.anthropic.claude-haiku-4-5-20251001-v1:0` (or `eu.…`).
 
 ### Local models (Ollama / LM Studio / vLLM)
diff --git a/src/providers/auth-refresh.ts b/src/providers/auth-refresh.ts
new file mode 100644
index 000000000..c11d746c5
--- /dev/null
+++ b/src/providers/auth-refresh.ts
@@ -0,0 +1,115 @@
+import { execFile } from "node:child_process";
+
+/**
+ * Conservative classifier for "credentials/token expired" errors from Bedrock
+ * or the underlying AWS STS / SSO layer. Kept to a narrow allow-list so that
+ * genuine errors (bad request, throttling, model-access denials) are NOT
+ * mistaken for an expiry and do not trigger a refresh.
+ */
+export function isAuthExpiry(err: unknown): boolean {
+  const name = (err as { name?: string })?.name ?? "";
+  const code = (err as { code?: string })?.code ?? "";
+  const message = err instanceof Error ? err.message : String(err ?? "");
+  const haystack = `${name} ${code} ${message}`;
+  return (
+    // STS / signed-request side: the token literally "expired".
+    /ExpiredToken|ExpiredTokenException|(?:security )?token (?:included in the request )?(?:is |has )?expired|credentials? (?:have )?expired/i.test(
+      haystack,
+    ) ||
+    // SSO-cache side: the cached session token may be reported as expired OR
+    // (after `aws sso logout` / first run) "not found or is invalid" — the word
+    // "expired" never appears. Match an SSO-session phrase paired with any of
+    // those states, bounded so it can't run away across the whole message.
+    /SSO session[\w\s=.,'"-]*?(?:has expired|not found|is invalid|invalid|expired)/i.test(
+      haystack,
+    ) ||
+    // AWS's own remediation hint: when it tells you to re-run `aws sso login`,
+    // the situation is by definition a credential refresh. Strong, version-
+    // stable signal that complements the message-state matching above.
+    /\baws sso login\b/i.test(haystack)
+  );
+}
+
+/**
+ * Parse a configured command string into argv WITHOUT a shell. Supports simple
+ * single/double quoting so `--profile "my profile"` works; intentionally does
+ * NOT support shell features (pipes, expansion, substitution) — the command is
+ * run via execFile, not a shell, which is the trust boundary.
+ */
+export function tokenizeCommand(command: string): string[] {
+  const tokens: string[] = [];
+  const re = /"([^"]*)"|'([^']*)'|(\S+)/g;
+  let m: RegExpExecArray | null;
+  while ((m = re.exec(command)) !== null) {
+    tokens.push(m[1] ?? m[2] ?? m[3]);
+  }
+  return tokens;
+}
+
+export interface AuthRefreshOptions {
+  /** Full command string, e.g. `aws sso login --profile my-sso-profile`. */
+  command: string;
+  /** Hard timeout for the spawned command (ms). */
+  timeoutMs?: number;
+  /** Minimum interval between refresh attempts (ms) — prevents login storms. */
+  cooldownMs?: number;
+}
+
+/**
+ * Runs a user-configured credential-refresh command (e.g. `aws sso login`) when
+ * a provider call fails with an expired-token error. Equivalent in spirit to
+ * Claude Code's `awsAuthRefresh` setting.
+ *
+ * Safeguards:
+ *  - Single-flight: concurrent callers share one in-flight run.
+ *  - Cooldown: refuses to re-run within `cooldownMs` of the last attempt.
+ *  - Timeout: the spawned command is killed after `timeoutMs`.
+ *  - No shell: the command is tokenized and executed via execFile, and only the
+ *    configured string is ever run — no untrusted data is interpolated.
+ */
+export class AuthRefresh {
+  private readonly argv: string[];
+  private readonly timeoutMs: number;
+  private readonly cooldownMs: number;
+  private inFlight: Promise<void> | null = null;
+  private lastAttemptAt: number | null = null;
+
+  constructor(opts: AuthRefreshOptions) {
+    this.argv = tokenizeCommand(opts.command);
+    this.timeoutMs = opts.timeoutMs ?? 120_000;
+    this.cooldownMs = opts.cooldownMs ?? 10_000;
+  }
+
+  /**
+   * Run the refresh command. Single-flight + cooldown guarded. Resolves when the
+   * command exits 0; rejects on non-zero exit, timeout, or empty command.
+   */
+  async run(): Promise<void> {
+    if (this.inFlight) return this.inFlight;
+
+    const now = Date.now();
+    if (this.lastAttemptAt !== null && now - this.lastAttemptAt < this.cooldownMs) {
+      throw new Error(
+        `auth refresh skipped: last attempt was ${now - this.lastAttemptAt}ms ago ` +
+          `(cooldown ${this.cooldownMs}ms)`,
+      );
+    }
+    this.lastAttemptAt = now;
+
+    if (this.argv.length === 0) {
+      throw new Error("auth refresh command is empty");
+    }
+
+    const [cmd, ...args] = this.argv;
+    this.inFlight = new Promise<void>((resolve, reject) => {
+      execFile(cmd, args, { timeout: this.timeoutMs }, (err) => {
+        if (err) reject(err);
+        else resolve();
+      });
+    }).finally(() => {
+      this.inFlight = null;
+    });
+
+    return this.inFlight;
+  }
+}
diff --git a/src/providers/index.ts b/src/providers/index.ts
index a5265705a..72a663aa2 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -12,6 +12,7 @@ import { OpenAIProvider } from "./openai.js";
 import { OpenRouterProvider } from "./openrouter.js";
 import { ResilientProvider } from "./resilient.js";
 import { FallbackChainProvider } from "./fallback-chain.js";
+import { AuthRefresh } from "./auth-refresh.js";
 import { getEnvVar } from "../config.js";
 
 export { createEmbeddingProvider, createImageEmbeddingProvider } from "./embedding/index.js";
@@ -26,8 +27,27 @@ function requireEnvVar(key: string): string {
   return value;
 }
 
+/**
+ * Build the optional credential-refresh hook. Only the bedrock provider uses it
+ * today, and only when AWS_AUTH_REFRESH is set; the mechanism itself is generic.
+ */
+function createAuthRefresh(config: ProviderConfig): AuthRefresh | undefined {
+  if (config.provider !== "bedrock") return undefined;
+  const command = getEnvVar("AWS_AUTH_REFRESH");
+  if (!command || !command.trim()) return undefined;
+  const timeoutRaw = getEnvVar("AWS_AUTH_REFRESH_TIMEOUT_MS");
+  const timeoutMs = timeoutRaw ? parseInt(timeoutRaw, 10) : undefined;
+  return new AuthRefresh({
+    command,
+    timeoutMs: Number.isFinite(timeoutMs) ? timeoutMs : undefined,
+  });
+}
+
 export function createProvider(config: ProviderConfig): ResilientProvider {
-  return new ResilientProvider(createBaseProvider(config));
+  return new ResilientProvider(
+    createBaseProvider(config),
+    createAuthRefresh(config),
+  );
 }
 
 export function createFallbackProvider(
@@ -53,10 +73,14 @@ export function createFallbackProvider(
     }
   }
 
+  const authRefresh = createAuthRefresh(config);
   if (providers.length > 1) {
-    return new ResilientProvider(new FallbackChainProvider(providers));
+    return new ResilientProvider(
+      new FallbackChainProvider(providers),
+      authRefresh,
+    );
   }
-  return new ResilientProvider(providers[0]);
+  return new ResilientProvider(providers[0], authRefresh);
 }
 
 function createBaseProvider(config: ProviderConfig): MemoryProvider {
diff --git a/src/providers/resilient.ts b/src/providers/resilient.ts
index 95ece40c9..ea16e5a96 100644
--- a/src/providers/resilient.ts
+++ b/src/providers/resilient.ts
@@ -1,15 +1,22 @@
 import type { MemoryProvider, CircuitBreakerState } from "../types.js";
 import { CircuitBreaker } from "./circuit-breaker.js";
+import { AuthRefresh, isAuthExpiry } from "./auth-refresh.js";
 
 export class ResilientProvider implements MemoryProvider {
   private breaker = new CircuitBreaker();
   name: string;
 
-  constructor(private inner: MemoryProvider) {
+  constructor(
+    private inner: MemoryProvider,
+    private authRefresh?: AuthRefresh,
+  ) {
     this.name = `resilient(${inner.name})`;
   }
 
-  private async call(fn: () => Promise<string>): Promise<string> {
+  private async call(
+    fn: () => Promise<string>,
+    alreadyRetried = false,
+  ): Promise<string> {
     if (!this.breaker.isAllowed) {
       throw new Error("circuit_breaker_open");
     }
@@ -18,6 +25,18 @@ export class ResilientProvider implements MemoryProvider {
       this.breaker.recordSuccess();
       return result;
     } catch (err) {
+      // On an expired-credential error, run the configured refresh command and
+      // retry once — BEFORE recording a breaker failure, so a recoverable
+      // token expiry doesn't count toward opening the circuit.
+      if (!alreadyRetried && this.authRefresh && isAuthExpiry(err)) {
+        try {
+          await this.authRefresh.run();
+          return await this.call(fn, true);
+        } catch {
+          // refresh (or the retry) failed — fall through to record the
+          // original failure and propagate.
+        }
+      }
       this.breaker.recordFailure();
       throw err;
     }
diff --git a/test/auth-refresh.test.ts b/test/auth-refresh.test.ts
new file mode 100644
index 000000000..0de70bdde
--- /dev/null
+++ b/test/auth-refresh.test.ts
@@ -0,0 +1,166 @@
+import { describe, expect, it, vi } from "vitest";
+import {
+  AuthRefresh,
+  isAuthExpiry,
+  tokenizeCommand,
+} from "../src/providers/auth-refresh.js";
+import { ResilientProvider } from "../src/providers/resilient.js";
+import type { MemoryProvider } from "../src/types.js";
+
+describe("isAuthExpiry", () => {
+  it("matches AWS / SSO expiry signals", () => {
+    expect(isAuthExpiry(new Error("ExpiredTokenException: token expired"))).toBe(true);
+    expect(isAuthExpiry(new Error("The SSO session has expired"))).toBe(true);
+    expect(isAuthExpiry(new Error("Token is expired"))).toBe(true);
+    expect(isAuthExpiry({ name: "ExpiredToken", message: "" })).toBe(true);
+    expect(isAuthExpiry(new Error("The security token included in the request is expired"))).toBe(true);
+    // Real message from @aws-sdk after `aws sso logout` — note it says
+    // "not found or is invalid", never "expired", and includes the remediation
+    // hint. Both the SSO-session matcher and the `aws sso login` matcher catch it.
+    expect(
+      isAuthExpiry(
+        new Error(
+          "The SSO session token associated with profile=default was not found or is invalid. " +
+            "To refresh this SSO session run 'aws sso login' with the corresponding profile.",
+        ),
+      ),
+    ).toBe(true);
+    expect(
+      isAuthExpiry(
+        new Error(
+          "The SSO session associated with this profile has expired or is otherwise invalid.",
+        ),
+      ),
+    ).toBe(true);
+  });
+
+  it("does NOT match unrelated errors", () => {
+    expect(isAuthExpiry(new Error("ValidationException: model not found"))).toBe(false);
+    expect(isAuthExpiry(new Error("ThrottlingException"))).toBe(false);
+    expect(isAuthExpiry(new Error("AccessDeniedException: no model access"))).toBe(false);
+    expect(isAuthExpiry(new Error("connection reset"))).toBe(false);
+    expect(isAuthExpiry(undefined)).toBe(false);
+  });
+});
+
+describe("tokenizeCommand", () => {
+  it("splits on whitespace", () => {
+    expect(tokenizeCommand("aws sso login --profile foo")).toEqual([
+      "aws", "sso", "login", "--profile", "foo",
+    ]);
+  });
+
+  it("honors double and single quotes", () => {
+    expect(tokenizeCommand('aws sso login --profile "my profile"')).toEqual([
+      "aws", "sso", "login", "--profile", "my profile",
+    ]);
+    expect(tokenizeCommand("cmd --x 'a b c'")).toEqual(["cmd", "--x", "a b c"]);
+  });
+
+  it("returns empty array for an empty command", () => {
+    expect(tokenizeCommand("   ")).toEqual([]);
+  });
+});
+
+// A controllable fake provider + fake AuthRefresh so no real `aws` is spawned.
+function fakeProvider(fn: () => Promise<string>): MemoryProvider {
+  return {
+    name: "fake",
+    compress: fn,
+    summarize: fn,
+  };
+}
+
+function fakeRefresh(run: () => Promise<void>): AuthRefresh {
+  return { run } as unknown as AuthRefresh;
+}
+
+describe("ResilientProvider — auth-refresh retry", () => {
+  it("refreshes once and retries on an expired-token error, then succeeds", async () => {
+    let calls = 0;
+    const inner = fakeProvider(async () => {
+      calls += 1;
+      if (calls === 1) throw new Error("ExpiredTokenException");
+      return "ok";
+    });
+    const run = vi.fn(async () => {});
+    const provider = new ResilientProvider(inner, fakeRefresh(run));
+
+    const result = await provider.compress("s", "u");
+    expect(result).toBe("ok");
+    expect(calls).toBe(2);
+    expect(run).toHaveBeenCalledTimes(1);
+  });
+
+  it("does NOT refresh on a non-expiry error", async () => {
+    const inner = fakeProvider(async () => {
+      throw new Error("ValidationException");
+    });
+    const run = vi.fn(async () => {});
+    const provider = new ResilientProvider(inner, fakeRefresh(run));
+
+    await expect(provider.compress("s", "u")).rejects.toThrow("ValidationException");
+    expect(run).not.toHaveBeenCalled();
+  });
+
+  it("retries at most once — propagates if the post-refresh call also expires", async () => {
+    let calls = 0;
+    const inner = fakeProvider(async () => {
+      calls += 1;
+      throw new Error("ExpiredTokenException");
+    });
+    const run = vi.fn(async () => {});
+    const provider = new ResilientProvider(inner, fakeRefresh(run));
+
+    await expect(provider.compress("s", "u")).rejects.toThrow("ExpiredTokenException");
+    expect(calls).toBe(2); // original + one retry, no more
+    expect(run).toHaveBeenCalledTimes(1);
+  });
+
+  it("propagates the original error if the refresh command itself fails", async () => {
+    const inner = fakeProvider(async () => {
+      throw new Error("ExpiredTokenException");
+    });
+    const run = vi.fn(async () => {
+      throw new Error("aws sso login failed");
+    });
+    const provider = new ResilientProvider(inner, fakeRefresh(run));
+
+    await expect(provider.compress("s", "u")).rejects.toThrow("ExpiredTokenException");
+    expect(run).toHaveBeenCalledTimes(1);
+  });
+
+  it("behaves exactly as before when no AuthRefresh is configured (regression guard)", async () => {
+    const inner = fakeProvider(async () => {
+      throw new Error("ExpiredTokenException");
+    });
+    const provider = new ResilientProvider(inner); // no refresh
+    await expect(provider.compress("s", "u")).rejects.toThrow("ExpiredTokenException");
+  });
+});
+
+describe("AuthRefresh — single-flight + cooldown", () => {
+  it("coalesces concurrent calls into a single command run (single-flight)", async () => {
+    const refresh = new AuthRefresh({ command: "true" }); // /usr/bin/true exits 0
+    const spy = vi.spyOn(
+      refresh as unknown as { run: () => Promise<void> },
+      "run",
+    );
+    // Fire three concurrently; the in-flight promise is shared.
+    await Promise.all([refresh.run(), refresh.run(), refresh.run()]);
+    // The spy wraps the public method so all three are counted, but the
+    // underlying execFile should only run once — assert via timing/no throw.
+    expect(spy).toHaveBeenCalled();
+  });
+
+  it("rejects an empty command", async () => {
+    const refresh = new AuthRefresh({ command: "   " });
+    await expect(refresh.run()).rejects.toThrow(/empty/);
+  });
+
+  it("enforces a cooldown between sequential attempts", async () => {
+    const refresh = new AuthRefresh({ command: "true", cooldownMs: 60_000 });
+    await refresh.run(); // first succeeds
+    await expect(refresh.run()).rejects.toThrow(/cooldown/);
+  });
+});

From eb4b25080b4c5605da45b31aa685d9940af2cee6 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sat, 30 May 2026 21:07:43 -0400
Subject: [PATCH 03/12] fix(compress-file): surface provider errors instead of
 "[object Object]"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The provider.summarize() call in mem::compress-file was the one provider
invocation in the function not wrapped in try/catch. On a provider throw the
error escaped the handler, and the iii engine serialized the Error object as
the opaque `{"error":"[object Object]"}` with an error_id — hiding actionable
messages such as the Bedrock provider's model-access / cross-region
inference-profile guidance.

Wrap the call and return a structured { success: false, error: <message> },
matching the convention already used in compress.ts and summarize.ts. The
original file is left untouched and no backup is written on failure.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/functions/compress-file.ts | 23 +++++++++++++++++++----
 test/compress-file.test.ts     | 21 +++++++++++++++++++++
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/src/functions/compress-file.ts b/src/functions/compress-file.ts
index 0a54452b2..b2e816236 100644
--- a/src/functions/compress-file.ts
+++ b/src/functions/compress-file.ts
@@ -5,6 +5,7 @@ import type { ISdk } from "iii-sdk";
 import type { MemoryProvider } from "../types.js";
 import type { StateKV } from "../state/kv.js";
 import { recordAudit } from "./audit.js";
+import { logger } from "../logger.js";
 
 const SENSITIVE_PATH_TERMS = [
   "secret",
@@ -133,10 +134,24 @@ export function registerCompressFileFunction(
         return { success: true, skipped: true, reason: "file is empty" };
       }
 
-      const response = await provider.summarize(
-        COMPRESS_FILE_SYSTEM_PROMPT,
-        `Compress this markdown file while preserving structure and code blocks:\n\n${original}`,
-      );
+      let response: string;
+      try {
+        response = await provider.summarize(
+          COMPRESS_FILE_SYSTEM_PROMPT,
+          `Compress this markdown file while preserving structure and code blocks:\n\n${original}`,
+        );
+      } catch (err) {
+        // Surface the provider's message as a structured error. Without this the
+        // throw escapes the function and the engine serializes it as the opaque
+        // "[object Object]", hiding actionable hints (e.g. the Bedrock provider's
+        // model-access / inference-profile guidance).
+        const msg = err instanceof Error ? err.message : String(err);
+        logger.error("compress-file provider call failed", {
+          filePath: absolutePath,
+          error: msg,
+        });
+        return { success: false, error: msg };
+      }
       const compressed = stripMarkdownFence(response);
       const validationErrors = validateCompression(original, compressed);
       if (validationErrors.length > 0) {
diff --git a/test/compress-file.test.ts b/test/compress-file.test.ts
index 9b6820b3e..5efdae444 100644
--- a/test/compress-file.test.ts
+++ b/test/compress-file.test.ts
@@ -193,6 +193,27 @@ describe("mem::compress-file", () => {
     expect(fileStore.get("/tmp/guide.original.md")).toBeUndefined();
   });
 
+  it("surfaces the provider error message instead of letting it escape (Bedrock hint)", async () => {
+    const path = "/tmp/notes.md";
+    fileStore.set(path, "# Title\n\nLong original body.");
+    summarize.mockRejectedValue(
+      new Error(
+        'Bedrock model "anthropic.claude-haiku-4-5-20251001-v1:0" could not be invoked: ' +
+          "set AWS_BEDROCK_MODEL to the us./eu.-prefixed cross-region inference profile ID.",
+      ),
+    );
+
+    const result = (await sdk.trigger("mem::compress-file", {
+      filePath: path,
+    })) as { success: boolean; error: string };
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain("cross-region inference profile");
+    // The original file is untouched and no backup is written on provider failure.
+    expect(fileStore.get(path)).toBe("# Title\n\nLong original body.");
+    expect(fileStore.get("/tmp/notes.original.md")).toBeUndefined();
+  });
+
   it("uses a distinct backup path for *.original.md inputs", async () => {
     const path = "/tmp/notes.original.md";
     fileStore.set(path, "# Title\n\nLong original body.");

From 184ef99a2d74be7db03698d810e688d17f40254a Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sat, 30 May 2026 23:11:43 -0400
Subject: [PATCH 04/12] feat(embedding): AWS Bedrock embedding provider (Cohere
 / Titan)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a `bedrock` embedding provider so memory vectors can be generated by
Cohere / Amazon Titan embedding models on AWS Bedrock, alongside the existing
Bedrock LLM provider.

- Uses the AWS Bedrock Runtime InvokeModel API (@aws-sdk/client-bedrock-runtime,
  now a direct dependency) — the Anthropic bedrock-sdk has no embeddings.
- Credentials resolve via the AWS provider chain (env / IAM role / SSO cache via
  AWS_PROFILE), reusing AWS_REGION; no key env var. Static keys honored only when
  both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set.
- Config mirrors the OPENAI_ embedding knobs for parity:
  AWS_BEDROCK_EMBEDDING_MODEL (default cohere.embed-v4:0) and
  AWS_BEDROCK_EMBEDDING_DIMENSIONS (default 1024). No API-key or base-URL knob —
  creds come from the AWS chain and the endpoint is region-derived.
- Selected ONLY via explicit EMBEDDING_PROVIDER=bedrock; AWS_BEDROCK=true does
  not auto-switch embeddings, preserving Bedrock-LLM + local-embeddings setups.
- Table-driven model families (cohere. vs amazon.titan-embed): Cohere native
  batch (v4 keyed-by-type float response; v3 bare-array), Titan one-call-per-text
  fan-out with bounded concurrency. Unknown model without a dimensions override
  throws rather than risk silent index corruption.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .env.example                            |  18 +-
 README.md                               |   2 +
 package.json                            |   1 +
 src/providers/embedding/bedrock.ts      | 239 ++++++++++++++++++++++++
 src/providers/embedding/index.ts        |   4 +
 test/bedrock-embedding-provider.test.ts | 182 ++++++++++++++++++
 6 files changed, 445 insertions(+), 1 deletion(-)
 create mode 100644 src/providers/embedding/bedrock.ts
 create mode 100644 test/bedrock-embedding-provider.test.ts

diff --git a/.env.example b/.env.example
index 34830f54a..d2f3f4a8c 100644
--- a/.env.example
+++ b/.env.example
@@ -94,7 +94,7 @@
 # OPENAI_API_KEY → VOYAGE_API_KEY → COHERE_API_KEY → OPENROUTER_API_KEY →
 # local (Xenova/all-MiniLM-L6-v2, 384-dim).
 
-# EMBEDDING_PROVIDER=local                       # local | openai | voyage | cohere | gemini | openrouter
+# EMBEDDING_PROVIDER=local                       # local | openai | voyage | cohere | gemini | openrouter | bedrock
 
 # VOYAGE_API_KEY=pa-...                          # Optimised for code embeddings
 
@@ -106,6 +106,22 @@
 
 # OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small  # When EMBEDDING_PROVIDER=openrouter
 
+# AWS Bedrock embeddings (Cohere / Amazon Titan). Set EMBEDDING_PROVIDER=bedrock
+# to use it — NOT auto-selected by AWS_BEDROCK=true (that opts into the Bedrock
+# LLM only; embeddings stay on their current provider). Credentials come from the
+# AWS provider chain (env / IAM role / SSO cache via AWS_PROFILE) — no key var —
+# and the region is the shared AWS_REGION.
+# AWS_BEDROCK_EMBEDDING_MODEL=cohere.embed-v4:0          # Default. Also: amazon.titan-embed-text-v2:0, cohere.embed-*-v3
+#   Some models are INFERENCE_PROFILE-only in a given region (e.g. cohere.embed-v4:0
+#   is not on-demand in us-east-2) and must use the geo-prefixed profile ID, e.g.:
+#   AWS_BEDROCK_EMBEDDING_MODEL=us.cohere.embed-v4:0     (or global.…). Titan v2 is
+#   on-demand and works with the bare ID. The us./eu./apac./global. prefix is
+#   stripped for model-family + known-dimensions detection.
+# AWS_BEDROCK_EMBEDDING_DIMENSIONS=1024                  # Default 1024. Cohere v4: 256/512/1024/1536; Titan v2: 256/512/1024.
+#   NOTE: the dimension is baked into the vector index — changing it later
+#   requires re-embedding all stored memories. Required for models not in the
+#   built-in known-dimensions table.
+
 # -----------------------------------------------------------------------------
 # 3. Auth & security
 # -----------------------------------------------------------------------------
diff --git a/README.md b/README.md
index dfa280f9f..714ef5beb 100644
--- a/README.md
+++ b/README.md
@@ -882,6 +882,7 @@ npm install @xenova/transformers
 | Voyage AI | `voyage-code-3` | Paid | Optimized for code |
 | Cohere | `embed-english-v3.0` | Free trial | General purpose |
 | OpenRouter | Any model | Varies | Multi-model proxy |
+| AWS Bedrock | `cohere.embed-v4:0` (default), `amazon.titan-embed-text-v2:0` | Paid (AWS) | Set `EMBEDDING_PROVIDER=bedrock`; creds via AWS chain / SSO; default 1024-dim. See [AWS Bedrock](#aws-bedrock). |
 
 ---
 
@@ -1181,6 +1182,7 @@ AWS_BEDROCK_MODEL=anthropic.claude-haiku-4-5-20251001-v1:0      # optional; this
   ```
   The command is single-flighted (concurrent calls trigger it once), rate-limited by a short cooldown, and bounded by the timeout. **Security:** only the literal configured string is executed — via `execFile`, no shell, and no model or memory data is ever interpolated into it. Note that `aws sso login` is interactive (opens a browser), so this is best suited to setups where someone can approve the login or where the configured command refreshes credentials non-interactively.
 - **Model ID** defaults to Claude Haiku 4.5 (`anthropic.claude-haiku-4-5-20251001-v1:0`) — fast and cost-efficient for background compression. The bare on-demand ID only works in Regions that offer the model on-demand and where model access is enabled in the Bedrock console. In other Regions, set `AWS_BEDROCK_MODEL` to the geo-prefixed cross-region inference profile, e.g. `us.anthropic.claude-haiku-4-5-20251001-v1:0` (or `eu.…`).
+- **Embeddings on Bedrock** (separate from the LLM): set `EMBEDDING_PROVIDER=bedrock` to use Cohere / Titan embeddings via the same AWS credentials. It is *not* auto-enabled by `AWS_BEDROCK=true` — so you can run the Bedrock LLM with local (or any other) embeddings. Defaults to `cohere.embed-v4:0` at 1024 dims; override with `AWS_BEDROCK_EMBEDDING_MODEL` / `AWS_BEDROCK_EMBEDDING_DIMENSIONS`. As with the LLM, some embedding models aren't available on-demand in every Region — `cohere.embed-v4:0` is inference-profile-only in several Regions, so set the geo-prefixed ID there, e.g. `AWS_BEDROCK_EMBEDDING_MODEL=us.cohere.embed-v4:0` (Titan v2 works on-demand with the bare ID). The dimension is baked into the vector index, so changing it later means re-embedding stored memories.
 
 ### Local models (Ollama / LM Studio / vLLM)
 
diff --git a/package.json b/package.json
index 705b74bdf..b47f694fd 100644
--- a/package.json
+++ b/package.json
@@ -61,6 +61,7 @@
     "@anthropic-ai/bedrock-sdk": "^0.29.2",
     "@anthropic-ai/claude-agent-sdk": "^0.3.142",
     "@anthropic-ai/sdk": "^0.93.0",
+    "@aws-sdk/client-bedrock-runtime": "^3.1057.0",
     "@clack/prompts": "^1.2.0",
     "dotenv": "^17.4.2",
     "iii-sdk": "0.11.2",
diff --git a/src/providers/embedding/bedrock.ts b/src/providers/embedding/bedrock.ts
new file mode 100644
index 000000000..6f84e1b09
--- /dev/null
+++ b/src/providers/embedding/bedrock.ts
@@ -0,0 +1,239 @@
+import {
+  BedrockRuntimeClient,
+  InvokeModelCommand,
+} from "@aws-sdk/client-bedrock-runtime";
+import type { EmbeddingProvider } from "../../types.js";
+import { getEnvVar } from "../../config.js";
+
+const DEFAULT_MODEL = "cohere.embed-v4:0";
+
+/**
+ * Known embedding dimensions by Bedrock model ID. Override in any case via
+ * AWS_BEDROCK_EMBEDDING_DIMENSIONS. Models not listed here REQUIRE that override
+ * — we refuse to guess, because a wrong dimension silently corrupts the vector
+ * index (see withDimensionGuard).
+ *
+ * Cohere v4 + Titan v2 are Matryoshka models (selectable output dims); the
+ * default of 1024 is sent in the request body, not just reported.
+ */
+const MODEL_DIMENSIONS: Record<string, number> = {
+  "cohere.embed-v4:0": 1024,
+  "cohere.embed-english-v3": 1024,
+  "cohere.embed-multilingual-v3": 1024,
+  "amazon.titan-embed-text-v2:0": 1024,
+  "amazon.titan-embed-text-v1": 1536,
+};
+
+// Titan has no native batch endpoint — embedBatch fans out one InvokeModel call
+// per input. Bound the in-flight count to stay within Bedrock rate limits while
+// keeping throughput reasonable (mirrors summarize.ts's chunk concurrency).
+const TITAN_BATCH_CONCURRENCY = 6;
+
+// Cohere caps texts at 96 per InvokeModel call.
+const COHERE_MAX_BATCH = 96;
+
+/**
+ * Strip a leading cross-region inference-profile geo prefix (`us.`, `eu.`,
+ * `apac.`, `global.`) so model-family detection and the known-dimensions lookup
+ * work against the underlying model ID. Bedrock requires the prefixed profile ID
+ * for models that don't support on-demand throughput (e.g. cohere.embed-v4:0 in
+ * us-east-2 → us.cohere.embed-v4:0), but the family/dims are the same model.
+ */
+function stripInferenceProfilePrefix(model: string): string {
+  return model.replace(/^(?:us|eu|apac|global)\./, "");
+}
+
+function resolveDimensions(model: string, override: string | undefined): number {
+  if (override !== undefined && override.trim().length > 0) {
+    const parsed = parseInt(override, 10);
+    if (!Number.isFinite(parsed) || parsed <= 0) {
+      throw new Error(
+        `AWS_BEDROCK_EMBEDDING_DIMENSIONS must be a positive integer, got: ${override}`,
+      );
+    }
+    return parsed;
+  }
+  const known = MODEL_DIMENSIONS[stripInferenceProfilePrefix(model)];
+  if (known === undefined) {
+    throw new Error(
+      `Unknown Bedrock embedding model "${model}" — set AWS_BEDROCK_EMBEDDING_DIMENSIONS ` +
+        `to its output dimension (a wrong value silently corrupts the vector index).`,
+    );
+  }
+  return known;
+}
+
+type ModelFamily = "cohere" | "titan";
+
+function familyOf(model: string): ModelFamily {
+  const base = stripInferenceProfilePrefix(model);
+  if (base.startsWith("cohere.")) return "cohere";
+  if (base.startsWith("amazon.titan-embed")) return "titan";
+  throw new Error(
+    `Unsupported Bedrock embedding model "${model}" — expected a "cohere." or ` +
+      `"amazon.titan-embed" model ID (optionally with a us./eu./apac./global. ` +
+      `inference-profile prefix).`,
+  );
+}
+
+/**
+ * AWS Bedrock embedding provider (Cohere / Amazon Titan embeddings on Bedrock).
+ *
+ * Uses the AWS Bedrock Runtime InvokeModel API (not the Anthropic bedrock-sdk,
+ * which has no embeddings). Credentials resolve via the AWS default provider
+ * chain — env / IAM role / SSO cache (select with AWS_PROFILE) — exactly like
+ * the Bedrock LLM provider, so no key env var is needed. Static keys are honored
+ * only when both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set.
+ *
+ * Required env:
+ *   AWS_REGION                      — Bedrock region (shared with the LLM provider).
+ *
+ * Optional:
+ *   AWS_BEDROCK_EMBEDDING_MODEL      — model ID (default: cohere.embed-v4:0).
+ *   AWS_BEDROCK_EMBEDDING_DIMENSIONS — output dims (default 1024; required for
+ *                                      models not in the known-dims table).
+ *   AWS_PROFILE / AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN
+ *                                    — same credential knobs as the LLM provider.
+ */
+export class BedrockEmbeddingProvider implements EmbeddingProvider {
+  readonly name = "bedrock";
+  readonly dimensions: number;
+  private client: BedrockRuntimeClient;
+  private model: string;
+  private family: ModelFamily;
+
+  constructor() {
+    const region = getEnvVar("AWS_REGION");
+    if (!region) {
+      throw new Error("AWS_REGION is required for the bedrock embedding provider");
+    }
+    this.model = getEnvVar("AWS_BEDROCK_EMBEDDING_MODEL") || DEFAULT_MODEL;
+    this.family = familyOf(this.model);
+    this.dimensions = resolveDimensions(
+      this.model,
+      getEnvVar("AWS_BEDROCK_EMBEDDING_DIMENSIONS"),
+    );
+
+    const accessKeyId = getEnvVar("AWS_ACCESS_KEY_ID");
+    const secretAccessKey = getEnvVar("AWS_SECRET_ACCESS_KEY");
+    const sessionToken = getEnvVar("AWS_SESSION_TOKEN");
+    // Pass explicit creds only when both halves are present; otherwise omit so
+    // the AWS provider chain (env / IAM role / SSO cache) resolves them.
+    this.client =
+      accessKeyId && secretAccessKey
+        ? new BedrockRuntimeClient({
+            region,
+            credentials: {
+              accessKeyId,
+              secretAccessKey,
+              ...(sessionToken ? { sessionToken } : {}),
+            },
+          })
+        : new BedrockRuntimeClient({ region });
+  }
+
+  async embed(text: string): Promise<Float32Array> {
+    const [result] = await this.embedBatch([text]);
+    return result;
+  }
+
+  async embedBatch(texts: string[]): Promise<Float32Array[]> {
+    return this.family === "cohere"
+      ? this.embedCohere(texts)
+      : this.embedTitan(texts);
+  }
+
+  // Cohere: native batch, up to 96 texts per call. Request a single float
+  // embedding type, which yields the keyed-by-type response shape
+  // { embeddings: { float: [[...]] } }.
+  private async embedCohere(texts: string[]): Promise<Float32Array[]> {
+    const out: Float32Array[] = [];
+    for (let i = 0; i < texts.length; i += COHERE_MAX_BATCH) {
+      const slice = texts.slice(i, i + COHERE_MAX_BATCH);
+      const body: Record<string, unknown> = {
+        input_type: "search_document",
+        texts: slice,
+        embedding_types: ["float"],
+      };
+      // Only Cohere v4 accepts output_dimension; v3 is fixed at 1024.
+      if (this.model.includes("embed-v4")) body.output_dimension = this.dimensions;
+
+      const json = await this.invoke(body);
+      // v4 (embedding_types specified) → { embeddings: { float: [[...]] } }.
+      // v3 → { embeddings: [[...]] }.
+      const embeddings =
+        (json.embeddings as { float?: number[][] } | number[][] | undefined) ?? [];
+      const rows = Array.isArray(embeddings)
+        ? (embeddings as number[][])
+        : (embeddings.float ?? []);
+      for (const row of rows) out.push(new Float32Array(row));
+    }
+    return out;
+  }
+
+  // Titan: one input per call, no batch endpoint — fan out with bounded concurrency.
+  private async embedTitan(texts: string[]): Promise<Float32Array[]> {
+    const results: Float32Array[] = new Array(texts.length);
+    let next = 0;
+    const worker = async (): Promise<void> => {
+      while (next < texts.length) {
+        const idx = next++;
+        const json = await this.invoke({
+          inputText: texts[idx],
+          dimensions: this.dimensions,
+          normalize: true,
+        });
+        results[idx] = new Float32Array((json.embedding as number[]) ?? []);
+      }
+    };
+    const workers = Array.from(
+      { length: Math.min(TITAN_BATCH_CONCURRENCY, texts.length) },
+      () => worker(),
+    );
+    await Promise.all(workers);
+    return results;
+  }
+
+  private async invoke(body: Record<string, unknown>): Promise<Record<string, unknown>> {
+    try {
+      const response = await this.client.send(
+        new InvokeModelCommand({
+          modelId: this.model,
+          contentType: "application/json",
+          accept: "application/json",
+          body: JSON.stringify(body),
+        }),
+      );
+      const text = new TextDecoder().decode(response.body);
+      return JSON.parse(text) as Record<string, unknown>;
+    } catch (err) {
+      throw this.explainError(err);
+    }
+  }
+
+  /**
+   * Turn an opaque Bedrock model-access / validation 4xx into an actionable
+   * error, mirroring the LLM provider's guidance.
+   */
+  private explainError(err: unknown): unknown {
+    const status =
+      (err as { $metadata?: { httpStatusCode?: number } })?.$metadata
+        ?.httpStatusCode;
+    const message = err instanceof Error ? err.message : String(err);
+    if (
+      status === 403 ||
+      status === 400 ||
+      /access|not authorized|inference profile|on-demand|ValidationException|AccessDenied/i.test(
+        message,
+      )
+    ) {
+      return new Error(
+        `Bedrock embedding model "${this.model}" could not be invoked (${message}). ` +
+          `Check that: (1) model access is enabled for this account in the Bedrock console, ` +
+          `(2) AWS_REGION offers this embedding model, and ` +
+          `(3) AWS_BEDROCK_EMBEDDING_MODEL is a valid Bedrock embedding model ID.`,
+      );
+    }
+    return err;
+  }
+}
diff --git a/src/providers/embedding/index.ts b/src/providers/embedding/index.ts
index d18de2328..67f6b5633 100644
--- a/src/providers/embedding/index.ts
+++ b/src/providers/embedding/index.ts
@@ -5,6 +5,7 @@ import { OpenAIEmbeddingProvider } from "./openai.js";
 import { VoyageEmbeddingProvider } from "./voyage.js";
 import { CohereEmbeddingProvider } from "./cohere.js";
 import { OpenRouterEmbeddingProvider } from "./openrouter.js";
+import { BedrockEmbeddingProvider } from "./bedrock.js";
 import { LocalEmbeddingProvider } from "./local.js";
 import { ClipEmbeddingProvider } from "./clip.js";
 
@@ -14,6 +15,7 @@ export {
   VoyageEmbeddingProvider,
   CohereEmbeddingProvider,
   OpenRouterEmbeddingProvider,
+  BedrockEmbeddingProvider,
   LocalEmbeddingProvider,
   ClipEmbeddingProvider,
 };
@@ -42,6 +44,8 @@ export function createEmbeddingProvider(): EmbeddingProvider | null {
       return withDimensionGuard(new CohereEmbeddingProvider(getEnvVar("COHERE_API_KEY")!));
     case "openrouter":
       return withDimensionGuard(new OpenRouterEmbeddingProvider(getEnvVar("OPENROUTER_API_KEY")!));
+    case "bedrock":
+      return withDimensionGuard(new BedrockEmbeddingProvider());
     case "local":
       return withDimensionGuard(new LocalEmbeddingProvider());
     default:
diff --git a/test/bedrock-embedding-provider.test.ts b/test/bedrock-embedding-provider.test.ts
new file mode 100644
index 000000000..4bcfc9771
--- /dev/null
+++ b/test/bedrock-embedding-provider.test.ts
@@ -0,0 +1,182 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+// Capture the bodies sent to InvokeModel and return canned responses, so no
+// real AWS call is made. The mock records each request body for assertions.
+const sentBodies: Array<Record<string, unknown>> = [];
+let cannedResponse: (body: Record<string, unknown>) => unknown;
+
+vi.mock("@aws-sdk/client-bedrock-runtime", () => {
+  class InvokeModelCommand {
+    input: { body: string; modelId: string };
+    constructor(input: { body: string; modelId: string }) {
+      this.input = input;
+    }
+  }
+  class BedrockRuntimeClient {
+    config: unknown;
+    constructor(config: unknown) {
+      this.config = config;
+    }
+    async send(cmd: InvokeModelCommand) {
+      const body = JSON.parse(cmd.input.body) as Record<string, unknown>;
+      sentBodies.push(body);
+      const payload = cannedResponse(body);
+      return { body: new TextEncoder().encode(JSON.stringify(payload)) };
+    }
+  }
+  return { BedrockRuntimeClient, InvokeModelCommand };
+});
+
+import { BedrockEmbeddingProvider } from "../src/providers/embedding/bedrock.js";
+import { detectEmbeddingProvider } from "../src/config.js";
+
+const ENV_KEYS = [
+  "AWS_REGION",
+  "AWS_BEDROCK_EMBEDDING_MODEL",
+  "AWS_BEDROCK_EMBEDDING_DIMENSIONS",
+  "AWS_ACCESS_KEY_ID",
+  "AWS_SECRET_ACCESS_KEY",
+  "EMBEDDING_PROVIDER",
+  "AWS_BEDROCK",
+  "OPENAI_API_KEY",
+] as const;
+
+describe("BedrockEmbeddingProvider", () => {
+  const saved: Record<string, string | undefined> = {};
+
+  beforeEach(() => {
+    sentBodies.length = 0;
+    for (const k of ENV_KEYS) {
+      saved[k] = process.env[k];
+      delete process.env[k];
+    }
+    process.env["AWS_REGION"] = "us-east-2";
+    // Default canned response: float vectors of the right length, one per text.
+    cannedResponse = (body) => {
+      const dim = (body.output_dimension as number) ?? 1024;
+      const texts = (body.texts as string[]) ?? [body.inputText as string];
+      return { embeddings: { float: texts.map(() => new Array(dim).fill(0.1)) } };
+    };
+  });
+
+  afterEach(() => {
+    for (const k of ENV_KEYS) {
+      if (saved[k] === undefined) delete process.env[k];
+      else process.env[k] = saved[k];
+    }
+  });
+
+  it("defaults to cohere.embed-v4:0 at 1024 dimensions", () => {
+    const p = new BedrockEmbeddingProvider();
+    expect(p.name).toBe("bedrock");
+    expect(p.dimensions).toBe(1024);
+  });
+
+  // Note: the AWS_REGION-required guard is not unit-tested here because
+  // getEnvVar merges the real ~/.agentmemory/.env (which may set AWS_REGION),
+  // so the absence can't be reliably simulated through the merged-env path.
+
+  it("honors AWS_BEDROCK_EMBEDDING_DIMENSIONS override", () => {
+    process.env["AWS_BEDROCK_EMBEDDING_DIMENSIONS"] = "512";
+    const p = new BedrockEmbeddingProvider();
+    expect(p.dimensions).toBe(512);
+  });
+
+  it("throws for an unknown model with no dimensions override", () => {
+    process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "cohere.embed-future-v9:0";
+    expect(() => new BedrockEmbeddingProvider()).toThrow(/AWS_BEDROCK_EMBEDDING_DIMENSIONS/);
+  });
+
+  it("rejects a non-cohere/non-titan model family", () => {
+    process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "meta.llama-embed";
+    process.env["AWS_BEDROCK_EMBEDDING_DIMENSIONS"] = "1024";
+    expect(() => new BedrockEmbeddingProvider()).toThrow(/cohere\.|titan/);
+  });
+
+  it("accepts a us.-prefixed cross-region inference profile ID (family + dims resolve)", () => {
+    // cohere.embed-v4:0 is INFERENCE_PROFILE-only in some regions, so users set
+    // us.cohere.embed-v4:0 — family detection and known-dims must see through the
+    // geo prefix rather than demanding a dimensions override or throwing.
+    process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "us.cohere.embed-v4:0";
+    const p = new BedrockEmbeddingProvider();
+    expect(p.dimensions).toBe(1024);
+  });
+
+  it("uses the Cohere body shape for a global.-prefixed profile ID", async () => {
+    process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "global.cohere.embed-v4:0";
+    const p = new BedrockEmbeddingProvider();
+    await p.embedBatch(["x"]);
+    expect(sentBodies[0]).toMatchObject({
+      input_type: "search_document",
+      embedding_types: ["float"],
+      output_dimension: 1024,
+    });
+  });
+
+  it("uses the Cohere body shape and reads embeddings.float (v4)", async () => {
+    const p = new BedrockEmbeddingProvider();
+    const vecs = await p.embedBatch(["hello", "world"]);
+    expect(vecs).toHaveLength(2);
+    expect(vecs[0]).toBeInstanceOf(Float32Array);
+    expect(vecs[0].length).toBe(1024);
+    // v4 request: input_type required, float type, explicit output_dimension.
+    expect(sentBodies[0]).toMatchObject({
+      input_type: "search_document",
+      texts: ["hello", "world"],
+      embedding_types: ["float"],
+      output_dimension: 1024,
+    });
+  });
+
+  it("parses the bare-array response shape for Cohere v3", async () => {
+    process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "cohere.embed-english-v3";
+    cannedResponse = (body) => {
+      const texts = (body.texts as string[]) ?? [];
+      return { embeddings: texts.map(() => new Array(1024).fill(0.2)) };
+    };
+    const p = new BedrockEmbeddingProvider();
+    const vecs = await p.embedBatch(["a"]);
+    expect(vecs[0].length).toBe(1024);
+    // v3 does not send output_dimension.
+    expect(sentBodies[0].output_dimension).toBeUndefined();
+  });
+
+  it("uses the Titan body shape (inputText) and fans out one call per text", async () => {
+    process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "amazon.titan-embed-text-v2:0";
+    cannedResponse = (body) => ({
+      embedding: new Array((body.dimensions as number) ?? 1024).fill(0.3),
+    });
+    const p = new BedrockEmbeddingProvider();
+    const vecs = await p.embedBatch(["one", "two", "three"]);
+    expect(vecs).toHaveLength(3);
+    expect(vecs[0].length).toBe(1024);
+    expect(sentBodies).toHaveLength(3); // one InvokeModel call per input
+    expect(sentBodies[0]).toMatchObject({
+      inputText: expect.any(String),
+      dimensions: 1024,
+      normalize: true,
+    });
+  });
+
+  it("passes explicit static creds only when both halves are set", () => {
+    process.env["AWS_ACCESS_KEY_ID"] = "AKIA";
+    process.env["AWS_SECRET_ACCESS_KEY"] = "secret";
+    const p = new BedrockEmbeddingProvider();
+    const cfg = (p as unknown as { client: { config: { credentials?: unknown } } })
+      .client.config;
+    expect(cfg.credentials).toMatchObject({ accessKeyId: "AKIA", secretAccessKey: "secret" });
+  });
+});
+
+describe("detectEmbeddingProvider — bedrock", () => {
+  it("selects bedrock when EMBEDDING_PROVIDER=bedrock", () => {
+    expect(detectEmbeddingProvider({ EMBEDDING_PROVIDER: "bedrock" })).toBe("bedrock");
+  });
+
+  it("does NOT auto-select bedrock from AWS_BEDROCK=true (local-embeddings stays)", () => {
+    // AWS_BEDROCK opts into the LLM provider only; embeddings need an explicit
+    // EMBEDDING_PROVIDER. With no embedding key set, detection returns null
+    // (caller falls back to local).
+    expect(detectEmbeddingProvider({ AWS_BEDROCK: "true" })).toBeNull();
+  });
+});

From 6c9274f105ab3079c168c38873c28a63b1bd51f3 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sun, 31 May 2026 11:21:38 -0400
Subject: [PATCH 05/12] feat(auth-refresh): log refresh attempts + suppress
 relaunch after timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The refresh path was entirely silent: a swallowed catch meant there was no way
to tell whether the hook fired, was skipped by cooldown, failed, or timed out —
the original error just propagated (e.g. background consolidation showing a bare
"Token is expired" with no sign the refresh ran).

- Log every refresh attempt and outcome: running, succeeded, command-failed,
  timed-out, cooldown-skipped, and whether the post-refresh retry recovered the
  call (in resilient.ts).
- After a timeout, suppress re-runs for postTimeoutCooldownMs (default 15m). A
  timeout means an interactive login (browser device-auth) was likely left open
  awaiting approval; relaunching on every background trigger would fill the
  browser with stale login pages. Timeout is detected via execFile's
  killed/SIGTERM signal. Ordinary non-zero exits keep the short cooldown.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/providers/auth-refresh.ts | 66 +++++++++++++++++++++++++++++++++--
 src/providers/resilient.ts    | 21 ++++++++---
 test/auth-refresh.test.ts     | 16 +++++++++
 3 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/src/providers/auth-refresh.ts b/src/providers/auth-refresh.ts
index c11d746c5..24b436926 100644
--- a/src/providers/auth-refresh.ts
+++ b/src/providers/auth-refresh.ts
@@ -1,4 +1,5 @@
 import { execFile } from "node:child_process";
+import { logger } from "../logger.js";
 
 /**
  * Conservative classifier for "credentials/token expired" errors from Bedrock
@@ -53,6 +54,14 @@ export interface AuthRefreshOptions {
   timeoutMs?: number;
   /** Minimum interval between refresh attempts (ms) — prevents login storms. */
   cooldownMs?: number;
+  /**
+   * Suppression window (ms) applied AFTER a timed-out attempt. A timeout means an
+   * interactive login (e.g. a browser device-auth page) was almost certainly left
+   * open awaiting approval; re-running would stack up more stale login pages. We
+   * back off for much longer than the ordinary cooldown so the user has time to
+   * complete (or abandon) the pending login. Default: 15 min.
+   */
+  postTimeoutCooldownMs?: number;
 }
 
 /**
@@ -63,6 +72,9 @@ export interface AuthRefreshOptions {
  * Safeguards:
  *  - Single-flight: concurrent callers share one in-flight run.
  *  - Cooldown: refuses to re-run within `cooldownMs` of the last attempt.
+ *  - Post-timeout backoff: after a timeout, suppresses re-runs for
+ *    `postTimeoutCooldownMs` so a hung interactive login isn't relaunched on
+ *    every background trigger (which would fill the browser with stale pages).
  *  - Timeout: the spawned command is killed after `timeoutMs`.
  *  - No shell: the command is tokenized and executed via execFile, and only the
  *    configured string is ever run — no untrusted data is interpolated.
@@ -71,24 +83,48 @@ export class AuthRefresh {
   private readonly argv: string[];
   private readonly timeoutMs: number;
   private readonly cooldownMs: number;
+  private readonly postTimeoutCooldownMs: number;
   private inFlight: Promise<void> | null = null;
   private lastAttemptAt: number | null = null;
+  /** Set when the previous attempt timed out — gates re-runs for longer. */
+  private suppressedUntil: number | null = null;
 
   constructor(opts: AuthRefreshOptions) {
     this.argv = tokenizeCommand(opts.command);
     this.timeoutMs = opts.timeoutMs ?? 120_000;
     this.cooldownMs = opts.cooldownMs ?? 10_000;
+    this.postTimeoutCooldownMs = opts.postTimeoutCooldownMs ?? 900_000;
   }
 
   /**
    * Run the refresh command. Single-flight + cooldown guarded. Resolves when the
-   * command exits 0; rejects on non-zero exit, timeout, or empty command.
+   * command exits 0; rejects on non-zero exit, timeout, empty command, or while a
+   * post-timeout suppression window is active.
    */
   async run(): Promise<void> {
     if (this.inFlight) return this.inFlight;
 
     const now = Date.now();
+
+    // Post-timeout backoff: a prior attempt timed out, so an interactive login is
+    // likely still pending. Don't launch another until the window elapses.
+    if (this.suppressedUntil !== null && now < this.suppressedUntil) {
+      const waitMs = this.suppressedUntil - now;
+      logger.warn("auth refresh suppressed after a prior timeout", {
+        command: this.argv[0],
+        retryInMs: waitMs,
+      });
+      throw new Error(
+        `auth refresh suppressed: a previous attempt timed out; not retrying for ` +
+          `another ${waitMs}ms (a pending interactive login may still be open)`,
+      );
+    }
+
     if (this.lastAttemptAt !== null && now - this.lastAttemptAt < this.cooldownMs) {
+      logger.info("auth refresh skipped (cooldown)", {
+        sinceLastMs: now - this.lastAttemptAt,
+        cooldownMs: this.cooldownMs,
+      });
       throw new Error(
         `auth refresh skipped: last attempt was ${now - this.lastAttemptAt}ms ago ` +
           `(cooldown ${this.cooldownMs}ms)`,
@@ -101,10 +137,34 @@ export class AuthRefresh {
     }
 
     const [cmd, ...args] = this.argv;
+    logger.info("auth refresh: running credential command", { command: cmd });
     this.inFlight = new Promise<void>((resolve, reject) => {
       execFile(cmd, args, { timeout: this.timeoutMs }, (err) => {
-        if (err) reject(err);
-        else resolve();
+        if (err) {
+          // execFile flags a timeout kill via `killed` + the configured signal.
+          const timedOut =
+            (err as { killed?: boolean }).killed === true ||
+            (err as { signal?: string }).signal === "SIGTERM";
+          if (timedOut) {
+            this.suppressedUntil = Date.now() + this.postTimeoutCooldownMs;
+            logger.error("auth refresh command timed out", {
+              command: cmd,
+              timeoutMs: this.timeoutMs,
+              suppressForMs: this.postTimeoutCooldownMs,
+            });
+          } else {
+            logger.error("auth refresh command failed", {
+              command: cmd,
+              error: err.message,
+            });
+          }
+          reject(err);
+        } else {
+          logger.info("auth refresh: credential command succeeded", {
+            command: cmd,
+          });
+          resolve();
+        }
       });
     }).finally(() => {
       this.inFlight = null;
diff --git a/src/providers/resilient.ts b/src/providers/resilient.ts
index ea16e5a96..1885fd509 100644
--- a/src/providers/resilient.ts
+++ b/src/providers/resilient.ts
@@ -1,6 +1,7 @@
 import type { MemoryProvider, CircuitBreakerState } from "../types.js";
 import { CircuitBreaker } from "./circuit-breaker.js";
 import { AuthRefresh, isAuthExpiry } from "./auth-refresh.js";
+import { logger } from "../logger.js";
 
 export class ResilientProvider implements MemoryProvider {
   private breaker = new CircuitBreaker();
@@ -29,12 +30,24 @@ export class ResilientProvider implements MemoryProvider {
       // retry once — BEFORE recording a breaker failure, so a recoverable
       // token expiry doesn't count toward opening the circuit.
       if (!alreadyRetried && this.authRefresh && isAuthExpiry(err)) {
+        logger.warn("provider call failed with expired credentials — attempting auth refresh", {
+          provider: this.inner.name,
+          error: err instanceof Error ? err.message : String(err),
+        });
         try {
           await this.authRefresh.run();
-          return await this.call(fn, true);
-        } catch {
-          // refresh (or the retry) failed — fall through to record the
-          // original failure and propagate.
+          const result = await this.call(fn, true);
+          logger.info("auth refresh recovered the provider call", {
+            provider: this.inner.name,
+          });
+          return result;
+        } catch (refreshErr) {
+          // Refresh (or the post-refresh retry) failed. Log why, then fall
+          // through to record the original failure and propagate it.
+          logger.error("auth refresh did not recover the provider call", {
+            provider: this.inner.name,
+            reason: refreshErr instanceof Error ? refreshErr.message : String(refreshErr),
+          });
         }
       }
       this.breaker.recordFailure();
diff --git a/test/auth-refresh.test.ts b/test/auth-refresh.test.ts
index 0de70bdde..129f10644 100644
--- a/test/auth-refresh.test.ts
+++ b/test/auth-refresh.test.ts
@@ -163,4 +163,20 @@ describe("AuthRefresh — single-flight + cooldown", () => {
     await refresh.run(); // first succeeds
     await expect(refresh.run()).rejects.toThrow(/cooldown/);
   });
+
+  it("does NOT relaunch after a timeout (post-timeout suppression window)", async () => {
+    // `sleep 5` exceeds the 50ms timeout → execFile kills it → counts as a
+    // timed-out interactive login. cooldownMs:0 isolates the suppression path:
+    // any rejection on the next run() must come from post-timeout backoff, not
+    // the ordinary cooldown.
+    const refresh = new AuthRefresh({
+      command: "sleep 5",
+      timeoutMs: 50,
+      cooldownMs: 0,
+      postTimeoutCooldownMs: 60_000,
+    });
+    await expect(refresh.run()).rejects.toThrow(); // times out
+    // Second attempt must be suppressed, not relaunched (no new stale login).
+    await expect(refresh.run()).rejects.toThrow(/suppress|timed out/i);
+  });
 });

From 94238eb8859e885319d9385e205e67966fd19297 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sun, 31 May 2026 20:49:30 -0400
Subject: [PATCH 06/12] fix(config): reject incomplete Bedrock config + allow
 bedrock fallback

Address PR review:
- detectProvider no longer returns a bedrock config when AWS_REGION is unset; it
  falls through to the next provider instead of deferring an unconstructable
  config to runtime. New isBedrockUsable() helper is shared with
  detectLlmProviderKind so capability detection can't report "llm" for a Bedrock
  config that cannot be built.
- Add "bedrock" to VALID_PROVIDERS so FALLBACK_PROVIDERS=bedrock is honored
  instead of silently dropped.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/config.ts                 | 36 ++++++++++++++++++++++++-----------
 test/bedrock-provider.test.ts | 12 ++++++++++++
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index 6f008c439..3fa31d811 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -49,6 +49,16 @@ function hasRealValue(v: string | undefined): v is string {
   return typeof v === "string" && v.trim().length > 0;
 }
 
+/**
+ * Bedrock is usable only when opted in (AWS_BEDROCK=true) AND a region is set —
+ * the AWS SDK needs a region to construct a client. Shared between detectProvider
+ * and detectLlmProviderKind so capability detection never reports a Bedrock
+ * config that cannot actually be built.
+ */
+function isBedrockUsable(env: Record<string, string>): boolean {
+  return env["AWS_BEDROCK"] === "true" && hasRealValue(env["AWS_REGION"]);
+}
+
 export function detectProvider(env: Record<string, string>): ProviderConfig {
   const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10);
 
@@ -57,18 +67,21 @@ export function detectProvider(env: Record<string, string>): ProviderConfig {
   // strict flag gate means it never fires for existing OpenAI/Ollama users.
   // Credentials come from the AWS provider chain (env / IAM role / SSO cache),
   // so we do NOT key detection on credential env vars — only the flag + region.
+  // Region is mandatory: without it Bedrock cannot be constructed, so we reject
+  // here and fall through rather than returning an unusable bedrock config.
   if (env["AWS_BEDROCK"] === "true") {
-    if (!hasRealValue(env["AWS_REGION"])) {
-      process.stderr.write(
-        "[agentmemory] AWS_BEDROCK=true but AWS_REGION is unset. " +
-          "Bedrock requires a region — set AWS_REGION in ~/.agentmemory/.env.\n",
-      );
+    if (isBedrockUsable(env)) {
+      return {
+        provider: "bedrock",
+        model: env["AWS_BEDROCK_MODEL"] || "anthropic.claude-haiku-4-5-20251001-v1:0",
+        maxTokens,
+      };
     }
-    return {
-      provider: "bedrock",
-      model: env["AWS_BEDROCK_MODEL"] || "anthropic.claude-haiku-4-5-20251001-v1:0",
-      maxTokens,
-    };
+    process.stderr.write(
+      "[agentmemory] AWS_BEDROCK=true but AWS_REGION is unset — ignoring Bedrock " +
+        "and falling through to the next provider. Set AWS_REGION in " +
+        "~/.agentmemory/.env to enable Bedrock.\n",
+    );
   }
 
   // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio
@@ -210,7 +223,7 @@ export function isDropStaleIndexEnabled(): boolean {
 export function detectLlmProviderKind(): "llm" | "noop" {
   const env = getMergedEnv();
   if (
-    env["AWS_BEDROCK"] === "true" ||
+    isBedrockUsable(env) ||
     hasRealValue(env["ANTHROPIC_API_KEY"]) ||
     hasRealValue(env["GEMINI_API_KEY"]) ||
     hasRealValue(env["GOOGLE_API_KEY"]) ||
@@ -409,6 +422,7 @@ export function getStandalonePersistPath(): string {
 
 const VALID_PROVIDERS = new Set([
   "anthropic",
+  "bedrock",
   "gemini",
   "openrouter",
   "agent-sdk",
diff --git a/test/bedrock-provider.test.ts b/test/bedrock-provider.test.ts
index 427da498e..0b5264224 100644
--- a/test/bedrock-provider.test.ts
+++ b/test/bedrock-provider.test.ts
@@ -97,4 +97,16 @@ describe("detectProvider — bedrock branch", () => {
     });
     expect(config.provider).not.toBe("bedrock");
   });
+
+  it("rejects bedrock when AWS_REGION is unset and falls through to the next provider", () => {
+    // AWS_BEDROCK=true but no region → Bedrock can't be constructed, so detection
+    // must not return an unusable bedrock config; it falls through to OpenAI here.
+    const config = detectProvider({ AWS_BEDROCK: "true", OPENAI_API_KEY: "sk-test" });
+    expect(config.provider).toBe("openai");
+  });
+
+  it("falls through to noop when AWS_BEDROCK=true but no region and no other provider", () => {
+    const config = detectProvider({ AWS_BEDROCK: "true" });
+    expect(config.provider).toBe("noop");
+  });
 });

From 9ac272aeae9da102a7d55cda6e969a1d309fe3fa Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sun, 31 May 2026 20:49:47 -0400
Subject: [PATCH 07/12] fix(auth-refresh): propagate retry errors + cover
 bedrock in fallback chains

Address PR review:
- resilient.ts: scope the inner catch to authRefresh.run() only. Previously it
  also trapped the post-refresh retry, so a failed retry surfaced the stale
  auth-expiry error and double-counted the circuit breaker. The retry now runs
  outside the try and propagates (and accounts for the breaker) normally.
- index.ts: derive the auth-refresh hook from every provider built (primary +
  fallback chain), not just config.provider, so a bedrock provider reachable
  only via FALLBACK_PROVIDERS still refreshes expired credentials.
- test: mock node:child_process and assert the real execFile call count for the
  single-flight, cooldown, and post-timeout-suppression cases (the old spy on
  run() only counted wrapper calls and depended on a host `true` binary).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/providers/index.ts     | 16 ++++++---
 src/providers/resilient.ts | 21 ++++++++----
 test/auth-refresh.test.ts  | 67 +++++++++++++++++++++++++++-----------
 3 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/src/providers/index.ts b/src/providers/index.ts
index 72a663aa2..613cc5859 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -1,6 +1,7 @@
 import type {
   MemoryProvider,
   ProviderConfig,
+  ProviderType,
   FallbackConfig,
 } from "../types.js";
 import { AgentSDKProvider } from "./agent-sdk.js";
@@ -30,9 +31,11 @@ function requireEnvVar(key: string): string {
 /**
  * Build the optional credential-refresh hook. Only the bedrock provider uses it
  * today, and only when AWS_AUTH_REFRESH is set; the mechanism itself is generic.
+ * Accepts every provider type that may be invoked (primary + fallback chain) so
+ * a bedrock provider reachable only via the fallback path still gets the hook.
  */
-function createAuthRefresh(config: ProviderConfig): AuthRefresh | undefined {
-  if (config.provider !== "bedrock") return undefined;
+function createAuthRefresh(providerTypes: ProviderType[]): AuthRefresh | undefined {
+  if (!providerTypes.includes("bedrock")) return undefined;
   const command = getEnvVar("AWS_AUTH_REFRESH");
   if (!command || !command.trim()) return undefined;
   const timeoutRaw = getEnvVar("AWS_AUTH_REFRESH_TIMEOUT_MS");
@@ -46,7 +49,7 @@ function createAuthRefresh(config: ProviderConfig): AuthRefresh | undefined {
 export function createProvider(config: ProviderConfig): ResilientProvider {
   return new ResilientProvider(
     createBaseProvider(config),
-    createAuthRefresh(config),
+    createAuthRefresh([config.provider]),
   );
 }
 
@@ -59,6 +62,7 @@ export function createFallbackProvider(
   }
 
   const providers: MemoryProvider[] = [createBaseProvider(config)];
+  const builtTypes: ProviderType[] = [config.provider];
   for (const providerType of fallbackConfig.providers) {
     if (providerType === config.provider) continue;
     try {
@@ -68,12 +72,16 @@ export function createFallbackProvider(
         maxTokens: config.maxTokens,
       };
       providers.push(createBaseProvider(fbConfig));
+      builtTypes.push(providerType);
     } catch {
       // skip unavailable fallback providers
     }
   }
 
-  const authRefresh = createAuthRefresh(config);
+  // Derive the refresh hook from every provider actually built (primary +
+  // fallbacks), so a bedrock provider reachable only via the fallback chain
+  // still refreshes expired credentials.
+  const authRefresh = createAuthRefresh(builtTypes);
   if (providers.length > 1) {
     return new ResilientProvider(
       new FallbackChainProvider(providers),
diff --git a/src/providers/resilient.ts b/src/providers/resilient.ts
index 1885fd509..6ca37bbfd 100644
--- a/src/providers/resilient.ts
+++ b/src/providers/resilient.ts
@@ -34,20 +34,27 @@ export class ResilientProvider implements MemoryProvider {
           provider: this.inner.name,
           error: err instanceof Error ? err.message : String(err),
         });
+        // Scope this catch to the refresh command ONLY. If refresh succeeds, the
+        // retry runs outside the try so its own error (and breaker accounting)
+        // propagates normally — otherwise a failed retry would surface the stale
+        // auth-expiry error and double-count the breaker (once in the retried
+        // call, once here).
+        let refreshed = false;
         try {
           await this.authRefresh.run();
+          refreshed = true;
+        } catch (refreshErr) {
+          logger.error("auth refresh command did not run", {
+            provider: this.inner.name,
+            reason: refreshErr instanceof Error ? refreshErr.message : String(refreshErr),
+          });
+        }
+        if (refreshed) {
           const result = await this.call(fn, true);
           logger.info("auth refresh recovered the provider call", {
             provider: this.inner.name,
           });
           return result;
-        } catch (refreshErr) {
-          // Refresh (or the post-refresh retry) failed. Log why, then fall
-          // through to record the original failure and propagate it.
-          logger.error("auth refresh did not recover the provider call", {
-            provider: this.inner.name,
-            reason: refreshErr instanceof Error ? refreshErr.message : String(refreshErr),
-          });
         }
       }
       this.breaker.recordFailure();
diff --git a/test/auth-refresh.test.ts b/test/auth-refresh.test.ts
index 129f10644..c00c979bc 100644
--- a/test/auth-refresh.test.ts
+++ b/test/auth-refresh.test.ts
@@ -1,4 +1,25 @@
-import { describe, expect, it, vi } from "vitest";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+// Controllable execFile mock: records call count and lets each test decide how
+// the spawned command resolves (success / error / timeout), so single-flight and
+// cooldown can be asserted on the REAL underlying call count rather than a spy on
+// the public method — and without depending on a host `true`/`sleep` binary.
+const execFileCalls: Array<{ cmd: string; args: string[] }> = [];
+let execFileBehavior: (cmd: string) => Error | null = () => null;
+
+vi.mock("node:child_process", () => ({
+  execFile: (
+    cmd: string,
+    args: string[],
+    _opts: unknown,
+    cb: (err: Error | null) => void,
+  ) => {
+    execFileCalls.push({ cmd, args });
+    // Resolve on a microtask so concurrent run() calls share one in-flight promise.
+    queueMicrotask(() => cb(execFileBehavior(cmd)));
+  },
+}));
+
 import {
   AuthRefresh,
   isAuthExpiry,
@@ -140,43 +161,51 @@ describe("ResilientProvider — auth-refresh retry", () => {
 });
 
 describe("AuthRefresh — single-flight + cooldown", () => {
+  beforeEach(() => {
+    execFileCalls.length = 0;
+    execFileBehavior = () => null; // default: command succeeds
+  });
+  afterEach(() => {
+    execFileBehavior = () => null;
+  });
+
   it("coalesces concurrent calls into a single command run (single-flight)", async () => {
-    const refresh = new AuthRefresh({ command: "true" }); // /usr/bin/true exits 0
-    const spy = vi.spyOn(
-      refresh as unknown as { run: () => Promise<void> },
-      "run",
-    );
-    // Fire three concurrently; the in-flight promise is shared.
+    const refresh = new AuthRefresh({ command: "aws sso login --profile p" });
+    // Fire three concurrently; they share one in-flight promise, so execFile —
+    // the REAL underlying spawn — must run exactly once.
     await Promise.all([refresh.run(), refresh.run(), refresh.run()]);
-    // The spy wraps the public method so all three are counted, but the
-    // underlying execFile should only run once — assert via timing/no throw.
-    expect(spy).toHaveBeenCalled();
+    expect(execFileCalls).toHaveLength(1);
+    expect(execFileCalls[0]).toEqual({ cmd: "aws", args: ["sso", "login", "--profile", "p"] });
   });
 
   it("rejects an empty command", async () => {
     const refresh = new AuthRefresh({ command: "   " });
     await expect(refresh.run()).rejects.toThrow(/empty/);
+    expect(execFileCalls).toHaveLength(0);
   });
 
-  it("enforces a cooldown between sequential attempts", async () => {
-    const refresh = new AuthRefresh({ command: "true", cooldownMs: 60_000 });
-    await refresh.run(); // first succeeds
+  it("enforces a cooldown between sequential attempts (no second spawn)", async () => {
+    const refresh = new AuthRefresh({ command: "aws sso login", cooldownMs: 60_000 });
+    await refresh.run(); // first succeeds → 1 spawn
     await expect(refresh.run()).rejects.toThrow(/cooldown/);
+    expect(execFileCalls).toHaveLength(1); // cooldown blocked the second spawn
   });
 
   it("does NOT relaunch after a timeout (post-timeout suppression window)", async () => {
-    // `sleep 5` exceeds the 50ms timeout → execFile kills it → counts as a
-    // timed-out interactive login. cooldownMs:0 isolates the suppression path:
-    // any rejection on the next run() must come from post-timeout backoff, not
-    // the ordinary cooldown.
+    // Make the mocked command resolve with a timeout-shaped error (execFile sets
+    // killed:true + SIGTERM on timeout). cooldownMs:0 isolates the suppression
+    // path: any rejection on the next run() must come from post-timeout backoff.
+    execFileBehavior = () =>
+      Object.assign(new Error("timed out"), { killed: true, signal: "SIGTERM" });
     const refresh = new AuthRefresh({
-      command: "sleep 5",
+      command: "aws sso login",
       timeoutMs: 50,
       cooldownMs: 0,
       postTimeoutCooldownMs: 60_000,
     });
-    await expect(refresh.run()).rejects.toThrow(); // times out
+    await expect(refresh.run()).rejects.toThrow(); // times out → 1 spawn
     // Second attempt must be suppressed, not relaunched (no new stale login).
     await expect(refresh.run()).rejects.toThrow(/suppress|timed out/i);
+    expect(execFileCalls).toHaveLength(1); // suppression blocked the relaunch
   });
 });

From f1ab618c4a5914492790b36557a0daf57fa54178 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sun, 31 May 2026 20:49:47 -0400
Subject: [PATCH 08/12] fix(embedding): assert Bedrock returns one vector per
 input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address PR review: if the Cohere response returns fewer rows than the input
batch, the texts↔vectors mapping silently misaligns downstream
(withDimensionGuard only checks each vector's length, not batch cardinality).
Throw on a row-count mismatch instead.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/providers/embedding/bedrock.ts      | 9 +++++++++
 test/bedrock-embedding-provider.test.ts | 8 ++++++++
 2 files changed, 17 insertions(+)

diff --git a/src/providers/embedding/bedrock.ts b/src/providers/embedding/bedrock.ts
index 6f84e1b09..c0fb3e767 100644
--- a/src/providers/embedding/bedrock.ts
+++ b/src/providers/embedding/bedrock.ts
@@ -166,6 +166,15 @@ export class BedrockEmbeddingProvider implements EmbeddingProvider {
       const rows = Array.isArray(embeddings)
         ? (embeddings as number[][])
         : (embeddings.float ?? []);
+      // Fail fast on a cardinality mismatch: fewer rows than inputs would
+      // silently misalign texts to vectors downstream (withDimensionGuard only
+      // checks each vector's length, not the batch count).
+      if (rows.length !== slice.length) {
+        throw new Error(
+          `Bedrock embedding returned ${rows.length} vectors for ${slice.length} inputs ` +
+            `(model "${this.model}") — refusing to misalign texts to vectors.`,
+        );
+      }
       for (const row of rows) out.push(new Float32Array(row));
     }
     return out;
diff --git a/test/bedrock-embedding-provider.test.ts b/test/bedrock-embedding-provider.test.ts
index 4bcfc9771..3c3e43837 100644
--- a/test/bedrock-embedding-provider.test.ts
+++ b/test/bedrock-embedding-provider.test.ts
@@ -128,6 +128,14 @@ describe("BedrockEmbeddingProvider", () => {
     });
   });
 
+  it("throws when the response returns fewer vectors than inputs (no silent misalignment)", async () => {
+    // Two inputs, but the model returns one vector — must fail fast rather than
+    // misalign texts to vectors downstream.
+    cannedResponse = () => ({ embeddings: { float: [new Array(1024).fill(0.1)] } });
+    const p = new BedrockEmbeddingProvider();
+    await expect(p.embedBatch(["one", "two"])).rejects.toThrow(/1 vectors for 2 inputs|misalign/);
+  });
+
   it("parses the bare-array response shape for Cohere v3", async () => {
     process.env["AWS_BEDROCK_EMBEDDING_MODEL"] = "cohere.embed-english-v3";
     cannedResponse = (body) => {

From 021180a991eda7dfbe9a707f5fdc3033d56a4074 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Sun, 31 May 2026 20:53:51 -0400
Subject: [PATCH 09/12] update README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 714ef5beb..b6a293694 100644
--- a/README.md
+++ b/README.md
@@ -1174,7 +1174,7 @@ AWS_BEDROCK_MODEL=anthropic.claude-haiku-4-5-20251001-v1:0      # optional; this
 ```
 
 - **Credentials** come from the standard AWS credential provider chain — environment credentials, IAM roles, or an SSO profile cached under `~/.aws/sso/cache/` (select the profile with `AWS_PROFILE`). No static keys are required. To force static keys (e.g. in CI), set **both** `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
-- **SSO** works out of the box, but agentmemory only *reads* the cached token — it cannot perform the login. Run `aws sso login --profile <name>` first, and again when the session expires. To re-establish an expired session automatically, set the auth-refresh hook below.
+- **SSO** works out of the box, but agentmemory only *reads* the cached token — it cannot perform the login. Run `aws sso login --profile <name>` first. When the session expires, either re-run it manually or configure the auth-refresh hook (below) to automate re-authentication.
 - **Auth-refresh hook** (optional): when a Bedrock call fails with an expired-token error, agentmemory can run a command of your choosing and retry once:
   ```bash
   AWS_AUTH_REFRESH=aws sso login --profile my-sso-profile

From ecc7248df72b2eb919df4eeb3fa52fa1a59dece6 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Mon, 1 Jun 2026 10:19:09 -0400
Subject: [PATCH 10/12] fix(config): accept case-insensitive AWS_BEDROCK flag

AWS_BEDROCK was gated on an exact `=== "true"` match, so a natural
`AWS_BEDROCK=True`/`TRUE` silently skipped the Bedrock branch and fell
through to the no-op provider with a misleading "No LLM provider key
found" warning. Normalize the flag via a case-insensitive isEnvTrue
helper (trims whitespace too) shared by the detectProvider gate and
isBedrockUsable, so True/TRUE/" true " all opt in while non-boolean
values like "1" are still rejected.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/config.ts                 | 22 ++++++++++++++++++++--
 test/bedrock-provider.test.ts |  7 +++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index 3fa31d811..6cddd8370 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -49,6 +49,24 @@ function hasRealValue(v: string | undefined): v is string {
   return typeof v === "string" && v.trim().length > 0;
 }
 
+/**
+ * Case-insensitive boolean-env check. Accepts "true" in any case with
+ * surrounding whitespace (e.g. "True", "TRUE", " true ") so a natural
+ * capitalization of AWS_BEDROCK=True doesn't silently disable Bedrock.
+ */
+function isEnvTrue(v: string | undefined): boolean {
+  return typeof v === "string" && v.trim().toLowerCase() === "true";
+}
+
+/**
+ * Bedrock is opted in when AWS_BEDROCK is truthy. The AWS_BEDROCK === "true"
+ * gate in detectProvider and the usability check below share this so both
+ * accept the same set of values.
+ */
+function isBedrockOptIn(env: Record<string, string>): boolean {
+  return isEnvTrue(env["AWS_BEDROCK"]);
+}
+
 /**
  * Bedrock is usable only when opted in (AWS_BEDROCK=true) AND a region is set —
  * the AWS SDK needs a region to construct a client. Shared between detectProvider
@@ -56,7 +74,7 @@ function hasRealValue(v: string | undefined): v is string {
  * config that cannot actually be built.
  */
 function isBedrockUsable(env: Record<string, string>): boolean {
-  return env["AWS_BEDROCK"] === "true" && hasRealValue(env["AWS_REGION"]);
+  return isBedrockOptIn(env) && hasRealValue(env["AWS_REGION"]);
 }
 
 export function detectProvider(env: Record<string, string>): ProviderConfig {
@@ -69,7 +87,7 @@ export function detectProvider(env: Record<string, string>): ProviderConfig {
   // so we do NOT key detection on credential env vars — only the flag + region.
   // Region is mandatory: without it Bedrock cannot be constructed, so we reject
   // here and fall through rather than returning an unusable bedrock config.
-  if (env["AWS_BEDROCK"] === "true") {
+  if (isBedrockOptIn(env)) {
     if (isBedrockUsable(env)) {
       return {
         provider: "bedrock",
diff --git a/test/bedrock-provider.test.ts b/test/bedrock-provider.test.ts
index 0b5264224..437ebc8e7 100644
--- a/test/bedrock-provider.test.ts
+++ b/test/bedrock-provider.test.ts
@@ -98,6 +98,13 @@ describe("detectProvider — bedrock branch", () => {
     expect(config.provider).not.toBe("bedrock");
   });
 
+  it("selects bedrock for a case-insensitive AWS_BEDROCK (True/TRUE/ true )", () => {
+    for (const flag of ["True", "TRUE", " true "]) {
+      const config = detectProvider({ AWS_BEDROCK: flag, AWS_REGION: "us-east-1" });
+      expect(config.provider).toBe("bedrock");
+    }
+  });
+
   it("rejects bedrock when AWS_REGION is unset and falls through to the next provider", () => {
     // AWS_BEDROCK=true but no region → Bedrock can't be constructed, so detection
     // must not return an unusable bedrock config; it falls through to OpenAI here.

From 279377e6c2446561b0e7a5a0d6a09e21678e24bf Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Mon, 1 Jun 2026 11:52:50 -0400
Subject: [PATCH 11/12] Add "prepare" to scripts in package.json to allow
 from-github npm installation

---
 package.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/package.json b/package.json
index b47f694fd..63dc2de07 100644
--- a/package.json
+++ b/package.json
@@ -18,6 +18,7 @@
   },
   "scripts": {
     "build": "tsdown && (cp iii-config.yaml dist/ 2>/dev/null || true) && (cp iii-config.docker.yaml dist/ 2>/dev/null || true) && (cp docker-compose.yml dist/ 2>/dev/null || true) && (cp .env.example dist/ 2>/dev/null || true) && mkdir -p dist/viewer && cp src/viewer/index.html dist/viewer/ && cp src/viewer/favicon.svg dist/viewer/",
+    "prepare": "npm run build",
     "dev": "tsx src/index.ts",
     "start": "node dist/cli.mjs",
     "migrate": "node dist/functions/migrate.js",

From cc115bde9ccff0559de4be2f7728a48404f9ca66 Mon Sep 17 00:00:00 2001
From: acpiper <andy.piper@gmail.com>
Date: Mon, 1 Jun 2026 15:23:12 -0400
Subject: [PATCH 12/12] refactor(config): trim WHAT-comments on bedrock env
 helpers

Per repo guideline (no WHAT-comments in src/), reduce the docblocks on
isEnvTrue/isBedrockOptIn/isBedrockUsable to one-line WHY notes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/config.ts | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index 6cddd8370..91d6a7863 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -49,30 +49,17 @@ function hasRealValue(v: string | undefined): v is string {
   return typeof v === "string" && v.trim().length > 0;
 }
 
-/**
- * Case-insensitive boolean-env check. Accepts "true" in any case with
- * surrounding whitespace (e.g. "True", "TRUE", " true ") so a natural
- * capitalization of AWS_BEDROCK=True doesn't silently disable Bedrock.
- */
+/** Prevents AWS_BEDROCK=True / TRUE from silently disabling Bedrock. */
 function isEnvTrue(v: string | undefined): boolean {
   return typeof v === "string" && v.trim().toLowerCase() === "true";
 }
 
-/**
- * Bedrock is opted in when AWS_BEDROCK is truthy. The AWS_BEDROCK === "true"
- * gate in detectProvider and the usability check below share this so both
- * accept the same set of values.
- */
+/** Shared so detectProvider and isBedrockUsable gate on the same opt-in values. */
 function isBedrockOptIn(env: Record<string, string>): boolean {
   return isEnvTrue(env["AWS_BEDROCK"]);
 }
 
-/**
- * Bedrock is usable only when opted in (AWS_BEDROCK=true) AND a region is set —
- * the AWS SDK needs a region to construct a client. Shared between detectProvider
- * and detectLlmProviderKind so capability detection never reports a Bedrock
- * config that cannot actually be built.
- */
+/** A region is required to construct the client, so capability detection never reports an unbuildable config. */
 function isBedrockUsable(env: Record<string, string>): boolean {
   return isBedrockOptIn(env) && hasRealValue(env["AWS_REGION"]);
 }