Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@

# MAX_TOKENS=4096 # Cap LLM completion tokens for compression / summarise calls

# Optional cheaper model for compress() work only: per-observation compression,
# graph extraction, query expansion — the bulk of background LLM volume (one
# call per tool use under AGENTMEMORY_AUTO_COMPRESS=true). Summarization,
# consolidation synthesis, and reflection stay on the main model above.
# The value must be a model name valid for the detected provider. Ignored by
# the agent-sdk / noop providers. Also ignored by FALLBACK_PROVIDERS chains,
# because model names are provider-specific (same reasoning as #778).
# AGENTMEMORY_COMPRESS_MODEL=gpt-5.4-nano

# Outbound LLM / embedding timeout — shared across every raw-fetch provider
# (Gemini, OpenRouter, MiniMax, OpenAI LLM, and OpenAI/Cohere/Voyage/OpenRouter
# embedding). The OpenAI LLM path also honours the OpenAI-scoped
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,15 @@ agentmemory prints a runtime warning when `OPENROUTER_MODEL` matches a premium-t

Quality vs cost tradeoff for memory work: compression is a summarization task with relatively loose quality bars (the agent re-reads the summary, not the user). DeepSeek-V4-Pro / Qwen3-Coder land within rounding error of Sonnet on this task while costing ~10× less. Save the premium-tier models for queries you read directly.

You can also split the workload instead of downgrading everything: `AGENTMEMORY_COMPRESS_MODEL` routes `compress()`-side work (per-observation compression, graph extraction, query expansion — the bulk of background call volume) to a cheaper model while summarization, consolidation synthesis, and reflection stay on your main model.

```env
OPENAI_MODEL=your-main-model # summaries, consolidation, lessons
AGENTMEMORY_COMPRESS_MODEL=your-cheap-model # per-observation compression etc.
```

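The value must be a model name valid for the detected provider. It is ignored by the agent-sdk / noop providers. It is also ignored by `FALLBACK_PROVIDERS` chains, because model names are provider-specific and a name that is valid for the primary provider may not exist on a fallback provider.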

Sources: [OpenRouter pricing for Sonnet 4.6](https://openrouter.ai/anthropic/claude-sonnet-4.6/pricing), [DeepSeek V4 Pro](https://openrouter.ai/deepseek/deepseek-v4-pro), [DeepSeek pricing notes](https://api-docs.deepseek.com/quick_start/pricing/).

### Multi-agent memory (`AGENT_ID` + `AGENTMEMORY_AGENT_SCOPE`)
Expand Down Expand Up @@ -1408,6 +1417,11 @@ Create `~/.agentmemory/.env`:
# # but no content.
# OPENAI_API_KEY_FOR_LLM=false # Optional: set to false to skip OpenAI auto-detection
# # for LLM (useful if you only want OpenAI for embeddings)
# Optional cheaper model for compress()-side work only (per-observation
# compression, graph extraction, query expansion); summaries / consolidation /
# reflection stay on the main model. Must be valid for the detected provider.
# AGENTMEMORY_COMPRESS_MODEL=gpt-5.4-nano

# Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk);
# leave OFF unless you understand the Stop-hook recursion risk (#149 follow-up):
# AGENTMEMORY_ALLOW_AGENT_SDK=true
Expand Down
14 changes: 13 additions & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,20 @@ function hasRealValue(v: string | undefined): v is string {

function detectProvider(env: Record<string, string>): ProviderConfig {
const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10);
// Compression dominates background LLM volume (one call per observation),
// so it gets an optional dedicated model while summarize/consolidate/reflect
// stay on the main one. Provider-agnostic: the value must be a model name
// valid for whichever provider is detected below.
const compressModel = hasRealValue(env["AGENTMEMORY_COMPRESS_MODEL"])
? { compressModel: env["AGENTMEMORY_COMPRESS_MODEL"] }
: {};

// OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio
if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") {
return {
provider: "openai",
model: env["OPENAI_MODEL"] || "gpt-4o-mini",
...compressModel,
maxTokens,
baseURL: env["OPENAI_BASE_URL"],
};
Expand All @@ -67,6 +75,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
return {
provider: "minimax",
model: env["MINIMAX_MODEL"] || "MiniMax-M2.7",
...compressModel,
maxTokens,
};
}
Expand All @@ -75,6 +84,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
return {
provider: "anthropic",
model: env["ANTHROPIC_MODEL"] || "claude-sonnet-4-20250514",
...compressModel,
maxTokens,
baseURL: env["ANTHROPIC_BASE_URL"],
};
Expand All @@ -89,6 +99,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
return {
provider: "gemini",
model: env["GEMINI_MODEL"] || "gemini-2.5-flash",
...compressModel,
maxTokens,
};
}
Expand Down Expand Up @@ -119,6 +130,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
return {
provider: "openrouter",
model,
...compressModel,
maxTokens,
};
}
Expand Down Expand Up @@ -182,7 +194,7 @@ export function loadConfig(): AgentMemoryConfig {
provider,
tokenBudget: safeParseInt(env["TOKEN_BUDGET"], 2000),
maxObservationsPerSession: safeParseInt(env["MAX_OBS_PER_SESSION"], 500),
compressionModel: provider.model,
compressionModel: provider.compressModel ?? provider.model,
dataDir: DATA_DIR,
};
}
Expand Down
20 changes: 16 additions & 4 deletions src/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@ export class AnthropicProvider implements MemoryProvider {
name = 'anthropic'
private client: Anthropic
private model: string
private compressModel?: string
private maxTokens: number

constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) {
constructor(
apiKey: string,
model: string,
maxTokens: number,
baseURL?: string,
compressModel?: string,
) {
this.client = new Anthropic({ apiKey, ...(baseURL ? { baseURL } : {}) })
this.model = model
this.compressModel = compressModel
this.maxTokens = maxTokens
}

async compress(systemPrompt: string, userPrompt: string): Promise<string> {
return this.call(systemPrompt, userPrompt)
return this.call(systemPrompt, userPrompt, this.compressModel)
}

async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
Expand All @@ -41,9 +49,13 @@ export class AnthropicProvider implements MemoryProvider {
return textBlock?.text ?? ''
}

private async call(systemPrompt: string, userPrompt: string): Promise<string> {
private async call(
systemPrompt: string,
userPrompt: string,
modelOverride?: string,
): Promise<string> {
const response = await this.client.messages.create({
model: this.model,
model: modelOverride ?? this.model,
max_tokens: this.maxTokens,
system: systemPrompt,
messages: [{ role: 'user', content: userPrompt }],
Expand Down
8 changes: 7 additions & 1 deletion src/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ export function createFallbackProvider(
// override) rather than copying config.model from the primary.
// Without this, FALLBACK_PROVIDERS=gemini on an OpenAI primary
// would call Gemini with `gpt-4o-mini`, get a 404 every time,
// and trip the circuit breaker.
// and trip the circuit breaker. AGENTMEMORY_COMPRESS_MODEL is
// excluded for the same reason: model names are provider-specific.
const fbConfig: ProviderConfig = {
provider: providerType,
model: defaultModelFor(providerType),
Expand All @@ -99,13 +100,15 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
requireEnvVar("MINIMAX_API_KEY"),
config.model,
config.maxTokens,
config.compressModel,
);
case "anthropic":
return new AnthropicProvider(
requireEnvVar("ANTHROPIC_API_KEY"),
config.model,
config.maxTokens,
config.baseURL,
config.compressModel,
);
case "gemini": {
const geminiKey =
Expand All @@ -120,6 +123,7 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
config.model,
config.maxTokens,
"https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
config.compressModel,
);
}
case "openrouter":
Expand All @@ -128,6 +132,7 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
config.model,
config.maxTokens,
"https://openrouter.ai/api/v1/chat/completions",
config.compressModel,
);
case "openai": {
const openaiKey = getEnvVar("OPENAI_API_KEY");
Expand All @@ -141,6 +146,7 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
config.model,
config.maxTokens,
config.baseURL,
config.compressModel,
);
}
case "noop":
Expand Down
14 changes: 10 additions & 4 deletions src/providers/minimax.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,32 @@ export class MinimaxProvider implements MemoryProvider {
name = 'minimax'
private apiKey: string
private model: string
private compressModel?: string
private maxTokens: number
private baseUrl: string

constructor(apiKey: string, model: string, maxTokens: number) {
constructor(apiKey: string, model: string, maxTokens: number, compressModel?: string) {
this.apiKey = apiKey
this.model = model
this.compressModel = compressModel
this.maxTokens = maxTokens
this.baseUrl =
getEnvVar('MINIMAX_BASE_URL') || 'https://api.minimax.io/anthropic'
}

async compress(systemPrompt: string, userPrompt: string): Promise<string> {
return this.call(systemPrompt, userPrompt)
return this.call(systemPrompt, userPrompt, this.compressModel)
}

/**
 * Runs a summarization prompt by delegating to `call()`.
 *
 * No model override is passed, so the request is sent with `this.model`.
 *
 * @param systemPrompt - Forwarded to `call()` as the system prompt.
 * @param userPrompt - Forwarded to `call()` as the user message content.
 * @returns Whatever `call()` resolves to for these prompts.
 */
async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
return this.call(systemPrompt, userPrompt)
}

private async call(systemPrompt: string, userPrompt: string): Promise<string> {
private async call(
systemPrompt: string,
userPrompt: string,
modelOverride?: string,
): Promise<string> {
const url = `${this.baseUrl}/v1/messages`
const response = await fetchWithTimeout(url, {
method: 'POST',
Expand All @@ -49,7 +55,7 @@ export class MinimaxProvider implements MemoryProvider {
'anthropic-version': '2023-06-01',
},
body: JSON.stringify({
model: this.model,
model: modelOverride ?? this.model,
max_tokens: this.maxTokens,
system: systemPrompt,
messages: [{ role: 'user', content: userPrompt }],
Expand Down
20 changes: 16 additions & 4 deletions src/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,24 @@ export class OpenAIProvider implements MemoryProvider {
name = "openai";
private apiKey: string;
private model: string;
private compressModel?: string;
private maxTokens: number;
private baseUrl: string;
private reasoningEffort?: string;
private timeoutMs: number;
private isAzure: boolean;
private azureApiVersion: string;

constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) {
constructor(
apiKey: string,
model: string,
maxTokens: number,
baseURL?: string,
compressModel?: string,
) {
this.apiKey = apiKey;
this.model = model;
this.compressModel = compressModel;
this.maxTokens = maxTokens;
this.baseUrl = normalizeBaseUrl(baseURL || getEnvVar("OPENAI_BASE_URL"));
this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
Expand All @@ -68,17 +76,21 @@ export class OpenAIProvider implements MemoryProvider {
}

async compress(systemPrompt: string, userPrompt: string): Promise<string> {
return this.call(systemPrompt, userPrompt);
return this.call(systemPrompt, userPrompt, this.compressModel);
}

/**
 * Runs a summarization prompt by delegating to `call()`.
 *
 * No model override is passed, so the request is sent with `this.model`.
 *
 * @param systemPrompt - Forwarded to `call()` as the system prompt.
 * @param userPrompt - Forwarded to `call()` as the user prompt.
 * @returns Whatever `call()` resolves to for these prompts.
 */
async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
return this.call(systemPrompt, userPrompt);
}

private async call(systemPrompt: string, userPrompt: string): Promise<string> {
private async call(
systemPrompt: string,
userPrompt: string,
modelOverride?: string,
): Promise<string> {
const url = buildChatUrl(this.baseUrl, this.isAzure, this.azureApiVersion);
const body: Record<string, unknown> = {
model: this.model,
model: modelOverride ?? this.model,
max_tokens: this.maxTokens,
// OpenAI API spec defines `stream` as defaulting to false, so omitting
// it should yield a JSON response. Some OpenAI-compatible proxies
Expand Down
8 changes: 6 additions & 2 deletions src/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export class OpenRouterProvider implements MemoryProvider {
name: string;
private apiKey: string;
private model: string;
private compressModel?: string;
private maxTokens: number;
private baseUrl: string;

Expand All @@ -13,16 +14,18 @@ export class OpenRouterProvider implements MemoryProvider {
model: string,
maxTokens: number,
baseUrl: string,
compressModel?: string,
) {
this.apiKey = apiKey;
this.model = model;
this.compressModel = compressModel;
this.maxTokens = maxTokens;
this.baseUrl = baseUrl;
this.name = baseUrl.includes("openrouter") ? "openrouter" : "gemini";
}

async compress(systemPrompt: string, userPrompt: string): Promise<string> {
return this.call(systemPrompt, userPrompt);
return this.call(systemPrompt, userPrompt, this.compressModel);
}

async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
Expand All @@ -32,6 +35,7 @@ export class OpenRouterProvider implements MemoryProvider {
private async call(
systemPrompt: string,
userPrompt: string,
modelOverride?: string,
): Promise<string> {
const response = await fetchWithTimeout(this.baseUrl, {
method: "POST",
Expand All @@ -43,7 +47,7 @@ export class OpenRouterProvider implements MemoryProvider {
: {}),
},
body: JSON.stringify({
model: this.model,
model: modelOverride ?? this.model,
max_tokens: this.maxTokens,
messages: [
{ role: "system", content: systemPrompt },
Expand Down
2 changes: 2 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ export interface HookPayload {
export interface ProviderConfig {
provider: ProviderType;
model: string;
/** Optional cheaper model for compress() calls only; summarize() stays on `model` */
compressModel?: string;
maxTokens: number;
/** Optional base URL override (e.g. for Anthropic-compatible APIs or local proxies) */
baseURL?: string;
Expand Down
Loading