Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
message content arrays so provider prefix caches can keep matching the stable
user-input prefix across date, route, and working-set changes.
- Projected mode, approval, and tool-taxonomy prompt metadata per request
instead of mutating stored system prompts, keeping provider prefix-cache
inputs byte-stable while preserving mode-specific instructions (#2687).
Thanks @LeoAlex0 for the implementation.
- Softened contribution intake automation: external issues now receive a warm
triage note and are never auto-closed by the contribution gate, while the PR
gate copy makes clear that dry-run observations are about maintainer safety,
Expand Down
4 changes: 4 additions & 0 deletions crates/tui/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
message content arrays so provider prefix caches can keep matching the stable
user-input prefix across date, route, and working-set changes.
- Projected mode, approval, and tool-taxonomy prompt metadata per request
instead of mutating stored system prompts, keeping provider prefix-cache
inputs byte-stable while preserving mode-specific instructions (#2687).
Thanks @LeoAlex0 for the implementation.
- Softened contribution intake automation: external issues now receive a warm
triage note and are never auto-closed by the contribution gate, while the PR
gate copy makes clear that dry-run observations are about maintainer safety,
Expand Down
23 changes: 19 additions & 4 deletions crates/tui/src/commands/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1225,10 +1225,25 @@ mod tests {
let result = cache(&mut app, Some("inspect"));
let msg = result.message.expect("inspect output");

assert!(msg.contains("original_chars=14000"), "got: {msg}");
assert!(msg.contains("truncated=true"), "got: {msg}");
assert!(msg.contains("deduplicated=false"), "got: {msg}");
assert!(msg.contains("deduplicated=true"), "got: {msg}");
let tool_budget_lines: Vec<_> = msg
.lines()
.filter(|line| line.contains("original_chars=14000"))
.collect();
assert_eq!(tool_budget_lines.len(), 2, "got: {msg}");

let first_sighting = tool_budget_lines
.iter()
.find(|line| line.contains("deduplicated=false"))
.expect("first tool-result sighting should report non-dedup metadata");
assert!(first_sighting.contains("sent_chars="), "got: {msg}");
assert!(first_sighting.contains("truncated=true"), "got: {msg}");

let repeat_sighting = tool_budget_lines
.iter()
.find(|line| line.contains("deduplicated=true"))
.expect("repeat tool-result sighting should report dedup metadata");
assert!(repeat_sighting.contains("sent_chars="), "got: {msg}");
assert!(repeat_sighting.contains("truncated=false"), "got: {msg}");
}

#[test]
Expand Down
138 changes: 116 additions & 22 deletions crates/tui/src/core/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,6 @@ impl Engine {
show_thinking: config.show_thinking,
allow_shell: config.allow_shell,
},
session.approval_mode,
);
let stable_prompt = Some(system_prompt);
session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
Expand Down Expand Up @@ -853,11 +852,12 @@ impl Engine {
self.session.trust_mode = trust_mode;
self.config.trust_mode = trust_mode;
self.session.auto_approve = auto_approve;
self.session.approval_mode = if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
approval_mode
};
let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
// Only track the Agent-mode approval — Yolo/Plan have fixed
// approval policies that are derived from the mode itself.
if mode == AppMode::Agent {
self.session.approval_mode = agent_approval_mode;
}

let _ = self
.tx_event
Expand Down Expand Up @@ -1236,7 +1236,6 @@ impl Engine {
Op::ChangeMode { mode } => {
let previous_mode = self.current_mode;
self.current_mode = mode;
self.refresh_system_prompt(mode);
self.emit_session_updated().await;
// Notify the agent that the mode has changed so it can re-evaluate
// any operations that were blocked by the previous mode's policy.
Expand All @@ -1253,11 +1252,11 @@ impl Engine {
)))
.await;
}
Op::SetModel { model, mode } => {
Op::SetModel { model, mode: _ } => {
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
self.session.model = model;
self.config.model.clone_from(&self.session.model);
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;
let _ = self
.tx_event
Expand Down Expand Up @@ -1304,6 +1303,10 @@ impl Engine {
self.session.compaction_summary_prompt =
extract_compaction_summary_prompt(system_prompt.clone());
self.session.system_prompt = system_prompt;
self.session.last_system_prompt_hash =
Some(system_prompt_hash(self.session.system_prompt.as_ref()));
// Host-supplied prompts are persisted prefixes. Keep them
// byte-stable; mode/runtime state is projected per request.
self.session.system_prompt_override =
system_prompt_override && self.session.system_prompt.is_some();
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
Expand Down Expand Up @@ -1485,6 +1488,18 @@ In {new} mode: {policy}\n\n\
}
}

fn runtime_prompt_message(&self) -> Message {
let mode = self.current_mode;
let approval_mode = approval_mode_for(mode, self.session.approval_mode);
Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: runtime_prompt_text(mode, approval_mode),
cache_control: None,
}],
}
}

fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
self.user_text_message_with_turn_metadata_for_route(
text,
Expand Down Expand Up @@ -1633,6 +1648,14 @@ In {new} mode: {policy}\n\n\
.observe_user_message(&content, &self.session.workspace);
let force_update_plan_first = should_force_update_plan_first(mode, &content);

let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
self.session.auto_approve = auto_approve;
// Only track the Agent-mode approval — Yolo/Plan have fixed
// approval policies that are derived from the mode itself.
if mode == AppMode::Agent {
self.session.approval_mode = agent_approval_mode;
}

// Add user message to session
let user_msg = self.user_text_message_with_turn_metadata_for_route(
content,
Expand Down Expand Up @@ -1670,15 +1693,10 @@ In {new} mode: {policy}\n\n\
self.config.trust_mode = trust_mode;
self.config.translation_enabled = translation_enabled;
self.config.show_thinking = show_thinking;
self.session.auto_approve = auto_approve;
self.session.approval_mode = if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
approval_mode
};

// Update system prompt to match current mode and include persisted compaction context.
self.refresh_system_prompt(mode);
// Refresh stable prompt context. Current mode is carried by the
// request-time runtime prompt projection.
self.refresh_system_prompt();
self.emit_session_updated().await;

// Build tool registry and tool list for the current mode
Expand Down Expand Up @@ -2430,16 +2448,16 @@ In {new} mode: {policy}\n\n\
)))
.await;
}
/// Refresh the system prompt based on current mode and context.
fn refresh_system_prompt(&mut self, mode: AppMode) {
/// Refresh the stable system prompt based on current non-mode context.
fn refresh_system_prompt(&mut self) {
let user_memory_block =
crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
let prompt_goal_objective = goal_objective_for_prompt(
self.config.goal_objective.as_deref(),
&self.config.goal_state,
);
let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval(
mode,
AppMode::Agent,
&self.config.workspace,
None,
Some(&self.config.skills_dir),
Expand All @@ -2454,7 +2472,6 @@ In {new} mode: {policy}\n\n\
show_thinking: self.config.show_thinking,
allow_shell: self.session.allow_shell,
},
self.session.approval_mode,
);
let mut stable_prompt =
merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
Expand All @@ -2472,7 +2489,6 @@ In {new} mode: {policy}\n\n\

let stable_hash = system_prompt_hash(stable_prompt.as_ref());
if self.session.system_prompt_override {
self.session.last_system_prompt_hash = Some(stable_hash);
return;
}
if self.session.last_system_prompt_hash != Some(stable_hash) {
Expand Down Expand Up @@ -2634,6 +2650,84 @@ fn goal_objective_for_prompt(
normalized_goal_objective(configured_goal)
}

// ── Mode & approval prompts as request-time runtime metadata ─────────
//
// Mode contracts and approval policies are not persisted in the session
// history and are not sent as extra system messages. Instead, each API
// request projects a transient user-role runtime metadata message at the
// tail. The stable system prompt remains byte-stable, stored history remains
// byte-stable, and strict chat-template providers never see a system message
// outside messages[0].

fn approval_mode_for(
mode: AppMode,
session_approval: crate::tui::approval::ApprovalMode,
) -> crate::tui::approval::ApprovalMode {
match mode {
AppMode::Yolo => crate::tui::approval::ApprovalMode::Auto,
AppMode::Plan => crate::tui::approval::ApprovalMode::Never,
AppMode::Agent => session_approval,
}
}

fn agent_approval_mode_for_turn(
auto_approve: bool,
approval_mode: crate::tui::approval::ApprovalMode,
) -> crate::tui::approval::ApprovalMode {
if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
approval_mode
}
}

fn mode_prompt_marker(mode: AppMode) -> String {
format!(
"<mode_prompt mode=\"{}\">",
match mode {
AppMode::Agent => "agent",
AppMode::Plan => "plan",
AppMode::Yolo => "yolo",
}
)
}

fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
format!(
"<approval_policy policy=\"{}\">",
match approval_mode {
crate::tui::approval::ApprovalMode::Auto => "auto",
crate::tui::approval::ApprovalMode::Suggest => "suggest",
crate::tui::approval::ApprovalMode::Never => "never",
}
)
}

fn mode_prompt_text(mode: AppMode) -> &'static str {
match mode {
AppMode::Agent => prompts::AGENT_MODE,
AppMode::Plan => prompts::PLAN_MODE,
AppMode::Yolo => prompts::YOLO_MODE,
}
}

fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
let marker = mode_prompt_marker(mode);
let mode_text = mode_prompt_text(mode).trim();
let taxonomy = prompts::render_core_tool_taxonomy_block(mode);
let approval_marker = approval_prompt_marker(approval_mode);
let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim();
format!(
"<runtime_prompt visibility=\"internal\">\n\
This is runtime control metadata for the current request, not user input. \
Apply it to the next assistant response and tool calls. It supersedes any \
earlier mode or approval metadata in the transcript.\n\n\
{marker}\n{taxonomy}\n{mode_text}\n</mode_prompt>\n\n\
{approval_marker}\n{approval_text}\n</approval_policy>\n\
</runtime_prompt>"
)
}

/// Spawn the engine in a background task
pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
let (engine, handle) = Engine::new(config, api_config);
Expand Down
18 changes: 7 additions & 11 deletions crates/tui/src/core/engine/capacity_flow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl Engine {
pub(super) async fn run_capacity_post_tool_checkpoint(
&mut self,
turn: &TurnContext,
mode: AppMode,

tool_registry: Option<&crate::tools::ToolRegistry>,
tool_exec_lock: Arc<RwLock<()>>,
mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
Expand All @@ -56,7 +56,6 @@ impl Engine {
let _ = self
.apply_verify_with_tool_replay(
turn,
mode,
snapshot.as_ref(),
tool_registry,
tool_exec_lock,
Expand All @@ -66,7 +65,7 @@ impl Engine {
false
}
GuardrailAction::VerifyAndReplan => {
self.apply_verify_and_replan(turn, mode, snapshot.as_ref(), "high_risk_post_tool")
self.apply_verify_and_replan(turn, snapshot.as_ref(), "high_risk_post_tool")
.await
}
GuardrailAction::NoIntervention | GuardrailAction::TargetedContextRefresh => false,
Expand All @@ -76,7 +75,7 @@ impl Engine {
pub(super) async fn run_capacity_error_escalation_checkpoint(
&mut self,
turn: &TurnContext,
mode: AppMode,

step_error_count: usize,
consecutive_tool_error_steps: u32,
error_categories: &[ErrorCategory],
Expand Down Expand Up @@ -136,7 +135,6 @@ impl Engine {
let category_labels: Vec<String> = error_categories.iter().map(|c| c.to_string()).collect();
self.apply_verify_and_replan(
turn,
mode,
Some(&forced),
&format!(
"error_escalation: step_errors={}, consecutive_steps={}, categories={}",
Expand Down Expand Up @@ -385,7 +383,7 @@ impl Engine {
&mut self,
turn: &TurnContext,
client: Option<&DeepSeekClient>,
mode: AppMode,
_mode: AppMode,
snapshot: Option<&CapacitySnapshot>,
) -> bool {
let before_tokens = self.estimated_input_tokens();
Expand Down Expand Up @@ -465,7 +463,7 @@ impl Engine {
GuardrailAction::TargetedContextRefresh,
None,
)));
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;

let after_tokens = self.estimated_input_tokens();
Expand All @@ -487,7 +485,6 @@ impl Engine {
pub(super) async fn apply_verify_with_tool_replay(
&mut self,
turn: &TurnContext,
mode: AppMode,
snapshot: Option<&CapacitySnapshot>,
tool_registry: Option<&crate::tools::ToolRegistry>,
tool_exec_lock: Arc<RwLock<()>>,
Expand Down Expand Up @@ -617,7 +614,7 @@ impl Engine {
GuardrailAction::VerifyWithToolReplay,
Some(&verification_note),
)));
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;

let after_tokens = self.estimated_input_tokens();
Expand All @@ -638,7 +635,6 @@ impl Engine {
pub(super) async fn apply_verify_and_replan(
&mut self,
turn: &TurnContext,
mode: AppMode,
snapshot: Option<&CapacitySnapshot>,
reason: &str,
) -> bool {
Expand Down Expand Up @@ -685,7 +681,7 @@ impl Engine {
GuardrailAction::VerifyAndReplan,
Some("Replan now from canonical state. Keep steps minimal and verifiable."),
)));
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;

let _ = self
Expand Down
Loading
Loading