Hmbown · Hmbown · Jun 5, 2026 · Jun 3, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Appended volatile `<turn_meta>` blocks after user text in outgoing user
   message content arrays so provider prefix caches can keep matching the stable
   user-input prefix across date, route, and working-set changes.
+- Projected mode, approval, and tool-taxonomy prompt metadata per request
+  instead of mutating stored system prompts, keeping provider prefix-cache
+  inputs byte-stable while preserving mode-specific instructions (#2687).
+  Thanks @LeoAlex0 for the implementation.
 - Softened contribution intake automation: external issues now receive a warm
   triage note and are never auto-closed by the contribution gate, while the PR
   gate copy makes clear that dry-run observations are about maintainer safety,

diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Appended volatile `<turn_meta>` blocks after user text in outgoing user
   message content arrays so provider prefix caches can keep matching the stable
   user-input prefix across date, route, and working-set changes.
+- Projected mode, approval, and tool-taxonomy prompt metadata per request
+  instead of mutating stored system prompts, keeping provider prefix-cache
+  inputs byte-stable while preserving mode-specific instructions (#2687).
+  Thanks @LeoAlex0 for the implementation.
 - Softened contribution intake automation: external issues now receive a warm
   triage note and are never auto-closed by the contribution gate, while the PR
   gate copy makes clear that dry-run observations are about maintainer safety,

diff --git a/crates/tui/src/commands/debug.rs b/crates/tui/src/commands/debug.rs
@@ -1225,10 +1225,25 @@ mod tests {
         let result = cache(&mut app, Some("inspect"));
         let msg = result.message.expect("inspect output");
 
-        assert!(msg.contains("original_chars=14000"), "got: {msg}");
-        assert!(msg.contains("truncated=true"), "got: {msg}");
-        assert!(msg.contains("deduplicated=false"), "got: {msg}");
-        assert!(msg.contains("deduplicated=true"), "got: {msg}");
+        let tool_budget_lines: Vec<_> = msg
+            .lines()
+            .filter(|line| line.contains("original_chars=14000"))
+            .collect();
+        assert_eq!(tool_budget_lines.len(), 2, "got: {msg}");
+
+        let first_sighting = tool_budget_lines
+            .iter()
+            .find(|line| line.contains("deduplicated=false"))
+            .expect("first tool-result sighting should report non-dedup metadata");
+        assert!(first_sighting.contains("sent_chars="), "got: {msg}");
+        assert!(first_sighting.contains("truncated=true"), "got: {msg}");
+
+        let repeat_sighting = tool_budget_lines
+            .iter()
+            .find(|line| line.contains("deduplicated=true"))
+            .expect("repeat tool-result sighting should report dedup metadata");
+        assert!(repeat_sighting.contains("sent_chars="), "got: {msg}");
+        assert!(repeat_sighting.contains("truncated=false"), "got: {msg}");
     }
 
     #[test]

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
@@ -687,7 +687,6 @@ impl Engine {
                     show_thinking: config.show_thinking,
                     allow_shell: config.allow_shell,
                 },
-                session.approval_mode,
             );
         let stable_prompt = Some(system_prompt);
         session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
@@ -853,11 +852,12 @@ impl Engine {
         self.session.trust_mode = trust_mode;
         self.config.trust_mode = trust_mode;
         self.session.auto_approve = auto_approve;
-        self.session.approval_mode = if auto_approve {
-            crate::tui::approval::ApprovalMode::Auto
-        } else {
-            approval_mode
-        };
+        let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
+        // Only track the Agent-mode approval — Yolo/Plan have fixed
+        // approval policies that are derived from the mode itself.
+        if mode == AppMode::Agent {
+            self.session.approval_mode = agent_approval_mode;
+        }
 
         let _ = self
             .tx_event
@@ -1236,7 +1236,6 @@ impl Engine {
                 Op::ChangeMode { mode } => {
                     let previous_mode = self.current_mode;
                     self.current_mode = mode;
-                    self.refresh_system_prompt(mode);
                     self.emit_session_updated().await;
                     // Notify the agent that the mode has changed so it can re-evaluate
                     // any operations that were blocked by the previous mode's policy.
@@ -1253,11 +1252,11 @@ impl Engine {
                         )))
                         .await;
                 }
-                Op::SetModel { model, mode } => {
+                Op::SetModel { model, mode: _ } => {
                     self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
                     self.session.model = model;
                     self.config.model.clone_from(&self.session.model);
-                    self.refresh_system_prompt(mode);
+                    self.refresh_system_prompt();
                     self.emit_session_updated().await;
                     let _ = self
                         .tx_event
@@ -1304,6 +1303,10 @@ impl Engine {
                     self.session.compaction_summary_prompt =
                         extract_compaction_summary_prompt(system_prompt.clone());
                     self.session.system_prompt = system_prompt;
+                    self.session.last_system_prompt_hash =
+                        Some(system_prompt_hash(self.session.system_prompt.as_ref()));
+                    // Host-supplied prompts are persisted prefixes. Keep them
+                    // byte-stable; mode/runtime state is projected per request.
                     self.session.system_prompt_override =
                         system_prompt_override && self.session.system_prompt.is_some();
                     self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
@@ -1485,6 +1488,18 @@ In {new} mode: {policy}\n\n\
         }
     }
 
+    /// Builds the user-role message that carries the runtime prompt text.
+    fn runtime_prompt_message(&self) -> Message {
+        let policy = approval_mode_for(self.current_mode, self.session.approval_mode);
+        let block = ContentBlock::Text {
+            text: runtime_prompt_text(self.current_mode, policy),
+            cache_control: None,
+        };
+        let role = "user".to_string();
+        let content = vec![block];
+        Message { role, content }
+    }
+
     fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
         self.user_text_message_with_turn_metadata_for_route(
             text,
@@ -1633,6 +1648,14 @@ In {new} mode: {policy}\n\n\
             .observe_user_message(&content, &self.session.workspace);
         let force_update_plan_first = should_force_update_plan_first(mode, &content);
 
+        let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
+        self.session.auto_approve = auto_approve;
+        // Only track the Agent-mode approval — Yolo/Plan have fixed
+        // approval policies that are derived from the mode itself.
+        if mode == AppMode::Agent {
+            self.session.approval_mode = agent_approval_mode;
+        }
+
         // Add user message to session
         let user_msg = self.user_text_message_with_turn_metadata_for_route(
             content,
@@ -1670,15 +1693,10 @@ In {new} mode: {policy}\n\n\
         self.config.trust_mode = trust_mode;
         self.config.translation_enabled = translation_enabled;
         self.config.show_thinking = show_thinking;
-        self.session.auto_approve = auto_approve;
-        self.session.approval_mode = if auto_approve {
-            crate::tui::approval::ApprovalMode::Auto
-        } else {
-            approval_mode
-        };
 
-        // Update system prompt to match current mode and include persisted compaction context.
-        self.refresh_system_prompt(mode);
+        // Refresh stable prompt context. Current mode is carried by the
+        // request-time runtime prompt projection.
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         // Build tool registry and tool list for the current mode
@@ -2430,16 +2448,16 @@ In {new} mode: {policy}\n\n\
             )))
             .await;
     }
-    /// Refresh the system prompt based on current mode and context.
-    fn refresh_system_prompt(&mut self, mode: AppMode) {
+    /// Refresh the stable system prompt based on current non-mode context.
+    fn refresh_system_prompt(&mut self) {
         let user_memory_block =
             crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
         let prompt_goal_objective = goal_objective_for_prompt(
             self.config.goal_objective.as_deref(),
             &self.config.goal_state,
         );
         let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval(
-            mode,
+            AppMode::Agent,
             &self.config.workspace,
             None,
             Some(&self.config.skills_dir),
@@ -2454,7 +2472,6 @@ In {new} mode: {policy}\n\n\
                 show_thinking: self.config.show_thinking,
                 allow_shell: self.session.allow_shell,
             },
-            self.session.approval_mode,
         );
         let mut stable_prompt =
             merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
@@ -2472,7 +2489,6 @@ In {new} mode: {policy}\n\n\
 
         let stable_hash = system_prompt_hash(stable_prompt.as_ref());
         if self.session.system_prompt_override {
-            self.session.last_system_prompt_hash = Some(stable_hash);
             return;
         }
         if self.session.last_system_prompt_hash != Some(stable_hash) {
@@ -2634,6 +2650,105 @@ fn goal_objective_for_prompt(
     normalized_goal_objective(configured_goal)
 }
 
+// ── Mode & approval prompts as request-time runtime metadata ─────────
+//
+// Neither the mode contract nor the approval policy is written into the
+// persisted session history, and neither is sent as an extra system
+// message. Every API request instead projects one transient user-role
+// runtime metadata message at the tail. The stable system prompt and the
+// stored history therefore stay byte-stable, and strict chat-template
+// providers never see a system message outside messages[0].
+
+/// Resolves the approval policy that is in effect for `mode`.
+///
+/// Yolo always yields `Auto` and Plan always yields `Never`; only Agent
+/// passes the caller-supplied `session_approval` through.
+fn approval_mode_for(
+    mode: AppMode,
+    session_approval: crate::tui::approval::ApprovalMode,
+) -> crate::tui::approval::ApprovalMode {
+    use crate::tui::approval::ApprovalMode;
+
+    match mode {
+        AppMode::Agent => session_approval,
+        AppMode::Plan => ApprovalMode::Never,
+        AppMode::Yolo => ApprovalMode::Auto,
+    }
+}
+
+/// Picks the approval policy to record for an Agent-mode turn.
+///
+/// A set `auto_approve` flag forces `Auto`; otherwise the requested
+/// `approval_mode` is returned unchanged.
+fn agent_approval_mode_for_turn(
+    auto_approve: bool,
+    approval_mode: crate::tui::approval::ApprovalMode,
+) -> crate::tui::approval::ApprovalMode {
+    if !auto_approve {
+        return approval_mode;
+    }
+    crate::tui::approval::ApprovalMode::Auto
+}
+
+/// Returns the opening `<mode_prompt>` tag labelled with `mode`.
+fn mode_prompt_marker(mode: AppMode) -> String {
+    let label = match mode {
+        AppMode::Agent => "agent",
+        AppMode::Plan => "plan",
+        AppMode::Yolo => "yolo",
+    };
+    format!("<mode_prompt mode=\"{label}\">")
+}
+
+/// Returns the opening `<approval_policy>` tag labelled with the policy.
+fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
+    use crate::tui::approval::ApprovalMode;
+
+    let policy = match approval_mode {
+        ApprovalMode::Auto => "auto",
+        ApprovalMode::Suggest => "suggest",
+        ApprovalMode::Never => "never",
+    };
+    format!("<approval_policy policy=\"{policy}\">")
+}
+
+/// Returns the static instruction text that `prompts` defines for `mode`.
+fn mode_prompt_text(mode: AppMode) -> &'static str {
+    match mode {
+        AppMode::Yolo => prompts::YOLO_MODE,
+        AppMode::Plan => prompts::PLAN_MODE,
+        AppMode::Agent => prompts::AGENT_MODE,
+    }
+}
+
+/// Renders the `<runtime_prompt>` text for one request.
+///
+/// The result is a fixed preamble, a `<mode_prompt>` section (tool
+/// taxonomy followed by the trimmed mode text) and an `<approval_policy>`
+/// section (the trimmed approval text for `mode` and `approval_mode`).
+fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
+    let mode_section = format!(
+        "{}\n{}\n{}\n</mode_prompt>",
+        mode_prompt_marker(mode),
+        prompts::render_core_tool_taxonomy_block(mode),
+        mode_prompt_text(mode).trim(),
+    );
+    let approval_section = format!(
+        "{}\n{}\n</approval_policy>",
+        approval_prompt_marker(approval_mode),
+        prompts::approval_prompt_for_mode(mode, approval_mode).trim(),
+    );
+    format!(
+        "<runtime_prompt visibility=\"internal\">\n\
+This is runtime control metadata for the current request, not user input. \
+Apply it to the next assistant response and tool calls. It supersedes any \
+earlier mode or approval metadata in the transcript.\n\n\
+{mode_section}\n\n\
+{approval_section}\n\
+</runtime_prompt>"
+    )
+}
+
 /// Spawn the engine in a background task
 pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
     let (engine, handle) = Engine::new(config, api_config);

diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs
@@ -36,7 +36,6 @@ impl Engine {
     pub(super) async fn run_capacity_post_tool_checkpoint(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
         tool_registry: Option<&crate::tools::ToolRegistry>,
         tool_exec_lock: Arc<RwLock<()>>,
         mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
@@ -56,7 +56,6 @@ impl Engine {
                 let _ = self
                     .apply_verify_with_tool_replay(
                         turn,
-                        mode,
                         snapshot.as_ref(),
                         tool_registry,
                         tool_exec_lock,
@@ -66,7 +65,7 @@ impl Engine {
                 false
             }
             GuardrailAction::VerifyAndReplan => {
-                self.apply_verify_and_replan(turn, mode, snapshot.as_ref(), "high_risk_post_tool")
+                self.apply_verify_and_replan(turn, snapshot.as_ref(), "high_risk_post_tool")
                     .await
             }
             GuardrailAction::NoIntervention | GuardrailAction::TargetedContextRefresh => false,
@@ -76,7 +75,6 @@ impl Engine {
     pub(super) async fn run_capacity_error_escalation_checkpoint(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
         step_error_count: usize,
         consecutive_tool_error_steps: u32,
         error_categories: &[ErrorCategory],
@@ -136,7 +135,6 @@ impl Engine {
         let category_labels: Vec<String> = error_categories.iter().map(|c| c.to_string()).collect();
         self.apply_verify_and_replan(
             turn,
-            mode,
             Some(&forced),
             &format!(
                 "error_escalation: step_errors={}, consecutive_steps={}, categories={}",
@@ -385,7 +383,7 @@ impl Engine {
         &mut self,
         turn: &TurnContext,
         client: Option<&DeepSeekClient>,
-        mode: AppMode,
+        _mode: AppMode,
         snapshot: Option<&CapacitySnapshot>,
     ) -> bool {
         let before_tokens = self.estimated_input_tokens();
@@ -465,7 +463,7 @@ impl Engine {
             GuardrailAction::TargetedContextRefresh,
             None,
         )));
-        self.refresh_system_prompt(mode);
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         let after_tokens = self.estimated_input_tokens();
@@ -487,7 +485,6 @@ impl Engine {
     pub(super) async fn apply_verify_with_tool_replay(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
         snapshot: Option<&CapacitySnapshot>,
         tool_registry: Option<&crate::tools::ToolRegistry>,
         tool_exec_lock: Arc<RwLock<()>>,
@@ -617,7 +614,7 @@ impl Engine {
             GuardrailAction::VerifyWithToolReplay,
             Some(&verification_note),
         )));
-        self.refresh_system_prompt(mode);
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         let after_tokens = self.estimated_input_tokens();
@@ -638,7 +635,6 @@ impl Engine {
     pub(super) async fn apply_verify_and_replan(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
         snapshot: Option<&CapacitySnapshot>,
         reason: &str,
     ) -> bool {
@@ -685,7 +681,7 @@ impl Engine {
             GuardrailAction::VerifyAndReplan,
             Some("Replan now from canonical state. Keep steps minimal and verifiable."),
         )));
-        self.refresh_system_prompt(mode);
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         let _ = self