livekit · AKomplished-bug · Dec 11, 2025
diff --git a/livekit-agents/livekit/agents/voice/__init__.py b/livekit-agents/livekit/agents/voice/__init__.py
@@ -10,6 +10,7 @@
     ConversationItemAddedEvent,
     ErrorEvent,
     FunctionToolsExecutedEvent,
+    FunctionToolsExecutingEvent,
     MetricsCollectedEvent,
     RunContext,
     SpeechCreatedEvent,
@@ -43,6 +44,7 @@
     "UserStateChangedEvent",
     "AgentStateChangedEvent",
     "FunctionToolsExecutedEvent",
+    "FunctionToolsExecutingEvent",
     "AgentFalseInterruptionEvent",
     "TranscriptSynchronizer",
     "io",

diff --git a/livekit-agents/livekit/agents/voice/agent_activity.py b/livekit-agents/livekit/agents/voice/agent_activity.py
@@ -57,6 +57,7 @@
     AgentFalseInterruptionEvent,
     ErrorEvent,
     FunctionToolsExecutedEvent,
+    FunctionToolsExecutingEvent,
     MetricsCollectedEvent,
     SpeechCreatedEvent,
     UserInputTranscribedEvent,
@@ -1926,6 +1927,10 @@ def _on_first_frame(_: asyncio.Future[None]) -> None:
         # messages in RunResult are ordered by the `created_at` field
         def _tool_execution_started_cb(fnc_call: llm.FunctionCall) -> None:
             speech_handle._item_added([fnc_call])
+            self._session.emit(
+                "function_tools_executing",
+                FunctionToolsExecutingEvent(function_call=fnc_call),
+            )
 
         def _tool_execution_completed_cb(out: ToolExecutionOutput) -> None:
             if out.fnc_call_out:
@@ -2396,6 +2401,10 @@ def _tool_execution_started_cb(fnc_call: llm.FunctionCall) -> None:
             speech_handle._item_added([fnc_call])
             self._agent._chat_ctx.items.append(fnc_call)
             self._session._tool_items_added([fnc_call])
+            self._session.emit(
+                "function_tools_executing",
+                FunctionToolsExecutingEvent(function_call=fnc_call),
+            )
 
         def _tool_execution_completed_cb(out: ToolExecutionOutput) -> None:
             if out.fnc_call_out:

diff --git a/livekit-agents/livekit/agents/voice/background_audio.py b/livekit-agents/livekit/agents/voice/background_audio.py
@@ -20,7 +20,7 @@
 from ..utils.aio import cancel_and_wait
 from ..utils.audio import audio_frames_from_file
 from .agent_session import AgentSession
-from .events import AgentStateChangedEvent
+from .events import AgentStateChangedEvent, FunctionToolsExecutedEvent, FunctionToolsExecutingEvent
 
 _resource_stack = contextlib.ExitStack()
 atexit.register(_resource_stack.close)
@@ -73,18 +73,21 @@ def __init__(
         thinking_sound: NotGivenOr[
             AudioSource | AudioConfig | list[AudioConfig] | None
         ] = NOT_GIVEN,
+        tool_calling_sound: NotGivenOr[
+            AudioSource | AudioConfig | list[AudioConfig] | None
+        ] = NOT_GIVEN,
         stream_timeout_ms: int = 200,
     ) -> None:
         """
-        Initializes the BackgroundAudio component with optional ambient and thinking sounds.
+        Initializes the BackgroundAudio component with optional ambient, thinking, and tool calling sounds.
 
         This component creates and publishes a continuous audio track to a LiveKit room while managing
-        the playback of ambient and agent “thinking” sounds. It supports three types of audio sources:
+        the playback of ambient, agent "thinking", and tool calling sounds. It supports three types of audio sources:
         - A BuiltinAudioClip enum value, which will use a pre-defined sound from the package resources
         - A file path (string) pointing to an audio file, which can be looped.
         - An AsyncIterator that yields rtc.AudioFrame
 
-        When a list (or AudioConfig) is supplied, the component considers each sound’s volume and probability:
+        When a list (or AudioConfig) is supplied, the component considers each sound's volume and probability:
         - The probability value determines the chance that a particular sound is selected for playback.
         - A total probability below 1.0 means there is a chance no sound will be selected (resulting in silence).
 
@@ -94,13 +97,21 @@ def __init__(
                 For AsyncIterator sources, ensure the iterator is infinite or looped.
 
             thinking_sound (NotGivenOr[Union[AudioSource, AudioConfig, List[AudioConfig], None]], optional):
-                The sound to be played when the associated agent enters a “thinking” state. This can be a single
-                sound source or a list of AudioConfig objects (with volume and probability settings).
+                The sound to be played when the associated agent enters a "thinking" state (LLM processing).
+                This can be a single sound source or a list of AudioConfig objects (with volume and
+                probability settings). If tool_calling_sound is also provided, this sound will be stopped
+                when a tool starts executing.
+
+            tool_calling_sound (NotGivenOr[Union[AudioSource, AudioConfig, List[AudioConfig], None]], optional):
+                The sound to be played when a function tool starts executing. This allows for a different
+                sound during tool execution vs regular LLM thinking. When tool execution completes, the
+                sound will stop. If not provided, thinking_sound will continue playing during tool execution.
 
         """  # noqa: E501
 
         self._ambient_sound = ambient_sound if is_given(ambient_sound) else None
         self._thinking_sound = thinking_sound if is_given(thinking_sound) else None
+        self._tool_calling_sound = tool_calling_sound if is_given(tool_calling_sound) else None
 
         self._audio_source = rtc.AudioSource(48000, 1, queue_size_ms=_AUDIO_SOURCE_BUFFER_MS)
         self._audio_mixer = rtc.AudioMixer(
@@ -116,6 +127,7 @@ def __init__(
 
         self._ambient_handle: PlayHandle | None = None
         self._thinking_handle: PlayHandle | None = None
+        self._tool_calling_handle: PlayHandle | None = None
 
     def _select_sound_from_list(self, sounds: list[AudioConfig]) -> AudioConfig | None:
         """
@@ -266,6 +278,11 @@ async def start(
 
             if self._agent_session:
                 self._agent_session.on("agent_state_changed", self._agent_state_changed)
+                if self._tool_calling_sound:
+                    self._agent_session.on(
+                        "function_tools_executing", self._function_tools_executing
+                    )
+                    self._agent_session.on("function_tools_executed", self._function_tools_executed)
 
             if self._ambient_sound:
                 normalized = self._normalize_sound_source(
@@ -301,6 +318,13 @@ async def aclose(self) -> None:
 
             if self._agent_session:
                 self._agent_session.off("agent_state_changed", self._agent_state_changed)
+                if self._tool_calling_sound:
+                    self._agent_session.off(
+                        "function_tools_executing", self._function_tools_executing
+                    )
+                    self._agent_session.off(
+                        "function_tools_executed", self._function_tools_executed
+                    )
 
             self._room.off("reconnected", self._on_reconnected)
 
@@ -331,6 +355,29 @@ def _agent_state_changed(self, ev: AgentStateChangedEvent) -> None:
         elif self._thinking_handle:
             self._thinking_handle.stop()
 
+    def _function_tools_executing(self, ev: FunctionToolsExecutingEvent) -> None:
+        """Swap the thinking sound for the looping tool calling sound when a tool starts."""
+        sound = self._tool_calling_sound
+        if not sound:
+            return
+
+        # The tool calling sound takes over from the thinking sound while the tool runs.
+        if (thinking := self._thinking_handle) and not thinking.done():
+            thinking.stop()
+
+        # This event is emitted once per function call, so only start the loop when no
+        # tool calling sound is active; _function_tools_executed stops the loop.
+        if not (active := self._tool_calling_handle) or active.done():
+            self._tool_calling_handle = self.play(
+                cast(Union[AudioSource, AudioConfig, list[AudioConfig]], sound),
+                loop=True,
+            )
+
+    def _function_tools_executed(self, ev: FunctionToolsExecutedEvent) -> None:
+        """Stop the tool calling sound, if it is still playing, once tools have executed."""
+        if (handle := self._tool_calling_handle) and not handle.done():
+            handle.stop()
+
     @log_exceptions(logger=logger)
     async def _play_task(
         self, play_handle: PlayHandle, sound: AudioSource, volume: float, loop: bool

diff --git a/livekit-agents/livekit/agents/voice/events.py b/livekit-agents/livekit/agents/voice/events.py
@@ -87,6 +87,7 @@ async def wait_for_playout(self) -> None:
     "user_input_transcribed",
     "conversation_item_added",
     "agent_false_interruption",
+    "function_tools_executing",
     "function_tools_executed",
     "metrics_collected",
     "speech_created",
@@ -155,6 +156,19 @@ class ConversationItemAddedEvent(BaseModel):
     created_at: float = Field(default_factory=time.time)
 
 
+class FunctionToolsExecutingEvent(BaseModel):
+    """Event emitted when a function tool starts executing.
+
+    Emitted once per function call from the tool-execution-started callback, so listeners
+    can react right away (e.g., play a different sound during tool execution).
+    """
+
+    type: Literal["function_tools_executing"] = "function_tools_executing"  # event name
+    function_call: FunctionCall
+    """The function call that is about to be executed."""
+    created_at: float = Field(default_factory=time.time)  # time.time() at construction
+
+
 class FunctionToolsExecutedEvent(BaseModel):
     type: Literal["function_tools_executed"] = "function_tools_executed"
     function_calls: list[FunctionCall]
@@ -233,6 +247,7 @@ class CloseEvent(BaseModel):
         AgentFalseInterruptionEvent,
         MetricsCollectedEvent,
         ConversationItemAddedEvent,
+        FunctionToolsExecutingEvent,
         FunctionToolsExecutedEvent,
         SpeechCreatedEvent,
         ErrorEvent,