Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12,088 changes: 12,086 additions & 2 deletions .fern/replay.lock

Large diffs are not rendered by default.

21 changes: 13 additions & 8 deletions reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@ Create and start a Conversational AI agent instance.
<dd>

```python
from agora_agent import Agora, MicrosoftTtsParams, Tts_Microsoft
from agora_agent import (
Agora,
Asr_Ares,
Llm,
LlmParams,
MicrosoftTtsParams,
Tts_Microsoft,
)
from agora_agent.agents import (
StartAgentsRequestProperties,
StartAgentsRequestPropertiesAsr,
StartAgentsRequestPropertiesLlm,
StartAgentsRequestPropertiesTurnDetection,
StartAgentsRequestPropertiesTurnDetectionConfig,
StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech,
Expand All @@ -51,23 +56,23 @@ client.agents.start(
agent_rtc_uid="1001",
remote_rtc_uids=["1002"],
idle_timeout=120,
asr=StartAgentsRequestPropertiesAsr(
language="en-US",
),
asr=Asr_Ares(),
tts=Tts_Microsoft(
params=MicrosoftTtsParams(
key="key",
region="region",
voice_name="voice_name",
),
),
llm=StartAgentsRequestPropertiesLlm(
llm=Llm(
url="https://api.openai.com/v1/chat/completions",
api_key="<your_llm_key>",
system_messages=[
{"role": "system", "content": "You are a helpful chatbot."}
],
params={"model": "gpt-4o-mini"},
params=LlmParams(
model="gpt-4o-mini",
),
max_history=32,
greeting_message="Hello, how can I assist you today?",
failure_message="Please hold on a second.",
Expand Down
2 changes: 2 additions & 0 deletions src/agora_agent/agentkit/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig
from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule
from ..types.tts import Tts
from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule
from ..types.tts import Tts
from ..agent_management.types.agent_think_agent_management_request_on_listening_action import (
AgentThinkAgentManagementRequestOnListeningAction,
)
Expand Down
1 change: 1 addition & 0 deletions src/agora_agent/agentkit/agent_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
is_generic_avatar,
is_heygen_avatar,
is_live_avatar_avatar,
is_rtc_avatar,
validate_avatar_config,
validate_tts_sample_rate,
)
Expand Down
43 changes: 43 additions & 0 deletions src/agora_agent/agentkit/vendors/avatar.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,49 @@ def to_config(self) -> Dict[str, Any]:
return {"enable": enable, "vendor": "generic", "params": params}


# Option set accepted by the "generic" avatar vendor. Unknown keyword
# arguments are rejected because the model is configured with extra="forbid".
class GenericAvatarOptions(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # Required fields: construction fails validation when any is missing.
    api_key: str = Field(
        ...,
        description="Generic avatar provider API key",
    )
    api_base_url: str = Field(
        ...,
        description="Avatar provider API base URL",
    )
    avatar_id: str = Field(
        ...,
        description="Avatar ID",
    )
    agora_uid: str = Field(
        ...,
        description="Agora UID for the avatar video stream",
    )

    # Optional Agora connection fields; each defaults to None.
    agora_appid: Optional[str] = Field(
        default=None,
        description="Agora App ID; filled by AgentSession when omitted",
    )
    agora_token: Optional[str] = Field(
        default=None,
        description="RTC token; generated by AgentSession when omitted",
    )
    agora_channel: Optional[str] = Field(
        default=None,
        description="Agora channel; filled by AgentSession when omitted",
    )

    # Optional behaviour switches and vendor pass-through values.
    enable: Optional[bool] = Field(
        default=None,
        description="Enable avatar (default: true)",
    )
    additional_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional vendor-specific parameters",
    )


class GenericAvatar(BaseAvatar):
    """Avatar backed by the "generic" vendor.

    Keyword arguments are validated through ``GenericAvatarOptions`` and
    kept on ``self.options``.
    """

    def __init__(self, **kwargs: Any):
        self.options = GenericAvatarOptions(**kwargs)

    @property
    def required_sample_rate(self) -> int:
        """Return 0.

        NOTE(review): presumably 0 means "no specific TTS sample rate is
        required" -- confirm against BaseAvatar and its callers.
        """
        return 0

    def to_config(self) -> Dict[str, Any]:
        """Build the avatar configuration dictionary.

        Returns:
            A dict with the keys ``enable``, ``vendor`` (always
            ``"generic"``) and ``params``.
        """
        opts = self.options

        vendor_params: Dict[str, Any] = {
            "api_key": opts.api_key,
            "api_base_url": opts.api_base_url,
            "avatar_id": opts.avatar_id,
            "agora_uid": opts.agora_uid,
        }

        # Optional Agora fields are emitted only when a value was supplied.
        optional_fields = (
            ("agora_appid", opts.agora_appid),
            ("agora_token", opts.agora_token),
            ("agora_channel", opts.agora_channel),
        )
        vendor_params.update(
            {key: value for key, value in optional_fields if value is not None}
        )

        extras = opts.additional_params
        if extras is not None:
            # Explicit fields are unpacked last, so they override any
            # same-named key coming from additional_params.
            vendor_params = {**extras, **vendor_params}

        enabled = True if opts.enable is None else opts.enable
        return {"enable": enabled, "vendor": "generic", "params": vendor_params}


class AnamAvatarOptions(BaseModel):
model_config = ConfigDict(extra="forbid")

Expand Down
1 change: 1 addition & 0 deletions src/agora_agent/agentkit/vendors/mllm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field
Expand Down
42 changes: 26 additions & 16 deletions src/agora_agent/agents/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ def start(

Examples
--------
from agora_agent import Agora, MicrosoftTtsParams, Tts_Microsoft
from agora_agent import (
Agora,
Asr_Ares,
Llm,
LlmParams,
MicrosoftTtsParams,
Tts_Microsoft,
)
from agora_agent.agents import (
StartAgentsRequestProperties,
StartAgentsRequestPropertiesAsr,
StartAgentsRequestPropertiesLlm,
StartAgentsRequestPropertiesTurnDetection,
StartAgentsRequestPropertiesTurnDetectionConfig,
StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech,
Expand All @@ -108,23 +113,23 @@ def start(
agent_rtc_uid="1001",
remote_rtc_uids=["1002"],
idle_timeout=120,
asr=StartAgentsRequestPropertiesAsr(
language="en-US",
),
asr=Asr_Ares(),
tts=Tts_Microsoft(
params=MicrosoftTtsParams(
key="key",
region="region",
voice_name="voice_name",
),
),
llm=StartAgentsRequestPropertiesLlm(
llm=Llm(
url="https://api.openai.com/v1/chat/completions",
api_key="<your_llm_key>",
system_messages=[
{"role": "system", "content": "You are a helpful chatbot."}
],
params={"model": "gpt-4o-mini"},
params=LlmParams(
model="gpt-4o-mini",
),
max_history=32,
greeting_message="Hello, how can I assist you today?",
failure_message="Please hold on a second.",
Expand Down Expand Up @@ -641,11 +646,16 @@ async def start(
--------
import asyncio

from agora_agent import AsyncAgora, MicrosoftTtsParams, Tts_Microsoft
from agora_agent import (
Asr_Ares,
AsyncAgora,
Llm,
LlmParams,
MicrosoftTtsParams,
Tts_Microsoft,
)
from agora_agent.agents import (
StartAgentsRequestProperties,
StartAgentsRequestPropertiesAsr,
StartAgentsRequestPropertiesLlm,
StartAgentsRequestPropertiesTurnDetection,
StartAgentsRequestPropertiesTurnDetectionConfig,
StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech,
Expand All @@ -668,23 +678,23 @@ async def main() -> None:
agent_rtc_uid="1001",
remote_rtc_uids=["1002"],
idle_timeout=120,
asr=StartAgentsRequestPropertiesAsr(
language="en-US",
),
asr=Asr_Ares(),
tts=Tts_Microsoft(
params=MicrosoftTtsParams(
key="key",
region="region",
voice_name="voice_name",
),
),
llm=StartAgentsRequestPropertiesLlm(
llm=Llm(
url="https://api.openai.com/v1/chat/completions",
api_key="<your_llm_key>",
system_messages=[
{"role": "system", "content": "You are a helpful chatbot."}
],
params={"model": "gpt-4o-mini"},
params=LlmParams(
model="gpt-4o-mini",
),
max_history=32,
greeting_message="Hello, how can I assist you today?",
failure_message="Please hold on a second.",
Expand Down
12 changes: 6 additions & 6 deletions src/agora_agent/agents/types/start_agents_request_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
import pydantic
from ...core.pydantic_utilities import IS_PYDANTIC_V2
from ...core.unchecked_base_model import UncheckedBaseModel
from ...types.asr import Asr
from ...types.llm import Llm
from ...types.mllm import Mllm
from ...types.tts import Tts
from .start_agents_request_properties_advanced_features import StartAgentsRequestPropertiesAdvancedFeatures
from .start_agents_request_properties_asr import StartAgentsRequestPropertiesAsr
from .start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar
from .start_agents_request_properties_filler_words import StartAgentsRequestPropertiesFillerWords
from .start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence
from .start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption
from .start_agents_request_properties_llm import StartAgentsRequestPropertiesLlm
from .start_agents_request_properties_mllm import StartAgentsRequestPropertiesMllm
from .start_agents_request_properties_parameters import StartAgentsRequestPropertiesParameters
from .start_agents_request_properties_rtc import StartAgentsRequestPropertiesRtc
from .start_agents_request_properties_sal import StartAgentsRequestPropertiesSal
Expand Down Expand Up @@ -67,7 +67,7 @@ class StartAgentsRequestProperties(UncheckedBaseModel):
Advanced features configuration.
"""

asr: typing.Optional[StartAgentsRequestPropertiesAsr] = pydantic.Field(default=None)
asr: typing.Optional[Asr] = pydantic.Field(default=None)
"""
Automatic Speech Recognition (ASR) configuration.
"""
Expand All @@ -77,12 +77,12 @@ class StartAgentsRequestProperties(UncheckedBaseModel):
Text-to-speech (TTS) module configuration.
"""

llm: typing.Optional[StartAgentsRequestPropertiesLlm] = pydantic.Field(default=None)
llm: typing.Optional[Llm] = pydantic.Field(default=None)
"""
Large language model (LLM) configuration.
"""

mllm: typing.Optional[StartAgentsRequestPropertiesMllm] = pydantic.Field(default=None)
mllm: typing.Optional[Mllm] = pydantic.Field(default=None)
"""
Multimodal Large Language Model (MLLM) configuration for real-time audio and text processing. `mllm` is an exclusive alternative to the standard `asr` + `llm` + `tts` pipeline.
"""
Expand Down

This file was deleted.

This file was deleted.

Loading
Loading