Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,6 +1448,16 @@ class ChatProviderTemplate(TypedDict):
"stt_model": "iic/SenseVoiceSmall",
"is_emotion": False,
},
"GLM-ASR(API)": {
"id": "glm_asr",
"type": "glm_asr",
"provider": "bigmodel",
"provider_type": "speech_to_text",
"enable": False,
"api_key": "",
"model": "glm-asr-2512",
"timeout": 120,
},
"OpenAI TTS(API)": {
"id": "openai_tts",
"type": "openai_tts_api",
Expand Down Expand Up @@ -1621,6 +1631,19 @@ class ChatProviderTemplate(TypedDict):
"gemini_tts_voice_name": "Leda",
"proxy": "",
},
"GLM TTS(API)": {
"id": "glm_tts",
"type": "glm_tts",
"provider": "bigmodel",
"provider_type": "text_to_speech",
"enable": False,
"api_key": "",
"model": "glm-tts",
"glm_tts_voice": "tongtong",
"glm_tts_speed": 1.0,
"glm_tts_volume": 1.0,
"timeout": 30,
},
"OpenAI Embedding": {
"id": "openai_embedding",
"type": "openai_embedding",
Expand Down
8 changes: 5 additions & 3 deletions astrbot/core/message/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,19 @@ def __init__(self, file: str | None, **_) -> None:

@staticmethod
def fromFileSystem(path, **_):
return Record(file=f"file:///{os.path.abspath(path)}", path=path, **_)
file_url = f"file:///{os.path.abspath(path)}"
return Record(file=file_url, url=file_url, path=path, **_)

@staticmethod
def fromURL(url: str, **_):
if url.startswith("http://") or url.startswith("https://"):
return Record(file=url, **_)
return Record(file=url, url=url, **_)
raise Exception("not a valid url")

@staticmethod
def fromBase64(bs64_data: str, **_):
return Record(file=f"base64://{bs64_data}", **_)
base64_url = f"base64://{bs64_data}"
return Record(file=base64_url, url=base64_url, **_)

async def convert_to_file_path(self) -> str:
"""将这个语音统一转换为本地文件路径。这个方法避免了手动判断语音数据类型,直接返回语音数据的本地路径(如果是网络 URL, 则会自动进行下载)。
Expand Down
8 changes: 8 additions & 0 deletions astrbot/core/provider/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,14 @@ def dynamic_import_provider(self, type: str) -> None:
from .sources.gemini_tts_source import (
ProviderGeminiTTSAPI as ProviderGeminiTTSAPI,
)
case "glm_asr":
from .sources.glm_asr_source import (
ProviderGLMASR as ProviderGLMASR,
)
case "glm_tts":
from .sources.glm_tts_source import (
ProviderGLMTTS as ProviderGLMTTS,
)
case "openai_embedding":
from .sources.openai_embedding_source import (
OpenAIEmbeddingProvider as OpenAIEmbeddingProvider,
Expand Down
146 changes: 146 additions & 0 deletions astrbot/core/provider/sources/glm_asr_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import base64
import os
import uuid

import aiohttp

from astrbot.api import logger
from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
from astrbot.core.utils.io import download_file
from astrbot.core.utils.tencent_record_helper import (
convert_to_pcm_wav,
tencent_silk_to_wav,
)

from ..entities import ProviderType
from ..provider import STTProvider
from ..register import register_provider_adapter


@register_provider_adapter(
    "glm_asr",
    "GLM-ASR API",
    provider_type=ProviderType.SPEECH_TO_TEXT,
)
class ProviderGLMASR(STTProvider):
    """Speech-to-text provider that posts audio to the GLM-ASR HTTP endpoint.

    The audio is read from a local file (downloaded first when an HTTP(S)
    URL is given), converted to WAV when its header identifies it as SILK
    or AMR, base64-encoded and sent as JSON to ``self.api_base``.
    """

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        """Read the provider configuration.

        Args:
            provider_config: Provider entry; reads ``api_key``, ``model``
                and ``timeout``.
            provider_settings: Passed through to the base class unchanged.

        Raises:
            ValueError: If ``api_key`` is missing or empty.
        """
        super().__init__(provider_config, provider_settings)
        self.api_key: str = provider_config.get("api_key", "")
        if not self.api_key:
            raise ValueError("GLM-ASR requires api_key to be configured")
        self.model_name: str = provider_config.get("model", "glm-asr-2512")
        self.timeout: int = provider_config.get("timeout", 120)
        self.api_base: str = "https://open.bigmodel.cn/api/paas/v4/audio/transcriptions"
        self._session: aiohttp.ClientSession | None = None

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the shared HTTP session, creating it if absent or closed."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self.timeout),
            )
        return self._session

    async def initialize(self) -> None:
        """Create the shared HTTP session.

        An already-open session is reused, so calling this twice does not
        leave an unclosed session behind.
        """
        self._ensure_session()

    async def terminate(self) -> None:
        """Close the shared HTTP session if it is open."""
        if self._session and not self._session.closed:
            await self._session.close()
        self._session = None

    def _get_audio_format(self, file_path: str) -> str | None:
        """Detect SILK or AMR audio from the first 8 bytes of the file.

        Returns:
            ``"silk"``, ``"amr"``, or ``None`` when neither marker is found
            or the file does not exist.
        """
        silk_header = b"SILK"
        amr_header = b"#!AMR"

        try:
            with open(file_path, "rb") as f:
                file_header = f.read(8)
        except FileNotFoundError:
            return None

        if silk_header in file_header:
            return "silk"
        if amr_header in file_header:
            return "amr"
        return None

    @staticmethod
    def _remove_temp_file(path: str) -> None:
        """Best-effort removal of a temp file; failures are only logged."""
        if not os.path.exists(path):
            return
        try:
            os.remove(path)
        except Exception as e:
            logger.warning(f"Failed to remove temp file {path}: {e}")

    async def _prepare_audio(self, audio_url: str, temp_files: list[str]) -> str:
        """Resolve ``audio_url`` to a local file the request can upload.

        Every temp path is appended to ``temp_files`` *before* it is
        written, so the caller can clean up even if a later step fails.

        Raises:
            FileNotFoundError: If the resulting local path does not exist.
        """
        # Only real HTTP(S) URLs are downloaded; a local path that merely
        # begins with "http" is treated as a file.
        if audio_url.startswith(("http://", "https://")):
            temp_dir = get_astrbot_temp_path()
            os.makedirs(temp_dir, exist_ok=True)
            downloaded_path = os.path.join(
                temp_dir, f"glm_asr_{uuid.uuid4().hex[:8]}.input"
            )
            temp_files.append(downloaded_path)
            await download_file(audio_url, downloaded_path)
            audio_url = downloaded_path

        if not os.path.exists(audio_url):
            raise FileNotFoundError(f"Audio file not found: {audio_url}")

        file_format = self._get_audio_format(audio_url)
        if file_format in ("silk", "amr"):
            temp_dir = get_astrbot_temp_path()
            os.makedirs(temp_dir, exist_ok=True)
            output_path = os.path.join(temp_dir, f"glm_asr_{uuid.uuid4().hex[:8]}.wav")
            temp_files.append(output_path)

            logger.info(f"Converting {file_format} file to wav for GLM-ASR...")
            if file_format == "silk":
                await tencent_silk_to_wav(audio_url, output_path)
            else:
                await convert_to_pcm_wav(audio_url, output_path)
            audio_url = output_path

        return audio_url

    async def get_text(self, audio_url: str) -> str:
        """Transcribe an audio file and return the recognized text.

        Args:
            audio_url: An ``http://`` / ``https://`` URL or a local file path.

        Returns:
            The ``text`` field of the API response, or ``""`` if absent.

        Raises:
            FileNotFoundError: If the audio file cannot be found locally.
            Exception: If the HTTP request fails or the response body
                carries an ``error`` entry.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        temp_files: list[str] = []
        try:
            local_path = await self._prepare_audio(audio_url, temp_files)

            with open(local_path, "rb") as f:
                audio_base64 = base64.b64encode(f.read()).decode("utf-8")

            payload = {
                "model": self.model_name,
                "file_base64": audio_base64,
            }

            try:
                session = self._ensure_session()
                async with session.post(
                    self.api_base,
                    headers=headers,
                    json=payload,
                ) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(
                            f"GLM-ASR API error: {response.status}, body: {error_text}"
                        )
                        response.raise_for_status()

                    result = await response.json()
            except aiohttp.ClientError as e:
                raise Exception(f"GLM-ASR API request failed: {e!s}") from e

            error = result.get("error")
            if error:
                # The error entry is normally a mapping; fall back to its
                # string form so a plain-string error is still reported.
                if isinstance(error, dict):
                    error_msg = error.get("message", "Unknown error")
                else:
                    error_msg = str(error)
                raise Exception(f"GLM-ASR API error: {error_msg}")

            return result.get("text", "")
        finally:
            # Runs for every exit path, including download/conversion
            # failures, so no temp file is left behind. Newest first.
            for path in reversed(temp_files):
                self._remove_temp_file(path)
94 changes: 94 additions & 0 deletions astrbot/core/provider/sources/glm_tts_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import os
import uuid

import aiohttp

from astrbot.api import logger
from astrbot.core.utils.astrbot_path import get_astrbot_temp_path

from ..entities import ProviderType
from ..provider import TTSProvider
from ..register import register_provider_adapter


@register_provider_adapter(
    "glm_tts",
    "GLM-TTS API",
    provider_type=ProviderType.TEXT_TO_SPEECH,
)
class ProviderGLMTTS(TTSProvider):
    """Text-to-speech provider that requests WAV audio from the GLM-TTS endpoint.

    Each call to :meth:`get_audio` posts the text to ``self.api_base`` and
    writes the returned audio bytes to a new file in the temp directory.
    """

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        """Read and validate the provider configuration.

        Args:
            provider_config: Provider entry; reads ``api_key``, ``model``,
                ``glm_tts_voice``, ``glm_tts_speed``, ``glm_tts_volume``
                and ``timeout``.
            provider_settings: Passed through to the base class unchanged.

        Raises:
            ValueError: If ``api_key`` is missing or empty.
        """
        super().__init__(provider_config, provider_settings)
        self.api_key: str = provider_config.get("api_key", "")
        if not self.api_key:
            raise ValueError("GLM-TTS requires api_key to be configured")
        self.model_name: str = provider_config.get("model", "glm-tts")
        self.voice: str = provider_config.get("glm_tts_voice", "tongtong")

        self.speed: float = self._to_float(
            provider_config.get("glm_tts_speed", 1.0), 1.0, "speed"
        )
        if not (0.5 <= self.speed <= 2.0):
            self.speed = max(0.5, min(2.0, self.speed))
            logger.warning(
                f"GLM-TTS speed out of range [0.5, 2.0], clamped to {self.speed}"
            )

        self.volume: float = self._to_float(
            provider_config.get("glm_tts_volume", 1.0), 1.0, "volume"
        )
        if not (0 < self.volume <= 10):
            # The lower bound is exclusive, so non-positive values are
            # raised to a small positive floor rather than to 0.
            self.volume = max(0.01, min(10.0, self.volume))
            logger.warning(
                f"GLM-TTS volume out of range (0, 10], clamped to {self.volume}"
            )
        self.timeout: int = provider_config.get("timeout", 30)
        self.api_base: str = "https://open.bigmodel.cn/api/paas/v4/audio/speech"

    @staticmethod
    def _to_float(raw, default: float, name: str) -> float:
        """Convert a config value to float, using ``default`` if it is not numeric.

        A ``None`` or non-numeric string would otherwise abort provider
        construction with a bare ``float()`` error.
        """
        try:
            return float(raw)
        except (TypeError, ValueError):
            logger.warning(
                f"GLM-TTS {name} value {raw!r} is not a number, using default {default}"
            )
            return default

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` and return the path of the written WAV file.

        Args:
            text: The text to convert to speech.

        Returns:
            Path of a newly written ``glm_tts_<uuid>.wav`` file in the
            temp directory.

        Raises:
            Exception: If the HTTP request fails, the response content
                type is not ``audio/wav``, or the response body is empty.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.model_name,
            "input": text,
            "voice": self.voice,
            "response_format": "wav",
            "speed": self.speed,
            "volume": self.volume,
        }

        temp_dir = get_astrbot_temp_path()
        os.makedirs(temp_dir, exist_ok=True)
        output_path = os.path.join(temp_dir, f"glm_tts_{uuid.uuid4()}.wav")

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.api_base,
                    headers=headers,
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=self.timeout),
                ) as response:
                    if response.status != 200:
                        # Log the body before raising; raise_for_status()
                        # alone would discard the server's explanation.
                        error_text = await response.text()
                        logger.error(
                            f"GLM-TTS API error: {response.status}, body: {error_text}"
                        )
                    response.raise_for_status()

                    if response.content_type != "audio/wav":
                        error_msg = f"Unexpected content type: {response.content_type}"
                        raise Exception(f"GLM-TTS API error: {error_msg}")

                    audio_data = await response.read()

                    if not audio_data:
                        raise Exception("GLM-TTS API returned empty audio data")

                    with open(output_path, "wb") as f:
                        f.write(audio_data)

                    return output_path

        except aiohttp.ClientError as e:
            raise Exception(f"GLM-TTS API request failed: {e!s}") from e

    async def terminate(self):
        """Nothing to release: a session is opened and closed per request."""
        pass
4 changes: 3 additions & 1 deletion dashboard/src/composables/useProviderSources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,9 @@ export function useProviderSources(options: UseProviderSourcesOptions) {
dashscope_tts: 'text_to_speech',
azure_tts: 'text_to_speech',
minimax_tts_api: 'text_to_speech',
volcengine_tts: 'text_to_speech'
volcengine_tts: 'text_to_speech',
glm_asr: 'speech_to_text',
glm_tts: 'text_to_speech'
}
return oldVersionProviderTypeMapping[provider.type]
}
Expand Down
12 changes: 12 additions & 0 deletions dashboard/src/i18n/locales/en-US/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1507,6 +1507,18 @@
"auto_save_history": {
"description": "Conversation history managed by Coze",
"hint": "When enabled, Coze manages conversation history. AstrBot's locally saved context will not take effect (read-only), and operations on AstrBot context will not apply. If disabled, AstrBot manages the context."
},
"glm_tts_voice": {
"description": "Voice",
"hint": "GLM-TTS voice. Available voices: tongtong, chuichui, xiaochen, jam, kazi, douji, luodo."
},
"glm_tts_speed": {
"description": "Speech rate",
"hint": "Speech speed for synthesis, range [0.5, 2.0], default 1.0."
},
"glm_tts_volume": {
"description": "Volume",
"hint": "Volume for synthesis, range (0, 10], default 1.0."
}
}
},
Expand Down
12 changes: 12 additions & 0 deletions dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1509,6 +1509,18 @@
"auto_save_history": {
"description": "由 Coze 管理对话记录",
"hint": "启用后,将由 Coze 进行对话历史记录管理, 此时 AstrBot 本地保存的上下文不会生效(仅供浏览), 对 AstrBot 的上下文进行的操作也不会生效。如果为禁用, 则使用 AstrBot 管理上下文。"
},
"glm_tts_voice": {
"description": "声音",
"hint": "GLM-TTS 声音。可选声音:tongtong, chuichui, xiaochen, jam, kazi, douji, luodo。"
},
"glm_tts_speed": {
"description": "语速",
"hint": "合成语速,范围 [0.5, 2.0],默认 1.0。"
},
"glm_tts_volume": {
"description": "音量",
"hint": "合成音量,范围 (0, 10],默认 1.0。"
}
}
},
Expand Down
Loading