AstrBotDevs · xwsjjctz · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
@@ -1448,6 +1448,16 @@ class ChatProviderTemplate(TypedDict):
                         "stt_model": "iic/SenseVoiceSmall",
                         "is_emotion": False,
                     },
+                    "GLM-ASR(API)": {
+                        "id": "glm_asr",
+                        "type": "glm_asr",
+                        "provider": "bigmodel",
+                        "provider_type": "speech_to_text",
+                        "enable": False,
+                        "api_key": "",
+                        "model": "glm-asr-2512",
+                        "timeout": 120,
+                    },
                     "OpenAI TTS(API)": {
                         "id": "openai_tts",
                         "type": "openai_tts_api",
@@ -1621,6 +1631,19 @@ class ChatProviderTemplate(TypedDict):
                         "gemini_tts_voice_name": "Leda",
                         "proxy": "",
                     },
+                    "GLM TTS(API)": {
+                        "id": "glm_tts",
+                        "type": "glm_tts",
+                        "provider": "bigmodel",
+                        "provider_type": "text_to_speech",
+                        "enable": False,
+                        "api_key": "",
+                        "model": "glm-tts",
+                        "glm_tts_voice": "tongtong",
+                        "glm_tts_speed": 1.0,
+                        "glm_tts_volume": 1.0,
+                        "timeout": 30,
+                    },
                     "OpenAI Embedding": {
                         "id": "openai_embedding",
                         "type": "openai_embedding",

diff --git a/astrbot/core/message/components.py b/astrbot/core/message/components.py
@@ -133,17 +133,19 @@ def __init__(self, file: str | None, **_) -> None:
 
     @staticmethod
     def fromFileSystem(path, **_):
+        """Build a Record that points at a local file.
+
+        The absolute form of ``path`` is prefixed with ``file:///`` and that
+        string is passed as both ``file`` and ``url``; the original ``path``
+        and any extra keyword arguments are forwarded to ``Record`` as-is.
+        """
-        return Record(file=f"file:///{os.path.abspath(path)}", path=path, **_)
+        file_url = f"file:///{os.path.abspath(path)}"
+        return Record(file=file_url, url=file_url, path=path, **_)
 
     @staticmethod
     def fromURL(url: str, **_):
+        """Build a Record from an ``http://`` or ``https://`` URL.
+
+        The URL is passed as both ``file`` and ``url``; extra keyword
+        arguments are forwarded to ``Record``.
+
+        Raises:
+            Exception: If ``url`` does not start with ``http://`` or
+                ``https://``.
+        """
         if url.startswith("http://") or url.startswith("https://"):
-            return Record(file=url, **_)
+            return Record(file=url, url=url, **_)
         raise Exception("not a valid url")
 
     @staticmethod
     def fromBase64(bs64_data: str, **_):
+        """Build a Record from base64-encoded audio data.
+
+        ``bs64_data`` is prefixed with ``base64://`` and the resulting string
+        is passed as both ``file`` and ``url``; extra keyword arguments are
+        forwarded to ``Record``.
+        """
-        return Record(file=f"base64://{bs64_data}", **_)
+        base64_url = f"base64://{bs64_data}"
+        return Record(file=base64_url, url=base64_url, **_)
 
     async def convert_to_file_path(self) -> str:
         """将这个语音统一转换为本地文件路径。这个方法避免了手动判断语音数据类型，直接返回语音数据的本地路径（如果是网络 URL, 则会自动进行下载）。

diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
@@ -439,6 +439,14 @@ def dynamic_import_provider(self, type: str) -> None:
                 from .sources.gemini_tts_source import (
                     ProviderGeminiTTSAPI as ProviderGeminiTTSAPI,
                 )
+            case "glm_asr":
+                from .sources.glm_asr_source import (
+                    ProviderGLMASR as ProviderGLMASR,
+                )
+            case "glm_tts":
+                from .sources.glm_tts_source import (
+                    ProviderGLMTTS as ProviderGLMTTS,
+                )
             case "openai_embedding":
                 from .sources.openai_embedding_source import (
                     OpenAIEmbeddingProvider as OpenAIEmbeddingProvider,

diff --git a/astrbot/core/provider/sources/glm_asr_source.py b/astrbot/core/provider/sources/glm_asr_source.py
@@ -0,0 +1,146 @@
+import base64
+import os
+import uuid
+
+import aiohttp
+
+from astrbot.api import logger
+from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
+from astrbot.core.utils.io import download_file
+from astrbot.core.utils.tencent_record_helper import (
+    convert_to_pcm_wav,
+    tencent_silk_to_wav,
+)
+
+from ..entities import ProviderType
+from ..provider import STTProvider
+from ..register import register_provider_adapter
+
+
+@register_provider_adapter(
+    "glm_asr",
+    "GLM-ASR API",
+    provider_type=ProviderType.SPEECH_TO_TEXT,
+)
+class ProviderGLMASR(STTProvider):
+    """Speech-to-text provider that sends audio to the GLM-ASR HTTP API.
+
+    Audio is read from a local path (downloaded first when an HTTP(S) URL is
+    given), converted to WAV when it is detected as SILK or AMR, and uploaded
+    base64-encoded inside a JSON request body.
+    """
+
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        """Read the provider configuration.
+
+        Args:
+            provider_config: Provider entry. ``api_key`` is required;
+                ``model`` and ``timeout`` (seconds, used as the total request
+                timeout) are optional.
+            provider_settings: Forwarded unchanged to ``STTProvider``.
+
+        Raises:
+            ValueError: If ``api_key`` is missing or empty.
+        """
+        super().__init__(provider_config, provider_settings)
+        self.api_key: str = provider_config.get("api_key", "")
+        if not self.api_key:
+            raise ValueError("GLM-ASR requires api_key to be configured")
+        self.model_name: str = provider_config.get("model", "glm-asr-2512")
+        self.timeout: int = provider_config.get("timeout", 120)
+        self.api_base: str = "https://open.bigmodel.cn/api/paas/v4/audio/transcriptions"
+        # Created in initialize() or lazily on the first get_text() call.
+        self._session: aiohttp.ClientSession | None = None
+
+    def _new_session(self) -> aiohttp.ClientSession:
+        """Create a client session using the configured total timeout."""
+        return aiohttp.ClientSession(
+            timeout=aiohttp.ClientTimeout(total=self.timeout),
+        )
+
+    async def initialize(self) -> None:
+        """Open the HTTP session reused by later requests."""
+        self._session = self._new_session()
+
+    async def terminate(self) -> None:
+        """Close the HTTP session if one is open and drop the reference."""
+        if self._session and not self._session.closed:
+            await self._session.close()
+        self._session = None
+
+    @staticmethod
+    def _remove_temp_file(path: str | None) -> None:
+        """Best-effort removal of a temp file; failures are only logged."""
+        if not path:
+            return
+        try:
+            os.remove(path)
+        except FileNotFoundError:
+            # Never created, or already gone: nothing to clean up.
+            return
+        except OSError as e:
+            logger.warning(f"Failed to remove temp file {path}: {e}")
+
+    def _get_audio_format(self, file_path: str) -> str | None:
+        """Sniff the first 8 bytes of a file for a SILK or AMR marker.
+
+        Returns:
+            ``"silk"`` or ``"amr"`` when the marker is found, ``None`` when
+            neither matches or the file does not exist.
+        """
+        silk_header = b"SILK"
+        amr_header = b"#!AMR"
+
+        try:
+            with open(file_path, "rb") as f:
+                file_header = f.read(8)
+        except FileNotFoundError:
+            return None
+
+        # Substring match (not startswith) so a marker preceded by other
+        # leading bytes is still recognised.
+        if silk_header in file_header:
+            return "silk"
+        if amr_header in file_header:
+            return "amr"
+        return None
+
+    async def get_text(self, audio_url: str) -> str:
+        """Transcribe an audio file and return the recognised text.
+
+        Args:
+            audio_url: Local file path, or an ``http://`` / ``https://`` URL
+                that is downloaded to the temp directory first.
+
+        Returns:
+            The ``text`` field of the API response, or ``""`` if absent.
+
+        Raises:
+            FileNotFoundError: If the (downloaded) audio file does not exist.
+            Exception: If the HTTP request fails or the response carries an
+                ``error`` entry.
+        """
+        downloaded_path: str | None = None
+        output_path: str | None = None
+
+        # The outer try/finally spans download, conversion and upload so the
+        # temp files are removed even when a step before the request fails.
+        try:
+            if audio_url.startswith(("http://", "https://")):
+                temp_dir = get_astrbot_temp_path()
+                os.makedirs(temp_dir, exist_ok=True)
+                downloaded_path = os.path.join(
+                    temp_dir, f"glm_asr_{uuid.uuid4().hex}.input"
+                )
+                await download_file(audio_url, downloaded_path)
+                audio_url = downloaded_path
+
+            if not os.path.exists(audio_url):
+                raise FileNotFoundError(f"Audio file not found: {audio_url}")
+
+            file_format = self._get_audio_format(audio_url)
+
+            if file_format in ("silk", "amr"):
+                temp_dir = get_astrbot_temp_path()
+                os.makedirs(temp_dir, exist_ok=True)
+                output_path = os.path.join(
+                    temp_dir, f"glm_asr_{uuid.uuid4().hex}.wav"
+                )
+
+                logger.info(f"Converting {file_format} file to wav for GLM-ASR...")
+                if file_format == "silk":
+                    await tencent_silk_to_wav(audio_url, output_path)
+                else:
+                    await convert_to_pcm_wav(audio_url, output_path)
+
+                audio_url = output_path
+
+            with open(audio_url, "rb") as f:
+                audio_base64 = base64.b64encode(f.read()).decode("utf-8")
+
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json",
+            }
+            payload = {
+                "model": self.model_name,
+                "file_base64": audio_base64,
+            }
+
+            # Only the HTTP exchange is wrapped, so download/conversion errors
+            # keep their original exception types.
+            try:
+                if not self._session or self._session.closed:
+                    self._session = self._new_session()
+                async with self._session.post(
+                    self.api_base,
+                    headers=headers,
+                    json=payload,
+                ) as response:
+                    if response.status != 200:
+                        error_text = await response.text()
+                        logger.error(
+                            f"GLM-ASR API error: {response.status}, body: {error_text}"
+                        )
+                        response.raise_for_status()
+
+                    result = await response.json()
+            except aiohttp.ClientError as e:
+                raise Exception(f"GLM-ASR API request failed: {e!s}") from e
+
+            error = result.get("error")
+            if error:
+                # The error entry is normally an object with a message, but
+                # tolerate a plain string or other shape instead of crashing.
+                if isinstance(error, dict):
+                    error_msg = error.get("message", "Unknown error")
+                else:
+                    error_msg = str(error)
+                raise Exception(f"GLM-ASR API error: {error_msg}")
+
+            return result.get("text", "")
+        finally:
+            self._remove_temp_file(output_path)
+            self._remove_temp_file(downloaded_path)
diff --git a/astrbot/core/provider/sources/glm_tts_source.py b/astrbot/core/provider/sources/glm_tts_source.py
@@ -0,0 +1,94 @@
+import os
+import uuid
+
+import aiohttp
+
+from astrbot.api import logger
+from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
+
+from ..entities import ProviderType
+from ..provider import TTSProvider
+from ..register import register_provider_adapter
+
+
+@register_provider_adapter(
+    "glm_tts",
+    "GLM-TTS API",
+    provider_type=ProviderType.TEXT_TO_SPEECH,
+)
+class ProviderGLMTTS(TTSProvider):
+    """Text-to-speech provider that requests WAV audio from the GLM-TTS API."""
+
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        """Read the provider configuration and clamp speed and volume.
+
+        Args:
+            provider_config: Provider entry. ``api_key`` is required;
+                ``model``, ``glm_tts_voice``, ``glm_tts_speed``,
+                ``glm_tts_volume`` and ``timeout`` (seconds) are optional.
+            provider_settings: Forwarded unchanged to ``TTSProvider``.
+
+        Raises:
+            ValueError: If ``api_key`` is missing or empty.
+        """
+        super().__init__(provider_config, provider_settings)
+        self.api_key: str = provider_config.get("api_key", "")
+        if not self.api_key:
+            raise ValueError("GLM-TTS requires api_key to be configured")
+        self.model_name: str = provider_config.get("model", "glm-tts")
+        self.voice: str = provider_config.get("glm_tts_voice", "tongtong")
+
+        # Out-of-range values are clamped with a warning rather than rejected.
+        self.speed: float = float(provider_config.get("glm_tts_speed", 1.0))
+        if not (0.5 <= self.speed <= 2.0):
+            self.speed = max(0.5, min(2.0, self.speed))
+            logger.warning(
+                f"GLM-TTS speed out of range [0.5, 2.0], clamped to {self.speed}"
+            )
+
+        # The lower bound is exclusive, so non-positive values clamp to 0.01.
+        self.volume: float = float(provider_config.get("glm_tts_volume", 1.0))
+        if not (0 < self.volume <= 10):
+            self.volume = max(0.01, min(10.0, self.volume))
+            logger.warning(
+                f"GLM-TTS volume out of range (0, 10], clamped to {self.volume}"
+            )
+        self.timeout: int = provider_config.get("timeout", 30)
+        self.api_base: str = "https://open.bigmodel.cn/api/paas/v4/audio/speech"
+
+    async def get_audio(self, text: str) -> str:
+        """Synthesize ``text`` and return the path of the saved WAV file.
+
+        Args:
+            text: Text to convert to speech.
+
+        Returns:
+            Path of a newly written ``glm_tts_<uuid>.wav`` file in the
+            AstrBot temp directory.
+
+        Raises:
+            Exception: If the HTTP request fails, the response content type
+                is not ``audio/wav``, or the response body is empty.
+        """
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+        payload = {
+            "model": self.model_name,
+            "input": text,
+            "voice": self.voice,
+            "response_format": "wav",
+            "speed": self.speed,
+            "volume": self.volume,
+        }
+
+        temp_dir = get_astrbot_temp_path()
+        os.makedirs(temp_dir, exist_ok=True)
+        output_path = os.path.join(temp_dir, f"glm_tts_{uuid.uuid4()}.wav")
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.api_base,
+                    headers=headers,
+                    json=payload,
+                    timeout=aiohttp.ClientTimeout(total=self.timeout),
+                ) as response:
+                    if response.status >= 400:
+                        # Log the body first: raise_for_status() only reports
+                        # the status line, not the API's error details.
+                        error_text = await response.text()
+                        logger.error(
+                            f"GLM-TTS API error: {response.status}, body: {error_text}"
+                        )
+                    response.raise_for_status()
+
+                    if response.content_type != "audio/wav":
+                        # Log a short raw preview to help diagnose the
+                        # unexpected response; bytes avoid decode errors.
+                        body_preview = (await response.read())[:200]
+                        logger.error(
+                            "GLM-TTS API returned unexpected content type "
+                            f"{response.content_type}, body: {body_preview!r}"
+                        )
+                        error_msg = f"Unexpected content type: {response.content_type}"
+                        raise Exception(f"GLM-TTS API error: {error_msg}")
+
+                    audio_data = await response.read()
+
+                    if not audio_data:
+                        raise Exception("GLM-TTS API returned empty audio data")
+
+                    with open(output_path, "wb") as f:
+                        f.write(audio_data)
+
+                    return output_path
+
+        except aiohttp.ClientError as e:
+            raise Exception(f"GLM-TTS API request failed: {e!s}") from e
+
+    async def terminate(self) -> None:
+        """Nothing to release: each request uses its own short-lived session."""
+        pass
diff --git a/dashboard/src/composables/useProviderSources.ts b/dashboard/src/composables/useProviderSources.ts
@@ -331,7 +331,9 @@ export function useProviderSources(options: UseProviderSourcesOptions) {
       dashscope_tts: 'text_to_speech',
       azure_tts: 'text_to_speech',
       minimax_tts_api: 'text_to_speech',
-      volcengine_tts: 'text_to_speech'
+      volcengine_tts: 'text_to_speech',
+      glm_asr: 'speech_to_text',
+      glm_tts: 'text_to_speech'
     }
     return oldVersionProviderTypeMapping[provider.type]
   }

diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -1507,6 +1507,18 @@
       "auto_save_history": {
         "description": "Conversation history managed by Coze",
         "hint": "When enabled, Coze manages conversation history. AstrBot's locally saved context will not take effect (read-only), and operations on AstrBot context will not apply. If disabled, AstrBot manages the context."
+      },
+      "glm_tts_voice": {
+        "description": "Voice",
+        "hint": "GLM-TTS voice. Available voices: tongtong, chuichui, xiaochen, jam, kazi, douji, luodo."
+      },
+      "glm_tts_speed": {
+        "description": "Speech rate",
+        "hint": "Speech speed for synthesis, range [0.5, 2.0], default 1.0."
+      },
+      "glm_tts_volume": {
+        "description": "Volume",
+        "hint": "Volume for synthesis, range (0, 10], default 1.0."
       }
     }
   },

diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
@@ -1509,6 +1509,18 @@
       "auto_save_history": {
         "description": "由 Coze 管理对话记录",
         "hint": "启用后，将由 Coze 进行对话历史记录管理, 此时 AstrBot 本地保存的上下文不会生效(仅供浏览), 对 AstrBot 的上下文进行的操作也不会生效。如果为禁用, 则使用 AstrBot 管理上下文。"
+      },
+      "glm_tts_voice": {
+        "description": "声音",
+        "hint": "GLM-TTS 声音。可选声音：tongtong, chuichui, xiaochen, jam, kazi, douji, luodo。"
+      },
+      "glm_tts_speed": {
+        "description": "语速",
+        "hint": "合成语速，范围 [0.5, 2.0]，默认 1.0。"
+      },
+      "glm_tts_volume": {
+        "description": "音量",
+        "hint": "合成音量，范围 (0, 10]，默认 1.0。"
       }
     }
   },