diff --git a/README.md b/README.md index b244102..1254104 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,9 @@ use yoagent::provider::{ModelConfig, ProviderRegistry}; // Use a first-class OpenAI-compatible provider preset let model = ModelConfig::groq("llama-3.3-70b-versatile", "Llama 3.3 70B"); +// Or Qwen / DashScope +let model = ModelConfig::qwen("qwen3.6-plus", "Qwen 3.6 Plus"); + // Or Google Gemini let model = ModelConfig::google("gemini-2.5-pro", "Gemini 2.5 Pro"); @@ -199,7 +202,7 @@ let registry = ProviderRegistry::default(); | Protocol | Providers | |----------|-----------| | Anthropic Messages | Anthropic (Claude) | -| OpenAI Completions | OpenAI, xAI, Groq, Mistral, DeepSeek, MiniMax, Z.ai, Ollama, local servers, and custom compatible APIs | +| OpenAI Completions | OpenAI, xAI, Groq, Mistral, DeepSeek, MiniMax, Z.ai, Qwen, Ollama, local servers, and custom compatible APIs | | OpenAI Responses | OpenAI (newer API) | | Azure OpenAI | Azure OpenAI | | Google Generative AI | Google Gemini | diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index 02f05da..9ad6345 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -45,7 +45,7 @@ async fn main() { ## Example with OpenAI-Compatible Provider -For OpenAI, xAI, Groq, DeepSeek, Mistral, MiniMax, Z.ai, Ollama, or any compatible API, use `OpenAiCompatProvider` with a `ModelConfig`: +For OpenAI, xAI, Groq, DeepSeek, Mistral, MiniMax, Z.ai, Qwen, Ollama, or any compatible API, use `OpenAiCompatProvider` with a `ModelConfig`: ```rust use yoagent::{Agent, AgentEvent}; diff --git a/docs/providers/model-presets.md b/docs/providers/model-presets.md index 4205f44..67d54a8 100644 --- a/docs/providers/model-presets.md +++ b/docs/providers/model-presets.md @@ -17,7 +17,9 @@ Use a preset when the provider is listed here. 
Use a custom `ModelConfig` when y | `ModelConfig::mistral(id, name)` | Mistral | `OpenAiCompletions` | `https://api.mistral.ai/v1` | 128K | 4,096 | | `ModelConfig::minimax(id, name)` | MiniMax | `OpenAiCompletions` | `https://api.minimaxi.chat/v1` | 1M | 4,096 | | `ModelConfig::zai(id, name)` | Z.ai | `OpenAiCompletions` | `https://api.z.ai/api/paas/v4` | 128K | 4,096 | +| `ModelConfig::qwen(id, name)` | Qwen / DashScope | `OpenAiCompletions` | `https://dashscope-intl.aliyuncs.com/compatible-mode/v1` | 128K | 4,096 | | `ModelConfig::ollama(base_url, model_id)` | Ollama | `OpenAiCompletions` | caller provided | 128K | 4,096 | +| `ModelConfig::openai_compat(base_url, model_id, provider, compat)` | Custom compatible server | `OpenAiCompletions` | caller provided | 128K | 4,096 | | `ModelConfig::local(base_url, model_id)` | Local compatible server | `OpenAiCompletions` | caller provided | 128K | 4,096 | The constructors do not validate model IDs. They send the `id` you pass through to the provider, which lets you use newly released model IDs before yoagent updates its examples. @@ -50,6 +52,52 @@ let llama = ModelConfig::ollama("http://localhost:11434/v1", "llama3.1:8b"); Ollama remains separate from `ModelConfig::local(...)` because some Ollama-served models need an assistant message after tool results, while other local OpenAI-compatible servers may not. The Ollama preset enables that transcript workaround; the generic local preset stays neutral. +## Qwen Models + +Use `ModelConfig::qwen` for hosted Qwen / DashScope: + +```rust +let qwen = ModelConfig::qwen("qwen3.6-plus", "Qwen 3.6 Plus"); +``` + +The default base URL is the international DashScope endpoint. 
For other regions, override `base_url` after construction: + +```rust +let mut qwen = ModelConfig::qwen("qwen3.6-plus", "Qwen 3.6 Plus"); +qwen.base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1".into(); +``` + +Region endpoints: + +- International/Singapore: `https://dashscope-intl.aliyuncs.com/compatible-mode/v1` +- China/Beijing: `https://dashscope.aliyuncs.com/compatible-mode/v1` +- US/Virginia: `https://dashscope-us.aliyuncs.com/compatible-mode/v1` + +For locally deployed Qwen, keep the local endpoint and opt into Qwen's model-family compat flags: + +```rust +let qwen_local = ModelConfig::openai_compat( + "http://localhost:1234/v1", + "qwen3-local", + "qwen", + OpenAiCompat::qwen(), +); +``` + +If a local serving layer also has its own quirks, combine the compat flags explicitly. For example, Qwen served by Ollama may need both Qwen reasoning parsing and Ollama's tool-result transcript workaround: + +```rust +let mut compat = OpenAiCompat::qwen(); +compat.requires_assistant_after_tool_result = true; + +let qwen_ollama = ModelConfig::openai_compat( + "http://localhost:11434/v1", + "qwen2.5-coder:7b", + "ollama", + compat, +); +``` + ## DeepSeek Models Use the current DeepSeek API model IDs by default: @@ -93,4 +141,4 @@ Older DeepSeek reasoning models had stricter feature limits than the current V4 ## Compat Flags Without Constructors -`OpenAiCompat` also has quirk presets such as `OpenAiCompat::cerebras()` and `OpenAiCompat::openrouter()`. Those are compatibility profiles, not full `ModelConfig` constructors. To use them, build a custom `ModelConfig` with the provider name, base URL, protocol, and `compat` value you need. +`OpenAiCompat` also has quirk presets such as `OpenAiCompat::cerebras()` and `OpenAiCompat::openrouter()`. Those are compatibility profiles, not full `ModelConfig` constructors. To use them, call `ModelConfig::openai_compat(...)` with the base URL, model ID, provider name, and `compat` value you need.
diff --git a/docs/providers/openai-compat.md b/docs/providers/openai-compat.md index cc03f8a..1d85a4d 100644 --- a/docs/providers/openai-compat.md +++ b/docs/providers/openai-compat.md @@ -1,6 +1,6 @@ # OpenAI Compatible Provider -`OpenAiCompatProvider` implements the OpenAI Chat Completions API. One implementation covers OpenAI, xAI, Groq, Cerebras, OpenRouter, Mistral, DeepSeek, MiniMax, Z.ai, and any other compatible API. +`OpenAiCompatProvider` implements the OpenAI Chat Completions API. One implementation covers OpenAI, xAI, Groq, Cerebras, OpenRouter, Mistral, DeepSeek, MiniMax, Z.ai, Qwen, Ollama, and any other compatible API. For the first-class `ModelConfig::*` constructors and default model metadata, see [Model Presets](model-presets.md). @@ -47,6 +47,7 @@ pub struct OpenAiCompat { | DeepSeek | `OpenAiCompat::deepseek()` | `max_tokens`, `thinking`, `reasoning_effort`, 1M context window | | MiniMax | `OpenAiCompat::minimax()` | Standard defaults, 1M context window | | Z.ai (Zhipu) | `OpenAiCompat::zai()` | Standard defaults | +| Qwen | `OpenAiCompat::qwen()` | Qwen reasoning content format, `max_tokens`, streaming usage | | Ollama | `OpenAiCompat::ollama()` | Inserts an empty assistant message after tool result runs | `OpenAiCompat` presets are lower-level quirk flags. A provider is first-class when it also has a `ModelConfig::*` constructor; see [Model Presets](model-presets.md). @@ -70,15 +71,12 @@ impl OpenAiCompat { 2. Create a `ModelConfig` that uses it: ```rust -let config = ModelConfig { - id: "my-model".into(), - name: "My Model".into(), - api: ApiProtocol::OpenAiCompletions, - provider: "my-provider".into(), - base_url: "https://api.myprovider.com/v1".into(), - compat: Some(OpenAiCompat::my_provider()), - // ... 
-}; +let config = ModelConfig::openai_compat( + "https://api.myprovider.com/v1", + "my-model", + "my-provider", + OpenAiCompat::my_provider(), +); ``` ## Thinking/Reasoning @@ -118,6 +116,31 @@ Or via the CLI example: cargo run --example cli -- --api-url http://localhost:1234/v1 --model my-model ``` +For locally deployed open-source model families, keep the local endpoint and choose the model-family compat profile: + +```rust +let qwen_local = ModelConfig::openai_compat( + "http://localhost:1234/v1", + "qwen3-local", + "qwen", + OpenAiCompat::qwen(), +); +``` + +Serving-layer quirks and model-family quirks can be combined because `OpenAiCompat` fields are public: + +```rust +let mut compat = OpenAiCompat::qwen(); +compat.requires_assistant_after_tool_result = true; + +let qwen_on_ollama = ModelConfig::openai_compat( + "http://localhost:11434/v1", + "qwen2.5-coder:7b", + "ollama", + compat, +); +``` + ## Auth Uses `Authorization: Bearer {api_key}` header. Extra headers can be added via `ModelConfig.headers`. 
diff --git a/docs/providers/overview.md b/docs/providers/overview.md index 79deace..ced627a 100644 --- a/docs/providers/overview.md +++ b/docs/providers/overview.md @@ -60,6 +60,7 @@ let deepseek = ModelConfig::deepseek("deepseek-v4-flash", "DeepSeek V4 Flash"); let mistral = ModelConfig::mistral("mistral-large-latest", "Mistral Large"); let minimax = ModelConfig::minimax("MiniMax-Text-01", "MiniMax Text 01"); let zai = ModelConfig::zai("glm-4.7", "GLM 4.7"); +let qwen = ModelConfig::qwen("qwen3.6-plus", "Qwen 3.6 Plus"); let ollama = ModelConfig::ollama("http://localhost:11434/v1", "llama3.1:8b"); let local = ModelConfig::local("http://localhost:1234/v1", "my-model"); ``` diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 68c7a87..201e374 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -111,6 +111,6 @@ pub struct CostConfig { ## ModelConfig Presets -yoagent provides first-class `ModelConfig::*` constructors for Anthropic, OpenAI, Google Gemini, xAI, Groq, DeepSeek, Mistral, MiniMax, Z.ai, Ollama, and local OpenAI-compatible servers. +yoagent provides first-class `ModelConfig::*` constructors for Anthropic, OpenAI, Google Gemini, xAI, Groq, DeepSeek, Mistral, MiniMax, Z.ai, Qwen, Ollama, and local OpenAI-compatible servers. See [Model Presets](../providers/model-presets.md) for the full table of constructors, default base URLs, context windows, and DeepSeek legacy alias notes. diff --git a/examples/cli.rs b/examples/cli.rs index 45adccb..2627f0d 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -11,8 +11,9 @@ //! ANTHROPIC_API_KEY=sk-... cargo run --example cli -- --model claude-sonnet-4-20250514 //! ANTHROPIC_API_KEY=sk-... cargo run --example cli -- --skills ./skills //! -//! Run with a named provider (zai, openai, xai, groq, deepseek, mistral, minimax, ollama, google): +//! Run with a named provider (zai, qwen, openai, xai, groq, deepseek, mistral, minimax, ollama, google): //! 
API_KEY=... cargo run --example cli -- --provider zai --model glm-4.7 +//! DASHSCOPE_API_KEY=... cargo run --example cli -- --provider qwen --model qwen3.6-plus //! cargo run --example cli -- --provider ollama --model llama3.1:8b //! //! Run with LM Studio / local OpenAI-compatible server: @@ -76,7 +77,11 @@ async fn main() { .cloned(); let api_key_optional = api_url.is_some() || provider_name.as_deref() == Some("ollama"); - let api_key = if api_key_optional { + let api_key = if provider_name.as_deref() == Some("qwen") { + std::env::var("DASHSCOPE_API_KEY") + .or_else(|_| std::env::var("API_KEY")) + .expect("Set DASHSCOPE_API_KEY or API_KEY") + } else if api_key_optional { std::env::var("ANTHROPIC_API_KEY") .or_else(|_| std::env::var("API_KEY")) .unwrap_or_default() // empty string OK for local/Ollama @@ -88,6 +93,7 @@ async fn main() { let default_model = match provider_name.as_deref() { Some("zai") => "glm-4.7", + Some("qwen") => "qwen3.6-plus", Some("openai") => "gpt-4o", Some("xai") => "grok-3-mini", Some("groq") => "llama-3.3-70b-versatile", @@ -314,6 +320,9 @@ async fn main() { fn make_provider_agent(provider: &str, model: &str) -> Agent { match provider { "zai" => Agent::new(OpenAiCompatProvider).with_model_config(ModelConfig::zai(model, model)), + "qwen" => { + Agent::new(OpenAiCompatProvider).with_model_config(ModelConfig::qwen(model, model)) + } "openai" => { Agent::new(OpenAiCompatProvider).with_model_config(ModelConfig::openai(model, model)) } @@ -334,7 +343,7 @@ fn make_provider_agent(provider: &str, model: &str) -> Agent { .with_model_config(ModelConfig::ollama("http://localhost:11434/v1", model)), "google" => Agent::new(GoogleProvider).with_model_config(ModelConfig::google(model, model)), other => { - eprintln!("Unknown provider: {other}. Supported: zai, openai, xai, groq, deepseek, mistral, minimax, ollama, google."); + eprintln!("Unknown provider: {other}. 
Supported: zai, qwen, openai, xai, groq, deepseek, mistral, minimax, ollama, google."); std::process::exit(1); } } diff --git a/src/provider/model.rs b/src/provider/model.rs index 442a35c..035b3ee 100644 --- a/src/provider/model.rs +++ b/src/provider/model.rs @@ -193,6 +193,16 @@ impl OpenAiCompat { } } + /// Compat flags for Qwen / DashScope. + pub fn qwen() -> Self { + Self { + supports_usage_in_streaming: true, + max_tokens_field: MaxTokensField::MaxTokens, + thinking_format: ThinkingFormat::Qwen, + ..Default::default() + } + } + /// Compat flags for Ollama's OpenAI-compatible API. pub fn ollama() -> Self { Self { @@ -285,6 +295,29 @@ impl ModelConfig { } } + /// Create a config for a custom OpenAI-compatible endpoint with explicit compat flags. + pub fn openai_compat( + base_url: impl Into<String>, + model_id: impl Into<String>, + provider: impl Into<String>, + compat: OpenAiCompat, + ) -> Self { + let id = model_id.into(); + Self { + id: id.clone(), + name: id, + api: ApiProtocol::OpenAiCompletions, + provider: provider.into(), + base_url: base_url.into(), + reasoning: false, + context_window: 128_000, + max_tokens: 4096, + cost: CostConfig::default(), + headers: HashMap::new(), + compat: Some(compat), + } + } + /// Create a config for Ollama's OpenAI-compatible API. /// /// Default local base URL: `http://localhost:11434/v1`. @@ -343,6 +376,25 @@ impl ModelConfig { } } + /// Create a new Qwen / DashScope model config. + /// + /// Models: `qwen3.6-plus`, `qwen3.5-plus`, `qwen-plus`, `qwen-flash`, etc. + pub fn qwen(id: impl Into<String>, name: impl Into<String>) -> Self { + Self { + id: id.into(), + name: name.into(), + api: ApiProtocol::OpenAiCompletions, + provider: "qwen".into(), + base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".into(), + reasoning: true, + context_window: 128_000, + max_tokens: 4096, + cost: CostConfig::default(), + headers: HashMap::new(), + compat: Some(OpenAiCompat::qwen()), + } + } + /// Create a new xAI (Grok) model config.
/// /// Models: `grok-3-mini`, `grok-3`, etc. @@ -488,6 +540,13 @@ mod tests { let ollama = OpenAiCompat::ollama(); assert!(ollama.requires_assistant_after_tool_result); assert!(!ollama.requires_tool_result_name); + + let qwen = OpenAiCompat::qwen(); + assert_eq!(qwen.thinking_format, ThinkingFormat::Qwen); + assert_eq!(qwen.max_tokens_field, MaxTokensField::MaxTokens); + assert!(qwen.supports_usage_in_streaming); + assert!(!qwen.supports_reasoning_effort); + assert!(!qwen.supports_thinking_control); } #[test] @@ -529,6 +588,38 @@ mod tests { assert!(compat.requires_assistant_after_tool_result); } + #[test] + fn test_model_config_openai_compat() { + let config = ModelConfig::openai_compat( + "http://localhost:1234/v1", + "qwen3-local", + "qwen", + OpenAiCompat::qwen(), + ); + assert_eq!(config.api, ApiProtocol::OpenAiCompletions); + assert_eq!(config.provider, "qwen"); + assert_eq!(config.id, "qwen3-local"); + assert_eq!(config.name, "qwen3-local"); + assert_eq!(config.base_url, "http://localhost:1234/v1"); + let compat = config.compat.unwrap(); + assert_eq!(compat.thinking_format, ThinkingFormat::Qwen); + } + + #[test] + fn test_model_config_qwen() { + let config = ModelConfig::qwen("qwen3.6-plus", "Qwen 3.6 Plus"); + assert_eq!(config.api, ApiProtocol::OpenAiCompletions); + assert_eq!(config.provider, "qwen"); + assert_eq!( + config.base_url, + "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" + ); + assert!(config.reasoning); + let compat = config.compat.unwrap(); + assert_eq!(compat.thinking_format, ThinkingFormat::Qwen); + assert_eq!(compat.max_tokens_field, MaxTokensField::MaxTokens); + } + #[test] fn test_model_config_zai() { let config = ModelConfig::zai("glm-4.7", "GLM 4.7"); diff --git a/src/provider/openai_compat.rs b/src/provider/openai_compat.rs index cdd31f6..d4fd392 100644 --- a/src/provider/openai_compat.rs +++ b/src/provider/openai_compat.rs @@ -621,6 +621,67 @@ mod tests { assert_eq!(body["max_tokens"], 384_000); } + #[test] + fn 
test_build_request_body_qwen_uses_max_tokens_and_streaming_usage() { + let model_config = ModelConfig::qwen("qwen3.6-plus", "Qwen 3.6 Plus"); + let compat = model_config.compat.as_ref().unwrap().clone(); + let config = StreamConfig { + model: "qwen3.6-plus".into(), + system_prompt: "You are helpful.".into(), + messages: vec![Message::user("Hello")], + tools: vec![], + thinking_level: ThinkingLevel::High, + api_key: "test".into(), + max_tokens: Some(2048), + temperature: None, + model_config: Some(model_config.clone()), + cache_config: CacheConfig::default(), + }; + + let body = build_request_body(&config, &model_config, &compat); + assert_eq!(body["messages"][0]["role"], "system"); + assert_eq!(body["max_tokens"], 2048); + assert!(body.get("max_completion_tokens").is_none()); + assert_eq!(body["stream_options"]["include_usage"], true); + assert!(body.get("reasoning_effort").is_none()); + assert!(body.get("thinking").is_none()); + } + + #[test] + fn test_build_request_body_qwen_tools_use_openai_shape() { + let model_config = ModelConfig::qwen("qwen3-coder-plus", "Qwen 3 Coder Plus"); + let compat = model_config.compat.as_ref().unwrap().clone(); + let config = StreamConfig { + model: "qwen3-coder-plus".into(), + system_prompt: String::new(), + messages: vec![Message::user("List files")], + tools: vec![ToolDefinition { + name: "list_files".into(), + description: "List files".into(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "path": {"type": "string"} + } + }), + }], + thinking_level: ThinkingLevel::Off, + api_key: "test".into(), + max_tokens: None, + temperature: None, + model_config: Some(model_config.clone()), + cache_config: CacheConfig::default(), + }; + + let body = build_request_body(&config, &model_config, &compat); + assert_eq!(body["tools"][0]["type"], "function"); + assert_eq!(body["tools"][0]["function"]["name"], "list_files"); + assert_eq!( + body["tools"][0]["function"]["parameters"]["properties"]["path"]["type"], + 
"string" + ); + } + #[test] fn test_deepseek_usage_cache_fields_parse() { let chunk: OpenAiChunk = serde_json::from_value(serde_json::json!({