@@ -34,13 +34,28 @@ func newComposeCmd() *cobra.Command {
3434 return c
3535}
3636
// Reasoning budgets that the "think" parameter is translated into.
const (
	// reasoningBudgetUnlimited places no limit on reasoning.
	reasoningBudgetUnlimited int32 = -1
	// reasoningBudgetDisabled switches reasoning off.
	reasoningBudgetDisabled int32 = 0
	// reasoningBudgetLow is the budget used for the "low" level.
	reasoningBudgetLow int32 = 256
	// reasoningBudgetMedium is the budget used for the "medium" level.
	reasoningBudgetMedium int32 = 1024
)
44+
// ptr stores a copy of v in newly allocated memory and returns its address.
// It exists so that constants and literals, which are not addressable, can be
// passed where a *int32 is expected.
func ptr(v int32) *int32 {
	p := new(int32)
	*p = v
	return p
}
49+
3750func newUpCommand () * cobra.Command {
3851 var models []string
3952 var ctxSize int64
4053 var backend string
4154 var draftModel string
4255 var numTokens int
4356 var minAcceptanceRate float64
57+ var mode string
58+ var think string
4459 c := & cobra.Command {
4560 Use : "up" ,
4661 RunE : func (cmd * cobra.Command , args []string ) error {
@@ -81,6 +96,30 @@ func newUpCommand() *cobra.Command {
8196 sendInfo (fmt .Sprintf ("Enabling speculative decoding with draft model: %s" , draftModel ))
8297 }
8398
99+ // Parse mode if provided
100+ var backendMode * inference.BackendMode
101+ if mode != "" {
102+ parsedMode , err := parseBackendMode (mode )
103+ if err != nil {
104+ _ = sendError (err .Error ())
105+ return err
106+ }
107+ backendMode = & parsedMode
108+ sendInfo (fmt .Sprintf ("Setting backend mode to %s" , mode ))
109+ }
110+
111+ // Parse think parameter for reasoning budget
112+ var reasoningBudget * int32
113+ if think != "" {
114+ budget , err := parseThinkToReasoningBudget (think )
115+ if err != nil {
116+ _ = sendError (err .Error ())
117+ return err
118+ }
119+ reasoningBudget = budget
120+ sendInfo (fmt .Sprintf ("Setting think mode to %s" , think ))
121+ }
122+
84123 for _ , model := range models {
85124 configuration := inference.BackendConfiguration {
86125 Speculative : speculativeConfig ,
@@ -91,8 +130,17 @@ func newUpCommand() *cobra.Command {
91130 configuration .ContextSize = & v
92131 }
93132
133+ // Set llama.cpp-specific reasoning budget if provided
134+ if reasoningBudget != nil {
135+ if configuration .LlamaCpp == nil {
136+ configuration .LlamaCpp = & inference.LlamaCppConfig {}
137+ }
138+ configuration .LlamaCpp .ReasoningBudget = reasoningBudget
139+ }
140+
94141 if err := desktopClient .ConfigureBackend (scheduling.ConfigureRequest {
95142 Model : model ,
143+ Mode : backendMode ,
96144 BackendConfiguration : configuration ,
97145 }); err != nil {
98146 configErrFmtString := "failed to configure backend for model %s with context-size %d"
@@ -123,10 +171,57 @@ func newUpCommand() *cobra.Command {
123171 c .Flags ().StringVar (& draftModel , "speculative-draft-model" , "" , "draft model for speculative decoding" )
124172 c .Flags ().IntVar (& numTokens , "speculative-num-tokens" , 0 , "number of tokens to predict speculatively" )
125173 c .Flags ().Float64Var (& minAcceptanceRate , "speculative-min-acceptance-rate" , 0 , "minimum acceptance rate for speculative decoding" )
174+ c .Flags ().StringVar (& mode , "mode" , "" , "backend operation mode (completion, embedding, reranking)" )
175+ c .Flags ().StringVar (& think , "think" , "" , "enable reasoning mode for thinking models (true/false/high/medium/low)" )
126176 _ = c .MarkFlagRequired ("model" )
127177 return c
128178}
129179
180+ // parseBackendMode parses a string mode value into an inference.BackendMode.
181+ func parseBackendMode (mode string ) (inference.BackendMode , error ) {
182+ switch strings .ToLower (mode ) {
183+ case "completion" :
184+ return inference .BackendModeCompletion , nil
185+ case "embedding" :
186+ return inference .BackendModeEmbedding , nil
187+ case "reranking" :
188+ return inference .BackendModeReranking , nil
189+ default :
190+ return inference .BackendModeCompletion , fmt .Errorf ("invalid mode %q: must be one of completion, embedding, reranking" , mode )
191+ }
192+ }
193+
194+ // parseThinkToReasoningBudget converts the think parameter string to a reasoning budget value.
195+ // Accepts: "true", "false", "high", "medium", "low"
196+ // Returns:
197+ // - nil for empty string or "true" (use server default, which is unlimited)
198+ // - -1 for "high" (explicitly set unlimited)
199+ // - 0 for "false" (disable thinking)
200+ // - 1024 for "medium"
201+ // - 256 for "low"
202+ func parseThinkToReasoningBudget (think string ) (* int32 , error ) {
203+ if think == "" {
204+ return nil , nil
205+ }
206+
207+ switch strings .ToLower (think ) {
208+ case "true" :
209+ // Use nil to let the server use its default (currently unlimited)
210+ return nil , nil
211+ case "high" :
212+ // Explicitly set unlimited reasoning budget
213+ return ptr (reasoningBudgetUnlimited ), nil
214+ case "false" :
215+ return ptr (reasoningBudgetDisabled ), nil
216+ case "medium" :
217+ return ptr (reasoningBudgetMedium ), nil
218+ case "low" :
219+ return ptr (reasoningBudgetLow ), nil
220+ default :
221+ return nil , fmt .Errorf ("invalid think value %q: must be one of true, false, high, medium, low" , think )
222+ }
223+ }
224+
130225func newDownCommand () * cobra.Command {
131226 c := & cobra.Command {
132227 Use : "down" ,
0 commit comments