@@ -34,28 +34,13 @@ func newComposeCmd() *cobra.Command {
3434 return c
3535}
3636
37- // Reasoning budget constants for the think parameter conversion
38- const (
39- reasoningBudgetUnlimited int32 = - 1
40- reasoningBudgetDisabled int32 = 0
41- reasoningBudgetMedium int32 = 1024
42- reasoningBudgetLow int32 = 256
43- )
44-
45- // ptr is a helper function to create a pointer to int32
46- func ptr (v int32 ) * int32 {
47- return & v
48- }
49-
5037func newUpCommand () * cobra.Command {
5138 var models []string
5239 var ctxSize int64
5340 var backend string
5441 var draftModel string
5542 var numTokens int
5643 var minAcceptanceRate float64
57- var mode string
58- var think string
5944 c := & cobra.Command {
6045 Use : "up" ,
6146 RunE : func (cmd * cobra.Command , args []string ) error {
@@ -81,7 +66,7 @@ func newUpCommand() *cobra.Command {
8166 return err
8267 }
8368
84- if cmd . Flags (). Changed ( "context-size" ) {
69+ if ctxSize > 0 {
8570 sendInfo (fmt .Sprintf ("Setting context size to %d" , ctxSize ))
8671 }
8772
@@ -96,52 +81,14 @@ func newUpCommand() *cobra.Command {
9681 sendInfo (fmt .Sprintf ("Enabling speculative decoding with draft model: %s" , draftModel ))
9782 }
9883
99- // Parse mode if provided
100- var backendMode * inference.BackendMode
101- if mode != "" {
102- parsedMode , err := parseBackendMode (mode )
103- if err != nil {
104- _ = sendError (err .Error ())
105- return err
106- }
107- backendMode = & parsedMode
108- sendInfo (fmt .Sprintf ("Setting backend mode to %s" , mode ))
109- }
110-
111- // Parse think parameter for reasoning budget
112- var reasoningBudget * int32
113- if think != "" {
114- budget , err := parseThinkToReasoningBudget (think )
115- if err != nil {
116- _ = sendError (err .Error ())
117- return err
118- }
119- reasoningBudget = budget
120- sendInfo (fmt .Sprintf ("Setting think mode to %s" , think ))
121- }
122-
12384 for _ , model := range models {
124- configuration := inference.BackendConfiguration {
125- Speculative : speculativeConfig ,
126- }
127- if cmd .Flags ().Changed ("context-size" ) {
128- // TODO is the context size the same for all models?
129- v := int32 (ctxSize )
130- configuration .ContextSize = & v
131- }
132-
133- // Set llama.cpp-specific reasoning budget if provided
134- if reasoningBudget != nil {
135- if configuration .LlamaCpp == nil {
136- configuration .LlamaCpp = & inference.LlamaCppConfig {}
137- }
138- configuration .LlamaCpp .ReasoningBudget = reasoningBudget
139- }
140-
85+ size := int32 (ctxSize )
14186 if err := desktopClient .ConfigureBackend (scheduling.ConfigureRequest {
142- Model : model ,
143- Mode : backendMode ,
144- BackendConfiguration : configuration ,
87+ Model : model ,
88+ BackendConfiguration : inference.BackendConfiguration {
89+ ContextSize : & size ,
90+ Speculative : speculativeConfig ,
91+ },
14592 }); err != nil {
14693 configErrFmtString := "failed to configure backend for model %s with context-size %d"
14794 _ = sendErrorf (configErrFmtString + ": %v" , model , ctxSize , err )
@@ -171,57 +118,10 @@ func newUpCommand() *cobra.Command {
171118 c .Flags ().StringVar (& draftModel , "speculative-draft-model" , "" , "draft model for speculative decoding" )
172119 c .Flags ().IntVar (& numTokens , "speculative-num-tokens" , 0 , "number of tokens to predict speculatively" )
173120 c .Flags ().Float64Var (& minAcceptanceRate , "speculative-min-acceptance-rate" , 0 , "minimum acceptance rate for speculative decoding" )
174- c .Flags ().StringVar (& mode , "mode" , "" , "backend operation mode (completion, embedding, reranking)" )
175- c .Flags ().StringVar (& think , "think" , "" , "enable reasoning mode for thinking models (true/false/high/medium/low)" )
176121 _ = c .MarkFlagRequired ("model" )
177122 return c
178123}
179124
180- // parseBackendMode parses a string mode value into an inference.BackendMode.
181- func parseBackendMode (mode string ) (inference.BackendMode , error ) {
182- switch strings .ToLower (mode ) {
183- case "completion" :
184- return inference .BackendModeCompletion , nil
185- case "embedding" :
186- return inference .BackendModeEmbedding , nil
187- case "reranking" :
188- return inference .BackendModeReranking , nil
189- default :
190- return inference .BackendModeCompletion , fmt .Errorf ("invalid mode %q: must be one of completion, embedding, reranking" , mode )
191- }
192- }
193-
194- // parseThinkToReasoningBudget converts the think parameter string to a reasoning budget value.
195- // Accepts: "true", "false", "high", "medium", "low"
196- // Returns:
197- // - nil for empty string or "true" (use server default, which is unlimited)
198- // - -1 for "high" (explicitly set unlimited)
199- // - 0 for "false" (disable thinking)
200- // - 1024 for "medium"
201- // - 256 for "low"
202- func parseThinkToReasoningBudget (think string ) (* int32 , error ) {
203- if think == "" {
204- return nil , nil
205- }
206-
207- switch strings .ToLower (think ) {
208- case "true" :
209- // Use nil to let the server use its default (currently unlimited)
210- return nil , nil
211- case "high" :
212- // Explicitly set unlimited reasoning budget
213- return ptr (reasoningBudgetUnlimited ), nil
214- case "false" :
215- return ptr (reasoningBudgetDisabled ), nil
216- case "medium" :
217- return ptr (reasoningBudgetMedium ), nil
218- case "low" :
219- return ptr (reasoningBudgetLow ), nil
220- default :
221- return nil , fmt .Errorf ("invalid think value %q: must be one of true, false, high, medium, low" , think )
222- }
223- }
224-
225125func newDownCommand () * cobra.Command {
226126 c := & cobra.Command {
227127 Use : "down" ,
0 commit comments