-
Notifications
You must be signed in to change notification settings - Fork 70
fix: change context size and reasoning budget types from uint64 to int32 #487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -321,7 +321,7 @@ func cmdPackage(args []string) int { | |
|
|
||
| if contextSize > 0 { | ||
| fmt.Println("Setting context size:", contextSize) | ||
| b = b.WithContextSize(contextSize) | ||
| b = b.WithContextSize(int32(contextSize)) | ||
| } | ||
|
Comment on lines
322
to
325
Contributor
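There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to another file, the conversion from `uint64` to `int32` for `contextSize` could lead to an integer overflow if a user provides a value larger than `math.MaxInt32`. Consider adding a validation check before converting:
if contextSize > 0 {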
if contextSize > 2147483647 { // math.MaxInt32
fmt.Fprintf(os.Stderr, "context size %d is too large, must be less than or equal to 2147483647\n", contextSize)
return 1
}
fmt.Println("Setting context size:", contextSize)
b = b.WithContextSize(int32(contextSize))
} |
||
|
|
||
| if mmproj != "" { | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -198,7 +198,11 @@ | |||||
| return inference.RequiredMemory{}, &inference.ErrGGUFParse{Err: err} | ||||||
| } | ||||||
|
|
||||||
| contextSize := GetContextSize(mdlConfig, config) | ||||||
| configuredContextSize := GetContextSize(mdlConfig, config) | ||||||
| contextSize := int32(4096) // default context size | ||||||
| if configuredContextSize != nil { | ||||||
| contextSize = int32(*configuredContextSize) | ||||||
|
Check failure on line 204 in pkg/inference/backends/llamacpp/llamacpp.go
|
||||||
|
Contributor
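There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The type cast `int32(*configuredContextSize)` appears to be redundant: `GetContextSize` now returns `*int32`, so the dereferenced value is already an `int32`.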
Suggested change
|
||||||
| } | ||||||
|
|
||||||
| var ngl uint64 | ||||||
| if l.gpuSupported { | ||||||
|
|
@@ -240,9 +244,9 @@ | |||||
| } | ||||||
|
|
||||||
| // estimateMemoryFromGGUF estimates memory requirements from a parsed GGUF file. | ||||||
| func (l *llamaCpp) estimateMemoryFromGGUF(ggufFile *parser.GGUFFile, contextSize uint64, ngl uint64) inference.RequiredMemory { | ||||||
| func (l *llamaCpp) estimateMemoryFromGGUF(ggufFile *parser.GGUFFile, contextSize int32, ngl uint64) inference.RequiredMemory { | ||||||
| estimate := ggufFile.EstimateLLaMACppRun( | ||||||
| parser.WithLLaMACppContextSize(int32(contextSize)), | ||||||
| parser.WithLLaMACppContextSize(contextSize), | ||||||
| parser.WithLLaMACppLogicalBatchSize(2048), | ||||||
| parser.WithLLaMACppOffloadLayers(ngl), | ||||||
| ) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,8 @@ import ( | |
| "github.com/docker/model-runner/pkg/inference" | ||
| ) | ||
|
|
||
| const UnlimitedContextSize = -1 | ||
|
|
||
| // Config is the configuration for the llama.cpp backend. | ||
| type Config struct { | ||
| // Args are the base arguments that are always included. | ||
|
|
@@ -68,11 +70,14 @@ func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference | |
| } | ||
|
|
||
| if budget := GetReasoningBudget(config); budget != nil { | ||
| args = append(args, "--reasoning-budget", strconv.FormatInt(*budget, 10)) | ||
| args = append(args, "--reasoning-budget", strconv.FormatInt(int64(*budget), 10)) | ||
| } | ||
|
|
||
| // Add context size from model config or backend config | ||
| args = append(args, "--ctx-size", strconv.FormatUint(GetContextSize(bundle.RuntimeConfig(), config), 10)) | ||
| contextSize := GetContextSize(bundle.RuntimeConfig(), config) | ||
| if contextSize != nil { | ||
| args = append(args, "--ctx-size", strconv.FormatInt(int64(*contextSize), 10)) | ||
| } | ||
|
|
||
| // Add arguments for Multimodal projector or jinja (they are mutually exclusive) | ||
| if path := bundle.MMPROJPath(); path != "" { | ||
|
|
@@ -84,20 +89,19 @@ func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference | |
| return args, nil | ||
| } | ||
|
|
||
| func GetContextSize(modelCfg types.Config, backendCfg *inference.BackendConfiguration) uint64 { | ||
| func GetContextSize(modelCfg types.Config, backendCfg *inference.BackendConfiguration) *int32 { | ||
| // Model config takes precedence | ||
| if modelCfg.ContextSize != nil { | ||
| return *modelCfg.ContextSize | ||
| if modelCfg.ContextSize != nil && (*modelCfg.ContextSize == UnlimitedContextSize || *modelCfg.ContextSize > 0) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The condition |
||
| return modelCfg.ContextSize | ||
| } | ||
| // else use backend config | ||
| if backendCfg != nil && backendCfg.ContextSize > 0 { | ||
| return uint64(backendCfg.ContextSize) | ||
| // Fallback to backend config | ||
| if backendCfg != nil && backendCfg.ContextSize != nil && (*backendCfg.ContextSize == UnlimitedContextSize || *backendCfg.ContextSize > 0) { | ||
| return backendCfg.ContextSize | ||
| } | ||
| // finally return default | ||
| return 4096 // llama.cpp default | ||
| return nil | ||
| } | ||
|
|
||
| func GetReasoningBudget(backendCfg *inference.BackendConfiguration) *int64 { | ||
| func GetReasoningBudget(backendCfg *inference.BackendConfiguration) *int32 { | ||
| if backendCfg != nil && backendCfg.LlamaCpp != nil && backendCfg.LlamaCpp.ReasoningBudget != nil { | ||
| return backendCfg.LlamaCpp.ReasoningBudget | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The conversion from `uint64` to `int32` for `contextSize` could lead to an integer overflow if a user provides a value larger than `math.MaxInt32`. While unlikely for a context size, adding a validation check would make the code more robust.