-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml
More file actions
99 lines (85 loc) · 2.31 KB
/
config.yaml
File metadata and controls
99 lines (85 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---
# Smart LLM Router Configuration
# NOTE(review): indentation was lost in the copied source; the nesting below is
# reconstructed from the section comments — verify against the consuming code.

# Local model settings
local:
  # Providers to check for local models
  providers:
    - name: ollama
      enabled: true
      base_url: "http://localhost:11434"
    - name: lmstudio
      enabled: true
      base_url: "http://localhost:1234"

  # Models that can run on your hardware (i5-8th Gen, 128MB VRAM)
  # These should be small quantized models
  preferred_models:
    - phi-3-mini
    - tinyllama
    - stablelm-2-zephyr
    - qwen2.5-coder-1.5b
    - deepseek-coder-1.3b

  # Maximum model size in GB that your system can handle
  max_model_size_gb: 4

  # Use CPU inference (since VRAM is limited)
  prefer_cpu: true

# Cloud model settings
cloud:
  providers:
    - name: groq
      enabled: true
      api_key_env: "GROQ_API_KEY"
      default_model: "llama-3.1-8b-instant"
      free_tier: true
    - name: gemini
      enabled: true
      api_key_env: "GEMINI_API_KEY"
      default_model: "gemini-1.5-flash"
      free_tier: true
    - name: anthropic
      enabled: false
      api_key_env: "ANTHROPIC_API_KEY"
      default_model: "claude-sonnet-4-20250514"
      free_tier: false

# Token budget management
token_budget:
  daily_limit: 100000  # tokens per day
  warning_threshold: 0.8  # warn at 80% usage

# Routing rules
routing:
  # Task complexity thresholds
  complexity:
    # Use local for these simple tasks
    local_threshold: 3  # out of 10
    # Force cloud for complex tasks
    cloud_threshold: 7  # out of 10

  # Task type routing
  task_types:
    # Always use local for these (saves tokens)
    local_preferred:
      - "code_completion"
      - "simple_refactor"
      - "explain_code"
      - "find_bug"
      - "format_code"
    # Always use cloud for these (needs intelligence)
    cloud_required:
      - "architecture_design"
      - "complex_refactor"
      - "security_audit"
      - "new_feature_design"
    # Smart routing based on complexity
    smart_routing:
      - "debug"
      - "optimize"
      - "test_generation"
      - "documentation"

# Vibe coding session settings
vibe_coding:
  # Auto-save context to reduce re-tokenization
  context_cache: true
  cache_ttl_minutes: 30
  # Chunk large files to reduce token usage
  file_chunking:
    enabled: true
    max_lines_per_chunk: 100
  # Track token savings
  track_savings: true