fireform-core · marcvergees · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,7 @@ venv
 node_modules/
 frontend/dist/
 frontend/out/
+src/inputs/
 
 # macOS
 .DS_Store
@@ -22,4 +23,7 @@ src/inputs/*.pdf
 .codex/
 
 # Electron build artifacts
-frontend/release/
+frontend/release/
+
+# Local Claude Code instructions
+CLAUDE.md
diff --git a/Makefile b/Makefile
@@ -1,5 +1,9 @@
 .PHONY: help build up down logs shell exec pull-model test clean fireform logs-app logs-ollama logs-frontend super-clean
 
+# Extraction model pulled into Ollama and used by src/llm.py (a 1.5B model keeps per-field fills fast). Override with
+# `make pull-model OLLAMA_MODEL=...`; the app container reads OLLAMA_MODEL from docker-compose.yml, so keep both the same.
+OLLAMA_MODEL ?= qwen2.5:1.5b
+
 help:
 	@printf '%s\n' \
 	'    ______                ______                     ' \
@@ -21,13 +25,13 @@ help:
 	@echo "make logs-ollama  - View Ollama container logs"
 	@echo "make shell        - Open Python shell in app container"
 	@echo "make exec         - Execute Python script in container"
-	@echo "make pull-model   - Pull Mistral model into Ollama"
+	@echo "make pull-model   - Pull the extraction model ($(OLLAMA_MODEL)) into Ollama"
 	@echo "make test         - Run tests"
 	@echo "make clean        - Remove containers"
 	@echo "make super-clean  - [CAUTION] Use carefully. Cleans up ALL stopped  containers, networks, build cache..."
 
 # Fix #382 — pull-model is now part of the main setup flow
-# Mistral is pulled automatically before you need it
+# The extraction model is pulled automatically before you need it
 fireform: build up pull-model
 	@echo ""
 	@echo "✅ FireForm is ready!"
@@ -69,7 +73,7 @@ exec:
 	docker compose exec app python3 src/main.py
 
 pull-model:
-	docker compose exec ollama ollama pull mistral
+	docker compose exec ollama ollama pull $(OLLAMA_MODEL)
 
 # Fix — correct test directory (was src/test/ which doesn't exist)
 test:

diff --git a/api/routes/forms.py b/api/routes/forms.py
@@ -1,7 +1,15 @@
-from fastapi import APIRouter, Depends
+import os
+
+import requests
+from fastapi import APIRouter, Depends, File, UploadFile
 from sqlmodel import Session
 from api.deps import get_db
-from api.schemas.forms import FormFill, FormFillResponse
+from api.schemas.forms import (
+    FormFill,
+    FormFillResponse,
+    ModelsResponse,
+    TranscriptionResponse,
+)
 from api.db.repositories import create_form, get_template
 from api.db.models import FormSubmission
 from api.errors.base import AppError
@@ -23,9 +31,77 @@ def fill_form(form: FormFill, db: Session = Depends(get_db)):
             user_input=form.input_text,
             fields=fetched_template.fields,
             pdf_form_path=fetched_template.pdf_path,
+            model=form.model,
+        )
+
+        # `model` is a runtime override, not a column — keep it out of the DB row.
+        submission = FormSubmission(
+            **form.model_dump(exclude={"model"}), output_pdf_path=path
         )
-
-        submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
         return create_form(db, submission)
     except Exception as e:
         raise AppError(str(e), status_code=500)
+
+
+@router.get("/models", response_model=ModelsResponse)
+def list_models() -> ModelsResponse:
+    """List every model name reported by the local Ollama instance (GET
+    /api/tags), plus the configured default (OLLAMA_MODEL). Used by the Fill Form
+    UI's model picker. Falls back to just the default if the Ollama request fails."""
+    default_model = os.getenv("OLLAMA_MODEL", "qwen2.5:1.5b")
+    ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")  # no trailing slash, so the URL join below is clean
+
+    models: list[str] = []
+    try:
+        response = requests.get(f"{ollama_host}/api/tags", timeout=10)
+        response.raise_for_status()
+        models = [m["name"] for m in response.json().get("models", []) if m.get("name")]  # skip entries without a name
+    except requests.exceptions.RequestException:  # best effort: any request failure yields an empty list
+        models = []
+
+    # Always surface the configured default, even if Ollama hasn't pulled it yet.
+    if default_model not in models:
+        models.insert(0, default_model)  # default goes first when it had to be added
+
+    return ModelsResponse(models=models, default=default_model)
+
+
+@router.post("/transcribe", response_model=TranscriptionResponse)
+def transcribe(audio: UploadFile = File(...)) -> TranscriptionResponse:
+    """Forward recorded audio to the local Whisper ASR sidecar and return text.
+
+    Mirrors the Ollama wiring: WHISPER_HOST points at the whisper service
+    (http://whisper:9000 inside Docker, http://localhost:9000 otherwise). The
+    upload is read fully into memory, forwarded in one request, and not stored
+    by this handler — audio stays on the machine as long as WHISPER_HOST is local.
+    """
+    whisper_host = os.getenv("WHISPER_HOST", "http://localhost:9000").rstrip("/")
+    whisper_url = f"{whisper_host}/asr"
+
+    files = {
+        "audio_file": (
+            audio.filename or "audio.wav",  # fallback name when the client sent none
+            audio.file.read(),  # whole upload is buffered in memory here (not streamed)
+            audio.content_type or "audio/wav",  # fallback type when the client sent none
+        )
+    }
+    params = {"task": "transcribe", "output": "json", "encode": "true"}  # sent as query parameters of the /asr request
+
+    try:
+        response = requests.post(whisper_url, params=params, files=files, timeout=120)
+        response.raise_for_status()
+    except requests.exceptions.ConnectionError:  # service unreachable -> 503
+        raise AppError(
+            f"Could not connect to the speech-to-text service at {whisper_url}. "
+            "Please ensure the whisper service is running.",
+            status_code=503,
+        )
+    except requests.exceptions.RequestException as e:  # any other request failure (timeout, non-2xx) -> 502
+        raise AppError(f"Transcription failed: {e}", status_code=502)
+
+    try:
+        text = (response.json().get("text") or "").strip()  # missing or null "text" becomes ""
+    except ValueError:  # body was not valid JSON: fall back to the raw response text
+        text = response.text.strip()
+
+    return TranscriptionResponse(text=text)
diff --git a/api/schemas/forms.py b/api/schemas/forms.py
@@ -3,6 +3,9 @@
 class FormFill(BaseModel):
     template_id: int
     input_text: str
+    # Optional Ollama model override for this fill; falls back to OLLAMA_MODEL.
+    # Not persisted (no DB column) — excluded before building FormSubmission.
+    model: str | None = None
 
     @field_validator("input_text")
     def validate_input_text(cls, value):
@@ -18,4 +21,13 @@ class FormFillResponse(BaseModel):
     output_pdf_path: str
 
     class Config:
-        from_attributes = True
+        from_attributes = True
+
+
+class TranscriptionResponse(BaseModel):  # response body of the `/transcribe` route
+    text: str  # transcript with surrounding whitespace stripped; may be empty
+
+
+class ModelsResponse(BaseModel):  # response body of the `/models` route
+    models: list[str]  # model names offered to the client; the route always includes `default`
+    default: str  # configured default model name (OLLAMA_MODEL, as resolved by the route)
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -15,6 +15,30 @@ services:
       retries: 5
       start_period: 30s
 
+  whisper:
+    # Multi-arch (arm64 + amd64) Whisper ASR service — runs natively on Apple
+    # Silicon. Uses the faster-whisper (CTranslate2) engine and bundles ffmpeg,
+    # so it accepts any audio the browser produces. Model is pulled from
+    # Hugging Face on first request into the whisper_models volume.
+    image: onerahmet/openai-whisper-asr-webservice:latest  # NOTE(review): unpinned `latest` tag — consider pinning a release for reproducible pulls
+    container_name: fireform-whisper
+    environment:
+      - ASR_ENGINE=faster_whisper
+      - ASR_MODEL=small.en
+      - ASR_MODEL_PATH=/data/whisper  # same path as the whisper_models volume mount below
+    volumes:
+      - whisper_models:/data/whisper
+    ports:
+      - "127.0.0.1:9000:9000"  # published on the host's loopback interface only
+    networks:
+      - fireform-network
+    healthcheck:
+      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:9000/docs')\" || exit 1"]  # healthy once /docs answers from inside the container
+      interval: 15s
+      timeout: 5s
+      retries: 5
+      start_period: 60s  # NOTE(review): presumably allows for slow first startup — confirm
+
   app:
     build:
       context: .
@@ -23,9 +47,14 @@ services:
     depends_on:
       ollama:
         condition: service_healthy
+      whisper:
+        condition: service_started
     command: /bin/sh -c "python3 -m api.db.init_db && python3 -m uvicorn api.main:app --host 0.0.0.0 --port 8000"
     volumes:
       - .:/app
+      # Persist the SQLite DB (~/.fireform) across container rebuilds so created
+      # templates aren't wiped each time the image is recreated.
+      - fireform_db:/root/.fireform
     ports:
       - "8000:8000"
     environment:
@@ -35,6 +64,8 @@ services:
       - PYTHONPATH=/app
       - OLLAMA_HOST=http://ollama:11434
       - OLLAMA_TIMEOUT=300
+      - OLLAMA_MODEL=${OLLAMA_MODEL:-qwen2.5:1.5b}  # follows OLLAMA_MODEL from the host env / `make ... OLLAMA_MODEL=...`; same default as the Makefile
+      - WHISPER_HOST=http://whisper:9000
     networks:
       - fireform-network
 
@@ -56,6 +87,10 @@ services:
 volumes:
   ollama_data:
     driver: local
+  whisper_models:
+    driver: local
+  fireform_db:
+    driver: local
 
 networks:
   fireform-network: