# docker-compose.gpu.yml
name: vowel-self-hosted-gpu
# NVIDIA GPU-accelerated version of the vowel stack.
# Requires: NVIDIA Container Toolkit (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
#
# Usage:
#   docker compose -f docker-compose.gpu.yml up -d
#
# The echoline service is assigned to the `echoline` and `full-self-hosted`
# profiles, so the command above does not start it. To include it:
#   docker compose -f docker-compose.gpu.yml --profile full-self-hosted up -d
#
# Or with the helper script:
#   bun run stack:up --gpu
services:
  # Echoline (CUDA image tag). Opt-in: it is only started when the `echoline`
  # or `full-self-hosted` profile is enabled.
  echoline:
    image: ghcr.io/vowel/echoline:latest-cuda
    container_name: vowel-echoline
    ports:
      - "${ECHOLINE_HOST_PORT:-8000}:8000"
    # The default CHAT_COMPLETION_BASE_URL below points at host.docker.internal.
    # Docker Engine on Linux does not define that name by itself; mapping it to
    # host-gateway makes the default resolvable there too.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      CHAT_COMPLETION_BASE_URL: ${ECHOLINE_CHAT_COMPLETION_BASE_URL:-http://host.docker.internal:8787/v1}
      # Falls back to the engine API key, then to an empty string.
      CHAT_COMPLETION_API_KEY: ${ECHOLINE_CHAT_COMPLETION_API_KEY:-${ENGINE_API_KEY:-}}
      HF_TOKEN: ${HF_TOKEN:-}
      ECHOLINE_LOG_LEVEL: ${ECHOLINE_LOG_LEVEL:-INFO}
    volumes:
      # Persist the Hugging Face hub cache across container restarts.
      - echoline-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    profiles:
      - echoline
      - full-self-hosted

  # Engine service, built from ./engine and published on ENGINE_HOST_PORT.
  engine:
    build:
      context: ./engine
      dockerfile: Dockerfile.runtime-node
    container_name: vowel-engine
    ports:
      - "${ENGINE_HOST_PORT:-8787}:8787"
    environment:
      PORT: "8787"
      NODE_ENV: production
      RUNTIME_CONFIG_PATH: /app/data/config/runtime.yaml
      # No defaults: these interpolate to empty strings when unset.
      API_KEY: ${ENGINE_API_KEY}
      JWT_SECRET: ${JWT_SECRET}
      TEST_MODE: ${TEST_MODE:-true}
      LLM_PROVIDER: ${LLM_PROVIDER:-openrouter}
      GROQ_API_KEY: ${GROQ_API_KEY:-local-stack-placeholder}
      GROQ_MODEL: ${GROQ_MODEL:-openai/gpt-oss-20b}
      GROQ_WHISPER_MODEL: ${GROQ_WHISPER_MODEL:-whisper-large-v3}
      OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-}
      OPENROUTER_MODEL: ${OPENROUTER_MODEL:-openrouter/healer-alpha}
      OPENAI_COMPATIBLE_BASE_URL: ${OPENAI_COMPATIBLE_BASE_URL:-http://echoline:8000/v1}
      OPENAI_COMPATIBLE_API_KEY: ${OPENAI_COMPATIBLE_API_KEY:-}
      STT_PROVIDER: ${STT_PROVIDER:-deepgram}
      DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY:-}
      DEEPGRAM_STT_MODEL: ${DEEPGRAM_STT_MODEL:-nova-3}
      DEEPGRAM_STT_LANGUAGE: ${DEEPGRAM_STT_LANGUAGE:-en-US}
      TTS_PROVIDER: ${TTS_PROVIDER:-deepgram}
      DEEPGRAM_TTS_MODEL: ${DEEPGRAM_TTS_MODEL:-aura-2-thalia-en}
      VAD_PROVIDER: ${VAD_PROVIDER:-silero}
      VAD_ENABLED: ${VAD_ENABLED:-true}
      # NVIDIA GPU visibility for ONNX Runtime CUDA execution provider.
      # CUDA_VISIBLE_DEVICES is intentionally not set: it only accepts device
      # indices or UUIDs, and an invalid entry such as "all" hides every GPU
      # from CUDA. Leaving it unset lets CUDA see all devices the NVIDIA
      # runtime exposes.
      NVIDIA_VISIBLE_DEVICES: all
      # Enable CUDA execution provider for Silero VAD
      ORT_CUDA_PROVIDER: "1"
    volumes:
      - engine-data:/app/data
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    depends_on:
      # echoline is profile-gated, so it is declared as an optional dependency:
      # with `required: false` the engine still starts when echoline is not
      # enabled. (`required` needs a Compose release that supports it.)
      echoline:
        condition: service_started
        required: false

  # Core service, built from the repository root and published on
  # CORE_HOST_PORT.
  core:
    build:
      context: .
      dockerfile: core/Dockerfile
    container_name: vowel-core
    depends_on:
      # NOTE(review): `service_healthy` needs the engine container to have a
      # healthcheck. None is declared in this file, so it presumably comes from
      # a HEALTHCHECK in engine/Dockerfile.runtime-node - confirm, otherwise
      # core cannot start.
      engine:
        condition: service_healthy
    ports:
      - "${CORE_HOST_PORT:-3000}:3000"
    environment:
      PORT: "3000"
      DB_PATH: /app/data/core.db
      ENCRYPTION_KEY: ${ENCRYPTION_KEY}
      # HTTP URLs use the in-network service name; WS URLs use localhost with
      # the published host port.
      ENGINE_URL: http://engine:8787
      ENGINE_WS_URL: ws://localhost:${ENGINE_HOST_PORT:-8787}/v1/realtime
      VOWEL_ENGINE_URL: http://engine:8787
      VOWEL_ENGINE_WS_URL: ws://localhost:${ENGINE_HOST_PORT:-8787}/v1/realtime
      VOWEL_ENGINE_API_KEY: ${ENGINE_API_KEY}
      ENGINE_API_KEY: ${ENGINE_API_KEY}
      OPENAI_API_KEY: ${OPENAI_API_KEY:-}
      XAI_API_KEY: ${XAI_API_KEY:-}
      DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY:-}
      DEEPGRAM_STT_MODEL: ${DEEPGRAM_STT_MODEL:-nova-3}
      DEEPGRAM_STT_LANGUAGE: ${DEEPGRAM_STT_LANGUAGE:-en-US}
      DEEPGRAM_TTS_MODEL: ${DEEPGRAM_TTS_MODEL:-aura-2-thalia-en}
      OPENAI_COMPATIBLE_BASE_URL: ${OPENAI_COMPATIBLE_BASE_URL:-http://echoline:8000/v1}
      OPENAI_COMPATIBLE_API_KEY: ${OPENAI_COMPATIBLE_API_KEY:-}
      ECHOLINE_STT_MODEL: ${ECHOLINE_STT_MODEL:-Systran/faster-whisper-tiny}
      ECHOLINE_TTS_MODEL: ${ECHOLINE_TTS_MODEL:-onnx-community/Kokoro-82M-v1.0-ONNX}
      ECHOLINE_TTS_VOICE: ${ECHOLINE_TTS_VOICE:-af_heart}
      # Default providers follow STT_PROVIDER / TTS_PROVIDER unless overridden.
      DEFAULT_STT_PROVIDER: ${DEFAULT_STT_PROVIDER:-${STT_PROVIDER:-deepgram}}
      DEFAULT_TTS_PROVIDER: ${DEFAULT_TTS_PROVIDER:-${TTS_PROVIDER:-deepgram}}
      CORE_ENABLE_DEV_VOICE_OVERRIDES: ${CORE_ENABLE_DEV_VOICE_OVERRIDES:-false}
      ENDPOINT_PRESET_VOWEL_PRIME_STAGING_HTTP_URL: http://engine:8787
      ENDPOINT_PRESET_VOWEL_PRIME_STAGING_WS_URL: ws://localhost:${ENGINE_HOST_PORT:-8787}/v1/realtime
      CORE_BOOTSTRAP_APP_ID: ${CORE_BOOTSTRAP_APP_ID:-default}
      CORE_BOOTSTRAP_APP_NAME: ${CORE_BOOTSTRAP_APP_NAME:-Local Stack App}
      CORE_BOOTSTRAP_APP_DESCRIPTION: ${CORE_BOOTSTRAP_APP_DESCRIPTION:-Bootstrap app for the self-hosted Docker stack}
      CORE_BOOTSTRAP_API_KEY_LABEL: ${CORE_BOOTSTRAP_API_KEY_LABEL:-Local Stack Key}
      CORE_BOOTSTRAP_SCOPES: ${CORE_BOOTSTRAP_SCOPES:-mint_ephemeral}
      CORE_BOOTSTRAP_ALLOWED_PROVIDERS: ${CORE_BOOTSTRAP_ALLOWED_PROVIDERS:-vowel-prime}
      CORE_BOOTSTRAP_ALLOWED_ENDPOINT_PRESETS: ${CORE_BOOTSTRAP_ALLOWED_ENDPOINT_PRESETS:-staging}
      CORE_BOOTSTRAP_DEFAULT_ENDPOINT_PRESET: ${CORE_BOOTSTRAP_DEFAULT_ENDPOINT_PRESET:-staging}
      CORE_BOOTSTRAP_PUBLISHABLE_KEY: ${CORE_BOOTSTRAP_PUBLISHABLE_KEY}
    volumes:
      - core-data:/app/data
    restart: unless-stopped
# Named volumes referenced by the services' `volumes:` mounts. Each uses the
# default driver with no extra options.
volumes:
  core-data: {}
  engine-data: {}
  echoline-cache: {}