Coverage for src/ai_shell/container.py: 75%
597 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 22:06 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 22:06 +0000
1"""Docker container lifecycle management.
3Replaces docker-compose.yml by using Docker SDK to create and manage containers
4with the exact same configuration.
5"""
7from __future__ import annotations
9import json
10import logging
11import os
12import subprocess
13import sys
14import time
15from pathlib import Path
16from typing import TYPE_CHECKING, NoReturn
18from docker.errors import APIError, ImageNotFound, NotFound
19from docker.types import DeviceRequest, Mount
21import docker
22from ai_shell.defaults import (
23 COMFYUI_CONTAINER,
24 COMFYUI_DATA_VOLUME,
25 COMFYUI_IMAGE,
26 KOKORO_CONTAINER,
27 KOKORO_IMAGE_CPU,
28 KOKORO_IMAGE_GPU,
29 LLM_NETWORK,
30 N8N_CONTAINER,
31 N8N_IMAGE,
32 OLLAMA_CONTAINER,
33 OLLAMA_CPU_SHARES,
34 OLLAMA_DATA_VOLUME,
35 OLLAMA_IMAGE,
36 OLLAMA_VRAM_BUFFER_BYTES,
37 SHM_SIZE,
38 VOICE_AGENT_CONTAINER,
39 VOICE_AGENT_DATA_VOLUME,
40 VOICE_AGENT_IMAGE,
41 WEBUI_CONTAINER,
42 WEBUI_DATA_VOLUME,
43 WEBUI_IMAGE,
44 WHISPER_CONTAINER,
45 WHISPER_DATA_VOLUME,
46 WHISPER_IMAGE_CPU,
47 WHISPER_IMAGE_GPU,
48 _resolve_env,
49 build_dev_environment,
50 build_dev_mounts,
51 build_n8n_environment,
52 build_n8n_mounts,
53 dev_container_name,
54 project_dev_port,
55)
56from ai_shell.exceptions import (
57 ContainerNotFoundError,
58 DockerNotAvailableError,
59 GpuRequiredError,
60 ImagePullError,
61)
62from ai_shell.gpu import detect_gpu, get_vram_info
64if TYPE_CHECKING:
65 from docker.models.containers import Container
66 from docker.models.images import Image
68 from ai_shell.config import AiShellConfig
70logger = logging.getLogger(__name__)
def _exec_docker(args: list[str]) -> NoReturn:
    """Run a docker CLI command to completion, then exit with its status.

    ``subprocess.run`` is used in place of ``os.execvp`` for Windows
    compatibility: there ``os.execvp`` does not truly replace the process,
    which causes TTY issues.

    Args:
        args: Full argument vector, starting with the executable name.

    Raises:
        SystemExit: Always, carrying the child's return code.
    """
    logger.debug("exec: %s", " ".join(args))
    # Push out our own buffered output before the child starts writing.
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    completed = subprocess.run(args)
    sys.exit(completed.returncode)
def _run_docker(args: list[str]) -> tuple[int, float]:
    """Run a docker CLI command and report how it went.

    Unlike ``_exec_docker`` this never calls ``sys.exit()``; control returns
    to the caller.

    Args:
        args: Full argument vector, starting with the executable name.

    Returns:
        ``(exit_code, elapsed_seconds)`` where the elapsed time is measured
        with a monotonic clock around the child process.
    """
    logger.debug("run: %s", " ".join(args))
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    started_at = time.monotonic()
    exit_code = subprocess.run(args).returncode
    return exit_code, time.monotonic() - started_at
def _run_docker_with_typeahead(args: list[str], typeahead: bytes) -> tuple[int, float]:
    """Run docker exec under a PTY, pre-injecting typeahead bytes.

    Used when the user typed during the slow startup phase: those bytes need to
    be replayed into the inner process exactly as if they had been typed once
    the shell attached. Standard subprocess inheritance can't do that because
    we need to inject our own bytes ahead of the live stdin stream.

    POSIX-only: relies on ``pty``, ``termios``, ``tty`` and ``fcntl``.

    Args:
        args: Full docker CLI argument vector to spawn.
        typeahead: Bytes written to the PTY before any live stdin is relayed.

    Returns:
        ``(exit_code, elapsed_seconds)`` for the spawned process.
    """
    import fcntl
    import pty
    import select
    import signal
    import termios
    import tty

    logger.debug("run+pty: %s", " ".join(args))
    sys.stdout.flush()
    sys.stderr.flush()

    # Query the host terminal before allocating the PTY so that a failure here
    # cannot leak the PTY file descriptors.
    stdin_fd = sys.stdin.fileno()
    stdout_fd = sys.stdout.fileno()
    original_termios = termios.tcgetattr(stdin_fd)
    master_fd, slave_fd = pty.openpty()

    def _copy_winsize(target_fd: int) -> None:
        """Best-effort copy of the host terminal size onto *target_fd*."""
        try:
            size = fcntl.ioctl(stdout_fd, termios.TIOCGWINSZ, b"\x00" * 8)
            fcntl.ioctl(target_fd, termios.TIOCSWINSZ, size)
        except OSError:
            pass

    # Match the PTY size to the host terminal so curses-based tools render correctly.
    _copy_winsize(slave_fd)

    def _on_winch(_signum: int, _frame: object) -> None:
        # The slave end is closed in this process as soon as the child owns it,
        # so resizes must go through the master end, which stays open until the
        # relay loop has finished.
        _copy_winsize(master_fd)

    previous_winch = signal.signal(signal.SIGWINCH, _on_winch)

    start = time.monotonic()
    try:
        proc = subprocess.Popen(
            args,
            stdin=slave_fd,
            stdout=slave_fd,
            stderr=slave_fd,
            close_fds=True,
        )
    except BaseException:
        # Spawn failed: undo the handler and release the master end, then re-raise.
        signal.signal(signal.SIGWINCH, previous_winch)
        os.close(master_fd)
        raise
    finally:
        # The parent never uses the slave end again, whether or not the spawn worked.
        os.close(slave_fd)

    watched = [master_fd, stdin_fd]
    try:
        tty.setraw(stdin_fd)
        if typeahead:
            os.write(master_fd, typeahead)

        while True:
            if proc.poll() is not None:
                # Drain any final output.
                try:
                    while True:
                        chunk = os.read(master_fd, 4096)
                        if not chunk:
                            break
                        os.write(stdout_fd, chunk)
                except OSError:
                    pass
                break
            try:
                ready, _, _ = select.select(watched, [], [], 0.1)
            except (OSError, ValueError):
                break
            if master_fd in ready:
                try:
                    chunk = os.read(master_fd, 4096)
                except OSError:
                    chunk = b""
                if not chunk:
                    break
                os.write(stdout_fd, chunk)
            if stdin_fd in ready:
                try:
                    typed = os.read(stdin_fd, 4096)
                except OSError:
                    typed = None  # read error: skip this round, keep polling
                if typed:
                    os.write(master_fd, typed)
                elif typed is not None:
                    # Genuine EOF: stdin would stay "readable" forever and turn
                    # this loop into a busy spin, so stop watching it.
                    watched.remove(stdin_fd)
    finally:
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, original_termios)
        signal.signal(signal.SIGWINCH, previous_winch)
        try:
            os.close(master_fd)
        except OSError:
            pass
        if proc.poll() is None:
            proc.wait()

    elapsed = time.monotonic() - start
    return proc.returncode, elapsed
203class ContainerManager:
204 """Manages Docker containers for ai-shell.
206 Handles the dev container (per-project) and LLM stack (host-level singletons).
207 """
209 def __init__(self, config: AiShellConfig) -> None:
210 self.config = config
211 try:
212 self.client = docker.from_env() # type: ignore[attr-defined]
213 self.client.ping()
214 except docker.errors.DockerException as e:
215 raise DockerNotAvailableError(
216 f"Docker is not available. Is the Docker daemon running?\n Error: {e}"
217 ) from e
219 # =========================================================================
220 # Dev container (per-project)
221 # =========================================================================
223 def resolve_dev_container(self) -> tuple[str, Container | None]:
224 """Resolve the dev container, checking both current and legacy names.
226 Returns ``(name, container)`` where *container* is ``None`` when no
227 matching container exists. When no container is found under either
228 name, the current hash-based name is returned so callers can use it
229 for creation.
230 """
231 name = dev_container_name(self.config.project_name, self.config.project_dir)
232 container = self._get_container(name)
233 if container is not None:
234 return name, container
236 legacy_name = dev_container_name(self.config.project_name)
237 legacy_container = self._get_container(legacy_name)
238 if legacy_container is not None and self._container_matches_project(
239 legacy_container, self.config.project_dir
240 ):
241 return legacy_name, legacy_container
243 return name, None
245 def ensure_dev_container(self) -> str:
246 """Get or create the dev container for the current project.
248 If the container exists but is stopped, it is started.
249 If it doesn't exist, it is created with the full configuration.
250 If using the ``latest`` tag and a newer image is available, the
251 existing container is replaced automatically.
253 Returns the container name.
254 """
255 name, container = self.resolve_dev_container()
257 if container is not None:
258 if self._recreate_if_image_stale(container, name):
259 # Container was removed; fall through to create a new one.
260 container = None
261 else:
262 if container.status != "running":
263 logger.info("Starting existing container: %s", name)
264 container.start()
265 return name
267 logger.info("Creating dev container: %s", name)
268 self._pull_image_if_needed(self.config.full_image)
269 self._create_dev_container(name)
270 return name
272 def _create_dev_container(self, name: str) -> Container:
273 """Create the dev container with all docker-compose config."""
274 mounts = build_dev_mounts(
275 self.config.project_dir,
276 self.config.project_name,
277 extra_node_modules_paths=self.config.node_modules_paths,
278 )
279 environment = build_dev_environment(
280 self.config.extra_env,
281 self.config.project_dir,
282 project_name=self.config.project_name,
283 aws_profile=self.config.ai_profile,
284 aws_region=self.config.aws_region,
285 )
287 # MOTD metadata — injected at creation time so the in-container
288 # motd.sh script can display version, container identity, and port
289 # mappings without querying Docker from inside the container.
290 from ai_shell import __version__
292 environment["AUGINT_SHELL_VERSION"] = __version__
293 environment["AUGINT_CONTAINER_NAME"] = name
294 environment["AUGINT_PROJECT_NAME"] = self.config.project_name
295 environment["AUGINT_DEV_PORTS"] = ",".join(
296 f"{port}:{project_dev_port(self.config.project_dir, port, self.config.project_name)}"
297 for port in self.config.dev_ports
298 )
299 environment["AUGINT_LLM_PORTS"] = ",".join(
300 [
301 f"ollama:{self.config.ollama_port}",
302 f"webui:{self.config.webui_port}",
303 f"kokoro:{self.config.kokoro_port}",
304 f"whisper:{self.config.whisper_port}",
305 f"n8n:{self.config.n8n_port}",
306 f"comfyui:{self.config.comfyui_port}",
307 ]
308 )
310 # Add any extra volumes from config
311 for vol_spec in self.config.extra_volumes:
312 parts = vol_spec.split(":")
313 if len(parts) >= 2:
314 source, target = parts[0], parts[1]
315 read_only = len(parts) > 2 and parts[2] == "ro"
316 mounts.append(
317 Mount(
318 target=target,
319 source=source,
320 type="bind",
321 read_only=read_only,
322 )
323 )
325 container: Container = self.client.containers.run(
326 image=self.config.full_image,
327 name=name,
328 mounts=mounts,
329 environment=environment,
330 working_dir=f"/root/projects/{self.config.project_name}",
331 command="tail -f /dev/null",
332 stdin_open=True,
333 tty=True,
334 shm_size=SHM_SIZE,
335 init=True,
336 extra_hosts={"host.docker.internal": "host-gateway"},
337 ports={
338 f"{port}/tcp": (
339 (
340 "0.0.0.0",
341 project_dev_port(self.config.project_dir, port, self.config.project_name),
342 ) # nosec B104
343 if self.config.project_dir
344 else None
345 )
346 for port in self.config.dev_ports
347 },
348 detach=True,
349 )
350 logger.info("Container created: %s", name)
352 subprocess.run(
353 [
354 "docker",
355 "exec",
356 name,
357 "sh",
358 "-c",
359 "echo 'export PATH=\"/root/.local/bin:/root/.opencode/bin:$PATH\"'"
360 " > /etc/profile.d/ai-shell-path.sh",
361 ],
362 check=False,
363 capture_output=True,
364 )
366 return container
368 def exec_interactive(
369 self,
370 container_name: str,
371 command: list[str],
372 extra_env: dict[str, str] | None = None,
373 workdir: str | None = None,
374 typeahead: bytes = b"",
375 ) -> NoReturn:
376 """Execute an interactive command in a container.
378 Uses subprocess.run for cross-platform TTY compatibility.
379 Detects whether stdin is a TTY to decide on -i/-t flags.
380 If *workdir* is given it is passed as ``-w`` to ``docker exec``.
381 When *typeahead* is non-empty and stdin is a TTY, runs the docker exec
382 under a PTY so the captured bytes can be replayed into the inner process.
383 """
384 args = ["docker", "exec"]
386 if sys.stdin.isatty():
387 args.append("-it")
389 if workdir:
390 args.extend(["-w", workdir])
392 if extra_env:
393 for key, value in extra_env.items():
394 args.extend(["-e", f"{key}={value}"])
396 args.append(container_name)
397 args.extend(command)
399 if typeahead and sys.platform != "win32" and sys.stdin.isatty():
400 exit_code, _ = _run_docker_with_typeahead(args, typeahead)
401 sys.exit(exit_code)
403 _exec_docker(args)
405 def run_interactive(
406 self,
407 container_name: str,
408 command: list[str],
409 extra_env: dict[str, str] | None = None,
410 workdir: str | None = None,
411 typeahead: bytes = b"",
412 ) -> tuple[int, float]:
413 """Execute an interactive command, returning (exit_code, elapsed_seconds).
415 Same as exec_interactive but does not call sys.exit().
416 Used for retry logic (e.g., claude -c fallback).
417 If *workdir* is given it is passed as ``-w`` to ``docker exec``.
418 When *typeahead* is non-empty and stdin is a TTY, runs the docker exec
419 under a PTY so the captured bytes can be replayed into the inner process.
420 """
421 args = ["docker", "exec"]
423 if sys.stdin.isatty():
424 args.append("-it")
426 if workdir:
427 args.extend(["-w", workdir])
429 if extra_env:
430 for key, value in extra_env.items():
431 args.extend(["-e", f"{key}={value}"])
433 args.append(container_name)
434 args.extend(command)
436 if typeahead and sys.platform != "win32" and sys.stdin.isatty():
437 return _run_docker_with_typeahead(args, typeahead)
439 return _run_docker(args)
441 def exec_detached(
442 self,
443 container_name: str,
444 command: list[str],
445 extra_env: dict[str, str] | None = None,
446 workdir: str | None = None,
447 ) -> subprocess.CompletedProcess[bytes]:
448 """Run a command in a container without waiting (docker exec -d)."""
449 args = ["docker", "exec", "-d"]
450 if workdir:
451 args.extend(["-w", workdir])
452 if extra_env:
453 for key, value in extra_env.items():
454 args.extend(["-e", f"{key}={value}"])
455 args.append(container_name)
456 args.extend(command)
457 logger.debug("exec-detached: %s", " ".join(args))
458 return subprocess.run(args, check=True)
460 # =========================================================================
461 # LLM stack (host-level singletons)
462 # =========================================================================
464 @staticmethod
465 def _container_has_gpu(container: Container) -> bool:
466 """Return True if *container* was created with a GPU device request."""
467 device_requests = container.attrs.get("HostConfig", {}).get("DeviceRequests") or []
468 return any(
469 "gpu" in (cap for caps in (dr.get("Capabilities") or []) for cap in caps)
470 for dr in device_requests
471 )
473 def _recreate_if_gpu_changed(
474 self, container: Container, gpu_available: bool, label: str
475 ) -> bool:
476 """Remove *container* if its GPU state doesn't match *gpu_available*.
478 Returns True if the container was removed (caller must recreate).
479 """
480 has_gpu = self._container_has_gpu(container)
481 if has_gpu == gpu_available:
482 return False
483 want = "GPU" if gpu_available else "CPU-only"
484 had = "GPU" if has_gpu else "CPU-only"
485 logger.warning(
486 "%s container has %s but system now offers %s — recreating",
487 label,
488 had,
489 want,
490 )
491 container.remove(force=True)
492 return True
494 def _recreate_if_image_stale(self, container: Container, name: str) -> bool:
495 """Pull the latest image and recreate the container if it is outdated.
497 Only acts when the configured tag is ``latest``. For pinned
498 version tags the image is immutable so staleness doesn't apply.
500 The pull is skipped when ``config.image_pull_cache_ttl`` seconds have
501 not yet elapsed since the last successful pull (default 15 min) so
502 normal launches don't pay the network round-trip.
504 Returns True if the container was removed (caller must recreate).
505 """
506 from ai_shell.cache import is_fresh, mark_fresh
508 tag = self.config.image_tag
509 if tag != "latest":
510 return False
512 image_ref = self.config.full_image
513 if is_fresh("image-pull", image_ref, self.config.image_pull_cache_ttl):
514 logger.debug("Image-pull cache fresh for %s — skipping pull", image_ref)
515 return False
517 try:
518 pulled = self.client.images.pull(*image_ref.rsplit(":", 1))
519 except APIError:
520 logger.debug("Could not pull %s — skipping staleness check", image_ref)
521 return False
523 mark_fresh("image-pull", image_ref)
524 self._warn_if_image_below_minimum(pulled)
526 container_image_id = container.image.id
527 pulled_image_id = pulled.id
529 if container_image_id == pulled_image_id:
530 return False
532 logger.warning(
533 "Dev container %s uses an outdated image — recreating with %s",
534 name,
535 image_ref,
536 )
537 container.remove(force=True)
538 return True
540 @staticmethod
541 def _warn_if_image_below_minimum(image: Image) -> None:
542 """Log a warning if the pulled image version is below the CLI version."""
543 from ai_shell import __version__
545 labels = image.labels or {}
546 image_version_str = labels.get("org.opencontainers.image.version", "")
547 if not image_version_str:
548 return
550 def _parse_version(v: str) -> tuple[int, ...] | None:
551 try:
552 return tuple(int(x) for x in v.split("."))
553 except (ValueError, AttributeError):
554 return None
556 image_ver = _parse_version(image_version_str)
557 cli_ver = _parse_version(__version__)
558 if image_ver is None or cli_ver is None:
559 return
561 if image_ver < cli_ver:
562 logger.warning(
563 "Container image version %s is older than CLI version %s "
564 "— rebuild the image to get the latest tools",
565 image_version_str,
566 __version__,
567 )
569 def _ensure_llm_network(self) -> str:
570 """Get or create the shared Docker network for the LLM stack."""
571 try:
572 self.client.networks.get(LLM_NETWORK)
573 except NotFound:
574 logger.info("Creating LLM network: %s", LLM_NETWORK)
575 self.client.networks.create(LLM_NETWORK, driver="bridge")
576 return LLM_NETWORK
578 def ensure_ollama(self) -> str:
579 """Get or create the Ollama container with GPU auto-detection.
581 Recreates the container if GPU availability has changed since creation.
582 """
583 gpu_available = detect_gpu()
584 container = self._get_container(OLLAMA_CONTAINER)
586 if container is not None:
587 if self._recreate_if_gpu_changed(container, gpu_available, "Ollama"):
588 pass # fall through to creation
589 else:
590 if container.status != "running":
591 logger.info("Starting existing Ollama container")
592 container.start()
593 return OLLAMA_CONTAINER
595 logger.info("Creating Ollama container")
596 self._pull_image_if_needed(OLLAMA_IMAGE)
597 network_name = self._ensure_llm_network()
598 device_requests = None
599 env: dict[str, str] = {
600 "OLLAMA_CONTEXT_LENGTH": str(self.config.context_size),
601 # Flash attention trims activation memory at no quality cost and
602 # is a prerequisite for KV cache quantization.
603 "OLLAMA_FLASH_ATTENTION": "1",
604 # Quantize the KV cache to 8-bit; near-lossless, halves cache
605 # size. Combined with Ollama's dynamic GPU/CPU offload, this
606 # buys significant headroom for large models without hard-pinning
607 # a num_gpu value in any Modelfile.
608 "OLLAMA_KV_CACHE_TYPE": "q8_0",
609 }
610 if gpu_available:
611 device_requests = [DeviceRequest(count=1, capabilities=[["gpu"]])]
612 vram = get_vram_info()
613 if vram:
614 overhead = vram["used"] + OLLAMA_VRAM_BUFFER_BYTES
615 env["OLLAMA_GPU_OVERHEAD"] = str(overhead)
616 logger.info(
617 "VRAM: %.1f GiB total, %.1f GiB free. Reserving %.1f GiB overhead for Ollama.",
618 vram["total"] / 1024**3,
619 vram["free"] / 1024**3,
620 overhead / 1024**3,
621 )
622 else:
623 logger.info("GPU detected - Ollama will use NVIDIA GPU")
624 else:
625 logger.warning("No GPU detected - Ollama will run on CPU (slower inference)")
627 kwargs: dict = {
628 "image": OLLAMA_IMAGE,
629 "name": OLLAMA_CONTAINER,
630 "ports": {"11434/tcp": ("0.0.0.0", self.config.ollama_port)}, # nosec B104
631 "mounts": [
632 Mount(
633 target="/root/.ollama",
634 source=OLLAMA_DATA_VOLUME,
635 type="volume",
636 )
637 ],
638 "restart_policy": {"Name": "unless-stopped"},
639 "detach": True,
640 "network": network_name,
641 "cpu_shares": OLLAMA_CPU_SHARES,
642 }
644 if device_requests:
645 kwargs["device_requests"] = device_requests
646 if env:
647 kwargs["environment"] = env
649 self.client.containers.run(**kwargs)
650 logger.info("Ollama container created on port %d", self.config.ollama_port)
651 return OLLAMA_CONTAINER
653 def ensure_webui(
654 self,
655 voice_enabled: bool = False,
656 whisper_enabled: bool = False,
657 image_gen_enabled: bool = False,
658 env_file: Path | None = None,
659 ) -> str:
660 """Get or create the Open WebUI container.
662 When *voice_enabled* is True, pre-wires Kokoro TTS as the speech
663 backend. When *whisper_enabled* is True, pre-wires Speaches STT
664 as the transcription backend. When *image_gen_enabled* is True,
665 pre-wires ComfyUI as the image-generation backend. API keys from
666 *env_file* (or host environment) are passed through so WebUI can
667 offer external LLM providers alongside Ollama.
668 """
669 container = self._get_container(WEBUI_CONTAINER)
671 if container is not None:
672 if container.status != "running":
673 logger.info("Starting existing WebUI container")
674 container.start()
675 return WEBUI_CONTAINER
677 logger.info("Creating Open WebUI container")
678 self._pull_image_if_needed(WEBUI_IMAGE)
679 network_name = self._ensure_llm_network()
681 from dotenv import dotenv_values
683 from ai_shell.defaults import _resolve_env
685 dotenv: dict[str, str | None] = {}
686 if env_file is not None:
687 dotenv = dotenv_values(env_file)
689 environment: dict[str, str] = {
690 "OLLAMA_BASE_URL": f"http://{OLLAMA_CONTAINER}:11434",
691 "WEBUI_AUTH": "false",
692 # DEFAULT_MODELS is a PersistentConfig: env seeds the DB on first
693 # boot and UI edits win after that. Point new chats at the
694 # primary chat slot; users can pick the secondary (uncensored)
695 # from the model dropdown.
696 "DEFAULT_MODELS": self.config.primary_chat_model,
697 }
698 if voice_enabled:
699 environment.update(
700 {
701 "AUDIO_TTS_ENGINE": "openai",
702 "AUDIO_TTS_OPENAI_API_BASE_URL": f"http://{KOKORO_CONTAINER}:8880/v1",
703 "AUDIO_TTS_OPENAI_API_KEY": "dummy",
704 "AUDIO_TTS_MODEL": "kokoro",
705 "AUDIO_TTS_VOICE": self.config.kokoro_voice,
706 }
707 )
708 if whisper_enabled:
709 environment.update(
710 {
711 "AUDIO_STT_ENGINE": "openai",
712 "AUDIO_STT_OPENAI_API_BASE_URL": f"http://{WHISPER_CONTAINER}:8000/v1",
713 "AUDIO_STT_OPENAI_API_KEY": "dummy",
714 "AUDIO_STT_MODEL": self.config.whisper_model,
715 }
716 )
717 if image_gen_enabled:
718 # ENABLE_IMAGE_GENERATION + IMAGE_GENERATION_ENGINE=comfyui are
719 # PersistentConfig keys; they seed the DB on first boot. Users can
720 # later override the default workflow or model via Settings > Images.
721 environment.update(
722 {
723 "ENABLE_IMAGE_GENERATION": "true",
724 "IMAGE_GENERATION_ENGINE": "comfyui",
725 "COMFYUI_BASE_URL": f"http://{COMFYUI_CONTAINER}:8188",
726 "IMAGE_SIZE": "1024x1024",
727 "IMAGE_STEPS": "25",
728 }
729 )
731 # External LLM providers — pass through API keys when available.
732 openai_urls: list[str] = []
733 openai_keys: list[str] = []
735 openai_key = _resolve_env(dotenv, "OPENAI_API_KEY")
736 if openai_key:
737 openai_urls.append("https://api.openai.com/v1")
738 openai_keys.append(openai_key)
740 gh_token = _resolve_env(dotenv, "GH_TOKEN")
741 if gh_token:
742 openai_urls.append("https://models.inference.ai.azure.com/v1")
743 openai_keys.append(gh_token)
745 if openai_urls:
746 environment["OPENAI_API_BASE_URLS"] = ";".join(openai_urls)
747 environment["OPENAI_API_KEYS"] = ";".join(openai_keys)
749 anthropic_key = _resolve_env(dotenv, "ANTHROPIC_API_KEY")
750 if anthropic_key:
751 environment["ANTHROPIC_API_KEY"] = anthropic_key
753 self.client.containers.run(
754 image=WEBUI_IMAGE,
755 name=WEBUI_CONTAINER,
756 ports={"8080/tcp": ("0.0.0.0", self.config.webui_port)}, # nosec B104
757 environment=environment,
758 mounts=[
759 Mount(
760 target="/app/backend/data",
761 source=WEBUI_DATA_VOLUME,
762 type="volume",
763 )
764 ],
765 restart_policy={"Name": "unless-stopped"},
766 detach=True,
767 network=network_name,
768 )
770 logger.info("Open WebUI container created on port %d", self.config.webui_port)
771 return WEBUI_CONTAINER
773 def ensure_kokoro(self) -> str:
774 """Get or create the Kokoro-FastAPI (local TTS) container.
776 Exposes an OpenAI-compatible ``/v1/audio/speech`` endpoint on the
777 configured port. GPU image is used when NVIDIA is detected;
778 otherwise the CPU image. Recreates if GPU availability has changed.
779 """
780 gpu_available = detect_gpu()
781 container = self._get_container(KOKORO_CONTAINER)
782 if container is not None:
783 if self._recreate_if_gpu_changed(container, gpu_available, "Kokoro"):
784 pass # fall through to creation
785 else:
786 if container.status != "running":
787 logger.info("Starting existing Kokoro container")
788 container.start()
789 return KOKORO_CONTAINER
790 image = KOKORO_IMAGE_GPU if gpu_available else KOKORO_IMAGE_CPU
791 logger.info("Creating Kokoro container (%s)", "GPU" if gpu_available else "CPU")
792 self._pull_image_if_needed(image)
793 network_name = self._ensure_llm_network()
795 kwargs: dict = {
796 "image": image,
797 "name": KOKORO_CONTAINER,
798 "ports": {"8880/tcp": ("0.0.0.0", self.config.kokoro_port)}, # nosec B104
799 "restart_policy": {"Name": "unless-stopped"},
800 "detach": True,
801 "network": network_name,
802 }
803 if gpu_available:
804 kwargs["device_requests"] = [DeviceRequest(count=1, capabilities=[["gpu"]])]
806 self.client.containers.run(**kwargs)
807 logger.info("Kokoro container created on port %d", self.config.kokoro_port)
808 return KOKORO_CONTAINER
810 def ensure_whisper(self) -> str:
811 """Get or create the Speaches (local STT) container.
813 Exposes an OpenAI-compatible ``/v1/audio/transcriptions`` endpoint on
814 the configured port. GPU image is used when NVIDIA is detected;
815 otherwise the CPU image. Recreates if GPU availability has changed.
816 The Hugging Face model cache persists in a named volume (Speaches runs
817 as ``ubuntu`` UID 1000 — a named volume inherits the correct ownership;
818 bind-mounting a host dir here would require an explicit chown).
819 """
820 gpu_available = detect_gpu()
821 container = self._get_container(WHISPER_CONTAINER)
822 if container is not None:
823 if self._recreate_if_gpu_changed(container, gpu_available, "Whisper"):
824 pass # fall through to creation
825 else:
826 if container.status != "running":
827 logger.info("Starting existing Whisper container")
828 container.start()
829 return WHISPER_CONTAINER
830 image = WHISPER_IMAGE_GPU if gpu_available else WHISPER_IMAGE_CPU
831 logger.info("Creating Whisper container (%s)", "GPU" if gpu_available else "CPU")
832 self._pull_image_if_needed(image)
833 network_name = self._ensure_llm_network()
835 # PRELOAD_MODELS uses pydantic-settings JSON array syntax, not CSV.
836 # json.dumps guarantees correct escaping for any model id.
837 environment = {
838 "WHISPER__INFERENCE_DEVICE": "auto",
839 "PRELOAD_MODELS": json.dumps([self.config.whisper_model]),
840 }
842 kwargs: dict = {
843 "image": image,
844 "name": WHISPER_CONTAINER,
845 "ports": {"8000/tcp": ("0.0.0.0", self.config.whisper_port)}, # nosec B104
846 "environment": environment,
847 "mounts": [
848 Mount(
849 target="/home/ubuntu/.cache/huggingface/hub",
850 source=WHISPER_DATA_VOLUME,
851 type="volume",
852 )
853 ],
854 "restart_policy": {"Name": "unless-stopped"},
855 "detach": True,
856 "network": network_name,
857 }
858 if gpu_available:
859 kwargs["device_requests"] = [DeviceRequest(count=1, capabilities=[["gpu"]])]
861 self.client.containers.run(**kwargs)
862 logger.info("Whisper container created on port %d", self.config.whisper_port)
863 return WHISPER_CONTAINER
865 def ensure_voice_agent(self) -> str:
866 """Get or create the voice-agent container.
868 The image is **built locally** from ``docker/voice-agent/`` on first
869 call because Phase 2 doesn't publish it. Later phases may switch to
870 a pulled tag. Phase 2 scope: no filesystem / auth / provider-key
871 mounts — those land in Phases 3-4. A named volume is mounted at
872 ``/data`` so the Phase 5 sqlite file will survive container
873 recreations from the start. Service-discovery URLs for sibling
874 stacks (ComfyUI, etc.) are exported so later phases can dispatch
875 tool calls without re-reading config.
876 """
877 container = self._get_container(VOICE_AGENT_CONTAINER)
878 if container is not None:
879 if container.status != "running":
880 logger.info("Starting existing voice-agent container")
881 container.start()
882 return VOICE_AGENT_CONTAINER
884 logger.info("Creating voice-agent container")
885 self._build_image_if_needed(VOICE_AGENT_IMAGE, self._voice_agent_build_context())
886 network_name = self._ensure_llm_network()
888 kwargs: dict = {
889 "image": VOICE_AGENT_IMAGE,
890 "name": VOICE_AGENT_CONTAINER,
891 "ports": {"8000/tcp": ("0.0.0.0", self.config.voice_agent.port)}, # nosec B104
892 "environment": {
893 "COMFYUI_BASE_URL": f"http://{COMFYUI_CONTAINER}:8188",
894 },
895 "mounts": [
896 Mount(
897 target="/data",
898 source=VOICE_AGENT_DATA_VOLUME,
899 type="volume",
900 )
901 ],
902 "restart_policy": {"Name": "unless-stopped"},
903 "detach": True,
904 "network": network_name,
905 }
907 self.client.containers.run(**kwargs)
908 logger.info("voice-agent container created on port %d", self.config.voice_agent.port)
909 return VOICE_AGENT_CONTAINER
911 @staticmethod
912 def _voice_agent_build_context() -> str:
913 """Return the path to the voice-agent Dockerfile build context."""
914 # Package layout: src/ai_shell/container.py — the Dockerfile lives in
915 # <repo root>/docker/voice-agent. When installed as a wheel the user
916 # is expected to have the source checked out; voice-agent is
917 # experimental and locally built for now.
918 here = Path(__file__).resolve()
919 return str(here.parents[2] / "docker" / "voice-agent")
921 def ensure_comfyui(self, env_file: Path | None = None) -> str:
922 """Get or create the ComfyUI image-generation container.
924 GPU-required (the ai-dock image has no CPU variant). On first
925 boot, ai-dock runs the bind-mounted provisioning script which
926 downloads SDXL unconditionally and FLUX.1-dev when ``HF_TOKEN``
927 is present in *env_file* or the host environment. Model files
928 persist in a named volume so subsequent containers start without
929 re-downloading ~25 GB. Recreates the container when GPU
930 availability toggles, matching the Kokoro/Whisper pattern.
931 """
932 from dotenv import dotenv_values
934 gpu_available = detect_gpu()
935 container = self._get_container(COMFYUI_CONTAINER)
936 if container is not None:
937 if self._recreate_if_gpu_changed(container, gpu_available, "ComfyUI"):
938 pass # fall through to creation
939 else:
940 if container.status != "running":
941 logger.info("Starting existing ComfyUI container")
942 container.start()
943 return COMFYUI_CONTAINER
945 if not gpu_available:
946 raise GpuRequiredError("ComfyUI")
948 logger.info("Creating ComfyUI container")
949 self._pull_image_if_needed(COMFYUI_IMAGE)
950 network_name = self._ensure_llm_network()
952 dotenv: dict[str, str | None] = {}
953 if env_file is not None:
954 dotenv = dotenv_values(env_file)
956 hf_token = _resolve_env(dotenv, "HF_TOKEN") or _resolve_env(
957 dotenv, "HUGGING_FACE_HUB_TOKEN"
958 )
960 environment: dict[str, str] = {
961 # --lowvram keeps FLUX's 12B weights offloading through CPU RAM so
962 # Ollama can keep a chat model resident on the same GPU. --listen
963 # binds on 0.0.0.0 so other containers on the LLM network can reach
964 # the API.
965 "CLI_ARGS": "--lowvram --listen 0.0.0.0",
966 # ai-dock runs PROVISIONING_SCRIPT once per workspace. We bind-mount
967 # our own script at a known path rather than hosting one remotely.
968 "PROVISIONING_SCRIPT": "/opt/augint/provision.sh",
969 # Local-dev deployment: disable ai-dock's Caddy basic-auth layer so
970 # the port-8188 service is reachable without redirect-to-/login.
971 # Matches WEBUI_AUTH=false on Open WebUI.
972 "WEB_ENABLE_AUTH": "false",
973 "CF_QUICK_TUNNELS": "false",
974 }
975 if hf_token:
976 # ai-dock's provisioner reads HF_TOKEN; upstream HF libs read
977 # HUGGING_FACE_HUB_TOKEN. Set both to avoid edge cases.
978 environment["HF_TOKEN"] = hf_token
979 environment["HUGGING_FACE_HUB_TOKEN"] = hf_token
981 provision_path = Path(__file__).parent / "assets" / "comfyui" / "provision.sh"
982 mounts: list[Mount] = [
983 Mount(
984 target="/opt/ComfyUI/models",
985 source=COMFYUI_DATA_VOLUME,
986 type="volume",
987 )
988 ]
989 if provision_path.is_file():
990 mounts.append(
991 Mount(
992 target="/opt/augint/provision.sh",
993 source=str(provision_path),
994 type="bind",
995 read_only=True,
996 )
997 )
999 kwargs: dict = {
1000 "image": COMFYUI_IMAGE,
1001 "name": COMFYUI_CONTAINER,
1002 "ports": {"8188/tcp": ("0.0.0.0", self.config.comfyui_port)}, # nosec B104
1003 "environment": environment,
1004 "mounts": mounts,
1005 "restart_policy": {"Name": "unless-stopped"},
1006 "detach": True,
1007 "network": network_name,
1008 "device_requests": [DeviceRequest(count=1, capabilities=[["gpu"]])],
1009 }
1011 self.client.containers.run(**kwargs)
1012 logger.info("ComfyUI container created on port %d", self.config.comfyui_port)
1013 return COMFYUI_CONTAINER
def ensure_n8n(self, env_file: Path | None = None) -> str:
    """Get or create the n8n workflow automation container.

    Pre-wires service discovery URLs (Ollama, Kokoro, Speaches,
    Voice Agent, WebUI) and passes through API keys (OpenAI,
    Anthropic, GitHub, AWS) from *env_file* or the host environment.
    Credential directories (``~/.aws``, ``~/.config/gh``) are mounted
    read-only so n8n's AWS and GitHub nodes authenticate automatically.

    An existing container is reused (and started if it is not running).
    Starter workflows are only seeded right after a fresh container has
    been created, and only when the bundled workflow directory exists.

    Returns the n8n container name.
    """
    existing = self._get_container(N8N_CONTAINER)
    if existing is not None:
        if existing.status != "running":
            logger.info("Starting existing n8n container")
            existing.start()
        return N8N_CONTAINER

    logger.info("Creating n8n container")
    self._pull_image_if_needed(N8N_IMAGE)
    network_name = self._ensure_llm_network()

    environment = build_n8n_environment(
        env_file=env_file,
        aws_profile=self.config.ai_profile,
        aws_region=self.config.aws_region,
    )

    # Probe the template directory once; the answer drives both the
    # mount list and the decision to seed workflows afterwards.
    workflow_dir = Path(__file__).parent / "templates" / "n8n" / "workflows"
    has_workflows = workflow_dir.is_dir()
    mounts = build_n8n_mounts(
        workflow_dir=workflow_dir if has_workflows else None,
    )

    self.client.containers.run(
        image=N8N_IMAGE,
        name=N8N_CONTAINER,
        ports={"5678/tcp": ("0.0.0.0", self.config.n8n_port)},  # nosec B104
        environment=environment,
        mounts=mounts,
        restart_policy={"Name": "unless-stopped"},
        detach=True,
        network=network_name,
    )

    logger.info("n8n container created on port %d", self.config.n8n_port)

    # Reaching this point always means the container was just created.
    if has_workflows:
        self._seed_n8n_workflows()

    return N8N_CONTAINER
def _seed_n8n_workflows(self) -> None:
    """Import starter workflow templates into a freshly-created n8n.

    Workflows are mounted at ``/workflows`` inside the container. We
    wait for n8n to be ready, then use ``n8n import:workflow`` to load
    each JSON file. Failures are logged but never fatal: Docker API
    errors raised while seeding are caught and reported as warnings.
    """
    container = self._get_container(N8N_CONTAINER)
    if container is None:
        return

    # Wait for n8n to become ready (max ~30 s).
    for _attempt in range(15):
        try:
            exit_code, _ = container.exec_run("curl -sf http://localhost:5678/healthz")
        except Exception as exc:
            # Deliberately broad: the container may still be starting, so
            # any probe failure just means "try again".
            logger.debug("n8n health probe failed: %s", exc)
        else:
            if exit_code == 0:
                break
        time.sleep(2)
    else:
        logger.warning("n8n did not become healthy in 30 s; skipping workflow seed")
        return

    try:
        # Check for the seed marker to avoid duplicate imports.
        exit_code, _ = container.exec_run("test -f /home/node/.n8n/.workflows-seeded")
        if exit_code == 0:
            logger.debug("n8n workflows already seeded; skipping")
            return

        # Import each workflow template.
        exit_code, output = container.exec_run("ls /workflows/")
        if exit_code != 0:
            logger.debug("No /workflows directory in n8n container")
            return

        listing = output.decode("utf-8", errors="replace")
        for line in listing.strip().splitlines():
            fname = line.strip()
            if not fname.endswith(".json"):
                continue
            logger.info("Importing n8n workflow: %s", fname)
            # List form avoids shell-style splitting, so file names that
            # contain whitespace still reach n8n as a single argument.
            exit_code, out = container.exec_run(
                ["n8n", "import:workflow", f"--input=/workflows/{fname}"]
            )
            if exit_code != 0:
                logger.warning(
                    "Failed to import %s: %s",
                    fname,
                    out.decode("utf-8", errors="replace"),
                )

        # Write seed marker so we don't re-import on next restart.
        container.exec_run("touch /home/node/.n8n/.workflows-seeded")
    except APIError as exc:
        # Seeding is best-effort; never let it break container creation.
        logger.warning("n8n workflow seeding aborted: %s", exc)
def exec_in_ollama(self, command: list[str]) -> str:
    """Execute *command* inside the running Ollama container.

    Used for: ollama pull, ollama list, ollama create.

    Both stdout and stderr are captured and returned as one decoded
    string (undecodable bytes are replaced). A non-zero exit code is
    logged as an error, but the output is still returned.

    Raises ContainerNotFoundError when the Ollama container is missing
    or not running.
    """
    ollama = self._get_container(OLLAMA_CONTAINER)
    if ollama is None or ollama.status != "running":
        raise ContainerNotFoundError(OLLAMA_CONTAINER)

    status, raw = ollama.exec_run(cmd=command, stdout=True, stderr=True)
    decoded: str = raw.decode("utf-8", errors="replace")
    if status != 0:
        logger.error("Command failed in ollama: %s\n%s", " ".join(command), decoded)
    return decoded
1133 # =========================================================================
1134 # Container lifecycle
1135 # =========================================================================
def stop_container(self, name: str) -> None:
    """Stop the container called *name* if it is currently running.

    A container that exists but is not running is left untouched.
    Raises ContainerNotFoundError when no such container exists.
    """
    target = self._get_container(name)
    if target is None:
        raise ContainerNotFoundError(name)
    if target.status != "running":
        return
    target.stop()
    logger.info("Stopped container: %s", name)
def remove_container(self, name: str) -> None:
    """Delete the container called *name*, stopping it first when needed.

    Raises ContainerNotFoundError when no such container exists.
    """
    target = self._get_container(name)
    if target is None:
        raise ContainerNotFoundError(name)

    needs_stop = target.status == "running"
    if needs_stop:
        target.stop()
        logger.info("Stopped container: %s", name)

    target.remove()
    logger.info("Removed container: %s", name)
def remove_volume(self, name: str) -> bool:
    """Delete the named Docker volume.

    Returns True when the volume existed and was removed, and False when
    the lookup reported that there is no such volume.
    """
    try:
        doomed = self.client.volumes.get(name)
    except NotFound:
        return False
    else:
        # Removal stays outside the ``try`` so its own errors propagate.
        doomed.remove()
        logger.info("Removed volume: %s", name)
        return True
def container_ports(self, name: str) -> dict[str, str] | None:
    """Report the published port mappings of a container.

    The result maps container ports (e.g. '3000/tcp') to host addresses
    (e.g. '0.0.0.0:49152'), ordered by container port. Ports without a
    host binding are left out, and only the first binding of each port
    is reported. Returns None if the container doesn't exist.
    """
    target = self._get_container(name)
    if target is None:
        return None

    # Refresh the cached attributes before reading the port table.
    target.reload()
    network_settings = target.attrs.get("NetworkSettings", {})
    port_table = network_settings.get("Ports") or {}
    return {
        port: f"{host_bindings[0]['HostIp']}:{host_bindings[0]['HostPort']}"
        for port, host_bindings in sorted(port_table.items())
        if host_bindings
    }
def container_status(self, name: str) -> str | None:
    """Return the status string of container *name*, or None when it is absent."""
    found = self._get_container(name)
    if found is not None:
        return found.status  # type: ignore[no-any-return]
    return None
def container_logs(self, name: str, follow: bool = False, tail: int = 100) -> None:
    """Print the logs of container *name*.

    Without *follow*, the last *tail* lines are fetched through the SDK,
    decoded (undecodable bytes are replaced) and printed to stdout;
    ContainerNotFoundError is raised when the container does not exist.

    With *follow*, streaming is handed to ``docker logs -f`` via the
    docker CLI; *tail* is not applied in that mode.
    """
    if not follow:
        target = self._get_container(name)
        if target is None:
            raise ContainerNotFoundError(name)
        raw = target.logs(tail=tail)
        print(raw.decode("utf-8", errors="replace"))
        return

    # Use docker CLI for streaming
    _exec_docker(["docker", "logs", "-f", name])
1208 # =========================================================================
1209 # Internal helpers
1210 # =========================================================================
1212 def _get_container(self, name: str) -> Container | None:
1213 """Get a container by name, or None if it doesn't exist."""
1214 try:
1215 return self.client.containers.get(name)
1216 except NotFound:
1217 return None
1219 def _container_matches_project(self, container: Container, project_dir: Path) -> bool:
1220 """Check whether a container's project mount matches *project_dir*."""
1221 resolved_project_dir = str(project_dir.resolve())
1222 mounts = container.attrs.get("Mounts", [])
1223 for mount in mounts:
1224 if mount.get("Source") == resolved_project_dir:
1225 return True
1226 return False
1228 # AUTO-UPDATE: Pre-launch tool freshness check
def ensure_tool_fresh(self, container_name: str, tool_name: str) -> None:
    """Check if a tool is stale and update it before launch.

    Runs ``update-tools.sh --check <tool>`` inside the container.
    If stale (non-zero exit code), runs ``--tool <tool>`` in the
    foreground (blocking) which also kicks off background updates for
    other tools.

    Silently does nothing if ``update-tools.sh`` is not present in the
    container (backward compatibility with older images), or if
    ``config.skip_updates`` is True (``--skip-updates`` flag).

    The update is best-effort: a failing or timed-out update is reported
    on stderr and the launch continues.
    """
    if self.config.skip_updates:
        logger.debug("Skipping tool freshness check (--skip-updates)")
        return

    update_script = "/usr/local/bin/update-tools.sh"
    update_timeout_s = 300  # 5 minute timeout

    # Check if update script exists in the container
    check_exists = subprocess.run(
        ["docker", "exec", container_name, "test", "-x", update_script],
        capture_output=True,
    )
    if check_exists.returncode != 0:
        logger.debug(
            "update-tools.sh not found in %s, skipping freshness check",
            container_name,
        )
        return

    # Check freshness
    check_result = subprocess.run(
        ["docker", "exec", container_name, update_script, "--check", tool_name],
        capture_output=True,
    )
    if check_result.returncode == 0:
        logger.debug("Tool %s is fresh, skipping update", tool_name)
        return

    # Tool is stale — update it in foreground (--tool also backgrounds the rest)
    from rich.console import Console

    console = Console(stderr=True)
    try:
        with console.status(f"[bold]Updating {tool_name}...[/bold]", spinner="dots"):
            update_result = subprocess.run(
                ["docker", "exec", container_name, update_script, "--tool", tool_name],
                capture_output=True,
                text=True,
                timeout=update_timeout_s,
            )
    except subprocess.TimeoutExpired:
        # A hung update must not block the launch any more than a failed one.
        console.print(f"[yellow]Update for {tool_name} timed out, continuing anyway[/yellow]")
        logger.debug("Update of %s exceeded %d s", tool_name, update_timeout_s)
        return

    if update_result.returncode == 0:
        console.print(f"[green]Updated {tool_name}[/green]")
    else:
        console.print(f"[yellow]Update for {tool_name} had issues, continuing anyway[/yellow]")
        logger.debug("Update stderr: %s", update_result.stderr)
def _build_image_if_needed(self, image: str, context_path: str) -> None:
    """Build a Docker image locally if it isn't already present.

    Used for images we don't pull from a registry (experimental
    components shipped as a Dockerfile in this repo). The local tag
    is cached between runs; a rebuild requires removing the image
    first (``docker rmi <tag>``).

    Raises ImagePullError when the build fails, whether the daemon
    rejected the request (``APIError``) or a build step itself failed
    (``BuildError``).
    """
    # Function-scope import: BuildError is only needed by this method.
    from docker.errors import BuildError

    try:
        self.client.images.get(image)
    except ImageNotFound:
        pass
    else:
        logger.debug("Image already built: %s", image)
        return

    logger.info("Building image: %s from %s ...", image, context_path)
    try:
        self.client.images.build(path=context_path, tag=image, rm=True)
    except (APIError, BuildError) as e:
        # BuildError is not an APIError subclass, so it has to be listed
        # explicitly for failed build steps to be wrapped as well.
        raise ImagePullError(image, f"build failed: {e}") from e
    logger.info("Image built: %s", image)
def _pull_image_if_needed(self, image: str) -> None:
    """Pull a Docker image if not available locally.

    For the ``latest`` tag (explicit or implied by an untagged
    reference), always pull to ensure the freshest digest since the
    local cache may be stale. If the pull fails but a cached copy
    exists, falls back to the cached version with a warning.

    References pinned to another tag or to a digest (``repo@sha256:...``)
    are only pulled when they are missing locally.

    Raises ImagePullError when the image cannot be pulled and no usable
    cached copy exists.
    """
    # A ":" only introduces a tag when it appears after the last "/";
    # earlier it belongs to a registry "host:port". Digest references are
    # passed through whole so the SDK can parse them itself.
    pinned_by_digest = "@" in image
    last_component = image.rsplit("/", 1)[-1]
    if pinned_by_digest or ":" not in last_component:
        pull_args = [image]
        tag = "latest"
    else:
        pull_args = image.rsplit(":", 1)
        tag = pull_args[1]

    # AUTO-UPDATE: Always pull 'latest' to get fresh images
    always_pull = tag == "latest" and not pinned_by_digest

    if not always_pull:
        try:
            self.client.images.get(image)
        except ImageNotFound:
            pass
        else:
            logger.debug("Image already available: %s", image)
            return

    logger.info("Pulling image: %s (this may take a while)...", image)
    try:
        pulled = self.client.images.pull(*pull_args)
        self._warn_if_image_below_minimum(pulled)
        logger.info("Image pulled: %s", image)
    except APIError as e:
        if always_pull:
            # A stale cached 'latest' is better than no image at all.
            try:
                self.client.images.get(image)
            except ImageNotFound:
                pass
            else:
                logger.warning("Failed to pull latest image, using cached version: %s", e)
                return
        raise ImagePullError(image, str(e)) from e