Coverage for src/ai_shell/container.py: 75%

1"""Docker container lifecycle management.

3Replaces docker-compose.yml by using Docker SDK to create and manage containers

4with the exact same configuration.

5"""

7from __future__ import annotations

9import json

10import logging

11import os

12import subprocess

13import sys

14import time

15from pathlib import Path

16from typing import TYPE_CHECKING, NoReturn

18from docker.errors import APIError, ImageNotFound, NotFound

19from docker.types import DeviceRequest, Mount

21import docker

22from ai_shell.defaults import (

23 COMFYUI_CONTAINER,

24 COMFYUI_DATA_VOLUME,

25 COMFYUI_IMAGE,

26 KOKORO_CONTAINER,

27 KOKORO_IMAGE_CPU,

28 KOKORO_IMAGE_GPU,

29 LLM_NETWORK,

30 N8N_CONTAINER,

31 N8N_IMAGE,

32 OLLAMA_CONTAINER,

33 OLLAMA_CPU_SHARES,

34 OLLAMA_DATA_VOLUME,

35 OLLAMA_IMAGE,

36 OLLAMA_VRAM_BUFFER_BYTES,

37 SHM_SIZE,

38 VOICE_AGENT_CONTAINER,

39 VOICE_AGENT_DATA_VOLUME,

40 VOICE_AGENT_IMAGE,

41 WEBUI_CONTAINER,

42 WEBUI_DATA_VOLUME,

43 WEBUI_IMAGE,

44 WHISPER_CONTAINER,

45 WHISPER_DATA_VOLUME,

46 WHISPER_IMAGE_CPU,

47 WHISPER_IMAGE_GPU,

48 _resolve_env,

49 build_dev_environment,

50 build_dev_mounts,

51 build_n8n_environment,

52 build_n8n_mounts,

53 dev_container_name,

54 project_dev_port,

55)

56from ai_shell.exceptions import (

57 ContainerNotFoundError,

58 DockerNotAvailableError,

59 GpuRequiredError,

60 ImagePullError,

61)

62from ai_shell.gpu import detect_gpu, get_vram_info

64if TYPE_CHECKING:

65 from docker.models.containers import Container

66 from docker.models.images import Image

68 from ai_shell.config import AiShellConfig

70logger = logging.getLogger(__name__)

def _exec_docker(args: list[str]) -> NoReturn:
    """Run a docker CLI command, then exit this process with its status.

    ``subprocess.run`` is used rather than ``os.execvp`` for Windows
    compatibility: on Windows ``os.execvp`` doesn't truly replace the
    process, which causes TTY issues.

    Args:
        args: Complete argv of the command to run (e.g. ``["docker", ...]``).
    """
    command_line = " ".join(args)
    logger.debug("exec: %s", command_line)
    # Push out anything Python has buffered before the child starts writing
    # to the same streams.
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    completed = subprocess.run(args)
    sys.exit(completed.returncode)

def _run_docker(args: list[str]) -> tuple[int, float]:
    """Run a docker CLI command and report how it went.

    Unlike ``_exec_docker`` this returns to the caller instead of calling
    ``sys.exit()``.

    Args:
        args: Complete argv of the command to run.

    Returns:
        ``(exit_code, elapsed_seconds)`` where the elapsed time is measured
        with a monotonic clock around the child process.
    """
    logger.debug("run: %s", " ".join(args))
    # Push out anything Python has buffered before the child starts writing
    # to the same streams.
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    started_at = time.monotonic()
    completed = subprocess.run(args)
    finished_at = time.monotonic()
    return completed.returncode, finished_at - started_at

def _run_docker_with_typeahead(args: list[str], typeahead: bytes) -> tuple[int, float]:
    """Run a docker CLI command under a PTY, pre-injecting typeahead bytes.

    Used when the user typed during the slow startup phase: those bytes need
    to be replayed into the inner process exactly as if they had been typed
    once the shell attached. Standard subprocess inheritance can't do that
    because our own bytes must be injected ahead of the live stdin stream.

    The host terminal is switched to raw mode while the child runs and is
    always restored afterwards. POSIX only (uses ``pty``/``termios``).

    Args:
        args: Complete argv of the command to run.
        typeahead: Bytes written to the PTY before any live stdin is relayed.

    Returns:
        ``(exit_code, elapsed_seconds)``.
    """
    import fcntl
    import pty
    import select
    import signal
    import termios
    import tty

    logger.debug("run+pty: %s", " ".join(args))
    sys.stdout.flush()
    sys.stderr.flush()

    stdin_fd = sys.stdin.fileno()
    stdout_fd = sys.stdout.fileno()
    # Snapshot the terminal settings before allocating the PTY so that a
    # failure here cannot leak file descriptors.
    original_termios = termios.tcgetattr(stdin_fd)
    master_fd, slave_fd = pty.openpty()

    def _copy_winsize(target_fd: int) -> None:
        """Best-effort copy of the host terminal size onto the PTY."""
        try:
            size = fcntl.ioctl(stdout_fd, termios.TIOCGWINSZ, b"\x00" * 8)
            fcntl.ioctl(target_fd, termios.TIOCSWINSZ, size)
        except OSError:
            # stdout may not be a terminal; sizing is cosmetic, so carry on.
            pass

    def _write_all(fd: int, data: bytes) -> None:
        """Write *data* to *fd*, looping over partial writes."""
        view = memoryview(data)
        while view:
            view = view[os.write(fd, view):]

    # Match the PTY size to the host terminal so curses-based tools render
    # correctly.
    _copy_winsize(slave_fd)

    def _on_winch(_signum: int, _frame: object) -> None:
        # Resize through the master: the parent's slave descriptor is closed
        # right after the child is spawned, so it is not valid here.
        _copy_winsize(master_fd)

    previous_winch = signal.signal(signal.SIGWINCH, _on_winch)

    def _restore_winch() -> None:
        """Reinstate whatever SIGWINCH handling was active before this call."""
        # signal.signal() returns None for handlers not installed from
        # Python; those cannot be passed back in, so fall back to default.
        signal.signal(
            signal.SIGWINCH,
            previous_winch if previous_winch is not None else signal.SIG_DFL,
        )

    start = time.monotonic()
    try:
        proc = subprocess.Popen(
            args,
            stdin=slave_fd,
            stdout=slave_fd,
            stderr=slave_fd,
            close_fds=True,
        )
    except BaseException:
        # Spawn failed (e.g. docker binary missing): undo the setup above so
        # neither the handler nor the PTY outlives this call.
        _restore_winch()
        os.close(master_fd)
        raise
    finally:
        # The child (if any) holds its own copies of the slave end.
        os.close(slave_fd)

    try:
        tty.setraw(stdin_fd)
        if typeahead:
            os.write(master_fd, typeahead)

        watched = [master_fd, stdin_fd]
        while True:
            if proc.poll() is not None:
                # Drain any final output. Only read while data is reported
                # ready so a lingering holder of the slave end cannot make
                # this block forever.
                try:
                    while select.select([master_fd], [], [], 0)[0]:
                        chunk = os.read(master_fd, 4096)
                        if not chunk:
                            break
                        _write_all(stdout_fd, chunk)
                except (OSError, ValueError):
                    pass
                break
            try:
                ready, _, _ = select.select(watched, [], [], 0.1)
            except (OSError, ValueError):
                break
            if master_fd in ready:
                try:
                    chunk = os.read(master_fd, 4096)
                except OSError:
                    chunk = b""
                if not chunk:
                    break
                _write_all(stdout_fd, chunk)
            if stdin_fd in ready:
                try:
                    chunk = os.read(stdin_fd, 4096)
                except OSError:
                    chunk = b""
                if chunk:
                    os.write(master_fd, chunk)
                else:
                    # stdin hit EOF or failed: stop polling it, otherwise
                    # select() would return immediately on every iteration.
                    watched.remove(stdin_fd)
    finally:
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, original_termios)
        _restore_winch()
        try:
            os.close(master_fd)
        except OSError:
            pass
        if proc.poll() is None:
            proc.wait()

    elapsed = time.monotonic() - start
    return proc.returncode, elapsed

201

202

203class ContainerManager:

204 """Manages Docker containers for ai-shell.

205

206 Handles the dev container (per-project) and LLM stack (host-level singletons).

207 """

208

209 def __init__(self, config: AiShellConfig) -> None:

210 self.config = config

211 try:

212 self.client = docker.from_env() # type: ignore[attr-defined]

213 self.client.ping()

214 except docker.errors.DockerException as e:

215 raise DockerNotAvailableError(

216 f"Docker is not available. Is the Docker daemon running?\n Error: {e}"

217 ) from e

218

219 # =========================================================================

220 # Dev container (per-project)

221 # =========================================================================

222

223 def resolve_dev_container(self) -> tuple[str, Container | None]:

224 """Resolve the dev container, checking both current and legacy names.

225

226 Returns ``(name, container)`` where *container* is ``None`` when no

227 matching container exists. When no container is found under either

228 name, the current hash-based name is returned so callers can use it

229 for creation.

230 """

231 name = dev_container_name(self.config.project_name, self.config.project_dir)

232 container = self._get_container(name)

233 if container is not None:

234 return name, container

235

236 legacy_name = dev_container_name(self.config.project_name)

237 legacy_container = self._get_container(legacy_name)

238 if legacy_container is not None and self._container_matches_project(

239 legacy_container, self.config.project_dir

240 ):

241 return legacy_name, legacy_container

242

243 return name, None

244

245 def ensure_dev_container(self) -> str:

246 """Get or create the dev container for the current project.

247

248 If the container exists but is stopped, it is started.

249 If it doesn't exist, it is created with the full configuration.

250 If using the ``latest`` tag and a newer image is available, the

251 existing container is replaced automatically.

252

253 Returns the container name.

254 """

255 name, container = self.resolve_dev_container()

256

257 if container is not None:

258 if self._recreate_if_image_stale(container, name):

259 # Container was removed; fall through to create a new one.

260 container = None

261 else:

262 if container.status != "running":

263 logger.info("Starting existing container: %s", name)

264 container.start()

265 return name

266

267 logger.info("Creating dev container: %s", name)

268 self._pull_image_if_needed(self.config.full_image)

269 self._create_dev_container(name)

270 return name

271

272 def _create_dev_container(self, name: str) -> Container:

273 """Create the dev container with all docker-compose config."""

274 mounts = build_dev_mounts(

275 self.config.project_dir,

276 self.config.project_name,

277 extra_node_modules_paths=self.config.node_modules_paths,

278 )

279 environment = build_dev_environment(

280 self.config.extra_env,

281 self.config.project_dir,

282 project_name=self.config.project_name,

283 aws_profile=self.config.ai_profile,

284 aws_region=self.config.aws_region,

285 )

286

287 # MOTD metadata — injected at creation time so the in-container

288 # motd.sh script can display version, container identity, and port

289 # mappings without querying Docker from inside the container.

290 from ai_shell import __version__

291

292 environment["AUGINT_SHELL_VERSION"] = __version__

293 environment["AUGINT_CONTAINER_NAME"] = name

294 environment["AUGINT_PROJECT_NAME"] = self.config.project_name

295 environment["AUGINT_DEV_PORTS"] = ",".join(

296 f"{port}:{project_dev_port(self.config.project_dir, port, self.config.project_name)}"

297 for port in self.config.dev_ports

298 )

299 environment["AUGINT_LLM_PORTS"] = ",".join(

300 [

301 f"ollama:{self.config.ollama_port}",

302 f"webui:{self.config.webui_port}",

303 f"kokoro:{self.config.kokoro_port}",

304 f"whisper:{self.config.whisper_port}",

305 f"n8n:{self.config.n8n_port}",

306 f"comfyui:{self.config.comfyui_port}",

307 ]

308 )

309

310 # Add any extra volumes from config

311 for vol_spec in self.config.extra_volumes:

312 parts = vol_spec.split(":")

313 if len(parts) >= 2:

314 source, target = parts[0], parts[1]

315 read_only = len(parts) > 2 and parts[2] == "ro"

316 mounts.append(

317 Mount(

318 target=target,

319 source=source,

320 type="bind",

321 read_only=read_only,

322 )

323 )

324

325 container: Container = self.client.containers.run(

326 image=self.config.full_image,

327 name=name,

328 mounts=mounts,

329 environment=environment,

330 working_dir=f"/root/projects/{self.config.project_name}",

331 command="tail -f /dev/null",

332 stdin_open=True,

333 tty=True,

334 shm_size=SHM_SIZE,

335 init=True,

336 extra_hosts={"host.docker.internal": "host-gateway"},

337 ports={

338 f"{port}/tcp": (

339 (

340 "0.0.0.0",

341 project_dev_port(self.config.project_dir, port, self.config.project_name),

342 ) # nosec B104

343 if self.config.project_dir

344 else None

345 )

346 for port in self.config.dev_ports

347 },

348 detach=True,

349 )

350 logger.info("Container created: %s", name)

351

352 subprocess.run(

353 [

354 "docker",

355 "exec",

356 name,

357 "sh",

358 "-c",

359 "echo 'export PATH=\"/root/.local/bin:/root/.opencode/bin:$PATH\"'"

360 " > /etc/profile.d/ai-shell-path.sh",

361 ],

362 check=False,

363 capture_output=True,

364 )

365

366 return container

367

368 def exec_interactive(

369 self,

370 container_name: str,

371 command: list[str],

372 extra_env: dict[str, str] | None = None,

373 workdir: str | None = None,

374 typeahead: bytes = b"",

375 ) -> NoReturn:

376 """Execute an interactive command in a container.

377

378 Uses subprocess.run for cross-platform TTY compatibility.

379 Detects whether stdin is a TTY to decide on -i/-t flags.

380 If *workdir* is given it is passed as ``-w`` to ``docker exec``.

381 When *typeahead* is non-empty and stdin is a TTY, runs the docker exec

382 under a PTY so the captured bytes can be replayed into the inner process.

383 """

384 args = ["docker", "exec"]

385

386 if sys.stdin.isatty():

387 args.append("-it")

388

389 if workdir:

390 args.extend(["-w", workdir])

391

392 if extra_env:

393 for key, value in extra_env.items():

394 args.extend(["-e", f"{key}={value}"])

395

396 args.append(container_name)

397 args.extend(command)

398

399 if typeahead and sys.platform != "win32" and sys.stdin.isatty():

400 exit_code, _ = _run_docker_with_typeahead(args, typeahead)

401 sys.exit(exit_code)

402

403 _exec_docker(args)

404

405 def run_interactive(

406 self,

407 container_name: str,

408 command: list[str],

409 extra_env: dict[str, str] | None = None,

410 workdir: str | None = None,

411 typeahead: bytes = b"",

412 ) -> tuple[int, float]:

413 """Execute an interactive command, returning (exit_code, elapsed_seconds).

414

415 Same as exec_interactive but does not call sys.exit().

416 Used for retry logic (e.g., claude -c fallback).

417 If *workdir* is given it is passed as ``-w`` to ``docker exec``.

418 When *typeahead* is non-empty and stdin is a TTY, runs the docker exec

419 under a PTY so the captured bytes can be replayed into the inner process.

420 """

421 args = ["docker", "exec"]

422

423 if sys.stdin.isatty():

424 args.append("-it")

425

426 if workdir:

427 args.extend(["-w", workdir])

428

429 if extra_env:

430 for key, value in extra_env.items():

431 args.extend(["-e", f"{key}={value}"])

432

433 args.append(container_name)

434 args.extend(command)

435

436 if typeahead and sys.platform != "win32" and sys.stdin.isatty():

437 return _run_docker_with_typeahead(args, typeahead)

438

439 return _run_docker(args)

440

441 def exec_detached(

442 self,

443 container_name: str,

444 command: list[str],

445 extra_env: dict[str, str] | None = None,

446 workdir: str | None = None,

447 ) -> subprocess.CompletedProcess[bytes]:

448 """Run a command in a container without waiting (docker exec -d)."""

449 args = ["docker", "exec", "-d"]

450 if workdir:

451 args.extend(["-w", workdir])

452 if extra_env:

453 for key, value in extra_env.items():

454 args.extend(["-e", f"{key}={value}"])

455 args.append(container_name)

456 args.extend(command)

457 logger.debug("exec-detached: %s", " ".join(args))

458 return subprocess.run(args, check=True)

459

460 # =========================================================================

461 # LLM stack (host-level singletons)

462 # =========================================================================

463

464 @staticmethod

465 def _container_has_gpu(container: Container) -> bool:

466 """Return True if *container* was created with a GPU device request."""

467 device_requests = container.attrs.get("HostConfig", {}).get("DeviceRequests") or []

468 return any(

469 "gpu" in (cap for caps in (dr.get("Capabilities") or []) for cap in caps)

470 for dr in device_requests

471 )

472

473 def _recreate_if_gpu_changed(

474 self, container: Container, gpu_available: bool, label: str

475 ) -> bool:

476 """Remove *container* if its GPU state doesn't match *gpu_available*.

477

478 Returns True if the container was removed (caller must recreate).

479 """

480 has_gpu = self._container_has_gpu(container)

481 if has_gpu == gpu_available:

482 return False

483 want = "GPU" if gpu_available else "CPU-only"

484 had = "GPU" if has_gpu else "CPU-only"

485 logger.warning(

486 "%s container has %s but system now offers %s — recreating",

487 label,

488 had,

489 want,

490 )

491 container.remove(force=True)

492 return True

493

494 def _recreate_if_image_stale(self, container: Container, name: str) -> bool:

495 """Pull the latest image and recreate the container if it is outdated.

496

497 Only acts when the configured tag is ``latest``. For pinned

498 version tags the image is immutable so staleness doesn't apply.

499

500 The pull is skipped when ``config.image_pull_cache_ttl`` seconds have

501 not yet elapsed since the last successful pull (default 15 min) so

502 normal launches don't pay the network round-trip.

503

504 Returns True if the container was removed (caller must recreate).

505 """

506 from ai_shell.cache import is_fresh, mark_fresh

507

508 tag = self.config.image_tag

509 if tag != "latest":

510 return False

511

512 image_ref = self.config.full_image

513 if is_fresh("image-pull", image_ref, self.config.image_pull_cache_ttl):

514 logger.debug("Image-pull cache fresh for %s — skipping pull", image_ref)

515 return False

516

517 try:

518 pulled = self.client.images.pull(*image_ref.rsplit(":", 1))

519 except APIError:

520 logger.debug("Could not pull %s — skipping staleness check", image_ref)

521 return False

522

523 mark_fresh("image-pull", image_ref)

524 self._warn_if_image_below_minimum(pulled)

525

526 container_image_id = container.image.id

527 pulled_image_id = pulled.id

528

529 if container_image_id == pulled_image_id:

530 return False

531

532 logger.warning(

533 "Dev container %s uses an outdated image — recreating with %s",

534 name,

535 image_ref,

536 )

537 container.remove(force=True)

538 return True

539

540 @staticmethod

541 def _warn_if_image_below_minimum(image: Image) -> None:

542 """Log a warning if the pulled image version is below the CLI version."""

543 from ai_shell import __version__

544

545 labels = image.labels or {}

546 image_version_str = labels.get("org.opencontainers.image.version", "")

547 if not image_version_str:

548 return

549

550 def _parse_version(v: str) -> tuple[int, ...] | None:

551 try:

552 return tuple(int(x) for x in v.split("."))

553 except (ValueError, AttributeError):

554 return None

555

556 image_ver = _parse_version(image_version_str)

557 cli_ver = _parse_version(__version__)

558 if image_ver is None or cli_ver is None:

559 return

560

561 if image_ver < cli_ver:

562 logger.warning(

563 "Container image version %s is older than CLI version %s "

564 "— rebuild the image to get the latest tools",

565 image_version_str,

566 __version__,

567 )

568

569 def _ensure_llm_network(self) -> str:

570 """Get or create the shared Docker network for the LLM stack."""

571 try:

572 self.client.networks.get(LLM_NETWORK)

573 except NotFound:

574 logger.info("Creating LLM network: %s", LLM_NETWORK)

575 self.client.networks.create(LLM_NETWORK, driver="bridge")

576 return LLM_NETWORK

577

578 def ensure_ollama(self) -> str:

579 """Get or create the Ollama container with GPU auto-detection.

580

581 Recreates the container if GPU availability has changed since creation.

582 """

583 gpu_available = detect_gpu()

584 container = self._get_container(OLLAMA_CONTAINER)

585

586 if container is not None:

587 if self._recreate_if_gpu_changed(container, gpu_available, "Ollama"):

588 pass # fall through to creation

589 else:

590 if container.status != "running":

591 logger.info("Starting existing Ollama container")

592 container.start()

593 return OLLAMA_CONTAINER

594

595 logger.info("Creating Ollama container")

596 self._pull_image_if_needed(OLLAMA_IMAGE)

597 network_name = self._ensure_llm_network()

598 device_requests = None

599 env: dict[str, str] = {

600 "OLLAMA_CONTEXT_LENGTH": str(self.config.context_size),

601 # Flash attention trims activation memory at no quality cost and

602 # is a prerequisite for KV cache quantization.

603 "OLLAMA_FLASH_ATTENTION": "1",

604 # Quantize the KV cache to 8-bit; near-lossless, halves cache

605 # size. Combined with Ollama's dynamic GPU/CPU offload, this

606 # buys significant headroom for large models without hard-pinning

607 # a num_gpu value in any Modelfile.

608 "OLLAMA_KV_CACHE_TYPE": "q8_0",

609 }

610 if gpu_available:

611 device_requests = [DeviceRequest(count=1, capabilities=[["gpu"]])]

612 vram = get_vram_info()

613 if vram:

614 overhead = vram["used"] + OLLAMA_VRAM_BUFFER_BYTES

615 env["OLLAMA_GPU_OVERHEAD"] = str(overhead)

616 logger.info(

617 "VRAM: %.1f GiB total, %.1f GiB free. Reserving %.1f GiB overhead for Ollama.",

618 vram["total"] / 1024**3,

619 vram["free"] / 1024**3,

620 overhead / 1024**3,

621 )

622 else:

623 logger.info("GPU detected - Ollama will use NVIDIA GPU")

624 else:

625 logger.warning("No GPU detected - Ollama will run on CPU (slower inference)")

626

627 kwargs: dict = {

628 "image": OLLAMA_IMAGE,

629 "name": OLLAMA_CONTAINER,

630 "ports": {"11434/tcp": ("0.0.0.0", self.config.ollama_port)}, # nosec B104

631 "mounts": [

632 Mount(

633 target="/root/.ollama",

634 source=OLLAMA_DATA_VOLUME,

635 type="volume",

636 )

637 ],

638 "restart_policy": {"Name": "unless-stopped"},

639 "detach": True,

640 "network": network_name,

641 "cpu_shares": OLLAMA_CPU_SHARES,

642 }

643

644 if device_requests:

645 kwargs["device_requests"] = device_requests

646 if env:

647 kwargs["environment"] = env

648

649 self.client.containers.run(**kwargs)

650 logger.info("Ollama container created on port %d", self.config.ollama_port)

651 return OLLAMA_CONTAINER

652

653 def ensure_webui(

654 self,

655 voice_enabled: bool = False,

656 whisper_enabled: bool = False,

657 image_gen_enabled: bool = False,

658 env_file: Path | None = None,

659 ) -> str:

660 """Get or create the Open WebUI container.

661

662 When *voice_enabled* is True, pre-wires Kokoro TTS as the speech

663 backend. When *whisper_enabled* is True, pre-wires Speaches STT

664 as the transcription backend. When *image_gen_enabled* is True,

665 pre-wires ComfyUI as the image-generation backend. API keys from

666 *env_file* (or host environment) are passed through so WebUI can

667 offer external LLM providers alongside Ollama.

668 """

669 container = self._get_container(WEBUI_CONTAINER)

670

671 if container is not None:

672 if container.status != "running":

673 logger.info("Starting existing WebUI container")

674 container.start()

675 return WEBUI_CONTAINER

676

677 logger.info("Creating Open WebUI container")

678 self._pull_image_if_needed(WEBUI_IMAGE)

679 network_name = self._ensure_llm_network()

680

681 from dotenv import dotenv_values

682

683 from ai_shell.defaults import _resolve_env

684

685 dotenv: dict[str, str | None] = {}

686 if env_file is not None:

687 dotenv = dotenv_values(env_file)

688

689 environment: dict[str, str] = {

690 "OLLAMA_BASE_URL": f"http://{OLLAMA_CONTAINER}:11434",

691 "WEBUI_AUTH": "false",

692 # DEFAULT_MODELS is a PersistentConfig: env seeds the DB on first

693 # boot and UI edits win after that. Point new chats at the

694 # primary chat slot; users can pick the secondary (uncensored)

695 # from the model dropdown.

696 "DEFAULT_MODELS": self.config.primary_chat_model,

697 }

698 if voice_enabled:

699 environment.update(

700 {

701 "AUDIO_TTS_ENGINE": "openai",

702 "AUDIO_TTS_OPENAI_API_BASE_URL": f"http://{KOKORO_CONTAINER}:8880/v1",

703 "AUDIO_TTS_OPENAI_API_KEY": "dummy",

704 "AUDIO_TTS_MODEL": "kokoro",

705 "AUDIO_TTS_VOICE": self.config.kokoro_voice,

706 }

707 )

708 if whisper_enabled:

709 environment.update(

710 {

711 "AUDIO_STT_ENGINE": "openai",

712 "AUDIO_STT_OPENAI_API_BASE_URL": f"http://{WHISPER_CONTAINER}:8000/v1",

713 "AUDIO_STT_OPENAI_API_KEY": "dummy",

714 "AUDIO_STT_MODEL": self.config.whisper_model,

715 }

716 )

717 if image_gen_enabled:

718 # ENABLE_IMAGE_GENERATION + IMAGE_GENERATION_ENGINE=comfyui are

719 # PersistentConfig keys; they seed the DB on first boot. Users can

720 # later override the default workflow or model via Settings > Images.

721 environment.update(

722 {

723 "ENABLE_IMAGE_GENERATION": "true",

724 "IMAGE_GENERATION_ENGINE": "comfyui",

725 "COMFYUI_BASE_URL": f"http://{COMFYUI_CONTAINER}:8188",

726 "IMAGE_SIZE": "1024x1024",

727 "IMAGE_STEPS": "25",

728 }

729 )

730

731 # External LLM providers — pass through API keys when available.

732 openai_urls: list[str] = []

733 openai_keys: list[str] = []

734

735 openai_key = _resolve_env(dotenv, "OPENAI_API_KEY")

736 if openai_key:

737 openai_urls.append("https://api.openai.com/v1")

738 openai_keys.append(openai_key)

739

740 gh_token = _resolve_env(dotenv, "GH_TOKEN")

741 if gh_token:

742 openai_urls.append("https://models.inference.ai.azure.com/v1")

743 openai_keys.append(gh_token)

744

745 if openai_urls:

746 environment["OPENAI_API_BASE_URLS"] = ";".join(openai_urls)

747 environment["OPENAI_API_KEYS"] = ";".join(openai_keys)

748

749 anthropic_key = _resolve_env(dotenv, "ANTHROPIC_API_KEY")

750 if anthropic_key:

751 environment["ANTHROPIC_API_KEY"] = anthropic_key

752

753 self.client.containers.run(

754 image=WEBUI_IMAGE,

755 name=WEBUI_CONTAINER,

756 ports={"8080/tcp": ("0.0.0.0", self.config.webui_port)}, # nosec B104

757 environment=environment,

758 mounts=[

759 Mount(

760 target="/app/backend/data",

761 source=WEBUI_DATA_VOLUME,

762 type="volume",

763 )

764 ],

765 restart_policy={"Name": "unless-stopped"},

766 detach=True,

767 network=network_name,

768 )

769

770 logger.info("Open WebUI container created on port %d", self.config.webui_port)

771 return WEBUI_CONTAINER

772

773 def ensure_kokoro(self) -> str:

774 """Get or create the Kokoro-FastAPI (local TTS) container.

775

776 Exposes an OpenAI-compatible ``/v1/audio/speech`` endpoint on the

777 configured port. GPU image is used when NVIDIA is detected;

778 otherwise the CPU image. Recreates if GPU availability has changed.

779 """

780 gpu_available = detect_gpu()

781 container = self._get_container(KOKORO_CONTAINER)

782 if container is not None:

783 if self._recreate_if_gpu_changed(container, gpu_available, "Kokoro"):

784 pass # fall through to creation

785 else:

786 if container.status != "running":

787 logger.info("Starting existing Kokoro container")

788 container.start()

789 return KOKORO_CONTAINER

790 image = KOKORO_IMAGE_GPU if gpu_available else KOKORO_IMAGE_CPU

791 logger.info("Creating Kokoro container (%s)", "GPU" if gpu_available else "CPU")

792 self._pull_image_if_needed(image)

793 network_name = self._ensure_llm_network()

794

795 kwargs: dict = {

796 "image": image,

797 "name": KOKORO_CONTAINER,

798 "ports": {"8880/tcp": ("0.0.0.0", self.config.kokoro_port)}, # nosec B104

799 "restart_policy": {"Name": "unless-stopped"},

800 "detach": True,

801 "network": network_name,

802 }

803 if gpu_available:

804 kwargs["device_requests"] = [DeviceRequest(count=1, capabilities=[["gpu"]])]

805

806 self.client.containers.run(**kwargs)

807 logger.info("Kokoro container created on port %d", self.config.kokoro_port)

808 return KOKORO_CONTAINER

809

810 def ensure_whisper(self) -> str:

811 """Get or create the Speaches (local STT) container.

812

813 Exposes an OpenAI-compatible ``/v1/audio/transcriptions`` endpoint on

814 the configured port. GPU image is used when NVIDIA is detected;

815 otherwise the CPU image. Recreates if GPU availability has changed.

816 The Hugging Face model cache persists in a named volume (Speaches runs

817 as ``ubuntu`` UID 1000 — a named volume inherits the correct ownership;

818 bind-mounting a host dir here would require an explicit chown).

819 """

820 gpu_available = detect_gpu()

821 container = self._get_container(WHISPER_CONTAINER)

822 if container is not None:

823 if self._recreate_if_gpu_changed(container, gpu_available, "Whisper"):

824 pass # fall through to creation

825 else:

826 if container.status != "running":

827 logger.info("Starting existing Whisper container")

828 container.start()

829 return WHISPER_CONTAINER

830 image = WHISPER_IMAGE_GPU if gpu_available else WHISPER_IMAGE_CPU

831 logger.info("Creating Whisper container (%s)", "GPU" if gpu_available else "CPU")

832 self._pull_image_if_needed(image)

833 network_name = self._ensure_llm_network()

834

835 # PRELOAD_MODELS uses pydantic-settings JSON array syntax, not CSV.

836 # json.dumps guarantees correct escaping for any model id.

837 environment = {

838 "WHISPER__INFERENCE_DEVICE": "auto",

839 "PRELOAD_MODELS": json.dumps([self.config.whisper_model]),

840 }

841

842 kwargs: dict = {

843 "image": image,

844 "name": WHISPER_CONTAINER,

845 "ports": {"8000/tcp": ("0.0.0.0", self.config.whisper_port)}, # nosec B104

846 "environment": environment,

847 "mounts": [

848 Mount(

849 target="/home/ubuntu/.cache/huggingface/hub",

850 source=WHISPER_DATA_VOLUME,

851 type="volume",

852 )

853 ],

854 "restart_policy": {"Name": "unless-stopped"},

855 "detach": True,

856 "network": network_name,

857 }

858 if gpu_available:

859 kwargs["device_requests"] = [DeviceRequest(count=1, capabilities=[["gpu"]])]

860

861 self.client.containers.run(**kwargs)

862 logger.info("Whisper container created on port %d", self.config.whisper_port)

863 return WHISPER_CONTAINER

864

865 def ensure_voice_agent(self) -> str:

866 """Get or create the voice-agent container.

867

868 The image is **built locally** from ``docker/voice-agent/`` on first

869 call because Phase 2 doesn't publish it. Later phases may switch to

870 a pulled tag. Phase 2 scope: no filesystem / auth / provider-key

871 mounts — those land in Phases 3-4. A named volume is mounted at

872 ``/data`` so the Phase 5 sqlite file will survive container

873 recreations from the start. Service-discovery URLs for sibling

874 stacks (ComfyUI, etc.) are exported so later phases can dispatch

875 tool calls without re-reading config.

876 """

877 container = self._get_container(VOICE_AGENT_CONTAINER)

878 if container is not None:

879 if container.status != "running":

880 logger.info("Starting existing voice-agent container")

881 container.start()

882 return VOICE_AGENT_CONTAINER

883

884 logger.info("Creating voice-agent container")

885 self._build_image_if_needed(VOICE_AGENT_IMAGE, self._voice_agent_build_context())

886 network_name = self._ensure_llm_network()

887

888 kwargs: dict = {

889 "image": VOICE_AGENT_IMAGE,

890 "name": VOICE_AGENT_CONTAINER,

891 "ports": {"8000/tcp": ("0.0.0.0", self.config.voice_agent.port)}, # nosec B104

892 "environment": {

893 "COMFYUI_BASE_URL": f"http://{COMFYUI_CONTAINER}:8188",

894 },

895 "mounts": [

896 Mount(

897 target="/data",

898 source=VOICE_AGENT_DATA_VOLUME,

899 type="volume",

900 )

901 ],

902 "restart_policy": {"Name": "unless-stopped"},

903 "detach": True,

904 "network": network_name,

905 }

906

907 self.client.containers.run(**kwargs)

908 logger.info("voice-agent container created on port %d", self.config.voice_agent.port)

909 return VOICE_AGENT_CONTAINER

910

911 @staticmethod

912 def _voice_agent_build_context() -> str:

913 """Return the path to the voice-agent Dockerfile build context."""

914 # Package layout: src/ai_shell/container.py — the Dockerfile lives in

915 # <repo root>/docker/voice-agent. When installed as a wheel the user

916 # is expected to have the source checked out; voice-agent is

917 # experimental and locally built for now.

918 here = Path(__file__).resolve()

919 return str(here.parents[2] / "docker" / "voice-agent")

920

def ensure_comfyui(self, env_file: Path | None = None) -> str:
    """Return the ComfyUI container name, creating the container if needed.

    An existing container is reused (and started when it is not running)
    unless ``_recreate_if_gpu_changed`` returns a truthy value for it, in
    which case the method falls through to creating a fresh container.
    Creating a container requires a GPU.

    The Hugging Face token is looked up through ``_resolve_env`` under
    ``HF_TOKEN`` first, then ``HUGGING_FACE_HUB_TOKEN``, using the values
    loaded from *env_file* (when given). Model files are kept in the
    ``COMFYUI_DATA_VOLUME`` named volume, and the bundled provisioning
    script is bind-mounted read-only when it exists on disk.

    Args:
        env_file: Optional dotenv file to read the Hugging Face token from.

    Returns:
        The ComfyUI container name (``COMFYUI_CONTAINER``).

    Raises:
        GpuRequiredError: If a container has to be created and no GPU is
            detected.
    """
    from dotenv import dotenv_values

    has_gpu = detect_gpu()
    existing = self._get_container(COMFYUI_CONTAINER)
    reusable = existing is not None and not self._recreate_if_gpu_changed(
        existing, has_gpu, "ComfyUI"
    )
    if reusable:
        if existing.status != "running":
            logger.info("Starting existing ComfyUI container")
            existing.start()
        return COMFYUI_CONTAINER

    # From here on a new container is created, which needs a GPU.
    if not has_gpu:
        raise GpuRequiredError("ComfyUI")

    logger.info("Creating ComfyUI container")
    self._pull_image_if_needed(COMFYUI_IMAGE)
    network_name = self._ensure_llm_network()

    env_values: dict[str, str | None] = {}
    if env_file is not None:
        env_values = dotenv_values(env_file)

    hf_token = _resolve_env(env_values, "HF_TOKEN")
    if not hf_token:
        hf_token = _resolve_env(env_values, "HUGGING_FACE_HUB_TOKEN")

    environment: dict[str, str] = {
        # --lowvram offloads FLUX's 12B weights through CPU RAM so Ollama
        # can keep a chat model resident on the same GPU; --listen binds
        # 0.0.0.0 so other containers on the LLM network can reach the API.
        "CLI_ARGS": "--lowvram --listen 0.0.0.0",
        # ai-dock runs PROVISIONING_SCRIPT once per workspace; ours is
        # bind-mounted at a known path instead of being hosted remotely.
        "PROVISIONING_SCRIPT": "/opt/augint/provision.sh",
        # Local-dev deployment: turn off ai-dock's Caddy basic-auth layer
        # so port 8188 is reachable without a redirect to /login (matches
        # WEBUI_AUTH=false on Open WebUI).
        "WEB_ENABLE_AUTH": "false",
        "CF_QUICK_TUNNELS": "false",
    }
    if hf_token:
        # ai-dock's provisioner reads HF_TOKEN while upstream HF libraries
        # read HUGGING_FACE_HUB_TOKEN, so export the token under both.
        environment.update(HF_TOKEN=hf_token, HUGGING_FACE_HUB_TOKEN=hf_token)

    models_mount = Mount(
        target="/opt/ComfyUI/models",
        source=COMFYUI_DATA_VOLUME,
        type="volume",
    )
    mounts: list[Mount] = [models_mount]

    provision_script = Path(__file__).parent.joinpath("assets", "comfyui", "provision.sh")
    if provision_script.is_file():
        script_mount = Mount(
            target="/opt/augint/provision.sh",
            source=str(provision_script),
            type="bind",
            read_only=True,
        )
        mounts.append(script_mount)

    self.client.containers.run(
        image=COMFYUI_IMAGE,
        name=COMFYUI_CONTAINER,
        ports={"8188/tcp": ("0.0.0.0", self.config.comfyui_port)},  # nosec B104
        environment=environment,
        mounts=mounts,
        restart_policy={"Name": "unless-stopped"},
        detach=True,
        network=network_name,
        device_requests=[DeviceRequest(count=1, capabilities=[["gpu"]])],
    )
    logger.info("ComfyUI container created on port %d", self.config.comfyui_port)
    return COMFYUI_CONTAINER

1014

def ensure_n8n(self, env_file: Path | None = None) -> str:
    """Return the n8n container name, creating the container if needed.

    An existing container is reused and started when it is not running.
    Otherwise the image is pulled if needed, the container is created on
    the LLM network with the environment from ``build_n8n_environment``
    and the mounts from ``build_n8n_mounts``, and, when the packaged
    workflow template directory exists, the starter workflows are seeded.

    Args:
        env_file: Optional dotenv file forwarded to
            ``build_n8n_environment``.

    Returns:
        The n8n container name (``N8N_CONTAINER``).
    """
    existing = self._get_container(N8N_CONTAINER)
    if existing is not None:
        if existing.status != "running":
            logger.info("Starting existing n8n container")
            existing.start()
        return N8N_CONTAINER

    logger.info("Creating n8n container")
    self._pull_image_if_needed(N8N_IMAGE)
    network_name = self._ensure_llm_network()

    environment = build_n8n_environment(
        env_file=env_file,
        aws_profile=self.config.ai_profile,
        aws_region=self.config.aws_region,
    )

    workflow_dir = Path(__file__).parent.joinpath("templates", "n8n", "workflows")
    # Only hand the template directory to the mount builder when it exists.
    mountable_dir = workflow_dir if workflow_dir.is_dir() else None
    mounts = build_n8n_mounts(workflow_dir=mountable_dir)

    run_options: dict = {
        "image": N8N_IMAGE,
        "name": N8N_CONTAINER,
        "ports": {"5678/tcp": ("0.0.0.0", self.config.n8n_port)},  # nosec B104
        "environment": environment,
        "mounts": mounts,
        "restart_policy": {"Name": "unless-stopped"},
        "detach": True,
        "network": network_name,
    }
    self.client.containers.run(**run_options)

    logger.info("n8n container created on port %d", self.config.n8n_port)

    # Reaching this point means the container was just created, so seed it.
    if workflow_dir.is_dir():
        self._seed_n8n_workflows()

    return N8N_CONTAINER

1065

def _seed_n8n_workflows(self) -> None:
    """Import starter workflow templates into a freshly-created n8n.

    Workflows are mounted at ``/workflows`` inside the container. We
    wait for n8n to answer its health endpoint, then run
    ``n8n import:workflow`` once per ``*.json`` file and finally drop a
    marker file so the import is not repeated.

    Failures are logged but never fatal: a missing container, an
    unhealthy n8n, a failed import, or a Docker API error while talking
    to the container all end the seeding quietly.
    """
    container = self._get_container(N8N_CONTAINER)
    if container is None:
        return

    # Wait for n8n to become ready: 15 probes, 2 s apart (~30 s).
    for _attempt in range(15):
        try:
            exit_code, _ = container.exec_run("curl -sf http://localhost:5678/healthz")
        except Exception as exc:
            # Best effort: the probe can fail while the container is still
            # starting. Record why instead of discarding it silently.
            logger.debug("n8n health probe failed: %s", exc)
        else:
            if exit_code == 0:
                break
        time.sleep(2)
    else:
        logger.warning("n8n did not become healthy in 30 s; skipping workflow seed")
        return

    try:
        # Check for the seed marker to avoid duplicate imports.
        exit_code, _ = container.exec_run("test -f /home/node/.n8n/.workflows-seeded")
        if exit_code == 0:
            logger.debug("n8n workflows already seeded; skipping")
            return

        exit_code, output = container.exec_run("ls /workflows/")
        if exit_code != 0:
            logger.debug("No /workflows directory in n8n container")
            return

        listing = output.decode("utf-8", errors="replace")
        for line in listing.strip().splitlines():
            fname = line.strip()
            if not fname.endswith(".json"):
                continue
            logger.info("Importing n8n workflow: %s", fname)
            # Pass argv as a list: a command string is split shell-style,
            # so a file name containing whitespace or quotes would break it.
            exit_code, out = container.exec_run(
                ["n8n", "import:workflow", f"--input=/workflows/{fname}"]
            )
            if exit_code != 0:
                logger.warning(
                    "Failed to import %s: %s",
                    fname,
                    out.decode("utf-8", errors="replace"),
                )

        # Write seed marker so we don't re-import on next restart.
        container.exec_run("touch /home/node/.n8n/.workflows-seeded")
    except APIError as exc:
        # Seeding is optional; a Docker API failure must not abort the
        # caller that has just created the container.
        logger.warning("Could not seed n8n workflows: %s", exc)

1113

def exec_in_ollama(self, command: list[str]) -> str:
    """Execute *command* inside the Ollama container.

    Used for: ollama pull, ollama list, ollama create.

    Args:
        command: The argv list to execute in the container.

    Returns:
        The command output (stdout and stderr were both requested),
        decoded as UTF-8 with undecodable bytes replaced. A non-zero exit
        code is logged as an error but the output is still returned.

    Raises:
        ContainerNotFoundError: If the Ollama container does not exist or
            is not running.
    """
    ollama = self._get_container(OLLAMA_CONTAINER)
    is_running = ollama is not None and ollama.status == "running"
    if not is_running:
        raise ContainerNotFoundError(OLLAMA_CONTAINER)

    result = ollama.exec_run(cmd=command, stdout=True, stderr=True)
    exit_code, raw_output = result
    decoded: str = raw_output.decode("utf-8", errors="replace")
    if exit_code != 0:
        logger.error("Command failed in ollama: %s\n%s", " ".join(command), decoded)
    return decoded

1132

1133 # =========================================================================

1134 # Container lifecycle

1135 # =========================================================================

1136

def stop_container(self, name: str) -> None:
    """Stop the container called *name* if it is currently running.

    Raises:
        ContainerNotFoundError: If no container with that name exists.
    """
    target = self._get_container(name)
    if target is None:
        raise ContainerNotFoundError(name)
    if target.status != "running":
        # Nothing to stop.
        return
    target.stop()
    logger.info("Stopped container: %s", name)

1145

def remove_container(self, name: str) -> None:
    """Remove the container called *name*, stopping it first if it runs.

    Raises:
        ContainerNotFoundError: If no container with that name exists.
    """
    target = self._get_container(name)
    if target is None:
        raise ContainerNotFoundError(name)

    needs_stop = target.status == "running"
    if needs_stop:
        target.stop()
        logger.info("Stopped container: %s", name)

    target.remove()
    logger.info("Removed container: %s", name)

1156

def remove_volume(self, name: str) -> bool:
    """Delete the named Docker volume *name*.

    Returns:
        True when the volume existed and was removed, False when the
        lookup reported it as not found.
    """
    try:
        found = self.client.volumes.get(name)
    except NotFound:
        removed = False
    else:
        # Removal stays outside the ``try`` so its own errors propagate.
        found.remove()
        logger.info("Removed volume: %s", name)
        removed = True
    return removed

1169

def container_ports(self, name: str) -> dict[str, str] | None:
    """Return the published port mappings of the container *name*.

    The container attributes are refreshed before reading them. Each
    container port that has at least one binding (e.g. ``'3000/tcp'``) is
    mapped to the ``'HostIp:HostPort'`` of its first binding (e.g.
    ``'0.0.0.0:49152'``); ports without bindings are omitted. Entries are
    inserted in sorted container-port order.

    Returns:
        The mapping described above, or None if the container doesn't exist.
    """
    target = self._get_container(name)
    if target is None:
        return None

    target.reload()
    network_settings = target.attrs.get("NetworkSettings", {})
    port_table = network_settings.get("Ports") or {}
    return {
        port: f"{host_bindings[0]['HostIp']}:{host_bindings[0]['HostPort']}"
        for port, host_bindings in sorted(port_table.items())
        if host_bindings
    }

1187

def container_status(self, name: str) -> str | None:
    """Return the status string of container *name*, or None if it doesn't exist."""
    target = self._get_container(name)
    return None if target is None else target.status  # type: ignore[no-any-return]

1194

def container_logs(self, name: str, follow: bool = False, tail: int = 100) -> None:
    """Print the last *tail* log lines of container *name*.

    Args:
        name: Container name.
        follow: When True, hand over to ``docker logs -f`` through
            ``_exec_docker`` (which does not return) so new output keeps
            streaming. Container existence is left to the docker CLI in
            this mode.
        tail: Number of trailing log lines to show. Applied in both
            modes; previously it was ignored when *follow* was True, so
            the entire log history was replayed before streaming.

    Raises:
        ContainerNotFoundError: If *follow* is False and the container
            does not exist.
    """
    if follow:
        # Use docker CLI for streaming, limited to the requested tail.
        args = ["docker", "logs", "--tail", str(tail), "-f", name]
        _exec_docker(args)
    else:
        container = self._get_container(name)
        if container is None:
            raise ContainerNotFoundError(name)
        logs = container.logs(tail=tail).decode("utf-8", errors="replace")
        print(logs)

1207

1208 # =========================================================================

1209 # Internal helpers

1210 # =========================================================================

1211

def _get_container(self, name: str) -> Container | None:
    """Look up the container called *name*; return None when Docker reports it as not found."""
    try:
        found = self.client.containers.get(name)
    except NotFound:
        found = None
    return found

1218

1219 def _container_matches_project(self, container: Container, project_dir: Path) -> bool:

1220 """Check whether a container's project mount matches *project_dir*."""

1221 resolved_project_dir = str(project_dir.resolve())

1222 mounts = container.attrs.get("Mounts", [])

1223 for mount in mounts:

1224 if mount.get("Source") == resolved_project_dir:

1225 return True

1226 return False

1227

1228 # AUTO-UPDATE: Pre-launch tool freshness check

def ensure_tool_fresh(self, container_name: str, tool_name: str) -> None:
    """Check if a tool is stale and update it before launch.

    Runs ``update-tools.sh --check <tool>`` inside the container. On a
    non-zero exit code the tool is treated as stale and
    ``update-tools.sh --tool <tool>`` is run in the foreground (blocking),
    which also kicks off background updates for other tools.

    Silently does nothing if ``update-tools.sh`` is not present in the
    container (backward compatibility with older images), or if
    ``config.skip_updates`` is True (``--skip-updates`` flag).

    The update is best effort: a failing or timed-out update prints a
    warning and the launch continues.

    Args:
        container_name: Name of the container to run the script in.
        tool_name: Tool identifier passed to ``update-tools.sh``.
    """
    if self.config.skip_updates:
        logger.debug("Skipping tool freshness check (--skip-updates)")
        return

    update_script = "/usr/local/bin/update-tools.sh"

    # Check if update script exists (and is executable) in the container.
    check_exists = subprocess.run(
        ["docker", "exec", container_name, "test", "-x", update_script],
        capture_output=True,
    )
    if check_exists.returncode != 0:
        logger.debug(
            "update-tools.sh not found in %s, skipping freshness check",
            container_name,
        )
        return

    # Check freshness: exit code 0 means the tool is up to date.
    check_result = subprocess.run(
        ["docker", "exec", container_name, update_script, "--check", tool_name],
        capture_output=True,
    )
    if check_result.returncode == 0:
        logger.debug("Tool %s is fresh, skipping update", tool_name)
        return

    # Tool is stale — update it in foreground (--tool also backgrounds the rest)
    from rich.console import Console

    console = Console(stderr=True)
    update_result = None
    with console.status(f"[bold]Updating {tool_name}...[/bold]", spinner="dots"):
        try:
            update_result = subprocess.run(
                ["docker", "exec", container_name, update_script, "--tool", tool_name],
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
            )
        except subprocess.TimeoutExpired as exc:
            # A slow update must not abort the launch; treat it like any
            # other failed update and carry on.
            logger.debug("Update of %s timed out: %s", tool_name, exc)

    if update_result is None:
        console.print(f"[yellow]Update for {tool_name} timed out, continuing anyway[/yellow]")
    elif update_result.returncode == 0:
        console.print(f"[green]Updated {tool_name}[/green]")
    else:
        console.print(f"[yellow]Update for {tool_name} had issues, continuing anyway[/yellow]")
        logger.debug("Update stderr: %s", update_result.stderr)

1283

def _build_image_if_needed(self, image: str, context_path: str) -> None:
    """Build a Docker image locally if it isn't already present.

    Used for images we don't pull from a registry (experimental
    components shipped as a Dockerfile in this repo). The local tag
    is cached between runs; a rebuild requires removing the image
    first (``docker rmi <tag>``).

    Args:
        image: Tag to look up and, if missing, to build.
        context_path: Directory passed to Docker as the build context.

    Raises:
        ImagePullError: If the build fails, either because the daemon
            rejected the request (``APIError``) or because a build step
            failed (``BuildError``).
    """
    # BuildError is not imported at module level; import it locally so
    # this method is self-contained.
    from docker.errors import BuildError

    try:
        self.client.images.get(image)
        logger.debug("Image already built: %s", image)
        return
    except ImageNotFound:
        pass

    logger.info("Building image: %s from %s ...", image, context_path)
    try:
        self.client.images.build(path=context_path, tag=image, rm=True)
        logger.info("Image built: %s", image)
    except (APIError, BuildError) as e:
        # The SDK reports a failed build step as BuildError, which is not
        # an APIError; wrap both so callers see one exception type.
        raise ImagePullError(image, f"build failed: {e}") from e

1305

def _pull_image_if_needed(self, image: str) -> None:
    """Pull a Docker image if not available locally.

    For the ``latest`` tag (explicit or implied by a missing tag), always
    pull to ensure the freshest digest since the local cache may be
    stale. If the pull fails but a cached copy exists, falls back to the
    cached version with a warning. Any other tag, and any digest-pinned
    reference (``repo@sha256:...``), is pulled only when it is not
    already present locally.

    The reference is split so that a ``:`` belonging to a registry port
    (``localhost:5000/name``) or to a digest is not mistaken for the tag
    separator.

    Args:
        image: Image reference, e.g. ``name``, ``name:tag``,
            ``registry:5000/name:tag`` or ``name@sha256:<digest>``.

    Raises:
        ImagePullError: If the pull fails and no cached ``latest`` image
            can be used instead.
    """
    if "@" in image:
        # Digest-pinned reference: immutable, so never treated as latest.
        repository, tag = image.split("@", 1)
        is_latest = False
    else:
        head, sep, candidate = image.rpartition(":")
        if sep and "/" not in candidate:
            repository, tag = head, candidate or "latest"
        else:
            # No colon, or the last colon belongs to a registry host:port.
            repository, tag = image, "latest"
        is_latest = tag == "latest"

    # AUTO-UPDATE: Always pull 'latest' to get fresh images
    if not is_latest:
        try:
            self.client.images.get(image)
            logger.debug("Image already available: %s", image)
            return
        except ImageNotFound:
            pass

    logger.info("Pulling image: %s (this may take a while)...", image)
    try:
        # Passing the tag explicitly keeps the pull to a single image.
        pulled = self.client.images.pull(repository, tag=tag)
        self._warn_if_image_below_minimum(pulled)
        logger.info("Image pulled: %s", image)
    except APIError as e:
        if is_latest:
            try:
                self.client.images.get(image)
                logger.warning("Failed to pull latest image, using cached version: %s", e)
                return
            except ImageNotFound:
                pass
        raise ImagePullError(image, str(e)) from e