Coverage for src/ai_shell/models.py: 100%
26 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 22:06 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 22:06 +0000
"""Curated model catalog for ai-shell.

Each entry represents an Ollama model tag that has been validated on
RTX 4090-class hardware. The catalog ships with ai-shell and is the
single source of truth for model metadata (role, parameter count,
censored/uncensored, disk footprint, known caveats).

The ``llm models`` CLI command cross-references this catalog against
the active config slots and the models actually pulled into Ollama.
"""
12from __future__ import annotations
14from dataclasses import dataclass
@dataclass(frozen=True)
class ModelInfo:
    """Metadata for a single Ollama model tag.

    Instances are immutable (``frozen=True``); with the default ``eq=True``
    this also makes them hashable, so entries can be shared freely and used
    as dict keys or set members.
    """

    tag: str  # Ollama model tag, e.g. "qwen3.5:27b"
    role: str  # "chat" | "coding"
    params: str  # e.g. "3B", "8B", "14B", "27B", "30B"
    size_gb: float  # approximate disk footprint in GiB
    uncensored: bool  # True = abliterated / uncensored variant
    description: str  # one-line purpose
    caveats: str = ""  # known issues or limitations
# ---------------------------------------------------------------------------
# Catalog — grouped by role (chat models first, then coding models).
# Within a role the entries run roughly from larger to smaller parameter
# counts, but NOT strictly (e.g. gemma3:12b follows the 8B chat models and
# qwen2.5-coder:32b follows the 14B coder). The tuple order is kept as-is
# because anything iterating the catalog sees entries in this order.
# ---------------------------------------------------------------------------
MODEL_CATALOG: tuple[ModelInfo, ...] = (
    # ── Chat models ───────────────────────────────────────────────────────
    ModelInfo(
        tag="qwen3.5:27b",
        role="chat",
        params="27B",
        size_gb=17.0,
        uncensored=False,
        description="Primary chat — best quality, heavy on VRAM",
        caveats="Ollama tool-call bug (ollama #14493)",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3.5-abliterated:27b",
        role="chat",
        params="27B",
        size_gb=17.0,
        uncensored=True,
        description="Uncensored chat — abliterated Qwen3.5 27B",
        caveats="Ollama tool-call bug (ollama #14493)",
    ),
    ModelInfo(
        tag="qwen3.5:9b",
        role="chat",
        params="9B",
        size_gb=6.6,
        uncensored=False,
        description="Mid-range chat — fast + capable, good speed/quality tradeoff",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3.5-abliterated:9b",
        role="chat",
        params="9B",
        size_gb=6.6,
        uncensored=True,
        description="Mid-range uncensored chat — abliterated Qwen3.5 9B",
    ),
    ModelInfo(
        tag="dolphin3:8b",
        role="chat",
        params="8B",
        size_gb=5.0,
        uncensored=True,
        description="Fast uncensored — Dolphin fine-tune of Llama 3.1 8B",
    ),
    ModelInfo(
        tag="llama3.1:8b",
        role="chat",
        params="8B",
        size_gb=5.0,
        uncensored=False,
        description="Fast general chat — good for quick tasks",
    ),
    ModelInfo(
        tag="gemma3:12b",
        role="chat",
        params="12B",
        size_gb=8.0,
        uncensored=False,
        description="Google Gemma 3 12B — solid mid-range alternative",
    ),
    ModelInfo(
        tag="llama3.2:latest",
        role="chat",
        params="3B",
        size_gb=2.0,
        uncensored=False,
        description="Ultra-fast 3B — limited capability, near-instant",
    ),
    # ── Coding models ─────────────────────────────────────────────────────
    ModelInfo(
        tag="qwen3-coder:30b-a3b-q4_K_M",
        role="coding",
        params="30B",
        size_gb=19.0,
        uncensored=False,
        description="Primary coding — explicit Ollama tools badge",
        caveats="Reliable native tool_calls below ~5 tools",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M",
        role="coding",
        params="30B",
        size_gb=19.0,
        uncensored=True,
        description="Uncensored coding — abliterated Qwen3-Coder 30B",
        caveats="Reliable native tool_calls below ~5 tools",
    ),
    ModelInfo(
        tag="qwen2.5-coder:14b-instruct",
        role="coding",
        params="14B",
        size_gb=9.0,
        uncensored=False,
        description="Mid-range coding — previous-gen, proven on 4090",
    ),
    ModelInfo(
        tag="qwen2.5-coder:32b-q4_k_m",
        role="coding",
        params="32B",
        size_gb=19.0,
        uncensored=False,
        description="Large Qwen2.5-Coder — previous-gen, needs full VRAM",
    ),
    ModelInfo(
        tag="devstral:24b",
        role="coding",
        params="24B",
        size_gb=15.0,
        uncensored=False,
        description="Mistral Devstral — strong agentic coding model",
    ),
)
# Fast lookup by tag. Built once at import time from MODEL_CATALOG; if two
# catalog entries ever shared a tag, the later one would silently win here.
_CATALOG_BY_TAG: dict[str, ModelInfo] = {m.tag: m for m in MODEL_CATALOG}
def lookup(tag: str) -> ModelInfo | None:
    """Return the catalog entry for *tag*, or ``None`` if untracked.

    The match is an exact, case-sensitive dictionary lookup on the full tag
    string; no normalisation (such as adding a default ``:latest`` suffix)
    is applied.
    """
    return _CATALOG_BY_TAG.get(tag)
def classify_status(
    tag: str,
    config_tags: set[str],
    pulled_tags: set[str],
) -> str:
    """Classify a model's status relative to config and Ollama state.

    Checks are applied in priority order, so the first match wins:

    - ``"config"``    — *tag* is in *config_tags* (one of the 4 config slots
      or extra_models), whether or not it is pulled or in the catalog
    - ``"pulled"``    — downloaded in Ollama and in the catalog, but not in
      config
    - ``"untracked"`` — downloaded in Ollama but not in the catalog
    - ``"available"`` — neither in config nor pulled. This is the
      fall-through result, so it is also returned for a tag that is not in
      the catalog at all; callers normally pass catalog tags here.

    Args:
        tag: Exact Ollama model tag to classify.
        config_tags: Tags referenced by the active configuration.
        pulled_tags: Tags currently downloaded in Ollama.

    Returns:
        One of ``"config"``, ``"pulled"``, ``"untracked"``, ``"available"``.
    """
    if tag in config_tags:
        return "config"
    if tag in pulled_tags:
        # Pulled models split on whether the catalog knows about them.
        return "pulled" if tag in _CATALOG_BY_TAG else "untracked"
    return "available"