Coverage for src/ai_shell/models.py: 100%

"""Curated model catalog for ai-shell.

Each entry represents an Ollama model tag that has been validated on
RTX 4090-class hardware. The catalog ships with ai-shell and is the
single source of truth for model metadata (role, parameter count,
censored/uncensored, disk footprint, known caveats).

The ``llm models`` CLI command cross-references this catalog against
the active config slots and the models actually pulled into Ollama.
"""

from __future__ import annotations

from dataclasses import dataclass

@dataclass(frozen=True)
class ModelInfo:
    """Metadata for a single Ollama model tag.

    The dataclass is frozen: fields cannot be reassigned after
    construction. Only ``caveats`` has a default; every other field
    must be supplied.
    """

    tag: str  # Ollama model tag, e.g. "qwen3.5:27b"
    role: str  # "chat" | "coding"
    params: str  # e.g. "3B", "8B", "14B", "27B", "30B"
    size_gb: float  # approximate disk footprint in GiB
    uncensored: bool  # True = abliterated / uncensored variant
    description: str  # one-line purpose
    caveats: str = ""  # known issues or limitations

# ---------------------------------------------------------------------------
# Catalog — grouped by role (chat first, then coding).
#
# Within a role the entries are only roughly ordered by descending parameter
# count (e.g. gemma3:12b follows the 8B chat models).  The tuple order is
# kept as-is because it is the catalog's iteration order.
# ---------------------------------------------------------------------------
MODEL_CATALOG: tuple[ModelInfo, ...] = (
    # ── Chat models ───────────────────────────────────────────────────────
    ModelInfo(
        tag="qwen3.5:27b",
        role="chat",
        params="27B",
        size_gb=17.0,
        uncensored=False,
        description="Primary chat — best quality, heavy on VRAM",
        caveats="Ollama tool-call bug (ollama #14493)",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3.5-abliterated:27b",
        role="chat",
        params="27B",
        size_gb=17.0,
        uncensored=True,
        description="Uncensored chat — abliterated Qwen3.5 27B",
        caveats="Ollama tool-call bug (ollama #14493)",
    ),
    ModelInfo(
        tag="qwen3.5:9b",
        role="chat",
        params="9B",
        size_gb=6.6,
        uncensored=False,
        description="Mid-range chat — fast + capable, good speed/quality tradeoff",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3.5-abliterated:9b",
        role="chat",
        params="9B",
        size_gb=6.6,
        uncensored=True,
        description="Mid-range uncensored chat — abliterated Qwen3.5 9B",
    ),
    ModelInfo(
        tag="dolphin3:8b",
        role="chat",
        params="8B",
        size_gb=5.0,
        uncensored=True,
        description="Fast uncensored — Dolphin fine-tune of Llama 3.1 8B",
    ),
    ModelInfo(
        tag="llama3.1:8b",
        role="chat",
        params="8B",
        size_gb=5.0,
        uncensored=False,
        description="Fast general chat — good for quick tasks",
    ),
    ModelInfo(
        tag="gemma3:12b",
        role="chat",
        params="12B",
        size_gb=8.0,
        uncensored=False,
        description="Google Gemma 3 12B — solid mid-range alternative",
    ),
    ModelInfo(
        tag="llama3.2:latest",
        role="chat",
        params="3B",
        size_gb=2.0,
        uncensored=False,
        description="Ultra-fast 3B — limited capability, near-instant",
    ),
    # ── Coding models ─────────────────────────────────────────────────────
    ModelInfo(
        tag="qwen3-coder:30b-a3b-q4_K_M",
        role="coding",
        params="30B",
        size_gb=19.0,
        uncensored=False,
        description="Primary coding — explicit Ollama tools badge",
        caveats="Reliable native tool_calls below ~5 tools",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M",
        role="coding",
        params="30B",
        size_gb=19.0,
        uncensored=True,
        description="Uncensored coding — abliterated Qwen3-Coder 30B",
        caveats="Reliable native tool_calls below ~5 tools",
    ),
    ModelInfo(
        tag="qwen2.5-coder:14b-instruct",
        role="coding",
        params="14B",
        size_gb=9.0,
        uncensored=False,
        description="Mid-range coding — previous-gen, proven on 4090",
    ),
    ModelInfo(
        tag="qwen2.5-coder:32b-q4_k_m",
        role="coding",
        params="32B",
        size_gb=19.0,
        uncensored=False,
        description="Large Qwen2.5-Coder — previous-gen, needs full VRAM",
    ),
    ModelInfo(
        tag="devstral:24b",
        role="coding",
        params="24B",
        size_gb=15.0,
        uncensored=False,
        description="Mistral Devstral — strong agentic coding model",
    ),
)


# Fast lookup by tag.  Built once at import time from MODEL_CATALOG; if two
# catalog entries ever shared a tag, the later entry would replace the earlier.
_CATALOG_BY_TAG: dict[str, ModelInfo] = {m.tag: m for m in MODEL_CATALOG}



def lookup(tag: str) -> ModelInfo | None:
    """Return catalog entry for *tag*, or ``None`` if untracked.

    The tag is matched exactly against the catalog's tag strings via a
    dictionary lookup.
    """
    return _CATALOG_BY_TAG.get(tag)



def classify_status(
    tag: str,
    config_tags: set[str],
    pulled_tags: set[str],
) -> str:
    """Classify a model's status relative to config and Ollama state.

    Checks are applied in priority order, so the first match wins:

    1. ``"config"`` — *tag* is in *config_tags* (one of the 4 config slots
       or extra_models), whether or not it is pulled or cataloged.
    2. ``"pulled"`` — *tag* is in *pulled_tags* and in the catalog.
    3. ``"untracked"`` — *tag* is in *pulled_tags* but not in the catalog.
    4. ``"available"`` — everything else, i.e. neither configured nor
       pulled.  The catalog is not consulted for this case.

    Args:
        tag: Ollama model tag to classify.
        config_tags: Tags referenced by the active configuration.
        pulled_tags: Tags currently downloaded in Ollama.

    Returns:
        One of ``"config"``, ``"pulled"``, ``"untracked"``, ``"available"``.
    """
    # Config membership outranks every other state.
    if tag in config_tags:
        return "config"
    # Not pulled (and not configured): nothing further to distinguish.
    if tag not in pulled_tags:
        return "available"
    # Pulled: the catalog decides between a known and an unknown model.
    return "pulled" if tag in _CATALOG_BY_TAG else "untracked"