Coverage for src/ai_shell/models.py: 100%

26 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 22:06 +0000

1"""Curated model catalog for ai-shell. 

2 

3Each entry represents an Ollama model tag that has been validated on 

4RTX 4090-class hardware. The catalog ships with ai-shell and is the 

5single source of truth for model metadata (role, parameter count, 

6censored/uncensored, disk footprint, known caveats). 

7 

8The ``llm models`` CLI command cross-references this catalog against 

9the active config slots and the models actually pulled into Ollama. 

10""" 

11 

12from __future__ import annotations 

13 

14from dataclasses import dataclass 

15 

16 

@dataclass(frozen=True)
class ModelInfo:
    """Metadata for a single Ollama model tag.

    Instances are immutable (``frozen=True``): assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``. Being frozen
    with the default ``eq=True`` also makes instances hashable.
    """

    # Ollama model tag, e.g. "qwen3.5:27b"; used as the lookup key.
    tag: str
    # Intended use: "chat" | "coding"
    role: str
    # Parameter count as a display string, e.g. "3B", "8B", "14B", "27B", "30B"
    params: str
    # Approximate disk footprint in GiB
    size_gb: float
    # True = abliterated / uncensored variant
    uncensored: bool
    # One-line purpose
    description: str
    # Known issues or limitations; empty string when there are none
    caveats: str = ""

28 

29 

# ---------------------------------------------------------------------------
# Catalog — grouped by role (chat first, then coding).
#
# NOTE: entries within a role are NOT strictly sorted by parameter count
# (e.g. gemma3:12b follows the 8B chat models, and qwen2.5-coder:32b follows
# the 14B coding model). The tuple order is kept as-is because anything that
# iterates the catalog would observe a reordering.
# ---------------------------------------------------------------------------
MODEL_CATALOG: tuple[ModelInfo, ...] = (
    # ── Chat models ───────────────────────────────────────────────────────
    ModelInfo(
        tag="qwen3.5:27b",
        role="chat",
        params="27B",
        size_gb=17.0,
        uncensored=False,
        description="Primary chat — best quality, heavy on VRAM",
        caveats="Ollama tool-call bug (ollama #14493)",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3.5-abliterated:27b",
        role="chat",
        params="27B",
        size_gb=17.0,
        uncensored=True,
        description="Uncensored chat — abliterated Qwen3.5 27B",
        caveats="Ollama tool-call bug (ollama #14493)",
    ),
    ModelInfo(
        tag="qwen3.5:9b",
        role="chat",
        params="9B",
        size_gb=6.6,
        uncensored=False,
        description="Mid-range chat — fast + capable, good speed/quality tradeoff",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3.5-abliterated:9b",
        role="chat",
        params="9B",
        size_gb=6.6,
        uncensored=True,
        description="Mid-range uncensored chat — abliterated Qwen3.5 9B",
    ),
    ModelInfo(
        tag="dolphin3:8b",
        role="chat",
        params="8B",
        size_gb=5.0,
        uncensored=True,
        description="Fast uncensored — Dolphin fine-tune of Llama 3.1 8B",
    ),
    ModelInfo(
        tag="llama3.1:8b",
        role="chat",
        params="8B",
        size_gb=5.0,
        uncensored=False,
        description="Fast general chat — good for quick tasks",
    ),
    ModelInfo(
        tag="gemma3:12b",
        role="chat",
        params="12B",
        size_gb=8.0,
        uncensored=False,
        description="Google Gemma 3 12B — solid mid-range alternative",
    ),
    ModelInfo(
        tag="llama3.2:latest",
        role="chat",
        params="3B",
        size_gb=2.0,
        uncensored=False,
        description="Ultra-fast 3B — limited capability, near-instant",
    ),
    # ── Coding models ─────────────────────────────────────────────────────
    ModelInfo(
        tag="qwen3-coder:30b-a3b-q4_K_M",
        role="coding",
        params="30B",
        size_gb=19.0,
        uncensored=False,
        description="Primary coding — explicit Ollama tools badge",
        caveats="Reliable native tool_calls below ~5 tools",
    ),
    ModelInfo(
        tag="huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M",
        role="coding",
        params="30B",
        size_gb=19.0,
        uncensored=True,
        description="Uncensored coding — abliterated Qwen3-Coder 30B",
        caveats="Reliable native tool_calls below ~5 tools",
    ),
    ModelInfo(
        tag="qwen2.5-coder:14b-instruct",
        role="coding",
        params="14B",
        size_gb=9.0,
        uncensored=False,
        description="Mid-range coding — previous-gen, proven on 4090",
    ),
    ModelInfo(
        tag="qwen2.5-coder:32b-q4_k_m",
        role="coding",
        params="32B",
        size_gb=19.0,
        uncensored=False,
        description="Large Qwen2.5-Coder — previous-gen, needs full VRAM",
    ),
    ModelInfo(
        tag="devstral:24b",
        role="coding",
        params="24B",
        size_gb=15.0,
        uncensored=False,
        description="Mistral Devstral — strong agentic coding model",
    ),
)

145 

# Tag -> catalog entry index, built once at import time so lookups do not
# scan MODEL_CATALOG. If two entries ever shared a tag, the later one in
# MODEL_CATALOG would win (dict-comprehension semantics).
_CATALOG_BY_TAG: dict[str, ModelInfo] = {m.tag: m for m in MODEL_CATALOG}

148 

149 

def lookup(tag: str) -> ModelInfo | None:
    """Return the catalog entry for *tag*, or ``None`` if untracked.

    The match is an exact dictionary lookup on the full tag string; no
    normalization (case folding, default-tag expansion) is applied.
    """
    return _CATALOG_BY_TAG.get(tag)

153 

154 

def classify_status(
    tag: str,
    config_tags: set[str],
    pulled_tags: set[str],
) -> str:
    """Classify a model's status relative to config and Ollama state.

    Checks are applied in priority order; the first match wins.

    Args:
        tag: Model tag to classify.
        config_tags: Tags referenced by the active config (the 4 config
            slots, or extra_models).
        pulled_tags: Tags currently downloaded in Ollama.

    Returns one of:
      - ``"config"``    — *tag* is in *config_tags* (regardless of whether
        it is pulled or in the catalog)
      - ``"pulled"``    — downloaded in Ollama and in the catalog, but not
        in config
      - ``"untracked"`` — downloaded in Ollama but not in the catalog
      - ``"available"`` — neither in config nor pulled. The catalog is not
        consulted on this path, so a tag unknown to the catalog is also
        reported as ``"available"``.
    """
    # Config membership takes precedence over everything else.
    if tag in config_tags:
        return "config"

    # Not downloaded: nothing further to distinguish.
    if tag not in pulled_tags:
        return "available"

    # Downloaded but not configured: the catalog decides the label.
    return "pulled" if tag in _CATALOG_BY_TAG else "untracked"

178 return "available"