# commitword/commitllm.py (rswindell/commitword, master branch)
"""Optional Ollama-backed selector: pick the *index* of the "best" option from a
candidate list, by a vibe (professional, funny, technical, …).

Design contract: the model only ever **chooses an index** from a list the caller
already vetted — it never invents a code. So whatever it returns still resolves
to the same commit and clears the margin floor; the LLM is a cosmetic selector,
nothing more.

Stdlib-only (`urllib`). The core tools (`commitword.py`, `commitmint.py`,
`commitfind.py`) never import this; `commitmint.py` imports it lazily only when
`--vibe` is used. Provider, endpoint, model, and key come from the environment:

    COMMITWORD_LLM_PROVIDER     ollama (default) | openai
    COMMITWORD_LLM_ENDPOINT     default per provider (see DEFAULT_ENDPOINTS)
    COMMITWORD_LLM_MODEL        default qwen2.5:7b
    COMMITWORD_LLM_KEY          bearer token (openai-compatible providers)
    COMMITWORD_LLM_TEMPERATURE  sampling temperature (default 0.7)
    COMMITWORD_LLM_OPTIONS      extra generation params as a JSON object, e.g.
                                '{"seed": 42, "top_p": 0.9}'; merged into the
                                request (ollama: under `options`; openai: top
                                level), so use the keys your provider expects
    COMMITWORD_LLM_PROMPT       override the instruction prompt (how the model
                                judges); the numbered list and "answer with the
                                index" request are appended automatically, so the
                                index contract holds whatever you write here

`provider=openai` speaks the OpenAI-compatible `/v1/chat/completions` shape,
which also covers Groq, OpenRouter, Together, DeepSeek, Mistral, LM Studio, and
Ollama's own `/v1` endpoint — point COMMITWORD_LLM_ENDPOINT at the right host.

Any failure (server down, malformed reply, out-of-range index) raises
`LLMError`; the caller falls back to its default pick.
"""
import json
import os
import re
import urllib.error
import urllib.request

# Chat endpoint per provider, used when COMMITWORD_LLM_ENDPOINT is not set.
DEFAULT_ENDPOINTS = dict(
    ollama="http://localhost:11434/api/chat",
    openai="https://api.openai.com/v1/chat/completions",
)
# Model name used when COMMITWORD_LLM_MODEL is not set.
DEFAULT_MODEL = "qwen2.5:7b"
# Instruction prompt used when COMMITWORD_LLM_PROMPT is not set.
DEFAULT_PROMPT = "You pick the single best option from a numbered list of word pairs."


class LLMError(Exception):
    """Raised when the model cannot be reached or its reply cannot be used.

    Callers are expected to catch this and fall back to their default pick.
    """


def provider():
    """Return the configured LLM provider name, normalised to lowercase.

    Reads COMMITWORD_LLM_PROVIDER. An unset, empty, or whitespace-only value
    falls back to "ollama". Surrounding whitespace is stripped so that a stray
    space (e.g. "openai ") still matches code that compares the provider
    against its exact lowercase name.
    """
    raw = os.environ.get("COMMITWORD_LLM_PROVIDER", "")
    return raw.strip().lower() or "ollama"


def endpoint(prov):
    """Return the chat URL to call for provider `prov`.

    A non-empty COMMITWORD_LLM_ENDPOINT always wins. Otherwise the provider's
    entry in DEFAULT_ENDPOINTS is used, with unknown providers falling back to
    the Ollama default.
    """
    configured = os.environ.get("COMMITWORD_LLM_ENDPOINT")
    if configured:
        return configured
    return DEFAULT_ENDPOINTS.get(prov, DEFAULT_ENDPOINTS["ollama"])


def model():
    """Return the model name to request.

    Reads COMMITWORD_LLM_MODEL; when it is unset or empty, DEFAULT_MODEL is
    used. Treating an empty value as unset avoids sending a request with a
    blank model name.
    """
    return os.environ.get("COMMITWORD_LLM_MODEL") or DEFAULT_MODEL


def _first_int(s):
    m = re.search(r"-?\d+", s or "")
    return int(m.group()) if m else None


def generation_options():
    """Collect the sampling/generation parameters to merge into the request.

    Starts from COMMITWORD_LLM_OPTIONS when it holds a JSON object; malformed
    JSON or a non-object value is ignored. COMMITWORD_LLM_TEMPERATURE, when it
    parses as a float, then overrides any `temperature` from the options. If
    no temperature ends up set, 0.7 is used. Where these go in the request
    body is up to the caller.
    """
    extra = os.environ.get("COMMITWORD_LLM_OPTIONS")
    temperature = os.environ.get("COMMITWORD_LLM_TEMPERATURE")

    settings = {}
    if extra:
        try:
            decoded = json.loads(extra)
        except ValueError:
            decoded = None  # bad JSON: deliberately best-effort, keep going
        if isinstance(decoded, dict):
            settings = dict(decoded)

    if temperature:
        try:
            settings["temperature"] = float(temperature)
        except ValueError:
            pass  # unparseable temperature: keep whatever the options gave

    if "temperature" not in settings:
        settings["temperature"] = 0.7
    return settings


def prompt_text():
    """Return the instruction prompt: COMMITWORD_LLM_PROMPT if non-empty,
    otherwise DEFAULT_PROMPT."""
    override = os.environ.get("COMMITWORD_LLM_PROMPT")
    if override:
        return override
    return DEFAULT_PROMPT


def _prompt(items, vibe):
    """Build the system + user chat messages that ask the model for one index.

    `items` are numbered from 0 in the user message; `vibe` is the quality the
    model is asked to judge by.
    """
    numbered = [f"{pos}: {candidate}" for pos, candidate in enumerate(items)]
    # The instruction itself is configurable, but the "bare integer" demand is
    # always appended here, so an overridden prompt cannot drop the
    # index-only reply format the caller parses.
    system_text = prompt_text() + (
        " Reply with ONLY the chosen index as a bare integer, nothing else."
    )
    last = len(items) - 1
    user_text = "\n".join([
        f"Which word pair is the most {vibe}?",
        "",
        "\n".join(numbered),
        "",
        f"Answer with just the index (0–{last}).",
    ])
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]


def choose_index(items, vibe, *, prov=None, ep=None, mdl=None, key=None, timeout=20):
    """Return the 0-based index of the most-`vibe` item in `items`, per the model.

    Args:
        items: candidate list; the model is only asked to pick a position in it.
        vibe: the quality to judge by (interpolated into the question).
        prov: provider name; defaults to `provider()`. "openai" selects the
            OpenAI-compatible request/response shape, anything else the Ollama
            native one.
        ep: endpoint URL; defaults to `endpoint(prov)`.
        mdl: model name; defaults to `model()`.
        key: bearer token, sent only for the "openai" provider and only when
            non-empty; defaults to COMMITWORD_LLM_KEY.
        timeout: timeout in seconds passed to `urlopen`.

    Raises:
        LLMError: on any failure — empty `items`, an unusable endpoint URL,
            transport or HTTP errors, a non-JSON body, an unexpected response
            shape, or a reply that does not contain an in-range index.
    """
    # Local import: only needed to name the transport exception caught below.
    import http.client

    if not items:
        raise LLMError("no candidates")
    prov = prov or provider()
    ep = ep or endpoint(prov)
    mdl = mdl or model()
    key = key if key is not None else os.environ.get("COMMITWORD_LLM_KEY", "")
    messages = _prompt(items, vibe)
    headers = {"Content-Type": "application/json"}

    opts = generation_options()
    if prov == "openai":                     # OpenAI-compatible /v1/chat/completions
        body = {"model": mdl, "messages": messages, "stream": False, **opts}
        if key:
            headers["Authorization"] = f"Bearer {key}"
    else:                                    # Ollama native /api/chat
        body = {"model": mdl, "messages": messages, "stream": False, "options": opts}

    try:
        # Request() itself raises ValueError for a URL without a scheme, so it
        # is built inside the try to keep a misconfigured endpoint an LLMError.
        req = urllib.request.Request(ep, data=json.dumps(body).encode(), headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
    except (urllib.error.URLError, http.client.HTTPException, OSError, ValueError) as e:
        # http.client.HTTPException (e.g. IncompleteRead on a truncated body)
        # is not an OSError, so it has to be listed explicitly.
        raise LLMError(f"{ep}: {e}") from e

    try:
        if prov == "openai":
            content = data["choices"][0]["message"]["content"]
        else:
            content = data["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise LLMError(f"unexpected response shape from {ep}: {data!r:.200}") from e

    # None is handled by _first_int (treated as "no integer"); any other
    # non-string content would make the regex search raise TypeError.
    if content is not None and not isinstance(content, str):
        raise LLMError(f"unexpected response shape from {ep}: {data!r:.200}")

    idx = _first_int(content)
    if idx is None or not (0 <= idx < len(items)):
        raise LLMError(f"model reply {content!r} is not a valid index 0–{len(items) - 1}")
    return idx