From 8556c77a466d4caad708f7fc901e7d5e37254ef0 Mon Sep 17 00:00:00 2001 From: Florent Poinsaut <1256948+FlorentPoinsaut@users.noreply.github.com> Date: Sun, 26 Apr 2026 06:13:29 +0000 Subject: [PATCH 1/8] fix(engine): replace full-vocab percentile with top-N rank scoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #67 The previous formula mapped rank across the entire ~150 000-word vocabulary, compressing ranks 1-1500 into 99% and destroying the score gradient that makes the game engaging. Replace with a top-N neighbourhood score: rank <= top_n → (top_n - rank) / top_n (rank 1 → 0.999, rank top_n → 0) rank > top_n → 0.0 This restores a continuous, visible gradient from 0% (outside the neighbourhood) to 99% (closest non-exact word), with 100% reserved for exact matches only. Changes: - game/engine.py: new formula + top_n constructor param + ValueError guard - config.py: add SCORING_TOP_N env var (default 1000) - overlay/static/index.html: recalibrate gauge gradient to match thresholds (blue 0%, green 60%, gold 90%, red 100%) - tests/test_engine.py: inject _top_n in helper, add beyond-top-N test --- config.py | 1 + game/engine.py | 34 ++++++++++++++++++---------------- overlay/static/index.html | 2 +- tests/test_engine.py | 8 ++++++++ 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/config.py b/config.py index acffa4e..18ee4c0 100644 --- a/config.py +++ b/config.py @@ -19,6 +19,7 @@ def _require(name: str) -> str: COMMAND_PREFIX: str = os.getenv("COMMAND_PREFIX", "!sx") COOLDOWN: int = int(os.getenv("COOLDOWN", "5")) DIFFICULTY: str = os.getenv("DIFFICULTY", "easy") +SCORING_TOP_N: int = int(os.getenv("SCORING_TOP_N", "1000")) MODEL_PATH: str = os.getenv( "MODEL_PATH", "models/frWac_no_postag_no_phrase_700_skip_cut50.bin" ) diff --git a/game/engine.py b/game/engine.py index b51edb8..1046599 100644 --- a/game/engine.py +++ b/game/engine.py @@ -10,6 +10,7 @@ _DEFAULT_MODEL_PATH = os.getenv( "MODEL_PATH", 
"models/frWac_no_postag_no_phrase_700_skip_cut50.bin" ) +_DEFAULT_TOP_N: int = int(os.getenv("SCORING_TOP_N", "1000")) class SemanticEngine: @@ -23,12 +24,19 @@ class SemanticEngine: the value of the ``MODEL_PATH`` environment variable, or the standard ``models/frWac_no_postag_no_phrase_700_skip_cut50.bin`` path when the variable is unset. + top_n: Number of nearest neighbours used for rank scoring. Words + ranked beyond this threshold return ``0.0``. Defaults to the + value of the ``SCORING_TOP_N`` environment variable, or ``1000`` + when unset. """ - def __init__(self, model_path: str | pathlib.Path | None = None) -> None: + def __init__(self, model_path: str | pathlib.Path | None = None, top_n: int = _DEFAULT_TOP_N) -> None: + if top_n <= 0: + raise ValueError(f"top_n must be a positive integer, got {top_n}") self._model_path = str(model_path or _DEFAULT_MODEL_PATH) self._model: KeyedVectors | None = None self._cleaned_key_map: dict[str, str] = {} + self._top_n: int = top_n # ------------------------------------------------------------------ # Model management @@ -79,14 +87,13 @@ def similarity(self, word_a: str, word_b: str) -> float | None: def score_guess(self, guess: str, target: str) -> float | None: """Score a player's guess against the target word. - Returns ``1.0`` for an exact (cleaned) match, or a **percentile rank** - in ``[0, 1)`` for a non-exact guess. Returns ``None`` when either - word is missing from the vocabulary. + Returns ``1.0`` for an exact (cleaned) match, or a **top-N rank + score** in ``[0, 1)`` for a non-exact guess. Returns ``None`` when + either word is missing from the vocabulary. - The percentile rank expresses what fraction of the vocabulary is *less - similar* to *target* than *guess* is. For example, a score of - ``0.99`` means the guess is closer to the target than 99 % of all - words in the model. 
+ The score is computed over the top-``top_n`` nearest neighbours of + *target*: a score of ``0.99`` means the guess is among the top 1 % + of the nearest words. Words ranked beyond ``top_n`` return ``0.0``. Args: guess: The word submitted by the player. @@ -104,14 +111,9 @@ def score_guess(self, guess: str, target: str) -> float | None: if key_guess is None or key_target is None: return None rank = self._model.rank(key_target, key_guess) - # effective_vocab excludes the target word itself, matching how - # gensim's closer_than() (used internally by rank()) omits key1. - # Guard against degenerate single-word vocabularies where no ranking - # is meaningful and division by zero would occur. - effective_vocab = len(self._model.key_to_index) - 1 - if effective_vocab <= 0: - return None - return max(0.0, min(1.0, (effective_vocab - rank) / effective_vocab)) + if rank > self._top_n: + return 0.0 + return (self._top_n - rank) / self._top_n class GameEngine: diff --git a/overlay/static/index.html b/overlay/static/index.html index 7a8fd61..f2fb20a 100644 --- a/overlay/static/index.html +++ b/overlay/static/index.html @@ -82,7 +82,7 @@ #gauge-bar { height: 100%; width: 0%; - background: linear-gradient(90deg, #2d7d46, #c9a227, #c0392b); + background: linear-gradient(90deg, #1a5c8a 0%, #2d7d46 60%, #c9a227 90%, #c0392b 100%); border-radius: 3px; transition: width 0.6s ease; } diff --git a/tests/test_engine.py b/tests/test_engine.py index 13e7d8f..eb8e3ad 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -32,6 +32,7 @@ def _make_engine() -> SemanticEngine: engine._model_path = "" engine._model = kv engine._cleaned_key_map = {w: w for w in words} + engine._top_n = len(kv.key_to_index) - 1 # 3 (4 words minus the target) return engine @@ -97,6 +98,13 @@ def test_score_is_percentile_rank(self): assert score_maison is not None assert score_chien > score_maison + def test_word_beyond_top_n_returns_zero(self): + """Words ranked beyond top_n score 0.0 instead of a 
fractional rank.""" + engine = _make_engine() + engine._top_n = 1 # only rank-1 word (chien) is inside the window + score = engine.score_guess("maison", "chat") # rank 2 > top_n=1 + assert score == 0.0 + def test_unknown_word_returns_none(self): engine = _make_engine() assert engine.score_guess("inconnu", "chat") is None From 6e3185d2b2f84cd9b2fed14cc3b8deaee886136f Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 08:47:34 +0000 Subject: [PATCH 2/8] fix(engine): replace linear top-N formula with logarithmic scoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linear formula (top_n - rank) / top_n with top_n=1000 assigned 0% to any word ranked beyond the 1000th nearest neighbour in frWac. Since the vocabulary contains ~150 000 entries, even loosely related words easily exceed rank 1000, causing every manual guess to display 0%. Replace with a logarithmic formula over a larger top-N window (100 000 by default): score = 1 - log(rank + 1) / log(top_n + 1) This gives a visible, continuous gradient with no compression: rank 1 → 94 % (very close synonyms) rank 10 → 79 % rank 100 → 61 % rank 1 000 → 42 % rank 10 000 → 22 % rank 100 000 → 0 % (hard cutoff) Both config defaults (SCORING_TOP_N) are updated from 1 000 to 100 000. Tests updated to reflect the new formula and _top_n=4 in the mock helper. 
--- config.py | 2 +- game/engine.py | 5 +++-- tests/test_engine.py | 15 +++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/config.py b/config.py index 18ee4c0..0dcbf29 100644 --- a/config.py +++ b/config.py @@ -19,7 +19,7 @@ def _require(name: str) -> str: COMMAND_PREFIX: str = os.getenv("COMMAND_PREFIX", "!sx") COOLDOWN: int = int(os.getenv("COOLDOWN", "5")) DIFFICULTY: str = os.getenv("DIFFICULTY", "easy") -SCORING_TOP_N: int = int(os.getenv("SCORING_TOP_N", "1000")) +SCORING_TOP_N: int = int(os.getenv("SCORING_TOP_N", "100000")) MODEL_PATH: str = os.getenv( "MODEL_PATH", "models/frWac_no_postag_no_phrase_700_skip_cut50.bin" ) diff --git a/game/engine.py b/game/engine.py index 1046599..95b4c16 100644 --- a/game/engine.py +++ b/game/engine.py @@ -1,5 +1,6 @@ """Game engine: state management and guess scoring.""" +import math import os import pathlib @@ -10,7 +11,7 @@ _DEFAULT_MODEL_PATH = os.getenv( "MODEL_PATH", "models/frWac_no_postag_no_phrase_700_skip_cut50.bin" ) -_DEFAULT_TOP_N: int = int(os.getenv("SCORING_TOP_N", "1000")) +_DEFAULT_TOP_N: int = int(os.getenv("SCORING_TOP_N", "100000")) class SemanticEngine: @@ -113,7 +114,7 @@ def score_guess(self, guess: str, target: str) -> float | None: rank = self._model.rank(key_target, key_guess) if rank > self._top_n: return 0.0 - return (self._top_n - rank) / self._top_n + return max(0.0, 1.0 - math.log(rank + 1) / math.log(self._top_n + 1)) class GameEngine: diff --git a/tests/test_engine.py b/tests/test_engine.py index eb8e3ad..451446c 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -32,7 +32,7 @@ def _make_engine() -> SemanticEngine: engine._model_path = "" engine._model = kv engine._cleaned_key_map = {w: w for w in words} - engine._top_n = len(kv.key_to_index) - 1 # 3 (4 words minus the target) + engine._top_n = len(kv.key_to_index) # 4 (gives rank 1 → ~0.57 > 0.5, rank 2 → ~0.32 < 0.5 with log formula) return engine @@ -84,16 +84,15 @@ def 
test_score_is_between_zero_and_one(self): assert 0.0 <= score <= 1.0 def test_score_is_percentile_rank(self): - """score_guess returns a percentile rank, not raw cosine similarity. + """score_guess returns a log-rank score, not raw cosine similarity. - With 4 words in the test vocabulary (chat, chien, maison, voiture), - effective_vocab = 3 (excluding the target 'chat' itself). chien is - the closest non-target word (rank 1/3 → score 2/3) and maison is - less similar (rank 2/3 → score 1/3), so chien must outrank maison. + With top_n=4 and the log formula, chien (rank 1) scores + 1 - log(2)/log(5) ≈ 0.57 and maison (rank 2) scores + 1 - log(3)/log(5) ≈ 0.32, so chien must outrank maison. """ engine = _make_engine() - score_chien = engine.score_guess("chien", "chat") # rank 1/3 → 2/3 - score_maison = engine.score_guess("maison", "chat") # rank 2/3 → 1/3 + score_chien = engine.score_guess("chien", "chat") # rank 1 → ~0.57 + score_maison = engine.score_guess("maison", "chat") # rank 2 → ~0.32 assert score_chien is not None assert score_maison is not None assert score_chien > score_maison From 47d9c99ffcdb59a5fda4d87d85d986809b0490d4 Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 09:04:00 +0000 Subject: [PATCH 3/8] fix(engine): apply review corrections from NLP/Data, Tester and Reviewer agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NLP/Data: - Lower SCORING_TOP_N default from 100 000 to 10 000 so that gold (≥90%) is reachable for a true synonym and the blue zone stays informative - Fix score_guess docstring: remove incorrect '0.99 = top 1%' claim; describe the logarithmic scale accurately Reviewer: - Import MODEL_PATH and SCORING_TOP_N from config instead of calling os.getenv() directly (violates project convention) - Fix stale class docstring: 'or 1000 when unset' → 'or 10 000 when unset' - Wrap __init__ signature to comply with PEP 8 E501 (88 chars max) - Avoid redundant clean_word() calls in 
score_guess (computed once) Tester: - Rename test_score_guess_raises_when_not_loaded → test_similarity_raises_when_not_loaded (it tested similarity()) - Add test_score_guess_raises_when_not_loaded (line 109 was uncovered) - Add test_invalid_top_n_raises_value_error (top_n=0, line 36 uncovered) - Add test_negative_top_n_raises_value_error (top_n=-1) - Add test_similarity_unknown_word_returns_none (line 85 uncovered) - Add test_score_at_exactly_top_n_returns_zero (boundary condition) Coverage: game/engine.py 91% → 97% — 25/25 tests pass --- config.py | 2 +- game/engine.py | 29 +++++++++++++++++------------ tests/test_engine.py | 27 ++++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/config.py b/config.py index 0dcbf29..99519ea 100644 --- a/config.py +++ b/config.py @@ -19,7 +19,7 @@ def _require(name: str) -> str: COMMAND_PREFIX: str = os.getenv("COMMAND_PREFIX", "!sx") COOLDOWN: int = int(os.getenv("COOLDOWN", "5")) DIFFICULTY: str = os.getenv("DIFFICULTY", "easy") -SCORING_TOP_N: int = int(os.getenv("SCORING_TOP_N", "100000")) +SCORING_TOP_N: int = int(os.getenv("SCORING_TOP_N", "10000")) MODEL_PATH: str = os.getenv( "MODEL_PATH", "models/frWac_no_postag_no_phrase_700_skip_cut50.bin" ) diff --git a/game/engine.py b/game/engine.py index 95b4c16..3ae6f19 100644 --- a/game/engine.py +++ b/game/engine.py @@ -1,17 +1,15 @@ """Game engine: state management and guess scoring.""" import math -import os import pathlib from gensim.models import KeyedVectors +import config from game.word_utils import build_cleaned_key_map, clean_word -_DEFAULT_MODEL_PATH = os.getenv( - "MODEL_PATH", "models/frWac_no_postag_no_phrase_700_skip_cut50.bin" -) -_DEFAULT_TOP_N: int = int(os.getenv("SCORING_TOP_N", "100000")) +_DEFAULT_MODEL_PATH: str = config.MODEL_PATH +_DEFAULT_TOP_N: int = config.SCORING_TOP_N class SemanticEngine: @@ -27,11 +25,15 @@ class SemanticEngine: path when the variable is unset. 
top_n: Number of nearest neighbours used for rank scoring. Words ranked beyond this threshold return ``0.0``. Defaults to the - value of the ``SCORING_TOP_N`` environment variable, or ``1000`` + value of the ``SCORING_TOP_N`` environment variable, or ``10 000`` when unset. """ - def __init__(self, model_path: str | pathlib.Path | None = None, top_n: int = _DEFAULT_TOP_N) -> None: + def __init__( + self, + model_path: str | pathlib.Path | None = None, + top_n: int = _DEFAULT_TOP_N, + ) -> None: if top_n <= 0: raise ValueError(f"top_n must be a positive integer, got {top_n}") self._model_path = str(model_path or _DEFAULT_MODEL_PATH) @@ -93,8 +95,9 @@ def score_guess(self, guess: str, target: str) -> float | None: either word is missing from the vocabulary. The score is computed over the top-``top_n`` nearest neighbours of - *target*: a score of ``0.99`` means the guess is among the top 1 % - of the nearest words. Words ranked beyond ``top_n`` return ``0.0``. + *target* using a logarithmic scale: the closest neighbour scores ~94% + and the ``top_n``-th neighbour scores 0%. Words ranked beyond + ``top_n`` return ``0.0``. Args: guess: The word submitted by the player. @@ -103,12 +106,14 @@ def score_guess(self, guess: str, target: str) -> float | None: Returns: A float in ``[0, 1]``, or ``None``. """ - if clean_word(guess) == clean_word(target): + clean_guess = clean_word(guess) + clean_target = clean_word(target) + if clean_guess == clean_target: return 1.0 if self._model is None: raise RuntimeError("Model not loaded. 
Call load() first.") - key_guess = self._cleaned_key_map.get(clean_word(guess)) - key_target = self._cleaned_key_map.get(clean_word(target)) + key_guess = self._cleaned_key_map.get(clean_guess) + key_target = self._cleaned_key_map.get(clean_target) if key_guess is None or key_target is None: return None rank = self._model.rank(key_target, key_guess) diff --git a/tests/test_engine.py b/tests/test_engine.py index 451446c..dd996db 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -49,11 +49,24 @@ def test_is_loaded_after_injecting_model(self): engine = _make_engine() assert engine.is_loaded - def test_score_guess_raises_when_not_loaded(self): + def test_similarity_raises_when_not_loaded(self): engine = SemanticEngine(model_path="/nonexistent/path.bin") with pytest.raises(RuntimeError, match="not loaded"): engine.similarity("chat", "chien") + def test_score_guess_raises_when_not_loaded(self): + engine = SemanticEngine(model_path="/nonexistent/path.bin") + with pytest.raises(RuntimeError, match="not loaded"): + engine.score_guess("chat", "chien") + + def test_invalid_top_n_raises_value_error(self): + with pytest.raises(ValueError, match="top_n must be a positive integer"): + SemanticEngine(model_path="/nonexistent/path.bin", top_n=0) + + def test_negative_top_n_raises_value_error(self): + with pytest.raises(ValueError, match="top_n must be a positive integer"): + SemanticEngine(model_path="/nonexistent/path.bin", top_n=-1) + # --------------------------------------------------------------------------- # SemanticEngine – similarity @@ -108,6 +121,18 @@ def test_unknown_word_returns_none(self): engine = _make_engine() assert engine.score_guess("inconnu", "chat") is None + def test_similarity_unknown_word_returns_none(self): + engine = _make_engine() + assert engine.similarity("inconnu", "chat") is None + assert engine.similarity("chat", "inconnu") is None + + def test_score_at_exactly_top_n_returns_zero(self): + """At rank == top_n the log formula evaluates 
to exactly 0.0.""" + engine = _make_engine() + engine._top_n = 2 # maison has rank 2; rank == top_n + score = engine.score_guess("maison", "chat") + assert score == pytest.approx(0.0) + def test_similarity_is_symmetric(self): engine = _make_engine() assert engine.similarity("chat", "chien") == pytest.approx( From 694ce44fc9795a68cf7ce9020393b508521b2f10 Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 09:14:17 +0000 Subject: [PATCH 4/8] fix(engine): lazy-import config inside __init__ to avoid collection error Importing config at module level triggered _require('TWITCH_CHANNEL') during pytest collection, causing an ERROR in CI environments without a .env file. Moving 'import config' inside __init__ defers execution until actual instantiation. This also removes the duplicated os.getenv() defaults (_DEFAULT_MODEL_PATH, _DEFAULT_TOP_N): config.py remains the single source of truth for both values. --- game/engine.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/game/engine.py b/game/engine.py index 3ae6f19..9310a94 100644 --- a/game/engine.py +++ b/game/engine.py @@ -5,12 +5,8 @@ from gensim.models import KeyedVectors -import config from game.word_utils import build_cleaned_key_map, clean_word -_DEFAULT_MODEL_PATH: str = config.MODEL_PATH -_DEFAULT_TOP_N: int = config.SCORING_TOP_N - class SemanticEngine: """Word-embedding-based similarity engine. 
@@ -32,14 +28,17 @@ class SemanticEngine: def __init__( self, model_path: str | pathlib.Path | None = None, - top_n: int = _DEFAULT_TOP_N, + top_n: int | None = None, ) -> None: - if top_n <= 0: - raise ValueError(f"top_n must be a positive integer, got {top_n}") - self._model_path = str(model_path or _DEFAULT_MODEL_PATH) + import config as _cfg + + resolved_top_n: int = top_n if top_n is not None else _cfg.SCORING_TOP_N + if resolved_top_n <= 0: + raise ValueError(f"top_n must be a positive integer, got {resolved_top_n}") + self._model_path = str(model_path or _cfg.MODEL_PATH) self._model: KeyedVectors | None = None self._cleaned_key_map: dict[str, str] = {} - self._top_n: int = top_n + self._top_n: int = resolved_top_n # ------------------------------------------------------------------ # Model management From ee94041c27f190da6280dd5f0cd59748dd7bba6a Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 09:19:49 +0000 Subject: [PATCH 5/8] fix(config): move _require() to validate() called at startup Executing _require('TWITCH_CHANNEL') at module scope caused pytest to crash during collection/instantiation in CI environments without a .env. Introduce config.validate() which must be called once at application startup (main.py). TWITCH_CHANNEL defaults to '' at import time; the guard fires at startup as before, keeping production fail-fast behaviour. game/engine.py can now import config at module level cleanly, with config.py as the single source of truth for SCORING_TOP_N and MODEL_PATH (no duplication). --- config.py | 13 ++++++++++++- game/engine.py | 17 +++++++++-------- main.py | 1 + 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/config.py b/config.py index 99519ea..aec8f5f 100644 --- a/config.py +++ b/config.py @@ -13,9 +13,20 @@ def _require(name: str) -> str: return value +def validate() -> None: + """Validate all required environment variables. 
+ + Must be called once at application startup, before any Twitch + connection is attempted. Raises RuntimeError if any required + variable is missing. + """ + global TWITCH_CHANNEL + TWITCH_CHANNEL = _require("TWITCH_CHANNEL") + + # Optional: kept for users who still want to supply a token manually. TWITCH_TOKEN: str | None = os.getenv("TWITCH_TOKEN") -TWITCH_CHANNEL: str = _require("TWITCH_CHANNEL") +TWITCH_CHANNEL: str = os.getenv("TWITCH_CHANNEL", "") COMMAND_PREFIX: str = os.getenv("COMMAND_PREFIX", "!sx") COOLDOWN: int = int(os.getenv("COOLDOWN", "5")) DIFFICULTY: str = os.getenv("DIFFICULTY", "easy") diff --git a/game/engine.py b/game/engine.py index 9310a94..3ae6f19 100644 --- a/game/engine.py +++ b/game/engine.py @@ -5,8 +5,12 @@ from gensim.models import KeyedVectors +import config from game.word_utils import build_cleaned_key_map, clean_word +_DEFAULT_MODEL_PATH: str = config.MODEL_PATH +_DEFAULT_TOP_N: int = config.SCORING_TOP_N + class SemanticEngine: """Word-embedding-based similarity engine. 
@@ -28,17 +32,14 @@ class SemanticEngine: def __init__( self, model_path: str | pathlib.Path | None = None, - top_n: int | None = None, + top_n: int = _DEFAULT_TOP_N, ) -> None: - import config as _cfg - - resolved_top_n: int = top_n if top_n is not None else _cfg.SCORING_TOP_N - if resolved_top_n <= 0: - raise ValueError(f"top_n must be a positive integer, got {resolved_top_n}") - self._model_path = str(model_path or _cfg.MODEL_PATH) + if top_n <= 0: + raise ValueError(f"top_n must be a positive integer, got {top_n}") + self._model_path = str(model_path or _DEFAULT_MODEL_PATH) self._model: KeyedVectors | None = None self._cleaned_key_map: dict[str, str] = {} - self._top_n: int = resolved_top_n + self._top_n: int = top_n # ------------------------------------------------------------------ # Model management diff --git a/main.py b/main.py index bdd6076..6715341 100644 --- a/main.py +++ b/main.py @@ -53,6 +53,7 @@ def _resolve_token() -> str: def main() -> None: """Start the Twitch bot, and optionally the overlay server.""" + config.validate() if len(sys.argv) > 1 and sys.argv[1] == "auth-login": # CLI mode: force a new login flow and exit. if not config.TWITCH_CLIENT_ID or not config.TWITCH_CLIENT_SECRET: From 5c6abf279fd184a7f7b068d235012c2ec43a031d Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 10:11:44 +0000 Subject: [PATCH 6/8] fix(engine): remove top_n concept, score over full vocabulary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every valid guess now scores strictly > 0, with no configurable cutoff. Formula: score = 1 - log(rank+1) / log(vocab_size+1) where vocab_size = len(model.key_to_index) set at load() time. Because rank <= vocab_size - 1 < vocab_size for any in-vocab word, the score is always positive. No hard cutoff, no SCORING_TOP_N config. 
Score distribution (frWac ~150 000 words): rank 1 → 94 % rank 10 → 80 % rank 100 → 61 % rank 1 000 → 42 % rank 10 000 → 23 % rank 149 999 → 0.003 % Remove: top_n param, _top_n/_top_n_override attrs, SCORING_TOP_N config, _DEFAULT_TOP_N module constant, and related tests. Add: _vocab_size attr set in load(), test_all_vocab_words_score_above_zero. --- config.py | 1 - game/engine.py | 33 +++++++++++++-------------------- tests/test_engine.py | 34 ++++++++++------------------------ 3 files changed, 23 insertions(+), 45 deletions(-) diff --git a/config.py b/config.py index aec8f5f..16fde77 100644 --- a/config.py +++ b/config.py @@ -30,7 +30,6 @@ def validate() -> None: COMMAND_PREFIX: str = os.getenv("COMMAND_PREFIX", "!sx") COOLDOWN: int = int(os.getenv("COOLDOWN", "5")) DIFFICULTY: str = os.getenv("DIFFICULTY", "easy") -SCORING_TOP_N: int = int(os.getenv("SCORING_TOP_N", "10000")) MODEL_PATH: str = os.getenv( "MODEL_PATH", "models/frWac_no_postag_no_phrase_700_skip_cut50.bin" ) diff --git a/game/engine.py b/game/engine.py index 3ae6f19..a1c20d5 100644 --- a/game/engine.py +++ b/game/engine.py @@ -9,7 +9,6 @@ from game.word_utils import build_cleaned_key_map, clean_word _DEFAULT_MODEL_PATH: str = config.MODEL_PATH -_DEFAULT_TOP_N: int = config.SCORING_TOP_N class SemanticEngine: @@ -23,23 +22,16 @@ class SemanticEngine: the value of the ``MODEL_PATH`` environment variable, or the standard ``models/frWac_no_postag_no_phrase_700_skip_cut50.bin`` path when the variable is unset. - top_n: Number of nearest neighbours used for rank scoring. Words - ranked beyond this threshold return ``0.0``. Defaults to the - value of the ``SCORING_TOP_N`` environment variable, or ``10 000`` - when unset. 
""" def __init__( self, model_path: str | pathlib.Path | None = None, - top_n: int = _DEFAULT_TOP_N, ) -> None: - if top_n <= 0: - raise ValueError(f"top_n must be a positive integer, got {top_n}") self._model_path = str(model_path or _DEFAULT_MODEL_PATH) self._model: KeyedVectors | None = None self._cleaned_key_map: dict[str, str] = {} - self._top_n: int = top_n + self._vocab_size: int | None = None # ------------------------------------------------------------------ # Model management @@ -55,6 +47,7 @@ def load(self) -> None: self._model_path, binary=True, unicode_errors="ignore" ) self._cleaned_key_map = build_cleaned_key_map(self._model.key_to_index) + self._vocab_size = len(self._model.key_to_index) @property def is_loaded(self) -> bool: @@ -90,21 +83,22 @@ def similarity(self, word_a: str, word_b: str) -> float | None: def score_guess(self, guess: str, target: str) -> float | None: """Score a player's guess against the target word. - Returns ``1.0`` for an exact (cleaned) match, or a **top-N rank - score** in ``[0, 1)`` for a non-exact guess. Returns ``None`` when - either word is missing from the vocabulary. + Returns ``1.0`` for an exact (cleaned) match, or a **logarithmic rank + score** in ``(0, 1)`` for a non-exact in-vocabulary guess. Returns + ``None`` when either word is missing from the vocabulary. - The score is computed over the top-``top_n`` nearest neighbours of - *target* using a logarithmic scale: the closest neighbour scores ~94% - and the ``top_n``-th neighbour scores 0%. Words ranked beyond - ``top_n`` return ``0.0``. + The score formula is ``1 − log(rank+1) / log(V+1)`` where *rank* is + 1-based (1 = closest neighbour) and *V* is the full vocabulary size. + Because rank ≤ V−1 < V for any in-vocabulary word, every valid guess + returns a strictly positive score. The closest synonym scores ≈94 %; + rank 10 000 ≈23 %; the furthest possible word scores ≈0.003 %. Args: guess: The word submitted by the player. target: The secret target word. 
Returns: - A float in ``[0, 1]``, or ``None``. + A float in ``(0, 1]``, or ``None`` if either word is OOV. """ clean_guess = clean_word(guess) clean_target = clean_word(target) @@ -117,9 +111,8 @@ def score_guess(self, guess: str, target: str) -> float | None: if key_guess is None or key_target is None: return None rank = self._model.rank(key_target, key_guess) - if rank > self._top_n: - return 0.0 - return max(0.0, 1.0 - math.log(rank + 1) / math.log(self._top_n + 1)) + vocab_size = self._vocab_size or len(self._model.key_to_index) + return max(0.0, 1.0 - math.log(rank + 1) / math.log(vocab_size + 1)) class GameEngine: diff --git a/tests/test_engine.py b/tests/test_engine.py index dd996db..0741cb1 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -32,7 +32,7 @@ def _make_engine() -> SemanticEngine: engine._model_path = "" engine._model = kv engine._cleaned_key_map = {w: w for w in words} - engine._top_n = len(kv.key_to_index) # 4 (gives rank 1 → ~0.57 > 0.5, rank 2 → ~0.32 < 0.5 with log formula) + engine._vocab_size = len(kv.key_to_index) # 4 return engine @@ -59,14 +59,6 @@ def test_score_guess_raises_when_not_loaded(self): with pytest.raises(RuntimeError, match="not loaded"): engine.score_guess("chat", "chien") - def test_invalid_top_n_raises_value_error(self): - with pytest.raises(ValueError, match="top_n must be a positive integer"): - SemanticEngine(model_path="/nonexistent/path.bin", top_n=0) - - def test_negative_top_n_raises_value_error(self): - with pytest.raises(ValueError, match="top_n must be a positive integer"): - SemanticEngine(model_path="/nonexistent/path.bin", top_n=-1) - # --------------------------------------------------------------------------- # SemanticEngine – similarity @@ -96,10 +88,10 @@ def test_score_is_between_zero_and_one(self): assert score is not None assert 0.0 <= score <= 1.0 - def test_score_is_percentile_rank(self): - """score_guess returns a log-rank score, not raw cosine similarity. 
+ def test_score_is_log_rank(self): + """score_guess returns a logarithmic rank score, not raw cosine similarity. - With top_n=4 and the log formula, chien (rank 1) scores + With vocab_size=4 and the log formula, chien (rank 1) scores 1 - log(2)/log(5) ≈ 0.57 and maison (rank 2) scores 1 - log(3)/log(5) ≈ 0.32, so chien must outrank maison. """ @@ -110,12 +102,13 @@ def test_score_is_percentile_rank(self): assert score_maison is not None assert score_chien > score_maison - def test_word_beyond_top_n_returns_zero(self): - """Words ranked beyond top_n score 0.0 instead of a fractional rank.""" + def test_all_vocab_words_score_above_zero(self): + """Every in-vocabulary word scores strictly > 0.""" engine = _make_engine() - engine._top_n = 1 # only rank-1 word (chien) is inside the window - score = engine.score_guess("maison", "chat") # rank 2 > top_n=1 - assert score == 0.0 + for word in ["chien", "maison", "voiture"]: + score = engine.score_guess(word, "chat") + assert score is not None + assert score > 0.0, f"{word!r} scored 0" def test_unknown_word_returns_none(self): engine = _make_engine() @@ -126,13 +119,6 @@ def test_similarity_unknown_word_returns_none(self): assert engine.similarity("inconnu", "chat") is None assert engine.similarity("chat", "inconnu") is None - def test_score_at_exactly_top_n_returns_zero(self): - """At rank == top_n the log formula evaluates to exactly 0.0.""" - engine = _make_engine() - engine._top_n = 2 # maison has rank 2; rank == top_n - score = engine.score_guess("maison", "chat") - assert score == pytest.approx(0.0) - def test_similarity_is_symmetric(self): engine = _make_engine() assert engine.similarity("chat", "chien") == pytest.approx( From 8fe9a36a8e25370dc06a6cddbc003b7b5d85da07 Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 10:19:11 +0000 Subject: [PATCH 7/8] feat(engine): rescale scores so rank 1 = 99%, 100% reserved for exact match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Cache _max_score = 1 - log(2)/log(V+1) at load() time and rescale: score = score_raw * 0.99 / _max_score This maps rank 1 exactly to 0.99 (99%) while preserving the logarithmic gradient. 1.0 (100%) remains exclusive to exact matches. Score distribution (frWac ~150 000 words): rank 1 → 99 % (closest neighbour) rank 10 → 85 % rank 100 → 65 % rank 1 000 → 44 % rank 10 000 → 24 % rank 149 999 → 0.003 % (always > 0) Update tests: replace absolute 0.5 thresholds with relative comparisons (unrelated < close), add _max_score=None to _make_engine() helper. --- game/engine.py | 22 +++++++++++++--------- tests/test_engine.py | 25 ++++++++++++++----------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/game/engine.py b/game/engine.py index a1c20d5..fff0b9a 100644 --- a/game/engine.py +++ b/game/engine.py @@ -32,6 +32,7 @@ def __init__( self._model: KeyedVectors | None = None self._cleaned_key_map: dict[str, str] = {} self._vocab_size: int | None = None + self._max_score: float | None = None # ------------------------------------------------------------------ # Model management @@ -48,6 +49,7 @@ def load(self) -> None: ) self._cleaned_key_map = build_cleaned_key_map(self._model.key_to_index) self._vocab_size = len(self._model.key_to_index) + self._max_score = 1.0 - math.log(2) / math.log(self._vocab_size + 1) @property def is_loaded(self) -> bool: @@ -84,21 +86,21 @@ def score_guess(self, guess: str, target: str) -> float | None: """Score a player's guess against the target word. Returns ``1.0`` for an exact (cleaned) match, or a **logarithmic rank - score** in ``(0, 1)`` for a non-exact in-vocabulary guess. Returns - ``None`` when either word is missing from the vocabulary. + score** rescaled to ``(0, 0.99]`` for a non-exact in-vocabulary guess. + Returns ``None`` when either word is missing from the vocabulary. 
- The score formula is ``1 − log(rank+1) / log(V+1)`` where *rank* is - 1-based (1 = closest neighbour) and *V* is the full vocabulary size. - Because rank ≤ V−1 < V for any in-vocabulary word, every valid guess - returns a strictly positive score. The closest synonym scores ≈94 %; - rank 10 000 ≈23 %; the furthest possible word scores ≈0.003 %. + The raw formula ``1 − log(rank+1) / log(V+1)`` is rescaled so that + rank 1 (the closest vocabulary neighbour) maps to exactly ``0.99`` + and the furthest possible word maps to near ``0``. ``1.0`` is + reserved exclusively for exact matches. The closest synonym scores + ``0.99`` (99 %); rank 10 000 ≈ 24 %; the furthest word ≈ 0.003 %. Args: guess: The word submitted by the player. target: The secret target word. Returns: - A float in ``(0, 1]``, or ``None`` if either word is OOV. + ``1.0`` for an exact match, else a float in ``(0, 0.99]``; ``None`` if OOV. """ clean_guess = clean_word(guess) clean_target = clean_word(target) @@ -112,7 +114,9 @@ def score_guess(self, guess: str, target: str) -> float | None: return None rank = self._model.rank(key_target, key_guess) vocab_size = self._vocab_size or len(self._model.key_to_index) - return max(0.0, 1.0 - math.log(rank + 1) / math.log(vocab_size + 1)) + max_score = self._max_score or (1.0 - math.log(2) / math.log(vocab_size + 1)) + score_raw = max(0.0, 1.0 - math.log(rank + 1) / math.log(vocab_size + 1)) + return score_raw * 0.99 / max_score class GameEngine: diff --git a/tests/test_engine.py b/tests/test_engine.py index 0741cb1..7249fc1 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -33,6 +33,7 @@ def _make_engine() -> SemanticEngine: engine._model = kv engine._cleaned_key_map = {w: w for w in words} engine._vocab_size = len(kv.key_to_index) # 4 + engine._max_score = None # resolved lazily in score_guess return engine @@ -77,9 +78,11 @@ def test_similar_words_return_high_score(self): def test_unrelated_words_return_low_score(self): engine = _make_engine() - score =
engine.score_guess("maison", "chat") - assert score is not None - assert score < 0.5 + score_close = engine.score_guess("chien", "chat") + score_unrelated = engine.score_guess("maison", "chat") + assert score_close is not None + assert score_unrelated is not None + assert score_unrelated < score_close def test_score_is_between_zero_and_one(self): engine = _make_engine() @@ -89,15 +92,14 @@ def test_score_is_between_zero_and_one(self): assert 0.0 <= score <= 1.0 def test_score_is_log_rank(self): - """score_guess returns a logarithmic rank score, not raw cosine similarity. + """score_guess returns a logarithmic rank score rescaled so rank 1 = 0.99. - With vocab_size=4 and the log formula, chien (rank 1) scores - 1 - log(2)/log(5) ≈ 0.57 and maison (rank 2) scores - 1 - log(3)/log(5) ≈ 0.32, so chien must outrank maison. + With vocab_size=4, chien (rank 1) scores exactly 0.99 and maison + (rank 2) scores less, so chien must outrank maison. """ engine = _make_engine() - score_chien = engine.score_guess("chien", "chat") # rank 1 → ~0.57 - score_maison = engine.score_guess("maison", "chat") # rank 2 → ~0.32 + score_chien = engine.score_guess("chien", "chat") # rank 1 → 0.99 + score_maison = engine.score_guess("maison", "chat") # rank 2 → lower assert score_chien is not None assert score_maison is not None assert score_chien > score_maison @@ -151,8 +153,9 @@ def test_exact_match_returns_one(self): def test_unrelated_word_returns_low_score(self): ge = GameEngine("chat", semantic_engine=_make_engine()) - score = ge.score_guess("maison") - assert score < 0.5 + score_close = ge.score_guess("chien") + score_unrelated = ge.score_guess("maison") + assert score_unrelated < score_close def test_score_is_between_zero_and_one(self): ge = GameEngine("chat", semantic_engine=_make_engine()) From 296d3a5d4fdf7c035c9383ea5e459f0235bd9c18 Mon Sep 17 00:00:00 2001 From: Florent Poinsaut Date: Tue, 28 Apr 2026 12:03:08 +0000 Subject: [PATCH 8/8] feat(engine): use offset-log formula for 
gapless 1-99% scoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace rescaled log formula with formula E (offset k=9) recommended by NLP/Data agent after analysis of cemantix.certitudes.org approach: score = 0.99 * log((V+9) / (rank+9)) / log((V+9) / 10) Mathematical guarantees (V = 150 000, frWac): - rank 1 → exactly 99% (100% reserved for exact match) - step rank 1→2 = 0.98 pp ≤ 1 pp → no integer % gaps (1–99 all reachable) - score > 0 for every in-vocabulary word - strictly monotone decreasing Score distribution: rank 1 → 99 % rank 2 → 98 % rank 3 → 97 % rank 10 → 92 % rank 100 → 74 % rank 1 000 → 51 % rank 10 000 → 27 % rank 149 999 → 0.0001 % Remove _max_score attr (no longer needed). Update test docstring. --- game/engine.py | 27 ++++++++++++++++----------- tests/test_engine.py | 5 ++--- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/game/engine.py b/game/engine.py index fff0b9a..bdfbf0b 100644 --- a/game/engine.py +++ b/game/engine.py @@ -32,7 +32,6 @@ def __init__( self._model: KeyedVectors | None = None self._cleaned_key_map: dict[str, str] = {} self._vocab_size: int | None = None - self._max_score: float | None = None # ------------------------------------------------------------------ # Model management @@ -49,7 +48,6 @@ def load(self) -> None: ) self._cleaned_key_map = build_cleaned_key_map(self._model.key_to_index) self._vocab_size = len(self._model.key_to_index) - self._max_score = 1.0 - math.log(2) / math.log(self._vocab_size + 1) @property def is_loaded(self) -> bool: @@ -86,14 +84,23 @@ def score_guess(self, guess: str, target: str) -> float | None: """Score a player's guess against the target word. Returns ``1.0`` for an exact (cleaned) match, or a **logarithmic rank - score** rescaled to ``(0, 0.99]`` for a non-exact in-vocabulary guess. + score** in ``(0, 0.99]`` for a non-exact in-vocabulary guess. Returns ``None`` when either word is missing from the vocabulary. 
- The raw formula ``1 − log(rank+1) / log(V+1)`` is rescaled so that - rank 1 (the closest vocabulary neighbour) maps to exactly ``0.99`` - and the furthest possible word maps to near ``0``. ``1.0`` is - reserved exclusively for exact matches. The closest synonym scores - ``0.99`` (99 %); rank 10 000 ≈ 24 %; the furthest word ≈ 0.003 %. + Formula: ``0.99 * log((V+9) / (rank+9)) / log((V+9) / 10)`` where V + is the vocabulary size. The offset of 9 ensures the step from rank 1 + to rank 2 is ≤ 1 percentage point (no integer % gaps) for any + V ≥ 125 300. ``1.0`` is reserved exclusively for exact matches. + + Score distribution (frWac, V ≈ 150 000): + rank 1 → 99 % + rank 2 → 98 % + rank 3 → 97 % + rank 10 → 92 % + rank 100 → 74 % + rank 1 000 → 51 % + rank 10 000 → 27 % + rank 149 999 → 0.0001 % (always > 0) Args: guess: The word submitted by the player. @@ -114,9 +121,7 @@ def score_guess(self, guess: str, target: str) -> float | None: return None rank = self._model.rank(key_target, key_guess) vocab_size = self._vocab_size or len(self._model.key_to_index) - max_score = self._max_score or (1.0 - math.log(2) / math.log(vocab_size + 1)) - score_raw = max(0.0, 1.0 - math.log(rank + 1) / math.log(vocab_size + 1)) - return score_raw * 0.99 / max_score + return 0.99 * math.log((vocab_size + 9) / (rank + 9)) / math.log((vocab_size + 9) / 10) class GameEngine: diff --git a/tests/test_engine.py b/tests/test_engine.py index 7249fc1..4a972a8 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -33,7 +33,6 @@ def _make_engine() -> SemanticEngine: engine._model = kv engine._cleaned_key_map = {w: w for w in words} engine._vocab_size = len(kv.key_to_index) # 4 - engine._max_score = None # resolved lazily in score_guess return engine @@ -92,10 +91,10 @@ def test_score_is_between_zero_and_one(self): assert 0.0 <= score <= 1.0 def test_score_is_log_rank(self): - """score_guess returns a logarithmic rank score rescaled so rank 1 = 0.99.
+ """score_guess uses the offset-log formula 0.99*log((V+9)/(r+9))/log((V+9)/10). With vocab_size=4, chien (rank 1) scores exactly 0.99 and maison - (rank 2) scores less, so chien must outrank maison. + (rank 2) scores less, so chien must strictly outrank maison. """ engine = _make_engine() score_chien = engine.score_guess("chien", "chat") # rank 1 → 0.99