Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions code_review_graph/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,22 +164,57 @@ def load_config(path: Path | None = None) -> DaemonConfig:
# ---------------------------------------------------------------------------


def _toml_escape_string(value: str) -> str:
"""Escape *value* so it can be embedded in a TOML basic (double-quoted) string.

TOML basic strings interpret backslash as the start of an escape sequence,
so a raw Windows path like ``C:\\Users\\jimmy`` would otherwise be written
verbatim and later misread: ``\\U`` is parsed as an 8-hex-digit Unicode
escape (``\\u`` is the 4-digit form), ``\\n``/``\\t`` as control chars, etc.
Backslash and double-quote must therefore be doubled; the remaining control
characters are emitted with their standard TOML escapes.

Note: ``str.translate`` cannot map a single character to a multi-character
string (such mappings are silently ignored), so replacements are done
explicitly with ``str.replace``.
"""
# Backslash first: every replacement below adds backslashes, so doubling
# them up front prevents double-escaping of our own inserted backslashes.
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
# Emit control characters with their standard TOML escapes.
control_escapes = {
"\b": "\\b",
"\t": "\\t",
"\n": "\\n",
"\f": "\\f",
"\r": "\\r",
}
for char, esc in control_escapes.items():
escaped = escaped.replace(char, esc)
# Any remaining control character (< 0x20) as \uXXXX to keep the file valid.
return "".join(
ch if ord(ch) >= 0x20 else f"\\u{ord(ch):04x}" for ch in escaped
)


def _serialize_toml(config: DaemonConfig) -> str:
"""Serialize a :class:`DaemonConfig` to TOML text.

``tomllib`` is read-only, so we build the TOML manually.
``tomllib`` is read-only, so we build the TOML manually. String values are
escaped via :func:`_toml_escape_string` so backslashes (e.g. Windows paths)
survive a save/load round-trip.
"""
lines: list[str] = [
"[daemon]",
f'session_name = "{config.session_name}"',
f'log_dir = "{config.log_dir}"',
f'session_name = "{_toml_escape_string(config.session_name)}"',
f'log_dir = "{_toml_escape_string(str(config.log_dir))}"',
f"poll_interval = {config.poll_interval}",
]
for repo in config.repos:
lines.append("")
lines.append("[[repos]]")
lines.append(f'path = "{repo.path}"')
lines.append(f'alias = "{repo.alias}"')
lines.append(f'path = "{_toml_escape_string(repo.path)}"')
lines.append(f'alias = "{_toml_escape_string(repo.alias)}"')
lines.append("") # trailing newline
return "\n".join(lines)

Expand Down
92 changes: 71 additions & 21 deletions tests/test_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,21 @@ def sample_config_file(tmp_path):
repo_b.mkdir()
(repo_b / ".git").mkdir()

config = tmp_path / "watch.toml"
config.write_text(
f"[daemon]\n"
f'session_name = "test-session"\n'
f'log_dir = "{tmp_path / "logs"}"\n'
f"poll_interval = 5\n"
f"\n"
f"[[repos]]\n"
f'path = "{repo_a}"\n'
f'alias = "alpha"\n'
f"\n"
f"[[repos]]\n"
f'path = "{repo_b}"\n'
f'alias = "beta"\n',
encoding="utf-8",
config = DaemonConfig(
session_name="test-session",
log_dir=tmp_path / "logs",
poll_interval=5,
repos=[
WatchRepo(path=str(repo_a.resolve()), alias="alpha"),
WatchRepo(path=str(repo_b.resolve()), alias="beta"),
],
)
return config
config_file = tmp_path / "watch.toml"
# Persist via save_config so paths are escaped correctly on every platform
# (hand-writing a TOML string with a Windows tmp_path would produce an
# invalid "\U" escape sequence).
save_config(config, config_file)
return config_file


@pytest.fixture()
Expand Down Expand Up @@ -96,8 +94,10 @@ def test_load_config_missing_alias(self, tmp_path):
(repo / ".git").mkdir()

config_file = tmp_path / "watch.toml"
# Use a TOML literal string (single quotes) for the path so backslashes
# in a Windows tmp_path are treated as literal characters, not escapes.
config_file.write_text(
f'[[repos]]\npath = "{repo}"\n',
f"[[repos]]\npath = '{repo}'\n",
encoding="utf-8",
)
cfg = load_config(config_file)
Expand All @@ -108,7 +108,7 @@ def test_load_config_invalid_path(self, tmp_path):
"""Bad repo path is skipped with a warning."""
config_file = tmp_path / "watch.toml"
config_file.write_text(
'[[repos]]\npath = "/no/such/directory/ever"\nalias = "gone"\n',
"[[repos]]\npath = '/no/such/directory/ever'\nalias = 'gone'\n",
encoding="utf-8",
)
cfg = load_config(config_file)
Expand All @@ -126,8 +126,8 @@ def test_load_config_duplicate_alias(self, tmp_path):

config_file = tmp_path / "watch.toml"
config_file.write_text(
f'[[repos]]\npath = "{repo_a}"\nalias = "dup"\n\n'
f'[[repos]]\npath = "{repo_b}"\nalias = "dup"\n',
f"[[repos]]\npath = '{repo_a}'\nalias = 'dup'\n\n"
f"[[repos]]\npath = '{repo_b}'\nalias = 'dup'\n",
encoding="utf-8",
)
cfg = load_config(config_file)
Expand All @@ -141,7 +141,7 @@ def test_load_config_no_git_dir(self, tmp_path):

config_file = tmp_path / "watch.toml"
config_file.write_text(
f'[[repos]]\npath = "{bare}"\nalias = "bare"\n',
f"[[repos]]\npath = '{bare}'\nalias = 'bare'\n",
encoding="utf-8",
)
cfg = load_config(config_file)
Expand Down Expand Up @@ -170,6 +170,56 @@ def test_serialize_roundtrip(self, tmp_path):
assert loaded.repos[0].alias == "rt"
assert loaded.repos[0].path == str(repo.resolve())

def test_serialize_roundtrip_backslash_paths(self, tmp_path):
"""Windows-style backslash paths survive a save/load round-trip.

Regression: _serialize_toml wrote raw backslashes into a basic
(double-quoted) TOML string, so a path like ``C:\\Users`` produced
the escape sequence ``\\U`` which tomllib parsed as an 8-hex-digit
Unicode escape and raised ``Invalid hex value`` on the next load.
"""
import tomllib

backslash_path = r"C:\Users\jimmy\project"
backslash_log_dir = r"C:\Users\jimmy\.code-review-graph\logs"

original = DaemonConfig(
session_name="win-session",
log_dir=Path(backslash_log_dir),
poll_interval=3,
repos=[WatchRepo(path=backslash_path, alias="win")],
)
config_file = tmp_path / "backslash.toml"
save_config(original, config_file)

# The persisted file must be valid TOML. tomllib.load would previously
# raise TOMLDecodeError: Invalid hex value (at line 3, column 16).
with open(config_file, "rb") as fh:
raw = tomllib.load(fh)
assert raw["daemon"]["log_dir"] == backslash_log_dir
assert raw["repos"][0]["path"] == backslash_path
assert raw["repos"][0]["alias"] == "win"

def test_serialize_escapes_special_chars(self, tmp_path):
"""Quotes and backslashes in string values are escaped on write."""
import tomllib

original = DaemonConfig(
session_name='he said "hi"',
log_dir=Path(r"C:\path with \backslash"),
poll_interval=1,
repos=[WatchRepo(path=r"D:\repo\name", alias='alias"quote')],
)
config_file = tmp_path / "special.toml"
save_config(original, config_file)

with open(config_file, "rb") as fh:
raw = tomllib.load(fh)
assert raw["daemon"]["session_name"] == 'he said "hi"'
assert raw["daemon"]["log_dir"] == r"C:\path with \backslash"
assert raw["repos"][0]["path"] == r"D:\repo\name"
assert raw["repos"][0]["alias"] == 'alias"quote'

def test_add_repo_to_config(self, tmp_path):
"""add_repo_to_config adds a repo and saves."""
repo = tmp_path / "new-repo"
Expand Down