# TOML basic strings have named escapes for these; everything else in
# the control range must use the \uXXXX form.
_TOML_SHORT_ESCAPES = {
    "\\": "\\\\",
    '"': '\\"',
    "\b": "\\b",
    "\t": "\\t",
    "\n": "\\n",
    "\f": "\\f",
    "\r": "\\r",
}

# ``str.translate`` table (code point -> replacement), built once at import.
# Every C0 control character and DEL first gets the generic \uXXXX form; the
# named escapes are merged last so they win for \b, \t, \n, \f and \r.
_TOML_ESCAPE_TABLE = {
    **{cp: f"\\u{cp:04X}" for cp in (*range(0x20), 0x7F)},
    **{ord(ch): esc for ch, esc in _TOML_SHORT_ESCAPES.items()},
}


def _toml_str(value: object) -> str:
    """Render *value* as a TOML basic string.

    *value* is converted with ``str()`` first, so ``Path`` objects and
    other non-string values are accepted.

    Backslashes and double quotes are escape characters in TOML basic
    strings, so Windows paths like ``C:\\Users\\x`` must be escaped or
    the file fails to parse on the next load. The TOML spec also forbids
    raw control characters (U+0000-U+001F except tab, and U+007F), so
    the whole range is escaped: with the named escape where TOML defines
    one, otherwise as ``\\uXXXX``. Tab is escaped as well, which is
    equally valid. All other characters are passed through unchanged.

    Returns:
        The escaped text wrapped in double quotes, ready to be placed on
        the right-hand side of a ``key = value`` line.
    """
    # One C-level pass over the text instead of a Python loop per character.
    return '"' + str(value).translate(_TOML_ESCAPE_TABLE) + '"'
def _serialize_toml(config: DaemonConfig) -> str:
    """Render *config* as the TOML text of a daemon config file.

    The output starts with a ``[daemon]`` table holding ``session_name``,
    ``log_dir`` and ``poll_interval``, followed by one ``[[repos]]``
    entry (``path`` and ``alias``) per item in ``config.repos``, each
    preceded by a blank line. String values go through
    :func:`_toml_str`, so backslashes, quotes and control characters are
    escaped; ``poll_interval`` is interpolated as-is. The text ends with
    a single newline.
    """
    out: list[str] = [
        "[daemon]",
        f"session_name = {_toml_str(config.session_name)}",
        f"log_dir = {_toml_str(config.log_dir)}",
        f"poll_interval = {config.poll_interval}",
    ]
    for watched in config.repos:
        out.extend(
            (
                "",  # blank separator line before each table entry
                "[[repos]]",
                f"path = {_toml_str(watched.path)}",
                f"alias = {_toml_str(watched.alias)}",
            )
        )
    # join() only places separators between items, so terminate explicitly.
    return "\n".join(out) + "\n"
def test_serialize_toml_escapes_backslashes_and_quotes(self):
    """Backslash paths and embedded quotes survive serialize -> parse.

    Unescaped, both characters would make the written basic strings
    invalid TOML, so the config could not be parsed on the next load.
    """
    from code_review_graph.daemon import tomllib

    logs_dir = Path(r"C:\Users\example\logs")
    repo_dir = r"C:\Users\example\repo"
    cfg = DaemonConfig(
        session_name='quo"ted',
        log_dir=logs_dir,
        poll_interval=2,
        repos=[WatchRepo(path=repo_dir, alias="win")],
    )

    document = tomllib.loads(_serialize_toml(cfg))
    daemon_table = document["daemon"]

    assert daemon_table["session_name"] == 'quo"ted'
    assert daemon_table["log_dir"] == str(logs_dir)
    assert document["repos"][0]["path"] == repo_dir


def test_serialize_toml_escapes_control_characters(self):
    """Newlines, tabs and other control characters survive serialize -> parse.

    TOML does not accept these characters raw inside a basic string, so
    an unusual session name or alias must be escaped rather than written
    verbatim into the config text.
    """
    from code_review_graph.daemon import tomllib

    odd_name = "line1\nline2\ttabbed\x01ctrl\x7fdel"
    odd_alias = "a\rb"
    cfg = DaemonConfig(
        session_name=odd_name,
        log_dir=Path("logs"),
        poll_interval=2,
        repos=[WatchRepo(path="repo", alias=odd_alias)],
    )

    document = tomllib.loads(_serialize_toml(cfg))

    assert document["daemon"]["session_name"] == odd_name
    assert document["repos"][0]["alias"] == odd_alias