From d13b071842b9d4c6dbb82d773de8c5e07812f31c Mon Sep 17 00:00:00 2001 From: yu-med Date: Thu, 7 May 2026 05:42:33 +0800 Subject: [PATCH 1/4] refactor: centralize slugify in utils (#30) Add utils.slugify.slugify (ASCII-safe, matches former export_api). Remove duplicate implementations from api/export_api.py and scripts/export.py; add regression tests. Closes #30 --- api/export_api.py | 14 ++++---------- scripts/export.py | 19 ++++--------------- tests/test_slugify.py | 30 ++++++++++++++++++++++++++++++ utils/slugify.py | 18 ++++++++++++++++++ 4 files changed, 56 insertions(+), 25 deletions(-) create mode 100644 tests/test_slugify.py create mode 100644 utils/slugify.py diff --git a/api/export_api.py b/api/export_api.py index 700e348..2462434 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -14,6 +14,7 @@ from utils.md_exporter import session_to_markdown from utils.json_exporter import session_to_json from utils.exclusion_rules import is_session_excluded +from utils.slugify import slugify export_bp = Blueprint("export", __name__) @@ -49,13 +50,6 @@ def get_export_state(): }) -def _slugify(text: str) -> str: - import re - text = text.lower() - text = re.sub(r"[^a-z0-9]+", "-", text) - return text.strip("-") - - @export_bp.route("/api/export", methods=["POST"]) def bulk_export(): body = request.get_json(silent=True) or {} @@ -97,9 +91,9 @@ def bulk_export(): stats = compute_stats(session) md = session_to_markdown(session, stats) - title_slug = _slugify(session["title"]) or "session" + title_slug = slugify(session["title"]) or "session" short_id = sid[:8] - proj_slug = _slugify(project["name"]) + proj_slug = slugify(project["name"]) ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" rel_path = f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" @@ -155,7 +149,7 @@ def export_session(project_name, session_id): if is_session_excluded(rules, session, project_name): return jsonify({"error": 
"Session not found"}), 404 stats = compute_stats(session) - title_slug = _slugify(session["title"]) or "session" + title_slug = slugify(session["title"]) or "session" if fmt == "json": content = session_to_json(session, stats) diff --git a/scripts/export.py b/scripts/export.py index e8f93cb..21905b2 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -33,6 +33,7 @@ load_rules, is_session_excluded, ) +from utils.slugify import slugify STATE_DIR = os.path.join(os.path.expanduser("~"), ".claude-code-chat-browser") @@ -366,9 +367,9 @@ def cmd_export(args): meta["first_timestamp"] = ts date_str = ts[:10] ts_file = ts[:19].replace(":", "-") # 2026-02-10T01-46-15 - title_slug = _slugify(session["title"]) + title_slug = slugify(session["title"]) short_id = sid[:8] - project_slug = _slugify(project["name"]) + project_slug = slugify(project["name"]) if fmt in ("md", "both"): md = session_to_markdown(session, stats) @@ -444,7 +445,7 @@ def cmd_export(args): def _export_single(session: dict, stats: dict, fmt: str, out_dir: str): """Write one session to disk as md, json, or both.""" - title_slug = _slugify(session["title"]) + title_slug = slugify(session["title"]) short_id = session["session_id"][:8] ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" @@ -609,18 +610,6 @@ def _save_state(sessions: dict, count: int, out_dir: str): json.dump(state, f, indent=2) -def _slugify(text: str) -> str: - slug = "" - for c in text.lower(): - if c.isalnum(): - slug += c - elif c in " -_/.": - slug += "-" - while "--" in slug: - slug = slug.replace("--", "-") - return slug.strip("-") - - def _die(msg: str): print(f"Error: {msg}", file=sys.stderr) sys.exit(1) diff --git a/tests/test_slugify.py b/tests/test_slugify.py new file mode 100644 index 0000000..b75387b --- /dev/null +++ b/tests/test_slugify.py @@ -0,0 +1,30 @@ +"""Regression tests for utils.slugify (Issue #30 / CCC8). 
+ +Historically ``scripts/export.py`` used ``isalnum()`` (Unicode letters preserved) +while ``api/export_api.py`` used ASCII-only ``[^a-z0-9]+``. The canonical +implementation matches the API for portable zip / download filenames. +""" + +from utils.slugify import slugify + + +def test_ascii_words_hyphenated(): + assert slugify("Hello World") == "hello-world" + + +def test_punctuation_collapses_to_single_hyphen(): + assert slugify("foo__bar") == "foo-bar" + assert slugify("a.b.c") == "a-b-c" + + +def test_unicode_letters_become_ascii_safe(): + """Old CLI kept non-ASCII letters (e.g. é); canonical slug replaces them with hyphens.""" + assert slugify("Café noir") == "caf-noir" + + +def test_empty_after_strip(): + assert slugify("!!!") == "" + + +def test_digits_preserved(): + assert slugify("Issue 42 Fix") == "issue-42-fix" diff --git a/utils/slugify.py b/utils/slugify.py new file mode 100644 index 0000000..eb3478e --- /dev/null +++ b/utils/slugify.py @@ -0,0 +1,18 @@ +"""Filesystem- and URL-safe slugs for export paths and download names. + +Uses ASCII letters and digits only; each run of other characters (including +Unicode letters and punctuation) becomes one hyphen; edge hyphens are trimmed. +Matches the historical behavior of ``api/export_api.py`` and avoids platform-specific +issues with non-ASCII paths inside zip archives. 
+""" + +from __future__ import annotations + +import re + + +def slugify(text: str) -> str: + """Lowercase *text* and replace each run of non-[a-z0-9] with a single hyphen.""" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") From c5db2b5e8e3154d046e344306d2e8130f8454456 Mon Sep 17 00:00:00 2001 From: yu-med Date: Thu, 7 May 2026 05:49:30 +0800 Subject: [PATCH 2/4] fix(export): default empty slugify slugs to session/project (#30) --- api/export_api.py | 2 +- scripts/export.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/export_api.py b/api/export_api.py index 2462434..18dcdf0 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -93,7 +93,7 @@ def bulk_export(): md = session_to_markdown(session, stats) title_slug = slugify(session["title"]) or "session" short_id = sid[:8] - proj_slug = slugify(project["name"]) + proj_slug = slugify(project["name"]) or "project" ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" rel_path = f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" diff --git a/scripts/export.py b/scripts/export.py index 21905b2..3f08f2d 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -367,9 +367,9 @@ def cmd_export(args): meta["first_timestamp"] = ts date_str = ts[:10] ts_file = ts[:19].replace(":", "-") # 2026-02-10T01-46-15 - title_slug = slugify(session["title"]) short_id = sid[:8] - project_slug = slugify(project["name"]) + title_slug = slugify(session["title"]) or "session" + project_slug = slugify(project["name"]) or "project" if fmt in ("md", "both"): md = session_to_markdown(session, stats) @@ -445,8 +445,8 @@ def cmd_export(args): def _export_single(session: dict, stats: dict, fmt: str, out_dir: str): """Write one session to disk as md, json, or both.""" - title_slug = slugify(session["title"]) short_id = session["session_id"][:8] + title_slug = slugify(session["title"]) or "session" ts = 
session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" From f94bde7c9e6cb4617eb4675f873443b92402a07f Mon Sep 17 00:00:00 2001 From: yu-med Date: Thu, 7 May 2026 11:02:43 +0800 Subject: [PATCH 3/4] refactor(slugify): default= kwarg, caller parity, review feedback (#30) --- api/export_api.py | 6 +++--- scripts/export.py | 6 +++--- tests/test_slugify.py | 30 ++++++++++++++++++++++++++++++ utils/slugify.py | 17 ++++++++++++++--- 4 files changed, 50 insertions(+), 9 deletions(-) diff --git a/api/export_api.py b/api/export_api.py index 18dcdf0..475ba96 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -91,9 +91,9 @@ def bulk_export(): stats = compute_stats(session) md = session_to_markdown(session, stats) - title_slug = slugify(session["title"]) or "session" + title_slug = slugify(session["title"], default="session") short_id = sid[:8] - proj_slug = slugify(project["name"]) or "project" + proj_slug = slugify(project["name"], default="project") ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" rel_path = f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" @@ -149,7 +149,7 @@ def export_session(project_name, session_id): if is_session_excluded(rules, session, project_name): return jsonify({"error": "Session not found"}), 404 stats = compute_stats(session) - title_slug = slugify(session["title"]) or "session" + title_slug = slugify(session["title"], default="session") if fmt == "json": content = session_to_json(session, stats) diff --git a/scripts/export.py b/scripts/export.py index 3f08f2d..60969f8 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -367,9 +367,9 @@ def cmd_export(args): meta["first_timestamp"] = ts date_str = ts[:10] ts_file = ts[:19].replace(":", "-") # 2026-02-10T01-46-15 + title_slug = slugify(session["title"], default="session") short_id = sid[:8] - title_slug = slugify(session["title"]) or "session" - 
project_slug = slugify(project["name"]) or "project" + project_slug = slugify(project["name"], default="project") if fmt in ("md", "both"): md = session_to_markdown(session, stats) @@ -445,8 +445,8 @@ def cmd_export(args): def _export_single(session: dict, stats: dict, fmt: str, out_dir: str): """Write one session to disk as md, json, or both.""" + title_slug = slugify(session["title"], default="session") short_id = session["session_id"][:8] - title_slug = slugify(session["title"]) or "session" ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" diff --git a/tests/test_slugify.py b/tests/test_slugify.py index b75387b..78c71c8 100644 --- a/tests/test_slugify.py +++ b/tests/test_slugify.py @@ -5,6 +5,8 @@ implementation matches the API for portable zip / download filenames. """ +import os + from utils.slugify import slugify @@ -28,3 +30,31 @@ def test_empty_after_strip(): def test_digits_preserved(): assert slugify("Issue 42 Fix") == "issue-42-fix" + + +def test_punctuation_examples_match_regex_behavior(): + assert slugify("AT&T") == "at-t" + assert slugify("issue#42") == "issue-42" + + +def test_default_used_when_slug_empty(): + assert slugify("!!!", default="session") == "session" + assert slugify("!!!") == "" + + +def test_export_leaf_path_parity_api_zip_vs_cli(): + """Same session inputs → same ``proj_slug``, ``title_slug``, and file leaf as API vs CLI.""" + title = "Issue #42: AT&T" + project = "Foo/Bar!" 
+ sid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + ts_file = "2026-05-07T12-00-00" + short_id = sid[:8] + title_slug = slugify(title, default="session") + proj_slug = slugify(project, default="project") + leaf_md = f"{ts_file}__{title_slug}__{short_id}.md" + api_zip_inner = f"{proj_slug}/{leaf_md}" + date_str = ts_file[:10] + cli_rel = os.path.join(date_str, proj_slug, leaf_md) + assert api_zip_inner.endswith(leaf_md) + assert os.path.basename(cli_rel) == leaf_md + assert cli_rel.replace("\\", "/").endswith(f"{proj_slug}/{leaf_md}") diff --git a/utils/slugify.py b/utils/slugify.py index eb3478e..c1340e8 100644 --- a/utils/slugify.py +++ b/utils/slugify.py @@ -11,8 +11,19 @@ import re -def slugify(text: str) -> str: - """Lowercase *text* and replace each run of non-[a-z0-9] with a single hyphen.""" +def slugify(text: str, *, default: str = "") -> str: + """Lowercase *text* and replace each run of non-[a-z0-9] with one hyphen. + + After stripping leading/trailing hyphens, returns that string; if it is + empty, returns *default*. Export code passes ``default="session"`` or + ``default="project"``. + Examples (handled by the ``[^a-z0-9]+`` substitution below): + + - ``AT&T`` → ``at-t`` + - ``issue#42`` → ``issue-42`` + """ text = text.lower() + # Non-ASCII-alphanumeric runs → '-'; e.g. AT&T → at-t, issue#42 → issue-42. text = re.sub(r"[^a-z0-9]+", "-", text) - return text.strip("-") + stripped = text.strip("-") + return stripped if stripped else default From fd88ba4127e260c7eeb449a965e1df2c7b67c9d2 Mon Sep 17 00:00:00 2001 From: yu-med Date: Thu, 7 May 2026 23:47:32 +0800 Subject: [PATCH 4/4] removed unused import --- utils/slugify.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/slugify.py b/utils/slugify.py index c1340e8..fbc5301 100644 --- a/utils/slugify.py +++ b/utils/slugify.py @@ -6,8 +6,6 @@ issues with non-ASCII paths inside zip archives. """ -from __future__ import annotations - import re