From 06391713c055e6e2d6370676df4aaf258a396244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20G=C3=B6rresen=20Mello?= Date: Wed, 18 Feb 2026 17:31:21 -0300 Subject: [PATCH 1/5] adding support for aleph exports --- alephclient/cli.py | 34 ++++++++++ alephclient/exportdir.py | 72 ++++++++++++++++++++ alephclient/tests/test_export.py | 112 +++++++++++++++++++++++++++++++ 3 files changed, 218 insertions(+) create mode 100644 alephclient/exportdir.py create mode 100644 alephclient/tests/test_export.py diff --git a/alephclient/cli.py b/alephclient/cli.py index b9399b0..cae0564 100644 --- a/alephclient/cli.py +++ b/alephclient/cli.py @@ -8,6 +8,7 @@ from alephclient.errors import AlephException from alephclient.crawldir import crawl_dir from alephclient.fetchdir import fetch_collection, fetch_entity +from alephclient.exportdir import list_exports, format_exports_table, download_export log = logging.getLogger(__name__) @@ -424,5 +425,38 @@ def make_list(ctx, foreign_id, outfile, label, summary): raise click.Abort() +@cli.group() +@click.pass_context +def export(ctx): + """Manage exports.""" + pass + + +@export.command("list") +@click.pass_context +def export_list(ctx): + """List all exports.""" + api = ctx.obj["api"] + try: + exports = list_exports(api) + click.echo(format_exports_table(exports)) + except AlephException as exc: + raise click.ClickException(str(exc)) + + +@export.command("download") +@click.argument("export_id", required=True) +@click.argument("destination", required=True, type=click.Path()) +@click.pass_context +def export_download(ctx, export_id, destination): + """Download an export by ID to a destination path.""" + api = ctx.obj["api"] + try: + path = download_export(api, export_id, destination) + click.echo(f"Export downloaded to {path}") + except AlephException as exc: + raise click.ClickException(str(exc)) + + if __name__ == "__main__": cli() diff --git a/alephclient/exportdir.py b/alephclient/exportdir.py new file mode 100644 index 0000000..7682246 --- /dev/null +++ b/alephclient/exportdir.py @@ -0,0 +1,72 @@ +from pathlib import Path +from typing import List, Dict + +from alephclient.api import AlephAPI, APIResultSet +from alephclient.errors import AlephException + + +def list_exports(api: AlephAPI) -> List[Dict]: + """Fetch all exports from the API, handling pagination.""" + url = api._make_url("exports") + return list(APIResultSet(api, url)) + + +def format_exports_table(exports: List[Dict]) -> str: + """Format a list of exports as a plain-text table.""" + if not exports: + return "No exports found." + + headers = ["ID", "Label", "Status", "Created At", "Content Hash"] + keys = ["id", "label", "status", "created_at", "content_hash"] + + rows = [] + for export in exports: + rows.append([str(export.get(k, "")) for k in keys]) + + col_widths = [len(h) for h in headers] + for row in rows: + for i, val in enumerate(row): + col_widths[i] = max(col_widths[i], len(val)) + + def format_row(values): + return " ".join(v.ljust(col_widths[i]) for i, v in enumerate(values)) + + lines = [format_row(headers), format_row(["-" * w for w in col_widths])] + for row in rows: + lines.append(format_row(row)) + return "\n".join(lines) + + +def _get_export(api: AlephAPI, export_id: str) -> Dict: + """Fetch a single export by ID from the exports list.""" + for export in list_exports(api): + if str(export.get("id")) == str(export_id): + return export + raise AlephException(ValueError(f"Export {export_id} not found")) + + +def download_export(api: AlephAPI, export_id: str, destination: str) -> Path: + """Download an export archive to the given destination path.""" + export = _get_export(api, export_id) + download_url = export.get("links", {}).get("download") + if not download_url: + raise AlephException(ValueError(f"No download link for export {export_id}")) + + file_name = export.get("file_name", export_id) + dest = Path(destination) + if dest.is_dir(): + dest = dest / file_name + dest.parent.mkdir(parents=True, exist_ok=True) + + try: + response = api.session.get(download_url, stream=True) + response.raise_for_status() + except Exception as exc: + raise AlephException(exc) from exc + + with open(dest, "wb") as fh: + for chunk in response.iter_content(chunk_size=512 * 1024): + if chunk: + fh.write(chunk) + + return dest diff --git a/alephclient/tests/test_export.py b/alephclient/tests/test_export.py new file mode 100644 index 0000000..fa97347 --- /dev/null +++ b/alephclient/tests/test_export.py @@ -0,0 +1,112 @@ +from unittest.mock import MagicMock + +from alephclient.api import AlephAPI +from alephclient.exportdir import list_exports, format_exports_table, download_export + + +FAKE_EXPORT = { + "id": "123", + "label": "My Export", + "status": "complete", + "file_name": "export.zip", + "links": {"download": "http://aleph.test/api/2/archive?token=abc"}, +} + + +class TestListExports: + fake_url = "http://aleph.test/api/2/" + + def setup_method(self): + self.api = AlephAPI(host=self.fake_url, api_key="fake_key") + + def test_single_page(self, mocker): + exports = [{"id": "1", "label": "Export 1"}] + mocker.patch.object( + self.api, + "_request", + return_value={"results": exports, "next": None}, + ) + result = list_exports(self.api) + assert result == exports + + def test_pagination(self, mocker): + page1 = {"results": [{"id": "1"}], "next": self.fake_url + "exports?page=2"} + page2 = {"results": [{"id": "2"}], "next": None} + mocker.patch.object( + self.api, + "_request", + side_effect=[page1, page2], + ) + result = list_exports(self.api) + assert len(result) == 2 + assert result[0]["id"] == "1" + assert result[1]["id"] == "2" + + def test_empty(self, mocker): + mocker.patch.object( + self.api, + "_request", + return_value={"results": [], "next": None}, + ) + result = list_exports(self.api) + assert result == [] + + +class TestFormatExportsTable: + def test_empty_list(self): + assert format_exports_table([]) == "No exports found." + + def test_with_data(self): + exports = [ + { + "id": "abc", + "label": "My Export", + "status": "completed", + "created_at": "2025-01-01", + "content_hash": "sha1:deadbeef", + } + ] + table = format_exports_table(exports) + lines = table.split("\n") + assert len(lines) == 3 + assert "ID" in lines[0] + assert "Label" in lines[0] + assert "Status" in lines[0] + assert "abc" in lines[2] + assert "My Export" in lines[2] + assert "completed" in lines[2] + + +class TestDownloadExport: + fake_url = "http://aleph.test/api/2/" + + def setup_method(self): + self.api = AlephAPI(host=self.fake_url, api_key="fake_key") + + def _mock_download(self, mocker, content=b"file content"): + mock_response = MagicMock() + mock_response.iter_content.return_value = [content] + mock_response.raise_for_status = MagicMock() + mocker.patch.object( + self.api.session, + "get", + return_value=mock_response, + ) + mocker.patch( + "alephclient.exportdir.list_exports", + return_value=[FAKE_EXPORT], + ) + + def test_download_to_file(self, mocker, tmp_path): + dest = tmp_path / "output.zip" + self._mock_download(mocker) + result = download_export(self.api, "123", str(dest)) + assert result == dest + assert dest.read_bytes() == b"file content" + + def test_download_to_directory(self, mocker, tmp_path): + self._mock_download(mocker, content=b"data") + result = download_export(self.api, "123", str(tmp_path)) + expected = tmp_path / "export.zip" + assert result == expected + assert expected.read_bytes() == b"data" From ceab713cbc18e78dc7116cd88cf916155fb6e747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20G=C3=B6rresen=20Mello?= Date: Wed, 18 Feb 2026 17:37:38 -0300 Subject: [PATCH 2/5] fixing tests --- alephclient/tests/test_export.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/alephclient/tests/test_export.py b/alephclient/tests/test_export.py index fa97347..7e699e9 100644 --- a/alephclient/tests/test_export.py +++ b/alephclient/tests/test_export.py @@ -24,14 +24,14 @@ def test_single_page(self, mocker): mocker.patch.object( self.api, "_request", - return_value={"results": exports, "next": None}, + return_value={"results": exports, "next": None, "offset": 0, "limit": 20}, ) result = list_exports(self.api) assert result == exports def test_pagination(self, mocker): - page1 = {"results": [{"id": "1"}], "next": self.fake_url + "exports?page=2"} - page2 = {"results": [{"id": "2"}], "next": None} + page1 = {"results": [{"id": "1"}], "next": self.fake_url + "exports?page=2", "offset": 0, "limit": 1} + page2 = {"results": [{"id": "2"}], "next": None, "offset": 1, "limit": 1} mocker.patch.object( self.api, "_request", @@ -46,7 +46,7 @@ def test_empty(self, mocker): mocker.patch.object( self.api, "_request", - return_value={"results": [], "next": None}, + return_value={"results": [], "next": None, "offset": 0, "limit": 20}, ) result = list_exports(self.api) assert result == [] From 5236e61294b10e18bd9d592c7ef078838e3aad1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20G=C3=B6rresen=20Mello?= Date: Wed, 18 Feb 2026 17:44:08 -0300 Subject: [PATCH 3/5] fixing formatting --- alephclient/tests/test_export.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/alephclient/tests/test_export.py b/alephclient/tests/test_export.py index 7e699e9..bb98a99 100644 --- a/alephclient/tests/test_export.py +++ b/alephclient/tests/test_export.py @@ -30,8 +30,18 @@ def test_single_page(self, mocker): assert result == exports def test_pagination(self, mocker): - page1 = {"results": [{"id": "1"}], "next": self.fake_url + "exports?page=2", "offset": 0, "limit": 1} - page2 = {"results": [{"id": "2"}], "next": None, "offset": 1, "limit": 1} + page1 = { + "results": [{"id": "1"}], + "next": self.fake_url + "exports?page=2", + "offset": 0, + "limit": 1, + } + page2 = { + "results": [{"id": "2"}], + "next": None, + "offset": 1, + "limit": 1, + } mocker.patch.object( self.api, "_request", From 0fe1d2cdbb65f52af3ad79691cd5d56ca54a41a3 Mon Sep 17 00:00:00 2001 From: Klil Eden Date: Thu, 5 Mar 2026 11:56:21 -0500 Subject: [PATCH 4/5] change name of exportdir to exports --- alephclient/cli.py | 2 +- alephclient/{exportdir.py => exports.py} | 0 alephclient/tests/test_export.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename alephclient/{exportdir.py => exports.py} (100%) diff --git a/alephclient/cli.py b/alephclient/cli.py index cae0564..1ffa19a 100644 --- a/alephclient/cli.py +++ b/alephclient/cli.py @@ -8,7 +8,7 @@ from alephclient.errors import AlephException from alephclient.crawldir import crawl_dir from alephclient.fetchdir import fetch_collection, fetch_entity -from alephclient.exportdir import list_exports, format_exports_table, download_export +from alephclient.exports import list_exports, format_exports_table, download_export log = logging.getLogger(__name__) diff --git a/alephclient/exportdir.py b/alephclient/exports.py similarity index 100% rename from alephclient/exportdir.py rename to alephclient/exports.py diff --git a/alephclient/tests/test_export.py b/alephclient/tests/test_export.py index bb98a99..1cad666 100644 --- a/alephclient/tests/test_export.py +++ b/alephclient/tests/test_export.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock from alephclient.api import AlephAPI -from alephclient.exportdir import list_exports, format_exports_table, download_export +from alephclient.exports import list_exports, format_exports_table, download_export FAKE_EXPORT = { @@ -103,7 +103,7 @@ def _mock_download(self, mocker, content=b"file content"): return_value=mock_response, ) mocker.patch( - "alephclient.exportdir.list_exports", + "alephclient.exports.list_exports", return_value=[FAKE_EXPORT], ) From 147340dab1f13bd40139064987ad258a136f48a1 Mon Sep 17 00:00:00 2001 From: Klil Eden Date: Thu, 5 Mar 2026 12:06:25 -0500 Subject: [PATCH 5/5] use normal exception pattern --- alephclient/exports.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/alephclient/exports.py b/alephclient/exports.py index 7682246..1a5fca2 100644 --- a/alephclient/exports.py +++ b/alephclient/exports.py @@ -1,6 +1,9 @@ from pathlib import Path from typing import List, Dict +from requests import RequestException +from requests.exceptions import HTTPError + from alephclient.api import AlephAPI, APIResultSet from alephclient.errors import AlephException @@ -42,7 +45,7 @@ def _get_export(api: AlephAPI, export_id: str) -> Dict: for export in list_exports(api): if str(export.get("id")) == str(export_id): return export - raise AlephException(ValueError(f"Export {export_id} not found")) + raise AlephException(f"Export {export_id} not found") def download_export(api: AlephAPI, export_id: str, destination: str) -> Path: @@ -50,7 +53,7 @@ def download_export(api: AlephAPI, export_id: str, destination: str) -> Path: export = _get_export(api, export_id) download_url = export.get("links", {}).get("download") if not download_url: - raise AlephException(ValueError(f"No download link for export {export_id}")) + raise AlephException(f"No download link for export {export_id}") file_name = export.get("file_name", export_id) dest = Path(destination) @@ -61,7 +64,7 @@ def download_export(api: AlephAPI, export_id: str, destination: str) -> Path: try: response = api.session.get(download_url, stream=True) response.raise_for_status() - except Exception as exc: + except (RequestException, HTTPError) as exc: raise AlephException(exc) from exc with open(dest, "wb") as fh: