diff --git a/alephclient/cli.py b/alephclient/cli.py
index b9399b0..1ffa19a 100644
--- a/alephclient/cli.py
+++ b/alephclient/cli.py
@@ -8,6 +8,7 @@
 from alephclient.errors import AlephException
 from alephclient.crawldir import crawl_dir
 from alephclient.fetchdir import fetch_collection, fetch_entity
+from alephclient.exports import list_exports, format_exports_table, download_export
 
 log = logging.getLogger(__name__)
 
@@ -424,5 +425,38 @@ def make_list(ctx, foreign_id, outfile, label, summary):
         raise click.Abort()
 
 
+@cli.group()
+@click.pass_context
+def export(ctx):
+    """Manage exports."""
+    pass
+
+
+@export.command("list")
+@click.pass_context
+def export_list(ctx):
+    """List all exports."""
+    api = ctx.obj["api"]
+    try:
+        exports = list_exports(api)
+        click.echo(format_exports_table(exports))
+    except AlephException as exc:
+        raise click.ClickException(str(exc))
+
+
+@export.command("download")
+@click.argument("export_id", required=True)
+@click.argument("destination", required=True, type=click.Path())
+@click.pass_context
+def export_download(ctx, export_id, destination):
+    """Download an export by ID to a destination path."""
+    api = ctx.obj["api"]
+    try:
+        path = download_export(api, export_id, destination)
+        click.echo(f"Export downloaded to {path}")
+    except AlephException as exc:
+        raise click.ClickException(str(exc))
+
+
 if __name__ == "__main__":
     cli()
diff --git a/alephclient/exports.py b/alephclient/exports.py
new file mode 100644
--- /dev/null
+++ b/alephclient/exports.py
@@ -0,0 +1,116 @@
+"""Helpers for listing and downloading Aleph exports."""
+
+from contextlib import closing
+from pathlib import Path
+from typing import List, Dict
+
+from requests import RequestException
+
+from alephclient.api import AlephAPI, APIResultSet
+from alephclient.errors import AlephException
+
+# Size of the chunks streamed to disk while downloading an export archive.
+DOWNLOAD_CHUNK_SIZE = 512 * 1024
+
+
+def list_exports(api: AlephAPI) -> List[Dict]:
+    """Fetch all exports from the API, handling pagination.
+
+    ``APIResultSet`` is iterated to exhaustion and the exports it yields are
+    returned as a single list.
+    """
+    url = api._make_url("exports")
+    return list(APIResultSet(api, url))
+
+
+def format_exports_table(exports: List[Dict]) -> str:
+    """Format a list of exports as a plain-text table.
+
+    Each column is padded to its widest cell. Missing or ``None`` values are
+    rendered as empty cells rather than the literal text ``None``. Returns
+    ``"No exports found."`` when ``exports`` is empty.
+    """
+    if not exports:
+        return "No exports found."
+
+    headers = ["ID", "Label", "Status", "Created At", "Content Hash"]
+    keys = ["id", "label", "status", "created_at", "content_hash"]
+
+    def cell(value) -> str:
+        return "" if value is None else str(value)
+
+    rows = [[cell(export.get(key)) for key in keys] for export in exports]
+    # zip(headers, *rows) yields one tuple per column: header plus all cells.
+    col_widths = [
+        max(len(text) for text in column) for column in zip(headers, *rows)
+    ]
+
+    def format_row(values):
+        # Two-space gutter keeps multi-word labels distinct from column gaps.
+        return "  ".join(v.ljust(w) for v, w in zip(values, col_widths))
+
+    lines = [format_row(headers), format_row(["-" * w for w in col_widths])]
+    lines.extend(format_row(row) for row in rows)
+    return "\n".join(lines)
+
+
+def _get_export(api: AlephAPI, export_id: str) -> Dict:
+    """Fetch a single export by ID from the exports list.
+
+    Raises ``AlephException`` if no export with that ID is listed.
+    """
+    wanted = str(export_id)
+    for export in list_exports(api):
+        if str(export.get("id")) == wanted:
+            return export
+    raise AlephException(f"Export {export_id} not found")
+
+
+def download_export(api: AlephAPI, export_id: str, destination: str) -> Path:
+    """Download an export archive to the given destination path.
+
+    If ``destination`` is an existing directory, the archive is stored inside
+    it under the export's file name (falling back to the export ID). Data is
+    streamed into a ``.part`` file next to the target and moved into place
+    only after the transfer finished, so a failed download never leaves a
+    truncated archive at the destination.
+
+    Returns the path the archive was written to. Raises ``AlephException`` if
+    the export is not listed, has no download link, or the HTTP transfer
+    fails.
+    """
+    export = _get_export(api, export_id)
+    download_url = (export.get("links") or {}).get("download")
+    if not download_url:
+        raise AlephException(f"No download link for export {export_id}")
+
+    dest = Path(destination)
+    if dest.is_dir():
+        # The file name is supplied by the server: keep only its last path
+        # component so it cannot point outside the chosen directory.
+        file_name = Path(str(export.get("file_name") or export_id)).name
+        if file_name in ("", ".."):
+            file_name = str(export_id)
+        dest = dest / file_name
+    dest.parent.mkdir(parents=True, exist_ok=True)
+
+    partial = dest.with_name(dest.name + ".part")
+    try:
+        # closing() releases the streamed connection even when reading the
+        # body or writing to disk fails half-way through.
+        with closing(api.session.get(download_url, stream=True)) as response:
+            response.raise_for_status()
+            chunks = response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE)
+            with open(partial, "wb") as fh:
+                for chunk in chunks:
+                    if chunk:
+                        fh.write(chunk)
+        partial.replace(dest)
+    except RequestException as exc:
+        raise AlephException(exc) from exc
+    finally:
+        # After a successful replace() the partial file no longer exists.
+        if partial.exists():
+            partial.unlink()
+
+    return dest
diff --git a/alephclient/tests/test_export.py b/alephclient/tests/test_export.py
new file mode 100644
--- /dev/null
+++ b/alephclient/tests/test_export.py
@@ -0,0 +1,146 @@
+from unittest.mock import MagicMock
+
+import pytest
+from requests.exceptions import HTTPError
+
+from alephclient.api import AlephAPI
+from alephclient.errors import AlephException
+from alephclient.exports import list_exports, format_exports_table, download_export
+
+
+FAKE_EXPORT = {
+    "id": "123",
+    "label": "My Export",
+    "status": "complete",
+    "file_name": "export.zip",
+    "links": {"download": "http://aleph.test/api/2/archive?token=abc"},
+}
+
+
+class TestListExports:
+    fake_url = "http://aleph.test/api/2/"
+
+    def setup_method(self):
+        self.api = AlephAPI(host=self.fake_url, api_key="fake_key")
+
+    def test_single_page(self, mocker):
+        exports = [{"id": "1", "label": "Export 1"}]
+        mocker.patch.object(
+            self.api,
+            "_request",
+            return_value={"results": exports, "next": None, "offset": 0, "limit": 20},
+        )
+        result = list_exports(self.api)
+        assert result == exports
+
+    def test_pagination(self, mocker):
+        page1 = {
+            "results": [{"id": "1"}],
+            "next": self.fake_url + "exports?page=2",
+            "offset": 0,
+            "limit": 1,
+        }
+        page2 = {
+            "results": [{"id": "2"}],
+            "next": None,
+            "offset": 1,
+            "limit": 1,
+        }
+        mocker.patch.object(
+            self.api,
+            "_request",
+            side_effect=[page1, page2],
+        )
+        result = list_exports(self.api)
+        assert len(result) == 2
+        assert result[0]["id"] == "1"
+        assert result[1]["id"] == "2"
+
+    def test_empty(self, mocker):
+        mocker.patch.object(
+            self.api,
+            "_request",
+            return_value={"results": [], "next": None, "offset": 0, "limit": 20},
+        )
+        result = list_exports(self.api)
+        assert result == []
+
+
+class TestFormatExportsTable:
+    def test_empty_list(self):
+        assert format_exports_table([]) == "No exports found."
+
+    def test_with_data(self):
+        exports = [
+            {
+                "id": "abc",
+                "label": "My Export",
+                "status": "completed",
+                "created_at": "2025-01-01",
+                "content_hash": "sha1:deadbeef",
+            }
+        ]
+        table = format_exports_table(exports)
+        lines = table.split("\n")
+        assert len(lines) == 3
+        assert "ID" in lines[0]
+        assert "Label" in lines[0]
+        assert "Status" in lines[0]
+        assert "abc" in lines[2]
+        assert "My Export" in lines[2]
+        assert "completed" in lines[2]
+
+    def test_none_values_render_as_empty_cells(self):
+        table = format_exports_table([{"id": "abc", "content_hash": None}])
+        assert "None" not in table
+
+
+class TestDownloadExport:
+    fake_url = "http://aleph.test/api/2/"
+
+    def setup_method(self):
+        self.api = AlephAPI(host=self.fake_url, api_key="fake_key")
+
+    def _mock_download(self, mocker, content=b"file content", export=FAKE_EXPORT):
+        mock_response = MagicMock()
+        mock_response.iter_content.return_value = [content]
+        mock_response.raise_for_status = MagicMock()
+        mocker.patch.object(
+            self.api.session,
+            "get",
+            return_value=mock_response,
+        )
+        mocker.patch(
+            "alephclient.exports.list_exports",
+            return_value=[export],
+        )
+        return mock_response
+
+    def test_download_to_file(self, mocker, tmp_path):
+        dest = tmp_path / "output.zip"
+        self._mock_download(mocker)
+        result = download_export(self.api, "123", str(dest))
+        assert result == dest
+        assert dest.read_bytes() == b"file content"
+
+    def test_download_to_directory(self, mocker, tmp_path):
+        self._mock_download(mocker, content=b"data")
+        result = download_export(self.api, "123", str(tmp_path))
+        expected = tmp_path / "export.zip"
+        assert result == expected
+        assert expected.read_bytes() == b"data"
+
+    def test_directory_ignores_path_in_file_name(self, mocker, tmp_path):
+        export = dict(FAKE_EXPORT, file_name="../../evil.zip")
+        self._mock_download(mocker, export=export)
+        result = download_export(self.api, "123", str(tmp_path))
+        assert result == tmp_path / "evil.zip"
+
+    def test_failed_download_leaves_no_file(self, mocker, tmp_path):
+        dest = tmp_path / "output.zip"
+        response = self._mock_download(mocker)
+        response.raise_for_status.side_effect = HTTPError("boom")
+        with pytest.raises(AlephException):
+            download_export(self.api, "123", str(dest))
+        assert list(tmp_path.iterdir()) == []
+        response.close.assert_called_once()