Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions alephclient/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from alephclient.errors import AlephException
from alephclient.crawldir import crawl_dir
from alephclient.fetchdir import fetch_collection, fetch_entity
from alephclient.exports import list_exports, format_exports_table, download_export

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

Expand Down Expand Up @@ -424,5 +425,38 @@ def make_list(ctx, foreign_id, outfile, label, summary):
raise click.Abort()


@cli.group()
@click.pass_context
def export(ctx):
    """Manage exports."""
    # Container group only: the actual work is done by the "list" and
    # "download" subcommands registered on it. The docstring doubles as the
    # group's help text, so it is the whole body.


@export.command("list")
@click.pass_context
def export_list(ctx):
    """List all exports."""
    # The API client is read from the click context object under "api".
    api = ctx.obj["api"]
    try:
        # Only the API call can raise AlephException, so keep the try narrow.
        exports = list_exports(api)
    except AlephException as exc:
        # Chain the cause explicitly so the original API error stays
        # attached to the user-facing click error.
        raise click.ClickException(str(exc)) from exc
    click.echo(format_exports_table(exports))


@export.command("download")
@click.argument("export_id", required=True)
@click.argument("destination", required=True, type=click.Path())
@click.pass_context
def export_download(ctx, export_id, destination):
    """Download an export by ID to a destination path."""
    # The API client is read from the click context object under "api".
    api = ctx.obj["api"]
    try:
        path = download_export(api, export_id, destination)
    except (AlephException, OSError) as exc:
        # AlephException covers API/HTTP failures; OSError covers problems
        # with the user-supplied destination (e.g. permission denied when
        # creating the directory or opening the file). Both are reported as
        # a clean CLI error, with the original exception chained as cause.
        raise click.ClickException(str(exc)) from exc
    click.echo(f"Export downloaded to {path}")


# Run the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
75 changes: 75 additions & 0 deletions alephclient/exports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from pathlib import Path
from typing import List, Dict

from requests import RequestException
from requests.exceptions import HTTPError

from alephclient.api import AlephAPI, APIResultSet
from alephclient.errors import AlephException


def list_exports(api: AlephAPI) -> List[Dict]:
    """Return every export visible through ``api`` as a list.

    The "exports" endpoint URL is built by the client and wrapped in an
    ``APIResultSet``; walking through result pages is delegated to that
    iterator, which is fully consumed here.
    """
    result_set = APIResultSet(api, api._make_url("exports"))
    return list(result_set)


def format_exports_table(exports: List[Dict]) -> str:
    """Format a list of exports as a plain-text table.

    The columns are ID, Label, Status, Created At and Content Hash. Every
    column is left-justified and padded to the width of its longest cell
    (header included). A field that is missing *or* explicitly ``None`` is
    rendered as an empty cell, so unfinished exports do not show a literal
    "None".

    Args:
        exports: Export records as dictionaries.

    Returns:
        The header line, a dashed separator line and one line per export,
        joined with newlines; or ``"No exports found."`` when ``exports``
        is empty.
    """
    if not exports:
        return "No exports found."

    headers = ["ID", "Label", "Status", "Created At", "Content Hash"]
    keys = ["id", "label", "status", "created_at", "content_hash"]

    def cell(value) -> str:
        # Treat an explicit null the same way as an absent key.
        return "" if value is None else str(value)

    rows = [[cell(export.get(key)) for key in keys] for export in exports]

    # Each column is as wide as its header or its widest value.
    col_widths = [
        max(len(header), *(len(row[i]) for row in rows))
        for i, header in enumerate(headers)
    ]

    def format_row(values):
        return " ".join(
            value.ljust(width) for value, width in zip(values, col_widths)
        )

    table = [headers, ["-" * width for width in col_widths], *rows]
    return "\n".join(format_row(line) for line in table)


def _get_export(api: AlephAPI, export_id: str) -> Dict:
    """Look up one export by ID by scanning the full exports list.

    IDs are compared as strings on both sides. Raises ``AlephException``
    when no export in the list has the requested ID.
    """
    wanted = str(export_id)
    found = next(
        (item for item in list_exports(api) if str(item.get("id")) == wanted),
        None,
    )
    if found is None:
        raise AlephException(f"Export {export_id} not found")
    return found


def download_export(api: AlephAPI, export_id: str, destination: str) -> Path:
    """Download an export archive to the given destination path.

    Args:
        api: Client whose ``session`` is used for the HTTP download.
        export_id: ID of the export to download.
        destination: Target path. If it is an existing directory, the file
            is written inside it under the export's ``file_name`` (falling
            back to the export ID when no file name is set); otherwise it is
            used as the file path itself. Missing parent directories are
            created.

    Returns:
        The path the archive was written to.

    Raises:
        AlephException: If the export is unknown, has no download link, or
            the HTTP request or body transfer fails.
    """
    export = _get_export(api, export_id)
    # ``links`` may be present but null, so guard before the nested lookup.
    download_url = (export.get("links") or {}).get("download")
    if not download_url:
        raise AlephException(f"No download link for export {export_id}")

    # ``file_name`` may be present but null; ``or`` covers both that and a
    # missing key, whereas a ``.get`` default only covers the latter.
    file_name = export.get("file_name") or str(export_id)
    dest = Path(destination)
    if dest.is_dir():
        dest = dest / file_name
    dest.parent.mkdir(parents=True, exist_ok=True)

    try:
        response = api.session.get(download_url, stream=True)
    except RequestException as exc:
        raise AlephException(exc) from exc

    try:
        # Check the status before opening the file so that an HTTP error
        # does not leave an empty file behind.
        response.raise_for_status()
        with open(dest, "wb") as fh:
            for chunk in response.iter_content(chunk_size=512 * 1024):
                if chunk:
                    fh.write(chunk)
    except RequestException as exc:
        # HTTPError is a RequestException, as are errors raised while the
        # body is being streamed; report all of them uniformly.
        raise AlephException(exc) from exc
    finally:
        # A streamed response holds its connection until it is closed.
        response.close()

    return dest
122 changes: 122 additions & 0 deletions alephclient/tests/test_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from unittest.mock import MagicMock

from alephclient.api import AlephAPI
from alephclient.exports import list_exports, format_exports_table, download_export


# Export record returned by the patched ``list_exports`` in the download tests.
FAKE_EXPORT = {
    "id": "123",
    "label": "My Export",
    "status": "complete",
    "file_name": "export.zip",
    "links": {"download": "http://aleph.test/api/2/archive?token=abc"},
}


class TestListExports:
    """Tests for ``list_exports`` with ``AlephAPI._request`` patched out."""

    fake_url = "http://aleph.test/api/2/"

    def setup_method(self):
        self.api = AlephAPI(host=self.fake_url, api_key="fake_key")

    @staticmethod
    def _page(results, next_url=None, offset=0, limit=20):
        """Build one response page in the shape the tests feed to ``_request``."""
        return {
            "results": results,
            "next": next_url,
            "offset": offset,
            "limit": limit,
        }

    def test_single_page(self, mocker):
        expected = [{"id": "1", "label": "Export 1"}]
        mocker.patch.object(
            self.api, "_request", return_value=self._page(expected)
        )
        assert list_exports(self.api) == expected

    def test_pagination(self, mocker):
        first = self._page(
            [{"id": "1"}],
            next_url=self.fake_url + "exports?page=2",
            offset=0,
            limit=1,
        )
        second = self._page([{"id": "2"}], offset=1, limit=1)
        # Successive ``_request`` calls hand back the two pages in order.
        mocker.patch.object(self.api, "_request", side_effect=[first, second])
        found = list_exports(self.api)
        assert len(found) == 2
        assert [item["id"] for item in found] == ["1", "2"]

    def test_empty(self, mocker):
        mocker.patch.object(self.api, "_request", return_value=self._page([]))
        assert list_exports(self.api) == []


class TestFormatExportsTable:
    """Tests for the plain-text rendering done by ``format_exports_table``."""

    def test_empty_list(self):
        rendered = format_exports_table([])
        assert rendered == "No exports found."

    def test_with_data(self):
        record = {
            "id": "abc",
            "label": "My Export",
            "status": "completed",
            "created_at": "2025-01-01",
            "content_hash": "sha1:deadbeef",
        }
        lines = format_exports_table([record]).split("\n")
        # Header, separator and a single data row.
        assert len(lines) == 3
        header, data_row = lines[0], lines[2]
        for title in ("ID", "Label", "Status"):
            assert title in header
        for value in ("abc", "My Export", "completed"):
            assert value in data_row


class TestDownloadExport:
    """Tests for ``download_export`` with the session and export list mocked."""

    fake_url = "http://aleph.test/api/2/"

    def setup_method(self):
        self.api = AlephAPI(host=self.fake_url, api_key="fake_key")

    def _mock_download(self, mocker, content=b"file content"):
        """Patch the export lookup and HTTP GET so the body is ``content``."""
        response = MagicMock()
        response.raise_for_status = MagicMock()
        response.iter_content.return_value = [content]
        mocker.patch(
            "alephclient.exports.list_exports",
            return_value=[FAKE_EXPORT],
        )
        mocker.patch.object(self.api.session, "get", return_value=response)

    def test_download_to_file(self, mocker, tmp_path):
        self._mock_download(mocker)
        target = tmp_path / "output.zip"
        written = download_export(self.api, "123", str(target))
        assert written == target
        assert target.read_bytes() == b"file content"

    def test_download_to_directory(self, mocker, tmp_path):
        self._mock_download(mocker, content=b"data")
        written = download_export(self.api, "123", str(tmp_path))
        # A directory destination gets the export's file name appended.
        target = tmp_path / "export.zip"
        assert written == target
        assert target.read_bytes() == b"data"