Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Version 13.3.0 - 2024-06-27

- Added `workflows execute-all` which starts an execution on all documents in a dataset

## Version 13.2.2 - 2024-06-13

- Bugfix `models update-training` now works as intended when specifying `--deployment-environment-id`
Expand Down
2 changes: 1 addition & 1 deletion lascli/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
__maintainer_email__ = 'magnus@lucidtech.ai'
__title__ = 'lucidtech-las-cli'
__url__ = 'https://github.com/LucidtechAI/las-cli'
__version__ = '13.2.2'
__version__ = '13.3.0'
6 changes: 3 additions & 3 deletions lascli/parser/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _get_document_worker(las_client: Client, document_id, output_dir):
return None


def _list_all_documents_in_dataset(las_client: Client, dataset_id):
def list_all_documents_in_dataset(las_client: Client, dataset_id):
list_response = las_client.list_documents(dataset_id=dataset_id)
yield from list_response['documents']
next_token = list_response.get('nextToken')
Expand Down Expand Up @@ -332,7 +332,7 @@ def get_documents(las_client: Client, dataset_id, output_dir, num_threads, chunk
already_downloaded_from_dataset = set()
with ThreadPoolExecutor(max_workers=num_threads) as executor:
documents = []
for document in _list_all_documents_in_dataset(las_client, dataset_id):
for document in list_all_documents_in_dataset(las_client, dataset_id):
if document['documentId'] in already_downloaded:
already_downloaded_from_dataset.add(document['documentId'])
else:
Expand Down Expand Up @@ -480,7 +480,7 @@ def create_datasets_parser(subparsers):
"options": {} (optional)
},
...
]
]
Examples:
[{"type": "remove-duplicates", "options": {}}]
'''))
Expand Down
23 changes: 22 additions & 1 deletion lascli/parser/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import pathlib
import textwrap
from argparse import RawTextHelpFormatter
from functools import partial

import dateparser
from las import Client

from lascli.util import nullable, NotProvided, json_path, json_or_json_path
from .datasets import list_all_documents_in_dataset
from lascli.actions import workflows
from lascli.util import nullable, NotProvided, json_path, json_or_json_path


def list_workflows(las_client: Client, **optional_args):
Expand All @@ -32,6 +34,20 @@ def execute_workflow(las_client: Client, workflow_id, path):
return las_client.execute_workflow(workflow_id, content)


def execute_all_workflow(las_client: Client, workflow_id, dataset_id, sleep_increment=4):
    """Start a workflow execution for every document in a dataset.

    Each execution input carries the documentId, a 'CLI' source marker, and a
    staggered initialSleepInSeconds of ``i * sleep_increment`` so the started
    executions do not all begin work at the same instant. When a document's
    metadata contains an ``originalFilePath``, its file name is used as the
    execution title.

    :param las_client: API client used to list documents and start executions
    :param workflow_id: id of the workflow to execute
    :param dataset_id: id of the dataset whose documents are submitted
    :param sleep_increment: seconds of extra initial sleep added per document
        (default 4, matching the previous hard-coded stagger)
    :return: summary string with the number of executions started
    """
    executions = []
    for i, document in enumerate(list_all_documents_in_dataset(las_client, dataset_id)):
        content = {
            'documentId': document['documentId'],
            'source': 'CLI',
            'initialSleepInSeconds': i * sleep_increment,
        }
        # Use the original file name as a human-readable title when available.
        if original_file_path := (document.get('metadata') or {}).get('originalFilePath'):
            content['title'] = pathlib.Path(original_file_path).name
        execution = las_client.execute_workflow(workflow_id, content)
        # Echo each execution as it starts so long runs show progress.
        print(json.dumps(execution, indent=2))
        executions.append(execution)

    return f'Started {len(executions)} executions'


def list_workflow_executions(las_client: Client, workflow_id, **optional_args):
return las_client.list_workflow_executions(workflow_id, **optional_args)

Expand Down Expand Up @@ -196,6 +212,11 @@ def create_workflows_parser(subparsers):
execute_workflow_parser.add_argument('path', help='path to json-file with input to the first state of the workflow')
execute_workflow_parser.set_defaults(cmd=execute_workflow)

execute_workflow_parser = subparsers.add_parser('execute-all')
execute_workflow_parser.add_argument('workflow_id')
execute_workflow_parser.add_argument('dataset_id', help='Start execution on all documents in dataset')
execute_workflow_parser.set_defaults(cmd=execute_all_workflow)

list_executions_parser = subparsers.add_parser('list-executions')
list_executions_parser.add_argument('workflow_id')
list_executions_parser.add_argument('--status', '-s', nargs='+', help='Only return those with the given status')
Expand Down
10 changes: 10 additions & 0 deletions tests/test_workflow_executions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ def test_executions_create(parser, client):
util.main_parser(parser, client, args)


def test_executions_create_all(parser, client):
    """Smoke test: `workflows execute-all <workflow_id> <dataset_id>` parses and dispatches."""
    workflow_id = service.create_workflow_id()
    dataset_id = service.create_dataset_id()
    util.main_parser(parser, client, ['workflows', 'execute-all', workflow_id, dataset_id])


@pytest.mark.parametrize('sort_by', [
('--sort-by', 'startTime'),
('--sort-by', 'endTime'),
Expand Down