Skip to content

Commit 4ec2144

Browse files
committed
feat(gooddata-pipelines): support tags, description and merge into existing LDM
1 parent 6d554ce commit 4ec2144

8 files changed

Lines changed: 599 additions & 91 deletions

File tree

docs/content/en/latest/pipelines/ldm_extension/_index.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ The custom dataset represents a new dataset appended to the child LDM. It is def
4545
| dataset_reference_source_column_data_type | [ColumnDataType](#columndatatype) | Column data type. |
4646
| workspace_data_filter_id | string | ID of the workspace data filter to use. |
4747
| workspace_data_filter_column_name | string | Name of the column in custom dataset used for filtering. |
48+
| dataset_description | string \| None | Optional declarative description on the custom dataset. |
49+
| dataset_tags | string[] \| None | Optional tag list; when omitted (`None`), defaults to a single tag equal to the dataset display name (`dataset_name`). |
4850

4951
#### Validity constraints
5052

@@ -63,6 +65,8 @@ The custom fields define the individual fields in the custom datasets defined ab
6365
| custom_field_type | [CustomFieldType](#customfieldtype) | Indicates whether the field represents an attribute, a date, or a fact. |
6466
| custom_field_source_column | string | Name of the column in the physical data model. |
6567
| custom_field_source_column_data_type | [ColumnDataType](#columndatatype) | Data type of the field. |
68+
| description | string \| None | Optional declarative description on the attribute, fact, or date dataset. |
69+
| tags | string[] \| None | Optional tag list; when omitted (`None`), defaults to a single tag equal to the dataset display name. |
6670

6771
#### Validity constraints
6872

@@ -128,6 +132,25 @@ ldm_extension_manager.process(
128132

129133
```
130134

135+
### Merging into an existing child workspace LDM
136+
137+
By default, `process` **replaces** the child workspace LDM with the declarative fragment built from your inputs. Any prior custom datasets or date instances that aren't in the current call are lost.
138+
139+
Set `merge_into_existing_ldm=True` to switch to an **append / update** behaviour: `process` loads the current workspace LDM first, replaces any dataset or date instance whose `id` matches one in your input, and keeps the rest of the model as is (including previously uploaded custom extensions).
140+
141+
Optional cleanup: when `remove_managed_datasets_missing_from_input=True` and `management_tag` is set, datasets that carry that tag but are **not** in the current `process` call are removed from the merged LDM before the upload. This lets tools such as BCA reliably delete their own obsolete custom datasets without touching anything else.
142+
143+
```python
144+
ldm_extension_manager.process(
145+
custom_datasets=custom_dataset_definitions,
146+
custom_fields=custom_field_definitions,
147+
check_relations=False,
148+
merge_into_existing_ldm=True,
149+
remove_managed_datasets_missing_from_input=True,
150+
management_tag="bca_tooling_managed",
151+
)
152+
```
153+
131154
## Example
132155

133156
Here is a complete example of extending a child workspace's LDM:

packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
into objects defined in the GoodData Python SDK.
66
"""
77

8+
import copy
9+
810
from gooddata_sdk.catalog.identifier import (
911
CatalogDatasetWorkspaceDataFilterIdentifier,
1012
CatalogGrainIdentifier,
@@ -36,11 +38,26 @@
3638
from gooddata_pipelines.ldm_extension.models.custom_data_object import (
3739
ColumnDataType,
3840
CustomDataset,
41+
CustomDatasetDefinition,
3942
CustomFieldDefinition,
4043
CustomFieldType,
4144
)
4245

4346

47+
def _effective_field_tags(
48+
dataset_name: str, custom_field: CustomFieldDefinition
49+
) -> list[str]:
50+
if custom_field.tags is not None:
51+
return list(custom_field.tags)
52+
return [dataset_name]
53+
54+
55+
def _effective_dataset_tags(definition: CustomDatasetDefinition) -> list[str]:
56+
if definition.dataset_tags is not None:
57+
return list(definition.dataset_tags)
58+
return [definition.dataset_name]
59+
60+
4461
class LdmExtensionDataProcessor:
4562
"""Create GoodData LDM from validated custom datasets and fields."""
4663

@@ -77,7 +94,8 @@ def _attribute_from_field(
7794
source_column=custom_field.custom_field_source_column,
7895
labels=[],
7996
source_column_data_type=custom_field.custom_field_source_column_data_type.value,
80-
tags=[dataset_name],
97+
description=custom_field.description,
98+
tags=_effective_field_tags(dataset_name, custom_field),
8199
)
82100

83101
@staticmethod
@@ -91,7 +109,8 @@ def _fact_from_field(
91109
title=custom_field.custom_field_name,
92110
source_column=custom_field.custom_field_source_column,
93111
source_column_data_type=custom_field.custom_field_source_column_data_type.value,
94-
tags=[dataset_name],
112+
description=custom_field.description,
113+
tags=_effective_field_tags(dataset_name, custom_field),
95114
)
96115

97116
def _date_from_field(
@@ -109,7 +128,8 @@ def _date_from_field(
109128
title_pattern="%titleBase - %granularityTitle",
110129
),
111130
granularities=self.DATE_GRANULARITIES,
112-
tags=[dataset_name],
131+
description=custom_field.description,
132+
tags=_effective_field_tags(dataset_name, custom_field),
113133
)
114134

115135
@staticmethod
@@ -258,7 +278,7 @@ def datasets_to_ldm(
258278
),
259279
]
260280
+ date_references,
261-
description=None,
281+
description=dataset.definition.dataset_description,
262282
attributes=attributes,
263283
facts=facts,
264284
data_source_table_id=dataset_source_table_id,
@@ -278,7 +298,7 @@ def datasets_to_ldm(
278298
filter_column_data_type=ColumnDataType.STRING.value,
279299
)
280300
],
281-
tags=[dataset.definition.dataset_name],
301+
tags=_effective_dataset_tags(dataset.definition),
282302
)
283303
)
284304

@@ -287,3 +307,60 @@ def datasets_to_ldm(
287307
datasets=declarative_datasets, date_instances=date_instances
288308
)
289309
return CatalogDeclarativeModel(ldm=ldm)
310+
311+
def merge_custom_ldm_into_existing(
    self,
    existing: CatalogDeclarativeModel,
    custom_datasets: dict[DatasetId, CustomDataset],
    *,
    remove_managed_datasets_missing_from_input: bool = False,
    management_tag: str | None = None,
) -> CatalogDeclarativeModel:
    """Overlay the LDM generated from ``custom_datasets`` onto ``existing``.

    The fragment is built with ``datasets_to_ldm``. A deep copy of
    ``existing`` is then updated, so the passed-in model is not modified:

    * datasets and date instances whose ``id`` also occurs in the fragment
      are dropped and the fragment's versions are appended at the end;
    * when ``remove_managed_datasets_missing_from_input`` is true and
      ``management_tag`` is non-empty, datasets that carry that tag and are
      not in the fragment are dropped as well (date instances are never
      pruned by tag);
    * everything else is kept in its original order.

    Args:
        existing: Declarative model to merge into.
        custom_datasets: Validated custom datasets to generate the fragment from.
        remove_managed_datasets_missing_from_input: Enables the tag-based cleanup.
        management_tag: Tag identifying datasets eligible for cleanup. The
            cleanup is skipped when this is ``None`` or empty.

    Returns:
        The merged copy of ``existing``.
    """
    generated = self.datasets_to_ldm(custom_datasets).ldm
    if not generated:
        generated = CatalogDeclarativeLdm(datasets=[], date_instances=[])

    merged = copy.deepcopy(existing)
    if not merged.ldm:
        merged.ldm = CatalogDeclarativeLdm(datasets=[], date_instances=[])
    target = merged.ldm

    new_dataset_ids = {item.id for item in generated.datasets}
    new_date_ids = {item.id for item in generated.date_instances}

    prune_managed = bool(
        remove_managed_datasets_missing_from_input and management_tag
    )

    kept_datasets = []
    for current in target.datasets:
        if current.id in new_dataset_ids:
            # Superseded by the incoming definition appended below.
            continue
        if prune_managed and current.tags and management_tag in current.tags:
            # Tooling-owned dataset that the caller no longer supplies.
            continue
        kept_datasets.append(current)
    target.datasets = kept_datasets + list(generated.datasets)

    kept_dates = [
        current
        for current in target.date_instances
        if current.id not in new_date_ids
    ]
    target.date_instances = kept_dates + list(generated.date_instances)

    return merged

packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
from pathlib import Path
55

6+
from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import (
7+
CatalogDeclarativeModel,
8+
)
69
from gooddata_sdk.sdk import GoodDataSdk
710
from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
811

@@ -147,9 +150,35 @@ def _new_ldm_does_not_invalidate_relations(
147150
# If the set of new invalid relations is a subset of the current one,
148151
return set_new_invalid_relations.issubset(set_current_invalid_relations)
149152

153+
def _ldm_payload_for_workspace(
154+
self,
155+
workspace_id: str,
156+
datasets: dict[DatasetId, CustomDataset],
157+
*,
158+
merge_into_existing_ldm: bool,
159+
remove_managed_datasets_missing_from_input: bool,
160+
management_tag: str | None,
161+
) -> CatalogDeclarativeModel:
162+
"""Build the declarative LDM payload to upload for one workspace."""
163+
if not merge_into_existing_ldm:
164+
return self._processor.datasets_to_ldm(datasets)
165+
current = self._sdk.catalog_workspace_content.get_declarative_ldm(
166+
workspace_id
167+
)
168+
return self._processor.merge_custom_ldm_into_existing(
169+
current,
170+
datasets,
171+
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
172+
management_tag=management_tag,
173+
)
174+
150175
def _process_with_relations_check(
151176
self,
152177
validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
178+
*,
179+
merge_into_existing_ldm: bool = False,
180+
remove_managed_datasets_missing_from_input: bool = False,
181+
management_tag: str | None = None,
153182
) -> None:
154183
"""Check whether relations of analytical objects are valid before and after
155184
updating the LDM in the GoodData workspace.
@@ -173,7 +202,13 @@ def _process_with_relations_check(
173202
# Put the LDM with custom datasets into the GoodData workspace.
174203
self._sdk.catalog_workspace_content.put_declarative_ldm(
175204
workspace_id=workspace_id,
176-
ldm=self._processor.datasets_to_ldm(datasets),
205+
ldm=self._ldm_payload_for_workspace(
206+
workspace_id,
207+
datasets,
208+
merge_into_existing_ldm=merge_into_existing_ldm,
209+
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
210+
management_tag=management_tag,
211+
),
177212
)
178213

179214
# Get a set of objects with invalid relations from the new workspace state
@@ -232,13 +267,23 @@ def _log_diff_invalid_relations(
232267
def _process_without_relations_check(
233268
self,
234269
validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
270+
*,
271+
merge_into_existing_ldm: bool = False,
272+
remove_managed_datasets_missing_from_input: bool = False,
273+
management_tag: str | None = None,
235274
) -> None:
236275
"""Update the LDM in the GoodData workspace without checking relations."""
237276
for workspace_id, datasets in validated_data.items():
238277
# Put the LDM with custom datasets into the GoodData workspace.
239278
self._sdk.catalog_workspace_content.put_declarative_ldm(
240279
workspace_id=workspace_id,
241-
ldm=self._processor.datasets_to_ldm(datasets),
280+
ldm=self._ldm_payload_for_workspace(
281+
workspace_id,
282+
datasets,
283+
merge_into_existing_ldm=merge_into_existing_ldm,
284+
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
285+
management_tag=management_tag,
286+
),
242287
)
243288
self._log_success_message(workspace_id)
244289

@@ -251,6 +296,9 @@ def process(
251296
custom_datasets: list[CustomDatasetDefinition],
252297
custom_fields: list[CustomFieldDefinition],
253298
check_relations: bool = True,
299+
merge_into_existing_ldm: bool = False,
300+
remove_managed_datasets_missing_from_input: bool = False,
301+
management_tag: str | None = None,
254302
) -> None:
255303
"""Create custom datasets and fields in GoodData workspaces.
256304
@@ -266,6 +314,14 @@ def process(
266314
after updating the LDM. If the number of invalid relations increases,
267315
the LDM will be reverted to its previous state. If False, the check
268316
is skipped and the LDM is updated directly. Defaults to True.
317+
merge_into_existing_ldm (bool): When True, load the workspace LDM first and merge
318+
the generated custom datasets and date instances into it instead of uploading
319+
only the extension fragment. Defaults to False for backward compatibility.
320+
remove_managed_datasets_missing_from_input (bool): When ``merge_into_existing_ldm``
321+
is True, remove existing datasets that contain ``management_tag`` but whose
322+
dataset id is not present in this ``process`` call (tooling cleanup).
323+
management_tag (str | None): Tag value used with
324+
``remove_managed_datasets_missing_from_input``.
269325
270326
Raises:
271327
ValueError: If there are validation errors in the dataset or field definitions.
@@ -278,6 +334,16 @@ def process(
278334

279335
if check_relations:
280336
# Process the validated data with relations check.
281-
self._process_with_relations_check(validated_data)
337+
self._process_with_relations_check(
338+
validated_data,
339+
merge_into_existing_ldm=merge_into_existing_ldm,
340+
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
341+
management_tag=management_tag,
342+
)
282343
else:
283-
self._process_without_relations_check(validated_data)
344+
self._process_without_relations_check(
345+
validated_data,
346+
merge_into_existing_ldm=merge_into_existing_ldm,
347+
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
348+
management_tag=management_tag,
349+
)

packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from enum import Enum
99

10-
from pydantic import BaseModel, model_validator
10+
from pydantic import BaseModel, Field, model_validator
1111

1212

1313
class CustomFieldType(str, Enum):
@@ -42,6 +42,14 @@ class CustomFieldDefinition(BaseModel):
4242
custom_field_type: CustomFieldType
4343
custom_field_source_column: str
4444
custom_field_source_column_data_type: ColumnDataType
45+
description: str | None = Field(
46+
default=None,
47+
description="Declarative description on the attribute, fact, or date dataset.",
48+
)
49+
tags: list[str] | None = Field(
50+
default=None,
51+
description="If set, replaces the default tag list (dataset display name only).",
52+
)
4553

4654
@model_validator(mode="after")
4755
def check_ids_not_equal(self) -> "CustomFieldDefinition":
@@ -68,6 +76,14 @@ class CustomDatasetDefinition(BaseModel):
6876
dataset_reference_source_column_data_type: ColumnDataType
6977
workspace_data_filter_id: str
7078
workspace_data_filter_column_name: str
79+
dataset_description: str | None = Field(
80+
default=None,
81+
description="Declarative description on the custom dataset.",
82+
)
83+
dataset_tags: list[str] | None = Field(
84+
default=None,
85+
description="If set, replaces the default tag list (dataset display name only).",
86+
)
7187

7288
@model_validator(mode="after")
7389
def check_source(self) -> "CustomDatasetDefinition":

0 commit comments

Comments
 (0)