gooddata
diff --git a/‎docs/content/en/latest/pipelines/ldm_extension/_index.md‎
Lines changed: 23 additions & 0 deletions b/‎docs/content/en/latest/pipelines/ldm_extension/_index.md‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py‎
Lines changed: 82 additions & 5 deletions b/‎packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py‎
Lines changed: 82 additions & 5 deletions
diff --git a/‎packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py‎
Lines changed: 70 additions & 4 deletions b/‎packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py‎
Lines changed: 70 additions & 4 deletions
diff --git a/‎packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py‎
Lines changed: 17 additions & 1 deletion b/‎packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py‎
Lines changed: 17 additions & 1 deletion
@@ -45,6 +45,8 @@ The custom dataset represents a new dataset appended to the child LDM. It is def
 | dataset_reference_source_column_data_type | [ColumnDataType](#columndatatype) | Column data type. |
 | workspace_data_filter_id | string | ID of the workspace data filter to use. |
 | workspace_data_filter_column_name | string | Name of the column in custom dataset used for filtering. |
+| dataset_description | string \| None | Optional description stored on the custom dataset in the declarative LDM. |
+| dataset_tags | string[] \| None | Optional list of tags for the custom dataset; when omitted (`None`), defaults to a single tag equal to `dataset_name`. |
 
 #### Validity constraints
 
@@ -63,6 +65,8 @@ The custom fields define the individual fields in the custom datasets defined ab
 | custom_field_type | [CustomFieldType](#customfieldtype) | Indicates whether the field represents an attribute, a date, or a fact. |
 | custom_field_source_column | string | Name of the column in the physical data model. |
 | custom_field_source_column_data_type | [ColumnDataType](#columndatatype) | Data type of the field. |
+| description | string \| None | Optional description stored on the attribute, fact, or date dataset generated from this field. |
+| tags | string[] \| None | Optional list of tags for the generated object; when omitted (`None`), defaults to a single tag containing the name of the dataset the field belongs to. |
 
 #### Validity constraints
 
@@ -128,6 +132,25 @@ ldm_extension_manager.process(
 
 ```
 
+### Merging into an existing child workspace LDM
+
+By default, `process` **replaces** the child workspace LDM with the declarative fragment built from your inputs. Any prior custom datasets or date instances that aren't in the current call are lost.
+
+Set `merge_into_existing_ldm=True` to switch to an **append / update** behaviour: `process` loads the current workspace LDM first, replaces any dataset or date instance whose `id` matches one in your input, and keeps the rest of the model as is (including previously uploaded custom extensions).
+
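+Optional cleanup: when `merge_into_existing_ldm=True`, `remove_managed_datasets_missing_from_input=True`, and `management_tag` is set to a non-empty string, existing datasets that carry that tag but are **not** in the current `process` call are removed from the merged LDM before the upload. Date instances are never removed by this cleanup, and both options are ignored when `merge_into_existing_ldm` is `False`. This lets tools such as BCA reliably delete their own obsolete custom datasets without touching anything else.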
+
+```python
+ldm_extension_manager.process(
+    custom_datasets=custom_dataset_definitions,
+    custom_fields=custom_field_definitions,
+    check_relations=False,
+    merge_into_existing_ldm=True,
+    remove_managed_datasets_missing_from_input=True,
+    management_tag="bca_tooling_managed",
+)
+```
+
 ## Example
 
 Here is a complete example of extending a child workspace's LDM:
 
@@ -5,6 +5,8 @@
 into objects defined in the GoodData Python SDK.
 """
 
+import copy
+
 from gooddata_sdk.catalog.identifier import (
     CatalogDatasetWorkspaceDataFilterIdentifier,
     CatalogGrainIdentifier,
@@ -36,11 +38,26 @@
 from gooddata_pipelines.ldm_extension.models.custom_data_object import (
     ColumnDataType,
     CustomDataset,
+    CustomDatasetDefinition,
     CustomFieldDefinition,
     CustomFieldType,
 )
 
 
+def _effective_field_tags(
+    dataset_name: str, custom_field: CustomFieldDefinition
+) -> list[str]:
+    if custom_field.tags is not None:
+        return list(custom_field.tags)
+    return [dataset_name]
+
+
+def _effective_dataset_tags(definition: CustomDatasetDefinition) -> list[str]:
+    if definition.dataset_tags is not None:
+        return list(definition.dataset_tags)
+    return [definition.dataset_name]
+
+
 class LdmExtensionDataProcessor:
     """Create GoodData LDM from validated custom datasets and fields."""
 
@@ -77,7 +94,8 @@ def _attribute_from_field(
             source_column=custom_field.custom_field_source_column,
             labels=[],
             source_column_data_type=custom_field.custom_field_source_column_data_type.value,
-            tags=[dataset_name],
+            description=custom_field.description,
+            tags=_effective_field_tags(dataset_name, custom_field),
         )
 
     @staticmethod
@@ -91,7 +109,8 @@ def _fact_from_field(
             title=custom_field.custom_field_name,
             source_column=custom_field.custom_field_source_column,
             source_column_data_type=custom_field.custom_field_source_column_data_type.value,
-            tags=[dataset_name],
+            description=custom_field.description,
+            tags=_effective_field_tags(dataset_name, custom_field),
         )
 
     def _date_from_field(
@@ -109,7 +128,8 @@ def _date_from_field(
                 title_pattern="%titleBase - %granularityTitle",
             ),
             granularities=self.DATE_GRANULARITIES,
-            tags=[dataset_name],
+            description=custom_field.description,
+            tags=_effective_field_tags(dataset_name, custom_field),
         )
 
     @staticmethod
@@ -258,7 +278,7 @@ def datasets_to_ldm(
                         ),
                     ]
                     + date_references,
-                    description=None,
+                    description=dataset.definition.dataset_description,
                     attributes=attributes,
                     facts=facts,
                     data_source_table_id=dataset_source_table_id,
@@ -278,7 +298,7 @@ def datasets_to_ldm(
                             filter_column_data_type=ColumnDataType.STRING.value,
                         )
                     ],
-                    tags=[dataset.definition.dataset_name],
+                    tags=_effective_dataset_tags(dataset.definition),
                 )
             )
 
@@ -287,3 +307,60 @@ def datasets_to_ldm(
             datasets=declarative_datasets, date_instances=date_instances
         )
         return CatalogDeclarativeModel(ldm=ldm)
+
+    def merge_custom_ldm_into_existing(
+        self,
+        existing: CatalogDeclarativeModel,
+        custom_datasets: dict[DatasetId, CustomDataset],
+        *,
+        remove_managed_datasets_missing_from_input: bool = False,
+        management_tag: str | None = None,
+    ) -> CatalogDeclarativeModel:
+        """Merge datasets produced from ``custom_datasets`` into an existing declarative LDM.
+
+        Datasets and date instances in the generated fragment replace existing objects
+        with the same ``id``. When ``remove_managed_datasets_missing_from_input`` is
+        set and ``management_tag`` is non-empty, existing datasets tagged with it but
+        absent from the fragment are removed first (tooling-owned extension datasets).
+
+        Any other pre-existing LDM objects (previously uploaded extensions whose ids
+        are not in the incoming fragment) are preserved unchanged.
+        """
+        fragment = self.datasets_to_ldm(custom_datasets)
+        fragment_ldm = fragment.ldm or CatalogDeclarativeLdm(
+            datasets=[], date_instances=[]
+        )
+
+        result = copy.deepcopy(existing)
+        result_ldm = result.ldm or CatalogDeclarativeLdm(
+            datasets=[], date_instances=[]
+        )
+        result.ldm = result_ldm
+
+        incoming_dataset_ids = {d.id for d in fragment_ldm.datasets}
+        incoming_date_ids = {d.id for d in fragment_ldm.date_instances}
+
+        datasets = list(result_ldm.datasets)
+        if remove_managed_datasets_missing_from_input and management_tag:
+            datasets = [
+                d
+                for d in datasets
+                if not (
+                    d.tags
+                    and management_tag in d.tags
+                    and d.id not in incoming_dataset_ids
+                )
+            ]
+        datasets = [d for d in datasets if d.id not in incoming_dataset_ids]
+        datasets.extend(fragment_ldm.datasets)
+        result_ldm.datasets = datasets
+
+        date_instances = [
+            d
+            for d in result_ldm.date_instances
+            if d.id not in incoming_date_ids
+        ]
+        date_instances.extend(fragment_ldm.date_instances)
+        result_ldm.date_instances = date_instances
+
+        return result
@@ -3,6 +3,9 @@
 
 from pathlib import Path
 
+from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import (
+    CatalogDeclarativeModel,
+)
 from gooddata_sdk.sdk import GoodDataSdk
 from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
 
@@ -147,9 +150,35 @@ def _new_ldm_does_not_invalidate_relations(
         # If the set of new invalid relations is a subset of the current one,
         return set_new_invalid_relations.issubset(set_current_invalid_relations)
 
+    def _ldm_payload_for_workspace(
+        self,
+        workspace_id: str,
+        datasets: dict[DatasetId, CustomDataset],
+        *,
+        merge_into_existing_ldm: bool,
+        remove_managed_datasets_missing_from_input: bool,
+        management_tag: str | None,
+    ) -> CatalogDeclarativeModel:
+        """Build the declarative LDM payload to upload for one workspace."""
+        if not merge_into_existing_ldm:
+            return self._processor.datasets_to_ldm(datasets)
+        current = self._sdk.catalog_workspace_content.get_declarative_ldm(
+            workspace_id
+        )
+        return self._processor.merge_custom_ldm_into_existing(
+            current,
+            datasets,
+            remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+            management_tag=management_tag,
+        )
+
     def _process_with_relations_check(
         self,
         validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
+        *,
+        merge_into_existing_ldm: bool = False,
+        remove_managed_datasets_missing_from_input: bool = False,
+        management_tag: str | None = None,
     ) -> None:
         """Check whether relations of analytical objects are valid before and after
         updating the LDM in the GoodData workspace.
@@ -173,7 +202,13 @@ def _process_with_relations_check(
             # Put the LDM with custom datasets into the GoodData workspace.
             self._sdk.catalog_workspace_content.put_declarative_ldm(
                 workspace_id=workspace_id,
-                ldm=self._processor.datasets_to_ldm(datasets),
+                ldm=self._ldm_payload_for_workspace(
+                    workspace_id,
+                    datasets,
+                    merge_into_existing_ldm=merge_into_existing_ldm,
+                    remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+                    management_tag=management_tag,
+                ),
             )
 
             # Get a set of objects with invalid relations from the new workspace state
@@ -232,13 +267,23 @@ def _log_diff_invalid_relations(
     def _process_without_relations_check(
         self,
         validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
+        *,
+        merge_into_existing_ldm: bool = False,
+        remove_managed_datasets_missing_from_input: bool = False,
+        management_tag: str | None = None,
     ) -> None:
         """Update the LDM in the GoodData workspace without checking relations."""
         for workspace_id, datasets in validated_data.items():
             # Put the LDM with custom datasets into the GoodData workspace.
             self._sdk.catalog_workspace_content.put_declarative_ldm(
                 workspace_id=workspace_id,
-                ldm=self._processor.datasets_to_ldm(datasets),
+                ldm=self._ldm_payload_for_workspace(
+                    workspace_id,
+                    datasets,
+                    merge_into_existing_ldm=merge_into_existing_ldm,
+                    remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+                    management_tag=management_tag,
+                ),
             )
             self._log_success_message(workspace_id)
 
@@ -251,6 +296,9 @@ def process(
         custom_datasets: list[CustomDatasetDefinition],
         custom_fields: list[CustomFieldDefinition],
         check_relations: bool = True,
+        merge_into_existing_ldm: bool = False,
+        remove_managed_datasets_missing_from_input: bool = False,
+        management_tag: str | None = None,
     ) -> None:
         """Create custom datasets and fields in GoodData workspaces.
 
@@ -266,6 +314,14 @@ def process(
                 after updating the LDM. If the number of invalid relations increases,
                 the LDM will be reverted to its previous state. If False, the check
                 is skiped and the LDM is updated directly. Defaults to True.
+            merge_into_existing_ldm (bool): When True, load the workspace LDM first and merge
+                the generated custom datasets and date instances into it instead of uploading
+                only the extension fragment. Defaults to False for backward compatibility.
+            remove_managed_datasets_missing_from_input (bool): When ``merge_into_existing_ldm``
+                is True, remove existing datasets that contain ``management_tag`` but whose
+                dataset id is not present in this ``process`` call (tooling cleanup).
+            management_tag (str | None): Tag marking datasets eligible for removal via
+                ``remove_managed_datasets_missing_from_input``; None or empty skips cleanup.
 
         Raises:
             ValueError: If there are validation errors in the dataset or field definitions.
@@ -278,6 +334,16 @@ def process(
 
         if check_relations:
             # Process the validated data with relations check.
-            self._process_with_relations_check(validated_data)
+            self._process_with_relations_check(
+                validated_data,
+                merge_into_existing_ldm=merge_into_existing_ldm,
+                remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+                management_tag=management_tag,
+            )
         else:
-            self._process_without_relations_check(validated_data)
+            self._process_without_relations_check(
+                validated_data,
+                merge_into_existing_ldm=merge_into_existing_ldm,
+                remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+                management_tag=management_tag,
+            )
@@ -7,7 +7,7 @@
 
 from enum import Enum
 
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, Field, model_validator
 
 
 class CustomFieldType(str, Enum):
@@ -42,6 +42,14 @@ class CustomFieldDefinition(BaseModel):
     custom_field_type: CustomFieldType
     custom_field_source_column: str
     custom_field_source_column_data_type: ColumnDataType
+    description: str | None = Field(
+        default=None,
+        description="Declarative description on the attribute, fact, or date dataset.",
+    )
+    tags: list[str] | None = Field(
+        default=None,
+        description="If set, replaces the default tag list (dataset display name only).",
+    )
 
     @model_validator(mode="after")
     def check_ids_not_equal(self) -> "CustomFieldDefinition":
@@ -68,6 +76,14 @@ class CustomDatasetDefinition(BaseModel):
     dataset_reference_source_column_data_type: ColumnDataType
     workspace_data_filter_id: str
     workspace_data_filter_column_name: str
+    dataset_description: str | None = Field(
+        default=None,
+        description="Declarative description on the custom dataset.",
+    )
+    dataset_tags: list[str] | None = Field(
+        default=None,
+        description="If set, replaces the default tag list (dataset display name only).",
+    )
 
     @model_validator(mode="after")
     def check_source(self) -> "CustomDatasetDefinition":