digitalmethodsinitiative · dale-wahl · May 14, 2026
diff --git a/backend/lib/processor.py b/backend/lib/processor.py
@@ -744,7 +744,7 @@ def create_standalone(self, item_ids=None):
         except AttributeError:
             standalone.board = self.type
 
-        standalone.type = top_parent.type
+        standalone.adopt_type(top_parent.type)
 
         standalone.detach()
         standalone.delete_parameter("key_parent")

diff --git a/common/lib/dataset.py b/common/lib/dataset.py
@@ -2046,14 +2046,85 @@ def get_place_in_queue(self, update=False):
 
     def get_own_processor(self):
         """
-        Get the processor class that produced this dataset
+        Get the processor class corresponding to this dataset's data shape.
+
+        Normally this is the processor that produced the dataset. For datasets
+        whose `type` was adopted from elsewhere (e.g. a filter that copies its
+        parent's NDJSON content verbatim) it is the processor matching the
+        result file's contents instead; see `get_producer_processor` for the
+        producer. The type is read from `parameters["type"]` when that key
+        exists, and from `self.data["type"]` otherwise.
 
         :return:  Processor class, or `None` if not available.
         """
         processor_type = self.parameters.get("type", self.data.get("type"))
 
         return self.modules.processors.get(processor_type)
 
+    def get_producer_processor(self):
+        """
+        Get the processor class that actually produced this dataset.
+
+        Looks up the `producer_type` parameter recorded by `adopt_type`; when
+        that is missing or empty, the dataset's own `type` is used instead.
+        Meant for UI code that needs the producer's options schema.
+
+        :return:  Processor class, or `None` if not available.
+        """
+        # `or` rather than a .get() default, so an empty stored value falls back
+        producer_type = self.parameters.get("producer_type") or self.data.get("type")
+        return self.modules.processors.get(producer_type)
+
+    def adopt_type(self, new_type):
+        """
+        Rewrite this dataset's `type` to match its result file's data shape.
+
+        Use after e.g. a filter copied its parent's NDJSON verbatim. The first
+        call keeps the previous type as the `producer_type` parameter; later
+        calls leave that alone. Adopting the current type is a no-op, and
+        `datasource` is never touched (see `change_datasource`).
+
+        :param str new_type:  The type to adopt; must not be empty.
+        :raises ValueError:  If `new_type` is empty or `None`.
+        """
+        if not new_type:
+            raise ValueError("adopt_type() requires a non-empty type")
+
+        current_type = self.data.get("type")
+        if new_type == current_type:
+            return
+
+        # an unset type names no producer, so never record it. Assigned as an
+        # attribute so __setattr__ stores it in `parameters`, like `board`.
+        if current_type and "producer_type" not in self.parameters:
+            self.producer_type = current_type
+
+        # __setattr__ rejects changing an existing `type`; update the row here
+        self.db.update("datasets", where={"key": self.key}, data={"type": new_type})
+        self.data["type"] = new_type
+
+    def get_displayable_parameters(self, config=None):
+        """
+        List this dataset's parameters for display in the UI parameter panel.
+
+        Each parameter is paired with its entry in the options schema of the
+        processor returned by `get_producer_processor()`. Parameters missing
+        from that schema, equal to "", or marked sensitive there are left out.
+
+        :param config:  Configuration reader, passed through to get_options.
+        :return list:  List of dicts: {"key", "value", "schema"}.
+        """
+        producing_processor = self.get_producer_processor()
+        if not producing_processor:
+            return []
+
+        schema = producing_processor.get_options(parent_dataset=self.top_parent(), config=config)
+        displayable = []
+        for name, value in self.parameters.items():
+            if name in schema and value != "" and not schema[name].get("sensitive"):
+                displayable.append({"key": name, "value": value, "schema": schema[name]})
+        return displayable
+
     def get_available_processors(self, config=None, exclude_hidden=False):
         """
         Get list of processors that may be run for this dataset
@@ -2780,6 +2851,20 @@ def __setattr__(self, attr, value):
             super().__setattr__(attr, value)
             return
 
+        # `type` describes the data shape of the result file. It may diverge
+        # from the producing processor's type (e.g. a filter that copies its
+        # parent's NDJSON content verbatim). Direct rewrites would lose the
+        # producer identity needed by UI code, so callers are forced through
+        # adopt_type(), which preserves the original type under
+        # parameters["producer_type"].
+        if attr == "type" and self.data and self.data.get("type") and value != self.data["type"]:
+            raise AttributeError(
+                "Refusing to rewrite DataSet.type from %r to %r via direct attribute "
+                "assignment. Use DataSet.adopt_type() so the original producing "
+                "processor is preserved under parameters['producer_type']."
+                % (self.data["type"], value)
+            )
+
         if attr not in self.data:
             self.parameters[attr] = value
             attr = "parameters"

diff --git a/processors/conversion/merge_datasets.py b/processors/conversion/merge_datasets.py
@@ -287,5 +287,12 @@ def after_process(self):
         else:
             standalone.update_label(f"(Merged) {self.source_dataset.get_label()}")
 
-        standalone.parameters = {**self.dataset.parameters, "board": "merged"}
-        standalone.type = self.source_dataset.type
+        # Wholesale-overwriting parameters here would clobber the producer_type
+        # stashed by create_standalone -> adopt_type; preserve it explicitly so
+        # the UI can still resolve this dataset back to merge-datasets.
+        standalone.parameters = {
+            **self.dataset.parameters,
+            "board": "merged",
+            "producer_type": standalone.parameters.get("producer_type", self.type),
+        }
+        standalone.adopt_type(self.source_dataset.type)
diff --git a/processors/filtering/column_filter.py b/processors/filtering/column_filter.py
@@ -298,7 +298,7 @@ def after_process(self):
         # correctly on the filtered result (especially for NDJSON). Unlike
         # BaseFilter, we deliberately keep this dataset attached to its parent
         # rather than promoting it to a standalone top-level dataset.
-        self.dataset.type = self.source_dataset.type
-        self.dataset.datasource = self.source_dataset.parameters.get(
-            "datasource", self.source_dataset.type
+        self.dataset.adopt_type(self.source_dataset.type)
+        self.dataset.change_datasource(
+            self.source_dataset.parameters.get("datasource", self.source_dataset.type)
         )
diff --git a/processors/filtering/tiktok_refresh.py b/processors/filtering/tiktok_refresh.py
@@ -74,7 +74,7 @@ def after_process(self):
         # Request standalone
         standalone = self.create_standalone()
         # Update the type
-        standalone.type = "tiktok-urls-search"
+        standalone.adopt_type("tiktok-urls-search")
 
     @classmethod
     def is_filter(cls):

diff --git a/webtool/templates/components/result-parameters.html b/webtool/templates/components/result-parameters.html
@@ -45,11 +45,10 @@
                 <p role="tooltip" id="tooltip-delete-{{ item.key }}" aria-hidden="true">{% if not item.is_finished() %}Cancel and d{% else %}D{% endif %}elete this analysis and any underlying analyses</p>
             </li>
             {% endif %}
-        {% if item.type in processors %}{% set processor_options = processors[item.type].get_options(parent_dataset=dataset, config=__config) %}{% endif %}
-        {% for option in item.parameters %}
-            {% if item.type in processors and option in processor_options and item.parameters[option] != "" and not processor_options[option].get("sensitive") %}
-                {% include 'components/result-parameter.html' %}
-        {% endif %}
+        {% for entry in item.get_displayable_parameters(config=__config) %}
+            {% set option = entry.key %}
+            {% set processor_options = {entry.key: entry.schema} %}
+            {% include 'components/result-parameter.html' %}
         {% endfor %}
     </ul>
 </div>