diff --git a/backend/lib/processor.py b/backend/lib/processor.py index cb798b1dc..c58908774 100644 --- a/backend/lib/processor.py +++ b/backend/lib/processor.py @@ -744,7 +744,7 @@ def create_standalone(self, item_ids=None): except AttributeError: standalone.board = self.type - standalone.type = top_parent.type + standalone.adopt_type(top_parent.type) standalone.detach() standalone.delete_parameter("key_parent") diff --git a/common/lib/dataset.py b/common/lib/dataset.py index 37aaed1ba..2c5a97c03 100644 --- a/common/lib/dataset.py +++ b/common/lib/dataset.py @@ -2046,7 +2046,14 @@ def get_place_in_queue(self, update=False): def get_own_processor(self): """ - Get the processor class that produced this dataset + Get the processor class corresponding to this dataset's data shape. + + Normally this is the processor that produced the dataset, but for + datasets whose `type` was adopted from another datasource (e.g. by a + filter that copies its parent's NDJSON content verbatim), this is the + processor whose `map_item` / extension match the result file's + contents -- not necessarily the producing processor. See + `get_producer_processor` for the latter. :return: Processor class, or `None` if not available. """ @@ -2054,6 +2061,70 @@ def get_own_processor(self): return self.modules.processors.get(processor_type) + def get_producer_processor(self): + """ + Get the processor class that actually produced this dataset. + + Looks up `parameters["producer_type"]` and falls back to the dataset's + current `type` when no producer type was stored by `adopt_type`. UI code + that renders the parameter panel should use this so labels/tooltips come + from the producing processor's options schema, not a divergent one. + + :return: Processor class, or `None` if not available. 
+ """ + producer_type = self.parameters.get("producer_type", self.data.get("type")) + return self.modules.processors.get(producer_type) + + def adopt_type(self, new_type): + """ + Rewrite this dataset's `type` to reflect a change in the result file's + data shape (e.g. after a filter has copied its parent's NDJSON content + verbatim into its result). The original producing processor's type is + preserved under `parameters["producer_type"]` on the first call, so + the UI can still look up the right options schema. + + This is the only sanctioned path for rewriting `type` post-creation; + direct attribute assignment is blocked by `__setattr__`. `datasource` + is orthogonal -- adjust it separately via `change_datasource` if the + platform grouping also needs to change. + + :param str new_type: The type to adopt; no-op if it equals the current type. + """ + current_type = self.data.get("type") + if new_type == current_type: + return + + if "producer_type" not in self.parameters: + # preserve only the original producer; chained adopt_type calls + # must not overwrite the first one + self.parameters = {**self.parameters, "producer_type": current_type} + + # update the DB row and cached data directly, bypassing the __setattr__ guard + self.db.update("datasets", where={"key": self.key}, data={"type": new_type}) + self.data["type"] = new_type + + def get_displayable_parameters(self, config=None): + """ + Return parameters annotated with the producing processor's options + schema, suitable for rendering in the UI's parameter panel. + + The schema is sourced from `get_producer_processor()` so the original + producer's labels/tooltips survive an `adopt_type` rewrite. Sensitive + options, empty-string values and parameters not in the schema are skipped. + + :param config: Configuration reader, passed through to get_options. + :return list: Dicts with "key", "value", "schema"; empty if no producer is found. 
+ """ + producer = self.get_producer_processor() + if not producer: + return [] + options = producer.get_options(parent_dataset=self.top_parent(), config=config) + return [ + {"key": k, "value": v, "schema": options[k]} + for k, v in self.parameters.items() + if k in options and v != "" and not options[k].get("sensitive") + ] + def get_available_processors(self, config=None, exclude_hidden=False): """ Get list of processors that may be run for this dataset @@ -2780,6 +2851,20 @@ def __setattr__(self, attr, value): super().__setattr__(attr, value) return + # `type` describes the data shape of the result file and may diverge + # from the producing processor's type (e.g. a filter that copies its + # parent's NDJSON content verbatim). Changing an already-set type here + # would lose the producer identity needed by UI code, so that raises; + # callers must use adopt_type(), which keeps the original under + # parameters["producer_type"]. Same-value writes do not trip this guard. + if attr == "type" and self.data and self.data.get("type") and value != self.data["type"]: + raise AttributeError( + "Refusing to rewrite DataSet.type from %r to %r via direct attribute " + "assignment. Use DataSet.adopt_type() so the original producing " + "processor is preserved under parameters['producer_type']." 
+ % (self.data["type"], value) + ) + if attr not in self.data: self.parameters[attr] = value attr = "parameters" diff --git a/processors/conversion/merge_datasets.py b/processors/conversion/merge_datasets.py index 94dbc83d9..9bec40a63 100644 --- a/processors/conversion/merge_datasets.py +++ b/processors/conversion/merge_datasets.py @@ -287,5 +287,12 @@ def after_process(self): else: standalone.update_label(f"(Merged) {self.source_dataset.get_label()}") - standalone.parameters = {**self.dataset.parameters, "board": "merged"} - standalone.type = self.source_dataset.type + # Replacing parameters wholesale would drop any producer_type stashed by + # create_standalone -> adopt_type, so carry it over (defaulting to this + # processor's own type) before adopting the source dataset's type. + standalone.parameters = { + **self.dataset.parameters, + "board": "merged", + "producer_type": standalone.parameters.get("producer_type", self.type), + } + standalone.adopt_type(self.source_dataset.type) diff --git a/processors/filtering/column_filter.py b/processors/filtering/column_filter.py index 557bbd8aa..8b0729d9c 100644 --- a/processors/filtering/column_filter.py +++ b/processors/filtering/column_filter.py @@ -298,7 +298,7 @@ def after_process(self): # correctly on the filtered result (especially for NDJSON). Unlike # BaseFilter, we deliberately keep this dataset attached to its parent # rather than promoting it to a standalone top-level dataset. 
- self.dataset.type = self.source_dataset.type - self.dataset.datasource = self.source_dataset.parameters.get( - "datasource", self.source_dataset.type + self.dataset.adopt_type(self.source_dataset.type) + self.dataset.change_datasource( + self.source_dataset.parameters.get("datasource", self.source_dataset.type) ) diff --git a/processors/filtering/tiktok_refresh.py b/processors/filtering/tiktok_refresh.py index 4789543aa..5de1fe473 100644 --- a/processors/filtering/tiktok_refresh.py +++ b/processors/filtering/tiktok_refresh.py @@ -74,7 +74,7 @@ def after_process(self): # Request standalone standalone = self.create_standalone() # Update the type - standalone.type = "tiktok-urls-search" + standalone.adopt_type("tiktok-urls-search") @classmethod def is_filter(cls): diff --git a/webtool/templates/components/result-parameters.html b/webtool/templates/components/result-parameters.html index 74d859f55..d34dbec7f 100644 --- a/webtool/templates/components/result-parameters.html +++ b/webtool/templates/components/result-parameters.html @@ -45,11 +45,10 @@ {% endif %} - {% if item.type in processors %}{% set processor_options = processors[item.type].get_options(parent_dataset=dataset, config=__config) %}{% endif %} - {% for option in item.parameters %} - {% if item.type in processors and option in processor_options and item.parameters[option] != "" and not processor_options[option].get("sensitive") %} - {% include 'components/result-parameter.html' %} - {% endif %} + {% for entry in item.get_displayable_parameters(config=__config) %} + {% set option = entry.key %} + {% set processor_options = {entry.key: entry.schema} %}{# re-bind the `option` / `processor_options` names the previous loop had in scope for the include #} + {% include 'components/result-parameter.html' %} {% endfor %} \ No newline at end of file