Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/lib/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ def create_standalone(self, item_ids=None):
except AttributeError:
standalone.board = self.type

standalone.type = top_parent.type
standalone.adopt_type(top_parent.type)

standalone.detach()
standalone.delete_parameter("key_parent")
Expand Down
87 changes: 86 additions & 1 deletion common/lib/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2046,14 +2046,85 @@ def get_place_in_queue(self, update=False):

def get_own_processor(self):
    """
    Get the processor class matching this dataset's data shape.

    The type is read from the `type` parameter if one is set, and from the
    dataset record's `type` otherwise. Usually this is the processor that
    produced the dataset. For datasets whose `type` was adopted from
    another datasource (e.g. by a filter that copies its parent's NDJSON
    content verbatim) it is instead the processor whose `map_item` /
    extension match the result file's contents. See
    `get_producer_processor` for the producing processor.

    :return: Processor class, or `None` if no processor is registered
    for the resolved type.
    """
    record_type = self.data.get("type")
    type_key = self.parameters.get("type", record_type)

    return self.modules.processors.get(type_key)

def get_producer_processor(self):
    """
    Get the processor class that actually produced this dataset.

    When `adopt_type` has rewritten this dataset's `type`, the original
    type is kept under `parameters["producer_type"]` and is resolved here.
    When no producer type is recorded (or the recorded value is empty),
    this delegates to `get_own_processor()`, so both methods always agree
    for datasets whose `type` was never rewritten. UI code that renders
    the parameter panel should use this so labels/tooltips come from the
    producing processor's options schema, not from a possibly-divergent
    data-shape processor.

    :return: Processor class, or `None` if not available.
    """
    producer_type = self.parameters.get("producer_type")
    if not producer_type:
        # nothing was stashed by adopt_type: the producing processor and
        # the data-shape processor are one and the same
        return self.get_own_processor()

    return self.modules.processors.get(producer_type)

def adopt_type(self, new_type):
    """
    Rewrite this dataset's `type` to reflect a change in the result file's
    data shape (e.g. after a filter has copied its parent's NDJSON content
    verbatim into its result). The original producing processor's type is
    preserved under `parameters["producer_type"]` on the first call, so
    the UI can still look up the right options schema.

    This is the only sanctioned path for rewriting `type` post-creation;
    direct attribute assignment is blocked by `__setattr__`. `datasource`
    is orthogonal -- adjust it separately via `change_datasource` if the
    platform grouping also needs to change.

    Calling this with the type the dataset already has is a no-op. If the
    dataset has no type yet there is no producer to preserve, so no
    `producer_type` is recorded in that case.

    NOTE(review): `get_own_processor()` consults `parameters["type"]`
    before the dataset record's `type`; if that parameter is set, confirm
    it does not mask the adopted type.

    :param str new_type: The type to adopt.
    """
    current_type = self.data.get("type")
    if new_type == current_type:
        return

    if current_type and "producer_type" not in self.parameters:
        # preserve only the original producer; chained adopt_type calls
        # must not overwrite the first one. An empty current type is
        # skipped: storing it would leave a producer_type that resolves
        # to no processor at all.
        self.parameters = {**self.parameters, "producer_type": current_type}

    # bypass the __setattr__ guard via the underlying DB update path
    self.db.update("datasets", where={"key": self.key}, data={"type": new_type})
    self.data["type"] = new_type

def get_displayable_parameters(self, config=None):
    """
    Pair this dataset's parameters with the producing processor's options
    schema, for rendering in the UI's parameter panel.

    The schema comes from `get_producer_processor()`, queried with this
    dataset's top parent as `parent_dataset`. A parameter is left out when
    it has no entry in the schema, when its value is an empty string, or
    when its schema entry is flagged `sensitive`.

    :param config: Configuration reader, passed through to get_options.
    :return list: List of dicts: {"key", "value", "schema"}; empty if no
    producing processor is available.
    """
    producer = self.get_producer_processor()
    if not producer:
        return []

    schema = producer.get_options(parent_dataset=self.top_parent(), config=config)

    displayable = []
    for name, value in self.parameters.items():
        if name in schema and value != "":
            option = schema[name]
            if option.get("sensitive"):
                # never expose sensitive values in the parameter panel
                continue
            displayable.append({"key": name, "value": value, "schema": option})

    return displayable

def get_available_processors(self, config=None, exclude_hidden=False):
"""
Get list of processors that may be run for this dataset
Expand Down Expand Up @@ -2780,6 +2851,20 @@ def __setattr__(self, attr, value):
super().__setattr__(attr, value)
return

# `type` describes the data shape of the result file. It may diverge
# from the producing processor's type (e.g. a filter that copies its
# parent's NDJSON content verbatim). Direct rewrites would lose the
# producer identity needed by UI code; force callers through
# adopt_type() which preserves the original under parameters[
# "producer_type"].
if attr == "type" and self.data and self.data.get("type") and value != self.data["type"]:
raise AttributeError(
"Refusing to rewrite DataSet.type from %r to %r via direct attribute "
"assignment. Use DataSet.adopt_type() so the original producing "
"processor is preserved under parameters['producer_type']."
% (self.data["type"], value)
)

if attr not in self.data:
self.parameters[attr] = value
attr = "parameters"
Expand Down
11 changes: 9 additions & 2 deletions processors/conversion/merge_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,5 +287,12 @@ def after_process(self):
else:
standalone.update_label(f"(Merged) {self.source_dataset.get_label()}")

standalone.parameters = {**self.dataset.parameters, "board": "merged"}
standalone.type = self.source_dataset.type
# Wholesale-overwriting parameters here would clobber the producer_type
# stashed by create_standalone -> adopt_type; preserve it explicitly so
# the UI can still resolve this dataset back to merge-datasets.
standalone.parameters = {
**self.dataset.parameters,
"board": "merged",
"producer_type": standalone.parameters.get("producer_type", self.type),
}
standalone.adopt_type(self.source_dataset.type)
6 changes: 3 additions & 3 deletions processors/filtering/column_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def after_process(self):
# correctly on the filtered result (especially for NDJSON). Unlike
# BaseFilter, we deliberately keep this dataset attached to its parent
# rather than promoting it to a standalone top-level dataset.
self.dataset.type = self.source_dataset.type
self.dataset.datasource = self.source_dataset.parameters.get(
"datasource", self.source_dataset.type
self.dataset.adopt_type(self.source_dataset.type)
self.dataset.change_datasource(
self.source_dataset.parameters.get("datasource", self.source_dataset.type)
)
2 changes: 1 addition & 1 deletion processors/filtering/tiktok_refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def after_process(self):
# Request standalone
standalone = self.create_standalone()
# Update the type
standalone.type = "tiktok-urls-search"
standalone.adopt_type("tiktok-urls-search")

@classmethod
def is_filter(cls):
Expand Down
9 changes: 4 additions & 5 deletions webtool/templates/components/result-parameters.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,10 @@
<p role="tooltip" id="tooltip-delete-{{ item.key }}" aria-hidden="true">{% if not item.is_finished() %}Cancel and d{% else %}D{% endif %}elete this analysis and any underlying analyses</p>
</li>
{% endif %}
{% if item.type in processors %}{% set processor_options = processors[item.type].get_options(parent_dataset=dataset, config=__config) %}{% endif %}
{% for option in item.parameters %}
{% if item.type in processors and option in processor_options and item.parameters[option] != "" and not processor_options[option].get("sensitive") %}
{% include 'components/result-parameter.html' %}
{% endif %}
{% for entry in item.get_displayable_parameters(config=__config) %}
{% set option = entry.key %}
{% set processor_options = {entry.key: entry.schema} %}
{% include 'components/result-parameter.html' %}
{% endfor %}
</ul>
</div>
Loading