From 4e625f7ef45828a0c22e18652857d1884bb9f859 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:22:33 +0000 Subject: [PATCH 1/2] Initial plan From e5454358326851534ce7fdd51fb4b49cb71d41e9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:29:58 +0000 Subject: [PATCH 2/2] fix: move resource metadata update after datastore_create to persist preview fields The preview and preview_rows fields were being set on the resource via resource_update, but then immediately overwritten when datastore_create (called via send_resource_to_datastore with calculate_record_count=True) internally modified the resource metadata. Fix: Move _update_resource_metadata() call to AFTER send_resource_to_datastore() and re-fetch the resource to get the latest state. This ensures our preview fields are set last and persist correctly. Applied to both the new pipeline (metadata.py) and legacy code (jobs_legacy.py). Co-authored-by: minhajuddin2510 <68331751+minhajuddin2510@users.noreply.github.com> --- .../datapusher_plus/jobs/stages/metadata.py | 20 ++++++++++--- ckanext/datapusher_plus/jobs_legacy.py | 29 ++++++++++++------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/ckanext/datapusher_plus/jobs/stages/metadata.py b/ckanext/datapusher_plus/jobs/stages/metadata.py index e24ec5c..14f0d5a 100644 --- a/ckanext/datapusher_plus/jobs/stages/metadata.py +++ b/ckanext/datapusher_plus/jobs/stages/metadata.py @@ -71,10 +71,11 @@ def process(self, context: ProcessingContext) -> ProcessingContext: if raw_connection: raw_connection.close() - # Update resource metadata - self._update_resource_metadata(context) - - # Set alias and calculate record count first, before updating # resource metadata. datastore_create internally modifies the resource # (e.g.
setting datastore_active, total_record_count), so we must # update our preview fields AFTER this call to avoid them being # overwritten. dsu.send_resource_to_datastore( resource=None, resource_id=context.resource["id"], @@ -87,6 +88,10 @@ def process(self, context: ProcessingContext) -> ProcessingContext: if alias: context.logger.info(f'Created alias "{alias}" for "{context.resource_id}"...') + # Update resource metadata (preview fields, etc.) AFTER + # send_resource_to_datastore to ensure our fields persist. + self._update_resource_metadata(context) + metadata_elapsed = time.perf_counter() - metadata_start context.logger.info( f"RESOURCE METADATA UPDATES DONE! Resource metadata updated in " @@ -372,11 +377,18 @@ def _update_resource_metadata(self, context: ProcessingContext) -> None: """ Update resource metadata fields. + Re-fetches the resource from CKAN to get the latest state + (after datastore_create may have modified it), then sets + our metadata fields and persists them. + Args: context: Processing context """ record_count = context.dataset_stats.get("RECORD_COUNT", 0) + # Re-fetch the resource to get the latest state after datastore_create + context.resource = dsu.get_resource(context.resource_id) + context.resource["datastore_active"] = True context.resource["total_record_count"] = record_count diff --git a/ckanext/datapusher_plus/jobs_legacy.py b/ckanext/datapusher_plus/jobs_legacy.py index 23f57bd..ce8912b 100644 --- a/ckanext/datapusher_plus/jobs_legacy.py +++ b/ckanext/datapusher_plus/jobs_legacy.py @@ -1575,18 +1575,11 @@ def _push_to_datastore( raw_connection.commit() raw_connection.close() - resource["datastore_active"] = True - resource["total_record_count"] = record_count - if conf.PREVIEW_ROWS < record_count or (conf.PREVIEW_ROWS > 0): - resource["preview"] = True - resource["preview_rows"] = copied_count - else: - resource["preview"] = False - resource["preview_rows"] = None - resource["partial_download"] = False -
dsu.update_resource(resource) - # tell CKAN to calculate_record_count and set alias if set + # This must happen BEFORE updating resource metadata because + # datastore_create internally modifies the resource (e.g. setting + # datastore_active, total_record_count), which could overwrite + # our preview fields. dsu.send_resource_to_datastore( resource=None, resource_id=resource["id"], @@ -1599,6 +1592,20 @@ def _push_to_datastore( if alias: logger.info(f'Created alias "{alias}" for "{resource_id}"...') + # Re-fetch the resource to get the latest state after datastore_create + resource = dsu.get_resource(resource_id) + + resource["datastore_active"] = True + resource["total_record_count"] = record_count + if conf.PREVIEW_ROWS < record_count or (conf.PREVIEW_ROWS > 0): + resource["preview"] = True + resource["preview_rows"] = copied_count + else: + resource["preview"] = False + resource["preview_rows"] = None + resource["partial_download"] = False + dsu.update_resource(resource) + metadata_elapsed = time.perf_counter() - metadata_start logger.info( f"RESOURCE METADATA UPDATES DONE! Resource metadata updated in {metadata_elapsed:,.2f} seconds."