diff --git a/ckanext/datapusher_plus/jobs/stages/metadata.py b/ckanext/datapusher_plus/jobs/stages/metadata.py index e24ec5c..14f0d5a 100644 --- a/ckanext/datapusher_plus/jobs/stages/metadata.py +++ b/ckanext/datapusher_plus/jobs/stages/metadata.py @@ -71,10 +71,11 @@ def process(self, context: ProcessingContext) -> ProcessingContext: if raw_connection: raw_connection.close() - # Update resource metadata - self._update_resource_metadata(context) - - # Set alias and calculate record count + # Set alias and calculate record count first, before updating + # resource metadata. datastore_create internally modifies the resource + # (e.g. setting datastore_active, total_record_count), so we must + # update our preview fields AFTER this call to avoid them being + # overwritten. dsu.send_resource_to_datastore( resource=None, resource_id=context.resource["id"], @@ -87,6 +88,10 @@ def process(self, context: ProcessingContext) -> ProcessingContext: if alias: context.logger.info(f'Created alias "{alias}" for "{context.resource_id}"...') + # Update resource metadata (preview fields, etc.) AFTER + # send_resource_to_datastore to ensure our fields persist. + self._update_resource_metadata(context) + metadata_elapsed = time.perf_counter() - metadata_start context.logger.info( f"RESOURCE METADATA UPDATES DONE! Resource metadata updated in " @@ -372,11 +377,18 @@ def _update_resource_metadata(self, context: ProcessingContext) -> None: """ Update resource metadata fields. + Re-fetches the resource from CKAN to get the latest state + (after datastore_create may have modified it), then sets + our metadata fields and persists them. + Args: context: Processing context """ record_count = context.dataset_stats.get("RECORD_COUNT", 0) + # Re-fetch the resource to get the latest state after datastore_create + context.resource = dsu.get_resource(context.resource_id) + context.resource["datastore_active"] = True context.resource["total_record_count"] = record_count diff --git a/ckanext/datapusher_plus/jobs_legacy.py b/ckanext/datapusher_plus/jobs_legacy.py index 23f57bd..ce8912b 100644 --- a/ckanext/datapusher_plus/jobs_legacy.py +++ b/ckanext/datapusher_plus/jobs_legacy.py @@ -1575,18 +1575,11 @@ def _push_to_datastore( raw_connection.commit() raw_connection.close() - resource["datastore_active"] = True - resource["total_record_count"] = record_count - if conf.PREVIEW_ROWS < record_count or (conf.PREVIEW_ROWS > 0): - resource["preview"] = True - resource["preview_rows"] = copied_count - else: - resource["preview"] = False - resource["preview_rows"] = None - resource["partial_download"] = False - dsu.update_resource(resource) - # tell CKAN to calculate_record_count and set alias if set + # This must happen BEFORE updating resource metadata because + # datastore_create internally modifies the resource (e.g. setting + # datastore_active, total_record_count), which could overwrite + # our preview fields. dsu.send_resource_to_datastore( resource=None, resource_id=resource["id"], @@ -1599,6 +1592,20 @@ def _push_to_datastore( if alias: logger.info(f'Created alias "{alias}" for "{resource_id}"...') + # Re-fetch the resource to get the latest state after datastore_create + resource = dsu.get_resource(resource_id) + + resource["datastore_active"] = True + resource["total_record_count"] = record_count + if conf.PREVIEW_ROWS < record_count or (conf.PREVIEW_ROWS > 0): + resource["preview"] = True + resource["preview_rows"] = copied_count + else: + resource["preview"] = False + resource["preview_rows"] = None + resource["partial_download"] = False + dsu.update_resource(resource) + metadata_elapsed = time.perf_counter() - metadata_start logger.info( f"RESOURCE METADATA UPDATES DONE! Resource metadata updated in {metadata_elapsed:,.2f} seconds."