Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions ckanext/datapusher_plus/jobs/stages/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ def process(self, context: ProcessingContext) -> ProcessingContext:
if raw_connection:
raw_connection.close()

# Update resource metadata
self._update_resource_metadata(context)

# Set alias and calculate record count
# Set alias and calculate record count first, before updating
# resource metadata. datastore_create internally modifies the resource
# (e.g. setting datastore_active, total_record_count), so we must
# update our preview fields AFTER this call to avoid them being
# overwritten.
dsu.send_resource_to_datastore(
resource=None,
resource_id=context.resource["id"],
Expand All @@ -87,6 +88,10 @@ def process(self, context: ProcessingContext) -> ProcessingContext:
if alias:
context.logger.info(f'Created alias "{alias}" for "{context.resource_id}"...')

# Update resource metadata (preview fields, etc.) AFTER
# send_resource_to_datastore to ensure our fields persist.
self._update_resource_metadata(context)

metadata_elapsed = time.perf_counter() - metadata_start
context.logger.info(
f"RESOURCE METADATA UPDATES DONE! Resource metadata updated in "
Expand Down Expand Up @@ -372,11 +377,18 @@ def _update_resource_metadata(self, context: ProcessingContext) -> None:
"""
Update resource metadata fields.

Re-fetches the resource from CKAN to get the latest state
(after datastore_create may have modified it), then sets
our metadata fields and persists them.

Args:
context: Processing context
"""
record_count = context.dataset_stats.get("RECORD_COUNT", 0)

# Re-fetch the resource to get the latest state after datastore_create
context.resource = dsu.get_resource(context.resource_id)

context.resource["datastore_active"] = True
context.resource["total_record_count"] = record_count

Expand Down
29 changes: 18 additions & 11 deletions ckanext/datapusher_plus/jobs_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1575,18 +1575,11 @@ def _push_to_datastore(
raw_connection.commit()
raw_connection.close()

resource["datastore_active"] = True
resource["total_record_count"] = record_count
if conf.PREVIEW_ROWS < record_count or (conf.PREVIEW_ROWS > 0):
resource["preview"] = True
resource["preview_rows"] = copied_count
else:
resource["preview"] = False
resource["preview_rows"] = None
resource["partial_download"] = False
dsu.update_resource(resource)

# tell CKAN to calculate_record_count and set alias if set
# This must happen BEFORE updating resource metadata because
# datastore_create internally modifies the resource (e.g. setting
# datastore_active, total_record_count), which could overwrite
# our preview fields.
dsu.send_resource_to_datastore(
resource=None,
resource_id=resource["id"],
Expand All @@ -1599,6 +1592,20 @@ def _push_to_datastore(
if alias:
logger.info(f'Created alias "{alias}" for "{resource_id}"...')

# Re-fetch the resource to get the latest state after datastore_create
resource = dsu.get_resource(resource_id)

resource["datastore_active"] = True
resource["total_record_count"] = record_count
if conf.PREVIEW_ROWS < record_count or (conf.PREVIEW_ROWS > 0):
resource["preview"] = True
resource["preview_rows"] = copied_count
else:
resource["preview"] = False
resource["preview_rows"] = None
resource["partial_download"] = False
dsu.update_resource(resource)

metadata_elapsed = time.perf_counter() - metadata_start
logger.info(
f"RESOURCE METADATA UPDATES DONE! Resource metadata updated in {metadata_elapsed:,.2f} seconds."
Expand Down