Skip to content

Commit 1ae59b9

Browse files
author
erangi-ar
committed
Merge branch 'deployment-est-gpu-Bimsara' of https://github.com/rootcodelabs/Global-Classifier into deployment-est-gpu
2 parents 527cad0 + 6c11424 commit 1ae59b9

4 files changed

Lines changed: 40 additions & 6 deletions

File tree

DSL/CronManager/DSL/callback_formatter.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,4 @@ callback_format:
22
trigger: off
33
type: exec
44
command: "../app/scripts/callback_format.sh"
5-
allowedEnvs: ['filePath', 'results', 'taskId']
5+
allowedEnvs: ['filePath', 'results', 'taskId', 'metricsFile']

DSL/CronManager/script/callback_format.sh

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@
33
echo "Started Shell Script for Dataset Generation Callback Processing"
44

55
# Check if environment variables are set
6-
if [ -z "$filePath" ] || [ -z "$results" ] || [ -z "$taskId" ]; then
7-
echo "Please set the filePath, results, and taskId environment variables."
6+
if [ -z "$filePath" ] || [ -z "$results" ] || [ -z "$taskId" ] || [ -z "$metricsFile" ]; then
7+
echo "Please set the filePath, results, taskId, and metricsFile environment variables."
88
exit 1
99
fi
1010

@@ -62,6 +62,7 @@ python3 "$CALLBACK_SCRIPT" \
6262
--encoded-results "$results" \
6363
--output-json "$temp_response" \
6464
--session-id "$taskId" \
65+
--metrics-file "$metricsFile" \
6566
> /tmp/callback_stdout.log 2> /tmp/callback_stderr.log
6667
exit_code=$?
6768

DSL/Ruuter.public/global-classifier/POST/data/callback.yml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@ declare:
2323
- field: results
2424
type: array
2525
description: "List of agency IDs for which the dataset generation was completed"
26+
- field: metrics_file
27+
type: string
28+
description: "Path to the metrics file for the dataset generation"
2629

2730
log_callback_received:
2831
log: "Dataset generation callback received - Task ID: ${incoming.body.task_id}, Status: ${incoming.body.status}, File Path: ${incoming.body.filePath}, Result: ${incoming.body.results}"
@@ -35,10 +38,11 @@ extract_callback_data:
3538
message: ${incoming.body.message}
3639
file_path: ${incoming.body.filePath}
3740
results: ${encodeURIComponent(JSON.stringify(incoming.body.results))}
41+
metrics_file: ${incoming.body.metrics_file}
3842
next: log_detailed_info
3943

4044
log_detailed_info:
41-
log: "Callback Details - Task: ${task_id}, Status: ${status}, Message: ${message}, filePath: ${file_path}, results: ${results}"
45+
log: "Callback Details - Task: ${task_id}, Status: ${status}, Message: ${message}, filePath: ${file_path}, results: ${results}, metrics_file: ${metrics_file}"
4246
next: check_for_request_data
4347

4448
check_for_request_data:
@@ -76,6 +80,7 @@ execute_cron_manager:
7680
taskId: ${task_id}
7781
filePath: ${file_path}
7882
results: ${results}
83+
metricsFile: ${metrics_file}
7984
result: cron_res
8085
next: assign_success_cron_response
8186

src/s3_dataset_processor/dataset_generation_callback_processor.py

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,23 @@ def upload_csv_to_s3(local_csv_path: str, dataset_id: int) -> None:
101101
raise RuntimeError(f"Failed to upload CSV to S3: {response.text}")
102102

103103

104+
def upload_metrics_to_s3(metrics_file_path: str, dataset_id: int) -> None:
    """Upload the metrics file to S3 using S3Ferry.

    Args:
        metrics_file_path: Path of the metrics file on the local file
            system. A leading ``/app/`` is stripped before the path is
            handed to S3Ferry as the ``FS`` source.
        dataset_id: Dataset identifier; the file is stored under
            ``/datasets/{dataset_id}/metrics.json``.

    Raises:
        RuntimeError: If the transfer response status is neither 200 nor 201.
    """
    destination_file_path = f"/datasets/{dataset_id}/metrics.json"

    # Strip only a *leading* "/app/". A blanket str.replace would also delete
    # any later "/app/" segment (e.g. "/app/data/app/m.json" -> "datam.json")
    # and point the transfer at a file that does not exist.
    # NOTE(review): presumably S3Ferry resolves FS paths relative to /app —
    # confirm against the S3Ferry deployment.
    app_prefix = "/app/"
    if metrics_file_path.startswith(app_prefix):
        source_file_path = metrics_file_path[len(app_prefix):]
    else:
        source_file_path = metrics_file_path

    logger.info(f"Uploading {metrics_file_path} to S3 as {destination_file_path}")
    response = s3_ferry_service.transfer_file(
        destination_file_path=destination_file_path,
        destination_storage_type="S3",
        source_file_path=source_file_path,
        source_storage_type="FS",
    )
    logger.info(f"Metrics S3 upload status: {response.status_code}")
    logger.info(f"Metrics S3 upload response: {response.text}")

    # Anything other than 200/201 is treated as a failed transfer.
    if response.status_code not in [200, 201]:
        raise RuntimeError(f"Failed to upload metrics to S3: {response.text}")
119+
120+
104121
def notify_progress_uploading_to_s3(session_id: int) -> None:
105122
"""Notify progress update: New Dataset Uploading to S3."""
106123
payload = {
@@ -239,7 +256,7 @@ def _log_cleanup_results(cleanup_summary: list) -> None:
239256

240257

241258
def process_callback_background(
242-
file_path: str, encoded_results: str, session_id: int
259+
file_path: str, encoded_results: str, session_id: int, metrics_file: str
243260
) -> None:
244261
"""Process the dataset generation callback: upload CSV to S3 and send status update."""
245262
try:
@@ -302,6 +319,13 @@ def process_callback_background(
302319

303320
notify_dataset_update(output_csv_path)
304321
upload_csv_to_s3(output_csv_path, dataset_id)
322+
323+
try:
324+
upload_metrics_to_s3(metrics_file, dataset_id)
325+
logger.info(f"Metrics file uploaded successfully for dataset {dataset_id}")
326+
except Exception as e:
327+
logger.warning(f"S3 upload failure for metrics file: {e}")
328+
305329
send_status_update(dataset_id, encoded_results)
306330

307331
logger.info("Processing completed successfully")
@@ -330,6 +354,9 @@ def parse_args():
330354
parser.add_argument(
331355
"--session-id", required=True, help="Session ID for the callback"
332356
)
357+
parser.add_argument(
358+
"--metrics-file", required=True, help="Metrics file path for the callback"
359+
)
333360
return parser.parse_args()
334361

335362

@@ -339,10 +366,11 @@ def main():
339366
try:
340367
logger.info("Starting callback processing...")
341368
logger.info(f"File path: {args.file_path}")
369+
logger.info(f"Metrics file: {args.metrics_file}")
342370
logger.info(f"Encoded results length: {len(args.encoded_results)} characters")
343371

344372
process_callback_background(
345-
args.file_path, args.encoded_results, args.session_id
373+
args.file_path, args.encoded_results, args.session_id, args.metrics_file
346374
)
347375

348376
response = {

0 commit comments

Comments
 (0)