diff --git a/cloudpub/error.py b/cloudpub/error.py index 70d9aef..c73ab0d 100644 --- a/cloudpub/error.py +++ b/cloudpub/error.py @@ -27,3 +27,11 @@ class ConflictError(RuntimeError): class Timeout(Exception): """Represent a missing resource.""" + + +class CertificationError(InvalidStateError): + """Report Azure Marketplace certification failure.""" + + +class InvalidSchema(RuntimeError): + """Report when an invalid schema is returned from cloud provider API.""" diff --git a/cloudpub/models/ms_azure.py b/cloudpub/models/ms_azure.py index c43e286..54c5bdb 100644 --- a/cloudpub/models/ms_azure.py +++ b/cloudpub/models/ms_azure.py @@ -67,7 +67,7 @@ class ConfigureStatus(AttrsJSONDecodeMixin): resource_uri: Optional[str] = field(metadata={"alias": "resourceUri", "hide_unset": True}) """The resource URI related to the configure job.""" - errors: List[str] + errors: List[Dict[str, Any]] """List of errors when the ``job_result`` is ``failed``.""" diff --git a/cloudpub/ms_azure/service.py b/cloudpub/ms_azure/service.py index fbbaff8..e349477 100644 --- a/cloudpub/ms_azure/service.py +++ b/cloudpub/ms_azure/service.py @@ -8,12 +8,18 @@ from deepdiff import DeepDiff from requests import HTTPError from tenacity import RetryError, Retrying, retry -from tenacity.retry import retry_if_exception_type, retry_if_result +from tenacity.retry import retry_if_exception_type, retry_if_not_exception_type, retry_if_result from tenacity.stop import stop_after_attempt, stop_after_delay from tenacity.wait import wait_fixed from cloudpub.common import BaseService -from cloudpub.error import ConflictError, InvalidStateError, NotFoundError, Timeout +from cloudpub.error import ( + CertificationError, + ConflictError, + InvalidStateError, + NotFoundError, + Timeout, +) from cloudpub.models.ms_azure import ( RESOURCE_MAPING, AzureResource, @@ -43,6 +49,7 @@ TechnicalConfigLookUpData, create_disk_version_from_scratch, is_azure_job_not_complete, + is_certification_error, is_sas_present, logdiff, seek_disk_version, @@ -190,11 +197,14 @@ def query_job_status(self, job_id: str) -> ConfigureStatus: Returns: ConfigureStatus: The ConfigureStatus from JobID Raises: + CertificationError: If the job failed due to Certifications issues. InvalidStateError: If the job has failed. """ job_details = self._query_job_details(job_id=job_id) if job_details.job_result == "failed": error_message = f"Job {job_id} failed: \n{job_details.errors}" + if is_certification_error(job_details.errors): + self._raise_error(CertificationError, error_message) self._raise_error(InvalidStateError, error_message) elif job_details.job_result == "succeeded": log.debug("Job %s succeeded", job_id) @@ -731,6 +741,7 @@ def _is_submission_in_preview(self, current: ProductSubmission) -> bool: @retry( wait=wait_fixed(wait=60), stop=stop_after_attempt(3), + retry=retry_if_not_exception_type(CertificationError), reraise=True, ) def _publish_preview( @@ -754,16 +765,18 @@ def _publish_preview( if res.job_result != 'succeeded' or not self.get_submission_state( product.id, state="preview" ): - errors = "\n".join(res.errors) failure_msg = ( f"Failed to submit the product {product_name} ({product.id}) to preview. " - f"Status: {res.job_result} Errors: {errors}" + f"Status: {res.job_result} Errors: {res.errors}" ) + if is_certification_error(res.errors): + self._raise_error(CertificationError, failure_msg) raise RuntimeError(failure_msg) @retry( wait=wait_fixed(wait=60), stop=stop_after_attempt(3), + retry=retry_if_not_exception_type(CertificationError), reraise=True, ) def _publish_live(self, product: Product, product_name: str) -> None: @@ -781,11 +794,12 @@ def _publish_live(self, product: Product, product_name: str) -> None: res = self.submit_to_status(product_id=product.id, status='live') if res.job_result != 'succeeded' or not self.get_submission_state(product.id, state="live"): - errors = "\n".join(res.errors) failure_msg = ( f"Failed to submit the product {product_name} ({product.id}) to live. " - f"Status: {res.job_result} Errors: {errors}" + f"Status: {res.job_result} Errors: {res.errors}" ) + if is_certification_error(res.errors): + self._raise_error(CertificationError, failure_msg) raise RuntimeError(failure_msg) def _overwrite_disk_version( diff --git a/cloudpub/ms_azure/utils.py b/cloudpub/ms_azure/utils.py index 859c336..a95cac3 100644 --- a/cloudpub/ms_azure/utils.py +++ b/cloudpub/ms_azure/utils.py @@ -6,6 +6,7 @@ from deepdiff import DeepDiff from cloudpub.common import PublishingMetadata # Cannot circular import AzurePublishingMetadata +from cloudpub.error import InvalidSchema from cloudpub.models.ms_azure import ( ConfigureStatus, DiskVersion, @@ -544,3 +545,29 @@ def logdiff(diff: DeepDiff) -> None: """Log the offer diff if it exists.""" if diff: log.warning("Found the following offer diff before publishing:\n%s", diff.pretty()) + + +def _contains_certification_error(item: Any) -> bool: + """Recursively inspect Azure error payloads for certification failures.""" + if not isinstance(item, dict): + raise InvalidSchema(f"Invalid schema for error object: {item}") + + code: str = item.get("code", "") + message: str = item.get("message", "") + if code == "invalidState" and "certification" in message.lower(): + return True + if not isinstance(item.get('details'), list): + raise InvalidSchema(f"Invalid schema for 'details' inside error object: {item}") + for detail in item.get("details") or []: + if _contains_certification_error(detail): + return True + return False + + +def is_certification_error(errors: List[Dict[str, Any]]) -> bool: + """Return True when Azure job errors indicate a certification failure. + + Certification failures are permanent for a given submission and should not + be retried. + """ + return any(_contains_certification_error(error) for error in errors) diff --git a/tests/ms_azure/conftest.py b/tests/ms_azure/conftest.py index 76652c8..b66baa4 100644 --- a/tests/ms_azure/conftest.py +++ b/tests/ms_azure/conftest.py @@ -98,7 +98,20 @@ def errors() -> List[Dict[str, Any]]: { "code": "conflict", "message": "Error message", - "details": [{"code": "invalidResource", "message": "Failure for resource"}], + "details": [ + {"code": "invalidResource", "message": "Failure for resource", "details": []} + ], + } + ] + + +@pytest.fixture +def cert_error_invalid_schema() -> List[Dict[str, Any]]: + return [ + { + "code": "internalServerError", + "message": "Certification failed.", + "details": {}, } ] @@ -629,3 +642,26 @@ def job_details_completed_failure_obj( job_details_completed_failure: Dict[str, Any], ) -> ConfigureStatus: return ConfigureStatus.from_json(job_details_completed_failure) + + +@pytest.fixture +def cert_error_failure() -> List[Dict[str, Any]]: + return [ + { + "resourceId": "submission/2d541119", + "code": "internalServerError", + "message": "Operation failed", + "details": [ + { + "code": "invalidState", + "message": "Certification", + "details": [ + { + "code": "invalidState", + "message": "Issues found during Certification.", + } + ], + } + ], + } + ] diff --git a/tests/ms_azure/test_service.py b/tests/ms_azure/test_service.py index 1af4af5..8dc1a1d 100644 --- a/tests/ms_azure/test_service.py +++ b/tests/ms_azure/test_service.py @@ -1,5 +1,6 @@ import json import logging +import re from copy import deepcopy from typing import Any, Dict, List from unittest import mock @@ -12,7 +13,13 @@ from requests.exceptions import HTTPError from cloudpub.common import BaseService -from cloudpub.error import ConflictError, InvalidStateError, NotFoundError, Timeout +from cloudpub.error import ( + CertificationError, + ConflictError, + InvalidStateError, + NotFoundError, + Timeout, +) from cloudpub.models.ms_azure import ( ConfigureStatus, CustomerLeads, @@ -309,9 +316,16 @@ def test_get_job_details_after_failed_completion( azure_service: AzureService, caplog: LogCaptureFixture, job_details_running_obj: ConfigureStatus, - job_details_completed_failure_obj: ConfigureStatus, errors: List[Dict[str, Any]], ) -> None: + job_details_completed_failure_obj = ConfigureStatus.from_json( + { + "jobId": "job_id_111", + "jobStatus": "completed", + "jobResult": "failed", + "errors": errors, + } + ) mock_job_details.side_effect = [ job_details_running_obj, job_details_running_obj, @@ -329,6 +343,34 @@ def test_get_job_details_after_failed_completion( assert f"Job {job_id} failed" in caplog.text assert f"Job {job_id} succeeded" not in caplog.text + @mock.patch("cloudpub.ms_azure.AzureService._query_job_details") + def test_get_job_details_after_failed_completion_on_certification_error( + self, + mock_job_details: mock.MagicMock, + azure_service: AzureService, + job_details_running_obj: ConfigureStatus, + cert_error_failure: List[Dict[str, Any]], + ) -> None: + cert_error_response = ConfigureStatus.from_json( + { + "jobId": "job_id_111", + "jobStatus": "completed", + "jobResult": "failed", + "errors": cert_error_failure, + } + ) + mock_job_details.side_effect = [ + job_details_running_obj, + job_details_running_obj, + cert_error_response, + ] + job_id = "job_id_111" + + with pytest.raises(CertificationError, match=f"Job {job_id} failed:"): + azure_service._wait_for_job_completion(job_id=job_id) + + assert mock_job_details.call_count == 3 + @mock.patch("cloudpub.ms_azure.AzureService._wait_for_job_completion") @mock.patch("cloudpub.ms_azure.AzureService._configure") def test_configure( @@ -977,7 +1019,10 @@ def test_publish_preview_fail_on_retry( "jobId": "1", "jobStatus": "completed", "jobResult": "failed", - "errors": ["failure1", "failure2"], + "errors": [ + {"code": "invalidState", "message": "message1", "details": []}, + {"code": "invalidState", "message": "message2", "details": []}, + ], } ) mock_is_sbpreview.return_value = False @@ -987,7 +1032,7 @@ def test_publish_preview_fail_on_retry( azure_service._publish_preview.retry.sleep = mock.Mock() # type: ignore expected_err = ( f"Failed to submit the product test-product \\({product_obj.id}\\) to preview. " - "Status: failed Errors: failure1\nfailure2" + f"Status: failed Errors: {re.escape(str(err_resp.errors))}" ) # Test @@ -1032,13 +1077,22 @@ def test_publish_live_fail_on_retry( product_obj: Product, azure_service: AzureService, ) -> None: + valid_non_certification_errors: list[Dict[str, Any]] = [ + { + "code": "conflict", + "message": "Error message", + "details": [ + {"code": "invalidResource", "message": "Failure for resource", "details": []} + ], + } + ] # Prepare mocks err_resp = ConfigureStatus.from_json( { "jobId": "1", "jobStatus": "completed", "jobResult": "failed", - "errors": ["failure1", "failure2"], + "errors": valid_non_certification_errors, } ) mock_subst.side_effect = [err_resp for _ in range(3)] @@ -1047,7 +1101,7 @@ def test_publish_live_fail_on_retry( azure_service._publish_live.retry.sleep = mock.Mock() # type: ignore expected_err = ( f"Failed to submit the product test-product \\({product_obj.id}\\) to live. " - "Status: failed Errors: failure1\nfailure2" + f"Status: failed Errors: *" ) # Test @@ -2569,3 +2623,86 @@ def test_publish_live_modular_push( mock_configure.assert_has_calls( [mock.call(resources=[expected_tc]), mock.call(resources=expected_modular_resources)] ) + + @mock.patch("cloudpub.ms_azure.AzureService.get_submission_state") + @mock.patch("cloudpub.ms_azure.AzureService.submit_to_status") + @mock.patch("cloudpub.ms_azure.AzureService._is_submission_in_preview") + @mock.patch("cloudpub.ms_azure.AzureService._raise_error") + def test_publish_preview_fail_no_retry_on_certification_error( + self, + mock_raise_error: mock.MagicMock, + mock_is_sbpreview: mock.MagicMock, + mock_subst: mock.MagicMock, + mock_getsubst: mock.MagicMock, + product_obj: Product, + azure_service: AzureService, + cert_error_failure: List[Dict[str, Any]], + ) -> None: + err_resp = ConfigureStatus.from_json( + { + "jobId": "1", + "jobStatus": "completed", + "jobResult": "failed", + "errors": cert_error_failure, + } + ) + mock_is_sbpreview.return_value = False + mock_subst.side_effect = [err_resp for _ in range(3)] + mock_getsubst.side_effect = [None for _ in range(3)] + azure_service._publish_preview.retry.sleep = mock.Mock() # type: ignore + expected_msg = ( + f"Failed to submit the product test-product ({product_obj.id}) to preview. " + f"Status: failed Errors: {err_resp.errors}" + ) + expected_err = ( + f"Failed to submit the product test-product \\({product_obj.id}\\) to preview. " + r"Status: failed Errors: .*" + ) + mock_raise_error.side_effect = CertificationError(expected_msg) + + with pytest.raises(CertificationError, match=expected_err): + azure_service._publish_preview(product_obj, "test-product") + + mock_raise_error.assert_called_once_with(CertificationError, expected_msg) + assert mock_subst.call_count == 1 + assert mock_getsubst.call_count == 0 + + @mock.patch("cloudpub.ms_azure.AzureService.get_submission_state") + @mock.patch("cloudpub.ms_azure.AzureService.submit_to_status") + @mock.patch("cloudpub.ms_azure.AzureService._raise_error") + def test_publish_live_fail_no_retry_on_certification_error( + self, + mock_raise_error: mock.MagicMock, + mock_subst: mock.MagicMock, + mock_getsubst: mock.MagicMock, + product_obj: Product, + azure_service: AzureService, + cert_error_failure: List[Dict[str, Any]], + ) -> None: + err_resp = ConfigureStatus.from_json( + { + "jobId": "1", + "jobStatus": "completed", + "jobResult": "failed", + "errors": cert_error_failure, + } + ) + mock_subst.side_effect = [err_resp for _ in range(3)] + mock_getsubst.side_effect = [None for _ in range(3)] + azure_service._publish_live.retry.sleep = mock.Mock() # type: ignore + expected_msg = ( + f"Failed to submit the product test-product ({product_obj.id}) to live. " + f"Status: failed Errors: {err_resp.errors}" + ) + expected_err = ( + f"Failed to submit the product test-product \\({product_obj.id}\\) to live. " + r"Status: failed Errors: .*" + ) + mock_raise_error.side_effect = CertificationError(expected_msg) + + with pytest.raises(CertificationError, match=expected_err): + azure_service._publish_live(product_obj, "test-product") + + mock_raise_error.assert_called_once_with(CertificationError, expected_msg) + assert mock_subst.call_count == 1 + assert mock_getsubst.call_count == 0 diff --git a/tests/ms_azure/test_utils.py b/tests/ms_azure/test_utils.py index 44df8a7..8f1498b 100644 --- a/tests/ms_azure/test_utils.py +++ b/tests/ms_azure/test_utils.py @@ -5,6 +5,7 @@ import pytest from _pytest.logging import LogCaptureFixture +from cloudpub.error import InvalidSchema from cloudpub.models.ms_azure import ( ConfigureStatus, DiskVersion, @@ -17,6 +18,7 @@ create_disk_version_from_scratch, get_image_type_mapping, is_azure_job_not_complete, + is_certification_error, is_sas_present, update_skus, ) @@ -508,3 +510,39 @@ def test_create_disk_version_from_scratch_arm64( res.vm_images = sorted(res.vm_images, key=attrgetter("image_type")) assert res == disk_version_arm64_obj + + +def test_is_certification_error(cert_error_failure: list[Dict[str, Any]]) -> None: + valid_non_certification_errors: list[Dict[str, Any]] = [ + { + "code": "conflict", + "message": "Error message", + "details": [ + {"code": "invalidResource", "message": "Failure for resource", "details": []} + ], + } + ] + assert is_certification_error(cert_error_failure) is True + assert is_certification_error(valid_non_certification_errors) is False + assert is_certification_error([]) is False + + +@pytest.mark.parametrize( + "errors", + [ + pytest.param([123], id="non-dict-error-item"), + pytest.param( + [ + { + "code": "internalServerError", + "message": "Certification failed.", + "details": {}, + } + ], + id="non-list-details", + ), + ], +) +def test_is_certification_error_invalid_schema(errors: list[Any]) -> None: + with pytest.raises(InvalidSchema, match="Invalid schema"): + is_certification_error(errors)