Skip to content

Commit d9d8b6b

Browse files
committed
fix(webapp): retry transient ECR verification failures before failing closed
1 parent 3c7200f commit d9d8b6b

1 file changed

Lines changed: 36 additions & 11 deletions

File tree

apps/webapp/app/v3/services/verifyDeploymentImage.server.ts

Lines changed: 36 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import {
44
RepositoryNotFoundException,
55
} from "@aws-sdk/client-ecr";
66
import { tryCatch } from "@trigger.dev/core";
7+
import pRetry, { AbortError } from "p-retry";
78
import { logger } from "~/services/logger.server";
89
import {
910
type AssumeRoleConfig,
@@ -129,19 +130,43 @@ export async function ecrImageExists(
129130
imageDigest && SHA256_DIGEST.test(imageDigest.trim()) ? imageDigest.trim() : undefined;
130131
const imageId = validDigest ? { imageDigest: validDigest } : { imageTag: parsed.tag };
131132

133+
const assumeRole = registryConfig.ecrAssumeRoleArn
134+
? {
135+
roleArn: registryConfig.ecrAssumeRoleArn,
136+
externalId: registryConfig.ecrAssumeRoleExternalId,
137+
}
138+
: undefined;
139+
140+
// Retry transient ECR failures (throttling/network) before giving up, so a blip
141+
// doesn't fail an otherwise-fine deploy. A missing repo is definitive - don't retry.
132142
const [error, response] = await tryCatch(
133-
_send({
134-
region,
135-
assumeRole: registryConfig.ecrAssumeRoleArn
136-
? {
137-
roleArn: registryConfig.ecrAssumeRoleArn,
138-
externalId: registryConfig.ecrAssumeRoleExternalId,
143+
pRetry(
144+
() =>
145+
_send({
146+
region,
147+
assumeRole,
148+
registryId: accountId,
149+
repositoryName: parsed.repositoryName,
150+
imageIds: [imageId],
151+
}).catch((err) => {
152+
if (err instanceof RepositoryNotFoundException) {
153+
throw new AbortError(err);
139154
}
140-
: undefined,
141-
registryId: accountId,
142-
repositoryName: parsed.repositoryName,
143-
imageIds: [imageId],
144-
})
155+
throw err;
156+
}),
157+
{
158+
retries: 2,
159+
minTimeout: 200,
160+
maxTimeout: 1000,
161+
onFailedAttempt: (e) => {
162+
logger.warn("Retrying ECR image verification", {
163+
imageReference,
164+
attempt: e.attemptNumber,
165+
error: e.message,
166+
});
167+
},
168+
}
169+
)
145170
);
146171

147172
if (error) {

0 commit comments

Comments
 (0)