diff --git a/lib/errorhandler.ts b/lib/errorhandler.ts index 4c01703..3e3661d 100644 --- a/lib/errorhandler.ts +++ b/lib/errorhandler.ts @@ -211,37 +211,43 @@ export class ErrorHandler { return false } - // Per the RFC, only DNS and TCP-dial failures retry; anything after the - // socket is dialed (TLS handshake, cert verification) must fail fast. - return connectionRetryAllowList.has(nodeError.code) + return !connectionDenyList.has(nodeError.code) } } /** - * Node `err.code`s for the DNS and TCP-dial failures the RFC marks retriable. - * + * Taken from https://github.com/sindresorhus/is-retry-allowed * @internal */ -const connectionRetryAllowList = new Set([ - // DNS resolution failures - 'EAI_AGAIN', +const connectionDenyList = new Set([ 'ENOTFOUND', - // TCP-dial / connection failures - 'ECONNREFUSED', - 'ECONNRESET', - 'ECONNABORTED', - 'ETIMEDOUT', - 'EHOSTUNREACH', - 'EHOSTDOWN', 'ENETUNREACH', - 'ENETDOWN', - 'ENETRESET', - 'EADDRNOTAVAIL', - // EPIPE looks like a post-dial write failure, but it only reaches here as a - // request-side error (isRequestError), meaning the peer tore down the - // connection before accepting our request (e.g. a closed keep-alive socket, - // an LB drop, or a restarting node). Nothing was processed, so retrying is - // safe. A broken pipe after the server began responding surfaces as a - // response-side error and fails fast instead. - 'EPIPE', + 'UNABLE_TO_GET_ISSUER_CERT', + 'UNABLE_TO_GET_CRL', + 'UNABLE_TO_DECRYPT_CERT_SIGNATURE', + 'UNABLE_TO_DECRYPT_CRL_SIGNATURE', + 'UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY', + 'CERT_SIGNATURE_FAILURE', + 'CRL_SIGNATURE_FAILURE', + 'CERT_NOT_YET_VALID', + 'CERT_HAS_EXPIRED', + 'CRL_NOT_YET_VALID', + 'CRL_HAS_EXPIRED', + 'ERROR_IN_CERT_NOT_BEFORE_FIELD', + 'ERROR_IN_CERT_NOT_AFTER_FIELD', + 'ERROR_IN_CRL_LAST_UPDATE_FIELD', + 'ERROR_IN_CRL_NEXT_UPDATE_FIELD', + 'OUT_OF_MEM', + 'DEPTH_ZERO_SELF_SIGNED_CERT', + 'SELF_SIGNED_CERT_IN_CHAIN', + 'UNABLE_TO_GET_ISSUER_CERT_LOCALLY', + 'UNABLE_TO_VERIFY_LEAF_SIGNATURE', + 'CERT_CHAIN_TOO_LONG', + 'CERT_REVOKED', + 'INVALID_CA', + 'PATH_LENGTH_EXCEEDED', + 'INVALID_PURPOSE', + 'CERT_UNTRUSTED', + 'CERT_REJECTED', + 'HOSTNAME_MISMATCH', ]) diff --git a/test/errorhandler.test.ts b/test/errorhandler.test.ts deleted file mode 100644 index 9870a9e..0000000 --- a/test/errorhandler.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2016-2026. Couchbase, Inc. - * All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import { assert } from 'chai' -import { ErrorHandler } from '../lib/errorhandler.js' -import { ConnectionError } from '../lib/internalerrors.js' -import { RequestContext } from '../lib/requestcontext.js' - -// Build a ConnectionError wrapping a Node system error carrying `code`, -// mirroring what the executor produces from a request `error` event. -function connectionError(code: string | undefined, isRequestError = true): ConnectionError { - const cause = new Error(`synthetic ${code ?? 'no-code'} error`) as NodeJS.ErrnoException - if (code !== undefined) { - cause.code = code - } - return new ConnectionError(cause, isRequestError) -} - -function classify(err: ConnectionError): boolean { - // handleErrors routes through the private _isRetriableConnectionError. - return ErrorHandler.handleErrors(err, new RequestContext(7)).shouldRetry() -} - -// Per the RFC, only DNS failures and TCP-dial connection failures are eligible -// for retry (allowlist). Anything that happens after the socket is dialed -- the -// TLS handshake, cert/hostname verification, response read/write -- must fail -// fast. -describe('#ConnectionError retry classification', function () { - it('does not retry a TLS hostname/altname mismatch', function () { - assert.isFalse(classify(connectionError('ERR_TLS_CERT_ALTNAME_INVALID'))) - }) - - it('does not retry other ERR_TLS_* handshake errors', function () { - assert.isFalse(classify(connectionError('ERR_TLS_HANDSHAKE_TIMEOUT'))) - }) - - it('does not retry ERR_SSL_* errors', function () { - assert.isFalse(classify(connectionError('ERR_SSL_WRONG_VERSION_NUMBER'))) - }) - - it('does not retry an OpenSSL-style cert verification code', function () { - // e.g. an expired/self-signed cert -- a handshake failure, not a dial failure. - assert.isFalse(classify(connectionError('CERT_HAS_EXPIRED'))) - assert.isFalse(classify(connectionError('DEPTH_ZERO_SELF_SIGNED_CERT'))) - }) - - it('retries TCP-dial connection failures', function () { - assert.isTrue(classify(connectionError('ECONNREFUSED'))) - assert.isTrue(classify(connectionError('ECONNRESET'))) - assert.isTrue(classify(connectionError('ETIMEDOUT'))) - // Network-unreachable is a connection failure -- the old denylist wrongly - // blocked it; the allowlist restores RFC-correct retry behavior. - assert.isTrue(classify(connectionError('ENETUNREACH'))) - }) - - it('retries DNS-resolution failures', function () { - assert.isTrue(classify(connectionError('EAI_AGAIN'))) - assert.isTrue(classify(connectionError('ENOTFOUND'))) - }) - - it('does not retry an unrecognized read/write error code', function () { - // Not a DNS or TCP-dial failure -> not on the allowlist -> fail fast. - assert.isFalse(classify(connectionError('EPROTO'))) - assert.isFalse(classify(connectionError('ERR_STREAM_PREMATURE_CLOSE'))) - }) - - it('does not retry a response (non-request) error', function () { - // Errors raised while reading the response are read/write-after-dial. - assert.isFalse(classify(connectionError('ECONNREFUSED', false))) - }) - - it('does not retry a connection error with no code', function () { - assert.isFalse(classify(connectionError(undefined))) - }) -}) diff --git a/test/hostselection.test.ts b/test/hostselection.test.ts index c073611..d121103 100644 --- a/test/hostselection.test.ts +++ b/test/hostselection.test.ts @@ -111,9 +111,10 @@ describe('#Host selection', function () { } }) - it('rejects with a retriable ConnectionError when no records resolve', async function () { - // An empty result is a DNS failure; per the RFC it should rejoin the retry - // path, so it is wrapped as a request-side ConnectionError carrying ENOTFOUND. + it('wraps a no-records DNS result as a request-side ConnectionError', async function () { + // An empty result is a DNS failure, so it is wrapped as a request-side + // ConnectionError carrying ENOTFOUND, rejoining the request error path. + // Whether that code retries is decided separately by ErrorHandler. stubLookup([]) const cluster = createInstance( `https://${HOSTNAME}:${PORT}`, @@ -136,7 +137,7 @@ describe('#Host selection', function () { } }) - it('wraps a DNS-resolution failure as a retriable ConnectionError', async function () { + it('wraps a DNS-resolution failure as a request-side ConnectionError', async function () { // e.g. a transient resolver failure during a rebalance. stubLookup(() => { const e = new Error('getaddrinfo EAI_AGAIN') as NodeJS.ErrnoException @@ -157,7 +158,7 @@ describe('#Host selection', function () { } assert.instanceOf(caught, ConnectionError) const err = caught as ConnectionError - // isRequestError + a DNS code is what ErrorHandler classifies as retriable. + // isRequestError + the original code is what ErrorHandler uses to classify the failure. assert.isTrue(err.isRequestError) assert.strictEqual((err.cause as NodeJS.ErrnoException).code, 'EAI_AGAIN') } finally {