From 3cafecb7662cef5c72920db34b1a623af3b2ec41 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Fri, 12 Dec 2025 14:16:47 +0000 Subject: [PATCH 1/9] remove workflow completely --- .github/workflows/contracts-ecdsa.yml | 352 ------------------ .github/workflows/contracts-random-beacon.yml | 346 ----------------- 2 files changed, 698 deletions(-) delete mode 100644 .github/workflows/contracts-ecdsa.yml delete mode 100644 .github/workflows/contracts-random-beacon.yml diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml deleted file mode 100644 index 7ec449b67c..0000000000 --- a/.github/workflows/contracts-ecdsa.yml +++ /dev/null @@ -1,352 +0,0 @@ -name: Solidity ECDSA - -on: - # We intend to use `workflow dispatch` in two different situations/paths: - # 1. If a workflow will be manually dispatched from branch named - # `dapp-development`, workflow will deploy the contracts on the selected - # testnet and publish them to NPM registry with `dapp-dev-` - # suffix and `dapp-development-` tag. Such packages are meant - # to be used locally by the team developing Threshold Token dApp and may - # contain contracts that have different values from the ones used on - # mainnet. - # 2. If a workflow will be manually dispatched from a branch which name is not - # `dapp-development`, the workflow will deploy the contracts on the - # selected testnet and publish them to NPM registry with `` - # suffix and tag. Such packages will be used later to deploy public - # Threshold Token dApp on a testnet, with contracts resembling those used - # on mainnet. - workflow_dispatch: - inputs: - environment: - description: "Environment (network) for workflow execution, e.g. `sepolia`" - required: true - upstream_builds: - description: "Upstream builds" - required: false - upstream_ref: - description: "Git reference to checkout (e.g. 
branch name)" - required: false - default: "main" - -jobs: - contracts-detect-changes: - runs-on: ubuntu-latest - outputs: - path-filter: ${{ steps.set-output.outputs.path-filter }} - steps: - - name: Set path-filter output - id: set-output - run: echo "path-filter=true" >> $GITHUB_OUTPUT - - contracts-lint: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/ecdsa - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/ecdsa/yarn.lock - - - name: Install dependencies - run: yarn install - - - name: Build - run: yarn build - - - name: Lint - run: yarn lint - - contracts-slither: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/ecdsa - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/ecdsa/yarn.lock - - - uses: actions/setup-python@v4 - with: - python-version: 3.10.8 - - - name: Install Solidity - env: - SOLC_VERSION: 0.8.9 # according to solidity.version in hardhat.config.ts - run: | - pip3 install solc-select - solc-select install $SOLC_VERSION - solc-select use $SOLC_VERSION - - - name: Install Slither - env: - SLITHER_VERSION: 0.8.3 - run: pip3 install slither-analyzer==$SLITHER_VERSION - - - name: Install dependencies - run: yarn install - - # As a workaround for a slither issue https://github.com/crytic/slither/issues/1140 - # we disable compilation of dependencies when 
running slither. - - name: Run Slither - run: SKIP_DEPENDENCY_COMPILER=true slither . - - contracts-build-and-test: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/ecdsa - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/ecdsa/yarn.lock - - - name: Install dependencies - run: yarn install - - - name: Build solidity contracts - run: yarn build - - - name: Run tests - if: github.ref != 'refs/heads/dapp-development' - run: yarn test - - contracts-deployment-dry-run: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/ecdsa - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/ecdsa/yarn.lock - - - name: Install dependencies - run: yarn install --frozen-lockfile - - - name: Deploy contracts - run: yarn deploy:test - - - name: Build Docker Image - uses: ./.github/actions/docker-build-push - with: - imageName: keep-ecdsa-hardhat - push: false - context: ./solidity/ecdsa - - contracts-deployment-testnet: - needs: [contracts-build-and-test] - if: | - github.event_name == 'workflow_dispatch' - && github.ref != 'refs/heads/dapp-development' - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/ecdsa - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # 
artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/ecdsa/yarn.lock - registry-url: "https://registry.npmjs.org" - - - name: Install dependencies - run: yarn install --frozen-lockfile - - - name: Get upstream packages versions - uses: keep-network/ci/actions/upstream-builds-query@v2 - id: upstream-builds-query - with: - upstream-builds: ${{ github.event.inputs.upstream_builds }} - query: | - threshold-contracts-version = github.com/threshold-network/solidity-contracts#version - random-beacon-version = github.com/keep-network/keep-core/random-beacon#version - - - name: Resolve latest contracts - run: | - yarn upgrade \ - @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ - @keep-network/random-beacon@${{ steps.upstream-builds-query.outputs.random-beacon-version }} \ - @keep-network/sortition-pools - - # TODO: Remove this step. We replace sortition pools for deployment on testnet - # with forked contracts that were tweaked to make operators joining the pool - # easier. This should never be used outside of the test environment. On - # test environment it should be used temporarily only. 
- - name: Use Sortition Pool forked contracts - run: | - yarn upgrade @keep-network/sortition-pools@github:keep-network/sortition-pools#test-fork - - - name: Configure tenderly - env: - TENDERLY_TOKEN: ${{ secrets.TENDERLY_TOKEN }} - run: ./config_tenderly.sh - - - name: Deploy contracts - env: - CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} - ACCOUNTS_PRIVATE_KEYS: ${{ secrets.TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} - ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} - run: yarn deploy --network ${{ github.event.inputs.environment }} - - - name: Bump up package version - id: npm-version-bump - uses: keep-network/npm-version-bump@v2 - with: - work-dir: solidity/ecdsa - environment: ${{ github.event.inputs.environment }} - branch: ${{ github.ref }} - commit: ${{ github.sha }} - - - name: Publish to npm - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npm publish --access=public --tag ${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} - - - name: Build and Publish Docker image - uses: ./.github/actions/docker-build-push - with: - environment: ${{ github.event.inputs.environment }} - imageName: keep-ecdsa-hardhat - context: ./solidity/ecdsa - push: true - gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} - - - name: Notify CI about completion of the workflow - uses: keep-network/ci/actions/notify-workflow-completed@v2 - env: - GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} - with: - module: "github.com/keep-network/keep-core/ecdsa" - url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - environment: ${{ github.event.inputs.environment }} - upstream_builds: ${{ github.event.inputs.upstream_builds }} - upstream_ref: ${{ github.event.inputs.upstream_ref }} - version: ${{ steps.npm-version-bump.outputs.version }} - - # This job is responsible for publishing packackes with slightly modified - # contracts. 
The modifications are there to help with the process of testing - # some features on the T Token Dashboard. The job starts only if workflow - # gets triggered by the `workflow_dispatch` event on the branch called - # `dapp-development`. - contracts-dapp-development-deployment-testnet: - needs: [contracts-build-and-test] - if: | - github.event_name == 'workflow_dispatch' - && github.ref == 'refs/heads/dapp-development' - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/ecdsa - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/ecdsa/yarn.lock - registry-url: "https://registry.npmjs.org" - - - name: Install dependencies - run: yarn install --frozen-lockfile - - - name: Get upstream packages versions - uses: keep-network/ci/actions/upstream-builds-query@v2 - id: upstream-builds-query - with: - upstream-builds: ${{ github.event.inputs.upstream_builds }} - query: | - threshold-contracts-version = github.com/threshold-network/solidity-contracts#version - random-beacon-version = github.com/keep-network/keep-core/random-beacon#version - - - name: Resolve latest contracts - run: | - yarn upgrade \ - @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ - @keep-network/random-beacon@${{ steps.upstream-builds-query.outputs.random-beacon-version }} \ - @keep-network/sortition-pools - - - name: Deploy contracts - env: - CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} - ACCOUNTS_PRIVATE_KEYS: ${{ secrets.DAPP_DEV_TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} - ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} - run: yarn deploy --network ${{ github.event.inputs.environment }} - - - name: Bump up package 
version - id: npm-version-bump - uses: keep-network/npm-version-bump@v2 - with: - work-dir: solidity/ecdsa - environment: dapp-dev-${{ github.event.inputs.environment }} - branch: ${{ github.ref }} - commit: ${{ github.sha }} - - - name: Publish to npm - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npm publish --access=public --tag dapp-development-${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} - - - name: Build and Publish Docker image - uses: ./.github/actions/docker-build-push - with: - environment: ${{ github.event.inputs.environment }} - imageName: keep-ecdsa-hardhat-dapp-dev - context: ./solidity/ecdsa - push: true - gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} - - - name: Notify CI about completion of the workflow - uses: keep-network/ci/actions/notify-workflow-completed@v2 - env: - GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} - with: - module: "github.com/keep-network/keep-core/ecdsa" - url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - environment: ${{ github.event.inputs.environment }} - upstream_builds: ${{ github.event.inputs.upstream_builds }} - upstream_ref: dapp-development - version: ${{ steps.npm-version-bump.outputs.version }} diff --git a/.github/workflows/contracts-random-beacon.yml b/.github/workflows/contracts-random-beacon.yml deleted file mode 100644 index 71460b2456..0000000000 --- a/.github/workflows/contracts-random-beacon.yml +++ /dev/null @@ -1,346 +0,0 @@ -name: Solidity Random Beacon - -on: - # We intend to use `workflow dispatch` in two different situations/paths: - # 1. If a workflow will be manually dispatched from branch named - # `dapp-development`, workflow will deploy the contracts on the selected - # testnet and publish them to NPM registry with `dapp-dev-` - # suffix and `dapp-development-` tag. 
Such packages are meant - # to be used locally by the team developing Threshold Token dApp and may - # contain contracts that have different values from the ones used on - # mainnet. - # 2. If a workflow will be manually dispatched from a branch which name is not - # `dapp-development`, the workflow will deploy the contracts on the - # selected testnet and publish them to NPM registry with `` - # suffix and tag. Such packages will be used later to deploy public - # Threshold Token dApp on a testnet, with contracts resembling those used - # on mainnet. - workflow_dispatch: - inputs: - environment: - description: "Environment (network) for workflow execution, e.g. `sepolia`" - required: true - upstream_builds: - description: "Upstream builds" - required: false - upstream_ref: - description: "Git reference to checkout (e.g. branch name)" - required: false - default: "main" - -jobs: - contracts-detect-changes: - runs-on: ubuntu-latest - outputs: - path-filter: ${{ steps.set-output.outputs.path-filter }} - steps: - - name: Set path-filter output - id: set-output - run: echo "path-filter=true" >> $GITHUB_OUTPUT - - contracts-lint: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/random-beacon - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/random-beacon/yarn.lock - - - name: Install dependencies - run: yarn install --network-concurrency 1 - - - name: Build - run: yarn build - - - name: Lint - run: yarn lint - - contracts-slither: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/random-beacon - steps: - - uses: actions/checkout@v3 - - - uses: 
actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/random-beacon/yarn.lock - - - uses: actions/setup-python@v4 - with: - python-version: 3.10.8 - - - name: Install Solidity - env: - SOLC_VERSION: 0.8.9 # according to solidity.version in hardhat.config.js - run: | - pip3 install solc-select - solc-select install $SOLC_VERSION - solc-select use $SOLC_VERSION - - - name: Install Slither - env: - SLITHER_VERSION: 0.8.3 - run: pip3 install slither-analyzer==$SLITHER_VERSION - - - name: Install dependencies - run: yarn install --network-concurrency 1 - - - name: Run Slither - run: slither . - - contracts-build-and-test: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/random-beacon - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/random-beacon/yarn.lock - - - name: Install dependencies - run: yarn install --network-concurrency 1 - - - name: Build solidity contracts - run: yarn build - - - name: Run tests - if: github.ref != 'refs/heads/dapp-development' - run: yarn test - - contracts-deployment-dry-run: - needs: contracts-detect-changes - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/random-beacon - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # 
https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/random-beacon/yarn.lock - - - name: Install dependencies - run: yarn install --network-concurrency 1 --frozen-lockfile - - - name: Deploy contracts - run: yarn deploy:test - - - name: Build Docker Image - uses: ./.github/actions/docker-build-push - with: - imageName: keep-random-beacon-hardhat - context: ./solidity/random-beacon - push: false - - contracts-deployment-testnet: - needs: [contracts-build-and-test] - if: | - github.event_name == 'workflow_dispatch' - && github.ref != 'refs/heads/dapp-development' - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/random-beacon - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/random-beacon/yarn.lock - registry-url: "https://registry.npmjs.org" - - - name: Install dependencies - run: yarn install --network-concurrency 1 --frozen-lockfile - - - name: Get upstream packages versions - uses: keep-network/ci/actions/upstream-builds-query@v2 - id: upstream-builds-query - with: - upstream-builds: ${{ github.event.inputs.upstream_builds }} - query: | - threshold-contracts-version = github.com/threshold-network/solidity-contracts#version - - - name: Resolve latest contracts - run: | - yarn upgrade \ - @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ - @keep-network/sortition-pools - - # TODO: Remove this step. We replace sortition pools for deployment on testnet - # with forked contracts that were tweaked to make operators joining the pool - # easier. This should never be used outside of the test environment. 
On - # test environment it should be used temporarily only. - - name: Use Sortition Pool forked contracts - run: | - yarn upgrade @keep-network/sortition-pools@github:keep-network/sortition-pools#test-fork - - - name: Configure tenderly - env: - TENDERLY_TOKEN: ${{ secrets.TENDERLY_TOKEN }} - run: ./config_tenderly.sh - - - name: Deploy contracts - env: - CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} - ACCOUNTS_PRIVATE_KEYS: ${{ secrets.TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} - ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} - run: yarn deploy --network ${{ github.event.inputs.environment }} - - - name: Bump up package version - id: npm-version-bump - uses: keep-network/npm-version-bump@v2 - with: - work-dir: solidity/random-beacon - environment: ${{ github.event.inputs.environment }} - branch: ${{ github.ref }} - commit: ${{ github.sha }} - - - name: Publish to npm - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npm publish --access=public --tag ${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} - - - name: Build and Publish Docker image - uses: ./.github/actions/docker-build-push - with: - environment: ${{ github.event.inputs.environment }} - imageName: keep-random-beacon-hardhat - context: ./solidity/random-beacon - push: true - gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} - - - name: Notify CI about completion of the workflow - uses: keep-network/ci/actions/notify-workflow-completed@v2 - env: - GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} - with: - module: "github.com/keep-network/keep-core/random-beacon" - url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - environment: ${{ github.event.inputs.environment }} - upstream_builds: ${{ github.event.inputs.upstream_builds }} - upstream_ref: ${{ github.event.inputs.upstream_ref }} - version: ${{ steps.npm-version-bump.outputs.version }} - - # This job is responsible for publishing packackes with slightly modified - # 
contracts. The modifications are there to help with the process of testing - # some features on the T Token Dashboard. The job starts only if workflow - # gets triggered by the `workflow_dispatch` event on the branch called - # `dapp-development`. - contracts-dapp-development-deployment-testnet: - needs: [contracts-build-and-test] - if: | - github.event_name == 'workflow_dispatch' - && github.ref == 'refs/heads/dapp-development' - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./solidity/random-beacon - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - with: - # Using fixed version, because 18.16 was sometimes causing issues with - # artifacts generation during `hardhat compile` - see - # https://github.com/NomicFoundation/hardhat/issues/3877 - node-version: "18.15.0" - cache: "yarn" - cache-dependency-path: solidity/random-beacon/yarn.lock - registry-url: "https://registry.npmjs.org" - - - name: Install dependencies - run: yarn install --network-concurrency 1 --frozen-lockfile - - - name: Get upstream packages versions - uses: keep-network/ci/actions/upstream-builds-query@v2 - id: upstream-builds-query - with: - upstream-builds: ${{ github.event.inputs.upstream_builds }} - query: | - threshold-contracts-version = github.com/threshold-network/solidity-contracts#version - - - name: Resolve latest contracts - run: | - yarn upgrade \ - @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ - @keep-network/sortition-pools - - - name: Deploy contracts - env: - CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} - ACCOUNTS_PRIVATE_KEYS: ${{ secrets.DAPP_DEV_TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} - ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} - run: yarn deploy --network ${{ github.event.inputs.environment }} - - - name: Bump up package version - id: npm-version-bump - uses: keep-network/npm-version-bump@v2 - with: - work-dir: solidity/random-beacon - environment: 
dapp-dev-${{ github.event.inputs.environment }} - branch: ${{ github.ref }} - commit: ${{ github.sha }} - - - name: Publish to npm - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npm publish --access=public --tag dapp-development-${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} - - - name: Build and Publish Docker image - uses: ./.github/actions/docker-build-push - with: - environment: ${{ github.event.inputs.environment }} - imageName: keep-random-beacon-hardhat-dapp-dev - context: ./solidity/random-beacon - push: true - gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} - - - name: Notify CI about completion of the workflow - uses: keep-network/ci/actions/notify-workflow-completed@v2 - env: - GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} - with: - module: "github.com/keep-network/keep-core/random-beacon" - url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - environment: ${{ github.event.inputs.environment }} - upstream_builds: ${{ github.event.inputs.upstream_builds }} - upstream_ref: dapp-development - version: ${{ steps.npm-version-bump.outputs.version }} From 92693bbfff11883ccff2733d9f410cc7dfe52796 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Fri, 12 Dec 2025 14:30:27 +0000 Subject: [PATCH 2/9] flow back --- .github/workflows/contracts-ecdsa.yml | 352 ++++++++++++++++++ .github/workflows/contracts-random-beacon.yml | 346 +++++++++++++++++ 2 files changed, 698 insertions(+) create mode 100644 .github/workflows/contracts-ecdsa.yml create mode 100644 .github/workflows/contracts-random-beacon.yml diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml new file mode 100644 index 0000000000..7ec449b67c --- /dev/null +++ b/.github/workflows/contracts-ecdsa.yml @@ -0,0 +1,352 @@ +name: Solidity ECDSA + +on: + # We intend to use `workflow dispatch` in two different situations/paths: + # 1. 
If a workflow will be manually dispatched from branch named + # `dapp-development`, workflow will deploy the contracts on the selected + # testnet and publish them to NPM registry with `dapp-dev-` + # suffix and `dapp-development-` tag. Such packages are meant + # to be used locally by the team developing Threshold Token dApp and may + # contain contracts that have different values from the ones used on + # mainnet. + # 2. If a workflow will be manually dispatched from a branch which name is not + # `dapp-development`, the workflow will deploy the contracts on the + # selected testnet and publish them to NPM registry with `` + # suffix and tag. Such packages will be used later to deploy public + # Threshold Token dApp on a testnet, with contracts resembling those used + # on mainnet. + workflow_dispatch: + inputs: + environment: + description: "Environment (network) for workflow execution, e.g. `sepolia`" + required: true + upstream_builds: + description: "Upstream builds" + required: false + upstream_ref: + description: "Git reference to checkout (e.g. 
branch name)" + required: false + default: "main" + +jobs: + contracts-detect-changes: + runs-on: ubuntu-latest + outputs: + path-filter: ${{ steps.set-output.outputs.path-filter }} + steps: + - name: Set path-filter output + id: set-output + run: echo "path-filter=true" >> $GITHUB_OUTPUT + + contracts-lint: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/ecdsa + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/ecdsa/yarn.lock + + - name: Install dependencies + run: yarn install + + - name: Build + run: yarn build + + - name: Lint + run: yarn lint + + contracts-slither: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/ecdsa + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/ecdsa/yarn.lock + + - uses: actions/setup-python@v4 + with: + python-version: 3.10.8 + + - name: Install Solidity + env: + SOLC_VERSION: 0.8.9 # according to solidity.version in hardhat.config.ts + run: | + pip3 install solc-select + solc-select install $SOLC_VERSION + solc-select use $SOLC_VERSION + + - name: Install Slither + env: + SLITHER_VERSION: 0.8.3 + run: pip3 install slither-analyzer==$SLITHER_VERSION + + - name: Install dependencies + run: yarn install + + # As a workaround for a slither issue https://github.com/crytic/slither/issues/1140 + # we disable compilation of dependencies when 
running slither. + - name: Run Slither + run: SKIP_DEPENDENCY_COMPILER=true slither . + + contracts-build-and-test: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/ecdsa + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/ecdsa/yarn.lock + + - name: Install dependencies + run: yarn install + + - name: Build solidity contracts + run: yarn build + + - name: Run tests + if: github.ref != 'refs/heads/dapp-development' + run: yarn test + + contracts-deployment-dry-run: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/ecdsa + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/ecdsa/yarn.lock + + - name: Install dependencies + run: yarn install --frozen-lockfile + + - name: Deploy contracts + run: yarn deploy:test + + - name: Build Docker Image + uses: ./.github/actions/docker-build-push + with: + imageName: keep-ecdsa-hardhat + push: false + context: ./solidity/ecdsa + + contracts-deployment-testnet: + needs: [contracts-build-and-test] + if: | + github.event_name == 'workflow_dispatch' + && github.ref != 'refs/heads/dapp-development' + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/ecdsa + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # 
artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/ecdsa/yarn.lock + registry-url: "https://registry.npmjs.org" + + - name: Install dependencies + run: yarn install --frozen-lockfile + + - name: Get upstream packages versions + uses: keep-network/ci/actions/upstream-builds-query@v2 + id: upstream-builds-query + with: + upstream-builds: ${{ github.event.inputs.upstream_builds }} + query: | + threshold-contracts-version = github.com/threshold-network/solidity-contracts#version + random-beacon-version = github.com/keep-network/keep-core/random-beacon#version + + - name: Resolve latest contracts + run: | + yarn upgrade \ + @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ + @keep-network/random-beacon@${{ steps.upstream-builds-query.outputs.random-beacon-version }} \ + @keep-network/sortition-pools + + # TODO: Remove this step. We replace sortition pools for deployment on testnet + # with forked contracts that were tweaked to make operators joining the pool + # easier. This should never be used outside of the test environment. On + # test environment it should be used temporarily only. 
+ - name: Use Sortition Pool forked contracts + run: | + yarn upgrade @keep-network/sortition-pools@github:keep-network/sortition-pools#test-fork + + - name: Configure tenderly + env: + TENDERLY_TOKEN: ${{ secrets.TENDERLY_TOKEN }} + run: ./config_tenderly.sh + + - name: Deploy contracts + env: + CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} + ACCOUNTS_PRIVATE_KEYS: ${{ secrets.TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} + ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} + run: yarn deploy --network ${{ github.event.inputs.environment }} + + - name: Bump up package version + id: npm-version-bump + uses: keep-network/npm-version-bump@v2 + with: + work-dir: solidity/ecdsa + environment: ${{ github.event.inputs.environment }} + branch: ${{ github.ref }} + commit: ${{ github.sha }} + + - name: Publish to npm + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm publish --access=public --tag ${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} + + - name: Build and Publish Docker image + uses: ./.github/actions/docker-build-push + with: + environment: ${{ github.event.inputs.environment }} + imageName: keep-ecdsa-hardhat + context: ./solidity/ecdsa + push: true + gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} + + - name: Notify CI about completion of the workflow + uses: keep-network/ci/actions/notify-workflow-completed@v2 + env: + GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} + with: + module: "github.com/keep-network/keep-core/ecdsa" + url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + environment: ${{ github.event.inputs.environment }} + upstream_builds: ${{ github.event.inputs.upstream_builds }} + upstream_ref: ${{ github.event.inputs.upstream_ref }} + version: ${{ steps.npm-version-bump.outputs.version }} + + # This job is responsible for publishing packackes with slightly modified + # contracts. 
The modifications are there to help with the process of testing + # some features on the T Token Dashboard. The job starts only if workflow + # gets triggered by the `workflow_dispatch` event on the branch called + # `dapp-development`. + contracts-dapp-development-deployment-testnet: + needs: [contracts-build-and-test] + if: | + github.event_name == 'workflow_dispatch' + && github.ref == 'refs/heads/dapp-development' + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/ecdsa + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/ecdsa/yarn.lock + registry-url: "https://registry.npmjs.org" + + - name: Install dependencies + run: yarn install --frozen-lockfile + + - name: Get upstream packages versions + uses: keep-network/ci/actions/upstream-builds-query@v2 + id: upstream-builds-query + with: + upstream-builds: ${{ github.event.inputs.upstream_builds }} + query: | + threshold-contracts-version = github.com/threshold-network/solidity-contracts#version + random-beacon-version = github.com/keep-network/keep-core/random-beacon#version + + - name: Resolve latest contracts + run: | + yarn upgrade \ + @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ + @keep-network/random-beacon@${{ steps.upstream-builds-query.outputs.random-beacon-version }} \ + @keep-network/sortition-pools + + - name: Deploy contracts + env: + CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} + ACCOUNTS_PRIVATE_KEYS: ${{ secrets.DAPP_DEV_TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} + ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} + run: yarn deploy --network ${{ github.event.inputs.environment }} + + - name: Bump up package 
version + id: npm-version-bump + uses: keep-network/npm-version-bump@v2 + with: + work-dir: solidity/ecdsa + environment: dapp-dev-${{ github.event.inputs.environment }} + branch: ${{ github.ref }} + commit: ${{ github.sha }} + + - name: Publish to npm + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm publish --access=public --tag dapp-development-${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} + + - name: Build and Publish Docker image + uses: ./.github/actions/docker-build-push + with: + environment: ${{ github.event.inputs.environment }} + imageName: keep-ecdsa-hardhat-dapp-dev + context: ./solidity/ecdsa + push: true + gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} + + - name: Notify CI about completion of the workflow + uses: keep-network/ci/actions/notify-workflow-completed@v2 + env: + GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} + with: + module: "github.com/keep-network/keep-core/ecdsa" + url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + environment: ${{ github.event.inputs.environment }} + upstream_builds: ${{ github.event.inputs.upstream_builds }} + upstream_ref: dapp-development + version: ${{ steps.npm-version-bump.outputs.version }} diff --git a/.github/workflows/contracts-random-beacon.yml b/.github/workflows/contracts-random-beacon.yml new file mode 100644 index 0000000000..71460b2456 --- /dev/null +++ b/.github/workflows/contracts-random-beacon.yml @@ -0,0 +1,346 @@ +name: Solidity Random Beacon + +on: + # We intend to use `workflow dispatch` in two different situations/paths: + # 1. If a workflow will be manually dispatched from branch named + # `dapp-development`, workflow will deploy the contracts on the selected + # testnet and publish them to NPM registry with `dapp-dev-` + # suffix and `dapp-development-` tag. 
Such packages are meant + # to be used locally by the team developing Threshold Token dApp and may + # contain contracts that have different values from the ones used on + # mainnet. + # 2. If a workflow will be manually dispatched from a branch which name is not + # `dapp-development`, the workflow will deploy the contracts on the + # selected testnet and publish them to NPM registry with `` + # suffix and tag. Such packages will be used later to deploy public + # Threshold Token dApp on a testnet, with contracts resembling those used + # on mainnet. + workflow_dispatch: + inputs: + environment: + description: "Environment (network) for workflow execution, e.g. `sepolia`" + required: true + upstream_builds: + description: "Upstream builds" + required: false + upstream_ref: + description: "Git reference to checkout (e.g. branch name)" + required: false + default: "main" + +jobs: + contracts-detect-changes: + runs-on: ubuntu-latest + outputs: + path-filter: ${{ steps.set-output.outputs.path-filter }} + steps: + - name: Set path-filter output + id: set-output + run: echo "path-filter=true" >> $GITHUB_OUTPUT + + contracts-lint: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/random-beacon + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/random-beacon/yarn.lock + + - name: Install dependencies + run: yarn install --network-concurrency 1 + + - name: Build + run: yarn build + + - name: Lint + run: yarn lint + + contracts-slither: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/random-beacon + steps: + - uses: actions/checkout@v3 + + - uses: 
actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/random-beacon/yarn.lock + + - uses: actions/setup-python@v4 + with: + python-version: 3.10.8 + + - name: Install Solidity + env: + SOLC_VERSION: 0.8.9 # according to solidity.version in hardhat.config.js + run: | + pip3 install solc-select + solc-select install $SOLC_VERSION + solc-select use $SOLC_VERSION + + - name: Install Slither + env: + SLITHER_VERSION: 0.8.3 + run: pip3 install slither-analyzer==$SLITHER_VERSION + + - name: Install dependencies + run: yarn install --network-concurrency 1 + + - name: Run Slither + run: slither . + + contracts-build-and-test: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/random-beacon + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/random-beacon/yarn.lock + + - name: Install dependencies + run: yarn install --network-concurrency 1 + + - name: Build solidity contracts + run: yarn build + + - name: Run tests + if: github.ref != 'refs/heads/dapp-development' + run: yarn test + + contracts-deployment-dry-run: + needs: contracts-detect-changes + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/random-beacon + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # 
https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/random-beacon/yarn.lock + + - name: Install dependencies + run: yarn install --network-concurrency 1 --frozen-lockfile + + - name: Deploy contracts + run: yarn deploy:test + + - name: Build Docker Image + uses: ./.github/actions/docker-build-push + with: + imageName: keep-random-beacon-hardhat + context: ./solidity/random-beacon + push: false + + contracts-deployment-testnet: + needs: [contracts-build-and-test] + if: | + github.event_name == 'workflow_dispatch' + && github.ref != 'refs/heads/dapp-development' + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/random-beacon + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/random-beacon/yarn.lock + registry-url: "https://registry.npmjs.org" + + - name: Install dependencies + run: yarn install --network-concurrency 1 --frozen-lockfile + + - name: Get upstream packages versions + uses: keep-network/ci/actions/upstream-builds-query@v2 + id: upstream-builds-query + with: + upstream-builds: ${{ github.event.inputs.upstream_builds }} + query: | + threshold-contracts-version = github.com/threshold-network/solidity-contracts#version + + - name: Resolve latest contracts + run: | + yarn upgrade \ + @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ + @keep-network/sortition-pools + + # TODO: Remove this step. We replace sortition pools for deployment on testnet + # with forked contracts that were tweaked to make operators joining the pool + # easier. This should never be used outside of the test environment. 
On + # test environment it should be used temporarily only. + - name: Use Sortition Pool forked contracts + run: | + yarn upgrade @keep-network/sortition-pools@github:keep-network/sortition-pools#test-fork + + - name: Configure tenderly + env: + TENDERLY_TOKEN: ${{ secrets.TENDERLY_TOKEN }} + run: ./config_tenderly.sh + + - name: Deploy contracts + env: + CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} + ACCOUNTS_PRIVATE_KEYS: ${{ secrets.TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} + ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} + run: yarn deploy --network ${{ github.event.inputs.environment }} + + - name: Bump up package version + id: npm-version-bump + uses: keep-network/npm-version-bump@v2 + with: + work-dir: solidity/random-beacon + environment: ${{ github.event.inputs.environment }} + branch: ${{ github.ref }} + commit: ${{ github.sha }} + + - name: Publish to npm + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm publish --access=public --tag ${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} + + - name: Build and Publish Docker image + uses: ./.github/actions/docker-build-push + with: + environment: ${{ github.event.inputs.environment }} + imageName: keep-random-beacon-hardhat + context: ./solidity/random-beacon + push: true + gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} + + - name: Notify CI about completion of the workflow + uses: keep-network/ci/actions/notify-workflow-completed@v2 + env: + GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} + with: + module: "github.com/keep-network/keep-core/random-beacon" + url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + environment: ${{ github.event.inputs.environment }} + upstream_builds: ${{ github.event.inputs.upstream_builds }} + upstream_ref: ${{ github.event.inputs.upstream_ref }} + version: ${{ steps.npm-version-bump.outputs.version }} + + # This job is responsible for publishing packackes with slightly modified + # 
contracts. The modifications are there to help with the process of testing + # some features on the T Token Dashboard. The job starts only if workflow + # gets triggered by the `workflow_dispatch` event on the branch called + # `dapp-development`. + contracts-dapp-development-deployment-testnet: + needs: [contracts-build-and-test] + if: | + github.event_name == 'workflow_dispatch' + && github.ref == 'refs/heads/dapp-development' + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./solidity/random-beacon + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + # Using fixed version, because 18.16 was sometimes causing issues with + # artifacts generation during `hardhat compile` - see + # https://github.com/NomicFoundation/hardhat/issues/3877 + node-version: "18.15.0" + cache: "yarn" + cache-dependency-path: solidity/random-beacon/yarn.lock + registry-url: "https://registry.npmjs.org" + + - name: Install dependencies + run: yarn install --network-concurrency 1 --frozen-lockfile + + - name: Get upstream packages versions + uses: keep-network/ci/actions/upstream-builds-query@v2 + id: upstream-builds-query + with: + upstream-builds: ${{ github.event.inputs.upstream_builds }} + query: | + threshold-contracts-version = github.com/threshold-network/solidity-contracts#version + + - name: Resolve latest contracts + run: | + yarn upgrade \ + @threshold-network/solidity-contracts@${{ steps.upstream-builds-query.outputs.threshold-contracts-version }} \ + @keep-network/sortition-pools + + - name: Deploy contracts + env: + CHAIN_API_URL: ${{ secrets.SEPOLIA_ETH_HOSTNAME_HTTP }} + ACCOUNTS_PRIVATE_KEYS: ${{ secrets.DAPP_DEV_TESTNET_ETH_CONTRACT_OWNER_PRIVATE_KEY }} + ETHERSCAN_API_KEY: ${{ secrets.ETHERSCAN_API_KEY }} + run: yarn deploy --network ${{ github.event.inputs.environment }} + + - name: Bump up package version + id: npm-version-bump + uses: keep-network/npm-version-bump@v2 + with: + work-dir: solidity/random-beacon + environment: 
dapp-dev-${{ github.event.inputs.environment }} + branch: ${{ github.ref }} + commit: ${{ github.sha }} + + - name: Publish to npm + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm publish --access=public --tag dapp-development-${{ github.event.inputs.environment }} --network=${{ github.event.inputs.environment }} + + - name: Build and Publish Docker image + uses: ./.github/actions/docker-build-push + with: + environment: ${{ github.event.inputs.environment }} + imageName: keep-random-beacon-hardhat-dapp-dev + context: ./solidity/random-beacon + push: true + gcrJsonKey: ${{ secrets.KEEP_TEST_GCR_JSON_KEY }} + + - name: Notify CI about completion of the workflow + uses: keep-network/ci/actions/notify-workflow-completed@v2 + env: + GITHUB_TOKEN: ${{ secrets.CI_GITHUB_TOKEN }} + with: + module: "github.com/keep-network/keep-core/random-beacon" + url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + environment: ${{ github.event.inputs.environment }} + upstream_builds: ${{ github.event.inputs.upstream_builds }} + upstream_ref: dapp-development + version: ${{ steps.npm-version-bump.outputs.version }} From 7fb3e0a16e8381058512ec3ece50f641eafd1d97 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Fri, 12 Dec 2025 14:39:46 +0000 Subject: [PATCH 3/9] run on pr --- .github/workflows/contracts-ecdsa.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml index 7ec449b67c..2564db4c46 100644 --- a/.github/workflows/contracts-ecdsa.yml +++ b/.github/workflows/contracts-ecdsa.yml @@ -1,6 +1,10 @@ name: Solidity ECDSA on: + pull_request: + paths: + - "solidity/ecdsa/**" + - ".github/workflows/contracts-ecdsa.yml" # We intend to use `workflow dispatch` in two different situations/paths: # 1. 
If a workflow will be manually dispatched from branch named # `dapp-development`, workflow will deploy the contracts on the selected From e50cc206a24b6d9437d63161fb7483c6be855df9 Mon Sep 17 00:00:00 2001 From: lion <139767474+lionakhnazarov@users.noreply.github.com> Date: Fri, 12 Dec 2025 14:47:23 +0000 Subject: [PATCH 4/9] Update contracts-ecdsa.yml --- .github/workflows/contracts-ecdsa.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml index 2564db4c46..a09f78b74c 100644 --- a/.github/workflows/contracts-ecdsa.yml +++ b/.github/workflows/contracts-ecdsa.yml @@ -2,9 +2,6 @@ name: Solidity ECDSA on: pull_request: - paths: - - "solidity/ecdsa/**" - - ".github/workflows/contracts-ecdsa.yml" # We intend to use `workflow dispatch` in two different situations/paths: # 1. If a workflow will be manually dispatched from branch named # `dapp-development`, workflow will deploy the contracts on the selected From bc75b651d7a0a39c82709dc43d5a09270556c2ec Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Fri, 12 Dec 2025 15:21:38 +0000 Subject: [PATCH 5/9] revert workflow dispatch --- .github/workflows/contracts-ecdsa.yml | 3 +-- .github/workflows/contracts-random-beacon.yml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml index a09f78b74c..2f3efdfe8b 100644 --- a/.github/workflows/contracts-ecdsa.yml +++ b/.github/workflows/contracts-ecdsa.yml @@ -1,7 +1,6 @@ name: Solidity ECDSA on: - pull_request: # We intend to use `workflow dispatch` in two different situations/paths: # 1. 
If a workflow will be manually dispatched from branch named # `dapp-development`, workflow will deploy the contracts on the selected @@ -350,4 +349,4 @@ jobs: environment: ${{ github.event.inputs.environment }} upstream_builds: ${{ github.event.inputs.upstream_builds }} upstream_ref: dapp-development - version: ${{ steps.npm-version-bump.outputs.version }} + version: ${{ steps.npm-version-bump.outputs.version }} \ No newline at end of file diff --git a/.github/workflows/contracts-random-beacon.yml b/.github/workflows/contracts-random-beacon.yml index 71460b2456..6402472d77 100644 --- a/.github/workflows/contracts-random-beacon.yml +++ b/.github/workflows/contracts-random-beacon.yml @@ -343,4 +343,4 @@ jobs: environment: ${{ github.event.inputs.environment }} upstream_builds: ${{ github.event.inputs.upstream_builds }} upstream_ref: dapp-development - version: ${{ steps.npm-version-bump.outputs.version }} + version: ${{ steps.npm-version-bump.outputs.version }} \ No newline at end of file From 7af61bf5e801bf6a5efa4b045627f1b47a3d67b8 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Fri, 12 Dec 2025 16:11:57 +0000 Subject: [PATCH 6/9] touch workflow --- .github/workflows/contracts-ecdsa.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml index 2f3efdfe8b..f8eb71ffd7 100644 --- a/.github/workflows/contracts-ecdsa.yml +++ b/.github/workflows/contracts-ecdsa.yml @@ -1,6 +1,7 @@ name: Solidity ECDSA on: + pull_request: # We intend to use `workflow dispatch` in two different situations/paths: # 1. 
If a workflow will be manually dispatched from branch named # `dapp-development`, workflow will deploy the contracts on the selected From f81d510b2f4caa0bb067b34c9e7a4c498d621104 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Wed, 1 Apr 2026 14:35:33 +0100 Subject: [PATCH 7/9] merge --- .github/workflows/contracts-ecdsa.yml | 4 +- .github/workflows/contracts-random-beacon.yml | 3 + .github/workflows/release.yml | 59 ++ .gitignore | 3 + cmd/start.go | 42 +- config/peers_test.go | 3 +- docs-v1/run-random-beacon.adoc | 8 +- docs/performance-metrics.adoc | 279 +++++++ docs/resources/docker-start-mainnet-sample | 2 +- go.mod | 1 + go.sum | 4 +- .../keep-maintainer/kustomization.yaml | 2 +- pkg/clientinfo/clientinfo.go | 9 +- pkg/clientinfo/metrics.go | 12 +- pkg/clientinfo/performance.go | 707 ++++++++++++++++++ pkg/clientinfo/performance_test.go | 374 +++++++++ pkg/clientinfo/rpc_health.go | 293 ++++++++ pkg/maintainer/spv/deposit_sweep.go | 26 + pkg/maintainer/spv/deposit_sweep_test.go | 1 + pkg/maintainer/spv/redemptions.go | 35 + pkg/maintainer/spv/redemptions_test.go | 1 + pkg/maintainer/spv/spv.go | 29 + pkg/net/libp2p/authenticated_connection.go | 35 + .../libp2p/authenticated_connection_test.go | 3 + pkg/net/libp2p/channel.go | 73 +- pkg/net/libp2p/channel_manager.go | 28 + pkg/net/libp2p/libp2p.go | 121 ++- pkg/net/libp2p/transport.go | 42 +- pkg/tbtc/coordination.go | 48 +- pkg/tbtc/coordination_test.go | 34 +- pkg/tbtc/coordination_window_metrics.go | 434 +++++++++++ pkg/tbtc/deposit_sweep.go | 61 ++ pkg/tbtc/dkg.go | 47 +- pkg/tbtc/node.go | 175 +++++ pkg/tbtc/redemption.go | 52 ++ pkg/tbtc/signing.go | 48 ++ pkg/tbtc/tbtc.go | 25 + pkg/tbtc/wallet.go | 93 ++- pkg/tbtcpg/redemptions.go | 17 + pkg/tbtcpg/tbtcpg.go | 12 + scripts/build.sh | 6 +- 41 files changed, 3207 insertions(+), 44 deletions(-) create mode 100644 docs/performance-metrics.adoc create mode 100644 pkg/clientinfo/performance.go create mode 100644 pkg/clientinfo/performance_test.go create 
mode 100644 pkg/clientinfo/rpc_health.go create mode 100644 pkg/tbtc/coordination_window_metrics.go diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml index f8eb71ffd7..04804cf70f 100644 --- a/.github/workflows/contracts-ecdsa.yml +++ b/.github/workflows/contracts-ecdsa.yml @@ -2,7 +2,9 @@ name: Solidity ECDSA on: pull_request: - # We intend to use `workflow dispatch` in two different situations/paths: + branches: + - main + # We intend to use `workflow dispatch` in two different situations/paths # 1. If a workflow will be manually dispatched from branch named # `dapp-development`, workflow will deploy the contracts on the selected # testnet and publish them to NPM registry with `dapp-dev-` diff --git a/.github/workflows/contracts-random-beacon.yml b/.github/workflows/contracts-random-beacon.yml index 6402472d77..885376c9dd 100644 --- a/.github/workflows/contracts-random-beacon.yml +++ b/.github/workflows/contracts-random-beacon.yml @@ -1,6 +1,9 @@ name: Solidity Random Beacon on: + pull_request: + branches: + - main # We intend to use `workflow dispatch` in two different situations/paths: # 1. 
If a workflow will be manually dispatched from branch named # `dapp-development`, workflow will deploy the contracts on the selected diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7813775382..7df458d649 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -114,3 +114,62 @@ jobs: run: | rm -rf /tmp/.buildx-cache mv /tmp/.buildx-cache-new /tmp/.buildx-cache + + publish-docker-images: + needs: build-and-release + runs-on: ubuntu-latest + environment: keep-production # Requires release-admin team approval + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Resolve versions + run: | + echo "version=$(git describe --tags --match 'v[0-9]*' HEAD)" >> $GITHUB_ENV + echo "revision=$(git rev-parse --short HEAD)" >> $GITHUB_ENV + echo "dockerhub_org=${DOCKERHUB_ORG:-thresholdnetwork}" >> $GITHUB_ENV + env: + DOCKERHUB_ORG: ${{ secrets.DOCKERHUB_ORG }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-docker-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx-docker- + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and Push Docker Images + uses: docker/build-push-action@v5 + with: + target: runtime-docker + tags: | + ${{ env.dockerhub_org }}/keep-client:latest + ${{ env.dockerhub_org }}/keep-client:${{ env.version }} + ${{ env.dockerhub_org }}/keep-client:mainnet + labels: | + version=${{ env.version }} + revision=${{ env.revision }} + build-args: | + ENVIRONMENT=mainnet + VERSION=${{ env.version }} + REVISION=${{ env.revision }} + push: true + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-docker-new + context: . 
+ + - name: Move Docker cache + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-docker-new /tmp/.buildx-cache \ No newline at end of file diff --git a/.gitignore b/.gitignore index 84cd19c28e..30d1ee50a4 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,6 @@ tmp/ # Output directory out/ +data/ +logs/ +storage/ \ No newline at end of file diff --git a/cmd/start.go b/cmd/start.go index 691d666e2f..cfaece274c 100644 --- a/cmd/start.go +++ b/cmd/start.go @@ -3,6 +3,8 @@ package cmd import ( "context" "fmt" + "time" + "github.com/keep-network/keep-core/pkg/tbtcpg" "github.com/keep-network/keep-common/pkg/persistence" @@ -87,6 +89,23 @@ func start(cmd *cobra.Command) error { blockCounter, ) + // Wire performance metrics into network provider if available + var perfMetrics *clientinfo.PerformanceMetrics + if clientInfoRegistry != nil { + perfMetrics = clientinfo.NewPerformanceMetrics(ctx, clientInfoRegistry) + // Type assert to libp2p provider to set metrics recorder + // The provider struct is not exported, so we use interface assertion + if setter, ok := netProvider.(interface { + SetMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + }) + }); ok { + setter.SetMetricsRecorder(perfMetrics) + } + } + // Initialize beacon and tbtc only for non-bootstrap nodes. // Skip initialization for bootstrap nodes as they are only used for network // discovery. 
@@ -106,12 +125,22 @@ func start(cmd *cobra.Command) error { scheduler := generator.StartScheduler() - clientInfoRegistry.ObserveBtcConnectivity( - btcChain, - clientConfig.ClientInfo.BitcoinMetricsTick, - ) - - clientInfoRegistry.RegisterBtcChainInfoSource(btcChain) + if clientInfoRegistry != nil { + clientInfoRegistry.ObserveBtcConnectivity( + btcChain, + clientConfig.ClientInfo.BitcoinMetricsTick, + ) + + clientInfoRegistry.RegisterBtcChainInfoSource(btcChain) + + rpcHealthChecker := clientinfo.NewRPCHealthChecker( + clientInfoRegistry, + blockCounter, + btcChain, + clientConfig.ClientInfo.RPCHealthCheckInterval, + ) + rpcHealthChecker.Start(ctx) + } err = beacon.Initialize( ctx, @@ -140,6 +169,7 @@ func start(cmd *cobra.Command) error { proposalGenerator, clientConfig.Tbtc, clientInfoRegistry, + perfMetrics, // Pass the existing performance metrics instance to avoid duplicate registrations ) if err != nil { return fmt.Errorf("error initializing TBTC: [%v]", err) diff --git a/config/peers_test.go b/config/peers_test.go index 692496ac11..7fe66d32d3 100644 --- a/config/peers_test.go +++ b/config/peers_test.go @@ -24,9 +24,8 @@ func TestResolvePeers(t *testing.T) { "sepolia network": { network: network.Testnet, expectedPeers: []string{ - "/dns4/bootstrap-0.test.keep.network/tcp/3919/ipfs/16Uiu2HAmCcfVpHwfBKNFbQuhvGuFXHVLQ65gB4sJm7HyrcZuLttH", - "/dns4/bootstrap-1.test.keep.network/tcp/3919/ipfs/16Uiu2HAm3eJtyFKAttzJ85NLMromHuRg4yyum3CREMf6CHBBV6KY", "/dns4/bst-a01.test.keep.boar.network/tcp/6001/ipfs/16Uiu2HAmSLDSahiKyTbCNNu8wJmZAsiKF7wuYJ8mogY8ZuAG1jhu", + "/dns4/keep-validator-0.eks-ap-northeast-2-secure.staging.staked.cloud/tcp/3919/ipfs/16Uiu2HAm77eSvRq5ioD4J8VFPkq3bJHBEHkssCuiFkgAoABwjo2S", }, }, "developer network": { diff --git a/docs-v1/run-random-beacon.adoc b/docs-v1/run-random-beacon.adoc index 27a6680d39..34cb7fb715 100644 --- a/docs-v1/run-random-beacon.adoc +++ b/docs-v1/run-random-beacon.adoc @@ -267,13 +267,13 @@ See the 
link:development#building[building] section in our developer docs. === Get Image -https://hub.docker.com/r/keepnetwork/keep-client/ +https://hub.docker.com/r/thresholdnetwork/keep-client/ *Latest:* -`docker pull keepnetwork/keep-client` +`docker pull thresholdnetwork/keep-client` *Tag:* -`docker pull keepnetwork/keep-client:` +`docker pull thresholdnetwork/keep-client:` === Run Image This is a sample run command for illustration purposes only. @@ -293,7 +293,7 @@ docker run -d \ --log-opt max-size=100m \ --log-opt max-file=3 \ -p 3919:3919 \ -keepnetwork/keep-client: --config /mnt/keep-client/config/keep-client-config.toml start +thresholdnetwork/keep-client: --config /mnt/keep-client/config/keep-client-config.toml start ---- == Deployment Considerations diff --git a/docs/performance-metrics.adoc b/docs/performance-metrics.adoc new file mode 100644 index 0000000000..af2a7132bc --- /dev/null +++ b/docs/performance-metrics.adoc @@ -0,0 +1,279 @@ += Performance Metrics + +The Keep Core client exposes performance metrics that can be used to monitor +the health and performance of node operations. These metrics are available +through the `/metrics` endpoint when the client info endpoint is configured. + +== Metrics Endpoint + +Metrics are exposed via HTTP at the `/metrics` endpoint on the port configured +in the `ClientInfo` section of the configuration file (default: `9601`). 
+ +Example: +---- +curl http://localhost:9601/metrics +---- + +== Metric Types + +The client uses three types of metrics: + +* **Counters**: Cumulative counts that only increase (e.g., total operations) +* **Gauges**: Current values that can go up or down (e.g., queue sizes, active operations) +* **Durations**: Time measurements for operations (exposed as average duration and count) + +== Available Metrics + +=== Distributed Key Generation (DKG) Metrics + +==== `performance_dkg_joined_total` +*Type*: Counter +*Description*: Total number of times the node has joined a DKG process +*Labels*: None + +==== `performance_dkg_failed_total` +*Type*: Counter +*Description*: Total number of failed DKG attempts +*Labels*: None + +==== `performance_dkg_duration_seconds` +*Type*: Gauge (average) +*Description*: Average duration of DKG operations in seconds +*Labels*: None + +==== `performance_dkg_duration_seconds_count` +*Type*: Gauge +*Description*: Total number of DKG operations completed +*Labels*: None + +==== `performance_dkg_validation_total` +*Type*: Counter +*Description*: Total number of DKG result validations performed +*Labels*: None + +==== `performance_dkg_challenges_submitted_total` +*Type*: Counter +*Description*: Total number of DKG challenges submitted on-chain +*Labels*: None + +==== `performance_dkg_approvals_submitted_total` +*Type*: Counter +*Description*: Total number of DKG approvals submitted on-chain +*Labels*: None + +=== Signing Operation Metrics + +==== `performance_signing_operations_total` +*Type*: Counter +*Description*: Total number of signing operations attempted +*Labels*: None + +==== `performance_signing_success_total` +*Type*: Counter +*Description*: Total number of successful signing operations +*Labels*: None + +==== `performance_signing_failed_total` +*Type*: Counter +*Description*: Total number of failed signing operations +*Labels*: None + +==== `performance_signing_duration_seconds` +*Type*: Gauge (average) +*Description*: Average 
duration of signing operations in seconds +*Labels*: None + +==== `performance_signing_duration_seconds_count` +*Type*: Gauge +*Description*: Total number of signing operations completed +*Labels*: None + +==== `performance_signing_timeouts_total` +*Type*: Counter +*Description*: Total number of signing operations that timed out +*Labels*: None + +=== Wallet Action Metrics + +==== `performance_wallet_actions_total` +*Type*: Counter +*Description*: Total number of wallet actions dispatched +*Labels*: None + +==== `performance_wallet_action_success_total` +*Type*: Counter +*Description*: Total number of successfully completed wallet actions +*Labels*: None + +==== `performance_wallet_action_failed_total` +*Type*: Counter +*Description*: Total number of failed wallet actions +*Labels*: None + +==== `performance_wallet_action_duration_seconds` +*Type*: Gauge (average) +*Description*: Average duration of wallet actions in seconds +*Labels*: None + +==== `performance_wallet_action_duration_seconds_count` +*Type*: Gauge +*Description*: Total number of wallet actions completed +*Labels*: None + +==== `performance_wallet_heartbeat_failures_total` +*Type*: Counter +*Description*: Total number of heartbeat failures across all wallets +*Labels*: None + +==== Per-Action Type Metrics + +The following metrics are tracked separately for each wallet action type: +`heartbeat`, `deposit_sweep`, `redemption`, `moving_funds`, `moved_funds_sweep`. 
+ +For each action type, the following metrics are available: + +===== `performance_wallet_action_{action_type}_total` +*Type*: Counter +*Description*: Total number of {action_type} wallet actions dispatched +*Example*: `performance_wallet_action_heartbeat_total`, `performance_wallet_action_deposit_sweep_total` +*Labels*: None + +===== `performance_wallet_action_{action_type}_success_total` +*Type*: Counter +*Description*: Total number of successfully completed {action_type} wallet actions +*Example*: `performance_wallet_action_heartbeat_success_total`, `performance_wallet_action_redemption_success_total` +*Labels*: None + +===== `performance_wallet_action_{action_type}_failed_total` +*Type*: Counter +*Description*: Total number of failed {action_type} wallet actions +*Example*: `performance_wallet_action_heartbeat_failed_total`, `performance_wallet_action_moving_funds_failed_total` +*Labels*: None + +===== `performance_wallet_action_{action_type}_duration_seconds` +*Type*: Gauge (average) +*Description*: Average duration of {action_type} wallet actions in seconds +*Example*: `performance_wallet_action_heartbeat_duration_seconds`, `performance_wallet_action_deposit_sweep_duration_seconds` +*Labels*: None + +=== Wallet Dispatcher Metrics + +==== `performance_wallet_dispatcher_active_actions` +*Type*: Gauge +*Description*: Current number of wallets with active actions being executed +*Labels*: None +*Note*: This metric helps identify when wallets are busy and cannot accept new actions + +==== `performance_wallet_dispatcher_rejected_total` +*Type*: Counter +*Description*: Total number of wallet actions rejected because the wallet was busy +*Labels*: None +*Note*: High values indicate that wallets are frequently busy and actions may need retry logic + +=== Coordination Metrics + +==== `performance_coordination_windows_detected_total` +*Type*: Counter +*Description*: Total number of coordination windows detected +*Labels*: None + +==== 
`performance_coordination_procedures_executed_total` +*Type*: Counter +*Description*: Total number of coordination procedures executed +*Labels*: None + +==== `performance_coordination_failed_total` +*Type*: Counter +*Description*: Total number of failed coordination procedures +*Labels*: None + +==== `performance_coordination_duration_seconds` +*Type*: Gauge (average) +*Description*: Average duration of coordination procedures in seconds +*Labels*: None + +==== `performance_coordination_duration_seconds_count` +*Type*: Gauge +*Description*: Total count of coordination duration samples +*Labels*: None + +=== Network Metrics + +==== `performance_incoming_message_queue_size` +*Type*: Gauge +*Description*: Current size of the incoming message queue +*Labels*: `channel` (channel name) +*Note*: Maximum queue size is 4096. Values approaching this limit indicate message processing bottlenecks. + +==== `performance_message_handler_queue_size` +*Type*: Gauge +*Description*: Current size of message handler queues +*Labels*: `channel` (channel name), `handler` (handler ID) +*Note*: Maximum queue size per handler is 512. 
+ +==== `performance_peer_connections_total` +*Type*: Counter +*Description*: Total number of peer connections established +*Labels*: None + +==== `performance_peer_disconnections_total` +*Type*: Counter +*Description*: Total number of peer disconnections +*Labels*: None + +==== `performance_message_broadcast_total` +*Type*: Counter +*Description*: Total number of messages broadcast to the network +*Labels*: None + +==== `performance_message_received_total` +*Type*: Counter +*Description*: Total number of messages received from the network +*Labels*: None + +==== `performance_ping_test_total` +*Type*: Counter +*Description*: Total number of ping tests performed +*Labels*: None + +==== `performance_ping_test_success_total` +*Type*: Counter +*Description*: Total number of successful ping tests +*Labels*: None + +==== `performance_ping_test_failed_total` +*Type*: Counter +*Description*: Total number of failed ping tests +*Labels*: None + +=== Relay Entry Metrics (Beacon Node) + +==== `performance_relay_entry_generation_total` +*Type*: Counter +*Description*: Total number of relay entry generation attempts +*Labels*: None + +==== `performance_relay_entry_success_total` +*Type*: Counter +*Description*: Total number of successful relay entries generated +*Labels*: None + +==== `performance_relay_entry_failed_total` +*Type*: Counter +*Description*: Total number of failed relay entry generations +*Labels*: None + +==== `performance_relay_entry_duration_seconds` +*Type*: Gauge (average) +*Description*: Average duration of relay entry generation in seconds +*Labels*: None + +==== `performance_relay_entry_duration_seconds_count` +*Type*: Gauge +*Description*: Total count of relay entry duration samples +*Labels*: None + +==== `performance_relay_entry_timeout_reported_total` +*Type*: Counter +*Description*: Total number of relay entry timeouts reported on-chain +*Labels*: None \ No newline at end of file diff --git a/docs/resources/docker-start-mainnet-sample 
b/docs/resources/docker-start-mainnet-sample index 6a12fcb9cc..3a428281eb 100644 --- a/docs/resources/docker-start-mainnet-sample +++ b/docs/resources/docker-start-mainnet-sample @@ -15,7 +15,7 @@ docker run --detach \ --log-opt max-file=3 \ -p 3919:3919 \ -p 9601:9601 \ - keepnetwork/keep-client:latest \ + thresholdnetwork/keep-client:latest \ start \ --ethereum.url $ETHEREUM_WS_URL \ --ethereum.keyFile /mnt/keep/config/$OPERATOR_KEY_FILE_NAME \ diff --git a/go.mod b/go.mod index 8e99078976..de5b555654 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ replace ( github.com/btcsuite/btcd => github.com/btcsuite/btcd v0.22.3 github.com/btcsuite/btcd/v2 => github.com/btcsuite/btcd v0.23.4 github.com/checksum0/go-electrum => github.com/keep-network/go-electrum v0.0.0-20240206170935-6038cb594daa + github.com/keep-network/keep-common => github.com/threshold-network/keep-common v1.7.1-tlabs.0 // Temporary replacement until v1.28.2 is released containing `protodelim` package. // See https://github.com/protocolbuffers/protobuf-go/commit/fb0abd915897428ccfdd6b03b48ad8219751ee54 google.golang.org/protobuf/dev => google.golang.org/protobuf v1.28.2-0.20220831092852-f930b1dc76e8 diff --git a/go.sum b/go.sum index 74807931a2..3f16b9d0e4 100644 --- a/go.sum +++ b/go.sum @@ -400,8 +400,6 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/keep-network/go-electrum v0.0.0-20240206170935-6038cb594daa h1:AKTJr+STc4rP9NcN2ppP9Zft3GbYechFW8q/S8UNQrQ= github.com/keep-network/go-electrum v0.0.0-20240206170935-6038cb594daa/go.mod h1:eiMFzdvS+x8Voi0bmiZtVfJ3zMNRUnPNDnhCQR0tudo= -github.com/keep-network/keep-common v1.7.1-0.20240424094333-bd36cd25bb74 h1:cG2BiQJj6+v86duIAuDd6sPJZqLVWaOPxzt3nWQQaAo= -github.com/keep-network/keep-common v1.7.1-0.20240424094333-bd36cd25bb74/go.mod h1:OmaZrnZODf6RJ95yUn2kBjy8Z4u2npPJQkSiyimluto= github.com/kisielk/errcheck 
v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -719,6 +717,8 @@ github.com/supranational/blst v0.3.11/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= +github.com/threshold-network/keep-common v1.7.1-tlabs.0 h1:E3Qy3yoeA3+9Ybi08Bb1Xm1D2fFxoberQwUjw+UEK8k= +github.com/threshold-network/keep-common v1.7.1-tlabs.0/go.mod h1:OmaZrnZODf6RJ95yUn2kBjy8Z4u2npPJQkSiyimluto= github.com/threshold-network/tss-lib v0.0.0-20230901144531-2e712689cfbe h1:dOKhoYxZjXwFIyGnxgU+Sa1obZPMHRhu6e44oOLkzU4= github.com/threshold-network/tss-lib v0.0.0-20230901144531-2e712689cfbe/go.mod h1:o3zAAo7A88ZJnCE1qpjy1hTqPn+GPQlxRsj8soz14UU= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= diff --git a/infrastructure/kube/keep-prd/keep-maintainer/kustomization.yaml b/infrastructure/kube/keep-prd/keep-maintainer/kustomization.yaml index 33424f8422..a74c65a959 100644 --- a/infrastructure/kube/keep-prd/keep-maintainer/kustomization.yaml +++ b/infrastructure/kube/keep-prd/keep-maintainer/kustomization.yaml @@ -10,7 +10,7 @@ commonLabels: images: - name: keep-maintainer - newName: keepnetwork/keep-client + newName: thresholdnetwork/keep-client newTag: v2.1.0 configMapGenerator: diff --git a/pkg/clientinfo/clientinfo.go b/pkg/clientinfo/clientinfo.go index 2f3c6c6f41..7848aa0ec7 100644 --- a/pkg/clientinfo/clientinfo.go +++ b/pkg/clientinfo/clientinfo.go @@ -13,10 +13,11 @@ var logger = log.Logger("keep-clientinfo") // Config stores 
configuration for the client info. type Config struct { - Port int - NetworkMetricsTick time.Duration - EthereumMetricsTick time.Duration - BitcoinMetricsTick time.Duration + Port int + NetworkMetricsTick time.Duration + EthereumMetricsTick time.Duration + BitcoinMetricsTick time.Duration + RPCHealthCheckInterval time.Duration } // Registry wraps keep-common clientinfo registry and exposes additional diff --git a/pkg/clientinfo/metrics.go b/pkg/clientinfo/metrics.go index 85afdf6b67..c80755cab8 100644 --- a/pkg/clientinfo/metrics.go +++ b/pkg/clientinfo/metrics.go @@ -2,6 +2,7 @@ package clientinfo import ( "fmt" + "strings" "time" "github.com/keep-network/keep-common/pkg/clientinfo" @@ -159,7 +160,16 @@ func (r *Registry) observe( ) { observer, err := r.NewMetricGaugeObserver(name, clientinfo.MetricObserverInput(input)) if err != nil { - logger.Warnf("could not create gauge observer [%v]", name) + // Check if the error is due to metric already existing (expected in some cases) + errStr := err.Error() + if strings.Contains(errStr, "already exists") { + // Metric already registered, this is expected if registerAllMetrics is called multiple times + // or if the same metric is registered in multiple places. Log at debug level. + logger.Debugf("metric [%v] already registered, skipping duplicate registration: %v", name, err) + return + } + // For other errors, log as warning + logger.Warnf("could not create gauge observer [%v]: %v", name, err) return } diff --git a/pkg/clientinfo/performance.go b/pkg/clientinfo/performance.go new file mode 100644 index 0000000000..219216c22d --- /dev/null +++ b/pkg/clientinfo/performance.go @@ -0,0 +1,707 @@ +package clientinfo + +import ( + "context" + "fmt" + "math" + "runtime" + "sync" + "time" + + // gopsutil provides cross-platform system and process utilities. + // It supports linux/amd64 and darwin/amd64 (the target platforms for this codebase), + // as well as Windows, FreeBSD, OpenBSD, and Solaris. 
+ "github.com/shirou/gopsutil/cpu" + "github.com/shirou/gopsutil/mem" +) + +// PerformanceMetricsRecorder provides a simple interface for recording +// performance metrics. It can be nil if metrics are not enabled. +type PerformanceMetricsRecorder interface { + // IncrementCounter increments a counter metric + IncrementCounter(name string, value float64) + // RecordDuration records a duration in seconds + RecordDuration(name string, duration time.Duration) + // SetGauge sets a gauge metric value + SetGauge(name string, value float64) + // GetCounterValue returns current counter value + GetCounterValue(name string) float64 + // GetGaugeValue returns current gauge value + GetGaugeValue(name string) float64 +} + +// PerformanceMetrics provides a way to record performance-related metrics +// including operation counts, durations, and queue sizes. +// It implements PerformanceMetricsRecorder interface. +type PerformanceMetrics struct { + registry *Registry + cancel context.CancelFunc + + // Counters track cumulative counts of events + countersMutex sync.RWMutex + counters map[string]*counter + + // Histograms track distributions of values (like durations) + histogramsMutex sync.RWMutex + histograms map[string]*histogram + + // Gauges track current values (like queue sizes) + gaugesMutex sync.RWMutex + gauges map[string]*gauge +} + +// Ensure PerformanceMetrics implements PerformanceMetricsRecorder +var _ PerformanceMetricsRecorder = (*PerformanceMetrics)(nil) + +type counter struct { + value float64 + mutex sync.RWMutex +} + +type histogram struct { + buckets map[float64]float64 // bucket upper bound -> count + mutex sync.RWMutex +} + +type gauge struct { + value float64 + mutex sync.RWMutex +} + +// Histogram bucket keys for internal tracking +const ( + histogramCountKey = -1.0 + histogramSumKey = -2.0 +) + +// NewPerformanceMetrics creates a new performance metrics instance. 
+func NewPerformanceMetrics(ctx context.Context, registry *Registry) *PerformanceMetrics { + ctx, cancel := context.WithCancel(ctx) + pm := &PerformanceMetrics{ + registry: registry, + cancel: cancel, + counters: make(map[string]*counter), + histograms: make(map[string]*histogram), + gauges: make(map[string]*gauge), + } + + // Register all metrics upfront with 0 values so they appear in /metrics endpoint + pm.registerAllMetrics() + + // Start observing system metrics + go pm.observeSystemMetrics(ctx) + + return pm +} + +// Stop stops the performance metrics collection goroutines. +func (pm *PerformanceMetrics) Stop() { + pm.cancel() +} + +// registerAllMetrics registers all performance metrics with 0 values +// so they appear in the /metrics endpoint even before operations occur. +func (pm *PerformanceMetrics) registerAllMetrics() { + // Register all counter metrics with 0 initial value + counters := []string{ + MetricDKGJoinedTotal, + MetricDKGFailedTotal, + MetricDKGValidationTotal, + MetricDKGChallengesSubmittedTotal, + MetricDKGApprovalsSubmittedTotal, + MetricSigningOperationsTotal, + MetricSigningSuccessTotal, + MetricSigningFailedTotal, + MetricSigningTimeoutsTotal, + MetricRedemptionExecutionsTotal, + MetricRedemptionExecutionsSuccessTotal, + MetricRedemptionExecutionsFailedTotal, + MetricRedemptionProofSubmissionsTotal, + MetricRedemptionProofSubmissionsSuccessTotal, + MetricRedemptionProofSubmissionsFailedTotal, + MetricWalletActionsTotal, + MetricWalletActionSuccessTotal, + MetricWalletActionFailedTotal, + MetricWalletHeartbeatFailuresTotal, + MetricCoordinationWindowsDetectedTotal, + MetricCoordinationProceduresExecutedTotal, + MetricCoordinationFailedTotal, + MetricCoordinationLeaderTimeoutTotal, + MetricPeerConnectionsTotal, + MetricPeerDisconnectionsTotal, + MetricMessageBroadcastTotal, + MetricMessageReceivedTotal, + MetricPingTestsTotal, + MetricPingTestSuccessTotal, + MetricPingTestFailedTotal, + MetricNetworkJoinRequestsTotal, + 
MetricNetworkJoinRequestsSuccessTotal, + MetricNetworkJoinRequestsFailedTotal, + MetricFirewallRejectionsTotal, + MetricWalletDispatcherRejectedTotal, + } + + // First, initialize all counters in the map + pm.countersMutex.Lock() + for _, name := range counters { + pm.counters[name] = &counter{value: 0} + } + pm.countersMutex.Unlock() + + // Then, register observers (this prevents concurrent map read/write) + for _, name := range counters { + metricName := name // Capture for closure + pm.registry.ObserveApplicationSource( + "performance", + map[string]Source{ + metricName: func() float64 { + pm.countersMutex.RLock() + c, exists := pm.counters[metricName] + pm.countersMutex.RUnlock() + if !exists { + return 0 + } + c.mutex.RLock() + defer c.mutex.RUnlock() + return c.value + }, + }, + ) + } + + // Register per-action type wallet metrics + // For each action type, register: total, success_total, failed_total, duration_seconds + for _, actionType := range GetAllWalletActionTypes() { + actionCounters := []string{ + WalletActionMetricName(actionType, "total"), + WalletActionMetricName(actionType, "success_total"), + WalletActionMetricName(actionType, "failed_total"), + } + for _, name := range actionCounters { + pm.countersMutex.Lock() + pm.counters[name] = &counter{value: 0} + pm.countersMutex.Unlock() + metricName := name // Capture for closure + pm.registry.ObserveApplicationSource( + "performance", + map[string]Source{ + metricName: func() float64 { + pm.countersMutex.RLock() + c, exists := pm.counters[metricName] + pm.countersMutex.RUnlock() + if !exists { + return 0 + } + c.mutex.RLock() + defer c.mutex.RUnlock() + return c.value + }, + }, + ) + } + + // Register duration metric for this action type + durationName := WalletActionMetricName(actionType, "duration_seconds") + pm.histogramsMutex.Lock() + pm.histograms[durationName] = &histogram{ + buckets: make(map[float64]float64), + } + pm.histogramsMutex.Unlock() + durationMetricName := durationName // Capture for 
closure + pm.registry.ObserveApplicationSource( + "performance", + map[string]Source{ + durationMetricName: func() float64 { + pm.histogramsMutex.RLock() + h, exists := pm.histograms[durationMetricName] + pm.histogramsMutex.RUnlock() + if !exists { + return 0 + } + h.mutex.RLock() + defer h.mutex.RUnlock() + count := h.buckets[histogramCountKey] + if count == 0 { + return 0 + } + return h.buckets[histogramSumKey] / count // average + }, + }, + ) + } + + // Register all duration/histogram metrics with 0 initial values + // Note: These use the actual metric names as used in the codebase + durationMetrics := []string{ + MetricDKGDurationSeconds, + MetricSigningDurationSeconds, + MetricRedemptionActionDurationSeconds, + MetricWalletActionDurationSeconds, + MetricCoordinationDurationSeconds, + MetricCoordinationWindowDurationSeconds, + MetricPingTestDurationSeconds, + MetricNetworkHandshakeDurationSeconds, + } + + // First, initialize all histograms in the map + pm.histogramsMutex.Lock() + for _, name := range durationMetrics { + pm.histograms[name] = &histogram{ + buckets: make(map[float64]float64), + } + } + pm.histogramsMutex.Unlock() + + // Then, register observers (this prevents concurrent map read/write) + for _, name := range durationMetrics { + metricName := name + sources := map[string]Source{ + metricName: func() float64 { + pm.histogramsMutex.RLock() + h, exists := pm.histograms[metricName] + pm.histogramsMutex.RUnlock() + if !exists { + return 0 + } + h.mutex.RLock() + defer h.mutex.RUnlock() + count := h.buckets[histogramCountKey] + if count == 0 { + return 0 + } + return h.buckets[histogramSumKey] / count // average + }, + } + // Skip _count variant for ping_test_duration_seconds + if metricName != "ping_test_duration_seconds" { + sources[metricName+"_count"] = func() float64 { + pm.histogramsMutex.RLock() + h, exists := pm.histograms[metricName] + pm.histogramsMutex.RUnlock() + if !exists { + return 0 + } + h.mutex.RLock() + defer h.mutex.RUnlock() + 
return h.buckets[histogramCountKey] + } + } + pm.registry.ObserveApplicationSource("performance", sources) + } + + // Register all gauge metrics with 0 initial value + gauges := []string{ + MetricWalletDispatcherActiveActions, + MetricIncomingMessageQueueSize, + MetricMessageHandlerQueueSize, + MetricSigningAttemptsPerOperation, + MetricCPUUtilization, + MetricMemoryUsageMB, + MetricGoroutineCount, + MetricCPULoadPercent, + MetricRAMUtilizationPercent, + MetricSwapUtilizationPercent, + } + + // First, initialize all gauges in the map + pm.gaugesMutex.Lock() + for _, name := range gauges { + pm.gauges[name] = &gauge{value: 0} + } + pm.gaugesMutex.Unlock() + + // Then, register observers (this prevents concurrent map read/write) + for _, name := range gauges { + metricName := name // Capture for closure + pm.registry.ObserveApplicationSource( + "performance", + map[string]Source{ + metricName: func() float64 { + pm.gaugesMutex.RLock() + g, exists := pm.gauges[metricName] + pm.gaugesMutex.RUnlock() + if !exists { + return 0 + } + g.mutex.RLock() + defer g.mutex.RUnlock() + return g.value + }, + }, + ) + } + +} + +// IncrementCounter increments a counter metric by the given value. +// Observers are already registered in registerAllMetrics, so this method +// only updates the counter value without re-registering observers. 
+func (pm *PerformanceMetrics) IncrementCounter(name string, value float64) { + pm.countersMutex.RLock() + c, exists := pm.counters[name] + pm.countersMutex.RUnlock() + + // Fast path: if counter exists, just increment it + if exists { + c.mutex.Lock() + c.value += value + c.mutex.Unlock() + return + } + + // Slow path: counter doesn't exist, need to create it + // Upgrade to write lock and check/create + pm.countersMutex.Lock() + c, exists = pm.counters[name] + if !exists { + c = &counter{value: value} + pm.counters[name] = c + pm.countersMutex.Unlock() + return + } + pm.countersMutex.Unlock() + + // Counter was created by another goroutine after our first check + c.mutex.Lock() + c.value += value + c.mutex.Unlock() +} + +// RecordDuration records a duration value in a histogram. +// The duration is recorded in seconds. +// Observers are already registered in registerAllMetrics, so this method +// only updates the histogram without re-registering observers. +func (pm *PerformanceMetrics) RecordDuration(name string, duration time.Duration) { + pm.histogramsMutex.Lock() + h, exists := pm.histograms[name] + if !exists { + h = &histogram{ + buckets: make(map[float64]float64), + } + pm.histograms[name] = h + } + pm.histogramsMutex.Unlock() + + seconds := duration.Seconds() + h.mutex.Lock() + // Simple histogram: increment bucket counts + // Buckets: 0.001, 0.01, 0.1, 1, 10, 60, 300, 600, +Inf (overflow) + buckets := []float64{0.001, 0.01, 0.1, 1, 10, 60, 300, 600} + bucketed := false + for _, bucket := range buckets { + if seconds <= bucket { + h.buckets[bucket]++ + bucketed = true + break + } + } + // Track overflow for values > 600 seconds + if !bucketed { + h.buckets[math.Inf(1)]++ + } + // Also track total count and sum for average calculation + h.buckets[histogramCountKey]++ // count + h.buckets[histogramSumKey] += seconds + h.mutex.Unlock() +} + +// SetGauge sets a gauge metric to the given value. 
+// Observers are already registered in registerAllMetrics, so this method +// only updates the gauge value without re-registering observers. +func (pm *PerformanceMetrics) SetGauge(name string, value float64) { + pm.gaugesMutex.Lock() + g, exists := pm.gauges[name] + if !exists { + g = &gauge{value: value} + pm.gauges[name] = g + pm.gaugesMutex.Unlock() + return + } + pm.gaugesMutex.Unlock() + + g.mutex.Lock() + g.value = value + g.mutex.Unlock() +} + +// observeSystemMetrics periodically collects and updates system metrics +// including CPU utilization, memory usage, and goroutine count. +func (pm *PerformanceMetrics) observeSystemMetrics(ctx context.Context) { + ticker := time.NewTicker(60 * time.Second) // Update every 60 seconds + defer ticker.Stop() + + var lastMemStats runtime.MemStats + var lastUpdateTime time.Time + runtime.ReadMemStats(&lastMemStats) + lastUpdateTime = time.Now() + + for { + select { + case <-ticker.C: + // Update goroutine count + goroutineCount := float64(runtime.NumGoroutine()) + pm.SetGauge(MetricGoroutineCount, goroutineCount) + + // Update memory usage + // Using Sys (total memory obtained from OS) for accurate total memory footprint + // This includes heap, stack, GC metadata, and other runtime overhead + // For heap-only memory, use memStats.Alloc instead + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + memoryUsageMB := float64(memStats.Sys) / (1024 * 1024) // Total memory in megabytes + pm.SetGauge(MetricMemoryUsageMB, memoryUsageMB) + + // Calculate CPU utilization using a more realistic heuristic + now := time.Now() + elapsed := now.Sub(lastUpdateTime) + if elapsed > 0 { + cpuUtilization := pm.calculateCPUUtilizationHeuristic(memStats, lastMemStats, elapsed) + pm.SetGauge(MetricCPUUtilization, cpuUtilization) + + lastMemStats = memStats + lastUpdateTime = now + } + + // Update OS-level machine stats + pm.updateMachineStats() + case <-ctx.Done(): + return + } + } +} + +// calculateCPUUtilizationHeuristic 
calculates CPU utilization using a heuristic +// based on goroutine count and GC activity. This provides a reasonable approximation. +// Note: For accurate CPU metrics, consider using OS-level process CPU time. +func (pm *PerformanceMetrics) calculateCPUUtilizationHeuristic( + currentMemStats runtime.MemStats, + lastMemStats runtime.MemStats, + elapsed time.Duration, +) float64 { + numCPU := float64(runtime.NumCPU()) + activeGoroutines := float64(runtime.NumGoroutine()) + + // Calculate GC rate (GCs per second) + gcDelta := float64(currentMemStats.NumGC - lastMemStats.NumGC) + gcRate := gcDelta / elapsed.Seconds() + + // Normalize goroutines: if we have more goroutines than CPU cores, + // we're likely using more CPU, but use a conservative multiplier + // Formula: (goroutines / CPU cores) * 10%, capped at 40% + goroutineContribution := (activeGoroutines / numCPU) * 10.0 + if goroutineContribution > 40.0 { + goroutineContribution = 40.0 + } + + // GC contribution: frequent GCs indicate CPU work, but use conservative multiplier + // Formula: GC rate * 1%, capped at 20% + gcContribution := gcRate * 1.0 + if gcContribution > 20.0 { + gcContribution = 20.0 + } + + // Total CPU utilization estimate + cpuUtilization := goroutineContribution + gcContribution + + // Add a small base load if there are active goroutines + if cpuUtilization < 1.0 && activeGoroutines > 0 { + cpuUtilization = 1.0 // Minimum 1% if there are active goroutines + } + + // Cap CPU utilization at 100% + if cpuUtilization > 100.0 { + cpuUtilization = 100.0 + } + if cpuUtilization < 0.0 { + cpuUtilization = 0.0 + } + + return cpuUtilization +} + +// updateMachineStats collects and updates OS-level machine statistics +// including CPU load, RAM utilization, and swapfile utilization. +func (pm *PerformanceMetrics) updateMachineStats() { + // Get CPU load percentage (1-second average) + // NOTE: cpu.Percent blocks for the specified duration (1 second) to sample + // CPU usage over that interval. 
This blocking behavior is intentional and + // necessary to obtain an accurate CPU utilization measurement. The function + // will not return until the 1-second sampling period completes. + cpuPercent, err := cpu.Percent(time.Second, false) + if err == nil && len(cpuPercent) > 0 { + pm.SetGauge(MetricCPULoadPercent, cpuPercent[0]) + } + + // Get memory statistics + memInfo, err := mem.VirtualMemory() + if err == nil { + // RAM utilization percentage + pm.SetGauge(MetricRAMUtilizationPercent, memInfo.UsedPercent) + + // Swap utilization percentage + swapInfo, err := mem.SwapMemory() + if err == nil && swapInfo.Total > 0 { + swapUtilizationPercent := (float64(swapInfo.Used) / float64(swapInfo.Total)) * 100.0 + pm.SetGauge(MetricSwapUtilizationPercent, swapUtilizationPercent) + } else { + // If swap is not available or has no total, set to 0 + pm.SetGauge(MetricSwapUtilizationPercent, 0) + } + } +} + +// NoOpPerformanceMetrics is a no-op implementation of PerformanceMetricsRecorder +// that can be used when metrics are disabled. +type NoOpPerformanceMetrics struct{} + +// IncrementCounter is a no-op. +func (n *NoOpPerformanceMetrics) IncrementCounter(name string, value float64) {} + +// RecordDuration is a no-op. +func (n *NoOpPerformanceMetrics) RecordDuration(name string, duration time.Duration) {} + +// SetGauge is a no-op. +func (n *NoOpPerformanceMetrics) SetGauge(name string, value float64) {} + +// GetCounterValue always returns 0. +func (n *NoOpPerformanceMetrics) GetCounterValue(name string) float64 { return 0 } + +// GetGaugeValue always returns 0. +func (n *NoOpPerformanceMetrics) GetGaugeValue(name string) float64 { return 0 } + +// GetCounterValue returns the current value of a counter. 
+func (pm *PerformanceMetrics) GetCounterValue(name string) float64 { + pm.countersMutex.RLock() + c, exists := pm.counters[name] + pm.countersMutex.RUnlock() + + if !exists { + return 0 + } + + c.mutex.RLock() + defer c.mutex.RUnlock() + return c.value +} + +// GetGaugeValue returns the current value of a gauge. +func (pm *PerformanceMetrics) GetGaugeValue(name string) float64 { + pm.gaugesMutex.RLock() + g, exists := pm.gauges[name] + pm.gaugesMutex.RUnlock() + + if !exists { + return 0 + } + + g.mutex.RLock() + defer g.mutex.RUnlock() + return g.value +} + +// Metric names for performance metrics +const ( + // DKG Metrics + MetricDKGJoinedTotal = "dkg_joined_total" + MetricDKGFailedTotal = "dkg_failed_total" + MetricDKGDurationSeconds = "dkg_duration_seconds" + MetricDKGValidationTotal = "dkg_validation_total" + MetricDKGChallengesSubmittedTotal = "dkg_challenges_submitted_total" + MetricDKGApprovalsSubmittedTotal = "dkg_approvals_submitted_total" + + // Signing Metrics + MetricSigningOperationsTotal = "signing_operations_total" + MetricSigningSuccessTotal = "signing_success_total" + MetricSigningFailedTotal = "signing_failed_total" + MetricSigningDurationSeconds = "signing_duration_seconds" + MetricSigningAttemptsPerOperation = "signing_attempts_per_operation" + MetricSigningTimeoutsTotal = "signing_timeouts_total" + + // Redemption Metrics + MetricRedemptionExecutionsTotal = "redemption_executions_total" + MetricRedemptionExecutionsSuccessTotal = "redemption_executions_success_total" + MetricRedemptionExecutionsFailedTotal = "redemption_executions_failed_total" + MetricRedemptionActionDurationSeconds = "redemption_action_duration_seconds" + + // Redemption Proof Submission Metrics (SPV maintainer) + MetricRedemptionProofSubmissionsTotal = "redemption_proof_submissions_total" + MetricRedemptionProofSubmissionsSuccessTotal = "redemption_proof_submissions_success_total" + MetricRedemptionProofSubmissionsFailedTotal = "redemption_proof_submissions_failed_total" + 
+ // Wallet Action Metrics (aggregate) + MetricWalletActionsTotal = "wallet_actions_total" + MetricWalletActionSuccessTotal = "wallet_action_success_total" + MetricWalletActionFailedTotal = "wallet_action_failed_total" + MetricWalletActionDurationSeconds = "wallet_action_duration_seconds" + MetricWalletHeartbeatFailuresTotal = "wallet_heartbeat_failures_total" + + // Wallet Action Metrics (per-action type) + // These are generated dynamically using WalletActionMetricName helper function + // Format: wallet_action_{action_type}_{metric_type} + // Example: wallet_action_heartbeat_total, wallet_action_deposit_sweep_duration_seconds + + // Coordination Metrics + MetricCoordinationWindowsDetectedTotal = "coordination_windows_detected_total" + MetricCoordinationProceduresExecutedTotal = "coordination_procedures_executed_total" + MetricCoordinationFailedTotal = "coordination_failed_total" // Only when node is leader + MetricCoordinationLeaderTimeoutTotal = "coordination_leader_timeout_total" // When follower observes leader timeout + MetricCoordinationDurationSeconds = "coordination_duration_seconds" + + // Coordination Window Metrics (per-window tracking) + MetricCoordinationWindowDurationSeconds = "coordination_window_duration_seconds" + MetricCoordinationWindowWalletsCoordinated = "coordination_window_wallets_coordinated" + MetricCoordinationWindowWalletsSuccessful = "coordination_window_wallets_successful" + MetricCoordinationWindowWalletsFailed = "coordination_window_wallets_failed" + MetricCoordinationWindowTotalFaults = "coordination_window_total_faults" + MetricCoordinationWindowCoordinationBlock = "coordination_window_coordination_block" + + // Network Metrics + MetricIncomingMessageQueueSize = "incoming_message_queue_size" + MetricMessageHandlerQueueSize = "message_handler_queue_size" + MetricPeerConnectionsTotal = "peer_connections_total" + MetricPeerDisconnectionsTotal = "peer_disconnections_total" + MetricMessageBroadcastTotal = "message_broadcast_total" + 
MetricMessageReceivedTotal = "message_received_total" + MetricPingTestsTotal = "ping_test_total" + MetricPingTestSuccessTotal = "ping_test_success_total" + MetricPingTestFailedTotal = "ping_test_failed_total" + MetricPingTestDurationSeconds = "ping_test_duration_seconds" + + // Network Join Request Metrics (inbound connection attempts from peers) + MetricNetworkJoinRequestsTotal = "network_join_requests_total" // Total inbound join attempts + MetricNetworkJoinRequestsSuccessTotal = "network_join_requests_success_total" // Successful joins + MetricNetworkJoinRequestsFailedTotal = "network_join_requests_failed_total" // Failed joins (handshake failure) + MetricNetworkHandshakeDurationSeconds = "network_handshake_duration_seconds" // Handshake duration + MetricFirewallRejectionsTotal = "firewall_rejections_total" // Firewall rejections + + // Wallet Dispatcher Metrics + MetricWalletDispatcherActiveActions = "wallet_dispatcher_active_actions" + MetricWalletDispatcherRejectedTotal = "wallet_dispatcher_rejected_total" + + // System Metrics + MetricCPUUtilization = "cpu_utilization_percent" + MetricMemoryUsageMB = "memory_usage_mb" + MetricGoroutineCount = "goroutine_count" + MetricCPULoadPercent = "cpu_load_percent" + MetricRAMUtilizationPercent = "ram_utilization_percent" + MetricSwapUtilizationPercent = "swap_utilization_percent" +) + +// WalletActionMetricName generates a metric name for a specific wallet action type. +// actionType should be the string representation of the action (e.g., "heartbeat", "deposit_sweep"). +// metricType should be one of: "total", "success_total", "failed_total", "duration_seconds" +func WalletActionMetricName(actionType string, metricType string) string { + return fmt.Sprintf("wallet_action_%s_%s", actionType, metricType) +} + +// GetAllWalletActionTypes returns all wallet action types that should be tracked. +// ActionNoop is excluded as it's a no-op action. 
+func GetAllWalletActionTypes() []string { + return []string{ + "heartbeat", + "deposit_sweep", + "redemption", + "moving_funds", + "moved_funds_sweep", + } +} diff --git a/pkg/clientinfo/performance_test.go b/pkg/clientinfo/performance_test.go new file mode 100644 index 0000000000..75190c8545 --- /dev/null +++ b/pkg/clientinfo/performance_test.go @@ -0,0 +1,374 @@ +package clientinfo + +import ( + "context" + "math" + "sync" + "testing" + "time" + + keepclientinfo "github.com/keep-network/keep-common/pkg/clientinfo" +) + +// TestConcurrentCounterIncrement tests that concurrent counter increments +// are safe and produce correct results. +func TestConcurrentCounterIncrement(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + const ( + numGoroutines = 100 + incrementsPer = 1000 + metricName = MetricSigningOperationsTotal + ) + + var wg sync.WaitGroup + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < incrementsPer; j++ { + pm.IncrementCounter(metricName, 1) + } + }() + } + wg.Wait() + + expected := float64(numGoroutines * incrementsPer) + actual := pm.GetCounterValue(metricName) + if actual != expected { + t.Errorf("Expected counter value %v, got %v", expected, actual) + } +} + +// TestConcurrentCounterDifferentMetrics tests concurrent increments on +// different counters. 
+func TestConcurrentCounterDifferentMetrics(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + const ( + numGoroutines = 50 + incrementsPer = 100 + ) + + metrics := []string{ + MetricSigningOperationsTotal, + MetricSigningSuccessTotal, + MetricSigningFailedTotal, + } + + // Add timeout to prevent test from hanging indefinitely + testCtx, testCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer testCancel() + + done := make(chan struct{}) + var wg sync.WaitGroup + for _, metricName := range metrics { + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(name string) { + defer wg.Done() + for j := 0; j < incrementsPer; j++ { + select { + case <-testCtx.Done(): + return + default: + pm.IncrementCounter(name, 1) + } + } + }(metricName) + } + } + + go func() { + wg.Wait() + close(done) + }() + + select { + case <-done: + // Test completed successfully + case <-testCtx.Done(): + t.Fatal("Test timed out waiting for goroutines to complete") + } + + expected := float64(numGoroutines * incrementsPer) + for _, metricName := range metrics { + actual := pm.GetCounterValue(metricName) + if actual != expected { + t.Errorf("Metric %s: expected %v, got %v", metricName, expected, actual) + } + } +} + +// TestConcurrentDurationRecording tests that concurrent duration recordings +// are safe and produce correct results. 
+func TestConcurrentDurationRecording(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + const ( + numGoroutines = 50 + recordingsPer = 100 + metricName = "signing_duration_seconds" + ) + + durations := []time.Duration{ + 1 * time.Millisecond, + 10 * time.Millisecond, + 100 * time.Millisecond, + 1 * time.Second, + } + + var wg sync.WaitGroup + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + for j := 0; j < recordingsPer; j++ { + duration := durations[goroutineID%len(durations)] + pm.RecordDuration(metricName, duration) + } + }(i) + } + wg.Wait() + + // Verify histogram was updated (we can't easily verify exact values + // without exposing internal state, but we can verify the count matches) + pm.histogramsMutex.RLock() + h, exists := pm.histograms[metricName] + pm.histogramsMutex.RUnlock() + + if !exists { + t.Fatal("Histogram not found") + } + + h.mutex.RLock() + count := h.buckets[histogramCountKey] + h.mutex.RUnlock() + + expectedCount := float64(numGoroutines * recordingsPer) + if count != expectedCount { + t.Errorf("Expected histogram count %v, got %v", expectedCount, count) + } +} + +// TestConcurrentGaugeSet tests that concurrent gauge updates are safe. 
+func TestConcurrentGaugeSet(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + const ( + numGoroutines = 100 + updatesPer = 100 + metricName = MetricIncomingMessageQueueSize + ) + + var wg sync.WaitGroup + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + for j := 0; j < updatesPer; j++ { + value := float64(goroutineID*updatesPer + j) + pm.SetGauge(metricName, value) + } + }(i) + } + wg.Wait() + + // We can't verify the exact value since goroutines race, + // but we can verify the gauge exists and has been set + value := pm.GetGaugeValue(metricName) + if value < 0 { + t.Errorf("Expected non-negative gauge value, got %v", value) + } +} + +// TestConcurrentDifferentOperations tests that different metric operations +// can run concurrently without issues. +func TestConcurrentDifferentOperations(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + const ( + numGoroutines = 30 + operationsPer = 50 + ) + + var wg sync.WaitGroup + + // Counter increments + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < operationsPer; j++ { + pm.IncrementCounter(MetricSigningOperationsTotal, 1) + } + }() + } + + // Duration recordings + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < operationsPer; j++ { + pm.RecordDuration("signing_duration_seconds", time.Duration(j)*time.Millisecond) + } + }() + } + + // Gauge sets + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + for j := 0; j < operationsPer; j++ { + pm.SetGauge(MetricIncomingMessageQueueSize, float64(goroutineID+j)) + } + }(i) + } + + wg.Wait() 
+ + // Verify all operations completed without race + expectedCounter := float64(numGoroutines * operationsPer) + actualCounter := pm.GetCounterValue(MetricSigningOperationsTotal) + if actualCounter != expectedCounter { + t.Errorf("Expected counter value %v, got %v", expectedCounter, actualCounter) + } +} + +// TestHistogramBucketPlacement tests that duration values are placed +// in the correct histogram buckets. +func TestHistogramBucketPlacement(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + metricName := "test_duration_seconds" + + testCases := []struct { + duration time.Duration + bucket float64 + shouldRun bool + }{ + {500 * time.Microsecond, 0.001, true}, // < 1ms + {5 * time.Millisecond, 0.01, true}, // < 10ms + {50 * time.Millisecond, 0.1, true}, // < 100ms + {500 * time.Millisecond, 1, true}, // < 1s + {5 * time.Second, 10, true}, // < 10s + {30 * time.Second, 60, true}, // < 60s + {200 * time.Second, 300, true}, // < 300s + {500 * time.Second, 600, true}, // < 600s + {1000 * time.Second, 0, false}, // > 600s (overflow) + } + + for _, tc := range testCases { + pm.RecordDuration(metricName, tc.duration) + } + + // Verify histogram + pm.histogramsMutex.RLock() + h, exists := pm.histograms[metricName] + pm.histogramsMutex.RUnlock() + + if !exists { + t.Fatal("Histogram not found") + } + + h.mutex.RLock() + defer h.mutex.RUnlock() + + // Verify count + expectedCount := float64(len(testCases)) + actualCount := h.buckets[histogramCountKey] + if actualCount != expectedCount { + t.Errorf("Expected count %v, got %v", expectedCount, actualCount) + } + + // Verify overflow bucket + overflowCount := h.buckets[math.Inf(1)] + if overflowCount != 1 { + t.Errorf("Expected overflow bucket count 1, got %v", overflowCount) + } +} + +// TestMetricsInitialization tests that all metrics are initialized with zero values. 
+func TestMetricsInitialization(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + // Test counters + counters := []string{ + MetricDKGJoinedTotal, + MetricSigningOperationsTotal, + MetricSigningSuccessTotal, + } + + for _, counterName := range counters { + value := pm.GetCounterValue(counterName) + if value != 0 { + t.Errorf("Counter %s should start at 0, got %v", counterName, value) + } + } + + // Test gauges + gauges := []string{ + MetricCPUUtilization, + MetricMemoryUsageMB, + MetricGoroutineCount, + MetricCPULoadPercent, + MetricRAMUtilizationPercent, + MetricSwapUtilizationPercent, + } + + for _, gaugeName := range gauges { + value := pm.GetGaugeValue(gaugeName) + if value != 0 { + t.Errorf("Gauge %s should start at 0, got %v", gaugeName, value) + } + } +} + +// TestContextCancelation tests that goroutines stop when context is cancelled. 
+func TestContextCancelation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + registry := &Registry{keepclientinfo.NewRegistry(), ctx} + pm := NewPerformanceMetrics(ctx, registry) + + // Cancel context immediately + cancel() + + // Give goroutines time to stop + time.Sleep(100 * time.Millisecond) + + // This should not panic or cause issues + pm.IncrementCounter(MetricSigningOperationsTotal, 1) + pm.SetGauge(MetricIncomingMessageQueueSize, 5) + pm.RecordDuration("signing_duration_seconds", 100*time.Millisecond) +} diff --git a/pkg/clientinfo/rpc_health.go b/pkg/clientinfo/rpc_health.go new file mode 100644 index 0000000000..f43831408f --- /dev/null +++ b/pkg/clientinfo/rpc_health.go @@ -0,0 +1,293 @@ +package clientinfo + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/ipfs/go-log" + + "github.com/keep-network/keep-core/pkg/bitcoin" + "github.com/keep-network/keep-core/pkg/chain" +) + +var rpcHealthLogger = log.Logger("keep-rpc-health") + +// RPCHealthChecker performs periodic health checks on Ethereum and Bitcoin RPC endpoints +// by making actual RPC calls (not just ICMP ping) to verify the services are working. +type RPCHealthChecker struct { + registry *Registry + + // Ethereum health check + ethBlockCounter chain.BlockCounter + ethLastCheck time.Time + ethLastSuccess time.Time + ethLastError error + ethLastDuration time.Duration // Last successful RPC call duration + ethMutex sync.RWMutex + + // Bitcoin health check + btcChain bitcoin.Chain + btcLastCheck time.Time + btcLastSuccess time.Time + btcLastError error + btcLastDuration time.Duration // Last successful RPC call duration + btcMutex sync.RWMutex + + // Configuration + checkInterval time.Duration + + // Concurrency control + startOnce sync.Once +} + +// NewRPCHealthChecker creates a new RPC health checker instance. 
+func NewRPCHealthChecker( + registry *Registry, + ethBlockCounter chain.BlockCounter, + btcChain bitcoin.Chain, + checkInterval time.Duration, +) *RPCHealthChecker { + if checkInterval == 0 { + checkInterval = 30 * time.Second // Default: check every 30 seconds + } + + return &RPCHealthChecker{ + registry: registry, + ethBlockCounter: ethBlockCounter, + btcChain: btcChain, + checkInterval: checkInterval, + } +} + +// Start begins periodic health checks for both Ethereum and Bitcoin RPC endpoints. +// Safe to call multiple times - only the first call will execute. +func (r *RPCHealthChecker) Start(ctx context.Context) { + r.startOnce.Do(func() { + r.start(ctx) + }) +} + +// start is the internal implementation of Start. Use Start() for public API. +func (r *RPCHealthChecker) start(ctx context.Context) { + // Perform initial health checks immediately + r.checkEthereumHealth(ctx) + r.checkBitcoinHealth(ctx) + + // Start periodic health checks + go r.runEthereumHealthChecks(ctx) + go r.runBitcoinHealthChecks(ctx) + + // Register metrics observers + r.registerMetrics() +} + +// runEthereumHealthChecks runs periodic Ethereum RPC health checks. +func (r *RPCHealthChecker) runEthereumHealthChecks(ctx context.Context) { + ticker := time.NewTicker(r.checkInterval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + r.checkEthereumHealth(ctx) + case <-ctx.Done(): + return + } + } +} + +// runBitcoinHealthChecks runs periodic Bitcoin RPC health checks. +func (r *RPCHealthChecker) runBitcoinHealthChecks(ctx context.Context) { + ticker := time.NewTicker(r.checkInterval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + r.checkBitcoinHealth(ctx) + case <-ctx.Done(): + return + } + } +} + +// checkEthereumHealth performs a comprehensive health check on the Ethereum RPC endpoint +// by making actual RPC calls to verify the service is working properly. +// It checks: +// 1. Current block number retrieval +// 2. 
Block number is reasonable (not stuck at 0 or extremely old) +func (r *RPCHealthChecker) checkEthereumHealth(ctx context.Context) { + if r.ethBlockCounter == nil { + return + } + + startTime := time.Now() + + // First check: Get current block number + currentBlock, err := r.ethBlockCounter.CurrentBlock() + if err != nil { + r.ethMutex.Lock() + r.ethLastCheck = startTime + r.ethLastError = err + r.ethMutex.Unlock() + rpcHealthLogger.Warnf( + "Ethereum RPC health check failed (CurrentBlock): [%v] (duration: %v)", + err, + time.Since(startTime), + ) + return + } + + // Second check: Verify block number is reasonable + // Block number should be > 0 (unless on a very new testnet) + // For mainnet/testnet, block numbers should be in thousands/millions + if currentBlock == 0 { + blockErr := fmt.Errorf("block number is 0, node may not be synced") + r.ethMutex.Lock() + r.ethLastCheck = startTime + r.ethLastError = blockErr + r.ethMutex.Unlock() + rpcHealthLogger.Warnf( + "Ethereum RPC health check failed (block number is 0): [%v] (duration: %v)", + blockErr, + time.Since(startTime), + ) + return + } + + duration := time.Since(startTime) + + r.ethMutex.Lock() + r.ethLastCheck = startTime + r.ethLastSuccess = time.Now() + r.ethLastError = nil + r.ethLastDuration = duration + r.ethMutex.Unlock() + + rpcHealthLogger.Debugf( + "Ethereum RPC health check succeeded (block: %d, duration: %v)", + currentBlock, + duration, + ) +} + +// checkBitcoinHealth performs a comprehensive health check on the Bitcoin RPC endpoint +// by making actual RPC calls to verify the service is working properly. +// It checks: +// 1. Latest block height retrieval +// 2. Block header retrieval for the latest block (verifies RPC can retrieve block data) +// 3. 
Block height is reasonable (not 0) +func (r *RPCHealthChecker) checkBitcoinHealth(ctx context.Context) { + if r.btcChain == nil { + return + } + + startTime := time.Now() + + // First check: Get latest block height + latestHeight, err := r.btcChain.GetLatestBlockHeight() + if err != nil { + r.btcMutex.Lock() + r.btcLastCheck = startTime + r.btcLastError = err + r.btcMutex.Unlock() + rpcHealthLogger.Warnf( + "Bitcoin RPC health check failed (GetLatestBlockHeight): [%v] (duration: %v)", + err, + time.Since(startTime), + ) + return + } + + // Second check: Verify block height is reasonable + if latestHeight == 0 { + heightErr := fmt.Errorf("block height is 0, node may not be synced") + r.btcMutex.Lock() + r.btcLastCheck = startTime + r.btcLastError = heightErr + r.btcMutex.Unlock() + rpcHealthLogger.Warnf( + "Bitcoin RPC health check failed (block height is 0): [%v] (duration: %v)", + heightErr, + time.Since(startTime), + ) + return + } + + // Third check: Try to get block header for the latest block + // This verifies the RPC can actually retrieve block data, not just return a number + _, err = r.btcChain.GetBlockHeader(latestHeight) + if err != nil { + headerErr := fmt.Errorf("failed to get block header for height %d: %w", latestHeight, err) + r.btcMutex.Lock() + r.btcLastCheck = startTime + r.btcLastError = headerErr + r.btcMutex.Unlock() + rpcHealthLogger.Warnf( + "Bitcoin RPC health check failed (GetBlockHeader): [%v] (duration: %v)", + headerErr, + time.Since(startTime), + ) + return + } + + duration := time.Since(startTime) + + r.btcMutex.Lock() + r.btcLastCheck = startTime + r.btcLastSuccess = time.Now() + r.btcLastError = nil + r.btcLastDuration = duration + r.btcMutex.Unlock() + + rpcHealthLogger.Debugf( + "Bitcoin RPC health check succeeded (height: %d, duration: %v)", + latestHeight, + duration, + ) +} + +// GetEthereumHealthStatus returns the current Ethereum RPC health status. 
+func (r *RPCHealthChecker) GetEthereumHealthStatus() (isHealthy bool, lastCheck time.Time, lastSuccess time.Time, lastError error, lastDuration time.Duration) { + r.ethMutex.RLock() + defer r.ethMutex.RUnlock() + + isHealthy = r.ethLastError == nil && !r.ethLastCheck.IsZero() + return isHealthy, r.ethLastCheck, r.ethLastSuccess, r.ethLastError, r.ethLastDuration +} + +// GetBitcoinHealthStatus returns the current Bitcoin RPC health status. +func (r *RPCHealthChecker) GetBitcoinHealthStatus() (isHealthy bool, lastCheck time.Time, lastSuccess time.Time, lastError error, lastDuration time.Duration) { + r.btcMutex.RLock() + defer r.btcMutex.RUnlock() + + isHealthy = r.btcLastError == nil && !r.btcLastCheck.IsZero() + return isHealthy, r.btcLastCheck, r.btcLastSuccess, r.btcLastError, r.btcLastDuration +} + +// registerMetrics registers metrics observers for RPC health status. +func (r *RPCHealthChecker) registerMetrics() { + // Ethereum RPC response time + r.registry.ObserveApplicationSource( + "performance", + map[string]Source{ + "rpc_eth_response_time_seconds": func() float64 { + _, _, _, _, lastDuration := r.GetEthereumHealthStatus() + return lastDuration.Seconds() + }, + }, + ) + + // Bitcoin RPC response time + r.registry.ObserveApplicationSource( + "performance", + map[string]Source{ + "rpc_btc_response_time_seconds": func() float64 { + _, _, _, _, lastDuration := r.GetBitcoinHealthStatus() + return lastDuration.Seconds() + }, + }, + ) +} diff --git a/pkg/maintainer/spv/deposit_sweep.go b/pkg/maintainer/spv/deposit_sweep.go index ef85221b90..2b0b8a5f77 100644 --- a/pkg/maintainer/spv/deposit_sweep.go +++ b/pkg/maintainer/spv/deposit_sweep.go @@ -27,6 +27,7 @@ func SubmitDepositSweepProof( btcChain, spvChain, bitcoin.AssembleSpvProof, + getGlobalMetricsRecorder(), ) } @@ -36,8 +37,19 @@ func submitDepositSweepProof( btcChain bitcoin.Chain, spvChain Chain, spvProofAssembler spvProofAssembler, + metricsRecorder interface { + IncrementCounter(name string, value 
float64) + }, ) error { + // Record proof submission attempt + if metricsRecorder != nil { + metricsRecorder.IncrementCounter("deposit_sweep_proof_submissions_total", 1) + } + if requiredConfirmations == 0 { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter("deposit_sweep_proof_submissions_failed_total", 1) + } return fmt.Errorf( "provided required confirmations count must be greater than 0", ) @@ -49,6 +61,9 @@ func submitDepositSweepProof( btcChain, ) if err != nil { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter("deposit_sweep_proof_submissions_failed_total", 1) + } return fmt.Errorf( "failed to assemble transaction spv proof: [%v]", err, @@ -61,6 +76,9 @@ func submitDepositSweepProof( transaction, ) if err != nil { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter("deposit_sweep_proof_submissions_failed_total", 1) + } return fmt.Errorf( "error while parsing transaction inputs: [%v]", err, @@ -73,12 +91,20 @@ func submitDepositSweepProof( mainUTXO, vault, ); err != nil { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter("deposit_sweep_proof_submissions_failed_total", 1) + } return fmt.Errorf( "failed to submit deposit sweep proof with reimbursement: [%v]", err, ) } + // Record successful proof submission + if metricsRecorder != nil { + metricsRecorder.IncrementCounter("deposit_sweep_proof_submissions_success_total", 1) + } + return nil } diff --git a/pkg/maintainer/spv/deposit_sweep_test.go b/pkg/maintainer/spv/deposit_sweep_test.go index fa9ff0f655..dc61256ccf 100644 --- a/pkg/maintainer/spv/deposit_sweep_test.go +++ b/pkg/maintainer/spv/deposit_sweep_test.go @@ -96,6 +96,7 @@ func TestSubmitDepositSweepProof(t *testing.T) { btcChain, spvChain, mockSpvProofAssembler, + getGlobalMetricsRecorder(), ) if err != nil { t.Fatal(err) diff --git a/pkg/maintainer/spv/redemptions.go b/pkg/maintainer/spv/redemptions.go index df8541f07b..e504860f81 100644 --- a/pkg/maintainer/spv/redemptions.go +++ 
b/pkg/maintainer/spv/redemptions.go @@ -3,10 +3,19 @@ package spv import ( "bytes" "fmt" + "github.com/keep-network/keep-core/pkg/bitcoin" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/tbtc" ) +// getGlobalMetricsRecorder returns the global metrics recorder if set. +func getGlobalMetricsRecorder() interface { + IncrementCounter(name string, value float64) +} { + return getMetricsRecorder() +} + // SubmitRedemptionProof prepares redemption proof for the given transaction // and submits it to the on-chain contract. If the number of required // confirmations is `0`, an error is returned. @@ -22,6 +31,7 @@ func SubmitRedemptionProof( btcChain, spvChain, bitcoin.AssembleSpvProof, + getGlobalMetricsRecorder(), ) } @@ -31,8 +41,19 @@ func submitRedemptionProof( btcChain bitcoin.Chain, spvChain Chain, spvProofAssembler spvProofAssembler, + metricsRecorder interface { + IncrementCounter(name string, value float64) + }, ) error { + // Record proof submission attempt + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionProofSubmissionsTotal, 1) + } + if requiredConfirmations == 0 { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionProofSubmissionsFailedTotal, 1) + } return fmt.Errorf( "provided required confirmations count must be greater than 0", ) @@ -44,6 +65,9 @@ func submitRedemptionProof( btcChain, ) if err != nil { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionProofSubmissionsFailedTotal, 1) + } return fmt.Errorf( "failed to assemble transaction spv proof: [%v]", err, @@ -55,6 +79,9 @@ func submitRedemptionProof( transaction, ) if err != nil { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionProofSubmissionsFailedTotal, 1) + } return fmt.Errorf( "error while parsing transaction inputs: [%v]", err, @@ -67,12 +94,20 @@ func submitRedemptionProof( mainUTXO, 
walletPublicKeyHash, ); err != nil { + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionProofSubmissionsFailedTotal, 1) + } return fmt.Errorf( "failed to submit redemption proof with reimbursement: [%v]", err, ) } + // Record successful proof submission + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionProofSubmissionsSuccessTotal, 1) + } + return nil } diff --git a/pkg/maintainer/spv/redemptions_test.go b/pkg/maintainer/spv/redemptions_test.go index 70a1594c00..4f10a3a208 100644 --- a/pkg/maintainer/spv/redemptions_test.go +++ b/pkg/maintainer/spv/redemptions_test.go @@ -78,6 +78,7 @@ func TestSubmitRedemptionProof(t *testing.T) { btcChain, spvChain, mockSpvProofAssembler, + getGlobalMetricsRecorder(), ) if err != nil { t.Fatal(err) diff --git a/pkg/maintainer/spv/spv.go b/pkg/maintainer/spv/spv.go index ef1bef69e2..990d8b0ec0 100644 --- a/pkg/maintainer/spv/spv.go +++ b/pkg/maintainer/spv/spv.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "fmt" "math/big" + "sync" "time" "github.com/keep-network/keep-core/pkg/tbtc" @@ -38,6 +39,34 @@ func Initialize( go spvMaintainer.startControlLoop(ctx) } +// globalMetricsRecorder is a package-level variable to access metrics recorder +// from proof submission functions. +var ( + globalMetricsRecorderMu sync.RWMutex + globalMetricsRecorder interface { + IncrementCounter(name string, value float64) + } +) + +// SetMetricsRecorder sets the metrics recorder for the SPV maintainer. +// This allows recording metrics for proof submissions. +func SetMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) +}) { + globalMetricsRecorderMu.Lock() + defer globalMetricsRecorderMu.Unlock() + globalMetricsRecorder = recorder +} + +// getMetricsRecorder safely retrieves the metrics recorder. 
+func getMetricsRecorder() interface { + IncrementCounter(name string, value float64) +} { + globalMetricsRecorderMu.RLock() + defer globalMetricsRecorderMu.RUnlock() + return globalMetricsRecorder +} + // proofTypes holds the information about proof types supported by the // SPV maintainer. var proofTypes = map[tbtc.WalletActionType]struct { diff --git a/pkg/net/libp2p/authenticated_connection.go b/pkg/net/libp2p/authenticated_connection.go index acbacf37be..fa13a4dabd 100644 --- a/pkg/net/libp2p/authenticated_connection.go +++ b/pkg/net/libp2p/authenticated_connection.go @@ -6,11 +6,13 @@ import ( "fmt" "io" "net" + "time" libp2pcrypto "github.com/libp2p/go-libp2p/core/crypto" libp2pnetwork "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" + "github.com/keep-network/keep-core/pkg/clientinfo" keepNet "github.com/keep-network/keep-core/pkg/net" "github.com/keep-network/keep-core/pkg/net/gen/pb" "github.com/keep-network/keep-core/pkg/net/security/handshake" @@ -89,7 +91,15 @@ func newAuthenticatedInboundConnection( privateKey libp2pcrypto.PrivKey, firewall keepNet.Firewall, protocol string, + metricsRecorder MetricsRecorder, ) (*authenticatedConnection, error) { + startTime := time.Now() + + // Track inbound join request attempt + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricNetworkJoinRequestsTotal, 1) + } + ac := &authenticatedConnection{ Conn: unauthenticatedConn, connState: connState, @@ -102,6 +112,11 @@ func newAuthenticatedInboundConnection( ac.initializePipe() if err := ac.runHandshakeAsResponder(); err != nil { + // Track failed join request (handshake failure) + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricNetworkJoinRequestsFailedTotal, 1) + } + // close the conn before returning (if it hasn't already) // otherwise we leak. 
if closeErr := ac.Close(); closeErr != nil { @@ -112,6 +127,12 @@ func newAuthenticatedInboundConnection( } if err := ac.checkFirewallRules(); err != nil { + // Track firewall rejection + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricFirewallRejectionsTotal, 1) + metricsRecorder.IncrementCounter(clientinfo.MetricNetworkJoinRequestsFailedTotal, 1) + } + if closeErr := ac.Close(); closeErr != nil { logger.Debugf("could not close the connection: [%v]", closeErr) } @@ -119,6 +140,12 @@ func newAuthenticatedInboundConnection( return nil, fmt.Errorf("connection handshake failed: [%v]", err) } + // Track successful join request + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricNetworkJoinRequestsSuccessTotal, 1) + metricsRecorder.RecordDuration(clientinfo.MetricNetworkHandshakeDurationSeconds, time.Since(startTime)) + } + return ac, nil } @@ -135,7 +162,10 @@ func newAuthenticatedOutboundConnection( remotePeerID peer.ID, firewall keepNet.Firewall, protocol string, + metricsRecorder MetricsRecorder, ) (*authenticatedConnection, error) { + startTime := time.Now() + remotePublicKey, err := remotePeerID.ExtractPublicKey() if err != nil { return nil, fmt.Errorf( @@ -173,6 +203,11 @@ func newAuthenticatedOutboundConnection( return nil, fmt.Errorf("connection handshake failed: [%v]", err) } + // Record handshake duration for outbound connections + if metricsRecorder != nil { + metricsRecorder.RecordDuration(clientinfo.MetricNetworkHandshakeDurationSeconds, time.Since(startTime)) + } + return ac, nil } diff --git a/pkg/net/libp2p/authenticated_connection_test.go b/pkg/net/libp2p/authenticated_connection_test.go index 7603be1664..f3008a150d 100644 --- a/pkg/net/libp2p/authenticated_connection_test.go +++ b/pkg/net/libp2p/authenticated_connection_test.go @@ -64,6 +64,7 @@ func TestPinnedAndMessageKeyMismatch(t *testing.T) { responder.networkPrivateKey, firewall, authProtocolID, + nil, // metricsRecorder ) if err == 
nil { t.Fatal("should not have successfully completed handshake") @@ -259,6 +260,7 @@ func connectInitiatorAndResponder( responderPeerID, firewall, authProtocolID, + nil, // metricsRecorder ) done <- struct{}{} }(initiatorConn, initiator.peerID, initiator.networkPrivateKey, responder.peerID) @@ -270,6 +272,7 @@ func connectInitiatorAndResponder( responder.networkPrivateKey, firewall, authProtocolID, + nil, // metricsRecorder ) <-done // handshake is done diff --git a/pkg/net/libp2p/channel.go b/pkg/net/libp2p/channel.go index c91e338b01..634cd20e98 100644 --- a/pkg/net/libp2p/channel.go +++ b/pkg/net/libp2p/channel.go @@ -6,9 +6,11 @@ import ( "runtime" "sync" "sync/atomic" + "time" "google.golang.org/protobuf/proto" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/operator" pubsub "github.com/libp2p/go-libp2p-pubsub" @@ -54,6 +56,8 @@ type channel struct { name string + ctx context.Context + clientIdentity *identity peerStore peerstore.Peerstore @@ -73,6 +77,16 @@ type channel struct { unmarshalersByType map[string]func() net.TaggedUnmarshaler retransmissionTicker *retransmission.Ticker + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } + + // monitorQueueSizesOnce ensures the monitoring goroutine is started only once + monitorQueueSizesOnce sync.Once } type messageHandler struct { @@ -239,7 +253,11 @@ func (c *channel) publish(message *pb.BroadcastNetworkMessage) error { c.publisherMutex.Lock() defer c.publisherMutex.Unlock() - return c.publisher.Publish(context.TODO(), messageBytes) + publishErr := c.publisher.Publish(context.TODO(), messageBytes) + if publishErr == nil && c.metricsRecorder != nil { + c.metricsRecorder.IncrementCounter("message_broadcast_total", 1) + } + return publishErr } func (c *channel) 
handleMessages(ctx context.Context) { @@ -282,6 +300,9 @@ func (c *channel) incomingMessageWorker(ctx context.Context) { case <-ctx.Done(): return case msg := <-c.incomingMessageQueue: + if c.metricsRecorder != nil { + c.metricsRecorder.IncrementCounter("message_received_total", 1) + } if err := c.processPubsubMessage(msg); err != nil { logger.Error(err) } @@ -424,3 +445,53 @@ func extractPublicKey(peer peer.ID) (*operator.PublicKey, error) { return networkPublicKeyToOperatorPublicKey(publicKey) } + +// setMetricsRecorder sets the metrics recorder for the channel and starts +// periodic queue size monitoring. +func (c *channel) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + c.metricsRecorder = recorder + // Start periodic queue size monitoring (only once) + if recorder != nil { + c.monitorQueueSizesOnce.Do(func() { + go c.monitorQueueSizes(c.ctx, recorder) + }) + } +} + +// monitorQueueSizes periodically records queue sizes as metrics. +// It stops when the provided context is cancelled (e.g., when the channel is closed). 
+func (c *channel) monitorQueueSizes(ctx context.Context, recorder interface { + SetGauge(name string, value float64) +}) { + ticker := time.NewTicker(1 * time.Minute) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + // Record incoming message queue size + queueSize := float64(len(c.incomingMessageQueue)) + recorder.SetGauge(clientinfo.MetricIncomingMessageQueueSize, queueSize) + + // Record message handler queue sizes + // Copy data while holding lock, then record metrics after releasing + c.messageHandlersMutex.Lock() + queueSizes := make([]float64, len(c.messageHandlers)) + for i, handler := range c.messageHandlers { + queueSizes[i] = float64(len(handler.channel)) + } + c.messageHandlersMutex.Unlock() + + // Record metrics outside the lock to prevent potential deadlock + for i, size := range queueSizes { + recorder.SetGauge(fmt.Sprintf("%s_%d", clientinfo.MetricMessageHandlerQueueSize, i), size) + } + case <-ctx.Done(): + return + } + } +} diff --git a/pkg/net/libp2p/channel_manager.go b/pkg/net/libp2p/channel_manager.go index aa9f888a12..bcb10f7ffb 100644 --- a/pkg/net/libp2p/channel_manager.go +++ b/pkg/net/libp2p/channel_manager.go @@ -48,6 +48,13 @@ type channelManager struct { topicsMutex sync.Mutex topics map[string]*pubsub.Topic + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } func newChannelManager( @@ -108,11 +115,31 @@ func (cm *channelManager) getChannel(name string) (*channel, error) { } cm.channels[name] = channel + // Wire metrics recorder into channel if available + if cm.metricsRecorder != nil { + channel.setMetricsRecorder(cm.metricsRecorder) + } } return channel, nil } +// setMetricsRecorder sets the metrics recorder for the channel manager +// and wires it into existing channels. 
+func (cm *channelManager) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + // Wire metrics into existing channels + cm.channelsMutex.Lock() + defer cm.channelsMutex.Unlock() + cm.metricsRecorder = recorder + for _, channel := range cm.channels { + channel.setMetricsRecorder(recorder) + } +} + func (cm *channelManager) newChannel(name string) (*channel, error) { topic, err := cm.getTopic(name) if err != nil { @@ -134,6 +161,7 @@ func (cm *channelManager) newChannel(name string) (*channel, error) { channel := &channel{ name: name, + ctx: cm.ctx, clientIdentity: cm.identity, peerStore: cm.peerStore, validator: cm.pubsub, diff --git a/pkg/net/libp2p/libp2p.go b/pkg/net/libp2p/libp2p.go index 5f56306222..4d429a8f4f 100644 --- a/pkg/net/libp2p/libp2p.go +++ b/pkg/net/libp2p/libp2p.go @@ -4,8 +4,10 @@ import ( "context" "fmt" "sync" + "sync/atomic" "time" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/operator" "github.com/ipfs/go-log" @@ -16,6 +18,7 @@ import ( dstore "github.com/ipfs/go-datastore" dssync "github.com/ipfs/go-datastore/sync" + //lint:ignore SA1019 package deprecated, but we rely on its interface addrutil "github.com/libp2p/go-addr-util" "github.com/libp2p/go-libp2p" @@ -92,6 +95,11 @@ type provider struct { disseminationTime int connectionManager *connectionManager + + // metricsRecorder is optional and used for recording performance metrics. + // It uses a pointer to atomic.Value for thread-safe access and sharing + // with the transport layer for join request metrics. + metricsRecorder *atomic.Value } func (p *provider) BroadcastChannelFor(name string) (net.BroadcastChannel, error) { @@ -314,19 +322,22 @@ func Connect( return nil, err } + // Initialize the metrics recorder atomic.Value before creating the host. 
+ // This allows the transport to reference it and receive metrics recorder updates later. + var metricsRecorderRef atomic.Value + host, err := discoverAndListen( ctx, identity, config.Port, config.AnnouncedAddresses, firewall, + &metricsRecorderRef, ) if err != nil { return nil, err } - host.Network().Notify(buildNotifiee(host)) - broadcastChannelManager, err := newChannelManager(ctx, identity, host, ticker) if err != nil { return nil, err @@ -352,6 +363,7 @@ func Connect( host: rhost.Wrap(host, router), routing: router, disseminationTime: config.DisseminationTime, + metricsRecorder: &metricsRecorderRef, } if len(config.Peers) == 0 { @@ -364,6 +376,10 @@ func Connect( provider.connectionManager = newConnectionManager(ctx, provider.host) + // Register notifiee - it will reference provider.metricsRecorder which can be updated later + notifiee := buildNotifiee(provider.host, provider) + provider.host.Network().Notify(notifiee) + // Instantiates and starts the connection management background process. watchtower.NewGuard( ctx, @@ -376,12 +392,26 @@ func Connect( return provider, nil } +// SetMetricsRecorder sets the metrics recorder for the provider and wires it +// into network components. 
+func (p *provider) SetMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + p.metricsRecorder.Store(recorder) + if p.broadcastChannelManager != nil { + p.broadcastChannelManager.setMetricsRecorder(recorder) + } +} + func discoverAndListen( ctx context.Context, identity *identity, port int, announcedAddresses []string, firewall net.Firewall, + metricsRecorderRef *atomic.Value, ) (host.Host, error) { var err error @@ -419,6 +449,7 @@ func discoverAndListen( privateKey, muxers, firewall, + metricsRecorderRef, ) if err != nil { return nil, fmt.Errorf( @@ -533,9 +564,15 @@ func extractMultiAddrFromPeers(peers []string) ([]peer.AddrInfo, error) { return peerInfos, nil } -func buildNotifiee(libp2pHost host.Host) libp2pnet.Notifiee { +func buildNotifiee(libp2pHost host.Host, p *provider) libp2pnet.Notifiee { notifyBundle := &libp2pnet.NotifyBundle{} + // Track peers we've already pinged to avoid duplicate ping tests. + // libp2p may establish multiple connections to the same peer (different + // transports/addresses), and we only want to ping once per unique peer. 
+ var pingedPeersMu sync.Mutex + pingedPeers := make(map[peer.ID]struct{}) + notifyBundle.ConnectedF = func(_ libp2pnet.Network, connection libp2pnet.Conn) { peerID := connection.RemotePeer() @@ -546,16 +583,64 @@ func buildNotifiee(libp2pHost host.Host) libp2pnet.Notifiee { logger.Infof("established connection to [%v]", peerMultiaddress) - go executePingTest(libp2pHost, peerID, peerMultiaddress) + var recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } + if p.metricsRecorder != nil { + if metricsRecorderValue := p.metricsRecorder.Load(); metricsRecorderValue != nil { + recorder = metricsRecorderValue.(interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + }) + recorder.IncrementCounter(clientinfo.MetricPeerConnectionsTotal, 1) + } + } + + // Only ping each unique peer once to avoid false failures from + // connection multiplexing (multiple connections to same peer). 
+ pingedPeersMu.Lock() + if _, alreadyPinged := pingedPeers[peerID]; alreadyPinged { + pingedPeersMu.Unlock() + logger.Debugf("skipping duplicate ping test for [%v]", peerID) + return + } + pingedPeers[peerID] = struct{}{} + pingedPeersMu.Unlock() + + go executePingTest(libp2pHost, peerID, peerMultiaddress, recorder) } - notifyBundle.DisconnectedF = func(_ libp2pnet.Network, connection libp2pnet.Conn) { + notifyBundle.DisconnectedF = func(network libp2pnet.Network, connection libp2pnet.Conn) { + peerID := connection.RemotePeer() + logger.Infof( "disconnected from [%v]", multiaddressWithIdentity( connection.RemoteMultiaddr(), - connection.RemotePeer(), + peerID, ), ) + + if p.metricsRecorder != nil { + if metricsRecorderValue := p.metricsRecorder.Load(); metricsRecorderValue != nil { + recorder := metricsRecorderValue.(interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + }) + recorder.IncrementCounter(clientinfo.MetricPeerDisconnectionsTotal, 1) + } + } + + // Remove peer from pinged set only if no more connections remain. + // This allows re-pinging if the peer reconnects later. 
+ if len(network.ConnsToPeer(peerID)) == 0 { + pingedPeersMu.Lock() + delete(pingedPeers, peerID) + pingedPeersMu.Unlock() + } } return notifyBundle @@ -565,15 +650,25 @@ func executePingTest( libp2pHost host.Host, peerID peer.ID, peerMultiaddress string, + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + }, ) { logger.Infof("starting ping test for [%v]", peerMultiaddress) + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricPingTestsTotal, 1) + } + ctx, cancelCtx := context.WithTimeout( context.Background(), pingTestTimeout, ) defer cancelCtx() + startTime := time.Now() resultChan := ping.Ping(ctx, libp2pHost, peerID) select { @@ -584,20 +679,34 @@ func executePingTest( peerMultiaddress, result.Error, ) + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricPingTestFailedTotal, 1) + } } else if result.Error == nil && result.RTT == 0 { logger.Warnf( "peer test for [%v] failed without clear reason", peerMultiaddress, ) + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricPingTestFailedTotal, 1) + } } else { logger.Infof( "ping test for [%v] completed with success (RTT [%v])", peerMultiaddress, result.RTT, ) + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricPingTestSuccessTotal, 1) + // Only record duration on successful ping tests + metricsRecorder.RecordDuration(clientinfo.MetricPingTestDurationSeconds, time.Since(startTime)) + } } case <-ctx.Done(): logger.Warnf("ping test for [%v] timed out", peerMultiaddress) + if metricsRecorder != nil { + metricsRecorder.IncrementCounter(clientinfo.MetricPingTestFailedTotal, 1) + } } } diff --git a/pkg/net/libp2p/transport.go b/pkg/net/libp2p/transport.go index 8f2a065729..7ad91c6157 100644 --- a/pkg/net/libp2p/transport.go +++ b/pkg/net/libp2p/transport.go @@ -3,6 +3,8 @@ package libp2p 
import ( "context" "net" + "sync/atomic" + "time" libp2ptls "github.com/libp2p/go-libp2p/p2p/security/tls" @@ -26,6 +28,12 @@ const ( var _ sec.SecureTransport = (*transport)(nil) var _ sec.SecureConn = (*authenticatedConnection)(nil) +// MetricsRecorder is an interface for recording network metrics. +type MetricsRecorder interface { + IncrementCounter(name string, value float64) + RecordDuration(name string, duration time.Duration) +} + // transport constructs an encrypted and authenticated connection for a peer. type transport struct { protocolID protocol.ID @@ -37,6 +45,10 @@ type transport struct { encryptionLayer sec.SecureTransport firewall keepNet.Firewall + + // metricsRecorderRef is a pointer to an atomic.Value that holds the metrics recorder. + // This allows late binding of the metrics recorder after the transport is created. + metricsRecorderRef *atomic.Value } func newEncryptedAuthenticatedTransport( @@ -45,6 +57,7 @@ func newEncryptedAuthenticatedTransport( privateKey libp2pcrypto.PrivKey, muxers []upgrader.StreamMuxer, firewall keepNet.Firewall, + metricsRecorderRef *atomic.Value, ) (*transport, error) { id, err := peer.IDFromPrivateKey(privateKey) if err != nil { @@ -57,15 +70,30 @@ func newEncryptedAuthenticatedTransport( } return &transport{ - protocolID: protocolID, - authProtocolID: authProtocolID, - localPeerID: id, - privateKey: privateKey, - encryptionLayer: encryptionLayer, - firewall: firewall, + protocolID: protocolID, + authProtocolID: authProtocolID, + localPeerID: id, + privateKey: privateKey, + encryptionLayer: encryptionLayer, + firewall: firewall, + metricsRecorderRef: metricsRecorderRef, }, nil } +// getMetricsRecorder returns the current metrics recorder from the atomic reference, +// or nil if none is set. 
+func (t *transport) getMetricsRecorder() MetricsRecorder { + if t.metricsRecorderRef == nil { + return nil + } + if val := t.metricsRecorderRef.Load(); val != nil { + if recorder, ok := val.(MetricsRecorder); ok { + return recorder + } + } + return nil +} + // SecureInbound secures an inbound connection. func (t *transport) SecureInbound( ctx context.Context, @@ -84,6 +112,7 @@ func (t *transport) SecureInbound( t.privateKey, t.firewall, t.authProtocolID, + t.getMetricsRecorder(), ) } @@ -110,6 +139,7 @@ func (t *transport) SecureOutbound( remotePeerID, t.firewall, t.authProtocolID, + t.getMetricsRecorder(), ) } diff --git a/pkg/tbtc/coordination.go b/pkg/tbtc/coordination.go index 6d06ff9634..531d47b34c 100644 --- a/pkg/tbtc/coordination.go +++ b/pkg/tbtc/coordination.go @@ -16,6 +16,7 @@ import ( "github.com/keep-network/keep-core/pkg/bitcoin" "github.com/keep-network/keep-core/pkg/chain" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/generator" "github.com/keep-network/keep-core/pkg/net" "github.com/keep-network/keep-core/pkg/protocol/group" @@ -295,6 +296,13 @@ type coordinationExecutor struct { protocolLatch *generator.ProtocolLatch waitForBlockFn waitForBlockFn + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } // newCoordinationExecutor creates a new coordination executor for the @@ -363,6 +371,11 @@ func (ce *coordinationExecutor) coordinate( execLogger.Info("starting coordination") + startTime := time.Now() + + // Record duration metric once at the end using defer + var coordinationFailed bool + seed, err := ce.getSeed(window.coordinationBlock) if err != nil { return nil, fmt.Errorf("failed to compute coordination seed: [%v]", err) @@ -411,6 +424,10 @@ func (ce *coordinationExecutor) coordinate( // no point to 
keep the context active as retransmissions do not // occur anyway. cancelCtx() + coordinationFailed = true + if ce.metricsRecorder != nil { + ce.metricsRecorder.IncrementCounter(clientinfo.MetricCoordinationFailedTotal, 1) + } return nil, fmt.Errorf( "failed to execute leader's routine: [%v]", err, @@ -431,7 +448,21 @@ func (ce *coordinationExecutor) coordinate( append(actionsChecklist, ActionNoop), ) if err != nil { - return nil, fmt.Errorf( + coordinationFailed = true + // Record as leader timeout observation, not as a failure of this node. + // The actual failure is on the leader's side. + if ce.metricsRecorder != nil { + ce.metricsRecorder.IncrementCounter(clientinfo.MetricCoordinationLeaderTimeoutTotal, 1) + } + // Return a partial result with leader and faults information + partialResult := &coordinationResult{ + wallet: ce.coordinatedWallet, + window: window, + leader: leader, + proposal: nil, // no proposal on failure + faults: faults, + } + return partialResult, fmt.Errorf( "failed to execute follower's routine: [%v]", err, ) @@ -459,9 +490,24 @@ func (ce *coordinationExecutor) coordinate( execLogger.Infof("coordination completed with result: [%s]", result) + // Record successful coordination counter + if ce.metricsRecorder != nil && !coordinationFailed { + ce.metricsRecorder.IncrementCounter(clientinfo.MetricCoordinationProceduresExecutedTotal, 1) + ce.metricsRecorder.RecordDuration(clientinfo.MetricCoordinationDurationSeconds, time.Since(startTime)) + } + return result, nil } +// setMetricsRecorder sets the metrics recorder for the coordination executor. +func (ce *coordinationExecutor) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + ce.metricsRecorder = recorder +} + // getSeed computes the coordination seed for the given coordination window. 
func (ce *coordinationExecutor) getSeed( coordinationBlock uint64, diff --git a/pkg/tbtc/coordination_test.go b/pkg/tbtc/coordination_test.go index 763964a64b..de9e0b4df8 100644 --- a/pkg/tbtc/coordination_test.go +++ b/pkg/tbtc/coordination_test.go @@ -145,21 +145,49 @@ func TestWatchCoordinationWindows(t *testing.T) { return blocksChan } - receivedWindows := make([]*coordinationWindow, 0) + // Channel to collect windows from callbacks. + // Buffered to handle multiple windows without blocking. + windowsChan := make(chan *coordinationWindow, 10) onWindowFn := func(window *coordinationWindow) { - receivedWindows = append(receivedWindows, window) + windowsChan <- window } ctx, cancelCtx := context.WithTimeout( context.Background(), - 2000*time.Millisecond, + 2500*time.Millisecond, ) defer cancelCtx() go watchCoordinationWindows(ctx, watchBlocksFn, onWindowFn) + // Wait for the context to complete so all blocks are generated. <-ctx.Done() + // Now collect windows with a timeout to ensure we get all expected windows. + // This avoids race conditions where callback goroutines haven't sent yet. + // We use a longer timeout since the watchCoordinationWindows loop may have + // just spawned the callback goroutine for the last window when the context expired. 
+ receivedWindows := make([]*coordinationWindow, 0) + expectedWindows := 2 + collectTimeout := 2 * time.Second + deadline := time.Now().Add(collectTimeout) + + for len(receivedWindows) < expectedWindows { + select { + case window := <-windowsChan: + receivedWindows = append(receivedWindows, window) + case <-time.After(10 * time.Millisecond): + // Check if we've exceeded the deadline + if time.Now().After(deadline) { + t.Fatalf( + "timeout waiting for windows: got %d, expected %d", + len(receivedWindows), + expectedWindows, + ) + } + } + } + testutils.AssertIntsEqual(t, "received windows", 2, len(receivedWindows)) testutils.AssertIntsEqual( t, diff --git a/pkg/tbtc/coordination_window_metrics.go b/pkg/tbtc/coordination_window_metrics.go new file mode 100644 index 0000000000..2b57fc4c52 --- /dev/null +++ b/pkg/tbtc/coordination_window_metrics.go @@ -0,0 +1,434 @@ +package tbtc + +import ( + "fmt" + "sync" + "time" + + "github.com/keep-network/keep-core/pkg/chain" + "github.com/keep-network/keep-core/pkg/clientinfo" +) + +// coordinationWindowMetrics tracks detailed metrics for individual coordination windows. +type coordinationWindowMetrics struct { + mu sync.RWMutex + + // windows stores metrics for each coordination window by window index + windows map[uint64]*windowMetrics + + // performanceMetrics is used to record aggregate metrics + performanceMetrics clientinfo.PerformanceMetricsRecorder + + // maxWindowsToTrack limits the number of windows to keep in memory + // to prevent unbounded memory growth + maxWindowsToTrack uint64 +} + +// windowMetrics contains all metrics for a single coordination window. 
+type windowMetrics struct { + // Window identification + WindowIndex uint64 `json:"window_index"` + CoordinationBlock uint64 `json:"coordination_block"` + + // Window timing + StartTime time.Time `json:"start_time"` + EndTime time.Time `json:"end_time"` + Duration time.Duration `json:"duration_ns"` + ActivePhaseEndBlock uint64 `json:"active_phase_end_block"` + EndBlock uint64 `json:"end_block"` + + // Coordination statistics + WalletsCoordinated uint64 `json:"wallets_coordinated"` + WalletsSuccessful uint64 `json:"wallets_successful"` + WalletsFailed uint64 `json:"wallets_failed"` + TotalProceduresStarted uint64 `json:"total_procedures_started"` + TotalProceduresCompleted uint64 `json:"total_procedures_completed"` + + // Leader information + Leaders map[string]uint64 `json:"leaders"` // leader address -> count of wallets they led + + // Action type statistics + ActionTypes map[string]uint64 `json:"action_types"` // action type -> count + + // Fault statistics + TotalFaults uint64 `json:"total_faults"` + FaultsByType map[string]uint64 `json:"faults_by_type"` // fault type -> count + FaultsByCulprit map[string]uint64 `json:"faults_by_culprit"` // culprit address -> count + + // Per-wallet coordination details + WalletCoordinationDetails []walletCoordinationDetail `json:"wallet_coordination_details"` +} + +// walletCoordinationDetail contains metrics for a single wallet's coordination +// in a window. +type walletCoordinationDetail struct { + WalletPublicKeyHash string `json:"wallet_public_key_hash"` + Leader string `json:"leader"` + ActionType string `json:"action_type"` + Success bool `json:"success"` + Duration time.Duration `json:"duration_ns"` + ErrorMessage string `json:"error_message,omitempty"` // error message if failed + Faults []faultDetail `json:"faults"` // detailed fault information +} + +// faultDetail contains detailed information about a coordination fault. 
+type faultDetail struct { + Type string `json:"type"` // fault type (e.g., LeaderIdleness, LeaderMistake) + Culprit string `json:"culprit"` // address of the operator responsible + Message string `json:"message"` // human-readable description +} + +// faultMessage generates a human-readable message for a coordination fault. +func faultMessage(faultType CoordinationFaultType, culprit string) string { + switch faultType { + case FaultLeaderIdleness: + return fmt.Sprintf("Leader %s was idle and missed their turn to propose a wallet action", culprit) + case FaultLeaderMistake: + return fmt.Sprintf("Leader %s proposed an invalid action", culprit) + case FaultLeaderImpersonation: + return fmt.Sprintf("Operator %s impersonated the leader", culprit) + case FaultUnknown: + return fmt.Sprintf("Unknown fault from operator %s", culprit) + default: + return fmt.Sprintf("Fault type %s from operator %s", faultType.String(), culprit) + } +} + +// newCoordinationWindowMetrics creates a new coordination window metrics tracker. +func newCoordinationWindowMetrics( + performanceMetrics clientinfo.PerformanceMetricsRecorder, + maxWindowsToTrack uint64, +) *coordinationWindowMetrics { + return &coordinationWindowMetrics{ + windows: make(map[uint64]*windowMetrics), + performanceMetrics: performanceMetrics, + maxWindowsToTrack: maxWindowsToTrack, + } +} + +// recordWindowStart records the start of a coordination window. +func (cwm *coordinationWindowMetrics) recordWindowStart(window *coordinationWindow) { + cwm.mu.Lock() + defer cwm.mu.Unlock() + + cwm.initializeWindowIfNeeded(window) +} + +// initializeWindowIfNeeded initializes window metrics if they don't exist. +// This function assumes the caller already holds cwm.mu.Lock(). 
+func (cwm *coordinationWindowMetrics) initializeWindowIfNeeded(window *coordinationWindow) { + windowIndex := window.index() + if windowIndex == 0 { + // Invalid window, skip + return + } + + // Initialize window metrics if not exists + if _, exists := cwm.windows[windowIndex]; !exists { + cwm.windows[windowIndex] = &windowMetrics{ + WindowIndex: windowIndex, + CoordinationBlock: window.coordinationBlock, + StartTime: time.Now(), + ActivePhaseEndBlock: window.activePhaseEndBlock(), + EndBlock: window.endBlock(), + Leaders: make(map[string]uint64), + ActionTypes: make(map[string]uint64), + FaultsByType: make(map[string]uint64), + FaultsByCulprit: make(map[string]uint64), + WalletCoordinationDetails: make([]walletCoordinationDetail, 0), + } + } + + // Clean up old windows if we exceed the limit + cwm.cleanupOldWindows() +} + +// recordWindowEnd records the end of a coordination window. +func (cwm *coordinationWindowMetrics) recordWindowEnd(window *coordinationWindow) { + cwm.mu.Lock() + defer cwm.mu.Unlock() + + windowIndex := window.index() + if windowIndex == 0 { + return + } + + wm, exists := cwm.windows[windowIndex] + if !exists { + return + } + + // This guard prevents double-recording when recordWindowEnd is called + // both during normal operation (when a new window is detected) and during + // shutdown cleanup (to ensure the last active window is properly closed). + // Without this check, the shutdown cleanup goroutine could overwrite the + // EndTime that was already set by the normal window transition flow. + if !wm.EndTime.IsZero() { + return + } + + wm.EndTime = time.Now() + wm.Duration = wm.EndTime.Sub(wm.StartTime) + + // Record aggregate metrics + if cwm.performanceMetrics != nil { + // Record window duration + cwm.performanceMetrics.RecordDuration( + clientinfo.MetricCoordinationWindowDurationSeconds, + wm.Duration, + ) + } +} + +// recordWalletCoordination records metrics for a single wallet's coordination +// in a window. 
+func (cwm *coordinationWindowMetrics) recordWalletCoordination( + window *coordinationWindow, + walletPublicKeyHash [20]byte, + leader chain.Address, + actionType string, + success bool, + duration time.Duration, + faults []*coordinationFault, + coordinationErr error, +) { + cwm.mu.Lock() + defer cwm.mu.Unlock() + + windowIndex := window.index() + if windowIndex == 0 { + return + } + + wm, exists := cwm.windows[windowIndex] + if !exists { + // Window not initialized, initialize it now + // Note: we already hold the lock, so use the lock-free helper + cwm.initializeWindowIfNeeded(window) + wm = cwm.windows[windowIndex] + } + + // Update window-level statistics + wm.WalletsCoordinated++ + wm.TotalProceduresStarted++ + if success { + wm.WalletsSuccessful++ + wm.TotalProceduresCompleted++ + } else { + wm.WalletsFailed++ + } + + // Track leader + leaderStr := leader.String() + wm.Leaders[leaderStr]++ + + // Track action type + if actionType != "" { + wm.ActionTypes[actionType]++ + } + + // Track faults + faultDetails := make([]faultDetail, 0, len(faults)) + for _, fault := range faults { + faultTypeStr := fault.faultType.String() + culpritStr := fault.culprit.String() + + wm.FaultsByType[faultTypeStr]++ + wm.TotalFaults++ + wm.FaultsByCulprit[culpritStr]++ + + faultDetails = append(faultDetails, faultDetail{ + Type: faultTypeStr, + Culprit: culpritStr, + Message: faultMessage(fault.faultType, culpritStr), + }) + } + + // Record per-wallet detail + detail := walletCoordinationDetail{ + WalletPublicKeyHash: fmt.Sprintf("0x%x", walletPublicKeyHash), + Leader: leaderStr, + ActionType: actionType, + Success: success, + Duration: duration, + Faults: faultDetails, + } + if coordinationErr != nil { + detail.ErrorMessage = coordinationErr.Error() + } + wm.WalletCoordinationDetails = append(wm.WalletCoordinationDetails, detail) +} + +// GetWindowMetrics returns metrics for a specific window. 
+func (cwm *coordinationWindowMetrics) GetWindowMetrics(windowIndex uint64) (*windowMetrics, bool) { + cwm.mu.RLock() + defer cwm.mu.RUnlock() + + wm, exists := cwm.windows[windowIndex] + if !exists { + return nil, false + } + + // Return a deep copy to avoid race conditions + return wm.deepCopy(), true +} + +// GetRecentWindows returns metrics for the most recent N windows. +func (cwm *coordinationWindowMetrics) GetRecentWindows(limit int) []*windowMetrics { + cwm.mu.RLock() + defer cwm.mu.RUnlock() + + // Collect all window indices and sort them + indices := make([]uint64, 0, len(cwm.windows)) + for idx := range cwm.windows { + indices = append(indices, idx) + } + + // Sort in descending order (most recent first) + for i := 0; i < len(indices)-1; i++ { + for j := i + 1; j < len(indices); j++ { + if indices[i] < indices[j] { + indices[i], indices[j] = indices[j], indices[i] + } + } + } + + // Limit results + if limit > 0 && limit < len(indices) { + indices = indices[:limit] + } + + // Return deep copies + result := make([]*windowMetrics, 0, len(indices)) + for _, idx := range indices { + wm := cwm.windows[idx] + result = append(result, wm.deepCopy()) + } + + return result +} + +// cleanupOldWindows removes old windows to prevent unbounded memory growth. 
+func (cwm *coordinationWindowMetrics) cleanupOldWindows() { + if uint64(len(cwm.windows)) <= cwm.maxWindowsToTrack { + return + } + + // Find the oldest window indices + indices := make([]uint64, 0, len(cwm.windows)) + for idx := range cwm.windows { + indices = append(indices, idx) + } + + // Sort in ascending order (oldest first) + for i := 0; i < len(indices)-1; i++ { + for j := i + 1; j < len(indices); j++ { + if indices[i] > indices[j] { + indices[i], indices[j] = indices[j], indices[i] + } + } + } + + // Remove oldest windows + windowsToRemove := len(cwm.windows) - int(cwm.maxWindowsToTrack) + for i := 0; i < windowsToRemove; i++ { + delete(cwm.windows, indices[i]) + } +} + +// GetSummary returns a summary of all tracked windows. +func (cwm *coordinationWindowMetrics) GetSummary() WindowMetricsSummary { + cwm.mu.RLock() + defer cwm.mu.RUnlock() + + summary := WindowMetricsSummary{ + TotalWindows: uint64(len(cwm.windows)), + TotalWalletsCoordinated: 0, + TotalWalletsSuccessful: 0, + TotalWalletsFailed: 0, + TotalFaults: 0, + Windows: make([]*windowMetrics, 0, len(cwm.windows)), + } + + for _, wm := range cwm.windows { + summary.TotalWalletsCoordinated += wm.WalletsCoordinated + summary.TotalWalletsSuccessful += wm.WalletsSuccessful + summary.TotalWalletsFailed += wm.WalletsFailed + summary.TotalFaults += wm.TotalFaults + + summary.Windows = append(summary.Windows, wm.deepCopy()) + } + + return summary +} + +// WindowMetricsSummary provides a summary of coordination window metrics. +type WindowMetricsSummary struct { + TotalWindows uint64 `json:"total_windows"` + TotalWalletsCoordinated uint64 `json:"total_wallets_coordinated"` + TotalWalletsSuccessful uint64 `json:"total_wallets_successful"` + TotalWalletsFailed uint64 `json:"total_wallets_failed"` + TotalFaults uint64 `json:"total_faults"` + Windows []*windowMetrics `json:"windows"` +} + +// deepCopy creates a deep copy of windowMetrics, properly copying all maps and slices. 
+func (wm *windowMetrics) deepCopy() *windowMetrics { + if wm == nil { + return nil + } + + wmCopy := &windowMetrics{ + WindowIndex: wm.WindowIndex, + CoordinationBlock: wm.CoordinationBlock, + StartTime: wm.StartTime, + EndTime: wm.EndTime, + Duration: wm.Duration, + ActivePhaseEndBlock: wm.ActivePhaseEndBlock, + EndBlock: wm.EndBlock, + WalletsCoordinated: wm.WalletsCoordinated, + WalletsSuccessful: wm.WalletsSuccessful, + WalletsFailed: wm.WalletsFailed, + TotalProceduresStarted: wm.TotalProceduresStarted, + TotalProceduresCompleted: wm.TotalProceduresCompleted, + TotalFaults: wm.TotalFaults, + Leaders: make(map[string]uint64, len(wm.Leaders)), + ActionTypes: make(map[string]uint64, len(wm.ActionTypes)), + FaultsByType: make(map[string]uint64, len(wm.FaultsByType)), + FaultsByCulprit: make(map[string]uint64, len(wm.FaultsByCulprit)), + WalletCoordinationDetails: make([]walletCoordinationDetail, len(wm.WalletCoordinationDetails)), + } + + // Deep copy maps + for k, v := range wm.Leaders { + wmCopy.Leaders[k] = v + } + for k, v := range wm.ActionTypes { + wmCopy.ActionTypes[k] = v + } + for k, v := range wm.FaultsByType { + wmCopy.FaultsByType[k] = v + } + for k, v := range wm.FaultsByCulprit { + wmCopy.FaultsByCulprit[k] = v + } + + // Deep copy slice + copy(wmCopy.WalletCoordinationDetails, wm.WalletCoordinationDetails) + + return wmCopy +} + +// String returns a string representation of window metrics for logging. 
+func (wm *windowMetrics) String() string { + return fmt.Sprintf( + "window[%d] block[%d] wallets[%d/%d/%d] faults[%d] actions[%v]", + wm.WindowIndex, + wm.CoordinationBlock, + wm.WalletsSuccessful, + wm.WalletsFailed, + wm.WalletsCoordinated, + wm.TotalFaults, + wm.ActionTypes, + ) +} diff --git a/pkg/tbtc/deposit_sweep.go b/pkg/tbtc/deposit_sweep.go index ca29d63b2e..824ce29d28 100644 --- a/pkg/tbtc/deposit_sweep.go +++ b/pkg/tbtc/deposit_sweep.go @@ -87,6 +87,12 @@ type depositSweepAction struct { signingTimeoutSafetyMarginBlocks uint64 broadcastTimeout time.Duration broadcastCheckDelay time.Duration + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } func newDepositSweepAction( @@ -124,6 +130,13 @@ func newDepositSweepAction( } func (dsa *depositSweepAction) execute() error { + executionStartTime := time.Now() + + // Record deposit sweep execution attempt + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_total", 1) + } + validateProposalLogger := dsa.logger.With( zap.String("step", "validateProposal"), ) @@ -139,6 +152,10 @@ func (dsa *depositSweepAction) execute() error { dsa.btcChain, ) if err != nil { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf("validate proposal step failed: [%v]", err) } @@ -148,6 +165,10 @@ func (dsa *depositSweepAction) execute() error { dsa.btcChain, ) if err != nil { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf( "error 
while determining wallet's main UTXO: [%v]", err, @@ -161,6 +182,10 @@ func (dsa *depositSweepAction) execute() error { dsa.btcChain, ) if err != nil { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf( "error while ensuring wallet state is synced between "+ "BTC and host chain: [%v]", @@ -176,6 +201,10 @@ func (dsa *depositSweepAction) execute() error { dsa.proposal.SweepTxFee.Int64(), ) if err != nil { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf( "error while assembling deposit sweep transaction: [%v]", err, @@ -188,9 +217,14 @@ func (dsa *depositSweepAction) execute() error { // Just in case. This should never happen. 
if dsa.proposalExpiryBlock < dsa.signingTimeoutSafetyMarginBlocks { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf("invalid proposal expiry block") } + signingStartTime := time.Now() sweepTx, err := dsa.transactionExecutor.signTransaction( signTxLogger, unsignedSweepTx, @@ -198,9 +232,18 @@ func (dsa *depositSweepAction) execute() error { dsa.proposalExpiryBlock-dsa.signingTimeoutSafetyMarginBlocks, ) if err != nil { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf("sign transaction step failed: [%v]", err) } + // Record deposit sweep transaction signing duration + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.RecordDuration("deposit_sweep_tx_signing_duration_seconds", time.Since(signingStartTime)) + } + broadcastTxLogger := dsa.logger.With( zap.String("step", "broadcastTransaction"), zap.String("sweepTxHash", sweepTx.Hash().Hex(bitcoin.ReversedByteOrder)), @@ -213,9 +256,19 @@ func (dsa *depositSweepAction) execute() error { dsa.broadcastCheckDelay, ) if err != nil { + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_failed_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } return fmt.Errorf("broadcast transaction step failed: [%v]", err) } + // Record successful deposit sweep execution + if dsa.metricsRecorder != nil { + dsa.metricsRecorder.IncrementCounter("deposit_sweep_executions_success_total", 1) + dsa.metricsRecorder.RecordDuration("deposit_sweep_execution_duration_seconds", time.Since(executionStartTime)) + } + return nil } 
@@ -424,6 +477,14 @@ func (dsa *depositSweepAction) actionType() WalletActionType { return ActionDepositSweep } +// setMetricsRecorder sets the metrics recorder for the deposit sweep action. +func (dsa *depositSweepAction) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + dsa.metricsRecorder = recorder +} + // assembleDepositSweepTransaction constructs an unsigned deposit sweep Bitcoin // transaction. // diff --git a/pkg/tbtc/dkg.go b/pkg/tbtc/dkg.go index 06f417b32b..177e225a18 100644 --- a/pkg/tbtc/dkg.go +++ b/pkg/tbtc/dkg.go @@ -4,15 +4,18 @@ import ( "context" "errors" "fmt" - "golang.org/x/exp/maps" "math/big" "sort" + "time" + + "golang.org/x/exp/maps" "go.uber.org/zap" "github.com/ipfs/go-log/v2" "github.com/keep-network/keep-common/pkg/persistence" "github.com/keep-network/keep-core/pkg/chain" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/generator" "github.com/keep-network/keep-core/pkg/net" "github.com/keep-network/keep-core/pkg/protocol/announcer" @@ -64,6 +67,13 @@ type dkgExecutor struct { waitForBlockFn waitForBlockFn tecdsaExecutor *dkg.Executor + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } // newDkgExecutor creates a new instance of dkgExecutor struct. There should @@ -105,6 +115,15 @@ func newDkgExecutor( } } +// setMetricsRecorder sets the metrics recorder for the DKG executor. +func (de *dkgExecutor) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + de.metricsRecorder = recorder +} + // preParamsCount returns the current count of the ECDSA DKG pre-parameters. 
func (de *dkgExecutor) preParamsCount() int { return de.tecdsaExecutor.PreParamsCount() @@ -152,6 +171,10 @@ func (de *dkgExecutor) executeDkgIfEligible( membersCount, ) + if de.metricsRecorder != nil { + de.metricsRecorder.IncrementCounter(clientinfo.MetricDKGJoinedTotal, float64(membersCount)) + } + de.generateSigningGroup( dkgLogger, seed, @@ -283,6 +306,7 @@ func (de *dkgExecutor) generateSigningGroup( memberIndex := index go func() { + dkgStartTime := time.Now() de.protocolLatch.Lock() defer de.protocolLatch.Unlock() @@ -388,6 +412,10 @@ func (de *dkgExecutor) generateSigningGroup( }, ) if err != nil { + if de.metricsRecorder != nil { + de.metricsRecorder.IncrementCounter(clientinfo.MetricDKGFailedTotal, 1) + de.metricsRecorder.RecordDuration(clientinfo.MetricDKGDurationSeconds, time.Since(dkgStartTime)) + } if errors.Is(err, context.Canceled) { dkgLogger.Infof( "[member:%v] DKG is no longer awaiting the result; "+ @@ -420,6 +448,11 @@ func (de *dkgExecutor) generateSigningGroup( dkgLogger.Infof("registered %s", signer) + // Record successful DKG completion + if de.metricsRecorder != nil { + de.metricsRecorder.RecordDuration(clientinfo.MetricDKGDurationSeconds, time.Since(dkgStartTime)) + } + err = de.publishDkgResult( ctx, dkgLogger, @@ -557,6 +590,10 @@ func (de *dkgExecutor) executeDkgValidation( dkgLogger.Infof("starting DKG result validation") + if de.metricsRecorder != nil { + de.metricsRecorder.IncrementCounter(clientinfo.MetricDKGValidationTotal, 1) + } + isValid, err := de.chain.IsDKGResultValid(result) if err != nil { dkgLogger.Errorf("cannot validate DKG result: [%v]", err) @@ -586,6 +623,10 @@ func (de *dkgExecutor) executeDkgValidation( return } + if de.metricsRecorder != nil { + de.metricsRecorder.IncrementCounter(clientinfo.MetricDKGChallengesSubmittedTotal, 1) + } + confirmationBlock := submissionBlock + (i * dkgResultChallengeConfirmationBlocks) @@ -732,6 +773,10 @@ func (de *dkgExecutor) executeDkgValidation( return } + if de.metricsRecorder 
!= nil { + de.metricsRecorder.IncrementCounter(clientinfo.MetricDKGApprovalsSubmittedTotal, 1) + } + dkgLogger.Infof("[member:%v] approving DKG result", memberIndex) }(currentMemberIndex) } diff --git a/pkg/tbtc/node.go b/pkg/tbtc/node.go index fc9ba55b10..f8f40b9f7c 100644 --- a/pkg/tbtc/node.go +++ b/pkg/tbtc/node.go @@ -7,10 +7,12 @@ import ( "fmt" "math/big" "sync" + "time" "github.com/keep-network/keep-common/pkg/chain/ethereum" "github.com/keep-network/keep-core/pkg/bitcoin" "github.com/keep-network/keep-core/pkg/chain" + "github.com/keep-network/keep-core/pkg/clientinfo" "go.uber.org/zap" @@ -111,6 +113,16 @@ type node struct { // proposalGenerator is the implementation of the coordination proposal // generator used by the node. proposalGenerator CoordinationProposalGenerator + + // performanceMetrics is optional and used for recording performance metrics + performanceMetrics interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } + + // windowMetricsTracker tracks detailed metrics for individual coordination windows + windowMetricsTracker *coordinationWindowMetrics } func newNode( @@ -184,6 +196,59 @@ func newNode( return node, nil } +// setPerformanceMetrics sets the performance metrics recorder for the node +// and wires it into components that support metrics. 
+func (n *node) setPerformanceMetrics(metrics interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + n.performanceMetrics = metrics + + // Initialize window metrics tracker with performance metrics + // Keep metrics for the last 100 windows (approximately 25 hours at 900 blocks per window) + if perfMetrics, ok := metrics.(clientinfo.PerformanceMetricsRecorder); ok { + n.windowMetricsTracker = newCoordinationWindowMetrics(perfMetrics, 100) + } + + if n.walletDispatcher != nil { + n.walletDispatcher.setMetricsRecorder(metrics) + } + if n.dkgExecutor != nil { + n.dkgExecutor.setMetricsRecorder(metrics) + } + + // Wire redemption metrics to proposal generator if it supports it + // This uses a type assertion to check if proposalGenerator is a *ProposalGenerator + // from the tbtcpg package. We can't import tbtcpg here to avoid circular dependencies, + // so we use an interface check instead. + if pg, ok := n.proposalGenerator.(interface { + SetRedemptionMetricsRecorder(recorder interface { + SetGauge(name string, value float64) + }) + }); ok { + pg.SetRedemptionMetricsRecorder(metrics) + } + + // Update metrics recorder for all cached coordination executors + // This is important because executors may be created before metrics are set + n.coordinationExecutorsMutex.Lock() + for _, executor := range n.coordinationExecutors { + executor.setMetricsRecorder(metrics) + } + n.coordinationExecutorsMutex.Unlock() +} + +// GetCoordinationWindowsSummary returns a summary of coordination window metrics. +// Returns nil if the window metrics tracker is not initialized. +func (n *node) GetCoordinationWindowsSummary() *WindowMetricsSummary { + if n.windowMetricsTracker == nil { + return nil + } + summary := n.windowMetricsTracker.GetSummary() + return &summary +} + // operatorAddress returns the node's operator address. 
func (n *node) operatorAddress() (chain.Address, error) { _, operatorPublicKey, err := n.chain.OperatorKeyPair() @@ -339,6 +404,11 @@ func (n *node) getSigningExecutor( signingAttemptsLimit, ) + // Wire metrics recorder if available + if n.performanceMetrics != nil { + executor.setMetricsRecorder(n.performanceMetrics) + } + n.signingExecutors[executorKey] = executor return executor, true, nil @@ -362,6 +432,11 @@ func (n *node) getCoordinationExecutor( executorKey := hex.EncodeToString(walletPublicKeyBytes) if executor, exists := n.coordinationExecutors[executorKey]; exists { + // Ensure metrics recorder is set if metrics are available + // (executor may have been created before metrics were initialized) + if n.performanceMetrics != nil { + executor.setMetricsRecorder(n.performanceMetrics) + } return executor, true, nil } @@ -434,6 +509,11 @@ func (n *node) getCoordinationExecutor( n.waitForBlockHeight, ) + // Wire metrics recorder if available + if n.performanceMetrics != nil { + executor.setMetricsRecorder(n.performanceMetrics) + } + n.coordinationExecutors[executorKey] = executor executorLogger.Infof( @@ -673,6 +753,11 @@ func (n *node) handleDepositSweepProposal( n.waitForBlockHeight, ) + // Wire metrics recorder if available + if n.performanceMetrics != nil { + action.setMetricsRecorder(n.performanceMetrics) + } + err = n.walletDispatcher.dispatch(action) if err != nil { walletActionLogger.Errorf("cannot dispatch wallet action: [%v]", err) @@ -741,6 +826,11 @@ func (n *node) handleRedemptionProposal( n.waitForBlockHeight, ) + // Wire metrics recorder if available + if n.performanceMetrics != nil { + action.setMetricsRecorder(n.performanceMetrics) + } + err = n.walletDispatcher.dispatch(action) if err != nil { walletActionLogger.Errorf("cannot dispatch wallet action: [%v]", err) @@ -931,9 +1021,35 @@ func (n *node) runCoordinationLayer( coordinationResultChan := make(chan *coordinationResult) + // Track the previous window to record its end when a new one 
starts + // Use a mutex to safely access from multiple goroutines + var previousWindowMu sync.Mutex + var previousWindow *coordinationWindow + // Prepare a callback function that will be called every time a new // coordination window is detected. onWindowFn := func(window *coordinationWindow) { + previousWindowMu.Lock() + // Record end of previous window if it exists + if previousWindow != nil && n.windowMetricsTracker != nil { + n.windowMetricsTracker.recordWindowEnd(previousWindow) + } + previousWindowMu.Unlock() + + // Track coordination window detection + if n.performanceMetrics != nil { + n.performanceMetrics.IncrementCounter(clientinfo.MetricCoordinationWindowsDetectedTotal, 1) + } + + // Record window start in detailed metrics tracker + if n.windowMetricsTracker != nil { + n.windowMetricsTracker.recordWindowStart(window) + } + + previousWindowMu.Lock() + previousWindow = window + previousWindowMu.Unlock() + // Fetch all wallets controlled by the node. It is important to // get the wallets every time the window is triggered as the // node may have started controlling a new wallet in the meantime. 
@@ -975,6 +1091,17 @@ func (n *node) runCoordinationLayer( } }() + // Start a cleanup goroutine to record the end time of the last window on shutdown + go func() { + <-ctx.Done() + // Record end time for the active window if it exists and hasn't been ended yet + previousWindowMu.Lock() + if previousWindow != nil && n.windowMetricsTracker != nil { + n.windowMetricsTracker.recordWindowEnd(previousWindow) + } + previousWindowMu.Unlock() + }() + return nil } @@ -1012,9 +1139,36 @@ func executeCoordinationProcedure( return nil, false } + startTime := time.Now() result, err := executor.coordinate(window) + duration := time.Since(startTime) + if err != nil { procedureLogger.Errorf("coordination procedure failed: [%v]", err) + // Metrics are already recorded in executor.coordinate() for failures + + // Record window metrics for failed coordination + if node.windowMetricsTracker != nil { + walletPublicKeyHash := bitcoin.PublicKeyHash(walletPublicKey) + // Extract leader and faults from partial result if available + // (e.g., when follower routine fails, we know who the leader was) + leader := chain.Address("") + var faults []*coordinationFault + if result != nil { + leader = result.leader + faults = result.faults + } + node.windowMetricsTracker.recordWalletCoordination( + window, + walletPublicKeyHash, + leader, + "", + false, + duration, + faults, + err, // capture the error message + ) + } return nil, false } @@ -1023,6 +1177,27 @@ func executeCoordinationProcedure( result, ) + // Metrics are already recorded in executor.coordinate() for successful executions + + // Record window metrics for successful coordination + if node.windowMetricsTracker != nil { + walletPublicKeyHash := bitcoin.PublicKeyHash(walletPublicKey) + actionType := "" + if result.proposal != nil { + actionType = result.proposal.ActionType().String() + } + node.windowMetricsTracker.recordWalletCoordination( + window, + walletPublicKeyHash, + result.leader, + actionType, + true, + duration, + 
result.faults, + nil, // no error on success + ) + } + return result, true } diff --git a/pkg/tbtc/redemption.go b/pkg/tbtc/redemption.go index 218f2db58d..1dd950c95f 100644 --- a/pkg/tbtc/redemption.go +++ b/pkg/tbtc/redemption.go @@ -12,6 +12,7 @@ import ( "github.com/keep-network/keep-core/pkg/bitcoin" "github.com/keep-network/keep-core/pkg/chain" + "github.com/keep-network/keep-core/pkg/clientinfo" ) const ( @@ -118,6 +119,12 @@ type redemptionAction struct { feeDistribution redemptionFeeDistributionFn transactionShape RedemptionTransactionShape + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } func newRedemptionAction( @@ -158,6 +165,13 @@ func newRedemptionAction( } func (ra *redemptionAction) execute() error { + startTime := time.Now() + + // Record redemption execution attempt + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsTotal, 1) + } + validateProposalLogger := ra.logger.With( zap.String("step", "validateProposal"), ) @@ -171,6 +185,9 @@ func (ra *redemptionAction) execute() error { ra.chain, ) if err != nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf("validate proposal step failed: [%v]", err) } @@ -180,6 +197,9 @@ func (ra *redemptionAction) execute() error { ra.btcChain, ) if err != nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf( "error while determining wallet's main UTXO: [%v]", err, @@ -189,6 +209,9 @@ func (ra *redemptionAction) execute() error { // Proposal validation should detect this but let's make a check just // in case. 
if walletMainUtxo == nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf("redeeming wallet has no main UTXO") } @@ -199,6 +222,9 @@ func (ra *redemptionAction) execute() error { ra.btcChain, ) if err != nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf( "error while ensuring wallet state is synced between "+ "BTC and host chain: [%v]", @@ -215,6 +241,9 @@ func (ra *redemptionAction) execute() error { ra.transactionShape, ) if err != nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf( "error while assembling redemption transaction: [%v]", err, @@ -227,6 +256,9 @@ func (ra *redemptionAction) execute() error { // Just in case. This should never happen. if ra.proposalExpiryBlock < ra.signingTimeoutSafetyMarginBlocks { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf("invalid proposal expiry block") } @@ -237,6 +269,9 @@ func (ra *redemptionAction) execute() error { ra.proposalExpiryBlock-ra.signingTimeoutSafetyMarginBlocks, ) if err != nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf("sign transaction step failed: [%v]", err) } @@ -252,12 +287,29 @@ func (ra *redemptionAction) execute() error { ra.broadcastCheckDelay, ) if err != nil { + if ra.metricsRecorder != nil { + ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsFailedTotal, 1) + } return fmt.Errorf("broadcast transaction step failed: [%v]", err) } + // Record successful redemption execution + if ra.metricsRecorder != nil { + 
ra.metricsRecorder.IncrementCounter(clientinfo.MetricRedemptionExecutionsSuccessTotal, 1) + ra.metricsRecorder.RecordDuration(clientinfo.MetricRedemptionActionDurationSeconds, time.Since(startTime)) + } + return nil } +// setMetricsRecorder sets the metrics recorder for the redemption action. +func (ra *redemptionAction) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + ra.metricsRecorder = recorder +} + // ValidateRedemptionProposal checks the redemption proposal with on-chain // validation rules. func ValidateRedemptionProposal( diff --git a/pkg/tbtc/signing.go b/pkg/tbtc/signing.go index 370e8df583..346b6b0446 100644 --- a/pkg/tbtc/signing.go +++ b/pkg/tbtc/signing.go @@ -6,7 +6,9 @@ import ( "math/big" "strings" "sync" + "time" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/generator" "github.com/keep-network/keep-core/pkg/net" "github.com/keep-network/keep-core/pkg/protocol/announcer" @@ -58,6 +60,13 @@ type signingExecutor struct { // be made by a single signer for the given message. Once the attempts // limit is hit the signer gives up. signingAttemptsLimit uint + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } func newSigningExecutor( @@ -147,6 +156,7 @@ func (se *signingExecutor) signBatch( signature, _, endBlock, err := se.sign(ctx, message, signingStartBlock) if err != nil { + // Error metrics are recorded in the sign() method for all error paths. 
return nil, err } @@ -176,14 +186,29 @@ func (se *signingExecutor) sign( startBlock uint64, ) (*tecdsa.Signature, *signingActivityReport, uint64, error) { if lockAcquired := se.lock.TryAcquire(1); !lockAcquired { + // Record failure metrics for lock acquisition failure + if se.metricsRecorder != nil { + se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningOperationsTotal, 1) + se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningFailedTotal, 1) + } return nil, nil, 0, errSigningExecutorBusy } defer se.lock.Release(1) + startTime := time.Now() + + if se.metricsRecorder != nil { + se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningOperationsTotal, 1) + } + wallet := se.wallet() walletPublicKeyBytes, err := marshalPublicKey(wallet.publicKey) if err != nil { + // Record failure metrics for marshal error + if se.metricsRecorder != nil { + se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningFailedTotal, 1) + } return nil, nil, 0, fmt.Errorf("cannot marshal wallet public key: [%v]", err) } @@ -386,8 +411,22 @@ func (se *signingExecutor) sign( // signer, that means all signers failed and have not produced a signature. select { case outcome := <-signingOutcomeChan: + if se.metricsRecorder != nil { + se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningSuccessTotal, 1) + se.metricsRecorder.RecordDuration(clientinfo.MetricSigningDurationSeconds, time.Since(startTime)) + } return outcome.signature, outcome.activityReport, outcome.endBlock, nil default: + if se.metricsRecorder != nil { + // All signers failed to produce a signature within the timeout period. + // This is counted as both a failure and a timeout. + // Note: Non-timeout errors (e.g., member selection failures) cause + // early return via cancelLoopCtx() and never reach this default case. + // Therefore, all failures reaching here are actual timeouts. 
+ se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningFailedTotal, 1) + se.metricsRecorder.IncrementCounter(clientinfo.MetricSigningTimeoutsTotal, 1) + se.metricsRecorder.RecordDuration(clientinfo.MetricSigningDurationSeconds, time.Since(startTime)) + } return nil, nil, 0, fmt.Errorf("all signers failed") } } @@ -397,3 +436,12 @@ func (se *signingExecutor) wallet() wallet { // first signer. return se.signers[0].wallet } + +// setMetricsRecorder sets the metrics recorder for the signing executor. +func (se *signingExecutor) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + se.metricsRecorder = recorder +} diff --git a/pkg/tbtc/tbtc.go b/pkg/tbtc/tbtc.go index 6f6414ba35..62b226aed6 100644 --- a/pkg/tbtc/tbtc.go +++ b/pkg/tbtc/tbtc.go @@ -81,6 +81,7 @@ func Initialize( proposalGenerator CoordinationProposalGenerator, config Config, clientInfo *clientinfo.Registry, + perfMetrics *clientinfo.PerformanceMetrics, ) error { groupParameters := &GroupParameters{ GroupSize: 100, @@ -120,6 +121,30 @@ func Initialize( }, }, ) + + if perfMetrics == nil { + perfMetrics = clientinfo.NewPerformanceMetrics(ctx, clientInfo) + } + node.setPerformanceMetrics(perfMetrics) + + // Register coordination windows as a diagnostic source + clientInfo.RegisterApplicationSource( + "coordination_windows", + func() clientinfo.ApplicationInfo { + summary := node.GetCoordinationWindowsSummary() + if summary == nil { + return clientinfo.ApplicationInfo{} + } + return clientinfo.ApplicationInfo{ + "total_windows": summary.TotalWindows, + "total_wallets_coordinated": summary.TotalWalletsCoordinated, + "total_wallets_successful": summary.TotalWalletsSuccessful, + "total_wallets_failed": summary.TotalWalletsFailed, + "total_faults": summary.TotalFaults, + "windows": summary.Windows, + } + }, + ) } err = sortition.MonitorPool( diff --git a/pkg/tbtc/wallet.go 
b/pkg/tbtc/wallet.go index 461c9ba3a5..1da076356b 100644 --- a/pkg/tbtc/wallet.go +++ b/pkg/tbtc/wallet.go @@ -16,6 +16,7 @@ import ( "github.com/ipfs/go-log/v2" "github.com/keep-network/keep-core/pkg/bitcoin" "github.com/keep-network/keep-core/pkg/chain" + "github.com/keep-network/keep-core/pkg/clientinfo" "github.com/keep-network/keep-core/pkg/protocol/group" "github.com/keep-network/keep-core/pkg/tecdsa" "go.uber.org/zap" @@ -72,6 +73,27 @@ func (wat WalletActionType) String() string { } } +// MetricName returns the metric name format for this action type (lowercase with underscores). +// This is used for generating per-action metric names. +func (wat WalletActionType) MetricName() string { + switch wat { + case ActionNoop: + return "noop" + case ActionHeartbeat: + return "heartbeat" + case ActionDepositSweep: + return "deposit_sweep" + case ActionRedemption: + return "redemption" + case ActionMovingFunds: + return "moving_funds" + case ActionMovedFundsSweep: + return "moved_funds_sweep" + default: + panic("unknown wallet action type") + } +} + // walletAction represents an action that can be performed by the wallet. type walletAction interface { // execute carries out the walletAction until completion. @@ -127,6 +149,14 @@ type walletDispatcher struct { // given wallet. The mapping key is the uncompressed public key // (with 04 prefix) of the wallet. actions map[string]WalletActionType + // metricsRecorderMutex protects concurrent access to metricsRecorder + metricsRecorderMutex sync.RWMutex + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) + } } func newWalletDispatcher() *walletDispatcher { @@ -135,6 +165,17 @@ func newWalletDispatcher() *walletDispatcher { } } +// setMetricsRecorder sets the metrics recorder for the wallet dispatcher. 
+func (wd *walletDispatcher) setMetricsRecorder(recorder interface { + IncrementCounter(name string, value float64) + SetGauge(name string, value float64) + RecordDuration(name string, duration time.Duration) +}) { + wd.metricsRecorderMutex.Lock() + defer wd.metricsRecorderMutex.Unlock() + wd.metricsRecorder = recorder +} + // dispatch sends the given walletAction for execution. If the wallet is // already busy, an errWalletBusy error is returned and the action is ignored. func (wd *walletDispatcher) dispatch(action walletAction) error { @@ -154,16 +195,49 @@ func (wd *walletDispatcher) dispatch(action walletAction) error { key := hex.EncodeToString(walletPublicKeyBytes) if _, ok := wd.actions[key]; ok { + wd.metricsRecorderMutex.RLock() + if wd.metricsRecorder != nil { + wd.metricsRecorder.IncrementCounter(clientinfo.MetricWalletDispatcherRejectedTotal, 1) + } + wd.metricsRecorderMutex.RUnlock() return errWalletBusy } - wd.actions[key] = action.actionType() + actionType := action.actionType() + wd.actions[key] = actionType + actionMetricName := actionType.MetricName() + + // Update metrics + wd.metricsRecorderMutex.RLock() + if wd.metricsRecorder != nil { + activeCount := float64(len(wd.actions)) + wd.metricsRecorder.SetGauge(clientinfo.MetricWalletDispatcherActiveActions, activeCount) + // Aggregate metrics (for backward compatibility) + wd.metricsRecorder.IncrementCounter(clientinfo.MetricWalletActionsTotal, 1) + // Per-action metrics + wd.metricsRecorder.IncrementCounter(clientinfo.WalletActionMetricName(actionMetricName, "total"), 1) + } + wd.metricsRecorderMutex.RUnlock() go func() { + startTime := time.Now() defer func() { wd.actionsMutex.Lock() delete(wd.actions, key) + activeCount := float64(len(wd.actions)) wd.actionsMutex.Unlock() + + // Update metrics + wd.metricsRecorderMutex.RLock() + if wd.metricsRecorder != nil { + wd.metricsRecorder.SetGauge(clientinfo.MetricWalletDispatcherActiveActions, activeCount) + duration := time.Since(startTime) + // 
Aggregate metrics (for backward compatibility) + wd.metricsRecorder.RecordDuration(clientinfo.MetricWalletActionDurationSeconds, duration) + // Per-action metrics + wd.metricsRecorder.RecordDuration(clientinfo.WalletActionMetricName(actionMetricName, "duration_seconds"), duration) + } + wd.metricsRecorderMutex.RUnlock() }() walletActionLogger.Infof("starting action execution") @@ -174,9 +248,26 @@ func (wd *walletDispatcher) dispatch(action walletAction) error { "action execution terminated with error: [%v]", err, ) + wd.metricsRecorderMutex.RLock() + if wd.metricsRecorder != nil { + // Aggregate metrics (for backward compatibility) + wd.metricsRecorder.IncrementCounter(clientinfo.MetricWalletActionFailedTotal, 1) + // Per-action metrics + wd.metricsRecorder.IncrementCounter(clientinfo.WalletActionMetricName(actionMetricName, "failed_total"), 1) + } + wd.metricsRecorderMutex.RUnlock() return } + wd.metricsRecorderMutex.RLock() + if wd.metricsRecorder != nil { + // Aggregate metrics (for backward compatibility) + wd.metricsRecorder.IncrementCounter(clientinfo.MetricWalletActionSuccessTotal, 1) + // Per-action metrics + wd.metricsRecorder.IncrementCounter(clientinfo.WalletActionMetricName(actionMetricName, "success_total"), 1) + } + wd.metricsRecorderMutex.RUnlock() + walletActionLogger.Infof("action execution terminated with success") }() diff --git a/pkg/tbtcpg/redemptions.go b/pkg/tbtcpg/redemptions.go index c9c01a5856..981e9a8eb7 100644 --- a/pkg/tbtcpg/redemptions.go +++ b/pkg/tbtcpg/redemptions.go @@ -18,6 +18,11 @@ import ( type RedemptionTask struct { chain Chain btcChain bitcoin.Chain + + // metricsRecorder is optional and used for recording performance metrics + metricsRecorder interface { + SetGauge(name string, value float64) + } } func NewRedemptionTask( @@ -30,6 +35,13 @@ func NewRedemptionTask( } } +// setMetricsRecorder sets the metrics recorder for the redemption task. 
+func (rt *RedemptionTask) setMetricsRecorder(recorder interface { + SetGauge(name string, value float64) +}) { + rt.metricsRecorder = recorder +} + func (rt *RedemptionTask) Run(request *tbtc.CoordinationProposalRequest) ( tbtc.CoordinationProposal, bool, @@ -165,6 +177,11 @@ func (rt *RedemptionTask) FindPendingRedemptions( taskLogger.Infof("found [%d] redemption requests", len(pendingRedemptions)) + // Record pending redemption requests count + if rt.metricsRecorder != nil { + rt.metricsRecorder.SetGauge("redemption_pending_requests_count", float64(len(pendingRedemptions))) + } + result := make([]bitcoin.Script, 0) for _, pendingRedemption := range pendingRedemptions { diff --git a/pkg/tbtcpg/tbtcpg.go b/pkg/tbtcpg/tbtcpg.go index 9e12735fd9..14b29f7c84 100644 --- a/pkg/tbtcpg/tbtcpg.go +++ b/pkg/tbtcpg/tbtcpg.go @@ -31,6 +31,18 @@ type ProposalGenerator struct { tasks []ProposalTask } +// SetRedemptionMetricsRecorder sets the metrics recorder for the redemption task. +// This allows recording redemption-specific metrics. +func (pg *ProposalGenerator) SetRedemptionMetricsRecorder(recorder interface { + SetGauge(name string, value float64) +}) { + for _, task := range pg.tasks { + if redemptionTask, ok := task.(*RedemptionTask); ok { + redemptionTask.setMetricsRecorder(recorder) + } + } +} + // NewProposalGenerator returns a new proposal generator. 
func NewProposalGenerator( chain Chain, diff --git a/scripts/build.sh b/scripts/build.sh index d307685631..43f453abae 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -29,9 +29,9 @@ docker buildx build \ docker buildx build \ --platform=linux/amd64 \ --target runtime-docker \ - --tag keepnetwork/keep-client:latest \ - --tag keepnetwork/keep-client:${VERSION} \ - --tag keepnetwork/keep-client:${ENVIRONMENT} \ + --tag thresholdnetwork/keep-client:latest \ + --tag thresholdnetwork/keep-client:${VERSION} \ + --tag thresholdnetwork/keep-client:${ENVIRONMENT} \ --build-arg ENVIRONMENT=${ENVIRONMENT} \ --build-arg VERSION=${VERSION} \ --build-arg REVISION=${REVISION} \ From 19132c5309b999142805299f34aa85d293bdc847 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Wed, 1 Apr 2026 14:38:51 +0100 Subject: [PATCH 8/9] Implement block header difficulty handling in Bitcoin chain tests - Added function to create block headers with specified difficulty. - Introduced method in to add headers for a range of heights based on dynamic difficulty. - Updated tests in to utilize the new difficulty handling, ensuring accurate proof range calculations across epochs. - Enhanced function to compute required confirmations based on actual header difficulties instead of fixed assumptions. 
--- pkg/maintainer/spv/bitcoin_chain_test.go | 29 ++++ pkg/maintainer/spv/spv.go | 201 +++++++++++------------ pkg/maintainer/spv/spv_test.go | 82 ++++++--- 3 files changed, 178 insertions(+), 134 deletions(-) diff --git a/pkg/maintainer/spv/bitcoin_chain_test.go b/pkg/maintainer/spv/bitcoin_chain_test.go index 2f790bf11f..888ab7960a 100644 --- a/pkg/maintainer/spv/bitcoin_chain_test.go +++ b/pkg/maintainer/spv/bitcoin_chain_test.go @@ -3,11 +3,26 @@ package spv import ( "bytes" "fmt" + "math/big" "sync" + "github.com/btcsuite/btcd/blockchain" "github.com/keep-network/keep-core/pkg/bitcoin" ) +// blockHeaderWithDifficulty returns a header whose Difficulty() matches the +// given value (within Bitcoin compact encoding precision). +func blockHeaderWithDifficulty(difficulty *big.Int) *bitcoin.BlockHeader { + maxTarget := new(big.Int) + maxTarget.SetString( + "ffff0000000000000000000000000000000000000000000000000000", + 16, + ) + target := new(big.Int).Div(maxTarget, difficulty) + bits := blockchain.BigToCompact(target) + return &bitcoin.BlockHeader{Bits: bits} +} + type localBitcoinChain struct { mutex sync.Mutex @@ -203,6 +218,20 @@ func (lbc *localBitcoinChain) addBlockHeader( return nil } +// populateBlockHeaders adds headers for [fromHeight, toHeight] inclusive using +// difficultyAt(height) for each block's Bits-derived difficulty. 
+func (lbc *localBitcoinChain) populateBlockHeaders( + fromHeight, toHeight uint, + difficultyAt func(uint) *big.Int, +) error { + for h := fromHeight; h <= toHeight; h++ { + if err := lbc.addBlockHeader(h, blockHeaderWithDifficulty(difficultyAt(h))); err != nil { + return err + } + } + return nil +} + func (lbc *localBitcoinChain) addTransactionConfirmations( transactionHash bitcoin.Hash, transactionConfirmations uint, diff --git a/pkg/maintainer/spv/spv.go b/pkg/maintainer/spv/spv.go index 990d8b0ec0..21eb54cd97 100644 --- a/pkg/maintainer/spv/spv.go +++ b/pkg/maintainer/spv/spv.go @@ -292,10 +292,45 @@ func isInputCurrentWalletsMainUTXO( return bytes.Equal(mainUtxoHash[:], wallet.MainUtxoHash[:]), nil } +// proofRangeWithinRelayWindow returns true iff [proofStartBlock, proofEndBlock] +// is one of: entirely in previous epoch, entirely in current epoch, or spanning +// exactly previous→current (matches Bridge SPV assumptions). +func proofRangeWithinRelayWindow( + proofStartBlock, proofEndBlock uint64, + previousEpoch, currentEpoch uint64, +) bool { + if proofEndBlock < proofStartBlock { + return false + } + ps := proofStartBlock / difficultyEpochLength + pe := proofEndBlock / difficultyEpochLength + if ps < previousEpoch || pe > currentEpoch { + return false + } + if ps == currentEpoch && pe == currentEpoch { + return true + } + if ps == previousEpoch && pe == previousEpoch { + return true + } + if ps == previousEpoch && pe == currentEpoch { + return true + } + return false +} + // getProofInfo returns information about the SPV proof. It includes the // information whether the transaction proof range is within the previous and // current difficulty epochs as seen by the relay, the accumulated number of // confirmations and the required number of confirmations. 
+// +// Required confirmations are computed to match Bridge.evaluateProofDifficulty: +// the concatenated block headers must sum to at least +// requestedDifficulty * txProofDifficultyFactor, where requestedDifficulty is the +// relay epoch difficulty that matches the first header (same as on-chain). +// Per-block difficulties can vary (e.g. testnet4 min-difficulty blocks), so we +// walk forward summing actual header difficulties instead of assuming a fixed +// block count × epoch-average difficulty. func getProofInfo( transactionHash bitcoin.Hash, btcChain bitcoin.Chain, @@ -330,135 +365,85 @@ func getProofInfo( ) } - // Calculate the starting block of the proof and the difficulty epoch number - // it belongs to. - proofStartBlock := uint64(latestBlockHeight - accumulatedConfirmations + 1) - proofStartEpoch := proofStartBlock / difficultyEpochLength - - // Calculate the ending block of the proof and the difficulty epoch number - // it belongs to. - proofEndBlock := proofStartBlock + txProofDifficultyFactor.Uint64() - 1 - proofEndEpoch := proofEndBlock / difficultyEpochLength + currentEpochDifficulty, previousEpochDifficulty, err := + btcDiffChain.GetCurrentAndPrevEpochDifficulty() + if err != nil { + return false, 0, 0, fmt.Errorf( + "failed to get Bitcoin epoch difficulties: [%v]", + err, + ) + } - // Get the current difficulty epoch number as seen by the relay. Subtract - // one to get the previous epoch number. currentEpoch, err := btcDiffChain.CurrentEpoch() if err != nil { return false, 0, 0, fmt.Errorf("failed to get current epoch: [%v]", err) } previousEpoch := currentEpoch - 1 - // There are only three possible valid combinations of the proof's block - // headers range: the proof must either be entirely in the previous epoch, - // must be entirely in the current epoch or must span the previous and - // current epochs. 
+ proofStartBlock := uint64(latestBlockHeight) - uint64(accumulatedConfirmations) + 1 - // If the proof is entirely within the current epoch, required confirmations - // does not need to be adjusted. - if proofStartEpoch == currentEpoch && - proofEndEpoch == currentEpoch { - return true, accumulatedConfirmations, uint(txProofDifficultyFactor.Uint64()), nil + firstHeader, err := btcChain.GetBlockHeader(uint(proofStartBlock)) + if err != nil { + return false, 0, 0, fmt.Errorf( + "failed to get block header at proof start: [%v]", + err, + ) } - - // If the proof is entirely within the previous epoch, required confirmations - // does not need to be adjusted. - if proofStartEpoch == previousEpoch && - proofEndEpoch == previousEpoch { - return true, accumulatedConfirmations, uint(txProofDifficultyFactor.Uint64()), nil + firstHeaderDiff := firstHeader.Difficulty() + + var requestedDiff *big.Int + switch { + case firstHeaderDiff.Cmp(currentEpochDifficulty) == 0: + requestedDiff = currentEpochDifficulty + case firstHeaderDiff.Cmp(previousEpochDifficulty) == 0: + requestedDiff = previousEpochDifficulty + default: + // Bridge would revert "Not at current or previous difficulty". + return false, 0, 0, nil } - // If the proof spans the previous and current difficulty epochs, the - // required confirmations may have to be adjusted. The reason for this is - // that there may be a drop in the value of difficulty between the current - // and the previous epochs. Example: - // Let's assume the transaction was done near the end of an epoch, so that - // part of the proof (let's say two block headers) is in the previous epoch - // and part of it is in the current epoch. - // If the previous epoch difficulty is 50 and the current epoch difficulty - // is 30, the total required difficulty of the proof will be transaction - // difficulty factor times previous difficulty: 6 * 50 = 300. 
- // However, if we simply use transaction difficulty factor to get the number - // of blocks we will end up with the difficulty sum that is too low: - // 50 + 50 + 30 + 30 + 30 + 30 = 220. To calculate the correct number of - // block headers needed we need to find how much difficulty needs to come - // from from the current epoch block headers: 300 - 2*50 = 200 and divide - // it by the current difficulty: 200 / 30 = 6 and add 1, because there - // was a remainder. So the number of block headers from the current epoch - // would be 7. The total number of block headers would be 9 and the sum - // of their difficulties would be: 50 + 50 + 30 + 30 + 30 + 30 + 30 + 30 + - // 30 = 310 which is enough to prove the transaction. - if proofStartEpoch == previousEpoch && - proofEndEpoch == currentEpoch { - currentEpochDifficulty, previousEpochDifficulty, err := - btcDiffChain.GetCurrentAndPrevEpochDifficulty() + totalDifficultyRequired := new(big.Int).Mul( + requestedDiff, + txProofDifficultyFactor, + ) + + sumDifficulty := new(big.Int) + var requiredBlockCount uint64 + var reached bool + for height := proofStartBlock; height <= uint64(latestBlockHeight); height++ { + hdr, err := btcChain.GetBlockHeader(uint(height)) if err != nil { return false, 0, 0, fmt.Errorf( - "failed to get Bitcoin epoch difficulties: [%v]", + "failed to get block header at height %d: [%v]", + height, err, ) } - - // Calculate the total difficulty that is required for the proof. The - // proof begins in the previous difficulty epoch, therefore the total - // required difficulty will be the previous epoch difficulty times - // transaction proof difficulty factor. - totalDifficultyRequired := new(big.Int).Mul( - previousEpochDifficulty, - txProofDifficultyFactor, - ) - - // Calculate the number of block headers in the proof that will come - // from the previous difficulty epoch. 
- numberOfBlocksPreviousEpoch := - uint64(difficultyEpochLength - proofStartBlock%difficultyEpochLength) - - // Calculate how much difficulty the blocks from the previous epoch part - // of the proof have in total. - totalDifficultyPreviousEpoch := new(big.Int).Mul( - big.NewInt(int64(numberOfBlocksPreviousEpoch)), - previousEpochDifficulty, - ) - - // Calculate how much difficulty must come from the current epoch. - totalDifficultyCurrentEpoch := new(big.Int).Sub( - totalDifficultyRequired, - totalDifficultyPreviousEpoch, - ) - - // Calculate how many blocks from the current epoch we need. - remainder := new(big.Int) - numberOfBlocksCurrentEpoch, remainder := new(big.Int).DivMod( - totalDifficultyCurrentEpoch, - currentEpochDifficulty, - remainder, - ) - // If there is a remainder, it means there is still some amount of - // difficulty missing that is less than one block difficulty. We need to - // account for that by adding one additional block. - if remainder.Cmp(big.NewInt(0)) > 0 { - numberOfBlocksCurrentEpoch.Add( - numberOfBlocksCurrentEpoch, - big.NewInt(1), - ) + sumDifficulty.Add(sumDifficulty, hdr.Difficulty()) + requiredBlockCount++ + if sumDifficulty.Cmp(totalDifficultyRequired) >= 0 { + reached = true + break } + } - // The total required number of confirmations is the sum of blocks from - // the previous and current epochs. - requiredConfirmations := numberOfBlocksPreviousEpoch + - numberOfBlocksCurrentEpoch.Uint64() + if !reached { + // Not enough accumulated work in the chain yet; wait for more blocks. 
+ available := uint64(latestBlockHeight) - proofStartBlock + 1 + return true, accumulatedConfirmations, uint(available + 1), nil + } - return true, accumulatedConfirmations, uint(requiredConfirmations), nil + proofEndBlock := proofStartBlock + requiredBlockCount - 1 + if !proofRangeWithinRelayWindow( + proofStartBlock, + proofEndBlock, + previousEpoch, + currentEpoch, + ) { + return false, 0, 0, nil } - // If we entered here, it means that the proof's block headers range goes - // outside the previous or current difficulty epochs as seen by the relay. - // The reason for this is most likely that transaction entered the Bitcoin - // blockchain within the very new difficulty epoch that is not yet proven in - // the relay. In that case the transaction will be proven in the future. - // The other case could be that the transaction is older than the last two - // Bitcoin difficulty epochs. In that case the transaction will soon leave - // the sliding window of recent transactions. - return false, 0, 0, nil + return true, accumulatedConfirmations, uint(requiredBlockCount), nil } // walletEvent is a type constraint representing wallet-related chain events. diff --git a/pkg/maintainer/spv/spv_test.go b/pkg/maintainer/spv/spv_test.go index 94c2084d11..532d56a4d5 100644 --- a/pkg/maintainer/spv/spv_test.go +++ b/pkg/maintainer/spv/spv_test.go @@ -12,12 +12,16 @@ import ( ) func TestGetProofInfo(t *testing.T) { + // First block height of Bitcoin difficulty epoch 392 (392 * 2016). 
+ const epoch392Start = 392 * 2016 + tests := map[string]struct { latestBlockHeight uint transactionConfirmations uint currentEpoch uint64 currentEpochDifficulty *big.Int previousEpochDifficulty *big.Int + difficultyAtBlock func(uint) *big.Int expectedIsProofWithinRelayRange bool expectedAccumulatedConfirmations uint expectedRequiredConfirmations uint @@ -26,58 +30,79 @@ func TestGetProofInfo(t *testing.T) { latestBlockHeight: 790277, transactionConfirmations: 3, currentEpoch: 392, - currentEpochDifficulty: nil, // not needed - previousEpochDifficulty: nil, // not needed + currentEpochDifficulty: big.NewInt(1), + previousEpochDifficulty: big.NewInt(1), + difficultyAtBlock: func(uint) *big.Int { return big.NewInt(1) }, expectedIsProofWithinRelayRange: true, expectedAccumulatedConfirmations: 3, - expectedRequiredConfirmations: 6, + // Only 3 blocks of work available (sum 3 < 6); need one more block. + expectedRequiredConfirmations: 4, }, "proof entirely within previous epoch": { latestBlockHeight: 790300, transactionConfirmations: 2041, currentEpoch: 392, - currentEpochDifficulty: nil, // not needed - previousEpochDifficulty: nil, // not needed + currentEpochDifficulty: big.NewInt(1), + previousEpochDifficulty: big.NewInt(1), + difficultyAtBlock: func(uint) *big.Int { return big.NewInt(1) }, expectedAccumulatedConfirmations: 2041, expectedIsProofWithinRelayRange: true, expectedRequiredConfirmations: 6, }, "proof spans previous and current epochs and difficulty drops": { - latestBlockHeight: 790300, - transactionConfirmations: 31, - currentEpoch: 392, - currentEpochDifficulty: big.NewInt(50000000000000), - previousEpochDifficulty: big.NewInt(30000000000000), + latestBlockHeight: 790300, + transactionConfirmations: 31, + currentEpoch: 392, + currentEpochDifficulty: big.NewInt(50000), + previousEpochDifficulty: big.NewInt(30000), + difficultyAtBlock: func(h uint) *big.Int { + if h < epoch392Start { + return big.NewInt(30000) + } + return big.NewInt(50000) + }, 
expectedIsProofWithinRelayRange: true, expectedAccumulatedConfirmations: 31, - expectedRequiredConfirmations: 9, + // requestedDiff 30000 * factor 6 = 180000; first 5 headers suffice. + expectedRequiredConfirmations: 5, }, "proof spans previous and current epochs and difficulty raises": { - latestBlockHeight: 790300, - transactionConfirmations: 31, - currentEpoch: 392, - currentEpochDifficulty: big.NewInt(30000000000000), - previousEpochDifficulty: big.NewInt(60000000000000), + latestBlockHeight: 790300, + transactionConfirmations: 31, + currentEpoch: 392, + currentEpochDifficulty: big.NewInt(30000), + previousEpochDifficulty: big.NewInt(60000), + difficultyAtBlock: func(h uint) *big.Int { + if h < epoch392Start { + return big.NewInt(60000) + } + return big.NewInt(30000) + }, expectedIsProofWithinRelayRange: true, expectedAccumulatedConfirmations: 31, - expectedRequiredConfirmations: 4, + // requestedDiff 60000 * 6 = 360000; needs 10 headers from proof start. + expectedRequiredConfirmations: 10, }, "proof begins outside previous epoch": { latestBlockHeight: 790300, transactionConfirmations: 2048, currentEpoch: 392, - currentEpochDifficulty: nil, // not needed - previousEpochDifficulty: nil, // not needed + currentEpochDifficulty: big.NewInt(1), + previousEpochDifficulty: big.NewInt(1), + difficultyAtBlock: func(uint) *big.Int { return big.NewInt(1) }, expectedIsProofWithinRelayRange: false, expectedAccumulatedConfirmations: 0, expectedRequiredConfirmations: 0, }, "proof ends outside current epoch": { - latestBlockHeight: 792285, - transactionConfirmations: 3, + // Tx in 792283; six difficulty-1 blocks reach 792288 (next epoch), which + // is past relay currentEpoch 392. 
+ latestBlockHeight: 792288, + transactionConfirmations: 6, currentEpoch: 392, - currentEpochDifficulty: nil, // not needed - previousEpochDifficulty: nil, // not needed + currentEpochDifficulty: big.NewInt(1), + previousEpochDifficulty: big.NewInt(1), + difficultyAtBlock: func(uint) *big.Int { return big.NewInt(1) }, expectedIsProofWithinRelayRange: false, expectedAccumulatedConfirmations: 0, expectedRequiredConfirmations: 0, @@ -97,10 +122,15 @@ func TestGetProofInfo(t *testing.T) { localChain := newLocalChain() btcChain := newLocalBitcoinChain() - btcChain.addBlockHeader( + proofStart := test.latestBlockHeight - test.transactionConfirmations + 1 + err = btcChain.populateBlockHeaders( + proofStart, test.latestBlockHeight, - &bitcoin.BlockHeader{}, + test.difficultyAtBlock, ) + if err != nil { + t.Fatal(err) + } btcChain.addTransactionConfirmations( transactionHash, test.transactionConfirmations, @@ -109,8 +139,8 @@ func TestGetProofInfo(t *testing.T) { localChain.setTxProofDifficultyFactor(big.NewInt(6)) localChain.setCurrentEpoch(test.currentEpoch) localChain.setCurrentAndPrevEpochDifficulty( - test.currentEpochDifficulty, test.previousEpochDifficulty, + test.currentEpochDifficulty, ) isProofWithinRelayRange, From 09713a43cdd04408f592223d55fb632a84c54892 Mon Sep 17 00:00:00 2001 From: Lev Akhnazarov Date: Wed, 1 Apr 2026 14:46:50 +0100 Subject: [PATCH 9/9] chore: drop unintended workflow and peers_test changes from PR --- .github/workflows/contracts-ecdsa.yml | 2 +- .github/workflows/contracts-random-beacon.yml | 2 +- config/peers_test.go | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/contracts-ecdsa.yml b/.github/workflows/contracts-ecdsa.yml index 9eb80e826c..69465e47bc 100644 --- a/.github/workflows/contracts-ecdsa.yml +++ b/.github/workflows/contracts-ecdsa.yml @@ -352,4 +352,4 @@ jobs: environment: ${{ github.event.inputs.environment }} upstream_builds: ${{ github.event.inputs.upstream_builds }} upstream_ref: 
dapp-development - version: ${{ steps.npm-version-bump.outputs.version }} \ No newline at end of file + version: ${{ steps.npm-version-bump.outputs.version }} diff --git a/.github/workflows/contracts-random-beacon.yml b/.github/workflows/contracts-random-beacon.yml index 885376c9dd..3eec0b4b71 100644 --- a/.github/workflows/contracts-random-beacon.yml +++ b/.github/workflows/contracts-random-beacon.yml @@ -346,4 +346,4 @@ jobs: environment: ${{ github.event.inputs.environment }} upstream_builds: ${{ github.event.inputs.upstream_builds }} upstream_ref: dapp-development - version: ${{ steps.npm-version-bump.outputs.version }} \ No newline at end of file + version: ${{ steps.npm-version-bump.outputs.version }} diff --git a/config/peers_test.go b/config/peers_test.go index 7fe66d32d3..56892b5028 100644 --- a/config/peers_test.go +++ b/config/peers_test.go @@ -25,7 +25,6 @@ func TestResolvePeers(t *testing.T) { network: network.Testnet, expectedPeers: []string{ "/dns4/bst-a01.test.keep.boar.network/tcp/6001/ipfs/16Uiu2HAmSLDSahiKyTbCNNu8wJmZAsiKF7wuYJ8mogY8ZuAG1jhu", - "/dns4/keep-validator-0.eks-ap-northeast-2-secure.staging.staked.cloud/tcp/3919/ipfs/16Uiu2HAm77eSvRq5ioD4J8VFPkq3bJHBEHkssCuiFkgAoABwjo2S", }, }, "developer network": {