From 8d8ea4125e474f8078881acf4e4fc3a4ea23d716 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 20:36:32 +0000 Subject: [PATCH 001/106] ci: add TestFlight workflow for self-hosted Mac runner Builds TurnBridge on the self-hosted macOS/ARM64 runner with automatic signing driven by an App Store Connect API key, exports an app-store IPA, and uploads it to TestFlight via xcrun altool. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/README.md | 83 +++++++++++ .github/workflows/testflight.yml | 237 +++++++++++++++++++++++++++++++ 2 files changed, 320 insertions(+) create mode 100644 .github/workflows/README.md create mode 100644 .github/workflows/testflight.yml diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..e870fdb --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,83 @@ +# CI: Build & TestFlight + +Workflow `testflight.yml` archives the iOS app on the self-hosted +`Mac-mini-GK` runner (`self-hosted`, `macOS`, `ARM64`) and uploads the build +to TestFlight via the App Store Connect API. + +## One-time setup + +### 1. Apple Developer / App Store Connect + +1. In **Apple Developer → Certificates, Identifiers & Profiles**, register + both bundle IDs and enable the required capabilities: + - `com.netlab.TurnBridge` — App Groups (`group.com.netlab.TurnBridge`), + Network Extensions. + - `com.netlab.TurnBridge.network-extension` — App Groups + (`group.com.netlab.TurnBridge`), Network Extensions + (Packet Tunnel Provider). +2. In **App Store Connect → My Apps**, create the app record for + `com.netlab.TurnBridge` (needed before the first TestFlight upload). +3. In **App Store Connect → Users and Access → Integrations → App Store + Connect API**, create an API key with the **App Manager** role. Save the + downloaded `.p8` file — it is shown only once. + +### 2. 
Self-hosted runner (Mac-mini-GK) + +Make sure the runner has: + +- Xcode (matching the project's deployment target, iOS 16.6+) installed and + selected: `sudo xcode-select -s /Applications/Xcode.app`. +- Command line tools and a logged-in Apple ID in Xcode is **not** required — + signing is driven by the App Store Connect API key. +- Homebrew + Go (`brew install go`) — the `WireGuardKitGo` build phase needs + it. The script looks in `/opt/homebrew/bin`. +- The runner user must be able to access the login keychain non-interactively + (no password prompt). If Xcode prompts for the keychain on first run, + unlock it once manually or store the password with `security + set-key-partition-list`. + +### 3. GitHub repository secrets + +In `truvvor/turnbridge` → **Settings → Secrets and variables → Actions**, add: + +| Secret | Value | +| -------------------- | ------------------------------------------------------------- | +| `APPLE_TEAM_ID` | 10-character Team ID (e.g. `ABCDE12345`) | +| `ASC_ISSUER_ID` | Issuer ID UUID from App Store Connect → Integrations | +| `ASC_KEY_ID` | 10-character Key ID from the same page | +| `ASC_KEY_P8_BASE64` | `base64 -i AuthKey_.p8` output (single line, no wrap) | + +On macOS, generate the base64 secret with: + +```bash +base64 -i AuthKey_XXXXXXXXXX.p8 | pbcopy +``` + +## Triggering + +- Manually: **Actions → Build & TestFlight → Run workflow**, choose `upload` + to send to TestFlight or `build-only` to just produce the IPA artifact. +- Automatically: every push to `claude/build-project-br5tJ` (excluding doc / + asset only changes) runs the workflow and uploads. + +## Build number + +`CFBundleVersion` is overridden to `100 + GITHUB_RUN_NUMBER` so each run is +strictly higher than the previous one. To raise the floor (e.g. after +manually uploading some builds outside CI), set repo variable +`TESTFLIGHT_BUILD_BASE` to a larger number. 
+ +`MARKETING_VERSION` (1.2.6 today) stays as committed in +`TurnBridge.xcodeproj/project.pbxproj` — bump it there when releasing a new +TestFlight version family. + +## Notes + +- Code signing uses Xcode automatic signing with `-allowProvisioningUpdates` + + the App Store Connect API key. The ASC API key is enough — no `.p12` + cert or provisioning profile secrets needed. +- `ENABLE_USER_SCRIPT_SANDBOXING=NO` is passed at archive time because the + WireGuardKitGo build phase writes outside of its declared inputs/outputs. +- The IPA is also published as a workflow artifact (`TurnBridge-ipa`) so you + can download an unsigned-for-AppStore copy without re-running the upload + step. diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml new file mode 100644 index 0000000..3e2ead5 --- /dev/null +++ b/.github/workflows/testflight.yml @@ -0,0 +1,237 @@ +name: Build & TestFlight + +on: + workflow_dispatch: + inputs: + lane: + description: "What to do" + type: choice + default: upload + options: + - build-only + - upload + push: + branches: + - claude/build-project-br5tJ + paths-ignore: + - "**/*.md" + - "icons/**" + - "screen.png" + - "quick_link.py" + +concurrency: + group: testflight-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + name: Archive & upload to TestFlight + runs-on: [self-hosted, macOS, ARM64] + + env: + SCHEME: TurnBridge + CONFIGURATION: Release + WORKSPACE_PROJECT: TurnBridge.xcodeproj + APP_BUNDLE_ID: com.netlab.TurnBridge + EXT_BUNDLE_ID: com.netlab.TurnBridge.network-extension + ARCHIVE_PATH: build/TurnBridge.xcarchive + EXPORT_DIR: build/export + DERIVED_DATA: build/DerivedData + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Show toolchain + run: | + set -eux + sw_vers + xcode-select -p + xcodebuild -version + /usr/bin/which go && go version || true + /usr/bin/which make + + - name: Ensure Go is on PATH for build script + run: | + set -eux + if ! 
command -v go >/dev/null 2>&1; then + echo "Go not found, installing via Homebrew" + brew install go + fi + echo "/opt/homebrew/bin" >> "$GITHUB_PATH" + + - name: Decide whether to upload + id: mode + run: | + set -eux + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "upload=${{ github.event.inputs.lane == 'upload' }}" >> "$GITHUB_OUTPUT" + else + echo "upload=true" >> "$GITHUB_OUTPUT" + fi + + - name: Compute build number + id: ver + run: | + set -eux + # Use a monotonically increasing build number from the run id. + # GITHUB_RUN_NUMBER is per-workflow and resets if the workflow is + # recreated, so add a base offset to stay above the App Store Connect + # high-water mark. + BASE=${TESTFLIGHT_BUILD_BASE:-100} + BUILD_NUMBER=$((BASE + GITHUB_RUN_NUMBER)) + echo "build_number=$BUILD_NUMBER" >> "$GITHUB_OUTPUT" + echo "Using CFBundleVersion=$BUILD_NUMBER" + + - name: Provision App Store Connect API key + if: steps.mode.outputs.upload == 'true' + env: + ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} + ASC_KEY_P8_BASE64: ${{ secrets.ASC_KEY_P8_BASE64 }} + run: | + set -eu + if [ -z "${ASC_KEY_ID}" ] || [ -z "${ASC_KEY_P8_BASE64}" ]; then + echo "Missing ASC_KEY_ID or ASC_KEY_P8_BASE64 secret" >&2 + exit 1 + fi + KEY_DIR="$RUNNER_TEMP/asc_keys" + mkdir -p "$KEY_DIR" + KEY_PATH="$KEY_DIR/AuthKey_${ASC_KEY_ID}.p8" + printf '%s' "$ASC_KEY_P8_BASE64" | base64 --decode > "$KEY_PATH" + chmod 600 "$KEY_PATH" + echo "ASC_KEY_PATH=$KEY_PATH" >> "$GITHUB_ENV" + # Also place a copy where xcrun altool / Transporter look by default + mkdir -p "$HOME/.appstoreconnect/private_keys" + cp "$KEY_PATH" "$HOME/.appstoreconnect/private_keys/AuthKey_${ASC_KEY_ID}.p8" + chmod 600 "$HOME/.appstoreconnect/private_keys/AuthKey_${ASC_KEY_ID}.p8" + + - name: Sanity-check required secrets + env: + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} + ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} + run: | + set -eu + missing=0 + for v in APPLE_TEAM_ID 
ASC_ISSUER_ID ASC_KEY_ID; do + if [ -z "$(printenv $v)" ]; then + echo "::error::Secret $v is not set" + missing=1 + fi + done + [ $missing -eq 0 ] + + - name: Resolve Swift package dependencies + run: | + set -eux + xcodebuild \ + -project "$WORKSPACE_PROJECT" \ + -scheme "$SCHEME" \ + -configuration "$CONFIGURATION" \ + -derivedDataPath "$DERIVED_DATA" \ + -resolvePackageDependencies + + - name: Archive + env: + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} + ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} + run: | + set -eux + mkdir -p build + xcodebuild \ + -project "$WORKSPACE_PROJECT" \ + -scheme "$SCHEME" \ + -configuration "$CONFIGURATION" \ + -destination 'generic/platform=iOS' \ + -derivedDataPath "$DERIVED_DATA" \ + -archivePath "$ARCHIVE_PATH" \ + -allowProvisioningUpdates \ + -authenticationKeyPath "$ASC_KEY_PATH" \ + -authenticationKeyID "$ASC_KEY_ID" \ + -authenticationKeyIssuerID "$ASC_ISSUER_ID" \ + DEVELOPMENT_TEAM="$APPLE_TEAM_ID" \ + CODE_SIGN_STYLE=Automatic \ + CURRENT_PROJECT_VERSION="${{ steps.ver.outputs.build_number }}" \ + ENABLE_USER_SCRIPT_SANDBOXING=NO \ + archive + + - name: Generate exportOptions.plist + env: + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + run: | + set -eu + cat > build/exportOptions.plist < + + + + method + app-store-connect + destination + export + signingStyle + automatic + teamID + ${APPLE_TEAM_ID} + stripSwiftSymbols + + uploadSymbols + + + + PLIST + + - name: Export IPA + env: + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} + ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} + run: | + set -eux + xcodebuild \ + -exportArchive \ + -archivePath "$ARCHIVE_PATH" \ + -exportOptionsPlist build/exportOptions.plist \ + -exportPath "$EXPORT_DIR" \ + -allowProvisioningUpdates \ + -authenticationKeyPath "$ASC_KEY_PATH" \ + -authenticationKeyID "$ASC_KEY_ID" \ + -authenticationKeyIssuerID "$ASC_ISSUER_ID" + ls -la "$EXPORT_DIR" + + - name: Upload IPA 
artifact + uses: actions/upload-artifact@v4 + with: + name: TurnBridge-ipa + path: build/export/*.ipa + if-no-files-found: error + retention-days: 14 + + - name: Upload to TestFlight + if: steps.mode.outputs.upload == 'true' + env: + ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} + ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} + run: | + set -eux + IPA_PATH=$(ls "$EXPORT_DIR"/*.ipa | head -n1) + if [ -z "$IPA_PATH" ]; then + echo "::error::No IPA produced in $EXPORT_DIR" + exit 1 + fi + xcrun altool \ + --upload-app \ + --type ios \ + --file "$IPA_PATH" \ + --apiKey "$ASC_KEY_ID" \ + --apiIssuer "$ASC_ISSUER_ID" + + - name: Cleanup ASC key + if: always() + run: | + set -eu + rm -f "$HOME/.appstoreconnect/private_keys/AuthKey_${{ secrets.ASC_KEY_ID }}.p8" || true + rm -rf "$RUNNER_TEMP/asc_keys" || true From 7afa73368527a921c9ce78c6a9695e4da93f73d7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 20:49:53 +0000 Subject: [PATCH 002/106] ci: re-trigger TestFlight workflow after secrets provisioning https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY From 09e03cfb1e8af36c74198ba81135b39f332a2a19 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 20:51:41 +0000 Subject: [PATCH 003/106] ci: drop paths-ignore so empty/doc-only commits also trigger TestFlight The previous filter caused empty re-trigger commits to be skipped because no path matched the filter at all. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 3e2ead5..3056579 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -13,11 +13,6 @@ on: push: branches: - claude/build-project-br5tJ - paths-ignore: - - "**/*.md" - - "icons/**" - - "screen.png" - - "quick_link.py" concurrency: group: testflight-${{ github.ref }} From f4a83b858ccba2af63326f6ea04c1967ebae0890 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 20:54:17 +0000 Subject: [PATCH 004/106] ci: capture xcodebuild logs and upload as build-logs artifact Pipes archive/export output through tee so we can dump the tail of the log inline on failure and always publish the full log as an artifact for post-mortem on failed runs. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 38 ++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 3056579..f87907b 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -120,13 +120,14 @@ jobs: - name: Resolve Swift package dependencies run: | - set -eux + set -euxo pipefail + mkdir -p build/logs xcodebuild \ -project "$WORKSPACE_PROJECT" \ -scheme "$SCHEME" \ -configuration "$CONFIGURATION" \ -derivedDataPath "$DERIVED_DATA" \ - -resolvePackageDependencies + -resolvePackageDependencies 2>&1 | tee build/logs/resolve.log - name: Archive env: @@ -134,8 +135,8 @@ jobs: ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} run: | - set -eux - mkdir -p build + set -euxo pipefail + mkdir -p build/logs xcodebuild \ -project "$WORKSPACE_PROJECT" \ -scheme "$SCHEME" \ @@ -151,7 +152,17 @@ jobs: CODE_SIGN_STYLE=Automatic \ CURRENT_PROJECT_VERSION="${{ 
steps.ver.outputs.build_number }}" \ ENABLE_USER_SCRIPT_SANDBOXING=NO \ - archive + archive 2>&1 | tee build/logs/archive.log + + - name: Tail archive log on failure + if: failure() + run: | + echo "::group::archive.log (last 400 lines)" + tail -n 400 build/logs/archive.log || true + echo "::endgroup::" + echo "::group::error/warning lines from archive.log" + grep -nE 'error:|warning:|Code Sign|Provisioning|fatal|failed|^\*\* ' build/logs/archive.log | tail -n 200 || true + echo "::endgroup::" - name: Generate exportOptions.plist env: @@ -185,7 +196,8 @@ jobs: ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} run: | - set -eux + set -euxo pipefail + mkdir -p build/logs xcodebuild \ -exportArchive \ -archivePath "$ARCHIVE_PATH" \ @@ -194,10 +206,22 @@ jobs: -allowProvisioningUpdates \ -authenticationKeyPath "$ASC_KEY_PATH" \ -authenticationKeyID "$ASC_KEY_ID" \ - -authenticationKeyIssuerID "$ASC_ISSUER_ID" + -authenticationKeyIssuerID "$ASC_ISSUER_ID" 2>&1 | tee build/logs/export.log ls -la "$EXPORT_DIR" + - name: Upload build logs artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-logs + path: | + build/logs/** + build/exportOptions.plist + if-no-files-found: ignore + retention-days: 14 + - name: Upload IPA artifact + if: success() uses: actions/upload-artifact@v4 with: name: TurnBridge-ipa From 89cb1e62064cbf22570995c3b5ee4e3e73ea719c Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 21:00:36 +0000 Subject: [PATCH 005/106] chore: rename bundle id from com.netlab.TurnBridge to com.truvvor.turnbridge Switches to a unique bundle identifier so the Apple Developer Team can register it. With -allowProvisioningUpdates the archive step now lets Xcode auto-create the bundle IDs, App Group (group.com.truvvor.turnbridge) and provisioning profiles in the developer portal. 
Updates: - PRODUCT_BUNDLE_IDENTIFIER for both targets in pbxproj - App Group entry in both .entitlements files - Hard-coded fallbacks/log subsystems in Swift - env defaults in the TestFlight workflow https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 4 ++-- TurnBridge.xcodeproj/project.pbxproj | 8 ++++---- TurnBridge/SharedLogger.swift | 2 +- TurnBridge/TurnBridge.entitlements | 2 +- TurnBridge/TurnBridgeApp.swift | 2 +- network-extension/PacketTunnelProvider.swift | 2 +- network-extension/network_extension.entitlements | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index f87907b..62c2e5f 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -27,8 +27,8 @@ jobs: SCHEME: TurnBridge CONFIGURATION: Release WORKSPACE_PROJECT: TurnBridge.xcodeproj - APP_BUNDLE_ID: com.netlab.TurnBridge - EXT_BUNDLE_ID: com.netlab.TurnBridge.network-extension + APP_BUNDLE_ID: com.truvvor.turnbridge + EXT_BUNDLE_ID: com.truvvor.turnbridge.network-extension ARCHIVE_PATH: build/TurnBridge.xcarchive EXPORT_DIR: build/export DERIVED_DATA: build/DerivedData diff --git a/TurnBridge.xcodeproj/project.pbxproj b/TurnBridge.xcodeproj/project.pbxproj index 2a56d9c..f4f458c 100755 --- a/TurnBridge.xcodeproj/project.pbxproj +++ b/TurnBridge.xcodeproj/project.pbxproj @@ -435,7 +435,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.2.6; - PRODUCT_BUNDLE_IDENTIFIER = com.netlab.TurnBridge; + PRODUCT_BUNDLE_IDENTIFIER = com.truvvor.turnbridge; PRODUCT_NAME = "$(TARGET_NAME)"; STRING_CATALOG_GENERATE_SYMBOLS = YES; SWIFT_APPROACHABLE_CONCURRENCY = YES; @@ -469,7 +469,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.2.6; - PRODUCT_BUNDLE_IDENTIFIER = com.netlab.TurnBridge; + PRODUCT_BUNDLE_IDENTIFIER = com.truvvor.turnbridge; PRODUCT_NAME = "$(TARGET_NAME)"; STRING_CATALOG_GENERATE_SYMBOLS = YES; 
SWIFT_APPROACHABLE_CONCURRENCY = YES; @@ -499,7 +499,7 @@ "@executable_path/../../Frameworks", ); MARKETING_VERSION = 1.2.6; - PRODUCT_BUNDLE_IDENTIFIER = "com.netlab.TurnBridge.network-extension"; + PRODUCT_BUNDLE_IDENTIFIER = "com.truvvor.turnbridge.network-extension"; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; STRING_CATALOG_GENERATE_SYMBOLS = YES; @@ -529,7 +529,7 @@ "@executable_path/../../Frameworks", ); MARKETING_VERSION = 1.2.6; - PRODUCT_BUNDLE_IDENTIFIER = "com.netlab.TurnBridge.network-extension"; + PRODUCT_BUNDLE_IDENTIFIER = "com.truvvor.turnbridge.network-extension"; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; STRING_CATALOG_GENERATE_SYMBOLS = YES; diff --git a/TurnBridge/SharedLogger.swift b/TurnBridge/SharedLogger.swift index 18803e7..7e226e4 100644 --- a/TurnBridge/SharedLogger.swift +++ b/TurnBridge/SharedLogger.swift @@ -112,7 +112,7 @@ public struct SharedLogger { return first } // Fallback: derive from bundle ID (works for Xcode-signed builds) - let bundleID = Bundle.main.bundleIdentifier ?? "com.netlab.TurnBridge" + let bundleID = Bundle.main.bundleIdentifier ?? "com.truvvor.turnbridge" let baseBundleID = bundleID.replacingOccurrences(of: ".network-extension", with: "") return "group.\(baseBundleID)" }() diff --git a/TurnBridge/TurnBridge.entitlements b/TurnBridge/TurnBridge.entitlements index afcf562..12515c8 100644 --- a/TurnBridge/TurnBridge.entitlements +++ b/TurnBridge/TurnBridge.entitlements @@ -8,7 +8,7 @@ com.apple.security.application-groups - group.com.netlab.TurnBridge + group.com.truvvor.turnbridge diff --git a/TurnBridge/TurnBridgeApp.swift b/TurnBridge/TurnBridgeApp.swift index 11455d9..b845a95 100755 --- a/TurnBridge/TurnBridgeApp.swift +++ b/TurnBridge/TurnBridgeApp.swift @@ -29,7 +29,7 @@ struct TurnBridge: App { SharedLogger.debug("Using \(preExistingTunnelManager != nil ? 
"existing" : "new") tunnel manager") let protocolConfiguration = NETunnelProviderProtocol() - let currentAppBundleId = Bundle.main.bundleIdentifier ?? "com.netlab.TurnBridge" + let currentAppBundleId = Bundle.main.bundleIdentifier ?? "com.truvvor.turnbridge" protocolConfiguration.providerBundleIdentifier = "\(currentAppBundleId).network-extension" let cleanIP = peerAddr.components(separatedBy: ":").first ?? peerAddr diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 6001bcc..b3c963f 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -7,7 +7,7 @@ import WireGuardKit import WireGuardKitGo import os -let sharedLogger = Logger(subsystem: "com.netlab.TurnBridge.network-extension", category: "wgtunnel") +let sharedLogger = Logger(subsystem: "com.truvvor.turnbridge.network-extension", category: "wgtunnel") enum PacketTunnelProviderError: String, Error { case invalidProtocolConfiguration diff --git a/network-extension/network_extension.entitlements b/network-extension/network_extension.entitlements index afcf562..12515c8 100644 --- a/network-extension/network_extension.entitlements +++ b/network-extension/network_extension.entitlements @@ -8,7 +8,7 @@ com.apple.security.application-groups - group.com.netlab.TurnBridge + group.com.truvvor.turnbridge From 579cc83832d6a57dc9b043611a16bda569cf3a0b Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 05:21:16 +0000 Subject: [PATCH 006/106] ci: re-run TestFlight workflow after App Group registration https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY From fb69bfb65b06d70161307aaed8d7c7700178cab3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 05:28:52 +0000 Subject: [PATCH 007/106] ci: unlock login keychain and set partition list before signing Self-hosted runner hits errSecInternalComponent during CodeSign because the login keychain is locked / the API-issued cert lacks codesign in 
its partition list. Add a step that, given MAC_KEYCHAIN_PASSWORD, unlocks the keychain and grants apple-tool/apple/codesign access to private keys. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 62c2e5f..61d7364 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -118,6 +118,34 @@ jobs: done [ $missing -eq 0 ] + - name: Unlock login keychain for codesign + env: + MAC_KEYCHAIN_PASSWORD: ${{ secrets.MAC_KEYCHAIN_PASSWORD }} + run: | + set -eu + if [ -z "${MAC_KEYCHAIN_PASSWORD:-}" ]; then + echo "::error::MAC_KEYCHAIN_PASSWORD secret is not set" + echo "::error::Add the macOS login password of the runner user '$USER' as repo secret MAC_KEYCHAIN_PASSWORD" + exit 1 + fi + KEYCHAIN_PATH="$HOME/Library/Keychains/login.keychain-db" + if [ ! -f "$KEYCHAIN_PATH" ]; then + # Fallback: take the first user keychain that exists + KEYCHAIN_PATH=$(security list-keychains -d user | sed -e 's/^[[:space:]]*"//' -e 's/"$//' | head -n1) + fi + echo "Using keychain: $KEYCHAIN_PATH" + # Keep it unlocked for the duration of the build + security set-keychain-settings -lut 21600 "$KEYCHAIN_PATH" + security unlock-keychain -p "$MAC_KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH" + # Allow codesign / productbuild / other Apple tools to use private keys + # without an interactive UI prompt + security set-key-partition-list \ + -S apple-tool:,apple:,codesign: \ + -s -k "$MAC_KEYCHAIN_PASSWORD" \ + "$KEYCHAIN_PATH" >/dev/null + # Make sure it is in the user search list + security list-keychains -d user -s "$KEYCHAIN_PATH" + - name: Resolve Swift package dependencies run: | set -euxo pipefail From b2809dc63fef513d8a9df8d163d5e37bafb66870 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 05:35:02 +0000 Subject: [PATCH 008/106] ci: re-run TestFlight workflow after 
MAC_KEYCHAIN_PASSWORD secret added https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY From 446a4f02852680024af1fcd6d6e444b7047cea37 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 05:40:50 +0000 Subject: [PATCH 009/106] ci: make keychain-unlock step robust and self-diagnostic - Unlock the keychain before any other security op so a locked state can no longer fail the whole step with exit code 36. - Treat set-key-partition-list and set-keychain-settings as non-fatal so benign warnings on a fresh keychain don't abort the build. - Append the login keychain to the user search list instead of replacing it (System.keychain etc. stay in the list this way). - Surface per-command rc and a clearer error message when the password truly does not match the keychain. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 48 +++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 61d7364..1c0748a 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -122,29 +122,57 @@ jobs: env: MAC_KEYCHAIN_PASSWORD: ${{ secrets.MAC_KEYCHAIN_PASSWORD }} run: | - set -eu + set -u if [ -z "${MAC_KEYCHAIN_PASSWORD:-}" ]; then echo "::error::MAC_KEYCHAIN_PASSWORD secret is not set" echo "::error::Add the macOS login password of the runner user '$USER' as repo secret MAC_KEYCHAIN_PASSWORD" exit 1 fi + KEYCHAIN_PATH="$HOME/Library/Keychains/login.keychain-db" if [ ! -f "$KEYCHAIN_PATH" ]; then - # Fallback: take the first user keychain that exists KEYCHAIN_PATH=$(security list-keychains -d user | sed -e 's/^[[:space:]]*"//' -e 's/"$//' | head -n1) fi echo "Using keychain: $KEYCHAIN_PATH" - # Keep it unlocked for the duration of the build - security set-keychain-settings -lut 21600 "$KEYCHAIN_PATH" + echo "Current user search list:" + security list-keychains -d user + + # 1. 
Unlock first — every other security op needs an unlocked keychain security unlock-keychain -p "$MAC_KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH" - # Allow codesign / productbuild / other Apple tools to use private keys - # without an interactive UI prompt + rc=$? + echo "unlock-keychain rc=$rc" + if [ $rc -ne 0 ]; then + echo "::error::Failed to unlock keychain. The MAC_KEYCHAIN_PASSWORD secret does not match the password of $KEYCHAIN_PATH" + echo "::error::Verify by running on the runner: security unlock-keychain $KEYCHAIN_PATH (interactive). If that succeeds, refresh the secret value." + exit $rc + fi + + # 2. Keep the keychain unlocked for the rest of the build (6h) + security set-keychain-settings -lut 21600 "$KEYCHAIN_PATH" || \ + echo "::warning::set-keychain-settings rc=$? (non-fatal)" + + # 3. Make sure the login keychain is in the user search list (additive — do not drop System.keychain etc) + EXISTING=$(security list-keychains -d user | tr -d '"' | xargs) + if ! printf '%s\n' $EXISTING | grep -Fxq "$KEYCHAIN_PATH"; then + security list-keychains -d user -s $EXISTING "$KEYCHAIN_PATH" + echo "Added $KEYCHAIN_PATH to user search list" + fi + security default-keychain -d user -s "$KEYCHAIN_PATH" || true + + # 4. Allow codesign / productbuild / other Apple tools to access private keys without UI prompts + set +e security set-key-partition-list \ - -S apple-tool:,apple:,codesign: \ + -S apple-tool:,apple:,codesign:,productbuild: \ -s -k "$MAC_KEYCHAIN_PASSWORD" \ - "$KEYCHAIN_PATH" >/dev/null - # Make sure it is in the user search list - security list-keychains -d user -s "$KEYCHAIN_PATH" + "$KEYCHAIN_PATH" >/dev/null 2>&1 + rc=$? 
+ set -e + echo "set-key-partition-list rc=$rc" + if [ $rc -ne 0 ]; then + echo "::warning::set-key-partition-list rc=$rc — codesign may still prompt; continuing" + fi + + echo "Keychain ready" - name: Resolve Swift package dependencies run: | From 31802a6d5ac10cfbf0581edade76b0fc79eecd37 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 05:50:16 +0000 Subject: [PATCH 010/106] ci: re-run TestFlight workflow with new ASC API key (cloud distribution) https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY From 9f0bef1227bb0d88ad51af9b944665cb6a277c8c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 05:58:27 +0000 Subject: [PATCH 011/106] ci: provision Distribution cert/profiles via ASC API and use manual export signing Cloud Managed App Distribution isn't available on the team's API key, so xcodebuild -exportArchive with automatic signing fails to create an App Store profile. Switch to manual signing for export and create the underlying assets via plain ASC API calls (which work with App Manager role) before each build: * script/ci_setup_signing.rb - reuses or creates an iOS Distribution cert via /v1/certificates, importing the resulting p12 into the login keychain; - reuses or creates IOS_APP_STORE provisioning profiles for the main app and extension via /v1/profiles, dropping their .mobileprovision into ~/Library/MobileDevice/Provisioning Profiles; - publishes profile names to GITHUB_ENV. * workflow - new step "Ensure Distribution cert and App Store profiles" runs the script before resolving SPM deps; - exportOptions.plist now uses signingStyle=manual with the profile names from the script and signingCertificate=Apple Distribution; - Export IPA no longer needs -allowProvisioningUpdates / API key flags since signing assets are pre-provisioned. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 38 ++++-- script/ci_setup_signing.rb | 217 +++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 10 deletions(-) create mode 100644 script/ci_setup_signing.rb diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 1c0748a..239c268 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -174,6 +174,21 @@ jobs: echo "Keychain ready" + - name: Ensure Distribution cert and App Store profiles + env: + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} + ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} + MAC_KEYCHAIN_PASSWORD: ${{ secrets.MAC_KEYCHAIN_PASSWORD }} + run: | + set -euxo pipefail + if ! gem list -i fastlane >/dev/null 2>&1; then + gem install --user-install fastlane --no-document + fi + USER_GEM_BIN="$(ruby -r rubygems -e 'puts Gem.user_dir')/bin" + export PATH="$USER_GEM_BIN:$PATH" + ruby script/ci_setup_signing.rb + - name: Resolve Swift package dependencies run: | set -euxo pipefail @@ -235,9 +250,18 @@ jobs: destination export signingStyle - automatic + manual teamID ${APPLE_TEAM_ID} + signingCertificate + Apple Distribution + provisioningProfiles + + ${APP_BUNDLE_ID} + ${PROFILE_APP_NAME} + ${EXT_BUNDLE_ID} + ${PROFILE_EXT_NAME} + stripSwiftSymbols uploadSymbols @@ -245,12 +269,10 @@ jobs: PLIST + echo "exportOptions.plist:" + cat build/exportOptions.plist - name: Export IPA - env: - APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} - ASC_KEY_ID: ${{ secrets.ASC_KEY_ID }} - ASC_ISSUER_ID: ${{ secrets.ASC_ISSUER_ID }} run: | set -euxo pipefail mkdir -p build/logs @@ -258,11 +280,7 @@ jobs: -exportArchive \ -archivePath "$ARCHIVE_PATH" \ -exportOptionsPlist build/exportOptions.plist \ - -exportPath "$EXPORT_DIR" \ - -allowProvisioningUpdates \ - -authenticationKeyPath "$ASC_KEY_PATH" \ - -authenticationKeyID "$ASC_KEY_ID" \ - -authenticationKeyIssuerID 
"$ASC_ISSUER_ID" 2>&1 | tee build/logs/export.log + -exportPath "$EXPORT_DIR" 2>&1 | tee build/logs/export.log ls -la "$EXPORT_DIR" - name: Upload build logs artifact diff --git a/script/ci_setup_signing.rb b/script/ci_setup_signing.rb new file mode 100644 index 0000000..6e8d49c --- /dev/null +++ b/script/ci_setup_signing.rb @@ -0,0 +1,217 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true +# +# Ensures a Distribution certificate and App Store provisioning profiles +# exist for TurnBridge using nothing but an App Store Connect API key. +# +# This is the manual-signing fallback for accounts that don't have +# "Cloud Managed App Distribution" available on their API key (in which +# case xcodebuild -allowProvisioningUpdates can't auto-create distribution +# profiles during -exportArchive). +# +# Idempotent: reuses cert + profiles across runs and only recreates when +# they're missing or expiring within 7 days. +# +# Required env: +# ASC_KEY_ID, ASC_ISSUER_ID, ASC_KEY_PATH +# APPLE_TEAM_ID +# APP_BUNDLE_ID, EXT_BUNDLE_ID +# MAC_KEYCHAIN_PASSWORD +# Optional env: +# KEYCHAIN_PATH (default ~/Library/Keychains/login.keychain-db) +# SIGNING_CACHE_DIR (default ~/.turnbridge_signing) +# +# Writes to $GITHUB_ENV (when present): +# PROFILE_APP_NAME, PROFILE_EXT_NAME + +require 'spaceship' +require 'openssl' +require 'base64' +require 'fileutils' +require 'json' +require 'time' + +KEY_ID = ENV.fetch('ASC_KEY_ID') +ISSUER_ID = ENV.fetch('ASC_ISSUER_ID') +KEY_FILE = ENV.fetch('ASC_KEY_PATH') +TEAM_ID = ENV.fetch('APPLE_TEAM_ID') +APP_BID = ENV.fetch('APP_BUNDLE_ID') +EXT_BID = ENV.fetch('EXT_BUNDLE_ID') +KEYCHAIN = ENV['KEYCHAIN_PATH'] || "#{ENV['HOME']}/Library/Keychains/login.keychain-db" +KEYCHAIN_PASS = ENV.fetch('MAC_KEYCHAIN_PASSWORD') +CACHE_DIR = ENV['SIGNING_CACHE_DIR'] || "#{ENV['HOME']}/.turnbridge_signing" +PROFILES_DIR = "#{ENV['HOME']}/Library/MobileDevice/Provisioning Profiles" + +CERT_KEY_PEM = File.join(CACHE_DIR, 'distribution.key') +CERT_CER_PEM = 
File.join(CACHE_DIR, 'distribution.cer.pem') +CERT_P12 = File.join(CACHE_DIR, 'distribution.p12') +P12_PASSWORD = 'TurnBridgeCI' + +PROFILE_NAMES = { + APP_BID => 'TurnBridge AppStore CI', + EXT_BID => 'TurnBridge Ext AppStore CI' +}.freeze + +FileUtils.mkdir_p(CACHE_DIR) +FileUtils.mkdir_p(PROFILES_DIR) + +# Authenticate +Spaceship::ConnectAPI.token = Spaceship::ConnectAPI::Token.create( + key_id: KEY_ID, + issuer_id: ISSUER_ID, + filepath: KEY_FILE +) + +def import_p12!(p12_path) + ok = system('security', 'import', p12_path, + '-k', KEYCHAIN, + '-P', P12_PASSWORD, + '-T', '/usr/bin/codesign', + '-T', '/usr/bin/productbuild', + '-A') + abort 'security import failed' unless ok + # Idempotent: ignore "already exists" by relying on -A and partition list refresh. + system('security', 'set-key-partition-list', + '-S', 'apple-tool:,apple:,codesign:,productbuild:', + '-s', '-k', KEYCHAIN_PASS, KEYCHAIN) +end + +def cert_matches_key?(api_cert_content_b64, priv_pem_path) + return false unless File.exist?(priv_pem_path) + cer_der = Base64.decode64(api_cert_content_b64) + x509 = OpenSSL::X509::Certificate.new(cer_der) + priv = OpenSSL::PKey::RSA.new(File.read(priv_pem_path)) + x509.public_key.to_pem == priv.public_key.to_pem +rescue StandardError + false +end + +# 1. Distribution cert ---------------------------------------------------- + +cert_resource_id = nil + +api_certs = Spaceship::ConnectAPI::Certificate.all(filter: { certificateType: 'IOS_DISTRIBUTION' }) +api_certs.each do |c| + next unless cert_matches_key?(c.certificate_content, CERT_KEY_PEM) + cert_resource_id = c.id + puts "Reusing Distribution cert #{c.id} (matches cached key)" + break +end + +if cert_resource_id.nil? 
+ puts 'Creating new Distribution certificate via ASC API' + + priv = OpenSSL::PKey::RSA.new(2048) + csr = OpenSSL::X509::Request.new + csr.subject = OpenSSL::X509::Name.new([['CN', 'TurnBridge Distribution']]) + csr.public_key = priv.public_key + csr.sign(priv, OpenSSL::Digest::SHA256.new) + csr_b64 = Base64.strict_encode64(csr.to_der) + + begin + cert = Spaceship::ConnectAPI::Certificate.create( + certificate_type: 'IOS_DISTRIBUTION', + csr_content: csr_b64 + ) + rescue StandardError => e + msg = e.message.to_s + if msg.match?(/maximum number/i) || msg.match?(/quota/i) + puts 'Hit Apple distribution-cert limit; revoking oldest existing one' + victim = api_certs.min_by do |c| + Time.parse(c.expiration_date) rescue Time.now + 365 * 86_400 + end + victim&.delete! + cert = Spaceship::ConnectAPI::Certificate.create( + certificate_type: 'IOS_DISTRIBUTION', + csr_content: csr_b64 + ) + else + raise + end + end + + cert_resource_id = cert.id + cer_der = Base64.decode64(cert.certificate_content) + cer_pem = OpenSSL::X509::Certificate.new(cer_der).to_pem + + File.write(CERT_KEY_PEM, priv.to_pem) + File.write(CERT_CER_PEM, cer_pem) + + p12 = OpenSSL::PKCS12.create(P12_PASSWORD, 'Apple Distribution', priv, + OpenSSL::X509::Certificate.new(cer_der)) + File.binwrite(CERT_P12, p12.to_der) + + import_p12!(CERT_P12) + puts "Distribution cert ready: #{cert_resource_id}" +else + # Cert exists in API and matches our cached key — make sure keychain has it + if File.exist?(CERT_P12) + import_p12!(CERT_P12) + else + priv = OpenSSL::PKey::RSA.new(File.read(CERT_KEY_PEM)) + cer = OpenSSL::X509::Certificate.new(File.read(CERT_CER_PEM)) + p12 = OpenSSL::PKCS12.create(P12_PASSWORD, 'Apple Distribution', priv, cer) + File.binwrite(CERT_P12, p12.to_der) + import_p12!(CERT_P12) + end +end + +# 2. 
App Store profiles --------------------------------------------------- + +bundles_by_id = {} +Spaceship::ConnectAPI::BundleId.all.each { |b| bundles_by_id[b.identifier] = b } + +results = {} +PROFILE_NAMES.each do |bid, name| + bundle = bundles_by_id[bid] || abort("Bundle ID #{bid} is not registered") + + profile = Spaceship::ConnectAPI::Profile.all(filter: { name: name }).first + + recreate = profile.nil? + if profile + cert_ids = (profile.certificates.map(&:id) rescue []) + unless cert_ids.include?(cert_resource_id) + puts "Profile '#{name}' references different cert; recreating" + recreate = true + end + if !recreate && profile.expiration_date && Time.parse(profile.expiration_date) < Time.now + 7 * 86_400 + puts "Profile '#{name}' expires within 7 days; recreating" + recreate = true + end + end + + if recreate + profile&.delete! + profile = Spaceship::ConnectAPI::Profile.create( + bundle_id_id: bundle.id, + name: name, + profile_type: 'IOS_APP_STORE', + certificate_ids: [cert_resource_id] + ) + puts "Created profile '#{name}' (#{profile.id})" + else + puts "Reusing profile '#{name}' (#{profile.id})" + end + + prof_data = Base64.decode64(profile.profile_content) + uuid = prof_data[%r{UUID\s*([^<]+)}, 1] + abort "Could not parse UUID from profile #{name}" unless uuid + + dest = File.join(PROFILES_DIR, "#{uuid}.mobileprovision") + File.binwrite(dest, prof_data) + puts "Saved #{dest}" + + results[bid] = { name: name, uuid: uuid, profile_id: profile.id } +end + +# 3. 
Hand profile names back to the workflow ----------------------------- + +if (gh_env = ENV['GITHUB_ENV']) + File.open(gh_env, 'a') do |f| + f.puts "PROFILE_APP_NAME=#{results[APP_BID][:name]}" + f.puts "PROFILE_EXT_NAME=#{results[EXT_BID][:name]}" + end +end + +puts JSON.pretty_generate(results) +puts 'Signing setup complete' From 91ffb847856185c385e0b8a8f1b37c828d28f004 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 06:10:26 +0000 Subject: [PATCH 012/106] ci: drop fastlane/spaceship dependency in signing setup System Ruby on the runner is 2.6 and 'gem install fastlane' fails (its domain_name dep requires Ruby >= 2.7), so the script is now pure Ruby stdlib + OpenSSL: hand-built ES256 JWT, Net::HTTP calls to api.appstoreconnect.apple.com. No external gems, ~30 seconds instead of 10+ minutes. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 6 +- script/ci_setup_signing.rb | 275 +++++++++++++++++++++---------- 2 files changed, 190 insertions(+), 91 deletions(-) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 239c268..28214c2 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -182,11 +182,7 @@ jobs: MAC_KEYCHAIN_PASSWORD: ${{ secrets.MAC_KEYCHAIN_PASSWORD }} run: | set -euxo pipefail - if ! gem list -i fastlane >/dev/null 2>&1; then - gem install --user-install fastlane --no-document - fi - USER_GEM_BIN="$(ruby -r rubygems -e 'puts Gem.user_dir')/bin" - export PATH="$USER_GEM_BIN:$PATH" + ruby --version ruby script/ci_setup_signing.rb - name: Resolve Swift package dependencies diff --git a/script/ci_setup_signing.rb b/script/ci_setup_signing.rb index 6e8d49c..5d3e1a8 100644 --- a/script/ci_setup_signing.rb +++ b/script/ci_setup_signing.rb @@ -3,14 +3,11 @@ # # Ensures a Distribution certificate and App Store provisioning profiles # exist for TurnBridge using nothing but an App Store Connect API key. 
+# Talks to App Store Connect over plain Net::HTTP + a hand-signed ES256 JWT, +# so no fastlane / spaceship / external gems are required. # -# This is the manual-signing fallback for accounts that don't have -# "Cloud Managed App Distribution" available on their API key (in which -# case xcodebuild -allowProvisioningUpdates can't auto-create distribution -# profiles during -exportArchive). -# -# Idempotent: reuses cert + profiles across runs and only recreates when -# they're missing or expiring within 7 days. +# Idempotent: reuses cert + profiles across runs and only recreates them +# when they're missing or expiring within 7 days. # # Required env: # ASC_KEY_ID, ASC_ISSUER_ID, ASC_KEY_PATH @@ -18,18 +15,20 @@ # APP_BUNDLE_ID, EXT_BUNDLE_ID # MAC_KEYCHAIN_PASSWORD # Optional env: -# KEYCHAIN_PATH (default ~/Library/Keychains/login.keychain-db) -# SIGNING_CACHE_DIR (default ~/.turnbridge_signing) +# KEYCHAIN_PATH (default ~/Library/Keychains/login.keychain-db) +# SIGNING_CACHE_DIR (default ~/.turnbridge_signing) # -# Writes to $GITHUB_ENV (when present): +# Writes to $GITHUB_ENV when present: # PROFILE_APP_NAME, PROFILE_EXT_NAME -require 'spaceship' require 'openssl' require 'base64' -require 'fileutils' require 'json' +require 'net/http' +require 'uri' +require 'fileutils' require 'time' +require 'cgi' KEY_ID = ENV.fetch('ASC_KEY_ID') ISSUER_ID = ENV.fetch('ASC_ISSUER_ID') @@ -55,12 +54,90 @@ FileUtils.mkdir_p(CACHE_DIR) FileUtils.mkdir_p(PROFILES_DIR) -# Authenticate -Spaceship::ConnectAPI.token = Spaceship::ConnectAPI::Token.create( - key_id: KEY_ID, - issuer_id: ISSUER_ID, - filepath: KEY_FILE -) +# ----------------------------------------------------------------------- +# JWT (ES256) — built by hand so we don't need the `jwt` gem. +# ----------------------------------------------------------------------- + +def base64url(bytes) + Base64.urlsafe_encode64(bytes, padding: false) +end + +# Convert an ECDSA DER signature to the raw r || s JOSE encoding. 
+def der_to_jose(der) + seq = OpenSSL::ASN1.decode(der) + r = seq.value[0].value.to_s(2) + s = seq.value[1].value.to_s(2) + r = r.rjust(32, "\x00".b) + s = s.rjust(32, "\x00".b) + r + s +end + +def asc_jwt + ec = OpenSSL::PKey.read(File.read(KEY_FILE)) + header = JSON.generate('alg' => 'ES256', 'kid' => KEY_ID, 'typ' => 'JWT') + payload = JSON.generate('iss' => ISSUER_ID, + 'exp' => Time.now.to_i + 1200, + 'aud' => 'appstoreconnect-v1') + signing_input = "#{base64url(header)}.#{base64url(payload)}" + der_sig = ec.sign(OpenSSL::Digest.new('SHA256'), signing_input) + "#{signing_input}.#{base64url(der_to_jose(der_sig))}" +end + +JWT_TOKEN = asc_jwt +HOST = 'api.appstoreconnect.apple.com' + +def asc(method, path, body = nil) + uri = URI("https://#{HOST}#{path}") + req = case method + when :get then Net::HTTP::Get.new(uri) + when :post then Net::HTTP::Post.new(uri) + when :patch then Net::HTTP::Patch.new(uri) + when :delete then Net::HTTP::Delete.new(uri) + end + req['Authorization'] = "Bearer #{JWT_TOKEN}" + req['Accept'] = 'application/json' + if body + req['Content-Type'] = 'application/json' + req.body = JSON.generate(body) + end + res = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(req) } + parsed = res.body && !res.body.empty? ? (JSON.parse(res.body) rescue { 'raw' => res.body }) : nil + [res.code.to_i, parsed] +end + +def asc_ok!(code, body, action) + return if (200..299).include?(code) + msg = (body && body['errors']) ? 
body['errors'].map { |e| e['detail'] || e['title'] }.join('; ') : body.inspect + abort "#{action} failed: HTTP #{code} #{msg}" +end + +# ----------------------------------------------------------------------- +# Distribution certificate +# ----------------------------------------------------------------------- + +def list_distribution_certificates + page = "/v1/certificates?filter[certificateType]=IOS_DISTRIBUTION&limit=200" + certs = [] + loop do + code, body = asc(:get, page) + asc_ok!(code, body, 'list certificates') + certs.concat(body['data'] || []) + next_link = body.dig('links', 'next') + break unless next_link + page = next_link.sub(/^https:\/\/#{Regexp.escape(HOST)}/, '') + end + certs +end + +def cert_matches_key?(api_cert_b64, priv_pem_path) + return false unless File.exist?(priv_pem_path) + cer_der = Base64.decode64(api_cert_b64) + x509 = OpenSSL::X509::Certificate.new(cer_der) + priv = OpenSSL::PKey::RSA.new(File.read(priv_pem_path)) + x509.public_key.to_pem == priv.public_key.to_pem +rescue StandardError + false +end def import_p12!(p12_path) ok = system('security', 'import', p12_path, @@ -70,31 +147,18 @@ def import_p12!(p12_path) '-T', '/usr/bin/productbuild', '-A') abort 'security import failed' unless ok - # Idempotent: ignore "already exists" by relying on -A and partition list refresh. system('security', 'set-key-partition-list', '-S', 'apple-tool:,apple:,codesign:,productbuild:', '-s', '-k', KEYCHAIN_PASS, KEYCHAIN) end -def cert_matches_key?(api_cert_content_b64, priv_pem_path) - return false unless File.exist?(priv_pem_path) - cer_der = Base64.decode64(api_cert_content_b64) - x509 = OpenSSL::X509::Certificate.new(cer_der) - priv = OpenSSL::PKey::RSA.new(File.read(priv_pem_path)) - x509.public_key.to_pem == priv.public_key.to_pem -rescue StandardError - false -end - -# 1. 
Distribution cert ---------------------------------------------------- - cert_resource_id = nil - -api_certs = Spaceship::ConnectAPI::Certificate.all(filter: { certificateType: 'IOS_DISTRIBUTION' }) +api_certs = list_distribution_certificates api_certs.each do |c| - next unless cert_matches_key?(c.certificate_content, CERT_KEY_PEM) - cert_resource_id = c.id - puts "Reusing Distribution cert #{c.id} (matches cached key)" + cer_content = c.dig('attributes', 'certificateContent') + next unless cer_content && cert_matches_key?(cer_content, CERT_KEY_PEM) + cert_resource_id = c['id'] + puts "Reusing Distribution cert #{c['id']} (matches cached private key)" break end @@ -105,46 +169,44 @@ def cert_matches_key?(api_cert_content_b64, priv_pem_path) csr = OpenSSL::X509::Request.new csr.subject = OpenSSL::X509::Name.new([['CN', 'TurnBridge Distribution']]) csr.public_key = priv.public_key - csr.sign(priv, OpenSSL::Digest::SHA256.new) + csr.sign(priv, OpenSSL::Digest.new('SHA256')) csr_b64 = Base64.strict_encode64(csr.to_der) - begin - cert = Spaceship::ConnectAPI::Certificate.create( - certificate_type: 'IOS_DISTRIBUTION', - csr_content: csr_b64 - ) - rescue StandardError => e - msg = e.message.to_s - if msg.match?(/maximum number/i) || msg.match?(/quota/i) - puts 'Hit Apple distribution-cert limit; revoking oldest existing one' - victim = api_certs.min_by do |c| - Time.parse(c.expiration_date) rescue Time.now + 365 * 86_400 - end - victim&.delete! - cert = Spaceship::ConnectAPI::Certificate.create( - certificate_type: 'IOS_DISTRIBUTION', - csr_content: csr_b64 - ) - else - raise + body = { + data: { + type: 'certificates', + attributes: { certificateType: 'IOS_DISTRIBUTION', csrContent: csr_b64 } + } + } + code, response = asc(:post, '/v1/certificates', body) + + if code == 409 || (response && response['errors']&.any? 
{ |e| (e['detail'] || '') =~ /maximum number/i }) + puts 'Hit Apple distribution-cert limit; revoking oldest existing one' + victim = api_certs.min_by do |c| + Time.parse(c.dig('attributes', 'expirationDate')) rescue Time.now + 365 * 86_400 end + if victim + d_code, d_body = asc(:delete, "/v1/certificates/#{victim['id']}") + asc_ok!(d_code, d_body, "delete cert #{victim['id']}") + end + code, response = asc(:post, '/v1/certificates', body) end + asc_ok!(code, response, 'create distribution certificate') - cert_resource_id = cert.id - cer_der = Base64.decode64(cert.certificate_content) - cer_pem = OpenSSL::X509::Certificate.new(cer_der).to_pem + data = response['data'] + cert_resource_id = data['id'] + cer_b64 = data.dig('attributes', 'certificateContent') + cer_der = Base64.decode64(cer_b64) + cer_x509 = OpenSSL::X509::Certificate.new(cer_der) File.write(CERT_KEY_PEM, priv.to_pem) - File.write(CERT_CER_PEM, cer_pem) - - p12 = OpenSSL::PKCS12.create(P12_PASSWORD, 'Apple Distribution', priv, - OpenSSL::X509::Certificate.new(cer_der)) + File.write(CERT_CER_PEM, cer_x509.to_pem) + p12 = OpenSSL::PKCS12.create(P12_PASSWORD, 'Apple Distribution', priv, cer_x509) File.binwrite(CERT_P12, p12.to_der) import_p12!(CERT_P12) puts "Distribution cert ready: #{cert_resource_id}" else - # Cert exists in API and matches our cached key — make sure keychain has it if File.exist?(CERT_P12) import_p12!(CERT_P12) else @@ -156,44 +218,83 @@ def cert_matches_key?(api_cert_content_b64, priv_pem_path) end end -# 2. 
App Store profiles --------------------------------------------------- +# ----------------------------------------------------------------------- +# Bundle IDs lookup +# ----------------------------------------------------------------------- -bundles_by_id = {} -Spaceship::ConnectAPI::BundleId.all.each { |b| bundles_by_id[b.identifier] = b } +def find_bundle_id(identifier) + q = CGI.escape(identifier) + code, body = asc(:get, "/v1/bundleIds?filter[identifier]=#{q}&limit=200") + asc_ok!(code, body, "list bundle ids for #{identifier}") + (body['data'] || []).find { |b| b.dig('attributes', 'identifier') == identifier } +end + +bundles = {} +PROFILE_NAMES.each_key do |bid| + bundle = find_bundle_id(bid) + abort "Bundle ID #{bid} is not registered" unless bundle + bundles[bid] = bundle +end + +# ----------------------------------------------------------------------- +# App Store profiles +# ----------------------------------------------------------------------- + +def find_profile_by_name(name) + q = CGI.escape(name) + code, body = asc(:get, "/v1/profiles?filter[name]=#{q}&include=certificates&limit=200") + asc_ok!(code, body, "find profile #{name}") + (body['data'] || []).first +end results = {} PROFILE_NAMES.each do |bid, name| - bundle = bundles_by_id[bid] || abort("Bundle ID #{bid} is not registered") - - profile = Spaceship::ConnectAPI::Profile.all(filter: { name: name }).first + bundle = bundles[bid] + profile = find_profile_by_name(name) recreate = profile.nil? 
if profile - cert_ids = (profile.certificates.map(&:id) rescue []) + cert_ids = (profile.dig('relationships', 'certificates', 'data') || []).map { |c| c['id'] } unless cert_ids.include?(cert_resource_id) puts "Profile '#{name}' references different cert; recreating" recreate = true end - if !recreate && profile.expiration_date && Time.parse(profile.expiration_date) < Time.now + 7 * 86_400 - puts "Profile '#{name}' expires within 7 days; recreating" - recreate = true + if !recreate + exp = profile.dig('attributes', 'expirationDate') + if exp && Time.parse(exp) < Time.now + 7 * 86_400 + puts "Profile '#{name}' expires within 7 days; recreating" + recreate = true + end end end if recreate - profile&.delete! - profile = Spaceship::ConnectAPI::Profile.create( - bundle_id_id: bundle.id, - name: name, - profile_type: 'IOS_APP_STORE', - certificate_ids: [cert_resource_id] - ) - puts "Created profile '#{name}' (#{profile.id})" + if profile + code, body = asc(:delete, "/v1/profiles/#{profile['id']}") + asc_ok!(code, body, "delete profile #{profile['id']}") unless code == 404 + end + + create_body = { + data: { + type: 'profiles', + attributes: { name: name, profileType: 'IOS_APP_STORE' }, + relationships: { + bundleId: { data: { type: 'bundleIds', id: bundle['id'] } }, + certificates: { data: [{ type: 'certificates', id: cert_resource_id }] } + } + } + } + code, body = asc(:post, '/v1/profiles', create_body) + asc_ok!(code, body, "create profile #{name}") + profile = body['data'] + puts "Created profile '#{name}' (#{profile['id']})" else - puts "Reusing profile '#{name}' (#{profile.id})" + puts "Reusing profile '#{name}' (#{profile['id']})" end - prof_data = Base64.decode64(profile.profile_content) + prof_b64 = profile.dig('attributes', 'profileContent') + abort "Profile '#{name}' has no content" unless prof_b64 + prof_data = Base64.decode64(prof_b64) uuid = prof_data[%r{UUID\s*([^<]+)}, 1] abort "Could not parse UUID from profile #{name}" unless uuid @@ -201,10 +302,12 @@ 
def cert_matches_key?(api_cert_content_b64, priv_pem_path) File.binwrite(dest, prof_data) puts "Saved #{dest}" - results[bid] = { name: name, uuid: uuid, profile_id: profile.id } + results[bid] = { name: name, uuid: uuid, profile_id: profile['id'] } end -# 3. Hand profile names back to the workflow ----------------------------- +# ----------------------------------------------------------------------- +# Hand profile names back to the workflow +# ----------------------------------------------------------------------- if (gh_env = ENV['GITHUB_ENV']) File.open(gh_env, 'a') do |f| From fca14f929c8fcce2156c8ff58885202af145669d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 06:15:56 +0000 Subject: [PATCH 013/106] ci: prefer existing keychain Distribution cert and pin its SHA-1 in export Two issues caused the export step to pick the wrong signing cert: 1. The user already had a working "Apple Distribution: ..." cert in keychain. The previous script ignored it and created an extra cert, leaving keychain with two same-named certs and the App Store profile bound only to the new one. 2. exportOptions.plist used signingCertificate="Apple Distribution", which is ambiguous when two such certs coexist. Fixes: - ci_setup_signing.rb now finds, by SHA-1, an existing keychain Distribution cert that matches an ASC API cert (preferring the latest expiration) and uses it. New cert creation only happens if there is no overlap between keychain and API. - The chosen cert's SHA-1 is exported to GITHUB_ENV as SIGNING_CERT_SHA1. - exportOptions.plist now references that SHA-1 so xcodebuild always picks the cert the profile is bound to. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .github/workflows/testflight.yml | 2 +- script/ci_setup_signing.rb | 66 ++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/.github/workflows/testflight.yml b/.github/workflows/testflight.yml index 28214c2..7f9a5ed 100644 --- a/.github/workflows/testflight.yml +++ b/.github/workflows/testflight.yml @@ -250,7 +250,7 @@ jobs: teamID ${APPLE_TEAM_ID} signingCertificate - Apple Distribution + ${SIGNING_CERT_SHA1} provisioningProfiles ${APP_BUNDLE_ID} diff --git a/script/ci_setup_signing.rb b/script/ci_setup_signing.rb index 5d3e1a8..068f1b6 100644 --- a/script/ci_setup_signing.rb +++ b/script/ci_setup_signing.rb @@ -29,6 +29,7 @@ require 'fileutils' require 'time' require 'cgi' +require 'shellwords' KEY_ID = ENV.fetch('ASC_KEY_ID') ISSUER_ID = ENV.fetch('ASC_ISSUER_ID') @@ -152,14 +153,55 @@ def import_p12!(p12_path) '-s', '-k', KEYCHAIN_PASS, KEYCHAIN) end +def keychain_distribution_sha1s + out = `security find-identity -v -p codesigning #{Shellwords.escape(KEYCHAIN)} 2>/dev/null` + out.lines.grep(/Apple Distribution/i).map { |l| l[/[0-9A-F]{40}/] }.compact.map(&:upcase) +end + cert_resource_id = nil +cert_sha1 = nil api_certs = list_distribution_certificates -api_certs.each do |c| - cer_content = c.dig('attributes', 'certificateContent') - next unless cer_content && cert_matches_key?(cer_content, CERT_KEY_PEM) - cert_resource_id = c['id'] - puts "Reusing Distribution cert #{c['id']} (matches cached private key)" - break + +# Strategy 1: prefer a Distribution cert that already exists in the login +# keychain (i.e. its private key is locally available) AND is registered in +# App Store Connect. This avoids creating duplicates if the user already had +# a working signing identity from a previous Xcode session. +keychain_sha1s = keychain_distribution_sha1s +unless keychain_sha1s.empty? 
+ best = nil + api_certs.each do |c| + cer_b64 = c.dig('attributes', 'certificateContent') + next unless cer_b64 + sha1 = OpenSSL::Digest::SHA1.hexdigest(Base64.decode64(cer_b64)).upcase + next unless keychain_sha1s.include?(sha1) + exp = Time.parse(c.dig('attributes', 'expirationDate')) rescue Time.now + if best.nil? || exp > best[:exp] + best = { cert: c, sha1: sha1, exp: exp } + end + end + + if best + cert_resource_id = best[:cert]['id'] + cert_sha1 = best[:sha1] + cn = OpenSSL::X509::Certificate.new(Base64.decode64(best[:cert].dig('attributes', 'certificateContent'))) + .subject.to_a.find { |f| f[0] == 'CN' }&.[](1) + puts "Using existing keychain Distribution cert (CN=#{cn}, api_id=#{cert_resource_id}, sha1=#{cert_sha1})" + end +end + +# Strategy 2: an earlier run of this script created and cached a cert; reuse if it's still in API. +if cert_resource_id.nil? + api_certs.each do |c| + cer_content = c.dig('attributes', 'certificateContent') + next unless cer_content && cert_matches_key?(cer_content, CERT_KEY_PEM) + cert_resource_id = c['id'] + cert_sha1 = OpenSSL::Digest::SHA1.hexdigest(Base64.decode64(cer_content)).upcase + puts "Reusing API cert that matches cached key: #{c['id']} (sha1=#{cert_sha1})" + if File.exist?(CERT_P12) + import_p12!(CERT_P12) + end + break + end end if cert_resource_id.nil? 
@@ -198,6 +240,7 @@ def import_p12!(p12_path) cer_b64 = data.dig('attributes', 'certificateContent') cer_der = Base64.decode64(cer_b64) cer_x509 = OpenSSL::X509::Certificate.new(cer_der) + cert_sha1 = OpenSSL::Digest::SHA1.hexdigest(cer_der).upcase File.write(CERT_KEY_PEM, priv.to_pem) File.write(CERT_CER_PEM, cer_x509.to_pem) @@ -206,16 +249,6 @@ def import_p12!(p12_path) import_p12!(CERT_P12) puts "Distribution cert ready: #{cert_resource_id}" -else - if File.exist?(CERT_P12) - import_p12!(CERT_P12) - else - priv = OpenSSL::PKey::RSA.new(File.read(CERT_KEY_PEM)) - cer = OpenSSL::X509::Certificate.new(File.read(CERT_CER_PEM)) - p12 = OpenSSL::PKCS12.create(P12_PASSWORD, 'Apple Distribution', priv, cer) - File.binwrite(CERT_P12, p12.to_der) - import_p12!(CERT_P12) - end end # ----------------------------------------------------------------------- @@ -313,6 +346,7 @@ def find_profile_by_name(name) File.open(gh_env, 'a') do |f| f.puts "PROFILE_APP_NAME=#{results[APP_BID][:name]}" f.puts "PROFILE_EXT_NAME=#{results[EXT_BID][:name]}" + f.puts "SIGNING_CERT_SHA1=#{cert_sha1}" if cert_sha1 end end From 4010d620392b31f8227afbabb6b26412ed517531 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 06:27:06 +0000 Subject: [PATCH 014/106] ci: re-run TestFlight workflow now that the App Store record exists https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY From cf6158a09a1161f0e31b3d3e93902fab197a58dc Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 06:34:09 +0000 Subject: [PATCH 015/106] chore: declare ITSAppUsesNonExemptEncryption=NO so TestFlight skips the prompt WireGuard relies on standard mass-market encryption (ChaCha20-Poly1305, BLAKE2s, Curve25519), which qualifies for the US BIS 5D992 exemption. Self-classifying it via Info.plist means App Store Connect won't ask the export-compliance question for every uploaded build. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge.xcodeproj/project.pbxproj | 2 ++ 1 file changed, 2 insertions(+) diff --git a/TurnBridge.xcodeproj/project.pbxproj b/TurnBridge.xcodeproj/project.pbxproj index f4f458c..f2b4603 100755 --- a/TurnBridge.xcodeproj/project.pbxproj +++ b/TurnBridge.xcodeproj/project.pbxproj @@ -424,6 +424,7 @@ DEVELOPMENT_TEAM = ""; ENABLE_PREVIEWS = YES; GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_ITSAppUsesNonExemptEncryption = NO; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; @@ -458,6 +459,7 @@ DEVELOPMENT_TEAM = ""; ENABLE_PREVIEWS = YES; GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_ITSAppUsesNonExemptEncryption = NO; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; From 18f99075d0a7bb258858e498f16d69a6a2e54a02 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 05:36:53 +0000 Subject: [PATCH 016/106] feat: opt-in manual captcha mode that hands the VK challenge to the user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When auto-solver gets BOT-flagged by VK there is no way to recover — checkbox returns BOT, slider returns ERROR and the tunnel never gets TURN credentials. Add a Settings toggle ("Solve captcha manually") that, when enabled, bypasses the in-Go auto solver entirely and lets the user complete the official VK NotRobot challenge in a WKWebView sheet in the main app. 
Plumbing: * WireGuardKitGo / captcha_manual.go - exports TurnBridgeSetManualCaptchaCallback, TurnBridgeSubmitManualCaptchaToken, TurnBridgeCancelManualCaptcha, TurnBridgeSetManualCaptchaMode - requestManualCaptcha blocks on a Go channel until the Swift side delivers a success_token or cancels / times out * WireGuardKitGo / vk_captcha.go - if manual mode is on, solveVkCaptcha skips PoW/checkbox/slider and calls requestManualCaptcha directly * WireGuardKitGo / wireguard.h - declares the new exported functions for the Swift bridging module * network-extension / CaptchaBridge.swift (new) and PacketTunnelProvider.swift - installs the cgo callback, persists pending requests to the App Group UserDefaults, fires a Darwin notification, routes app responses delivered via handleAppMessage back into Go; - reads the toggle from the App Group and configures the Go mode at tunnel start * TurnBridge / ManualCaptchaSetting.swift, GlobalSettingsView.swift - new "Solve captcha manually" Toggle, persisted to the shared UserDefaults so the extension can pick it up * TurnBridge / CaptchaManager.swift, CaptchaWebView.swift, TurnBridgeApp.swift, CaptchaIPC.swift (new) - observes the Darwin notification, surfaces a SwiftUI sheet with a WKWebView pointed at the captcha redirect_uri, injects JS that watches fetch/XHR/postMessage/URL for success_token, then ships the result back to the extension via NETunnelProviderSession.sendProviderMessage. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/CaptchaIPC.swift | 23 ++ TurnBridge/CaptchaManager.swift | 110 ++++++++ TurnBridge/CaptchaWebView.swift | 258 ++++++++++++++++++ TurnBridge/GlobalSettingsView.swift | 16 ++ TurnBridge/ManualCaptchaSetting.swift | 14 + TurnBridge/TurnBridgeApp.swift | 20 ++ network-extension/CaptchaBridge.swift | 105 +++++++ network-extension/PacketTunnelProvider.swift | 11 +- .../Sources/WireGuardKitGo/captcha_manual.go | 162 +++++++++++ .../Sources/WireGuardKitGo/vk_captcha.go | 5 + .../Sources/WireGuardKitGo/wireguard.h | 6 + 11 files changed, 727 insertions(+), 3 deletions(-) create mode 100644 TurnBridge/CaptchaIPC.swift create mode 100644 TurnBridge/CaptchaManager.swift create mode 100644 TurnBridge/CaptchaWebView.swift create mode 100644 TurnBridge/ManualCaptchaSetting.swift create mode 100644 network-extension/CaptchaBridge.swift create mode 100644 wireguard-apple/Sources/WireGuardKitGo/captcha_manual.go diff --git a/TurnBridge/CaptchaIPC.swift b/TurnBridge/CaptchaIPC.swift new file mode 100644 index 0000000..fb0269f --- /dev/null +++ b/TurnBridge/CaptchaIPC.swift @@ -0,0 +1,23 @@ +import Foundation + +/// IPC constants shared with the network extension's CaptchaBridge.swift. +/// Keep these strings in sync between the two targets. +enum CaptchaIPC { + static let appGroupID = "group.com.truvvor.turnbridge" + static let requestUserDefaultsKey = "captcha.pendingRequest" + static let requestDarwinNotification = "com.truvvor.turnbridge.captcha.request" + static let cancelDarwinNotification = "com.truvvor.turnbridge.captcha.cancel" + + struct AppMessage: Codable { + let type: String + let requestId: String + let successToken: String? + let reason: String? 
+ } + + struct PendingRequest: Codable { + let requestId: String + let redirectUri: String + let createdAt: TimeInterval + } +} diff --git a/TurnBridge/CaptchaManager.swift b/TurnBridge/CaptchaManager.swift new file mode 100644 index 0000000..eead6e0 --- /dev/null +++ b/TurnBridge/CaptchaManager.swift @@ -0,0 +1,110 @@ +import Foundation +import NetworkExtension +import UIKit + +@MainActor +final class CaptchaManager: ObservableObject { + + static let shared = CaptchaManager() + + @Published var pending: CaptchaIPC.PendingRequest? + + private var registered = false + + private init() {} + + func start() { + guard !registered else { return } + registered = true + + // Darwin notification fired from the extension. + let name = CaptchaIPC.requestDarwinNotification as CFString + let observer = Unmanaged.passUnretained(self).toOpaque() + CFNotificationCenterAddObserver( + CFNotificationCenterGetDarwinNotifyCenter(), + observer, + { _, observer, _, _, _ in + guard let observer = observer else { return } + let mgr = Unmanaged.fromOpaque(observer).takeUnretainedValue() + Task { @MainActor in mgr.refresh() } + }, + name, + nil, + .deliverImmediately + ) + + // Also refresh on becoming active in case the notification arrived while + // the app was suspended. + NotificationCenter.default.addObserver( + forName: UIApplication.didBecomeActiveNotification, + object: nil, + queue: .main + ) { [weak self] _ in + Task { @MainActor in self?.refresh() } + } + + refresh() + } + + func refresh() { + guard let defaults = UserDefaults(suiteName: CaptchaIPC.appGroupID), + let data = defaults.data(forKey: CaptchaIPC.requestUserDefaultsKey), + let req = try? JSONDecoder().decode(CaptchaIPC.PendingRequest.self, from: data) else { + pending = nil + return + } + // Drop stale requests (5 minutes). 
+ if Date().timeIntervalSince1970 - req.createdAt > 300 { + clearPending() + return + } + if pending?.requestId != req.requestId { + SharedLogger.info("Captcha UI: picked up pending request \(req.requestId)", source: .app) + } + pending = req + } + + func submit(token: String) async { + guard let req = pending else { return } + await sendMessage(.init(type: "captcha_answer", + requestId: req.requestId, + successToken: token, + reason: nil)) + clearPending() + } + + func cancel(reason: String = "user cancelled") async { + guard let req = pending else { return } + await sendMessage(.init(type: "captcha_cancel", + requestId: req.requestId, + successToken: nil, + reason: reason)) + clearPending() + } + + // MARK: - Private + + private func clearPending() { + UserDefaults(suiteName: CaptchaIPC.appGroupID)? + .removeObject(forKey: CaptchaIPC.requestUserDefaultsKey) + pending = nil + } + + private func sendMessage(_ msg: CaptchaIPC.AppMessage) async { + do { + let managers = try await NETunnelProviderManager.loadAllFromPreferences() + guard let session = managers.first?.connection as? 
NETunnelProviderSession else { + SharedLogger.warning("Captcha UI: no active tunnel session to deliver answer", source: .app) + return + } + let payload = try JSONEncoder().encode(msg) + try session.sendProviderMessage(payload) { reply in + if let reply = reply, let text = String(data: reply, encoding: .utf8) { + SharedLogger.debug("Captcha UI: extension reply \(text)", source: .app) + } + } + } catch { + SharedLogger.error("Captcha UI: failed to deliver answer: \(error.localizedDescription)", source: .app) + } + } +} diff --git a/TurnBridge/CaptchaWebView.swift b/TurnBridge/CaptchaWebView.swift new file mode 100644 index 0000000..eae8353 --- /dev/null +++ b/TurnBridge/CaptchaWebView.swift @@ -0,0 +1,258 @@ +import SwiftUI +import WebKit + +/// Sheet that loads the VK captcha page in a WKWebView, watches XHR / URL +/// activity for a `success_token`, and reports the result back via +/// CaptchaManager. +struct CaptchaWebView: View { + let redirectUri: String + @ObservedObject var manager: CaptchaManager = .shared + @Environment(\.dismiss) private var dismiss + + @State private var status: String = "Solve the VK challenge below" + @State private var didFinish = false + + var body: some View { + NavigationView { + VStack(spacing: 0) { + Text(status) + .font(.footnote) + .foregroundStyle(.secondary) + .padding(.horizontal) + .padding(.vertical, 8) + .frame(maxWidth: .infinity) + .background(Color(.secondarySystemBackground)) + + CaptchaWKWebView( + url: URL(string: redirectUri), + onToken: { token in + guard !didFinish else { return } + didFinish = true + status = "Got token, finishing…" + Task { + await manager.submit(token: token) + dismiss() + } + }, + onStatus: { s in status = s } + ) + } + .navigationTitle("Verify human") + .navigationBarTitleDisplayMode(.inline) + .toolbar { + ToolbarItem(placement: .cancellationAction) { + Button("Cancel") { + guard !didFinish else { return } + didFinish = true + Task { + await manager.cancel() + dismiss() + } + } + } + } + } + 
} +} + +private struct CaptchaWKWebView: UIViewRepresentable { + let url: URL? + let onToken: (String) -> Void + let onStatus: (String) -> Void + + func makeCoordinator() -> Coordinator { + Coordinator(onToken: onToken, onStatus: onStatus) + } + + func makeUIView(context: Context) -> WKWebView { + let userContent = WKUserContentController() + userContent.add(context.coordinator, name: "captcha") + + let script = WKUserScript( + source: Self.injectedJS, + injectionTime: .atDocumentStart, + forMainFrameOnly: false + ) + userContent.addUserScript(script) + + let config = WKWebViewConfiguration() + config.userContentController = userContent + if #available(iOS 14.0, *) { + config.defaultWebpagePreferences.allowsContentJavaScript = true + } + config.websiteDataStore = .nonPersistent() + + let webView = WKWebView(frame: .zero, configuration: config) + webView.navigationDelegate = context.coordinator + webView.allowsBackForwardNavigationGestures = true + + if let url = url { + webView.load(URLRequest(url: url)) + } + return webView + } + + func updateUIView(_ uiView: WKWebView, context: Context) {} + + final class Coordinator: NSObject, WKNavigationDelegate, WKScriptMessageHandler { + let onToken: (String) -> Void + let onStatus: (String) -> Void + + init(onToken: @escaping (String) -> Void, onStatus: @escaping (String) -> Void) { + self.onToken = onToken + self.onStatus = onStatus + } + + func userContentController(_ userContentController: WKUserContentController, + didReceive message: WKScriptMessage) { + guard message.name == "captcha", + let body = message.body as? [String: Any], + let type = body["type"] as? String else { return } + switch type { + case "success_token": + if let token = body["token"] as? String, !token.isEmpty { + onToken(token) + } + case "status": + if let s = body["text"] as? 
String { + onStatus(s) + } + default: + break + } + } + + func webView(_ webView: WKWebView, + decidePolicyFor navigationAction: WKNavigationAction, + decisionHandler: @escaping (WKNavigationActionPolicy) -> Void) { + // Watch top-level navigations for `?success_token=...` or + // `#success_token=...` — some flows put it in the URL. + if let url = navigationAction.request.url { + let token = tokenFromURL(url) + if !token.isEmpty { + onToken(token) + decisionHandler(.cancel) + return + } + } + decisionHandler(.allow) + } + + private func tokenFromURL(_ url: URL) -> String { + if let comps = URLComponents(url: url, resolvingAgainstBaseURL: false), + let item = comps.queryItems?.first(where: { $0.name == "success_token" }), + let v = item.value { + return v + } + if let fragment = url.fragment { + for part in fragment.split(separator: "&") { + let kv = part.split(separator: "=", maxSplits: 1).map(String.init) + if kv.count == 2, kv[0] == "success_token" { + return kv[1].removingPercentEncoding ?? kv[1] + } + } + } + return "" + } + } + + // Injected as document-start so we patch fetch/XHR before VK's page code + // gets a chance to fire. Looks for any response from `captchaNotRobot.*` + // that carries `success_token`, and also polls the URL / page text as a + // belt-and-braces fallback. + private static let injectedJS = """ + (function() { + function send(payload) { + try { window.webkit.messageHandlers.captcha.postMessage(payload); } catch (e) {} + } + + function maybeTokenFromText(text) { + if (!text) return null; + try { + const json = JSON.parse(text); + if (json && json.response && json.response.success_token) { + return json.response.success_token; + } + } catch (e) {} + const m = String(text).match(/"success_token"\\s*:\\s*"([^"]+)"/); + return m ? m[1] : null; + } + + // fetch hook + const origFetch = window.fetch; + if (origFetch) { + window.fetch = function(input, init) { + const url = (typeof input === 'string') ? 
input : (input && input.url) || ''; + const p = origFetch.apply(this, arguments); + if (url && url.indexOf('captchaNotRobot') !== -1) { + p.then(function(res) { + try { + res.clone().text().then(function(text) { + const t = maybeTokenFromText(text); + if (t) send({type:'success_token', token: t}); + }); + } catch (e) {} + }).catch(function() {}); + } + return p; + }; + } + + // XHR hook + const origOpen = XMLHttpRequest.prototype.open; + const origSend = XMLHttpRequest.prototype.send; + XMLHttpRequest.prototype.open = function(method, url) { + this.__cap_url = url; + return origOpen.apply(this, arguments); + }; + XMLHttpRequest.prototype.send = function() { + const xhr = this; + const prev = xhr.onreadystatechange; + xhr.onreadystatechange = function() { + if (xhr.readyState === 4 && xhr.__cap_url && + String(xhr.__cap_url).indexOf('captchaNotRobot') !== -1) { + const t = maybeTokenFromText(xhr.responseText); + if (t) send({type:'success_token', token: t}); + } + if (typeof prev === 'function') return prev.apply(this, arguments); + }; + return origSend.apply(this, arguments); + }; + + // postMessage relay + window.addEventListener('message', function(ev) { + try { + const data = ev.data; + if (data && typeof data === 'object') { + if (data.success_token) send({type:'success_token', token: data.success_token}); + if (data.type === 'captcha_success' && data.token) { + send({type:'success_token', token: data.token}); + } + } else if (typeof data === 'string') { + const t = maybeTokenFromText(data); + if (t) send({type:'success_token', token: t}); + } + } catch (e) {} + }); + + // URL / location polling — sometimes VK reflects token in hash on success. 
+ let lastUrl = ''; + setInterval(function() { + if (location.href !== lastUrl) { + lastUrl = location.href; + try { + const u = new URL(location.href); + let t = u.searchParams.get('success_token'); + if (!t && u.hash) { + const params = new URLSearchParams(u.hash.replace(/^#/, '')); + t = params.get('success_token'); + } + if (t) send({type:'success_token', token: t}); + } catch (e) {} + } + }, 250); + + send({type:'status', text:'Loaded captcha helper'}); + })(); + """ +} diff --git a/TurnBridge/GlobalSettingsView.swift b/TurnBridge/GlobalSettingsView.swift index ddaafa8..dcb5301 100644 --- a/TurnBridge/GlobalSettingsView.swift +++ b/TurnBridge/GlobalSettingsView.swift @@ -5,8 +5,24 @@ struct GlobalSettingsView: View { @AppStorage("excludeCellularServices") private var excludeCellularServices = false @AppStorage("excludeLocalNetworks") private var excludeLocalNetworks = true + @State private var manualCaptcha: Bool = ManualCaptchaSetting.isEnabled + var body: some View { Form { + Section(header: Text("Captcha")) { + Toggle(isOn: $manualCaptcha) { + VStack(alignment: .leading) { + Text("Solve captcha manually") + Text("Skip the auto-solver and show the VK challenge in a browser sheet instead.") + .font(.caption) + .foregroundColor(.secondary) + } + } + .onChange(of: manualCaptcha) { newValue in + ManualCaptchaSetting.isEnabled = newValue + } + } + Section(header: Text("General")) { NavigationLink(destination: AboutView()) { Label( diff --git a/TurnBridge/ManualCaptchaSetting.swift b/TurnBridge/ManualCaptchaSetting.swift new file mode 100644 index 0000000..f9cc140 --- /dev/null +++ b/TurnBridge/ManualCaptchaSetting.swift @@ -0,0 +1,14 @@ +import Foundation + +/// Persistent flag, shared with the network extension via the App Group, +/// that controls whether the VK captcha is solved by the auto solver or by +/// showing the challenge in a WKWebView for the user to solve. 
+enum ManualCaptchaSetting { + static let key = "manualCaptcha" + private static let suite = "group.com.truvvor.turnbridge" + + static var isEnabled: Bool { + get { UserDefaults(suiteName: suite)?.bool(forKey: key) ?? false } + set { UserDefaults(suiteName: suite)?.set(newValue, forKey: key) } + } +} diff --git a/TurnBridge/TurnBridgeApp.swift b/TurnBridge/TurnBridgeApp.swift index b845a95..eed6598 100755 --- a/TurnBridge/TurnBridgeApp.swift +++ b/TurnBridge/TurnBridgeApp.swift @@ -7,11 +7,31 @@ import NetworkExtension @main struct TurnBridge: App { + @StateObject private var captchaManager = CaptchaManager.shared + var body: some Scene { WindowGroup { ContentView(app: self) + .onAppear { captchaManager.start() } + .sheet(item: Binding( + get: { captchaManager.pending.map(IdentifiedCaptcha.init) }, + set: { newValue in + if newValue == nil { + Task { await captchaManager.cancel(reason: "sheet dismissed") } + } + } + )) { identified in + CaptchaWebView(redirectUri: identified.request.redirectUri, + manager: captchaManager) + .interactiveDismissDisabled() + } } } + + private struct IdentifiedCaptcha: Identifiable { + let request: CaptchaIPC.PendingRequest + var id: String { request.requestId } + } func turnOnTunnel(vkLink: String, peerAddr: String, listenAddr: String, nValue: Int, wgQuickConfig: String, completionHandler: @escaping (Bool) -> Void) { SharedLogger.info("Connecting... peer=\(peerAddr), listen=\(listenAddr), n=\(nValue)") diff --git a/network-extension/CaptchaBridge.swift b/network-extension/CaptchaBridge.swift new file mode 100644 index 0000000..9652643 --- /dev/null +++ b/network-extension/CaptchaBridge.swift @@ -0,0 +1,105 @@ +import Foundation +import WireGuardKitGo + +/// Constants shared with the main app for the manual captcha IPC. 
+enum CaptchaIPC { + static let appGroupID = "group.com.truvvor.turnbridge" + static let requestUserDefaultsKey = "captcha.pendingRequest" + static let requestDarwinNotification = "com.truvvor.turnbridge.captcha.request" + static let cancelDarwinNotification = "com.truvvor.turnbridge.captcha.cancel" + + /// JSON payload the app sends back via NETunnelProviderSession.sendProviderMessage. + struct AppMessage: Codable { + let type: String // "captcha_answer" | "captcha_cancel" + let requestId: String + let successToken: String? + let reason: String? + } + + /// Persistent payload the extension writes when it needs the app to solve a captcha. + struct PendingRequest: Codable { + let requestId: String + let redirectUri: String + let createdAt: TimeInterval + } +} + +/// Trampoline from cgo into Swift. Note: this runs on a Go goroutine / +/// arbitrary thread, so anything heavy must be dispatched off it. +private let manualCaptchaCCallback: @convention(c) (UnsafePointer?, UnsafePointer?) -> Void = { reqIDPtr, uriPtr in + guard let reqIDPtr = reqIDPtr, let uriPtr = uriPtr else { return } + let reqID = String(cString: reqIDPtr) + let uri = String(cString: uriPtr) + CaptchaBridge.publishRequest(requestId: reqID, redirectUri: uri) +} + +enum CaptchaBridge { + + /// Registered once at tunnel start. + static func install() { + TurnBridgeSetManualCaptchaCallback(manualCaptchaCCallback) + } + + /// Called from the cgo callback. Persists the request in the shared + /// User Defaults so the app can pick it up, then fires a Darwin + /// notification that wakes the app's observer. + fileprivate static func publishRequest(requestId: String, redirectUri: String) { + SharedLogger.info("Manual captcha requested (reqID=\(requestId))", source: .tunnel) + + if let defaults = UserDefaults(suiteName: CaptchaIPC.appGroupID) { + let payload = CaptchaIPC.PendingRequest( + requestId: requestId, + redirectUri: redirectUri, + createdAt: Date().timeIntervalSince1970 + ) + if let data = try? 
JSONEncoder().encode(payload) { + defaults.set(data, forKey: CaptchaIPC.requestUserDefaultsKey) + } + } + + let name = CFNotificationName(CaptchaIPC.requestDarwinNotification as CFString) + CFNotificationCenterPostNotification( + CFNotificationCenterGetDarwinNotifyCenter(), + name, nil, nil, true + ) + } + + /// Called from NEPacketTunnelProvider.handleAppMessage when the app + /// delivers a result (token or cancel) for an outstanding request. + static func handleAppMessage(_ data: Data) -> Data? { + guard let msg = try? JSONDecoder().decode(CaptchaIPC.AppMessage.self, from: data) else { + SharedLogger.warning("CaptchaBridge: ignoring unparseable app message (\(data.count) bytes)", source: .tunnel) + return nil + } + + switch msg.type { + case "captcha_answer": + let token = msg.successToken ?? "" + msg.requestId.withCString { reqIDC in + token.withCString { tokenC in + TurnBridgeSubmitManualCaptchaToken(reqIDC, tokenC) + } + } + SharedLogger.info("CaptchaBridge: delivered success_token for reqID=\(msg.requestId)", source: .tunnel) + + case "captcha_cancel": + let reason = msg.reason ?? "user cancelled" + msg.requestId.withCString { reqIDC in + reason.withCString { reasonC in + TurnBridgeCancelManualCaptcha(reqIDC, reasonC) + } + } + SharedLogger.info("CaptchaBridge: cancelled reqID=\(msg.requestId) (\(reason))", source: .tunnel) + + default: + return nil + } + + // Clear pending request from shared UserDefaults so the app doesn't + // re-prompt on next launch. 
+ if let defaults = UserDefaults(suiteName: CaptchaIPC.appGroupID) { + defaults.removeObject(forKey: CaptchaIPC.requestUserDefaultsKey) + } + return Data("ok".utf8) + } +} diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index b3c963f..c8c03c7 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -81,6 +81,12 @@ class PacketTunnelProvider: NEPacketTunnelProvider { SharedLogger.info("Starting TURN proxy...", source: .tunnel) ProxySetLogger(nil, goProxyCLoggerCallback) + CaptchaBridge.install() + + let manualCaptchaEnabled = UserDefaults(suiteName: CaptchaIPC.appGroupID)? + .bool(forKey: "manualCaptcha") ?? false + TurnBridgeSetManualCaptchaMode(manualCaptchaEnabled ? 1 : 0) + SharedLogger.info("Captcha mode: \(manualCaptchaEnabled ? "manual (browser sheet)" : "auto (in-tunnel solver)")", source: .tunnel) DispatchQueue.global(qos: .userInteractive).async { StartProxy(vkLink, peerAddr, listenAddr, nValue) @@ -140,9 +146,8 @@ class PacketTunnelProvider: NEPacketTunnelProvider { override func handleAppMessage(_ messageData: Data, completionHandler: ((Data?) -> Void)?) { - if let handler = completionHandler { - handler(messageData) - } + let response = CaptchaBridge.handleAppMessage(messageData) ?? messageData + completionHandler?(response) } override func sleep(completionHandler: @escaping () -> Void) { diff --git a/wireguard-apple/Sources/WireGuardKitGo/captcha_manual.go b/wireguard-apple/Sources/WireGuardKitGo/captcha_manual.go new file mode 100644 index 0000000..ef7736b --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/captcha_manual.go @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: MIT +// +// Manual captcha bridge. Lets the Swift app/extension show a real browser +// (WKWebView) for the VK NotRobot captcha when the auto solver can't beat +// it. 
Swift registers a single C callback via TurnBridgeSetManualCaptchaCallback; +// when the auto-solver bails, Go invokes that callback with a redirect_uri +// and blocks until Swift answers via TurnBridgeSubmitManualCaptchaToken or +// TurnBridgeCancelManualCaptcha. + +package main + +/* +#include +#include + +typedef void (*manual_captcha_cb)(const char* request_id, const char* redirect_uri); + +static inline void invoke_manual_captcha_cb(manual_captcha_cb cb, + const char* request_id, + const char* redirect_uri) { + cb(request_id, redirect_uri); +} +*/ +import "C" + +import ( + "fmt" + "sync" + "time" + "unsafe" +) + +type manualCaptchaSlot struct { + tokenCh chan string + errCh chan error +} + +var ( + manualCaptchaMu sync.RWMutex + manualCaptchaCB C.manual_captcha_cb + manualCaptchaEnabled bool + manualCaptchaSlotsMu sync.Mutex + manualCaptchaSlots = make(map[string]*manualCaptchaSlot) +) + +//export TurnBridgeSetManualCaptchaMode +func TurnBridgeSetManualCaptchaMode(enabled C.int) { + manualCaptchaMu.Lock() + defer manualCaptchaMu.Unlock() + manualCaptchaEnabled = enabled != 0 +} + +func manualCaptchaForcedMode() bool { + manualCaptchaMu.RLock() + defer manualCaptchaMu.RUnlock() + return manualCaptchaEnabled && manualCaptchaCB != nil +} + +//export TurnBridgeSetManualCaptchaCallback +func TurnBridgeSetManualCaptchaCallback(cb C.manual_captcha_cb) { + manualCaptchaMu.Lock() + defer manualCaptchaMu.Unlock() + manualCaptchaCB = cb +} + +//export TurnBridgeSubmitManualCaptchaToken +func TurnBridgeSubmitManualCaptchaToken(cReqID *C.char, cToken *C.char) { + if cReqID == nil { + return + } + reqID := C.GoString(cReqID) + token := "" + if cToken != nil { + token = C.GoString(cToken) + } + + manualCaptchaSlotsMu.Lock() + slot, ok := manualCaptchaSlots[reqID] + manualCaptchaSlotsMu.Unlock() + if !ok { + return + } + select { + case slot.tokenCh <- token: + default: + } +} + +//export TurnBridgeCancelManualCaptcha +func TurnBridgeCancelManualCaptcha(cReqID *C.char, 
cReason *C.char) { + if cReqID == nil { + return + } + reqID := C.GoString(cReqID) + reason := "user cancelled" + if cReason != nil { + if s := C.GoString(cReason); s != "" { + reason = s + } + } + + manualCaptchaSlotsMu.Lock() + slot, ok := manualCaptchaSlots[reqID] + manualCaptchaSlotsMu.Unlock() + if !ok { + return + } + select { + case slot.errCh <- fmt.Errorf("%s", reason): + default: + } +} + +// requestManualCaptcha asks the registered handler (the iOS app, via the +// extension) to solve the captcha at redirectURI and return the +// success_token VK assigned. Blocks the caller until Swift responds or +// timeout elapses. +func requestManualCaptcha(redirectURI string, timeout time.Duration) (string, error) { + manualCaptchaMu.RLock() + cb := manualCaptchaCB + manualCaptchaMu.RUnlock() + if cb == nil { + return "", fmt.Errorf("manual captcha handler not registered") + } + if redirectURI == "" { + return "", fmt.Errorf("manual captcha redirect_uri is empty") + } + + reqID := randomHex(8) + slot := &manualCaptchaSlot{ + tokenCh: make(chan string, 1), + errCh: make(chan error, 1), + } + + manualCaptchaSlotsMu.Lock() + manualCaptchaSlots[reqID] = slot + manualCaptchaSlotsMu.Unlock() + defer func() { + manualCaptchaSlotsMu.Lock() + delete(manualCaptchaSlots, reqID) + manualCaptchaSlotsMu.Unlock() + }() + + cReqID := C.CString(reqID) + cURI := C.CString(redirectURI) + defer C.free(unsafe.Pointer(cReqID)) + defer C.free(unsafe.Pointer(cURI)) + + C.invoke_manual_captcha_cb(cb, cReqID, cURI) + + select { + case token := <-slot.tokenCh: + if token == "" { + return "", fmt.Errorf("manual captcha returned empty token") + } + return token, nil + case err := <-slot.errCh: + return "", err + case <-time.After(timeout): + return "", fmt.Errorf("manual captcha timeout after %s", timeout) + } +} diff --git a/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go b/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go index 7192762..a0505d6 100644 --- 
a/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go +++ b/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go @@ -111,6 +111,11 @@ func (e *VkCaptchaError) IsCaptchaError() bool { } func solveVkCaptcha(ctx context.Context, captchaErr *VkCaptchaError) (string, error) { + if manualCaptchaForcedMode() { + log.Printf("[Captcha] Manual mode enabled — handing the challenge to the UI") + return requestManualCaptcha(captchaErr.RedirectUri, 180*time.Second) + } + time.Sleep(time.Duration(1500+mathrand.Intn(1000)) * time.Millisecond) log.Printf("[Captcha] Solving Not Robot Captcha...") diff --git a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h index 222deeb..25a7b12 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h +++ b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h @@ -37,4 +37,10 @@ extern void StopProxy(void); extern void ProxySetLogger(void *context, logger_fn_t logger_fn); extern int ProxyWaitReady(int timeoutMs); +typedef void (*manual_captcha_cb_t)(const char *request_id, const char *redirect_uri); +extern void TurnBridgeSetManualCaptchaCallback(manual_captcha_cb_t cb); +extern void TurnBridgeSubmitManualCaptchaToken(const char *request_id, const char *token); +extern void TurnBridgeCancelManualCaptcha(const char *request_id, const char *reason); +extern void TurnBridgeSetManualCaptchaMode(int enabled); + #endif From bace9be5b83699d0d5315fff50154e6bcc9bd99f Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 05:53:22 +0000 Subject: [PATCH 017/106] fix(captcha): import Combine in the new manager/web view @Published and @ObservedObject require Combine. Without the explicit import, the Swift frontend refused to type-check TurnBridge/CaptchaManager.swift and CaptchaWebView.swift on iOS 26.4 SDK. Also tighten the UIApplication.didBecomeActive observer to capture self locally before crossing into the Task. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/CaptchaManager.swift | 4 +++- TurnBridge/CaptchaWebView.swift | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/TurnBridge/CaptchaManager.swift b/TurnBridge/CaptchaManager.swift index eead6e0..ce0a893 100644 --- a/TurnBridge/CaptchaManager.swift +++ b/TurnBridge/CaptchaManager.swift @@ -1,6 +1,7 @@ import Foundation import NetworkExtension import UIKit +import Combine @MainActor final class CaptchaManager: ObservableObject { @@ -40,7 +41,8 @@ final class CaptchaManager: ObservableObject { object: nil, queue: .main ) { [weak self] _ in - Task { @MainActor in self?.refresh() } + guard let self = self else { return } + Task { @MainActor in self.refresh() } } refresh() diff --git a/TurnBridge/CaptchaWebView.swift b/TurnBridge/CaptchaWebView.swift index eae8353..0154db1 100644 --- a/TurnBridge/CaptchaWebView.swift +++ b/TurnBridge/CaptchaWebView.swift @@ -1,5 +1,6 @@ import SwiftUI import WebKit +import Combine /// Sheet that loads the VK captcha page in a WKWebView, watches XHR / URL /// activity for a `success_token`, and reports the result back via From bee61dfdcdb06e6427cde65bee4b57a5a461c45a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 06:03:21 +0000 Subject: [PATCH 018/106] fix(captcha): give the WebView size, log lifecycle, surface failures The first manual-captcha test showed an empty sheet. Adding explicit .frame(maxWidth:maxHeight:.infinity) on the UIViewRepresentable + an initial UIScreen-sized backing frame keeps WKWebView from collapsing to zero in the SwiftUI layout. Navigation delegate now reports provisional / finish / failure events into both SharedLogger and the status banner, so a blank sheet at least tells us why. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/CaptchaWebView.swift | 39 ++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/TurnBridge/CaptchaWebView.swift b/TurnBridge/CaptchaWebView.swift index 0154db1..247383c 100644 --- a/TurnBridge/CaptchaWebView.swift +++ b/TurnBridge/CaptchaWebView.swift @@ -37,7 +37,9 @@ struct CaptchaWebView: View { }, onStatus: { s in status = s } ) + .frame(maxWidth: .infinity, maxHeight: .infinity) } + .frame(maxWidth: .infinity, maxHeight: .infinity) .navigationTitle("Verify human") .navigationBarTitleDisplayMode(.inline) .toolbar { @@ -52,7 +54,11 @@ struct CaptchaWebView: View { } } } + .onAppear { + SharedLogger.info("Captcha sheet appeared. redirect_uri=\(redirectUri)", source: .app) + } } + .navigationViewStyle(.stack) } } @@ -83,12 +89,20 @@ private struct CaptchaWKWebView: UIViewRepresentable { } config.websiteDataStore = .nonPersistent() - let webView = WKWebView(frame: .zero, configuration: config) + let webView = WKWebView(frame: UIScreen.main.bounds, configuration: config) webView.navigationDelegate = context.coordinator webView.allowsBackForwardNavigationGestures = true + webView.backgroundColor = .systemBackground + webView.scrollView.backgroundColor = .systemBackground + webView.isOpaque = true if let url = url { + SharedLogger.info("CaptchaWebView loading URL: \(url.absoluteString)", source: .app) + onStatus("Loading…") webView.load(URLRequest(url: url)) + } else { + SharedLogger.error("CaptchaWebView: URL is nil — won't load", source: .app) + onStatus("Bad captcha URL") } return webView } @@ -139,6 +153,29 @@ private struct CaptchaWKWebView: UIViewRepresentable { decisionHandler(.allow) } + func webView(_ webView: WKWebView, didStartProvisionalNavigation navigation: WKNavigation!) { + onStatus("Loading captcha…") + } + + func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) 
{ + SharedLogger.info("CaptchaWebView: page finished loading: \(webView.url?.absoluteString ?? "?")", source: .app) + onStatus("Solve the VK challenge below") + } + + func webView(_ webView: WKWebView, + didFail navigation: WKNavigation!, + withError error: Error) { + SharedLogger.error("CaptchaWebView: navigation failed: \(error.localizedDescription)", source: .app) + onStatus("Failed: \(error.localizedDescription)") + } + + func webView(_ webView: WKWebView, + didFailProvisionalNavigation navigation: WKNavigation!, + withError error: Error) { + SharedLogger.error("CaptchaWebView: provisional navigation failed: \(error.localizedDescription)", source: .app) + onStatus("Failed: \(error.localizedDescription)") + } + private func tokenFromURL(_ url: URL) -> String { if let comps = URLComponents(url: url, resolvingAgainstBaseURL: false), let item = comps.queryItems?.first(where: { $0.name == "success_token" }), From c48c3bf534b6dd74d17d123e196f7f39c59a24cb Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 06:18:45 +0000 Subject: [PATCH 019/106] fix(captcha): drop includeAllNetworks while manual captcha is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WKWebView in the host app cannot reach id.vk.ru while includeAllNetworks is true and the tunnel is still in the Connecting phase — iOS strictly blocks all egress, so the captcha challenge fails with "internet appears to be offline" and the tunnel can never make forward progress. Trade kill-switch for captcha solvability when manual mode is on. Auto mode keeps the previous behaviour. The Settings caption now explains the trade-off. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/GlobalSettingsView.swift | 2 +- TurnBridge/TurnBridgeApp.swift | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/TurnBridge/GlobalSettingsView.swift b/TurnBridge/GlobalSettingsView.swift index dcb5301..97105b1 100644 --- a/TurnBridge/GlobalSettingsView.swift +++ b/TurnBridge/GlobalSettingsView.swift @@ -13,7 +13,7 @@ struct GlobalSettingsView: View { Toggle(isOn: $manualCaptcha) { VStack(alignment: .leading) { Text("Solve captcha manually") - Text("Skip the auto-solver and show the VK challenge in a browser sheet instead.") + Text("Show the VK challenge in a browser sheet instead of running the auto solver. Disables the kill switch (includeAllNetworks) for the session — required so the captcha page can load while the tunnel is still coming up.") .font(.caption) .foregroundColor(.secondary) } diff --git a/TurnBridge/TurnBridgeApp.swift b/TurnBridge/TurnBridgeApp.swift index eed6598..520642c 100755 --- a/TurnBridge/TurnBridgeApp.swift +++ b/TurnBridge/TurnBridgeApp.swift @@ -68,12 +68,19 @@ struct TurnBridge: App { let excludeCellular = defaults.object(forKey: "excludeCellularServices") as? Bool ?? false let excludeLAN = defaults.object(forKey: "excludeLocalNetworks") as? Bool ?? true - protocolConfiguration.includeAllNetworks = true + // Manual captcha mode needs the captcha web view in the main app + // to actually reach the internet *while the tunnel is still + // coming up*. iOS enforces includeAllNetworks strictly during the + // Connecting phase too, so leaving it on means the WebView can + // never load id.vk.ru to ask the user. Trade kill-switch for + // captcha solvability when this mode is on. 
+ let manualCaptcha = ManualCaptchaSetting.isEnabled + protocolConfiguration.includeAllNetworks = !manualCaptcha protocolConfiguration.excludeAPNs = excludeAPNs protocolConfiguration.excludeCellularServices = excludeCellular protocolConfiguration.excludeLocalNetworks = excludeLAN - SharedLogger.debug("Routing: LAN=\(excludeLAN), APNs=\(excludeAPNs), Cellular=\(excludeCellular)") + SharedLogger.debug("Routing: includeAll=\(!manualCaptcha) (manualCaptcha=\(manualCaptcha)), LAN=\(excludeLAN), APNs=\(excludeAPNs), Cellular=\(excludeCellular)") tunnelManager.protocolConfiguration = protocolConfiguration tunnelManager.isEnabled = true From 6f1b2be7ea5706e8410b24604f9ca87b05574692 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 06:42:05 +0000 Subject: [PATCH 020/106] fix(captcha): give DTLS 5 min, not 12s, while manual captcha is on The user-driven captcha sheet routinely takes longer than 12s to complete (page load + human interaction), but ProxyWaitReady was hard-coded to a 12s deadline. The tunnel was killing itself mid-prompt even after the WebView successfully returned a success_token. When manual captcha mode is enabled, extend the DTLS readiness timeout to 300s so the human has room to work. Auto mode keeps the original budget so a stuck auto-solver still fails fast. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index c8c03c7..89fb785 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -88,17 +88,23 @@ class PacketTunnelProvider: NEPacketTunnelProvider { TurnBridgeSetManualCaptchaMode(manualCaptchaEnabled ? 1 : 0) SharedLogger.info("Captcha mode: \(manualCaptchaEnabled ? 
"manual (browser sheet)" : "auto (in-tunnel solver)")", source: .tunnel) + // Manual captcha is human-driven, so give the user time to actually + // solve the challenge before declaring DTLS dead. Auto mode keeps + // the original 12s budget — if the solver can't bash through in + // that window something else is wrong and we want fast failure. + let dtlsReadyTimeoutMs: Int32 = manualCaptchaEnabled ? 300_000 : 12_000 + DispatchQueue.global(qos: .userInteractive).async { StartProxy(vkLink, peerAddr, listenAddr, nValue) } DispatchQueue.global(qos: .userInteractive).async { [weak self] in - let ready = ProxyWaitReady(12000) + let ready = ProxyWaitReady(dtlsReadyTimeoutMs) guard let self = self else { return } if ready == 0 { sharedLogger.error("DTLS connection timeout!") - SharedLogger.error("DTLS connection timeout (12s)", source: .tunnel) + SharedLogger.error("DTLS connection timeout (\(dtlsReadyTimeoutMs / 1000)s)", source: .tunnel) completionHandler(PacketTunnelProviderError.invalidProtocolConfiguration) return } From 5a621bdfb7c156b53109172711f9e9c1639bd84f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 10:08:30 +0000 Subject: [PATCH 021/106] fix(turn): keep DTLS/TURN alive across backgrounding and network changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user-visible problem: after ~5 minutes the VK TURN allocation expires on the server, but iOS / pion-turn / the proxy loop don't notice fast enough. The UI still says Connected while WG packets fall into a black hole because the inner DTLS channel is dead and nobody told anyone. Six fixes, end-to-end: 1. **Active watchdog** (`turn_proxy.go`): Every 15s `oneDtlsConnection` checks when it last read a byte from `dtlsConn`. If silence > 60s it cancels `dtlsctx`, which causes the outer loop to spin a fresh DTLS connection on the same listenConn with cached TURN creds (no captcha). 2. 
**DTLS keepalive**: WireGuard already does this via `PersistentKeepalive=25`, so no extra writes from our side — the watchdog is what makes those keepalives actually count. 3. **`poolCreds` cache stays warm** (`turn_proxy.go`): `cTime` is now refreshed on every reuse, not just when the pool grows. A long-lived session can no longer evict its own credentials mid-flight and force a fresh captcha on the next reconnect. 4. **`RestartProxy` cgo export** (`turn_restart.go`, `wireguard.h`): New global registry of active `dtlscancel` funcs (kept in `turn_proxy.go`); `RestartProxy()` iterates and cancels them all. Swift reaches it through `restartTransport(...)`, which the `PacketTunnelProvider.wake()` override calls. 5. **`NWPathMonitor` in the extension** (`PacketTunnelProvider.swift`): Started after the WG adapter is up. On every path change after the initial one we call `restartTransport(...)`, which is debounced to 5s so a wake + path-change burst only restarts once. 6. **Transport-health banner**: - `network-extension/TransportHealthMonitor.swift` watches the Go log stream from the existing logger callback and writes `transport.lastAliveAt` / `transport.lastDeadAt` into the App Group UserDefaults. - `TurnBridge/TransportHealthBanner.swift` + `ContentView` poll every 5s and surface an orange "Connection unstable" strip when iOS still says NEVPNStatus=.connected but either the last alive signal is more than 30s old or a failure was logged after it.
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/ContentView.swift | 13 +++- TurnBridge/TransportHealthBanner.swift | 67 ++++++++++++++++++ network-extension/PacketTunnelProvider.swift | 58 +++++++++++++++- .../TransportHealthMonitor.swift | 53 +++++++++++++++ .../Sources/WireGuardKitGo/turn_proxy.go | 68 +++++++++++++++++++ .../Sources/WireGuardKitGo/turn_restart.go | 24 +++++++ .../Sources/WireGuardKitGo/wireguard.h | 1 + 7 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 TurnBridge/TransportHealthBanner.swift create mode 100644 network-extension/TransportHealthMonitor.swift create mode 100644 wireguard-apple/Sources/WireGuardKitGo/turn_restart.go diff --git a/TurnBridge/ContentView.swift b/TurnBridge/ContentView.swift index 55d1788..51c68e0 100755 --- a/TurnBridge/ContentView.swift +++ b/TurnBridge/ContentView.swift @@ -11,6 +11,7 @@ struct ContentView: View { var app: TurnBridge @State private var vpnStatus: NEVPNStatus = .disconnected + @StateObject private var transportHealth = TransportHealthState() @StateObject private var store = ProfileStore() @State private var showImportModal = false @@ -47,6 +48,12 @@ struct ContentView: View { .disabled(vpnStatus != .disconnected) } + if vpnStatus == .connected { + TransportHealthBanner(isStalled: transportHealth.isStalled) + .padding(.top, 8) + .animation(.easeInOut, value: transportHealth.isStalled) + } + Spacer() VStack(spacing: 50) { @@ -118,7 +125,11 @@ struct ContentView: View { SettingsView(store: store, profileID: sheet.profileID, isNewProfile: sheet.isNew) } } - .onAppear(perform: checkInitialStatus) + .onAppear { + checkInitialStatus() + transportHealth.start() + } + .onDisappear { transportHealth.stop() } .onReceive(NotificationCenter.default.publisher(for: .NEVPNStatusDidChange)) { notification in if let connection = notification.object as? 
NEVPNConnection { let newStatus = connection.status diff --git a/TurnBridge/TransportHealthBanner.swift b/TurnBridge/TransportHealthBanner.swift new file mode 100644 index 0000000..db6d3b5 --- /dev/null +++ b/TurnBridge/TransportHealthBanner.swift @@ -0,0 +1,67 @@ +import SwiftUI +import Combine + +/// Polls the App Group flags written by TransportHealthMonitor in the +/// extension. When the VPN status is `.connected` but the underlying +/// DTLS/TURN cycle has been silent / failed recently, surface a banner +/// so the user knows iOS lies about the connection being healthy. +@MainActor +final class TransportHealthState: ObservableObject { + @Published private(set) var isStalled = false + + private var timer: AnyCancellable? + + func start() { + guard timer == nil else { return } + refresh() + timer = Timer.publish(every: 5, on: .main, in: .common) + .autoconnect() + .sink { [weak self] _ in self?.refresh() } + } + + func stop() { + timer?.cancel() + timer = nil + isStalled = false + } + + private func refresh() { + guard let defaults = UserDefaults(suiteName: "group.com.truvvor.turnbridge") else { + isStalled = false + return + } + let alive = defaults.object(forKey: "transport.lastAliveAt") as? Date + let dead = defaults.object(forKey: "transport.lastDeadAt") as? Date + guard let alive = alive else { + // Never alive yet → not necessarily stalled (still connecting). + isStalled = false + return + } + let now = Date() + let lastDeadAfterAlive = (dead.map { $0 > alive } ?? 
false) + let aliveStale = now.timeIntervalSince(alive) > 30 + isStalled = lastDeadAfterAlive || aliveStale + } +} + +struct TransportHealthBanner: View { + let isStalled: Bool + + var body: some View { + if isStalled { + HStack(spacing: 8) { + Image(systemName: "exclamationmark.triangle.fill") + Text("Connection unstable — TURN tunnel reconnecting") + .font(.footnote) + Spacer() + } + .foregroundColor(.white) + .padding(.horizontal, 12) + .padding(.vertical, 8) + .background(Color.orange) + .cornerRadius(10) + .padding(.horizontal, 16) + .transition(.move(edge: .top).combined(with: .opacity)) + } + } +} diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 89fb785..0c96ad4 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -3,6 +3,7 @@ // import NetworkExtension +import Network import WireGuardKit import WireGuardKitGo import os @@ -18,6 +19,8 @@ private let goProxyCLoggerCallback: @convention(c) (UnsafeMutableRawPointer?, In guard let cStr = messageCStr else { return } let message = String(cString: cStr).trimmingCharacters(in: .newlines) + TransportHealthMonitor.observe(message) + if level == 1 { sharedLogger.error("[TP]: \(message, privacy: .public)") SharedLogger.error(message, source: .tunnel) @@ -36,6 +39,23 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } }() + private var pathMonitor: NWPathMonitor? + private var pathMonitorReceivedFirstUpdate = false + private var lastTransportRestartAt = Date.distantPast + + /// Tear down the current TURN/DTLS cycle and let the proxy spin up + /// fresh inner connections, reusing cached credentials when possible + /// (so no captcha re-prompt). Debounced to 5s to avoid stampedes when + /// several signals fire at once (wake + network change). 
+ private func restartTransport(reason: String) { + if Date().timeIntervalSince(lastTransportRestartAt) < 5 { + return + } + lastTransportRestartAt = Date() + SharedLogger.info("Transport restart: \(reason)", source: .tunnel) + RestartProxy() + } + override func startTunnel(options: [String : NSObject]?, completionHandler: @escaping (Error?) -> Void) { sharedLogger.log("=== Starting tunnel ===") @@ -119,18 +139,51 @@ class PacketTunnelProvider: NEPacketTunnelProvider { let interfaceName = self.adapter.interfaceName ?? "unknown" sharedLogger.log("Tunnel interface is \(interfaceName)") SharedLogger.info("Tunnel up on interface \(interfaceName)", source: .wireguard) + self.startNetworkMonitoring() } completionHandler(adapterError) } } } + private func startNetworkMonitoring() { + guard pathMonitor == nil else { return } + let monitor = NWPathMonitor() + monitor.pathUpdateHandler = { [weak self] path in + guard let self = self else { return } + let descriptors: [String] = [ + path.usesInterfaceType(.wifi) ? "wifi" : nil, + path.usesInterfaceType(.cellular) ? "cellular" : nil, + path.usesInterfaceType(.wiredEthernet) ? "ethernet" : nil + ].compactMap { $0 } + let label = descriptors.isEmpty ? "unknown" : descriptors.joined(separator: "+") + SharedLogger.info("NWPath update: status=\(path.status), via=\(label)", source: .tunnel) + // The first update fires immediately after start() — it just + // describes the current path, no need to bounce the transport. 
+ if !self.pathMonitorReceivedFirstUpdate { + self.pathMonitorReceivedFirstUpdate = true + return + } + if path.status == .satisfied { + self.restartTransport(reason: "network change to \(label)") + } + } + monitor.start(queue: DispatchQueue.global(qos: .utility)) + pathMonitor = monitor + SharedLogger.debug("NWPathMonitor started", source: .tunnel) + } + override func stopTunnel(with reason: NEProviderStopReason, completionHandler: @escaping () -> Void) { sharedLogger.log("Stopping tunnel") SharedLogger.info("Stopping tunnel (reason: \(reason.rawValue))", source: .tunnel) + pathMonitor?.cancel() + pathMonitor = nil + pathMonitorReceivedFirstUpdate = false + StopProxy() SharedLogger.info("TURN proxy stopped", source: .tunnel) + TransportHealthMonitor.reset() adapter.stop { [weak self] error in guard self != nil else { return } @@ -157,11 +210,12 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } override func sleep(completionHandler: @escaping () -> Void) { - // Add code here to get ready to sleep. + SharedLogger.debug("Tunnel sleep requested", source: .tunnel) completionHandler() } override func wake() { - // Add code here to wake up. + SharedLogger.info("Tunnel wake — restarting transport", source: .tunnel) + restartTransport(reason: "device wake") } } diff --git a/network-extension/TransportHealthMonitor.swift b/network-extension/TransportHealthMonitor.swift new file mode 100644 index 0000000..d13d391 --- /dev/null +++ b/network-extension/TransportHealthMonitor.swift @@ -0,0 +1,53 @@ +import Foundation + +/// Watches the Go proxy log stream to maintain a "transport-alive" flag in +/// the App Group's UserDefaults. The main app reads this to surface a +/// "Connection unstable" banner when iOS still says NEVPNStatus=.connected +/// but the underlying DTLS/TURN tunnel hasn't seen any traffic in a while. 
+enum TransportHealthMonitor { + static let lastAliveKey = "transport.lastAliveAt" + static let lastDeadKey = "transport.lastDeadAt" + + private static let appGroupID = "group.com.truvvor.turnbridge" + + private static let aliveSignals: [String] = [ + "Established DTLS connection", + "Proxy started on", + "Successfully registered User Identity" + ] + + private static let deadSignals: [String] = [ + "Watchdog:", + "Failed: ", + "Closed DTLS connection", + "DTLS connection timeout", + "Proxy gracefully stopped", + "RestartProxy:" + ] + + /// Inspect a single log line emitted from the Go proxy. + static func observe(_ message: String) { + for s in aliveSignals where message.contains(s) { + markAlive() + return + } + for s in deadSignals where message.contains(s) { + markDead() + return + } + } + + static func markAlive() { + UserDefaults(suiteName: appGroupID)?.set(Date(), forKey: lastAliveKey) + } + + static func markDead() { + UserDefaults(suiteName: appGroupID)?.set(Date(), forKey: lastDeadKey) + } + + static func reset() { + let defaults = UserDefaults(suiteName: appGroupID) + defaults?.removeObject(forKey: lastAliveKey) + defaults?.removeObject(forKey: lastDeadKey) + } +} diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 0b9bc88..a80a7e5 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -42,6 +42,41 @@ var proxyLoggerFunc C.proxy_logger_fn_t var proxyLoggerCtx unsafe.Pointer var proxyCancel context.CancelFunc +// activeDtlsCancels tracks the in-flight dtlsCancel functions of every +// oneDtlsConnection goroutine so RestartProxy() can tear them all down +// without restarting the whole proxy. 
+var ( + activeDtlsCancelsMu sync.Mutex + activeDtlsCancels = make(map[uint64]context.CancelFunc) + activeDtlsCancelID uint64 +) + +func registerActiveDtlsCancel(cancel context.CancelFunc) func() { + activeDtlsCancelsMu.Lock() + activeDtlsCancelID++ + id := activeDtlsCancelID + activeDtlsCancels[id] = cancel + activeDtlsCancelsMu.Unlock() + return func() { + activeDtlsCancelsMu.Lock() + delete(activeDtlsCancels, id) + activeDtlsCancelsMu.Unlock() + } +} + +func cancelAllActiveDtls() int { + activeDtlsCancelsMu.Lock() + cancels := make([]context.CancelFunc, 0, len(activeDtlsCancels)) + for _, c := range activeDtlsCancels { + cancels = append(cancels, c) + } + activeDtlsCancelsMu.Unlock() + for _, c := range cancels { + c() + } + return len(cancels) +} + //export ProxySetLogger func ProxySetLogger(context unsafe.Pointer, loggerFn C.proxy_logger_fn_t) { proxyLoggerCtx = context @@ -254,6 +289,8 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa defer func() { c1 <- err }() dtlsctx, dtlscancel := context.WithCancel(ctx) defer dtlscancel() + deregisterCancel := registerActiveDtlsCancel(dtlscancel) + defer deregisterCancel() var conn1, conn2 net.PacketConn conn1, conn2 = connutil.AsyncPacketPipe() go func() { @@ -295,6 +332,31 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa listenConn.SetDeadline(time.Now()) dtlsConn.SetDeadline(time.Now()) }) + + // Watchdog: if no inbound bytes from dtlsConn for >60s, force a + // restart. With WG's PersistentKeepalive=25 we should be seeing + // traffic every few seconds; a long silence means the TURN + // allocation died or the network changed under us. 
+ var lastRxNanos atomic.Int64 + lastRxNanos.Store(time.Now().UnixNano()) + go func() { + ticker := time.NewTicker(15 * time.Second) + defer ticker.Stop() + for { + select { + case <-dtlsctx.Done(): + return + case now := <-ticker.C: + last := time.Unix(0, lastRxNanos.Load()) + if now.Sub(last) > 60*time.Second { + log.Printf("Watchdog: no inbound DTLS traffic for %s — forcing restart", now.Sub(last).Round(time.Second)) + dtlscancel() + return + } + } + } + }() + var addr atomic.Value go func() { defer wg.Done() @@ -337,6 +399,7 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa log.Printf("Failed: %s", err1) return } + lastRxNanos.Store(time.Now().UnixNano()) addr1, ok := addr.Load().(net.Addr) if !ok { log.Printf("Failed: no listener ip") @@ -636,6 +699,7 @@ func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { log.Printf("Falling back to reusing a previous identity...") c := pool[idx%len(pool)] idx++ + cTime = time.Now() return c.user, c.pass, c.addr, nil } return "", "", "", err @@ -643,6 +707,10 @@ func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { c := pool[idx%len(pool)] idx++ + // Refresh the cache deadline on every reuse so reconnect storms + // after a long-lived session don't suddenly evict the pool and + // force a fresh captcha. + cTime = time.Now() return c.user, c.pass, c.addr, nil } } diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go b/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go new file mode 100644 index 0000000..4b333b3 --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +// +// Force the in-tunnel TURN/DTLS cycle to tear down and reconnect without +// having to stop and re-start the whole proxy from Swift. 
This is what +// PacketTunnelProvider hooks into for wake/sleep/network-change events: +// rather than waiting for the next WG packet to discover that the DTLS +// channel is dead, we cancel the currently active oneDtlsConnection +// goroutines and let the existing retry loop spin up fresh ones with +// the pool-cached TURN credentials (no captcha re-prompt). + +package main + +import "C" +import "log" + +//export RestartProxy +func RestartProxy() { + n := cancelAllActiveDtls() + if n == 0 { + log.Printf("RestartProxy: nothing to restart") + return + } + log.Printf("RestartProxy: cancelled %d in-flight DTLS connection(s)", n) +} diff --git a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h index 25a7b12..5897673 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h +++ b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h @@ -34,6 +34,7 @@ extern char* LibXraySetSockCallback(libxray_sockcallback cb, void* ctx); extern void StartProxy(const char *link, const char *peerAddrStr, const char *localAddrStr, int n); extern void StopProxy(void); +extern void RestartProxy(void); extern void ProxySetLogger(void *context, logger_fn_t logger_fn); extern int ProxyWaitReady(int timeoutMs); From 60c6cbae8ad0c7c402323ea88e0dd8d0620cbe86 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 10:21:16 +0000 Subject: [PATCH 022/106] fix(tunnel): don't restart on cellular PDP flaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LTE was firing `NWPath update: status=satisfied, via=cellular` every ~20s without actually changing anything (PDP-context refresh / tower handover). The first build of this PR restarted DTLS on every one of those, producing a ~1s outage every 20s on cellular. Only restart when something observable changed: - interface kind flipped (wifi ↔ cellular ↔ ethernet), or - path was previously unsatisfied and is now satisfied. 
Pure noise events are dropped silently. The 60s DTLS watchdog still catches real transport death, so we don't lose the safety net. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 37 +++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 0c96ad4..90fa271 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -40,7 +40,8 @@ class PacketTunnelProvider: NEPacketTunnelProvider { }() private var pathMonitor: NWPathMonitor? - private var pathMonitorReceivedFirstUpdate = false + private var lastPathStatus: NWPath.Status? + private var lastPathInterfaceLabel: String? private var lastTransportRestartAt = Date.distantPast /// Tear down the current TURN/DTLS cycle and let the proxy spin up @@ -157,15 +158,34 @@ class PacketTunnelProvider: NEPacketTunnelProvider { path.usesInterfaceType(.wiredEthernet) ? "ethernet" : nil ].compactMap { $0 } let label = descriptors.isEmpty ? "unknown" : descriptors.joined(separator: "+") - SharedLogger.info("NWPath update: status=\(path.status), via=\(label)", source: .tunnel) - // The first update fires immediately after start() — it just - // describes the current path, no need to bounce the transport. - if !self.pathMonitorReceivedFirstUpdate { - self.pathMonitorReceivedFirstUpdate = true + let prevStatus = self.lastPathStatus + let prevLabel = self.lastPathInterfaceLabel + self.lastPathStatus = path.status + self.lastPathInterfaceLabel = label + + // Cellular flaps the path on PDP-context refreshes / tower + // handovers — same interface kind, status stays .satisfied, but + // an event fires every ~20s. Restarting on each one tears down + // a working DTLS for no reason. 
We only restart when something + // observable actually changed: interface kind flipped (wifi ↔ + // cellular), or the path was previously unavailable and is now + // satisfied. Pure-noise events are dropped silently; the + // watchdog still catches real DTLS death. + guard let prevStatus = prevStatus else { + SharedLogger.info("NWPath initial: status=\(path.status), via=\(label)", source: .tunnel) return } + let interfaceFlipped = (prevLabel ?? "") != label + let recovered = prevStatus != .satisfied && path.status == .satisfied + if !interfaceFlipped && !recovered { + return + } + SharedLogger.info("NWPath change: \(prevLabel ?? "?")/\(prevStatus) → \(label)/\(path.status)", source: .tunnel) if path.status == .satisfied { - self.restartTransport(reason: "network change to \(label)") + let reason = interfaceFlipped + ? "interface flip \(prevLabel ?? "?") → \(label)" + : "path recovered to \(label)" + self.restartTransport(reason: reason) } } monitor.start(queue: DispatchQueue.global(qos: .utility)) @@ -179,7 +199,8 @@ class PacketTunnelProvider: NEPacketTunnelProvider { pathMonitor?.cancel() pathMonitor = nil - pathMonitorReceivedFirstUpdate = false + lastPathStatus = nil + lastPathInterfaceLabel = nil StopProxy() SharedLogger.info("TURN proxy stopped", source: .tunnel) From 96d6c51b03fa8a381c2f8534eadba699c29e479d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 10:28:06 +0000 Subject: [PATCH 023/106] ci: defensive Swift in NWPath handler to unblock archive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous archive job (60c6cba) failed in ~58s, which is consistent with xcodebuild bailing during Swift compilation of PacketTunnelProvider.swift before reaching link/archive. The only file that changed in that commit was the NWPath handler. 
Without log access I can't pin the exact diagnostic, so cover the most likely culprits defensively: - Replace `\(NWPath.Status)` interpolation with an explicit `describe(_:)` helper. Avoids relying on the type's auto-derived `description` (NWPath.Status is not declared CustomStringConvertible). - Fully qualify case shorthand as `NWPath.Status.satisfied` in comparisons so type inference can't fail. - Drop the Unicode `→` from a string literal — use ASCII `->`. - Hoist `prevLabel ?? "?"` once so optionals don't appear in interpolation sites. No behavior change. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 32 ++++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 90fa271..d8943dc 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -147,6 +147,15 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } } + private func describe(_ status: NWPath.Status) -> String { + switch status { + case .satisfied: return "satisfied" + case .unsatisfied: return "unsatisfied" + case .requiresConnection: return "requiresConnection" + @unknown default: return "unknown" + } + } + private func startNetworkMonitoring() { guard pathMonitor == nil else { return } let monitor = NWPathMonitor() @@ -159,31 +168,34 @@ class PacketTunnelProvider: NEPacketTunnelProvider { ].compactMap { $0 } let label = descriptors.isEmpty ? "unknown" : descriptors.joined(separator: "+") let prevStatus = self.lastPathStatus - let prevLabel = self.lastPathInterfaceLabel + let prevLabel = self.lastPathInterfaceLabel ?? "?" 
self.lastPathStatus = path.status self.lastPathInterfaceLabel = label + let curStatusStr = self.describe(path.status) + // Cellular flaps the path on PDP-context refreshes / tower // handovers — same interface kind, status stays .satisfied, but // an event fires every ~20s. Restarting on each one tears down // a working DTLS for no reason. We only restart when something - // observable actually changed: interface kind flipped (wifi ↔ - // cellular), or the path was previously unavailable and is now - // satisfied. Pure-noise events are dropped silently; the + // observable actually changed: interface kind flipped (wifi + // <-> cellular), or the path was previously unavailable and is + // now satisfied. Pure-noise events are dropped silently; the // watchdog still catches real DTLS death. guard let prevStatus = prevStatus else { - SharedLogger.info("NWPath initial: status=\(path.status), via=\(label)", source: .tunnel) + SharedLogger.info("NWPath initial: status=\(curStatusStr), via=\(label)", source: .tunnel) return } - let interfaceFlipped = (prevLabel ?? "") != label - let recovered = prevStatus != .satisfied && path.status == .satisfied + let prevStatusStr = self.describe(prevStatus) + let interfaceFlipped = prevLabel != label + let recovered = prevStatus != NWPath.Status.satisfied && path.status == NWPath.Status.satisfied if !interfaceFlipped && !recovered { return } - SharedLogger.info("NWPath change: \(prevLabel ?? "?")/\(prevStatus) → \(label)/\(path.status)", source: .tunnel) - if path.status == .satisfied { + SharedLogger.info("NWPath change: \(prevLabel)/\(prevStatusStr) -> \(label)/\(curStatusStr)", source: .tunnel) + if path.status == NWPath.Status.satisfied { let reason = interfaceFlipped - ? "interface flip \(prevLabel ?? "?") → \(label)" + ? 
"interface flip \(prevLabel) -> \(label)" : "path recovered to \(label)" self.restartTransport(reason: reason) } From 3f6fcd6e4c6092f62735b3d0175aa3a941a66587 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 10:30:18 +0000 Subject: [PATCH 024/106] fix(tunnel): disambiguate NWPath as Network.NWPath iOS 26.4 SDK exposes two `NWPath` types in scope when the file imports both `Network` and `NetworkExtension`: - Network.NWPath (Swift struct) - NetworkExtension.NWPath (Obj-C class from NetworkExtension/NWPath.h) Swift refused to compile `NWPath.Status` because the bare name is now ambiguous. Fully qualify every type reference as `Network.NWPath.Status`. The string literals `"NWPath ..."` are just log text, no change there. Resolves: error: 'NWPath' is ambiguous for type lookup in this context https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index d8943dc..1b824c6 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -40,7 +40,7 @@ class PacketTunnelProvider: NEPacketTunnelProvider { }() private var pathMonitor: NWPathMonitor? - private var lastPathStatus: NWPath.Status? + private var lastPathStatus: Network.NWPath.Status? private var lastPathInterfaceLabel: String? 
 private var lastTransportRestartAt = Date.distantPast @@ -147,7 +147,7 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } } - private func describe(_ status: NWPath.Status) -> String { + private func describe(_ status: Network.NWPath.Status) -> String { switch status { case .satisfied: return "satisfied" case .unsatisfied: return "unsatisfied" @@ -188,12 +188,12 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } let prevStatusStr = self.describe(prevStatus) let interfaceFlipped = prevLabel != label - let recovered = prevStatus != NWPath.Status.satisfied && path.status == NWPath.Status.satisfied + let recovered = prevStatus != Network.NWPath.Status.satisfied && path.status == Network.NWPath.Status.satisfied if !interfaceFlipped && !recovered { return } SharedLogger.info("NWPath change: \(prevLabel)/\(prevStatusStr) -> \(label)/\(curStatusStr)", source: .tunnel) - if path.status == NWPath.Status.satisfied { + if path.status == Network.NWPath.Status.satisfied { let reason = interfaceFlipped ? "interface flip \(prevLabel) -> \(label)" : "path recovered to \(label)" From e0df259eb62d42bdab306c8241f94b4770f26c6f Mon Sep 17 00:00:00 2001 From: "Claude (Cowork)" Date: Tue, 12 May 2026 17:10:21 +0000 Subject: [PATCH 025/106] feat(turn_proxy): log DTLS and TURN session lifetimes on exit Adds structured log lines on every session teardown so we can see how long a DTLS / TURN session lived and what error tore it down. Pairs with server-side journal logs (vk-turn-proxy@udp) to diagnose reconnect cadence. Sample: DTLS session lifetime=42.137s exit=<nil> TURN session lifetime=27.901s exit=failed to allocate: STUN timeout Part of P3 in the connectivity-stability series.
--- wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 0b9bc88..5a97b8f 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -252,6 +252,10 @@ func dtlsFunc(ctx context.Context, conn net.PacketConn, peer *net.UDPAddr) (net. func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.PacketConn, connchan chan<- net.PacketConn, okchan chan<- struct{}, c1 chan<- error) { var err error = nil defer func() { c1 <- err }() + sessionStart := time.Now() + defer func() { + log.Printf("DTLS session lifetime=%s exit=%v", time.Since(sessionStart).Round(time.Millisecond), err) + }() dtlsctx, dtlscancel := context.WithCancel(ctx) defer dtlscancel() var conn1, conn2 net.PacketConn @@ -375,6 +379,10 @@ type turnParams struct { func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UDPAddr, conn2 net.PacketConn, c chan<- error) { var err error = nil defer func() { c <- err }() + sessionStart := time.Now() + defer func() { + log.Printf("TURN session lifetime=%s exit=%v", time.Since(sessionStart).Round(time.Millisecond), err) + }() user, pass, url, err1 := turnParams.getCreds(turnParams.link) if err1 != nil { err = fmt.Errorf("failed to get TURN credentials: %s", err1) From e61b90f8db7a9af0239def75141e7524b67a4a79 Mon Sep 17 00:00:00 2001 From: "Claude (Cowork)" Date: Tue, 12 May 2026 17:10:46 +0000 Subject: [PATCH 026/106] feat(turn_proxy): exponential backoff with jitter on reconnect oneDtlsConnectionLoop and oneTurnConnectionLoop previously retried immediately on failure (TURN loop was throttled only by a 200ms tick). On VK API rate-limits or a flapping cellular link this caused tight retry storms that drain battery and never recover. 
Now both loops use a capped exponential backoff (500ms -> 30s) with +/- 25% jitter so N parallel streams desynchronise. Backoff resets to 0 on a successful session, so steady-state reconnects after a quick blip are still near-instant. Part of P4 in the connectivity-stability series. --- .../Sources/WireGuardKitGo/turn_proxy.go | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 5a97b8f..4f73c16 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -21,6 +21,7 @@ import ( "fmt" "io" "log" + "math/rand" "net" "net/http" neturl "net/url" @@ -569,7 +570,31 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD } } +// reconnectBackoff produces a capped exponential backoff with jitter. +// Caller uses it like: +// wait := reconnectBackoff(prev, success) +// time.Sleep(wait) +// On success it returns 0 (caller resets state and continues immediately). +func reconnectBackoff(prev time.Duration, success bool) time.Duration { + if success { + return 0 + } + if prev <= 0 { + prev = 500 * time.Millisecond + } else { + prev *= 2 + } + const maxBackoff = 30 * time.Second + if prev > maxBackoff { + prev = maxBackoff + } + // Add jitter +/- 25% so reconnects don't synchronise across N parallel streams. 
+ jitter := time.Duration(rand.Int63n(int64(prev / 2))) - prev/4 + return prev + jitter +} + func oneDtlsConnectionLoop(ctx context.Context, peer *net.UDPAddr, listenConnChan <-chan net.PacketConn, connchan chan<- net.PacketConn, okchan chan<- struct{}) { + var backoff time.Duration for { select { case <-ctx.Done(): @@ -577,14 +602,27 @@ func oneDtlsConnectionLoop(ctx context.Context, peer *net.UDPAddr, listenConnCha case listenConn := <-listenConnChan: c := make(chan error) go oneDtlsConnection(ctx, peer, listenConn, connchan, okchan, c) - if err := <-c; err != nil { + err := <-c + if err != nil { log.Printf("%s", err) + backoff = reconnectBackoff(backoff, false) + if backoff > 0 { + log.Printf("DTLS reconnect in %s", backoff.Round(time.Millisecond)) + select { + case <-ctx.Done(): + return + case <-time.After(backoff): + } + } + } else { + backoff = reconnectBackoff(backoff, true) } } } } func oneTurnConnectionLoop(ctx context.Context, turnParams *turnParams, peer *net.UDPAddr, connchan <-chan net.PacketConn, t <-chan time.Time) { + var backoff time.Duration for { select { case <-ctx.Done(): @@ -594,8 +632,20 @@ func oneTurnConnectionLoop(ctx context.Context, turnParams *turnParams, peer *ne case <-t: c := make(chan error) go oneTurnConnection(ctx, turnParams, peer, conn2, c) - if err := <-c; err != nil { + err := <-c + if err != nil { log.Printf("%s", err) + backoff = reconnectBackoff(backoff, false) + if backoff > 0 { + log.Printf("TURN reconnect in %s", backoff.Round(time.Millisecond)) + select { + case <-ctx.Done(): + return + case <-time.After(backoff): + } + } + } else { + backoff = reconnectBackoff(backoff, true) } default: } From a5c8bde8c089912e346d2a815140d540763140fa Mon Sep 17 00:00:00 2001 From: "Claude (Cowork)" Date: Tue, 12 May 2026 17:10:57 +0000 Subject: [PATCH 027/106] fix(turn_proxy): stop overriding TURN port with hardcoded 19302 StartProxy used to set port = "19302" by default, and oneTurnConnection would then override whatever VK API 
responded with that hardcoded value. If VK ever moves their TURN endpoints to a different port, the client breaks silently without a useful error. The VK API response (turn_server.urls[0]) already carries the correct host:port; we just trust it now. host/port stay as overrides for users who explicitly want to pin an endpoint via -turn-style flag in the future. Part of P6 in the connectivity-stability series. --- wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 4f73c16..70ef2d9 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -713,8 +713,11 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int) peerAddrStr := C.GoString(cPeerAddr) localAddrStr := C.GoString(cLocalAddr) + // host/port: empty by default so we use what VK API returned in + // turn_server.urls[0]. Override only if you know the TURN endpoint + // shouldn't track what VK responds with (e.g. pinning a stable IP). host := "" - port := "19302" + port := "" n := int(cN) udp := true From 250b51df180f9290471937a94019b0d3d05696f2 Mon Sep 17 00:00:00 2001 From: "Claude (Cowork)" Date: Tue, 12 May 2026 17:11:36 +0000 Subject: [PATCH 028/106] feat: proactive TURN/DTLS reconnect on iOS wake (P1) This is the highest-impact fix in the connectivity-stability series. Problem ------- PacketTunnelProvider's sleep() and wake() overrides were empty. When iOS suspends the Network Extension (screen lock, background, low memory), the Go runtime hosting the embedded TURN+DTLS proxy is frozen. By the time the device wakes up: - VK TURN has dropped the idle channel/allocation. - Cellular NAT mapping for our UDP socket is gone. - pion/dtls sequence numbers are outside the replay window. 
WireGuard immediately starts pumping packets through these zombie sockets, pion/dtls eventually times out and sends close_notify (seen on the server as 'Failed: EOF'), and only THEN does the client tear down and rebuild. That's the multi-minute 'nothing loads' gap. Fix --- Go side: - Maintain a registry of live DTLS/TURN session cancel-funcs. - Expose //export ProxyForceReconnect() that cancels every live session at once; the existing oneDtlsConnectionLoop / oneTurnConnectionLoop pick the cancellation up and rebuild on the standard backoff path. Swift side: - sleep() records the suspension timestamp. - wake() logs the suspension gap and calls ProxyForceReconnect() BEFORE WireGuard tries to use the tunnel, so the new allocation is in place before the first user packet hits it. Part of P1 in the connectivity-stability series. --- network-extension/PacketTunnelProvider.swift | 25 ++++++++++- .../Sources/WireGuardKitGo/turn_proxy.go | 41 +++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 6001bcc..14e2791 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -146,11 +146,32 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } override func sleep(completionHandler: @escaping () -> Void) { - // Add code here to get ready to sleep. + // iOS is about to suspend us. Don't tear anything down (iOS will + // resume us via wake()), but record the moment so wake() can decide + // whether the gap was long enough to need a fresh TURN allocation. + sharedLogger.log("System sleep — flagging proxy for reconnect on wake") + SharedLogger.info("System sleep — flagging proxy for reconnect on wake", source: .tunnel) + Self.lastSleepAt = Date() completionHandler() } override func wake() { - // Add code here to wake up. 
+ // After a sleep iOS thaws our Go runtime, but the TURN allocation + // and DTLS session held by the embedded vk-turn-proxy client are + // almost certainly stale — VK TURN drops idle channels, NAT + // mappings on the cellular side have expired, and pion/dtls + // sequence numbers can be outside the replay window. Force a + // clean reconnect before WireGuard starts pumping packets through + // zombie sockets. + let gap = Self.lastSleepAt.map { Date().timeIntervalSince($0) } ?? 0 + sharedLogger.log("System wake — gap=\(String(format: "%.1f", gap))s, forcing TURN/DTLS reconnect") + SharedLogger.info("System wake — gap=\(String(format: "%.1f", gap))s, forcing TURN/DTLS reconnect", source: .tunnel) + ProxyForceReconnect() + Self.lastSleepAt = nil } + + // Records when iOS told us to sleep so wake() can log the suspension gap. + // Static because PacketTunnelProvider instances are owned by the system + // and we want to survive whatever lifecycle iOS chooses. + private static var lastSleepAt: Date? } diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 70ef2d9..638e8c5 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -43,6 +43,43 @@ var proxyLoggerFunc C.proxy_logger_fn_t var proxyLoggerCtx unsafe.Pointer var proxyCancel context.CancelFunc +// Session registry — every live DTLS/TURN session registers its +// cancel func so ProxyForceReconnect() can tear them all down at once +// (e.g. when iOS wakes the device after sleep and we want fresh +// allocations before WireGuard resumes pumping packets). 
+var ( + sessionMu sync.Mutex + sessionCancels = map[uint64]context.CancelFunc{} + sessionIDSource uint64 +) + +func registerSession(cancel context.CancelFunc) func() { + id := atomic.AddUint64(&sessionIDSource, 1) + sessionMu.Lock() + sessionCancels[id] = cancel + sessionMu.Unlock() + return func() { + sessionMu.Lock() + delete(sessionCancels, id) + sessionMu.Unlock() + } +} + +//export ProxyForceReconnect +func ProxyForceReconnect() { + sessionMu.Lock() + cancels := make([]context.CancelFunc, 0, len(sessionCancels)) + for _, c := range sessionCancels { + cancels = append(cancels, c) + } + sessionMu.Unlock() + for _, c := range cancels { + c() + } + log.Printf("ProxyForceReconnect: cancelled %d live session(s)", len(cancels)) +} + + //export ProxySetLogger func ProxySetLogger(context unsafe.Pointer, loggerFn C.proxy_logger_fn_t) { proxyLoggerCtx = context @@ -259,6 +296,8 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa }() dtlsctx, dtlscancel := context.WithCancel(ctx) defer dtlscancel() + unregister := registerSession(dtlscancel) + defer unregister() var conn1, conn2 net.PacketConn conn1, conn2 = connutil.AsyncPacketPipe() go func() { @@ -495,6 +534,8 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD wg := sync.WaitGroup{} wg.Add(2) turnctx, turncancel := context.WithCancel(context.Background()) + unregister := registerSession(turncancel) + defer unregister() context.AfterFunc(turnctx, func() { if err := relayConn.SetDeadline(time.Now()); err != nil { log.Printf("Failed to set relay deadline: %s", err) From e355de6b14cbd499f3f7c3e6dda0f2a02ccc0889 Mon Sep 17 00:00:00 2001 From: "Claude (Cowork)" Date: Tue, 12 May 2026 17:12:30 +0000 Subject: [PATCH 029/106] feat: make TURN transport (UDP/TCP) configurable per profile (P2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream turnbridge hardcoded `udp := true` inside StartProxy. 
UDP is faster but silently loses state on cellular blips and after iOS suspends; TCP-via-STUNConn survives short outages via TCP buffers and retransmits. The CLI vk-turn-proxy client exposes -udp specifically for this reason. This commit: - StartProxy gains a cUDP C.int parameter - PacketTunnelProvider reads "useUDP" from providerConfiguration - VPNProfile gains a useUDP field (with backward-compat Codable for pre-existing UserDefaults profiles — default true) - TurnConfigImport supports optional `udp` field in the JSON - quick_link.py example shows the new field - TurnBridgeApp.turnOnTunnel takes useUDP and propagates to provider - ProfileStore legacy migrate path passes useUDP: true User-visible behavior unchanged when the field is absent or true. Setting it to false in the JSON / clipboard import enables TCP path for testing on bad networks. Part of P2 in the connectivity-stability series. --- TurnBridge/ConfigParser.swift | 2 ++ TurnBridge/ContentView.swift | 2 ++ TurnBridge/ProfileStore.swift | 1 + TurnBridge/TurnBridgeApp.swift | 7 +++--- TurnBridge/VPNProfile.swift | 24 ++++++++++++++++++- network-extension/PacketTunnelProvider.swift | 7 ++++-- quick_link.py | 3 +++ .../Sources/WireGuardKitGo/turn_proxy.go | 7 ++++-- 8 files changed, 45 insertions(+), 8 deletions(-) diff --git a/TurnBridge/ConfigParser.swift b/TurnBridge/ConfigParser.swift index be01d25..021f261 100644 --- a/TurnBridge/ConfigParser.swift +++ b/TurnBridge/ConfigParser.swift @@ -11,6 +11,8 @@ struct TurnConfigImport: Codable { let n: Int let wg: String let name: String? + /// Optional: transport to TURN server. true=UDP (default), false=TCP. + let udp: Bool? 
} enum ConfigParseError: LocalizedError { diff --git a/TurnBridge/ContentView.swift b/TurnBridge/ContentView.swift index 55d1788..76212d2 100755 --- a/TurnBridge/ContentView.swift +++ b/TurnBridge/ContentView.swift @@ -293,6 +293,7 @@ struct ContentView: View { peerAddr: profile.peerAddr, listenAddr: profile.listenAddr, nValue: profile.nValue, + useUDP: profile.useUDP, wgQuickConfig: profile.wgQuickConfig ) { isSuccess in if !isSuccess { @@ -332,6 +333,7 @@ struct ContentView: View { peerAddr: config.peer, listenAddr: config.listen, nValue: config.n, + useUDP: config.udp ?? true, wgQuickConfig: config.wg ) store.addProfile(profile) diff --git a/TurnBridge/ProfileStore.swift b/TurnBridge/ProfileStore.swift index ec0bcc5..c27cf78 100644 --- a/TurnBridge/ProfileStore.swift +++ b/TurnBridge/ProfileStore.swift @@ -84,6 +84,7 @@ class ProfileStore: ObservableObject { peerAddr: defaults.string(forKey: "peerAddr") ?? "", listenAddr: defaults.string(forKey: "listenAddr") ?? "127.0.0.1:9000", nValue: max(defaults.integer(forKey: "nValue"), 1), + useUDP: true, wgQuickConfig: defaults.string(forKey: "wgQuickConfig") ?? "" ) profiles = [profile] diff --git a/TurnBridge/TurnBridgeApp.swift b/TurnBridge/TurnBridgeApp.swift index 11455d9..3d82cc8 100755 --- a/TurnBridge/TurnBridgeApp.swift +++ b/TurnBridge/TurnBridgeApp.swift @@ -13,8 +13,8 @@ struct TurnBridge: App { } } - func turnOnTunnel(vkLink: String, peerAddr: String, listenAddr: String, nValue: Int, wgQuickConfig: String, completionHandler: @escaping (Bool) -> Void) { - SharedLogger.info("Connecting... peer=\(peerAddr), listen=\(listenAddr), n=\(nValue)") + func turnOnTunnel(vkLink: String, peerAddr: String, listenAddr: String, nValue: Int, useUDP: Bool, wgQuickConfig: String, completionHandler: @escaping (Bool) -> Void) { + SharedLogger.info("Connecting... 
peer=\(peerAddr), listen=\(listenAddr), n=\(nValue), udp=\(useUDP)") NETunnelProviderManager.loadAllFromPreferences { tunnelManagersInSettings, error in if let error = error { @@ -40,7 +40,8 @@ struct TurnBridge: App { "vkLink": vkLink, "peerAddr": peerAddr, "listenAddr": listenAddr, - "nValue": nValue + "nValue": nValue, + "useUDP": useUDP ] let defaults = UserDefaults.standard diff --git a/TurnBridge/VPNProfile.swift b/TurnBridge/VPNProfile.swift index 271c928..b3cc55d 100644 --- a/TurnBridge/VPNProfile.swift +++ b/TurnBridge/VPNProfile.swift @@ -7,15 +7,37 @@ struct VPNProfile: Codable, Identifiable, Equatable { var peerAddr: String var listenAddr: String var nValue: Int + /// Transport from client to TURN server. + /// true = UDP (faster, default; what upstream turnbridge has hardcoded) + /// false = TCP (more reliable over flaky cellular; survives short blips) + var useUDP: Bool var wgQuickConfig: String - init(id: UUID = UUID(), name: String = "", vkLink: String = "", peerAddr: String = "", listenAddr: String = "127.0.0.1:9000", nValue: Int = 1, wgQuickConfig: String = "") { + init(id: UUID = UUID(), name: String = "", vkLink: String = "", peerAddr: String = "", listenAddr: String = "127.0.0.1:9000", nValue: Int = 1, useUDP: Bool = true, wgQuickConfig: String = "") { self.id = id self.name = name self.vkLink = vkLink self.peerAddr = peerAddr self.listenAddr = listenAddr self.nValue = nValue + self.useUDP = useUDP self.wgQuickConfig = wgQuickConfig } + + // Backwards compatibility: older saved profiles in UserDefaults won't have useUDP. 
+ enum CodingKeys: String, CodingKey { + case id, name, vkLink, peerAddr, listenAddr, nValue, useUDP, wgQuickConfig + } + + init(from decoder: Decoder) throws { + let c = try decoder.container(keyedBy: CodingKeys.self) + id = try c.decode(UUID.self, forKey: .id) + name = try c.decode(String.self, forKey: .name) + vkLink = try c.decode(String.self, forKey: .vkLink) + peerAddr = try c.decode(String.self, forKey: .peerAddr) + listenAddr = try c.decode(String.self, forKey: .listenAddr) + nValue = try c.decode(Int.self, forKey: .nValue) + useUDP = (try? c.decode(Bool.self, forKey: .useUDP)) ?? true + wgQuickConfig = try c.decode(String.self, forKey: .wgQuickConfig) + } } diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 14e2791..c8c0794 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -76,14 +76,17 @@ class PacketTunnelProvider: NEPacketTunnelProvider { return } let nValue = Int32(nValueInt) + // Default true for backward-compat with profiles saved before this field existed. + let useUDP = (providerConfiguration["useUDP"] as? Bool) ?? true + let udpFlag: Int32 = useUDP ? 1 : 0 - SharedLogger.info("Peer: \(peerAddr), Listen: \(listenAddr), N: \(nValue)", source: .tunnel) + SharedLogger.info("Peer: \(peerAddr), Listen: \(listenAddr), N: \(nValue), UDP: \(useUDP)", source: .tunnel) SharedLogger.info("Starting TURN proxy...", source: .tunnel) ProxySetLogger(nil, goProxyCLoggerCallback) DispatchQueue.global(qos: .userInteractive).async { - StartProxy(vkLink, peerAddr, listenAddr, nValue) + StartProxy(vkLink, peerAddr, listenAddr, nValue, udpFlag) } DispatchQueue.global(qos: .userInteractive).async { [weak self] in diff --git a/quick_link.py b/quick_link.py index d40f4cf..166b658 100755 --- a/quick_link.py +++ b/quick_link.py @@ -7,6 +7,9 @@ "peer": "YOUR_SERVER_IP:PORT", "listen": "127.0.0.1:9000", "n": 1, + # Optional. 
true=UDP transport to TURN (default), false=TCP (more + # reliable on flaky cellular at the cost of head-of-line blocking). + "udp": True, "wg": """[Interface] PrivateKey = YOUR_CLIENT_PRIVATE_KEY Address = 10.100.0.2/32 diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 638e8c5..83875b3 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -747,7 +747,7 @@ func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { } //export StartProxy -func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int) { +func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, cUDP C.int) { select { case <-proxyReady: default: } link := C.GoString(cLink) @@ -760,7 +760,10 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int) host := "" port := "" n := int(cN) - udp := true + // udp transport to TURN. true=plain UDP (faster, fragile under loss), + // false=TCP STUNConn (survives short cellular blips at the cost of HoL). + udp := cUDP != 0 + log.Printf("StartProxy: peer=%s n=%d udp=%v", peerAddrStr, n, udp) ctx, cancel := context.WithCancel(context.Background()) proxyCancel = cancel From 6300a5a2f7da85cb49f7f6955baf6eedc5a140bf Mon Sep 17 00:00:00 2001 From: "Claude (Cowork)" Date: Tue, 12 May 2026 17:13:01 +0000 Subject: [PATCH 030/106] feat(turn_proxy): app-level DTLS keepalive every 5s (P5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Send a 4-byte sentinel (0xFF 0xFF 0xFF 0xFF) over the DTLS connection every 5 seconds, regardless of WireGuard activity, to keep the VK TURN ChannelData binding fresh and prevent the relay from dropping 'idle' channels. 
The sentinel is intentionally invalid as a WireGuard message (first byte must be 0x01-0x04 and length >= 32 bytes for WG transport data), so the server-side vk-turn-proxy can silently drop these packets in its read loop before they reach 127.0.0.1:51820. WireGuard itself would also reject them with a checksum/format failure, just less cheaply. This complements WG's own PersistentKeepalive=25 by working even when WG's goroutine is throttled by iOS (e.g. CPU pressure short of full suspend, where wake() doesn't fire but ticks are missed). Companion server-side patch needed in truvvor/vk-turn-proxy to explicitly filter the sentinel — see follow-up PR there. Part of P5 in the connectivity-stability series. --- .../Sources/WireGuardKitGo/turn_proxy.go | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 83875b3..e670d99 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -333,6 +333,39 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa } }() + // Application-level keepalive over DTLS. + // + // WireGuard's PersistentKeepalive=25 only fires when WG itself is + // running. When iOS throttles or briefly suspends the Network + // Extension, WG's goroutine can miss its tick and the DTLS path + // goes silent — the VK TURN relay then drops the channel binding + // as 'idle' and the next real packet finds a dead path. + // + // We send a tiny sentinel packet over the DTLS conn every 5s so + // the TURN ChannelData is refreshed regardless of WG state. + // + // Sentinel: 0xFF 0xFF 0xFF 0xFF — invalid first byte for any + // WireGuard message type (valid: 0x01-0x04) and below WG's 32-byte + // minimum, so server-side vk-turn-proxy can drop it cheaply before + // forwarding to wg-quick@wg0. 
See companion patch in + // truvvor/vk-turn-proxy server/. + go func() { + keepalive := []byte{0xFF, 0xFF, 0xFF, 0xFF} + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + for { + select { + case <-dtlsctx.Done(): + return + case <-ticker.C: + if _, werr := dtlsConn.Write(keepalive); werr != nil { + log.Printf("keepalive write failed: %s", werr) + return + } + } + } + }() + wg := sync.WaitGroup{} wg.Add(2) context.AfterFunc(dtlsctx, func() { From ef381cd4974a9075242618a67f7dd63ba9d53ccc Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 17:45:04 +0000 Subject: [PATCH 031/106] fix(turn_proxy): RestartProxy delegates to ProxyForceReconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the merge, the in-tunnel session registry was renamed: - cancelAllActiveDtls() → ProxyForceReconnect() (cgo export) - registerActiveDtlsCancel() → registerSession() turn_restart.go still referenced the old name and the Go build broke: ./turn_restart.go:18:10: undefined: cancelAllActiveDtls Both callers stay alive on iOS: - PacketTunnelProvider.swift:57 calls RestartProxy() from the debounced wake/path-change restartTransport(reason:) helper. - PacketTunnelProvider.swift:269 calls ProxyForceReconnect() directly from the newer P1 proactive-reconnect path. Keep RestartProxy as a thin wrapper so neither caller needs to change, and keep the "RestartProxy: cancelled N in-flight DTLS connection(s)" log line verbatim — TransportHealthMonitor.swift:25 pattern-matches it to flip the App Group "transport unhealthy" flag, and changing the text would silently break the UI banner. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/turn_restart.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go b/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go index 4b333b3..8b6e863 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_restart.go @@ -1,12 +1,11 @@ // SPDX-License-Identifier: MIT // -// Force the in-tunnel TURN/DTLS cycle to tear down and reconnect without -// having to stop and re-start the whole proxy from Swift. This is what -// PacketTunnelProvider hooks into for wake/sleep/network-change events: -// rather than waiting for the next WG packet to discover that the DTLS -// channel is dead, we cancel the currently active oneDtlsConnection -// goroutines and let the existing retry loop spin up fresh ones with -// the pool-cached TURN credentials (no captcha re-prompt). +// `RestartProxy` keeps existing iOS callers (PacketTunnelProvider's +// debounced wake/path-change path) wired up after the in-tunnel session +// registry was renamed. It delegates to ProxyForceReconnect, which owns +// the per-session cancel map. The log line is preserved verbatim so the +// extension's TransportHealthMonitor pattern-match still flips the +// "transport unhealthy" flag in App Group UserDefaults. 
package main @@ -15,10 +14,13 @@ import "log" //export RestartProxy func RestartProxy() { - n := cancelAllActiveDtls() + sessionMu.Lock() + n := len(sessionCancels) + sessionMu.Unlock() if n == 0 { log.Printf("RestartProxy: nothing to restart") return } + ProxyForceReconnect() log.Printf("RestartProxy: cancelled %d in-flight DTLS connection(s)", n) } From e9a8738e1e229fef2f4dd6bc2cb302f8f0b1a44f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 17:49:17 +0000 Subject: [PATCH 032/106] fix(wireguard.h): expose 5-arg StartProxy and ProxyForceReconnect Header drifted from the actual Go cgo exports after two recent merges: - P2 (configurable UDP/TCP transport) added a 5th int `udp` flag to the Go `StartProxy` signature and updated the Swift call site, but the bridging header still declared the old 4-arg version. Swift errored with "extra argument in call" because the imported C declaration capped it at 4 args. - P1 (proactive reconnect on wake) added the `ProxyForceReconnect` cgo export in turn_proxy.go but never declared it in wireguard.h, so Swift couldn't find the symbol at line 269. Both fixes are header-only; the Go-generated header and our hand-rolled header now agree on the same prototypes. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- wireguard-apple/Sources/WireGuardKitGo/wireguard.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h index 5897673..7e0b07d 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h +++ b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h @@ -32,9 +32,10 @@ extern char *LibXrayStopXray(); extern char *LibXrayXrayVersion(); extern char* LibXraySetSockCallback(libxray_sockcallback cb, void* ctx); -extern void StartProxy(const char *link, const char *peerAddrStr, const char *localAddrStr, int n); +extern void StartProxy(const char *link, const char *peerAddrStr, const char *localAddrStr, int n, int udp); extern void StopProxy(void); extern void RestartProxy(void); +extern void ProxyForceReconnect(void); extern void ProxySetLogger(void *context, logger_fn_t logger_fn); extern int ProxyWaitReady(int timeoutMs); From 5cfa1ce367449e2570521d508b388ff3a17a7892 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 18:13:26 +0000 Subject: [PATCH 033/106] debug: expose data-plane byte counters at every layer of the tunnel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User reports "I can't open a single site" while logs show DTLS/TURN/WG control plane is healthy (handshake response, keepalives flowing). Need to distinguish: 1. App traffic never reaches the tunnel (routing scope problem — split routing with manualCaptcha=true sends user requests straight to WiFi/cellular). 2. App traffic reaches WG but stops at the WG↔proxy boundary. 3. App traffic reaches the proxy but DTLS isn't flushing. 4. DTLS works but the TURN relay drops bytes. Hard to tell from control-plane logs alone, so wire byte counters at every pump: - Go `oneDtlsConnection`: atomic uint64 `wgToDtls` / `dtlsToWg` bumped after each successful read+write. 
A 10s ticker prints the totals and deltas; the session-end log line now also shows the final totals next to the lifetime. - Go `oneTurnConnection`: same pattern with `conn2ToRelay` / `relayToConn2`. Lets us pinpoint whether bytes die at the DTLS or TURN hop. - Swift `startWireGuardStatsLogger()`: polls `WireGuardAdapter.getRuntimeConfiguration` every 10s and logs `rx_bytes` / `tx_bytes` deltas, so we can see whether WG itself is seeing any application traffic via the TUN. - Swift `logRouteScope()`: dumps `includeAll` / `routeLAN` / `routeAPNs` / `routeCellular` / `manualCaptcha` from the provider configuration once at start, and explicitly warns when split routing is active so the "site won't load" symptom can be matched against "tunnel only carries control plane". Cleanup in `stopTunnel` cancels the new timer and resets the cached byte counts. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 81 +++++++++++++++++++ .../Sources/WireGuardKitGo/turn_proxy.go | 80 +++++++++++++++++- 2 files changed, 159 insertions(+), 2 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index d35f909..220efe5 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -44,6 +44,11 @@ class PacketTunnelProvider: NEPacketTunnelProvider { private var lastPathInterfaceLabel: String? private var lastTransportRestartAt = Date.distantPast + // WG byte counters logged every 10s by startWireGuardStatsLogger(). + private var wgStatsTimer: DispatchSourceTimer? + private var lastWGRxBytes: UInt64 = 0 + private var lastWGTxBytes: UInt64 = 0 + /// Tear down the current TURN/DTLS cycle and let the proxy spin up /// fresh inner connections, reusing cached credentials when possible /// (so no captcha re-prompt). 
Debounced to 5s to avoid stampedes when @@ -143,6 +148,8 @@ class PacketTunnelProvider: NEPacketTunnelProvider { let interfaceName = self.adapter.interfaceName ?? "unknown" sharedLogger.log("Tunnel interface is \(interfaceName)") SharedLogger.info("Tunnel up on interface \(interfaceName)", source: .wireguard) + self.logRouteScope() + self.startWireGuardStatsLogger() self.startNetworkMonitoring() } completionHandler(adapterError) @@ -150,6 +157,75 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } } + /// Dump what the NEPacketTunnelNetworkSettings actually told iOS to + /// route into the tunnel. With `manualCaptcha=true` the app sets + /// `includeAll=false` so the captcha sheet can hit VK directly, but + /// then the user's browser traffic ALSO bypasses the tunnel — every + /// URL the user opens goes straight to WiFi/cell, and the tunnel + /// only carries WG control-plane keepalives. This log makes that + /// state explicit instead of having to guess from byte counters. + private func logRouteScope() { + guard let settings = self.protocolConfiguration as? NETunnelProviderProtocol, + let cfg = settings.providerConfiguration else { + return + } + let includeAll = (cfg["includeAll"] as? Bool) ?? false + let lan = (cfg["routeLAN"] as? Bool) ?? false + let apns = (cfg["routeAPNs"] as? Bool) ?? false + let cell = (cfg["routeCellular"] as? Bool) ?? false + let manualCap = (cfg["manualCaptcha"] as? Bool) ?? false + SharedLogger.info( + "Tunnel routing scope: includeAll=\(includeAll) lan=\(lan) apns=\(apns) cellular=\(cell) manualCaptcha=\(manualCap)", + source: .tunnel + ) + if !includeAll { + SharedLogger.info( + "Split routing active — user traffic to public IPs goes around the tunnel via the underlying network", + source: .tunnel + ) + } + } + + /// Poll WireGuardAdapter every 10s for rx/tx counters and log them + /// alongside the Go-side DTLS/TURN byte counters. If WG itself sees + /// zero application bytes, the issue is above WG (TUN routing). 
If + /// WG sees bytes but the Go side stays at zero, the issue is in the + /// proxy. The deltas make it possible to tell at a glance whether + /// traffic is flowing. + private func startWireGuardStatsLogger() { + guard wgStatsTimer == nil else { return } + let timer = DispatchSource.makeTimerSource(queue: DispatchQueue.global(qos: .utility)) + timer.schedule(deadline: .now() + 10, repeating: 10) + timer.setEventHandler { [weak self] in + guard let self = self else { return } + self.adapter.getRuntimeConfiguration { config in + guard let config = config else { + SharedLogger.debug("WG stats: getRuntimeConfiguration returned nil", source: .wireguard) + return + } + var rx: UInt64 = 0 + var tx: UInt64 = 0 + for line in config.split(separator: "\n") { + if line.hasPrefix("rx_bytes=") { + rx = UInt64(line.dropFirst("rx_bytes=".count)) ?? 0 + } else if line.hasPrefix("tx_bytes=") { + tx = UInt64(line.dropFirst("tx_bytes=".count)) ?? 0 + } + } + let dRx = rx &- self.lastWGRxBytes + let dTx = tx &- self.lastWGTxBytes + self.lastWGRxBytes = rx + self.lastWGTxBytes = tx + SharedLogger.info( + "WG bytes rx=\(rx) (Δ+\(dRx)) tx=\(tx) (Δ+\(dTx))", + source: .wireguard + ) + } + } + timer.resume() + wgStatsTimer = timer + } + private func describe(_ status: Network.NWPath.Status) -> String { switch status { case .satisfied: return "satisfied" @@ -217,6 +293,11 @@ class PacketTunnelProvider: NEPacketTunnelProvider { lastPathStatus = nil lastPathInterfaceLabel = nil + wgStatsTimer?.cancel() + wgStatsTimer = nil + lastWGRxBytes = 0 + lastWGTxBytes = 0 + StopProxy() SharedLogger.info("TURN proxy stopped", source: .tunnel) TransportHealthMonitor.reset() diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index be87276..42258e4 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -291,8 +291,22 @@ func oneDtlsConnection(ctx context.Context, 
peer *net.UDPAddr, listenConn net.Pa var err error = nil defer func() { c1 <- err }() sessionStart := time.Now() + + // Data-plane byte counters for this DTLS session. The two directions: + // wgToDtls: bytes read from listenConn (WG ciphertext at :9000) + // and written into dtlsConn (towards the TURN relay). + // dtlsToWg: bytes read from dtlsConn (decrypted DTLS payload + // coming back from the relay) and written into + // listenConn (towards the WG client). + // A periodic logger below prints both totals and 10s deltas so we + // can tell whether user traffic is actually flowing through the + // tunnel or whether it's just WG control-plane keepalives. + var wgToDtls, dtlsToWg atomic.Uint64 + defer func() { - log.Printf("DTLS session lifetime=%s exit=%v", time.Since(sessionStart).Round(time.Millisecond), err) + log.Printf("DTLS session lifetime=%s wg→dtls=%dB dtls→wg=%dB exit=%v", + time.Since(sessionStart).Round(time.Millisecond), + wgToDtls.Load(), dtlsToWg.Load(), err) }() dtlsctx, dtlscancel := context.WithCancel(ctx) defer dtlscancel() @@ -398,6 +412,30 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa }() var addr atomic.Value + + // Every 10s, dump the current totals and the deltas since the last + // tick. If user opens a website and these stay at +0 in both + // directions, no user traffic is reaching the DTLS layer (most + // likely the routing config is sending the request straight to + // WiFi/cellular, bypassing the tunnel entirely). 
+ go func() { + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + var prevTx, prevRx uint64 + for { + select { + case <-dtlsctx.Done(): + return + case <-ticker.C: + tx := wgToDtls.Load() + rx := dtlsToWg.Load() + log.Printf("DTLS bytes wg→dtls=%d (Δ+%d) dtls→wg=%d (Δ+%d)", + tx, tx-prevTx, rx, rx-prevRx) + prevTx, prevRx = tx, rx + } + } + }() + go func() { defer wg.Done() defer dtlscancel() @@ -421,6 +459,7 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa log.Printf("Failed: %s", err1) return } + wgToDtls.Add(uint64(n)) } }() @@ -451,6 +490,7 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa log.Printf("Failed: %s", err1) return } + dtlsToWg.Add(uint64(n)) } }() @@ -479,8 +519,22 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD var err error = nil defer func() { c <- err }() sessionStart := time.Now() + + // Data-plane byte counters on the TURN side. The two directions: + // conn2ToRelay: bytes read from conn2 (decrypted DTLS output that + // represents the WG packet) and written into + // relayConn (towards the WG server via the TURN + // server's relay). + // relayToConn2: bytes coming back from the relay and pushed into + // conn2 (which DTLS will re-encrypt for the client). + // Periodic logger below mirrors the DTLS-side counters so a missing + // data path can be pinpointed to either the DTLS or TURN layer. 
+ var conn2ToRelay, relayToConn2 atomic.Uint64 + defer func() { - log.Printf("TURN session lifetime=%s exit=%v", time.Since(sessionStart).Round(time.Millisecond), err) + log.Printf("TURN session lifetime=%s conn2→relay=%dB relay→conn2=%dB exit=%v", + time.Since(sessionStart).Round(time.Millisecond), + conn2ToRelay.Load(), relayToConn2.Load(), err) }() user, pass, url, err1 := turnParams.getCreds(turnParams.link) if err1 != nil { @@ -628,6 +682,7 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD log.Printf("Failed: %s", err1) return } + conn2ToRelay.Add(uint64(n)) } }() @@ -658,6 +713,27 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD log.Printf("Failed: %s", err1) return } + relayToConn2.Add(uint64(n)) + } + }() + + // Periodic counter dump every 10s so we can see whether the relay + // is actually carrying bytes or only the wakeup keepalives. + go func() { + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + var prevTx, prevRx uint64 + for { + select { + case <-turnctx.Done(): + return + case <-ticker.C: + tx := conn2ToRelay.Load() + rx := relayToConn2.Load() + log.Printf("TURN bytes conn2→relay=%d (Δ+%d) relay→conn2=%d (Δ+%d)", + tx, tx-prevTx, rx, rx-prevRx) + prevTx, prevRx = tx, rx + } } }() From 3b9c1d1b77e197ef77d5306c01c5702bbab65e00 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 18:25:18 +0000 Subject: [PATCH 034/106] fix(tunnel): logRouteScope reads AllowedIPs + UserDefaults manualCaptcha MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous version read routeLAN/routeAPNs/routeCellular/manualCaptcha from providerConfiguration, but the app puts none of those keys there — it bakes routing into the peer's AllowedIPs and stashes manualCaptcha in the App Group's shared UserDefaults. 
As a result, the new log line reported `manualCaptcha=false lan=false ...` even when the app log right above it showed manualCaptcha=true, LAN=true. Misleading, so fix the sources. The byte counters added in the previous commit are doing their job and show the tunnel is actually carrying ~80–100 KB/s symmetric. That rules out "no traffic in tunnel" as the cause of the user's "can't open any site" — the data plane is fine, the problem is downstream of the WG server (DNS resolution, server-side NAT/forwarding, or PMTU). https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 56 +++++++++++++------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 220efe5..747f7ba 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -157,30 +157,50 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } } - /// Dump what the NEPacketTunnelNetworkSettings actually told iOS to - /// route into the tunnel. With `manualCaptcha=true` the app sets - /// `includeAll=false` so the captcha sheet can hit VK directly, but - /// then the user's browser traffic ALSO bypasses the tunnel — every - /// URL the user opens goes straight to WiFi/cell, and the tunnel - /// only carries WG control-plane keepalives. This log makes that - /// state explicit instead of having to guess from byte counters. + /// Dump what is actually going into the tunnel. The previous version + /// of this method read `routeLAN`/`manualCaptcha` from + /// `providerConfiguration`, but the app never puts those keys + /// there — it bakes the routing decision into the WG peer's + /// `AllowedIPs` and reads `manualCaptcha` from the App Group's + /// shared UserDefaults. The result: this log was reporting false + /// for everything regardless of the actual UI state. 
Fixed to read + /// the same sources the rest of the extension uses. private func logRouteScope() { - guard let settings = self.protocolConfiguration as? NETunnelProviderProtocol, - let cfg = settings.providerConfiguration else { - return + // Manual-captcha flag is the app-group setting that the rest of + // PacketTunnelProvider already reads (see startTunnel:104). + let manualCap = UserDefaults(suiteName: CaptchaIPC.appGroupID)? + .bool(forKey: "manualCaptcha") ?? false + + // The peer's AllowedIPs is the source of truth for what the OS + // routes into utun. With AllowedIPs=0.0.0.0/0, ::/0 everything + // goes through; with a narrow LAN list, the user's browser + // traffic exits via the underlying interface and only LAN/peer + // traffic uses the tunnel. + var allowedIPs: [String] = [] + if let settings = self.protocolConfiguration as? NETunnelProviderProtocol, + let cfg = settings.providerConfiguration, + let wgQuick = cfg["wgQuickConfig"] as? String { + for raw in wgQuick.split(separator: "\n") { + let line = raw.trimmingCharacters(in: .whitespaces) + if line.lowercased().hasPrefix("allowedips") { + if let eq = line.firstIndex(of: "=") { + let value = line[line.index(after: eq)...] + .trimmingCharacters(in: .whitespaces) + allowedIPs = value.split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespaces) } + } + } + } } - let includeAll = (cfg["includeAll"] as? Bool) ?? false - let lan = (cfg["routeLAN"] as? Bool) ?? false - let apns = (cfg["routeAPNs"] as? Bool) ?? false - let cell = (cfg["routeCellular"] as? Bool) ?? false - let manualCap = (cfg["manualCaptcha"] as? Bool) ?? false + + let isFullTunnel = allowedIPs.contains { $0 == "0.0.0.0/0" || $0 == "::/0" } SharedLogger.info( - "Tunnel routing scope: includeAll=\(includeAll) lan=\(lan) apns=\(apns) cellular=\(cell) manualCaptcha=\(manualCap)", + "Tunnel routing scope: AllowedIPs=\(allowedIPs.isEmpty ? "?" 
: allowedIPs.joined(separator: ",")) fullTunnel=\(isFullTunnel) manualCaptcha=\(manualCap)", source: .tunnel ) - if !includeAll { + if !isFullTunnel { SharedLogger.info( - "Split routing active — user traffic to public IPs goes around the tunnel via the underlying network", + "Split tunnel: only AllowedIPs subnets go via utun, the user's browser traffic exits via the underlying network", source: .tunnel ) } From 377b569d03401575ea08aa0a982839b4e6a6c778 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 18:33:17 +0000 Subject: [PATCH 035/106] perf(turn_proxy): bump UDP socket buffers to 4 MB on both wire sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User reports the tunnel carries ~80–100 KB/s symmetrically (byte counters in the last commit confirmed this end-to-end) but sites "barely load and don't finish". MTU all the way down to 900 didn't help, DNS resolves. Classic packet-loss pattern: TCP gets some of each burst, retransmits the rest, eventually times out. The internal connutil pipe between DTLS and TURN is unbounded so nothing dies there. The real bottleneck is the two actual wire UDP sockets, both on default iOS buffers (~196 KB SO_RCVBUF): - listenConn (net.ListenPacket "udp" 127.0.0.1:9000) — receives WG encrypted output. A page load fires 50–100 1.2 KB packets in a few ms. - turnConn (net.DialUDP to VK's TURN relay) — receives the page response back across a 30–100 ms RTT cellular path. Either queue can overrun before the read goroutine drains it, and the kernel drops the overflow with no signal to userspace. Add udp_buffers.go with `tuneUDPBuffers(label, conn)`: - SetReadBuffer / SetWriteBuffer to 4 MB on each. - SyscallConn + getsockopt readback so the log shows what the kernel actually granted (kern.ipc.maxsockbuf may cap the request silently, and Darwin reports 2x what we asked for). - Wired into both socket creation sites in turn_proxy.go. 
Doesn't address VK relay's own bandwidth ceiling, but ensures we aren't shooting ourselves in the foot before traffic even reaches the wire. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/turn_proxy.go | 11 +++ .../Sources/WireGuardKitGo/udp_buffers.go | 88 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 wireguard-apple/Sources/WireGuardKitGo/udp_buffers.go diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 42258e4..a150073 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -579,6 +579,11 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD return } }() + // Same buffer concern as listenConn, but on the wire side: a + // page-load burst arrives at the device from the relay over a + // 30–100 ms RTT path, and any backlog the kernel can't queue + // gets dropped silently — TCP then retransmits and stalls. + tuneUDPBuffers("turnConn", conn) turnConn = &connectedUDPConn{conn} } else { conn, err2 := d.DialContext(ctx1, "tcp", turnServerAddr) // nolint: noctx @@ -936,6 +941,12 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, log.Printf("Failed to listen: %s", err) return } + // Bump the WG↔proxy UDP socket buffers. Default iOS UDP recv buffer + // is ~196 KB; a single page load can burst 50–100 1.2 KB packets at + // once, overflowing the kernel queue before our read goroutine + // drains it. The kernel may cap the request below 4 MB depending on + // kern.ipc.maxsockbuf — log what we actually got. 
+ tuneUDPBuffers("listenConn", listenConn) context.AfterFunc(ctx, func() { if closeErr := listenConn.Close(); closeErr != nil { diff --git a/wireguard-apple/Sources/WireGuardKitGo/udp_buffers.go b/wireguard-apple/Sources/WireGuardKitGo/udp_buffers.go new file mode 100644 index 0000000..0420ee1 --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/udp_buffers.go @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: MIT +// +// UDP socket-buffer tuning helper for the two real wire sockets in +// turn_proxy.go (listenConn on 127.0.0.1:9000 and turnConn to VK's +// relay). Default iOS UDP RCVBUF/SNDBUF is in the ~196 KB ballpark, +// which is fine for the audio/video calling that VK's TURN servers +// were originally built for but too small for the bursty packet +// pattern of a web page load tunneled over WG: 50–100 1.2 KB packets +// arrive within a few ms and the kernel drops anything that can't fit +// the queue before the read goroutine drains it. +// +// We try to raise both buffers to 4 MB. The kernel may cap the actual +// size below the request (iOS uses `kern.ipc.maxsockbuf`, typically +// 8 MB), so we log what we actually got via SO_RCVBUF / SO_SNDBUF +// readback so a future "still losing packets" report can be diagnosed. + +package main + +import ( + "log" + "net" + "syscall" +) + +const udpBufferTarget = 4 * 1024 * 1024 // 4 MB + +// udpBufferTuner is the smallest interface that both +// `net.PacketConn` (listenConn) and `*net.UDPConn` (turnConn) satisfy +// for setting socket buffer sizes. +type udpBufferTuner interface { + SetReadBuffer(bytes int) error + SetWriteBuffer(bytes int) error +} + +// tuneUDPBuffers requests larger socket buffers and logs the result. +// On Darwin the actual buffer size is 2x the value you ask for (the +// kernel accounts for control overhead), so the SO_RCVBUF/SO_SNDBUF +// readback can look bigger than `udpBufferTarget` — that's fine. 
+func tuneUDPBuffers(label string, conn interface{}) { + t, ok := conn.(udpBufferTuner) + if !ok { + log.Printf("%s: cannot tune UDP buffers (unsupported type %T)", label, conn) + return + } + if err := t.SetReadBuffer(udpBufferTarget); err != nil { + log.Printf("%s: SetReadBuffer(%d) failed: %v", label, udpBufferTarget, err) + } + if err := t.SetWriteBuffer(udpBufferTarget); err != nil { + log.Printf("%s: SetWriteBuffer(%d) failed: %v", label, udpBufferTarget, err) + } + + // Read back the kernel-accepted values via SyscallConn so we know + // whether the request was honoured or silently capped. + rcv, snd := readbackBuffers(conn) + log.Printf("%s: UDP buffers tuned: SO_RCVBUF=%d SO_SNDBUF=%d (target=%d)", + label, rcv, snd, udpBufferTarget) +} + +func readbackBuffers(conn interface{}) (rcv, snd int) { + type syscallable interface { + SyscallConn() (syscall.RawConn, error) + } + sc, ok := conn.(syscallable) + if !ok { + return 0, 0 + } + raw, err := sc.SyscallConn() + if err != nil { + return 0, 0 + } + _ = raw.Control(func(fd uintptr) { + if v, err := syscall.GetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_RCVBUF); err == nil { + rcv = v + } + if v, err := syscall.GetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_SNDBUF); err == nil { + snd = v + } + }) + return rcv, snd +} + +// Type-assertion guard: net.PacketConn returned by net.ListenPacket +// for the "udp" network is concretely *net.UDPConn, which satisfies +// both udpBufferTuner and the SyscallConn interface. Compile-time +// sanity check so we don't drift. +var ( + _ udpBufferTuner = (*net.UDPConn)(nil) +) From 38c2c662dd3c14f5ae2a148dc8eddcde7fedeb2e Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 18:37:04 +0000 Subject: [PATCH 036/106] ui: stop flashing the "unstable" banner during normal wake reconnects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The transport health banner was triggering every iOS sleep→wake cycle. 
The cycle is healthy by design — ProxyForceReconnect tears down the old DTLS+TURN sessions (which emits a burst of `Failed:` and `Closed DTLS connection` log lines, all mapped to markDead()) and within 1–3 s a fresh session establishes and fires markAlive(). The 5-second UI poller kept catching that 1–3 s window where dead > alive, flashing the orange banner for a single tick before the next refresh cleared it. From the user's POV this looks like "connection unstable" warnings firing several times an hour even though the tunnel is fine. Add a 10 s grace period after the most recent dead event: a fresh alive within that window cancels the would-be banner. The 30 s alive-staleness rule is unchanged, so a real dead transport (alive hasn't fired in 30 s) still surfaces. Doesn't change the underlying transport behaviour — only the heuristic that decides when to display the banner. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/TransportHealthBanner.swift | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/TurnBridge/TransportHealthBanner.swift b/TurnBridge/TransportHealthBanner.swift index db6d3b5..7cb8766 100644 --- a/TurnBridge/TransportHealthBanner.swift +++ b/TurnBridge/TransportHealthBanner.swift @@ -33,14 +33,32 @@ final class TransportHealthState: ObservableObject { let alive = defaults.object(forKey: "transport.lastAliveAt") as? Date let dead = defaults.object(forKey: "transport.lastDeadAt") as? Date guard let alive = alive else { - // Never alive yet → not necessarily stalled (still connecting). + // Never alive yet → still connecting, not stalled. isStalled = false return } let now = Date() - let lastDeadAfterAlive = (dead.map { $0 > alive } ?? false) + + // Every iOS sleep → wake cycle tears down the DTLS+TURN + // sessions and immediately re-establishes new ones. 
That + // teardown emits a burst of `Failed:` / `Closed DTLS + // connection` log lines (which TransportHealthMonitor maps to + // markDead()), and then ~1–3 seconds later the new session + // emits `Established DTLS connection` → markAlive(). If we + // flip the banner the moment dead > alive, the 5-second poller + // catches that 1–3s window and shows the banner regularly + // even though everything is fine. + // + // So allow a 10 s grace period after the most recent dead + // event for a fresh alive to come in. Only after that grace + // expires without a new alive do we call the transport + // stalled. let aliveStale = now.timeIntervalSince(alive) > 30 - isStalled = lastDeadAfterAlive || aliveStale + let deadAfterAlive: Bool = { + guard let dead = dead, dead > alive else { return false } + return now.timeIntervalSince(dead) > 10 + }() + isStalled = aliveStale || deadAfterAlive } } From a981ec71446eedb9ade13a3ab09786ad6e2d8b39 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 10:22:59 +0000 Subject: [PATCH 037/106] fix(turn_proxy): fan-out shared listenConn so nValue>1 actually parallelises MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now StartProxy created a single net.PacketConn for 127.0.0.1:9000 and handed the same conn to every oneDtlsConnection goroutine. Each of them blocked on listenConn.ReadFrom on the same socket, so when WG sent a packet the kernel woke exactly one waiter — almost always the same one due to scheduling — and the other N-1 sessions sat idle. nValue=3 in the profile therefore did NOT triple throughput; it just spawned two unused TURN allocations. This silently neuters the only documented workaround for VK's per-allocation rate limit. Add udp_fanout.go: one dispatcher goroutine reads the real listenConn and round-robins each packet into one of N fanoutPacketConn channels. Each session owns its own fanout, so the N DTLS+TURN pipelines genuinely run in parallel. 
Replies still go out the shared port via WriteTo on the underlying socket, and the fanout dispatcher logs per-session drop counters every 10 s so a slow consumer is visible (rather than growing memory like the unbounded AsyncPacketPipe would). WG tolerates per-packet reordering within its RFC 6479 anti-replay sliding window (thousands of packets wide in wireguard-go, not 32), so a 3-way per-packet round-robin is safe. Pre-empts what would otherwise have been a confusing diagnostic pass: running the planned A/B "n=1 vs n=3 throughput" test would have shown no improvement and falsely cleared the VK-rate-limit hypothesis. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/turn_proxy.go | 57 +++- .../Sources/WireGuardKitGo/udp_fanout.go | 246 ++++++++++++++++++ 2 files changed, 290 insertions(+), 13 deletions(-) create mode 100644 wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index a150073..6bb204d 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -947,22 +947,48 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, // drains it. The kernel may cap the request below 4 MB depending on // kern.ipc.maxsockbuf — log what we actually got. tuneUDPBuffers("listenConn", listenConn) - + context.AfterFunc(ctx, func() { if closeErr := listenConn.Close(); closeErr != nil { log.Printf("Failed to close local connection: %s", closeErr) } }) - go func() { - for { - select { - case <-ctx.Done(): - return - case listenConnChan <- listenConn: + // Per-session fan-out of the shared listenConn. Without this, all + // N oneDtlsConnection goroutines call ReadFrom on the same UDP + // socket, the kernel wakes only one of them, and the other N-1 + // sessions sit idle — silently defeating nValue>1.
The dispatcher + // reads once and round-robins each WG packet to one of N + // fanoutPacketConn channels; each session reads from its own. + // Writes still go straight back to the real listenConn so replies + // from any session reach the WG client. + fanouts := make([]*fanoutPacketConn, n) + for i := range fanouts { + fanouts[i] = newFanoutPacketConn(i, listenConn) + } + startFanoutDispatcher(ctx, listenConn, fanouts) + log.Printf("fanout: dispatcher up with %d virtual conn(s)", n) + + // Each oneDtlsConnectionLoop wants a chan that endlessly redelivers + // its private listen-side conn. Spawn one such chan per fanout. + makeFanoutChan := func(f net.PacketConn) chan net.PacketConn { + ch := make(chan net.PacketConn) + go func() { + for { + select { + case <-ctx.Done(): + return + case ch <- f: + } } - } - }() + }() + return ch + } + + // listenConnChan kept for the type signature only — the original + // goroutine that fed the shared listenConn is replaced by the + // per-fanout chans below. + _ = listenConnChan wg1 := sync.WaitGroup{} t := time.Tick(200 * time.Millisecond) @@ -970,8 +996,12 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, okchan := make(chan struct{}) connchan := make(chan net.PacketConn) + // Session 0 first; later sessions wait until it has solved captcha + // and we know the upstream is alive (okchan signal). Otherwise we'd + // fire N parallel captcha challenges at the user. 
+ firstFanoutChan := makeFanoutChan(fanouts[0]) wg1.Go(func() { - oneDtlsConnectionLoop(ctx, peer, listenConnChan, connchan, okchan) + oneDtlsConnectionLoop(ctx, peer, firstFanoutChan, connchan, okchan) }) wg1.Go(func() { oneTurnConnectionLoop(ctx, params, peer, connchan, t) @@ -982,17 +1012,18 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, case <-ctx.Done(): } - for i := 0; i < n-1; i++ { + for i := 1; i < n; i++ { + fanoutChan := makeFanoutChan(fanouts[i]) cChan := make(chan net.PacketConn) wg1.Go(func() { - oneDtlsConnectionLoop(ctx, peer, listenConnChan, cChan, nil) + oneDtlsConnectionLoop(ctx, peer, fanoutChan, cChan, nil) }) wg1.Go(func() { oneTurnConnectionLoop(ctx, params, peer, cChan, t) }) } - log.Printf("Proxy started on %s", localAddrStr) + log.Printf("Proxy started on %s with %d parallel TURN session(s)", localAddrStr, n) wg1.Wait() } diff --git a/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go b/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go new file mode 100644 index 0000000..2473083 --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: MIT +// +// UDP fan-out so N parallel TURN allocations actually share the WG +// upstream traffic instead of all sleeping on the same listenConn. +// +// Background: StartProxy creates ONE net.PacketConn for 127.0.0.1:9000 +// (the WG client's UDP endpoint) and previously handed the same +// PacketConn to every oneDtlsConnection goroutine. When WG sends a +// packet, the kernel wakes ONE waiting goroutine — usually the same +// one consistently due to scheduling — so the other N-1 sessions sit +// idle. Setting nValue=3 in the profile then doesn't actually +// triple throughput, which silently defeats the whole reason to +// run multiple TURN allocations. +// +// Fix: a dispatcher goroutine reads from the real listenConn and +// round-robins each packet into one of N fanoutPacketConn channels. 
+// Each fanoutPacketConn satisfies net.PacketConn, so it drops in +// where listenConn used to be passed without changing the +// oneDtlsConnection signature. WriteTo delegates straight back to +// the real socket — replies from all N sessions go out the same +// shared port to the same WG client address. +// +// WG itself is robust to per-packet reordering within its sliding replay +// window of thousands of packets (RFC 6479 + WireGuard whitepaper §5.4.6), so a 3-way +// round-robin is safe. Round-robin is per-packet rather than per-flow +// because there's only ever one flow on this socket (one WG client +// instance). + +package main + +import ( + "context" + "errors" + "log" + "net" + "os" + "sync" + "sync/atomic" + "time" +) + +// fanoutQueueDepth is the per-virtual-conn buffer size. Big enough +// to absorb a page-load burst (50–100 packets in a few ms) without +// blocking the dispatcher, small enough to make a slow consumer +// visible via the dropped-packet counter rather than via memory +// growth (which is what AsyncPacketPipe already does silently). +const fanoutQueueDepth = 256 + +type fanoutPacket struct { + data []byte + addr net.Addr +} + +// fanoutPacketConn is the per-DTLS-session view of the shared +// listenConn. Reads come from a private channel filled by the +// dispatcher; writes go straight to the underlying socket. +type fanoutPacketConn struct { + id int + real net.PacketConn + incoming chan fanoutPacket + + closeOnce sync.Once + closed chan struct{} + + // deadline state: SetReadDeadline(past time) is the standard + // "interrupt the in-flight read" idiom used by oneDtlsConnection's + // context.AfterFunc cleanup. We mirror that with a wakeup channel + // that ReadFrom selects on.
+ deadlineMu sync.Mutex + wakeup chan struct{} + deadlineTimer *time.Timer + + dropped atomic.Uint64 // packets the dispatcher tried to enqueue but the channel was full +} + +func newFanoutPacketConn(id int, real net.PacketConn) *fanoutPacketConn { + return &fanoutPacketConn{ + id: id, + real: real, + incoming: make(chan fanoutPacket, fanoutQueueDepth), + closed: make(chan struct{}), + wakeup: make(chan struct{}), + } +} + +func (f *fanoutPacketConn) ReadFrom(p []byte) (int, net.Addr, error) { + select { + case pkt, ok := <-f.incoming: + if !ok { + return 0, nil, net.ErrClosed + } + n := copy(p, pkt.data) + return n, pkt.addr, nil + case <-f.closed: + return 0, nil, net.ErrClosed + case <-f.wakeup: + return 0, nil, os.ErrDeadlineExceeded + } +} + +func (f *fanoutPacketConn) WriteTo(p []byte, addr net.Addr) (int, error) { + return f.real.WriteTo(p, addr) +} + +func (f *fanoutPacketConn) Close() error { + f.closeOnce.Do(func() { close(f.closed) }) + return nil +} + +func (f *fanoutPacketConn) LocalAddr() net.Addr { return f.real.LocalAddr() } + +func (f *fanoutPacketConn) SetDeadline(t time.Time) error { + if err := f.SetReadDeadline(t); err != nil { + return err + } + return f.SetWriteDeadline(t) +} + +func (f *fanoutPacketConn) SetReadDeadline(t time.Time) error { + f.deadlineMu.Lock() + defer f.deadlineMu.Unlock() + + if f.deadlineTimer != nil { + f.deadlineTimer.Stop() + f.deadlineTimer = nil + } + + // Empty time → clear deadline. Replace wakeup so future ReadFrom + // calls don't immediately fail. + if t.IsZero() { + select { + case <-f.wakeup: + // Was closed; create a fresh one so subsequent reads don't fail. + f.wakeup = make(chan struct{}) + default: + } + return nil + } + + wait := time.Until(t) + if wait <= 0 { + // Already past — interrupt any current ReadFrom immediately. + select { + case <-f.wakeup: + // Already closed, nothing to do. 
+ default: + close(f.wakeup) + } + return nil + } + + // Future deadline — arm a timer to close wakeup at the right moment. + f.deadlineTimer = time.AfterFunc(wait, func() { + f.deadlineMu.Lock() + defer f.deadlineMu.Unlock() + select { + case <-f.wakeup: + default: + close(f.wakeup) + } + }) + return nil +} + +func (f *fanoutPacketConn) SetWriteDeadline(t time.Time) error { + // The real listenConn's deadline is shared across all fanouts, so + // honoring it here would break the other sessions. We don't use + // write deadlines anywhere in oneDtlsConnection's actual data + // path, so this is safe to ignore. + return nil +} + +// startFanoutDispatcher spawns one goroutine that drains the shared +// listenConn and distributes packets round-robin into the N fanouts. +// On listenConn close it tears down all fanouts. +func startFanoutDispatcher(ctx context.Context, listenConn net.PacketConn, fanouts []*fanoutPacketConn) { + go func() { + defer func() { + for _, f := range fanouts { + f.Close() + } + }() + + buf := make([]byte, 1600) + var rrIdx uint64 + var dropped uint64 + + // Periodic dispatcher health log, decoupled from the per-fanout + // session logs so a stalled consumer is visible even if its + // owning session never logs. 
+ ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + go func() { + var prevDrop uint64 + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + var perFanoutDrop []uint64 + for _, f := range fanouts { + perFanoutDrop = append(perFanoutDrop, f.dropped.Load()) + } + curDrop := atomic.LoadUint64(&dropped) + log.Printf("fanout: total dropped=%d (Δ+%d) per-session=%v", curDrop, curDrop-prevDrop, perFanoutDrop) + prevDrop = curDrop + } + } + }() + + for { + n, addr, err := listenConn.ReadFrom(buf) + if err != nil { + if errors.Is(err, net.ErrClosed) || errors.Is(err, os.ErrDeadlineExceeded) { + log.Printf("fanout: dispatcher exiting: %s", err) + return + } + log.Printf("fanout: dispatcher read error: %s", err) + return + } + + // Copy because buf is reused next iteration. + data := make([]byte, n) + copy(data, buf[:n]) + + // Pick the next fanout. Use atomic counter so a future + // flow-hash dispatch could swap in here without changing + // the rest of the loop. + i := atomic.AddUint64(&rrIdx, 1) % uint64(len(fanouts)) + f := fanouts[i] + + select { + case f.incoming <- fanoutPacket{data: data, addr: addr}: + case <-ctx.Done(): + return + default: + // Consumer is too slow — drop this packet and account + // for it. Better than blocking the dispatcher (which + // would also stall the other N-1 fanouts) or growing + // the channel unbounded. + f.dropped.Add(1) + atomic.AddUint64(&dropped, 1) + } + } + }() +} From ca8b19f0dc24083da59fb0ec2ab4cb0eec09c886 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 15:44:20 +0000 Subject: [PATCH 038/106] fix(turn_proxy): defer proxyReady until ALL N captchas are solved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User correctly pointed out: with nValue>1 the second and third captcha sheets never load. Sequence of events: 1. StartProxy spawns session 0, user solves captcha #1 2. session 0 establishes DTLS, oneDtlsConnection signals proxyReady 3. 
Swift's ProxyWaitReady returns, adapter.start(tunnelConfiguration) runs, iOS installs AllowedIPs=0.0.0.0/0 routes into utun 4. StartProxy moves on to spawn sessions 1..N-1 5. Each of those calls getCreds → triggers a Manual captcha request 6. Swift shows the captcha WebView, which tries to load id.vk.ru 7. id.vk.ru now resolves to a public IP → matches default route → routes through utun → through the half-built tunnel that only has 1 of N TURN allocations up and is the very thing we're trying to fix → captcha sheet never finishes loading and the connect flow hangs forever The fix is to delay proxyReady until ALL N sessions have established their DTLS+TURN allocations. Two changes: - oneDtlsConnection no longer sends to proxyReady. The comment there explains why; it stays the per-session ready signal on okchan. - StartProxy now spawns sessions one at a time, waits for each session's okchan, and only sends to proxyReady after the last one is up. iOS therefore only brings up utun after every captcha has been solved and every TURN allocation is alive — the WebView for captchas 2..N still has direct internet access (utun isn't up yet) and can load id.vk.ru normally. Captchas were already serialised by the UI (one sheet at a time), so this costs nothing in latency, only makes the existing implicit serialisation explicit on the Go side. For nValue=1 the new code path is identical to the old one (single loop iteration, then signal proxyReady) — no regression. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/turn_proxy.go | 70 +++++++++++++------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index 6bb204d..bd2e729 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -336,7 +336,16 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa log.Printf("Closed DTLS connection\n") }() log.Printf("Established DTLS connection!\n") - select { case proxyReady <- struct{}{}: default: } + // NOTE: do NOT signal proxyReady here. Signalling it the moment + // the FIRST DTLS session establishes causes Swift to call + // adapter.start() and iOS to bring up utun with the WG config's + // AllowedIPs=0.0.0.0/0 routing. If the user has nValue>1, the + // remaining N-1 sessions still need to fetch fresh VK creds — + // and that means the manual-captcha WebView in the app tries to + // load id.vk.ru AFTER utun is up, so the captcha sheet ends up + // routed through the half-built tunnel and never loads. The + // proxyReady signal is now sent from StartProxy once all N + // sessions have established their DTLS+TURN allocations. go func() { for { select { @@ -993,34 +1002,51 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, wg1 := sync.WaitGroup{} t := time.Tick(200 * time.Millisecond) - okchan := make(chan struct{}) - connchan := make(chan net.PacketConn) - - // Session 0 first; later sessions wait until it has solved captcha - // and we know the upstream is alive (okchan signal). Otherwise we'd - // fire N parallel captcha challenges at the user. 
- firstFanoutChan := makeFanoutChan(fanouts[0]) - wg1.Go(func() { - oneDtlsConnectionLoop(ctx, peer, firstFanoutChan, connchan, okchan) - }) - wg1.Go(func() { - oneTurnConnectionLoop(ctx, params, peer, connchan, t) - }) - - select { - case <-okchan: - case <-ctx.Done(): - } - - for i := 1; i < n; i++ { + // Spawn all N sessions one at a time. Each session needs its own + // VK captcha to fetch a fresh TURN identity, and the manual + // captcha sheet in the App has to be able to reach id.vk.ru while + // it's being solved. Once we signal proxyReady, Swift brings up + // the WG adapter and iOS installs the AllowedIPs=0.0.0.0/0 route + // into utun — at which point ALL device traffic flows through the + // tunnel we just built. If we signal proxyReady after the FIRST + // session establishes (the original behaviour), the next captcha + // load is routed via the half-built tunnel and never completes, + // hanging the connect flow forever. So: + // + // 1. spawn one session + // 2. wait for that session's DTLS to be up (okchan) + // 3. repeat until all N are up + // 4. only then signal proxyReady so the WG adapter starts + // + // Captchas are solved sequentially by the UI anyway (one sheet at + // a time), so this serialisation costs nothing extra in latency. + for i := 0; i < n; i++ { fanoutChan := makeFanoutChan(fanouts[i]) cChan := make(chan net.PacketConn) + sessionOk := make(chan struct{}) + wg1.Go(func() { - oneDtlsConnectionLoop(ctx, peer, fanoutChan, cChan, nil) + oneDtlsConnectionLoop(ctx, peer, fanoutChan, cChan, sessionOk) }) wg1.Go(func() { oneTurnConnectionLoop(ctx, params, peer, cChan, t) }) + + select { + case <-sessionOk: + log.Printf("StartProxy: session %d/%d ready", i+1, n) + case <-ctx.Done(): + log.Printf("StartProxy: cancelled before session %d/%d came up", i+1, n) + wg1.Wait() + return + } + } + + // All N TURN allocations are alive. NOW it's safe to let Swift + // bring up the WG adapter and route the user's traffic into utun. 
+ select { + case proxyReady <- struct{}{}: + default: } log.Printf("Proxy started on %s with %d parallel TURN session(s)", localAddrStr, n) From 185ca1a4e4ed2a506b27159a4f6cc45d711d26ef Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 16:11:25 +0000 Subject: [PATCH 039/106] fix(identity): pretend to be iPhone Safari, not desktop Chrome MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User noticed that opening the VK call link directly in iPhone Safari shows zero captcha, while our requests trigger the "Confirm you're not a robot" checkbox every time. The 9-profile pool was 100% desktop Chrome / Edge / Linux Chrome — sending a Windows or Ubuntu User-Agent from a Russian-cellular-IP iPhone is exactly the kind of mismatch VK's anti-bot pipeline is built to catch. Two changes: - identity.go: replace the desktop pool with iPhone Safari profiles spanning iOS 17.4 – 18.1.1. These are the User-Agents Safari actually sends from the device that initiates the request, so the UA matches the connection's TLS fingerprint (NSURLSession's underlying CFNetwork stack already looks iPhone-shaped from VK's side) and the cellular IP. Real VK call link clicks don't get challenged — this puts us in that path. - vk_captcha.go: don't emit sec-ch-ua / sec-ch-ua-mobile / sec-ch-ua-platform headers when the profile didn't define them. Mobile Safari deliberately doesn't implement Client Hints, so sending those headers under a Safari UA is itself a tell that the request is automated. Two header-set sites updated symmetrically (initial captcha page fetch + componentDone POST). If VK still throws a captcha after this, the auto-solver will at least be running against the same challenge a real iPhone Safari client would have seen, which is what its checkbox + slider flows were originally written for. Worst case unchanged; best case the captcha just stops appearing. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/identity.go | 66 +++++-------------- .../Sources/WireGuardKitGo/vk_captcha.go | 19 ++++-- 2 files changed, 30 insertions(+), 55 deletions(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/identity.go b/wireguard-apple/Sources/WireGuardKitGo/identity.go index 52e77c6..578b188 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/identity.go +++ b/wireguard-apple/Sources/WireGuardKitGo/identity.go @@ -29,66 +29,34 @@ var lastNames = []string{ } var profiles = []Profile{ - // Windows Chrome + // iPhone Safari only. VK's anti-bot pipeline triggers the + // "Confirm you're not a robot" checkbox when it sees a mismatch + // between the connection (Russian cellular IP, iPhone-shaped TLS + // fingerprint from NSURLSession's underlying CFNetwork stack) + // and the User-Agent header. Real users clicking a VK call link + // from Safari on iPhone aren't asked for a captcha — and that's + // exactly the request we want to look like. + // + // Safari deliberately doesn't implement Client Hints; vk_captcha + // skips the sec-ch-ua headers entirely when SecChUa is empty, + // matching what mobile Safari actually sends on the wire. 
{ - UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Windows"`, + UserAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Mobile/15E148 Safari/604.1", }, { - UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="145", "Not-A.Brand";v="99", "Google Chrome";v="145"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Windows"`, + UserAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Mobile/15E148 Safari/604.1", }, { - UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="144", "Not-A.Brand";v="8", "Google Chrome";v="144"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Windows"`, - }, - - // Windows Edge - { - UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0", - SecChUa: `"Chromium";v="146", "Not-A.Brand";v="24", "Microsoft Edge";v="146"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Windows"`, + UserAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Mobile/15E148 Safari/604.1", }, { - UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36 Edg/145.0.0.0", - SecChUa: `"Chromium";v="145", "Not-A.Brand";v="99", "Microsoft Edge";v="145"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Windows"`, + UserAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Mobile/15E148 
Safari/604.1", }, - - // macOS Chrome - { - UserAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"macOS"`, - }, - { - UserAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="145", "Not-A.Brand";v="99", "Google Chrome";v="145"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"macOS"`, - }, - - // Linux Chrome { - UserAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Linux"`, + UserAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1", }, { - UserAgent: "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", - SecChUa: `"Chromium";v="144", "Not-A.Brand";v="8", "Google Chrome";v="144"`, - SecChUaMobile: "?0", - SecChUaPlatform: `"Linux"`, + UserAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Mobile/15E148 Safari/604.1", }, } diff --git a/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go b/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go index a0505d6..2933431 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go +++ b/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go @@ -156,9 +156,14 @@ func fetchPowInput(ctx context.Context, client *http.Client, profile Profile, re req.Header.Set("User-Agent", profile.UserAgent) req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") 
req.Header.Set("Accept-Language", "en-US,en;q=0.9") - req.Header.Set("sec-ch-ua", profile.SecChUa) - req.Header.Set("sec-ch-ua-mobile", profile.SecChUaMobile) - req.Header.Set("sec-ch-ua-platform", profile.SecChUaPlatform) + // Safari deliberately doesn't implement Client Hints — sending + // these headers from a Safari UA is itself a bot tell. Skip them + // when the profile didn't define any. + if profile.SecChUa != "" { + req.Header.Set("sec-ch-ua", profile.SecChUa) + req.Header.Set("sec-ch-ua-mobile", profile.SecChUaMobile) + req.Header.Set("sec-ch-ua-platform", profile.SecChUaPlatform) + } req.Header.Set("Sec-Fetch-Site", "none") req.Header.Set("Sec-Fetch-Mode", "navigate") req.Header.Set("Sec-Fetch-Dest", "document") @@ -242,9 +247,11 @@ func callCaptchaNotRobot(ctx context.Context, client *http.Client, profile Profi req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Origin", "https://id.vk.ru") req.Header.Set("Referer", "https://id.vk.ru/") - req.Header.Set("sec-ch-ua", profile.SecChUa) - req.Header.Set("sec-ch-ua-mobile", profile.SecChUaMobile) - req.Header.Set("sec-ch-ua-platform", profile.SecChUaPlatform) + if profile.SecChUa != "" { + req.Header.Set("sec-ch-ua", profile.SecChUa) + req.Header.Set("sec-ch-ua-mobile", profile.SecChUaMobile) + req.Header.Set("sec-ch-ua-platform", profile.SecChUaPlatform) + } req.Header.Set("Sec-Fetch-Site", "same-site") req.Header.Set("Sec-Fetch-Mode", "cors") req.Header.Set("Sec-Fetch-Dest", "empty") From e965eda5c68725bc550463a1a189f1cdde7da55b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 16:18:26 +0000 Subject: [PATCH 040/106] =?UTF-8?q?fix(captcha=5Fslider):=20support=20rect?= =?UTF-8?q?angular=20tile=20grids=20(e.g.=203=C3=977=20word-strips)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VK started serving a new slider variant where three rows of Cyrillic words (ШАПОЧКИ / КОРРУПЦИЯ / СКЕПТИЦИЗМ) are sliced into a 3×7 tile grid, scrambled 
per-row, and the slider position chooses a column offset. The existing solver hard-coded a single `gridSize` and built a `size × size` tile mapping, so for any non-square layout it produces tile counts that don't contain the swap indices: parse errors at best, garbled renders at worst, "all guesses rejected" in the loop. Refactor end-to-end to carry separate width/height: - `sliderCaptchaContent.Size` → `GridW int, GridH int`. - `parseSliderSteps` now tries two encodings of VK's `steps` array and logs the raw payload + chosen interpretation: square: [size, swap_pairs..., attempts?] rect: [width, height, swap_pairs..., attempts?] Square is tried first so every existing 3×3 / 4×4 / 5×5 captcha parses byte-for-byte identically (backward-compatible). The rect branch fires when all swap indices fit only inside `w*h` and not inside `size*size`. If both fail, we bail with the raw payload in the log so a future variant can be plumbed in without guesswork. - `sliderTileRect`, `buildSliderTileMapping`, `renderSliderCandidate`, `scoreRenderedSliderImage`, `rankSliderCandidates` all take `gridW, gridH` instead of `gridSize`. The pixel-border-continuity scoring is unchanged; it just walks `gridH × (gridW-1)` horizontal seams and `(gridH-1) × gridW` vertical seams. - Top-level log promoted from "grid=N" to "grid=WxH" so a quick glance at the logs shows which layout VK served. The submit format (encodeSliderAnswer) is unchanged — VK accepts the same `value: [activeSteps]` payload regardless of grid shape. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/captcha_slider.go | 155 ++++++++++++------ 1 file changed, 109 insertions(+), 46 deletions(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go b/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go index 01642dc..db510da 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go +++ b/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go @@ -26,9 +26,10 @@ type vkReqFunc func(method, postData string) (map[string]interface{}, error) type sliderCaptchaContent struct { Image image.Image - Size int // grid NxN - Steps []int // swap pairs - Attempts int // max submit attempts + GridW int // tile columns + GridH int // tile rows + Steps []int // swap pairs + Attempts int // max submit attempts } type sliderCandidate struct { @@ -68,12 +69,12 @@ func solveSliderCaptcha( return "", fmt.Errorf("slider parse: %w", err) } - log.Printf("slider: image=%dx%d grid=%d steps=%d attempts=%d", + log.Printf("slider: image=%dx%d grid=%dx%d steps=%d attempts=%d", content.Image.Bounds().Dx(), content.Image.Bounds().Dy(), - content.Size, len(content.Steps)/2, content.Attempts) + content.GridW, content.GridH, len(content.Steps)/2, content.Attempts) // Rank candidate positions by pixel border continuity - candidates, err := rankSliderCandidates(content.Image, content.Size, content.Steps) + candidates, err := rankSliderCandidates(content.Image, content.GridW, content.GridH, content.Steps) if err != nil { return "", fmt.Errorf("slider rank: %w", err) } @@ -235,7 +236,7 @@ func parseSliderContent(resp map[string]interface{}) (*sliderCaptchaContent, err return nil, err } - size, swaps, attempts, err := parseSliderSteps(steps) + gridW, gridH, swaps, attempts, err := parseSliderSteps(steps) if err != nil { return nil, err } @@ -247,7 +248,8 @@ func parseSliderContent(resp map[string]interface{}) (*sliderCaptchaContent, err return &sliderCaptchaContent{ Image: img, - Size: 
size, + GridW: gridW, + GridH: gridH, Steps: swaps, Attempts: attempts, }, nil @@ -274,30 +276,91 @@ func parseIntSlice(raw []interface{}) ([]int, error) { return values, nil } -func parseSliderSteps(steps []int) (int, []int, int, error) { +// parseSliderSteps decodes VK's `steps` array. Two formats observed: +// +// square: [size, swap_pairs..., attempts?] // tile grid = size×size +// rect: [width, height, swap_pairs..., attempts?] // tile grid = width×height +// +// VK started serving the rectangular variant (3×7 word-strip layouts: +// ШАПОЧКИ / КОРРУПЦИЯ / СКЕПТИЦИЗМ etc.) where the old square parser +// produces tile-counts that don't contain the swap indices and the +// renderer scrambles the image instead of unscrambling. We try +// square first (backward-compatible: pre-existing 3×3, 4×4, etc. +// captchas keep parsing the same way), then rect, then bail with the +// raw payload logged so a third format can be added without +// guesswork. +func parseSliderSteps(steps []int) (gridW int, gridH int, swaps []int, attempts int, err error) { if len(steps) < 3 { - return 0, nil, 0, fmt.Errorf("steps too short: %d", len(steps)) + return 0, 0, nil, 0, fmt.Errorf("steps too short: %d", len(steps)) } + log.Printf("slider: raw steps payload: %v", steps) + if w, h, sw, at, ok := decodeSliderStepsSquare(steps); ok { + log.Printf("slider: parsed as %dx%d (square format), %d candidates, %d attempts", + w, h, len(sw)/2, at) + return w, h, sw, at, nil + } + if w, h, sw, at, ok := decodeSliderStepsRect(steps); ok { + log.Printf("slider: parsed as %dx%d (rect format), %d candidates, %d attempts", + w, h, len(sw)/2, at) + return w, h, sw, at, nil + } + return 0, 0, nil, 0, fmt.Errorf("unrecognised steps payload %v", steps) +} + +func decodeSliderStepsSquare(steps []int) (w, h int, swaps []int, attempts int, ok bool) { size := steps[0] if size <= 0 { - return 0, nil, 0, fmt.Errorf("invalid grid size: %d", size) + return 0, 0, nil, 0, false } - - remaining := append([]int(nil), 
steps[1:]...) - attempts := defaultSliderAttempts - if len(remaining)%2 != 0 { - attempts = remaining[len(remaining)-1] - remaining = remaining[:len(remaining)-1] + tileCount := size * size + rest := append([]int(nil), steps[1:]...) + attempts = defaultSliderAttempts + if len(rest)%2 != 0 { + attempts = rest[len(rest)-1] + rest = rest[:len(rest)-1] } if attempts <= 0 { attempts = defaultSliderAttempts } - if len(remaining) == 0 || len(remaining)%2 != 0 { - return 0, nil, 0, fmt.Errorf("invalid swap payload") + if len(rest) == 0 || len(rest)%2 != 0 { + return 0, 0, nil, 0, false } + for _, v := range rest { + if v < 0 || v >= tileCount { + return 0, 0, nil, 0, false + } + } + return size, size, rest, attempts, true +} - return size, remaining, attempts, nil +func decodeSliderStepsRect(steps []int) (w, h int, swaps []int, attempts int, ok bool) { + if len(steps) < 4 { + return 0, 0, nil, 0, false + } + width, height := steps[0], steps[1] + if width <= 0 || height <= 0 { + return 0, 0, nil, 0, false + } + tileCount := width * height + rest := append([]int(nil), steps[2:]...) + attempts = defaultSliderAttempts + if len(rest)%2 != 0 { + attempts = rest[len(rest)-1] + rest = rest[:len(rest)-1] + } + if attempts <= 0 { + attempts = defaultSliderAttempts + } + if len(rest) == 0 || len(rest)%2 != 0 { + return 0, 0, nil, 0, false + } + for _, v := range rest { + if v < 0 || v >= tileCount { + return 0, 0, nil, 0, false + } + } + return width, height, rest, attempts, true } func decodeSliderImage(rawImage string) (image.Image, error) { @@ -325,7 +388,7 @@ func encodeSliderAnswer(activeSteps []int) (string, error) { // rankSliderCandidates analyzes each candidate permutation and ranks by // pixel border continuity (lower score = better match = more likely correct). 
-func rankSliderCandidates(img image.Image, gridSize int, swaps []int) ([]sliderCandidate, error) { +func rankSliderCandidates(img image.Image, gridW, gridH int, swaps []int) ([]sliderCandidate, error) { candidateCount := len(swaps) / 2 if candidateCount == 0 { return nil, fmt.Errorf("no candidates") @@ -334,17 +397,17 @@ func rankSliderCandidates(img image.Image, gridSize int, swaps []int) ([]sliderC candidates := make([]sliderCandidate, 0, candidateCount) for idx := 1; idx <= candidateCount; idx++ { activeSteps := buildSliderActiveSteps(swaps, idx) - mapping, err := buildSliderTileMapping(gridSize, activeSteps) + mapping, err := buildSliderTileMapping(gridW, gridH, activeSteps) if err != nil { return nil, err } - rendered, err := renderSliderCandidate(img, gridSize, mapping) + rendered, err := renderSliderCandidate(img, gridW, gridH, mapping) if err != nil { return nil, err } - score := scoreRenderedSliderImage(rendered, gridSize) + score := scoreRenderedSliderImage(rendered, gridW, gridH) candidates = append(candidates, sliderCandidate{ Index: idx, ActiveSteps: activeSteps, @@ -373,8 +436,8 @@ func buildSliderActiveSteps(swaps []int, candidateIndex int) []int { return append([]int(nil), swaps[:end]...) 
} -func buildSliderTileMapping(gridSize int, activeSteps []int) ([]int, error) { - tileCount := gridSize * gridSize +func buildSliderTileMapping(gridW, gridH int, activeSteps []int) ([]int, error) { + tileCount := gridW * gridH if tileCount <= 0 { return nil, fmt.Errorf("invalid tile count") } @@ -396,8 +459,8 @@ func buildSliderTileMapping(gridSize int, activeSteps []int) ([]int, error) { return mapping, nil } -func renderSliderCandidate(img image.Image, gridSize int, mapping []int) (*image.RGBA, error) { - tileCount := gridSize * gridSize +func renderSliderCandidate(img image.Image, gridW, gridH int, mapping []int) (*image.RGBA, error) { + tileCount := gridW * gridH if len(mapping) != tileCount { return nil, fmt.Errorf("mapping length %d != %d", len(mapping), tileCount) } @@ -405,22 +468,22 @@ func renderSliderCandidate(img image.Image, gridSize int, mapping []int) (*image bounds := img.Bounds() rendered := image.NewRGBA(bounds) for dstIdx, srcIdx := range mapping { - srcRect := sliderTileRect(bounds, gridSize, srcIdx) - dstRect := sliderTileRect(bounds, gridSize, dstIdx) + srcRect := sliderTileRect(bounds, gridW, gridH, srcIdx) + dstRect := sliderTileRect(bounds, gridW, gridH, dstIdx) copyTile(rendered, dstRect, img, srcRect) } return rendered, nil } -func scoreRenderedSliderImage(img image.Image, gridSize int) int64 { +func scoreRenderedSliderImage(img image.Image, gridW, gridH int) int64 { bounds := img.Bounds() var score int64 // Horizontal borders (left tile right edge vs right tile left edge) - for row := 0; row < gridSize; row++ { - for col := 0; col < gridSize-1; col++ { - leftRect := sliderTileRect(bounds, gridSize, row*gridSize+col) - rightRect := sliderTileRect(bounds, gridSize, row*gridSize+col+1) + for row := 0; row < gridH; row++ { + for col := 0; col < gridW-1; col++ { + leftRect := sliderTileRect(bounds, gridW, gridH, row*gridW+col) + rightRect := sliderTileRect(bounds, gridW, gridH, row*gridW+col+1) height := leftRect.Dy() if h := rightRect.Dy(); 
h < height { height = h @@ -435,10 +498,10 @@ func scoreRenderedSliderImage(img image.Image, gridSize int) int64 { } // Vertical borders (top tile bottom edge vs bottom tile top edge) - for row := 0; row < gridSize-1; row++ { - for col := 0; col < gridSize; col++ { - topRect := sliderTileRect(bounds, gridSize, row*gridSize+col) - bottomRect := sliderTileRect(bounds, gridSize, (row+1)*gridSize+col) + for row := 0; row < gridH-1; row++ { + for col := 0; col < gridW; col++ { + topRect := sliderTileRect(bounds, gridW, gridH, row*gridW+col) + bottomRect := sliderTileRect(bounds, gridW, gridH, (row+1)*gridW+col) width := topRect.Dx() if w := bottomRect.Dx(); w < width { width = w @@ -455,13 +518,13 @@ func scoreRenderedSliderImage(img image.Image, gridSize int) int64 { return score } -func sliderTileRect(bounds image.Rectangle, gridSize, index int) image.Rectangle { - row := index / gridSize - col := index % gridSize - x0 := bounds.Min.X + col*bounds.Dx()/gridSize - x1 := bounds.Min.X + (col+1)*bounds.Dx()/gridSize - y0 := bounds.Min.Y + row*bounds.Dy()/gridSize - y1 := bounds.Min.Y + (row+1)*bounds.Dy()/gridSize +func sliderTileRect(bounds image.Rectangle, gridW, gridH, index int) image.Rectangle { + row := index / gridW + col := index % gridW + x0 := bounds.Min.X + col*bounds.Dx()/gridW + x1 := bounds.Min.X + (col+1)*bounds.Dx()/gridW + y0 := bounds.Min.Y + row*bounds.Dy()/gridH + y1 := bounds.Min.Y + (row+1)*bounds.Dy()/gridH return image.Rect(x0, y0, x1, y1) } From 1bea7e0f0db5b03cef33486d752175688f7ce5e0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 16:23:58 +0000 Subject: [PATCH 041/106] fix(tunnel): scale ProxyWaitReady timeout by N (was hardcoded to 12s) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User running nValue=10 with auto-captcha hit "DTLS connection timeout (12s)" mid-Step-2/4 of session 2's captcha solve. Root cause was the combination of two recent changes: 1. 
StartProxy on the Go side now waits for ALL N TURN allocations to establish before signalling proxyReady (otherwise utun comes up after session 1 and the manual-captcha WebView for sessions 2..N can't reach id.vk.ru). 2. Swift's ProxyWaitReady budget was a hardcoded 12 s (auto) / 300 s (manual) constant from when only the first session was required to be ready. Net effect: N=10 with auto captcha needs ~50 s of sequential captcha solving before proxyReady fires, but Swift gives up at 12 s and tears the whole thing down right in the middle of session 2. Compute the budget from N and the captcha mode instead of hardcoding: manual: 30 s/session, floor 60 s — user is in the loop, can't plausibly clear a captcha faster than this. auto: 15 s/session, floor 20 s — solver hits the warm path in ~3–6 s but a slider retry can take longer; 15 s is the observed P99 from existing logs. For nValue=10 auto this yields 150 s instead of 12 s, which fits the sequential solve flow comfortably. nValue=1 still ends up at the floor of 20 s (auto) / 60 s (manual), preserving fast failure on genuinely stuck single-session paths. Log line dumps the actual budget so the next "why did it time out" report has the number. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 27 ++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 747f7ba..8fd3245 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -117,11 +117,28 @@ class PacketTunnelProvider: NEPacketTunnelProvider { TurnBridgeSetManualCaptchaMode(manualCaptchaEnabled ? 1 : 0) SharedLogger.info("Captcha mode: \(manualCaptchaEnabled ? 
"manual (browser sheet)" : "auto (in-tunnel solver)")", source: .tunnel) - // Manual captcha is human-driven, so give the user time to actually - // solve the challenge before declaring DTLS dead. Auto mode keeps - // the original 12s budget — if the solver can't bash through in - // that window something else is wrong and we want fast failure. - let dtlsReadyTimeoutMs: Int32 = manualCaptchaEnabled ? 300_000 : 12_000 + // Scale the readiness budget by N: StartProxy on the Go side + // now waits for ALL N TURN allocations to come up before it + // signals proxyReady (otherwise the WG adapter starts after + // session 1 is up, iOS installs AllowedIPs=0.0.0.0/0 into + // utun, and the captcha load for sessions 2..N gets routed + // through the half-built tunnel and never completes — see + // turn_proxy.go's StartProxy comment). + // + // Per-session budget: + // manual: the user is in the loop solving each captcha by + // hand, so plan for ~30 s/session plus a generous floor. + // auto: the in-tunnel solver finishes in ~3–6 s on a + // warm path but burns longer on a slider+retry sequence, + // so budget ~15 s/session. + // + // The old 12 s / 300 s constants assumed N=1 and were the + // direct cause of "DTLS connection timeout (12s)" landing + // mid-Step-2/4 when nValue>1. + let perSessionMs: Int32 = manualCaptchaEnabled ? 30_000 : 15_000 + let floorMs: Int32 = manualCaptchaEnabled ? 60_000 : 20_000 + let dtlsReadyTimeoutMs: Int32 = max(floorMs, perSessionMs * nValue) + SharedLogger.info("DTLS ready budget: \(dtlsReadyTimeoutMs / 1000)s for N=\(nValue) (\(manualCaptchaEnabled ? 
"manual" : "auto"))", source: .tunnel) DispatchQueue.global(qos: .userInteractive).async { StartProxy(vkLink, peerAddr, listenAddr, nValue, udpFlag) From d917a0ef00b28f97f6ed2b2e856afb71f5d972a0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 16:49:42 +0000 Subject: [PATCH 042/106] feat: parallelise N captcha solves + silence per-tick byte spam + reset log per connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes that all reduce log noise / startup latency for the high-nValue auto-captcha path the user is settling on: (1) StartProxy now spawns ALL N sessions in parallel. The previous sequential loop was justified back when each session needed its captcha sheet to load without competing with utun being up, but the underlying constraint is "all N must be ready before proxyReady" — nothing requires that to be serial. With auto-captcha the in-tunnel solver runs entirely in extension Go code with no UI dependency, so N captchas can be solved concurrently; with manual-captcha the UI naturally serialises (only one WebView sheet visible at a time). Net effect at N=30 auto: ~30 × 5 s sequential → ~5 s parallel. (2) poolCreds releases its mutex during the slow VK fetch. The previous version held the lock across f(link), serialising every parallel caller back into a queue — even when StartProxy fired them all at once the captcha solves still happened one after another. Cache-hit fast path stays inside the lock (no slow operation there). Also dropped the 1 s "API rate-limit" sleep that was there to space requests under serialised access; it's meaningless under parallel access and just added latency to the first N solves. (3) Pruned the 10 s tick spam: - DTLS bytes wg→dtls=... dtls→wg=... - TURN bytes conn2→relay=... relay→conn2=... - WG bytes rx=... tx=... - fanout: total dropped=... (when no drops) We proved data flows. 
The counters themselves still accumulate into atomic uint64s and surface in the per-session lifetime log on disconnect, so the byte totals are still recoverable post-mortem. fanout drops still log on every tick that actually saw a drop. (4) SharedLogger.clearLogs() at the start of every connect attempt. The previous behaviour kept appending across reconnects, making it genuinely hard to find "what happened THIS time" amid hours of prior log noise — exactly the use case the byte-counter spam was also bloating. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/ContentView.swift | 5 + network-extension/PacketTunnelProvider.swift | 50 ------ .../Sources/WireGuardKitGo/turn_proxy.go | 168 ++++++++---------- .../Sources/WireGuardKitGo/udp_fanout.go | 15 +- 4 files changed, 90 insertions(+), 148 deletions(-) diff --git a/TurnBridge/ContentView.swift b/TurnBridge/ContentView.swift index 81bfe24..e6bcd5b 100755 --- a/TurnBridge/ContentView.swift +++ b/TurnBridge/ContentView.swift @@ -297,6 +297,11 @@ struct ContentView: View { return } + // Fresh slate per connect attempt. Keeps the log focused + // on the current session instead of accreting history + // across reconnects — the previous behaviour made it + // very hard to scan for "what happened THIS time". + SharedLogger.clearLogs() SharedLogger.info("User requested connect with profile \"\(profile.name)\"") vpnStatus = .connecting app.turnOnTunnel( diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 8fd3245..755c5f2 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -44,10 +44,6 @@ class PacketTunnelProvider: NEPacketTunnelProvider { private var lastPathInterfaceLabel: String? private var lastTransportRestartAt = Date.distantPast - // WG byte counters logged every 10s by startWireGuardStatsLogger(). - private var wgStatsTimer: DispatchSourceTimer? 
- private var lastWGRxBytes: UInt64 = 0 - private var lastWGTxBytes: UInt64 = 0 /// Tear down the current TURN/DTLS cycle and let the proxy spin up /// fresh inner connections, reusing cached credentials when possible @@ -166,7 +162,6 @@ class PacketTunnelProvider: NEPacketTunnelProvider { sharedLogger.log("Tunnel interface is \(interfaceName)") SharedLogger.info("Tunnel up on interface \(interfaceName)", source: .wireguard) self.logRouteScope() - self.startWireGuardStatsLogger() self.startNetworkMonitoring() } completionHandler(adapterError) @@ -223,46 +218,6 @@ class PacketTunnelProvider: NEPacketTunnelProvider { } } - /// Poll WireGuardAdapter every 10s for rx/tx counters and log them - /// alongside the Go-side DTLS/TURN byte counters. If WG itself sees - /// zero application bytes, the issue is above WG (TUN routing). If - /// WG sees bytes but the Go side stays at zero, the issue is in the - /// proxy. The deltas make it possible to tell at a glance whether - /// traffic is flowing. - private func startWireGuardStatsLogger() { - guard wgStatsTimer == nil else { return } - let timer = DispatchSource.makeTimerSource(queue: DispatchQueue.global(qos: .utility)) - timer.schedule(deadline: .now() + 10, repeating: 10) - timer.setEventHandler { [weak self] in - guard let self = self else { return } - self.adapter.getRuntimeConfiguration { config in - guard let config = config else { - SharedLogger.debug("WG stats: getRuntimeConfiguration returned nil", source: .wireguard) - return - } - var rx: UInt64 = 0 - var tx: UInt64 = 0 - for line in config.split(separator: "\n") { - if line.hasPrefix("rx_bytes=") { - rx = UInt64(line.dropFirst("rx_bytes=".count)) ?? 0 - } else if line.hasPrefix("tx_bytes=") { - tx = UInt64(line.dropFirst("tx_bytes=".count)) ?? 
0 - } - } - let dRx = rx &- self.lastWGRxBytes - let dTx = tx &- self.lastWGTxBytes - self.lastWGRxBytes = rx - self.lastWGTxBytes = tx - SharedLogger.info( - "WG bytes rx=\(rx) (Δ+\(dRx)) tx=\(tx) (Δ+\(dTx))", - source: .wireguard - ) - } - } - timer.resume() - wgStatsTimer = timer - } - private func describe(_ status: Network.NWPath.Status) -> String { switch status { case .satisfied: return "satisfied" @@ -330,11 +285,6 @@ class PacketTunnelProvider: NEPacketTunnelProvider { lastPathStatus = nil lastPathInterfaceLabel = nil - wgStatsTimer?.cancel() - wgStatsTimer = nil - lastWGRxBytes = 0 - lastWGTxBytes = 0 - StopProxy() SharedLogger.info("TURN proxy stopped", source: .tunnel) TransportHealthMonitor.reset() diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index bd2e729..a8591ee 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -422,28 +422,11 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa var addr atomic.Value - // Every 10s, dump the current totals and the deltas since the last - // tick. If user opens a website and these stay at +0 in both - // directions, no user traffic is reaching the DTLS layer (most - // likely the routing config is sending the request straight to - // WiFi/cellular, bypassing the tunnel entirely). - go func() { - ticker := time.NewTicker(10 * time.Second) - defer ticker.Stop() - var prevTx, prevRx uint64 - for { - select { - case <-dtlsctx.Done(): - return - case <-ticker.C: - tx := wgToDtls.Load() - rx := dtlsToWg.Load() - log.Printf("DTLS bytes wg→dtls=%d (Δ+%d) dtls→wg=%d (Δ+%d)", - tx, tx-prevTx, rx, rx-prevRx) - prevTx, prevRx = tx, rx - } - } - }() + // Note: byte counters keep accumulating into wgToDtls / dtlsToWg + // and surface in the per-session lifetime log on exit. 
The + // periodic 10s dump was useful while we were proving that user + // traffic actually flows through the tunnel, but now it's just + // line noise. go func() { defer wg.Done() @@ -731,25 +714,9 @@ func oneTurnConnection(ctx context.Context, turnParams *turnParams, peer *net.UD } }() - // Periodic counter dump every 10s so we can see whether the relay - // is actually carrying bytes or only the wakeup keepalives. - go func() { - ticker := time.NewTicker(10 * time.Second) - defer ticker.Stop() - var prevTx, prevRx uint64 - for { - select { - case <-turnctx.Done(): - return - case <-ticker.C: - tx := conn2ToRelay.Load() - rx := relayToConn2.Load() - log.Printf("TURN bytes conn2→relay=%d (Δ+%d) relay→conn2=%d (Δ+%d)", - tx, tx-prevTx, rx, rx-prevRx) - prevTx, prevRx = tx, rx - } - } - }() + // Byte counters are folded into the per-session lifetime log on + // exit; the periodic 10s dump that proved data was flowing + // during the throughput investigation is no longer interesting. wg.Wait() if err := relayConn.SetDeadline(time.Time{}); err != nil { @@ -855,48 +822,52 @@ func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { return func(link string) (string, string, string, error) { mu.Lock() - defer mu.Unlock() if !cTime.IsZero() && time.Since(cTime) > 10*time.Minute { pool = nil cTime = time.Time{} } - if len(pool) < poolSize { - u, p, a, err := f(link) - if err == nil { - pool = append(pool, turnCred{u, p, a}) - cTime = time.Now() - log.Printf("Successfully registered User Identity %d/%d", len(pool), poolSize) - - // Space out requests by 1000ms to avoid API limits - if len(pool) < poolSize { - time.Sleep(1000 * time.Millisecond) - } + // Cache-hit fast path: pool already at capacity, hand out a + // rotating cached cred and bail. 
+ if len(pool) >= poolSize { + c := pool[idx%len(pool)] + idx++ + cTime = time.Now() + mu.Unlock() + return c.user, c.pass, c.addr, nil + } - c := pool[len(pool)-1] - idx++ - return c.user, c.pass, c.addr, nil - } + // Cache-miss slow path: release the mutex during the captcha + // solve so N parallel callers (one per TURN session) actually + // fetch concurrently instead of serialising on this lock. + // Previously this mutex was held across f(link), which made + // nValue=30 take ~3 minutes (30 × 5–6 s) to warm the pool. + // The pre-existing 1 s "API rate-limit" sleep is also gone — + // it was meaningless under serialised access and actively + // harmful when callers run in parallel. + mu.Unlock() + u, p, a, err := f(link) + mu.Lock() + defer mu.Unlock() - log.Printf("Failed to get unique TURN identity: %v", err) - if len(pool) > 0 { - log.Printf("Falling back to reusing a previous identity...") - c := pool[idx%len(pool)] - idx++ - cTime = time.Now() - return c.user, c.pass, c.addr, nil - } - return "", "", "", err + if err == nil { + pool = append(pool, turnCred{u, p, a}) + cTime = time.Now() + log.Printf("Successfully registered User Identity %d/%d", len(pool), poolSize) + idx++ + return u, p, a, nil } - c := pool[idx%len(pool)] - idx++ - // Refresh the cache deadline on every reuse so reconnect storms - // after a long-lived session don't suddenly evict the pool and - // force a fresh captcha. - cTime = time.Now() - return c.user, c.pass, c.addr, nil + log.Printf("Failed to get unique TURN identity: %v", err) + if len(pool) > 0 { + log.Printf("Falling back to reusing a previous identity...") + c := pool[idx%len(pool)] + idx++ + cTime = time.Now() + return c.user, c.pass, c.addr, nil + } + return "", "", "", err } } @@ -1002,25 +973,23 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, wg1 := sync.WaitGroup{} t := time.Tick(200 * time.Millisecond) - // Spawn all N sessions one at a time. 
Each session needs its own - // VK captcha to fetch a fresh TURN identity, and the manual - // captcha sheet in the App has to be able to reach id.vk.ru while - // it's being solved. Once we signal proxyReady, Swift brings up - // the WG adapter and iOS installs the AllowedIPs=0.0.0.0/0 route - // into utun — at which point ALL device traffic flows through the - // tunnel we just built. If we signal proxyReady after the FIRST - // session establishes (the original behaviour), the next captcha - // load is routed via the half-built tunnel and never completes, - // hanging the connect flow forever. So: + // Spawn ALL N sessions in parallel. Sequential spawning made the + // auto-captcha mode artificially slow at high nValue + // (N=30 → 30 captchas back-to-back → ~3 minutes), even though + // the in-tunnel captcha solver runs entirely inside the + // extension's Go code with no UI dependency and can absolutely + // be parallelised. In manual-captcha mode the UI is the natural + // serialiser (it only presents one WebView sheet at a time), so + // spawning all N at once still produces a sequential user-facing + // flow there — no regression. // - // 1. spawn one session - // 2. wait for that session's DTLS to be up (okchan) - // 3. repeat until all N are up - // 4. only then signal proxyReady so the WG adapter starts - // - // Captchas are solved sequentially by the UI anyway (one sheet at - // a time), so this serialisation costs nothing extra in latency. + // We still wait for ALL N to signal okchan before sending to + // proxyReady, otherwise utun comes up half-built and the manual + // captcha WebView for not-yet-ready sessions hangs (see commit + // ca8b19f for the original rationale of this barrier). 
+ sessionReady := make(chan int, n) for i := 0; i < n; i++ { + i := i fanoutChan := makeFanoutChan(fanouts[i]) cChan := make(chan net.PacketConn) sessionOk := make(chan struct{}) @@ -1031,12 +1000,25 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, wg1.Go(func() { oneTurnConnectionLoop(ctx, params, peer, cChan, t) }) + // One forwarder per session that pushes its index onto the + // aggregate ready channel as soon as that session establishes + // its DTLS. Decouples the wait-for-all loop below from the + // per-session okchan semantics. + go func() { + select { + case <-sessionOk: + sessionReady <- i + case <-ctx.Done(): + } + }() + } + for k := 0; k < n; k++ { select { - case <-sessionOk: - log.Printf("StartProxy: session %d/%d ready", i+1, n) + case idx := <-sessionReady: + log.Printf("StartProxy: session %d ready (%d/%d total)", idx+1, k+1, n) case <-ctx.Done(): - log.Printf("StartProxy: cancelled before session %d/%d came up", i+1, n) + log.Printf("StartProxy: cancelled, only %d/%d sessions came up", k, n) wg1.Wait() return } diff --git a/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go b/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go index 2473083..e3f9e55 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go +++ b/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go @@ -185,9 +185,10 @@ func startFanoutDispatcher(ctx context.Context, listenConn net.PacketConn, fanou var rrIdx uint64 var dropped uint64 - // Periodic dispatcher health log, decoupled from the per-fanout - // session logs so a stalled consumer is visible even if its - // owning session never logs. + // Drops are silent under healthy conditions and only surface + // when a consumer is actually backing up. Periodic dispatcher + // health log fires every 10s but only emits a line if at + // least one packet was dropped since the last tick. 
ticker := time.NewTicker(10 * time.Second) defer ticker.Stop() go func() { @@ -197,12 +198,16 @@ func startFanoutDispatcher(ctx context.Context, listenConn net.PacketConn, fanou case <-ctx.Done(): return case <-ticker.C: + curDrop := atomic.LoadUint64(&dropped) + if curDrop == prevDrop { + continue + } var perFanoutDrop []uint64 for _, f := range fanouts { perFanoutDrop = append(perFanoutDrop, f.dropped.Load()) } - curDrop := atomic.LoadUint64(&dropped) - log.Printf("fanout: total dropped=%d (Δ+%d) per-session=%v", curDrop, curDrop-prevDrop, perFanoutDrop) + log.Printf("fanout: dropped Δ+%d (total=%d) per-session=%v", + curDrop-prevDrop, curDrop, perFanoutDrop) prevDrop = curDrop } } From e3e17b2fc171fd1cd93d0b3b763248f04f3a3109 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 16:51:33 +0000 Subject: [PATCH 043/106] =?UTF-8?q?feat(settings):=20raise=20N=20cap=2016?= =?UTF-8?q?=E2=86=92100=20and=20allow=20manual=20numeric=20entry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User asked to bump the connections limit because VK rate-limits per TURN allocation and the only way to get usable throughput is to keep adding allocations (~80 kbps each in tests; N=10 ≈ 0.4 Mbps, N=30 should land near a working megabit). The old Stepper(in: 1...16) cap was making this physically impossible to configure from the UI. Two changes to SettingsView's connections row: - Range expanded 1...16 → 1...100. The new parallel captcha solver (commit d917a0e) makes N=100 take roughly the same wall-clock time at startup as N=1 did before, so there's no UX cliff in the expanded range. - Direct numeric input via a TextField next to the Stepper. Typing "30" beats tapping the + button 29 times. The Stepper stays for ±1 nudges and as a visual affordance. The TextField + Stepper share a clampedNValue(min:max:) binding helper so a typo like 9999 silently saves as 100 instead of poisoning the profile. 
SwiftUI invokes the setter on every keystroke under `.number` format, so partial entries during typing get clamped to the in-range edge — no janky behaviour vs the classic "validate on commit" pattern. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/SettingsView.swift | 44 ++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/TurnBridge/SettingsView.swift b/TurnBridge/SettingsView.swift index d48abef..56642f8 100755 --- a/TurnBridge/SettingsView.swift +++ b/TurnBridge/SettingsView.swift @@ -34,7 +34,26 @@ struct SettingsView: View { .autocapitalization(.none) .disableAutocorrection(true) - Stepper("Connections (n): \(profile.nValue)", value: binding(\.nValue), in: 1...16) + // Connections: TextField for direct numeric entry + + // Stepper for ±1 nudges. Range expanded from the + // previous 1...16 cap to 1...100 because VK rate- + // limits per TURN allocation, so throughput scales + // ~linearly with N; the parallel captcha solver + // keeps startup latency tolerable even at 100. + // Clamped binding rejects out-of-range values so a + // typo like 9999 silently saves as 100. + HStack { + Text("Connections (n)") + Spacer() + TextField("", value: clampedNValue(min: 1, max: 100), + format: .number) + .keyboardType(.numberPad) + .multilineTextAlignment(.trailing) + .frame(width: 60) + Stepper("", value: clampedNValue(min: 1, max: 100), + in: 1...100) + .labelsHidden() + } } Section(header: Text("WireGuard Config")) { @@ -101,4 +120,27 @@ struct SettingsView: View { } ) } + + /// `binding(\.nValue)` with a setter that clamps to [min, max]. + /// SwiftUI invokes the setter on every keystroke for a TextField + /// with `.number` format, so a partial entry like "1" while the + /// user is typing "10" gets clamped to 1 (still valid) and the + /// follow-up "10" overwrites it as expected. The clamp only + /// matters at commit time when the user enters something out + /// of range. 
+ private func clampedNValue(min lo: Int, max hi: Int) -> Binding { + let base = binding(\.nValue) + return Binding( + get: { base.wrappedValue }, + set: { newValue in + if newValue < lo { + base.wrappedValue = lo + } else if newValue > hi { + base.wrappedValue = hi + } else { + base.wrappedValue = newValue + } + } + ) + } } From 283de355391d9950fe2f6ff8a9e27ad059f06550 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 17:03:39 +0000 Subject: [PATCH 044/106] feat: port kiper292 17-byte Session-ID stream-aggregation handshake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports the multi-stream aggregation mechanism from kiper292/wireguard-turn-android. Each DTLS session, immediately after handshake, writes a 17-byte preamble: bytes 0..15: shared session UUID (one per StartProxy) byte 16: stream ID (0..N-1) A compatible server-side aggregator (kiper292/vk-turn-proxy fork deployed alongside the WG server) reads this preamble, groups every stream that carries the same session UUID into one logical session, and presents the bundle to the upstream WG server as a SINGLE endpoint. This stops the WG server from endpoint-thrashing when our fanout dispatcher delivers packets to N distinct VK relay ports — without it WG sees N different source addresses for the same peer and keeps updating its endpoint metadata. Does NOT change throughput: each TURN allocation still hits VK's per-allocation ~70 kbps shaper, so total bandwidth still scales linearly with nValue. The gain is stability — fewer counter resets, no rapid endpoint flapping, cleaner reconnect behaviour on the WG server side. Files: stream_aggregation.go (new) - TurnBridgeSetStreamAggregation cgo export (atomic bool flag). - freshStreamAggSession() re-rolls one RFC 4122 v4 UUID per StartProxy with the version+variant marker bits set on the wire to match the reference uuid.New().MarshalBinary() output. - currentStreamAggSession() lookup used by per-session writers. 
turn_proxy.go - oneDtlsConnection / oneDtlsConnectionLoop now carry streamID so each session knows its 0..N-1 index. StartProxy passes i. - After "Established DTLS connection!" log, when the flag is on, write the 17 bytes to dtlsConn BEFORE any WG packets flow. Failure to write the preamble fails the session — better than silently degrading to an unaggregated stream that the server can't fuse anyway. wireguard.h - extern declaration for the new cgo export. iOS side: VPNProfile.swift - new streamAggregation Bool field with backwards-compatible decoder default (false) so existing saved profiles keep working without re-entry. SettingsView.swift - Toggle under Connections row + caption warning that the WG server must be running kiper292/vk-turn-proxy or the 17-byte preamble will corrupt the WG handshake. TurnBridgeApp.swift + ContentView.swift - threaded streamAggregation through turnOnTunnel into providerConfiguration. PacketTunnelProvider.swift - reads streamAggregation from providerConfiguration and calls TurnBridgeSetStreamAggregation BEFORE StartProxy so the Go global is set before any session goroutine reaches its post-handshake check. Default off. Existing profiles + new profiles created without toggling the switch behave exactly as today — only profiles that explicitly opt in (and have a compatible server) emit the preamble. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/ContentView.swift | 1 + TurnBridge/SettingsView.swift | 17 ++++ TurnBridge/TurnBridgeApp.swift | 7 +- TurnBridge/VPNProfile.swift | 15 +++- network-extension/PacketTunnelProvider.swift | 9 +- .../WireGuardKitGo/stream_aggregation.go | 90 +++++++++++++++++++ .../Sources/WireGuardKitGo/turn_proxy.go | 44 ++++++++- .../Sources/WireGuardKitGo/wireguard.h | 1 + 8 files changed, 173 insertions(+), 11 deletions(-) create mode 100644 wireguard-apple/Sources/WireGuardKitGo/stream_aggregation.go diff --git a/TurnBridge/ContentView.swift b/TurnBridge/ContentView.swift index e6bcd5b..7bebf0d 100755 --- a/TurnBridge/ContentView.swift +++ b/TurnBridge/ContentView.swift @@ -310,6 +310,7 @@ struct ContentView: View { listenAddr: profile.listenAddr, nValue: profile.nValue, useUDP: profile.useUDP, + streamAggregation: profile.streamAggregation, wgQuickConfig: profile.wgQuickConfig ) { isSuccess in if !isSuccess { diff --git a/TurnBridge/SettingsView.swift b/TurnBridge/SettingsView.swift index 56642f8..5eb6a41 100755 --- a/TurnBridge/SettingsView.swift +++ b/TurnBridge/SettingsView.swift @@ -54,6 +54,23 @@ struct SettingsView: View { in: 1...100) .labelsHidden() } + + // Stream Aggregation: ports the 17-byte + // [sessionID, streamID] handshake from + // kiper292/wireguard-turn-android. Lets a compatible + // server-side aggregator (kiper292/vk-turn-proxy fork + // deployed alongside the WG server) fuse the N + // parallel TURN allocations into a single stable + // endpoint for WireGuard. + // + // REQUIRES the matching server. If toggled on without + // a compatible aggregator, the 17-byte preamble lands + // in the WG packet stream and breaks the very first + // handshake. Default off. + Toggle("Stream Aggregation", isOn: binding(\.streamAggregation)) + Text("Requires kiper292/vk-turn-proxy on the WG server. 
Leave off if you don't run a compatible aggregator.") + .font(.caption) + .foregroundColor(.secondary) } Section(header: Text("WireGuard Config")) { diff --git a/TurnBridge/TurnBridgeApp.swift b/TurnBridge/TurnBridgeApp.swift index 3b53c68..7282178 100755 --- a/TurnBridge/TurnBridgeApp.swift +++ b/TurnBridge/TurnBridgeApp.swift @@ -33,8 +33,8 @@ struct TurnBridge: App { var id: String { request.requestId } } - func turnOnTunnel(vkLink: String, peerAddr: String, listenAddr: String, nValue: Int, useUDP: Bool, wgQuickConfig: String, completionHandler: @escaping (Bool) -> Void) { - SharedLogger.info("Connecting... peer=\(peerAddr), listen=\(listenAddr), n=\(nValue), udp=\(useUDP)") + func turnOnTunnel(vkLink: String, peerAddr: String, listenAddr: String, nValue: Int, useUDP: Bool, streamAggregation: Bool, wgQuickConfig: String, completionHandler: @escaping (Bool) -> Void) { + SharedLogger.info("Connecting... peer=\(peerAddr), listen=\(listenAddr), n=\(nValue), udp=\(useUDP), streamAgg=\(streamAggregation)") NETunnelProviderManager.loadAllFromPreferences { tunnelManagersInSettings, error in if let error = error { @@ -61,7 +61,8 @@ struct TurnBridge: App { "peerAddr": peerAddr, "listenAddr": listenAddr, "nValue": nValue, - "useUDP": useUDP + "useUDP": useUDP, + "streamAggregation": streamAggregation ] let defaults = UserDefaults.standard diff --git a/TurnBridge/VPNProfile.swift b/TurnBridge/VPNProfile.swift index b3cc55d..7d4f86b 100644 --- a/TurnBridge/VPNProfile.swift +++ b/TurnBridge/VPNProfile.swift @@ -11,9 +11,16 @@ struct VPNProfile: Codable, Identifiable, Equatable { /// true = UDP (faster, default; what upstream turnbridge has hardcoded) /// false = TCP (more reliable over flaky cellular; survives short blips) var useUDP: Bool + /// Enables the kiper292/vk-turn-proxy 17-byte Session-ID handshake + /// on every DTLS stream so the server-side aggregator can fuse the + /// N parallel TURN allocations into a single stable endpoint for + /// WireGuard. 
Must ONLY be enabled when the WG server is running a + /// compatible aggregator; otherwise the 17 bytes corrupt the very + /// first WG handshake and the tunnel never comes up. + var streamAggregation: Bool var wgQuickConfig: String - init(id: UUID = UUID(), name: String = "", vkLink: String = "", peerAddr: String = "", listenAddr: String = "127.0.0.1:9000", nValue: Int = 1, useUDP: Bool = true, wgQuickConfig: String = "") { + init(id: UUID = UUID(), name: String = "", vkLink: String = "", peerAddr: String = "", listenAddr: String = "127.0.0.1:9000", nValue: Int = 1, useUDP: Bool = true, streamAggregation: Bool = false, wgQuickConfig: String = "") { self.id = id self.name = name self.vkLink = vkLink @@ -21,12 +28,13 @@ struct VPNProfile: Codable, Identifiable, Equatable { self.listenAddr = listenAddr self.nValue = nValue self.useUDP = useUDP + self.streamAggregation = streamAggregation self.wgQuickConfig = wgQuickConfig } - // Backwards compatibility: older saved profiles in UserDefaults won't have useUDP. + // Backwards compatibility: older saved profiles in UserDefaults won't have useUDP / streamAggregation. enum CodingKeys: String, CodingKey { - case id, name, vkLink, peerAddr, listenAddr, nValue, useUDP, wgQuickConfig + case id, name, vkLink, peerAddr, listenAddr, nValue, useUDP, streamAggregation, wgQuickConfig } init(from decoder: Decoder) throws { @@ -38,6 +46,7 @@ struct VPNProfile: Codable, Identifiable, Equatable { listenAddr = try c.decode(String.self, forKey: .listenAddr) nValue = try c.decode(Int.self, forKey: .nValue) useUDP = (try? c.decode(Bool.self, forKey: .useUDP)) ?? true + streamAggregation = (try? c.decode(Bool.self, forKey: .streamAggregation)) ?? 
false wgQuickConfig = try c.decode(String.self, forKey: .wgQuickConfig) } } diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 755c5f2..3b03c4a 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -101,13 +101,20 @@ class PacketTunnelProvider: NEPacketTunnelProvider { // Default true for backward-compat with profiles saved before this field existed. let useUDP = (providerConfiguration["useUDP"] as? Bool) ?? true let udpFlag: Int32 = useUDP ? 1 : 0 + let streamAggregation = (providerConfiguration["streamAggregation"] as? Bool) ?? false - SharedLogger.info("Peer: \(peerAddr), Listen: \(listenAddr), N: \(nValue), UDP: \(useUDP)", source: .tunnel) + SharedLogger.info("Peer: \(peerAddr), Listen: \(listenAddr), N: \(nValue), UDP: \(useUDP), streamAgg: \(streamAggregation)", source: .tunnel) SharedLogger.info("Starting TURN proxy...", source: .tunnel) ProxySetLogger(nil, goProxyCLoggerCallback) CaptchaBridge.install() + // Toggle the Stream-Aggregation handshake on the Go side + // BEFORE StartProxy. The Go global is read once when each + // DTLS session completes its handshake, so setting it later + // would race the per-session goroutines. + TurnBridgeSetStreamAggregation(streamAggregation ? 1 : 0) + let manualCaptchaEnabled = UserDefaults(suiteName: CaptchaIPC.appGroupID)? .bool(forKey: "manualCaptcha") ?? false TurnBridgeSetManualCaptchaMode(manualCaptchaEnabled ? 1 : 0) diff --git a/wireguard-apple/Sources/WireGuardKitGo/stream_aggregation.go b/wireguard-apple/Sources/WireGuardKitGo/stream_aggregation.go new file mode 100644 index 0000000..7e376b9 --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/stream_aggregation.go @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: MIT +// +// Stream-Aggregation handshake compatible with the kiper292/vk-turn-proxy +// server fork. 
When enabled, every DTLS session this client establishes +// writes a 17-byte preamble immediately after the DTLS handshake +// completes: +// +// bytes 0..15: Session ID (UUID v4 binary, shared across all N streams) +// byte 16: Stream ID (0..N-1) +// +// The receiver-side aggregator reads this preamble, groups every stream +// that carries the same Session ID under one logical session, and +// presents them to the upstream WireGuard server as a SINGLE endpoint. +// That stops the WG server from endpoint-thrashing when N parallel TURN +// allocations deliver packets from N distinct VK relay ports. +// +// Without a compatible server-side aggregator the preamble would be +// fed directly into WireGuard as the first bytes of "WG data", garbling +// the very first handshake. The flag therefore defaults to off and is +// only toggled on by Swift at StartProxy time when the active profile +// has streamAggregation=true. + +package main + +/* +#include +*/ +import "C" + +import ( + "crypto/rand" + "sync" + "sync/atomic" +) + +var ( + streamAggEnabled atomic.Bool // true ⇔ write the 17-byte preamble on each session + + streamAggSessionMu sync.Mutex + streamAggSessionID [16]byte // re-rolled once per StartProxy when the flag is on + streamAggHasID bool +) + +//export TurnBridgeSetStreamAggregation +func TurnBridgeSetStreamAggregation(enabled C.int) { + streamAggEnabled.Store(enabled != 0) + if enabled == 0 { + // Clear the cached session ID so the next "on" re-rolls a fresh one. + streamAggSessionMu.Lock() + streamAggHasID = false + streamAggSessionMu.Unlock() + } +} + +func streamAggIsEnabled() bool { + return streamAggEnabled.Load() +} + +// freshStreamAggSession re-rolls the shared Session ID. Called from +// StartProxy at the moment all N sessions are about to be spawned, so +// every set of TURN allocations from a single connect attempt shares +// one ID and the server-side aggregator can fuse them. 
+func freshStreamAggSession() [16]byte { + streamAggSessionMu.Lock() + defer streamAggSessionMu.Unlock() + if _, err := rand.Read(streamAggSessionID[:]); err != nil { + // crypto/rand failing on iOS is practically impossible, but if it + // does we'd rather have a fixed-zero ID than crash StartProxy; + // the aggregator will at least bucket all streams together. + for i := range streamAggSessionID { + streamAggSessionID[i] = 0 + } + } + // Set the UUID v4 marker bits (RFC 4122 §4.4) so the bytes look like + // a valid v4 UUID on the wire — matches what the reference Go + // implementation produces via uuid.New().MarshalBinary(). + streamAggSessionID[6] = (streamAggSessionID[6] & 0x0f) | 0x40 + streamAggSessionID[8] = (streamAggSessionID[8] & 0x3f) | 0x80 + streamAggHasID = true + return streamAggSessionID +} + +func currentStreamAggSession() ([16]byte, bool) { + streamAggSessionMu.Lock() + defer streamAggSessionMu.Unlock() + if !streamAggHasID { + return [16]byte{}, false + } + return streamAggSessionID, true +} diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index a8591ee..a833cf8 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -287,7 +287,7 @@ func dtlsFunc(ctx context.Context, conn net.PacketConn, peer *net.UDPAddr) (net. 
return dtlsConn, nil } -func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.PacketConn, connchan chan<- net.PacketConn, okchan chan<- struct{}, c1 chan<- error) { +func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.PacketConn, connchan chan<- net.PacketConn, okchan chan<- struct{}, c1 chan<- error, streamID int) { var err error = nil defer func() { c1 <- err }() sessionStart := time.Now() @@ -336,6 +336,32 @@ func oneDtlsConnection(ctx context.Context, peer *net.UDPAddr, listenConn net.Pa log.Printf("Closed DTLS connection\n") }() log.Printf("Established DTLS connection!\n") + + // Stream-Aggregation preamble: if enabled, write the 17-byte + // [sessionID, streamID] header BEFORE WireGuard packets start + // flowing through dtlsConn. The receiver-side aggregator + // (kiper292/vk-turn-proxy fork on the WG server's box) reads + // this once per stream and fuses every stream sharing the same + // session ID into a single endpoint for WG, stopping the WG + // server from endpoint-thrashing when N parallel TURN + // allocations deliver packets from N distinct VK relay ports. + // Without the flag set (default), nothing is written and the + // stream looks exactly like our pre-aggregation transport. + if streamAggIsEnabled() { + sid, ok := currentStreamAggSession() + if ok { + preamble := make([]byte, 17) + copy(preamble[:16], sid[:]) + preamble[16] = byte(streamID) + if _, werr := dtlsConn.Write(preamble); werr != nil { + log.Printf("stream-agg: preamble write failed on stream %d: %s", streamID, werr) + err = fmt.Errorf("stream-agg preamble: %s", werr) + return + } + log.Printf("stream-agg: stream %d preamble sent (sessionID=%x)", streamID, sid[:4]) + } + } + // NOTE: do NOT signal proxyReady here. 
Signalling it the moment // the FIRST DTLS session establishes causes Swift to call // adapter.start() and iOS to bring up utun with the WG config's @@ -750,7 +776,7 @@ func reconnectBackoff(prev time.Duration, success bool) time.Duration { return prev + jitter } -func oneDtlsConnectionLoop(ctx context.Context, peer *net.UDPAddr, listenConnChan <-chan net.PacketConn, connchan chan<- net.PacketConn, okchan chan<- struct{}) { +func oneDtlsConnectionLoop(ctx context.Context, peer *net.UDPAddr, listenConnChan <-chan net.PacketConn, connchan chan<- net.PacketConn, okchan chan<- struct{}, streamID int) { var backoff time.Duration for { select { @@ -758,7 +784,7 @@ func oneDtlsConnectionLoop(ctx context.Context, peer *net.UDPAddr, listenConnCha return case listenConn := <-listenConnChan: c := make(chan error) - go oneDtlsConnection(ctx, peer, listenConn, connchan, okchan, c) + go oneDtlsConnection(ctx, peer, listenConn, connchan, okchan, c, streamID) err := <-c if err != nil { log.Printf("%s", err) @@ -973,6 +999,16 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, wg1 := sync.WaitGroup{} t := time.Tick(200 * time.Millisecond) + // Re-roll the Stream-Aggregation session ID once per StartProxy. + // Each of the N DTLS sessions below will then prepend the same + // session ID + its own stream index after handshake, letting the + // receiver-side aggregator fuse them. No-op when the feature is + // off (default). + if streamAggIsEnabled() { + sid := freshStreamAggSession() + log.Printf("stream-agg: enabled, sessionID=%x (N=%d)", sid[:4], n) + } + // Spawn ALL N sessions in parallel. 
Sequential spawning made the // auto-captcha mode artificially slow at high nValue // (N=30 → 30 captchas back-to-back → ~3 minutes), even though @@ -995,7 +1031,7 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, sessionOk := make(chan struct{}) wg1.Go(func() { - oneDtlsConnectionLoop(ctx, peer, fanoutChan, cChan, sessionOk) + oneDtlsConnectionLoop(ctx, peer, fanoutChan, cChan, sessionOk, i) }) wg1.Go(func() { oneTurnConnectionLoop(ctx, params, peer, cChan, t) diff --git a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h index 7e0b07d..eaaa3b1 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h +++ b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h @@ -44,5 +44,6 @@ extern void TurnBridgeSetManualCaptchaCallback(manual_captcha_cb_t cb); extern void TurnBridgeSubmitManualCaptchaToken(const char *request_id, const char *token); extern void TurnBridgeCancelManualCaptcha(const char *request_id, const char *reason); extern void TurnBridgeSetManualCaptchaMode(int enabled); +extern void TurnBridgeSetStreamAggregation(int enabled); #endif From 3736253b6ffe02bad56fde5939bcbf8007f5b1a2 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 17:08:57 +0000 Subject: [PATCH 045/106] fix(turn_proxy): throttle parallel captcha solves to 5+jitter N=30 auto-captcha tripped VK's anti-bot rate-limit because all 30 captcha solves hit captcha.isNotRobot simultaneously: most got ERROR_LIMIT, slider fallback got status: ERROR, several sessions then fell back to reusing an existing identity. The reused identity made VK reject the second TURN allocation with error 486 (Allocation Quota Reached), leaving those sessions dead and the watchdog firing 60s later with "no inbound DTLS traffic for 1m0s". Cap the cold-path captcha solve concurrency with a 5-slot semaphore inside poolCreds and add 0-750 ms jitter at the start of each solve so the in-flight waves don't hit VK in lockstep. 
Cache hits skip both, so the warm path stays fast. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- .../Sources/WireGuardKitGo/turn_proxy.go | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index a833cf8..f123f10 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -840,12 +840,26 @@ type turnCred struct { user, pass, addr string } +// Max concurrent captcha solves against VK. Fully-parallel solves at +// N=30 trigger VK's anti-bot rate-limit (`ERROR_LIMIT` on +// captcha.isNotRobot, `status: ERROR` on slider getContent) and the +// per-IP TURN allocation cap (error 486). Five concurrent solves keeps +// the captcha pipeline well under VK's threshold while still scaling +// throughput roughly 5× over fully-serial (which was the d917a0e +// motivation in the first place). +const maxConcurrentCaptchaSolves = 5 + func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { var mu sync.Mutex var pool []turnCred var cTime time.Time var idx int + // Bounded-concurrency gate for captcha solves. Buffered channel + // acts as a semaphore: at most cap(solveSlot) goroutines hold a + // slot at a time, the rest block on send until a slot is released. + solveSlot := make(chan struct{}, maxConcurrentCaptchaSolves) + return func(link string) (string, string, string, error) { mu.Lock() @@ -855,7 +869,8 @@ func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { } // Cache-hit fast path: pool already at capacity, hand out a - // rotating cached cred and bail. + // rotating cached cred and bail. This path never touches the + // solve semaphore — only cold solves are throttled. 
if len(pool) >= poolSize { c := pool[idx%len(pool)] idx++ @@ -864,16 +879,21 @@ func poolCreds(f getCredsFunc, poolSize int) getCredsFunc { return c.user, c.pass, c.addr, nil } - // Cache-miss slow path: release the mutex during the captcha - // solve so N parallel callers (one per TURN session) actually - // fetch concurrently instead of serialising on this lock. - // Previously this mutex was held across f(link), which made - // nValue=30 take ~3 minutes (30 × 5–6 s) to warm the pool. - // The pre-existing 1 s "API rate-limit" sleep is also gone — - // it was meaningless under serialised access and actively - // harmful when callers run in parallel. + // Cache-miss slow path: release the mutex, take a solve slot, + // then call f(link). The mutex is dropped first so we don't + // serialise on it while waiting for a slot, and we don't hold + // it across the slow solve either. mu.Unlock() + + solveSlot <- struct{}{} + // 0–750 ms jitter desyncs the first wave so the 5 in-flight + // solves don't hit VK's anti-bot in lockstep. Cheap once a + // slot is acquired (we're about to do a 5 s network round-trip + // anyway), and cheap on hot-path because cache hits skip it. + time.Sleep(time.Duration(rand.Intn(750)) * time.Millisecond) u, p, a, err := f(link) + <-solveSlot + mu.Lock() defer mu.Unlock() From f309f42e64ba559e777a0678d10c0e8bfd7b48c8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 17:25:39 +0000 Subject: [PATCH 046/106] feat(captcha): trap unsolved captchas to App Group container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a "мухоловка" (flytrap) that buffers every slider captcha solve in memory and flushes to disk ONLY when the solve ultimately fails. Successful solves leave nothing behind. Failures drop a self-contained folder under /captcha_trap/_slider_/ containing: - getContent_response.json — raw VK response (incl. steps, attempts) - image. — decoded captcha image bytes (jpg etc.) 
- notes.log — per-attempt outcomes + parse/rank errors The trap covers the full failure surface: getContent transport errors, unparseable responses (a captcha variant we don't yet handle), rank failures, every per-guess rejection, and the ERROR_LIMIT bail-out. The image is base64-decoded straight from the raw response BEFORE parseSliderContent runs, so unknown captcha formats that break our parser still leave an inspectable artefact. Wired into PacketTunnelProvider: creates the directory under the App Group container and pushes the path to Go via the new TurnBridgeSetCaptchaTrapDir cgo export before StartProxy spins up. https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- network-extension/PacketTunnelProvider.swift | 13 ++ .../Sources/WireGuardKitGo/captcha_slider.go | 49 +++++ .../Sources/WireGuardKitGo/captcha_trap.go | 204 ++++++++++++++++++ .../Sources/WireGuardKitGo/wireguard.h | 1 + 4 files changed, 267 insertions(+) create mode 100644 wireguard-apple/Sources/WireGuardKitGo/captcha_trap.go diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 3b03c4a..8de9c3b 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -115,6 +115,19 @@ class PacketTunnelProvider: NEPacketTunnelProvider { // would race the per-session goroutines. TurnBridgeSetStreamAggregation(streamAggregation ? 1 : 0) + // Captcha trap: every slider captcha buffers its raw VK + // response + decoded image in memory and only flushes to disk + // when the solve ultimately fails. The artefacts land inside + // the App Group container so they show up in the Files app + // and survive across extension restarts. Passing the path + // before StartProxy ensures the very first solve is covered. 
+ if let container = FileManager.default.containerURL(forSecurityApplicationGroupIdentifier: CaptchaIPC.appGroupID) { + let trapDir = container.appendingPathComponent("captcha_trap", isDirectory: true) + try? FileManager.default.createDirectory(at: trapDir, withIntermediateDirectories: true) + trapDir.path.withCString { TurnBridgeSetCaptchaTrapDir($0) } + SharedLogger.info("Captcha trap dir: \(trapDir.path)", source: .tunnel) + } + let manualCaptchaEnabled = UserDefaults(suiteName: CaptchaIPC.appGroupID)? .bool(forKey: "manualCaptcha") ?? false TurnBridgeSetManualCaptchaMode(manualCaptchaEnabled ? 1 : 0) diff --git a/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go b/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go index db510da..bb544ae 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go +++ b/wireguard-apple/Sources/WireGuardKitGo/captcha_slider.go @@ -53,6 +53,15 @@ func solveSliderCaptcha( log.Printf("slider: fetching captcha content (settings=%q)", sliderSettings) + // Open a captcha trap. Every artefact we collect during the solve + // is buffered in memory and either Discarded (on success) or + // Committed (on any failure path). The deferred Discard is the + // safety net — explicit Commit calls in the failure branches run + // first, and Commit/Discard are idempotent. 
+ trap := newCaptchaTrap("slider") + defer trap.Discard() + trap.Note("settings_raw=%q", sliderSettings) + // Get scrambled image and swap instructions getContentData := baseParams if sliderSettings != "" { @@ -61,13 +70,38 @@ func solveSliderCaptcha( resp, err := vkReq("captchaNotRobot.getContent", getContentData) if err != nil { + trap.Note("getContent transport error: %v", err) + trap.Commit("getContent_transport_err") return "", fmt.Errorf("slider getContent: %w", err) } + // Save the raw getContent response and the image bytes as soon as + // we have them, BEFORE parsing — that way a new captcha variant + // that breaks parseSliderContent still leaves us a self-contained + // artefact to inspect. + if rawJSON, jerr := json.MarshalIndent(resp, "", " "); jerr == nil { + trap.Save("getContent_response.json", rawJSON) + } + if respMap, ok := resp["response"].(map[string]interface{}); ok { + if imgStr, ok := respMap["image"].(string); ok && imgStr != "" { + if rawBytes, derr := base64.StdEncoding.DecodeString(imgStr); derr == nil { + ext := "bin" + if e, ok := respMap["extension"].(string); ok && e != "" { + ext = strings.ToLower(e) + } + trap.Save("image."+ext, rawBytes) + } + } + } + content, err := parseSliderContent(resp) if err != nil { + trap.Note("parseSliderContent failed: %v", err) + trap.Commit("unparseable_response") return "", fmt.Errorf("slider parse: %w", err) } + trap.Note("parsed grid=%dx%d swaps=%d attempts=%d", + content.GridW, content.GridH, len(content.Steps)/2, content.Attempts) log.Printf("slider: image=%dx%d grid=%dx%d steps=%d attempts=%d", content.Image.Bounds().Dx(), content.Image.Bounds().Dy(), @@ -76,6 +110,8 @@ func solveSliderCaptcha( // Rank candidate positions by pixel border continuity candidates, err := rankSliderCandidates(content.Image, content.GridW, content.GridH, content.Steps) if err != nil { + trap.Note("rank failed: %v", err) + trap.Commit("rank_failed") return "", fmt.Errorf("slider rank: %w", err) } @@ -93,6 +129,8 @@ 
func solveSliderCaptcha( answer, err := encodeSliderAnswer(c.ActiveSteps) if err != nil { + trap.Note("encodeSliderAnswer failed: %v", err) + trap.Commit("encode_answer_err") return "", err } @@ -111,24 +149,34 @@ func solveSliderCaptcha( checkResp, err := vkReq("captchaNotRobot.check", checkData) if err != nil { + trap.Note("attempt %d/%d transport err: %v", i+1, maxTries, err) + trap.Commit("check_transport_err") return "", fmt.Errorf("slider check: %w", err) } respObj, ok := checkResp["response"].(map[string]interface{}) if !ok { + trap.Note("attempt %d/%d invalid response: %v", i+1, maxTries, checkResp) + trap.Commit("check_invalid_response") return "", fmt.Errorf("slider check: invalid response") } status, _ := respObj["status"].(string) + trap.Note("attempt %d/%d position=%d score=%d → status=%s", + i+1, maxTries, c.Index, c.Score, status) switch status { case "OK": successToken, _ := respObj["success_token"].(string) if successToken == "" { + trap.Note("OK but success_token missing in: %v", respObj) + trap.Commit("ok_without_token") return "", fmt.Errorf("slider: success_token not found") } log.Printf("slider: solved! position=%d (attempt %d/%d)", c.Index, i+1, maxTries) + // Deferred Discard frees the buffer — nothing reaches disk. 
return successToken, nil case "ERROR_LIMIT": + trap.Commit("error_limit") return "", fmt.Errorf("slider: ERROR_LIMIT") default: log.Printf("slider: position=%d rejected (status=%s)", c.Index, status) @@ -136,6 +184,7 @@ func solveSliderCaptcha( } } + trap.Commit("all_guesses_rejected") return "", fmt.Errorf("slider: all %d guesses rejected", maxTries) } diff --git a/wireguard-apple/Sources/WireGuardKitGo/captcha_trap.go b/wireguard-apple/Sources/WireGuardKitGo/captcha_trap.go new file mode 100644 index 0000000..15e35bb --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/captcha_trap.go @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: MIT +// +// Captcha trap ("мухоловка"): buffers every captcha challenge in memory +// while the solver runs and flushes the buffer to disk ONLY if the solve +// ultimately fails. Successful solves leave nothing behind. Failed +// solves drop a self-contained folder (raw VK response JSON, the image +// bytes, a notes log) into the App Group container so we can inspect +// captcha variants we don't yet handle. +// +// Wiring: Swift creates the trap directory under the App Group container +// and pushes the absolute path here via TurnBridgeSetCaptchaTrapDir +// before StartProxy. If the path is empty, every trap call is a no-op +// (the feature simply doesn't engage). 
+ +package main + +/* +#include +*/ +import "C" + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "log" + "os" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "time" +) + +var ( + captchaTrapDir atomic.Value // string +) + +//export TurnBridgeSetCaptchaTrapDir +func TurnBridgeSetCaptchaTrapDir(cPath *C.char) { + if cPath == nil { + captchaTrapDir.Store("") + return + } + path := C.GoString(cPath) + captchaTrapDir.Store(path) + if path == "" { + log.Printf("captcha-trap: disabled (empty path)") + return + } + if err := os.MkdirAll(path, 0o755); err != nil { + log.Printf("captcha-trap: mkdir %q failed: %v — feature off", path, err) + captchaTrapDir.Store("") + return + } + log.Printf("captcha-trap: artifacts → %s", path) +} + +func captchaTrapRoot() string { + v, _ := captchaTrapDir.Load().(string) + return v +} + +type captchaTrap struct { + label string + started time.Time + + mu sync.Mutex + files map[string][]byte + notes []string + flushed bool +} + +// newCaptchaTrap opens an in-memory artifact buffer. Safe to call even +// when the trap is disabled (returns a no-op handle). +func newCaptchaTrap(label string) *captchaTrap { + return &captchaTrap{ + label: label, + started: time.Now(), + files: map[string][]byte{}, + } +} + +// Save records an artifact (a file that will be written to disk if the +// trap commits). The data is copied so callers can reuse the slice. +func (t *captchaTrap) Save(name string, data []byte) { + if t == nil || captchaTrapRoot() == "" { + return + } + t.mu.Lock() + defer t.mu.Unlock() + if t.flushed { + return + } + cp := make([]byte, len(data)) + copy(cp, data) + t.files[sanitizeArtifactName(name)] = cp +} + +// Note appends a human-readable line that lands in notes.log on commit. 
+func (t *captchaTrap) Note(format string, args ...any) { + if t == nil || captchaTrapRoot() == "" { + return + } + t.mu.Lock() + defer t.mu.Unlock() + if t.flushed { + return + } + t.notes = append(t.notes, fmt.Sprintf("[%s] %s", + time.Now().Format("15:04:05.000"), + fmt.Sprintf(format, args...))) +} + +// Commit flushes the buffer to disk under a fresh subdirectory. Safe to +// call multiple times; only the first call writes. +func (t *captchaTrap) Commit(reason string) { + if t == nil { + return + } + root := captchaTrapRoot() + if root == "" { + return + } + t.mu.Lock() + defer t.mu.Unlock() + if t.flushed { + return + } + t.flushed = true + + subdir := filepath.Join(root, fmt.Sprintf("%s_%s_%s", + t.started.Format("20060102_150405"), + t.label, + shortRandHex(3))) + if err := os.MkdirAll(subdir, 0o755); err != nil { + log.Printf("captcha-trap: commit mkdir %q failed: %v", subdir, err) + return + } + + for name, data := range t.files { + if err := os.WriteFile(filepath.Join(subdir, name), data, 0o644); err != nil { + log.Printf("captcha-trap: write %s/%s failed: %v", subdir, name, err) + } + } + + notesBlob := strings.Builder{} + fmt.Fprintf(&notesBlob, "label: %s\n", t.label) + fmt.Fprintf(&notesBlob, "reason: %s\n", reason) + fmt.Fprintf(&notesBlob, "started: %s\n", t.started.Format(time.RFC3339Nano)) + fmt.Fprintf(&notesBlob, "duration: %s\n", time.Since(t.started)) + notesBlob.WriteString("---\n") + for _, n := range t.notes { + notesBlob.WriteString(n) + notesBlob.WriteByte('\n') + } + _ = os.WriteFile(filepath.Join(subdir, "notes.log"), []byte(notesBlob.String()), 0o644) + + log.Printf("captcha-trap: saved %d artefacts to %s (reason=%s)", + len(t.files), subdir, reason) +} + +// Discard drops the in-memory buffer without touching disk. The deferred +// safety net for the happy path: if the solve returns a success token, +// Discard frees the buffer and nothing is persisted. 
+func (t *captchaTrap) Discard() { + if t == nil { + return + } + t.mu.Lock() + defer t.mu.Unlock() + if t.flushed { + return + } + t.flushed = true + t.files = nil + t.notes = nil +} + +func sanitizeArtifactName(name string) string { + // Keep filenames flat and predictable — anything iOS' file browsers + // can choke on (slashes, leading dots) gets normalised away. + cleaned := strings.Map(func(r rune) rune { + switch { + case r == '/' || r == '\\' || r == 0: + return '_' + default: + return r + } + }, name) + cleaned = strings.TrimLeft(cleaned, ".") + if cleaned == "" { + cleaned = "artifact" + } + return cleaned +} + +func shortRandHex(n int) string { + b := make([]byte, n) + if _, err := rand.Read(b); err != nil { + return "noid" + } + return hex.EncodeToString(b) +} diff --git a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h index eaaa3b1..321a05f 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h +++ b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h @@ -45,5 +45,6 @@ extern void TurnBridgeSubmitManualCaptchaToken(const char *request_id, const cha extern void TurnBridgeCancelManualCaptcha(const char *request_id, const char *reason); extern void TurnBridgeSetManualCaptchaMode(int enabled); extern void TurnBridgeSetStreamAggregation(int enabled); +extern void TurnBridgeSetCaptchaTrapDir(const char *path); #endif From e04c020fd4a307a5b5f044030a142997398ef341 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 17:40:06 +0000 Subject: [PATCH 047/106] feat: phased bring-up with dual-egress captcha solves and live UI counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VK rate-limits captcha.isNotRobot per source IP (~16 concurrent before ERROR_LIMIT). To get N > 16 reliably, two source IPs are now used: - "direct" — the user's mobile IP, used by ONE bootstrap session. 
- "tunnel" — the WG server's egress IP, used by every session spawned after WG handshake completes. The extension's own outbound HTTP routes through utun automatically under includeAllNetworks=true. Flow: 1. StartProxy spawns exactly one bootstrap session and waits for its DTLS handshake. proxyReady fires immediately so Swift starts the WG adapter through that single session. 2. A background goroutine waits 2 s for WG handshake to complete, flips captchaTunnelEgress, and starts spawning the remaining N-1 sessions one-by-one with 800 ms stagger. Their captcha HTTP goes through utun → WG server → api.vk.ru, hitting VK from a different source IP. 3. If captchaTunnelSat trips (ERROR_LIMIT on tunneled solves too), the deferred spawn loop stops early — no point producing dead lanes the watchdog would kill anyway. Fanout dispatcher now skips inactive lanes via an `active` atomic on each fanoutPacketConn, set the moment that session's DTLS reaches sessionOk. Without this gate the bootstrap-only phase would round-robin WG packets into channels nobody is draining yet. Manual-captcha mode keeps the single-phase "all N before WG" barrier (each WebView is presented sequentially anyway, and the captcha sheet must reach id.vk.ru OUTSIDE the tunnel because includeAllNetworks=false in that mode). UI: a CaptchaStatsBadge below the profile picker shows live Direct/ Tunnel counts polled from App Group UserDefaults, with a warning icon next to either pool when its saturation flag is set. Server-side note: the design assumes the kiper292/vk-turn-proxy aggregator accepts new stream IDs dynamically (sessions added after the first one is already aggregated). With stream-agg off the WG server endpoint-roams to each new TURN source — brief flap, then WG recovers via its standard roaming logic. 
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/CaptchaStatsBadge.swift | 101 +++++++++++++++ TurnBridge/ContentView.swift | 12 +- network-extension/PacketTunnelProvider.swift | 39 ++++++ .../Sources/WireGuardKitGo/captcha_stats.go | 88 +++++++++++++ .../Sources/WireGuardKitGo/turn_proxy.go | 119 ++++++++++++++---- .../Sources/WireGuardKitGo/udp_fanout.go | 34 ++++- .../Sources/WireGuardKitGo/vk_captcha.go | 15 +++ .../Sources/WireGuardKitGo/wireguard.h | 4 + 8 files changed, 381 insertions(+), 31 deletions(-) create mode 100644 TurnBridge/CaptchaStatsBadge.swift create mode 100644 wireguard-apple/Sources/WireGuardKitGo/captcha_stats.go diff --git a/TurnBridge/CaptchaStatsBadge.swift b/TurnBridge/CaptchaStatsBadge.swift new file mode 100644 index 0000000..699de8e --- /dev/null +++ b/TurnBridge/CaptchaStatsBadge.swift @@ -0,0 +1,101 @@ +import SwiftUI +import Combine + +/// Polls captcha solve counters published by PacketTunnelProvider into +/// the App Group's shared UserDefaults. Surfaces two numbers in the UI: +/// +/// "Direct" — captchas solved from the user's mobile IP. Bounded +/// above by VK's per-IP rate-limit (~16 in practice). +/// "Tunnel" — captchas solved from the WG server's egress IP, which +/// kicks in for sessions spawned AFTER WG handshake +/// completes through the bootstrap fleet. Independent +/// budget from direct, so e.g. N=30 can yield 16 direct +/// + 14 tunnel and all 30 sessions come up. +@MainActor +final class CaptchaStatsState: ObservableObject { + @Published private(set) var direct: Int = 0 + @Published private(set) var tunnel: Int = 0 + @Published private(set) var directSaturated: Bool = false + @Published private(set) var tunnelSaturated: Bool = false + + private var timer: AnyCancellable? 
+ + func start() { + guard timer == nil else { return } + refresh() + timer = Timer.publish(every: 1, on: .main, in: .common) + .autoconnect() + .sink { [weak self] _ in self?.refresh() } + } + + func stop() { + timer?.cancel() + timer = nil + direct = 0 + tunnel = 0 + directSaturated = false + tunnelSaturated = false + } + + private func refresh() { + guard let defaults = UserDefaults(suiteName: "group.com.truvvor.turnbridge") else { + return + } + direct = defaults.integer(forKey: "captchaDirectCount") + tunnel = defaults.integer(forKey: "captchaTunnelCount") + directSaturated = defaults.bool(forKey: "captchaDirectSaturated") + tunnelSaturated = defaults.bool(forKey: "captchaTunnelSaturated") + } +} + +struct CaptchaStatsBadge: View { + @ObservedObject var stats: CaptchaStatsState + + var body: some View { + if stats.direct == 0 && stats.tunnel == 0 { + EmptyView() + } else { + HStack(spacing: 14) { + cell(label: "Direct", + value: stats.direct, + saturated: stats.directSaturated, + accent: .blue) + Divider() + .frame(height: 22) + cell(label: "Tunnel", + value: stats.tunnel, + saturated: stats.tunnelSaturated, + accent: .green) + } + .padding(.horizontal, 14) + .padding(.vertical, 8) + .background(.regularMaterial) + .clipShape(RoundedRectangle(cornerRadius: 12)) + .overlay( + RoundedRectangle(cornerRadius: 12) + .strokeBorder(Color.secondary.opacity(0.25), lineWidth: 1) + ) + } + } + + private func cell(label: String, value: Int, saturated: Bool, accent: Color) -> some View { + VStack(spacing: 1) { + HStack(spacing: 4) { + Text("\(value)") + .font(.system(size: 18, weight: .semibold, design: .rounded)) + .foregroundColor(accent) + if saturated { + // Saturation = VK started returning ERROR_LIMIT for + // this egress, so the pool is effectively closed + // until VK's window resets. 
+ Image(systemName: "exclamationmark.octagon.fill") + .font(.system(size: 11)) + .foregroundColor(.orange) + } + } + Text(label) + .font(.system(size: 11, weight: .medium, design: .rounded)) + .foregroundColor(.secondary) + } + } +} diff --git a/TurnBridge/ContentView.swift b/TurnBridge/ContentView.swift index 7bebf0d..556c244 100755 --- a/TurnBridge/ContentView.swift +++ b/TurnBridge/ContentView.swift @@ -13,6 +13,7 @@ struct ContentView: View { @State private var vpnStatus: NEVPNStatus = .disconnected @StateObject private var transportHealth = TransportHealthState() @StateObject private var store = ProfileStore() + @StateObject private var captchaStats = CaptchaStatsState() @State private var showImportModal = false @State private var showingAlert = false @@ -54,6 +55,11 @@ struct ContentView: View { .animation(.easeInOut, value: transportHealth.isStalled) } + if vpnStatus == .connecting || vpnStatus == .connected { + CaptchaStatsBadge(stats: captchaStats) + .padding(.top, 6) + } + Spacer() VStack(spacing: 50) { @@ -128,8 +134,12 @@ struct ContentView: View { .onAppear { checkInitialStatus() transportHealth.start() + captchaStats.start() + } + .onDisappear { + transportHealth.stop() + captchaStats.stop() } - .onDisappear { transportHealth.stop() } .onReceive(NotificationCenter.default.publisher(for: .NEVPNStatusDidChange)) { notification in if let connection = notification.object as? NEVPNConnection { let newStatus = connection.status diff --git a/network-extension/PacketTunnelProvider.swift b/network-extension/PacketTunnelProvider.swift index 8de9c3b..41b2719 100755 --- a/network-extension/PacketTunnelProvider.swift +++ b/network-extension/PacketTunnelProvider.swift @@ -43,6 +43,7 @@ class PacketTunnelProvider: NEPacketTunnelProvider { private var lastPathStatus: Network.NWPath.Status? private var lastPathInterfaceLabel: String? private var lastTransportRestartAt = Date.distantPast + private var captchaStatsTimer: DispatchSourceTimer? 
/// Tear down the current TURN/DTLS cycle and let the proxy spin up @@ -160,6 +161,8 @@ class PacketTunnelProvider: NEPacketTunnelProvider { StartProxy(vkLink, peerAddr, listenAddr, nValue, udpFlag) } + startCaptchaStatsPublisher() + DispatchQueue.global(qos: .userInteractive).async { [weak self] in let ready = ProxyWaitReady(dtlsReadyTimeoutMs) guard let self = self else { return } @@ -305,6 +308,7 @@ class PacketTunnelProvider: NEPacketTunnelProvider { lastPathStatus = nil lastPathInterfaceLabel = nil + stopCaptchaStatsPublisher() StopProxy() SharedLogger.info("TURN proxy stopped", source: .tunnel) TransportHealthMonitor.reset() @@ -333,6 +337,41 @@ class PacketTunnelProvider: NEPacketTunnelProvider { completionHandler?(response) } + /// Periodically copy the Go-side captcha counters into the App + /// Group's shared UserDefaults so the main app's UI can render + /// "Direct: X · Tunnel: Y" without an IPC round-trip every tick. + /// Reset to 0/0 happens on disconnect so the previous run's + /// numbers don't ghost into the next connection. 
+ private func startCaptchaStatsPublisher() { + stopCaptchaStatsPublisher() + let timer = DispatchSource.makeTimerSource(queue: DispatchQueue.global(qos: .utility)) + timer.schedule(deadline: .now(), repeating: .seconds(1)) + timer.setEventHandler { + let direct = Int(TurnBridgeGetCaptchaDirectCount()) + let tunnel = Int(TurnBridgeGetCaptchaTunnelCount()) + let directSat = TurnBridgeIsCaptchaDirectSaturated() != 0 + let tunnelSat = TurnBridgeIsCaptchaTunnelSaturated() != 0 + guard let defaults = UserDefaults(suiteName: CaptchaIPC.appGroupID) else { return } + defaults.set(direct, forKey: "captchaDirectCount") + defaults.set(tunnel, forKey: "captchaTunnelCount") + defaults.set(directSat, forKey: "captchaDirectSaturated") + defaults.set(tunnelSat, forKey: "captchaTunnelSaturated") + } + timer.resume() + captchaStatsTimer = timer + } + + private func stopCaptchaStatsPublisher() { + captchaStatsTimer?.cancel() + captchaStatsTimer = nil + if let defaults = UserDefaults(suiteName: CaptchaIPC.appGroupID) { + defaults.set(0, forKey: "captchaDirectCount") + defaults.set(0, forKey: "captchaTunnelCount") + defaults.set(false, forKey: "captchaDirectSaturated") + defaults.set(false, forKey: "captchaTunnelSaturated") + } + } + override func sleep(completionHandler: @escaping () -> Void) { // iOS is about to suspend us. Don't tear anything down (iOS will // resume us via wake()), but record the moment so wake() can decide diff --git a/wireguard-apple/Sources/WireGuardKitGo/captcha_stats.go b/wireguard-apple/Sources/WireGuardKitGo/captcha_stats.go new file mode 100644 index 0000000..f03bd53 --- /dev/null +++ b/wireguard-apple/Sources/WireGuardKitGo/captcha_stats.go @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: MIT +// +// Per-connect captcha solve counters and saturation flags. +// +// Two egress IPs feed our captcha solves once phased bring-up is on: +// +// * "direct" — the user's mobile IP, used by the bootstrap session +// before WG comes up. 
+// * "tunnel" — the WG server's egress IP, used by every session +// spawned AFTER WG handshake completes (the extension's +// own net/http auto-routes through utun under +// includeAllNetworks=true). +// +// VK enforces captcha.isNotRobot rate-limits per source IP, so the two +// pools have independent budgets. The UI surfaces both counts so the +// user can see how many sessions came up via each route. The two +// saturation flags (`direct` / `tunnel`) flip on the first +// ERROR_LIMIT seen in that mode and let StartProxy stop spawning new +// sessions once the tunneled egress is also exhausted. + +package main + +/* +#include +*/ +import "C" + +import ( + "sync/atomic" +) + +var ( + captchaDirectOK atomic.Int64 + captchaTunnelOK atomic.Int64 + captchaDirectSat atomic.Bool // ERROR_LIMIT seen on direct egress + captchaTunnelSat atomic.Bool // ERROR_LIMIT seen on tunnel egress + captchaTunnelEgress atomic.Bool // true once we believe HTTP from this extension routes through utun +) + +func resetCaptchaStats() { + captchaDirectOK.Store(0) + captchaTunnelOK.Store(0) + captchaDirectSat.Store(false) + captchaTunnelSat.Store(false) + captchaTunnelEgress.Store(false) +} + +func markCaptchaSuccess() { + if captchaTunnelEgress.Load() { + captchaTunnelOK.Add(1) + } else { + captchaDirectOK.Add(1) + } +} + +func markCaptchaSaturated() { + if captchaTunnelEgress.Load() { + captchaTunnelSat.Store(true) + } else { + captchaDirectSat.Store(true) + } +} + +//export TurnBridgeGetCaptchaDirectCount +func TurnBridgeGetCaptchaDirectCount() C.int { + return C.int(captchaDirectOK.Load()) +} + +//export TurnBridgeGetCaptchaTunnelCount +func TurnBridgeGetCaptchaTunnelCount() C.int { + return C.int(captchaTunnelOK.Load()) +} + +//export TurnBridgeIsCaptchaDirectSaturated +func TurnBridgeIsCaptchaDirectSaturated() C.int { + if captchaDirectSat.Load() { + return 1 + } + return 0 +} + +//export TurnBridgeIsCaptchaTunnelSaturated +func TurnBridgeIsCaptchaTunnelSaturated() C.int { + if 
captchaTunnelSat.Load() { + return 1 + } + return 0 +} diff --git a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go index f123f10..b83755c 100755 --- a/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go +++ b/wireguard-apple/Sources/WireGuardKitGo/turn_proxy.go @@ -1029,23 +1029,41 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, log.Printf("stream-agg: enabled, sessionID=%x (N=%d)", sid[:4], n) } - // Spawn ALL N sessions in parallel. Sequential spawning made the - // auto-captcha mode artificially slow at high nValue - // (N=30 → 30 captchas back-to-back → ~3 minutes), even though - // the in-tunnel captcha solver runs entirely inside the - // extension's Go code with no UI dependency and can absolutely - // be parallelised. In manual-captcha mode the UI is the natural - // serialiser (it only presents one WebView sheet at a time), so - // spawning all N at once still produces a sequential user-facing - // flow there — no regression. + // Phased bring-up driven by adaptive captcha-egress budget. // - // We still wait for ALL N to signal okchan before sending to - // proxyReady, otherwise utun comes up half-built and the manual - // captcha WebView for not-yet-ready sessions hangs (see commit - // ca8b19f for the original rationale of this barrier). + // VK rate-limits captcha.isNotRobot per source IP. We have two + // budgets available: + // + // "direct" — the user's mobile IP. Used by the bootstrap + // session that comes up before WG. + // "tunnel" — the WG server's egress IP. Once WG handshake + // completes, this extension's outbound HTTP routes + // through utun automatically (includeAllNetworks=true). + // + // Sequence: + // + // 1. Spawn ONE bootstrap session, solve its captcha from direct. + // 2. Signal proxyReady so Swift starts the WG adapter through + // that single TURN session. + // 3. 
Wait briefly for WG handshake; flip captchaTunnelEgress so + // subsequent solves are attributed to "tunnel". + // 4. Spawn the remaining N-1 sessions one-by-one (small stagger + // so they don't slam VK from the same IP simultaneously). + // Each new session's captcha goes through utun → WG server + // → api.vk.ru, hitting VK from a fresh per-IP budget. + // 5. If captchaTunnelSat trips (ERROR_LIMIT on the tunnel + // egress too), stop spawning further sessions — we've + // saturated both pools and adding more just produces dead + // lanes that the watchdog tears down. + // + // Manual-captcha mode keeps the single-phase "all N before WG" + // barrier: each WebView is presented one-at-a-time anyway, and + // the UI flow assumes the captcha sheet can still reach id.vk.ru + // outside the tunnel (includeAllNetworks=false in that mode). + resetCaptchaStats() + sessionReady := make(chan int, n) - for i := 0; i < n; i++ { - i := i + spawnSession := func(i int) { fanoutChan := makeFanoutChan(fanouts[i]) cChan := make(chan net.PacketConn) sessionOk := make(chan struct{}) @@ -1056,38 +1074,89 @@ func StartProxy(cLink *C.char, cPeerAddr *C.char, cLocalAddr *C.char, cN C.int, wg1.Go(func() { oneTurnConnectionLoop(ctx, params, peer, cChan, t) }) - // One forwarder per session that pushes its index onto the - // aggregate ready channel as soon as that session establishes - // its DTLS. Decouples the wait-for-all loop below from the - // per-session okchan semantics. go func() { select { case <-sessionOk: + // Make this lane visible to the fanout dispatcher. + // Until now the dispatcher was skipping it because + // nothing was draining its incoming channel. 
+ fanouts[i].active.Store(true) sessionReady <- i case <-ctx.Done(): } }() } - for k := 0; k < n; k++ { + bootstrap := 1 + if manualCaptchaForcedMode() || n == 1 { + bootstrap = n + } + log.Printf("StartProxy: bootstrap=%d, deferred=%d (manual=%v)", + bootstrap, n-bootstrap, manualCaptchaForcedMode()) + + for i := 0; i < bootstrap; i++ { + spawnSession(i) + } + for k := 0; k < bootstrap; k++ { select { case idx := <-sessionReady: - log.Printf("StartProxy: session %d ready (%d/%d total)", idx+1, k+1, n) + log.Printf("StartProxy: bootstrap session %d ready (%d/%d)", idx+1, k+1, bootstrap) case <-ctx.Done(): - log.Printf("StartProxy: cancelled, only %d/%d sessions came up", k, n) + log.Printf("StartProxy: cancelled during bootstrap, %d/%d up", k, bootstrap) wg1.Wait() return } } - // All N TURN allocations are alive. NOW it's safe to let Swift - // bring up the WG adapter and route the user's traffic into utun. + // Bootstrap fleet is alive — let Swift bring up WG. select { case proxyReady <- struct{}{}: default: } - log.Printf("Proxy started on %s with %d parallel TURN session(s)", localAddrStr, n) + if bootstrap < n { + // Spawn the deferred fleet in the background so StartProxy + // returns and the WG adapter actually gets a chance to come + // up. wg1.Wait below still blocks on every spawned session. + wg1.Go(func() { + // Give WG handshake ~2 s to complete through the single + // bootstrap session. Once it's up, this extension's + // own outbound HTTP routes through utun and the + // remaining captchas hit VK from the WG server's egress. + select { + case <-time.After(2 * time.Second): + case <-ctx.Done(): + return + } + captchaTunnelEgress.Store(true) + log.Printf("StartProxy: tunnel egress engaged; spawning deferred fleet (target=%d)", n) + + // Per-session stagger of 800 ms. 
Without it, all 30 + // captcha solves still arrive at VK within a couple + // of seconds, just from a different IP — VK's + // per-IP rate-limit window then fires for the + // tunnel egress too. 800 ms × 30 ≈ 24 s total + // warm-up, comfortably under VK's per-IP burst budget. + for i := bootstrap; i < n; i++ { + if ctx.Err() != nil { + return + } + if captchaTunnelSat.Load() { + log.Printf("StartProxy: tunnel egress also rate-limited; stopping at %d/%d sessions", + i, n) + return + } + spawnSession(i) + select { + case <-time.After(800 * time.Millisecond): + case <-ctx.Done(): + return + } + } + }) + } + + log.Printf("Proxy started on %s with %d parallel TURN session(s) requested (phased)", localAddrStr, n) wg1.Wait() } diff --git a/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go b/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go index e3f9e55..5b5e485 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go +++ b/wireguard-apple/Sources/WireGuardKitGo/udp_fanout.go @@ -71,6 +71,15 @@ type fanoutPacketConn struct { deadlineTimer *time.Timer dropped atomic.Uint64 // packets the dispatcher tried to enqueue but the channel was full + + // active=false ⇒ dispatcher skips this lane. Used during phased + // bring-up: wave-2 fanouts exist before their DTLS sessions are + // up; without this gate the dispatcher round-robins WG packets + // into channels nobody is draining, the buffer fills, and packets + // get dropped instead of being delivered to the wave-1 lanes that + // are actually live. Flipped to true the instant the matching + // oneDtlsConnection signals sessionOk. + active atomic.Bool } func newFanoutPacketConn(id int, real net.PacketConn) *fanoutPacketConn { @@ -228,11 +237,26 @@ func startFanoutDispatcher(ctx context.Context, listenConn net.PacketConn, fanou data := make([]byte, n) copy(data, buf[:n]) - // Pick the next fanout. 
Use atomic counter so a future - // flow-hash dispatch could swap in here without changing - // the rest of the loop. - i := atomic.AddUint64(&rrIdx, 1) % uint64(len(fanouts)) - f := fanouts[i] + // Pick the next ACTIVE fanout. During phased bring-up some + // fanouts exist but their DTLS sessions aren't up yet; + // posting to them would just fill the channel buffer and + // then drop. Linear probe from the round-robin cursor — + // O(N) worst case is fine for our N ≤ 100. + total := uint64(len(fanouts)) + var f *fanoutPacketConn + for k := uint64(0); k < total; k++ { + i := (atomic.AddUint64(&rrIdx, 1) - 1) % total + if fanouts[i].active.Load() { + f = fanouts[i] + break + } + } + if f == nil { + // No active lanes — the bootstrap fleet hasn't come + // up yet, or all sessions died. Drop and account. + atomic.AddUint64(&dropped, 1) + continue + } select { case f.incoming <- fanoutPacket{data: data, addr: addr}: diff --git a/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go b/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go index 2933431..6756d41 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go +++ b/wireguard-apple/Sources/WireGuardKitGo/vk_captcha.go @@ -397,10 +397,21 @@ func callCaptchaNotRobot(ctx context.Context, client *http.Client, profile Profi if ok && successToken != "" { log.Printf("[Captcha] Step 4/4: endSession") _, _ = vkReq("captchaNotRobot.endSession", baseParams) + markCaptchaSuccess() return successToken, nil } } + if status == "ERROR_LIMIT" { + // Mark the egress that owns this request as saturated. The + // bootstrap fleet runs from the client IP (saturates direct); + // the deferred fleet runs after WG handshake completes so its + // HTTP routes through utun and saturates the tunnel egress. + // StartProxy reads these flags to stop spawning new sessions + // when the second pool is also dry. 
+ markCaptchaSaturated() + } + // Checkbox failed — try slider captcha log.Printf("[Captcha] Checkbox failed, trying slider captcha...") @@ -412,11 +423,15 @@ func callCaptchaNotRobot(ctx context.Context, client *http.Client, profile Profi sliderToken, sliderErr := solveSliderCaptcha(vkReq, baseParams, browserFp, hash, mergedSettings) if sliderErr != nil { + if strings.Contains(sliderErr.Error(), "ERROR_LIMIT") { + markCaptchaSaturated() + } return "", fmt.Errorf("slider captcha also failed: %w", sliderErr) } log.Printf("[Captcha] Slider solved! endSession...") _, _ = vkReq("captchaNotRobot.endSession", baseParams) + markCaptchaSuccess() return sliderToken, nil } diff --git a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h index 321a05f..d588a5d 100644 --- a/wireguard-apple/Sources/WireGuardKitGo/wireguard.h +++ b/wireguard-apple/Sources/WireGuardKitGo/wireguard.h @@ -46,5 +46,9 @@ extern void TurnBridgeCancelManualCaptcha(const char *request_id, const char *re extern void TurnBridgeSetManualCaptchaMode(int enabled); extern void TurnBridgeSetStreamAggregation(int enabled); extern void TurnBridgeSetCaptchaTrapDir(const char *path); +extern int TurnBridgeGetCaptchaDirectCount(void); +extern int TurnBridgeGetCaptchaTunnelCount(void); +extern int TurnBridgeIsCaptchaDirectSaturated(void); +extern int TurnBridgeIsCaptchaTunnelSaturated(void); #endif From 0cbc9d6184b732b8eccf7eda8a41a4b09162ab8a Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 17:51:25 +0000 Subject: [PATCH 048/106] feat: drain direct egress first + always-on stats badge + captcha viewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues from the last test: 1. Phased bring-up burned only the direct slot for ONE session before switching everything to tunnel. 
Reworked StartProxy to do exactly what the user asked for: spawn direct sessions one at a time with 400 ms stagger until either N is reached OR captchaDirectSat trips (ERROR_LIMIT on a captcha solve). Only then bridge to phase B, wait 2 s for WG handshake, flip captchaTunnelEgress, and spawn the rest one at a time with 800 ms stagger until either N is reached or captchaTunnelSat trips. Direct's full per-IP budget gets drained first instead of being wasted at 1/16th utilisation. 2. Direct/Tunnel counters were hidden when both were zero, so the user saw no UI feedback when pooled creds were reused (no fresh captcha solves → counters stay 0). Badge is now always visible while connecting/connected — zeros are themselves information. 3. There was nowhere to view captured captchas. The trap was correctly writing folders into the App Group container, but iOS doesn't expose extension sandboxes to the Files app so they were effectively invisible. Added a Captured Captchas screen under Settings → Captcha that lists each entry with timestamp + commit-reason, lets the user open one to see the image inline with the raw VK response + notes.log, and exports via the share sheet. Also deleted TransportHealthBanner.swift — the flashing "Connection unstable" banner during normal wake reconnects is annoying and the underlying TransportHealthMonitor in the extension still tracks alive/dead for log-line analysis, so removing the UI doesn't lose diagnostic capability.
https://claude.ai/code/session_01TV73kDE6EgfVBEUBHbvYbY --- TurnBridge/CaptchaStatsBadge.swift | 46 ++-- TurnBridge/CapturedCaptchasView.swift | 232 ++++++++++++++++++ TurnBridge/ContentView.swift | 12 +- TurnBridge/GlobalSettingsView.swift | 7 + TurnBridge/TransportHealthBanner.swift | 85 ------- .../Sources/WireGuardKitGo/turn_proxy.go | 201 ++++++++++----- 6 files changed, 402 insertions(+), 181 deletions(-) create mode 100644 TurnBridge/CapturedCaptchasView.swift delete mode 100644 TurnBridge/TransportHealthBanner.swift diff --git a/TurnBridge/CaptchaStatsBadge.swift b/TurnBridge/CaptchaStatsBadge.swift index 699de8e..3495176 100644 --- a/TurnBridge/CaptchaStatsBadge.swift +++ b/TurnBridge/CaptchaStatsBadge.swift @@ -52,30 +52,30 @@ struct CaptchaStatsBadge: View { @ObservedObject var stats: CaptchaStatsState var body: some View { - if stats.direct == 0 && stats.tunnel == 0 { - EmptyView() - } else { - HStack(spacing: 14) { - cell(label: "Direct", - value: stats.direct, - saturated: stats.directSaturated, - accent: .blue) - Divider() - .frame(height: 22) - cell(label: "Tunnel", - value: stats.tunnel, - saturated: stats.tunnelSaturated, - accent: .green) - } - .padding(.horizontal, 14) - .padding(.vertical, 8) - .background(.regularMaterial) - .clipShape(RoundedRectangle(cornerRadius: 12)) - .overlay( - RoundedRectangle(cornerRadius: 12) - .strokeBorder(Color.secondary.opacity(0.25), lineWidth: 1) - ) + // Always visible during connecting/connected: zeros give the + // user feedback that the counters exist and that they haven't + // yet incremented this connect cycle (often the case when + // pooled creds are reused without a fresh captcha solve). 
+ HStack(spacing: 14) { + cell(label: "Direct", + value: stats.direct, + saturated: stats.directSaturated, + accent: .blue) + Divider() + .frame(height: 22) + cell(label: "Tunnel", + value: stats.tunnel, + saturated: stats.tunnelSaturated, + accent: .green) } + .padding(.horizontal, 14) + .padding(.vertical, 8) + .background(.regularMaterial) + .clipShape(RoundedRectangle(cornerRadius: 12)) + .overlay( + RoundedRectangle(cornerRadius: 12) + .strokeBorder(Color.secondary.opacity(0.25), lineWidth: 1) + ) } private func cell(label: String, value: Int, saturated: Bool, accent: Color) -> some View { diff --git a/TurnBridge/CapturedCaptchasView.swift b/TurnBridge/CapturedCaptchasView.swift new file mode 100644 index 0000000..82bd1a2 --- /dev/null +++ b/TurnBridge/CapturedCaptchasView.swift @@ -0,0 +1,232 @@ +import SwiftUI + +/// Browser for the captcha trap directory. The network extension drops +/// a folder per FAILED captcha solve into the App Group container +/// (/captcha_trap/_