# Path in repository: .github/workflows/update_playwright_version.yaml
#
# Runs the project template's Playwright version updater. When the updater leaves tracked files
# modified, the workflow recreates a dedicated branch from the checked-out commit, pushes the
# change, opens a pull request for it, and asks GitHub to auto-merge that pull request.
name: Update Playwright version

on:
  # Runs when manually triggered from the GitHub UI.
  workflow_dispatch:

  # Runs every day at 04:00 UTC.
  schedule:
    - cron: '0 4 * * *'

# All runs share one concurrency group, and a newer run does not cancel one already in progress.
concurrency:
  group: update-playwright-version
  cancel-in-progress: false

# The job's default token only gets read access to contents. The steps that push or manage pull
# requests pass the service-account token explicitly.
permissions:
  contents: read

env:
  # Branch that carries the automated change; shared by every step below.
  BRANCH_NAME: ci/update-playwright-version

jobs:
  update-playwright-version:
    name: Update Playwright version
    runs-on: ubuntu-latest

    steps:
      # Check out with the service-account token and without a depth limit (fetch-depth: 0).
      - name: Checkout repository
        uses: actions/checkout@v7
        with:
          token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      # The script takes no arguments and edits the template Dockerfile in place.
      - name: Update Playwright version
        run: python src/crawlee/project_template/update_playwright_version.py

      # Publishes `has-changes` as a step output. `git diff --quiet` exits non-zero when tracked
      # files have unstaged modifications, which is how an edit made by the script shows up.
      - name: Detect changes
        id: changes
        run: |
          if git diff --quiet; then
            echo "has-changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "has-changes=true" >> "$GITHUB_OUTPUT"
          fi

      # Only runs when the script changed something. Clears any previous state of the branch so
      # the commit step below starts from a branch that does not exist yet.
      - name: Reset existing branch
        if: steps.changes.outputs.has-changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
        run: |
          # If a PR already exists for this branch, disable auto-merge before we recreate the
          # branch so the old commit can't be merged.
          PR_NUMBER=$(gh pr list --head "$BRANCH_NAME" --base master --json number --jq '.[0].number // empty')
          if [ -n "$PR_NUMBER" ]; then
            gh pr merge "$PR_NUMBER" --disable-auto || true
          fi

          # Delete the remote branch if it exists. The signed-commit step below recreates it from
          # the current HEAD (origin/master).
          if git ls-remote --heads origin "$BRANCH_NAME" | grep -q "$BRANCH_NAME"; then
            echo "Deleting existing remote branch $BRANCH_NAME"
            git push origin --delete "$BRANCH_NAME"
          fi

      # Delegates the commit and push to the shared signed-commit action, asking it to create the
      # branch. Its `committed` output gates the pull request step below.
      - name: Commit and push changes
        id: commit-and-push
        if: steps.changes.outputs.has-changes == 'true'
        uses: apify/actions/signed-commit@v1.3.0
        with:
          message: 'chore: update Playwright version in project template'
          github-token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
          branch: ${{ env.BRANCH_NAME }}
          create-branch: 'true'

      # Opens a pull request for the branch, or refreshes the body of one that `gh pr list` still
      # reports, then requests auto-merge. After creating a PR the list is queried again to learn
      # its number.
      - name: Create or update Pull Request
        if: steps.commit-and-push.outputs.committed == 'true'
        env:
          GH_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
        run: |
          PR_BODY="Automated bump of the Playwright version pinned by the project template Dockerfile.

          > Generated by the [Update Playwright version](https://github.com/apify/crawlee-python/actions/workflows/update_playwright_version.yaml) workflow."

          PR_NUMBER=$(gh pr list --head "$BRANCH_NAME" --base master --json number --jq '.[0].number // empty')
          if [ -z "$PR_NUMBER" ]; then
            echo "Creating new PR"
            gh pr create \
              --title "chore: update Playwright version in project template" \
              --body "$PR_BODY" \
              --base master \
              --head "$BRANCH_NAME"
            PR_NUMBER=$(gh pr list --head "$BRANCH_NAME" --base master --json number --jq '.[0].number // empty')
          else
            echo "PR #$PR_NUMBER already exists for branch $BRANCH_NAME, updating it"
            gh pr edit "$PR_NUMBER" --body "$PR_BODY"
          fi

          # Enable auto-merge so the PR merges automatically once CI passes. If CI fails, the PR
          # stays open for manual review.
          echo "Enabling auto-merge for PR #$PR_NUMBER"
          gh pr merge "$PR_NUMBER" --auto --squash --delete-branch \
            || echo "::warning::Failed to enable auto-merge. The PR remains open for manual review."
#!/usr/bin/env python3
# Path in repository: src/crawlee/project_template/update_playwright_version.py
"""Bump the Playwright version pinned by the project template's Dockerfile.

The template Dockerfile pins a single Playwright version (a Jinja ``# % set playwright_version = '...'``
line) that selects the Apify base image tag and the in-image ``playwright==`` pin. A version is only
safe to pin once Apify has published the matching base image, so the Apify Playwright base image's
Docker Hub tags are the source of truth: this picks the highest stable ``<python>-<playwright>`` tag
for the Python version the template already uses, and rewrites the pinned version line if it is
newer. The Python version itself is never changed.

Single-purpose: run with no arguments. It lives next to the Dockerfile it maintains and
resolves that path relative to itself, so it can be run from anywhere in the repository.
"""

from __future__ import annotations

import json
import re
import urllib.request
from pathlib import Path

DOCKERFILE = Path(__file__).resolve().parent / '{{cookiecutter.project_name}}/Dockerfile'
TAGS_URL = 'https://hub.docker.com/v2/repositories/apify/actor-python-playwright/tags?page_size=100'

# The pinned version line, e.g. ``# % set playwright_version = '1.60.0'``.
VERSION_LINE = re.compile(r"(# % set playwright_version = ')([^']+)(')")
# The Python part of the base image tag, e.g. the ``3.13`` in ``...:3.13-1.60.0``.
PYTHON_PREFIX = re.compile(r'python-playwright[a-z-]*:(\d+\.\d+)-')


def fetch_tags() -> list[str]:
    """Return all tag names of the Apify Playwright base image, following pagination.

    Raises:
        ValueError: If a pagination link does not use HTTPS.
    """
    tags: list[str] = []
    url: str | None = TAGS_URL
    while url:
        # `urlopen` accepts more than HTTP(S), and every URL after the first comes from the response
        # body, so the scheme is checked here instead of relying on the lint suppression alone.
        if not url.startswith('https://'):
            raise ValueError(f'Refusing to follow non-HTTPS pagination URL: {url}')
        with urllib.request.urlopen(url, timeout=30) as response:  # noqa: S310
            payload = json.load(response)
        tags.extend(result['name'] for result in payload['results'])
        url = payload['next']
    return tags


def _parse_version(text: str) -> tuple[int, ...]:
    """Turn a dotted numeric version such as ``1.60.0`` into a tuple of ints that orders numerically."""
    return tuple(int(part) for part in text.split('.'))


def _latest_stable_version(tags: list[str], python_prefix: str) -> tuple[int, ...] | None:
    """Return the highest ``MAJOR.MINOR.PATCH`` version tagged for ``python_prefix``, or ``None`` if none."""
    # Anchored on both ends so suffixed tags (pre-releases, variants) and other Python lines are skipped.
    tag_re = re.compile(rf'^{re.escape(python_prefix)}-(\d+\.\d+\.\d+)$')
    versions = [_parse_version(m.group(1)) for tag in tags if (m := tag_re.match(tag))]
    return max(versions, default=None)


def main(dockerfile: Path | None = None, tags: list[str] | None = None) -> None:
    """Rewrite the pinned Playwright version when a newer stable base image tag exists.

    Args:
        dockerfile: Dockerfile to update. Defaults to the template Dockerfile next to this script.
        tags: Base image tag names to choose from. Fetched from Docker Hub when omitted.

    Raises:
        SystemExit: If the pinned version line or the Python base image prefix is missing, if the
            pinned version is not a dotted numeric version, or if no stable tag matches.
    """
    path = DOCKERFILE if dockerfile is None else dockerfile
    content = path.read_text(encoding='utf-8')

    version_match = VERSION_LINE.search(content)
    if not version_match:
        raise SystemExit(f'Pinned Playwright version line not found in {path}.')
    current = version_match.group(2)
    try:
        current_version = _parse_version(current)
    except ValueError:
        # Report a malformed pin the same way as the other failures instead of a raw traceback.
        raise SystemExit(f'Pinned Playwright version {current!r} in {path} is not numeric.') from None

    python_match = PYTHON_PREFIX.search(content)
    if not python_match:
        raise SystemExit(f'Python base image prefix not found in {path}.')
    python_prefix = python_match.group(1)

    # Keep only stable `MAJOR.MINOR.PATCH` versions built for the template's current Python line.
    latest = _latest_stable_version(fetch_tags() if tags is None else tags, python_prefix)
    if latest is None:
        raise SystemExit(f'No stable {python_prefix}- base image tags found.')

    latest_str = '.'.join(str(part) for part in latest)
    if latest > current_version:
        path.write_text(VERSION_LINE.sub(rf'\g<1>{latest_str}\g<3>', content), encoding='utf-8')
        print(f'Bumped Playwright version: {current} -> {latest_str}')
    else:
        print(f'Playwright version is already up to date ({current}).')


if __name__ == '__main__':
    main()