Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions .github/workflows/update_playwright_version.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
name: Update Playwright version

on:
  # Runs when manually triggered from the GitHub UI.
  workflow_dispatch:

  # Runs every day at 04:00 UTC.
  schedule:
    - cron: '0 4 * * *'

# Only one run at a time; a queued run waits for the running one instead of cancelling it, so a
# run is never interrupted between deleting the branch and recreating it.
concurrency:
  group: update-playwright-version
  cancel-in-progress: false

# The default GITHUB_TOKEN only needs read access; every write below goes through the
# service-account token passed explicitly to the individual steps.
permissions:
  contents: read

env:
  BRANCH_NAME: ci/update-playwright-version

jobs:
  update-playwright-version:
    name: Update Playwright version
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v7
        with:
          token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Update Playwright version
        run: python src/crawlee/project_template/update_playwright_version.py

      - name: Detect changes
        id: changes
        run: |
          # `git diff --quiet` exits non-zero when a tracked file has unstaged modifications,
          # i.e. when the update script rewrote the Dockerfile.
          if git diff --quiet; then
            echo "has-changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "has-changes=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Reset existing branch
        if: steps.changes.outputs.has-changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
        run: |
          # If a PR already exists for this branch, disable auto-merge before we recreate the
          # branch so the old commit can't be merged.
          PR_NUMBER=$(gh pr list --head "$BRANCH_NAME" --base master --json number --jq '.[0].number // empty')
          if [ -n "$PR_NUMBER" ]; then
            gh pr merge "$PR_NUMBER" --disable-auto || true
          fi

          # Delete the remote branch if it exists. The signed-commit step below recreates it from
          # the current HEAD (origin/master).
          # `--exit-code` makes ls-remote return non-zero when no ref matches, and the full
          # `refs/heads/` name keeps a branch that merely ends with the same path from matching.
          if git ls-remote --exit-code --heads origin "refs/heads/$BRANCH_NAME" > /dev/null; then
            echo "Deleting existing remote branch $BRANCH_NAME"
            git push origin --delete "$BRANCH_NAME"
          fi

      - name: Commit and push changes
        id: commit-and-push
        if: steps.changes.outputs.has-changes == 'true'
        uses: apify/actions/signed-commit@v1.3.0
        with:
          message: 'chore: update Playwright version in project template'
          github-token: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
          branch: ${{ env.BRANCH_NAME }}
          create-branch: 'true'

      - name: Create or update Pull Request
        if: steps.commit-and-push.outputs.committed == 'true'
        env:
          GH_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
        run: |
          PR_BODY="Automated bump of the Playwright version pinned by the project template Dockerfile.

          > Generated by the [Update Playwright version](https://github.com/apify/crawlee-python/actions/workflows/update_playwright_version.yaml) workflow."

          PR_NUMBER=$(gh pr list --head "$BRANCH_NAME" --base master --json number --jq '.[0].number // empty')
          if [ -z "$PR_NUMBER" ]; then
            echo "Creating new PR"
            gh pr create \
              --title "chore: update Playwright version in project template" \
              --body "$PR_BODY" \
              --base master \
              --head "$BRANCH_NAME"
            # Look the PR up again to learn the number of the one just created.
            PR_NUMBER=$(gh pr list --head "$BRANCH_NAME" --base master --json number --jq '.[0].number // empty')
          else
            echo "PR #$PR_NUMBER already exists for branch $BRANCH_NAME, updating it"
            gh pr edit "$PR_NUMBER" --body "$PR_BODY"
          fi

          # Enable auto-merge so the PR merges automatically once CI passes. If CI fails, the PR
          # stays open for manual review.
          echo "Enabling auto-merge for PR #$PR_NUMBER"
          gh pr merge "$PR_NUMBER" --auto --squash --delete-branch \
            || echo "::warning::Failed to enable auto-merge. The PR remains open for manual review."
71 changes: 71 additions & 0 deletions src/crawlee/project_template/update_playwright_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""Bump the Playwright version pinned by the project template's Dockerfile.

The template Dockerfile pins a single Playwright version (a Jinja ``# % set playwright_version
= '...'`` line) that selects the Apify base image tag and the in-image ``playwright==<version>``
pin. A version is only safe to pin once Apify has published the matching base image, so the
Apify Playwright base image's Docker Hub tags are the source of truth: this picks the highest
stable ``<python>-<semver>`` tag for the Python version the template already uses, and rewrites
the pinned version line if it is newer. The Python version itself is never changed.

Single-purpose: run with no arguments. It lives next to the Dockerfile it maintains and
resolves that path relative to itself, so it can be run from anywhere in the repository.
"""

from __future__ import annotations

import json
import re
import urllib.request
from pathlib import Path

# The template Dockerfile this script maintains, located relative to this file's own directory.
# ``{{cookiecutter.project_name}}`` is used here as a literal directory name.
DOCKERFILE = Path(__file__).resolve().parent / '{{cookiecutter.project_name}}/Dockerfile'
# Docker Hub API endpoint listing the tags of the Apify Playwright base image, 100 per page.
TAGS_URL = 'https://hub.docker.com/v2/repositories/apify/actor-python-playwright/tags?page_size=100'

# The pinned version line, e.g. ``# % set playwright_version = '1.60.0'``. Group 2 is the version;
# groups 1 and 3 capture the text around it so a substitution can put it back unchanged.
VERSION_LINE = re.compile(r"(# % set playwright_version = ')([^']+)(')")
# The Python part of the base image tag, e.g. the ``3.13`` in ``...:3.13-1.60.0``.
PYTHON_PREFIX = re.compile(r'python-playwright[a-z-]*:(\d+\.\d+)-')


def fetch_tags() -> list[str]:
    """Collect the name of every tag published for the Apify Playwright base image.

    Starts at ``TAGS_URL`` and keeps requesting the URL found in each response's ``next`` field
    until that field is empty, so all pages of the listing are included.
    """
    collected: list[str] = []
    page_url: str | None = TAGS_URL
    while True:
        # An empty ``next`` value marks the last page.
        if not page_url:
            return collected
        with urllib.request.urlopen(page_url, timeout=30) as reply:  # noqa: S310
            page = json.load(reply)
        for entry in page['results']:
            collected.append(entry['name'])
        page_url = page['next']


def _parse_version(text: str) -> tuple[int, ...]:
    """Turn a dotted numeric version such as ``1.60.0`` into a tuple of ints for comparison."""
    return tuple(int(part) for part in text.split('.'))


def main() -> None:
    """Rewrite the pinned Playwright version in the template Dockerfile if a newer image exists.

    Reads the pinned version and the Python line from the Dockerfile, looks up the highest stable
    ``<python>-<MAJOR.MINOR.PATCH>`` base image tag for that Python line, and rewrites the pinned
    version only when the published one is strictly newer.

    Raises:
        SystemExit: If the pinned version line or the Python base image prefix is missing from the
            Dockerfile, if the pinned version is not a dotted numeric version, or if no stable tag
            exists for the template's Python line.
    """
    content = DOCKERFILE.read_text(encoding='utf-8')

    version_match = VERSION_LINE.search(content)
    if not version_match:
        raise SystemExit(f'Pinned Playwright version line not found in {DOCKERFILE}.')
    current = version_match.group(2)
    # Validate the pinned version before any network request, so a malformed pin fails fast with a
    # readable message instead of an uncaught ValueError traceback.
    try:
        current_version = _parse_version(current)
    except ValueError:
        raise SystemExit(
            f'Pinned Playwright version {current!r} in {DOCKERFILE} is not a dotted numeric version.'
        ) from None

    python_match = PYTHON_PREFIX.search(content)
    if not python_match:
        raise SystemExit(f'Python base image prefix not found in {DOCKERFILE}.')
    python_prefix = python_match.group(1)

    # Keep only stable `MAJOR.MINOR.PATCH` versions built for the template's current Python line.
    tag_re = re.compile(rf'^{re.escape(python_prefix)}-(\d+\.\d+\.\d+)$')
    versions = [_parse_version(m.group(1)) for tag in fetch_tags() if (m := tag_re.match(tag))]
    if not versions:
        raise SystemExit(f'No stable {python_prefix}-<version> base image tags found.')

    # Tuples of ints compare numerically, so 1.10.0 correctly ranks above 1.9.0.
    latest = max(versions)
    latest_str = '.'.join(str(part) for part in latest)
    if latest > current_version:
        # Groups 1 and 3 restore the text around the version; only the version itself changes.
        DOCKERFILE.write_text(VERSION_LINE.sub(rf'\g<1>{latest_str}\g<3>', content), encoding='utf-8')
        print(f'Bumped Playwright version: {current} -> {latest_str}')
    else:
        print(f'Playwright version is already up to date ({current}).')


if __name__ == '__main__':
    main()
Loading