Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# CI workflow: a lint job plus a test job run across a Python version matrix.
name: CI

on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - run: uv run ruff check .
      - run: uv run ruff format --check .

  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - run: uv run --python ${{ matrix.python-version }} pytest tests/ --cov=pageviewapi --cov-report=term-missing --cov-report=xml
      # Coverage is uploaded from the 3.14 matrix entry only.
      - uses: codecov/codecov-action@v5
        if: matrix.python-version == '3.14'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,15 @@ cover/
# Typical virtual environments
.venv
venv

# uv
uv.lock
.python-version

# pytest / coverage
.coverage
htmlcov/
.pytest_cache/

# ruff
.ruff_cache/
8 changes: 0 additions & 8 deletions .travis.yml

This file was deleted.

1 change: 0 additions & 1 deletion MANIFEST.in

This file was deleted.

6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# pageview-api
[![Build Status](https://travis-ci.org/Commonists/pageview-api.svg?branch=master)](https://travis-ci.org/Commonists/pageview-api)
[![Code Health](https://landscape.io/github/Commonists/pageview-api/master/landscape.svg?style=flat)](https://landscape.io/github/Commonists/pageview-api/master)
[![CI](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml/badge.svg)](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/Commonists/pageview-api/branch/master/graph/badge.svg)](https://codecov.io/gh/Commonists/pageview-api)
[![Pypi](https://img.shields.io/pypi/v/pageviewapi.svg?style=flat)](https://pypi.python.org/pypi/pageviewapi)
[![License](http://img.shields.io/badge/license-MIT-orange.svg?style=flat)](http://opensource.org/licenses/MIT)

Wikimedia Pageview API client
Wikimedia Pageview API client for Python 3.10+

Installation
------------
Expand Down
24 changes: 20 additions & 4 deletions pageviewapi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
"""Python client for wikimedia pageview api."""
"""Python client for the Wikimedia Pageview API."""

from pageviewapi.client import (
    PageviewResponse,
    ThrottlingException,
    ZeroOrDataNotLoadedException,
    __version__,
    aggregate,
    legacy_pagecounts,
    per_article,
    top,
    unique_devices,
)

# Names re-exported from ``pageviewapi.client`` as the package's public API.
__all__ = [
    "__version__",
    "aggregate",
    "PageviewResponse",
    "legacy_pagecounts",
    "per_article",
    "ThrottlingException",
    "top",
    "unique_devices",
    "ZeroOrDataNotLoadedException",
]
219 changes: 133 additions & 86 deletions pageviewapi/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,150 +5,197 @@
- per-article
- top
- aggregate
- unique-devices
- legacy/pagecounts
"""

from attrdict import AttrDict
from importlib.metadata import PackageNotFoundError, version
from typing import Any

import requests

# Prefer the version recorded in the installed distribution's metadata; fall
# back to the hard-coded release string when that metadata cannot be found.
try:
    __version__ = version("pageviewapi")
except PackageNotFoundError:
    __version__ = "0.4.0"

# User-agent header sent with every API request.
PROJECT_URL = "https://github.com/Commonists/pageview-api"
UA = "Python pageview-api client v{version} <{url}>"
USER_AGENT = {"User-Agent": UA.format(url=PROJECT_URL, version=__version__)}

API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics"

# Each endpoint is described by its path under API_BASE_URL and a template
# for the positional path arguments that follow it.

# Per article
PA_ENDPOINT = "pageviews/per-article"
PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"

# Top
TOP_ENDPOINT = "pageviews/top"
TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"

# Aggregate
AG_ENDPOINT = "pageviews/aggregate"
AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"

# Unique devices
UD_ENDPOINT = "unique-devices"
UD_ARGS = "{project}/{access}/{granularity}/{start}/{end}"

# Legacy pagecounts
PC_ENDPOINT = "legacy/pagecounts/aggregate"
PC_ARGS = "{project}/{access_site}/{granularity}/{start}/{end}"


class PageviewResponse(dict):  # type: ignore[type-arg]
    """A Wikimedia Pageview API response with attribute-style read access.

    Recursively wraps nested dicts and lists so the full response tree is
    navigable via attributes. Dict data keys always take priority over built-in
    dict methods, so ``response.items`` returns the ``items`` data value rather
    than the ``dict.items`` method when that key is present.

    Use ``from_json`` rather than the constructor directly when the input may
    contain nested dicts or lists — the constructor only wraps the top level.
    """

    def __getattribute__(self, key: str) -> Any:
        """Return the data value stored under ``key``, else the normal attribute.

        Names starting with an underscore skip the data lookup, so dunder and
        private attribute access keeps its regular behaviour. A missing name
        raises ``AttributeError`` through the default lookup.
        """
        if not key.startswith("_"):
            try:
                return dict.__getitem__(self, key)
            except KeyError:
                pass
        return super().__getattribute__(key)

    @classmethod
    def from_json(cls, obj: Any) -> Any:
        """Recursively convert a JSON value into a ``PageviewResponse`` tree.

        Dicts become ``PageviewResponse`` instances, lists are converted
        element by element, and any other value is returned unchanged.
        """
        if isinstance(obj, dict):
            # Call dict.items explicitly: when ``obj`` is already a
            # PageviewResponse holding an ``items`` key, ``obj.items`` would
            # resolve to that data value instead of the method.
            return cls({k: cls.from_json(v) for k, v in dict.items(obj)})
        if isinstance(obj, list):
            return [cls.from_json(item) for item in obj]
        return obj


class ZeroOrDataNotLoadedException(Exception):
    """Raised on 404 — no data or data not yet filled.

    404 may happen when there is no data or data has not been filled yet.
    https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """


class ThrottlingException(Exception):
    """Raised on 429 — client is sending too many requests.

    Clients making too many requests may be subject to throttling.
    Requests in cache are not throttled (throttling is done at storage layer).
    https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """


def per_article(
    project: str,
    page: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Per-article pageview counts.

    Requests the views of ``page`` on ``project`` between ``start`` and
    ``end``. The example below asks for the Paris article between 2015-11-06
    and 2015-11-20.

    >>> import pageviewapi
    >>> pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120')

    All arguments are substituted verbatim into the request path, so ``page``
    must already be safe to use as a single URL path segment.

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    args = PA_ARGS.format(
        project=project,
        page=page,
        start=start,
        end=end,
        access=access,
        agent=agent,
        granularity=granularity,
    )
    return _api(PA_ENDPOINT, args)


def top(
    project: str,
    year: int | str,
    month: int | str,
    day: int | str,
    access: str = "all-access",
) -> dict[str, Any]:
    """Top 1000 most visited articles for a project on a given date.

    >>> import pageviewapi
    >>> views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
    >>> views['items'][0]['articles'][0]
    {'article': 'Wikipédia:Accueil_principal', 'rank': 1, 'views': 1600547}

    Integer ``month`` and ``day`` values are zero-padded to two digits;
    string values are passed through unchanged.

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    # NOTE(review): the REST API documents month/day in MM/DD form, so a bare
    # ``4`` is sent as ``04`` — confirm against the API reference.
    month_arg = f"{month:02d}" if isinstance(month, int) else month
    day_arg = f"{day:02d}" if isinstance(day, int) else day
    args = TOP_ARGS.format(
        project=project,
        access=access,
        year=year,
        month=month_arg,
        day=day_arg,
    )
    return _api(TOP_ENDPOINT, args)


def aggregate(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Aggregate pageview counts for a project.

    >>> import pageviewapi
    >>> pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    args = AG_ARGS.format(
        project=project,
        start=start,
        end=end,
        access=access,
        agent=agent,
        granularity=granularity,
    )
    return _api(AG_ENDPOINT, args)


def unique_devices(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Unique devices accessing a project.

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    args = UD_ARGS.format(
        project=project,
        start=start,
        end=end,
        access=access,
        granularity=granularity,
    )
    return _api(UD_ENDPOINT, args)


def legacy_pagecounts(
    project: str,
    start: str,
    end: str,
    access_site: str = "all-sites",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Legacy pagecounts aggregate.

    >>> import pageviewapi
    >>> pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100')

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
    """
    # Every project except the special "all-projects" value is sent with an
    # ".org" suffix appended.
    project_arg = "all-projects" if project == "all-projects" else f"{project}.org"
    args = PC_ARGS.format(
        project=project_arg,
        start=start,
        end=end,
        access_site=access_site,
        granularity=granularity,
    )
    return _api(PC_ENDPOINT, args)


def _api(
    end_point: str,
    args: str,
    api_url: str = API_BASE_URL,
    timeout: float = 30.0,
) -> dict[str, Any]:
    """Call the API and return the decoded JSON response.

    Args:
        end_point: endpoint path appended to ``api_url``.
        args: already-formatted positional path arguments for the endpoint.
        api_url: base URL of the metrics API.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The JSON body wrapped by ``PageviewResponse.from_json`` on a 200
        response.

    Raises:
        ZeroOrDataNotLoadedException: the API answered 404.
        ThrottlingException: the API answered 429.
        requests.HTTPError: any other 4xx/5xx status.
    """
    url = "/".join([api_url, end_point, args])
    response = requests.get(url, headers=USER_AGENT, timeout=timeout)
    if response.status_code == 200:
        return PageviewResponse.from_json(response.json())
    if response.status_code == 404:
        raise ZeroOrDataNotLoadedException()
    if response.status_code == 429:
        raise ThrottlingException()
    response.raise_for_status()
    # raise_for_status() only raises for 4xx/5xx responses; any other non-200
    # status falls through to here and yields an empty mapping.
    return {}
Loading
Loading