diff --git a/.gitignore b/.gitignore index cc68159..f169cca 100644 --- a/.gitignore +++ b/.gitignore @@ -186,7 +186,7 @@ cython_debug/ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore # and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder -# .vscode/ +.vscode/ # Ruff stuff: .ruff_cache/ diff --git a/src/pardner/exceptions.py b/src/pardner/exceptions.py index 44b9684..bd209ff 100644 --- a/src/pardner/exceptions.py +++ b/src/pardner/exceptions.py @@ -1,3 +1,5 @@ +from typing import Any + from pardner.verticals import Vertical @@ -26,3 +28,23 @@ def __init__(self, *unsupported_verticals: Vertical, service_name: str) -> None: class UnsupportedRequestException(Exception): def __init__(self, service_name: str, message: str): super().__init__(f'Cannot fetch data from {service_name}: {message}') + + +class TumblrAPIError(Exception): + """ + Raised when the Tumblr API returns a non-OK HTTP status or a success response + whose payload is missing expected structure + """ + + def __init__( + self, + message: str, + status_code: int | None = None, + raw_response: Any = None, + ) -> None: + detail = f'Tumblr API error: {message}' + if status_code is not None: + detail += f' (HTTP {status_code})' + super().__init__(detail) + self.status_code = status_code + self.raw_response = raw_response diff --git a/src/pardner/services/tumblr.py b/src/pardner/services/tumblr.py index c650051..a816366 100644 --- a/src/pardner/services/tumblr.py +++ b/src/pardner/services/tumblr.py @@ -1,9 +1,13 @@ import json -from typing import Any, Iterable, Optional, override +from datetime import datetime, timezone +from typing import Any, Iterable, Literal, Optional, override -from pardner.exceptions import UnsupportedRequestException +from requests import HTTPError + +from pardner.exceptions import TumblrAPIError, UnsupportedRequestException 
from pardner.services import BaseTransferService from pardner.verticals import SocialPostingVertical, Vertical +from pardner.verticals.sub_verticals import AssociatedMediaSubVertical class TumblrTransferService(BaseTransferService): @@ -63,6 +67,166 @@ def fetch_token( include_client_id: bool = True, ) -> dict[str, Any]: return super().fetch_token(code, authorization_response, include_client_id) + + def _validate_user_info_response(self, data: Any) -> dict[str, Any]: + """ + Validates the shape of a ``user/info`` JSON payload. + + :param data: the parsed JSON dict returned by ``user/info``. + :returns: the ``response`` sub-dict if validation passes. + :raises: :class:`TumblrAPIError` if required keys are absent or have the wrong type. + """ + if not isinstance(data, dict): + raise TumblrAPIError( + 'user/info response is not a JSON object', raw_response=data + ) + response = data.get('response') + if not isinstance(response, dict): + raise TumblrAPIError( + "user/info response is missing a 'response' object", raw_response=data + ) + user = response.get('user') + if not isinstance(user, dict): + raise TumblrAPIError( + 'user/info response.user is missing or not an object', raw_response=data + ) + blogs = user.get('blogs') + if not isinstance(blogs, list): + raise TumblrAPIError( + 'user/info response.user.blogs is missing or not a list', + raw_response=data, + ) + return response + + def _validate_dashboard_response(self, data: Any) -> list[Any]: + """ + Validates the shape of a ``user/dashboard`` JSON payload. + + :param data: the parsed JSON dict returned by ``user/dashboard``. + :returns: the ``posts`` list if validation passes. + :raises: :class:`TumblrAPIError` if required keys are absent or have the wrong type. 
+ """ + if not isinstance(data, dict): + raise TumblrAPIError( + 'user/dashboard response is not a JSON object', raw_response=data + ) + response = data.get('response') + if not isinstance(response, dict): + raise TumblrAPIError( + "user/dashboard response is missing a 'response' object", + raw_response=data, + ) + posts = response.get('posts') + if not isinstance(posts, list): + raise TumblrAPIError( + 'user/dashboard response.posts is missing or not a list', + raw_response=data, + ) + return posts + + def _map_tumblr_state( + self, state: str | None + ) -> Literal['public', 'private', 'draft', 'restricted'] | None: + """Maps a Tumblr post ``state`` string to the vertical status literal.""" + mapping: dict[str, Literal['public', 'private', 'draft', 'restricted']] = { + 'published': 'public', + 'private': 'private', + 'draft': 'draft', + 'queued': 'restricted', + 'queue': 'restricted', + } + return mapping.get(state or '', None) + + def parse_social_posting_vertical( + self, raw_data: Any + ) -> SocialPostingVertical | None: + """ + Given a single raw Tumblr post dict, creates a + :class:`SocialPostingVertical` model object, if possible. + + Maps stable NPF fields: ``id``, ``post_url``, ``timestamp``, + ``summary``, ``note_count``, ``tags``, ``state``, NPF content + text blocks, and media blocks. + + :param raw_data: a single post dict from the Tumblr dashboard response. + :returns: :class:`SocialPostingVertical` or ``None`` if ``raw_data`` + is not a dict. 
+ """ + if not isinstance(raw_data, dict): + return None + + # identity / location + service_object_id: str | None = str(raw_data['id']) if 'id' in raw_data else None + post_url: str | None = raw_data.get('post_url') or raw_data.get('short_url') + + created_at: datetime | None = None + timestamp = raw_data.get('timestamp') + if isinstance(timestamp, (int, float)): + created_at = datetime.fromtimestamp(timestamp, tz=timezone.utc).replace( + tzinfo=None + ) + + blog = raw_data.get('blog') or {} + creator_user_id: str | None = ( + blog.get('uuid') or blog.get('name') or raw_data.get('blog_name') + ) + data_owner_id: str = self.primary_blog_id or '' + + interaction_count: int | None = raw_data.get('note_count') + keywords: list[str] = raw_data.get('tags') or [] + + status = self._map_tumblr_state(raw_data.get('state')) + + # NPF content blocks + content_blocks: list[dict[str, Any]] = raw_data.get('content') or [] + text_parts: list[str] = [] + associated_media: list[AssociatedMediaSubVertical] = [] + + for block in content_blocks: + if not isinstance(block, dict): + continue + block_type = block.get('type', '') + if block_type == 'text': + text_value = block.get('text') + if isinstance(text_value, str) and text_value: + text_parts.append(text_value) + elif block_type in ('image', 'video', 'audio'): + media_type_map: dict[str, Literal['audio', 'image', 'video']] = { + 'image': 'image', + 'video': 'video', + 'audio': 'audio', + } + media_type = media_type_map.get(block_type) + media_entries: list[dict[str, Any]] = block.get('media') or [] + if isinstance(media_entries, list): + for entry in media_entries: + if isinstance(entry, dict) and entry.get('url'): + associated_media.append( + AssociatedMediaSubVertical( + media_type=media_type, url=entry['url'] + ) + ) + elif isinstance(media_entries, dict) and media_entries.get('url'): + associated_media.append( + AssociatedMediaSubVertical( + media_type=media_type, url=media_entries['url'] + ) + ) + + return 
SocialPostingVertical( + creator_user_id=creator_user_id, + data_owner_id=data_owner_id, + service_object_id=service_object_id, + service=self._service_name, + created_at=created_at, + url=post_url, + abstract=raw_data.get('summary'), + interaction_count=interaction_count, + keywords=keywords, + status=status, + text='\n\n'.join(text_parts) if text_parts else None, + associated_media=associated_media, + ) def fetch_primary_blog_id(self) -> str: """ @@ -75,13 +239,27 @@ def fetch_primary_blog_id(self) -> str: :returns: the primary blog id. - :raises: :class:`ValueError`: if the primary blog ID could not be extracted from - the response. + :raises: :class:`TumblrAPIError`: if the Tumblr API returns a non-OK response + or a malformed success payload. + :raises: :class:`ValueError`: if the response is structurally valid but no + primary blog with a UUID was found. """ if self.primary_blog_id: return self.primary_blog_id - user_info = self._get_resource_from_path('user/info').json().get('response', {}) - for blog_info in user_info.get('user', {}).get('blogs', []): + + try: + raw_response = self._get_resource_from_path('user/info') + except HTTPError as exc: + raise TumblrAPIError( + 'Failed to fetch user/info', + status_code=exc.response.status_code if exc.response is not None else None, + raw_response=exc.response, + ) from exc + + user_info_data = raw_response.json() + response = self._validate_user_info_response(user_info_data) + + for blog_info in response['user']['blogs']: if ( isinstance(blog_info, dict) and blog_info.get('primary') @@ -95,7 +273,7 @@ def fetch_primary_blog_id(self) -> str: 'Failed to fetch primary blog id. Either manually set the _primary_blog_id ' 'attribute or verify all the client credentials ' 'and permissions are correct. 
Response from Tumblr: ' - f'{json.dumps(user_info, indent=2)}' + f'{json.dumps(user_info_data, indent=2)}' ) def fetch_social_posting_vertical( @@ -103,9 +281,9 @@ def fetch_social_posting_vertical( request_params: dict[str, Any] = {}, count: int = 20, text_only: bool = True, - ) -> list[Any]: + ) -> tuple[list[SocialPostingVertical | None], list[Any]]: """ - Fetches posts from Tumblr feed for user account whose token was + Fetches posts from Tumblr feed for the user account whose token was obtained using the Tumblr API. :param count: number of posts to request. @@ -115,21 +293,34 @@ def fetch_social_posting_vertical( to the endpoint. Depending on the parameters passed, this could override ``count`` and ``text_only``. - :returns: a list of dictionary objects with information for the posts in a feed. + :returns: a two-element tuple: the first element is a list of + :class:`SocialPostingVertical` objects (``None`` for posts that could + not be parsed); the second element is the raw list of post dicts as + returned by the API. - :raises: :class:`UnsupportedRequestException` if the request is unable to be - made. + :raises: :class:`UnsupportedRequestException` if ``count`` exceeds 20. + :raises: :class:`TumblrAPIError` if the API returns a non-OK response + or a malformed success payload. 
""" - if count <= 20: - params: dict[str, Any] = {'limit': count, 'npf': True, **request_params} - if text_only: - params['type'] = 'text' - dashboard_response = self._get_resource_from_path( - 'user/dashboard', - params, + if count > 20: + raise UnsupportedRequestException( + self._service_name, + 'can only make a request for at most 20 posts at a time.', ) - return list(dashboard_response.json().get('response').get('posts')) - raise UnsupportedRequestException( - self._service_name, - 'can only make a request for at most 20 posts at a time.', - ) + + params: dict[str, Any] = {'limit': count, 'npf': True, **request_params} + if text_only: + params['type'] = 'text' + + try: + dashboard_response = self._get_resource_from_path('user/dashboard', params) + except HTTPError as exc: + raise TumblrAPIError( + 'Failed to fetch user/dashboard', + status_code=exc.response.status_code if exc.response is not None else None, + raw_response=exc.response, + ) from exc + + raw_posts = self._validate_dashboard_response(dashboard_response.json()) + parsed = [self.parse_social_posting_vertical(post) for post in raw_posts] + return parsed, raw_posts diff --git a/tests/test_transfer_services/test_tumblr.py b/tests/test_transfer_services/test_tumblr.py index b3e27e6..7705e7b 100644 --- a/tests/test_transfer_services/test_tumblr.py +++ b/tests/test_transfer_services/test_tumblr.py @@ -1,9 +1,14 @@ +import datetime + import pytest -from requests import HTTPError +from pydantic import AnyHttpUrl -from pardner.exceptions import UnsupportedRequestException +from pardner.exceptions import TumblrAPIError, UnsupportedRequestException from pardner.verticals import SocialPostingVertical -from tests.test_transfer_services.conftest import mock_oauth2_session_get +from tests.test_transfer_services.conftest import ( + dump_and_filter_model_objs, + mock_oauth2_session_get, +) @pytest.mark.parametrize( @@ -19,27 +24,84 @@ def test_fetch_social_posting_vertical_raises_exception(tumblr_transfer_service) 
tumblr_transfer_service.fetch_social_posting_vertical(count=21) -def test_fetch_social_posting_vertical_raises_http_exception( +def test_fetch_social_posting_vertical_raises_tumblr_api_error_on_http_error( tumblr_transfer_service, mock_oauth2_session_get_bad_response ): - with pytest.raises(HTTPError): + with pytest.raises(TumblrAPIError): tumblr_transfer_service.fetch_social_posting_vertical() def test_fetch_social_posting_vertical(mocker, tumblr_transfer_service): + sample_post = { + 'id': 123456, + 'post_url': 'https://example.tumblr.com/post/123456', + 'timestamp': 1700000000, + 'summary': 'A test post', + 'note_count': 42, + 'tags': ['python', 'testing'], + 'state': 'published', + 'blog': {'uuid': 'author-uuid', 'name': 'author-blog'}, + 'content': [ + {'type': 'text', 'text': 'Hello world'}, + { + 'type': 'image', + 'media': [{'url': 'https://example.com/image.jpg', 'width': 800}], + }, + ], + } response_object = mocker.MagicMock() - response_object.json.return_value = {'response': {'posts': ['sample', 'posts']}} + response_object.json.return_value = {'response': {'posts': [sample_post]}} oauth2_session_get = mock_oauth2_session_get(mocker, response_object) - assert tumblr_transfer_service.fetch_social_posting_vertical() == [ - 'sample', - 'posts', - ] + parsed, raw_posts = tumblr_transfer_service.fetch_social_posting_vertical() + assert ( oauth2_session_get.call_args.args[1] == 'https://api.tumblr.com/v2/user/dashboard' ) + assert raw_posts == [sample_post] + assert len(parsed) == 1 + + model_obj_dump = dump_and_filter_model_objs(parsed)[0] + + assert model_obj_dump == { + 'service': 'Tumblr', + 'vertical_name': 'social_posting', + 'service_object_id': '123456', + 'creator_user_id': 'author-uuid', + 'data_owner_id': '', + 'created_at': datetime.datetime(2023, 11, 14, 22, 13, 20), + 'url': AnyHttpUrl('https://example.tumblr.com/post/123456'), + 'abstract': 'A test post', + 'interaction_count': 42, + 'keywords': ['python', 'testing'], + 'shared_content': [], + 
'status': 'public', + 'text': 'Hello world', + 'title': None, + 'associated_media': [ + {'media_type': 'image', 'url': AnyHttpUrl('https://example.com/image.jpg')} + ], + } + + +@pytest.mark.parametrize( + 'bad_payload', + [ + {}, + {'response': {'posts': None}}, + ], +) +def test_fetch_social_posting_vertical_raises_tumblr_api_error_on_bad_payload( + mocker, tumblr_transfer_service, bad_payload +): + response_object = mocker.MagicMock() + response_object.json.return_value = bad_payload + mock_oauth2_session_get(mocker, response_object) + + with pytest.raises(TumblrAPIError): + tumblr_transfer_service.fetch_social_posting_vertical() def test_fetch_primary_blog_id_already_set(tumblr_transfer_service): @@ -67,8 +129,90 @@ def test_fetch_primary_blog_id_success(mocker, tumblr_transfer_service): assert oauth2_session_get.call_args.args[1] == 'https://api.tumblr.com/v2/user/info' -def test_fetch_primary_blog_id_raises_exception( +def test_fetch_primary_blog_id_raises_tumblr_api_error_on_http_error( tumblr_transfer_service, mock_oauth2_session_get_bad_response ): - with pytest.raises(HTTPError): + with pytest.raises(TumblrAPIError): tumblr_transfer_service.fetch_primary_blog_id() + + +@pytest.mark.parametrize( + 'bad_payload', + [ + {}, + {'response': {'user': {'blogs': None}}}, + ], +) +def test_fetch_primary_blog_id_raises_tumblr_api_error_on_bad_payload( + mocker, tumblr_transfer_service, bad_payload +): + response_object = mocker.MagicMock() + response_object.json.return_value = bad_payload + mock_oauth2_session_get(mocker, response_object) + + with pytest.raises(TumblrAPIError): + tumblr_transfer_service.fetch_primary_blog_id() + + +def test_parse_social_posting_vertical_returns_none_for_non_dict( + tumblr_transfer_service, +): + assert tumblr_transfer_service.parse_social_posting_vertical(None) is None + assert tumblr_transfer_service.parse_social_posting_vertical('string') is None + assert tumblr_transfer_service.parse_social_posting_vertical([]) is None + + +def 
@pytest.mark.parametrize(
    ('tumblr_state', 'expected_status'),
    [
        ('published', 'public'),
        ('private', 'private'),
        ('draft', 'draft'),
        ('queued', 'restricted'),
        ('queue', 'restricted'),
    ],
)
def test_parse_social_posting_vertical_maps_state(
    tumblr_transfer_service, tumblr_state, expected_status
):
    """Each known Tumblr ``state`` maps to its vertical status."""
    # One parametrized case per state, so a failure for one state does not
    # hide the results for the others.
    result = tumblr_transfer_service.parse_social_posting_vertical(
        {'state': tumblr_state}
    )
    assert result is not None
    assert result.status == expected_status


def test_parse_social_posting_vertical_text_blocks(tumblr_transfer_service):
    """Text blocks are joined with a blank line, in their original order."""
    raw = {
        'content': [
            {'type': 'text', 'text': 'First paragraph'},
            {'type': 'text', 'text': 'Second paragraph'},
        ]
    }
    result = tumblr_transfer_service.parse_social_posting_vertical(raw)
    assert result is not None
    assert result.text == 'First paragraph\n\nSecond paragraph'


def test_parse_social_posting_vertical_media_blocks(tumblr_transfer_service):
    """Media given as a list or as a single object both yield media entries."""
    raw = {
        'content': [
            {
                'type': 'image',
                'media': [
                    {'url': 'https://example.com/img1.jpg'},
                    {'url': 'https://example.com/img2.jpg'},
                ],
            },
            {
                'type': 'video',
                'media': {'url': 'https://example.com/vid.mp4'},
            },
        ]
    }
    result = tumblr_transfer_service.parse_social_posting_vertical(raw)
    assert result is not None
    assert len(result.associated_media) == 3
    assert result.associated_media[0].media_type == 'image'
    assert result.associated_media[1].media_type == 'image'
    assert result.associated_media[2].media_type == 'video'