diff --git a/.env.test b/.env.test index be8f3f30..e5e1edb4 100644 --- a/.env.test +++ b/.env.test @@ -1,5 +1,5 @@ PORT=8888 -LOG_LEVEL=DEBUG +LOG_LEVEL=INFO FASTAPI_RELOAD=false LOCALSTORE_USE_CWD=true diff --git a/src/nonebot_plugin_parser/download/__init__.py b/src/nonebot_plugin_parser/download/__init__.py index 2ef0872c..461f8f46 100644 --- a/src/nonebot_plugin_parser/download/__init__.py +++ b/src/nonebot_plugin_parser/download/__init__.py @@ -2,6 +2,7 @@ from pathlib import Path from functools import partial from contextlib import contextmanager +from urllib.parse import urljoin import aiofiles from httpx import HTTPError, AsyncClient @@ -17,7 +18,7 @@ from ..utils import merge_av, safe_unlink, generate_file_name from ..config import pconfig from ..constants import COMMON_HEADER, DOWNLOAD_TIMEOUT -from ..exception import DownloadException, ZeroSizeException, SizeLimitException +from ..exception import IgnoreException, DownloadException class StreamDownloader: @@ -41,7 +42,7 @@ def rich_progress(self, desc: str, total: int | None = None): yield partial(progress.update, task_id) @auto_task - async def streamd( + async def download_file( self, url: str, *, @@ -67,11 +68,11 @@ async def streamd( if content_length == 0: logger.warning(f"媒体 url: {url}, 大小为 0, 取消下载") - raise ZeroSizeException + raise IgnoreException if (file_size := content_length / 1024 / 1024) > pconfig.max_size: - logger.warning(f"媒体 url: {url} 大小 {file_size:.2f} MB 超过 {pconfig.max_size} MB, 取消下载") - raise SizeLimitException + logger.warning(f"媒体 url: {url} 大小 {file_size:.2f} MB, 超过 {pconfig.max_size} MB, 取消下载") + raise IgnoreException with self.rich_progress(file_name, content_length) as update_progress: async with aiofiles.open(file_path, "wb") as file: @@ -97,7 +98,7 @@ async def download_video( """download video file by url with stream""" if video_name is None: video_name = generate_file_name(url, ".mp4") - return await self.streamd(url, file_name=video_name, ext_headers=ext_headers, chunk_size=1024 * 1024) + return await self.download_file(url, file_name=video_name, ext_headers=ext_headers, chunk_size=1024 * 1024) @auto_task async def download_audio( @@ -110,7 +111,7 @@ async def download_audio( """download audio file by url with stream""" if audio_name is None: audio_name = generate_file_name(url, ".mp3") - return await self.streamd(url, file_name=audio_name, ext_headers=ext_headers) + return await self.download_file(url, file_name=audio_name, ext_headers=ext_headers) @auto_task async def download_img( @@ -123,7 +124,7 @@ async def download_img( """download image file by url with stream""" if img_name is None: img_name = generate_file_name(url, ".jpg") - return await self.streamd(url, file_name=img_name, ext_headers=ext_headers) + return await self.download_file(url, file_name=img_name, ext_headers=ext_headers) @auto_task async def download_av_and_merge( @@ -155,6 +156,54 @@ async def download_imgs_without_raise( ) return [p for p in paths_or_errs if isinstance(p, Path)] + @auto_task + async def download_m3u8( + self, + m3u8_url: str, + *, + video_name: str | None = None, + ext_headers: dict[str, str] | None = None, + ) -> Path: + """download m3u8 file by url with stream""" + if video_name is None: + video_name = generate_file_name(m3u8_url, ".mp4") + + video_path = pconfig.cache_dir / video_name + + try: + async with aiofiles.open(video_path, "wb") as f: + total_size = 0 + with self.rich_progress(desc=video_name) as update_progress: + for url in await self._get_m3u8_slices(m3u8_url): + async with self.client.stream("GET", url, headers=ext_headers) as response: + async for chunk in response.aiter_bytes(chunk_size=1024 * 1024): + await f.write(chunk) + total_size += len(chunk) + update_progress(advance=len(chunk), total=total_size) + except HTTPError: + await safe_unlink(video_path) + logger.exception("m3u8 视频下载失败") + raise DownloadException("m3u8 视频下载失败") + + return video_path + + async def _get_m3u8_slices(self, m3u8_url: str): + """获取 m3u8 分片""" + + response = await self.client.get(m3u8_url) + response.raise_for_status() + + slices_text = response.text + slices: list[str] = [] + + for line in slices_text.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + slices.append(urljoin(m3u8_url, line)) + + return slices + DOWNLOADER: StreamDownloader = StreamDownloader() diff --git a/src/nonebot_plugin_parser/download/ytdlp.py b/src/nonebot_plugin_parser/download/ytdlp.py index 0d747ef9..5f92d974 100644 --- a/src/nonebot_plugin_parser/download/ytdlp.py +++ b/src/nonebot_plugin_parser/download/ytdlp.py @@ -5,11 +5,12 @@ import yt_dlp from msgspec import Struct, convert +from nonebot import logger from .task import auto_task from ..utils import LimitedSizeDict, generate_file_name from ..config import pconfig -from ..exception import ParseException, DurationLimitException +from ..exception import ParseException, IgnoreException class VideoInfo(Struct): @@ -36,8 +37,6 @@ def author_name(self) -> str: class YtdlpDownloader: - """YtdlpDownloader class""" - def __init__(self): if TYPE_CHECKING: from yt_dlp import _Params @@ -55,15 +54,8 @@ def __init__(self): self._extract_base_opts["proxy"] = proxy async def extract_video_info(self, url: str, cookiefile: Path | None = None) -> VideoInfo: - """get video info by url - - Args: - url (str): url address - cookiefile (Path | None ): cookie file path. Defaults to None. + """Get video info by yt-dlp""" - Returns: - dict[str, str]: video info - """ video_info = self._video_info_mapping.get(url, None) if video_info: return video_info @@ -83,19 +75,13 @@ async def extract_video_info(self, url: str, cookiefile: Path | None = None) -> @auto_task async def download_video(self, url: str, cookiefile: Path | None = None) -> Path: - """download video by yt-dlp + """Download video by yt-dlp""" - Args: - url (str): url address - cookiefile (Path | None): cookie file path. Defaults to None. - - Returns: - Path: video file path - """ video_info = await self.extract_video_info(url, cookiefile) duration = video_info.duration if duration > pconfig.duration_maximum: - raise DurationLimitException + logger.warning(f"视频时长 {duration} 秒, 超过 {pconfig.duration_maximum} 秒, 取消下载") + raise IgnoreException video_path = pconfig.cache_dir / generate_file_name(url, ".mp4") if video_path.exists(): @@ -127,15 +113,8 @@ async def download_video(self, url: str, cookiefile: Path | None = None) -> Path @auto_task async def download_audio(self, url: str, cookiefile: Path | None = None) -> Path: - """download audio by yt-dlp - - Args: - url (str): url address - cookiefile (Path | None): cookie file path. Defaults to None. + """Download audio by yt-dlp""" - Returns: - Path: audio file path - """ file_name = generate_file_name(url) audio_path = pconfig.cache_dir / f"{file_name}.flac" if audio_path.exists(): diff --git a/src/nonebot_plugin_parser/exception.py b/src/nonebot_plugin_parser/exception.py index 31a9549f..be89d498 100644 --- a/src/nonebot_plugin_parser/exception.py +++ b/src/nonebot_plugin_parser/exception.py @@ -1,17 +1,9 @@ class ParseException(Exception): - """异常基类""" - def __init__(self, message: str): super().__init__(message) self.message = message -class TipException(ParseException): - """提示异常""" - - pass - - class DownloadException(ParseException): """下载异常""" @@ -19,28 +11,12 @@ def __init__(self, message: str | None = None): super().__init__(message or "媒体下载失败") -class DownloadLimitException(DownloadException): - """下载超过限制异常""" - - pass - - -class SizeLimitException(DownloadLimitException): - """下载大小超过限制异常""" - - def __init__(self): - super().__init__("媒体大小超过配置限制,取消下载") - - -class DurationLimitException(DownloadLimitException): - """下载时长超过限制异常""" - - def __init__(self): - super().__init__("媒体时长超过配置限制,取消下载") +class IgnoreException(ParseException): + """可忽略异常""" + def __init__(self, message: str | None = None): + super().__init__(message or "可忽略异常") -class ZeroSizeException(DownloadException): - """下载大小为 0 异常""" - def __init__(self): - super().__init__("媒体大小为 0, 取消下载") +class TipException(ParseException): + """提示异常""" diff --git a/src/nonebot_plugin_parser/matchers/__init__.py b/src/nonebot_plugin_parser/matchers/__init__.py index f3c371ab..e4d019f3 100644 --- a/src/nonebot_plugin_parser/matchers/__init__.py +++ b/src/nonebot_plugin_parser/matchers/__init__.py @@ -11,7 +11,6 @@ from ..helper import UniHelper, UniMessage from ..parsers import BaseParser, ParseResult, BilibiliParser from ..renders import get_renderer -from ..download import DOWNLOADER def _get_enabled_parser_classes() -> list[type[BaseParser]]: @@ -104,7 +103,7 @@ async def _(message: Message = CommandArg()): if not audio_url: await UniMessage("未找到可下载的音频").finish() - audio_path = await DOWNLOADER.download_audio( + audio_path = await parser.downloader.download_audio( audio_url, audio_name=f"{bvid}-{page_idx}.mp3", ext_headers=parser.headers ) await UniMessage(UniHelper.record_seg(audio_path)).send() diff --git a/src/nonebot_plugin_parser/matchers/filter.py b/src/nonebot_plugin_parser/matchers/filter.py index 5abb6f7f..43b51330 100644 --- a/src/nonebot_plugin_parser/matchers/filter.py +++ b/src/nonebot_plugin_parser/matchers/filter.py @@ -37,7 +37,6 @@ def get_group_key(session: Session) -> str: return f"{session.scope}_{session.scene_path}" -# Rule def is_enabled(session: Session = UniSession()) -> bool: """判断当前会话是否在关闭解析的名单中""" if session.scene.is_private: diff --git a/src/nonebot_plugin_parser/matchers/rule.py b/src/nonebot_plugin_parser/matchers/rule.py index b6b3f761..d5730a2f 100644 --- a/src/nonebot_plugin_parser/matchers/rule.py +++ b/src/nonebot_plugin_parser/matchers/rule.py @@ -72,14 +72,7 @@ def _searched(state: T_State) -> SearchResult | None: def _extract_url(hyper: Hyper) -> str | None: - """处理 JSON 类型的消息段,提取 URL - - Args: - json_seg: JSON 类型的消息段 - - Returns: - Optional[str]: 提取的 URL, 如果提取失败则返回 None - """ + """处理 JSON 类型的消息段,提取 URL""" data = hyper.data raw_str: str | None = data.get("raw") diff --git a/src/nonebot_plugin_parser/parsers/acfun/__init__.py b/src/nonebot_plugin_parser/parsers/acfun/__init__.py index 20b3feac..98094304 100644 --- a/src/nonebot_plugin_parser/parsers/acfun/__init__.py +++ b/src/nonebot_plugin_parser/parsers/acfun/__init__.py @@ -1,30 +1,22 @@ import re -import asyncio from typing import ClassVar -from pathlib import Path -from urllib.parse import urljoin -import aiofiles -from httpx import HTTPError, AsyncClient +from httpx import AsyncClient from nonebot import logger from ..base import ( - DOWNLOADER, COMMON_TIMEOUT, - DOWNLOAD_TIMEOUT, Platform, BaseParser, PlatformEnum, ParseException, - DownloadException, - DurationLimitException, + IgnoreException, handle, pconfig, ) class AcfunParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.ACFUN, display_name="猴山") def __init__(self): @@ -39,12 +31,13 @@ async def _parse(self, searched: re.Match[str]): video_info = await self.parse_video_info(url) author = self.create_author(video_info.name, video_info.avatar_url) - video_task = asyncio.create_task( - self.download_video( - video_info.m3u8_url, - f"acfun_{acid}.mp4", - video_info.duration, - ) + if (duration := video_info.duration) >= pconfig.duration_maximum: + logger.warning(f"视频时长 {duration} 超过最大限制 {pconfig.duration_maximum}") + raise IgnoreException + + video_task = self.downloader.download_m3u8( + video_info.m3u8_url, + video_name=f"acfun_{acid}.mp4", ) video_content = self.create_video_content(video_task, cover_url=video_info.coverUrl) @@ -58,14 +51,7 @@ async def _parse(self, searched: re.Match[str]): ) async def parse_video_info(self, url: str): - """解析acfun链接获取详细信息 - - Args: - url (str): 链接 - - Returns: - video.VideoInfo - """ + """解析 acfun 视频信息""" from . import video # 拼接查询参数 @@ -84,68 +70,3 @@ async def parse_video_info(self, url: str): raw = re.sub(r'\\{1,4}"', '"', raw) raw = raw.replace('"{', "{").replace('}"', "}") return video.decoder.decode(raw) - - async def download_video(self, m3u8_url: str, file_name: str, duration: int) -> Path: - """下载acfun视频 - - Args: - m3u8_url (str): m3u8链接 - file_name (str): 文件名 - duration (int): 视频时长(秒) - - Returns: - Path: 下载的mp4文件 - """ - - if duration >= pconfig.duration_maximum: - raise DurationLimitException - - video_file = pconfig.cache_dir / file_name - if video_file.exists(): - return video_file - - m3u8_slices = await self._get_m3u8_slices(m3u8_url) - - try: - async with ( - aiofiles.open(video_file, "wb") as f, - AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client, - ): - total_size = 0 - with DOWNLOADER.rich_progress(desc=file_name) as update_progress: - for url in m3u8_slices: - async with client.stream("GET", url) as response: - async for chunk in response.aiter_bytes(chunk_size=1024 * 1024): - await f.write(chunk) - total_size += len(chunk) - update_progress(advance=len(chunk), total=total_size) - except HTTPError: - video_file.unlink(missing_ok=True) - logger.exception("视频下载失败") - raise DownloadException("视频下载失败") - return video_file - - async def _get_m3u8_slices(self, m3u8_url: str): - """拼接m3u8链接 - - Args: - m3u8_url (str): m3u8链接 - m3u8_slice (str): m3u8切片 - - Returns: - list[str]: 视频链接 - """ - async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client: - response = await client.get(m3u8_url) - response.raise_for_status() - - slices_text = response.text - - slices: list[str] = [] - for line in slices_text.splitlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - slices.append(urljoin(m3u8_url, line)) - - return slices diff --git a/src/nonebot_plugin_parser/parsers/acfun/video.py b/src/nonebot_plugin_parser/parsers/acfun/video.py index 6cd8d64f..89509899 100644 --- a/src/nonebot_plugin_parser/parsers/acfun/video.py +++ b/src/nonebot_plugin_parser/parsers/acfun/video.py @@ -52,7 +52,7 @@ def avatar_url(self) -> str: @property def text(self) -> str | None: - return f"简介: {self.description}" if self.description else None + return self.description @property def timestamp(self) -> int: diff --git a/src/nonebot_plugin_parser/parsers/base.py b/src/nonebot_plugin_parser/parsers/base.py index 24542779..78004688 100644 --- a/src/nonebot_plugin_parser/parsers/base.py +++ b/src/nonebot_plugin_parser/parsers/base.py @@ -1,5 +1,3 @@ -"""Parser 基类定义""" - from re import Match, Pattern, compile from abc import ABC from typing import TYPE_CHECKING, Any, TypeVar, ClassVar, cast @@ -10,16 +8,13 @@ from .data import Platform, ParseResult, ParseResultKwargs from ..config import pconfig as pconfig -from ..download import DOWNLOADER as DOWNLOADER +from ..download import DOWNLOADER from ..constants import IOS_HEADER, COMMON_HEADER, ANDROID_HEADER, COMMON_TIMEOUT from ..constants import DOWNLOAD_TIMEOUT as DOWNLOAD_TIMEOUT from ..constants import PlatformEnum as PlatformEnum -from ..exception import TipException as TipException -from ..exception import ParseException as ParseException +from ..exception import ParseException +from ..exception import IgnoreException as IgnoreException from ..exception import DownloadException as DownloadException -from ..exception import ZeroSizeException as ZeroSizeException -from ..exception import SizeLimitException as SizeLimitException -from ..exception import DurationLimitException as DurationLimitException T = TypeVar("T", bound="BaseParser") HandlerFunc = Callable[[T, Match[str]], Coroutine[Any, Any, ParseResult]] @@ -45,18 +40,12 @@ def decorator(func: HandlerFunc[T]) -> HandlerFunc[T]: class BaseParser: - """所有平台 Parser 的抽象基类 - - 子类必须实现: - - platform: 平台信息(包含名称和显示名称) - """ + platform: ClassVar[Platform] + """ 平台信息(包含名称和显示名称) """ _registry: ClassVar[list[type["BaseParser"]]] = [] """ 存储所有已注册的 Parser 类 """ - platform: ClassVar[Platform] - """ 平台信息(包含名称和显示名称) """ - if TYPE_CHECKING: _key_patterns: ClassVar[KeyPatterns] _handlers: ClassVar[dict[str, HandlerFunc]] @@ -95,18 +84,6 @@ def get_all_subclass(cls) -> list[type["BaseParser"]]: return cls._registry async def parse(self, keyword: str, searched: Match[str]) -> ParseResult: - """解析 URL 提取信息 - - Args: - keyword: 关键词 - searched: 正则表达式匹配对象,由平台对应的模式匹配得到 - - Returns: - ParseResult: 解析结果 - - Raises: - ParseException: 解析失败时抛出 - """ return await self._handlers[keyword](self, searched) async def parse_with_redirect( @@ -259,3 +236,7 @@ def create_graphics_content( image_task = DOWNLOADER.download_img(image_url, ext_headers=self.headers) return GraphicsContent(image_task, text, alt) + + @property + def downloader(self): + return DOWNLOADER diff --git a/src/nonebot_plugin_parser/parsers/bilibili/__init__.py b/src/nonebot_plugin_parser/parsers/bilibili/__init__.py index 9f5c89ac..7db06324 100644 --- a/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +++ b/src/nonebot_plugin_parser/parsers/bilibili/__init__.py @@ -12,12 +12,11 @@ from bilibili_api.login_v2 import QrCodeLogin, QrCodeLoginEvents from ..base import ( - DOWNLOADER, BaseParser, PlatformEnum, ParseException, + IgnoreException, DownloadException, - DurationLimitException, handle, pconfig, ) @@ -32,7 +31,6 @@ class BilibiliParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.BILIBILI, display_name="哔哩哔哩") def __init__(self): @@ -102,22 +100,13 @@ async def parse_video( avid: int | None = None, page_num: int = 1, ): - """解析视频信息 - - Args: - bvid (str | None): bvid - avid (int | None): avid - page_num (int): 页码 - """ + """解析视频信息""" from .video import VideoInfo, AIConclusion video = await self._get_video(bvid=bvid, avid=avid) - # 转换为 msgspec struct video_info = convert(await video.get_info(), VideoInfo) - # 获取简介 - text = f"简介: {video_info.desc}" if video_info.desc else None - # up + # UP author = self.create_author(video_info.owner.name, video_info.owner.face) # 处理分 p page_info = video_info.extract_info_with_page(page_num) @@ -141,13 +130,14 @@ async def download_video(): return output_path v_url, a_url = await self.extract_download_urls(video=video, page_index=page_info.index) if page_info.duration > pconfig.duration_maximum: - raise DurationLimitException + logger.warning(f"视频时长 {page_info.duration} 秒, 超过 {pconfig.duration_maximum} 秒, 取消下载") + raise IgnoreException if a_url is not None: - return await DOWNLOADER.download_av_and_merge( + return await self.downloader.download_av_and_merge( v_url, a_url, output_path=output_path, ext_headers=self.headers ) else: - return await DOWNLOADER.streamd(v_url, file_name=output_path.name, ext_headers=self.headers) + return await self.downloader.download_file(v_url, file_name=output_path.name, ext_headers=self.headers) video_task = asyncio.create_task(download_video()) video_content = self.create_video_content( @@ -160,18 +150,14 @@ async def download_video(): url=url, title=page_info.title, timestamp=page_info.timestamp, - text=text, + text=video_info.desc, author=author, contents=[video_content], extra={"info": ai_summary}, ) async def parse_dynamic_or_opus(self, dynamic_id: int): - """解析动态和图文信息 - - Args: - url (str): 动态链接 - """ + """解析动态或图文""" from bilibili_api.dynamic import Dynamic from .dynamic import DynamicData @@ -186,7 +172,7 @@ async def parse_dynamic_or_opus(self, dynamic_id: int): # 下载图片 contents: list[MediaContent] = [] for image_url in dynamic_info.image_urls: - img_task = DOWNLOADER.download_img(image_url, ext_headers=self.headers) + img_task = self.downloader.download_img(image_url, ext_headers=self.headers) contents.append(ImageContent(img_task)) return self.result( @@ -198,23 +184,12 @@ async def parse_dynamic_or_opus(self, dynamic_id: int): ) async def parse_opus_by_id(self, opus_id: int): - """解析图文动态信息 - - Args: - opus_id (int): 图文动态 id - """ + """解析图文动态(opus id)""" opus = Opus(opus_id, await self.credential) return await self._parse_bilibli_api_opus(opus) async def _parse_bilibli_api_opus(self, bili_opus: Opus): - """解析图文动态信息 - - Args: - opus_id (int): 图文动态 id - - Returns: - ParseResult: 解析结果 - """ + """解析图文动态(Opus)""" from .opus import OpusItem, TextNode, ImageNode @@ -246,14 +221,7 @@ async def _parse_bilibli_api_opus(self, bili_opus: Opus): ) async def parse_live(self, room_id: int): - """解析直播信息 - - Args: - room_id (int): 直播 id - - Returns: - ParseResult: 解析结果 - """ + """解析直播""" from bilibili_api.live import LiveRoom from .live import RoomData @@ -265,12 +233,12 @@ async def parse_live(self, room_id: int): contents: list[MediaContent] = [] # 下载封面 if cover := room_data.cover: - cover_task = DOWNLOADER.download_img(cover, ext_headers=self.headers) + cover_task = self.downloader.download_img(cover, ext_headers=self.headers) contents.append(ImageContent(cover_task)) # 下载关键帧 if keyframe := room_data.keyframe: - keyframe_task = DOWNLOADER.download_img(keyframe, ext_headers=self.headers) + keyframe_task = self.downloader.download_img(keyframe, ext_headers=self.headers) contents.append(ImageContent(keyframe_task)) author = self.create_author(room_data.name, room_data.avatar) @@ -285,14 +253,7 @@ async def parse_live(self, room_id: int): ) async def parse_favlist(self, fav_id: int): - """解析收藏夹信息 - - Args: - fav_id (int): 收藏夹 id - - Returns: - list[GraphicsContent]: 图文内容列表 - """ + """解析收藏夹""" from bilibili_api.favorite_list import get_video_favorite_list_content from .favlist import FavData @@ -313,12 +274,7 @@ async def parse_favlist(self, fav_id: int): ) async def _get_video(self, *, bvid: str | None = None, avid: int | None = None) -> Video: - """解析视频信息 - - Args: - bvid (str | None): bvid - avid (int | None): avid - """ + """解析视频""" if avid: return Video(aid=avid, credential=await self.credential) elif bvid: @@ -334,13 +290,7 @@ async def extract_download_urls( avid: int | None = None, page_index: int = 0, ) -> tuple[str, str | None]: - """解析视频下载链接 - - Args: - bvid (str | None): bvid - avid (int | None): avid - page_index (int): 页索引 = 页码 - 1 - """ + """解析视频下载链接""" from bilibili_api.video import ( AudioStreamDownloadURL, diff --git a/src/nonebot_plugin_parser/parsers/bilibili/favlist.py b/src/nonebot_plugin_parser/parsers/bilibili/favlist.py index 823deca7..51a28650 100644 --- a/src/nonebot_plugin_parser/parsers/bilibili/favlist.py +++ b/src/nonebot_plugin_parser/parsers/bilibili/favlist.py @@ -59,7 +59,7 @@ def cover(self) -> str: @property def desc(self) -> str: - return f"简介: {self.info.intro}" + return self.info.intro @property def timestamp(self) -> int: diff --git a/src/nonebot_plugin_parser/parsers/cookie.py b/src/nonebot_plugin_parser/parsers/cookie.py index 6af79fcb..db96ece1 100644 --- a/src/nonebot_plugin_parser/parsers/cookie.py +++ b/src/nonebot_plugin_parser/parsers/cookie.py @@ -3,13 +3,7 @@ def save_cookies_with_netscape(cookies_str: str, file_path: Path, domain: str): - """以 netscape 格式保存 cookies - - Args: - cookies_str: cookies 字符串 - file_path: 保存的文件路径 - domain: 域名 - """ + """以 netscape 格式保存 cookies""" # 创建 MozillaCookieJar 对象 cj = cookiejar.MozillaCookieJar(file_path) @@ -43,14 +37,7 @@ def save_cookies_with_netscape(cookies_str: str, file_path: Path, domain: str): def ck2dict(cookies_str: str) -> dict[str, str]: - """将 cookies 字符串转换为字典 - - Args: - cookies_str: cookies 字符串 - - Returns: - dict[str, str]: 字典 - """ + """将 cookies 字符串转换为字典""" res = {} for cookie in cookies_str.split(";"): name, value = cookie.strip().split("=", 1) diff --git a/src/nonebot_plugin_parser/parsers/douyin/__init__.py b/src/nonebot_plugin_parser/parsers/douyin/__init__.py index e451c369..b9dd15b3 100644 --- a/src/nonebot_plugin_parser/parsers/douyin/__init__.py +++ b/src/nonebot_plugin_parser/parsers/douyin/__init__.py @@ -15,7 +15,6 @@ class DouyinParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.DOUYIN, display_name="抖音") # https://v.douyin.com/_2ljF4AmKL8 @@ -31,19 +30,13 @@ async def _parse_short_link(self, searched: re.Match[str]): @handle("iesdouyin", r"iesdouyin\.com/share/(?Pslides|video|note)/(?P\d+)") @handle("m.douyin", r"m\.douyin\.com/share/(?Pslides|video|note)/(?P\d+)") # https://jingxuan.douyin.com/m/video/7574300896016862490?app=yumme&utm_source=copy_link - @handle( - "jingxuan.douyin", - r"jingxuan\.douyin.com/m/(?Pslides|video|note)/(?P\d+)", - ) + @handle("jingxuan.douyin", r"jingxuan\.douyin.com/m/(?Pslides|video|note)/(?P\d+)") async def _parse_douyin(self, searched: re.Match[str]): ty, vid = searched.group("ty"), searched.group("vid") if ty == "slides": return await self.parse_slides(vid) - for url in ( - self._build_m_douyin_url(ty, vid), - self._build_iesdouyin_url(ty, vid), - ): + for url in (self._build_m_douyin_url(ty, vid), self._build_iesdouyin_url(ty, vid)): try: return await self.parse_video(url) except ParseException as e: diff --git a/src/nonebot_plugin_parser/parsers/kuaishou/__init__.py b/src/nonebot_plugin_parser/parsers/kuaishou/__init__.py index bd700f4b..f5b4cbfb 100644 --- a/src/nonebot_plugin_parser/parsers/kuaishou/__init__.py +++ b/src/nonebot_plugin_parser/parsers/kuaishou/__init__.py @@ -8,9 +8,6 @@ class KuaiShouParser(BaseParser): - """快手解析器""" - - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.KUAISHOU, display_name="快手") def __init__(self): diff --git a/src/nonebot_plugin_parser/parsers/nga.py b/src/nonebot_plugin_parser/parsers/nga.py index af247382..b419c104 100644 --- a/src/nonebot_plugin_parser/parsers/nga.py +++ b/src/nonebot_plugin_parser/parsers/nga.py @@ -13,7 +13,6 @@ class NGAParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.NGA, display_name="NGA") def __init__(self): diff --git a/src/nonebot_plugin_parser/parsers/tiktok.py b/src/nonebot_plugin_parser/parsers/tiktok.py index af0dd363..37e53f67 100644 --- a/src/nonebot_plugin_parser/parsers/tiktok.py +++ b/src/nonebot_plugin_parser/parsers/tiktok.py @@ -7,7 +7,6 @@ class TikTokParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.TIKTOK, display_name="TikTok") @handle("tiktok", r"(www|vt|vm)\.tiktok\.com/[A-Za-z0-9._?%&+\-=/#@]*") diff --git a/src/nonebot_plugin_parser/parsers/twitter.py b/src/nonebot_plugin_parser/parsers/twitter.py index 38be854f..3390e471 100644 --- a/src/nonebot_plugin_parser/parsers/twitter.py +++ b/src/nonebot_plugin_parser/parsers/twitter.py @@ -10,7 +10,6 @@ class TwitterParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.TWITTER, display_name="小蓝鸟") async def _req_xdown_api(self, url: str) -> dict[str, Any]: @@ -44,14 +43,7 @@ async def _parse(self, searched: re.Match[str]) -> ParseResult: return self.parse_twitter_html(html_content) def parse_twitter_html(self, html_content: str) -> ParseResult: - """解析 Twitter HTML 内容 - - Args: - html_content (str): Twitter HTML 内容 - - Returns: - ParseResult: 解析结果 - """ + """解析 Twitter HTML 内容""" from bs4 import Tag, BeautifulSoup soup = BeautifulSoup(html_content, "html.parser") diff --git a/src/nonebot_plugin_parser/parsers/weibo/__init__.py b/src/nonebot_plugin_parser/parsers/weibo/__init__.py index a32caae5..cb2731f5 100644 --- a/src/nonebot_plugin_parser/parsers/weibo/__init__.py +++ b/src/nonebot_plugin_parser/parsers/weibo/__init__.py @@ -12,7 +12,6 @@ class WeiBoParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.WEIBO, display_name="微博") def __init__(self): diff --git a/src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py b/src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py index 6961e874..f6b2c73e 100644 --- a/src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py +++ b/src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py @@ -9,7 +9,6 @@ class XiaoHongShuParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.XIAOHONGSHU, display_name="小红书") def __init__(self): diff --git a/src/nonebot_plugin_parser/parsers/youtube/__init__.py b/src/nonebot_plugin_parser/parsers/youtube/__init__.py index 7ee52f0e..a2c4f674 100644 --- a/src/nonebot_plugin_parser/parsers/youtube/__init__.py +++ b/src/nonebot_plugin_parser/parsers/youtube/__init__.py @@ -9,7 +9,6 @@ class YouTubeParser(BaseParser): - # 平台信息 platform: ClassVar[Platform] = Platform(name=PlatformEnum.YOUTUBE, display_name="油管") def __init__(self): @@ -53,15 +52,7 @@ async def parse_video(self, url: str): ) async def parse_audio(self, url: str): - """解析 YouTube URL 并标记为音频下载 - - Args: - url: YouTube 链接 - - Returns: - ParseResult: 解析结果(音频内容) - - """ + """解析 YouTube URL 音频""" video_info = await YTDLP_DOWNLOADER.extract_video_info(url, self.cookies_file) author = await self._fetch_author_info(video_info.channel_id) diff --git a/src/nonebot_plugin_parser/renders/base.py b/src/nonebot_plugin_parser/renders/base.py index d0235751..b284b44f 100644 --- a/src/nonebot_plugin_parser/renders/base.py +++ b/src/nonebot_plugin_parser/renders/base.py @@ -16,7 +16,7 @@ DynamicContent, GraphicsContent, ) -from ..exception import DownloadException, ZeroSizeException, DownloadLimitException +from ..exception import IgnoreException, DownloadException class BaseRenderer(ABC): @@ -27,27 +27,13 @@ class BaseRenderer(ABC): @abstractmethod async def render_messages(self, result: ParseResult) -> AsyncGenerator[UniMessage[Any], None]: - """消息生成器 - - Args: - result (ParseResult): 解析结果 - - Returns: - AsyncGenerator[UniMessage[Any], None]: 消息生成器 - """ + """渲染解析结果""" if False: yield raise NotImplementedError async def render_contents(self, result: ParseResult) -> AsyncGenerator[UniMessage[Any], None]: - """渲染媒体内容消息 - - Args: - result (ParseResult): 解析结果 - - Returns: - AsyncGenerator[UniMessage[Any], None]: 消息生成器 - """ + """渲染媒体内容""" failed_count = 0 forwardable_segs: list[ForwardNodeInner] = [] dynamic_segs: list[ForwardNodeInner] = [] @@ -55,10 +41,7 @@ async def render_contents(self, result: ParseResult) -> AsyncGenerator[UniMessag for cont in chain(result.contents, result.repost.contents if result.repost else ()): try: path = await cont.get_path() - # 继续渲染其他内容, 类似之前 gather (return_exceptions=True) 的处理 - except (DownloadLimitException, ZeroSizeException): - # 预期异常,不抛出 - # yield UniMessage(e.message) + except IgnoreException: continue except DownloadException: failed_count += 1 @@ -109,23 +92,11 @@ class ImageRenderer(BaseRenderer): @abstractmethod async def render_image(self, result: ParseResult) -> bytes: - """渲染图片 - - Args: - result (ParseResult): 解析结果 - - Returns: - bytes: 图片字节 png 格式 - """ + """渲染图片""" raise NotImplementedError @override async def render_messages(self, result: ParseResult): - """渲染消息 - - Args: - result (ParseResult): 解析结果 - """ image_seg = await self.cache_or_render_image(result) msg = UniMessage(image_seg) @@ -139,14 +110,7 @@ async def render_messages(self, result: ParseResult): yield message async def cache_or_render_image(self, result: ParseResult): - """获取缓存图片 - - Args: - result (ParseResult): 解析结果 - - Returns: - Image: 图片 Segment - """ + """获取缓存图片""" if result.render_image is None: image_raw = await self.render_image(result) image_path = await self.save_img(image_raw) @@ -158,14 +122,7 @@ async def cache_or_render_image(self, result: ParseResult): @classmethod async def save_img(cls, raw: bytes) -> Path: - """保存图片 - - Args: - raw (bytes): 图片字节 - - Returns: - Path: 图片路径 - """ + """保存图片""" import aiofiles file_name = f"{uuid.uuid4().hex}.png" diff --git a/src/nonebot_plugin_parser/renders/common.py b/src/nonebot_plugin_parser/renders/common.py index 7fc2667f..bfd5ab7d 100644 --- a/src/nonebot_plugin_parser/renders/common.py +++ b/src/nonebot_plugin_parser/renders/common.py @@ -95,7 +95,7 @@ class RenderContext: class CommonRenderer(ImageRenderer): - """统一渲染器 - 单次遍历渲染""" + """统一渲染器""" # 布局常量 PADDING = 25 @@ -124,15 +124,13 @@ class CommonRenderer(ImageRenderer): REPOST_BORDER_COLOR: ClassVar[Color] = (230, 230, 230) # 资源路径 - _RESOURCES = "resources" - _EMOJIS = "emojis" - RESOURCES_DIR: ClassVar[Path] = Path(__file__).parent / _RESOURCES - DEFAULT_FONT_PATH: ClassVar[Path] = RESOURCES_DIR / "HYSongYunLangHeiW-1.ttf" - DEFAULT_VIDEO_BUTTON_PATH: ClassVar[Path] = RESOURCES_DIR / "media_button.png" + RESOURCES_DIR: ClassVar[Path] = Path(__file__).parent / "resources" + DEFAULT_FONT_PATH: ClassVar[Path] = RESOURCES_DIR / "HYSongYunLangHeiW.ttf" + DEFAULT_VIDEO_BUTTON_PATH: ClassVar[Path] = RESOURCES_DIR / "play.png" EMOJI_SOURCE: ClassVar[EmojiCDNSource] = EmojiCDNSource( base_url=pconfig.emoji_cdn, style=pconfig.emoji_style, - cache_dir=pconfig.cache_dir / _EMOJIS, + cache_dir=pconfig.cache_dir / "emojis", show_progress=True, ) @@ -152,9 +150,9 @@ def _load_fonts(cls): @classmethod def _load_video_button(cls): with Image.open(cls.DEFAULT_VIDEO_BUTTON_PATH) as img: - cls.video_button_image: PILImage = img.convert("RGBA") + cls.video_button_image: PILImage = img.convert("RGBA").resize((100, 100)) alpha = cls.video_button_image.split()[-1] - alpha = alpha.point(lambda x: int(x * 0.3)) + alpha = alpha.point(lambda x: int(x * 0.5)) cls.video_button_image.putalpha(alpha) @classmethod @@ -205,13 +203,15 @@ async def _create_card_image(self, result: ParseResult, not_repost: bool = True) # 裁剪到实际高度 final_height = ctx.y_pos + self.PADDING + logger.debug(f"估算高度: {estimated_height}, 画布高度: {ctx.image.height}, 最终高度: {final_height}") return ctx.image.crop((0, 0, card_width, final_height)) def _ensure_canvas_height(self, ctx: RenderContext, needed_height: int) -> None: """确保画布有足够高度,不够则扩展""" if ctx.y_pos + needed_height + self.PADDING > ctx.image.height: - # 扩展画布(每次扩展 1.5 倍或至少满足需求) - new_height = max(int(ctx.image.height * 1.5), ctx.y_pos + needed_height + self.PADDING * 2) + # 扩展画布(每次扩展 1.6 倍或至少满足需求) + new_height = max(int(ctx.image.height * 1.6), ctx.y_pos + needed_height + self.PADDING * 2) + logger.debug(f"扩展画布高度: {ctx.image.height} -> {new_height}") bg_color = self.BG_COLOR if ctx.not_repost else self.REPOST_BG_COLOR new_image = Image.new("RGB", (ctx.card_width, new_height), bg_color) new_image.paste(ctx.image, (0, 0)) @@ -222,20 +222,23 @@ def _estimate_height(self, result: ParseResult, content_width: int) -> int: """估算画布高度""" height = self.PADDING * 2 # 上下边距 - # 头部 + # 头部(头像 + 名称 + 时间) if result.author: height += self.AVATAR_SIZE + self.SECTION_SPACING - # 标题(估算) + # 标题(估算,考虑换行符) if result.title: - # 考虑换行符 lines = result.title.count("\n") + 1 + (len(result.title) * self.fontset.title.cjk_width // content_width) height += lines * self.fontset.title.line_height + self.SECTION_SPACING # 封面或图片 height += self.MAX_COVER_HEIGHT + self.SECTION_SPACING - # 文本(估算:考虑换行符) + # 图文内容 + if graphics_contents := result.graphics_contents: + height += len(graphics_contents) * self.MAX_COVER_HEIGHT + self.SECTION_SPACING + + # 正文(估算,考虑换行符) if result.text: lines = result.text.count("\n") + 1 + (len(result.text) * self.fontset.text.cjk_width // content_width) height += lines * self.fontset.text.line_height + self.SECTION_SPACING @@ -250,7 +253,7 @@ def _estimate_height(self, result: ParseResult, content_width: int) -> int: height += self.REPOST_PADDING * 2 + self.SECTION_SPACING # 增加安全余量,防止估算不足(最后会裁剪) - return height + 300 + return height async def _render_header(self, ctx: RenderContext) -> None: """渲染头部(头像 + 名称 + 时间)""" @@ -351,7 +354,7 @@ async def _render_title(self, ctx: RenderContext) -> None: async def _render_cover_or_images(self, ctx: RenderContext) -> None: """渲染封面或图片网格""" - # 尝试封面 + cover_path = await ctx.result.cover_path if cover_path and cover_path.exists(): cover = self._load_cover(cover_path, ctx.content_width) @@ -359,7 +362,7 @@ async def _render_cover_or_images(self, ctx: RenderContext) -> None: x_pos = self.PADDING ctx.image.paste(cover, (x_pos, ctx.y_pos)) # 视频按钮 - btn_size = 128 + btn_size = 100 btn_x = x_pos + (cover.width - btn_size) // 2 btn_y = ctx.y_pos + (cover.height - btn_size) // 2 ctx.image.paste(self.video_button_image, (btn_x, btn_y), self.video_button_image) diff --git a/src/nonebot_plugin_parser/renders/default.py b/src/nonebot_plugin_parser/renders/default.py index 57b8ba54..7c430bfb 100644 --- a/src/nonebot_plugin_parser/renders/default.py +++ b/src/nonebot_plugin_parser/renders/default.py @@ -11,15 +11,6 @@ class DefaultRenderer(BaseRenderer): @override async def render_messages(self, result: ParseResult): - """渲染内容消息 - - Args: - result (ParseResult): 解析结果 - - Returns: - Generator[UniMessage[Any], None, None]: 消息生成器 - """ - texts = [ result.header, result.text, diff --git a/src/nonebot_plugin_parser/renders/htmlrender.py b/src/nonebot_plugin_parser/renders/htmlrender.py index bcd1519c..5812b396 100644 --- a/src/nonebot_plugin_parser/renders/htmlrender.py +++ b/src/nonebot_plugin_parser/renders/htmlrender.py @@ -15,14 +15,6 @@ class HtmlRenderer(ImageRenderer): @override async def render_image(self, result: ParseResult) -> bytes: - """使用 HTML 绘制通用社交媒体帖子卡片 - - Args: - result: 解析结果 - - Returns: - PNG 图片的字节数据 - """ # 准备模板数据 template_data = await self._resolve_parse_result(result) diff --git a/src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW-1.ttf b/src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW.ttf similarity index 100% rename from src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW-1.ttf rename to src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW.ttf diff --git a/src/nonebot_plugin_parser/renders/resources/media_button.png b/src/nonebot_plugin_parser/renders/resources/media_button.png deleted file mode 100644 index b57717f6..00000000 Binary files a/src/nonebot_plugin_parser/renders/resources/media_button.png and /dev/null differ diff --git a/src/nonebot_plugin_parser/renders/resources/play.png b/src/nonebot_plugin_parser/renders/resources/play.png new file mode 100644 index 00000000..cf18b8f5 Binary files /dev/null and b/src/nonebot_plugin_parser/renders/resources/play.png differ diff --git a/src/nonebot_plugin_parser/utils.py b/src/nonebot_plugin_parser/utils.py index 75247fdb..83eb29bc 100644 --- a/src/nonebot_plugin_parser/utils.py +++ b/src/nonebot_plugin_parser/utils.py @@ -7,6 +7,7 @@ from collections import OrderedDict from urllib.parse import urlparse +from anyio import Path as AnyioPath from nonebot import logger K = TypeVar("K") @@ -29,28 +30,17 @@ def __setitem__(self, key: K, value: V): def keep_zh_en_num(text: str) -> str: - """ - 保留字符串中的中英文和数字 - """ + """保留字符串中的中英文和数字""" return re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9\-_]", "", text.replace(" ", "_")) async def safe_unlink(path: Path): - """ - 安全删除文件 - """ - try: - await asyncio.to_thread(path.unlink, missing_ok=True) - except Exception: - logger.warning(f"删除 {path} 失败") + """安全删除文件""" + await AnyioPath(path).unlink(missing_ok=True) async def exec_ffmpeg_cmd(cmd: list[str]) -> None: - """执行命令 - - Args: - cmd (list[str]): 命令序列 - """ + """执行 ffmpeg 命令""" try: process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE @@ -71,13 +61,7 @@ async def merge_av( a_path: Path, output_path: Path, ) -> None: - """合并视频和音频 - - Args: - v_path (Path): 视频文件路径 - a_path (Path): 音频文件路径 - output_path (Path): 输出文件路径 - """ + """合并视频和音频""" logger.info(f"Merging {v_path.name} and {a_path.name} to {output_path.name}") cmd = [ @@ -107,13 +91,7 @@ async def merge_av_h264( a_path: Path, output_path: Path, ) -> None: - """合并视频和音频,并使用 H.264 编码 - - Args: - v_path (Path): 视频文件路径 - a_path (Path): 音频文件路径 - output_path (Path): 输出文件路径 - """ + """合并视频和音频,并使用 H.264 编码""" logger.info(f"Merging {v_path.name} and {a_path.name} to {output_path.name} with H.264") # 修改命令以确保视频使用 H.264 编码 @@ -147,14 +125,7 @@ async def merge_av_h264( async def encode_video_to_h264(video_path: Path) -> Path: - """将视频重新编码到 h264 - - Args: - video_path (Path): 视频路径 - - Returns: - Path: 编码后的视频路径 - """ + """将视频重新编码到 h264""" output_path = video_path.with_name(f"{video_path.stem}_h264{video_path.suffix}") if output_path.exists(): return output_path @@ -178,24 +149,13 @@ async def encode_video_to_h264(video_path: Path) -> Path: def fmt_size(file_path: Path) -> str: - """格式化文件大小 - - Args: - video_path (Path): 视频路径 - """ + """格式化文件大小""" return f"大小: {file_path.stat().st_size / 1024 / 1024:.2f} MB" def generate_file_name(url: str, default_suffix: str = "") -> str: - """根据 url 生成文件名 - - Args: - url (str): url - default_suffix (str): 默认后缀. Defaults to "". + """根据 url 生成文件名""" - Returns: - str: 文件名 - """ # 根据 url 获取文件后缀 path = Path(urlparse(url).path) suffix = path.suffix if path.suffix else default_suffix @@ -206,12 +166,7 @@ def generate_file_name(url: str, default_suffix: str = "") -> str: def write_json_to_data(data: dict[str, Any] | str, file_name: str): - """将数据写入数据目录 - - Args: - data (dict[str, Any] | str): 数据 - file_name (str): 文件名 - """ + """将数据写入数据目录""" import json from .config import pconfig diff --git a/tests/parsers/test_bilibili_need_ck.py b/tests/parsers/test_bilibili_need_ck.py index 2dc8f7fa..e6515fa0 100644 --- a/tests/parsers/test_bilibili_need_ck.py +++ b/tests/parsers/test_bilibili_need_ck.py @@ -49,23 +49,20 @@ async def test_video(): async def test_max_size_video(): from nonebot_plugin_parser.parsers import BilibiliParser from nonebot_plugin_parser.download import DOWNLOADER - from nonebot_plugin_parser.exception import ( - SizeLimitException, - DurationLimitException, - ) + from nonebot_plugin_parser.exception import IgnoreException parser = BilibiliParser() bvid = "BV1du4y1E7Nh" audio_url = None try: _, audio_url = await parser.extract_download_urls(bvid=bvid) - except DurationLimitException: + except IgnoreException: pass assert audio_url is not None try: await DOWNLOADER.download_audio(audio_url, ext_headers=parser.headers) - except SizeLimitException: + except IgnoreException: pass diff --git a/typos.toml b/typos.toml index 0e893dfa..641c5f68 100644 --- a/typos.toml +++ b/typos.toml @@ -1,5 +1,4 @@ [default.extend-words] -streamd = "streamd" detecter = "detecter" Detecter = "Detecter" mapp = "mapp"