diff --git a/bot/constants.py b/bot/constants.py
index 63fc156fef..afbdc6513a 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -465,6 +465,8 @@ class _URLs(_BaseURLs):
 
     site_logs_view: str = "https://pythondiscord.com/staff/bot/logs"
 
+    rhodium_api: str = "https://rhodium.python-discord.workers.dev/"
+
 
 URLs = _URLs()
 
@@ -593,6 +595,7 @@ class _Keys(EnvConfig, env_prefix="api_keys_"):
 
     github: str = ""
     site_api: str = ""
+    rhodium: str = ""
 
 
 Keys = _Keys()
diff --git a/bot/exts/filtering/_filters/unique/image.py b/bot/exts/filtering/_filters/unique/image.py
new file mode 100644
index 0000000000..a0594ad495
--- /dev/null
+++ b/bot/exts/filtering/_filters/unique/image.py
@@ -0,0 +1,130 @@
+"""Unique filter that flags image attachments whose perceptual hash matches known scam images."""
+
+import aiohttp
+
+from bot import instance
+from bot.constants import Keys, URLs
+from bot.exts.filtering._filter_context import Event, FilterContext
+from bot.exts.filtering._filters.filter import UniqueFilter
+from bot.log import get_logger
+
+log = get_logger(__name__)
+
+# Maximum perceptual hash difference for positive predictions
+_THRESHOLD = 4
+# Maximum number of seconds to wait for Rhodium API
+_TIMEOUT = 5
+# Largest attachment size, in bytes, that is sent to the Rhodium API for hashing (5 MB)
+_MAX_IMAGE_SIZE = 5_000_000
+# The Rhodium API reports hashes under the key "i64", i.e. as signed 64-bit integers;
+# this mask maps an XOR of two such hashes back onto its 64-bit pattern
+_HASH_MASK = 0xFFFF_FFFF_FFFF_FFFF
+
+_KNOWN_IMAGE_HASHES = [
+    # A camera-taken image of a tweet attributed to @MrBeast about the purported launch of a crypto casino;
+    # there is a URL in the image that varies by instance
+    219481626328303491,
+    # An image saying "Activate Code for Bonus!"
+    6997610946676476306,
+    # An image saying "Withdrawal Success!"
+    -9135984495352994088,
+    # A collage of four images, the first being a purported tweet from Elon Musk about the opening of a crypto casino,
+    # and the rest of similar character to the previous two
+    231962884035511073,
+    # Text centered on a background of a field and sky, the text saying "I've helped 15+ people earn ...
+    # in stock market and crypto trading"
+    360569449461317633,
+]
+
+
+def _is_match(image_hash: int) -> bool:
+    """
+    Return whether `image_hash` is within `_THRESHOLD` differing bits of any known scam image hash.
+
+    The distance is the Hamming distance between the 64-bit patterns of the two hashes.
+    """
+    # Python ints are unbounded, so XOR-ing hashes of opposite sign gives a negative number whose
+    # `bit_count()` reflects its absolute value; masking first yields the true number of differing bits.
+    return any(
+        ((image_hash ^ candidate_hash) & _HASH_MASK).bit_count() <= _THRESHOLD
+        for candidate_hash in _KNOWN_IMAGE_HASHES
+    )
+
+
+class RhodiumAPIError(Exception):
+    """Exception raised when the Rhodium API returns an error."""
+
+
+async def _get_hash(image_url: str) -> int:
+    """
+    Request the perceptual hash of the image at `image_url` from the Rhodium API.
+
+    Raise `RhodiumAPIError` if the API responds with a non-200 status or with a body that does not
+    contain an integer hash. Exceptions raised by aiohttp while making the request, including
+    timeouts, are propagated to the caller.
+    """
+    async with instance.http_session.post(
+        url=URLs.rhodium_api,
+        headers={"Authorization": f"Bearer {Keys.rhodium}"},
+        json={"url": image_url},
+        # aiohttp accepts a bare number here only for backward compatibility
+        timeout=aiohttp.ClientTimeout(total=_TIMEOUT),
+    ) as response:
+        if response.status != 200:
+            contents = await response.text()
+
+            raise RhodiumAPIError(f"Rhodium API returned status code {response.status}: {contents}")
+
+        try:
+            response_data = await response.json()
+            image_hash = response_data["i64"]
+        except (ValueError, LookupError, TypeError) as e:
+            # Invalid JSON, a missing key, or a payload that is not a mapping at all
+            raise RhodiumAPIError("Rhodium API returned a malformed response body") from e
+
+    if not isinstance(image_hash, int):
+        raise RhodiumAPIError(f"Rhodium API returned a non-integer hash: {image_hash!r}")
+
+    return image_hash
+
+
+class ImageFilter(UniqueFilter):
+    """Filter messages that contain an image attachment whose perceptual hash matches images associated with scams."""
+
+    name = "image"
+    events = (Event.MESSAGE, )
+
+    async def triggered_on(self, ctx: FilterContext) -> bool:
+        """
+        Return whether the message has an attached image that is known to be posted by compromised accounts.
+
+        Attachments that are not images, or that exceed `_MAX_IMAGE_SIZE`, are skipped.
+        """
+        log.trace("Entering image filter")
+        for attachment in ctx.attachments:
+            if (
+                attachment.content_type is None
+                or not attachment.content_type.startswith("image/")
+                or attachment.size > _MAX_IMAGE_SIZE
+            ):
+                continue
+
+            try:
+                image_hash = await _get_hash(attachment.url)
+            except RhodiumAPIError as e:
+                # The API rejected or could not hash this attachment; keep checking the remaining ones so that
+                # a single unprocessable attachment cannot shield a known scam image posted alongside it.
+                log.exception("Rhodium API error: %s", e)
+                continue
+            except aiohttp.ClientError:
+                # The request itself failed, so requests for the remaining attachments would likely fail too.
+                log.exception("Unhandled aiohttp exception while getting image hash")
+                return False
+            except TimeoutError:
+                log.exception("Timed out getting image hash")
+                return False
+
+            if _is_match(image_hash):
+                return True
+
+        return False