diff --git a/Pipfile b/Pipfile index 096fb9b3..26e71409 100644 --- a/Pipfile +++ b/Pipfile @@ -1,15 +1,22 @@ [[source]] + url = "https://pypi.python.org/simple" verify_ssl = true name = "pypi" + [packages] + "72eb2aa" = {file = "https://github.com/Rapptz/discord.py/archive/rewrite.zip"} aiodns = "*" aiohttp = "<2.3.0,>=2.0.0" websockets = ">=4.0,<5.0" +"html2text" = "*" +"bs4" = "*" + [dev-packages] + "flake8" = "*" "flake8-bugbear" = "*" "flake8-bandit" = "*" @@ -20,5 +27,7 @@ websockets = ">=4.0,<5.0" safety = "*" dodgy = "*" + [requires] + python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock index 4e5214bb..532787e9 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "d797e580ddcddc99bf058109ab0306ad584c2902752a3d4076ba713fdc580fb7" + "sha256": "f533ef0cdbbca35169bb4f36c3c6425dcde5cafbd7d3a08646e1897d59050754" }, "pipfile-spec": 6, "requires": { @@ -53,6 +53,38 @@ ], "version": "==2.0.1" }, + "asyncio": { + "hashes": [ + "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41", + "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de", + "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c", + "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d" + ], + "markers": "python_version == '3.3'", + "version": "==3.4.3" + }, + "attrs": { + "hashes": [ + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9", + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450" + ], + "version": "==17.4.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76", + "sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11", + "sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89" + ], + "version": "==4.6.0" + }, + "bs4": { + "hashes": [ + "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" + ], + 
"index": "pypi", + "version": "==0.0.1" + }, "chardet": { "hashes": [ "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", @@ -60,6 +92,14 @@ ], "version": "==3.0.4" }, + "html2text": { + "hashes": [ + "sha256:490db40fe5b2cd79c461cf56be4d39eb8ca68191ae41ba3ba79f6cb05b7dd662", + "sha256:627514fb30e7566b37be6900df26c2c78a030cc9e6211bda604d8181233bcdd4" + ], + "index": "pypi", + "version": "==2018.1.9" + }, "idna": { "hashes": [ "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f", @@ -67,6 +107,12 @@ ], "version": "==2.6" }, + "idna-ssl": { + "hashes": [ + "sha256:1293f030bc608e9aa9cdee72aa93c1521bbb9c7698068c61c9ada6772162b979" + ], + "version": "==1.0.1" + }, "multidict": { "hashes": [ "sha256:0462372fc74e4c061335118a4a5992b9a618d6c584b028ef03cf3e9b88a960e2", @@ -120,6 +166,20 @@ ], "version": "==2.3.0" }, + "six": { + "hashes": [ + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + ], + "version": "==1.11.0" + }, + "trollius": { + "hashes": [ + "sha256:b35b9a9079c3c06a04cedf27dd833982b5d58401722da63d2c7c6384063a6924" + ], + "markers": "python_version <= '3.2'", + "version": "==2.2" + }, "websockets": { "hashes": [ "sha256:0c31bc832d529dc7583d324eb6c836a4f362032a1902723c112cf57883488d8c", @@ -197,6 +257,13 @@ ], "version": "==6.7" }, + "configparser": { + "hashes": [ + "sha256:5308b47021bc2340965c371f0f058cc6971a04502638d4244225c49d80db273a" + ], + "markers": "python_version < '3.2'", + "version": "==3.5.0" + }, "dodgy": { "hashes": [ "sha256:65e13cf878d7aff129f1461c13cb5fd1bb6dfe66bb5327e09379c3877763280c" @@ -211,6 +278,16 @@ ], "version": "==0.2.1" }, + "enum34": { + "hashes": [ + "sha256:2d81cbbe0e73112bdfe6ef8576f2238f2ba27dd0d55752a776c41d38b7da2850", + "sha256:644837f692e5f550741432dd3f223bbb9852018674981b1664e5dc339387588a", + "sha256:6bd0f6ad48ec2aa117d3d141940d484deccda84d4fcd884f5c3d93c23ecd8c79", + 
"sha256:8ad8c4783bf61ded74527bffb48ed9b54166685e4230386a9ed9b1279e2df5b1" + ], + "markers": "python_version < '3.4'", + "version": "==1.1.6" + }, "flake8": { "hashes": [ "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0", @@ -282,10 +359,10 @@ }, "gitpython": { "hashes": [ - "sha256:ad61bc25deadb535b047684d06f3654c001d9415e1971e51c9c20f5b510076e9", - "sha256:b8367c432de995dc330b5b146c5bfdc0926b8496e100fda6692134e00c0dcdc5" + "sha256:05069e26177c650b3cb945dd543a7ef7ca449f8db5b73038b465105673c1ef61", + "sha256:c47cc31af6e88979c57a33962cbc30a7c25508d74a1b3a19ec5aa7ed64b03129" ], - "version": "==2.1.8" + "version": "==2.1.9" }, "idna": { "hashes": [ diff --git a/bot/cogs/snakes.py b/bot/cogs/snakes.py index c9ed8042..1558cd9d 100644 --- a/bot/cogs/snakes.py +++ b/bot/cogs/snakes.py @@ -1,36 +1,180 @@ # coding=utf-8 +import asyncio import logging +import random +import re from typing import Any, Dict -from discord.ext.commands import AutoShardedBot, Context, command +import aiohttp +import bs4 +import discord +import html2text +from discord.ext import commands +from discord.ext.commands import Context + +from .. import hardcoded log = logging.getLogger(__name__) +WKPD = 'https://en.wikipedia.org' +API = WKPD + '/w/api.php?format=json&redirects=1&action=' +rSENTENCE = re.compile(r'^.+?\. 
# ---------------------------------------------------------------------------
# bot/cogs/snakes.py, continued (state of the file after the patch).
# The imports, `log`, `WKPD` and `API` are defined just above this point.
# ---------------------------------------------------------------------------

# First sentence of a paragraph: the shortest prefix that ends in ". ".
rSENTENCE = re.compile(r'^.+?\. ')
# Bracketed / parenthesised asides.  The opening "[" is escaped so that
# Python 3.7+ does not emit a "possible nested set" FutureWarning; the set of
# matched strings is unchanged.
rBRACK = re.compile(r'[\[(].+?[\])]')
# Markdown link carrying a title: [text](href "title").
rMDLINK = re.compile(r'(\[.*?\])\((.+?)\s".*?"\)')


class BadSnake(ValueError):
    """Raised when a request cannot be resolved to a snake article."""


class Snakes:
    """
    Snake-related commands
    """

    # Page ID of /wiki/Python_(programming_language): the one page that is let
    # through even though it is in none of the snake categories.
    PYTHON_PAGE_ID = '23862'
    # Give up on Special:RandomInCategory after this many redirects that did
    # not land on an article, instead of looping forever.
    MAX_RANDOM_TRIES = 10
    # Seconds the user has to react to a disambiguation prompt.
    DISAMBIG_TIMEOUT = 15.0
    # Only the digits 0-8 are offered as reaction choices.
    MAX_CHOICES = 9
    # The MediaWiki API accepts at most 50 values for `titles` from ordinary clients.
    MAX_TITLES = 50
    # Discord caps embed descriptions (2048 characters when this was written).
    MAX_DESCRIPTION = 2048

    def __init__(self, bot: commands.AutoShardedBot):
        """
        Prepare the HTTP session, the HTML-to-markdown converter and the API query templates.

        :param bot: The bot this cog is attached to; its event loop drives the HTTP session.
        """
        self.bot = bot
        # A dedicated session: per the original author, the one provided by
        # the bot reports that no host is reachable.
        self.session = aiohttp.ClientSession(loop=bot.loop)
        self.h2md = html2text.HTML2Text()

        # Built once and shared by both category-filtered queries.  Each
        # template keeps a single "{}" placeholder for the page title(s).
        snake_cats = '|'.join(hardcoded.categories)
        self.disamb_query = API + (
            'query'
            '&titles={}'
            '&prop=categories'
            '&cllimit=max'
            '&clcategories=' + snake_cats
        )
        self.base_query = API + (
            'parse'
            '&page={}'
            '&prop=text|sections'
        )
        self.info_query = API + (
            'query'
            '&titles={}'
            '&prop=pageimages|categories'
            '&pithumbsize=300'
            '&cllimit=max'
            '&clcategories=' + snake_cats +
            '|Category:Disambiguation_pages|Category:All_disambiguation_pages'
        )

    def __unload(self):
        """
        Close the HTTP session so its connections are not leaked.
        """
        # NOTE(review): relies on the discord.py rewrite calling
        # `_<ClassName>__unload` when a cog is removed, and on aiohttp 2.x
        # (pinned in the Pipfile) where ClientSession.close() is a plain
        # function - confirm before upgrading either library.
        self.session.close()

    async def _fetch_page(self, name: str) -> tuple:
        """
        Download the parsed page and the image/category info for one title.

        :param name: Title of the page to request.
        :return: A ``(data, info)`` tuple of the two decoded JSON responses.
        :raises BadSnake: If the parse response carries no ``parse`` payload
                          (e.g. the API answered with an error for an unknown title).
        """
        async with self.session.get(self.base_query.format(name)) as pg_resp, \
                self.session.get(self.info_query.format(name)) as if_resp:  # noqa: E127
            data = await pg_resp.json()
            info = await if_resp.json()
        if 'parse' not in data:
            raise BadSnake("I couldn't find a page by that name!")
        return data, info

    @staticmethod
    def _page_info(data: dict, info: dict) -> dict:
        """Return the ``info`` entry belonging to the page in ``data`` ({} if absent)."""
        pg_id = str(data['parse']['pageid'])
        return info.get('query', {}).get('pages', {}).get(pg_id, {})

    async def disambiguate(self, ctx: Context, content: str) -> tuple:
        """
        Ask the user to choose between snakes if the name they requested is ambiguous.
        If only one snake is present in a disambig page, redirect to it without asking.

        :param ctx: Needed to send the user a dialogue to choose a snake from.
        :param content: The HTML of the disambiguation page in question.
        :return: A ``(data, info)`` tuple for the chosen page, as returned by the API.
        :raises BadSnake: If the page lists no snakes, or the user does not pick one in time.
        """
        soup = bs4.BeautifulSoup(content, 'html.parser')
        potentials = [
            tag.get('title') for tag in soup.select('li a')
            if tag.get('title')                                # untitled links cannot be looked up
            and tag.find_parent(id='toc') is None              # skip the table of contents, at any depth
            and tag.find_previous(id='See_also') is None       # skip everything after "See also"
        ][:self.MAX_TITLES]
        if not potentials:
            raise BadSnake("I couldn't find any snakes by that name!")

        async with self.session.get(self.disamb_query.format('|'.join(potentials))) as resp:
            batch = await resp.json()
        pages = batch.get('query', {}).get('pages', {})
        # Only pages in one of the snake categories come back with a 'categories' key.
        filt = [pg['title'] for pg in pages.values() if 'categories' in pg][:self.MAX_CHOICES]
        if not filt:
            raise BadSnake("I couldn't find any snakes by that name!")

        if len(filt) > 1:
            em = discord.Embed(title='Disambiguation')
            em.description = "Oh no, I can't tell which snake you wanted! Help me out by picking one of these:\n"
            em.description += ''.join(f'\n{idx}. {title}' for idx, title in enumerate(filt))
            msg = await ctx.send(embed=em)
            for i in range(len(filt)):
                await msg.add_reaction(f'{i}\u20E3')

            def check(rxn, usr):
                """Accept only the requester's digit reaction on the prompt, within range."""
                if usr.id != ctx.message.author.id or rxn.message.id != msg.id:
                    return False
                try:
                    # Strictly less than: the choices are numbered from 0.
                    return 0 <= int(rxn.emoji[0]) < len(filt)
                except (TypeError, ValueError, IndexError):
                    # Custom emoji objects are not subscriptable; other emoji are not digits.
                    return False

            try:
                rxn, _ = await self.bot.wait_for('reaction_add', timeout=self.DISAMBIG_TIMEOUT, check=check)
            except asyncio.TimeoutError:
                raise BadSnake("You took too long to pick a snake!") from None
            name = filt[int(rxn.emoji[0])]
        else:
            name = filt[0]

        return await self._fetch_page(name)

    async def get_rand_name(self, category: str = None) -> str:
        """
        Follow wikipedia's Special:RandomInCategory to grab the name of a random snake.

        :param category: Optional, the name of the category to search for a random page in. Omit for random category.
        :return: A random snek's name
        :raises BadSnake: If no article turned up after ``MAX_RANDOM_TRIES`` attempts.
        """
        if category is None:
            category = random.choice(hardcoded.categories)
        url = f"{WKPD}/wiki/Special:RandomInCategory/{category}"
        for attempt in range(self.MAX_RANDOM_TRIES):
            if attempt:
                await asyncio.sleep(1)  # be polite between retries
            async with self.session.get(url) as resp:
                # The redirect target's last path segment is the page name.
                name = resp.url.path.split('/')[-1]
            if 'Category:' not in name:  # Sometimes is a subcategory instead of an article
                return name
        raise BadSnake(f"I couldn't find a random snake in {category}!")

    def _extract_tidbits(self, html: str, sections: list) -> list:
        """
        Collect the first sentence under each section heading as markdown.

        :param html: Rendered HTML of the page.
        :param sections: Section descriptors from the parse API; each provides an ``anchor``.
        :return: The sentences, with relative links made absolute.
        """
        soup = bs4.BeautifulSoup(html, 'html.parser')
        tidbits = []
        total = 0
        for section in sections:
            if total > 1500:
                break
            heading = soup.find(id=section['anchor'])
            if heading is None:
                continue
            para = heading.find_next('p')
            if para is None:
                continue
            sentence = rSENTENCE.match(rBRACK.sub('', str(para)))
            if sentence is None:
                continue
            tidbit = self.h2md.handle(sentence.group()).replace('\n', ' ')
            # Drop the link title and prefix the site so the link works outside the wiki.
            tidbit = rMDLINK.sub(lambda m: f'{m[1]}({WKPD}{m[2].replace(" ", "")})', tidbit)
            tidbits.append(tidbit)
            total += len(tidbit)
        return tidbits

    async def get_snek(self, ctx: Context, name: str = None) -> Dict[str, Any]:
        """
        Go online and fetch information about a snake.

        The information includes the name of the snake, a picture of the snake if applicable, and some tidbits.

        If "python" is given as the snake name, information about the programming language is provided instead.

        :param ctx: Only required for disambiguating to send the user a reaction-based dialogue
        :param name: Optional, the name of the snake to get information for - omit for a random snake
        :return: A dict containing information about the requested snake
        :raises BadSnake: If the page does not exist, is not a snake, or disambiguation fails.
        """
        if name is None:
            name = await self.get_rand_name()

        data, info = await self._fetch_page(name)
        pg_info = self._page_info(data, info)

        if 'categories' not in pg_info and str(data['parse']['pageid']) != self.PYTHON_PAGE_ID:
            raise BadSnake("This doesn't appear to be a snake!")

        cats = pg_info.get('categories', [])
        # i[9:] strips out 'Category:'
        if any(i['title'][9:] in ('Disambiguation pages', 'All disambiguation pages') for i in cats):
            data, info = await self.disambiguate(ctx, data['parse']['text']['*'])
            pg_info = self._page_info(data, info)

        tidbits = self._extract_tidbits(data['parse']['text']['*'], data['parse']['sections'])
        img_url = pg_info.get('thumbnail', {}).get('source')
        title = data['parse']['title']
        pg_url = f"{WKPD}/wiki/{title.replace(' ', '_')}"
        return {'🐍': (img_url, pg_url, title), 'tidbits': tidbits}

    @commands.command()
    async def get(self, ctx: Context, name: str.lower = None):
        """
        Go online and fetch information about a snake

        :param ctx: Context object passed from discord.py
        :param name: Optional, the name of the snake to get information for - omit for a random snake
        """
        # NOTE(review): as a plain positional parameter, `name` presumably
        # receives only the first word of the message - confirm whether
        # multi-word snake names are meant to work.
        if name == 'python':
            name = 'Python_(programming_language)'
        try:
            snek = await self.get_snek(ctx, name)
        except BadSnake as e:
            return await ctx.send(f'`{e}`')
        image, page, title = snek['🐍']
        description = '\n\n • '.join(snek['tidbits'])[:self.MAX_DESCRIPTION]
        embed = discord.Embed(title=title, url=page, description=description)
        if image is not None:
            embed.set_thumbnail(url=image)
        await ctx.send(embed=embed)


# `def setup(bot):` follows here in bot/cogs/snakes.py.  It is unchanged
# trailing context of the hunk and its body is not part of this diff.


# ---------------------------------------------------------------------------
# bot/hardcoded.py (new file)
# ---------------------------------------------------------------------------
#encoding: utf-8

# Wikipedia categories whose members are accepted as snakes.
categories = [
    f'Category:{s}' for s in (
        'Acrochordidae',
        'Alethinophidia',
        'Aniliidae',
        'Anomalepidae',
        'Anomochilidae',
        'Boidae',
        'Bolyeriidae',
        'Colubrids',
        'Colubrid_stubs',
        'Crotalinae',
        # Was 'Crotalis', a misspelling of the genus Crotalus.
        # NOTE(review): a category that does not exist presumably never
        # redirects to an article - confirm the spelling against Wikipedia.
        'Crotalus',
        'Cylindrophiidae',
        'Elapidae',
        'Gerrhopilidae',
        'Homalopsidae',
        'Lamprophiidae',
        'Leptotyphlopidae',
        'Loxocemidae',
        'Mambas',
        'Pareidae',
        'Pythonidae',
        'Snakes',
        'Snake_families',
        'Snake_genera',
        'Snake_stubs',
        'Thamnophis',
        'Tropidophiidae',
        'Typhlopidae',
        'Uropeltidae',
        'Venomous_snakes',
        'Viperidae',
        'Viperinae',
        'Xenodermidae',
        'Xenopeltidae',
        'Xenophidiidae',
        'Xenotyphlopidae',
    )
]