From 27d7f6fbd90f8f8ed9cb44fee7a80e39ebdca3ef Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 05:11:56 +0000 Subject: [PATCH 1/2] Initial plan From b619c1e08ceb9901c842971bb403a1bbb59a3aa2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 05:45:22 +0000 Subject: [PATCH 2/2] Update requirements and tests for bundled fixtures Co-authored-by: eamonma <16643012+eamonma@users.noreply.github.com> --- tests/wikitext/test_wikitext.py | 62 +++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/tests/wikitext/test_wikitext.py b/tests/wikitext/test_wikitext.py index 12fe1cf..7092ede 100644 --- a/tests/wikitext/test_wikitext.py +++ b/tests/wikitext/test_wikitext.py @@ -4,9 +4,12 @@ from wikigraph import wikitext from wikigraph.experiments import versus_wtp -with open('data/raw/reduced/hundredk.xml', 'r') as reader: +with open('tests/wikitext/anarchism.txt', 'r') as reader: sample_wikitext = reader.read() +# Extract the first page's wikitext content before the closing tag. 
+sample_wikitext = sample_wikitext.partition("</text>")[0] + collected_links = wikitext.collect_links(sample_wikitext) @@ -16,20 +19,20 @@ def test_collect_links(): """ # random links that should be in it links = [ - "2012–13 UEFA Europa League", - "Propargyl alcohol", - "Category:American social commentators", - "Second French Empire", - "File:Banu Qurayza.png", - "The Great Gatsby (2013 film)", - "Image:Justus Sustermans - Portrait of Galileo Galilei (Uffizi).jpg", - "Image:Methane-2D-stereo.svg", - "Methane", - "Paraffin (disambiguation)", - "Tooth enamel", - "Chelsea, London", - "Category:April", - "microphone" + "political philosophy", + "Political movement", + "State (polity)", + "libertarian Marxism", + "libertarian socialism", + "history of anarchism", + "Spanish Civil War", + "File:WilhelmWeitling.jpg", + "File:Bakunin.png", + "Anarchist federalism", + "Anarchy Archives", + "Category:Anarchism", + "Category:Political ideologies", + "Category:Socialism" ] assert all(link in collected_links for link in links) @@ -76,22 +79,35 @@ def test_parse_wikilink(): assert parsed_links == expected_links -with open('data/raw/reduced/animation.xml', 'r') as reader: - animation_page = reader.read() +anarchism_page = ( + "<page>\n" + " <title>Anarchism</title>\n" + " <revision>\n" + " <timestamp>2017-06-05T04:18:18Z</timestamp>\n" + f" <text>{sample_wikitext}</text>\n" + " <sha1>t7eab8s09kwusxrq46aqc8o2o8tvme1</sha1>\n" + " </revision>\n" + " </page>" ) def test_char_count(): - extracted = wikitext.extract_content(animation_page) + extracted = wikitext.extract_content(anarchism_page) - assert wikitext.char_count(animation_page) == 69345 - assert "[[Category:Film and video technology]]" in extracted - assert extracted[len(extracted) - - len("[[Category:Film and video technology]]"):len(extracted)] == "[[Category:Film and video technology]]" + assert wikitext.char_count(anarchism_page) == 96486 + assert "[[Category:Socialism]]" in extracted + assert extracted.endswith("[[Category:Socialism]]") def test_last_revision(): from datetime import datetime + timestamp = 
"2017-06-05T04:18:18Z" + # Mirror last_revision's reference date and seconds component behavior. + delta_seconds = (datetime.fromisoformat("2021-01-01T00:00:01+00:00").replace(tzinfo=None) - + datetime.fromisoformat( + timestamp.replace("Z", "+00:00")).replace(tzinfo=None)).total_seconds() + expected = int(delta_seconds) % (24 * 60 * 60) assert wikitext.last_revision( - animation_page) == 45100 + anarchism_page) == expected if __name__ == '__main__':