diff --git a/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md b/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md
index 5c256f17ae..41cae1637b 100644
--- a/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md
+++ b/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md
@@ -9,7 +9,7 @@ import CodeBlock from '@theme/CodeBlock';
import LegacyJsCourseAdmonition from '@site/src/components/LegacyJsCourseAdmonition';
import Exercises from '../scraping_basics/_exercises.mdx';
import JsLlmProjectsExercise from '!!raw-loader!roa-loader!./exercises/js_llm_projects.mjs';
-import CnnSportsShortestArticleExercise from '!!raw-loader!roa-loader!./exercises/cnn_sports_shortest_article.mjs';
+import EurozonePopulationExercise from '!!raw-loader!roa-loader!./exercises/eurozone_population.mjs';
@@ -394,19 +394,13 @@ Your output should look something like this:
{JsLlmProjectsExercise.code}
-### Find the shortest CNN article which made it to the Sports homepage
+### Count eurozone population from country pages
-Scrape the [CNN Sports](https://edition.cnn.com/sport) homepage. For each linked article, calculate its length in characters:
+Scrape the [Countries using the euro](https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en) page.
-- Locate the element that holds the main content of the article.
-- Use `.text()` to extract all the content as plain text.
-- Use `.length` to calculate the character count.
-
-Skip pages without text (like those that only have a video). Sort the results and print the URL of the shortest article that made it to the homepage.
-
-At the time of writing, the shortest article on the CNN Sports homepage is [about a donation to the Augusta National Golf Club](https://edition.cnn.com/2024/10/03/sport/masters-donation-hurricane-helene-relief-spt-intl/), which is just 1,642 characters long.
+Locate the links to countries in the **Euro area countries** section. Visit each linked country detail page, find the value labeled **Population**, and sum the values to get the total population of all countries using the euro as their currency. Print a single number: the sum.
Solution
- {CnnSportsShortestArticleExercise.code}
+ {EurozonePopulationExercise.code}
diff --git a/sources/academy/webscraping/scraping_basics_javascript/exercises/cnn_sports_shortest_article.mjs b/sources/academy/webscraping/scraping_basics_javascript/exercises/cnn_sports_shortest_article.mjs
deleted file mode 100644
index c9e0bad89a..0000000000
--- a/sources/academy/webscraping/scraping_basics_javascript/exercises/cnn_sports_shortest_article.mjs
+++ /dev/null
@@ -1,40 +0,0 @@
-import * as cheerio from 'cheerio';
-
-async function download(url) {
- const response = await fetch(url);
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}`);
- }
- const html = await response.text();
- return cheerio.load(html);
-}
-
-const listingUrl = 'https://edition.cnn.com/sport';
-const $ = await download(listingUrl);
-
-const results = await Promise.all(
- $('.layout__main .card').toArray().map(async (element) => {
- const $element = $(element);
- const $link = $element.find('a').first();
- if (!$link.length) {
- return null;
- }
-
- const articleUrl = new URL($link.attr('href'), listingUrl).href;
- const $article = await download(articleUrl);
- const content = $article('.article__content').text().trim();
-
- if (!content) {
- return null;
- }
-
- return { url: articleUrl, length: content.length };
- }),
-);
-
-const nonEmpty = results.filter((item) => item && item.length > 0);
-nonEmpty.sort((a, b) => a.length - b.length);
-
-if (nonEmpty.length > 0) {
- console.log(nonEmpty[0].url);
-}
diff --git a/sources/academy/webscraping/scraping_basics_javascript/exercises/eurozone_population.mjs b/sources/academy/webscraping/scraping_basics_javascript/exercises/eurozone_population.mjs
new file mode 100644
index 0000000000..8ebf980d60
--- /dev/null
+++ b/sources/academy/webscraping/scraping_basics_javascript/exercises/eurozone_population.mjs
@@ -0,0 +1,42 @@
+import * as cheerio from 'cheerio';
+
+// Fetch `url` and return a Cheerio root function for querying its HTML.
+// Throws an Error for any non-2xx HTTP response.
+async function download(url) {
+ const response = await fetch(url);
+ if (!response.ok) {
+ throw new Error(`HTTP ${response.status}`);
+ }
+ const html = await response.text();
+ return cheerio.load(html);
+}
+
+// Scan all <li> elements of a country detail page for one mentioning
+// "Population" and parse its numeric value.
+// NOTE(review): assumes thousands are separated by plain spaces
+// (e.g. "Population: 10 800 000"); NBSP or comma separators would make
+// parseInt stop early or return NaN — verify against the live page.
+// A matching <li> with no leading digits yields NaN (the caller filters
+// with Number.isInteger); throws only when no <li> mentions "Population".
+function parsePopulation($) {
+ for (const element of $('li').toArray()) {
+ const text = $(element).text();
+ if (text.includes('Population')) {
+ const digits = text
+ .replace('Population:', '')
+ .replaceAll(' ', '');
+ return Number.parseInt(digits, 10);
+ }
+ }
+ throw new Error('Population not found');
+}
+
+const listingUrl = 'https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en';
+const $ = await download(listingUrl);
+
+// NOTE(review): assumes the first accordion item on the listing page is
+// the "Euro area countries" section — confirm against the live markup.
+const $euroCountriesAccordion = $('.ecl-accordion__item').first();
+const $countryLinks = $euroCountriesAccordion.find('li a');
+
+// Download all country pages concurrently and parse each population.
+const promises = $countryLinks.toArray().map(async (element) => {
+ const countryUrl = new URL($(element).attr('href'), listingUrl).href;
+ const $country = await download(countryUrl);
+ return parsePopulation($country);
+});
+
+const populations = await Promise.all(promises);
+// Drop NaN results (pages whose value could not be parsed) before summing.
+const totalPopulation = populations
+ .filter((population) => Number.isInteger(population))
+ .reduce((sum, population) => sum + population, 0);
+
+console.log(totalPopulation);
diff --git a/sources/academy/webscraping/scraping_basics_javascript/exercises/test.bats b/sources/academy/webscraping/scraping_basics_javascript/exercises/test.bats
index caa3c9c75f..bb6d8cc0dc 100644
--- a/sources/academy/webscraping/scraping_basics_javascript/exercises/test.bats
+++ b/sources/academy/webscraping/scraping_basics_javascript/exercises/test.bats
@@ -144,10 +144,10 @@ teardown_file() {
[[ "$output" == *' updatedOn: '* ]]
}
-@test "finds the shortest CNN sports article" {
- run node cnn_sports_shortest_article.mjs
+@test "counts total eurozone population" {
+ run node eurozone_population.mjs
- [[ "$output" == 'https://edition.cnn.com/'* ]]
+ [[ "$output" -gt 300000000 ]]
}
@test "scrapes F1 Academy driver details with Crawlee" {
diff --git a/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md b/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md
index e654ee34eb..cb0ddc2b8c 100644
--- a/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md
+++ b/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md
@@ -8,7 +8,7 @@ slug: /scraping-basics-python/scraping-variants
import CodeBlock from '@theme/CodeBlock';
import Exercises from '../scraping_basics/_exercises.mdx';
import PythonJobsDatabaseExercise from '!!raw-loader!roa-loader!./exercises/python_jobs_database.py';
-import CnnSportsShortestArticleExercise from '!!raw-loader!roa-loader!./exercises/cnn_sports_shortest_article.py';
+import EurozonePopulationExercise from '!!raw-loader!roa-loader!./exercises/eurozone_population.py';
**In this lesson, we'll scrape the product detail pages to represent each product variant as a separate item in our dataset.**
@@ -348,19 +348,13 @@ You can find everything you need for working with dates and times in Python's [`
{PythonJobsDatabaseExercise.code}
-### Find the shortest CNN article which made it to the Sports homepage
+### Count eurozone population from country pages
-Scrape the [CNN Sports](https://edition.cnn.com/sport) homepage. For each linked article, calculate its length in characters:
+Scrape the [Countries using the euro](https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en) page.
-- Locate the element that holds the main content of the article.
-- Use [`get_text()`](https://beautiful-soup-4.readthedocs.io/en/latest/index.html#get-text) to extract all the content as plain text.
-- Use `len()` to calculate the character count.
-
-Skip pages without text (like those that only have a video). Sort the results and print the URL of the shortest article that made it to the homepage.
-
-At the time of writing, the shortest article on the CNN Sports homepage is [about a donation to the Augusta National Golf Club](https://edition.cnn.com/2024/10/03/sport/masters-donation-hurricane-helene-relief-spt-intl/), which is just 1,642 characters long.
+Locate the links to countries in the **Euro area countries** section. Visit each linked country detail page, find the value labeled **Population**, and sum the values to get the total population of all countries using the euro as their currency. Print a single number: the sum.
Solution
- {CnnSportsShortestArticleExercise.code}
+ {EurozonePopulationExercise.code}
diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/cnn_sports_shortest_article.py b/sources/academy/webscraping/scraping_basics_python/exercises/cnn_sports_shortest_article.py
deleted file mode 100644
index bf8c03f07b..0000000000
--- a/sources/academy/webscraping/scraping_basics_python/exercises/cnn_sports_shortest_article.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import httpx
-from bs4 import BeautifulSoup
-from urllib.parse import urljoin
-
-
-def download(url: str) -> BeautifulSoup:
- response = httpx.get(url)
- response.raise_for_status()
- return BeautifulSoup(response.text, "html.parser")
-
-
-listing_url = "https://edition.cnn.com/sport"
-listing_soup = download(listing_url)
-
-results: list[tuple[int, str]] = []
-for card in listing_soup.select('.layout__main .card'):
- link = card.select_one('.container__link')
- if not link or 'href' not in link.attrs:
- continue
-
- article_url = urljoin(listing_url, link['href'])
- article_soup = download(article_url)
- content = article_soup.select_one('.article__content')
-
- if not content:
- continue
-
- results.append((len(content.get_text()), article_url))
-
-results.sort()
-if results:
- print(results[0][1])
diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/eurozone_population.py b/sources/academy/webscraping/scraping_basics_python/exercises/eurozone_population.py
new file mode 100644
index 0000000000..4cc1030afa
--- /dev/null
+++ b/sources/academy/webscraping/scraping_basics_python/exercises/eurozone_population.py
@@ -0,0 +1,29 @@
+import httpx
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+
+
+def download(url: str) -> BeautifulSoup:
+ """Download *url* and return its HTML parsed into a BeautifulSoup tree.
+
+ Raises ``httpx.HTTPStatusError`` for non-2xx responses.
+ """
+ response = httpx.get(url)
+ response.raise_for_status()
+ return BeautifulSoup(response.text, "html.parser")
+
+
+def parse_population(country_soup: BeautifulSoup) -> int | None:
+ for item in country_soup.select("li"):
+ if "Population" in item.text:
+ digits = item.text.replace("Population:", "").replace(" ", "")
+ return int(digits)
+ raise ValueError("Population not found")
+
+
+listing_url = "https://european-union.europa.eu/institutions-law-budget/euro/countries-using-euro_en"
+listing_soup = download(listing_url)
+
+total_population = 0
+# NOTE(review): assumes the first accordion item on the listing page is
+# the "Euro area countries" section — confirm against the live markup.
+euro_countries_accordion = listing_soup.select(".ecl-accordion__item")[0]
+for country_link in euro_countries_accordion.select("li a"):
+ country_url = urljoin(listing_url, country_link["href"])
+ country_soup = download(country_url)
+ total_population += parse_population(country_soup)
+print(total_population)
diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/test.bats b/sources/academy/webscraping/scraping_basics_python/exercises/test.bats
index 9832cb0634..1a0b2844ed 100644
--- a/sources/academy/webscraping/scraping_basics_python/exercises/test.bats
+++ b/sources/academy/webscraping/scraping_basics_python/exercises/test.bats
@@ -134,10 +134,10 @@ teardown() {
[[ "$output" == *"'posted_on': datetime.date("* ]]
}
-@test "finds the shortest CNN sports article" {
- run uv run -q --with=httpx --with=beautifulsoup4 python cnn_sports_shortest_article.py
+@test "counts total eurozone population" {
+ run uv run -q --with=httpx --with=beautifulsoup4 python eurozone_population.py
- [[ "$output" == 'https://edition.cnn.com/'* ]]
+ [[ "$output" -gt 300000000 ]]
}
@test "scrapes F1 Academy driver details with Crawlee" {