56 changes: 31 additions & 25 deletions episodes/a-real-website.md
@@ -56,11 +56,17 @@ from tqdm import tqdm

# Getting the HTML from our desired URL as a text string
url = 'https://carpentries.org/workshops/upcoming-workshops/'
req = requests.get(url).text
req = requests.get(url)

# Cleaning and printing the string
cleaned_req = re.sub(r'\s*\n\s*', '', req).strip()
print(cleaned_req[0:1000])
# Checking if the request was successful
if req.status_code == 200:
req = req.text

# Cleaning and printing the string
cleaned_req = re.sub(r'\s*\n\s*', '', req).strip()
print(cleaned_req[0:1000])
else:
print(f"Failed to retrieve the webpage. Status code: {req.status_code}")
```

```output
@@ -114,7 +120,7 @@ soup = BeautifulSoup(cleaned_req, 'html.parser')
# Finding all third-level headers and doing a formatted print
h3_by_tag = soup.find_all('h3')
print("Number of h3 elements found: ", len(h3_by_tag))
for n, h3 in enumerate(h3_by_tag):
for n, h3 in enumerate(h3_by_tag, start=1):
print(f"Workshop #{n} - {h3.get_text()}")
```

@@ -159,18 +165,18 @@ print(div_firsth3.prettify())

Remember, the output shown here is probably different than yours, as the website is continuously updated.
```output
<div class="p-8 mb-5 border" data-country="Puerto Rico" data-curriculum="Software Carpentry (Shell, Git, R for Reproducible Scientific Analysis)" data-meeting="In Person" data-program="Software Carpentry">
<div class="p-8 mb-5 border" data-country="United States" data-curriculum="Library Carpentry (Intro to Data, Unix Shell, Git, and/or OpenRefine)" data-meeting="In Person" data-program="Library Carpentry">
<div class="flex mb-4 -mx-2">
<div class="flex items-center mx-2">
<img alt="" class="mx-1" src="/software.svg"/>
<img alt="" class="mx-1" src="/library.svg"/>
<span class="text-[0.625rem] uppercase">
Software Carpentry
Library Carpentry
</span>
</div>
<div class="flex items-center mx-2">
<img alt="" class="mr-1" height="20" src="/flags/pr.png" width="20"/>
<img alt="" class="mr-1" height="20" src="/flags/us.png" width="20"/>
<span class="text-[0.625rem] uppercase">
Puerto Rico
United States
</span>
</div>
<div class="flex items-center mx-2">
@@ -181,20 +187,20 @@ Remember, the output shown here is probably different than yours, as the website
</div>
</div>
<h3 class="title text-base md:text-[1.75rem] leading-[2.125rem] font-semibold">
<a class="underline hover:text-blue-hover text-gray-dark" href="https://dept-ccom-uprrp.github.io/2025-06-04-uprrp-r/">
University of Puerto Rico
<a class="underline hover:text-blue-hover text-gray-dark" href="https://unt-carpentries.github.io/2026-01-22-unt/">
University of North Texas
</a>
</h3>
<div class="mb-5 text-lg font-semibold text-gray-mid">
Software Carpentry (Shell, Git, R for Reproducible Scientific Analysis)
Library Carpentry (Intro to Data, Unix Shell, Git, and/or OpenRefine)
</div>
<div class="mb-2 text-xs">
<strong class="font-bold">
Instructors
</strong>
:
<span class="instructors">
Humberto Ortiz-Zuazaga, Airined Montes Mercado
Sarah Lynn Fisher, Maristella Feustle, Whitney Johnson-Freeman
</span>
</div>
<div class="mb-4 text-xs">
@@ -203,11 +209,11 @@ Remember, the output shown here is probably different than yours, as the website
</strong>
:
<span class="helpers">
Isabel Rivera, Diana Buitrago Escobar, Yabdiel Ramos Valerio
Marcia McIntosh, Trey Clark
</span>
</div>
<div class="text-sm font-semibold text-gray-mid">
Jun 04 - Jun 10 2025
Jan 22 - Jan 22 2026
</div>
</div>
```
@@ -224,7 +230,7 @@ As shown in the previous episode, we can store all this information in a Python
# Create an empty dictionary and fill it with the info we are interested in
dict_workshop = {}
dict_workshop['host'] = div_firsth3.find('h3').get_text()
dict_workshop['link'] = div_firsth3.find('h3').find('a').get('href')
dict_workshop['link'] = div_firsth3.find('a').get('href')
dict_workshop['curriculum'] = div_firsth3.get('data-curriculum')
dict_workshop['country'] = div_firsth3.get('data-country')
dict_workshop['format'] = div_firsth3.get('data-meeting')
@@ -246,11 +252,11 @@ workshop_list = []
for item in divs:
dict_workshop = {}
dict_workshop['host'] = item.find('h3').get_text()
dict_workshop['link'] = div_firsth3.find('h3').find('a').get('href')
dict_workshop['curriculum'] = div_firsth3.get('data-curriculum')
dict_workshop['country'] = div_firsth3.get('data-country')
dict_workshop['format'] = div_firsth3.get('data-meeting')
dict_workshop['program'] = div_firsth3.get('data-program')
dict_workshop['link'] = item.find('a').get('href') # get is used to access attribute values as a dictionary
dict_workshop['curriculum'] = item.get('data-curriculum')
dict_workshop['country'] = item.get('data-country')
dict_workshop['format'] = item.get('data-meeting')
dict_workshop['program'] = item.get('data-program')
workshop_list.append(dict_workshop)

# Transform list into a DataFrame
@@ -277,7 +283,7 @@ workshop_list = []
while child_div is not None:
dict_workshop = {}
dict_workshop['host'] = child_div.find('h3').get_text()
dict_workshop['link'] = child_div.find('h3').find('a').get('href')
dict_workshop['link'] = child_div.find('a').get('href')
dict_workshop['curriculum'] = child_div.get('data-curriculum')
dict_workshop['country'] = child_div.get('data-country')
dict_workshop['format'] = child_div.get('data-meeting')
@@ -319,7 +325,7 @@ workshop_list = []
for item in divs_past:
dict_workshop = {}
dict_workshop['host'] = item.find('h3').get_text()
dict_workshop['link'] = item.find('h3').find('a').get('href')
dict_workshop['link'] = item.find('a').get('href')
dict_workshop['curriculum'] = item.get('data-curriculum')
dict_workshop['country'] = item.get('data-country')
dict_workshop['format'] = item.get('data-meeting')
@@ -356,7 +362,7 @@ We only need to add three lines to our loop, and this is how it would look like.
for item in divs:
dict_workshop = {}
dict_workshop['host'] = item.find('h3').get_text()
dict_workshop['link'] = item.find('h3').find('a')['href']
dict_workshop['link'] = item.find('a')['href']
dict_workshop['curriculum'] = item.get('data-curriculum')
dict_workshop['country'] = item.get('data-country')
dict_workshop['format'] = item.get('data-meeting')
Binary file added episodes/fig/http1-req-res-details.png
Binary file added episodes/fig/http1-url-structure.png
48 changes: 47 additions & 1 deletion episodes/hello-scraping.md
@@ -24,12 +24,58 @@ exercises: 10
This workshop is a continuation of our Introduction to Web Scraping workshop.
If you're looking for a gentler introduction that uses XPath and the Scraper Chrome extension, take a look at the [workshop materials for that workshop](https://carpentries-incubator.github.io/lc-webscraping/).

As a recap: web scraping is necessary when a website offers no interface for automated information or data retrieval, such as REST or SOAP Web services or other Application Programming Interfaces (APIs). In that case, the only option is to “scrape” the information embedded in the website itself.

When you want to extract information or download data from a website that is too large for efficient manual downloading or needs to be frequently updated, you should first:

1. Check if the website has any available Web services or if APIs have been developed to this end
2. Check if a package in Python (or another language you know) has been developed by others as a wrapper around the API, to facilitate the use of these Web services
3. Nothing found? Well, let's code this ourselves then!


Here, we’ll revisit some of those core ideas to build a more hands-on understanding of how content and data are structured on the web.
We’ll start by exploring what HTML (Hypertext Markup Language) is and how it uses tags to organize and format content.
We’ll start by exploring what HTTP (Hypertext Transfer Protocol) and HTML (Hypertext Markup Language) are, and how HTML uses tags to organize and format content.
Then, we’ll introduce the BeautifulSoup library to parse HTML and make it easier to search for and extract specific elements from a webpage.

We'll begin with simple examples and gradually move on to scraping more complex, real-world websites.

### Be respectful

When scraping data, it is essential to adhere to two main guidelines:

1. **Data Privacy and Confidentiality**: Always confirm that the data being collected is publicly available and contains no personal or confidential information.
2. **Server Load**: Avoid overwhelming the web server. When collecting large amounts of data, best practice is to insert pauses between requests so the server can manage other traffic.

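Guideline 2 can be put into practice with a small helper that spaces out requests. The function below is a minimal sketch: the name `polite_get` and the two-second default are our own choices for illustration, not part of any library.

```python
import time

def polite_get(session, url, delay=2.0):
    """Fetch a URL, then pause so the server can handle other traffic.

    `session` can be any object with a .get(url) method,
    such as a requests.Session.
    """
    response = session.get(url)
    time.sleep(delay)  # give the server room between our requests
    return response
```

Calling `polite_get` inside a loop guarantees at least `delay` seconds between consecutive requests.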

## HTTP: Hypertext Transfer Protocol quick overview

### URL

At the heart of web communication is the request message, which is sent to a *U*niform *R*esource *L*ocator (URL). A URL has the following basic structure:

![credits: https://code.tutsplus.com/tutorials/http-the-protocol-every-web-developer-must-know-part-1--net-31177](fig/http1-url-structure.png)

The protocol is typically `http`, or `https` for secure communication. The default port is 80 for HTTP (443 for HTTPS), but a different one can be set explicitly, as illustrated in the image above. The resource path is the local path to the resource on the server.
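These parts are easy to inspect in Python with the standard library's `urllib.parse` module; the URL below is just a made-up example with an explicit port, query, and fragment.

```python
from urllib.parse import urlparse

# Split a sample URL into the components described above
parsed = urlparse("https://carpentries.org:443/workshops/upcoming-workshops/?page=1#top")

print(parsed.scheme)    # protocol: 'https'
print(parsed.hostname)  # host: 'carpentries.org'
print(parsed.port)      # explicit port: 443
print(parsed.path)      # resource path: '/workshops/upcoming-workshops/'
```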

### Request

![credits: https://code.tutsplus.com/tutorials/http-the-protocol-every-web-developer-must-know-part-1--net-31177](fig/http1-req-res-details.png)

The actions that should be performed on the host are specified via HTTP verbs. Today we are going to focus on two actions that are often used in web forms:

- `GET`: fetch an existing resource. The URL contains all the necessary information the server needs to locate and return the resource.
- `POST`: create a new resource. POST requests usually carry a payload that specifies the data for the new resource.
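With the `requests` library used throughout this lesson, we can build both kinds of request (without sending them) and compare where the data ends up; the URLs and form fields below are made up for illustration.

```python
import requests

# Prepare a GET and a POST request without sending them
get_req = requests.Request("GET", "https://example.org/search",
                           params={"q": "carpentries"}).prepare()
post_req = requests.Request("POST", "https://example.org/submit",
                            data={"name": "Ada"}).prepare()

print(get_req.url)    # GET data travels in the URL itself
print(post_req.body)  # POST data travels in the request body
```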

### Response

Status codes:

- `1xx`: Informational Messages
- `2xx`: Successful; the best known is 200 (OK), meaning the request was successfully processed
- `3xx`: Redirection
- `4xx`: Client Error; includes the famous 404 (resource not found)
- `5xx`: Server Error
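Since the first digit determines the category, a response can be classified with simple integer division. The `describe` helper below is our own illustration, not part of any library.

```python
# Map the leading digit of a status code to its category
CATEGORIES = {
    1: "Informational",
    2: "Successful",
    3: "Redirection",
    4: "Client Error",
    5: "Server Error",
}

def describe(status_code):
    # 404 // 100 == 4, so 404 falls in the "Client Error" category
    return CATEGORIES[status_code // 100]

print(describe(200))  # Successful
print(describe(404))  # Client Error
```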

## HTML quick overview

All websites have a Hypertext Markup Language (HTML) document behind them.