FullStackWithLawrence · lpm0073 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/Makefile b/Makefile
@@ -52,7 +52,8 @@ run:
 	@echo "==============================================================================="
 	@echo "Running solution ..."
 	@echo "==============================================================================="
-	make docker-run
+	make python-fetch-data
+	make python-build-dataset
 
 # destroy all Docker build and local artifacts
 # takes around 1 minute to complete
@@ -61,7 +62,6 @@ tear-down:
 	@echo "Tearing down solution ..."
 	@echo "==============================================================================="
 	make python-clean
-	make docker-prune
 
 pre-commit-init:
 	@echo "==============================================================================="
@@ -140,7 +140,6 @@ python-requirements:
 	pip install pip==25.3 setuptools wheel pip-tools
 	pip-compile requirements/in/base.in -o requirements/base.txt
 	pip-compile requirements/in/local.in -o requirements/local.txt
-	pip-compile requirements/in/docker.in -o requirements/docker.txt
 
 python-fetch-data:
 	@echo "==============================================================================="
@@ -150,7 +149,7 @@ python-fetch-data:
 
 python-build-dataset:
 	@echo "==============================================================================="
-	@echo "Building dataset from fetched data ..."
+	@echo "Building enriched Netflix dataset from fetched data ..."
 	@echo "==============================================================================="
 	$(ACTIVATE_VENV) && python -m netflix.fetch.dataset
 
@@ -177,4 +176,6 @@ help:
 	@echo 'python-lint            - Run Python linting using pre-commit and pylint'
 	@echo 'python-clean           - Destroy the Python virtual environment and remove __pycache__ directories'
 	@echo 'python-requirements    - Compile and update Python dependency files'
+	@echo 'python-fetch-data      - Fetch data from external APIs and save to local files'
+	@echo 'python-build-dataset   - Build enriched Netflix dataset from fetched data'
 	@echo '===================================================================='
diff --git a/README.md b/README.md
@@ -9,6 +9,41 @@ Netflix AI Greenlight Challenge: Can Data Science Predict the Next Hit Drama?
 
 ## Quickstart
 
+Install required system packages for your operating system:
+
+- [Windows](./setup/windows/setup.ps1)
+- [macOS](./setup/macos/setup.sh)
+- [Linux](./setup/linux/setup.sh)
+
+Initialize your environment. `make python-init` creates and activates a Python virtual
+environment; `make run` then downloads the data files for Netflix, IMDb and The Movie
+Database (TMDB) and builds the final dataset at `./netflix/db/netflix_enriched_dataset.csv`.
+
+**The setup process takes between 5 and 15 minutes, depending on your hardware
+and your Internet connection.**
+
+```console
+make python-init
+make run
+```
+
+Other helpful commands:
+
+```console
+source venv/bin/activate
+which python3
+which pip3
+python --version # you should see Python 3.13.x
+pip --version # you should see pip 25.3
+```
+
+## Completely Remove This Project
+
+```console
+make tear-down
+deactivate
+```
+
 Setup your [Kaggle API Key](./docs/KAGGLE.md)
 
 ## Support

diff --git a/changelogs/CHANGELOG.md b/changelogs/CHANGELOG.md
@@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p
 
 
 
+## [0.1.1-alpha.1](https://github.com/FullStackWithLawrence/netflix-writers/compare/v0.1.0...v0.1.1-alpha.1) (2026-06-16)
+
+### Bug Fixes
+
+* add os-specific setup scripts ([8d39cb0](https://github.com/FullStackWithLawrence/netflix-writers/commit/8d39cb04c762e5f3c096e5ac7b8be2a9f1d4b603))
+
 ## [0.1.0](https://github.com/FullStackWithLawrence/netflix-writers/compare/v0.0.1...v0.1.0) (2026-06-16)
 
 ### Features

diff --git a/netflix/__version__.py b/netflix/__version__.py
@@ -1,5 +1,5 @@
 # DO NOT EDIT.
 # Managed via automated CI/CD in .github/workflows/semanticVersionBump.yml.
-__version__ = "0.1.0"
+__version__ = "0.1.1-alpha.1"
 
 __all__ = ["__version__"]
diff --git a/netflix/fetch/fetch_imdb.py b/netflix/fetch/fetch_imdb.py
@@ -10,6 +10,7 @@
     - title.ratings.csv
 """
 
+import logging
 import os
 from pathlib import Path
 
@@ -22,6 +23,13 @@
 IMDB_DIR = os.path.join(DB_DIR, "imdb")
 IMDB_TITLE_TYPES = ["movie", "tvSeries"]
 
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
+
 
 def export_titles(con: duckdb.DuckDBPyConnection) -> None:
     """
@@ -39,7 +47,7 @@ def export_titles(con: duckdb.DuckDBPyConnection) -> None:
         TO '{output_path}'
         (FORMAT csv, HEADER true)
         """)
-    print(f"Exported title_basics to {output_path}.")
+    logger.info("Exported title_basics to %s.", output_path)
 
 
 def export_ratings(con: duckdb.DuckDBPyConnection) -> None:
@@ -58,7 +66,7 @@ def export_ratings(con: duckdb.DuckDBPyConnection) -> None:
         TO '{output_path}'
         (FORMAT csv, HEADER true)
         """)
-    print(f"Exported title_ratings to {output_path}.")
+    logger.info("Exported title_ratings to %s.", output_path)
 
 
 def build_titles_table(con: duckdb.DuckDBPyConnection, filename: Path) -> None:
@@ -72,7 +80,7 @@ def build_titles_table(con: duckdb.DuckDBPyConnection, filename: Path) -> None:
     .. returns:
         None
     """
-    print(f"Building title_basics table from {filename}...")
+    logger.info("Building title_basics table from %s...", filename)
     title_types = ", ".join(f"'{t}'" for t in IMDB_TITLE_TYPES)
     con.execute(
         f"""
@@ -85,7 +93,7 @@ def build_titles_table(con: duckdb.DuckDBPyConnection, filename: Path) -> None:
         """,
         [str(filename)],
     )
-    print("built title_basics table.")
+    logger.info("Built title_basics table.")
 
 
 def build_ratings_table(con: duckdb.DuckDBPyConnection, filename: Path) -> None:
@@ -99,7 +107,7 @@ def build_ratings_table(con: duckdb.DuckDBPyConnection, filename: Path) -> None:
     .. returns:
         None
     """
-    print(f"Building title_ratings table from {filename}...")
+    logger.info("Building title_ratings table from %s...", filename)
     con.execute(
         """
         CREATE OR REPLACE TABLE title_ratings AS
@@ -110,7 +118,7 @@ def build_ratings_table(con: duckdb.DuckDBPyConnection, filename: Path) -> None:
         """,
         [str(filename)],
     )
-    print("built title_ratings table.")
+    logger.info("Built title_ratings table.")
 
 
 def fetch_title_basics(with_cleanup: bool = False) -> None:
@@ -140,8 +148,8 @@ def fetch_title_basics(with_cleanup: bool = False) -> None:
         if with_cleanup:
             cleanup(titles_file)
 
-    print("Generated titles.basics.csv")
-    print("-" * 40)
+    logger.info("Generated titles.basics.csv")
+    logger.info("-" * 40)
 
 
 def fetch_title_ratings(with_cleanup: bool = False) -> None:
@@ -171,8 +179,8 @@ def fetch_title_ratings(with_cleanup: bool = False) -> None:
         if with_cleanup:
             cleanup(ratings_file)
 
-    print("Generated title.ratings.csv")
-    print("-" * 40)
+    logger.info("Generated title.ratings.csv")
+    logger.info("-" * 40)
 
 
 def main() -> None:

diff --git a/netflix/fetch/fetch_kaggle_netflix.py b/netflix/fetch/fetch_kaggle_netflix.py
@@ -8,6 +8,7 @@
 https://www.kaggle.com/datasets/dhruvildave/netflix-top-10-tv-shows-and-films
 """
 
+import logging
 import os
 
 from kaggle.api.kaggle_api_extended import KaggleApi  # type: ignore[import-untyped]
@@ -18,6 +19,12 @@
 DATASET = "dhruvildave/netflix-top-10-tv-shows-and-films"
 
 api = KaggleApi()
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
 
 
 def main():
@@ -32,8 +39,8 @@ def main():
     """
     api.authenticate()
     api.dataset_download_files(DATASET, path=KAGGLE_DIR, unzip=True)
-    print("Dataset downloaded successfully.")
-    print("-" * 40)
+    logger.info("Dataset downloaded successfully.")
+    logger.info("-" * 40)
 
 
 if __name__ == "__main__":

diff --git a/netflix/fetch/fetch_kaggle_tmdb.py b/netflix/fetch/fetch_kaggle_tmdb.py
@@ -7,6 +7,7 @@
 https://www.kaggle.com/datasets/asaniczka/full-tmdb-tv-shows-dataset-2023-150k-shows
 """
 
+import logging
 import os
 
 from kaggle.api.kaggle_api_extended import KaggleApi  # type: ignore[import-untyped]
@@ -17,6 +18,12 @@
 DATASET = "asaniczka/full-tmdb-tv-shows-dataset-2023-150k-shows"
 
 api = KaggleApi()
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
 
 
 def main():
@@ -31,8 +38,8 @@ def main():
     """
     api.authenticate()
     api.dataset_download_files(DATASET, path=KAGGLE_DIR, unzip=True)
-    print("Dataset downloaded successfully.")
-    print("-" * 40)
+    logger.info("Dataset downloaded successfully.")
+    logger.info("-" * 40)
 
 
 if __name__ == "__main__":

diff --git a/netflix/fetch/fetch_polti.py b/netflix/fetch/fetch_polti.py
@@ -9,12 +9,19 @@
 """
 
 import csv
+import logging
 import os
 from pathlib import Path
 
 from .const import DB_DIR
 
 POLTI_DIR = os.path.join(DB_DIR, "polti")
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
 
 
 def write_situations_csv(output_path: Path) -> None:
@@ -98,13 +105,13 @@ def write_situations_csv(output_path: Path) -> None:
         writer.writerow(["Number", "Situation", "Description"])
         writer.writerows(rows)
 
-    print(f"CSV written to: {output_path}")
+    logger.info("CSV written to: %s", output_path)
 
 
 def main():
     path = Path(os.path.join(POLTI_DIR, "situations.csv"))
     write_situations_csv(path)
-    print("-" * 40)
+    logger.info("-" * 40)
 
 
 if __name__ == "__main__":

diff --git a/netflix/fetch/lib.py b/netflix/fetch/lib.py
@@ -1,14 +1,23 @@
 """Utility functions for downloading and managing files."""
 
 import json
+import logging
 from pathlib import Path
+from typing import Any, Optional
 
 import pandas as pd
 import requests
 from tqdm import tqdm
 
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
 
-def fetch_url(url: str, output_dir: str | Path, timeout: int = 60) -> Path | None:
+
+def fetch_url(url: str, output_dir: str | Path, timeout: int = 60) -> Optional[Path]:
     """
     Download a file and return its local path.
 
@@ -26,12 +35,12 @@ def fetch_url(url: str, output_dir: str | Path, timeout: int = 60) -> Path | Non
     filename = url.rsplit("/", maxsplit=1)[-1]
     output_path = output_dir / filename
     if output_path.exists():
-        print(f"File {output_path} already exists, skipping download.")
+        logger.info("File %s already exists, skipping download.", output_path)
         return output_path
 
     # Reuse an existing download.
     if output_path.exists():
-        print(f"File {output_path} already exists, skipping download.")
+        logger.info("File %s already exists, skipping download.", output_path)
         return output_path
 
     try:
@@ -53,7 +62,7 @@ def fetch_url(url: str, output_dir: str | Path, timeout: int = 60) -> Path | Non
                             pbar.update(len(chunk))
 
     except (requests.RequestException, OSError) as exc:
-        print(f"Failed to download {url}: {exc}")
+        logger.error("Failed to download %s: %s", url, exc)
 
         # Avoid leaving behind a partial download.
         output_path.unlink(missing_ok=True)
@@ -68,10 +77,10 @@ def cleanup(filename: Path) -> None:
     try:
         filename.unlink(missing_ok=True)
     except OSError as exc:
-        print(f"Failed to remove {filename}: {exc}")
+        logger.error("Failed to remove %s: %s", filename, exc)
 
 
-def safe_cast(x) -> list:
+def safe_cast(x: Any) -> list[Any]:
     """
     Always returns a list safely, even if input is:
 
@@ -96,8 +105,8 @@ def safe_cast(x) -> list:
         if pd.isna(x):
             return []
     # pylint: disable=broad-except
-    except Exception:
-        pass
+    except Exception as exc:
+        logger.error("Failed to check NaN: %s", exc)
 
     if isinstance(x, str):
         try:
@@ -106,7 +115,8 @@ def safe_cast(x) -> list:
                 return [c.get("name") for c in parsed if isinstance(c, dict) and "name" in c]
             return []
         # pylint: disable=broad-except
-        except Exception:
+        except Exception as exc:
+            logger.error("Failed to parse JSON: %s", exc)
             return []
 
     return []
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "netflix-writers"
-version = "0.1.0"
+version = "0.1.1-alpha.1"
 requires-python = ">=3.12"
 description = "Netflix Writers: An AI-powered storytelling assistant for content creators."
 authors = [{ name = "Lawrence McDaniel", email = "lpm0073@gmail.com" }]