Skip to content

Commit d95759b

Browse files
author
Liu Haixin
committed
Add CI workflow
1 parent e444a73 commit d95759b

5 files changed

Lines changed: 176 additions & 1 deletion

File tree

.github/workflows/tests.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# CI workflow: installs the project, runs the test suite with coverage,
# then lints and format-checks the code with ruff.
name: Run tests

# Triggered on every push.
on: push

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - name: Clone repository
        uses: actions/checkout@v6

      # https://github.com/actions/setup-python
      - name: Install Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"
          # Reuse pip's download cache between runs.
          cache: pip

      - name: Install dependencies
        run: pip install -r requirements.txt

      # https://pytest-cov.readthedocs.io/en/latest/readme.html
      - name: Run tests
        run: pytest --cov

      # https://github.com/astral-sh/ruff-action
      - name: Set up ruff
        uses: astral-sh/ruff-action@v3
        with:
          version: latest

      - name: Lint
        run: ruff check

      - name: Check formatting
        run: ruff format --check

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
.venv/
2-
.DS_Store
2+
.DS_Store
3+
4+
__pycache__/
5+
*.pyc
6+
.pytest_cache/

mta_ridership_project .ipynb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,14 @@
5555
"outputs": [],
5656
"source": [
5757
"# Convert date column to datetime and sort\n",
58+
"from utils import clean_mta_df\n",
"\n",
"df = clean_mta_df(df)\n",
6066
"\n",
6167
"print(f\"Date range: {df['date'].min()} to {df['date'].max()}\")"
6268
]

tests/test_utils.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import pandas as pd
2+
import pytest
3+
import matplotlib
4+
matplotlib.use("Agg") # non-interactive backend for testing
5+
from utils import clean_mta_df, plot_ridership_recovery
6+
7+
def test_clean_mta_df_converts_date_and_sorts():
    """String dates are parsed to datetimes and rows come back in date order."""
    unsorted = pd.DataFrame({"date": ["2020-01-02", "2020-01-01"], "x": [2, 1]})

    cleaned = clean_mta_df(unsorted)
    date_col = cleaned["date"]

    # The column must have been converted from strings to a datetime dtype.
    assert str(date_col.dtype).startswith("datetime64")
    # Rows are ordered by ascending date ...
    assert date_col.is_monotonic_increasing
    # ... and the other columns travelled with their rows.
    assert cleaned["x"].tolist() == [1, 2]
20+
21+
def test_clean_mta_df_missing_date_raises():
    """A frame without a ``date`` column is rejected with ``KeyError``."""
    frame_without_date = pd.DataFrame({"x": [1, 2]})

    with pytest.raises(KeyError):
        clean_mta_df(frame_without_date)
25+
26+
def test_clean_mta_df_does_not_modify_original():
    """Test that the original DataFrame is not mutated.

    The whole frame (values, dtypes, column order and index) is compared
    against a deep snapshot taken before the call, so a mutation of any
    column -- not only ``date`` -- makes the test fail.
    """
    df = pd.DataFrame({
        "date": ["2020-01-02", "2020-01-01"],
        "x": [2, 1],
    })
    snapshot = df.copy(deep=True)

    clean_mta_df(df)

    # original df should remain unchanged
    pd.testing.assert_frame_equal(df, snapshot)
38+
39+
40+
def test_clean_mta_df_already_sorted():
    """Input that is already in date order keeps its row order."""
    presorted = pd.DataFrame(
        {
            "date": ["2020-01-01", "2020-01-02", "2020-01-03"],
            "x": [1, 2, 3],
        }
    )

    cleaned = clean_mta_df(presorted)

    assert cleaned["date"].is_monotonic_increasing
    assert cleaned["x"].tolist() == [1, 2, 3]
51+
52+
53+
# ---------- Tests for plot_ridership_recovery ----------
54+
55+
def _make_ridership_df():
    """Build a three-row ridership frame with every column the plot needs."""
    frame = pd.DataFrame(
        {"date": pd.to_datetime(["2020-03-01", "2020-03-02", "2020-03-03"])}
    )
    # One ratio column per transit mode, added in the same order as before.
    frame["subways_of_comparable_pre_pandemic_day"] = [0.9, 0.5, 0.6]
    frame["buses_of_comparable_pre_pandemic_day"] = [0.95, 0.6, 0.7]
    frame["lirr_of_comparable_pre_pandemic_day"] = [0.85, 0.4, 0.5]
    frame["metro_north_of_comparable_pre_pandemic_day"] = [0.88, 0.45, 0.55]
    return frame
64+
65+
66+
def test_plot_ridership_recovery_returns_figure():
    """Test that the function returns a matplotlib Figure without error."""
    # Import the submodules explicitly: a bare ``import matplotlib`` does not
    # guarantee that ``matplotlib.figure`` / ``matplotlib.pyplot`` are loaded,
    # so the test should not depend on ``utils`` importing them first.
    import matplotlib.pyplot as plt
    from matplotlib.figure import Figure

    df = _make_ridership_df()
    fig = plot_ridership_recovery(df)
    try:
        assert isinstance(fig, Figure)
    finally:
        # Close the figure even when the assertion fails so it is not left
        # registered with pyplot for the rest of the test session.
        plt.close(fig)
72+
73+
74+
def test_plot_ridership_recovery_missing_column_raises():
    """Plotting a frame that lacks required columns raises ``KeyError``."""
    # Only the date and subway columns are present; the other modes are absent.
    incomplete = pd.DataFrame(
        {
            "date": pd.to_datetime(["2020-03-01"]),
            "subways_of_comparable_pre_pandemic_day": [0.9],
        }
    )

    with pytest.raises(KeyError):
        plot_ridership_recovery(incomplete)

utils.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import pandas as pd
2+
import matplotlib.pyplot as plt
3+
4+
def clean_mta_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* sorted chronologically.

    The ``date`` column is converted with ``pd.to_datetime`` and the rows are
    sorted by it in ascending order; the result gets a fresh ``0..n-1`` index.
    The input frame is left untouched.

    Args:
        df: Frame containing a ``date`` column that ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame with a datetime ``date`` column, sorted by date.

    Raises:
        KeyError: If *df* has no ``date`` column.
    """
    # Validate first so an unusable frame fails fast without being copied.
    if "date" not in df.columns:
        raise KeyError("Missing 'date' column")

    out = df.copy()
    out["date"] = pd.to_datetime(out["date"])

    # A stable sort keeps rows that share a date in their original relative
    # order, which makes the output deterministic.
    return out.sort_values("date", kind="mergesort").reset_index(drop=True)
14+
15+
def plot_ridership_recovery(df: pd.DataFrame) -> plt.Figure:
    """Plot MTA ridership recovery by transit mode as % of pre-pandemic levels.

    One line is drawn per transit mode (subway, bus, LIRR, Metro-North)
    against ``df["date"]``, plus a dashed reference line at ``1.0``.

    Args:
        df: Frame with a ``date`` column and the four
            ``*_of_comparable_pre_pandemic_day`` ratio columns. Values are
            treated as fractions where ``1.0`` is the pre-pandemic baseline.

    Returns:
        The matplotlib Figure. It is created through ``plt.subplots``, so the
        caller should ``plt.close`` it when done.

    Raises:
        KeyError: If any required column is missing from *df*.
    """
    from matplotlib.ticker import PercentFormatter

    # Column name -> legend label, in drawing order.
    series_labels = {
        "subways_of_comparable_pre_pandemic_day": "Subway",
        "buses_of_comparable_pre_pandemic_day": "Bus",
        "lirr_of_comparable_pre_pandemic_day": "LIRR",
        "metro_north_of_comparable_pre_pandemic_day": "Metro-North",
    }

    required_cols = ["date", *series_labels]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    fig, ax = plt.subplots(figsize=(14, 7))

    for column, label in series_labels.items():
        ax.plot(df["date"], df[column], label=label, alpha=0.8, linewidth=1.2)

    ax.axhline(y=1.0, color="gray", linestyle="--", linewidth=1.5,
               label="Pre-pandemic baseline (100%)")

    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel("% of Pre-Pandemic Ridership", fontsize=12)
    ax.set_title(
        "MTA Ridership Recovery: Subway vs Bus vs Commuter Rail (2020-Present)",
        fontsize=14, fontweight="bold",
    )
    ax.legend(loc="lower right", fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 1.5)
    # The data are fractions (baseline == 1.0) but the axis is labelled in
    # percent, so render the ticks as percentages to match the label.
    ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
    fig.tight_layout()

    return fig

0 commit comments

Comments
 (0)