BLSQ
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/pyramid_matching/README.md b/pyramid_matching/README.md
Lines changed: 143 additions & 3 deletions
diff --git a/pyramid_matching/pyproject.toml b/pyramid_matching/pyproject.toml
Lines changed: 2 additions & 1 deletion
diff --git a/pyramid_matching/pyramid_matcher/matchers.py b/pyramid_matching/pyramid_matcher/matchers.py
Lines changed: 28 additions & 12 deletions
@@ -22,6 +22,7 @@ jobs:
     name: Tests
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
         package: [d2d_development, pyramid_matching]
     steps:
 
@@ -13,11 +13,12 @@ dependencies = [
     "rapidfuzz>=3.0.0",
     "shapely>=2.0.0",
     "polars>=1.0.0",
+    "pandas>=2.3.1",
 ]
 
 [project.optional-dependencies]
 # These we only need to install when running the test, not when installing the package
-dev = ["pytest", "pytest-cov"]
+dev = ["pytest", "pytest-cov", "pyarrow"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Any, TypeAlias
+from typing import Generic, TypeAlias, TypeVar
 
 from rapidfuzz import fuzz, process
 from shapely.geometry.base import BaseGeometry
@@ -11,29 +11,33 @@
 
 CandidateAttributes: TypeAlias = list[str]
 
+K = TypeVar("K", str, BaseGeometry)
+
 
 @dataclass(frozen=True)
 class MatchResult:
     """Data class to hold the result of a match operation."""
 
+    # Instances are immutable (frozen dataclass); this is the type returned
+    # by BaseMatcher.get_similarity when a match is found.
+
+    # The value that was searched for, as a string.
     query: str
+    # The candidate that was selected, as a string.
     matched: str
-    attributes: dict[str, Any]
+    # A list of strings; presumably the attribute list stored for the matched
+    # candidate in the `candidates` mapping -- TODO confirm against the matchers.
+    attributes: CandidateAttributes
+    # Similarity score of the match.
+    # NOTE(review): the scale looks matcher-specific (FuzzyMatcher defaults to
+    # a threshold of 80) -- confirm before comparing scores across matchers.
     score: float
 
 
-class BaseMatcher(ABC):
+class BaseMatcher(ABC, Generic[K]):
-    """Abstract base class for matchers that compute similarity scores."""
+    """Abstract base class for matchers that compute similarity scores.
+
+    The class is generic over the key type ``K`` (constrained to ``str`` or
+    ``BaseGeometry``). A concrete matcher fixes ``K`` when subclassing, e.g.
+    ``BaseMatcher[str]``, so the query and the candidate keys share one type.
+    """
 
     @abstractmethod
     def get_similarity(
-        self, query: str | BaseGeometry, candidates: dict[str | BaseGeometry, CandidateAttributes]
+        self,
+        query: K,
+        candidates: dict[K, CandidateAttributes],
     ) -> MatchResult | None:
-        """Return similarity scores for the candidates."""
+        """Match ``query`` against ``candidates``.
+
+        Args:
+            query: The value to look up; its type is the matcher's key type.
+            candidates: Mapping from candidate key (same type as ``query``)
+                to that candidate's attribute list.
+
+        Returns:
+            A ``MatchResult``, or ``None`` when the matcher reports no match.
+        """
         pass
 
 
-class FuzzyMatcher(BaseMatcher):
+class FuzzyMatcher(BaseMatcher[str]):
     """Matcher that uses fuzzy string matching to compute similarity scores."""
 
     def __init__(self, threshold: float = 80, scorer_name: str = "wratio"):
@@ -86,7 +90,9 @@ def get_similarity(
             if no match meets the threshold.
         """
         candidate_strings = list(candidates.keys())
-        best_match = self.process.extractOne(query, candidate_strings, scorer=self.scorer)
+        best_match = self.process.extractOne(
+            query, candidate_strings, scorer=self.scorer
+        )
 
         if best_match is None:
             return None
@@ -108,13 +114,15 @@ def __str__(self) -> str:
         return f"FuzzyMatcher(scorer: {self.scorer.__name__})"
 
 
-class SentenceTransformerMatcher(BaseMatcher):
+class SentenceTransformerMatcher(BaseMatcher[str]):
     """Matcher that uses sentence transformers to compute similarity scores.
 
     NOTE: Not yet implemented.
     """
 
-    def __init__(self, model_name: str | None = "sentence-transformers/all-MiniLM-L6-v2"):
+    def __init__(
+        self, model_name: str | None = "sentence-transformers/all-MiniLM-L6-v2"
+    ):
         from sentence_transformers import SentenceTransformer  # noqa: PLC0415
 
         model_name = "sentence-transformers/all-MiniLM-L6-v2"
@@ -128,13 +136,15 @@ def get_similarity(
         # cand_embs = self.model.encode(candidates, convert_to_tensor=True)
         # scores = cos_sim(query_emb, cand_embs)[0].cpu().numpy()
         """Return similarity scores for the candidates using sentence transformers."""
-        raise NotImplementedError("SentenceTransformerMatcher.get_similarity is not implemented.")
+        raise NotImplementedError(
+            "SentenceTransformerMatcher.get_similarity is not implemented."
+        )
 
     def __str__(self) -> str:
-        return f"TransformerMatcher(scorer: {self.model.__name__})"
+        """Return a short, human-readable label for this matcher.
+
+        Uses the model's ``__name__`` when it has one and otherwise falls back
+        to the name of the model's class, so formatting the matcher does not
+        raise ``AttributeError`` for a model object without ``__name__``.
+        """
+        # Model instances (unlike the scorer functions used by FuzzyMatcher)
+        # normally have no __name__ attribute, hence the class-name fallback.
+        model_label = getattr(self.model, "__name__", type(self.model).__name__)
+        return f"TransformerMatcher(scorer: {model_label})"
 
 
-class GeometryMatcher(BaseMatcher):
+class GeometryMatcher(BaseMatcher[BaseGeometry]):
     """Match org units using spatial proximity and overlap.
 
     NOTE: Not yet implemented. This is a test implementation.
@@ -198,14 +208,20 @@ def _score(self, ref: BaseGeometry, cand: BaseGeometry) -> float | None:
         distance_score = 1.0 - (distance / self.max_distance)
 
         overlap_score = 0.0
-        if self.use_overlap and ref.geom_type == "Polygon" and cand.geom_type == "Polygon":
+        if (
+            self.use_overlap
+            and ref.geom_type == "Polygon"
+            and cand.geom_type == "Polygon"
+        ):
             inter = ref.intersection(cand).area
             union = ref.union(cand).area
             if union > 0:
                 overlap_score = inter / union
 
         # Final weighted score
-        return (1 - self.overlap_weight) * distance_score + self.overlap_weight * overlap_score
+        return (
+            1 - self.overlap_weight
+        ) * distance_score + self.overlap_weight * overlap_score
 
     def _geom_id(self, geom: BaseGeometry) -> str:
         """Return an identifier for the query geometry.