From 01e017e79aebfaee899fc0689592884ec00d3f3e Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Fri, 13 Mar 2026 09:42:09 +0100
Subject: [PATCH 1/8] review branch

---
 d2d_development/README.md                     |   6 +
 d2d_development/d2d_development/__init__.py   |   0
 .../d2d_development/data_models.py            | 228 +++++++++
 .../d2d_development/dataset_completion.py     | 422 +++++++++++++++
 d2d_development/d2d_development/exceptions.py |   6 +
 d2d_development/d2d_development/extract.py    | 462 +++++++++++++++++
 .../d2d_development/org_unit_aligner.py       | 351 +++++++++++++
 d2d_development/d2d_development/push.py       | 335 ++++++++++++
 d2d_development/d2d_development/utils.py      |  89 ++++
 d2d_development/pyproject.toml                |  95 ++++
 d2d_development/tests/__init__.py             |   0
 d2d_development/tests/mock_dhis2_get.py       | 238 +++++++++
 d2d_development/tests/mock_dhis2_post.py      | 484 ++++++++++++++++++
 d2d_development/tests/test_data_point.py      |  77 +++
 d2d_development/tests/test_extract.py         | 259 ++++++++++
 d2d_development/tests/test_push.py            | 463 +++++++++++++++++
 d2d_development/tests/test_utils.py           | 113 ++++
 17 files changed, 3628 insertions(+)
 create mode 100644 d2d_development/README.md
 create mode 100644 d2d_development/d2d_development/__init__.py
 create mode 100644 d2d_development/d2d_development/data_models.py
 create mode 100644 d2d_development/d2d_development/dataset_completion.py
 create mode 100644 d2d_development/d2d_development/exceptions.py
 create mode 100644 d2d_development/d2d_development/extract.py
 create mode 100644 d2d_development/d2d_development/org_unit_aligner.py
 create mode 100644 d2d_development/d2d_development/push.py
 create mode 100644 d2d_development/d2d_development/utils.py
 create mode 100644 d2d_development/pyproject.toml
 create mode 100644 d2d_development/tests/__init__.py
 create mode 100644 d2d_development/tests/mock_dhis2_get.py
 create mode 100644 d2d_development/tests/mock_dhis2_post.py
 create mode 100644 d2d_development/tests/test_data_point.py
 create mode 100644 d2d_development/tests/test_extract.py
 create mode 100644 d2d_development/tests/test_push.py
 create mode 100644 d2d_development/tests/test_utils.py

diff --git a/d2d_development/README.md b/d2d_development/README.md
new file mode 100644
index 0000000..6cd018d
--- /dev/null
+++ b/d2d_development/README.md
@@ -0,0 +1,6 @@
+# openhexa-ds-developments
+Development repo for DS team OpenHEXA utilities.
+
+
+Install the package: `pip install git+https://github.com/BLSQ/openhexa-ds-developments.git#subdirectory=d2d_development`
+
diff --git a/d2d_development/d2d_development/__init__.py b/d2d_development/d2d_development/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/d2d_development/d2d_development/data_models.py b/d2d_development/d2d_development/data_models.py
new file mode 100644
index 0000000..2d0b344
--- /dev/null
+++ b/d2d_development/d2d_development/data_models.py
@@ -0,0 +1,228 @@
+import json
+from dataclasses import dataclass
+from enum import Enum
+from typing import NamedTuple
+
+import pandas as pd
+
+
+class DataType(Enum):
+    """Enumeration of supported DHIS2 data types for extraction (each value equals its member name)."""
+
+    DATA_ELEMENT = "DATA_ELEMENT"
+    REPORTING_RATE = "REPORTING_RATE"
+    INDICATOR = "INDICATOR"
+
+
+@dataclass
+class DataPointModel:
+    """Data model representing a DHIS2 data point.
+
+    Attributes
+    ----------
+    dataElement : str
+        The unique identifier for the data element.
+    period : str
+        The reporting period for the data point.
+    orgUnit : str
+        The organizational unit associated with the data point.
+    categoryOptionCombo : str
+        The category option combination identifier.
+    attributeOptionCombo : str
+        The attribute option combination identifier.
+    value : str | None
+        The value of the data point; None or a blank string is serialized by `to_json` as a deletion.
+    """
+
+    dataElement: str  # noqa: N815
+    period: str
+    orgUnit: str  # noqa: N815
+    categoryOptionCombo: str  # noqa: N815
+    attributeOptionCombo: str  # noqa: N815
+    value: str | None
+
+    def to_json(self) -> dict:
+        """Return a dictionary representation of the data point suitable for DHIS2 JSON format.
+
+        A value that is None or a blank string is emitted as an empty string
+        together with the comment "deleted value".
+
+        Returns
+        -------
+        dict
+            A dictionary with keys corresponding to DHIS2 data value fields.
+        """
+        payload = {
+            "dataElement": self.dataElement,
+            "period": self.period,
+            "orgUnit": self.orgUnit,
+            "categoryOptionCombo": self.categoryOptionCombo,
+            "attributeOptionCombo": self.attributeOptionCombo,
+            "value": self.value,
+        }
+
+        # Missing/blank values: overwrite the value with "" and append the
+        # comment last, so the key order matches the regular payload.
+        if self.value is None or (isinstance(self.value, str) and not self.value.strip()):
+            payload["value"] = ""
+            payload["comment"] = "deleted value"
+
+        return payload
+
+    def __str__(self) -> str:
+        return (
+            f"DataPointModel("
+            f"dataElement={self.dataElement}, "
+            f"period={self.period}, "
+            f"orgUnit={self.orgUnit}, "
+            f"categoryOptionCombo={self.categoryOptionCombo}, "
+            f"attributeOptionCombo={self.attributeOptionCombo}, "
+            f"value={self.value})"
+        )
+
+
+@dataclass
+class OrgUnitModel:
+    """Dataclass representation of an organizational unit (same field names as OrgUnitRow)."""
+
+    id: str
+    name: str
+    shortName: str  # noqa: N815
+    openingDate: str  # noqa: N815
+    closedDate: str  # noqa: N815
+    parent: dict
+    level: int
+    path: str
+    geometry: str
+
+
+class OrgUnitRow(NamedTuple):
+    """Named-tuple row describing an organizational unit; this is the input type OrgUnitObj consumes."""
+
+    id: str
+    name: str
+    shortName: str  # noqa: N815
+    openingDate: str  # noqa: N815
+    closedDate: str | None  # noqa: N815
+    parent: dict | None
+    level: int
+    path: str
+    geometry: str | dict | None
+
+
+class OrgUnitObj:  # noqa: PLW1641 (no hashing)
+    """Helper class definition to store/create the correct OrgUnit JSON format."""
+
+    def __init__(self, org_unit: OrgUnitRow | pd.Series | tuple):
+        """Create a new org unit instance.
+
+        Parameters
+        ----------
+        org_unit : OrgUnitRow | pd.Series | tuple
+            The organizational unit data; a tuple must be a named tuple (it is read through `_asdict()`).
+            Expects columns with names :
+                ['id', 'name', 'shortName', 'openingDate', 'closedDate', 'parent','level', 'path', 'geometry']
+        """
+        if isinstance(org_unit, pd.Series):
+            # Convert Series to OrgUnitRow
+            org_unit = OrgUnitRow(
+                id=org_unit["id"],
+                name=org_unit["name"],
+                shortName=org_unit["shortName"],
+                openingDate=org_unit["openingDate"],
+                closedDate=org_unit["closedDate"],
+                parent=org_unit["parent"],
+                level=org_unit["level"],
+                path=org_unit["path"],
+                geometry=org_unit["geometry"],
+            )
+        elif isinstance(org_unit, tuple) and hasattr(org_unit, "_fields"):
+            org_unit = OrgUnitRow(**org_unit._asdict())
+        elif not isinstance(org_unit, OrgUnitRow):
+            raise TypeError(f"Expected OrgUnitRow, pd.Series, or tuple, got {type(org_unit)}")
+
+        self.initialize_from(org_unit_tuple=org_unit)
+
+    def initialize_from(self, org_unit_tuple: OrgUnitRow):
+        """Initialize the OrgUnitObj instance from an OrgUnitRow tuple.
+
+        This object should represent a DHIS2 organizational unit with the same attribute naming.
+
+        Parameters
+        ----------
+        org_unit_tuple : OrgUnitRow
+            The row whose attributes are copied; `level` and `path` are not stored on the instance.
+        """
+        # Keep names consistent
+        self.id = org_unit_tuple.id
+        self.name = org_unit_tuple.name
+        self.shortName = org_unit_tuple.shortName
+        self.openingDate = org_unit_tuple.openingDate
+        self.closedDate = org_unit_tuple.closedDate
+        self.parent = org_unit_tuple.parent
+        # Parse geometry safely
+        geometry = org_unit_tuple.geometry
+        if pd.notna(geometry):
+            if isinstance(geometry, str):
+                try:
+                    self.geometry = json.loads(geometry)
+                except json.JSONDecodeError:
+                    self.geometry = None
+            else:
+                self.geometry = geometry
+        else:
+            self.geometry = None
+
+    def to_json(self) -> dict:
+        """Return a dictionary representation of the organizational unit suitable for DHIS2 API.
+
+        Returns
+        -------
+        dict
+            Dictionary containing the organizational unit's attributes formatted for DHIS2.
+        """
+        json_dict = {
+            "id": self.id,
+            "name": self.name,
+            "shortName": self.shortName,
+            "openingDate": self.openingDate,
+        }
+
+        if pd.notna(self.closedDate):
+            json_dict["closedDate"] = self.closedDate
+        # A missing parent in a pandas row is NaN (truthy, no `.get`), so require a dict first.
+        if isinstance(self.parent, dict) and self.parent.get("id") and pd.notna(self.parent.get("id")):
+            json_dict["parent"] = {"id": self.parent.get("id")}
+        # Only a non-empty dict geometry can be serialized; any other parsed JSON value is skipped.
+        if isinstance(self.geometry, dict) and self.geometry:
+            json_dict["geometry"] = {
+                "type": self.geometry["type"],
+                "coordinates": self.geometry["coordinates"],
+            }
+        return json_dict
+
+    def is_valid(self) -> bool:
+        """Check if the OrgUnitObj instance has all required attributes set.
+
+        Returns
+        -------
+        bool
+            True if id, name, shortName and openingDate are all non-missing (per `pd.notna`), False otherwise.
+        """
+        return pd.notna(self.id) and pd.notna(self.name) and pd.notna(self.shortName) and pd.notna(self.openingDate)
+
+    def __str__(self) -> str:
+        return f"OrgUnitObj({self.id}, {self.name})"
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, OrgUnitObj):
+            return NotImplemented
+        return (
+            self.id == other.id
+            and self.name == other.name
+            and self.shortName == other.shortName
+            and self.openingDate == other.openingDate
+            and self.closedDate == other.closedDate
+            and self.parent == other.parent
+            and self.geometry == other.geometry
+        )
diff --git a/d2d_development/d2d_development/dataset_completion.py b/d2d_development/d2d_development/dataset_completion.py
new file mode 100644
index 0000000..2c85dac
--- /dev/null
+++ b/d2d_development/d2d_development/dataset_completion.py
@@ -0,0 +1,422 @@
+import json
+import logging
+from pathlib import Path
+
+import pandas as pd
+import requests
+from openhexa.sdk import current_run
+from openhexa.toolbox.dhis2 import DHIS2
+
+
+class DHIS2ImportError(RuntimeError):
+    """Raised when a completion push returns no usable JSON, an ERROR/WARNING status or conflicts."""
+
+    pass
+
+
+class DatasetCompletionSync:
+    """Sync dataset completion registrations from a source DHIS2 to a target DHIS2.
+
+    ATTENTION: This syncer assumes the source and target DHIS2 instances
+    have the same organisation units configured.
+    """
+
+    def __init__(
+        self,
+        source_dhis2: DHIS2,
+        target_dhis2: DHIS2,
+        import_strategy: str = "CREATE_AND_UPDATE",
+        dry_run: bool = True,
+        logger: logging.Logger | None = None,
+    ):
+        self.source_dhis2 = source_dhis2
+        self.target_dhis2 = target_dhis2
+        if import_strategy not in {"CREATE", "UPDATE", "CREATE_AND_UPDATE"}:
+            raise ValueError("Invalid import strategy (use 'CREATE', 'UPDATE' or 'CREATE_AND_UPDATE')")
+        self.import_strategy = import_strategy
+        self.dry_run = dry_run  # sent as the `dryRun` query parameter of every push
+        self.import_summary = {  # same initial structure as reset_import_summary()
+            "import_counts": {"imported": 0, "updated": 0, "ignored": 0, "deleted": 0},
+            "errors": {"fetch_errors": 0, "no_completion": 0, "push_errors": 0},
+        }
+        self.completion_table = pd.DataFrame()  # filled by _try_build_source_completion_table()
+        self.logger = logger if logger else logging.getLogger(__name__)  # defaults to this module's logger
+
+    def _fetch_completion_status_from_source(
+        self,
+        dataset_id: str,
+        period: str,
+        org_unit: str,
+        children: bool = True,
+        timeout: int = 5,
+    ) -> list[dict]:
+        """Fetch completion status from source DHIS2.
+
+        Args:
+            dataset_id: The dataset ID to fetch completion status for.
+            period: The period for which to fetch the completion status.
+            org_unit: The organisation unit to fetch completion status for.
+            children: Whether to include child org units in the fetch.
+            timeout: Timeout for the request in seconds.
+
+        Returns:
+            list[dict]: A list of completion status dictionaries from the DHIS2 API.
+                Returns an empty list if the request fails or no data is found.
+        """
+        endpoint = f"{self.source_dhis2.api.url}/completeDataSetRegistrations"
+        params = {
+            "period": period,
+            "orgUnit": org_unit,
+            "children": "true" if children else "false",
+            "dataSet": dataset_id,
+        }
+
+        try:
+            response = self.source_dhis2.api.session.get(endpoint, params=params, timeout=timeout)
+            response.raise_for_status()  # raise exception for HTTP errors
+            try:
+                completion = response.json().get("completeDataSetRegistrations", [])
+            except ValueError as e:
+                self.import_summary["errors"]["fetch_errors"] += 1
+                self.logger.error(f"Invalid JSON from {endpoint} for ds:{dataset_id} pe:{period} ou:{org_unit}: {e}")
+                return []
+            if not completion and not children:  # only single-org-unit lookups count as "no completion"
+                self.import_summary["errors"]["no_completion"] += 1
+                self.logger.info(
+                    f"No completion status found at source for ds: {dataset_id} pe: {period} ou: {org_unit}"
+                )
+            return completion if completion else []
+        except requests.RequestException as e:
+            self.import_summary["errors"]["fetch_errors"] += 1
+            self.logger.error(
+                f"GET request to {self.source_dhis2.api.url} to retrieve completion status for "
+                f"ds: {dataset_id} pe: {period} ou: {org_unit} failed: {e}"
+            )
+        return []
+
+    def _push_completion_status_to_target(
+        self,
+        dataset_id: str,
+        period: str,
+        org_unit: str,
+        date: str,
+        completed: bool,
+        timeout: int = 5,
+    ) -> None:
+        """POST one completion registration to the target `completeDataSetRegistrations` endpoint.
+
+        Args:
+            dataset_id: The dataset ID to push completion status for.
+            period: The period for which to push the completion status.
+            org_unit: The organisation unit to push completion status for.
+            date: The date of completion.
+            completed: Whether the dataset is marked as completed.
+            timeout: Timeout for the request in seconds.
+
+        Raises:
+            requests.RequestException or DHIS2ImportError: if the request or the import fails.
+        """
+        endpoint = f"{self.target_dhis2.api.url}/completeDataSetRegistrations"
+        payload = {
+            "completeDataSetRegistrations": [
+                {
+                    "organisationUnit": org_unit,
+                    "period": period,
+                    "completed": completed,
+                    "date": date,
+                    "dataSet": dataset_id,
+                }
+            ]
+        }
+        params = {
+            "dryRun": str(self.dry_run).lower(),
+            "importStrategy": self.import_strategy,
+            "preheatCache": True,
+            "skipAudit": True,
+            "reportMode": "FULL",
+        }
+
+        response = None
+        try:
+            response = self.target_dhis2.api.session.post(endpoint, json=payload, params=params, timeout=timeout)
+            response.raise_for_status()
+        except requests.RequestException:
+            # push_errors is incremented by _process_response (run in `finally`),
+            # so counting here as well would double count the same failure.
+            raise
+        finally:
+            self._process_response(ds=dataset_id, pe=period, ou=org_unit, response=response)
+
+    def _try_build_source_completion_table(self, org_units: list[str] | None, dataset_id: str, period: str) -> None:
+        """Build a completion status table for all organisation units provided.
+
+        Args:
+            org_units: Parent org unit IDs to fetch completion status for; None or empty clears the table.
+            dataset_id: The dataset ID to fetch completion status for.
+            period: The period for which to fetch the completion status.
+        """
+        # Always rebuild the table: keeping the one from a previous sync() call would
+        # serve statuses of another dataset/period, since lookups only match on org unit.
+        completion_statuses = []
+        for ou in org_units or []:
+            completion_statuses.extend(
+                self._fetch_completion_status_from_source(
+                    dataset_id=dataset_id, period=period, org_unit=ou, children=True, timeout=30
+                )
+            )
+
+        # An empty list produces an empty DataFrame, i.e. "no table available".
+        self.completion_table = pd.DataFrame(completion_statuses)
+
+    def _get_source_completion_status_for_ou(self, dataset_id: str, period: str, org_unit: str) -> dict | None:
+        """Handle fetching completion status for a specific org unit.
+
+        Returns:
+            dict | None: registration for `org_unit` (cached table first, then direct fetch), or None if not found.
+        """
+        if not self.completion_table.empty:
+            completion_status = self.completion_table[self.completion_table["organisationUnit"] == org_unit]
+            if not completion_status.empty:
+                return completion_status.iloc[0].to_dict()
+
+        results = self._fetch_completion_status_from_source(
+            dataset_id=dataset_id, period=period, org_unit=org_unit, children=False
+        )
+        for item in results or []:
+            if item.get("organisationUnit") == org_unit:
+                return item
+
+        return None
+
+    def sync(
+        self,
+        source_dataset_id: str,
+        target_dataset_id: str,
+        org_units: list[str] | None,
+        parent_ou: list[str] | None,
+        period: str,
+        logging_interval: int = 2000,
+        ds_processed_path: Path | None = None,
+        mark_uncompleted_as_processed: bool = False,
+    ) -> None:
+        """Sync completion status between datasets.
+
+        Args:
+            source_dataset_id: The dataset ID in the source DHIS2 instance.
+            target_dataset_id: The dataset ID in the target DHIS2 instance.
+            org_units: List of organisation unit IDs to sync.
+            parent_ou: List of parent organisation unit IDs to build completion table (if None, no table built).
+            period: The period for which to sync the completion status.
+            logging_interval: Interval for logging progress (defaults to 2000).
+            ds_processed_path: Path to save processed org units (if None, no file saving nor comparison).
+            mark_uncompleted_as_processed: If True, org units with no completion status will be marked as processed.
+        """
+        self.reset_import_summary()
+
+        if not org_units:
+            msg = f"No org units provided for period {period}. DS sync skipped."
+            self.logger.warning(msg)
+            current_run.log_warning(msg)
+            return
+
+        org_units_to_process = self._get_unprocessed_org_units(org_units, ds_processed_path, period)
+        if not org_units_to_process:
+            msg = f"All org units already processed for period {period}. DS sync skipped."
+            self.logger.info(msg)
+            current_run.log_info(msg)
+            return
+
+        msg = (
+            f"Starting dataset '{target_dataset_id}' completion process for period: "
+            f"{period} org units: {len(org_units_to_process)}."
+        )
+        current_run.log_info(msg)
+        self.logger.info(msg)
+
+        self._try_build_source_completion_table(org_units=parent_ou, dataset_id=source_dataset_id, period=period)
+
+        try:
+            processed = []
+            for idx, ou in enumerate(org_units_to_process, start=1):
+                completion_status = self._get_source_completion_status_for_ou(
+                    dataset_id=source_dataset_id,
+                    period=period,
+                    org_unit=ou,
+                )
+
+                # Branch with if/elif/else (no `continue`) so the checkpoint below runs for
+                # every org unit, including the last one.
+                if not completion_status:
+                    if mark_uncompleted_as_processed:
+                        processed.append(ou)  # if True, empty completion -> mark as processed
+                elif "date" not in completion_status or "completed" not in completion_status:
+                    self.import_summary["errors"]["push_errors"] += 1
+                    self.logger.error(
+                        f"Missing keys in completion status for period {period}, org unit {ou}: {completion_status}"
+                    )
+                else:
+                    try:
+                        self._push_completion_status_to_target(
+                            dataset_id=target_dataset_id,
+                            period=period,
+                            org_unit=ou,
+                            date=completion_status.get("date"),
+                            completed=completion_status.get("completed"),
+                        )
+                        processed.append(ou)
+                    except Exception as e:
+                        self.logger.error(f"Error pushing completion status for period {period}, org unit {ou}: {e}")
+
+                if idx % logging_interval == 0 or idx == len(org_units_to_process):
+                    current_run.log_info(f"{idx} / {len(org_units_to_process)} OUs processed")
+                    self._update_processed_ds_sync_file(
+                        processed=processed,
+                        period=period,
+                        processed_path=ds_processed_path,
+                    )
+        except Exception as e:
+            self.logger.error(f"Error setting dataset completion for dataset {target_dataset_id}, period {period}: {e}")
+        finally:
+            self._log_summary(org_units=org_units_to_process, period=period)
+
+    def _get_unprocessed_org_units(self, org_units: list, processed_path: Path | None, period: str) -> list:
+        if processed_path is None:  # no checkpoint directory configured: nothing to filter out
+            return org_units
+        ds_processed_fname = processed_path / f"ds_ou_processed_{period}.parquet"
+        if not ds_processed_fname.exists():  # no checkpoint file yet for this period
+            return org_units
+
+        try:
+            processed_df = pd.read_parquet(ds_processed_fname)
+            if "ORG_UNIT" not in processed_df.columns:
+                raise KeyError("Missing ORG_UNIT column")  # handled by the generic except below
+
+            processed_set = set(processed_df["ORG_UNIT"].dropna().unique())
+            remaining = [ou for ou in org_units if ou not in processed_set]  # keeps the input order
+
+            msg = f"Loaded {len(processed_set)} processed org units, {len(remaining)} to process for period {period}."
+            self.logger.info(msg)
+            current_run.log_info(msg)
+            return remaining
+        except Exception as e:  # unreadable or malformed checkpoint: process every org unit again
+            msg = f"Error loading processed org units file: {ds_processed_fname}. Returning all org units to process."
+            self.logger.error(msg + f" Error: {e}")
+            current_run.log_info(msg)
+            return org_units
+
+    def _update_processed_ds_sync_file(
+        self,
+        processed: list,
+        period: str,
+        processed_path: Path | None,
+    ) -> None:
+        """Save the processed org units to a parquet file, merged with any existing file for the period."""
+        if processed_path is None:
+            current_run.log_warning("No processed path provided, skipping saving processed org units.")
+            return
+
+        processed_path.mkdir(parents=True, exist_ok=True)
+        ds_processed_file = processed_path / f"ds_ou_processed_{period}.parquet"
+
+        msg = None
+        final_processed = processed
+
+        if ds_processed_file.exists():
+            existing_df = pd.read_parquet(ds_processed_file)
+            existing_org_units = set(existing_df["ORG_UNIT"].unique())
+            new_org_units = [ou for ou in processed if ou not in existing_org_units]
+            final_processed = list(existing_org_units) + new_org_units
+            msg = (  # NOTE(review): replaced by the "Saved ..." message below whenever rows are written
+                f"Found {len(existing_org_units)} processed OUs, "
+                f"updating file {ds_processed_file.name} with {len(new_org_units)} new OUs."
+            )
+
+        if final_processed:  # the file is only (re)written when there is something to record
+            df_processed = pd.DataFrame({"ORG_UNIT": final_processed})
+            df_processed.to_parquet(ds_processed_file, index=False)
+            msg = f"Saved {len(final_processed)} processed org units in {ds_processed_file.name}."
+
+        if msg:
+            current_run.log_info(msg)
+            self.logger.info(msg)
+    def _log_summary(self, org_units: list, period: str) -> None:
+        msg = (  # headline: accumulated import counts, sent to both current_run and self.logger
+            f"Dataset completion period {period} summary: {self.import_summary['import_counts']} "
+            f"total org units: {len(org_units)} "
+        )
+        current_run.log_info(msg)
+        self.logger.info(msg)
+
+        if self.import_summary["errors"]["no_completion"] > 0:  # one warning per non-zero error counter
+            msg = (
+                f"{self.import_summary['errors']['no_completion']} out of "
+                f"{len(org_units)} completion statuses failed to be retrieved from source."
+            )
+            current_run.log_warning(msg)
+            self.logger.warning(msg)
+
+        if self.import_summary["errors"]["fetch_errors"] > 0:
+            msg = (
+                f"{self.import_summary['errors']['fetch_errors']} out of "
+                f"{len(org_units)} completion statuses failed to fetch."
+            )
+            current_run.log_warning(msg)
+            self.logger.warning(msg)
+
+        if self.import_summary["errors"]["push_errors"] > 0:
+            msg = (
+                f"{self.import_summary['errors']['push_errors']} "
+                f"out of {len(org_units)} completion statuses failed to push."
+            )
+            current_run.log_warning(msg)
+            self.logger.warning(msg)
+
+    def _process_response(self, ds: str, pe: str, ou: str, response: requests.Response | None) -> None:
+        """Validate and log the DHIS2 response to a completion push; raises DHIS2ImportError on failure."""
+        json_or_none = self._safe_json(response)
+        if not json_or_none:
+            self.import_summary["errors"]["push_errors"] += 1
+            self.logger.error(
+                f"No JSON response received for completion request ds: {ds} pe: {pe} ou: {ou} from DHIS2 API."
+            )
+            raise DHIS2ImportError("Empty or invalid JSON response from DHIS2")
+
+        conflicts: list = json_or_none.get("conflicts") or []
+        status = json_or_none.get("status")
+        if status in ["ERROR", "WARNING"] or conflicts:
+            self.import_summary["errors"]["push_errors"] += 1  # once per failed push, even with 0 or many conflicts
+            for conflict in conflicts:
+                self.logger.error(
+                    f"Conflict pushing completion for ds: {ds} pe: {pe} ou: {ou} status: {status} - {conflict}"
+                )
+            self._update_import_summary(response=json_or_none)
+            raise DHIS2ImportError(
+                f"DHIS2 completion push failed with status={status} "
+                f"and {len(conflicts)} conflict(s) for ds:{ds} pe:{pe} ou:{ou}"
+            )
+
+        if status == "SUCCESS":
+            self.logger.info(f"Successfully pushed to target completion ds: {ds} pe:{pe} ou: {ou}")
+            self._update_import_summary(response=json_or_none)
+
+    def _safe_json(self, r: requests.Response | None) -> dict | None:
+        if r is None:  # the caller passes None when the request produced no response object
+            return None
+        try:
+            return r.json()
+        except (ValueError, json.JSONDecodeError):
+            return None
+
+    def _update_import_summary(self, response: dict) -> None:
+        if response:
+            import_counts = response.get("importCount", {})  # absent importCount -> every counter gets +0
+            for key in ["imported", "updated", "ignored", "deleted"]:
+                self.import_summary["import_counts"][key] += import_counts.get(key, 0)
+
+    def reset_import_summary(self) -> None:
+        """Reset every import count and error counter in `self.import_summary` to zero."""
+        self.import_summary = {
+            "import_counts": {"imported": 0, "updated": 0, "ignored": 0, "deleted": 0},
+            "errors": {"fetch_errors": 0, "no_completion": 0, "push_errors": 0},
+        }
diff --git a/d2d_development/d2d_development/exceptions.py b/d2d_development/d2d_development/exceptions.py
new file mode 100644
index 0000000..28096b9
--- /dev/null
+++ b/d2d_development/d2d_development/exceptions.py
@@ -0,0 +1,6 @@
+class ExtractorError(Exception):
+    """Custom exception for all DHIS2Extractor errors (imported by extract.py)."""
+
+
+class PusherError(Exception):
+    """Custom exception for all DHIS2Pusher errors."""
diff --git a/d2d_development/d2d_development/extract.py b/d2d_development/d2d_development/extract.py
new file mode 100644
index 0000000..2937377
--- /dev/null
+++ b/d2d_development/d2d_development/extract.py
@@ -0,0 +1,462 @@
+import logging
+from pathlib import Path
+
+import polars as pl
+from openhexa.toolbox.dhis2 import DHIS2
+
+from .data_models import DataType
+from .exceptions import ExtractorError
+from .utils import log_message, save_to_parquet
+
+# TODO:
+# 1) Refactor the extractors to (Following DHIS2 client endpoints):
+# -DataValueSetsExtractor (DE)
+# -AnalyticsExtractor (DE, indicators, ReportingRates)
+
+
+class DataElementsExtractor:
+    """Handler that downloads data element values from DHIS2 and formats them."""
+
+    def __init__(self, extractor: "DHIS2Extractor"):
+        self.extractor = extractor
+
+    def download_period(
+        self,
+        data_elements: list[str],
+        org_units: list[str],
+        period: str,
+        output_dir: Path,
+        filename: str | None = None,
+        **kwargs,  # noqa: ANN003
+    ) -> Path | None:
+        """Extract the data element values of one period and store them in the output directory.
+
+        Parameters
+        ----------
+        data_elements : list[str]
+            UIDs of the DHIS2 data elements to extract.
+        org_units : list[str]
+            UIDs of the DHIS2 organisation units to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract.
+        output_dir : Path
+            Directory that receives the extract file.
+        filename : str | None
+            Name of the extract file. If None, the parent extractor falls back to a default name.
+        kwargs : dict
+            Extra options passed on to the data retrieval, such as `last_updated` for filtering data.
+
+        Returns
+        -------
+        Path | None
+            Path of the extract file, or None if no data was extracted.
+
+        Raises
+        ------
+        ExtractorError
+            If anything fails during the extract; the original exception is chained as the cause.
+        """
+        try:
+            self.extractor._log_message(f"Retrieving data elements extract for period : {period}")
+            return self.extractor._handle_extract_for_period(
+                handler=self,
+                data_products=data_elements,
+                org_units=org_units,
+                period=period,
+                output_dir=output_dir,
+                filename=filename,
+                **kwargs,
+            )
+        except Exception as err:
+            self.extractor._log_message(
+                "Extract data elements download error.", log_current_run=False, error_details=str(err), level="error"
+            )
+            raise ExtractorError(f"Extract data elements download error : {err}") from err
+
+    def _retrieve_data(self, data_elements: list[str], org_units: list[str], period: str, **kwargs) -> pl.DataFrame:  # noqa: ANN003
+        """Fetch one period through the client's `data_value_sets.get` and map it to the extract format."""
+        parent = self.extractor
+        if not parent._valid_dhis2_period_format(period):
+            raise ExtractorError(f"Invalid DHIS2 period format: {period}")
+        try:
+            response = parent.dhis2_client.data_value_sets.get(
+                data_elements=data_elements,
+                periods=[period],
+                org_units=org_units,
+                last_updated=kwargs.get("last_updated"),  # not implemented yet
+            )
+        except Exception as err:
+            msg = "Error retrieving data elements data"
+            parent._log_message(msg, log_current_run=False, error_details=str(err), level="error")
+            raise ExtractorError(msg) from err
+        return parent._map_to_dhis2_format(pl.DataFrame(response), data_type=DataType.DATA_ELEMENT)
+
+
+class IndicatorsExtractor:
+    """Handles downloading and formatting of indicators from DHIS2."""
+
+    def __init__(self, extractor: "DHIS2Extractor"):
+        self.extractor = extractor
+
+    def download_period(
+        self,
+        indicators: list[str],
+        org_units: list[str],
+        period: str,
+        output_dir: Path,
+        filename: str | None = None,
+        **kwargs,  # noqa: ANN003
+    ) -> Path | None:
+        """Download and handle the data extract for the specified period, saving it to the output directory.
+
+        Parameters
+        ----------
+        indicators : list[str]
+            List of DHIS2 indicators UIDs to extract.
+        org_units : list[str]
+            List of DHIS2 organization unit UIDs to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract data for.
+        output_dir : Path
+            Directory where extracted data files will be saved.
+        filename : str | None
+            Optional filename for the extracted data file. If None, a default name will be used.
+        kwargs : dict
+            Additional keyword arguments for data retrieval from analytics like:
+              -include_cocs: bool, whether to include category option combo mapping for indicators.
+              -last_updated: datetime, not implemented yet, placeholder for future use to filter data
+                based on last updated timestamp.
+
+        Returns
+        -------
+        Path | None
+            The path to the extracted data file, or None if no data was extracted.
+
+        Raises
+        ------
+        ExtractorError
+            If an error occurs during the extract process.
+        """
+        try:
+            self.extractor._log_message(f"Retrieving indicators extract for period : {period}")
+            return self.extractor._handle_extract_for_period(
+                handler=self,
+                data_products=indicators,
+                org_units=org_units,
+                period=period,
+                output_dir=output_dir,
+                filename=filename,
+                **kwargs,
+            )
+        except Exception as e:
+            self.extractor._log_message(
+                "Extract indicators download error.", log_current_run=False, error_details=str(e), level="error"
+            )
+            raise ExtractorError(f"Extract indicators download error : {e}") from e
+
+    def _retrieve_data(self, indicators: list[str], org_units: list[str], period: str, **kwargs) -> pl.DataFrame:  # noqa: ANN003
+        """Query the analytics endpoint for one period; the mapped result is None when no rows come back."""
+        if not self.extractor._valid_dhis2_period_format(period):
+            raise ExtractorError(f"Invalid DHIS2 period format: {period}")
+
+        # NOTE: This option is useful to retrieve data elements using the analytics endpoint.
+        include_cocs = kwargs.get("include_cocs", False)
+        try:
+            response = self.extractor.dhis2_client.analytics.get(
+                indicators=indicators,
+                periods=[period],
+                org_units=org_units,
+                include_cocs=include_cocs,
+            )
+        except Exception as e:
+            msg = "Error retrieving indicators data"
+            self.extractor._log_message(msg, log_current_run=False, error_details=str(e), level="error")
+            raise ExtractorError(msg) from e
+        raw = pl.DataFrame(response)
+        if raw.height > 0:  # an empty response has no columns, renaming them would raise
+            raw = raw.rename({"pe": "period", "ou": "orgUnit"})
+        if "co" in raw.columns:
+            raw = raw.rename({"co": "categoryOptionCombo"})
+        return self.extractor._map_to_dhis2_format(raw, data_type=DataType.INDICATOR, map_cocs=include_cocs)
+
+
+class ReportingRatesExtractor:
+    """Handles downloading and formatting of reporting rates from DHIS2."""
+
+    def __init__(self, extractor: "DHIS2Extractor"):
+        self.extractor = extractor
+
+    def download_period(
+        self,
+        reporting_rates: list[str],
+        org_units: list[str],
+        period: str,
+        output_dir: Path,
+        filename: str | None = None,
+        **kwargs,  # noqa: ANN003
+    ) -> Path | None:
+        """Download and handle the data extract for the specified period, saving it to the output directory.
+
+        Parameters
+        ----------
+        reporting_rates : list[str]
+            List of DHIS2 reporting rates UIDs.RATE to extract.
+        org_units : list[str]
+            List of DHIS2 organization unit UIDs to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract data for.
+        output_dir : Path
+            Directory where extracted data files will be saved.
+        filename : str | None
+            Optional filename for the extracted data file. If None, a default name will be used.
+        kwargs : dict
+            Accepted for parity with the other handlers; the reporting rates retrieval does not read them.
+
+        Returns
+        -------
+        Path | None
+            The path to the extracted data file, or None if no data was extracted.
+
+        Raises
+        ------
+        ExtractorError
+            If an error occurs during the extract process.
+        """
+        try:
+            self.extractor._log_message(f"Retrieving reporting rates extract for period : {period}")
+            return self.extractor._handle_extract_for_period(
+                handler=self,
+                data_products=reporting_rates,
+                org_units=org_units,
+                period=period,
+                output_dir=output_dir,
+                filename=filename,
+                **kwargs,
+            )
+        except Exception as e:
+            self.extractor._log_message(
+                "Extract reporting rates download error.", log_current_run=False, error_details=str(e), level="error"
+            )
+            raise ExtractorError(f"Extract reporting rates download error : {e}") from e
+
+    def _retrieve_data(self, reporting_rates: list[str], org_units: list[str], period: str, **kwargs) -> pl.DataFrame:  # noqa: ANN003
+        if not self.extractor._valid_dhis2_period_format(period):
+            raise ExtractorError(f"Invalid DHIS2 period format: {period}")
+        try:
+            response = self.extractor.dhis2_client.analytics.get(
+                data_elements=reporting_rates,
+                periods=[period],
+                org_units=org_units,
+                include_cocs=False,  # avoid client error
+            )
+        except Exception as e:
+            msg = "Error retrieving reporting rates data"
+            self.extractor._log_message(msg, log_current_run=False, error_details=str(e), level="error")
+            raise ExtractorError(msg) from e
+        raw = pl.DataFrame(response)
+        if raw.height > 0:  # an empty response has no columns, renaming them would raise
+            raw = raw.rename({"pe": "period", "ou": "orgUnit"})
+        return self.extractor._map_to_dhis2_format(raw, data_type=DataType.REPORTING_RATE)
+
+
+class DHIS2Extractor:
+    """Extracts data from DHIS2 using various handlers for data elements, indicators, and reporting rates.
+
+    Attributes
+    ----------
+    dhis2_client : DHIS2
+        The DHIS2 client used for data extraction.
+    logger : logging.Logger
+        Logger used for messages; the module logger is used when none is given.
+    download_mode : str
+        Mode for downloading files ("DOWNLOAD_REPLACE" or "DOWNLOAD_NEW").
+    last_updated : None
+        Placeholder for future use.
+    return_existing_file : bool
+        When DOWNLOAD_NEW mode is used:
+            True: returns the path to existing files.
+            False: returns None if the file already exists.
+        Default is False.
+
+    Handlers
+    --------
+    data_elements : DataElementsExtractor
+        Handler for extracting data elements.
+    indicators : IndicatorsExtractor
+        Handler for extracting indicators.
+    reporting_rates : ReportingRatesExtractor
+        Handler for extracting reporting rates.
+    """
+
+    def __init__(
+        self,
+        dhis2_client: DHIS2,
+        download_mode: str = "DOWNLOAD_REPLACE",
+        return_existing_file: bool = False,
+        logger: logging.Logger | None = None,
+    ):
+        if download_mode not in {"DOWNLOAD_REPLACE", "DOWNLOAD_NEW"}:
+            raise ExtractorError("Invalid 'download_mode', use 'DOWNLOAD_REPLACE' or 'DOWNLOAD_NEW'.")
+        self.dhis2_client = dhis2_client
+        self.download_mode = download_mode
+        self.return_existing_file = return_existing_file  # only consulted in DOWNLOAD_NEW mode
+        self.last_updated = None  # NOTE: Placeholder for future use
+        self.logger = logger or logging.getLogger(__name__)
+        self.log_function = log_message
+        self.data_elements = DataElementsExtractor(self)  # one handler per supported data type
+        self.indicators = IndicatorsExtractor(self)
+        self.reporting_rates = ReportingRatesExtractor(self)
+
+    def _handle_extract_for_period(
+        self,
+        handler: DataElementsExtractor | IndicatorsExtractor | ReportingRatesExtractor,
+        data_products: list[str],
+        org_units: list[str],
+        period: str,
+        output_dir: Path,
+        filename: str | None = None,
+        **kwargs,  # noqa: ANN003
+    ) -> Path | None:
+        """Fetch one period through `handler` and store the result as a parquet file.
+
+        Returns the file path, or None when nothing is returned for a skipped download or no data is found.
+        """
+        output_dir.mkdir(parents=True, exist_ok=True)
+        target_file = output_dir / (filename if filename else f"data_{period}.parquet")
+
+        # In DOWNLOAD_NEW mode an existing extract is never fetched again.
+        if self.download_mode == "DOWNLOAD_NEW" and target_file.exists():
+            self._log_message(f"Extract for period {period} already exists, download skipped.")
+            return target_file if self.return_existing_file else None
+
+        extracted = handler._retrieve_data(data_products, org_units, period, **kwargs)
+        if extracted is None:
+            self._log_message(f"Nothing to save for period {period}.")
+            return None
+
+        if target_file.exists():
+            self._log_message(f"Replacing extract for period {period}.")
+
+        save_to_parquet(extracted, target_file)
+        return target_file
+
+    def _map_to_dhis2_format(
+        self,
+        dhis_data: pl.DataFrame,
+        data_type: DataType = DataType.DATA_ELEMENT,
+        domain_type: str = "AGGREGATED",
+        map_cocs: bool = False,
+    ) -> pl.DataFrame | None:
+        """Maps DHIS2 data to a standardized data extraction table.
+
+        Parameters
+        ----------
+        dhis_data : pl.DataFrame
+            Raw DHIS2 data. Columns read when present: `period`, `orgUnit`, `value`, plus `dataElement`,
+            `categoryOptionCombo` and `attributeOptionCombo` (DATA_ELEMENT) or `dx` (other data types).
+        data_type : DataType
+            The type of data being mapped. Default is DataType.DATA_ELEMENT.
+            - DATA_ELEMENT: maps `dataElement` to `dx` and copies both option combo columns.
+            - INDICATOR: maps `dx` to `dx`.
+            - REPORTING_RATE: maps `dx` to `dx` and `rateMetric` by splitting the string on `.`.
+        domain_type : str, optional
+            Value written to `domainType`: "AGGREGATED" (per period, e.g. monthly; default) or "TRACKER".
+            **NOTE: THIS IS WORK IN PROGRESS AND NOT USED YET**
+        map_cocs : bool, optional
+            Copy `categoryOptionCombo` to the output for INDICATOR data, for when the analytics request was
+            made with category option combos included. Ignored for other data types. Default is False.
+
+        Returns
+        -------
+        pl.DataFrame | None
+            None when `dhis_data` has no rows, otherwise a DataFrame with the following columns (camelCase):
+            - "dataType": The type of data (DATA_ELEMENT, REPORTING_RATE, or INDICATOR).
+            - "dx": Data element, dataset, or indicator UID.
+            - "period": Reporting period.
+            - "orgUnit": Organization unit.
+            - "categoryOptionCombo": Category option combo UID (DATA_ELEMENT, or INDICATOR with `map_cocs`).
+            - "attributeOptionCombo": (Only for DATA_ELEMENT) Attribute option combo UID.
+            - "rateMetric": (Only for REPORTING_RATE) Rate metric.
+            - "domainType": Data domain (AGGREGATED or TRACKER).
+            - "value": Data value.
+
+        Raises
+        ------
+        ExtractorError
+            If `data_type` is not a DataType member or the mapping itself fails.
+        """
+        if dhis_data.height == 0:
+            return None
+
+        if not isinstance(data_type, DataType):  # `x in DataType` raises TypeError for non-members before 3.12
+            raise ExtractorError(
+                "Incorrect 'data_type' configuration use: "
+                "(DataType.DATA_ELEMENT, DataType.REPORTING_RATE, DataType.INDICATOR)."
+            )
+
+        try:
+            n = dhis_data.height
+            data = {
+                "dataType": [data_type.value] * n,
+                "dx": None,
+                "period": dhis_data["period"] if "period" in dhis_data.columns else None,
+                "orgUnit": dhis_data["orgUnit"] if "orgUnit" in dhis_data.columns else None,
+                "categoryOptionCombo": None,
+                "attributeOptionCombo": None,
+                "rateMetric": None,
+                "domainType": [domain_type] * n,
+                "value": dhis_data["value"] if "value" in dhis_data.columns else None,
+            }
+            if data_type == DataType.DATA_ELEMENT:
+                data["dx"] = dhis_data["dataElement"] if "dataElement" in dhis_data.columns else None
+                data["categoryOptionCombo"] = (
+                    dhis_data["categoryOptionCombo"] if "categoryOptionCombo" in dhis_data.columns else None
+                )
+                data["attributeOptionCombo"] = (
+                    dhis_data["attributeOptionCombo"] if "attributeOptionCombo" in dhis_data.columns else None
+                )
+            elif data_type == DataType.REPORTING_RATE:
+                if "dx" in dhis_data.columns:
+                    split = dhis_data["dx"].str.split_exact(".", 1)
+                    data["dx"] = split.struct.field("field_0")
+                    data["rateMetric"] = split.struct.field("field_1")
+            elif data_type == DataType.INDICATOR:
+                data["dx"] = dhis_data["dx"] if "dx" in dhis_data.columns else None
+                if map_cocs and "categoryOptionCombo" in dhis_data.columns:
+                    data["categoryOptionCombo"] = dhis_data["categoryOptionCombo"]
+            return pl.DataFrame(data)
+
+        except AttributeError as e:
+            msg = (
+                f"Failed to map DHIS2 data to the expected format. "
+                f"Input columns: {list(dhis_data.columns)}. "
+                f"Expected columns depend on data_type: {data_type}."
+            )
+            self._log_message(msg, log_current_run=False, error_details=f"AttributeError: {e}", level="error")
+            raise ExtractorError(msg) from e
+        except Exception as e:
+            msg = "Unexpected error while mapping DHIS2 data"
+            self._log_message(msg, log_current_run=False, error_details=f"{type(e).__name__}: {e}", level="error")
+            raise ExtractorError(msg) from e
+
+    def _log_message(self, message: str, level: str = "info", log_current_run: bool = True, error_details: str = ""):
+        """Send a message through the configured logging function, tagged with this extractor's logger."""
+        # The caller's settings are forwarded untouched; only the logger and exception class are fixed here.
+        options = {"level": level, "log_current_run": log_current_run, "error_details": error_details}
+        self.log_function(
+            logger=self.logger,
+            message=message,
+            exception_class=ExtractorError,
+            **options,
+        )
+
+    def _valid_dhis2_period_format(self, dhis2_period: str) -> bool:
+        """Validate if the given period string is in a valid DHIS2 format.
+
+        Returns
+        -------
+        bool
+            True if valid, False otherwise. Currently always True: no check is implemented yet.
+        """
+        # TODO: Expand this function to cover more DHIS2 period formats as needed
+        return True
diff --git a/d2d_development/d2d_development/org_unit_aligner.py b/d2d_development/d2d_development/org_unit_aligner.py
new file mode 100644
index 0000000..aa0e576
--- /dev/null
+++ b/d2d_development/d2d_development/org_unit_aligner.py
@@ -0,0 +1,351 @@
+import json
+import logging
+
+import pandas as pd
+import requests
+from openhexa.sdk import current_run
+from openhexa.toolbox.dhis2 import DHIS2
+from packaging import version
+from requests import Response
+from requests.structures import CaseInsensitiveDict
+
+from .data_models import OrgUnitObj
+
+
+class OrgUnitCreateError(RuntimeError):
+    """Raised when creating organisation units in the target DHIS2 fails unexpectedly."""
+
+    pass
+
+
+class OrgUnitUpdateError(RuntimeError):
+    """Raised when updating organisation units in the target DHIS2 fails unexpectedly."""
+
+    pass
+
+
+class DHIS2PyramidAligner:
+    """Align organisation units between two DHIS2 instances.
+
+    This class is stateless and provides methods to synchronize organisation units
+    from a source DHIS2 instance to a target DHIS2 instance. The alignment process
+    compares the pyramids of both instances and performs the necessary operations
+    to keep the target up to date with the source.
+
+    Supported operations include:
+    - Creating organisation units that exist in the source but not in the target.
+    - Updating organisation units that exist in both but differ in their attributes.
+
+    Apart from the `summary` of the latest alignment, this class keeps no state
+    between calls; all data must be provided as method parameters.
+    """
+
+    def __init__(self, logger: logging.Logger | None = None):
+        self.logger = logger if logger else logging.getLogger(__name__)  # fall back to the module logger
+        self._initialize_summary()
+
+    def _initialize_summary(self):
+        """Reset `self.summary`: zero counters and empty detail lists for CREATE, UPDATE and INVALID."""
+        create = {"CREATE_COUNT": 0, "CREATE_DETAILS": [], "ERROR_COUNT": 0, "ERROR_DETAILS": []}
+        update = {"UPDATE_COUNT": 0, "UPDATE_DETAILS": [], "ERROR_COUNT": 0, "ERROR_DETAILS": []}
+        invalid = {"INVALID_COUNT": 0, "INVALID_DETAILS": []}
+        self.summary = {"CREATE": create, "UPDATE": update, "INVALID": invalid}
+
+    def align_to(
+        self,
+        target_dhis2: DHIS2,
+        source_pyramid: pd.DataFrame,
+        dry_run: bool = True,
+    ) -> None:
+        """Sync the organisation units of the target DHIS2 instance with the source pyramid.
+
+        Units whose id exists only in the source are created; units present on both sides are checked
+        and updated when they differ. `self.summary` is reset on every call and filled with the outcome.
+
+        An empty source pyramid skips the alignment. `dry_run` is forwarded to DHIS2 as the `dryRun`
+        parameter of each request.
+        """
+        self._initialize_summary()  # reset first, so an early return cannot leave stale results behind
+        if source_pyramid.empty:
+            self._log_message("Source pyramid is empty. Organisation units alignment skipped.", level="warning")
+            return
+
+        self._log_message(f"Retrieving organisation units from target DHIS2: {target_dhis2.api.url}")
+        fields = "id,name,shortName,openingDate,closedDate,parent,level,path,geometry"
+        target_pyramid = pd.DataFrame(target_dhis2.meta.organisation_units(fields=fields))
+        self._log_message(f"Shape target pyramid: {target_pyramid.shape} - dry_run: {dry_run}")
+
+        source_ids = set(source_pyramid["id"])
+        # A target without any organisation unit yields a frame without columns, so "id" may be absent.
+        target_ids = set(target_pyramid["id"]) if "id" in target_pyramid.columns else set()
+
+        # New OU: ids in source but not in target. The selection is copied so it is independent of the source.
+        ou_to_create = source_pyramid[source_pyramid["id"].isin(source_ids - target_ids)].copy()
+        self._push_org_units_create(ou_to_create=ou_to_create, target_dhis2=target_dhis2, dry_run=dry_run)
+
+        # Matching OU: ids found on both sides are compared and updated when needed.
+        self._push_org_units_update(
+            org_unit_source=source_pyramid,
+            org_unit_target=target_pyramid,
+            ou_ids_to_check=list(source_ids & target_ids),
+            target_dhis2=target_dhis2,
+            dry_run=dry_run,
+        )
+
+    def _push_org_units_create(self, ou_to_create: pd.DataFrame, target_dhis2: DHIS2, dry_run: bool) -> None:
+        """Create organisation units in the target DHIS2 instance.
+
+        Every row is validated, pushed when valid and recorded in `self.summary`; `ou_to_create` is not modified.
+
+        Parameters
+        ----------
+        ou_to_create : pd.DataFrame
+            DataFrame containing organisation unit data to be created.
+        target_dhis2 : DHIS2
+            DHIS2 client for the target instance.
+        dry_run : bool
+            If True, performs a dry run without making changes.
+
+        Raises
+        ------
+        OrgUnitCreateError
+            If an unexpected error occurs while creating the organisation units.
+        """
+        if len(ou_to_create) == 0:
+            self._log_message("No new organisation units to create.")
+            return
+
+        try:
+            if version.parse(target_dhis2.version) <= version.parse("2.32"):  # geometry needs DHIS2 > 2.32
+                ou_to_create = ou_to_create.assign(geometry=None)  # new frame: caller data stays untouched
+                self._log_message(
+                    "DHIS2 version not compatible with geometry. Geometry will not be pushed.", level="warning"
+                )
+            self._log_message(f"Creating {len(ou_to_create)} organisation units.")
+            for row_tuple in ou_to_create.itertuples(index=False, name="OrgUnitRow"):
+                ou = OrgUnitObj(row_tuple)
+                if ou.is_valid():
+                    response = self._push_org_unit(
+                        dhis2_client=target_dhis2,
+                        org_unit=ou,
+                        strategy="CREATE",
+                        dry_run=dry_run,  # dry_run=False -> Apply changes in the DHIS2
+                    )
+                    if response.get("status") not in ("SUCCESS", "OK"):
+                        self.summary["CREATE"]["ERROR_COUNT"] += 1
+                        self.summary["CREATE"]["ERROR_DETAILS"].append(response)
+                        self.logger.error(str(response))
+                    else:
+                        created_ou = {"ACTION": "CREATE", "OU": str(ou.to_json()), "RESPONSE": response}
+                        self.summary["CREATE"]["CREATE_COUNT"] += 1
+                        self.summary["CREATE"]["CREATE_DETAILS"].append(created_ou)
+                        self.logger.info(created_ou)
+                else:
+                    invalid_ou = {"ACTION": "CREATE", "STATUS": "INVALID", "OU": str(ou.to_json())}
+                    self.summary["INVALID"]["INVALID_COUNT"] += 1
+                    self.summary["INVALID"]["INVALID_DETAILS"].append(invalid_ou)
+                    self.logger.warning(invalid_ou)
+        except Exception as e:
+            msg = "Unexpected error occurred while creating organisation units."
+            self.logger.exception(msg)
+            raise OrgUnitCreateError(f"{msg} Check logs for details.") from e
+
+    def _push_org_units_update(
+        self,
+        org_unit_source: pd.DataFrame,
+        org_unit_target: pd.DataFrame,
+        ou_ids_to_check: list[str],
+        target_dhis2: DHIS2,
+        dry_run: bool,
+        logging_interval: int = 5000,
+    ):
+        """Update the target organisation units that differ from their source counterpart.
+
+        Neither input frame is modified. Any unexpected failure is re-raised as OrgUnitUpdateError.
+        """
+        if not ou_ids_to_check:
+            self._log_message("No organisation units to update.")
+            return
+
+        try:
+            self._log_message(f"Checking for updates in {len(ou_ids_to_check)} organisation units.")
+            if version.parse(target_dhis2.version) <= version.parse("2.32"):  # geometry needs DHIS2 > 2.32
+                org_unit_source = org_unit_source.assign(geometry=None)  # new frames: caller data untouched
+                org_unit_target = org_unit_target.assign(geometry=None)
+                self._log_message(
+                    "DHIS2 version not compatible with geometry. Geometry will be ignored.", level="warning"
+                )
+
+            index_dictionary = self._build_id_indexes(org_unit_source, org_unit_target, ou_ids_to_check)
+
+            total_ou = len(ou_ids_to_check)
+            for progress_count, (_, indices) in enumerate(index_dictionary.items(), start=1):
+                # NOTE: the source/target comparison below is delegated to OrgUnitObj
+                ou_source = OrgUnitObj(org_unit_source.iloc[indices["source"]])
+                ou_target = OrgUnitObj(org_unit_target.iloc[indices["target"]])
+
+                if ou_source != ou_target:
+                    response = self._push_org_unit(
+                        dhis2_client=target_dhis2,
+                        org_unit=ou_source,
+                        strategy="UPDATE",
+                        dry_run=dry_run,  # dry_run=False -> Apply changes in the DHIS2
+                        is_testing=False,
+                    )
+                    if response.get("status") not in ("SUCCESS", "OK"):
+                        self.summary["UPDATE"]["ERROR_COUNT"] += 1
+                        self.summary["UPDATE"]["ERROR_DETAILS"].append(response)
+                        self.logger.error(str(response))
+                    else:
+                        updated_ou = {
+                            "ACTION": "UPDATE",
+                            "OLD_OU": str(ou_target.to_json()),
+                            "NEW_OU": str(ou_source.to_json()),
+                            "RESPONSE": str(response),
+                        }
+                        self.summary["UPDATE"]["UPDATE_COUNT"] += 1
+                        self.summary["UPDATE"]["UPDATE_DETAILS"].append(updated_ou)
+                        self.logger.info(str(updated_ou))
+
+                if progress_count % logging_interval == 0 or progress_count == total_ou:
+                    self._log_message(f"Organisation units checked: {progress_count}/{total_ou} for update.")
+
+        except Exception as e:
+            msg = "Unexpected error occurred while updating organisation units."
+            self.logger.exception(msg)
+            raise OrgUnitUpdateError(f"{msg} Check logs for details.") from e
+
+    def _log_message(self, message: str, level: str = "info") -> None:
+        """Send `message` to the standard logger and to the OpenHEXA `current_run`, when available.
+
+        Nothing is logged when `message` or `level` is empty.
+
+        Raises
+        ------
+        ValueError
+            If `level` (case-insensitive) is not one of info, warning, error or debug.
+        """
+        if not (level and message):
+            return
+
+        run_method_names = {
+            "info": "log_info",
+            "warning": "log_warning",
+            "error": "log_error",
+            "debug": "log_debug",
+        }
+        level_name = level.lower()
+        if level_name not in run_method_names:
+            raise ValueError(f"Unsupported logging level: {level_name}")
+
+        # The logger method carries the level name itself; current_run uses the `log_<level>` variant.
+        sinks = ((self.logger, level_name), (current_run, run_method_names[level_name]))
+        for sink, method_name in sinks:
+            if sink and hasattr(sink, method_name):
+                getattr(sink, method_name)(message)
+
+    def _push_org_unit(
+        self,
+        dhis2_client: DHIS2,
+        org_unit: OrgUnitObj,
+        strategy: str = "CREATE",
+        dry_run: bool = True,
+        is_testing: bool = False,
+    ) -> dict:
+        """Pushes an organisation unit to the DHIS2 instance using the specified strategy.
+
+        Parameters
+        ----------
+        dhis2_client : DHIS2
+            DHIS2 client for the target instance.
+        org_unit : OrgUnitObj
+            Organisation unit object to be pushed.
+        strategy : str, optional
+            Strategy for pushing ('CREATE' or 'UPDATE'), by default "CREATE".
+            Outside test mode any other value raises ValueError.
+        dry_run : bool, optional
+            If True, performs a dry run without making changes, by default True.
+        is_testing : bool, optional
+            If True, no request is sent and a canned successful response is used, by default False.
+
+        Returns
+        -------
+        dict
+            Formatted response from the DHIS2 API.
+        """
+        if is_testing:
+            response = {"importCount": {"imported": 1, "ignored": 0}}
+            payload = {"status": "OK", "response": response}
+            r = Response()
+            r.status_code = 200
+            r.headers = CaseInsensitiveDict({"Content-Type": "application/json"})
+            r._content = json.dumps(payload).encode("utf-8")  # private attr used internally
+        else:
+            if strategy == "CREATE":
+                endpoint, payload = "organisationUnits", org_unit.to_json()
+            elif strategy == "UPDATE":
+                endpoint, payload = "metadata", {"organisationUnits": [org_unit.to_json()]}
+            else:
+                raise ValueError(f"Unsupported strategy: {strategy} (use 'CREATE' or 'UPDATE')")
+
+            r = dhis2_client.api.session.post(
+                f"{dhis2_client.api.url}/{endpoint}",
+                json=payload,
+                params={"dryRun": dry_run, "importStrategy": f"{strategy}"},
+            )
+
+        return self._build_formatted_response(response=r, strategy=strategy, ou_id=org_unit.id)
+
+    def _build_formatted_response(self, response: requests.Response, strategy: str, ou_id: str) -> dict:
+        """Build a formatted response dictionary from a requests.Response object.
+
+        Parameters
+        ----------
+        response : requests.Response
+            The HTTP response object from the requests library.
+        strategy : str
+            The strategy or action performed.
+        ou_id : str
+            The organisational unit ID related to the response.
+
+        Returns
+        -------
+        dict
+            The action, status code, status, response and OU ID; status/response are None without a JSON object body.
+        """
+        try:
+            body = response.json()  # parsed once and reused for both fields
+        except ValueError:
+            body = None
+        body = body if isinstance(body, dict) else {}
+        head = {"action": strategy, "statusCode": response.status_code}
+        return {**head, "status": body.get("status"), "response": body.get("response"), "ou_id": ou_id}
+
+    def _build_id_indexes(self, ou_source: pd.DataFrame, ou_target: pd.DataFrame, ou_matching_ids: list) -> dict:
+        """Map each matching OU id to its row position in the source and in the target DataFrame.
+
+        Parameters
+        ----------
+        ou_source : pd.DataFrame
+            Source organisation units; must have an 'id' column.
+        ou_target : pd.DataFrame
+            Target organisation units; must have an 'id' column.
+        ou_matching_ids : list
+            Organisation unit ids to look up on both sides.
+
+        Returns
+        -------
+        dict
+            One entry per id found on both sides: `{id: {"source": position, "target": position}}`.
+        """
+        # Position of every id inside its frame (on duplicated ids the last position wins).
+        source_positions = dict(zip(ou_source["id"], range(len(ou_source))))
+        target_positions = dict(zip(ou_target["id"], range(len(ou_target))))
+
+        # Keep only the requested ids that are present on both sides.
+        paired = {}
+        for ou_id in ou_matching_ids:
+            if ou_id in source_positions and ou_id in target_positions:
+                paired[ou_id] = {"source": source_positions[ou_id], "target": target_positions[ou_id]}
+        return paired
diff --git a/d2d_development/d2d_development/push.py b/d2d_development/d2d_development/push.py
new file mode 100644
index 0000000..766d415
--- /dev/null
+++ b/d2d_development/d2d_development/push.py
@@ -0,0 +1,335 @@
+import json
+import logging
+
+import pandas as pd
+import polars as pl
+import requests
+from openhexa.toolbox.dhis2 import DHIS2
+
+from .data_models import DataPointModel
+from .exceptions import PusherError
+from .utils import log_message
+
+
+class DHIS2Pusher:
+    """Main class to handle pushing data to DHIS2."""
+
+    def __init__(
+        self,
+        dhis2_client: DHIS2,
+        import_strategy: str = "CREATE_AND_UPDATE",
+        dry_run: bool = True,
+        max_post: int = 500,
+        logging_interval: int = 50000,
+        mandatory_fields: list[str] | None = None,
+        logger: logging.Logger | None = None,
+    ):
+        # Fail fast on bad configuration instead of in the middle of a push.
+        if import_strategy not in {"CREATE", "UPDATE", "CREATE_AND_UPDATE"}:
+            raise PusherError("Invalid import strategy (use 'CREATE', 'UPDATE' or 'CREATE_AND_UPDATE')")
+        # max_post is the chunk size: 0 makes range() fail and a negative value would silently push nothing.
+        if max_post < 1:
+            raise PusherError(f"Invalid max_post: {max_post} (must be a positive integer)")
+        self.dhis2_client = dhis2_client
+        # Copy a caller-provided list so later changes to it cannot alter the pusher.
+        default_fields = ["dx", "period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
+        self.mandatory_fields = default_fields if mandatory_fields is None else list(mandatory_fields)
+        self.import_strategy = import_strategy
+        self.dry_run = dry_run
+        self.max_post = max_post
+        self.logging_interval = logging_interval
+        self.summary = {}
+        self._reset_summary()
+        self.logger = logger if logger else logging.getLogger(__name__)
+        self.log_function = log_message
+
+    def push_data(
+        self,
+        df_data: pd.DataFrame | pl.DataFrame,
+    ) -> None:
+        """Push formatted data to DHIS2.
+
+        Rows with all mandatory fields set are pushed, rows missing only 'value' are pushed as NA,
+        and all other rows are logged and ignored. Raises PusherError on invalid input.
+        """
+        self._reset_summary()
+        self._set_summary_import_options()
+        # Normalise to polars so the rest of the pipeline handles a single DataFrame type.
+        if isinstance(df_data, pd.DataFrame):
+            df_data = pl.from_pandas(df_data)
+        self._validate_input_data(df_data)
+        if df_data.height == 0:
+            self._log_message("Input DataFrame is empty. No data to push.")
+            return
+        valid, to_delete, to_ignore = self._classify_data_points(df_data)
+        self._push_valid(valid)
+        self._push_to_delete(to_delete)
+        self._log_summary_errors()
+        self._log_ignored_or_na(to_ignore)
+
+    def _validate_input_data(self, df_data: pl.DataFrame) -> None:
+        """Check that the input is a polars DataFrame exposing every mandatory column.
+
+        Raises
+        ------
+            PusherError: If the input is not a polars DataFrame or lacks a mandatory column.
+        """
+        if not isinstance(df_data, pl.DataFrame):
+            raise PusherError("Input data must be a pandas or polars DataFrame.")
+        available = df_data.columns
+        absent = [name for name in self.mandatory_fields if name not in available]
+        if absent:
+            raise PusherError(f"Input data is missing mandatory columns: {', '.join(absent)}")
+
+    def _classify_data_points(self, data_points: pl.DataFrame) -> tuple[pl.DataFrame, pl.DataFrame, pl.DataFrame]:
+        """Split data points into valid, to-delete and to-ignore rows based on null mandatory fields.
+
+        Returns
+        -------
+            tuple: Three DataFrames restricted to the mandatory columns: (valid, to_delete, to_ignore).
+        """
+        fields = self.mandatory_fields
+        key_fields = [name for name in fields if name != "value"]
+
+        # Valid: every mandatory field is set.
+        is_valid = pl.all_horizontal([pl.col(name).is_not_null() for name in fields])
+        # To delete: every mandatory field except 'value' is set, and 'value' itself is null.
+        keys_set = pl.all_horizontal([pl.col(name).is_not_null() for name in key_fields])
+        is_delete = keys_set & pl.col("value").is_null()
+        # To ignore: whatever is neither valid nor to delete.
+        is_ignored = ~is_valid & ~is_delete
+
+        return (
+            data_points.filter(is_valid).select(fields),
+            data_points.filter(is_delete).select(fields),
+            data_points.filter(is_ignored).select(fields),
+        )
+
+    def _set_summary_import_options(self):
+        """Record in the summary the import options that `_post` sends with every request."""
+        options = {"importStrategy": self.import_strategy, "dryRun": self.dry_run}
+        # preheatCache / skipAudit are hardcoded for now, could be made configurable if needed
+        options["preheatCache"] = True
+        options["skipAudit"] = True
+        self.summary["import_options"] = options
+
+    def _push_valid(self, data_points_valid: pl.DataFrame) -> None:
+        """Serialize and push the given valid data points; only logs a message when there are none."""
+        n_valid = len(data_points_valid)
+        if n_valid == 0:
+            self._log_message("No data to push.")
+            return
+        self._log_message(f"Pushing {n_valid} data points.")
+        self._push_data_points(data_point_list=self._serialize_data_points(data_points_valid))
+        self._log_message(f"Data points push summary:  {self.summary['import_counts']}")
+
+    def _push_to_delete(self, data_points_to_delete: pl.DataFrame) -> None:
+        """Log and push the given NA-valued data points; does nothing when the DataFrame is empty."""
+        n_na = data_points_to_delete.height
+        if n_na > 0:
+            self._log_message(f"Pushing {n_na} data points with NA values.")
+            self._log_ignored_or_na(data_points_to_delete, is_na=True)
+            self._push_data_points(data_point_list=self._serialize_data_points(data_points_to_delete))
+            self._log_message(f"Data points delete summary: {self.summary['import_counts']}")
+
+    def _log_ignored_or_na(self, data_points: pl.DataFrame, is_na: bool = False):
+        """Warn about data points that are ignored (or pushed as NA when `is_na` is True), one line per row."""
+        rows = data_points.to_dicts()
+        if not rows:
+            return
+        outcome, label = ("set to NA", "NA") if is_na else ("ignored", "ignored")
+        self._log_message(
+            f"{len(rows)} data points will be  {outcome}. Please check the last execution report for details.",
+            level="warning",
+        )
+        # Row details are logged with log_current_run=False, i.e. they are not sent to the current run.
+        for position, row in enumerate(rows, start=1):
+            details = ", ".join(f"{key}={val}" for key, val in row.items())
+            self._log_message(f"{position}. Data point {label}: {details}", log_current_run=False, level="warning")
+
+    def _log_message(self, message: str, level: str = "info", log_current_run: bool = True, error_details: str = ""):
+        """Forward a message to `self.log_function`, binding this pusher's logger and PusherError."""
+        self.log_function(
+            message=message,
+            level=level,
+            error_details=error_details,
+            log_current_run=log_current_run,
+            logger=self.logger,
+            exception_class=PusherError,
+        )
+
+    def _serialize_data_points(self, data_points: pl.DataFrame) -> list[dict]:
+        """Turn every row of a Polars DataFrame into a data value payload for the DHIS2 API.
+
+        Returns
+        -------
+            list[dict]: One `DataPointModel(...).to_json()` result per row, in row order.
+        """
+        # Columns passed to the model under their own name; 'dx' is the exception and
+        # is handed over as `dataElement`.
+        same_name_fields = ("period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value")
+
+        payloads = []
+        for record in data_points.to_dicts():
+            data_element = record["dx"]
+            shared_kwargs = {name: record[name] for name in same_name_fields}
+            model = DataPointModel(dataElement=data_element, **shared_kwargs)
+            payloads.append(model.to_json())
+        return payloads
+
+    def _log_summary_errors(self):
+        """Log every entry collected under summary['ERRORS'], or a single info line when there are none."""
+        collected = self.summary.get("ERRORS", [])
+        if not collected:
+            self._log_message("No errors found in the summary.")
+            return
+        self._log_message(f"Logging {len(collected)} error(s) from import summary.", level="error")
+        for number, entry in enumerate(collected, start=1):
+            self._log_message(f"Error response {number}: {entry}", level="error")
+
+    def _post(self, chunk: list[dict]) -> requests.Response:
+        """POST one chunk of data values to the DHIS2 `dataValueSets` endpoint.
+
+        Returns
+        -------
+            requests.Response: The raw response; the HTTP status is not checked here.
+        """
+        api = self.dhis2_client.api
+        endpoint = f"{api.url}/dataValueSets"
+        # Same option values as recorded by `_set_summary_import_options`.
+        query = {
+            "dryRun": self.dry_run,
+            "importStrategy": self.import_strategy,
+            "preheatCache": True,
+            "skipAudit": True,
+        }
+        return api.session.post(endpoint, json={"dataValues": chunk}, params=query)
+
+    def _push_data_points(
+        self,
+        data_point_list: list[dict],
+    ) -> None:
+        """Post data points to DHIS2 in chunks of `max_post`, accumulating counts and errors in the summary.
+
+        Server errors (HTTP 5xx) abort the push with a PusherError. Any other request failure is recorded
+        under summary['ERRORS'] and the next chunk is processed.
+        """
+        total_data_points = len(data_point_list)
+        processed_points = 0
+        last_logged_at = 0
+
+        for chunk_id, chunk in enumerate(self._split_list(data_point_list, self.max_post), start=1):
+            r = None
+            request_error = None
+            try:
+                r = self._post(chunk)
+                r.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                self._raise_server_errors(r)  # Stop the process if there's a server error
+                request_error = e
+            # Failed requests (e.g. HTTP 409) can still carry an import summary with counts and conflicts.
+            response = self._safe_json(r)
+            errors = self.summary.setdefault("ERRORS", [])
+            errors_before = len(errors)
+            if response:
+                self._update_import_counts(response)
+            self._extract_conflicts(response)
+            # Never drop a failed chunk silently: if the body gave no conflict details, keep the exception.
+            if request_error is not None and len(errors) == errors_before:
+                errors.append(
+                    {"chunk": chunk_id, "period": chunk[0].get("period", "-"), "exception": str(request_error)}
+                )
+
+            processed_points += len(chunk)
+
+            # Log every logging_interval points
+            if processed_points - last_logged_at >= self.logging_interval:
+                progress_pct = (processed_points / total_data_points) * 100
+                self._log_message(
+                    f"{processed_points} / {total_data_points} data points ({progress_pct:.1f}%) "
+                    f" summary: {self.summary['import_counts']}"
+                )
+                last_logged_at = processed_points
+
+        # Final summary
+        self._log_message(
+            f"{processed_points} / {total_data_points} data points processed."
+            f" Final summary: {self.summary['import_counts']}"
+        )
+
+    def _raise_server_errors(self, r: requests.Response) -> None:
+        """Record and raise a PusherError when the response is a server error (HTTP 5xx); otherwise do nothing."""
+        if r is None or not (500 <= r.status_code < 600):
+            return
+        body = self._safe_json(r)
+        # Prefer the server's own message when the JSON body carries one.
+        if body and "message" in body:
+            reason = body["message"]
+        else:
+            reason = f"HTTP {r.status_code} error with no message"
+
+        self.summary["ERRORS"].append(
+            {"server_error_code": f"{r.status_code}", "message": f"Server error: {reason}"}
+        )
+        raise PusherError(f"Server error: {reason}") from None
+
+    def _reset_summary(self) -> None:
+        """Start a fresh summary: zeroed import counts, no import options and no errors."""
+        # The count keys are the ones `_update_import_counts` accumulates.
+        zeroed_counts = dict.fromkeys(("imported", "updated", "ignored", "deleted"), 0)
+
+        self.summary = {"import_counts": zeroed_counts, "import_options": {}, "ERRORS": []}
+
+    def _split_list(self, src_list: list, length: int):
+        """Yield consecutive slices of `src_list`, each holding at most `length` items.
+
+        Yields:
+            list: The next chunk; only the last one may be shorter than `length`.
+        """
+        chunk_starts = range(0, len(src_list), length)
+        yield from (src_list[start : start + length] for start in chunk_starts)
+
+    def _safe_json(self, r: requests.Response) -> dict | None:
+        """Return the decoded JSON body of `r`, or None when `r` is None or its body is not valid JSON."""
+        if r is not None:
+            try:
+                return r.json()
+            except (ValueError, json.JSONDecodeError):  # body is not JSON: fall through to None
+                pass
+        return None
+
+    def _update_import_counts(self, response: dict) -> None:
+        """Add a response's `importCount` (top-level or nested under 'response') to the summary counts."""
+        if not response:
+            return
+        import_counts = {}
+        if "importCount" in response:
+            import_counts = response["importCount"]
+        elif "response" in response and "importCount" in response["response"]:
+            import_counts = response["response"]["importCount"]
+        for key in ("imported", "updated", "ignored", "deleted"):
+            self.summary["import_counts"][key] += import_counts.get(key, 0)
+
+    def _extract_conflicts(self, response: dict) -> None:
+        """Collect all conflicts and errorReports of a DHIS2 API response into summary['ERRORS'].
+
+        Reads both the top-level node and the nested 'response' node. The response itself is left
+        untouched, and missing or null nodes are treated as empty.
+
+        Parameters
+        ----------
+        response : dict
+            The JSON response from DHIS2 after an import.
+        """
+        if not response:
+            return
+        nested = response.get("response") if isinstance(response.get("response"), dict) else {}
+
+        # Build a new list: `+=` on the lists held by `response` would modify the caller's payload in place.
+        all_errors = []
+        for key in ("conflicts", "errorReports"):
+            all_errors.extend(response.get(key) or [])
+            all_errors.extend(nested.get(key) or [])
+
+        if all_errors:
+            self.summary.setdefault("ERRORS", []).extend(all_errors)
diff --git a/d2d_development/d2d_development/utils.py b/d2d_development/d2d_development/utils.py
new file mode 100644
index 0000000..fceb493
--- /dev/null
+++ b/d2d_development/d2d_development/utils.py
@@ -0,0 +1,89 @@
+import logging
+import tempfile
+from pathlib import Path
+
+import pandas as pd
+import polars as pl
+from openhexa.sdk import current_run
+
+
+def log_message(
+    logger: logging.Logger,
+    message: str,
+    error_details: str = "",
+    log_current_run: bool = True,
+    level: str = "info",
+    exception_class: type[Exception] = Exception,
+) -> None:
+    """Log a message to both the current run and the configured logger.
+
+    Parameters
+    ----------
+    logger : logging.Logger
+        The logger to use for logging the message.
+    message : str
+        The message to log.
+    error_details : str, optional
+        Extra details appended to the logger output (not to the current run) for 'error' logs, by default "".
+    log_current_run : bool, optional
+        Whether to log the message to the current run, by default True.
+    level : str, optional
+        The logging level ('info', 'warning', 'error'), by default 'info'.
+    exception_class : type[Exception], optional
+        The exception class raised for invalid logging levels, by default Exception.
+
+    Raises
+    ------
+    Exception
+        `exception_class`, when `level` is not one of the supported levels (nothing is logged then).
+    """
+    if level not in {"info", "warning", "error"}:
+        raise exception_class(f"Invalid logging level: {level}")
+
+    # Only add the "Details:" suffix when there are details, so plain error messages stay clean.
+    logger_message = message
+    if level == "error" and error_details:
+        logger_message = f"{message} Details: {error_details}"
+    getattr(logger, level)(logger_message)
+
+    # Log to current_run only if it exists
+    if log_current_run and "current_run" in globals() and current_run is not None:
+        getattr(current_run, f"log_{level}")(message)
+
+
+def save_to_parquet(data: pl.DataFrame | pd.DataFrame, filename: Path) -> None:
+    """Write a Pandas or Polars DataFrame to Parquet via a temporary file and an atomic replace.
+
+    Args:
+        data (Union[pl.DataFrame, pd.DataFrame]): The DataFrame to save.
+        filename (Path): Destination of the Parquet file; the temporary file is created next to it.
+
+    Raises:
+        Exception: If `data` is not a DataFrame or the write fails. The underlying error (a ValueError
+            for an invalid `data` type) is chained as the cause.
+    """
+    temp_filename = None
+    try:
+        # Raised inside the try on purpose: the handler below re-raises it wrapped in an Exception.
+        is_polars = isinstance(data, pl.DataFrame)
+        if not (is_polars or isinstance(data, pd.DataFrame)):
+            raise ValueError("The 'data' parameter must be a Pandas or Polars DataFrame.")
+
+        # Reserve a uniquely named temporary file in the destination directory.
+        with tempfile.NamedTemporaryFile(suffix=".parquet", dir=filename.parent, delete=False) as tmp_file:
+            temp_filename = Path(tmp_file.name)
+
+        if is_polars:
+            data.write_parquet(temp_filename)
+        else:
+            data.to_parquet(temp_filename, index=False)
+
+        # Swap the finished file into place, then mark it as moved so the handler leaves it alone.
+        temp_filename.replace(filename)
+        temp_filename = None
+
+    except Exception as e:
+        # Remove the leftover temporary file, if one was created and still exists.
+        if temp_filename is not None and temp_filename.exists():
+            temp_filename.unlink()
+        raise Exception(f"Failed to save parquet file to {filename}") from e
diff --git a/d2d_development/pyproject.toml b/d2d_development/pyproject.toml
new file mode 100644
index 0000000..96cf936
--- /dev/null
+++ b/d2d_development/pyproject.toml
@@ -0,0 +1,95 @@
+[build-system]
+# These lines tell pip what tools are needed to actually build your package.
+# This is standard for almost all modern Python projects.
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "d2d-development"
+version = "0.0.0"
+description = "OpenHEXA DHIS2 to DHIS2 development utility library maintained by Bluesquare Data Services team."
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "openhexa-toolbox>=2.0.0",
+    "openhexa.sdk>=1.0.0",  # Baseline version for safety
+    "requests>=2.31.0",     # Baseline for modern security/features
+    "pandas>=2.2.0",        # Use >=2.2 for compatibility
+    "polars>=1.0.0"
+]
+
+[tool.setuptools]
+package-dir = {"" = ""}
+
+[tool.setuptools.packages.find]
+# Ensures it finds the 'd2d_development' package folder in the project root
+where = ["."]
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.format]
+docstring-code-format = true
+docstring-code-line-length = 120
+
+[tool.ruff.lint]
+preview = true
+select = [
+    "F", # Pyflakes
+    "E", # pycodestyle
+    "I", # isort
+    "D", # pydocstyle
+    "UP", # pyupgrade
+    "ANN", # flake8-annotations
+    "B", # bugbear
+    "A", # flake8-builtins
+    "COM", # flake8-commas
+    "FA", # flake8-future-annotations
+    "PT", # flake8-pytest-style
+    "Q", # flake8-quotes
+    "RET", # flake8-return
+    "SIM", # flake8-simplify
+    "PTH", # flake8-use-pathlib
+    "NPY", # NumPy rules
+    "PD", # pandas rules
+    "N", # pep8-naming
+    "DOC", # pydoclint
+    "PLC", # pylint convention
+    "PLE", # pylint error
+    "PLW", # pylint warning
+    "RUF", # ruff specific rules
+]
+
+ignore = [
+    "D100", # Missing docstring in public module
+    "D104", # Missing docstring in public package
+    "D105", # Missing docstring in magic method
+    "D106", # Missing docstring in public nested class
+    "D107", # Missing docstring in __init__
+    "D401", # First line should be in imperative mood
+    "D413", # Missing blank line after last section
+    "D203", # 1 blank line required before class docstring
+    "SIM108", # Use ternary operators
+    "SIM102", # Use a single if statement instead of nested if statements
+    "SIM114", # Combine `if` branches
+    "DOC501", # Raised exception {id} missing from docstring	
+    "DOC502", # Raised exception is not explicitly raised: {id}	
+    "RUF022", # `__all__` is not sorted
+    "RUF005", # Consider expression instead of concatenation
+    "PD901", # Avoid using the generic variable name df for dataframes
+    "PLR0904", # Too many public methods ({methods} > {max_methods})
+    "PLR0911", # Too many return statements ({returns} > {max_returns})
+    "PLR0912", # Too many branches ({branches} > {max_branches})
+    "PLR0913", # Too many arguments ({arguments} > {max_arguments})
+    "PLR0914", # Too many local variables ({variables} > {max_variables})
+    "PLR0915", # Too many statements ({statements} > {max_statements})
+    "PLR0916", # Too many Boolean expressions ({expressions} > {max_expressions})
+    "PLR1702", # Too many nested blocks ({blocks} > {max_blocks}),
+    "COM812", # Missing trailing comma
+]
+
+[tool.ruff.lint.flake8-annotations]
+allow-star-arg-any = true
+mypy-init-return = true
+suppress-dummy-args = true
+suppress-none-returning = true
diff --git a/d2d_development/tests/__init__.py b/d2d_development/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/d2d_development/tests/mock_dhis2_get.py b/d2d_development/tests/mock_dhis2_get.py
new file mode 100644
index 0000000..0c18c7f
--- /dev/null
+++ b/d2d_development/tests/mock_dhis2_get.py
@@ -0,0 +1,238 @@
+class MockDataValueSets:
+    """Mock of the DHIS2 client's `data_value_sets` endpoint that returns fixed data values for tests."""
+
+    def get(self, data_elements=None, periods=None, org_units=None, last_updated=None) -> list[dict]:  # noqa: ANN001
+        """Return a fixed list of mock data values; every argument is accepted but ignored.
+
+        Returns
+        -------
+        list[dict]
+            Nine data value dictionaries shaped like the entries of a DHIS2 dataValueSets response.
+        """
+        # Fixture rows: 3 with every data field set, 1 with a null value, and 5 with one null key field each.
+        # You can customize the returned data for your tests
+        return [
+            {
+                "dataElement": "AAA111",
+                "period": "202501",
+                "orgUnit": "ORG001",
+                "categoryOptionCombo": "CAT001",
+                "attributeOptionCombo": "ATTR001",
+                "value": "12",
+                "storedBy": "user1",
+                "created": "2025-01-01T10:00:00.000+0000",
+                "lastUpdated": "2025-01-01T10:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "BBB222",
+                "period": "202501",
+                "orgUnit": "ORG002",
+                "categoryOptionCombo": "CAT002",
+                "attributeOptionCombo": "ATTR002",
+                "value": "18",
+                "storedBy": "user2",
+                "created": "2025-01-02T11:00:00.000+0000",
+                "lastUpdated": "2025-01-02T11:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "CCC333",
+                "period": "202501",
+                "orgUnit": "ORG003",
+                "categoryOptionCombo": "CAT003",
+                "attributeOptionCombo": "ATTR003",
+                "value": "25",
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "DELETE1",
+                "period": "202501",
+                "orgUnit": "ORG004",
+                "categoryOptionCombo": "CAT004",
+                "attributeOptionCombo": "ATTR004",
+                "value": None,
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "INVALID1",
+                "period": None,
+                "orgUnit": "ORG005",
+                "categoryOptionCombo": "CAT005",
+                "attributeOptionCombo": "ATTR005",
+                "value": "55.0",
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "INVALID2",
+                "period": "202501",
+                "orgUnit": None,
+                "categoryOptionCombo": "CAT005",
+                "attributeOptionCombo": "ATTR005",
+                "value": "55.0",
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "INVALID3",
+                "period": "202501",
+                "orgUnit": "ORG005",
+                "categoryOptionCombo": None,
+                "attributeOptionCombo": "ATTR005",
+                "value": "55.0",
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": "INVALID4",
+                "period": "202501",
+                "orgUnit": "ORG005",
+                "categoryOptionCombo": "CAT005",
+                "attributeOptionCombo": None,
+                "value": "55.0",
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+            {
+                "dataElement": None,
+                "period": "202501",
+                "orgUnit": "ORG006",
+                "categoryOptionCombo": "CAT006",
+                "attributeOptionCombo": "ATTR006",
+                "value": "55.0",
+                "storedBy": "user3",
+                "created": "2025-01-03T12:00:00.000+0000",
+                "lastUpdated": "2025-01-03T12:05:00.000+0000",
+                "comment": None,
+                "followup": False,
+            },
+        ]
+
+
+class MockAnalytics:
+    """Mock of the DHIS2 client's `analytics` endpoint that returns fixed analytics rows for tests."""
+
+    def get(self, indicators=None, data_elements=None, periods=None, org_units=None, include_cocs=False) -> list[dict]:  # noqa: ANN001
+        """Return fixed analytics rows; only the truthiness of `data_elements` and `include_cocs` matters.
+
+        Returns
+        -------
+        list[dict]
+            Three rows: the `data_elements` set if it is truthy, else rows with 'co' if `include_cocs`, else indicators.
+        """
+        if data_elements:
+            return [
+                {
+                    "dx": "AAA111.REPORTING_RATE",
+                    "pe": "202409",
+                    "ou": "OU001",
+                    "value": "100",
+                },
+                {
+                    "dx": "BBB222.EXPECTED_REPORTS",
+                    "pe": "202409",
+                    "ou": "OU002",
+                    "value": "0",
+                },
+                {
+                    "dx": "CCC333.REPORTING_RATE",
+                    "pe": "202409",
+                    "ou": "OU003",
+                    "value": "100",
+                },
+            ]
+
+        if include_cocs:
+            return [
+                {
+                    "dx": "DATAELEMENT1",
+                    "pe": "202501",
+                    "ou": "ORG001",
+                    "co": "COC001",
+                    "value": "6.0",
+                },
+                {
+                    "dx": "DATAELEMENT2",
+                    "pe": "202501",
+                    "ou": "ORG002",
+                    "co": "COC002",
+                    "value": "7.0",
+                },
+                {
+                    "dx": "DATAELEMENT3",
+                    "pe": "202501",
+                    "ou": "ORG003",
+                    "co": "COC003",
+                    "value": "8.0",
+                },
+            ]
+        return [
+            {
+                "dx": "INDICATOR1",
+                "pe": "202501",
+                "ou": "ORG001",
+                "value": "5.0",
+            },
+            {
+                "dx": "INDICATOR2",
+                "pe": "202501",
+                "ou": "ORG002",
+                "value": "7.0",
+            },
+            {
+                "dx": "INDICATOR3",
+                "pe": "202501",
+                "ou": "ORG003",
+                "value": "9.0",
+            },
+        ]
+
+
+class MockSession:
+    """Stand-in for the `requests.Session` used by the mock DHIS2 API in tests."""
+
+    def post(self, *args, **kwargs: object) -> None:  # noqa: ANN002
+        """Placeholder for a POST request: accepts any arguments, does nothing and returns None."""
+        # This will be patched in your test
+        pass
+
+
+class MockAPI:
+    """Stand-in for the DHIS2 API object: carries a mock session and a fake base URL."""
+
+    def __init__(self):
+        self.url = "https://mock-dhis2-instance.org/api"
+        self.session = MockSession()
+
+
+class MockDHIS2Client:
+    """Stand-in for the DHIS2 client, wired with the mock endpoints defined in this module."""
+
+    def __init__(self):
+        self.api = MockAPI()
+        self.session = MockSession()
+        self.analytics = MockAnalytics()
+        self.data_value_sets = MockDataValueSets()
diff --git a/d2d_development/tests/mock_dhis2_post.py b/d2d_development/tests/mock_dhis2_post.py
new file mode 100644
index 0000000..8e937e8
--- /dev/null
+++ b/d2d_development/tests/mock_dhis2_post.py
@@ -0,0 +1,484 @@
+import requests
+
+
+class MockDHIS2Response:
+    """Mock DHIS2 API response: holds a JSON payload and status code; raise_for_status() raises on non-2xx."""
+
+    def __init__(self, json_data, status_code=200):  # noqa: ANN001
+        self._json_data = json_data
+        self.status_code = status_code
+
+    def json(self) -> dict:  # noqa: D102
+        return self._json_data
+
+    def raise_for_status(self):  # noqa: D102
+        if not (200 <= self.status_code < 300):  # any status outside 200-299 is reported as an HTTPError
+            raise requests.exceptions.HTTPError(f"HTTP {self.status_code}")
+
+
+MOCK_DHIS2_ERROR_503_RESPONSE = {  # 503 error payload; it carries no "response" import summary
+    "httpStatus": "Service Unavailable",
+    "httpStatusCode": 503,
+    "status": "ERROR",
+    "message": "Service temporarily unavailable",
+}
+
+# Example 200 OK import summary (dry run, one value imported, no conflicts), DHIS2 version: '2.40.9'
+MOCK_DHIS2_OK_RESPONSE = {
+    "httpStatus": "OK",
+    "httpStatusCode": 200,
+    "status": "OK",
+    "message": "Import was successful.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "SUCCESS",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {"imported": 1, "updated": 0, "ignored": 0, "deleted": 0},
+        "conflicts": [],
+        "rejectedIndexes": [],
+        "dataSetComplete": "false",
+    },
+}
+
+# Example 409 conflict response, DHIS2 version '2.40.9': indexes 1-2 rejected: unknown data elements (E7610)
+MOCK_DHIS2_ERROR_409_RESPONSE_DE = {
+    "httpStatus": "Conflict",
+    "httpStatusCode": 409,
+    "status": "WARNING",
+    "message": "One more conflicts encountered, please check import summary.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "WARNING",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {
+            "imported": 1,
+            "updated": 0,
+            "ignored": 2,
+            "deleted": 0,
+        },
+        "conflicts": [
+            {
+                "object": "INVALID_1",
+                "objects": {"dataElement": "INVALID_1"},
+                "value": "Data element not found or not accessible: `INVALID_1`",
+                "errorCode": "E7610",
+                "property": "dataElement",
+                "indexes": [1],
+            },
+            {
+                "object": "INVALID_2",
+                "objects": {"dataElement": "INVALID_2"},
+                "value": "Data element not found or not accessible: `INVALID_2`",
+                "errorCode": "E7610",
+                "property": "dataElement",
+                "indexes": [2],
+            },
+        ],
+        "rejectedIndexes": [1, 2],
+        "dataSetComplete": "false",
+    },
+}
+
+# Example 409 conflict response, DHIS2 version '2.40.9': indexes 1-2 rejected: unknown organisation units (E7612)
+MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS = {
+    "httpStatus": "Conflict",
+    "httpStatusCode": 409,
+    "status": "WARNING",
+    "message": "One more conflicts encountered, please check import summary.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "WARNING",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {
+            "imported": 1,
+            "updated": 0,
+            "ignored": 2,
+            "deleted": 0,
+        },
+        "conflicts": [
+            {
+                "object": "INVALID_1_OU",
+                "objects": {"organisationUnit": "INVALID_1_OU"},
+                "value": "Organisation unit not found or not accessible: `INVALID_1_OU`",
+                "errorCode": "E7612",
+                "property": "orgUnit",
+                "indexes": [1],
+            },
+            {
+                "object": "INVALID_2_OU",
+                "objects": {"organisationUnit": "INVALID_2_OU"},
+                "value": "Organisation unit not found or not accessible: `INVALID_2_OU`",
+                "errorCode": "E7612",
+                "property": "orgUnit",
+                "indexes": [2],
+            },
+        ],
+        "rejectedIndexes": [1, 2],
+        "dataSetComplete": "false",
+    },
+}
+
+# Example 409 conflict response, DHIS2 version '2.40.9': indexes 1-2 rejected: invalid periods (E7611)
+MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD = {
+    "httpStatus": "Conflict",
+    "httpStatusCode": 409,
+    "status": "WARNING",
+    "message": "One more conflicts encountered, please check import summary.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "WARNING",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {
+            "imported": 1,
+            "updated": 0,
+            "ignored": 2,
+            "deleted": 0,
+        },
+        "conflicts": [
+            {
+                "object": "INVALID_PERIOD_1",
+                "objects": {"period": "INVALID_PERIOD_1"},
+                "value": "Period not valid: `INVALID_PERIOD_1`",
+                "errorCode": "E7611",
+                "property": "period",
+                "indexes": [1],
+            },
+            {
+                "object": "INVALID_PERIOD_2",
+                "objects": {"period": "INVALID_PERIOD_2"},
+                "value": "Period not valid: `INVALID_PERIOD_2`",
+                "errorCode": "E7611",
+                "property": "period",
+                "indexes": [2],
+            },
+        ],
+        "rejectedIndexes": [1, 2],
+        "dataSetComplete": "false",
+    },
+}
+
+# Example 409 conflict response, DHIS2 version '2.40.9': indexes 1-2 rejected: unknown category option combos (E7613)
+MOCK_DHIS2_ERROR_409_RESPONSE_COC = {
+    "httpStatus": "Conflict",
+    "httpStatusCode": 409,
+    "status": "WARNING",
+    "message": "One more conflicts encountered, please check import summary.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "WARNING",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {
+            "imported": 1,
+            "updated": 0,
+            "ignored": 2,
+            "deleted": 0,
+        },
+        "conflicts": [
+            {
+                "object": "INVALID_COC_1",
+                "objects": {"categoryOptionCombo": "INVALID_COC_1"},
+                "value": "Category option combo not found or not accessible for writing data: `INVALID_COC_1`",
+                "errorCode": "E7613",
+                "property": "categoryOptionCombo",
+                "indexes": [1],
+            },
+            {
+                "object": "INVALID_COC_2",
+                "objects": {"categoryOptionCombo": "INVALID_COC_2"},
+                "value": "Category option combo not found or not accessible for writing data: `INVALID_COC_2`",
+                "errorCode": "E7613",
+                "property": "categoryOptionCombo",
+                "indexes": [2],
+            },
+        ],
+        "rejectedIndexes": [1, 2],
+        "dataSetComplete": "false",
+    },
+}
+
+# Example 409 conflict response, DHIS2 version '2.40.9': indexes 1-2 rejected: unknown attribute option combos (E7615)
+MOCK_DHIS2_ERROR_409_RESPONSE_AOC = {
+    "httpStatus": "Conflict",
+    "httpStatusCode": 409,
+    "status": "WARNING",
+    "message": "One more conflicts encountered, please check import summary.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "WARNING",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {
+            "imported": 1,
+            "updated": 0,
+            "ignored": 2,
+            "deleted": 0,
+        },
+        "conflicts": [
+            {
+                "object": "INVALID_AOC_1",
+                "objects": {"categoryOptionCombo": "INVALID_AOC_1"},
+                "value": "Attribute option combo not found or not accessible for writing data: `INVALID_AOC_1`",
+                "errorCode": "E7615",
+                "property": "attributeOptionCombo",
+                "indexes": [1],
+            },
+            {
+                "object": "INVALID_AOC_2",
+                "objects": {"categoryOptionCombo": "INVALID_AOC_2"},
+                "value": "Attribute option combo not found or not accessible for writing data: `INVALID_AOC_2`",
+                "errorCode": "E7615",
+                "property": "attributeOptionCombo",
+                "indexes": [2],
+            },
+        ],
+        "rejectedIndexes": [1, 2],
+        "dataSetComplete": "false",
+    },
+}
+
+# Example 409 conflict response, DHIS2 version '2.40.9': index 1 rejected: value not matching value type (E7619)
+MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT = {
+    "httpStatus": "Conflict",
+    "httpStatusCode": 409,
+    "status": "WARNING",
+    "message": "One more conflicts encountered, please check import summary.",
+    "response": {
+        "responseType": "ImportSummary",
+        "status": "WARNING",
+        "importOptions": {
+            "idSchemes": {},
+            "dryRun": True,
+            "preheatCache": True,
+            "async": False,
+            "importStrategy": "CREATE_AND_UPDATE",
+            "mergeMode": "REPLACE",
+            "reportMode": "FULL",
+            "skipExistingCheck": False,
+            "sharing": False,
+            "skipNotifications": False,
+            "skipAudit": True,
+            "datasetAllowsPeriods": False,
+            "strictPeriods": False,
+            "strictDataElements": False,
+            "strictCategoryOptionCombos": False,
+            "strictAttributeOptionCombos": False,
+            "strictOrganisationUnits": False,
+            "strictDataSetApproval": False,
+            "strictDataSetLocking": False,
+            "strictDataSetInputPeriods": False,
+            "requireCategoryOptionCombo": False,
+            "requireAttributeOptionCombo": False,
+            "skipPatternValidation": False,
+            "ignoreEmptyCollection": False,
+            "force": False,
+            "firstRowIsHeader": True,
+            "skipLastUpdated": False,
+            "mergeDataValues": False,
+            "skipCache": False,
+        },
+        "description": "Import process completed successfully",
+        "importCount": {
+            "imported": 2,
+            "updated": 0,
+            "ignored": 1,
+            "deleted": 0,
+        },
+        "conflicts": [
+            {
+                "object": "VALID2",
+                "objects": {"dataElement": "VALID2", "value": "value_not_numeric"},
+                "value": "Value must match value type of data element `VALID2`: `La valeur est non numérique`",
+                "errorCode": "E7619",
+                "property": "value",
+                "indexes": [1],
+            }
+        ],
+        "rejectedIndexes": [1],
+        "dataSetComplete": "false",
+    },
+}
diff --git a/d2d_development/tests/test_data_point.py b/d2d_development/tests/test_data_point.py
new file mode 100644
index 0000000..824ba02
--- /dev/null
+++ b/d2d_development/tests/test_data_point.py
@@ -0,0 +1,77 @@
+import polars as pl
+
+from d2d_development.data_models import DataPointModel
+from tests.mock_dhis2_get import MockDHIS2Client
+
+
+def test_data_point_model_to_str():
+    """Test that str() of a DataPointModel renders each field as a `name=value` pair."""
+    single_point = DataPointModel(
+        dataElement="de1",
+        period="202601",
+        orgUnit="OU1",
+        categoryOptionCombo="coc1",
+        attributeOptionCombo="aoc1",
+        value="100.2",
+    )
+
+    assert "dataElement=de1" in str(single_point)
+    assert "period=202601" in str(single_point)
+    assert "orgUnit=OU1" in str(single_point)
+    assert "categoryOptionCombo=coc1" in str(single_point)
+    assert "attributeOptionCombo=aoc1" in str(single_point)
+    assert "value=100.2" in str(single_point)
+
+
+def test_data_point_model_to_json():
+    """Test that DataPointModel.to_json() returns the field values it was built from (first mocked row)."""
+    data_elements = pl.DataFrame(MockDHIS2Client().data_value_sets.get())
+    single_point = DataPointModel(
+        dataElement=data_elements[0]["dataElement"].item(),
+        period=data_elements[0]["period"].item(),
+        orgUnit=data_elements[0]["orgUnit"].item(),
+        categoryOptionCombo=data_elements[0]["categoryOptionCombo"].item(),
+        attributeOptionCombo=data_elements[0]["attributeOptionCombo"].item(),
+        value=data_elements[0]["value"].item(),
+    )
+
+    payload = single_point.to_json()
+    assert payload["dataElement"] == data_elements[0]["dataElement"].item()
+    assert payload["period"] == data_elements[0]["period"].item()
+    assert payload["orgUnit"] == data_elements[0]["orgUnit"].item()
+    assert payload["categoryOptionCombo"] == data_elements[0]["categoryOptionCombo"].item()
+    assert payload["attributeOptionCombo"] == data_elements[0]["attributeOptionCombo"].item()
+    assert payload["value"] == data_elements[0]["value"].item()
+
+
+def test_data_point_model_to_json_delete():
+    """Test that a data point with a null value is serialized as a deletion (empty value plus comment)."""
+    data_elements = pl.DataFrame(MockDHIS2Client().data_value_sets.get()).slice(2, 2)
+
+    # Null out the second sliced row (DELETE1, index 1) to simulate a deleted value; the slice has only rows 0 and 1
+    data_elements = data_elements.with_columns(
+        pl.when(pl.arange(0, data_elements.height) == 1).then(None).otherwise(pl.col("value")).alias("value")
+    )
+    points_list = [
+        DataPointModel(
+            dataElement=row["dataElement"],
+            period=row["period"],
+            orgUnit=row["orgUnit"],
+            categoryOptionCombo=row["categoryOptionCombo"],
+            attributeOptionCombo=row["attributeOptionCombo"],
+            value=row["value"],
+        ).to_json()
+        for row in data_elements.to_dicts()
+    ]
+
+    assert len(points_list) == 2
+    assert points_list[0]["dataElement"] == "CCC333"
+    assert points_list[0]["period"] == "202501"
+    assert points_list[0]["orgUnit"] == "ORG003"
+    assert points_list[0]["categoryOptionCombo"] == "CAT003"
+    assert points_list[0]["attributeOptionCombo"] == "ATTR003"
+    assert points_list[0]["value"] == "25"
+    assert points_list[0].get("comment") is None
+    assert points_list[1]["dataElement"] == "DELETE1"
+    assert not points_list[1]["value"]
+    assert points_list[1]["comment"] == "deleted value"
diff --git a/d2d_development/tests/test_extract.py b/d2d_development/tests/test_extract.py
new file mode 100644
index 0000000..8fd1175
--- /dev/null
+++ b/d2d_development/tests/test_extract.py
@@ -0,0 +1,259 @@
+import time
+from unittest.mock import patch
+
+import polars as pl
+
+from d2d_development.extract import DHIS2Extractor
+from tests.mock_dhis2_get import MockDHIS2Client
+
+
+def test_extract_map_data_elements():
+    """Test that extracted data elements are mapped to the expected 9-column DataFrame layout."""
+    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).data_elements._retrieve_data(
+        data_elements=[], org_units=[], period="202501"
+    )
+    assert isinstance(result, pl.DataFrame)
+    assert result.shape == (9, 9)
+    assert result.columns == [
+        "dataType",
+        "dx",
+        "period",
+        "orgUnit",
+        "categoryOptionCombo",
+        "attributeOptionCombo",
+        "rateMetric",
+        "domainType",
+        "value",
+    ]
+    assert set(result["dataType"]) == {"DATA_ELEMENT"}
+    assert set(result["dx"].drop_nulls()) == {  # nulls excluded; only distinct non-null ids are compared
+        "AAA111",
+        "BBB222",
+        "CCC333",
+        "DELETE1",
+        "INVALID1",
+        "INVALID2",
+        "INVALID3",
+        "INVALID4",
+    }
+    assert set(result["period"].drop_nulls()) == {"202501"}
+    assert set(result["orgUnit"].drop_nulls()) == {
+        "ORG001",
+        "ORG003",
+        "ORG005",
+        "ORG006",
+        "ORG002",
+        "ORG004",
+    }
+    assert set(result["categoryOptionCombo"].drop_nulls()) == {
+        "CAT006",
+        "CAT005",
+        "CAT003",
+        "CAT002",
+        "CAT001",
+        "CAT004",
+    }
+    assert set(result["attributeOptionCombo"].drop_nulls()) == {
+        "ATTR001",
+        "ATTR002",
+        "ATTR003",
+        "ATTR004",
+        "ATTR005",
+        "ATTR006",
+    }
+    assert set(result["rateMetric"]) == {None}  # rateMetric is expected to be null on every row
+    assert set(result["domainType"]) == {"AGGREGATED"}
+    assert set(result["value"].drop_nulls()) == {"12", "18", "25", "55.0"}
+
+
+def test_extract_map_reporting_rates():
+    """Test reporting-rate mapping: each `<id>.<metric>` input is expected as separate dx and rateMetric values."""
+    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).reporting_rates._retrieve_data(
+        reporting_rates=["AAA111.REPORTING_RATE", "BBB222.EXPECTED_REPORTS", "CCC333.REPORTING_RATE"],
+        org_units=[],
+        period="202409",
+    )
+    assert isinstance(result, pl.DataFrame)
+    assert result.shape == (3, 9)
+    assert result.columns == [
+        "dataType",
+        "dx",
+        "period",
+        "orgUnit",
+        "categoryOptionCombo",
+        "attributeOptionCombo",
+        "rateMetric",
+        "domainType",
+        "value",
+    ]
+    assert result["dataType"].unique().to_list() == ["REPORTING_RATE"]
+    assert result["dx"].to_list() == ["AAA111", "BBB222", "CCC333"]
+    assert result["period"].to_list() == ["202409", "202409", "202409"]
+    assert result["orgUnit"].to_list() == ["OU001", "OU002", "OU003"]
+    assert result["categoryOptionCombo"].to_list() == [None, None, None]
+    assert result["attributeOptionCombo"].to_list() == [None, None, None]
+    assert result["rateMetric"].to_list() == ["REPORTING_RATE", "EXPECTED_REPORTS", "REPORTING_RATE"]
+    assert result["domainType"].to_list() == ["AGGREGATED", "AGGREGATED", "AGGREGATED"]
+    assert result["value"].to_list() == ["100", "0", "100"]
+
+
+def test_extract_map_indicator():
+    """Test that indicators are mapped to the 9-column layout with null COC, AOC and rateMetric values."""
+    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).indicators._retrieve_data(
+        indicators=["INDICATOR1", "INDICATOR2", "INDICATOR3"], org_units=[], period="202501"
+    )
+    assert isinstance(result, pl.DataFrame)
+    assert result.shape == (3, 9)
+    assert result.columns == [
+        "dataType",
+        "dx",
+        "period",
+        "orgUnit",
+        "categoryOptionCombo",
+        "attributeOptionCombo",
+        "rateMetric",
+        "domainType",
+        "value",
+    ]
+    assert result["dataType"].unique().to_list() == ["INDICATOR"]
+    assert result["dx"].to_list() == ["INDICATOR1", "INDICATOR2", "INDICATOR3"]
+    assert result["period"].to_list() == ["202501", "202501", "202501"]
+    assert result["orgUnit"].to_list() == ["ORG001", "ORG002", "ORG003"]
+    assert result["categoryOptionCombo"].to_list() == [None, None, None]
+    assert result["attributeOptionCombo"].to_list() == [None, None, None]
+    assert result["rateMetric"].to_list() == [None, None, None]
+    assert result["domainType"].to_list() == ["AGGREGATED", "AGGREGATED", "AGGREGATED"]
+    assert result["value"].to_list() == ["5.0", "7.0", "9.0"]
+
+
+def test_extract_download_replace_no_file(tmp_path):  # noqa: ANN001
+    """Test that DOWNLOAD_REPLACE mode downloads and saves data to a Parquet file when none exists yet."""
+    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE")
+    filename = "test_extract_202501.parquet"
+
+    # Call download_period against the pytest-provided tmp_path directory
+    result_path = extractor.data_elements.download_period(
+        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+    )
+
+    # Assert the file was created under the requested name
+    assert result_path.exists()
+    assert result_path.name == filename
+
+
+def test_download_replace_replaces_file_and_logs(tmp_path):  # noqa: ANN001
+    """Test DOWNLOAD_REPLACE mode, replaces the file if it already exists and logs the replacement."""
+    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE")
+    output_dir = tmp_path
+    period = "202501"
+    filename = "test_extract.parquet"
+
+    # First call creates the file
+    file_path = extractor.data_elements.download_period(
+        data_elements=[], org_units=[], period=period, output_dir=output_dir, filename=filename
+    )
+    assert file_path.exists()
+    mtime_before = file_path.stat().st_mtime
+
+    time.sleep(1)  # Ensure the filesystem timestamp will change
+
+    # Patch extractor.logger.info to capture log messages
+    with patch.object(extractor.logger, "info") as mock_log:
+        # Second call should replace the file and log the replacement
+        extractor.data_elements.download_period(
+            data_elements=[], org_units=[], period=period, output_dir=output_dir, filename=filename
+        )
+        mtime_after = file_path.stat().st_mtime
+        # Check that the log message about replacing the extract was called
+        found = any("Replacing extract for period 202501" in str(call.args[0]) for call in mock_log.call_args_list)
+        assert found, "Expected log message about replacing extract not found"
+        # Check that the file was actually replaced (mtime changed)
+        assert mtime_after > mtime_before, "File was not actually replaced"
+
+
+def test_extract_download_new_file_exists(tmp_path):  # noqa: ANN001
+    """Test DOWNLOAD_NEW mode, creates a new file if it does not exist, and skips if it does."""
+    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_NEW", return_existing_file=True)
+    filename = "test_extract_202501.parquet"
+
+    # First call: file is created
+    result_new_path = extractor.data_elements.download_period(
+        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+    )
+    assert result_new_path.exists()
+    assert result_new_path.name == filename
+
+    # Second call: should skip the download, log the skip message and return the existing path
+    with patch.object(extractor.logger, "info") as mock_log:
+        result_path = extractor.data_elements.download_period(
+            data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        )
+        assert result_path == result_new_path
+        found = any(
+            "Extract for period 202501 already exists, download skipped." in str(call.args[0])
+            for call in mock_log.call_args_list
+        )
+        assert found, "Expected log message about skipping extract not found"
+
+
+def test_extract_download_new_return_existing_file(tmp_path):  # noqa: ANN001
+    """Test DOWNLOAD_NEW mode with return_existing_file True and False."""
+    filename = "test_extract_202501.parquet"
+
+    # True: should return the file path if it exists
+    extractor_true = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_NEW", return_existing_file=True
+    )
+    # Create the file
+    path_true = extractor_true.data_elements.download_period(
+        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+    )
+    # Second call: should return the same file path
+    result_true = extractor_true.data_elements.download_period(
+        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+    )
+    assert result_true == path_true
+
+    # False: should return None if the file exists
+    extractor_false = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_NEW", return_existing_file=False
+    )
+    # The file already exists from the calls above (same tmp_path and filename); this result is discarded
+    _ = extractor_false.data_elements.download_period(
+        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+    )
+    # Second call: should return None
+    result_false = extractor_false.data_elements.download_period(
+        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+    )
+    assert result_false is None
+
+
+def test_extract_get_data_elements_with_indicator_extractor():
+    """Test that we can retrieve data elements using the indicators extractor.
+
+    Passing valid data element ids to the indicators parameter and including
+    the `include_cocs=True` flag should allow us to retrieve data elements with the indicators endpoint.
+    """
+    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).indicators._retrieve_data(
+        indicators=["DATAELEMENT1", "DATAELEMENT2", "DATAELEMENT3"],
+        org_units=[],
+        period="202501",
+        include_cocs=True,  # Include category option combo in the response
+    )
+
+    assert result.shape == (3, 9)
+    assert result.columns == [
+        "dataType",
+        "dx",
+        "period",
+        "orgUnit",
+        "categoryOptionCombo",
+        "attributeOptionCombo",
+        "rateMetric",
+        "domainType",
+        "value",
+    ]
+    assert result["dataType"].unique().to_list() == ["INDICATOR"]  # labelled INDICATOR despite data element ids
+    assert result["dx"].to_list() == ["DATAELEMENT1", "DATAELEMENT2", "DATAELEMENT3"]
+    assert result["categoryOptionCombo"].to_list() == ["COC001", "COC002", "COC003"]
diff --git a/d2d_development/tests/test_push.py b/d2d_development/tests/test_push.py
new file mode 100644
index 0000000..06e7e7d
--- /dev/null
+++ b/d2d_development/tests/test_push.py
@@ -0,0 +1,463 @@
+from unittest.mock import patch
+
+import polars as pl
+import pytest
+
+from d2d_development.extract import DHIS2Extractor
+from d2d_development.push import DHIS2Pusher, PusherError
+from tests.mock_dhis2_get import MockDHIS2Client
+from tests.mock_dhis2_post import (
+    MOCK_DHIS2_ERROR_409_RESPONSE_AOC,
+    MOCK_DHIS2_ERROR_409_RESPONSE_COC,
+    MOCK_DHIS2_ERROR_409_RESPONSE_DE,
+    MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS,
+    MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD,
+    MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT,
+    MOCK_DHIS2_ERROR_503_RESPONSE,
+    MOCK_DHIS2_OK_RESPONSE,
+    MockDHIS2Response,
+)
+
+
+def test_push_no_data_to_push():
+    """Test that pushing an empty DataFrame logs a message and imports nothing."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    cols = ["dx", "period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
+    empty_df = pl.DataFrame({col: [] for col in cols})
+    with patch.object(DHIS2Pusher, "_log_message") as mock_log_message:
+        pusher.push_data(empty_df)
+        mock_log_message.assert_any_call("Input DataFrame is empty. No data to push.")
+    assert pusher.summary["import_counts"]["imported"] == 0
+
+
+def test_push_missing_mandatory_columns():
+    """Test that push_data raises PusherError when a mandatory column (dx) is missing."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    cols = ["period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
+    empty_df = pl.DataFrame({col: [] for col in cols})
+    with pytest.raises(PusherError, match=r"Input data is missing mandatory columns: dx"):
+        pusher.push_data(df_data=empty_df)
+
+
+def test_push_wrong_input_type():
+    """Test that push_data raises PusherError when the input is not a DataFrame."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
+        pusher.push_data(df_data=[])
+    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
+        pusher.push_data(df_data="not a dataframe")
+    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
+        pusher.push_data(df_data={})
+
+
+def test_push_serialize_data_point_valid():
+    """Test the serialization of a DataPointModel to JSON format for DHIS2."""
+    data_point = (
+        DHIS2Extractor(dhis2_client=MockDHIS2Client())
+        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
+        .slice(0, 1)
+    )
+
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    json_payload = pusher._serialize_data_points(data_point)
+
+    assert json_payload[0]["dataElement"] == "AAA111"
+    assert json_payload[0]["period"] == "202501"
+    assert json_payload[0]["orgUnit"] == "ORG001"
+    assert json_payload[0]["categoryOptionCombo"] == "CAT001"
+    assert json_payload[0]["attributeOptionCombo"] == "ATTR001"
+    assert json_payload[0]["value"] == "12"
+
+
+def test_push_serialize_data_point_to_delete():
+    """Test the serialization of a DataPointModel to delete JSON format for DHIS2."""
+    data_point = (
+        DHIS2Extractor(dhis2_client=MockDHIS2Client())
+        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
+        .slice(3, 1)
+    )
+
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    json_payload = pusher._serialize_data_points(data_point)
+
+    assert json_payload[0]["dataElement"] == "DELETE1"
+    assert json_payload[0]["period"] == "202501"
+    assert json_payload[0]["orgUnit"] == "ORG004"
+    assert json_payload[0]["categoryOptionCombo"] == "CAT004"
+    assert json_payload[0]["attributeOptionCombo"] == "ATTR004"
+    assert not json_payload[0]["value"]
+    assert json_payload[0]["comment"] == "deleted value"
+
+
+def test_push_classify_points():
+    """Test the classification of data points into valid, to-delete and invalid groups."""
+    data_points = DHIS2Extractor(dhis2_client=MockDHIS2Client()).data_elements._retrieve_data(
+        data_elements=["AAA111", "BBB222", "CCC333"], org_units=[], period="202501"
+    )
+    assert isinstance(data_points, pl.DataFrame)
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    valid, to_delete, not_valid = pusher._classify_data_points(data_points)
+
+    # Verify no overlaps and all rows accounted for
+    assert len(valid) + len(to_delete) + len(not_valid) == len(data_points), (
+        "Row count mismatch! Check for overlaps or missing rows."
+    )
+    assert len(valid) == 3, "Expected 3 valid data points."
+    assert len(to_delete) == 1, "Expected 1 data point marked for deletion"
+    assert len(not_valid) == 5, "Expected 5 invalid data points."
+
+
+def test_push_log_invalid_data_points():
+    """Test the logging of invalid data points."""
+    data_points = (
+        DHIS2Extractor(dhis2_client=MockDHIS2Client())
+        .data_elements._retrieve_data(data_elements=[], org_units=[], period="202501")
+        .slice(4, 4)  # Select invalid data points (rows 4 to 7) for testing
+    )
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    _, _, not_valid = pusher._classify_data_points(data_points)
+
+    with patch.object(pusher, "_log_message") as mock_log_message:
+        pusher._log_ignored_or_na(not_valid)
+        assert mock_log_message.call_count == 5, "Expected a log message for each invalid data point."
+        for idx, call in enumerate(mock_log_message.call_args_list):
+            if idx == 0:
+                log_message = call.args[0]
+                assert "4 data points will be  ignored" in log_message, f"Unexpected log message: {log_message}"
+            else:
+                log_message = call.args[0]
+                assert f"Data point ignored: dx=INVALID{idx}" in log_message, f"Unexpected log message: {log_message}"
+
+
+def test_push_data_point():
+    """Test the push of data points to DHIS2."""
+    # 1 valid datapoint
+    data_points = (
+        DHIS2Extractor(dhis2_client=MockDHIS2Client())
+        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
+        .slice(0, 1)
+    )
+
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    # MOCK_DHIS2_OK_RESPONSE was manually manufactured to simulate a successful import response from DHIS2 for tests
+    with patch.object(pusher.dhis2_client.api.session, "post", return_value=MockDHIS2Response(MOCK_DHIS2_OK_RESPONSE)):
+        pusher.push_data(data_points)
+        assert pusher.summary["import_counts"]["imported"] == 1
+        assert pusher.summary["import_counts"]["ignored"] == 0
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert pusher.summary["import_options"] == {
+            "importStrategy": "CREATE_AND_UPDATE",
+            "dryRun": True,
+            "preheatCache": True,
+            "skipAudit": True,
+        }
+
+
+def test_push_data_points_connection_error():
+    """Test the error handling of a 503 (service unavailable) response from DHIS2."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_503_RESPONSE, status_code=503),
+    ):
+        with pytest.raises(PusherError, match=r"Server error: Service temporarily unavailable"):
+            pusher._push_data_points([{"dummy_datapoint": "1"}])
+        # After the exception, check the summary
+        assert len(pusher.summary["ERRORS"]) == 1
+        assert pusher.summary["ERRORS"][0]["message"] == "Server error: Service temporarily unavailable"
+        assert pusher.summary["ERRORS"][0]["server_error_code"] == "503"
+
+
+def test_push_data_points_data_element_error():
+    """Test the error handling push of data points with wrong data elements."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+
+    # NOTE: This fake input is just to pass validation and
+    #  match the information manufactured in the response
+    invalid_data_points = [
+        {
+            "dataElement": "VALID",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "INVALID_1",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "INVALID_2",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_DE was manually manufactured to simulate a 409 Conflict from DHIS2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_DE, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["import_counts"]["imported"] == 1
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["ignored"] == 2
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert len(pusher.summary["ERRORS"]) == 2
+        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_1"
+        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_2"
+
+
+def test_push_data_points_org_unit_error():
+    """Test the error handling push of data points with wrong org units."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+
+    # NOTE: This fake input is just to pass validation and
+    #  match the information manufactured in the response
+    invalid_data_points = [
+        {
+            "dataElement": "VALID",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID",
+            "period": "202501",
+            "orgUnit": "INVALID_1",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID",
+            "period": "202501",
+            "orgUnit": "INVALID_2",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS was manually manufactured to simulate a Conflict from DHIS2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["import_counts"]["imported"] == 1
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["ignored"] == 2
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert len(pusher.summary["ERRORS"]) == 2
+        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_1_OU"
+        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_2_OU"
+
+
+def test_push_data_points_period_error():
+    """Test the error handling push of data points with wrong periods."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+
+    # NOTE: This fake input is just to pass validation and
+    #  match the information manufactured in the response
+    invalid_data_points = [
+        {
+            "dataElement": "VALID",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID",
+            "period": "INVALID_PERIOD_1",
+            "orgUnit": "VALID_OU",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID",
+            "period": "INVALID_PERIOD_2",
+            "orgUnit": "VALID_OU",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD was manually manufactured to simulate a Conflict from DHIS2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["import_counts"]["imported"] == 1
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["ignored"] == 2
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert len(pusher.summary["ERRORS"]) == 2
+        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_PERIOD_1"
+        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_PERIOD_2"
+
+
+def test_push_data_points_coc_error():
+    """Test the error handling push of data points with wrong COC."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+
+    # NOTE: This fake input is just to pass validation and
+    #  match the information manufactured in the response
+    invalid_data_points = [
+        {
+            "dataElement": "VALID1",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID2",
+            "period": "202501",
+            "orgUnit": "ORG002",
+            "categoryOptionCombo": "INVALID_COC_1",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID3",
+            "period": "202501",
+            "orgUnit": "ORG003",
+            "categoryOptionCombo": "INVALID_COC_2",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_COC was manually manufactured to simulate a Conflict from DHIS2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_COC, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["import_counts"]["imported"] == 1
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["ignored"] == 2
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert len(pusher.summary["ERRORS"]) == 2
+        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_COC_1"
+        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_COC_2"
+
+
+def test_push_data_points_aoc_error():
+    """Test the error handling push of data points with wrong AOC."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+
+    # NOTE: This fake input is just to pass validation and
+    #  match the information manufactured in the response
+    invalid_data_points = [
+        {
+            "dataElement": "VALID1",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID2",
+            "period": "202501",
+            "orgUnit": "ORG002",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "INVALID_AOC_1",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID3",
+            "period": "202501",
+            "orgUnit": "ORG003",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "INVALID_AOC_2",
+            "value": "1",
+        },
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_AOC was manually manufactured to simulate a Conflict from DHIS2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_AOC, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["import_counts"]["imported"] == 1
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["ignored"] == 2
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert len(pusher.summary["ERRORS"]) == 2
+        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_AOC_1"
+        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_AOC_2"
+
+
+def test_push_data_points_value_format_error():
+    """Test the error handling push of data points with value not numeric."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+
+    # NOTE: This fake input is just to pass validation and
+    #  match the information manufactured in the response
+    invalid_data_points = [
+        {
+            "dataElement": "VALID1",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        {
+            "dataElement": "VALID2",
+            "period": "202501",
+            "orgUnit": "ORG002",
+            "categoryOptionCombo": "CAT002",
+            "attributeOptionCombo": "ATTR002",
+            "value": "0.0000e15",  # Non numeric format for DHIS2 API
+        },
+        {
+            "dataElement": "VALID3",
+            "period": "202501",
+            "orgUnit": "ORG003",
+            "categoryOptionCombo": "CAT003",
+            "attributeOptionCombo": "ATTR003",
+            "value": "1",
+        },
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT was manually manufactured to simulate a Conflict from DHIS2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["import_counts"]["imported"] == 2
+        assert pusher.summary["import_counts"]["updated"] == 0
+        assert pusher.summary["import_counts"]["ignored"] == 1
+        assert pusher.summary["import_counts"]["deleted"] == 0
+        assert len(pusher.summary["ERRORS"]) == 1
+        assert pusher.summary["ERRORS"][0]["object"] == "VALID2"
diff --git a/d2d_development/tests/test_utils.py b/d2d_development/tests/test_utils.py
new file mode 100644
index 0000000..37356c3
--- /dev/null
+++ b/d2d_development/tests/test_utils.py
@@ -0,0 +1,113 @@
+import logging
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pandas as pd
+import polars as pl
+import pytest
+
+from d2d_development.exceptions import ExtractorError
+from d2d_development.utils import log_message, save_to_parquet
+
+
+class CustomError(Exception):
+    """Custom exception for testing invalid logging levels."""
+
+    pass
+
+
+def test_log_message_info():
+    """Test that log_message logs info messages correctly."""
+    logger = Mock(spec=logging.Logger)
+    with patch("d2d_development.utils.current_run") as mock_run:
+        log_message(logger, "msg", level="info")
+        logger.info.assert_called_once_with("msg")
+        mock_run.log_info.assert_called_once_with("msg")
+
+
+def test_log_message_warning():
+    """Test that log_message logs warning messages correctly."""
+    logger = Mock(spec=logging.Logger)
+    with patch("d2d_development.utils.current_run") as mock_run:
+        log_message(logger, "warn", level="warning")
+        logger.warning.assert_called_once_with("warn")
+        mock_run.log_warning.assert_called_once_with("warn")
+
+
+def test_log_message_error():
+    """Test that log_message logs error messages correctly, including error details."""
+    logger = Mock(spec=logging.Logger)
+    with patch("d2d_development.utils.current_run") as mock_run:
+        log_message(logger, "err", error_details="details", level="error")
+        logger.error.assert_called_once_with("err Details: details")
+        mock_run.log_error.assert_called_once_with("err")
+
+
+def test_log_message_no_current_run():
+    """Test that log_message works even if current_run is not available."""
+    logger = Mock(spec=logging.Logger)
+    with patch("d2d_development.utils.current_run", None):
+        log_message(logger, "msg", level="info")
+        logger.info.assert_called_once_with("msg")
+
+
+def test_log_message_log_current_run_false():
+    """Test that log_message does not log to current_run when log_current_run is False."""
+    logger = Mock(spec=logging.Logger)
+    with patch("d2d_development.utils.current_run") as mock_run:
+        log_message(logger, "msg", level="info", log_current_run=False)
+        logger.info.assert_called_once_with("msg")
+        mock_run.log_info.assert_not_called()
+
+
+def test_log_message_invalid_level():
+    """Test that log_message raises the specified exception for invalid logging levels."""
+    logger = Mock(spec=logging.Logger)
+    with pytest.raises(CustomError):
+        log_message(logger, "bad", level="bad", exception_class=CustomError)
+
+
+def test_save_polars_dataframe(tmp_path: Path):
+    """Test saving a Polars DataFrame to Parquet."""
+    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
+    file = tmp_path / "test.parquet"
+    save_to_parquet(df, file)
+    assert file.exists()
+
+
+def test_save_pandas_dataframe(tmp_path: Path):
+    """Test saving a Pandas DataFrame to Parquet."""
+    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+    file = tmp_path / "test_pd.parquet"
+    save_to_parquet(df, file)
+    assert file.exists()
+
+
+def test_invalid_type_raises(tmp_path: Path):
+    """Test that passing an invalid type raises an ExtractorError."""
+    file = tmp_path / "fail.parquet"
+    with pytest.raises(ExtractorError):
+        save_to_parquet([1, 2, 3], file)
+
+
+def test_overwrite_file(tmp_path: Path):
+    """Test that saving to an existing file overwrites it."""
+    df1 = pd.DataFrame({"a": [1]})
+    df2 = pd.DataFrame({"a": [2]})
+    file = tmp_path / "overwrite.parquet"
+    save_to_parquet(df1, file)
+    save_to_parquet(df2, file)
+    result = pd.read_parquet(file)
+    assert result["a"].iloc[0] == 2
+
+
+def test_write_exception_cleanup(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+    """Test that if writing to Parquet raises an exception, no temp files are left behind."""
+    df = pd.DataFrame({"a": [1]})
+    file = tmp_path / "fail.parquet"
+    # Patch to_parquet to raise
+    monkeypatch.setattr(df, "to_parquet", lambda *a, **k: (_ for _ in ()).throw(Exception("fail")))
+    with pytest.raises(ExtractorError):
+        save_to_parquet(df, file)
+    # Check no temp files left
+    assert not any(tmp_path.glob("*.parquet*"))

From ff867092814c3caa57aafc76660f2b61173a3c74 Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Thu, 26 Mar 2026 18:00:50 +0100
Subject: [PATCH 2/8] fix(): pyproject to root + readme + ruff format

---
 d2d_development/README.md                |  96 +++++++++++++++++++
 d2d_development/d2d_development/push.py  |   9 +-
 d2d_development/pyproject.toml           |   1 +
 d2d_development/tests/mock_dhis2_get.py  |  13 ++-
 d2d_development/tests/test_data_point.py |  14 ++-
 d2d_development/tests/test_extract.py    | 110 ++++++++++++++++-----
 d2d_development/tests/test_push.py       | 116 +++++++++++++++++------
 d2d_development/tests/test_utils.py      |   4 +-
 pyproject.toml                           |  70 ++++++++++++++
 9 files changed, 370 insertions(+), 63 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/d2d_development/README.md b/d2d_development/README.md
index 3d0277a..962394b 100644
--- a/d2d_development/README.md
+++ b/d2d_development/README.md
@@ -9,3 +9,99 @@ Install this library on its own:
 ```bash
 pip install git+https://github.com/BLSQ/openhexa-ds-developments.git#subdirectory=d2d_development
 ```
+
+## Main Classes
+
+### DHIS2Extractor
+
+**Description:**  
+Main class to extract data from DHIS2. It provides unified handlers for extracting data elements, indicators, and reporting rates, saving them to disk in a standardized format.
+
+
+**Configuration Parameters:**
+When initializing `DHIS2Extractor`, you can configure the following parameters:
+
+- `dhis2_client` (required): The DHIS2 client instance.
+- `download_mode`: Controls how files are saved when extracting data. Use `"DOWNLOAD_REPLACE"` (default) to always overwrite files, or `"DOWNLOAD_NEW"` to skip downloading if the file already exists.
+- `return_existing_file`: If `True` and using `DOWNLOAD_NEW`, returns the path to the existing file instead of `None` when a file already exists (default: `False`).
+- `logger`: Optional custom logger instance.
+
+Example:
+```python
+extractor = DHIS2Extractor(dhis2_client, download_mode="DOWNLOAD_NEW", return_existing_file=True)
+```
+
+**Usage Example:**
+```python
+from d2d_development.extract import DHIS2Extractor
+from openhexa.sdk import workspace
+from openhexa.toolbox.dhis2 import DHIS2
+from pathlib import Path
+
+dhis2_client = DHIS2(workspace.get_connection("dhis2-connection"))
+extractor = DHIS2Extractor(dhis2_client, download_mode="DOWNLOAD_REPLACE")
+
+# Extract several periods of data elements
+for period in ["202401", "202402", "202403"]:
+    extractor.data_elements.download_period(
+        data_elements=["de1", "de2"],
+        org_units=["ou1", "ou2"],
+        period=period,
+        output_dir=Path("/output")
+    )
+# Extract one period of indicators
+extractor.indicators.download_period(
+    indicators=["ind1"],
+    org_units=["ou1"],
+    period="202401",
+    output_dir=Path("/tmp")
+)
+# Extract one period of reporting rates
+extractor.reporting_rates.download_period(
+    reporting_rates=["rr1"],
+    org_units=["ou1"],
+    period="202401",
+    output_dir=Path("/tmp")
+)
+```
+
+### DHIS2Pusher
+
+**Description:**  
+Main class to handle pushing data to DHIS2. It validates and pushes formatted data (pandas or polars DataFrame) to a DHIS2 instance.
+
+**Configuration Parameters:**
+When initializing `DHIS2Pusher`, you can configure the following parameters:
+
+- `dhis2_client` (required): The DHIS2 client instance.
+- `import_strategy`: Strategy flag passed to the DHIS2 API for data import. Accepts "CREATE", "UPDATE", or "CREATE_AND_UPDATE" (default: "CREATE_AND_UPDATE"). This only controls how the DHIS2 server processes the data; it does not affect client-side logic.
+- `dry_run`: If `True`, simulates the push without making changes on the server (default: `True`).
+- `max_post`: Maximum number of data points per POST request (default: `500`).
+- `logging_interval`: Log progress every N data points (default: `50000`).
+- `logger`: Optional custom logger instance.
+
+**Usage Example:**
+```python
+from d2d_development.push import DHIS2Pusher
+from openhexa.sdk import workspace
+from openhexa.toolbox.dhis2 import DHIS2
+import polars as pl
+
+dhis2_client = DHIS2(workspace.get_connection("dhis2-connection"))
+pusher = DHIS2Pusher(
+    dhis2_client,
+    import_strategy="CREATE_AND_UPDATE",  # or "CREATE", "UPDATE"
+    dry_run=False,
+    max_post=1000,
+    logging_interval=10000,
+)
+
+df = pl.DataFrame({
+    "dx": ["de1"],
+    "period": ["202401"],
+    "orgUnit": ["ou1"],
+    "categoryOptionCombo": ["coc"],
+    "attributeOptionCombo": ["aoc"],
+    "value": [123]})
+pusher.push_data(df)
+```
diff --git a/d2d_development/d2d_development/push.py b/d2d_development/d2d_development/push.py
index 766d415..c6286b2 100644
--- a/d2d_development/d2d_development/push.py
+++ b/d2d_development/d2d_development/push.py
@@ -21,7 +21,6 @@ def __init__(
         dry_run: bool = True,
         max_post: int = 500,
         logging_interval: int = 50000,
-        mandatory_fields: list[str] | None = None,
         logger: logging.Logger | None = None,
     ):
         self.dhis2_client = dhis2_client
@@ -29,11 +28,7 @@ def __init__(
         if import_strategy not in {"CREATE", "UPDATE", "CREATE_AND_UPDATE"}:
             raise PusherError("Invalid import strategy (use 'CREATE', 'UPDATE' or 'CREATE_AND_UPDATE')")
 
-        if mandatory_fields is None:
-            self.mandatory_fields = ["dx", "period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
-        else:
-            self.mandatory_fields = mandatory_fields
-
+        self.mandatory_fields = ["dx", "period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
         self.import_strategy = import_strategy
         self.dry_run = dry_run
         self.max_post = max_post
@@ -185,7 +180,7 @@ def _log_summary_errors(self):
         else:
             self._log_message(f"Logging {len(errors)} error(s) from import summary.", level="error")
             for i_e, error in enumerate(errors, start=1):
-                self._log_message(f"Error response {i_e}: {error}", level="error")
+                self._log_message(f"Error response {i_e}: {error}", log_current_run=False, level="error")
 
     def _post(self, chunk: list[dict]) -> requests.Response:
         """Send a POST request to DHIS2 for a chunk of data values.
diff --git a/d2d_development/pyproject.toml b/d2d_development/pyproject.toml
index eb5bafa..99e6eb8 100644
--- a/d2d_development/pyproject.toml
+++ b/d2d_development/pyproject.toml
@@ -15,6 +15,7 @@ dependencies = [
     "pandas>=2.2.0",
     "polars>=1.0.0",
     "packaging>=23.0",
+    "pyarrow>=10.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/d2d_development/tests/mock_dhis2_get.py b/d2d_development/tests/mock_dhis2_get.py
index 0c18c7f..48df8ec 100644
--- a/d2d_development/tests/mock_dhis2_get.py
+++ b/d2d_development/tests/mock_dhis2_get.py
@@ -1,7 +1,9 @@
 class MockDataValueSets:
     """Mock class to simulate DHIS2 DataValueSets API responses for testing purposes."""
 
-    def get(self, data_elements=None, periods=None, org_units=None, last_updated=None) -> list[dict]:  # noqa: ANN001
+    def get(
+        self, data_elements=None, periods=None, org_units=None, last_updated=None
+    ) -> list[dict]:  # noqa: ANN001
         """Simulate the retrieval of data values from DHIS2 based on the provided parameters.
 
         Returns
@@ -135,7 +137,14 @@ def get(self, data_elements=None, periods=None, org_units=None, last_updated=Non
 class MockAnalytics:
     """Mock class to simulate DHIS2 Analytics API responses for testing purposes."""
 
-    def get(self, indicators=None, data_elements=None, periods=None, org_units=None, include_cocs=False) -> list[dict]:  # noqa: ANN001
+    def get(
+        self,
+        indicators=None,
+        data_elements=None,
+        periods=None,
+        org_units=None,
+        include_cocs=False,
+    ) -> list[dict]:  # noqa: ANN001
         """Simulate the retrieval of analytics data from DHIS2 based on the provided parameters.
 
         Returns
diff --git a/d2d_development/tests/test_data_point.py b/d2d_development/tests/test_data_point.py
index 824ba02..fe78919 100644
--- a/d2d_development/tests/test_data_point.py
+++ b/d2d_development/tests/test_data_point.py
@@ -39,8 +39,13 @@ def test_data_point_model_to_json():
     assert payload["dataElement"] == data_elements[0]["dataElement"].item()
     assert payload["period"] == data_elements[0]["period"].item()
     assert payload["orgUnit"] == data_elements[0]["orgUnit"].item()
-    assert payload["categoryOptionCombo"] == data_elements[0]["categoryOptionCombo"].item()
-    assert payload["attributeOptionCombo"] == data_elements[0]["attributeOptionCombo"].item()
+    assert (
+        payload["categoryOptionCombo"] == data_elements[0]["categoryOptionCombo"].item()
+    )
+    assert (
+        payload["attributeOptionCombo"]
+        == data_elements[0]["attributeOptionCombo"].item()
+    )
     assert payload["value"] == data_elements[0]["value"].item()
 
 
@@ -50,7 +55,10 @@ def test_data_point_model_to_json_delete():
 
     # Set third datapoint to value None to simulate a deleted value
     data_elements = data_elements.with_columns(
-        pl.when(pl.arange(0, data_elements.height) == 2).then(None).otherwise(pl.col("value")).alias("value")
+        pl.when(pl.arange(0, data_elements.height) == 2)
+        .then(None)
+        .otherwise(pl.col("value"))
+        .alias("value")
     )
     points_list = [
         DataPointModel(
diff --git a/d2d_development/tests/test_extract.py b/d2d_development/tests/test_extract.py
index 8fd1175..62b7d70 100644
--- a/d2d_development/tests/test_extract.py
+++ b/d2d_development/tests/test_extract.py
@@ -9,9 +9,9 @@
 
 def test_extract_map_data_elements():
     """Test the mapping of data elements."""
-    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).data_elements._retrieve_data(
-        data_elements=[], org_units=[], period="202501"
-    )
+    result = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client()
+    ).data_elements._retrieve_data(data_elements=[], org_units=[], period="202501")
     assert isinstance(result, pl.DataFrame)
     assert result.shape == (9, 9)
     assert result.columns == [
@@ -68,8 +68,14 @@ def test_extract_map_data_elements():
 
 def test_extract_map_reporting_rates():
     """Test the mapping of reporting rates."""
-    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).reporting_rates._retrieve_data(
-        reporting_rates=["AAA111.REPORTING_RATE", "BBB222.EXPECTED_REPORTS", "CCC333.REPORTING_RATE"],
+    result = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client()
+    ).reporting_rates._retrieve_data(
+        reporting_rates=[
+            "AAA111.REPORTING_RATE",
+            "BBB222.EXPECTED_REPORTS",
+            "CCC333.REPORTING_RATE",
+        ],
         org_units=[],
         period="202409",
     )
@@ -92,7 +98,11 @@ def test_extract_map_reporting_rates():
     assert result["orgUnit"].to_list() == ["OU001", "OU002", "OU003"]
     assert result["categoryOptionCombo"].to_list() == [None, None, None]
     assert result["attributeOptionCombo"].to_list() == [None, None, None]
-    assert result["rateMetric"].to_list() == ["REPORTING_RATE", "EXPECTED_REPORTS", "REPORTING_RATE"]
+    assert result["rateMetric"].to_list() == [
+        "REPORTING_RATE",
+        "EXPECTED_REPORTS",
+        "REPORTING_RATE",
+    ]
     assert result["domainType"].to_list() == ["AGGREGATED", "AGGREGATED", "AGGREGATED"]
     assert result["value"].to_list() == ["100", "0", "100"]
 
@@ -100,7 +110,9 @@ def test_extract_map_reporting_rates():
 def test_extract_map_indicator():
     """Test the mapping of indicators."""
     result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).indicators._retrieve_data(
-        indicators=["INDICATOR1", "INDICATOR2", "INDICATOR3"], org_units=[], period="202501"
+        indicators=["INDICATOR1", "INDICATOR2", "INDICATOR3"],
+        org_units=[],
+        period="202501",
     )
     assert isinstance(result, pl.DataFrame)
     assert result.shape == (3, 9)
@@ -128,12 +140,18 @@ def test_extract_map_indicator():
 
 def test_extract_download_replace_no_file(tmp_path):  # noqa: ANN001
     """Test DOWNLOAD_REPLACE mode, downloads and saves data to a Parquet file."""
-    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE")
+    extractor = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE"
+    )
     filename = "test_extract_202501.parquet"
 
     # Call download_period
     result_path = extractor.data_elements.download_period(
-        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        data_elements=[],
+        org_units=[],
+        period="202501",
+        output_dir=tmp_path,
+        filename=filename,
     )
 
     # Assert file is created
@@ -143,14 +161,20 @@ def test_extract_download_replace_no_file(tmp_path):  # noqa: ANN001
 
 def test_download_replace_replaces_file_and_logs(tmp_path):  # noqa: ANN001
     """Test DOWNLOAD_REPLACE mode, replaces the file if it already exists and logs the replacement."""
-    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE")
+    extractor = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE"
+    )
     output_dir = tmp_path
     period = "202501"
     filename = "test_extract.parquet"
 
     # First call creates the file
     file_path = extractor.data_elements.download_period(
-        data_elements=[], org_units=[], period=period, output_dir=output_dir, filename=filename
+        data_elements=[],
+        org_units=[],
+        period=period,
+        output_dir=output_dir,
+        filename=filename,
     )
     assert file_path.exists()
     mtime_before = file_path.stat().st_mtime
@@ -161,11 +185,18 @@ def test_download_replace_replaces_file_and_logs(tmp_path):  # noqa: ANN001
     with patch.object(extractor.logger, "info") as mock_log:
         # Second call should replace the file and log the replacement
         extractor.data_elements.download_period(
-            data_elements=[], org_units=[], period=period, output_dir=output_dir, filename=filename
+            data_elements=[],
+            org_units=[],
+            period=period,
+            output_dir=output_dir,
+            filename=filename,
         )
         mtime_after = file_path.stat().st_mtime
         # Check that the log message about replacing the extract was called
-        found = any("Replacing extract for period 202501" in str(call.args[0]) for call in mock_log.call_args_list)
+        found = any(
+            "Replacing extract for period 202501" in str(call.args[0])
+            for call in mock_log.call_args_list
+        )
         assert found, "Expected log message about replacing extract not found"
         # Check that the file was actually replaced (mtime changed)
         assert mtime_after > mtime_before, "File was not actually replaced"
@@ -173,12 +204,20 @@ def test_download_replace_replaces_file_and_logs(tmp_path):  # noqa: ANN001
 
 def test_extract_download_new_file_exists(tmp_path):  # noqa: ANN001
     """Test DOWNLOAD_NEW mode, creates a new file if it does not exist, and skips if it does."""
-    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_NEW", return_existing_file=True)
+    extractor = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client(),
+        download_mode="DOWNLOAD_NEW",
+        return_existing_file=True,
+    )
     filename = "test_extract_202501.parquet"
 
     # First call: file is created
     result_new_path = extractor.data_elements.download_period(
-        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        data_elements=[],
+        org_units=[],
+        period="202501",
+        output_dir=tmp_path,
+        filename=filename,
     )
     assert result_new_path.exists()
     assert result_new_path.name == filename
@@ -186,11 +225,16 @@ def test_extract_download_new_file_exists(tmp_path):  # noqa: ANN001
     # Second call: should skip and log the skip message
     with patch.object(extractor.logger, "info") as mock_log:
         result_path = extractor.data_elements.download_period(
-            data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+            data_elements=[],
+            org_units=[],
+            period="202501",
+            output_dir=tmp_path,
+            filename=filename,
         )
         assert result_path == result_new_path
         found = any(
-            "Extract for period 202501 already exists, download skipped." in str(call.args[0])
+            "Extract for period 202501 already exists, download skipped."
+            in str(call.args[0])
             for call in mock_log.call_args_list
         )
         assert found, "Expected log message about skipping extract not found"
@@ -202,29 +246,49 @@ def test_extract_download_new_return_existing_file(tmp_path):  # noqa: ANN001
 
     # True: should return the file path if it exists
     extractor_true = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_NEW", return_existing_file=True
+        dhis2_client=MockDHIS2Client(),
+        download_mode="DOWNLOAD_NEW",
+        return_existing_file=True,
     )
     # Create the file
     path_true = extractor_true.data_elements.download_period(
-        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        data_elements=[],
+        org_units=[],
+        period="202501",
+        output_dir=tmp_path,
+        filename=filename,
     )
     # Second call: should return the same file path
     result_true = extractor_true.data_elements.download_period(
-        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        data_elements=[],
+        org_units=[],
+        period="202501",
+        output_dir=tmp_path,
+        filename=filename,
     )
     assert result_true == path_true
 
     # False: should return None if the file exists
     extractor_false = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_NEW", return_existing_file=False
+        dhis2_client=MockDHIS2Client(),
+        download_mode="DOWNLOAD_NEW",
+        return_existing_file=False,
     )
     # Create the file
     _ = extractor_false.data_elements.download_period(
-        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        data_elements=[],
+        org_units=[],
+        period="202501",
+        output_dir=tmp_path,
+        filename=filename,
     )
     # Second call: should return None
     result_false = extractor_false.data_elements.download_period(
-        data_elements=[], org_units=[], period="202501", output_dir=tmp_path, filename=filename
+        data_elements=[],
+        org_units=[],
+        period="202501",
+        output_dir=tmp_path,
+        filename=filename,
     )
     assert result_false is None
 
diff --git a/d2d_development/tests/test_push.py b/d2d_development/tests/test_push.py
index 06e7e7d..226fe11 100644
--- a/d2d_development/tests/test_push.py
+++ b/d2d_development/tests/test_push.py
@@ -22,7 +22,14 @@
 def test_push_no_data_to_push():
     """Test the push of data points to DHIS2."""
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
-    cols = ["dx", "period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
+    cols = [
+        "dx",
+        "period",
+        "orgUnit",
+        "categoryOptionCombo",
+        "attributeOptionCombo",
+        "value",
+    ]
     empty_df = pl.DataFrame({col: [] for col in cols})
     with patch.object(DHIS2Pusher, "_log_message") as mock_log_message:
         pusher.push_data(empty_df)
@@ -35,18 +42,26 @@ def test_push_missing_mandatory_columns():
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
     cols = ["period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
     empty_df = pl.DataFrame({col: [] for col in cols})
-    with pytest.raises(PusherError, match=r"Input data is missing mandatory columns: dx"):
+    with pytest.raises(
+        PusherError, match=r"Input data is missing mandatory columns: dx"
+    ):
         pusher.push_data(df_data=empty_df)
 
 
 def test_push_wrong_input_type():
     """Test the push of data points to DHIS2."""
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
-    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
+    with pytest.raises(
+        PusherError, match=r"Input data must be a pandas or polars DataFrame."
+    ):
         pusher.push_data(df_data=[])
-    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
+    with pytest.raises(
+        PusherError, match=r"Input data must be a pandas or polars DataFrame."
+    ):
         pusher.push_data(df_data="not a dataframe")
-    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
+    with pytest.raises(
+        PusherError, match=r"Input data must be a pandas or polars DataFrame."
+    ):
         pusher.push_data(df_data={})
 
 
@@ -54,7 +69,9 @@ def test_push_serialize_data_point_valid():
     """Test the serialization of a DataPointModel to JSON format for DHIS2."""
     data_point = (
         DHIS2Extractor(dhis2_client=MockDHIS2Client())
-        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
+        .data_elements._retrieve_data(
+            data_elements=["AAA111"], org_units=[], period="202501"
+        )
         .slice(0, 1)
     )
 
@@ -73,7 +90,9 @@ def test_push_serialize_data_point_to_delete():
     """Test the serialization of a DataPointModel to delete JSON format for DHIS2."""
     data_point = (
         DHIS2Extractor(dhis2_client=MockDHIS2Client())
-        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
+        .data_elements._retrieve_data(
+            data_elements=["AAA111"], org_units=[], period="202501"
+        )
         .slice(3, 1)
     )
 
@@ -91,7 +110,9 @@ def test_push_serialize_data_point_to_delete():
 
 def test_push_classify_points():
     """Test the mapping of data elements."""
-    data_points = DHIS2Extractor(dhis2_client=MockDHIS2Client()).data_elements._retrieve_data(
+    data_points = DHIS2Extractor(
+        dhis2_client=MockDHIS2Client()
+    ).data_elements._retrieve_data(
         data_elements=["AAA111", "BBB222", "CCC333"], org_units=[], period="202501"
     )
     assert isinstance(data_points, pl.DataFrame)
@@ -119,14 +140,20 @@ def test_push_log_invalid_data_points():
 
     with patch.object(pusher, "_log_message") as mock_log_message:
         pusher._log_ignored_or_na(not_valid)
-        assert mock_log_message.call_count == 5, "Expected a log message for each invalid data point."
+        assert mock_log_message.call_count == 5, (
+            "Expected a log message for each invalid data point."
+        )
         for idx, call in enumerate(mock_log_message.call_args_list):
             if idx == 0:
                 log_message = call.args[0]
-                assert "4 data points will be  ignored" in log_message, f"Unexpected log message: {log_message}"
+                assert "4 data points will be  ignored" in log_message, (
+                    f"Unexpected log message: {log_message}"
+                )
             else:
                 log_message = call.args[0]
-                assert f"Data point ignored: dx=INVALID{idx}" in log_message, f"Unexpected log message: {log_message}"
+                assert f"Data point ignored: dx=INVALID{idx}" in log_message, (
+                    f"Unexpected log message: {log_message}"
+                )
 
 
 def test_push_data_point():
@@ -134,13 +161,19 @@ def test_push_data_point():
     # 1 valid datapoint
     data_points = (
         DHIS2Extractor(dhis2_client=MockDHIS2Client())
-        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
+        .data_elements._retrieve_data(
+            data_elements=["AAA111"], org_units=[], period="202501"
+        )
         .slice(0, 1)
     )
 
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
     # MOCK_DHIS2_OK_RESPONSE was manually manufactured to simulate a successful import response from DHIS2 for tests
-    with patch.object(pusher.dhis2_client.api.session, "post", return_value=MockDHIS2Response(MOCK_DHIS2_OK_RESPONSE)):
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_OK_RESPONSE),
+    ):
         pusher.push_data(data_points)
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["ignored"] == 0
@@ -162,11 +195,16 @@ def test_push_data_points_connection_error():
         "post",
         return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_503_RESPONSE, status_code=503),
     ):
-        with pytest.raises(PusherError, match=r"Server error: Service temporarily unavailable"):
+        with pytest.raises(
+            PusherError, match=r"Server error: Service temporarily unavailable"
+        ):
             pusher._push_data_points([{"dummy_datapoint": "1"}])
         # After the exception, check the summary
         assert len(pusher.summary["ERRORS"]) == 1
-        assert pusher.summary["ERRORS"][0]["message"] == "Server error: Service temporarily unavailable"
+        assert (
+            pusher.summary["ERRORS"][0]["message"]
+            == "Server error: Service temporarily unavailable"
+        )
         assert pusher.summary["ERRORS"][0]["server_error_code"] == "503"
 
 
@@ -207,9 +245,13 @@ def test_push_data_points_data_element_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_DE, status_code=409),
+        return_value=MockDHIS2Response(
+            MOCK_DHIS2_ERROR_409_RESPONSE_DE, status_code=409
+        ),
     ):
-        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        pusher._push_data_points(
+            invalid_data_points
+        )  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -256,9 +298,13 @@ def test_push_data_points_org_unit_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS, status_code=409),
+        return_value=MockDHIS2Response(
+            MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS, status_code=409
+        ),
     ):
-        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        pusher._push_data_points(
+            invalid_data_points
+        )  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -305,9 +351,13 @@ def test_push_data_points_period_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD, status_code=409),
+        return_value=MockDHIS2Response(
+            MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD, status_code=409
+        ),
     ):
-        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        pusher._push_data_points(
+            invalid_data_points
+        )  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -354,9 +404,13 @@ def test_push_data_points_coc_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_COC, status_code=409),
+        return_value=MockDHIS2Response(
+            MOCK_DHIS2_ERROR_409_RESPONSE_COC, status_code=409
+        ),
     ):
-        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        pusher._push_data_points(
+            invalid_data_points
+        )  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -403,9 +457,13 @@ def test_push_data_points_aoc_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_AOC, status_code=409),
+        return_value=MockDHIS2Response(
+            MOCK_DHIS2_ERROR_409_RESPONSE_AOC, status_code=409
+        ),
     ):
-        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        pusher._push_data_points(
+            invalid_data_points
+        )  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -452,9 +510,13 @@ def test_push_data_points_value_format_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT, status_code=409),
+        return_value=MockDHIS2Response(
+            MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT, status_code=409
+        ),
     ):
-        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        pusher._push_data_points(
+            invalid_data_points
+        )  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 2
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 1
diff --git a/d2d_development/tests/test_utils.py b/d2d_development/tests/test_utils.py
index 37356c3..06fc82c 100644
--- a/d2d_development/tests/test_utils.py
+++ b/d2d_development/tests/test_utils.py
@@ -106,7 +106,9 @@ def test_write_exception_cleanup(tmp_path: Path, monkeypatch: pytest.MonkeyPatch
     df = pd.DataFrame({"a": [1]})
     file = tmp_path / "fail.parquet"
     # Patch to_parquet to raise
-    monkeypatch.setattr(df, "to_parquet", lambda *a, **k: (_ for _ in ()).throw(Exception("fail")))
+    monkeypatch.setattr(
+        df, "to_parquet", lambda *a, **k: (_ for _ in ()).throw(Exception("fail"))
+    )
     with pytest.raises(ExtractorError):
         save_to_parquet(df, file)
     # Check no temp files left
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7650ea0
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,70 @@
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.format]
+docstring-code-format = true
+docstring-code-line-length = 100
+
+[tool.ruff.lint]
+preview = true
+select = [
+    "F", # Pyflakes
+    "E", # pycodestyle
+    "I", # isort
+    "D", # pydocstyle
+    "UP", # pyupgrade
+    "ANN", # flake8-annotations
+    "B", # bugbear
+    "A", # flake8-builtins
+    "COM", # flake8-commas
+    "FA", # flake8-future-annotations
+    "PT", # flake8-pytest-style
+    "Q", # flake8-quotes
+    "RET", # flake8-return
+    "SIM", # flake8-simplify
+    "PTH", # flake8-use-pathlib
+    "NPY", # NumPy rules
+    "PD", # pandas rules
+    "N", # pep8-naming
+    "DOC", # pydoclint
+    "PLC", # pylint convention
+    "PLE", # pylint error
+    "PLW", # pylint warning
+    "RUF", # ruff specific rules
+]
+
+ignore = [
+    "D100", # Missing docstring in public module
+    "D104", # Missing docstring in public package
+    "D105", # Missing docstring in magic method
+    "D106", # Missing docstring in public nested class
+    "D107", # Missing docstring in __init__
+    "D401", # First line should be in imperative mood
+    "D413", # Missing blank line after last section
+    "D203", # 1 blank line required before class docstring
+    "SIM108", # Use ternary operators
+    "SIM102", # Use a single if statement instead of nested if statements
+    "SIM114", # Combine `if` branches
+    "DOC501", # Raised exception {id} missing from docstring
+    "DOC502", # Raised exception is not explicitly raised: {id}
+    "RUF022", # `__all__` is not sorted
+    "RUF005", # Consider expression instead of concatenation
+    "RUF069", # Unreliable floating point equality comparison
+    "PD901", # Avoid using the generic variable name df for dataframes
+    "PLR0904", # Too many public methods ({methods} > {max_methods})
+    "PLR0911", # Too many return statements ({returns} > {max_returns})
+    "PLR0912", # Too many branches ({branches} > {max_branches})
+    "PLR0913", # Too many arguments ({arguments} > {max_arguments})
+    "PLR0914", # Too many local variables ({variables} > {max_variables})
+    "PLR0915", # Too many statements ({statements} > {max_statements})
+    "PLR0916", # Too many Boolean expressions ({expressions} > {max_expressions})
+    "PLR1702", # Too many nested blocks ({blocks} > {max_blocks})
+    "COM812", # Missing trailing comma
+]
+
+[tool.ruff.lint.flake8-annotations]
+allow-star-arg-any = true
+mypy-init-return = true
+suppress-dummy-args = true
+suppress-none-returning = true
\ No newline at end of file

From 331609c800985fb583a7cb9baa7e02bd59e1a274 Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Thu, 26 Mar 2026 18:15:10 +0100
Subject: [PATCH 3/8] fix(tests): restore 120-column ruff formatting and annotate mock signatures

---
 d2d_development/tests/mock_dhis2_get.py  |  20 +++--
 d2d_development/tests/test_data_point.py |  16 +---
 d2d_development/tests/test_extract.py    |  28 ++----
 d2d_development/tests/test_push.py       | 103 ++++++-----------------
 d2d_development/tests/test_utils.py      |   5 +-
 5 files changed, 52 insertions(+), 120 deletions(-)

diff --git a/d2d_development/tests/mock_dhis2_get.py b/d2d_development/tests/mock_dhis2_get.py
index 48df8ec..b10f767 100644
--- a/d2d_development/tests/mock_dhis2_get.py
+++ b/d2d_development/tests/mock_dhis2_get.py
@@ -2,8 +2,12 @@ class MockDataValueSets:
     """Mock class to simulate DHIS2 DataValueSets API responses for testing purposes."""
 
     def get(
-        self, data_elements=None, periods=None, org_units=None, last_updated=None
-    ) -> list[dict]:  # noqa: ANN001
+        self,
+        data_elements: list[str] = None,  # noqa: RUF013
+        periods: list[str] = None,  # noqa: RUF013
+        org_units: list[str] = None,  # noqa: RUF013
+        last_updated: str = None,  # noqa: RUF013
+    ) -> list[dict]:
         """Simulate the retrieval of data values from DHIS2 based on the provided parameters.
 
         Returns
@@ -139,12 +143,12 @@ class MockAnalytics:
 
     def get(
         self,
-        indicators=None,
-        data_elements=None,
-        periods=None,
-        org_units=None,
-        include_cocs=False,
-    ) -> list[dict]:  # noqa: ANN001
+        indicators: list[str] = None,  # noqa: RUF013
+        data_elements: list[str] = None,  # noqa: RUF013
+        periods: list[str] = None,  # noqa: RUF013
+        org_units: list[str] = None,  # noqa: RUF013
+        include_cocs: bool = False,
+    ) -> list[dict]:
         """Simulate the retrieval of analytics data from DHIS2 based on the provided parameters.
 
         Returns
diff --git a/d2d_development/tests/test_data_point.py b/d2d_development/tests/test_data_point.py
index fe78919..0c28766 100644
--- a/d2d_development/tests/test_data_point.py
+++ b/d2d_development/tests/test_data_point.py
@@ -1,6 +1,6 @@
 import polars as pl
-
 from d2d_development.data_models import DataPointModel
+
 from tests.mock_dhis2_get import MockDHIS2Client
 
 
@@ -39,13 +39,8 @@ def test_data_point_model_to_json():
     assert payload["dataElement"] == data_elements[0]["dataElement"].item()
     assert payload["period"] == data_elements[0]["period"].item()
     assert payload["orgUnit"] == data_elements[0]["orgUnit"].item()
-    assert (
-        payload["categoryOptionCombo"] == data_elements[0]["categoryOptionCombo"].item()
-    )
-    assert (
-        payload["attributeOptionCombo"]
-        == data_elements[0]["attributeOptionCombo"].item()
-    )
+    assert payload["categoryOptionCombo"] == data_elements[0]["categoryOptionCombo"].item()
+    assert payload["attributeOptionCombo"] == data_elements[0]["attributeOptionCombo"].item()
     assert payload["value"] == data_elements[0]["value"].item()
 
 
@@ -55,10 +50,7 @@ def test_data_point_model_to_json_delete():
 
     # Set third datapoint to value None to simulate a deleted value
     data_elements = data_elements.with_columns(
-        pl.when(pl.arange(0, data_elements.height) == 2)
-        .then(None)
-        .otherwise(pl.col("value"))
-        .alias("value")
+        pl.when(pl.arange(0, data_elements.height) == 2).then(None).otherwise(pl.col("value")).alias("value")
     )
     points_list = [
         DataPointModel(
diff --git a/d2d_development/tests/test_extract.py b/d2d_development/tests/test_extract.py
index 62b7d70..91521b9 100644
--- a/d2d_development/tests/test_extract.py
+++ b/d2d_development/tests/test_extract.py
@@ -2,16 +2,16 @@
 from unittest.mock import patch
 
 import polars as pl
-
 from d2d_development.extract import DHIS2Extractor
+
 from tests.mock_dhis2_get import MockDHIS2Client
 
 
 def test_extract_map_data_elements():
     """Test the mapping of data elements."""
-    result = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client()
-    ).data_elements._retrieve_data(data_elements=[], org_units=[], period="202501")
+    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).data_elements._retrieve_data(
+        data_elements=[], org_units=[], period="202501"
+    )
     assert isinstance(result, pl.DataFrame)
     assert result.shape == (9, 9)
     assert result.columns == [
@@ -68,9 +68,7 @@ def test_extract_map_data_elements():
 
 def test_extract_map_reporting_rates():
     """Test the mapping of reporting rates."""
-    result = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client()
-    ).reporting_rates._retrieve_data(
+    result = DHIS2Extractor(dhis2_client=MockDHIS2Client()).reporting_rates._retrieve_data(
         reporting_rates=[
             "AAA111.REPORTING_RATE",
             "BBB222.EXPECTED_REPORTS",
@@ -140,9 +138,7 @@ def test_extract_map_indicator():
 
 def test_extract_download_replace_no_file(tmp_path):  # noqa: ANN001
     """Test DOWNLOAD_REPLACE mode, downloads and saves data to a Parquet file."""
-    extractor = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE"
-    )
+    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE")
     filename = "test_extract_202501.parquet"
 
     # Call download_period
@@ -161,9 +157,7 @@ def test_extract_download_replace_no_file(tmp_path):  # noqa: ANN001
 
 def test_download_replace_replaces_file_and_logs(tmp_path):  # noqa: ANN001
     """Test DOWNLOAD_REPLACE mode, replaces the file if it already exists and logs the replacement."""
-    extractor = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE"
-    )
+    extractor = DHIS2Extractor(dhis2_client=MockDHIS2Client(), download_mode="DOWNLOAD_REPLACE")
     output_dir = tmp_path
     period = "202501"
     filename = "test_extract.parquet"
@@ -193,10 +187,7 @@ def test_download_replace_replaces_file_and_logs(tmp_path):  # noqa: ANN001
         )
         mtime_after = file_path.stat().st_mtime
         # Check that the log message about replacing the extract was called
-        found = any(
-            "Replacing extract for period 202501" in str(call.args[0])
-            for call in mock_log.call_args_list
-        )
+        found = any("Replacing extract for period 202501" in str(call.args[0]) for call in mock_log.call_args_list)
         assert found, "Expected log message about replacing extract not found"
         # Check that the file was actually replaced (mtime changed)
         assert mtime_after > mtime_before, "File was not actually replaced"
@@ -233,8 +224,7 @@ def test_extract_download_new_file_exists(tmp_path):  # noqa: ANN001
         )
         assert result_path == result_new_path
         found = any(
-            "Extract for period 202501 already exists, download skipped."
-            in str(call.args[0])
+            "Extract for period 202501 already exists, download skipped." in str(call.args[0])
             for call in mock_log.call_args_list
         )
         assert found, "Expected log message about skipping extract not found"
diff --git a/d2d_development/tests/test_push.py b/d2d_development/tests/test_push.py
index 226fe11..536ca5f 100644
--- a/d2d_development/tests/test_push.py
+++ b/d2d_development/tests/test_push.py
@@ -2,9 +2,9 @@
 
 import polars as pl
 import pytest
-
 from d2d_development.extract import DHIS2Extractor
 from d2d_development.push import DHIS2Pusher, PusherError
+
 from tests.mock_dhis2_get import MockDHIS2Client
 from tests.mock_dhis2_post import (
     MOCK_DHIS2_ERROR_409_RESPONSE_AOC,
@@ -42,26 +42,18 @@ def test_push_missing_mandatory_columns():
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
     cols = ["period", "orgUnit", "categoryOptionCombo", "attributeOptionCombo", "value"]
     empty_df = pl.DataFrame({col: [] for col in cols})
-    with pytest.raises(
-        PusherError, match=r"Input data is missing mandatory columns: dx"
-    ):
+    with pytest.raises(PusherError, match=r"Input data is missing mandatory columns: dx"):
         pusher.push_data(df_data=empty_df)
 
 
 def test_push_wrong_input_type():
     """Test the push of data points to DHIS2."""
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
-    with pytest.raises(
-        PusherError, match=r"Input data must be a pandas or polars DataFrame."
-    ):
+    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
         pusher.push_data(df_data=[])
-    with pytest.raises(
-        PusherError, match=r"Input data must be a pandas or polars DataFrame."
-    ):
+    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
         pusher.push_data(df_data="not a dataframe")
-    with pytest.raises(
-        PusherError, match=r"Input data must be a pandas or polars DataFrame."
-    ):
+    with pytest.raises(PusherError, match=r"Input data must be a pandas or polars DataFrame."):
         pusher.push_data(df_data={})
 
 
@@ -69,9 +61,7 @@ def test_push_serialize_data_point_valid():
     """Test the serialization of a DataPointModel to JSON format for DHIS2."""
     data_point = (
         DHIS2Extractor(dhis2_client=MockDHIS2Client())
-        .data_elements._retrieve_data(
-            data_elements=["AAA111"], org_units=[], period="202501"
-        )
+        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
         .slice(0, 1)
     )
 
@@ -90,9 +80,7 @@ def test_push_serialize_data_point_to_delete():
     """Test the serialization of a DataPointModel to delete JSON format for DHIS2."""
     data_point = (
         DHIS2Extractor(dhis2_client=MockDHIS2Client())
-        .data_elements._retrieve_data(
-            data_elements=["AAA111"], org_units=[], period="202501"
-        )
+        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
         .slice(3, 1)
     )
 
@@ -110,9 +98,7 @@ def test_push_serialize_data_point_to_delete():
 
 def test_push_classify_points():
     """Test the mapping of data elements."""
-    data_points = DHIS2Extractor(
-        dhis2_client=MockDHIS2Client()
-    ).data_elements._retrieve_data(
+    data_points = DHIS2Extractor(dhis2_client=MockDHIS2Client()).data_elements._retrieve_data(
         data_elements=["AAA111", "BBB222", "CCC333"], org_units=[], period="202501"
     )
     assert isinstance(data_points, pl.DataFrame)
@@ -140,20 +126,14 @@ def test_push_log_invalid_data_points():
 
     with patch.object(pusher, "_log_message") as mock_log_message:
         pusher._log_ignored_or_na(not_valid)
-        assert mock_log_message.call_count == 5, (
-            "Expected a log message for each invalid data point."
-        )
+        assert mock_log_message.call_count == 5, "Expected a log message for each invalid data point."
         for idx, call in enumerate(mock_log_message.call_args_list):
             if idx == 0:
                 log_message = call.args[0]
-                assert "4 data points will be  ignored" in log_message, (
-                    f"Unexpected log message: {log_message}"
-                )
+                assert "4 data points will be  ignored" in log_message, f"Unexpected log message: {log_message}"
             else:
                 log_message = call.args[0]
-                assert f"Data point ignored: dx=INVALID{idx}" in log_message, (
-                    f"Unexpected log message: {log_message}"
-                )
+                assert f"Data point ignored: dx=INVALID{idx}" in log_message, f"Unexpected log message: {log_message}"
 
 
 def test_push_data_point():
@@ -161,9 +141,7 @@ def test_push_data_point():
     # 1 valid datapoint
     data_points = (
         DHIS2Extractor(dhis2_client=MockDHIS2Client())
-        .data_elements._retrieve_data(
-            data_elements=["AAA111"], org_units=[], period="202501"
-        )
+        .data_elements._retrieve_data(data_elements=["AAA111"], org_units=[], period="202501")
         .slice(0, 1)
     )
 
@@ -195,16 +173,11 @@ def test_push_data_points_connection_error():
         "post",
         return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_503_RESPONSE, status_code=503),
     ):
-        with pytest.raises(
-            PusherError, match=r"Server error: Service temporarily unavailable"
-        ):
+        with pytest.raises(PusherError, match=r"Server error: Service temporarily unavailable"):
             pusher._push_data_points([{"dummy_datapoint": "1"}])
         # After the exception, check the summary
         assert len(pusher.summary["ERRORS"]) == 1
-        assert (
-            pusher.summary["ERRORS"][0]["message"]
-            == "Server error: Service temporarily unavailable"
-        )
+        assert pusher.summary["ERRORS"][0]["message"] == "Server error: Service temporarily unavailable"
         assert pusher.summary["ERRORS"][0]["server_error_code"] == "503"
 
 
@@ -245,13 +218,9 @@ def test_push_data_points_data_element_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(
-            MOCK_DHIS2_ERROR_409_RESPONSE_DE, status_code=409
-        ),
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_DE, status_code=409),
     ):
-        pusher._push_data_points(
-            invalid_data_points
-        )  # access private method for error handling testing
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -298,13 +267,9 @@ def test_push_data_points_org_unit_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(
-            MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS, status_code=409
-        ),
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_ORG_UNITS, status_code=409),
     ):
-        pusher._push_data_points(
-            invalid_data_points
-        )  # access private method for error handling testing
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -351,13 +316,9 @@ def test_push_data_points_period_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(
-            MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD, status_code=409
-        ),
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_PERIOD, status_code=409),
     ):
-        pusher._push_data_points(
-            invalid_data_points
-        )  # access private method for error handling testing
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -404,13 +365,9 @@ def test_push_data_points_coc_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(
-            MOCK_DHIS2_ERROR_409_RESPONSE_COC, status_code=409
-        ),
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_COC, status_code=409),
     ):
-        pusher._push_data_points(
-            invalid_data_points
-        )  # access private method for error handling testing
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -457,13 +414,9 @@ def test_push_data_points_aoc_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(
-            MOCK_DHIS2_ERROR_409_RESPONSE_AOC, status_code=409
-        ),
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_AOC, status_code=409),
     ):
-        pusher._push_data_points(
-            invalid_data_points
-        )  # access private method for error handling testing
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 1
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
@@ -510,13 +463,9 @@ def test_push_data_points_value_format_error():
     with patch.object(
         pusher.dhis2_client.api.session,
         "post",
-        return_value=MockDHIS2Response(
-            MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT, status_code=409
-        ),
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_VALUE_FORMAT, status_code=409),
     ):
-        pusher._push_data_points(
-            invalid_data_points
-        )  # access private method for error handling testing
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
         assert pusher.summary["import_counts"]["imported"] == 2
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 1
diff --git a/d2d_development/tests/test_utils.py b/d2d_development/tests/test_utils.py
index 06fc82c..4a7cc35 100644
--- a/d2d_development/tests/test_utils.py
+++ b/d2d_development/tests/test_utils.py
@@ -5,7 +5,6 @@
 import pandas as pd
 import polars as pl
 import pytest
-
 from d2d_development.exceptions import ExtractorError
 from d2d_development.utils import log_message, save_to_parquet
 
@@ -106,9 +105,7 @@ def test_write_exception_cleanup(tmp_path: Path, monkeypatch: pytest.MonkeyPatch
     df = pd.DataFrame({"a": [1]})
     file = tmp_path / "fail.parquet"
     # Patch to_parquet to raise
-    monkeypatch.setattr(
-        df, "to_parquet", lambda *a, **k: (_ for _ in ()).throw(Exception("fail"))
-    )
+    monkeypatch.setattr(df, "to_parquet", lambda *a, **k: (_ for _ in ()).throw(Exception("fail")))
     with pytest.raises(ExtractorError):
         save_to_parquet(df, file)
     # Check no temp files left

From 657fa538f3f8b56a0266754da4daf1d0c58cadd5 Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Fri, 27 Mar 2026 09:47:14 +0100
Subject: [PATCH 4/8] fix(test): utils tests + move ruff rules to d2d_dev only

---
 d2d_development/d2d_development/utils.py |  4 +-
 d2d_development/pyproject.toml           | 68 +++++++++++++++++++++++
 pyproject.toml                           | 70 ------------------------
 3 files changed, 71 insertions(+), 71 deletions(-)
 delete mode 100644 pyproject.toml

diff --git a/d2d_development/d2d_development/utils.py b/d2d_development/d2d_development/utils.py
index fceb493..55554fc 100644
--- a/d2d_development/d2d_development/utils.py
+++ b/d2d_development/d2d_development/utils.py
@@ -6,6 +6,8 @@
 import polars as pl
 from openhexa.sdk import current_run
 
+from d2d_development.exceptions import ExtractorError
+
 
 def log_message(
     logger: logging.Logger,
@@ -86,4 +88,4 @@ def save_to_parquet(data: pl.DataFrame | pd.DataFrame, filename: Path) -> None:
         # Clean up the temp file if it exists
         if temp_filename is not None and temp_filename.exists():
             temp_filename.unlink()
-        raise Exception(f"Failed to save parquet file to {filename}") from e
+        raise ExtractorError(f"Failed to save parquet file to {filename}") from e
diff --git a/d2d_development/pyproject.toml b/d2d_development/pyproject.toml
index 99e6eb8..44cf072 100644
--- a/d2d_development/pyproject.toml
+++ b/d2d_development/pyproject.toml
@@ -29,3 +29,71 @@ testpaths = ["tests"]
 where = ["."]
 exclude = ["tests*"]
 
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.format]
+docstring-code-format = true
+docstring-code-line-length = 100
+
+[tool.ruff.lint]
+preview = true
+select = [
+    "F", # Pyflakes
+    "E", # pycodestyle
+    "I", # isort
+    "D", # pydocstyle
+    "UP", # pyupgrade
+    "ANN", # flake8-annotations
+    "B", # bugbear
+    "A", # flake8-builtins
+    "COM", # flake8-commas
+    "FA", # flake8-future-annotations
+    "PT", # flake8-pytest-style
+    "Q", # flake8-quotes
+    "RET", # flake8-return
+    "SIM", # flake8-simplify
+    "PTH", # flake8-use-pathlib
+    "NPY", # NumPy rules
+    "PD", # pandas rules
+    "N", # pep8-naming
+    "DOC", # pydoclint
+    "PLC", # pylint convention
+    "PLE", # pylint error
+    "PLW", # pylint warning
+    "RUF", # ruff specific rules
+]
+
+ignore = [
+    "D100", # Missing docstring in public module
+    "D104", # Missing docstring in public package
+    "D105", # Missing docstring in magic method
+    "D106", # Missing docstring in public nested class
+    "D107", # Missing docstring in __init__
+    "D401", # First line should be in imperative mood
+    "D413", # Missing blank line after last section
+    "D203", # 1 blank line required before class docstring
+    "SIM108", # Use ternary operators
+    "SIM102", # Use a single if statement instead of nested if statements
+    "SIM114", # Combine `if` branches
+    "DOC501", # Raised exception {id} missing from docstring	
+    "DOC502", # Raised exception is not explicitly raised: {id}	
+    "RUF022", # `__all__` is not sorted
+    "RUF005", # Consider expression instead of concatenation
+    "RUF069", # Unreliable floating point equality comparison    
+    "PLR0904", # Too many public methods ({methods} > {max_methods})
+    "PLR0911", # Too many return statements ({returns} > {max_returns})
+    "PLR0912", # Too many branches ({branches} > {max_branches})
+    "PLR0913", # Too many arguments ({arguments} > {max_arguments})
+    "PLR0914", # Too many local variables ({variables} > {max_variables})
+    "PLR0915", # Too many statements ({statements} > {max_statements})
+    "PLR0916", # Too many Boolean expressions ({expressions} > {max_expressions})
+    "PLR1702", # Too many nested blocks ({blocks} > {max_blocks}),
+    "COM812", # Missing trailing comma
+]
+
+[tool.ruff.lint.flake8-annotations]
+allow-star-arg-any = true
+mypy-init-return = true
+suppress-dummy-args = true
+suppress-none-returning = true
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 7650ea0..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,70 +0,0 @@
-
-[tool.ruff]
-line-length = 120
-
-[tool.ruff.format]
-docstring-code-format = true
-docstring-code-line-length = 100
-
-[tool.ruff.lint]
-preview = true
-select = [
-    "F", # Pyflakes
-    "E", # pycodestyle
-    "I", # isort
-    "D", # pydocstyle
-    "UP", # pyupgrade
-    "ANN", # flake8-annotations
-    "B", # bugbear
-    "A", # flake8-builtins
-    "COM", # flake8-commas
-    "FA", # flake8-future-annotations
-    "PT", # flake8-pytest-style
-    "Q", # flake8-quotes
-    "RET", # flake8-return
-    "SIM", # flake8-simplify
-    "PTH", # flake8-use-pathlib
-    "NPY", # NumPy rules
-    "PD", # pandas rules
-    "N", # pep8-naming
-    "DOC", # pydoclint
-    "PLC", # pylint convention
-    "PLE", # pylint error
-    "PLW", # pylint warning
-    "RUF", # ruff specific rules
-]
-
-ignore = [
-    "D100", # Missing docstring in public module
-    "D104", # Missing docstring in public package
-    "D105", # Missing docstring in magic method
-    "D106", # Missing docstring in public nested class
-    "D107", # Missing docstring in __init__
-    "D401", # First line should be in imperative mood
-    "D413", # Missing blank line after last section
-    "D203", # 1 blank line required before class docstring
-    "SIM108", # Use ternary operators
-    "SIM102", # Use a single if statement instead of nested if statements
-    "SIM114", # Combine `if` branches
-    "DOC501", # Raised exception {id} missing from docstring	
-    "DOC502", # Raised exception is not explicitly raised: {id}	
-    "RUF022", # `__all__` is not sorted
-    "RUF005", # Consider expression instead of concatenation
-    "RUF069", # Unreliable floating point equality comparison
-    "PD901", # Avoid using the generic variable name df for dataframes
-    "PLR0904", # Too many public methods ({methods} > {max_methods})
-    "PLR0911", # Too many return statements ({returns} > {max_returns})
-    "PLR0912", # Too many branches ({branches} > {max_branches})
-    "PLR0913", # Too many arguments ({arguments} > {max_arguments})
-    "PLR0914", # Too many local variables ({variables} > {max_variables})
-    "PLR0915", # Too many statements ({statements} > {max_statements})
-    "PLR0916", # Too many Boolean expressions ({expressions} > {max_expressions})
-    "PLR1702", # Too many nested blocks ({blocks} > {max_blocks}),
-    "COM812", # Missing trailing comma
-]
-
-[tool.ruff.lint.flake8-annotations]
-allow-star-arg-any = true
-mypy-init-return = true
-suppress-dummy-args = true
-suppress-none-returning = true
\ No newline at end of file

From 55b88cc52d8254d6c6e38967761fe39ee3c80059 Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Fri, 27 Mar 2026 09:49:23 +0100
Subject: [PATCH 5/8] fix(): ruff imports

---
 d2d_development/tests/test_data_point.py | 2 +-
 d2d_development/tests/test_extract.py    | 2 +-
 d2d_development/tests/test_push.py       | 2 +-
 d2d_development/tests/test_utils.py      | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/d2d_development/tests/test_data_point.py b/d2d_development/tests/test_data_point.py
index 0c28766..824ba02 100644
--- a/d2d_development/tests/test_data_point.py
+++ b/d2d_development/tests/test_data_point.py
@@ -1,6 +1,6 @@
 import polars as pl
-from d2d_development.data_models import DataPointModel
 
+from d2d_development.data_models import DataPointModel
 from tests.mock_dhis2_get import MockDHIS2Client
 
 
diff --git a/d2d_development/tests/test_extract.py b/d2d_development/tests/test_extract.py
index 91521b9..edbbe9d 100644
--- a/d2d_development/tests/test_extract.py
+++ b/d2d_development/tests/test_extract.py
@@ -2,8 +2,8 @@
 from unittest.mock import patch
 
 import polars as pl
-from d2d_development.extract import DHIS2Extractor
 
+from d2d_development.extract import DHIS2Extractor
 from tests.mock_dhis2_get import MockDHIS2Client
 
 
diff --git a/d2d_development/tests/test_push.py b/d2d_development/tests/test_push.py
index 536ca5f..cf1fec3 100644
--- a/d2d_development/tests/test_push.py
+++ b/d2d_development/tests/test_push.py
@@ -2,9 +2,9 @@
 
 import polars as pl
 import pytest
+
 from d2d_development.extract import DHIS2Extractor
 from d2d_development.push import DHIS2Pusher, PusherError
-
 from tests.mock_dhis2_get import MockDHIS2Client
 from tests.mock_dhis2_post import (
     MOCK_DHIS2_ERROR_409_RESPONSE_AOC,
diff --git a/d2d_development/tests/test_utils.py b/d2d_development/tests/test_utils.py
index 4a7cc35..37356c3 100644
--- a/d2d_development/tests/test_utils.py
+++ b/d2d_development/tests/test_utils.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import polars as pl
 import pytest
+
 from d2d_development.exceptions import ExtractorError
 from d2d_development.utils import log_message, save_to_parquet
 

From 630aae4f615d939b7df7422e5a295ad6eb76c932 Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Fri, 27 Mar 2026 12:00:14 +0100
Subject: [PATCH 6/8] feature(DHIS2Push): Summary delete & ignore + test + docs

---
 d2d_development/d2d_development/push.py | 67 +++++++++++++++++++----
 d2d_development/tests/test_push.py      | 71 +++++++++++++++++--------
 2 files changed, 107 insertions(+), 31 deletions(-)

diff --git a/d2d_development/d2d_development/push.py b/d2d_development/d2d_development/push.py
index c6286b2..7e335a1 100644
--- a/d2d_development/d2d_development/push.py
+++ b/d2d_development/d2d_development/push.py
@@ -23,6 +23,7 @@ def __init__(
         logging_interval: int = 50000,
         logger: logging.Logger | None = None,
     ):
+        """Initialize the DHIS2Pusher."""
         self.dhis2_client = dhis2_client
 
         if import_strategy not in {"CREATE", "UPDATE", "CREATE_AND_UPDATE"}:
@@ -42,7 +43,19 @@ def push_data(
         self,
         df_data: pd.DataFrame | pl.DataFrame,
     ) -> None:
-        """Push formatted data to DHIS2."""
+        """Push formatted data to DHIS2.
+
+        Parameters
+        ----------
+        df_data : pd.DataFrame or pl.DataFrame
+            DataFrame containing the data points to be pushed. Must include the following columns:
+            'dx', 'period', 'orgUnit', 'categoryOptionCombo', 'attributeOptionCombo', and 'value'.
+
+        Raises
+        ------
+        PusherError
+            If the input data is not a DataFrame or if mandatory fields are missing.
+        """
         self._reset_summary()
         self._set_summary_import_options()
 
@@ -101,6 +114,7 @@ def _classify_data_points(self, data_points: pl.DataFrame) -> tuple[pl.DataFrame
         return valid, to_delete, not_valid
 
     def _set_summary_import_options(self):
+        """Set the import options in the summary dictionary based on the current configuration."""
         self.summary["import_options"] = {
             "importStrategy": self.import_strategy,
             "dryRun": self.dry_run,
@@ -109,7 +123,13 @@ def _set_summary_import_options(self):
         }
 
     def _push_valid(self, data_points_valid: pl.DataFrame) -> None:
-        """Push valid values to DHIS2."""
+        """Push valid values to DHIS2.
+
+        Parameters
+        ----------
+        data_points_valid: pl.DataFrame
+            DataFrame containing valid data points to be pushed to DHIS2.
+        """
         if len(data_points_valid) == 0:
             self._log_message("No data to push.")
             return
@@ -119,6 +139,7 @@ def _push_valid(self, data_points_valid: pl.DataFrame) -> None:
         self._log_message(f"Data points push summary:  {self.summary['import_counts']}")
 
     def _push_to_delete(self, data_points_to_delete: pl.DataFrame) -> None:
+        """Push data points with NA values to DHIS2 to delete them."""
         if data_points_to_delete.height == 0:
             return
 
@@ -128,11 +149,19 @@ def _push_to_delete(self, data_points_to_delete: pl.DataFrame) -> None:
         self._log_message(f"Data points delete summary: {self.summary['import_counts']}")
 
     def _log_ignored_or_na(self, data_points: pl.DataFrame, is_na: bool = False):
-        """Logs ignored or NA data points."""
+        """Logs ignored or NA data points.
+
+        Parameters
+        ----------
+        data_points: pl.DataFrame
+            DataFrame containing the data points to be logged as ignored or NA.
+        is_na: bool
+            Flag whether the data points are NA (to be deleted) or ignored. Defaults to False (ignored).
+        """
         data_points_list = data_points.to_dicts()
         if len(data_points_list) > 0:
             self._log_message(
-                f"{len(data_points_list)} data points will be  {'set to NA' if is_na else 'ignored'}. "
+                f"{len(data_points_list)} data points will be {'set to NA' if is_na else 'ignored'}. "
                 "Please check the last execution report for details.",
                 level="warning",
             )
@@ -141,6 +170,9 @@ def _log_ignored_or_na(self, data_points: pl.DataFrame, is_na: bool = False):
                 self._log_message(
                     f"{i}. Data point {'NA' if is_na else 'ignored'}: {row_str}", log_current_run=False, level="warning"
                 )
+                if is_na:
+                    self.summary["delete_data_points"].append(ignored)
+                self.summary["ignored_data_points"].append(ignored)
 
     def _log_message(self, message: str, level: str = "info", log_current_run: bool = True, error_details: str = ""):
         """Log a message using the configured logging function."""
@@ -174,7 +206,7 @@ def _serialize_data_points(self, data_points: pl.DataFrame) -> list[dict]:
 
     def _log_summary_errors(self):
         """Logs all the errors in the summary dictionary using the configured logging."""
-        errors = self.summary.get("ERRORS", [])
+        errors = self.summary.get("import_errors", [])
         if not errors:
             self._log_message("No errors found in the summary.")
         else:
@@ -204,7 +236,13 @@ def _push_data_points(
         self,
         data_point_list: list[dict],
     ) -> None:
-        """dry_run: Set to true to get an import summary without actually importing data (DHIS2)."""
+        """Push data points to DHIS2 in chunks, handling responses and logging progress.
+
+        Parameters
+        ----------
+        data_point_list: list[dict]
+            A list of dictionaries, each representing a data point formatted for DHIS2.
+        """
         total_data_points = len(data_point_list)
         processed_points = 0
         last_logged_at = 0
@@ -230,7 +268,7 @@ def _push_data_points(
                     self._update_import_counts(response)
                 else:
                     # No response JSON, at least log the request error msg
-                    self.summary["ERRORS"].extend(
+                    self.summary["import_errors"].extend(
                         [{"chunk": chunk_id, "period": chunk[0].get("period", "-"), "exception": str(e)}]
                     )
                 self._extract_conflicts(response)
@@ -265,14 +303,17 @@ def _raise_server_errors(self, r: requests.Response) -> None:
                 "server_error_code": f"{r.status_code}",
                 "message": f"Server error: {message}",
             }
-            self.summary["ERRORS"].append(error_info)
+            self.summary["import_errors"].append(error_info)
             raise PusherError(f"Server error: {message}") from None
 
     def _reset_summary(self) -> None:
+        """Reset the summary dictionary to its initial state before starting a new push operation."""
         self.summary = {
             "import_counts": {"imported": 0, "updated": 0, "ignored": 0, "deleted": 0},
             "import_options": {},
-            "ERRORS": [],
+            "import_errors": [],
+            "ignored_data_points": [],
+            "delete_data_points": [],
         }
 
     def _split_list(self, src_list: list, length: int):
@@ -285,6 +326,11 @@ def _split_list(self, src_list: list, length: int):
             yield src_list[i : i + length]
 
     def _safe_json(self, r: requests.Response) -> dict | None:
+        """Safely parse the JSON response from a requests.Response object.
+
+        Returns:
+            dict: The parsed JSON response if successful, or None if parsing fails or if the response is None.
+        """
         if r is None:
             return None
 
@@ -294,6 +340,7 @@ def _safe_json(self, r: requests.Response) -> dict | None:
             return None
 
     def _update_import_counts(self, response: dict) -> None:
+        """Update the import counts in the summary dictionary based on the response from DHIS2."""
         if not response:
             return
         if "importCount" in response:
@@ -327,4 +374,4 @@ def _extract_conflicts(self, response: dict) -> None:
         all_errors = conflicts + error_reports
 
         if all_errors:
-            self.summary.setdefault("ERRORS", []).extend(all_errors)
+            self.summary.setdefault("import_errors", []).extend(all_errors)
diff --git a/d2d_development/tests/test_push.py b/d2d_development/tests/test_push.py
index cf1fec3..5f08ee3 100644
--- a/d2d_development/tests/test_push.py
+++ b/d2d_development/tests/test_push.py
@@ -130,10 +130,39 @@ def test_push_log_invalid_data_points():
         for idx, call in enumerate(mock_log_message.call_args_list):
             if idx == 0:
                 log_message = call.args[0]
-                assert "4 data points will be  ignored" in log_message, f"Unexpected log message: {log_message}"
+                assert "4 data points will be ignored" in log_message, f"Unexpected log message: {log_message}"
             else:
                 log_message = call.args[0]
                 assert f"Data point ignored: dx=INVALID{idx}" in log_message, f"Unexpected log message: {log_message}"
+        # Extra check for number of ignored data points in summary
+        assert "ignored_data_points" in pusher.summary, "summary should contain 'ignored_data_points' key"
+        assert len(pusher.summary["ignored_data_points"]) == 4, "Expected 4 ignored data points in summary"
+
+
+def test_push_log_delete_data_points():
+    """Test the logging of data points flagged for deletion (NA values)."""
+    data_points = (
+        DHIS2Extractor(dhis2_client=MockDHIS2Client())
+        .data_elements._retrieve_data(data_elements=[], org_units=[], period="202501")
+        .slice(3, 1)  # Select the single data point to delete (row 4, NA value) for testing
+    )
+    print(data_points)
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    _, to_delete, _ = pusher._classify_data_points(data_points)
+
+    with patch.object(pusher, "_log_message") as mock_log_message:
+        pusher._log_ignored_or_na(to_delete, is_na=True)
+        assert mock_log_message.call_count == 2, "Expected a log message for each invalid data point."
+        for idx, call in enumerate(mock_log_message.call_args_list):
+            if idx == 0:
+                log_message = call.args[0]
+                assert "1 data points will be set to NA" in log_message, f"Unexpected log message: {log_message}"
+            else:
+                log_message = call.args[0]
+                assert "Data point NA: dx=DELETE1" in log_message, f"Unexpected log message: {log_message}"
+        # Extra check for number of data points to delete in summary
+        assert "delete_data_points" in pusher.summary, "summary should contain 'delete_data_points' key"
+        assert len(pusher.summary["delete_data_points"]) == 1, "Expected 4 ignored data points in summary"
 
 
 def test_push_data_point():
@@ -176,9 +205,9 @@ def test_push_data_points_connection_error():
         with pytest.raises(PusherError, match=r"Server error: Service temporarily unavailable"):
             pusher._push_data_points([{"dummy_datapoint": "1"}])
         # After the exception, check the summary
-        assert len(pusher.summary["ERRORS"]) == 1
-        assert pusher.summary["ERRORS"][0]["message"] == "Server error: Service temporarily unavailable"
-        assert pusher.summary["ERRORS"][0]["server_error_code"] == "503"
+        assert len(pusher.summary["import_errors"]) == 1
+        assert pusher.summary["import_errors"][0]["message"] == "Server error: Service temporarily unavailable"
+        assert pusher.summary["import_errors"][0]["server_error_code"] == "503"
 
 
 def test_push_data_points_data_element_error():
@@ -225,9 +254,9 @@ def test_push_data_points_data_element_error():
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
         assert pusher.summary["import_counts"]["deleted"] == 0
-        assert len(pusher.summary["ERRORS"]) == 2
-        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_1"
-        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_2"
+        assert len(pusher.summary["import_errors"]) == 2
+        assert pusher.summary["import_errors"][0]["object"] == "INVALID_1"
+        assert pusher.summary["import_errors"][1]["object"] == "INVALID_2"
 
 
 def test_push_data_points_org_unit_error():
@@ -274,9 +303,9 @@ def test_push_data_points_org_unit_error():
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
         assert pusher.summary["import_counts"]["deleted"] == 0
-        assert len(pusher.summary["ERRORS"]) == 2
-        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_1_OU"
-        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_2_OU"
+        assert len(pusher.summary["import_errors"]) == 2
+        assert pusher.summary["import_errors"][0]["object"] == "INVALID_1_OU"
+        assert pusher.summary["import_errors"][1]["object"] == "INVALID_2_OU"
 
 
 def test_push_data_points_period_error():
@@ -323,9 +352,9 @@ def test_push_data_points_period_error():
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
         assert pusher.summary["import_counts"]["deleted"] == 0
-        assert len(pusher.summary["ERRORS"]) == 2
-        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_PERIOD_1"
-        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_PERIOD_2"
+        assert len(pusher.summary["import_errors"]) == 2
+        assert pusher.summary["import_errors"][0]["object"] == "INVALID_PERIOD_1"
+        assert pusher.summary["import_errors"][1]["object"] == "INVALID_PERIOD_2"
 
 
 def test_push_data_points_coc_error():
@@ -372,9 +401,9 @@ def test_push_data_points_coc_error():
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
         assert pusher.summary["import_counts"]["deleted"] == 0
-        assert len(pusher.summary["ERRORS"]) == 2
-        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_COC_1"
-        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_COC_2"
+        assert len(pusher.summary["import_errors"]) == 2
+        assert pusher.summary["import_errors"][0]["object"] == "INVALID_COC_1"
+        assert pusher.summary["import_errors"][1]["object"] == "INVALID_COC_2"
 
 
 def test_push_data_points_aoc_error():
@@ -421,9 +450,9 @@ def test_push_data_points_aoc_error():
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 2
         assert pusher.summary["import_counts"]["deleted"] == 0
-        assert len(pusher.summary["ERRORS"]) == 2
-        assert pusher.summary["ERRORS"][0]["object"] == "INVALID_AOC_1"
-        assert pusher.summary["ERRORS"][1]["object"] == "INVALID_AOC_2"
+        assert len(pusher.summary["import_errors"]) == 2
+        assert pusher.summary["import_errors"][0]["object"] == "INVALID_AOC_1"
+        assert pusher.summary["import_errors"][1]["object"] == "INVALID_AOC_2"
 
 
 def test_push_data_points_value_format_error():
@@ -470,5 +499,5 @@ def test_push_data_points_value_format_error():
         assert pusher.summary["import_counts"]["updated"] == 0
         assert pusher.summary["import_counts"]["ignored"] == 1
         assert pusher.summary["import_counts"]["deleted"] == 0
-        assert len(pusher.summary["ERRORS"]) == 1
-        assert pusher.summary["ERRORS"][0]["object"] == "VALID2"
+        assert len(pusher.summary["import_errors"]) == 1
+        assert pusher.summary["import_errors"][0]["object"] == "VALID2"

From d0e3b919afe9aa796695fa07ecc0bc6494a2c137 Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Fri, 27 Mar 2026 12:00:52 +0100
Subject: [PATCH 7/8] chores(DHIS2Extract): docs + Update readme

---
 d2d_development/README.md                  | 125 +++++++++++++++++++--
 d2d_development/d2d_development/extract.py |  86 +++++++++++++-
 2 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/d2d_development/README.md b/d2d_development/README.md
index 962394b..73fe283 100644
--- a/d2d_development/README.md
+++ b/d2d_development/README.md
@@ -17,7 +17,6 @@ pip install git+https://github.com/BLSQ/openhexa-ds-developments.git#subdirector
 **Description:**  
 Main class to extract data from DHIS2. It provides unified handlers for extracting data elements, indicators, and reporting rates, saving them to disk in a standardized format.
 
-
 **Configuration Parameters:**
 When initializing `DHIS2Extractor`, you can configure the following parameters:
 
@@ -31,6 +30,49 @@ Example:
 extractor = DHIS2Extractor(dhis2_client, download_mode="DOWNLOAD_NEW", return_existing_file=True)
 ```
 
+**Parameters for `download_period` (DataElementsExtractor, IndicatorsExtractor, ReportingRatesExtractor):**
+
+- **data_elements / indicators / reporting_rates** (`list[str]`):  
+  A list of DHIS2 UIDs to extract.  
+  - For `data_elements.download_period`, use `data_elements=["de1", "de2"]`.
+  - For `indicators.download_period`, use `indicators=["ind1", ...]`.
+  - For `reporting_rates.download_period`, use `reporting_rates=["rr1", ...]`.
+
+- **org_units** (`list[str]`):  
+  List of DHIS2 organisation unit UIDs to extract data for (e.g., `["ou1", "ou2"]`).
+
+- **period** (`str`):  
+  The DHIS2 period to extract data for (e.g., `"202401"` for January 2024). Must be a valid DHIS2 period string.
+
+- **output_dir** (`Path`):  
+  The directory where the extracted data file will be saved. The file will be named `data_<period>.parquet` by default unless you specify a custom filename.
+
+- **filename** (`str | None`, optional):  
+  Custom filename for the output file. If not provided, the default is `data_<period>.parquet`. Using the default is recommended when extracting multiple periods.
+
+- **kwargs** (`dict`, optional):  
+  Additional keyword arguments for advanced extraction options.  
+  - For data elements: `last_updated` (not yet implemented).
+  - For indicators: `include_cocs` (`bool`): whether to include category option combos; use it only when data element UIDs are passed.
+  - For reporting rates: currently no extra options.
+
+**Returns:**  
+- The path to the saved Parquet file (`Path`), or `None` if no data was extracted or the file already exists and `return_existing_file` is `False`.
+
+**Output Format:**
+The extraction methods always save the data in a table with a fixed column structure. Each extraction creates a separate .parquet file, where each row represents a data point and the columns are always:
+
+- **dx**: Data element, indicator, or reporting rate UID
+- **period**: Period (e.g., `"202401"`)
+- **orgUnit**: Organisation unit UID
+- **categoryOptionCombo**: Category option combo UID 
+- **attributeOptionCombo**: Attribute option combo UID 
+- **rateMetric**: Rate metric (for reporting rates)
+- **domainType**: Data domain (e.g., `"AGGREGATED"`)
+- **value**: The value for the data point
+
+The file path to the saved Parquet file is returned by the extraction method. You can load the output using pandas, polars, or any tool that supports Parquet files.
+
 **Usage Example:**
 ```python
 from d2d_development.extract import DHIS2Extractor
@@ -49,6 +91,7 @@ for period in ["202401", "202402", "202403"]:
         period=period,
         output_dir=Path("/output")
     )
+
 # Extract one period of indicators
 extractor.indicators.download_period(
 	indicators=["ind1"],
@@ -56,6 +99,7 @@ extractor.indicators.download_period(
 	period="202401",
 	output_dir=Path("/tmp")
 )
+
 # Extract one period of reporting rates
 extractor.reporting_rates.download_period(
 	reporting_rates=["rr1"],
@@ -63,18 +107,44 @@ extractor.reporting_rates.download_period(
 	period="202401",
 	output_dir=Path("/tmp")
 )
+
+# Example: load an output file
+import polars as pl
+df = pl.read_parquet(Path("/tmp") / "data_202401.parquet")  # Default naming: data_<period>.parquet
+print(df.head())
 ```
 
+**Note:**  
+- The same pattern applies for `extractor.indicators.download_period` and `extractor.reporting_rates.download_period`, just change the first argument name accordingly.
+- All extracted files are saved in Parquet format by default.
+
+
+---
+
+
 ### DHIS2Pusher
 
 **Description:**  
 Main class to handle pushing data to DHIS2. It validates and pushes formatted data (pandas or polars DataFrame) to a DHIS2 instance.
 
+**Input Data Format for `DHIS2Pusher`**
+
+The `push_data` method expects a pandas or polars DataFrame with the following columns (all required):
+
+- **dx**: Data element, indicator, or reporting rate UID
+- **period**: Period (e.g., `"202401"`)
+- **orgUnit**: Organisation unit UID
+- **categoryOptionCombo**: Category option combo UID
+- **attributeOptionCombo**: Attribute option combo UID
+- **value**: The value to be pushed (numeric or string, depending on DHIS2 configuration)
+
+If any of these columns are missing, or if the input is not a pandas or polars DataFrame, a `PusherError` will be raised.
+
 **Configuration Parameters:**
 When initializing `DHIS2Pusher`, you can configure the following parameters:
 
 - `dhis2_client` (required): The DHIS2 client instance.
-- `import_strategy`: Strategy flag passed to the DHIS2 API for data import. Accepts "CREATE", "UPDATE", or "CREATE_AND_UPDATE" (default: "CREATE_AND_UPDATE"). This only controls how the DHIS2 server processes the data; it does not affect client-side logic.
+- `import_strategy`: Strategy flag passed to the DHIS2 API for data import. Accepts "CREATE", "UPDATE", or "CREATE_AND_UPDATE" (default: "CREATE_AND_UPDATE"). **NOTE:** This only controls how the DHIS2 server processes the data; it does not affect client-side logic.
 - `dry_run`: If `True`, simulates the push without making changes on the server (default: `True`).
 - `max_post`: Maximum number of data points per POST request (default: `500`).
 - `logging_interval`: Log progress every N data points (default: `50000`).
@@ -88,13 +158,6 @@ from openhexa.toolbox.dhis2 import DHIS2
 import polars as pl
 
 dhis2_client = DHIS2(workspace.get_connection("dhis2-connection"))
-pusher = DHIS2Pusher(
-	dhis2_client,
-	import_strategy="CREATE_AND_UPDATE",  # or "CREATE", "UPDATE"
-	dry_run=False,
-	max_post=1000,
-	logging_interval=10000,
-)
 
 df = pl.DataFrame({
     "dx": ["de1"], 
@@ -103,5 +166,49 @@ df = pl.DataFrame({
     "categoryOptionCombo": ["coc"], 
     "attributeOptionCombo": ["aoc"], 
     "value": [123]})
+	
+pusher = DHIS2Pusher(
+	dhis2_client,
+	import_strategy="CREATE_AND_UPDATE",  # or "CREATE", "UPDATE"
+	dry_run=False,
+	max_post=1000,
+	logging_interval=10000,
+)
+
 pusher.push_data(df)
 ```
+
+**Accessing Push Summary Information**
+
+After calling `push_data`, the `DHIS2Pusher` instance provides detailed results of the push operation in its `summary` attribute. This dictionary contains:
+
+- `import_counts`: Number of data points imported, updated, ignored, or deleted (dict with keys: `imported`, `updated`, `ignored`, `deleted`).
+- `import_options`: The options used for the import (strategy, dry run, etc).
+- `import_errors`: List of errors, conflicts, or error reports returned by DHIS2 or encountered during the push.
+- `ignored_data_points`: List of data points that were ignored due to missing or invalid fields.
+- `delete_data_points`: List of data points that were marked for deletion (value is NA/null).
+
+**Example:**
+```python
+pusher.push_data(df)
+print(pusher.summary)
+# Example output:
+# {
+#   'import_counts': {'imported': 1, 'updated': 0, 'ignored': 0, 'deleted': 0},
+#   'import_options': {'importStrategy': 'CREATE_AND_UPDATE', 'dryRun': False, ...},
+#   'import_errors': [],
+#   'ignored_data_points': [],
+#   'delete_data_points': []
+# }
+```
+
+You can use these fields to programmatically inspect the results of your push, handle errors, or log/report the outcome. For example, to check if any data points were ignored:
+
+```python
+if pusher.summary["ignored_data_points"]:
+    print(f"Ignored {len(pusher.summary['ignored_data_points'])} data points:")
+    for dp in pusher.summary["ignored_data_points"]:
+        print(dp)
+```
+
+---
\ No newline at end of file
diff --git a/d2d_development/d2d_development/extract.py b/d2d_development/d2d_development/extract.py
index 2937377..407446c 100644
--- a/d2d_development/d2d_development/extract.py
+++ b/d2d_development/d2d_development/extract.py
@@ -8,11 +8,6 @@
 from .exceptions import ExtractorError
 from .utils import log_message, save_to_parquet
 
-# TODO:
-# 1) Refactor the extractors to (Following DHIS2 client endpoints):
-# -DataValueSetsExtractor (DE)
-# -AnalyticsExtractor (DE, indicators, ReportingRates)
-
 
 class DataElementsExtractor:
     """Handles downloading and formatting of data elements from DHIS2."""
@@ -74,6 +69,23 @@ def download_period(
             raise ExtractorError(f"Extract data elements download error : {e}") from e
 
     def _retrieve_data(self, data_elements: list[str], org_units: list[str], period: str, **kwargs) -> pl.DataFrame:  # noqa: ANN003
+        """Retrieve data from DHIS2 for the specified data elements, organization units, and period.
+
+        Parameters
+        ----------
+        data_elements : list[str]
+            List of DHIS2 data element UIDs to extract.
+        org_units : list[str]
+            List of DHIS2 organization unit UIDs to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract data for.
+        kwargs : dict
+            Additional keyword arguments for data retrieval; only `last_updated` is accepted, but it is not implemented yet.
+
+        Returns
+        -------
+        pl.DataFrame A DataFrame containing the retrieved data, formatted according to DHIS2 naming standards.
+        """
         if not self.extractor._valid_dhis2_period_format(period):
             raise ExtractorError(f"Invalid DHIS2 period format: {period}")
         last_updated = kwargs.get("last_updated")
@@ -96,6 +108,7 @@ class IndicatorsExtractor:
     """Handles downloading and formatting of indicators from DHIS2."""
 
     def __init__(self, extractor: "DHIS2Extractor"):
+        """Initialize the IndicatorsExtractor with a reference to the main DHIS2Extractor."""
         self.extractor = extractor
 
     def download_period(
@@ -155,6 +168,24 @@ def download_period(
             raise ExtractorError(f"Extract indicators download error : {e}") from e
 
     def _retrieve_data(self, indicators: list[str], org_units: list[str], period: str, **kwargs) -> pl.DataFrame:  # noqa: ANN003
+        """Retrieve data from DHIS2 for the specified indicators, organization units, and period.
+
+        Parameters
+        ----------
+        indicators : list[str]
+            List of DHIS2 indicator UIDs to extract.
+        org_units : list[str]
+            List of DHIS2 organization unit UIDs to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract data for.
+        kwargs : dict
+            Additional keyword arguments for data retrieval, only `include_cocs` currently implemented
+             to include category option combo mapping for data element ids passed to the DHIS2 client.
+
+        Returns
+        -------
+        pl.DataFrame A DataFrame containing the retrieved data, formatted according to DHIS2 naming standards.
+        """
         if not self.extractor._valid_dhis2_period_format(period):
             raise ExtractorError(f"Invalid DHIS2 period format: {period}")
 
@@ -184,6 +215,7 @@ class ReportingRatesExtractor:
     """Handles downloading and formatting of reporting rates from DHIS2."""
 
     def __init__(self, extractor: "DHIS2Extractor"):
+        """Initialize the ReportingRatesExtractor with a reference to the main DHIS2Extractor."""
         self.extractor = extractor
 
     def download_period(
@@ -240,6 +272,23 @@ def download_period(
             raise ExtractorError(f"Extract reporting rates download error : {e}") from e
 
     def _retrieve_data(self, reporting_rates: list[str], org_units: list[str], period: str, **kwargs) -> pl.DataFrame:  # noqa: ANN003
+        """Retrieve data from DHIS2 for the specified reporting rates, organization units, and period.
+
+        Parameters
+        ----------
+        reporting_rates : list[str]
+            List of DHIS2 reporting rate UIDs to extract.
+        org_units : list[str]
+            List of DHIS2 organization unit UIDs to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract data for.
+        kwargs : dict
+            Additional keyword arguments for data retrieval (not implemented).
+
+        Returns
+        -------
+        pl.DataFrame A DataFrame containing the retrieved data, formatted according to DHIS2 naming standards.
+        """
         if not self.extractor._valid_dhis2_period_format(period):
             raise ExtractorError(f"Invalid DHIS2 period format: {period}")
 
@@ -295,6 +344,7 @@ def __init__(
         return_existing_file: bool = False,
         logger: logging.Logger | None = None,
     ):
+        """Initialize the DHIS2Extractor with the given DHIS2 client and configuration."""
         self.dhis2_client = dhis2_client
         if download_mode not in {"DOWNLOAD_REPLACE", "DOWNLOAD_NEW"}:
             raise ExtractorError("Invalid 'download_mode', use 'DOWNLOAD_REPLACE' or 'DOWNLOAD_NEW'.")
@@ -317,6 +367,32 @@ def _handle_extract_for_period(
         filename: str | None = None,
         **kwargs,  # noqa: ANN003
     ) -> Path | None:
+        """Handles the extract process for a given period, including data retrieval, file saving, and logging.
+
+        Parameters
+        ----------
+        handler : DataElementsExtractor | IndicatorsExtractor | ReportingRatesExtractor
+            The specific handler to use for data retrieval.
+        data_products : list[str]
+            List of data product UIDs to extract (e.g., data elements, indicators, or reporting rates).
+        org_units : list[str]
+            List of DHIS2 organization unit UIDs to extract data for.
+        period : str
+            DHIS2 period (valid format) to extract data for.
+        output_dir : Path
+            Directory where extracted data files will be saved.
+        filename : str | None
+            Optional filename for the extracted data file. If None, a default name will be used.
+        kwargs : dict
+            Additional keyword arguments for data retrieval, such as `last_updated` for filtering data.
+
+        Returns
+        -------
+        Path | None
+            The path to the extracted data file, or None if no data was extracted or if
+            the file already exists and `return_existing_file` is False.
+
+        """
         output_dir.mkdir(parents=True, exist_ok=True)
         if filename:
             extract_fname = output_dir / filename

From 460d9782e0abf0166b47cdbda3b5ec0e26c8199e Mon Sep 17 00:00:00 2001
From: EMontandon <esteban14m@gmail.com>
Date: Mon, 30 Mar 2026 10:37:43 +0200
Subject: [PATCH 8/8] feat(push): rejected datapoints + test

---
 d2d_development/d2d_development/push.py  | 29 +++++++++++----
 d2d_development/d2d_development/utils.py |  2 +-
 d2d_development/tests/test_push.py       | 46 +++++++++++++++++++++++-
 3 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/d2d_development/d2d_development/push.py b/d2d_development/d2d_development/push.py
index 7e335a1..ff496da 100644
--- a/d2d_development/d2d_development/push.py
+++ b/d2d_development/d2d_development/push.py
@@ -259,7 +259,7 @@ def _push_data_points(
                     self._update_import_counts(response)
 
                 # Capture conflicts/errorReports if present
-                self._extract_conflicts(response)
+                self._extract_conflicts(response, chunk)
 
             except requests.exceptions.RequestException as e:
                 self._raise_server_errors(r)  # Stop the process if there's a server error
@@ -271,7 +271,7 @@ def _push_data_points(
                     self.summary["import_errors"].extend(
                         [{"chunk": chunk_id, "period": chunk[0].get("period", "-"), "exception": str(e)}]
                     )
-                self._extract_conflicts(response)
+                self._extract_conflicts(response, chunk)
 
             processed_points += len(chunk)
 
@@ -312,6 +312,7 @@ def _reset_summary(self) -> None:
             "import_counts": {"imported": 0, "updated": 0, "ignored": 0, "deleted": 0},
             "import_options": {},
             "import_errors": [],
+            "rejected_datapoints": [],
             "ignored_data_points": [],
             "delete_data_points": [],
         }
@@ -352,26 +353,40 @@ def _update_import_counts(self, response: dict) -> None:
         for key in ["imported", "updated", "ignored", "deleted"]:
             self.summary["import_counts"][key] += import_counts.get(key, 0)
 
-    def _extract_conflicts(self, response: dict) -> None:
+    def _extract_conflicts(self, response: dict, chunk: list) -> None:
         """Extract all conflicts and errorReports from a DHIS2 API response.
 
-        Handles both top-level and nested 'response' nodes. Optionally updates the summary.
+        This method looks for 'conflicts' and 'errorReports' at both the top level and within a nested
+        'response' object. It also extracts 'rejectedIndexes' from the nested 'response' to identify which data
+        points were rejected by DHIS2, and adds them to the summary under 'rejected_datapoints'.
 
         Parameters
         ----------
-        response : dict
-            The JSON response from DHIS2 after an import.
+        response: dict
+            The JSON response from the DHIS2 API after attempting to push data points.
+        chunk: list
+            The list of data points that were included in the API request corresponding to the response.
         """
         if not response:
             return
+
         conflicts = response.get("conflicts", [])
         error_reports = response.get("errorReports", [])
 
-        # Check if nested under "response"
         nested = response.get("response", {})
         conflicts += nested.get("conflicts", [])
         error_reports += nested.get("errorReports", [])
+        rejected_indexes = nested.get("rejectedIndexes", [])
+
         all_errors = conflicts + error_reports
 
         if all_errors:
             self.summary.setdefault("import_errors", []).extend(all_errors)
+
+        # Extract rejected datapoints
+        if rejected_indexes:
+            rejected_datapoints = [
+                {"index": idx, "datapoint": chunk[idx]} for idx in rejected_indexes if 0 <= idx < len(chunk)
+            ]
+            if rejected_datapoints:
+                self.summary.setdefault("rejected_datapoints", []).extend(rejected_datapoints)
diff --git a/d2d_development/d2d_development/utils.py b/d2d_development/d2d_development/utils.py
index 55554fc..94e3f53 100644
--- a/d2d_development/d2d_development/utils.py
+++ b/d2d_development/d2d_development/utils.py
@@ -6,7 +6,7 @@
 import polars as pl
 from openhexa.sdk import current_run
 
-from d2d_development.exceptions import ExtractorError
+from .exceptions import ExtractorError
 
 
 def log_message(
diff --git a/d2d_development/tests/test_push.py b/d2d_development/tests/test_push.py
index 5f08ee3..840e2ef 100644
--- a/d2d_development/tests/test_push.py
+++ b/d2d_development/tests/test_push.py
@@ -146,7 +146,6 @@ def test_push_log_delete_data_points():
         .data_elements._retrieve_data(data_elements=[], org_units=[], period="202501")
         .slice(3, 1)  # Select invalid data points (rows 4 to 7) for testing
     )
-    print(data_points)
     pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
     _, to_delete, _ = pusher._classify_data_points(data_points)
 
@@ -501,3 +500,48 @@ def test_push_data_points_value_format_error():
         assert pusher.summary["import_counts"]["deleted"] == 0
         assert len(pusher.summary["import_errors"]) == 1
         assert pusher.summary["import_errors"][0]["object"] == "VALID2"
+
+
+def test_push_summary_rejected_points():
+    """Test that data points rejected by DHIS2 are tracked under 'rejected_datapoints' in the summary."""
+    pusher = DHIS2Pusher(dhis2_client=MockDHIS2Client())
+    # NOTE: These fake data points only need to pass validation and
+    #  line up with the information manufactured in the mock response
+    invalid_dp_1 = {
+        "dataElement": "VALID2",
+        "period": "202501",
+        "orgUnit": "ORG002",
+        "categoryOptionCombo": "INVALID_AOC_1",
+        "attributeOptionCombo": "ATTR001",
+        "value": "1",
+    }
+    invalid_dp_2 = {
+        "dataElement": "VALID3",
+        "period": "202501",
+        "orgUnit": "ORG003",
+        "categoryOptionCombo": "INVALID_AOC_2",
+        "attributeOptionCombo": "ATTR001",
+        "value": "1",
+    }
+    invalid_data_points = [
+        {
+            "dataElement": "VALID1",
+            "period": "202501",
+            "orgUnit": "ORG001",
+            "categoryOptionCombo": "CAT001",
+            "attributeOptionCombo": "ATTR001",
+            "value": "1",
+        },
+        invalid_dp_1,
+        invalid_dp_2,
+    ]
+
+    # MOCK_DHIS2_ERROR_409_RESPONSE_AOC is a hand-built 409 Conflict payload; presumably it rejects indexes 1 and 2.
+    with patch.object(
+        pusher.dhis2_client.api.session,
+        "post",
+        return_value=MockDHIS2Response(MOCK_DHIS2_ERROR_409_RESPONSE_AOC, status_code=409),
+    ):
+        pusher._push_data_points(invalid_data_points)  # access private method for error handling testing
+        assert pusher.summary["rejected_datapoints"][0]["datapoint"] == invalid_dp_1
+        assert pusher.summary["rejected_datapoints"][1]["datapoint"] == invalid_dp_2