Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,5 +174,4 @@ cython_debug/
.pypirc
data/*
testing.ipynb

.DS_Store
test_data/*
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
- id: yamllint
exclude: pre-commit-config.yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.15.12"
rev: "v0.15.14"
hooks:
- id: ruff-format
- id: ruff-check
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ZedProfiler

[![Coverage](https://img.shields.io/badge/coverage-96%25-brightgreen)](#quality-gates)
[![Coverage](https://img.shields.io/badge/coverage-92%25-brightgreen)](#quality-gates)

CPU-first 3D image feature extraction toolkit for high-content and high-throughput image-based profiling.

Expand Down
10 changes: 5 additions & 5 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ The roadmap is intended to be a living document and may be updated as needed.

3. PR 3: RFC2119 naming specification and validators

- [ ] Port and adapt naming conventions into this repository.
- [ ] Add runtime and CI naming validation helpers and conformance tests.
- [x] Port and adapt naming conventions into this repository.
- [x] Add runtime and CI naming validation helpers and conformance tests.

### Phase 2: Feature modules and tests (PR 4-9)

Expand Down Expand Up @@ -79,7 +79,7 @@ The roadmap is intended to be a living document and may be updated as needed.

10. PR 10: Integration matrix and parallelization guidance

- [ ] Cross-module integration tests and explicit non-goal docs for internal parallelization.
- [x] Cross-module integration tests and explicit non-goal docs for internal parallelization.

11. PR 11: Example notebooks and public dataset references

Expand All @@ -100,8 +100,8 @@ The roadmap is intended to be a living document and may be updated as needed.

## Verification Gates

- [ ] Run full unit and integration tests on Linux with coverage >=85%.
- [ ] Run naming validation tests for all emitted feature names.
- [x] Run full unit and integration tests on Linux with coverage >=85%.
- [x] Run naming validation tests for all emitted feature names.
- [ ] Build Sphinx docs in CI with warnings treated as errors.
- [ ] Execute example notebooks in a clean environment.
- [ ] Validate install/import from both wheel and sdist.
Expand Down
5 changes: 4 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ coverage-check: coverage
lint:
uv run ruff check .

lint-fix:
uv run ruff check . --fix

# Build Sphinx docs with docs dependencies.
docs:
cd docs && uv run --group docs sphinx-build src build

# Run the full project workflow (env sync, lint, tests, coverage, and docs build).
all: sync lint test coverage-check docs
all: sync lint-fix lint test coverage-check docs
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"bioio-tifffile>=1.3",
"fire>=0.7.1",
"jinja2>=3.1.6",
"mahotas>=1.4.18",
"matplotlib>=3.10.8",
"pandas>=3.0.2",
"pandera>=0.31.1",
Expand Down
14 changes: 12 additions & 2 deletions src/zedprofiler/IO/loading_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import numpy
from beartype import beartype

from zedprofiler.contracts import ImageArrayModel

logging.basicConfig(level=logging.INFO)


Expand Down Expand Up @@ -327,10 +329,14 @@ def _load_array_based_images(
"""
if image_set_array is not None:
for key in config.raw_image_key_name:
self.image_set_dict[key] = image_set_array
# Run through pydantic validation to ensure the array is valid.
validated_array = ImageArrayModel(array=image_set_array).array
self.image_set_dict[key] = validated_array
if label_set_array is not None:
for key in config.label_key_name:
self.image_set_dict[key] = label_set_array
# Run through pydantic validation to ensure the array is valid.
validated_array = ImageArrayModel(array=label_set_array).array
self.image_set_dict[key] = validated_array

def get_unique_objects_in_compartments(self) -> None:
"""
Expand Down Expand Up @@ -471,6 +477,8 @@ def __init__(
self.object_ids = numpy.unique(self.label_image)
# drop the 0 label
self.object_ids = [x for x in self.object_ids if x != 0]
# inherit the image set loader
self.image_set_loader = image_set_loader


class TwoObjectLoader:
Expand Down Expand Up @@ -539,3 +547,5 @@ def __init__(
self.image1 = self.image_set_loader.get_image(channel1)
self.image2 = self.image_set_loader.get_image(channel2)
self.object_ids = image_set_loader.unique_compartment_objects[compartment]
# inherit the image set name for downstream use
self.image_set_name = image_set_loader.image_set_name
153 changes: 75 additions & 78 deletions src/zedprofiler/contracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@

from __future__ import annotations

import pathlib
from typing import Any

import numpy as np
import pandas as pd
import pandera as pa
import tomli
import pandera.pandas as pa
from beartype import beartype
from pydantic import (
BaseModel,
Expand Down Expand Up @@ -113,7 +111,14 @@ def validate_array_dtype_and_shape(_cls, arr: np.ndarray) -> np.ndarray:
"dimensions of size 1. Expected all three dimensions to "
"have size greater than 1."
)

if not validate_image_array_shape_contracts(arr):
raise ValueError(
f"Input array with shape {arr_shape} failed shape contract validation."
)
if not validate_image_array_type_contracts(arr):
raise ValueError(
f"Input array with dtype {arr.dtype} failed type contract validation."
)
return arr


Expand Down Expand Up @@ -330,59 +335,62 @@ def validate_image_array_type_contracts(


@beartype
def validate_image_with_pydantic(arr: np.ndarray) -> ImageArrayModel:
def validate_return_with_pydantic(
result: dict[str, object],
) -> ReturnSchemaModel:
"""
Validate image array using Pydantic model.
Validate return schema using Pydantic model.

Parameters
----------
arr : np.ndarray
Input array to validate
result : dict[str, object]
Return result to validate

Returns
-------
ImageArrayModel
Validated image array model
ReturnSchemaModel
Validated return schema model

Raises
------
ContractError
If validation fails
"""
try:
return ImageArrayModel(array=arr)
return ReturnSchemaModel(result=result)
except Exception as e:
raise ContractError(f"Image array validation failed: {e}")
msg = (
"Return schema validation failed. Please ensure that the data "
f"fit the expected schema: {e}"
)
raise ContractError(msg)


@beartype
def validate_return_with_pydantic(
result: dict[str, object],
) -> ReturnSchemaModel:
def validate_image_with_pydantic(arr: np.ndarray) -> ImageArrayModel:
"""
Validate return schema using Pydantic model.
Validate the input image array using Pydantic model.

Parameters
----------
result : dict[str, object]
Return result to validate
arr : np.ndarray
Input image array to validate

Returns
-------
ReturnSchemaModel
Validated return schema model
ImageArrayModel
Validated image array model

Raises
------
ContractError
If validation fails
"""
try:
return ReturnSchemaModel(result=result)
return ImageArrayModel(array=arr)
except Exception as e:
msg = (
"Return schema validation failed. Please ensure that the data "
f"fit the expected schema: {e}"
"Image array validation failed. Please ensure that the input "
f"array meets the expected contracts: {e}"
)
raise ContractError(msg)

Expand Down Expand Up @@ -457,67 +465,48 @@ def validate_return_schema_contract(
class ExpectedFeatureNameValues(BaseModel):
"""Pydantic model for expected values in feature naming validation."""

config_file_path: pathlib.Path
compartments: list[str] = Field(default_factory=list)
channels: list[str] = Field(default_factory=list)
features: list[str] = Field(default_factory=list)

compartments: list[str] | None = Field(default_factory=list)
channels: list[str] | None = Field(default_factory=list)
features: list[str] | None = Field(default_factory=list)
expected_values_dict: dict[str, list[str]] = Field(default_factory=dict)
model_config = ConfigDict(arbitrary_types_allowed=True)
print(features)

@field_validator("config_file_path", mode="before")
@classmethod
def validate_config_path(_cls, v: object) -> pathlib.Path:
"""Ensure config_file_path is a valid Path object."""
if not isinstance(v, pathlib.Path):
v = pathlib.Path(v)
return v

def model_post_init(self, _context: object) -> None:
"""Load expected values from a TOML configuration file."""
config = tomli.loads(self.config_file_path.read_text())
self.compartments = list(set(config["expected_values"]["compartments"]))
self.channels = list(set(config["expected_values"]["channels"]))
# add "NoChannel" as a valid channel for metadata columns
# This is automatically added in the ZedProfiler
# regardless of input channel we want this added
self.channels.append("NoChannel")
self.features = [
"AreaSizeShape",
"Correlation",
"Granularity",
"Intensity",
"Neighbors",
"Texture",
"SAMMed3D",
"CHAMMI-75",
]

def to_dict(self) -> dict[str, list[str]]:
"""Return expected values as a dictionary."""
return {
def __init__(self, **data: object) -> None:
super().__init__(**data)
if self.compartments is not None:
self.compartments = list(set(self.compartments))
else:
raise ValueError("Compartments list cannot be None.")
if self.channels is not None:
# Add "NoChannel" to channels list
self.channels = list(set(self.channels) | {"NoChannel"})
else:
raise ValueError("Channels list cannot be None.")
if self.features is not None and len(self.features) > 0:
self.features = list(set(self.features))
else:
self.features = [
"Colocalization",
"Granularity",
"Texture",
"Intensity",
"Neighbors",
"VolumeSizeShape",
]
self.expected_values_dict = {
"compartments": self.compartments,
"channels": self.channels,
"features": self.features,
}

def __init__(self, *args: object, **data: object) -> None:
"""Support positional `config_file_path` for backward compatibility.

Tests and existing code may instantiate ExpectedValues(path) using a
positional argument. Pydantic BaseModel requires keyword arguments, so
accept a single positional argument and forward it as
`config_file_path=` to the BaseModel initializer.
"""
if args and "config_file_path" not in data:
# take the first positional arg as config_file_path
data["config_file_path"] = args[0]
super().__init__(**data)


@beartype
def validate_column_name_schema(
column_name: str,
expected_values_config_path: pathlib.Path,
channels: list[str],
compartments: list[str],
features: list[str] | None = None,
) -> bool:
"""
Validate the column name schema for required fields and types
Expand All @@ -526,8 +515,12 @@ def validate_column_name_schema(
----------
column_name : str
The column name to validate
expected_values_config_path : pathlib.Path
Path to the configuration file containing expected values for validation
channels : list[str]
List of valid channels for feature naming
compartments : list[str]
List of valid compartments for feature naming
features : list[str] | None, optional
List of valid features for feature naming, by default None
Returns
-------
bool
Expand All @@ -540,7 +533,10 @@ def validate_column_name_schema(
non_metadata_underscore_separated_parts = NON_METADATA_UNDERSCORE_SEPARATED_PARTS
metadata_underscore_separated_parts = METADATA_UNDERSCORE_SEPARATED_PARTS

expected_values = ExpectedFeatureNameValues(expected_values_config_path).to_dict()
expected_values = ExpectedFeatureNameValues(
channels=channels, compartments=compartments, features=None
).expected_values_dict

# check if the column name is a string
if not isinstance(column_name, str):
raise ContractError(f"Column name must be a string, got {type(column_name)}")
Expand Down Expand Up @@ -568,7 +564,6 @@ def validate_column_name_schema(
f"underscores, got {len(parts)} parts in '{column_name}'"
)
return True

feature_components = pd.DataFrame(
[
{
Expand All @@ -578,6 +573,7 @@ def validate_column_name_schema(
}
]
)

feature_component_schema = pa.DataFrameSchema(
{
"compartment": pa.Column(
Expand All @@ -601,6 +597,7 @@ def validate_column_name_schema(
},
strict=True,
)

try:
feature_component_schema.validate(feature_components)
except (pa.errors.SchemaError, pa.errors.SchemaErrors) as e:
Expand Down
Loading
Loading