diff --git a/linopy/common.py b/linopy/common.py index 09f67355..cac816a9 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -8,7 +8,6 @@ from __future__ import annotations import operator -import os from collections.abc import Callable, Generator, Hashable, Iterable, Sequence from functools import partial, reduce, wraps from pathlib import Path @@ -18,7 +17,7 @@ import numpy as np import pandas as pd import polars as pl -from numpy import arange, signedinteger +from numpy import signedinteger from xarray import DataArray, Dataset, apply_ufunc, broadcast from xarray import align as xr_align from xarray.core import dtypes, indexing @@ -27,6 +26,7 @@ from linopy.config import options from linopy.constants import ( + DEFAULT_LABEL_DTYPE, HELPER_DIMS, SIGNS, SIGNS_alternative, @@ -340,11 +340,9 @@ def infer_schema_polars(ds: Dataset) -> dict[Hashable, pl.DataType]: dict: A dictionary mapping column names to their corresponding Polars data types. """ schema = {} - np_major_version = int(np.__version__.split(".")[0]) - use_int32 = os.name == "nt" and np_major_version < 2 for name, array in ds.items(): if np.issubdtype(array.dtype, np.integer): - schema[name] = pl.Int32 if use_int32 else pl.Int64 + schema[name] = pl.Int32 if array.dtype.itemsize <= 4 else pl.Int64 elif np.issubdtype(array.dtype, np.floating): schema[name] = pl.Float64 # type: ignore elif np.issubdtype(array.dtype, np.bool_): @@ -488,7 +486,7 @@ def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset: ) arrs = xr_align(*dataarrays, join="outer") if integer_dtype: - arrs = tuple([ds.fillna(-1).astype(int) for ds in arrs]) + arrs = tuple([ds.fillna(-1).astype(DEFAULT_LABEL_DTYPE) for ds in arrs]) return Dataset({ds.name: ds for ds in arrs}) @@ -549,7 +547,7 @@ def fill_missing_coords( # Fill in missing integer coordinates for dim in ds.dims: if dim not in ds.coords and dim not in skip_dims: - ds.coords[dim] = arange(ds.sizes[dim]) + ds.coords[dim] = np.arange(ds.sizes[dim], 
dtype=DEFAULT_LABEL_DTYPE) return ds diff --git a/linopy/constants.py b/linopy/constants.py index 00bbd705..d638a7cb 100644 --- a/linopy/constants.py +++ b/linopy/constants.py @@ -33,6 +33,8 @@ short_LESS_EQUAL: LESS_EQUAL, } +DEFAULT_LABEL_DTYPE = np.int32 + TERM_DIM = "_term" STACKED_TERM_DIM = "_stacked_term" diff --git a/linopy/constraints.py b/linopy/constraints.py index d3ebef19..02f689a0 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -55,6 +55,7 @@ ) from linopy.config import options from linopy.constants import ( + DEFAULT_LABEL_DTYPE, EQUAL, GREATER_EQUAL, HELPER_DIMS, @@ -1087,7 +1088,10 @@ def flat(self) -> pd.DataFrame: return pd.DataFrame(columns=["coeffs", "vars", "labels", "key"]) df = pd.concat(dfs, ignore_index=True) unique_labels = df.labels.unique() - map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels) + map_labels = pd.Series( + np.arange(len(unique_labels), dtype=DEFAULT_LABEL_DTYPE), + index=unique_labels, + ) df["key"] = df.labels.map(map_labels) return df diff --git a/linopy/expressions.py b/linopy/expressions.py index d2ae9022..a030920e 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -70,6 +70,7 @@ from linopy.config import options from linopy.constants import ( CV_DIM, + DEFAULT_LABEL_DTYPE, EQUAL, FACTOR_DIM, GREATER_EQUAL, @@ -291,7 +292,9 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression: def func(ds: Dataset) -> Dataset: ds = LinearExpression._sum(ds, str(self.groupby._group_dim)) - ds = ds.assign_coords({TERM_DIM: np.arange(len(ds._term))}) + ds = ds.assign_coords( + {TERM_DIM: np.arange(len(ds._term), dtype=DEFAULT_LABEL_DTYPE)} + ) return ds return self.map(func, **kwargs, shortcut=True) @@ -372,7 +375,9 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None: ) if np.issubdtype(data.vars, np.floating): - data = assign_multiindex_safe(data, vars=data.vars.fillna(-1).astype(int)) + data = assign_multiindex_safe( + data, 
vars=data.vars.fillna(-1).astype(DEFAULT_LABEL_DTYPE) + ) if not np.issubdtype(data.coeffs, np.floating): data["coeffs"].values = data.coeffs.values.astype(float) @@ -1436,7 +1441,7 @@ def sanitize(self: GenericExpression) -> GenericExpression: linopy.LinearExpression """ if not np.issubdtype(self.vars.dtype, np.integer): - return self.assign(vars=self.vars.fillna(-1).astype(int)) + return self.assign(vars=self.vars.fillna(-1).astype(DEFAULT_LABEL_DTYPE)) return self @@ -1840,12 +1845,12 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: # Combined has dimensions (.., CV_DIM, TERM_DIM) # Drop terms where all vars are -1 (i.e., empty terms across all coordinates) - vars = combined.isel({CV_DIM: 0}).astype(int) + vars = combined.isel({CV_DIM: 0}).astype(DEFAULT_LABEL_DTYPE) non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM]) combined = combined.isel({TERM_DIM: non_empty_terms}) # Extract vars and coeffs from the combined result - vars = combined.isel({CV_DIM: 0}).astype(int) + vars = combined.isel({CV_DIM: 0}).astype(DEFAULT_LABEL_DTYPE) coeffs = combined.isel({CV_DIM: 1}) # Create new dataset with simplified data diff --git a/linopy/model.py b/linopy/model.py index 54334411..c5bdca4d 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -36,6 +36,7 @@ to_path, ) from linopy.constants import ( + DEFAULT_LABEL_DTYPE, GREATER_EQUAL, HELPER_DIMS, LESS_EQUAL, @@ -633,7 +634,14 @@ def add_variables( start = self._xCounter end = start + data.labels.size - data.labels.values = np.arange(start, end).reshape(data.labels.shape) + if end > np.iinfo(DEFAULT_LABEL_DTYPE).max: + raise ValueError( + f"Number of labels ({end}) exceeds the maximum value for " + f"{DEFAULT_LABEL_DTYPE.__name__} ({np.iinfo(DEFAULT_LABEL_DTYPE).max}). 
" + ) + data.labels.values = np.arange(start, end, dtype=DEFAULT_LABEL_DTYPE).reshape( + data.labels.shape + ) self._xCounter += data.labels.size if mask is not None: @@ -872,7 +880,14 @@ def add_constraints( start = self._cCounter end = start + data.labels.size - data.labels.values = np.arange(start, end).reshape(data.labels.shape) + if end > np.iinfo(DEFAULT_LABEL_DTYPE).max: + raise ValueError( + f"Number of labels ({end}) exceeds the maximum value for " + f"{DEFAULT_LABEL_DTYPE.__name__} ({np.iinfo(DEFAULT_LABEL_DTYPE).max}). " + ) + data.labels.values = np.arange(start, end, dtype=DEFAULT_LABEL_DTYPE).reshape( + data.labels.shape + ) self._cCounter += data.labels.size if mask is not None: diff --git a/linopy/variables.py b/linopy/variables.py index 4332a037..3c2e2950 100644 --- a/linopy/variables.py +++ b/linopy/variables.py @@ -53,7 +53,13 @@ to_polars, ) from linopy.config import options -from linopy.constants import HELPER_DIMS, SOS_DIM_ATTR, SOS_TYPE_ATTR, TERM_DIM +from linopy.constants import ( + DEFAULT_LABEL_DTYPE, + HELPER_DIMS, + SOS_DIM_ATTR, + SOS_TYPE_ATTR, + TERM_DIM, +) from linopy.solver_capabilities import SolverFeature, solver_supports from linopy.types import ( ConstantLike, @@ -1191,7 +1197,9 @@ def ffill(self, dim: str, limit: None = None) -> Variable: .map(DataArray.ffill, dim=dim, limit=limit) .fillna(self._fill_value) ) - return self.assign_multiindex_safe(labels=data.labels.astype(int)) + return self.assign_multiindex_safe( + labels=data.labels.astype(DEFAULT_LABEL_DTYPE) + ) def bfill(self, dim: str, limit: None = None) -> Variable: """ @@ -1218,7 +1226,7 @@ def bfill(self, dim: str, limit: None = None) -> Variable: .map(DataArray.bfill, dim=dim, limit=limit) .fillna(self._fill_value) ) - return self.assign(labels=data.labels.astype(int)) + return self.assign(labels=data.labels.astype(DEFAULT_LABEL_DTYPE)) def sanitize(self) -> Variable: """ @@ -1229,7 +1237,9 @@ def sanitize(self) -> Variable: linopy.Variable """ if 
issubdtype(self.labels.dtype, floating): - return self.assign(labels=self.labels.fillna(-1).astype(int)) + return self.assign( + labels=self.labels.fillna(-1).astype(DEFAULT_LABEL_DTYPE) + ) return self def equals(self, other: Variable) -> bool: @@ -1681,7 +1691,10 @@ def flat(self) -> pd.DataFrame: """ df = pd.concat([self[k].flat for k in self], ignore_index=True) unique_labels = df.labels.unique() - map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels) + map_labels = pd.Series( + np.arange(len(unique_labels), dtype=DEFAULT_LABEL_DTYPE), + index=unique_labels, + ) df["key"] = df.labels.map(map_labels) return df diff --git a/test/test_constraints.py b/test/test_constraints.py index 9a467c8c..be0af123 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -36,9 +36,11 @@ def test_constraint_assignment() -> None: assert "con0" in getattr(m.constraints, attr) assert m.constraints.labels.con0.shape == (10, 10) - assert m.constraints.labels.con0.dtype == int + assert np.issubdtype(m.constraints.labels.con0.dtype, np.integer) assert m.constraints.coeffs.con0.dtype in (int, float) - assert m.constraints.vars.con0.dtype in (int, float) + assert np.issubdtype(m.constraints.vars.con0.dtype, np.integer) or np.issubdtype( + m.constraints.vars.con0.dtype, np.floating + ) assert m.constraints.rhs.con0.dtype in (int, float) assert_conequal(m.constraints.con0, con0) @@ -90,9 +92,11 @@ def test_anonymous_constraint_assignment() -> None: assert "con0" in getattr(m.constraints, attr) assert m.constraints.labels.con0.shape == (10, 10) - assert m.constraints.labels.con0.dtype == int + assert np.issubdtype(m.constraints.labels.con0.dtype, np.integer) assert m.constraints.coeffs.con0.dtype in (int, float) - assert m.constraints.vars.con0.dtype in (int, float) + assert np.issubdtype(m.constraints.vars.con0.dtype, np.integer) or np.issubdtype( + m.constraints.vars.con0.dtype, np.floating + ) assert m.constraints.rhs.con0.dtype in (int, float) diff 
--git a/test/test_dtypes.py b/test/test_dtypes.py
new file mode 100644
index 00000000..ef0253e9
--- /dev/null
+++ b/test/test_dtypes.py
@@ -0,0 +1,63 @@
+"""Tests for int32 default label dtype."""
+
+import numpy as np
+import pytest
+
+from linopy import Model
+from linopy.constants import DEFAULT_LABEL_DTYPE
+
+
+def test_default_label_dtype_is_int32() -> None:
+    """DEFAULT_LABEL_DTYPE is numpy's 32-bit signed integer type."""
+    assert DEFAULT_LABEL_DTYPE == np.int32
+
+
+def test_variable_labels_are_int32() -> None:
+    """Labels of a newly added variable have dtype int32."""
+    m = Model()
+    x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+    assert x.labels.dtype == np.int32
+
+
+def test_constraint_labels_are_int32() -> None:
+    """Labels of a newly added constraint have dtype int32."""
+    m = Model()
+    x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+    m.add_constraints(x >= 1, name="c")
+    assert m.constraints["c"].labels.dtype == np.int32
+
+
+def test_expression_vars_are_int32() -> None:
+    """The vars of an expression built from a variable have dtype int32."""
+    m = Model()
+    x = m.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+    expr = 2 * x + 1
+    assert expr.vars.dtype == np.int32
+
+
+def test_solve_with_int32_labels() -> None:
+    """A two-variable LP solved with "highs" reaches the objective value 25."""
+    m = Model()
+    x = m.add_variables(lower=0, upper=10, name="x")
+    y = m.add_variables(lower=0, upper=10, name="y")
+    m.add_constraints(x + y <= 15, name="c1")
+    m.add_objective(x + 2 * y, sense="max")
+    m.solve("highs")
+    assert m.objective.value == pytest.approx(25.0)
+
+
+def test_overflow_guard_variables() -> None:
+    """add_variables raises ValueError once labels would pass the dtype max."""
+    m = Model()
+    m._xCounter = np.iinfo(DEFAULT_LABEL_DTYPE).max - 1
+    with pytest.raises(ValueError, match="exceeds the maximum"):
+        m.add_variables(lower=0, upper=1, coords=[range(5)], name="x")
+
+
+def test_overflow_guard_constraints() -> None:
+    """add_constraints raises ValueError once labels would pass the dtype max."""
+    m = Model()
+    x = m.add_variables(lower=0, upper=1, coords=[range(5)], name="x")
+    m._cCounter = np.iinfo(DEFAULT_LABEL_DTYPE).max - 1
+    with pytest.raises(ValueError, match="exceeds the maximum"):
+        m.add_constraints(x >= 0, name="c")