diff --git a/CMakeLists.txt b/CMakeLists.txt index 717ecf5b..7774b3d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ set(MINIEXPR_BUILD_BENCH OFF CACHE BOOL "Build miniexpr benchmarks" FORCE) FetchContent_Declare(miniexpr GIT_REPOSITORY https://github.com/Blosc/miniexpr.git - GIT_TAG 979573da618e0443c3984bad8db3ed5d9ce72f75 + GIT_TAG 77d633cb2c134552da045b8d2cc0ad23908e6b9e ) FetchContent_MakeAvailable(miniexpr) @@ -116,7 +116,7 @@ else() include(FetchContent) FetchContent_Declare(blosc2 GIT_REPOSITORY https://github.com/Blosc/c-blosc2 - GIT_TAG f057d1519c0a990f6351cd39c6a659c752fb84e9 + GIT_TAG bf21f84680542e680fd94fdc05c5a76259df1345 ) FetchContent_MakeAvailable(blosc2) include_directories("${blosc2_SOURCE_DIR}/include") diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index b4c58350..4b65e791 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,7 +2,8 @@ ## Changes from 4.0.0-b1 to 4.0.0-b2 -XXX version-specific blurb XXX +- On Windows, miniexpr is temporarily disabled for integral outputs and mixed-dtype expressions. + Set `BLOSC2_ENABLE_MINIEXPR_WINDOWS=1` to override this for testing. ## Changes from 3.12.2 to 4.0.0-b1 diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx index 68e90b40..93ccfce8 100644 --- a/src/blosc2/blosc2_ext.pyx +++ b/src/blosc2/blosc2_ext.pyx @@ -1947,6 +1947,7 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock, memset(params_output, 0, udata.array.blocknitems * typesize) free(input_buffers) return 0 + for i in range(udata.ninputs): ndarr = udata.inputs[i] input_buffers[i] = malloc(ndarr.sc.blocksize) @@ -2013,6 +2014,11 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock, cdef uintptr_t offset_bytes = typesize * linear_block_index # Call thread-safe miniexpr C API + # NOTE: me_eval_nd expects the OUTPUT block size (in items), not the input block size. + # For element-wise operations with same dtypes, they're equal, but for type-changing + # operations (e.g., arccos(int32) -> float64), we must use the output's block item count. + cdef int output_blocknitems = udata.array.blocknitems + if udata.aux_reduc_ptr == NULL: aux_reduc_ptr = params_output else: @@ -2020,7 +2026,7 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock, # NOTE: miniexpr handles scalar outputs in me_eval_nd without touching tail bytes. aux_reduc_ptr = ( udata.aux_reduc_ptr + offset_bytes) rc = me_eval_nd(miniexpr_handle, input_buffers, udata.ninputs, - aux_reduc_ptr, blocknitems, nchunk, nblock, udata.eval_params) + aux_reduc_ptr, output_blocknitems, nchunk, nblock, udata.eval_params) if rc != 0: raise RuntimeError(f"miniexpr: issues during evaluation; error code: {rc}") diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index ed4b2d2b..62c60067 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -95,10 +95,9 @@ try_miniexpr = True if blosc2.IS_WASM: try_miniexpr = False -if sys.platform == "win32": - # Although miniexpr has support for windows, the integration with Blosc2 - # still has some rough edges. - try_miniexpr = False + +_MINIEXPR_WINDOWS_OVERRIDE = os.environ.get("BLOSC2_ENABLE_MINIEXPR_WINDOWS", "").strip().lower() +_MINIEXPR_WINDOWS_OVERRIDE = _MINIEXPR_WINDOWS_OVERRIDE not in ("", "0", "false", "no", "off") def ne_evaluate(expression, local_dict=None, **kwargs): @@ -1326,6 +1325,26 @@ def fast_eval( # noqa: C901 use_miniexpr = False if not (all_ndarray and out is None): use_miniexpr = False + has_complex = any( + isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating") + for op in operands.values() + ) + if isinstance(expression, str) and has_complex: + if sys.platform == "win32": + # On Windows, miniexpr has issues with complex numbers + use_miniexpr = False + if any(tok in expression for tok in ("!=", "==", "<=", ">=", "<", ">")): + use_miniexpr = False + if sys.platform == "win32" and use_miniexpr and not _MINIEXPR_WINDOWS_OVERRIDE: + # Work around Windows miniexpr issues for integer outputs and dtype conversions. + if blosc2.isdtype(dtype, "integral"): + use_miniexpr = False + else: + dtype_mismatch = any( + isinstance(op, blosc2.NDArray) and op.dtype != dtype for op in operands.values() + ) + if dtype_mismatch: + use_miniexpr = False if use_miniexpr: cparams = kwargs.pop("cparams", blosc2.CParams()) @@ -1333,7 +1352,7 @@ def fast_eval( # noqa: C901 res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs) try: res_eval._set_pref_expr(expression, operands, fp_accuracy=fp_accuracy) - print("expr->miniexpr:", expression, fp_accuracy) + # print("expr->miniexpr:", expression, fp_accuracy) # Data to compress is fetched from operands, so it can be uninitialized here data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8) # Exercise prefilter for each chunk @@ -2036,6 +2055,18 @@ def reduce_slices( # noqa: C901 isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating") for op in operands.values() ) + if has_complex and sys.platform == "win32": + # On Windows, miniexpr has issues with complex numbers + use_miniexpr = False + if sys.platform == "win32" and use_miniexpr and not _MINIEXPR_WINDOWS_OVERRIDE: + if blosc2.isdtype(dtype, "integral"): + use_miniexpr = False + else: + dtype_mismatch = any( + isinstance(op, blosc2.NDArray) and op.dtype != dtype for op in operands.values() + ) + if dtype_mismatch: + use_miniexpr = False if has_complex and any(tok in expression for tok in ("!=", "==", "<=", ">=", "<", ">")): use_miniexpr = False if where is not None and len(where) != 2: @@ -2073,7 +2104,7 @@ def reduce_slices( # noqa: C901 else: expression_miniexpr = f"{reduce_op_str}({expression})" res_eval._set_pref_expr(expression_miniexpr, operands, fp_accuracy, aux_reduc) - print("expr->miniexpr:", expression, reduce_op, fp_accuracy) + # print("expr->miniexpr:", expression, reduce_op, fp_accuracy) # Data won't even try to be compressed, so buffers can be unitialized and reused data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8) chunk_data = np.empty(res_eval.schunk.chunksize + blosc2.MAX_OVERHEAD, dtype=np.uint8)