diff --git a/CMakeLists.txt b/CMakeLists.txt
index 717ecf5b..7774b3d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,7 +58,7 @@ set(MINIEXPR_BUILD_BENCH OFF CACHE BOOL "Build miniexpr benchmarks" FORCE)
 
 FetchContent_Declare(miniexpr
     GIT_REPOSITORY https://github.com/Blosc/miniexpr.git
-    GIT_TAG 979573da618e0443c3984bad8db3ed5d9ce72f75
+    GIT_TAG 77d633cb2c134552da045b8d2cc0ad23908e6b9e
 )
 FetchContent_MakeAvailable(miniexpr)
 
@@ -116,7 +116,7 @@ else()
     include(FetchContent)
     FetchContent_Declare(blosc2
         GIT_REPOSITORY https://github.com/Blosc/c-blosc2
-        GIT_TAG f057d1519c0a990f6351cd39c6a659c752fb84e9
+        GIT_TAG bf21f84680542e680fd94fdc05c5a76259df1345
     )
     FetchContent_MakeAvailable(blosc2)
     include_directories("${blosc2_SOURCE_DIR}/include")
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index b4c58350..4b65e791 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -2,7 +2,8 @@
 
 ## Changes from 4.0.0-b1 to 4.0.0-b2
 
-XXX version-specific blurb XXX
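+- On Windows, miniexpr is temporarily disabled for integral outputs, for expressions with an operand whose dtype differs from the result dtype, and for complex operands.
+  Set `BLOSC2_ENABLE_MINIEXPR_WINDOWS=1` to re-enable the first two cases for testing; complex operands stay disabled.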
 
 ## Changes from 3.12.2 to 4.0.0-b1
 
diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx
index 68e90b40..93ccfce8 100644
--- a/src/blosc2/blosc2_ext.pyx
+++ b/src/blosc2/blosc2_ext.pyx
@@ -1947,6 +1947,7 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
             memset(params_output, 0, udata.array.blocknitems * typesize)
         free(input_buffers)
         return 0
+
     for i in range(udata.ninputs):
         ndarr = udata.inputs[i]
         input_buffers[i] = malloc(ndarr.sc.blocksize)
@@ -2013,6 +2014,11 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
     cdef uintptr_t offset_bytes = typesize * linear_block_index
 
     # Call thread-safe miniexpr C API
+    # NOTE: me_eval_nd expects the OUTPUT block size (in items), not the input block size.
+    # For element-wise operations with same dtypes, they're equal, but for type-changing
+    # operations (e.g., arccos(int32) -> float64), we must use the output's block item count.
+    cdef int output_blocknitems = udata.array.blocknitems
+
     if udata.aux_reduc_ptr == NULL:
         aux_reduc_ptr = <void *> params_output
     else:
@@ -2020,7 +2026,7 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
         # NOTE: miniexpr handles scalar outputs in me_eval_nd without touching tail bytes.
         aux_reduc_ptr = <void *> (<uintptr_t> udata.aux_reduc_ptr + offset_bytes)
     rc = me_eval_nd(miniexpr_handle, <const void**> input_buffers, udata.ninputs,
-                    aux_reduc_ptr, blocknitems, nchunk, nblock, udata.eval_params)
+                    aux_reduc_ptr, output_blocknitems, nchunk, nblock, udata.eval_params)
     if rc != 0:
         raise RuntimeError(f"miniexpr: issues during evaluation; error code: {rc}")
 
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index ed4b2d2b..62c60067 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -95,10 +95,9 @@
 try_miniexpr = True
 if blosc2.IS_WASM:
     try_miniexpr = False
-if sys.platform == "win32":
-    # Although miniexpr has support for windows, the integration with Blosc2
-    # still has some rough edges.
-    try_miniexpr = False
+
+_MINIEXPR_WINDOWS_OVERRIDE = os.environ.get("BLOSC2_ENABLE_MINIEXPR_WINDOWS", "").strip().lower()
+_MINIEXPR_WINDOWS_OVERRIDE = _MINIEXPR_WINDOWS_OVERRIDE not in ("", "0", "false", "no", "off")
 
 
 def ne_evaluate(expression, local_dict=None, **kwargs):
@@ -1326,6 +1325,26 @@ def fast_eval(  # noqa: C901
             use_miniexpr = False
         if not (all_ndarray and out is None):
             use_miniexpr = False
+        has_complex = any(
+            isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating")
+            for op in operands.values()
+        )
+        if isinstance(expression, str) and has_complex:
+            if sys.platform == "win32":
+                # On Windows, miniexpr has issues with complex numbers
+                use_miniexpr = False
+            if any(tok in expression for tok in ("!=", "==", "<=", ">=", "<", ">")):
+                use_miniexpr = False
+        if sys.platform == "win32" and use_miniexpr and not _MINIEXPR_WINDOWS_OVERRIDE:
+            # Work around Windows miniexpr issues for integer outputs and dtype conversions.
+            if blosc2.isdtype(dtype, "integral"):
+                use_miniexpr = False
+            else:
+                dtype_mismatch = any(
+                    isinstance(op, blosc2.NDArray) and op.dtype != dtype for op in operands.values()
+                )
+                if dtype_mismatch:
+                    use_miniexpr = False
 
     if use_miniexpr:
         cparams = kwargs.pop("cparams", blosc2.CParams())
@@ -1333,7 +1352,7 @@ def fast_eval(  # noqa: C901
         res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs)
         try:
             res_eval._set_pref_expr(expression, operands, fp_accuracy=fp_accuracy)
-            print("expr->miniexpr:", expression, fp_accuracy)
+            # print("expr->miniexpr:", expression, fp_accuracy)
             # Data to compress is fetched from operands, so it can be uninitialized here
             data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8)
             # Exercise prefilter for each chunk
@@ -2036,6 +2055,18 @@ def reduce_slices(  # noqa: C901
             isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating")
             for op in operands.values()
         )
+        if has_complex and sys.platform == "win32":
+            # On Windows, miniexpr has issues with complex numbers
+            use_miniexpr = False
+        if sys.platform == "win32" and use_miniexpr and not _MINIEXPR_WINDOWS_OVERRIDE:
+            if blosc2.isdtype(dtype, "integral"):
+                use_miniexpr = False
+            else:
+                dtype_mismatch = any(
+                    isinstance(op, blosc2.NDArray) and op.dtype != dtype for op in operands.values()
+                )
+                if dtype_mismatch:
+                    use_miniexpr = False
         if has_complex and any(tok in expression for tok in ("!=", "==", "<=", ">=", "<", ">")):
             use_miniexpr = False
         if where is not None and len(where) != 2:
@@ -2073,7 +2104,7 @@ def reduce_slices(  # noqa: C901
             else:
                 expression_miniexpr = f"{reduce_op_str}({expression})"
             res_eval._set_pref_expr(expression_miniexpr, operands, fp_accuracy, aux_reduc)
-            print("expr->miniexpr:", expression, reduce_op, fp_accuracy)
+            # print("expr->miniexpr:", expression, reduce_op, fp_accuracy)
             # Data won't even try to be compressed, so buffers can be unitialized and reused
             data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8)
             chunk_data = np.empty(res_eval.schunk.chunksize + blosc2.MAX_OVERHEAD, dtype=np.uint8)