From 76dc6f35764d91088a2cbe79c7cf07992dc46406 Mon Sep 17 00:00:00 2001
From: Jonas Rembser <jonas.rembser@cern.ch>
Date: Wed, 4 Feb 2026 14:01:58 +0100
Subject: [PATCH 1/2] [test] Use Python development mode with warnings as
 errors for testing

From the docs [1]:

> The Python Development Mode introduces additional runtime checks that
> are too expensive to be enabled by default. It should not be more
> verbose than the default if the code is correct; new warnings are only
> emitted when an issue is detected.

Also use the `-W error` flag to treat warnings as errors.

This should help us to find bugs and problems with the tests.

[1] https://docs.python.org/3/library/devmode.html
---
 cmake/modules/RootMacros.cmake              | 10 +++++-----
 cmake/modules/SearchInstalledSoftware.cmake |  3 +++
 roottest/main/CMakeLists.txt                |  4 ++--
 roottest/python/JupyROOT/CMakeLists.txt     |  6 +++---
 roottest/root/meta/CMakeLists.txt           |  2 +-
 roottest/root/rint/CMakeLists.txt           |  4 ++--
 tutorials/CMakeLists.txt                    |  4 ++--
 7 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/cmake/modules/RootMacros.cmake b/cmake/modules/RootMacros.cmake
index c3fd867327ae5..28fb8161fd7ef 100644
--- a/cmake/modules/RootMacros.cmake
+++ b/cmake/modules/RootMacros.cmake
@@ -2014,7 +2014,7 @@ function(ROOT_ADD_PYUNITTESTS name)
 
   string(REGEX REPLACE "[_]" "-" good_name "${name}")
   ROOT_ADD_TEST(pyunittests-${good_name}
-                COMMAND ${Python3_EXECUTABLE} -B -m unittest discover -s ${CMAKE_CURRENT_SOURCE_DIR} -p "*.py" -v
+                COMMAND ${Python3_EXECUTABLE_DEV} -B -m unittest discover -s ${CMAKE_CURRENT_SOURCE_DIR} -p "*.py" -v
                 ENVIRONMENT PYTHONPATH=${pythonpaths_native})
 endfunction()
 
@@ -2074,9 +2074,9 @@ function(ROOT_ADD_PYUNITTEST name file)
   string(REPLACE "-test-" "-" clean_name_with_path ${name_with_path})
 
   if(ARG_GENERIC)
-    set(test_cmd COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${file_name})
+    set(test_cmd COMMAND ${Python3_EXECUTABLE_DEV} ${CMAKE_CURRENT_SOURCE_DIR}/${file_name})
   else()
-    set(test_cmd COMMAND ${Python3_EXECUTABLE} -B -m unittest discover -s ${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} -p ${file_name} -v)
+    set(test_cmd COMMAND ${Python3_EXECUTABLE_DEV} -B -m unittest discover -s ${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} -p ${file_name} -v)
   endif()
 
   set(test_name pyunittests${clean_name_with_path})
@@ -2993,7 +2993,7 @@ macro(ROOTTEST_SETUP_MACROTEST)
   # Add python script to CTest.
   elseif(ARG_MACRO MATCHES "[.]py")
     get_filename_component(realfp ${ARG_MACRO} REALPATH)
-    set(command ${Python3_EXECUTABLE} ${realfp} ${PYROOT_EXTRAFLAGS})
+    set(command ${Python3_EXECUTABLE_DEV} ${realfp} ${PYROOT_EXTRAFLAGS})
 
   elseif(DEFINED ARG_MACRO)
     set(command ${root_cmd} ${ARG_MACRO})
@@ -3029,7 +3029,7 @@ macro(ROOTTEST_SETUP_EXECTEST)
 
   if(MSVC)
     if(${realexec} MATCHES "[.]py" AND NOT ${realexec} MATCHES "[.]exe")
-      set(realexec ${Python3_EXECUTABLE} ${realexec})
+      set(realexec ${Python3_EXECUTABLE_DEV} ${realexec})
     else()
       set(realexec ${realexec})
     endif()
diff --git a/cmake/modules/SearchInstalledSoftware.cmake b/cmake/modules/SearchInstalledSoftware.cmake
index 108a5eaf33b66..805e969704533 100644
--- a/cmake/modules/SearchInstalledSoftware.cmake
+++ b/cmake/modules/SearchInstalledSoftware.cmake
@@ -626,6 +626,9 @@ if(tmva-pymva OR tmva-sofie)
 endif()
 find_package(Python3 3.10 COMPONENTS ${python_components})
 
+# Python 3 command line to use for tests: development mode (-X dev) with warnings treated as errors (-W error).
+set(Python3_EXECUTABLE_DEV ${Python3_EXECUTABLE} -X dev -W error)
+
 #---Check for OpenGL installation-------------------------------------------------------
 # OpenGL is required by various graf3d features that are enabled with opengl=ON,
 # or by the Cocoa-related code that always requires it.
diff --git a/roottest/main/CMakeLists.txt b/roottest/main/CMakeLists.txt
index ac9cadcc8e59b..5df3e09865141 100644
--- a/roottest/main/CMakeLists.txt
+++ b/roottest/main/CMakeLists.txt
@@ -11,7 +11,7 @@ configure_file(RootcpReplaceEntireFileCheck.C . COPYONLY)
 # We should explicitly use the Python executable from the Python version that
 # was used to build ROOT. Otherwise, we risk picking up a different Python
 # version at test time.
-set(PY_TOOLS_PREFIX ${Python3_EXECUTABLE} ${ROOTSYS}/bin)
+set(PY_TOOLS_PREFIX ${Python3_EXECUTABLE_DEV} ${ROOTSYS}/bin)
 set(TOOLS_PREFIX ${ROOTSYS}/bin)
 
 if(MSVC)
@@ -519,7 +519,7 @@ if(pyroot)
 set(test_env PYTHONWARNINGS=ignore)
 
 ############################## PATTERN TESTS ############################
-set (TESTPATTERN_EXE ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/testPatternToFileNameAndPathSplitList.py)
+set (TESTPATTERN_EXE ${Python3_EXECUTABLE_DEV} ${CMAKE_CURRENT_SOURCE_DIR}/testPatternToFileNameAndPathSplitList.py)
 ROOTTEST_ADD_TEST(SimplePattern1
                   COMMAND ${TESTPATTERN_EXE} test.root
                   OUTREF SimplePattern.ref
diff --git a/roottest/python/JupyROOT/CMakeLists.txt b/roottest/python/JupyROOT/CMakeLists.txt
index 07547c27328fb..e163cf0ff3c23 100644
--- a/roottest/python/JupyROOT/CMakeLists.txt
+++ b/roottest/python/JupyROOT/CMakeLists.txt
@@ -23,7 +23,7 @@ foreach(pyfile ${pyfiles})
   get_filename_component(SHORTPYFILE ${pyfile} NAME_WE)
   if (NOT ${SHORTPYFILE} STREQUAL "__init__")
     ROOTTEST_ADD_TEST(${SHORTPYFILE}_doctest
-                      COMMAND ${Python3_EXECUTABLE} ${DOCTEST_LAUNCHER} ${pyfile}
+                      COMMAND ${Python3_EXECUTABLE_DEV} ${DOCTEST_LAUNCHER} ${pyfile}
                       PYTHON_DEPS IPython)
   endif()
 endforeach()
@@ -33,7 +33,7 @@ foreach(NOTEBOOK ${NOTEBOOKS})
   get_filename_component(NOTEBOOKBASE ${NOTEBOOK} NAME_WE)
   ROOTTEST_ADD_TEST(${NOTEBOOKBASE}_notebook
                     COPY_TO_BUILDDIR ${NOTEBOOK}
-                    COMMAND ${Python3_EXECUTABLE} ${NBDIFFUTIL} ${NOTEBOOK}
+                    COMMAND ${Python3_EXECUTABLE_DEV} ${NBDIFFUTIL} ${NOTEBOOK}
                     RUN_SERIAL
                     PYTHON_DEPS jupyter)
 endforeach()
@@ -44,7 +44,7 @@ if(imt)
   get_filename_component(NOTEBOOKBASE ${IMT_NB} NAME_WE)
   ROOTTEST_ADD_TEST(${NOTEBOOKBASE}_notebook
                     COPY_TO_BUILDDIR ${IMT_NB}
-                    COMMAND ${Python3_EXECUTABLE} ${NBDIFFUTIL} ${IMT_NB} "OFF"
+                    COMMAND ${Python3_EXECUTABLE_DEV} ${NBDIFFUTIL} ${IMT_NB} "OFF"
                     RUN_SERIAL
                     PYTHON_DEPS jupyter)
 endif()
diff --git a/roottest/root/meta/CMakeLists.txt b/roottest/root/meta/CMakeLists.txt
index f7bded827b134..bf11257e12e15 100644
--- a/roottest/root/meta/CMakeLists.txt
+++ b/roottest/root/meta/CMakeLists.txt
@@ -26,7 +26,7 @@ ROOTTEST_ADD_TEST(ROOT5268
                   PASSREGEX "error: unknown type name 'Tbrowser'")
 
 ROOTTEST_ADD_TEST(rlibmap
-                  COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/rlibmapLauncher.py
+                  COMMAND ${Python3_EXECUTABLE_DEV} ${CMAKE_CURRENT_SOURCE_DIR}/rlibmapLauncher.py
                   PASSRC 1
                   OUTREF  rlibmap.ref)
 
diff --git a/roottest/root/rint/CMakeLists.txt b/roottest/root/rint/CMakeLists.txt
index f346988b0140b..828dab8e368bf 100644
--- a/roottest/root/rint/CMakeLists.txt
+++ b/roottest/root/rint/CMakeLists.txt
@@ -1,7 +1,7 @@
 if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64.*|x86.*|amd64.*|AMD64.*|i686.*|i386.*")
   # All platforms except of ARM/AARCH64
   ROOTTEST_ADD_TEST(TabCom
-                    COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/driveTabCom.py
+                    COMMAND ${Python3_EXECUTABLE_DEV} ${CMAKE_CURRENT_SOURCE_DIR}/driveTabCom.py
                     INPUT TabCom_input.txt
                     OUTCNV filterOpt.sh
                     OUTREF TabCom.oref
@@ -9,7 +9,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64.*|x86.*|amd64.*|AMD64.*|i686.*|i386
                     COPY_TO_BUILDDIR MyClass.h)
 
   ROOTTEST_ADD_TEST(BackslashNewline
-                    COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/driveTabCom.py
+                    COMMAND ${Python3_EXECUTABLE_DEV} ${CMAKE_CURRENT_SOURCE_DIR}/driveTabCom.py
                     INPUT BackslashNewline_input.txt)
 endif()
 
diff --git a/tutorials/CMakeLists.txt b/tutorials/CMakeLists.txt
index 07052cb9c7c08..ea129d571b9fd 100644
--- a/tutorials/CMakeLists.txt
+++ b/tutorials/CMakeLists.txt
@@ -687,7 +687,7 @@ if(geom)
 endif()
 # define Python GNN parsing tutorial needed to run before
 if (ROOT_SONNET_FOUND AND ROOT_GRAPH_NETS_FOUND)
-  ROOT_ADD_TEST(tutorial-machine_learning-TMVA_SOFIE_GNN_Parser COMMAND ${Python3_EXECUTABLE}
+  ROOT_ADD_TEST(tutorial-machine_learning-TMVA_SOFIE_GNN_Parser COMMAND ${Python3_EXECUTABLE_DEV}
   ${CMAKE_CURRENT_SOURCE_DIR}/machine_learning/TMVA_SOFIE_GNN_Parser.py
   PASSRC 0 FAILREGEX "Error in" ": error:" LABELS tutorial ENVIRONMENT ${TUTORIAL_ENV})
   set (machine_learning-TMVA_SOFIE_GNN_Application-depends tutorial-machine_learning-TMVA_SOFIE_GNN_Parser)
@@ -1043,7 +1043,7 @@ if(pyroot)
     endif()
 
     ROOT_ADD_TEST(${tutorial_name}
-                COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${t}
+                COMMAND ${Python3_EXECUTABLE_DEV} ${CMAKE_CURRENT_SOURCE_DIR}/${t}
                 PASSRC ${rc} FAILREGEX "Error in" ": error:" "segmentation violation"
                 LABELS ${labels}
                 DEPENDS ${tutorial_dependency}

From 0701b481533da832fbb7f5999dff8ff6434c62b8 Mon Sep 17 00:00:00 2001
From: Jonas Rembser <jonas.rembser@cern.ch>
Date: Mon, 30 Mar 2026 22:10:12 +0200
Subject: [PATCH 2/2] [tmva][sofie] Improve Keras test robustness and
 diagnostics

  * Skip training for models without trainable weights to avoid spurious
    Keras warnings about "no trainable weights"

  * Suppress NumPy 2.0 __array__ DeprecationWarning during model.save()
    (upstream TF/Keras compatibility issue) using a scoped warnings context

  * Fix Keras inference call for single-input models by unwrapping
    singleton input lists (Keras expects a tensor, not [tensor])

  * Remove redundant load_weights() after load_model()

  * Replace custom is_accurate() with np.testing.assert_allclose()
    for clearer diagnostics and standardized comparison
    * use atol=1e-2 and rtol=0 to preserve previous absolute tolerance behavior

  * Update LeakyReLU argument (alpha -> negative_slope) to avoid
    deprecation warnings

These changes make the tests more robust, reduce noise from external
dependencies, and significantly improve failure diagnostics.
---
 .../test/generate_keras_functional.py         | 18 ++++++++--
 .../test/generate_keras_sequential.py         | 19 +++++++---
 .../test/parser_test_function.py              | 35 +++++++------------
 3 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/bindings/pyroot/pythonizations/test/generate_keras_functional.py b/bindings/pyroot/pythonizations/test/generate_keras_functional.py
index 11f7bdefda00e..fccc59c18b443 100644
--- a/bindings/pyroot/pythonizations/test/generate_keras_functional.py
+++ b/bindings/pyroot/pythonizations/test/generate_keras_functional.py
@@ -1,3 +1,5 @@
+import warnings
+
 def generate_keras_functional(dst_dir):
 
     import numpy as np
@@ -16,8 +18,18 @@ def train_and_save(model, name):
 
         model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
         model.summary()
-        model.fit(x_train, y_train, epochs=1, verbose=0)
-        model.save(f"{dst_dir}/Functional_{name}_test.keras")
+        if len(model.trainable_weights) > 0:
+            model.fit(x_train, y_train, epochs=1, verbose=0)
+
+        with warnings.catch_warnings():
+            # Some object inside TensorFlow/Keras has an outdated __array__ implementation (see the "copy keyword" DeprecationWarning)
+            warnings.filterwarnings(
+                "ignore",
+                category=DeprecationWarning,
+                message=".*__array__.*copy keyword.*"
+            )
+            model.save(f"{dst_dir}/Functional_{name}_test.keras")
+
         print("generated and saved functional model",name)
 
 
@@ -211,7 +223,7 @@ def train_and_save(model, name):
     sub = layers.Subtract()([d1, d2])
     mul = layers.Multiply()([d1, d2])
     merged = layers.Concatenate()([add, sub, mul])
-    merged = layers.LeakyReLU(alpha=0.1)(merged)
+    merged = layers.LeakyReLU(negative_slope=0.1)(merged)
     out = layers.Dense(4, activation="softmax")(merged)
     model = models.Model([inp1, inp2], out)
     train_and_save(model, "Layer_Combination_3")
diff --git a/bindings/pyroot/pythonizations/test/generate_keras_sequential.py b/bindings/pyroot/pythonizations/test/generate_keras_sequential.py
index 40b6c645b1fd4..e19a23f93eea7 100644
--- a/bindings/pyroot/pythonizations/test/generate_keras_sequential.py
+++ b/bindings/pyroot/pythonizations/test/generate_keras_sequential.py
@@ -1,3 +1,5 @@
+import warnings
+
 def generate_keras_sequential(dst_dir):
 
     import numpy as np
@@ -9,10 +11,19 @@ def train_and_save(model, name):
         x_train = np.random.rand(32, *model.input_shape[1:])
         y_train = np.random.rand(32, *model.output_shape[1:])
         model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
-        model.fit(x_train, y_train, epochs=1, verbose=0)
+        if len(model.trainable_weights) > 0:
+            model.fit(x_train, y_train, epochs=1, verbose=0)
         model.summary()
         print("fitting sequential model",name)
-        model.save(f"{dst_dir}/Sequential_{name}_test.keras")
+
+        with warnings.catch_warnings():
+            # Some object inside TensorFlow/Keras has an outdated __array__ implementation (see the "copy keyword" DeprecationWarning)
+            warnings.filterwarnings(
+                "ignore",
+                category=DeprecationWarning,
+                message=".*__array__.*copy keyword.*"
+            )
+            model.save(f"{dst_dir}/Sequential_{name}_test.keras")
 
 
     # Binary Ops: Add, Subtract, Multiply are not typical in Sequential - skipping those
@@ -193,7 +204,7 @@ def train_and_save(model, name):
         layers.Permute((2, 1)),
         layers.Flatten(),
         layers.Dense(32),
-        layers.LeakyReLU(alpha=0.1),
+        layers.LeakyReLU(negative_slope=0.1),
         layers.Dense(10, activation='softmax'),
     ])
     train_and_save(modelB, "Layer_Combination_2")
@@ -210,4 +221,4 @@ def train_and_save(model, name):
         layers.Dense(8, activation='swish'),
         layers.Dense(3, activation='softmax'),
     ])
-    train_and_save(modelC, "Layer_Combination_3")
\ No newline at end of file
+    train_and_save(modelC, "Layer_Combination_3")
diff --git a/bindings/pyroot/pythonizations/test/parser_test_function.py b/bindings/pyroot/pythonizations/test/parser_test_function.py
index eaa4a0ed5fb2f..9232362ed45ba 100644
--- a/bindings/pyroot/pythonizations/test/parser_test_function.py
+++ b/bindings/pyroot/pythonizations/test/parser_test_function.py
@@ -2,10 +2,6 @@
 
 '''
 The test file contains two types of functions:
-    is_accurate:
-        - This function checks whether the inference results from SOFIE and Keras are accurate within a specified
-          tolerance. Since the inference result from Keras is not flattened, the function flattens both tensors before
-          performing the comparison.
 
     generate_and_test_inference:
         - This function accepts the following inputs:
@@ -29,7 +25,7 @@
           shape from the model object.
         - Convert the inference results to NumPy arrays:
           The SOFIE result is of type vector<float>, and the Keras result is a TensorFlow tensor. Both are converted to
-          NumPy arrays before being passed to the is_accurate function for comparison.
+          NumPy arrays before being passed to the np.testing.assert_allclose function for comparison.
 
 '''
 def is_channels_first_supported() :
@@ -42,16 +38,6 @@ def is_channels_first_supported() :
 
       return True
 
-def is_accurate(tensor_a, tensor_b, tolerance=1e-2):
-    tensor_a = tensor_a.flatten()
-    tensor_b = tensor_b.flatten()
-    for i in range(len(tensor_a)):
-        difference = abs(tensor_a[i] - tensor_b[i])
-        if difference > tolerance:
-            print(tensor_a[i], tensor_b[i])
-            return False
-    return True
-
 def generate_and_test_inference(model_file_path: str, generated_header_file_dir: str = None, batch_size=1):
 
     import keras
@@ -81,7 +67,6 @@ def generate_and_test_inference(model_file_path: str, generated_header_file_dir:
     sofie_model_namespace = getattr(ROOT, "TMVA_SOFIE_" + model_name)
     inference_session = sofie_model_namespace.Session(generated_header_file_path.removesuffix(".hxx") + ".dat")
     keras_model = keras.models.load_model(model_file_path)
-    keras_model.load_weights(model_file_path)
 
     input_tensors = []
     for model_input in keras_model.inputs:
@@ -91,11 +76,17 @@ def generate_and_test_inference(model_file_path: str, generated_header_file_dir:
     sofie_inference_result = inference_session.infer(*input_tensors)
     sofie_output_tensor_shape = list(rmodel.GetTensorShape(rmodel.GetOutputTensorNames()[0]))   # get output shape
                                                                                                 # from SOFIE
-    keras_inference_result = keras_model(input_tensors)
+    # Keras explicitly forbids input tensor lists of size 1
+    if len(keras_model.inputs) == 1:
+        keras_inference_result = keras_model(input_tensors[0])
+    else:
+        keras_inference_result = keras_model(input_tensors)
     if sofie_output_tensor_shape != list(keras_inference_result.shape):
         raise AssertionError("Output tensor dimensions from SOFIE and Keras do not match")
-    sofie_inference_result = np.asarray(sofie_inference_result)
-    keras_inference_result = np.asarray(keras_inference_result)
-    is_inference_accurate = is_accurate(sofie_inference_result, keras_inference_result)
-    if not is_inference_accurate:
-        raise AssertionError("Inference results from SOFIE and Keras do not match")
\ No newline at end of file
+
+    np.testing.assert_allclose(
+        np.asarray(sofie_inference_result).flatten(),
+        np.asarray(keras_inference_result).flatten(),
+        atol=1e-2,
+        rtol=0.  # explicitly disable relative tolerance (NumPy uses |a - b| <= atol + rtol * |b|)
+    )