Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion linalg/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR})

# SparseSolv headers (header-only library)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/sparsesolv)

add_library(ngla ${NGS_LIB_TYPE}
basematrix.cpp basevector.cpp multivector.cpp
blockjacobi.cpp cg.cpp chebyshev.cpp commutingAMG.cpp eigen.cpp
Expand All @@ -14,6 +17,9 @@ add_library(ngla ${NGS_LIB_TYPE}
target_include_directories(ngla PRIVATE ${UMFPACK_INCLUDE_DIR} ${NETGEN_PYTHON_INCLUDE_DIRS})
target_compile_definitions(ngla PRIVATE ${NGSOLVE_COMPILE_DEFINITIONS_PRIVATE})

# SparseSolv: use NGSolve TaskManager instead of OpenMP
target_compile_definitions(ngla PRIVATE SPARSESOLV_USE_NGSOLVE_TASKMANAGER)

if(UMFPACK_STATIC)
# work-around for private linking of interface libraries
# see https://gitlab.kitware.com/cmake/cmake/-/issues/15415
Expand All @@ -39,14 +45,23 @@ endif()

install( FILES
basematrix.hpp basevector.hpp blockjacobi.hpp cg.hpp multivector.hpp
chebyshev.hpp commutingAMG.hpp eigen.hpp jacobi.hpp la.hpp order.hpp
chebyshev.hpp commutingAMG.hpp eigen.hpp jacobi.hpp la.hpp order.hpp
pardisoinverse.hpp sparsecholesky.hpp sparsematrix.hpp
sparsematrix_impl.hpp sparsematrix_dyn.hpp diagonalmatrix.hpp
special_matrix.hpp superluinverse.hpp mumpsinverse.hpp
umfpackinverse.hpp vvector.hpp python_linalg.hpp
elementbyelement.hpp arnoldi.hpp paralleldofs.hpp
sparsefactorization_interface.hpp
sparsesolv_precond.hpp
sparsesolv_python_export.hpp
DESTINATION ${NGSOLVE_INSTALL_DIR_INCLUDE}
COMPONENT ngsolve_devel
)

# Install SparseSolv headers
install( DIRECTORY sparsesolv
DESTINATION ${NGSOLVE_INSTALL_DIR_INCLUDE}
COMPONENT ngsolve_devel
FILES_MATCHING PATTERN "*.hpp"
)

1 change: 1 addition & 0 deletions linalg/la.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ namespace ngla
#include "chebyshev.hpp"
#include "eigen.hpp"
#include "arnoldi.hpp"
#include "sparsesolv_precond.hpp"

#endif
9 changes: 9 additions & 0 deletions linalg/python_linalg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include "../ngstd/python_ngstd.hpp"
#include "sparsefactorization_interface.hpp"

// SparseSolv preconditioners and Python export
#include "sparsesolv_precond.hpp"
#include "sparsesolv_python_export.hpp"

using namespace ngla;
// include netgen-header to get access to PyMPI
#include <myadt.hpp>
Expand Down Expand Up @@ -1944,6 +1948,11 @@ shift : object
solvers.append(GetInverseName(SPARSECHOLESKY));
return solvers;
});


// SparseSolv Preconditioners and Solvers
// Based on JP-MARs/SparseSolv (https://github.com/JP-MARs/SparseSolv)
ExportSparseSolvBindings(m);
}


Expand Down
31 changes: 31 additions & 0 deletions linalg/sparsesolv/core/constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
 * @file constants.hpp
 * @brief Named numeric thresholds used internally by SparseSolv.
 *
 * Collects the magic numbers shared by the solvers and preconditioners in
 * one place. Public-API defaults (tolerance, max_iterations,
 * shift_parameter, ...) belong to SolverConfig and are deliberately
 * NOT defined here.
 */

#ifndef SPARSESOLV_CORE_CONSTANTS_HPP
#define SPARSESOLV_CORE_CONSTANTS_HPP

namespace sparsesolv::constants {

/// Smallest |diagonal| that preconditioners (Jacobi, SGS, IC diagonal
/// checks) will safely invert.
constexpr double MIN_DIAGONAL_TOLERANCE = 1e-15;

/// Numerical-breakdown detection threshold in CG (pAp ~ 0) and in the
/// first MRTR iteration (v_w ~ 0).
constexpr double BREAKDOWN_THRESHOLD = 1e-30;

/// Denominator-collapse detection threshold (denom ~ 0) in the general
/// iterations of MRTR and SGS-MRTR.
constexpr double DENOMINATOR_BREAKDOWN = 1e-60;

} // namespace sparsesolv::constants

#endif // SPARSESOLV_CORE_CONSTANTS_HPP
142 changes: 142 additions & 0 deletions linalg/sparsesolv/core/level_schedule.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
* @file level_schedule.hpp
* @brief Level scheduling for parallel triangular solves
*
* Computes dependency levels for rows of a triangular matrix so that
* rows at the same level can be processed in parallel. This enables
* parallel forward/backward substitution using parallel_for.
*
* Algorithm:
* For lower triangular L:
* level[i] = max(level[j] for j in L[i,:] where j < i) + 1
* Rows at the same level are independent and can run in parallel.
*
* For upper triangular (backward solve), build levels from the transpose
* pattern, processing from the last row backward.
*/

#ifndef SPARSESOLV_CORE_LEVEL_SCHEDULE_HPP
#define SPARSESOLV_CORE_LEVEL_SCHEDULE_HPP

#include "types.hpp"
#include "parallel.hpp"
#include <vector>
#include <algorithm>

namespace sparsesolv {

/**
* @brief Level schedule for parallel triangular solves
*
* Stores rows grouped by dependency level. Rows within the same level
* are independent and can be processed in parallel via parallel_for.
*
* Construction cost: O(nnz), done once per factorization.
* Usage: iterate levels sequentially, within each level use parallel_for.
*/
struct LevelSchedule {
/// levels[k] = vector of row indices that can be processed in parallel at step k
std::vector<std::vector<index_t>> levels;

/// Total number of rows
index_t size = 0;

/**
* @brief Build level schedule from lower triangular CSR pattern
*
* For forward substitution: L * y = b, processing rows 0..n-1.
* Row i depends on rows j < i where L[i,j] != 0.
*
* @param row_ptr CSR row pointer array (size n+1)
* @param col_idx CSR column index array
* @param n Number of rows
*/
void build_from_lower(const index_t* row_ptr, const index_t* col_idx, index_t n) {
size = n;
if (n <= 0) {
levels.clear();
return;
}

// Compute level for each row
std::vector<index_t> row_level(n, 0);
index_t max_level = 0;

for (index_t i = 0; i < n; ++i) {
index_t max_dep = -1;
// Scan off-diagonal entries (j < i) in row i
for (index_t k = row_ptr[i]; k < row_ptr[i + 1]; ++k) {
index_t j = col_idx[k];
if (j < i) {
max_dep = std::max(max_dep, row_level[j]);
}
}
row_level[i] = max_dep + 1;
max_level = std::max(max_level, row_level[i]);
}

// Group rows by level
levels.resize(max_level + 1);
for (auto& lev : levels) lev.clear();

for (index_t i = 0; i < n; ++i) {
levels[row_level[i]].push_back(i);
}
}

/**
* @brief Build level schedule from upper triangular CSR pattern
*
* For backward substitution: U * y = b, processing rows n-1..0.
* Row i depends on rows j > i where U[i,j] != 0.
*
* The levels are ordered so that level 0 should be processed first
* (contains the last rows), then level 1, etc.
*
* @param row_ptr CSR row pointer array (size n+1)
* @param col_idx CSR column index array
* @param n Number of rows
*/
void build_from_upper(const index_t* row_ptr, const index_t* col_idx, index_t n) {
size = n;
if (n <= 0) {
levels.clear();
return;
}

// Compute level for each row (processing from last to first)
std::vector<index_t> row_level(n, 0);
index_t max_level = 0;

for (index_t i = n; i-- > 0;) {
index_t max_dep = -1;
// Scan off-diagonal entries (j > i) in row i
for (index_t k = row_ptr[i]; k < row_ptr[i + 1]; ++k) {
index_t j = col_idx[k];
if (j > i) {
max_dep = std::max(max_dep, row_level[j]);
}
}
row_level[i] = max_dep + 1;
max_level = std::max(max_level, row_level[i]);
}

// Group rows by level
levels.resize(max_level + 1);
for (auto& lev : levels) lev.clear();

for (index_t i = 0; i < n; ++i) {
levels[row_level[i]].push_back(i);
}
}

/// Check if the schedule has been built
bool is_built() const { return !levels.empty(); }

/// Number of levels (= sequential steps needed)
index_t num_levels() const { return static_cast<index_t>(levels.size()); }
};

} // namespace sparsesolv

#endif // SPARSESOLV_CORE_LEVEL_SCHEDULE_HPP
94 changes: 94 additions & 0 deletions linalg/sparsesolv/core/parallel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
* @file parallel.hpp
* @brief Portable parallel primitives for SparseSolv
*
* Provides compile-time dispatch to:
* - NGSolve TaskManager (when SPARSESOLV_USE_NGSOLVE_TASKMANAGER is defined)
* - OpenMP (when _OPENMP is defined)
* - Serial fallback (otherwise)
*
* This allows SparseSolv to work both as a standalone library and
* integrated into NGSolve without OpenMP/TaskManager thread pool conflicts.
*/

#ifndef SPARSESOLV_CORE_PARALLEL_HPP
#define SPARSESOLV_CORE_PARALLEL_HPP

#include "types.hpp"
#include <type_traits>
#include <complex>

#ifdef SPARSESOLV_USE_NGSOLVE_TASKMANAGER
#include <core/taskmanager.hpp>
#elif defined(_OPENMP)
#include <omp.h>
#endif

namespace sparsesolv {

/**
 * @brief Parallel for loop over [0, n)
 *
 * Compile-time dispatch: ngcore::ParallelFor when built with
 * SPARSESOLV_USE_NGSOLVE_TASKMANAGER, an OpenMP statically-scheduled
 * parallel-for when _OPENMP is defined, and a plain serial loop otherwise.
 *
 * @tparam FUNC Callable invoked as f(index_t)
 * @param n Exclusive upper loop bound; no work is done for n <= 0
 * @param f Loop body, called once per index i in [0, n)
 *
 * NOTE(review): in the parallel builds f may be invoked concurrently from
 * multiple threads — the body must be data-race free.
 */
template<typename FUNC>
inline void parallel_for(index_t n, FUNC f) {
#ifdef SPARSESOLV_USE_NGSOLVE_TASKMANAGER
    // Guard n > 0 before the size_t cast so a negative signed n cannot
    // wrap to a huge unsigned trip count inside ngcore::ParallelFor.
    if (n > 0)
        ngcore::ParallelFor(static_cast<size_t>(n),
                            [&](size_t i) { f(static_cast<index_t>(i)); });
#elif defined(_OPENMP)
    #pragma omp parallel for schedule(static)
    for (index_t i = 0; i < n; ++i)
        f(i);
#else
    // Serial fallback: standalone builds without TaskManager or OpenMP.
    for (index_t i = 0; i < n; ++i)
        f(i);
#endif
}

/**
 * @brief Parallel reduction (summation) over [0, n)
 *
 * Computes init + sum of f(i) for i in [0, n).
 * Compile-time dispatch: ngcore::ParallelReduce (supports all types
 * including complex), an OpenMP reduction (arithmetic scalar types),
 * or a serial loop.
 *
 * @tparam T    Result type (double, std::complex<double>, etc.)
 * @tparam FUNC Callable invoked as f(index_t) -> T
 * @param n    Exclusive upper loop bound; returns init for n <= 0
 * @param f    Term generator evaluated at each index
 * @param init Initial value folded into the reduction
 * @return     init + f(0) + f(1) + ... + f(n-1)
 *
 * NOTE: floating-point summation order differs between the parallel and
 * serial paths, so results may differ by rounding across builds.
 */
template<typename T, typename FUNC>
inline T parallel_reduce_sum(index_t n, FUNC f, T init = T(0)) {
#ifdef SPARSESOLV_USE_NGSOLVE_TASKMANAGER
    if (n <= 0) return init;
    return ngcore::ParallelReduce(static_cast<size_t>(n),
        [&](size_t i) { return f(static_cast<index_t>(i)); },
        [](T a, T b) { return a + b; }, init);
#elif defined(_OPENMP)
    T sum = init;
    // OpenMP 2.0 (the level MSVC implements) restricts reduction(+:) to
    // scalar arithmetic types. Accept every arithmetic T (double, float,
    // integer types) rather than double only; non-arithmetic types such
    // as std::complex fall back to the serial loop below.
    if constexpr (std::is_arithmetic_v<T>) {
        #pragma omp parallel for reduction(+:sum)
        for (index_t i = 0; i < n; ++i)
            sum += f(i);
    } else {
        for (index_t i = 0; i < n; ++i)
            sum += f(i);
    }
    return sum;
#else
    // Serial fallback: standalone builds without TaskManager or OpenMP.
    T sum = init;
    for (index_t i = 0; i < n; ++i)
        sum += f(i);
    return sum;
#endif
}

} // namespace sparsesolv

#endif // SPARSESOLV_CORE_PARALLEL_HPP
Loading