Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion linalg/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR})

# SparseSolv headers (header-only library)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/sparsesolv)

add_library(ngla ${NGS_LIB_TYPE}
basematrix.cpp basevector.cpp multivector.cpp
blockjacobi.cpp cg.cpp chebyshev.cpp commutingAMG.cpp eigen.cpp
Expand All @@ -14,6 +17,9 @@ add_library(ngla ${NGS_LIB_TYPE}
target_include_directories(ngla PRIVATE ${UMFPACK_INCLUDE_DIR} ${NETGEN_PYTHON_INCLUDE_DIRS})
target_compile_definitions(ngla PRIVATE ${NGSOLVE_COMPILE_DEFINITIONS_PRIVATE})

# SparseSolv: use NGSolve TaskManager instead of OpenMP
target_compile_definitions(ngla PRIVATE SPARSESOLV_USE_NGSOLVE_TASKMANAGER)

if(UMFPACK_STATIC)
# work-around for private linking of interface libraries
# see https://gitlab.kitware.com/cmake/cmake/-/issues/15415
Expand All @@ -39,14 +45,23 @@ endif()

install( FILES
basematrix.hpp basevector.hpp blockjacobi.hpp cg.hpp multivector.hpp
chebyshev.hpp commutingAMG.hpp eigen.hpp jacobi.hpp la.hpp order.hpp
chebyshev.hpp commutingAMG.hpp eigen.hpp jacobi.hpp la.hpp order.hpp
pardisoinverse.hpp sparsecholesky.hpp sparsematrix.hpp
sparsematrix_impl.hpp sparsematrix_dyn.hpp diagonalmatrix.hpp
special_matrix.hpp superluinverse.hpp mumpsinverse.hpp
umfpackinverse.hpp vvector.hpp python_linalg.hpp
elementbyelement.hpp arnoldi.hpp paralleldofs.hpp
sparsefactorization_interface.hpp
sparsesolv_precond.hpp
sparsesolv_python_export.hpp
DESTINATION ${NGSOLVE_INSTALL_DIR_INCLUDE}
COMPONENT ngsolve_devel
)

# Install SparseSolv headers
install( DIRECTORY sparsesolv
DESTINATION ${NGSOLVE_INSTALL_DIR_INCLUDE}
COMPONENT ngsolve_devel
FILES_MATCHING PATTERN "*.hpp"
)

1 change: 1 addition & 0 deletions linalg/la.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ namespace ngla
#include "chebyshev.hpp"
#include "eigen.hpp"
#include "arnoldi.hpp"
#include "sparsesolv_precond.hpp"

#endif
9 changes: 9 additions & 0 deletions linalg/python_linalg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include "../ngstd/python_ngstd.hpp"
#include "sparsefactorization_interface.hpp"

// SparseSolv preconditioners and Python export
#include "sparsesolv_precond.hpp"
#include "sparsesolv_python_export.hpp"

using namespace ngla;
// include netgen-header to get access to PyMPI
#include <myadt.hpp>
Expand Down Expand Up @@ -1944,6 +1948,11 @@ shift : object
solvers.append(GetInverseName(SPARSECHOLESKY));
return solvers;
});


// SparseSolv Preconditioners and Solvers
// Based on JP-MARs/SparseSolv (https://github.com/JP-MARs/SparseSolv)
ExportSparseSolvBindings(m);
}


Expand Down
31 changes: 31 additions & 0 deletions linalg/sparsesolv/core/constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
 * @file constants.hpp
 * @brief Named numeric thresholds used internally by SparseSolv.
 *
 * Collects the magic numbers shared by the solvers and preconditioners in
 * one place. Public-API defaults (tolerance, max_iterations,
 * shift_parameter, ...) belong to SolverConfig and are deliberately
 * NOT defined here.
 */

#ifndef SPARSESOLV_CORE_CONSTANTS_HPP
#define SPARSESOLV_CORE_CONSTANTS_HPP

namespace sparsesolv::constants {

/// Smallest |diagonal| that preconditioners (Jacobi, SGS, IC diagonal
/// checks) will safely invert.
constexpr double MIN_DIAGONAL_TOLERANCE = 1e-15;

/// Numerical-breakdown detection threshold in CG (pAp ~ 0) and in the
/// first MRTR iteration (v_w ~ 0).
constexpr double BREAKDOWN_THRESHOLD = 1e-30;

/// Denominator-collapse detection threshold (denom ~ 0) in the general
/// iterations of MRTR and SGS-MRTR.
constexpr double DENOMINATOR_BREAKDOWN = 1e-60;

} // namespace sparsesolv::constants

#endif // SPARSESOLV_CORE_CONSTANTS_HPP
142 changes: 142 additions & 0 deletions linalg/sparsesolv/core/level_schedule.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
* @file level_schedule.hpp
* @brief Level scheduling for parallel triangular solves
*
* Computes dependency levels for rows of a triangular matrix so that
* rows at the same level can be processed in parallel. This enables
* parallel forward/backward substitution using parallel_for.
*
* Algorithm:
* For lower triangular L:
* level[i] = max(level[j] for j in L[i,:] where j < i) + 1
* Rows at the same level are independent and can run in parallel.
*
* For upper triangular (backward solve), build levels from the transpose
* pattern, processing from the last row backward.
*/

#ifndef SPARSESOLV_CORE_LEVEL_SCHEDULE_HPP
#define SPARSESOLV_CORE_LEVEL_SCHEDULE_HPP

#include "types.hpp"
#include "parallel.hpp"
#include <vector>
#include <algorithm>

namespace sparsesolv {

/**
* @brief Level schedule for parallel triangular solves
*
* Stores rows grouped by dependency level. Rows within the same level
* are independent and can be processed in parallel via parallel_for.
*
* Construction cost: O(nnz), done once per factorization.
* Usage: iterate levels sequentially, within each level use parallel_for.
*/
struct LevelSchedule {
/// levels[k] = vector of row indices that can be processed in parallel at step k
std::vector<std::vector<index_t>> levels;

/// Total number of rows
index_t size = 0;

/**
* @brief Build level schedule from lower triangular CSR pattern
*
* For forward substitution: L * y = b, processing rows 0..n-1.
* Row i depends on rows j < i where L[i,j] != 0.
*
* @param row_ptr CSR row pointer array (size n+1)
* @param col_idx CSR column index array
* @param n Number of rows
*/
void build_from_lower(const index_t* row_ptr, const index_t* col_idx, index_t n) {
size = n;
if (n <= 0) {
levels.clear();
return;
}

// Compute level for each row
std::vector<index_t> row_level(n, 0);
index_t max_level = 0;

for (index_t i = 0; i < n; ++i) {
index_t max_dep = -1;
// Scan off-diagonal entries (j < i) in row i
for (index_t k = row_ptr[i]; k < row_ptr[i + 1]; ++k) {
index_t j = col_idx[k];
if (j < i) {
max_dep = std::max(max_dep, row_level[j]);
}
}
row_level[i] = max_dep + 1;
max_level = std::max(max_level, row_level[i]);
}

// Group rows by level
levels.resize(max_level + 1);
for (auto& lev : levels) lev.clear();

for (index_t i = 0; i < n; ++i) {
levels[row_level[i]].push_back(i);
}
}

/**
* @brief Build level schedule from upper triangular CSR pattern
*
* For backward substitution: U * y = b, processing rows n-1..0.
* Row i depends on rows j > i where U[i,j] != 0.
*
* The levels are ordered so that level 0 should be processed first
* (contains the last rows), then level 1, etc.
*
* @param row_ptr CSR row pointer array (size n+1)
* @param col_idx CSR column index array
* @param n Number of rows
*/
void build_from_upper(const index_t* row_ptr, const index_t* col_idx, index_t n) {
size = n;
if (n <= 0) {
levels.clear();
return;
}

// Compute level for each row (processing from last to first)
std::vector<index_t> row_level(n, 0);
index_t max_level = 0;

for (index_t i = n; i-- > 0;) {
index_t max_dep = -1;
// Scan off-diagonal entries (j > i) in row i
for (index_t k = row_ptr[i]; k < row_ptr[i + 1]; ++k) {
index_t j = col_idx[k];
if (j > i) {
max_dep = std::max(max_dep, row_level[j]);
}
}
row_level[i] = max_dep + 1;
max_level = std::max(max_level, row_level[i]);
}

// Group rows by level
levels.resize(max_level + 1);
for (auto& lev : levels) lev.clear();

for (index_t i = 0; i < n; ++i) {
levels[row_level[i]].push_back(i);
}
}

/// Check if the schedule has been built
bool is_built() const { return !levels.empty(); }

/// Number of levels (= sequential steps needed)
index_t num_levels() const { return static_cast<index_t>(levels.size()); }
};

} // namespace sparsesolv

#endif // SPARSESOLV_CORE_LEVEL_SCHEDULE_HPP
94 changes: 94 additions & 0 deletions linalg/sparsesolv/core/parallel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
* @file parallel.hpp
* @brief Portable parallel primitives for SparseSolv
*
* Provides compile-time dispatch to:
* - NGSolve TaskManager (when SPARSESOLV_USE_NGSOLVE_TASKMANAGER is defined)
* - OpenMP (when _OPENMP is defined)
* - Serial fallback (otherwise)
*
* This allows SparseSolv to work both as a standalone library and
* integrated into NGSolve without OpenMP/TaskManager thread pool conflicts.
*/

#ifndef SPARSESOLV_CORE_PARALLEL_HPP
#define SPARSESOLV_CORE_PARALLEL_HPP

#include "types.hpp"
#include <type_traits>
#include <complex>

#ifdef SPARSESOLV_USE_NGSOLVE_TASKMANAGER
#include <core/taskmanager.hpp>
#elif defined(_OPENMP)
#include <omp.h>
#endif

namespace sparsesolv {

/**
 * @brief Parallel for loop over [0, n)
 *
 * Compile-time dispatch: ngcore::ParallelFor when built with
 * SPARSESOLV_USE_NGSOLVE_TASKMANAGER, an OpenMP statically-scheduled
 * parallel-for when _OPENMP is defined, and a plain serial loop otherwise.
 *
 * @tparam FUNC Callable invoked as f(index_t)
 * @param n Exclusive upper loop bound; no work is done for n <= 0
 * @param f Loop body, called once per index i in [0, n)
 *
 * NOTE(review): in the parallel builds f may be invoked concurrently from
 * multiple threads — the body must be data-race free.
 */
template<typename FUNC>
inline void parallel_for(index_t n, FUNC f) {
#ifdef SPARSESOLV_USE_NGSOLVE_TASKMANAGER
    // Guard n > 0 before the size_t cast so a negative signed n cannot
    // wrap to a huge unsigned trip count inside ngcore::ParallelFor.
    if (n > 0)
        ngcore::ParallelFor(static_cast<size_t>(n),
                            [&](size_t i) { f(static_cast<index_t>(i)); });
#elif defined(_OPENMP)
    #pragma omp parallel for schedule(static)
    for (index_t i = 0; i < n; ++i)
        f(i);
#else
    // Serial fallback: standalone builds without TaskManager or OpenMP.
    for (index_t i = 0; i < n; ++i)
        f(i);
#endif
}

/**
 * @brief Parallel reduction (summation) over [0, n)
 *
 * Computes init + sum of f(i) for i in [0, n).
 * Compile-time dispatch: ngcore::ParallelReduce (supports all types
 * including complex), an OpenMP reduction (arithmetic scalar types),
 * or a serial loop.
 *
 * @tparam T    Result type (double, std::complex<double>, etc.)
 * @tparam FUNC Callable invoked as f(index_t) -> T
 * @param n    Exclusive upper loop bound; returns init for n <= 0
 * @param f    Term generator evaluated at each index
 * @param init Initial value folded into the reduction
 * @return     init + f(0) + f(1) + ... + f(n-1)
 *
 * NOTE: floating-point summation order differs between the parallel and
 * serial paths, so results may differ by rounding across builds.
 */
template<typename T, typename FUNC>
inline T parallel_reduce_sum(index_t n, FUNC f, T init = T(0)) {
#ifdef SPARSESOLV_USE_NGSOLVE_TASKMANAGER
    if (n <= 0) return init;
    return ngcore::ParallelReduce(static_cast<size_t>(n),
        [&](size_t i) { return f(static_cast<index_t>(i)); },
        [](T a, T b) { return a + b; }, init);
#elif defined(_OPENMP)
    T sum = init;
    // OpenMP 2.0 (the level MSVC implements) restricts reduction(+:) to
    // scalar arithmetic types. Accept every arithmetic T (double, float,
    // integer types) rather than double only; non-arithmetic types such
    // as std::complex fall back to the serial loop below.
    if constexpr (std::is_arithmetic_v<T>) {
        #pragma omp parallel for reduction(+:sum)
        for (index_t i = 0; i < n; ++i)
            sum += f(i);
    } else {
        for (index_t i = 0; i < n; ++i)
            sum += f(i);
    }
    return sum;
#else
    // Serial fallback: standalone builds without TaskManager or OpenMP.
    T sum = init;
    for (index_t i = 0; i < n; ++i)
        sum += f(i);
    return sum;
#endif
}

} // namespace sparsesolv

#endif // SPARSESOLV_CORE_PARALLEL_HPP
Loading