Skip to content
Open
Show file tree
Hide file tree
Changes from 44 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
d52e573
initial commit, added an algorithm to fixedSizeSquareMatrixOps that c…
povolny1 Jan 24, 2023
6d4875e
implementation of polar decomposition complete, still need to test
povolny1 Jan 25, 2023
02e0e1c
Fixed cmake so that GEOSX TPLs work and also updated spack.
corbett5 Jan 26, 2023
63a85d1
Merge branch 'corbett/tpl-fix-spack-update' into feature/povolny1/pol…
povolny1 Jan 27, 2023
943b300
implemented unit test for polar decomposition
povolny1 Mar 9, 2023
ddcd7b4
made error message in polar decomposition GPU-compatible
povolny1 May 4, 2023
f591570
Merge branch 'develop' into feature/povolny1/polarDecomposition
povolny1 May 5, 2023
ab63869
merge from develop
povolny1 May 18, 2023
1339ea7
Added device math functions for ceil, floor, and power
Oct 30, 2024
82f1ce3
Merge branch 'develop' into feature/crook5/math
Nov 16, 2024
1e0127a
Corrected host only function inside of checkIndices preventing device…
Dec 5, 2024
343432d
Merge branch 'develop' into feature/crook5/math
rrsettgast Jan 9, 2025
bf73593
Added debugging flags to host configs
Jan 14, 2025
8e86706
wip: trying to fix build error on Lassen.
CusiniM Jan 17, 2025
c021cc5
use fold expression to do index checks
rrsettgast Jan 22, 2025
efc221d
revert changes to typeManipulation.
CusiniM Jan 22, 2025
9275fe7
use bool return value.
CusiniM Jan 22, 2025
882b587
use fold expression.
CusiniM Jan 23, 2025
e599472
fix function name
CusiniM Jan 23, 2025
00e117d
doxygen.
CusiniM Jan 23, 2025
1ccdce5
fix typo.
CusiniM Jan 23, 2025
956aa7c
add other integral types.
CusiniM Jan 23, 2025
8eb4727
add debug assert.
CusiniM Jan 23, 2025
2fa2667
add all integral types.
CusiniM Jan 23, 2025
2343b7d
add char types
CusiniM Jan 23, 2025
ddc611e
add decay type.
CusiniM Jan 23, 2025
52c1d5e
add a static cast option
CusiniM Jan 23, 2025
58fc69d
remove assert.
CusiniM Jan 23, 2025
d89ed76
clean up.
CusiniM Jan 23, 2025
ac6c0fd
remove chars.
CusiniM Jan 24, 2025
c2efd00
just always cast to long long.
CusiniM Jan 24, 2025
e60fa8f
forgotten one.
CusiniM Jan 24, 2025
4194bc9
Merged cusini/fix-lassen-error
Feb 3, 2025
65c00ec
aoa and aoav traits
wrtobin Apr 8, 2025
cff7f1b
just a typo fix
wrtobin Apr 8, 2025
1ada56a
Committing changes to pull from remote
Apr 10, 2025
2522acf
Merge branch 'feature/crook5/math' of github.com:GEOS-DEV/LvArray int…
Apr 10, 2025
557f07c
Merge branch 'develop' into feature/crook5/math
Apr 17, 2025
6a90517
Merged develop
Jun 2, 2025
c3fcf1f
Merge branch 'feature/povolny1/polarDecomposition' into feature/crook…
Jun 2, 2025
fead905
Updated branch with current versions from develop that were overriden…
Jun 3, 2025
00327f8
Fixed compilation error related to polarDecomposition
Jun 17, 2025
95aa41e
Removed wrong namespace
Jun 17, 2025
85136be
Fixed incorrect type
Jun 17, 2025
34a4ea5
Added round to math and cofactor to square tensor ops for 2x2 and 3x3…
Jul 6, 2025
fa06984
Updated formatting
Jul 6, 2025
ebf14c8
Merge branch 'feature/crook5/math' of github.com:GEOS-DEV/LvArray int…
Jul 6, 2025
7d4a46b
Fixed bug in unit tests for cofactors
Jul 6, 2025
a1f3c56
Fixed typo in unittests
Jul 6, 2025
bed5b66
Removed unused variable
Jul 6, 2025
8aebaa5
Added invert for 4x4 matrices
Aug 22, 2025
d452637
Added determinant and inverse operations for square 4x4 matrices
Sep 17, 2025
cd4a94d
Added host config for MPM solver
Nov 11, 2025
c485354
Minor formatting changes
Dec 1, 2025
53da4ed
Removed host-configs for machines that no longer exist
Jan 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/ArrayOfArrays.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,4 +484,21 @@ class ArrayOfArrays : protected ArrayOfArraysView< T, INDEX_TYPE, false, BUFFER_
}
};

/**
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little unsure if this should go here or in GEOS itself.

* @brief True if the template type is an ArrayOfArrays.
*/
template< class >
constexpr bool isArrayOfArrays = false;

/**
* @tparam T The type contained in the ArrayOfArrays.
* @tparam INDEX_TYPE The integral type used as an index.
* @tparam BUFFER_TYPE The type used to manage the underlying allocation.
* @brief Specialization of isArrayOfArrays for the ArrayOfArrays class.
*/
template< typename T,
typename INDEX_TYPE,
template< typename > class BUFFER_TYPE >
constexpr bool isArrayOfArrays< ArrayOfArrays< T, INDEX_TYPE, BUFFER_TYPE > > = true;

} /* namespace LvArray */
25 changes: 22 additions & 3 deletions src/ArrayOfArraysView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ class ArrayOfArraysView

/**
* @brief Move assignment operator..
* @param src the SparsityPatternView to be moved from.
* @param src the ArrayOfArraysView to be moved from.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

* @return *this.
*/
LVARRAY_HOST_DEVICE
Expand Down Expand Up @@ -858,10 +858,10 @@ class ArrayOfArraysView
destroyValues( 0, m_numArrays, pairs.first ... );

INDEX_TYPE const offsetsSize = ( m_numArrays == 0 ) ? 0 : m_numArrays + 1;

bufferManipulation::copyInto( m_offsets, offsetsSize, srcOffsets, srcNumArrays + 1 );
bufferManipulation::copyInto( m_sizes, m_numArrays, srcSizes, srcNumArrays );

INDEX_TYPE const maxOffset = m_offsets[ m_numArrays ];
typeManipulation::forEachArg( [maxOffset, srcMaxOffset]( auto & dstBuffer )
{
Expand Down Expand Up @@ -1087,4 +1087,23 @@ class ArrayOfArraysView
}
};

/**
* @brief True if the template type is an ArrayOfArraysView.
*/
template< class >
constexpr bool isArrayOfArraysView = false;

/**
* @tparam T The type contained in the ArrayOfArraysView.
* @tparam INDEX_TYPE The integral type used as an index.
* @tparam CONST_SIZES True iff the size of each array is constant.
* @tparam BUFFER_TYPE The type used to manager the underlying allocation.
* @brief Specialization of isArrayOfArraysView for the ArrayOfArraysView class.
*/
template< typename T,
typename INDEX_TYPE,
bool CONST_SIZES,
template< typename > class BUFFER_TYPE >
constexpr bool isArrayOfArraysView< ArrayOfArraysView< T, INDEX_TYPE, CONST_SIZES, BUFFER_TYPE > > = true;

} /* namespace LvArray */
2 changes: 1 addition & 1 deletion src/ChaiBuffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ class ChaiBuffer

if( size > 0 )
{
LVARRAY_ERROR_IF_NE_MSG( space, MemorySpace::host, "Calling reallocate with a non-zero current size is not yet supporeted for the GPU." );
LVARRAY_ERROR_IF_NE_MSG( space, MemorySpace::host, "Calling reallocate with a non-zero current size is not yet supported for the GPU." );
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

std::ptrdiff_t const overlapAmount = std::min( newCapacity, size );
arrayManipulation::uninitializedMove( newPointer, overlapAmount, m_pointer );
arrayManipulation::destroy( m_pointer, size );
Expand Down
18 changes: 18 additions & 0 deletions src/fixedSizeSquareMatrixOps.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,24 @@ void symmetricToDense( DST_MATRIX && dstMatrix, SRC_SYM_MATRIX const & srcSymMat
srcSymMatrix );
}

/**
* @brief Determine the polar decomposition of the matrix @p srcMatrix
* @tparam M The size of @p R and @p srcMatrix.
* @tparam DST_MATRIX The type of @p R.
* @tparam MATRIX The type of @p srcMatrix.
* @param R The resultant rotation matrix.
* @param matrix The matrix to be decomposed.
* @details The polar decomposition returns a rotation matrix such that @p R . U = V . @p R = @p srcMatrix.
* This is done using Higham's iterative algorithm.
*/
template< std::ptrdiff_t M, typename DST_MATRIX, typename MATRIX >
LVARRAY_HOST_DEVICE constexpr inline
void polarDecomposition( DST_MATRIX && R, MATRIX const & srcMatrix )
{
return internal::SquareMatrixOps< M >::polarDecomposition( std::forward< DST_MATRIX >( R ),
srcMatrix );
}

///@}

} // namespace tensorOps
Expand Down
95 changes: 94 additions & 1 deletion src/fixedSizeSquareMatrixOpsImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#pragma once

#include "genericTensorOps.hpp"
#include "limits.hpp"

namespace LvArray
{
Expand Down Expand Up @@ -71,6 +72,65 @@ template< std::ptrdiff_t M >
struct SquareMatrixOps
{};

/**
* @brief Determine the polar decomposition of @p matrix
* @tparam DST_MATRIX The type of @p R.
* @tparam MATRIX The type of @p matrix.
* @param R The resultant orthogonal matrix.
* @param matrix The matrix to be decomposed.
* @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix.
* This is done using Higham's iterative algorithm.
*/
template< std::ptrdiff_t M, typename DST_MATRIX, typename MATRIX >
LVARRAY_HOST_DEVICE inline
static void polarDecompositionBase( DST_MATRIX && LVARRAY_RESTRICT_REF R,
MATRIX const & LVARRAY_RESTRICT_REF matrix )
{
checkSizes< M, M >( R );
checkSizes< M, M >( matrix );

using FloatingPoint = std::decay_t< decltype( R[0][0] ) >;

// Initialize
copy< M, M >( R, matrix );
FloatingPoint RInverse[M][M] = { {0} },
RInverseTranspose[M][M] = { {0} },
RRTMinusI[M][M] = { {0} };

// Higham Algorithm
FloatingPoint errorSquared = 1.0;
FloatingPoint tolerance = 10 * LvArray::NumericLimits< FloatingPoint >::epsilon;
int iter = 0;
while( errorSquared > tolerance * tolerance && iter < 100 )
{
iter++;
errorSquared = 0.0;

// Average the current R with its inverse tranpose
SquareMatrixOps< M >::invert( RInverse, R );
transpose< M, M >( RInverseTranspose, RInverse );
add< M, M >( R, RInverseTranspose );
scale< M, M >( R, 0.5 );

// Determine how close R is to being orthogonal using L2Norm(R.R^T-I)
FloatingPoint copyR[M][M] = { { 0.0 } };
copy< M, M >( copyR, R);
Rij_eq_AikBjk< M, M, M >( RRTMinusI, R, copyR );
addIdentity< M >( RRTMinusI, -1.0 );
for( std::ptrdiff_t i = 0 ; i < M ; i++ )
{
for( std::ptrdiff_t j = 0 ; j < M ; j++ )
{
errorSquared += RRTMinusI[i][j] * RRTMinusI[i][j];
}
}
}
if( iter == 100 )
{
printf("Polar decomposition did not converge in 100 iterations!");
}
}

/**
* @struct SquareMatrixOps< 2 >
* @brief Performs operations on 2x2 square matrices.
Expand Down Expand Up @@ -529,8 +589,24 @@ struct SquareMatrixOps< 2 >
dstMatrix[ 1 ][ 0 ] = srcSymMatrix[ 2 ];
}

private:
/**
* @brief Determine the polar decomposition of the 2x2 matrix @p matrix
* @tparam DST_MATRIX The type of @p R.
* @tparam MATRIX The type of @p matrix.
* @param R The resultant orthogonal matrix.
* @param matrix The matrix to be decomposed.
* @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix.
* This is done using Higham's iterative algorithm.
*/
template< typename DST_MATRIX, typename MATRIX >
LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline
static void polarDecomposition( DST_MATRIX && LVARRAY_RESTRICT_REF R,
MATRIX const & LVARRAY_RESTRICT_REF matrix )
{
polarDecompositionBase< 2 >( R, matrix );
}

private:
/**
* @brief Compute the eigenvalues of the 2x2 symmetric matrix @p matrix.
* @tparam FloatingPoint A floating point type.
Expand Down Expand Up @@ -1162,6 +1238,23 @@ struct SquareMatrixOps< 3 >
dstMatrix[ 2 ][ 1 ] = srcSymMatrix[ 3 ];
}

/**
* @brief Determine the polar decomposition of the 3x3 matrix @p matrix
* @tparam DST_MATRIX The type of @p R.
* @tparam MATRIX The type of @p matrix.
* @param R The resultant orthogonal matrix.
* @param matrix The matrix to be decomposed.
* @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix.
* This is done using Higham's iterative algorithm.
*/
template< typename DST_MATRIX, typename MATRIX >
LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline
static void polarDecomposition( DST_MATRIX && LVARRAY_RESTRICT_REF R,
MATRIX const & LVARRAY_RESTRICT_REF matrix )
{
polarDecompositionBase< 3 >( R, matrix );
}

private:
/**
* @brief Compute the eigenvalues of the 3x3 symmetric matrix @p matrix.
Expand Down
145 changes: 145 additions & 0 deletions src/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,113 @@ __half2 abs( __half2 const x )

#endif


/**
* @return The ceiling value of @p x.
* @param x The number to get the ceiling value of.
* @note This set of overloads is valid for any numeric type.
*/
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
float ceil( float const x )
{
#if defined(LVARRAY_DEVICE_COMPILE)
return ::ceilf( x );
#else
return std::ceil( x );
#endif
}

template< typename T >
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr
double ceil( T const x )
{
#if defined(LVARRAY_DEVICE_COMPILE)
return ::ceil( double ( x ) );
#else
return std::ceil( x );
#endif
}

#if defined( LVARRAY_USE_DEVICE )

/// @copydoc ceil( T )
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless you added tests for __half I'd just get rid of them.

LVARRAY_DEVICE LVARRAY_FORCE_INLINE
__half ceil( __half const x )
{
#if CUDART_VERSION > 11000
return hceil( x );
#else
return x > __half( 0 ) ? x : -x;
#endif
}

/// @copydoc ceil( T )
LVARRAY_DEVICE LVARRAY_FORCE_INLINE
__half2 ceil( __half2 const x )
{
#if CUDART_VERSION > 11000
return h2ceil( x );
#else
return LVARRAY_THROW( "h2ceil is not implemented for host", std::runtime_error ); // This is wrong, copied from other function used to mimic
#endif
}

#endif


/**
* @return The floor value of @p x.
* @param x The number to get the floor value of.
* @note This set of overloads is valid for any numeric type.
*/
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
float floor( float const x )
{
#if defined(LVARRAY_DEVICE_COMPILE)
return ::floorf( x );
#else
return std::floor( x );
#endif
}

template< typename T >
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr
double floor( T const x )
{
#if defined(LVARRAY_DEVICE_COMPILE)
return ::floor( double ( x ) );
#else
return std::floor( x );
#endif
}

#if defined( LVARRAY_USE_DEVICE )

/// @copydoc floor( T )
LVARRAY_DEVICE LVARRAY_FORCE_INLINE
__half floor( __half const x )
{
#if CUDART_VERSION > 11000
return hfloor( x );
#else
return x > __half( 0 ) ? x : -x;
#endif
}

/// @copydoc floor( T )
LVARRAY_DEVICE LVARRAY_FORCE_INLINE
__half2 floor( __half2 const x )
{
#if CUDART_VERSION > 11000
return h2floor( x );
#else
return LVARRAY_THROW( "h2floor is not implemented for host", std::runtime_error );
#endif
}

#endif


/**
* @return @code x * x @endcode.
* @tparam T The typeof @p x.
Expand All @@ -452,6 +559,44 @@ T square( T const x )

///@}


/**
* @name Power.
*/
///@{

/**
* @return The power of @p x.
* @param x The number to get the power of.
* @param n The exponent.
* @note This set of overloads is valid for any numeric type. If @p x is integral it is converted to @c double
* and the return type is @c double.
*/
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
float pow( float const x, float const n )
{
#if defined(LVARRAY_DEVICE_COMPILE)
return ::powf( x, n );
#else
return std::pow( x, n );
#endif
}

/// @copydoc pow( float )
template< typename T >
LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
double pow( T const x, T const n )
{
#if defined(LVARRAY_DEVICE_COMPILE)
return ::pow( double( x ), double( n ) );
#else
return std::pow( x, n );
#endif
}

///@}


/**
* @name Square root and inverse square root.
*/
Expand Down
1 change: 1 addition & 0 deletions unitTests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ set( testSources
testTensorOpsEigen.cpp
testTensorOpsInverseOneArg.cpp
testTensorOpsInverseTwoArgs.cpp
testTensorOpsPolarDecomposition.cpp
testTensorOpsSymDeterminant.cpp
testTensorOpsSymInverseOneArg.cpp
testTensorOpsSymInverseTwoArgs.cpp
Expand Down
Loading