GEOS-DEV · cmcrook5 · Jan 24, 2023 · Jan 25, 2023 · Jan 26, 2023 · Jan 27, 2023
diff --git a/src/ArrayOfArrays.hpp b/src/ArrayOfArrays.hpp
@@ -484,4 +484,21 @@ class ArrayOfArrays : protected ArrayOfArraysView< T, INDEX_TYPE, false, BUFFER_
   }
 };
 
+/**
+ * @brief Compile-time flag that is true only for ArrayOfArrays types; this primary template yields false.
+ */
+template< typename >
+constexpr bool isArrayOfArrays = false;
+
+/**
+ * @brief Specialization of isArrayOfArrays that yields true for the ArrayOfArrays class.
+ * @tparam T The type of the values held by the ArrayOfArrays.
+ * @tparam INDEX_TYPE The integral type the ArrayOfArrays uses as an index.
+ * @tparam BUFFER_TYPE The buffer template used to manage the underlying allocation.
+ */
+template< typename T,
+          typename INDEX_TYPE,
+          template< typename > class BUFFER_TYPE >
+constexpr bool isArrayOfArrays< ArrayOfArrays< T, INDEX_TYPE, BUFFER_TYPE > > = true;
+
 } /* namespace LvArray */
diff --git a/src/ArrayOfArraysView.hpp b/src/ArrayOfArraysView.hpp
@@ -249,7 +249,7 @@ class ArrayOfArraysView
 
   /**
    * @brief Move assignment operator..
-   * @param src the SparsityPatternView to be moved from.
+   * @param src the ArrayOfArraysView to be moved from.
    * @return *this.
    */
   LVARRAY_HOST_DEVICE
@@ -858,10 +858,10 @@ class ArrayOfArraysView
     destroyValues( 0, m_numArrays, pairs.first ... );
 
     INDEX_TYPE const offsetsSize = ( m_numArrays == 0 ) ? 0 : m_numArrays + 1;
-
+    
     bufferManipulation::copyInto( m_offsets, offsetsSize, srcOffsets, srcNumArrays + 1 );
     bufferManipulation::copyInto( m_sizes, m_numArrays, srcSizes, srcNumArrays );
-
+    
     INDEX_TYPE const maxOffset = m_offsets[ m_numArrays ];
     typeManipulation::forEachArg( [maxOffset, srcMaxOffset]( auto & dstBuffer )
     {
@@ -1087,4 +1087,23 @@ class ArrayOfArraysView
   }
 };
 
+/**
+ * @brief Compile-time flag that is true only for ArrayOfArraysView types; this primary template yields false.
+ */
+template< typename >
+constexpr bool isArrayOfArraysView = false;
+
+/**
+ * @brief Specialization of isArrayOfArraysView that yields true for the ArrayOfArraysView class.
+ * @tparam T The type of the values held by the ArrayOfArraysView.
+ * @tparam INDEX_TYPE The integral type the ArrayOfArraysView uses as an index.
+ * @tparam CONST_SIZES True iff the size of each array is constant.
+ * @tparam BUFFER_TYPE The buffer template used to manage the underlying allocation.
+ */
+template< typename T,
+          typename INDEX_TYPE,
+          bool CONST_SIZES,
+          template< typename > class BUFFER_TYPE >
+constexpr bool isArrayOfArraysView< ArrayOfArraysView< T, INDEX_TYPE, CONST_SIZES, BUFFER_TYPE > > = true;
+
 } /* namespace LvArray */
diff --git a/src/ChaiBuffer.hpp b/src/ChaiBuffer.hpp
@@ -340,7 +340,7 @@ class ChaiBuffer
 
     if( size > 0 )
     {
-      LVARRAY_ERROR_IF_NE_MSG( space, MemorySpace::host, "Calling reallocate with a non-zero current size is not yet supporeted for the GPU." );
+      LVARRAY_ERROR_IF_NE_MSG( space, MemorySpace::host, "Calling reallocate with a non-zero current size is not yet supported for the GPU." );
       std::ptrdiff_t const overlapAmount = std::min( newCapacity, size );
       arrayManipulation::uninitializedMove( newPointer, overlapAmount, m_pointer );
       arrayManipulation::destroy( m_pointer, size );

diff --git a/src/fixedSizeSquareMatrixOps.hpp b/src/fixedSizeSquareMatrixOps.hpp
@@ -354,6 +354,24 @@ void symmetricToDense( DST_MATRIX && dstMatrix, SRC_SYM_MATRIX const & srcSymMat
                                                            srcSymMatrix );
 }
 
+/**
+ * @brief Determine the polar decomposition of the matrix @p srcMatrix.
+ * @tparam M The size of @p R and @p srcMatrix.
+ * @tparam DST_MATRIX The type of @p R.
+ * @tparam MATRIX The type of @p srcMatrix.
+ * @param R The resultant orthogonal matrix.
+ * @param srcMatrix The matrix to be decomposed.
+ * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p srcMatrix.
+ *   This is done using Higham's iterative algorithm; the work is forwarded to internal::SquareMatrixOps< M >.
+ */
+template< std::ptrdiff_t M, typename DST_MATRIX, typename MATRIX >
+LVARRAY_HOST_DEVICE constexpr inline
+void polarDecomposition( DST_MATRIX && R, MATRIX const & srcMatrix )
+{
+  return internal::SquareMatrixOps< M >::polarDecomposition( std::forward< DST_MATRIX >( R ),
+                                                             srcMatrix );
+}
+
 ///@}
 
 } // namespace tensorOps

diff --git a/src/fixedSizeSquareMatrixOpsImpl.hpp b/src/fixedSizeSquareMatrixOpsImpl.hpp
@@ -13,6 +13,7 @@
 #pragma once
 
 #include "genericTensorOps.hpp"
+#include "limits.hpp"
 
 namespace LvArray
 {
@@ -71,6 +72,65 @@ template< std::ptrdiff_t M >
 struct SquareMatrixOps
 {};
 
+/**
+ * @brief Determine the polar decomposition of the M x M matrix @p matrix
+ * @tparam M The size of @p R and @p matrix.
+ * @tparam DST_MATRIX The type of @p R.
+ * @tparam MATRIX The type of @p matrix.
+ * @param R The resultant orthogonal matrix.
+ * @param matrix The matrix to be decomposed.
+ * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix.
+ *   This is done using Higham's iterative algorithm; a message is printed if the error is still above tolerance after 100 iterations.
+ */
+template< std::ptrdiff_t M, typename DST_MATRIX, typename MATRIX >
+LVARRAY_HOST_DEVICE inline
+static void polarDecompositionBase( DST_MATRIX && LVARRAY_RESTRICT_REF R,
+                                    MATRIX const & LVARRAY_RESTRICT_REF matrix )
+{
+  checkSizes< M, M >( R );
+  checkSizes< M, M >( matrix );
+
+  using FloatingPoint = std::decay_t< decltype( R[0][0] ) >;
+
+  // Start the iteration from the input matrix
+  copy< M, M >( R, matrix );
+  FloatingPoint RInverse[M][M] = { {0} },
+                RInverseTranspose[M][M] = { {0} },
+                RRTMinusI[M][M] = { {0} };
+
+  // Higham Algorithm: iterate until the squared error drops below the squared tolerance or 100 passes are done
+  FloatingPoint errorSquared = 1.0;
+  FloatingPoint const tolerance = 10 * LvArray::NumericLimits< FloatingPoint >::epsilon;
+  int iter = 0;
+  for( ; errorSquared > tolerance * tolerance && iter < 100; ++iter )
+  {
+    errorSquared = 0.0;
+
+    // Average the current R with its inverse transpose
+    SquareMatrixOps< M >::invert( RInverse, R );
+    transpose< M, M >( RInverseTranspose, RInverse );
+    add< M, M >( R, RInverseTranspose );
+    scale< M, M >( R, 0.5 );
+
+    // Determine how close R is to being orthogonal using the sum of the squared entries of R.R^T-I
+    FloatingPoint copyR[M][M] = { { 0.0 } };
+    copy< M, M >( copyR, R );
+    Rij_eq_AikBjk< M, M, M >( RRTMinusI, R, copyR );
+    addIdentity< M >( RRTMinusI, -1.0 );
+    for( std::ptrdiff_t i = 0 ; i < M ; i++ )
+    {
+      for( std::ptrdiff_t j = 0 ; j < M ; j++ )
+      {
+        errorSquared += RRTMinusI[i][j] * RRTMinusI[i][j];
+      }
+    }
+  }
+  if( iter == 100 && errorSquared > tolerance * tolerance ) // reaching the tolerance on the last pass is a success
+  {
+    printf( "Polar decomposition did not converge in 100 iterations!\n" );
+  }
+}
+
 /**
  * @struct SquareMatrixOps< 2 >
  * @brief Performs operations on 2x2 square matrices.
@@ -529,8 +589,24 @@ struct SquareMatrixOps< 2 >
     dstMatrix[ 1 ][ 0 ] = srcSymMatrix[ 2 ];
   }
 
-private:
+  /**
+   * @brief Determine the polar decomposition of the 2x2 matrix @p matrix.
+   * @tparam DST_MATRIX The type of @p R.
+   * @tparam MATRIX The type of @p matrix.
+   * @param R The resultant orthogonal matrix.
+   * @param matrix The matrix to be decomposed.
+   * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix.
+   *   This is done using Higham's iterative algorithm, by delegating to polarDecompositionBase< 2 >.
+   */
+  template< typename DST_MATRIX, typename MATRIX >
+  LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline
+  static void polarDecomposition( DST_MATRIX && LVARRAY_RESTRICT_REF R,
+                                  MATRIX const & LVARRAY_RESTRICT_REF matrix )
+  {
+    polarDecompositionBase< 2 >( R, matrix );
+  }
 
+private:
   /**
    * @brief Compute the eigenvalues of the 2x2 symmetric matrix @p matrix.
    * @tparam FloatingPoint A floating point type.
@@ -1162,6 +1238,23 @@ struct SquareMatrixOps< 3 >
     dstMatrix[ 2 ][ 1 ] = srcSymMatrix[ 3 ];
   }
 
+  /**
+   * @brief Determine the polar decomposition of the 3x3 matrix @p matrix.
+   * @tparam DST_MATRIX The type of @p R.
+   * @tparam MATRIX The type of @p matrix.
+   * @param R The resultant orthogonal matrix.
+   * @param matrix The matrix to be decomposed.
+   * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix.
+   *   This is done using Higham's iterative algorithm, by delegating to polarDecompositionBase< 3 >.
+   */
+  template< typename DST_MATRIX, typename MATRIX >
+  LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline
+  static void polarDecomposition( DST_MATRIX && LVARRAY_RESTRICT_REF R,
+                                  MATRIX const & LVARRAY_RESTRICT_REF matrix )
+  {
+    polarDecompositionBase< 3 >( R, matrix );
+  }
+
 private:
   /**
    * @brief Compute the eigenvalues of the 3x3 symmetric matrix @p matrix.

diff --git a/src/math.hpp b/src/math.hpp
@@ -440,6 +440,113 @@ __half2 abs( __half2 const x )
 
 #endif
 
+
+/**
+ * @return The smallest integral value that is not less than @p x.
+ * @param x The number to round up.
+ * @note With the generic overload that follows, any numeric type is accepted; only a @c float argument returns @c float.
+ */
+LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
+float ceil( float const x )
+{
+#ifdef LVARRAY_DEVICE_COMPILE
+  return ::ceilf( x );
+#else
+  return std::ceil( x );
+#endif
+}
+
+template< typename T >
+LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr
+double ceil( T const x )
+{
+#ifdef LVARRAY_DEVICE_COMPILE
+  return ::ceil( static_cast< double >( x ) );
+#else
+  return std::ceil( x );
+#endif
+}
+
+#if defined( LVARRAY_USE_DEVICE )
+
+/// @copydoc ceil( float )
+LVARRAY_DEVICE LVARRAY_FORCE_INLINE
+__half ceil( __half const x )
+{
+#if CUDART_VERSION > 11000
+  return hceil( x );
+#else
+  return __float2half( ::ceilf( __half2float( x ) ) ); // no hceil on this path: round up in single precision, then narrow back
+#endif
+}
+
+/// @copydoc ceil( float )
+LVARRAY_DEVICE LVARRAY_FORCE_INLINE
+__half2 ceil( __half2 const x )
+{
+#if CUDART_VERSION > 11000
+  return h2ceil( x );
+#else
+  return __floats2half2_rn( ::ceilf( __low2float( x ) ), ::ceilf( __high2float( x ) ) ); // no h2ceil on this path: round each lane up in single precision
+#endif
+}
+
+#endif
+
+
+/**
+ * @return The largest integral value that is not greater than @p x.
+ * @param x The number to round down.
+ * @note With the generic overload that follows, any numeric type is accepted; only a @c float argument returns @c float.
+ */
+LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
+float floor( float const x )
+{
+#ifdef LVARRAY_DEVICE_COMPILE
+  return ::floorf( x );
+#else
+  return std::floor( x );
+#endif
+}
+
+template< typename T >
+LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr
+double floor( T const x )
+{
+#ifdef LVARRAY_DEVICE_COMPILE
+  return ::floor( static_cast< double >( x ) );
+#else
+  return std::floor( x );
+#endif
+}
+
+#if defined( LVARRAY_USE_DEVICE )
+
+/// @copydoc floor( float )
+LVARRAY_DEVICE LVARRAY_FORCE_INLINE
+__half floor( __half const x )
+{
+#if CUDART_VERSION > 11000
+  return hfloor( x );
+#else
+  return __float2half( ::floorf( __half2float( x ) ) ); // no hfloor on this path: round down in single precision, then narrow back
+#endif
+}
+
+/// @copydoc floor( float )
+LVARRAY_DEVICE LVARRAY_FORCE_INLINE
+__half2 floor( __half2 const x )
+{
+#if CUDART_VERSION > 11000
+  return h2floor( x );
+#else
+  return __floats2half2_rn( ::floorf( __low2float( x ) ), ::floorf( __high2float( x ) ) ); // no h2floor on this path: round each lane down in single precision
+#endif
+}
+
+#endif
+
+
 /**
  * @return @code x * x @endcode.
  * @tparam T The typeof @p x.
@@ -452,6 +559,44 @@ T square( T const x )
 
 ///@}
 
+
+/**
+ * @name Power.
+ */
+///@{
+
+/**
+ * @return @p x raised to the power @p n.
+ * @param x The base.
+ * @param n The exponent.
+ * @note This set of overloads is valid for any numeric type. Only the @c float overload returns @c float;
+ *   the generic overload returns @c double and, when compiled for device, converts both arguments to @c double.
+ */
+LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
+float pow( float const x, float const n )
+{
+#if defined(LVARRAY_DEVICE_COMPILE)
+  return ::powf( x, n );
+#else
+  return std::pow( x, n );
+#endif
+}
+
+/// @copydoc pow( float, float )
+template< typename T >
+LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE
+double pow( T const x, T const n )
+{
+#if defined(LVARRAY_DEVICE_COMPILE)
+  return ::pow( double( x ), double( n ) );
+#else
+  return std::pow( x, n );
+#endif
+}
+
+///@}
+
+
 /**
  * @name Square root and inverse square root.
  */

diff --git a/unitTests/CMakeLists.txt b/unitTests/CMakeLists.txt
@@ -72,6 +72,7 @@ set( testSources
      testTensorOpsEigen.cpp
      testTensorOpsInverseOneArg.cpp
      testTensorOpsInverseTwoArgs.cpp
+     testTensorOpsPolarDecomposition.cpp
      testTensorOpsSymDeterminant.cpp
      testTensorOpsSymInverseOneArg.cpp
      testTensorOpsSymInverseTwoArgs.cpp