From 519780d020cee33d08ef9cacddf37bc3c50e50e4 Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Tue, 28 Apr 2026 18:13:18 +0200 Subject: [PATCH 1/9] Fix MSVC native cl.exe build compatibility on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GauXC was only buildable with GCC, Clang, and clang-cl. This commit fixes all compilation and linking issues when building with MSVC's native `cl.exe` compiler. ## Compiler compatibility fixes - `__PRETTY_FUNCTION__` → `__FUNCSIG__` (`exceptions.hpp`): MSVC does not support `__PRETTY_FUNCTION__`; use `__FUNCSIG__` under an `_MSC_VER` guard. - `__builtin_popcount` → `std::popcount` (`exx_screening.cxx`): MSVC does not provide GCC builtins; use `std::popcount` from the C++20 `<bit>` header instead. - C99 `[restrict]` array params → `*__restrict` pointers (14 rys files): MSVC does not support the C99 array parameter syntax. The `*__restrict` pointer form is portable across GCC, Clang, and MSVC. - `__attribute__((always_inline))` → `FORCE_INLINE` macro (`rys_integral.c`): Maps to `__forceinline` on MSVC and `inline __attribute__((always_inline))` on GCC/Clang. Also defines `MIN` as a plain ternary on MSVC, which does not support the GNU statement-expression form; GCC/Clang keep the statement-expression `MIN` macro. - C99 VLAs → `_malloca`/`_freea` (`rys_rw.c`, `rys_xrw.c`): MSVC does not support VLAs. Uses `_malloca`/`_freea` (stack with heap fallback) under `_MSC_VER` guards. - `__attribute__((__aligned__(64)))` → `alignas(64)` (20 `integral_*.cxx` files): Portable C++11 alignment specifier, works on GCC, Clang, and MSVC. - `__restrict__` → `__restrict` (`integral_1_0.cxx`): `__restrict__` is GCC/Clang-only; `__restrict` is portable across all three compilers. - Missing standard-library `#include` (`reduction_driver.hpp`): MSVC does not pull the required header in transitively through other headers, so it is now included explicitly. - Non-const `operator==` (`molecule.hpp`): Made `Molecule::operator==` `const` to fix C++20 ambiguity with synthesized reverse candidates on MSVC. 
## Static HDF5 linking fixes (`src/external/CMakeLists.txt`) - HighFive incorrectly propagates `H5_BUILT_AS_DYNAMIC_LIB`: CMake's `FindHDF5` module sets this define on Windows when `HDF5_USE_STATIC_LIBRARIES` is unset, regardless of whether the library is actually static. HighFive then propagates it via its `libdeps` INTERFACE target. The fix patches `libdeps` after `FetchContent` to replace `H5_BUILT_AS_DYNAMIC_LIB` with `H5_BUILT_AS_STATIC_LIB`. - HDF5 transitive static dependencies: When linking HDF5 statically on Windows, its dependencies (zlib, szip/aec, shlwapi) must be linked explicitly as they are not auto-resolved. --- include/gauxc/exceptions.hpp | 5 +++ include/gauxc/molecule.hpp | 2 +- include/gauxc/reduction_driver.hpp | 1 + src/external/CMakeLists.txt | 29 +++++++++++++++-- .../integrator_util/exx_screening.cxx | 3 +- .../host/obara_saika/src/integral_0.cxx | 2 +- .../host/obara_saika/src/integral_0_0.cxx | 2 +- .../host/obara_saika/src/integral_1.cxx | 2 +- .../host/obara_saika/src/integral_1_0.cxx | 10 +++--- .../host/obara_saika/src/integral_1_1.cxx | 2 +- .../host/obara_saika/src/integral_2.cxx | 2 +- .../host/obara_saika/src/integral_2_0.cxx | 2 +- .../host/obara_saika/src/integral_2_1.cxx | 2 +- .../host/obara_saika/src/integral_2_2.cxx | 2 +- .../host/obara_saika/src/integral_3.cxx | 2 +- .../host/obara_saika/src/integral_3_0.cxx | 2 +- .../host/obara_saika/src/integral_3_1.cxx | 2 +- .../host/obara_saika/src/integral_3_2.cxx | 2 +- .../host/obara_saika/src/integral_3_3.cxx | 2 +- .../host/obara_saika/src/integral_4.cxx | 2 +- .../host/obara_saika/src/integral_4_0.cxx | 2 +- .../host/obara_saika/src/integral_4_1.cxx | 2 +- .../host/obara_saika/src/integral_4_2.cxx | 2 +- .../host/obara_saika/src/integral_4_3.cxx | 2 +- .../host/obara_saika/src/integral_4_4.cxx | 2 +- .../local_work_driver/host/rys/src/rys_1rw.c | 2 +- .../local_work_driver/host/rys/src/rys_1rw.h | 2 +- .../local_work_driver/host/rys/src/rys_2rw.c | 2 +- 
.../local_work_driver/host/rys/src/rys_2rw.h | 2 +- .../local_work_driver/host/rys/src/rys_3rw.c | 2 +- .../local_work_driver/host/rys/src/rys_3rw.h | 2 +- .../local_work_driver/host/rys/src/rys_4rw.c | 2 +- .../local_work_driver/host/rys/src/rys_4rw.h | 2 +- .../local_work_driver/host/rys/src/rys_5rw.c | 2 +- .../local_work_driver/host/rys/src/rys_5rw.h | 2 +- .../host/rys/src/rys_integral.c | 22 ++++++++----- .../local_work_driver/host/rys/src/rys_rw.c | 18 ++++++++--- .../local_work_driver/host/rys/src/rys_rw.h | 2 +- .../local_work_driver/host/rys/src/rys_xrw.c | 31 ++++++++++++++++--- .../local_work_driver/host/rys/src/rys_xrw.h | 8 ++--- 40 files changed, 129 insertions(+), 60 deletions(-) diff --git a/include/gauxc/exceptions.hpp b/include/gauxc/exceptions.hpp index 84b9b4893..fa6a874e8 100644 --- a/include/gauxc/exceptions.hpp +++ b/include/gauxc/exceptions.hpp @@ -76,8 +76,13 @@ class generic_gauxc_exception : public std::exception { } +#ifdef _MSC_VER +#define GAUXC_GENERIC_EXCEPTION( MSG ) \ + throw generic_gauxc_exception( __FILE__, __FUNCSIG__, __LINE__, MSG ) +#else #define GAUXC_GENERIC_EXCEPTION( MSG ) \ throw generic_gauxc_exception( __FILE__, __PRETTY_FUNCTION__, __LINE__, MSG ) +#endif #define GAUXC_PIMPL_NOT_INITIALIZED() \ GAUXC_GENERIC_EXCEPTION("PIMPL NOT INITIALIZED") diff --git a/include/gauxc/molecule.hpp b/include/gauxc/molecule.hpp index 9f4fe6a74..2b6743620 100644 --- a/include/gauxc/molecule.hpp +++ b/include/gauxc/molecule.hpp @@ -47,7 +47,7 @@ class Molecule : public std::vector { })->Z; } - bool operator==(const Molecule& other) { + bool operator==(const Molecule& other) const { if(other.size() != this->size()) return false; for( auto i = 0ul; i < this->size(); ++i ) if( other[i] != operator[](i) ) return false; diff --git a/include/gauxc/reduction_driver.hpp b/include/gauxc/reduction_driver.hpp index f3bef1886..9cb1dffc7 100644 --- a/include/gauxc/reduction_driver.hpp +++ b/include/gauxc/reduction_driver.hpp @@ -11,6 +11,7 @@ */ 
#pragma once #include +#include #include #include #include diff --git a/src/external/CMakeLists.txt b/src/external/CMakeLists.txt index 46612c81b..c9c9d7077 100644 --- a/src/external/CMakeLists.txt +++ b/src/external/CMakeLists.txt @@ -21,19 +21,42 @@ if( GAUXC_ENABLE_HDF5 ) message(STATUS "HighFive REV = ${GAUXC_HIGHFIVE_REVISION} ") FetchContent_Declare( HighFive GIT_REPOSITORY ${GAUXC_HIGHFIVE_REPOSITORY} - GIT_TAG ${GAUXC_HIGHFIVE_REVISION} + GIT_TAG ${GAUXC_HIGHFIVE_REVISION} ) - + set(HIGHFIVE_USE_BOOST OFF CACHE BOOL "" ) set(HIGHFIVE_UNIT_TESTS OFF CACHE BOOL "" ) set(HIGHFIVE_EXAMPLES OFF CACHE BOOL "" ) #set(HIGHFIVE_PARALLEL_HDF5 ON CACHE BOOL "" ) set(HIGHFIVE_BUILD_DOCS OFF CACHE BOOL "" ) FetchContent_MakeAvailable( HighFive ) - + + # HighFive propagates HDF5_DEFINITIONS via its libdeps target. + # CMake's FindHDF5 module sets H5_BUILT_AS_DYNAMIC_LIB on Windows + # when HDF5_USE_STATIC_LIBRARIES is not set, even for static libs. + # Correct this when linking statically. + if(WIN32 AND HDF5_PROVIDES_STATIC_LIBS AND TARGET libdeps) + get_target_property(_libdeps_defs libdeps INTERFACE_COMPILE_DEFINITIONS) + if(_libdeps_defs) + list(REMOVE_ITEM _libdeps_defs "H5_BUILT_AS_DYNAMIC_LIB") + list(APPEND _libdeps_defs "H5_BUILT_AS_STATIC_LIB") + set_target_properties(libdeps PROPERTIES INTERFACE_COMPILE_DEFINITIONS "${_libdeps_defs}") + endif() + endif() + endif() target_sources( gauxc PRIVATE hdf5_write.cxx hdf5_read.cxx ) target_link_libraries( gauxc PUBLIC HighFive ) + + # When linking HDF5 statically on Windows, HDF5's transitive + # dependencies (zlib, szip/aec, shlwapi) must be linked explicitly. 
+ if(WIN32 AND HDF5_PROVIDES_STATIC_LIBS) + find_library(ZLIB_LIBRARY NAMES zlib z) + find_library(SZIP_LIBRARY NAMES szip-static szip sz) + find_library(AEC_LIBRARY NAMES aec-static aec) + target_link_libraries( gauxc PUBLIC + ${ZLIB_LIBRARY} ${SZIP_LIBRARY} ${AEC_LIBRARY} shlwapi ) + endif() else() message(WARNING "GAUXC_ENABLE_HDF5 was enabled, but HDF5 was not found, Disabling HDF5 Bindings") endif() diff --git a/src/xc_integrator/integrator_util/exx_screening.cxx b/src/xc_integrator/integrator_util/exx_screening.cxx index 5c7efcd13..f55148742 100644 --- a/src/xc_integrator/integrator_util/exx_screening.cxx +++ b/src/xc_integrator/integrator_util/exx_screening.cxx @@ -13,6 +13,7 @@ #include "host/blas.hpp" #include #include +#include //#include //#include #ifdef GAUXC_HAS_CUDA @@ -195,7 +196,7 @@ void exx_ek_screening( } uint32_t total_shells = 0; - for( auto x : task_ek_shells ) total_shells += __builtin_popcount(x); + for( auto x : task_ek_shells ) total_shells += std::popcount(x); std::vector ek_shells; ek_shells.reserve(total_shells); for( auto i_block = 0u; i_block < util::div_ceil(nshells,32); ++i_block ) { diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx index c64d2d54b..cd6d444d7 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx @@ -30,7 +30,7 @@ void integral_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 1 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx index 
6971c1a71..d5024357a 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx @@ -32,7 +32,7 @@ void integral_0_0(size_t npts, int ldG, double *weights, double * /*boys_table*/) { - __attribute__((__aligned__(64))) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 1 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx index 3638d86af..045a5c860 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx @@ -30,7 +30,7 @@ void integral_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 9 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx index d0e655413..34f39af6a 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx @@ -32,12 +32,12 @@ void integral_1_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[3 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[3 * NPTS_LOCAL + 3 * NPTS_LOCAL]; - double * __restrict__ temp = (buffer + 0); - double * __restrict__ Tval = (buffer + 3 * NPTS_LOCAL + 0 * NPTS_LOCAL); - double * __restrict__ Tval_inv_e = (buffer + 3 * NPTS_LOCAL + 1 * 
NPTS_LOCAL); - double * __restrict__ FmT = (buffer + 3 * NPTS_LOCAL + 2 * NPTS_LOCAL); + double * __restrict temp = (buffer + 0); + double * __restrict Tval = (buffer + 3 * NPTS_LOCAL + 0 * NPTS_LOCAL); + double * __restrict Tval_inv_e = (buffer + 3 * NPTS_LOCAL + 1 * NPTS_LOCAL); + double * __restrict FmT = (buffer + 3 * NPTS_LOCAL + 2 * NPTS_LOCAL); size_t npts_upper = NPTS_LOCAL * (npts / NPTS_LOCAL); size_t p_outer = 0; diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx index ee58d18f0..bf46e1efe 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx @@ -32,7 +32,7 @@ void integral_1_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 9 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx index 035be5bef..bcaef2609 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx @@ -30,7 +30,7 @@ void integral_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 31 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx index 0343e6675..378ee6641 100644 --- 
a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx @@ -32,7 +32,7 @@ void integral_2_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[6 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[6 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 6 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx index 6904c15d7..c8abdf6be 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx @@ -32,7 +32,7 @@ void integral_2_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[16 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[16 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 16 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx index dbd9f500d..95989043d 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx @@ -32,7 +32,7 @@ void integral_2_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 31 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx 
b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx index c3faf7f43..48b26caf2 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx @@ -30,7 +30,7 @@ void integral_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 74 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx index 44c3542e0..bca56cfc9 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx @@ -32,7 +32,7 @@ void integral_3_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[10 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[10 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 10 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx index 197e948ad..91148e596 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx @@ -32,7 +32,7 @@ void integral_3_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[25 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[25 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 25 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git 
a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx index 7c4a2ec67..eea293316 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx @@ -32,7 +32,7 @@ void integral_3_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[46 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[46 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 46 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx index 251de89d9..3d46f0ac5 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx @@ -32,7 +32,7 @@ void integral_3_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 74 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx index 67a9cace1..ad0e89b82 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx @@ -30,7 +30,7 @@ void integral_4(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double 
*Tval = (buffer + 145 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx index 1b2f57f14..275edeed3 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx @@ -32,7 +32,7 @@ void integral_4_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[15 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[15 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 15 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx index 6fefd7870..f189fe0d0 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx @@ -32,7 +32,7 @@ void integral_4_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[36 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[36 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 36 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx index 0a88c5dd7..393bd36d7 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx @@ -32,7 +32,7 @@ void integral_4_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[64 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double 
buffer[64 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 64 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx index e318e860f..4e2cdc506 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx @@ -32,7 +32,7 @@ void integral_4_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[100 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[100 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 100 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx index 5aca482ab..301bde852 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx @@ -32,7 +32,7 @@ void integral_4_4(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 145 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c index 4aa876364..2e18f7159 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_1rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_1rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump1[34] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7 diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h index c98f10241..5f1f05672 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h @@ -1,6 +1,6 @@ #ifndef RYS_1RW_H_ #define RYS_1RW_H_ -void rys_1rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_1rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c index 78459eb8a..d98966172 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_2rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_2rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump2[41] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8 diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h index 309c3ec26..e18221d57 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h @@ -1,6 +1,6 @@ #ifndef RYS_2RW_H_ #define RYS_2RW_H_ -void rys_2rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_2rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c index 299073ca9..c86131622 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_3rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_3rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump3[48] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h index 904139b2c..affe560f0 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h @@ -1,6 +1,6 @@ #ifndef RYS_3RW_H_ #define RYS_3RW_H_ -void rys_3rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_3rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c index 2b83ae652..f59c31eb2 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_4rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_4rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump4[54] = { 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h index dd6fac71f..5294e181d 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h @@ -1,6 +1,6 @@ #ifndef RYS_4RW_H_ #define RYS_4RW_H_ -void rys_4rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_4rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c index a478610c9..ccc677f98 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_5rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_5rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump5[60] = { 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9 }; diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h index 8e4278431..1e76fe9e5 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h @@ -1,6 +1,6 @@ #ifndef RYS_5RW_H_ #define RYS_5RW_H_ -void rys_5rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_5rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c index a9f8d22da..cee3f63e3 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c @@ -21,17 +21,23 @@ #define PI 3.14159265358979323846 +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#else +#define FORCE_INLINE inline __attribute__((always_inline)) #define MIN(a,b) \ ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a < _b ? 
_a : _b; }) +#endif // codelets -inline void __attribute__((always_inline)) compute_00(double beta, double *int_array, double *wgh) { +FORCE_INLINE void compute_00(double beta, double *int_array, double *wgh) { *(int_array + 0) = (*(int_array + 0)) * beta + *(wgh + 0); } -inline void __attribute__((always_inline)) compute_10_01(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_10_01(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double beta, double *int_array, double *rts, double *wgh) { double rt, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1; rt = *(rts + 0); @@ -49,7 +55,7 @@ inline void __attribute__((always_inline)) compute_10_01(double xPX, double yPX, *(int_array + 2) = (*(int_array + 2)) * beta + (*(wgh + 0)) * Cz0 + (*(wgh + 1)) * Cz1; } -inline void __attribute__((always_inline)) compute_20_02(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_20_02(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { double B0, B1, rt0, rt1, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1, Cx2, Cy2, Cz2, Cx3, Cy3, Cz3; rt0 = *(rts + 0); @@ -82,7 +88,7 @@ inline void __attribute__((always_inline)) compute_20_02(double xPX, double yPX, *(int_array + 5) = (*(int_array + 5)) * beta + Cz2 * (*(wgh + 0)) + Cz3 * (*(wgh + 1)); } -inline void __attribute__((always_inline)) compute_11(double xAB, double yAB, double zAB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_11(double xAB, double yAB, double zAB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) 
{ double B0, B1, rt0, rt1, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1, Cx2, Cy2, Cz2, Cx3, Cy3, Cz3; rt0 = *(rts + 0); @@ -120,7 +126,7 @@ inline void __attribute__((always_inline)) compute_11(double xAB, double yAB, do } // nr roots > 2 -inline void __attribute__((always_inline)) compute_vrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double * rts, double *vrr_array, double *hrr_array) { +FORCE_INLINE void compute_vrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double * rts, double *vrr_array, double *hrr_array) { double *roots = (rts + 0); double *vrr = (vrr_array + 0); for(int r = 0; r < nr_roots; ++r) { @@ -210,7 +216,7 @@ inline void __attribute__((always_inline)) compute_vrr3(int nr_roots, int l, int } } -inline void __attribute__((always_inline)) compute_hrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xAB, double yAB, double zAB, double *vrr_array, double *hrr_array) { +FORCE_INLINE void compute_hrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xAB, double yAB, double zAB, double *vrr_array, double *hrr_array) { for(int j = 1; j <= lA; ++j) { double *hrrj = (hrr_array + llA * j); @@ -271,11 +277,11 @@ inline void __attribute__((always_inline)) compute_hrr3(int nr_roots, int l, int } } -inline int __attribute__((always_inline)) index_calculation(int i, int j, int L) { +FORCE_INLINE int index_calculation(int i, int j, int L) { return (L - i) * (L - i + 1) / 2 + j; } -inline void __attribute__((always_inline)) compute_reduction(int nr_roots, int lA, int lB, double *weights, double *hrr_array, double *result, double beta) { +FORCE_INLINE void compute_reduction(int nr_roots, int lA, int lB, double *weights, double *hrr_array, double *result, double beta) { int offsetB = (lB + 1) * (lB + 2) / 2; for(int ia = 0; ia <= lA; ++ia) { diff --git 
a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c index 905d05d49..f9eba534d 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c @@ -3,6 +3,9 @@ #include #include #include +#ifdef _MSC_VER +#include <malloc.h> +#endif #include "boys.h" @@ -15,9 +18,9 @@ void rys_rw(int nt, int ngqp, - double tval[restrict], - double rts[restrict], - double wts[restrict]) { + double *__restrict tval, + double *__restrict rts, + double *__restrict wts) { switch (ngqp) { case 1: rys_1rw(nt, tval, rts, wts); @@ -36,7 +39,11 @@ void rys_rw(int nt, return; default: { +#ifdef _MSC_VER + double *ryszero = (double *)_malloca(nt * sizeof(double)); +#else double ryszero[nt]; +#endif for (int n = 0; n < nt; n++) { const double t = tval[n]; @@ -61,7 +68,10 @@ void rys_rw(int nt, int nmom = (ngqp << 1) - 1; rys_xrw(nt, ntgqp, ngqp, nmom, tval, ryszero, rts, wts); - + +#ifdef _MSC_VER + _freea(ryszero); +#endif return; } } diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h index 659cddefb..9d0f6ed21 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h @@ -1,6 +1,6 @@ #ifndef RYS_RW_H_ #define RYS_RW_H_ -void rys_rw(int nt, int ngqp, double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_rw(int nt, int ngqp, double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c index 35ba680fe..2089bd0f6 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c @@ -2,16 +2,28 @@ #include #include #include +#ifdef _MSC_VER +#include <malloc.h> +#endif #include "jacobi.h"
void rys_xrw(int nt, int ntgqp, int ngqp, int nmom, - const double tval[restrict], - const double ryszero[restrict], - double rts[restrict], - double wts[restrict]) { + const double *__restrict tval, + const double *__restrict ryszero, + double *__restrict rts, + double *__restrict wts) { +#ifdef _MSC_VER + double *a = (double *)_malloca(nmom * sizeof(double)); + double *b = (double *)_malloca((nmom-1) * sizeof(double)); + double *mom = (double *)_malloca(nmom * sizeof(double)); + double *dia = (double *)_malloca(ngqp * sizeof(double)); + double *off = (double *)_malloca(ngqp * sizeof(double)); + double *row1 = (double *)_malloca(nmom * sizeof(double)); + double *row2 = (double *)_malloca(nmom * sizeof(double)); +#else double a[nmom]; double b[nmom-1]; double mom[nmom]; @@ -19,6 +31,7 @@ void rys_xrw(int nt, double off[ngqp]; double row1[nmom]; double row2[nmom]; +#endif int nrts = 0; for (int n = 0; n < nt; n += 1) { @@ -261,4 +274,14 @@ void rys_xrw(int nt, nrts += ngqp; } } + +#ifdef _MSC_VER + _freea(row2); + _freea(row1); + _freea(off); + _freea(dia); + _freea(mom); + _freea(b); + _freea(a); +#endif } diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h index f107d589b..b99cdcc4a 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h @@ -5,9 +5,9 @@ void rys_xrw(int nt, int ntgqp, int ngqp, int nmom, - const double tval[restrict], - const double ryszero[restrict], - double rts[restrict], - double wts[restrict]); + const double *__restrict tval, + const double *__restrict ryszero, + double *__restrict rts, + double *__restrict wts); #endif From 5104ef4e474b29244c31ca1917242bb2e9594696 Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Tue, 28 Apr 2026 18:14:30 +0200 Subject: [PATCH 2/9] Set temporary ExchCXX & IntegratorXX versions --- cmake/gauxc-dep-versions.cmake | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/cmake/gauxc-dep-versions.cmake b/cmake/gauxc-dep-versions.cmake index 62fbcb26f..032b0188b 100644 --- a/cmake/gauxc-dep-versions.cmake +++ b/cmake/gauxc-dep-versions.cmake @@ -7,14 +7,14 @@ set( GAUXC_CUB_REVISION 1.10.0 ) set( GAUXC_CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git ) set( GAUXC_CUTLASS_REVISION v2.10.0 ) -set( GAUXC_EXCHCXX_REPOSITORY https://github.com/wavefunction91/ExchCXX.git ) -set( GAUXC_EXCHCXX_REVISION v1.0.0 ) +set( GAUXC_EXCHCXX_REPOSITORY https://github.com/lorisercole/ExchCXX.git ) +set( GAUXC_EXCHCXX_REVISION 7d83223e72e2eb1446af87546b75cb81cfeca719 ) set( GAUXC_GAU2GRID_REPOSITORY https://github.com/dgasmith/gau2grid.git ) set( GAUXC_GAU2GRID_REVISION v2.0.6 ) -set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/wavefunction91/IntegratorXX.git ) -set( GAUXC_INTEGRATORXX_REVISION 1369be58d7a3235dac36d75dd964fef058830622 ) +set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/lorisercole/IntegratorXX.git ) +set( GAUXC_INTEGRATORXX_REVISION 60e45e74b4a8939a4b0fb9ca3e9e2a7304f9356f ) set( GAUXC_HIGHFIVE_REPOSITORY https://github.com/highfive-devs/HighFive.git ) set( GAUXC_HIGHFIVE_REVISION 805f0e13d09b47c4b01d40682621904aa3b31bb8 ) From 80e011e747977f41edaee83388834c649fe208b7 Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Tue, 5 May 2026 11:35:05 +0200 Subject: [PATCH 3/9] avoid implicit declaration error in gau2grid_helper.c with clang-cl --- external/gau2grid/generated_source/gau2grid_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/external/gau2grid/generated_source/gau2grid_helper.c b/external/gau2grid/generated_source/gau2grid_helper.c index e5868df7e..31956e084 100644 --- a/external/gau2grid/generated_source/gau2grid_helper.c +++ b/external/gau2grid/generated_source/gau2grid_helper.c @@ -8,6 +8,7 @@ #include #if defined(__clang__) && defined(_MSC_VER) #include +#include #elif defined __clang__ #include #elif defined _MSC_VER From 
67f9e70913f11b96df756100b68368b26d918683 Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Tue, 5 May 2026 11:36:06 +0200 Subject: [PATCH 4/9] avoid newline-eof warning --- external/gau2grid/generated_source/gau2grid/gau2grid.h | 2 +- external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h | 2 +- src/xc_integrator/integrator_util/spherical_harmonics.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/external/gau2grid/generated_source/gau2grid/gau2grid.h b/external/gau2grid/generated_source/gau2grid/gau2grid.h index 29f888852..0e097a526 100644 --- a/external/gau2grid/generated_source/gau2grid/gau2grid.h +++ b/external/gau2grid/generated_source/gau2grid/gau2grid.h @@ -79,4 +79,4 @@ void gg_collocation_deriv3(int L, const unsigned long npoints, const double* PRA #ifdef __cplusplus } #endif -#endif /* GAU2GRID_GUARD_H */ \ No newline at end of file +#endif /* GAU2GRID_GUARD_H */ diff --git a/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h b/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h index f6033886a..d85679263 100644 --- a/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h +++ b/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h @@ -96,4 +96,4 @@ #define PRAGMA_RESTRICT __restrict__ -#endif \ No newline at end of file +#endif diff --git a/src/xc_integrator/integrator_util/spherical_harmonics.hpp b/src/xc_integrator/integrator_util/spherical_harmonics.hpp index 7ce495d8d..73c6a3bcb 100644 --- a/src/xc_integrator/integrator_util/spherical_harmonics.hpp +++ b/src/xc_integrator/integrator_util/spherical_harmonics.hpp @@ -4,4 +4,4 @@ #include -void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts, const std::array center, const double radius, double* ylm_matrix); \ No newline at end of file +void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts, const std::array center, const double radius, double* ylm_matrix); From 
8f9b3cbbb650ae70ab7cd12bcab12bf2c1b11b41 Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Tue, 5 May 2026 11:36:20 +0200 Subject: [PATCH 5/9] silence noisy clang/cl warnings --- src/CMakeLists.txt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1aed4b428..17c5ef749 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -78,6 +78,36 @@ if( GAUXC_CXX_HAS_WSHADOW ) target_compile_options( gauxc PRIVATE $<$: -Wshadow> ) endif() +if(MSVC) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options( gauxc PRIVATE + -Wno-covered-switch-default + -Wno-documentation + -Wno-documentation-unknown-command + -Wno-implicit-int-float-conversion + -Wno-language-extension-token + -Wno-reserved-identifier + -Wno-shorten-64-to-32 + -Wno-sign-compare + -Wno-undef + ) + else() + target_compile_options( gauxc PRIVATE + /W2 + /wd4100 # unreferenced parameter + /wd4101 # unreferenced local variable + # /wd4018 # signed/unsigned mismatch + # /wd4100 # unreferenced parameter + # /wd4189 # unreferenced local variable + /wd4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + # /wd4388 # signed/unsigned mismatch + # /wd4464 # relative include path contains '..' 
+ # /wd4668 # undefined preprocessor macro + /wd5219 # implicit conversion from 'type-1' to 'type-2', possible loss of data + ) + endif() +endif() + target_link_libraries( gauxc PUBLIC ExchCXX::ExchCXX IntegratorXX::IntegratorXX From 512a0655ddd86e80f3d7bd12c710385026eb394b Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Tue, 5 May 2026 14:25:24 +0200 Subject: [PATCH 6/9] update deps hashes --- cmake/gauxc-dep-versions.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/gauxc-dep-versions.cmake b/cmake/gauxc-dep-versions.cmake index 032b0188b..f00e3ced6 100644 --- a/cmake/gauxc-dep-versions.cmake +++ b/cmake/gauxc-dep-versions.cmake @@ -8,13 +8,13 @@ set( GAUXC_CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git ) set( GAUXC_CUTLASS_REVISION v2.10.0 ) set( GAUXC_EXCHCXX_REPOSITORY https://github.com/lorisercole/ExchCXX.git ) -set( GAUXC_EXCHCXX_REVISION 7d83223e72e2eb1446af87546b75cb81cfeca719 ) +set( GAUXC_EXCHCXX_REVISION 601f72eb668e0721a8452fc3eaff510f431946b0 ) set( GAUXC_GAU2GRID_REPOSITORY https://github.com/dgasmith/gau2grid.git ) set( GAUXC_GAU2GRID_REVISION v2.0.6 ) set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/lorisercole/IntegratorXX.git ) -set( GAUXC_INTEGRATORXX_REVISION 60e45e74b4a8939a4b0fb9ca3e9e2a7304f9356f ) +set( GAUXC_INTEGRATORXX_REVISION 58012a0b32c45f5b403380fab594047dd4587f55 ) set( GAUXC_HIGHFIVE_REPOSITORY https://github.com/highfive-devs/HighFive.git ) set( GAUXC_HIGHFIVE_REVISION 805f0e13d09b47c4b01d40682621904aa3b31bb8 ) From 716f51383662130ed33c1e21b0f5c5d05b3d2e79 Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Wed, 6 May 2026 14:12:36 +0200 Subject: [PATCH 7/9] avoid clang warning: strdup warning C4996: 'strdup': The POSIX name for this item is deprecated. Instead, use the ISO C and C++ conformant name: _strdup. 
--- include/gauxc/exceptions.hpp | 5 +++++ src/exceptions/cublas_exception.hpp | 5 +++++ src/exceptions/cuda_exception.hpp | 5 +++++ src/exceptions/cutlass_exception.hpp | 5 +++++ src/exceptions/hip_exception.hpp | 5 +++++ src/exceptions/hipblas_exception.hpp | 5 +++++ src/exceptions/magma_exception.hpp | 5 +++++ 7 files changed, 35 insertions(+) diff --git a/include/gauxc/exceptions.hpp b/include/gauxc/exceptions.hpp index fa6a874e8..c49d1939d 100644 --- a/include/gauxc/exceptions.hpp +++ b/include/gauxc/exceptions.hpp @@ -54,7 +54,12 @@ class generic_gauxc_exception : public std::exception { << " Line " << line_ << std::endl; auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif }; public: diff --git a/src/exceptions/cublas_exception.hpp b/src/exceptions/cublas_exception.hpp index 503fc9007..c3378185c 100644 --- a/src/exceptions/cublas_exception.hpp +++ b/src/exceptions/cublas_exception.hpp @@ -96,7 +96,12 @@ class cublas_exception : public std::exception { auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif } public: diff --git a/src/exceptions/cuda_exception.hpp b/src/exceptions/cuda_exception.hpp index 6d4767d1a..02746690e 100644 --- a/src/exceptions/cuda_exception.hpp +++ b/src/exceptions/cuda_exception.hpp @@ -48,7 +48,12 @@ class cuda_exception : public std::exception { auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif } public: diff --git a/src/exceptions/cutlass_exception.hpp b/src/exceptions/cutlass_exception.hpp index 4de854bef..7b7697a03 100644 --- a/src/exceptions/cutlass_exception.hpp +++ b/src/exceptions/cutlass_exception.hpp @@ -48,7 +48,12 @@ class cutlass_exception : public std::exception { auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif } public: diff --git a/src/exceptions/hip_exception.hpp b/src/exceptions/hip_exception.hpp index 08a403022..b16ab6040 100644 --- a/src/exceptions/hip_exception.hpp +++
b/src/exceptions/hip_exception.hpp @@ -48,7 +48,12 @@ class hip_exception : public std::exception { auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif } public: diff --git a/src/exceptions/hipblas_exception.hpp b/src/exceptions/hipblas_exception.hpp index bb89a3316..392dd277e 100644 --- a/src/exceptions/hipblas_exception.hpp +++ b/src/exceptions/hipblas_exception.hpp @@ -103,7 +103,12 @@ class hipblas_exception : public std::exception { auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif } public: diff --git a/src/exceptions/magma_exception.hpp b/src/exceptions/magma_exception.hpp index 300565735..3ef42e7ce 100644 --- a/src/exceptions/magma_exception.hpp +++ b/src/exceptions/magma_exception.hpp @@ -46,7 +46,12 @@ class magma_exception : public std::exception { auto msg = ss.str(); + // MSVC flags the POSIX name strdup (C4996); _strdup is MSVC-only, so keep strdup elsewhere. +#ifdef _MSC_VER + return _strdup( msg.c_str() ); +#else return strdup( msg.c_str() ); +#endif } public: From 5e57b86e77ab56985a97e3151579c2eee49dec1b Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Wed, 6 May 2026 15:38:31 +0200 Subject: [PATCH 8/9] avoid newline-eof warning --- src/xc_integrator/integrator_util/spherical_harmonics.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xc_integrator/integrator_util/spherical_harmonics.cxx b/src/xc_integrator/integrator_util/spherical_harmonics.cxx index bbc838e10..d84adc7fa 100644 --- a/src/xc_integrator/integrator_util/spherical_harmonics.cxx +++ b/src/xc_integrator/integrator_util/spherical_harmonics.cxx @@ -168,4 +168,4 @@ void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts const std::array x = {points[3 * i], points[3 * i + 1], points[3 * i + 2]}; scaled_ylm_new(lmax, x, center, radius, nlm, ylm_matrix + i * nharmonics); } -} \ No newline at end of file +} From 4e18eb1c4fc3b7bc1d2f91c59d8a4826b0997a4f Mon Sep 17 00:00:00 2001 From: Loris Ercole Date: Thu, 7 May 2026 11:15:12 +0200 Subject: [PATCH 9/9] cleanup --- src/CMakeLists.txt | 6 ------ 1 file
changed, 6 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 17c5ef749..944820a4d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,13 +96,7 @@ if(MSVC) /W2 /wd4100 # unreferenced parameter /wd4101 # unreferenced local variable - # /wd4018 # signed/unsigned mismatch - # /wd4100 # unreferenced parameter - # /wd4189 # unreferenced local variable /wd4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data - # /wd4388 # signed/unsigned mismatch - # /wd4464 # relative include path contains '..' - # /wd4668 # undefined preprocessor macro /wd5219 # implicit conversion from 'type-1' to 'type-2', possible loss of data ) endif()