diff --git a/cmake/gauxc-dep-versions.cmake b/cmake/gauxc-dep-versions.cmake index 62fbcb26f..f00e3ced6 100644 --- a/cmake/gauxc-dep-versions.cmake +++ b/cmake/gauxc-dep-versions.cmake @@ -7,14 +7,14 @@ set( GAUXC_CUB_REVISION 1.10.0 ) set( GAUXC_CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git ) set( GAUXC_CUTLASS_REVISION v2.10.0 ) -set( GAUXC_EXCHCXX_REPOSITORY https://github.com/wavefunction91/ExchCXX.git ) -set( GAUXC_EXCHCXX_REVISION v1.0.0 ) +set( GAUXC_EXCHCXX_REPOSITORY https://github.com/lorisercole/ExchCXX.git ) +set( GAUXC_EXCHCXX_REVISION 601f72eb668e0721a8452fc3eaff510f431946b0 ) set( GAUXC_GAU2GRID_REPOSITORY https://github.com/dgasmith/gau2grid.git ) set( GAUXC_GAU2GRID_REVISION v2.0.6 ) -set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/wavefunction91/IntegratorXX.git ) -set( GAUXC_INTEGRATORXX_REVISION 1369be58d7a3235dac36d75dd964fef058830622 ) +set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/lorisercole/IntegratorXX.git ) +set( GAUXC_INTEGRATORXX_REVISION 58012a0b32c45f5b403380fab594047dd4587f55 ) set( GAUXC_HIGHFIVE_REPOSITORY https://github.com/highfive-devs/HighFive.git ) set( GAUXC_HIGHFIVE_REVISION 805f0e13d09b47c4b01d40682621904aa3b31bb8 ) diff --git a/external/gau2grid/generated_source/gau2grid/gau2grid.h b/external/gau2grid/generated_source/gau2grid/gau2grid.h index 29f888852..0e097a526 100644 --- a/external/gau2grid/generated_source/gau2grid/gau2grid.h +++ b/external/gau2grid/generated_source/gau2grid/gau2grid.h @@ -79,4 +79,4 @@ void gg_collocation_deriv3(int L, const unsigned long npoints, const double* PRA #ifdef __cplusplus } #endif -#endif /* GAU2GRID_GUARD_H */ \ No newline at end of file +#endif /* GAU2GRID_GUARD_H */ diff --git a/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h b/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h index f6033886a..d85679263 100644 --- a/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h +++ 
b/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h @@ -96,4 +96,4 @@ #define PRAGMA_RESTRICT __restrict__ -#endif \ No newline at end of file +#endif diff --git a/external/gau2grid/generated_source/gau2grid_helper.c b/external/gau2grid/generated_source/gau2grid_helper.c index e5868df7e..31956e084 100644 --- a/external/gau2grid/generated_source/gau2grid_helper.c +++ b/external/gau2grid/generated_source/gau2grid_helper.c @@ -8,6 +8,7 @@ #include #if defined(__clang__) && defined(_MSC_VER) #include +#include #elif defined __clang__ #include #elif defined _MSC_VER diff --git a/include/gauxc/exceptions.hpp b/include/gauxc/exceptions.hpp index 84b9b4893..c49d1939d 100644 --- a/include/gauxc/exceptions.hpp +++ b/include/gauxc/exceptions.hpp @@ -54,7 +54,7 @@ class generic_gauxc_exception : public std::exception { << " Line " << line_ << std::endl; auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); }; public: @@ -76,8 +76,13 @@ class generic_gauxc_exception : public std::exception { } +#ifdef _MSC_VER +#define GAUXC_GENERIC_EXCEPTION( MSG ) \ + throw generic_gauxc_exception( __FILE__, __FUNCSIG__, __LINE__, MSG ) +#else #define GAUXC_GENERIC_EXCEPTION( MSG ) \ throw generic_gauxc_exception( __FILE__, __PRETTY_FUNCTION__, __LINE__, MSG ) +#endif #define GAUXC_PIMPL_NOT_INITIALIZED() \ GAUXC_GENERIC_EXCEPTION("PIMPL NOT INITIALIZED") diff --git a/include/gauxc/molecule.hpp b/include/gauxc/molecule.hpp index 9f4fe6a74..2b6743620 100644 --- a/include/gauxc/molecule.hpp +++ b/include/gauxc/molecule.hpp @@ -47,7 +47,7 @@ class Molecule : public std::vector { })->Z; } - bool operator==(const Molecule& other) { + bool operator==(const Molecule& other) const { if(other.size() != this->size()) return false; for( auto i = 0ul; i < this->size(); ++i ) if( other[i] != operator[](i) ) return false; diff --git a/include/gauxc/reduction_driver.hpp b/include/gauxc/reduction_driver.hpp index f3bef1886..9cb1dffc7 100644 --- 
a/include/gauxc/reduction_driver.hpp +++ b/include/gauxc/reduction_driver.hpp @@ -11,6 +11,7 @@ */ #pragma once #include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1aed4b428..944820a4d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -78,6 +78,30 @@ if( GAUXC_CXX_HAS_WSHADOW ) target_compile_options( gauxc PRIVATE $<$: -Wshadow> ) endif() +if(MSVC) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options( gauxc PRIVATE + -Wno-covered-switch-default + -Wno-documentation + -Wno-documentation-unknown-command + -Wno-implicit-int-float-conversion + -Wno-language-extension-token + -Wno-reserved-identifier + -Wno-shorten-64-to-32 + -Wno-sign-compare + -Wno-undef + ) + else() + target_compile_options( gauxc PRIVATE + /W2 + /wd4100 # unreferenced parameter + /wd4101 # unreferenced local variable + /wd4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /wd5219 # implicit conversion from 'type-1' to 'type-2', possible loss of data + ) + endif() +endif() + target_link_libraries( gauxc PUBLIC ExchCXX::ExchCXX IntegratorXX::IntegratorXX diff --git a/src/exceptions/cublas_exception.hpp b/src/exceptions/cublas_exception.hpp index 503fc9007..c3378185c 100644 --- a/src/exceptions/cublas_exception.hpp +++ b/src/exceptions/cublas_exception.hpp @@ -96,7 +96,7 @@ class cublas_exception : public std::exception { auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); } public: diff --git a/src/exceptions/cuda_exception.hpp b/src/exceptions/cuda_exception.hpp index 6d4767d1a..02746690e 100644 --- a/src/exceptions/cuda_exception.hpp +++ b/src/exceptions/cuda_exception.hpp @@ -48,7 +48,7 @@ class cuda_exception : public std::exception { auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); } public: diff --git a/src/exceptions/cutlass_exception.hpp b/src/exceptions/cutlass_exception.hpp index 4de854bef..7b7697a03 100644 --- 
a/src/exceptions/cutlass_exception.hpp +++ b/src/exceptions/cutlass_exception.hpp @@ -48,7 +48,7 @@ class cutlass_exception : public std::exception { auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); } public: diff --git a/src/exceptions/hip_exception.hpp b/src/exceptions/hip_exception.hpp index 08a403022..b16ab6040 100644 --- a/src/exceptions/hip_exception.hpp +++ b/src/exceptions/hip_exception.hpp @@ -48,7 +48,7 @@ class hip_exception : public std::exception { auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); } public: diff --git a/src/exceptions/hipblas_exception.hpp b/src/exceptions/hipblas_exception.hpp index bb89a3316..392dd277e 100644 --- a/src/exceptions/hipblas_exception.hpp +++ b/src/exceptions/hipblas_exception.hpp @@ -103,7 +103,7 @@ class hipblas_exception : public std::exception { auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); } public: diff --git a/src/exceptions/magma_exception.hpp b/src/exceptions/magma_exception.hpp index 300565735..3ef42e7ce 100644 --- a/src/exceptions/magma_exception.hpp +++ b/src/exceptions/magma_exception.hpp @@ -46,7 +46,7 @@ class magma_exception : public std::exception { auto msg = ss.str(); - return strdup( msg.c_str() ); + return _strdup( msg.c_str() ); } public: diff --git a/src/external/CMakeLists.txt b/src/external/CMakeLists.txt index 46612c81b..c9c9d7077 100644 --- a/src/external/CMakeLists.txt +++ b/src/external/CMakeLists.txt @@ -21,19 +21,42 @@ if( GAUXC_ENABLE_HDF5 ) message(STATUS "HighFive REV = ${GAUXC_HIGHFIVE_REVISION} ") FetchContent_Declare( HighFive GIT_REPOSITORY ${GAUXC_HIGHFIVE_REPOSITORY} - GIT_TAG ${GAUXC_HIGHFIVE_REVISION} + GIT_TAG ${GAUXC_HIGHFIVE_REVISION} ) - + set(HIGHFIVE_USE_BOOST OFF CACHE BOOL "" ) set(HIGHFIVE_UNIT_TESTS OFF CACHE BOOL "" ) set(HIGHFIVE_EXAMPLES OFF CACHE BOOL "" ) #set(HIGHFIVE_PARALLEL_HDF5 ON CACHE BOOL "" ) set(HIGHFIVE_BUILD_DOCS OFF CACHE BOOL "" ) 
FetchContent_MakeAvailable( HighFive ) - + + # HighFive propagates HDF5_DEFINITIONS via its libdeps target. + # CMake's FindHDF5 module sets H5_BUILT_AS_DYNAMIC_LIB on Windows + # when HDF5_USE_STATIC_LIBRARIES is not set, even for static libs. + # Correct this when linking statically. + if(WIN32 AND HDF5_PROVIDES_STATIC_LIBS AND TARGET libdeps) + get_target_property(_libdeps_defs libdeps INTERFACE_COMPILE_DEFINITIONS) + if(_libdeps_defs) + list(REMOVE_ITEM _libdeps_defs "H5_BUILT_AS_DYNAMIC_LIB") + list(APPEND _libdeps_defs "H5_BUILT_AS_STATIC_LIB") + set_target_properties(libdeps PROPERTIES INTERFACE_COMPILE_DEFINITIONS "${_libdeps_defs}") + endif() + endif() + endif() target_sources( gauxc PRIVATE hdf5_write.cxx hdf5_read.cxx ) target_link_libraries( gauxc PUBLIC HighFive ) + + # When linking HDF5 statically on Windows, HDF5's transitive + # dependencies (zlib, szip/aec, shlwapi) must be linked explicitly. + if(WIN32 AND HDF5_PROVIDES_STATIC_LIBS) + find_library(ZLIB_LIBRARY NAMES zlib z) + find_library(SZIP_LIBRARY NAMES szip-static szip sz) + find_library(AEC_LIBRARY NAMES aec-static aec) + target_link_libraries( gauxc PUBLIC + ${ZLIB_LIBRARY} ${SZIP_LIBRARY} ${AEC_LIBRARY} shlwapi ) + endif() else() message(WARNING "GAUXC_ENABLE_HDF5 was enabled, but HDF5 was not found, Disabling HDF5 Bindings") endif() diff --git a/src/xc_integrator/integrator_util/exx_screening.cxx b/src/xc_integrator/integrator_util/exx_screening.cxx index 5c7efcd13..f55148742 100644 --- a/src/xc_integrator/integrator_util/exx_screening.cxx +++ b/src/xc_integrator/integrator_util/exx_screening.cxx @@ -13,6 +13,7 @@ #include "host/blas.hpp" #include #include +#include //#include //#include #ifdef GAUXC_HAS_CUDA @@ -195,7 +196,7 @@ void exx_ek_screening( } uint32_t total_shells = 0; - for( auto x : task_ek_shells ) total_shells += __builtin_popcount(x); + for( auto x : task_ek_shells ) total_shells += std::popcount(x); std::vector ek_shells; ek_shells.reserve(total_shells); for( auto i_block 
= 0u; i_block < util::div_ceil(nshells,32); ++i_block ) { diff --git a/src/xc_integrator/integrator_util/spherical_harmonics.cxx b/src/xc_integrator/integrator_util/spherical_harmonics.cxx index bbc838e10..d84adc7fa 100644 --- a/src/xc_integrator/integrator_util/spherical_harmonics.cxx +++ b/src/xc_integrator/integrator_util/spherical_harmonics.cxx @@ -168,4 +168,4 @@ void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts const std::array x = {points[3 * i], points[3 * i + 1], points[3 * i + 2]}; scaled_ylm_new(lmax, x, center, radius, nlm, ylm_matrix + i * nharmonics); } -} \ No newline at end of file +} diff --git a/src/xc_integrator/integrator_util/spherical_harmonics.hpp b/src/xc_integrator/integrator_util/spherical_harmonics.hpp index 7ce495d8d..73c6a3bcb 100644 --- a/src/xc_integrator/integrator_util/spherical_harmonics.hpp +++ b/src/xc_integrator/integrator_util/spherical_harmonics.hpp @@ -4,4 +4,4 @@ #include -void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts, const std::array center, const double radius, double* ylm_matrix); \ No newline at end of file +void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts, const std::array center, const double radius, double* ylm_matrix); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx index c64d2d54b..cd6d444d7 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx @@ -30,7 +30,7 @@ void integral_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 1 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git 
a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx index 6971c1a71..d5024357a 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx @@ -32,7 +32,7 @@ void integral_0_0(size_t npts, int ldG, double *weights, double * /*boys_table*/) { - __attribute__((__aligned__(64))) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 1 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx index 3638d86af..045a5c860 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx @@ -30,7 +30,7 @@ void integral_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 9 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx index d0e655413..34f39af6a 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx @@ -32,12 +32,12 @@ void integral_1_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[3 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[3 * NPTS_LOCAL + 3 * NPTS_LOCAL]; - double * __restrict__ temp = (buffer + 
0); - double * __restrict__ Tval = (buffer + 3 * NPTS_LOCAL + 0 * NPTS_LOCAL); - double * __restrict__ Tval_inv_e = (buffer + 3 * NPTS_LOCAL + 1 * NPTS_LOCAL); - double * __restrict__ FmT = (buffer + 3 * NPTS_LOCAL + 2 * NPTS_LOCAL); + double * __restrict temp = (buffer + 0); + double * __restrict Tval = (buffer + 3 * NPTS_LOCAL + 0 * NPTS_LOCAL); + double * __restrict Tval_inv_e = (buffer + 3 * NPTS_LOCAL + 1 * NPTS_LOCAL); + double * __restrict FmT = (buffer + 3 * NPTS_LOCAL + 2 * NPTS_LOCAL); size_t npts_upper = NPTS_LOCAL * (npts / NPTS_LOCAL); size_t p_outer = 0; diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx index ee58d18f0..bf46e1efe 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx @@ -32,7 +32,7 @@ void integral_1_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 9 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx index 035be5bef..bcaef2609 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx @@ -30,7 +30,7 @@ void integral_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 31 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git 
a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx index 0343e6675..378ee6641 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx @@ -32,7 +32,7 @@ void integral_2_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[6 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[6 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 6 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx index 6904c15d7..c8abdf6be 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx @@ -32,7 +32,7 @@ void integral_2_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[16 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[16 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 16 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx index dbd9f500d..95989043d 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx @@ -32,7 +32,7 @@ void integral_2_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); 
double *Tval = (buffer + 31 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx index c3faf7f43..48b26caf2 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx @@ -30,7 +30,7 @@ void integral_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 74 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx index 44c3542e0..bca56cfc9 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx @@ -32,7 +32,7 @@ void integral_3_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[10 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[10 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 10 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx index 197e948ad..91148e596 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx @@ -32,7 +32,7 @@ void integral_3_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[25 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[25 
* NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 25 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx index 7c4a2ec67..eea293316 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx @@ -32,7 +32,7 @@ void integral_3_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[46 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[46 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 46 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx index 251de89d9..3d46f0ac5 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx @@ -32,7 +32,7 @@ void integral_3_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 74 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx index 67a9cace1..ad0e89b82 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx @@ -30,7 +30,7 @@ void integral_4(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[145 * 
NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 145 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx index 1b2f57f14..275edeed3 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx @@ -32,7 +32,7 @@ void integral_4_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[15 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[15 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 15 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx index 6fefd7870..f189fe0d0 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx @@ -32,7 +32,7 @@ void integral_4_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[36 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[36 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 36 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx index 0a88c5dd7..393bd36d7 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx @@ -32,7 +32,7 @@ void integral_4_2(size_t npts, int ldG, double *weights, 
double *boys_table) { - __attribute__((__aligned__(64))) double buffer[64 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[64 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 64 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx index e318e860f..4e2cdc506 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx @@ -32,7 +32,7 @@ void integral_4_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[100 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[100 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 100 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx index 5aca482ab..301bde852 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx @@ -32,7 +32,7 @@ void integral_4_4(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 145 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c index 4aa876364..2e18f7159 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? 
(b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a)) -void rys_1rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_1rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump1[34] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7 diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h index c98f10241..5f1f05672 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h @@ -1,6 +1,6 @@ #ifndef RYS_1RW_H_ #define RYS_1RW_H_ -void rys_1rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_1rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c index 78459eb8a..d98966172 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_2rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_2rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump2[41] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8 diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h index 309c3ec26..e18221d57 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h @@ -1,6 +1,6 @@ #ifndef RYS_2RW_H_ #define RYS_2RW_H_ -void rys_2rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_2rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c index 299073ca9..c86131622 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_3rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_3rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump3[48] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h index 904139b2c..affe560f0 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h @@ -1,6 +1,6 @@ #ifndef RYS_3RW_H_ #define RYS_3RW_H_ -void rys_3rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_3rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c index 2b83ae652..f59c31eb2 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_4rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_4rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump4[54] = { 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h index dd6fac71f..5294e181d 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h @@ -1,6 +1,6 @@ #ifndef RYS_4RW_H_ #define RYS_4RW_H_ -void rys_4rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_4rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c index a478610c9..ccc677f98 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? 
(b) : (a)) -void rys_5rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_5rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump5[60] = { 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9 }; diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h index 8e4278431..1e76fe9e5 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h @@ -1,6 +1,6 @@ #ifndef RYS_5RW_H_ #define RYS_5RW_H_ -void rys_5rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_5rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c index a9f8d22da..cee3f63e3 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c @@ -21,17 +21,23 @@ #define PI 3.14159265358979323846 +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#else +#define FORCE_INLINE inline __attribute__((always_inline)) #define MIN(a,b) \ ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a < _b ? 
_a : _b; }) +#endif // codelets -inline void __attribute__((always_inline)) compute_00(double beta, double *int_array, double *wgh) { +FORCE_INLINE void compute_00(double beta, double *int_array, double *wgh) { *(int_array + 0) = (*(int_array + 0)) * beta + *(wgh + 0); } -inline void __attribute__((always_inline)) compute_10_01(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_10_01(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double beta, double *int_array, double *rts, double *wgh) { double rt, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1; rt = *(rts + 0); @@ -49,7 +55,7 @@ inline void __attribute__((always_inline)) compute_10_01(double xPX, double yPX, *(int_array + 2) = (*(int_array + 2)) * beta + (*(wgh + 0)) * Cz0 + (*(wgh + 1)) * Cz1; } -inline void __attribute__((always_inline)) compute_20_02(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_20_02(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { double B0, B1, rt0, rt1, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1, Cx2, Cy2, Cz2, Cx3, Cy3, Cz3; rt0 = *(rts + 0); @@ -82,7 +88,7 @@ inline void __attribute__((always_inline)) compute_20_02(double xPX, double yPX, *(int_array + 5) = (*(int_array + 5)) * beta + Cz2 * (*(wgh + 0)) + Cz3 * (*(wgh + 1)); } -inline void __attribute__((always_inline)) compute_11(double xAB, double yAB, double zAB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_11(double xAB, double yAB, double zAB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) 
{ double B0, B1, rt0, rt1, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1, Cx2, Cy2, Cz2, Cx3, Cy3, Cz3; rt0 = *(rts + 0); @@ -120,7 +126,7 @@ inline void __attribute__((always_inline)) compute_11(double xAB, double yAB, do } // nr roots > 2 -inline void __attribute__((always_inline)) compute_vrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double * rts, double *vrr_array, double *hrr_array) { +FORCE_INLINE void compute_vrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double * rts, double *vrr_array, double *hrr_array) { double *roots = (rts + 0); double *vrr = (vrr_array + 0); for(int r = 0; r < nr_roots; ++r) { @@ -210,7 +216,7 @@ inline void __attribute__((always_inline)) compute_vrr3(int nr_roots, int l, int } } -inline void __attribute__((always_inline)) compute_hrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xAB, double yAB, double zAB, double *vrr_array, double *hrr_array) { +FORCE_INLINE void compute_hrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xAB, double yAB, double zAB, double *vrr_array, double *hrr_array) { for(int j = 1; j <= lA; ++j) { double *hrrj = (hrr_array + llA * j); @@ -271,11 +277,11 @@ inline void __attribute__((always_inline)) compute_hrr3(int nr_roots, int l, int } } -inline int __attribute__((always_inline)) index_calculation(int i, int j, int L) { +FORCE_INLINE int index_calculation(int i, int j, int L) { return (L - i) * (L - i + 1) / 2 + j; } -inline void __attribute__((always_inline)) compute_reduction(int nr_roots, int lA, int lB, double *weights, double *hrr_array, double *result, double beta) { +FORCE_INLINE void compute_reduction(int nr_roots, int lA, int lB, double *weights, double *hrr_array, double *result, double beta) { int offsetB = (lB + 1) * (lB + 2) / 2; for(int ia = 0; ia <= lA; ++ia) { diff --git 
a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c index 905d05d49..f9eba534d 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c @@ -3,6 +3,9 @@ #include #include #include +#ifdef _MSC_VER +#include <malloc.h> +#endif #include "boys.h" @@ -15,9 +18,9 @@ void rys_rw(int nt, int ngqp, - double tval[restrict], - double rts[restrict], - double wts[restrict]) { + double *__restrict tval, + double *__restrict rts, + double *__restrict wts) { switch (ngqp) { case 1: rys_1rw(nt, tval, rts, wts); @@ -36,7 +39,11 @@ void rys_rw(int nt, return; default: { +#ifdef _MSC_VER + double *ryszero = (double *)_malloca(nt * sizeof(double)); +#else double ryszero[nt]; +#endif for (int n = 0; n < nt; n++) { const double t = tval[n]; @@ -61,7 +68,10 @@ void rys_rw(int nt, int nmom = (ngqp << 1) - 1; rys_xrw(nt, ntgqp, ngqp, nmom, tval, ryszero, rts, wts); - + +#ifdef _MSC_VER + _freea(ryszero); +#endif return; } } diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h index 659cddefb..9d0f6ed21 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h @@ -1,6 +1,6 @@ #ifndef RYS_RW_H_ #define RYS_RW_H_ -void rys_rw(int nt, int ngqp, double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_rw(int nt, int ngqp, double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c index 35ba680fe..2089bd0f6 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c @@ -2,16 +2,28 @@ #include #include #include +#ifdef _MSC_VER +#include <malloc.h> +#endif #include "jacobi.h" 
void rys_xrw(int nt, int ntgqp, int ngqp, int nmom, - const double tval[restrict], - const double ryszero[restrict], - double rts[restrict], - double wts[restrict]) { + const double *__restrict tval, + const double *__restrict ryszero, + double *__restrict rts, + double *__restrict wts) { +#ifdef _MSC_VER + double *a = (double *)_malloca(nmom * sizeof(double)); + double *b = (double *)_malloca((nmom-1) * sizeof(double)); + double *mom = (double *)_malloca(nmom * sizeof(double)); + double *dia = (double *)_malloca(ngqp * sizeof(double)); + double *off = (double *)_malloca(ngqp * sizeof(double)); + double *row1 = (double *)_malloca(nmom * sizeof(double)); + double *row2 = (double *)_malloca(nmom * sizeof(double)); +#else double a[nmom]; double b[nmom-1]; double mom[nmom]; @@ -19,6 +31,7 @@ void rys_xrw(int nt, double off[ngqp]; double row1[nmom]; double row2[nmom]; +#endif int nrts = 0; for (int n = 0; n < nt; n += 1) { @@ -261,4 +274,14 @@ void rys_xrw(int nt, nrts += ngqp; } } + +#ifdef _MSC_VER + _freea(row2); + _freea(row1); + _freea(off); + _freea(dia); + _freea(mom); + _freea(b); + _freea(a); +#endif } diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h index f107d589b..b99cdcc4a 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h @@ -5,9 +5,9 @@ void rys_xrw(int nt, int ntgqp, int ngqp, int nmom, - const double tval[restrict], - const double ryszero[restrict], - double rts[restrict], - double wts[restrict]); + const double *__restrict tval, + const double *__restrict ryszero, + double *__restrict rts, + double *__restrict wts); #endif