From f9868b4fde8f5306156442321c31e1451145af70 Mon Sep 17 00:00:00 2001 From: Vladimir Mironov Date: Fri, 24 Apr 2026 08:44:30 +0000 Subject: [PATCH] Fix SSF issues on large grids Large molecules (e.g. ubiquitin) could fail during SSF grid reweighting. Changing the type of lddist from int32_t to size_t seems to fix it, presumably by avoiding 32-bit overflow when indexing the distance scratch buffer. --- .../device/cuda/cuda_aos_scheme1_weights.cu | 4 ++-- .../device/cuda/cuda_aos_scheme1_weights.hpp | 4 ++-- .../local_work_driver/device/cuda/kernels/cuda_ssf_1d.cu | 8 ++++---- .../local_work_driver/device/cuda/kernels/cuda_ssf_1d.hpp | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.cu b/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.cu index deeba830c..e836f61d7 100644 --- a/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.cu +++ b/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.cu @@ -21,7 +21,7 @@ namespace GauXC { void cuda_aos_scheme1_weights_wrapper( int32_t npts, int32_t natoms, const double* points_x, const double* points_y, const double* points_z, const double* RAB, int32_t ldRAB, const double* coords, - double* dist, int32_t lddist, const int32_t* iparent, + double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, double* weights, cudaStream_t stream ) { constexpr auto weight_unroll = @@ -64,7 +64,7 @@ void cuda_aos_scheme1_weight_1st_deriv_wrapper( int32_t npts, int32_t natoms, const double* points_x, const double* points_y, const double* points_z, const double* RAB, int32_t ldRAB, const double* coords, - double* dist, int32_t lddist, const int32_t* iparent, + double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, const double* w_times_f, double* exc_grad_w, cudaStream_t stream ){ diff --git a/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.hpp 
b/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.hpp index affd940f6..6b6d0f039 100644 --- a/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.hpp +++ b/src/xc_integrator/local_work_driver/device/cuda/cuda_aos_scheme1_weights.hpp @@ -14,14 +14,14 @@ namespace GauXC { void cuda_aos_scheme1_weights_wrapper( int32_t npts, int32_t natoms, const double* points_x, const double* points_y, const double* points_z, const double* RAB, int32_t ldRAB, const double* coords, - double* dist, int32_t lddist, const int32_t* iparent, + double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, double* weights, cudaStream_t stream ); void cuda_aos_scheme1_weight_1st_deriv_wrapper( int32_t npts, int32_t natoms, const double* points_x, const double* points_y, const double* points_z, const double* RAB, int32_t ldRAB, const double* coords, - double* dist, int32_t lddist, const int32_t* iparent, + double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, const double* w_times_f, double* exc_grad_w, cudaStream_t stream ); diff --git a/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.cu b/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.cu index 54d2486eb..d654d64de 100644 --- a/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.cu +++ b/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.cu @@ -28,7 +28,7 @@ __global__ void modify_weights_ssf_kernel_1d( int32_t ldRAB, const double* coords, const double* dist_scratch, - int32_t lddist, + size_t lddist, const int32_t* iparent_device, const double* dist_nearest_device, double* weights_device @@ -131,7 +131,7 @@ __global__ void modify_weights_ssf_kernel_1d( } void partition_weights_ssf_1d( int32_t npts, int32_t natoms, const double* RAB, - int32_t ldRAB, const double* coords, const double* dist, int32_t lddist, + int32_t ldRAB, const double* coords, const double* dist, size_t 
lddist, const int32_t* iparent, const double* dist_nearest, double* weights, cudaStream_t stream ) { @@ -153,7 +153,7 @@ __global__ void eval_weight_1st_deriv_contracted_ssf_kernel_1d( const double* points_y, const double* points_z, const double* dist_scratch, - int32_t lddist, + size_t lddist, const int32_t* iparent_device, const double* dist_nearest_device, const double* __restrict__ w_times_f_device, @@ -352,7 +352,7 @@ __global__ void eval_weight_1st_deriv_contracted_ssf_kernel_1d( void eval_weight_1st_deriv_contracted_ssf_1d( int32_t npts, int32_t natoms, const double* RAB, int32_t ldRAB, const double* coords, const double* points_x, const double* points_y, const double* points_z, - const double* dist, int32_t lddist, + const double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, const double* w_times_f, double* exc_grad_w, cudaStream_t stream){ diff --git a/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.hpp b/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.hpp index bb9d3b749..ba0150e46 100644 --- a/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.hpp +++ b/src/xc_integrator/local_work_driver/device/cuda/kernels/cuda_ssf_1d.hpp @@ -13,14 +13,14 @@ namespace GauXC { void partition_weights_ssf_1d( int32_t npts, int32_t natoms, const double* RAB, - int32_t ldRAB, const double* coords, const double* dist, int32_t lddist, + int32_t ldRAB, const double* coords, const double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, double* weights, cudaStream_t stream); void eval_weight_1st_deriv_contracted_ssf_1d( int32_t npts, int32_t natoms, const double* RAB, int32_t ldRAB, const double* coords, const double* points_x, const double* points_y, const double* points_z, - const double* dist, int32_t lddist, + const double* dist, size_t lddist, const int32_t* iparent, const double* dist_nearest, const double* w_times_f, double* exc_grad_w, cudaStream_t stream);