Skip to content

Commit a969650

Browse files
authored
Merge pull request #61 from MennoVeerman/main
updates and bug fixes to bw implementation
2 parents 334e93e + f65960c commit a969650

25 files changed

Lines changed: 518 additions & 446 deletions

data/mie_lut_broadband.nc

0 Bytes
Binary file not shown.

data/mie_lut_visualisation.nc

499 Bytes
Binary file not shown.

include_rt/Gas_optics_rrtmgp_rt.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,12 @@ class Gas_optics_rrtmgp_rt : public Gas_optics_rt
145145
const Array_gpu<Float,2>& play,
146146
const Array_gpu<Float,2>& plev,
147147
const Array_gpu<Float,2>& tlay,
148+
const Array_gpu<Float,2>& tlev,
148149
const Array_gpu<Float,1>& tsfc,
149150
const Gas_concs_gpu& gas_desc,
150151
std::unique_ptr<Optical_props_arry_rt>& optical_props,
151152
Source_func_lw_rt& sources,
152-
const Array_gpu<Float,2>& col_dry,
153-
const Array_gpu<Float,2>& tlev);
153+
const Array_gpu<Float,2>& col_dry);
154154

155155
// shortwave variant
156156
void gas_optics(
@@ -219,7 +219,7 @@ class Gas_optics_rrtmgp_rt : public Gas_optics_rt
219219
Array<Float,4> krayl;
220220

221221
int idx_h2o;
222-
222+
223223
Array_gpu<Float,1> solar_source_g;
224224
Array_gpu<Float,2> totplnk_gpu;
225225
Array_gpu<Float,4> planck_frac_gpu;
@@ -294,7 +294,7 @@ class Gas_optics_rrtmgp_rt : public Gas_optics_rt
294294
const Float md_index, const Float sb_index);
295295

296296
void compute_gas_taus(
297-
const int col_s, const int ncol_block, const int ncol, const int nlay,
297+
const int col_s, const int ncol_block, const int ncol, const int nlay,
298298
const int ngpt, const int nband, const int igpt,
299299
const Array_gpu<Float,2>& play,
300300
const Array_gpu<Float,2>& plev,
@@ -313,7 +313,7 @@ class Gas_optics_rrtmgp_rt : public Gas_optics_rt
313313
std::unique_ptr<Optical_props_arry_rt>& optical_props);
314314

315315
void source(
316-
const int ncol, const int nlay, const int nband, const int ngpt, const int igpt,
316+
const int col_s, const int ncol_sub, const int ncol, const int nlay, const int nband, const int ngpt, const int igpt,
317317
const Array_gpu<Float,2>& play, const Array_gpu<Float,2>& plev,
318318
const Array_gpu<Float,2>& tlay, const Array_gpu<Float,1>& tsfc,
319319
const Array_gpu<int,2>& jtemp, const Array_gpu<int,2>& jpress,

include_rt/Gas_optics_rt.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ class Gas_optics_rt : public Optical_props_rt
6161
const Array_gpu<Float,2>& play,
6262
const Array_gpu<Float,2>& plev,
6363
const Array_gpu<Float,2>& tlay,
64+
const Array_gpu<Float,2>& tlev,
6465
const Array_gpu<Float,1>& tsfc,
6566
const Gas_concs_gpu& gas_desc,
6667
std::unique_ptr<Optical_props_arry_rt>& optical_props,
6768
Source_func_lw_rt& sources,
68-
const Array_gpu<Float,2>& col_dry,
69-
const Array_gpu<Float,2>& tlev) = 0;
69+
const Array_gpu<Float,2>& col_dry) = 0;
7070

7171
// Shortwave variant.
7272
virtual void gas_optics(
@@ -80,7 +80,7 @@ class Gas_optics_rt : public Optical_props_rt
8080
const Array_gpu<Float,2>& col_dry) = 0;
8181

8282
virtual Float get_tsi() const = 0;
83-
83+
8484
virtual Float band_source(const int gpt_start, const int gpt_end) const = 0;
8585
};
8686
#endif

include_rt/Source_functions_rt.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
#ifndef SOURCE_FUNCTIONS_RT_H
2626
#define SOURCE_FUNCTIONS_RT_H
27-
#include "Optical_props_rt.h"
27+
#include "Optical_props_rt.h"
2828

2929
template<typename, int> class Array_gpu;
3030

@@ -48,21 +48,18 @@ class Source_func_lw_rt : public Optical_props_rt
4848
Array_gpu<Float,1>& get_sfc_source() { return sfc_source; }
4949
Array_gpu<Float,1>& get_sfc_source_jac() { return sfc_source_jac; }
5050
Array_gpu<Float,2>& get_lay_source() { return lay_source; }
51-
Array_gpu<Float,2>& get_lev_source_inc() { return lev_source_inc; }
52-
Array_gpu<Float,2>& get_lev_source_dec() { return lev_source_dec; }
51+
Array_gpu<Float,2>& get_lev_source() { return lev_source; }
5352

5453
const Array_gpu<Float,1>& get_sfc_source() const { return sfc_source; }
5554
const Array_gpu<Float,1>& get_sfc_source_jac() const { return sfc_source_jac; }
5655
const Array_gpu<Float,2>& get_lay_source() const { return lay_source; }
57-
const Array_gpu<Float,2>& get_lev_source_inc() const { return lev_source_inc; }
58-
const Array_gpu<Float,2>& get_lev_source_dec() const { return lev_source_dec; }
56+
const Array_gpu<Float,2>& get_lev_source() const { return lev_source; }
5957

6058
private:
6159
Array_gpu<Float,1> sfc_source;
6260
Array_gpu<Float,1> sfc_source_jac;
6361
Array_gpu<Float,2> lay_source;
64-
Array_gpu<Float,2> lev_source_inc;
65-
Array_gpu<Float,2> lev_source_dec;
62+
Array_gpu<Float,2> lev_source;
6663
};
6764

6865
#endif

include_rt/raytracer_functions.h

Lines changed: 39 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -87,104 +87,62 @@ namespace Raytracer_functions
8787
}
8888

8989
__device__
90-
inline Float mie_sample_angle(const Float* mie_cdf, const Float* mie_lut, const Float random_number, const Float r_eff, const int n_mie)
90+
inline int find_index(const float* mie_cdf, const int size, const float random_number)
9191
{
92-
// interpolation over effective radius. Currently, r_eff should range between 2.5 and 21.5 (similar to RRTMGP) OR
93-
// be exactly 100 micrometer for optical effects such as rainbows
94-
const int r_idx = (r_eff == Float(100.)) ? 20 : min(max(int(r_eff-2.5), 0), 18);
95-
const Float r_rest = fmod(r_eff-Float(2.5),Float(1.));
92+
int left = 0;
93+
int right = size - 1;
9694

97-
int i = 0;
98-
while (random_number < mie_cdf[i])
99-
{
100-
++i;
101-
}
95+
while (left < right) {
96+
int mid = left + (right - left) / 2;
10297

103-
// sampled scattering angle
104-
Float ang;
105-
if (r_idx < 20)
106-
{
107-
if (i==0)
108-
{
109-
const Float ang_lwr = mie_lut[r_idx*n_mie]*(1-r_rest);
110-
const Float ang_upr = mie_lut[(r_idx+1)*n_mie]*r_rest;
111-
ang = ang_lwr + ang_upr;
112-
}
113-
else
114-
{
115-
const int midx_lwr = r_idx*n_mie;
116-
const int midx_upr = (r_idx+1)*n_mie;
117-
const Float dr = abs(mie_cdf[i] - mie_cdf[i-1]);
118-
119-
const Float ang_lwr = (abs(random_number - mie_cdf[i])*mie_lut[(i-1)+midx_lwr] + abs(mie_cdf[i-1]-random_number)*mie_lut[i+midx_lwr]) / dr;
120-
const Float ang_upr = (abs(random_number - mie_cdf[i])*mie_lut[(i-1)+midx_upr] + abs(mie_cdf[i-1]-random_number)*mie_lut[i+midx_upr]) / dr;
121-
ang = ang_lwr * (1-r_rest) + ang_upr * r_rest;
98+
if (random_number >= mie_cdf[mid]) {
99+
right = mid;
100+
} else {
101+
left = mid + 1;
122102
}
123103
}
124-
else
125-
{
126-
if (i==0)
127-
{
128-
ang = mie_lut[r_idx*n_mie];
129-
}
130-
else
131-
{
132-
const int midx = r_idx*n_mie;
133-
const Float dr = abs(mie_cdf[i] - mie_cdf[i-1]);
134104

135-
ang = (abs(random_number - mie_cdf[i])*mie_lut[(i-1)+midx] + abs(mie_cdf[i-1]-random_number)*mie_lut[i+midx]) / dr;
136-
}
137-
}
105+
return left - 1;
106+
}
107+
108+
__device__
109+
inline Float mie_sample_angle(const Float* mie_cdf, const Float* mie_lut, const Float random_number, const Float r_eff, const int n_mie)
110+
{
111+
// Interpolation over effective radius. r_eff is expected to lie between 2.5 and 21.5 micrometer (similar to RRTMGP); the table index is clamped to 0..18.
112+
const int r_idx = min(max(int(r_eff-2.5), 0), 18);
113+
const Float r_rest = fmod(r_eff-Float(2.5),Float(1.));
114+
115+
const int i = min(max(0, find_index(mie_cdf, n_mie, random_number)), n_mie - 2);
116+
117+
const int midx_lwr = r_idx*n_mie;
118+
const int midx_upr = (r_idx+1)*n_mie;
119+
const Float dr = abs(mie_cdf[i+1] - mie_cdf[i]);
120+
121+
const Float ang_lwr = (abs(random_number - mie_cdf[i+1])*mie_lut[(i)+midx_lwr] + abs(mie_cdf[i]-random_number)*mie_lut[i+midx_lwr+1]) / dr;
122+
const Float ang_upr = (abs(random_number - mie_cdf[i+1])*mie_lut[(i)+midx_upr] + abs(mie_cdf[i]-random_number)*mie_lut[i+midx_upr+1]) / dr;
123+
const Float ang = ang_lwr * (1-r_rest) + ang_upr * r_rest;
138124
return ang;
139125
}
140126

141127
__device__
142128
inline Float mie_interpolate_phase_table(const Float* mie_phase, const Float* mie_lut, const Float scat_ang, const Float r_eff, const int n_mie)
143129
{
144-
// interpolation over effective radius. Currently, r_eff should range between 2.5 and 21.5 (similar to RRTMGP) OR
145-
// be exactly 100 micrometer for optical effects such as rainbows
146-
const int r_idx = (r_eff == Float(100.)) ? 20 : min(max(int(r_eff-2.5), 0), 18);
130+
// Interpolation over effective radius. r_eff is expected to lie between 2.5 and 21.5 micrometer (similar to RRTMGP); the table index is clamped to 0..18.
131+
const int r_idx = min(max(int(r_eff-2.5), 0), 18);
147132
const Float r_rest = fmod(r_eff-Float(2.5),Float(1.));
148133

149134
// Interpolation over 1800 equally spaced scattering angles from 0 to PI (both inclusive), i.e. a grid spacing of PI/1799.
150-
const Float d_pi = Float(1.74629942e-03);
151-
const int i = min(max(0, int(1800-(scat_ang/d_pi+1))), 1798);
135+
constexpr Float d_pi = Float(1.74629942e-03);
136+
const int i = min(max(0, int(scat_ang/d_pi)), 1798);
152137

153-
// probability (of scattering at angle scat_ang)
154-
Float prob;
155-
if (r_idx < 20)
156-
{
157-
if (i==0)
158-
{
159-
const Float prob_lwr = mie_lut[r_idx*n_mie]*(1-r_rest);
160-
const Float prob_upr = mie_lut[(r_idx+1)*n_mie]*r_rest;
161-
prob = prob_lwr + prob_upr;
162-
}
163-
else
164-
{
165-
const int midx_lwr = r_idx*n_mie;
166-
const int midx_upr = (r_idx+1)*n_mie;
167-
const Float dr = abs(mie_phase[i] - mie_phase[i-1]);
168-
169-
const Float prob_lwr = (abs(scat_ang - mie_phase[i])*mie_lut[(i-1)+midx_lwr] + abs(mie_phase[i-1]-scat_ang)*mie_lut[i+midx_lwr]) / dr;
170-
const Float prob_upr = (abs(scat_ang - mie_phase[i])*mie_lut[(i-1)+midx_upr] + abs(mie_phase[i-1]-scat_ang)*mie_lut[i+midx_upr]) / dr;
171-
prob = prob_lwr * (1-r_rest) + prob_upr * r_rest;
172-
}
173-
}
174-
else
175-
{
176-
if (i==0)
177-
{
178-
prob = mie_lut[r_idx*n_mie];
179-
}
180-
else
181-
{
182-
const int midx = r_idx*n_mie;
183-
const Float dr = abs(mie_phase[i] - mie_phase[i-1]);
138+
const int midx_lwr = r_idx*n_mie;
139+
const int midx_upr = (r_idx+1)*n_mie;
140+
const Float dr = abs(mie_phase[i+1] - mie_phase[i]);
141+
142+
const Float prob_lwr = (abs(scat_ang - mie_phase[i+1])*mie_lut[(i)+midx_lwr] + abs(mie_phase[i]-scat_ang)*mie_lut[i+1+midx_lwr]) / dr;
143+
const Float prob_upr = (abs(scat_ang - mie_phase[i+1])*mie_lut[(i)+midx_upr] + abs(mie_phase[i]-scat_ang)*mie_lut[i+1+midx_upr]) / dr;
144+
const Float prob = prob_lwr * (1-r_rest) + prob_upr * r_rest;
184145

185-
prob = (abs(scat_ang - mie_phase[i])*mie_lut[(i-1)+midx] + abs(mie_phase[i-1]-scat_ang)*mie_lut[i+midx]) / dr;
186-
}
187-
}
188146
return prob;
189147
}
190148

include_rt_kernels/gas_optics_rrtmgp_kernels_cuda_rt.h

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ namespace Gas_optics_rrtmgp_kernels_cuda_rt
3333
void reorder123x321(const int ni, const int nj, const int nk,
3434
const Float* arr_in, Float* arr_out);
3535

36-
36+
3737
void reorder12x21(const int ni, const int nj, const Float* arr_in, Float* arr_out);
3838

39-
39+
4040
void zero_array(const int ni, const int nj, const int nk, const int nn, Float* arr);
4141
void zero_array(const int ni, const int nj, const int nk, Float* arr);
4242
void zero_array(const int ni, const int nj, Float* arr);
@@ -67,7 +67,7 @@ namespace Gas_optics_rrtmgp_kernels_cuda_rt
6767

6868
void combine_abs_and_rayleigh(
6969
const int col_s, const int ncol_sub, const int ncol, const int nlay,
70-
const Float* tau_local, const Float* tau_rayleigh,
70+
const Float* tau_rayleigh,
7171
Float* tau, Float* ssa, Float* g);
7272

7373

@@ -84,7 +84,7 @@ namespace Gas_optics_rrtmgp_kernels_cuda_rt
8484

8585

8686
void compute_tau_absorption(
87-
const int col_s, const int ncol_sub, const int ncol, const int nlay, const int nband,
87+
const int col_s, const int ncol_sub, const int ncol, const int nlay, const int nband,
8888
const int ngpt, const int igpt,
8989
const int ngas, const int nflav, const int neta, const int npres, const int ntemp,
9090
const int nminorlower, const int nminorklower,
@@ -117,8 +117,8 @@ namespace Gas_optics_rrtmgp_kernels_cuda_rt
117117
Float* tau);
118118

119119

120-
void Planck_source(
121-
const int ncol, const int nlay, const int nbnd, const int ngpt, const int igpt,
120+
void compute_planck_source(
121+
const int col_s, const int ncol_sub, const int ncol, const int nlay, const int nbnd, const int ngpt, const int igpt,
122122
const int nflav, const int neta, const int npres, const int ntemp,
123123
const int nPlanckTemp,
124124
const Float* tlay,
@@ -137,8 +137,7 @@ namespace Gas_optics_rrtmgp_kernels_cuda_rt
137137
const Float* totplnk,
138138
Float* sfc_src,
139139
Float* lay_src,
140-
Float* lev_src_inc,
141-
Float* lev_src_dec,
140+
Float* lev_src,
142141
Float* sfc_src_jac);
143142
}
144143
#endif

include_rt_kernels/raytracer_kernels_bw.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ using namespace Raytracer_functions;
1717

1818

1919
#ifdef RTE_USE_SP
20-
constexpr int bw_kernel_block= 512;
21-
constexpr int bw_kernel_grid = 1024;
20+
constexpr int bw_kernel_block= 128;
21+
constexpr int bw_kernel_grid = 2048;
2222
#else
2323
constexpr int bw_kernel_block = 256;
2424
constexpr int bw_kernel_grid = 256;

include_rt_kernels/rte_solver_kernels_cuda_rt.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,25 @@
3232
namespace Rte_solver_kernels_cuda_rt
3333
{
3434
void apply_BC(
35-
const int ncol, const int nlay, const int ngpt, const Bool top_at_1,
35+
const int ncol, const int nlay, const Bool top_at_1,
3636
const Float* inc_flux_dir, const Float* mu0, Float* gpt_flux_dir);
3737

38-
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, Float* gpt_flux_dn);
38+
void apply_BC(const int ncol, const int nlay, const Bool top_at_1, Float* gpt_flux_dn);
3939

40-
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* inc_flux_dif, Float* gpt_flux_dn);
41-
42-
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float inc_flux, Float* gpt_flux_dn);
40+
void apply_BC(const int ncol, const int nlay, const Bool top_at_1, const Float* inc_flux_dif, Float* gpt_flux_dn);
41+
42+
void apply_BC(const int ncol, const int nlay, const Bool top_at_1, const Float inc_flux, Float* gpt_flux_dn);
4343

4444
void sw_solver_2stream(
45-
const int ncol, const int nlay, const int ngpt, const Bool top_at_1,
45+
const int ncol, const int nlay, const Bool top_at_1,
4646
const Float* tau, const Float* ssa, const Float* g,
4747
const Float* mu0, const Float* sfc_alb_dir, const Float* sfc_alb_dif,
4848
Float* flux_up, Float* flux_dn, Float* flux_dir);
4949

5050
void lw_solver_noscat_gaussquad(
51-
const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const int nmus,
51+
const int ncol, const int nlay, const Bool top_at_1, const int nmus,
5252
const Float* ds, const Float* weights, const Float* tau, const Float* lay_source,
53-
const Float* lev_source_inc, const Float* lev_source_dec, const Float* sfc_emis,
53+
const Float* lev_source, const Float* sfc_emis,
5454
const Float* sfc_src, Float* flux_up, Float* flux_dn,
5555
const Float* sfc_src_jac, Float* flux_up_jac);
5656
}

include_test/Radiation_solver_rt.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,26 @@ class Radiation_solver_longwave
4545
#ifdef __CUDACC__
4646
void solve_gpu(
4747
const bool switch_fluxes,
48+
const bool switch_raytracing,
4849
const bool switch_cloud_optics,
50+
const bool switch_aerosol_optics,
4951
const bool switch_single_gpt,
5052
const int single_gpt,
53+
const Int ray_count,
54+
const Vector<int> grid_cells,
55+
const Vector<Float> grid_d,
56+
const Vector<int> kn_grid,
5157
const Gas_concs_gpu& gas_concs,
58+
Aerosol_concs_gpu& aerosol_concs,
5259
const Array_gpu<Float,2>& p_lay, const Array_gpu<Float,2>& p_lev,
5360
const Array_gpu<Float,2>& t_lay, const Array_gpu<Float,2>& t_lev,
5461
Array_gpu<Float,2>& col_dry,
5562
const Array_gpu<Float,1>& t_sfc, const Array_gpu<Float,2>& emis_sfc,
5663
const Array_gpu<Float,2>& lwp, const Array_gpu<Float,2>& iwp,
5764
const Array_gpu<Float,2>& rel, const Array_gpu<Float,2>& dei,
58-
Array_gpu<Float,2>& tau, Array_gpu<Float,2>& lay_source,
59-
Array_gpu<Float,2>& lev_source_inc, Array_gpu<Float,2>& lev_source_dec, Array_gpu<Float,1>& sfc_source,
65+
const Array_gpu<Float,2>& rh,
66+
Array_gpu<Float,2>& tot_tau_out, Array_gpu<Float,2>& cld_tau_out, Array_gpu<Float,2>& lay_source,
67+
Array_gpu<Float,2>& lev_source, Array_gpu<Float,1>& sfc_source,
6068
Array_gpu<Float,2>& lw_flux_up, Array_gpu<Float,2>& lw_flux_dn, Array_gpu<Float,2>& lw_flux_net,
6169
Array_gpu<Float,2>& lw_gpt_flux_up, Array_gpu<Float,2>& lw_gpt_flux_dn, Array_gpu<Float,2>& lw_gpt_flux_net);
6270

@@ -74,13 +82,16 @@ class Radiation_solver_longwave
7482
#ifdef __CUDACC__
7583
std::unique_ptr<Gas_optics_rrtmgp_rt> kdist_gpu;
7684
std::unique_ptr<Cloud_optics_rt> cloud_optics_gpu;
85+
std::unique_ptr<Aerosol_optics_rt> aerosol_optics_gpu;
7786
Rte_lw_rt rte_lw;
7887

7988
std::unique_ptr<Optical_props_arry_rt> optical_props;
8089

8190
std::unique_ptr<Source_func_lw_rt> sources;
8291

8392
std::unique_ptr<Optical_props_1scl_rt> cloud_optical_props;
93+
94+
std::unique_ptr<Optical_props_1scl_rt> aerosol_optical_props;
8495
#endif
8596
};
8697

@@ -124,7 +135,7 @@ class Radiation_solver_shortwave
124135
const Array_gpu<Float,2>& lwp, const Array_gpu<Float,2>& iwp,
125136
const Array_gpu<Float,2>& rel, const Array_gpu<Float,2>& dei,
126137
const Array_gpu<Float,2>& rh,
127-
const Aerosol_concs_gpu& aerosol_concs,
138+
Aerosol_concs_gpu& aerosol_concs,
128139
Array_gpu<Float,2>& tot_tau_out, Array_gpu<Float,2>& tot_ssa_out,
129140
Array_gpu<Float,2>& cld_tau_out, Array_gpu<Float,2>& cld_ssa_out, Array_gpu<Float,2>& cld_asy_out,
130141
Array_gpu<Float,2>& aer_tau_out, Array_gpu<Float,2>& aer_ssa_out, Array_gpu<Float,2>& aer_asy_out,

0 commit comments

Comments
 (0)