Skip to content

Commit e01fbd1

Browse files
authored
Merge pull request #3831 from KratosMultiphysics/core/amgcl-test-opencl
[core] Test GPGPU (OpenCL/CUDA) backend with AMGCL solver
2 parents 169a21a + 22f0b38 commit e01fbd1

213 files changed

Lines changed: 62327 additions & 297 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

applications/trilinos_application/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ set( KRATOS_TRILINOS_APPLICATION_SOURCES
5050
${CMAKE_CURRENT_SOURCE_DIR}/custom_factories/trilinos_linear_solver_factory.cpp
5151
${CMAKE_SOURCE_DIR}/applications/FluidDynamicsApplication/fluid_dynamics_application_variables.cpp #TODO: this should REALLY NOT BE HERE
5252
${CMAKE_CURRENT_SOURCE_DIR}/custom_utilities/mpi_normal_calculation_utilities.cpp;
53+
${CMAKE_CURRENT_SOURCE_DIR}/amgcl_mpi_solver_impl.cpp;
5354
)
5455

5556
## Kratos tests sources. Enabled by default
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
// Implementation of the distributed (Trilinos/Epetra) AMGCL solver entry
// points declared in external_includes/amgcl_mpi_solve_functions.h.
//
// NOTE(fix): this file previously reused the header's include-guard macro
// (KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H). Defining that macro *before* the
// #include of the header below makes the header's own guard evaluate false,
// so its contents are silently skipped. A unique macro is used instead.
#ifndef KRATOS_AMGCL_MPI_SOLVER_IMPL_H
#define KRATOS_AMGCL_MPI_SOLVER_IMPL_H

#include <boost/range/iterator_range.hpp>
#include <boost/property_tree/ptree.hpp>

#include <amgcl/adapter/crs_tuple.hpp>
#include <amgcl/adapter/epetra.hpp>
#include <amgcl/adapter/ublas.hpp>
#include <amgcl/adapter/zero_copy.hpp>
#include <amgcl/adapter/block_matrix.hpp>
#include <amgcl/backend/builtin.hpp>
#include <amgcl/value_type/static_matrix.hpp>
#include <amgcl/solver/runtime.hpp>

#include <amgcl/mpi/util.hpp>
#include <amgcl/mpi/make_solver.hpp>
#include <amgcl/mpi/preconditioner.hpp>

#ifdef AMGCL_GPGPU
#  include <amgcl/backend/vexcl.hpp>
#  include <amgcl/backend/vexcl_static_matrix.hpp>
#endif

#include "Epetra_FECrsMatrix.h"
#include "Epetra_FEVector.h"
#include "trilinos_space.h"

#include "external_includes/amgcl_mpi_solve_functions.h"

namespace Kratos
{

#ifdef AMGCL_GPGPU
// Returns the process-wide VexCL context (defined elsewhere in the project).
vex::Context& vexcl_context();

// Registers the static-matrix value type for the given block size with the
// VexCL backend (defined elsewhere in the project).
template <int TBlockSize>
void register_vexcl_static_matrix_type();
#endif
// Spacialization of AMGCLScalarSolve for distribued systems.
42+
void AMGCLScalarSolve(
43+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::MatrixType& rA,
44+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rX,
45+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rB,
46+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::IndexType& rIterationNumber,
47+
double& rResidual,
48+
const boost::property_tree::ptree &amgclParams,
49+
int verbosity_level,
50+
bool use_gpgpu
51+
)
52+
{
53+
#ifdef AMGCL_GPGPU
54+
if (use_gpgpu && vexcl_context()) {
55+
auto &ctx = vexcl_context();
56+
57+
typedef amgcl::backend::vexcl<double> Backend;
58+
59+
typedef
60+
amgcl::mpi::make_solver<
61+
amgcl::runtime::mpi::preconditioner<Backend>,
62+
amgcl::runtime::solver::wrapper
63+
>
64+
Solver;
65+
66+
Backend::params bprm;
67+
bprm.q = ctx;
68+
69+
Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams, bprm);
70+
71+
std::size_t n = rA.NumMyRows();
72+
73+
vex::vector<double> b(ctx, n, rB.Values());
74+
vex::vector<double> x(ctx, n, rX.Values());
75+
76+
std::tie(rIterationNumber, rResidual) = solve(b, x);
77+
78+
vex::copy(x.begin(), x.end(), rX.Values());
79+
} else
80+
#endif
81+
{
82+
typedef amgcl::backend::builtin<double> Backend;
83+
84+
typedef
85+
amgcl::mpi::make_solver<
86+
amgcl::runtime::mpi::preconditioner<Backend>,
87+
amgcl::runtime::solver::wrapper
88+
>
89+
Solver;
90+
91+
Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams);
92+
93+
std::size_t n = rA.NumMyRows();
94+
95+
auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n);
96+
auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n);
97+
98+
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
99+
}
100+
}
101+
102+
// Spacialization of AMGCLBlockSolve for distribued systems.
103+
template <int TBlockSize>
104+
void AMGCLBlockSolve(
105+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::MatrixType & rA,
106+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rX,
107+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rB,
108+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::IndexType& rIterationNumber,
109+
double& rResidual,
110+
boost::property_tree::ptree amgclParams,
111+
int verbosity_level,
112+
bool use_gpgpu
113+
)
114+
{
115+
if(amgclParams.get<std::string>("precond.class") != "amg")
116+
amgclParams.erase("precond.coarsening");
117+
else
118+
amgclParams.put("precond.coarsening.aggr.block_size",1);
119+
120+
typedef amgcl::static_matrix<double, TBlockSize, TBlockSize> val_type;
121+
typedef amgcl::static_matrix<double, TBlockSize, 1> rhs_type;
122+
123+
std::size_t n = rA.RowMap().NumMyElements();
124+
std::size_t nb = n / TBlockSize;
125+
126+
#ifdef AMGCL_GPGPU
127+
if (use_gpgpu && vexcl_context()) {
128+
auto &ctx = vexcl_context();
129+
register_vexcl_static_matrix_type<TBlockSize>();
130+
131+
typedef amgcl::backend::vexcl<val_type> Backend;
132+
133+
typedef
134+
amgcl::mpi::make_solver<
135+
amgcl::runtime::mpi::preconditioner<Backend>,
136+
amgcl::runtime::solver::wrapper
137+
>
138+
Solver;
139+
140+
typename Backend::params bprm;
141+
bprm.q = ctx;
142+
143+
Solver solve(
144+
MPI_COMM_WORLD,
145+
amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
146+
amgclParams, bprm
147+
);
148+
149+
auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
150+
auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
151+
152+
vex::vector<rhs_type> x(ctx, nb, x_begin);
153+
vex::vector<rhs_type> b(ctx, nb, b_begin);
154+
155+
std::tie(rIterationNumber, rResidual) = solve(b, x);
156+
157+
vex::copy(x.begin(), x.end(), x_begin);
158+
} else
159+
#endif
160+
{
161+
typedef amgcl::backend::builtin<val_type> Backend;
162+
163+
typedef
164+
amgcl::mpi::make_solver<
165+
amgcl::runtime::mpi::preconditioner<Backend>,
166+
amgcl::runtime::solver::wrapper
167+
>
168+
Solver;
169+
170+
Solver solve(
171+
MPI_COMM_WORLD,
172+
amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
173+
amgclParams
174+
);
175+
176+
auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
177+
auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
178+
179+
auto b_range = boost::make_iterator_range(b_begin, b_begin + nb);
180+
auto x_range = boost::make_iterator_range(x_begin, x_begin + nb);
181+
182+
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
183+
}
184+
}
185+
186+
void AMGCLSolve(
187+
int block_size,
188+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::MatrixType& rA,
189+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rX,
190+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rB,
191+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::IndexType& rIterationNumber,
192+
double& rResidual,
193+
const boost::property_tree::ptree &amgclParams,
194+
int verbosity_level,
195+
bool use_gpgpu
196+
)
197+
{
198+
switch (block_size) {
199+
case 2:
200+
AMGCLBlockSolve<2>(rA, rX, rB, rIterationNumber, rResidual, amgclParams, verbosity_level, use_gpgpu);
201+
return;
202+
case 3:
203+
AMGCLBlockSolve<3>(rA, rX, rB, rIterationNumber, rResidual, amgclParams, verbosity_level, use_gpgpu);
204+
return;
205+
case 4:
206+
AMGCLBlockSolve<4>(rA, rX, rB, rIterationNumber, rResidual, amgclParams, verbosity_level, use_gpgpu);
207+
return;
208+
default:
209+
AMGCLScalarSolve(rA, rX, rB, rIterationNumber, rResidual, amgclParams, verbosity_level, use_gpgpu);
210+
return;
211+
}
212+
}
213+
214+
} // namespace Kratos
215+
216+
#endif

applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h

Lines changed: 10 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,22 @@
11
#ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
22
#define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
33

4-
#include <boost/range/iterator_range.hpp>
5-
6-
#include <amgcl/adapter/crs_tuple.hpp>
7-
#include <amgcl/adapter/epetra.hpp>
8-
#include <amgcl/adapter/ublas.hpp>
9-
#include <amgcl/adapter/zero_copy.hpp>
10-
#include <amgcl/adapter/block_matrix.hpp>
11-
#include <amgcl/backend/builtin.hpp>
12-
#include <amgcl/value_type/static_matrix.hpp>
13-
#include <amgcl/solver/runtime.hpp>
14-
15-
#include <amgcl/mpi/util.hpp>
16-
#include <amgcl/mpi/make_solver.hpp>
17-
#include <amgcl/mpi/preconditioner.hpp>
4+
#include <boost/property_tree/ptree.hpp>
185

196
namespace Kratos
207
{
218

22-
// Spacialization of AMGCLScalarSolve for distribued systems.
23-
template <class TSparseSpaceType>
24-
typename std::enable_if<TSparseSpaceType::IsDistributed(), void>::type
25-
AMGCLScalarSolve(
26-
typename TSparseSpaceType::MatrixType& rA,
27-
typename TSparseSpaceType::VectorType& rX,
28-
typename TSparseSpaceType::VectorType& rB,
29-
typename TSparseSpaceType::IndexType& rIterationNumber,
9+
void AMGCLSolve(
10+
int block_size,
11+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::MatrixType& rA,
12+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rX,
13+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rB,
14+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::IndexType& rIterationNumber,
3015
double& rResidual,
3116
const boost::property_tree::ptree &amgclParams,
32-
int verbosity_level
33-
)
34-
{
35-
typedef amgcl::backend::builtin<double> Backend;
36-
37-
typedef
38-
amgcl::mpi::make_solver<
39-
amgcl::runtime::mpi::preconditioner<Backend>,
40-
amgcl::runtime::solver::wrapper
41-
>
42-
Solver;
43-
44-
Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams);
45-
46-
std::size_t n = rA.NumMyRows();
47-
48-
auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n);
49-
auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n);
50-
51-
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
52-
}
53-
54-
// Spacialization of AMGCLBlockSolve for distribued systems.
55-
template <int TBlockSize, class TSparseSpaceType>
56-
typename std::enable_if<TSparseSpaceType::IsDistributed(), void>::type
57-
AMGCLBlockSolve(
58-
typename TSparseSpaceType::MatrixType & rA,
59-
typename TSparseSpaceType::VectorType& rX,
60-
typename TSparseSpaceType::VectorType& rB,
61-
typename TSparseSpaceType::IndexType& rIterationNumber,
62-
double& rResidual,
63-
boost::property_tree::ptree amgclParams,
64-
int verbosity_level
65-
)
66-
{
67-
if(amgclParams.get<std::string>("precond.class") != "amg")
68-
amgclParams.erase("precond.coarsening");
69-
else
70-
amgclParams.put("precond.coarsening.aggr.block_size",1);
71-
72-
typedef amgcl::static_matrix<double, TBlockSize, TBlockSize> val_type;
73-
typedef amgcl::static_matrix<double, TBlockSize, 1> rhs_type;
74-
typedef amgcl::backend::builtin<val_type> Backend;
75-
76-
std::size_t n = rA.RowMap().NumMyElements();
77-
78-
typedef
79-
amgcl::mpi::make_solver<
80-
amgcl::runtime::mpi::preconditioner<Backend>,
81-
amgcl::runtime::solver::wrapper
82-
>
83-
Solver;
84-
85-
Solver solve(
86-
MPI_COMM_WORLD,
87-
amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
88-
amgclParams
89-
);
90-
91-
auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
92-
auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
93-
94-
auto b_range = boost::make_iterator_range(b_begin, b_begin + n / TBlockSize);
95-
auto x_range = boost::make_iterator_range(x_begin, x_begin + n / TBlockSize);
96-
97-
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
98-
}
17+
int verbosity_level,
18+
bool use_gpgpu
19+
);
9920

10021
} // namespace Kratos
10122

0 commit comments

Comments
 (0)