-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathsequential.py
More file actions
307 lines (262 loc) · 11.9 KB
/
sequential.py
File metadata and controls
307 lines (262 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# This file is part of PyOP2
#
# PyOP2 is Copyright (c) 2012, Imperial College London and
# others. Please see the AUTHORS file in the main source directory for
# a full list of copyright holders. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * The name of Imperial College London or that of other
# contributors may not be used to endorse or promote products
# derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS
# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
"""OP2 sequential backend."""
import os
from copy import deepcopy as dcopy
import ctypes
import loopy
from pyop2.datatypes import IntType, as_ctypes
from pyop2 import base
from pyop2 import compilation
from pyop2 import petsc_base
from pyop2.base import par_loop # noqa: F401
from pyop2.base import READ, WRITE, RW, INC, MIN, MAX # noqa: F401
from pyop2.base import ALL
from pyop2.base import Map, MixedMap, Sparsity, Halo # noqa: F401
from pyop2.base import Set, ExtrudedSet, MixedSet, Subset # noqa: F401
from pyop2.base import DatView # noqa: F401
from pyop2.base import Kernel # noqa: F401
from pyop2.base import Arg # noqa: F401
from pyop2.petsc_base import DataSet, MixedDataSet # noqa: F401
from pyop2.petsc_base import Global, GlobalDataSet # noqa: F401
from pyop2.petsc_base import Dat, MixedDat, Mat # noqa: F401
from pyop2.exceptions import * # noqa: F401
from pyop2.mpi import collective
from pyop2.profiling import timed_region
from pyop2.utils import cached_property, get_petsc_dir
from pyop2.configuration import configuration
def vectorise(wrapper, iname, batch_size):
"""Return a vectorised version of wrapper, vectorising over iname.
:arg wrapper: A loopy kernel to vectorise.
:arg iname: The iteration index to vectorise over.
:arg batch_size: The vector width."""
if batch_size == 1:
return wrapper
wrapper = wrapper.copy(target=loopy.CVecTarget())
kernel = wrapper.root_kernel
# split iname and vectorize the inner loop
slabs = (1, 1)
inner_iname = iname + "_batch"
if configuration["vectorization_strategy"] == "ve":
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=slabs, inner_tag="vec", inner_iname=inner_iname)
alignment = configuration["alignment"]
tmps = dict((name, tv.copy(alignment=alignment)) for name, tv in kernel.temporary_variables.items())
kernel = kernel.copy(temporary_variables=tmps)
wrapper = wrapper.with_root_kernel(kernel)
return wrapper
class JITModule(base.JITModule):
_cppargs = []
_libraries = []
_system_headers = []
def __init__(self, kernel, iterset, *args, **kwargs):
r"""
A cached compiled function to execute for a specified par_loop.
See :func:`~.par_loop` for the description of arguments.
.. warning ::
Note to implementors. This object is *cached*, and therefore
should not hold any long term references to objects that
you want to be collected. In particular, after the
``args`` have been inspected to produce the compiled code,
they **must not** remain part of the object's slots,
otherwise they (and the :class:`~.Dat`\s, :class:`~.Map`\s
and :class:`~.Mat`\s they reference) will never be collected.
"""
# Return early if we were in the cache.
if self._initialized:
return
self.comm = iterset.comm
self._kernel = kernel
self._fun = None
self._iterset = iterset
self._args = args
self._iteration_region = kwargs.get('iterate', ALL)
self._pass_layer_arg = kwargs.get('pass_layer_arg', False)
# Copy the class variables, so we don't overwrite them
self._cppargs = dcopy(type(self)._cppargs)
self._libraries = dcopy(type(self)._libraries)
self._system_headers = dcopy(type(self)._system_headers)
if not kwargs.get('delay', False):
self.compile()
self._initialized = True
@collective
def __call__(self, *args):
return self._fun(*args)
@cached_property
def _wrapper_name(self):
return 'wrap_%s' % self._kernel.name
@cached_property
def code_to_compile(self):
from pyop2.codegen.builder import WrapperBuilder
from pyop2.codegen.rep2loopy import generate
builder = WrapperBuilder(kernel=self._kernel,
iterset=self._iterset,
iteration_region=self._iteration_region,
pass_layer_to_kernel=self._pass_layer_arg)
for arg in self._args:
builder.add_argument(arg)
wrapper = generate(builder)
if self._iterset._extruded:
iname = "layer"
else:
iname = "n"
has_matrix = any(arg._is_mat for arg in self._args)
has_rw = any(arg.access == RW for arg in self._args)
is_cplx = any(arg.dtype.name == 'complex128' for arg in self._args)
vectorisable = not (has_matrix or has_rw) and (configuration["vectorization_strategy"])
if (isinstance(self._kernel.code, loopy.LoopKernel) and vectorisable):
wrapper = loopy.inline_callable_kernel(wrapper, self._kernel.name)
if not is_cplx:
wrapper = vectorise(wrapper, iname, configuration["simd_width"])
code = loopy.generate_code_v2(wrapper)
if self._kernel._cpp:
from loopy.codegen.result import process_preambles
preamble = "".join(process_preambles(getattr(code, "device_preambles", [])))
device_code = "\n\n".join(str(dp.ast) for dp in code.device_programs)
return preamble + "\nextern \"C\" {\n" + device_code + "\n}\n"
return code.device_code()
@collective
def compile(self):
# If we weren't in the cache we /must/ have arguments
if not hasattr(self, '_args'):
raise RuntimeError("JITModule has no args associated with it, should never happen")
compiler = configuration["compiler"]
extension = "cpp" if self._kernel._cpp else "c"
cppargs = self._cppargs
cppargs += ["-I%s/include" % d for d in get_petsc_dir()] + \
["-I%s" % d for d in self._kernel._include_dirs] + \
["-I%s" % os.path.abspath(os.path.dirname(__file__))]
ldargs = ["-L%s/lib" % d for d in get_petsc_dir()] + \
["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + \
["-lpetsc", "-lm"] + self._libraries
ldargs += self._kernel._ldargs
self._fun = compilation.load(self,
extension,
self._wrapper_name,
cppargs=cppargs,
ldargs=ldargs,
restype=ctypes.c_int,
compiler=compiler,
comm=self.comm)
# Blow away everything we don't need any more
del self._args
del self._kernel
del self._iterset
@cached_property
def argtypes(self):
index_type = as_ctypes(IntType)
argtypes = (index_type, index_type)
argtypes += self._iterset._argtypes_
for arg in self._args:
argtypes += arg._argtypes_
seen = set()
for arg in self._args:
maps = arg.map_tuple
for map_ in maps:
for k, t in zip(map_._kernel_args_, map_._argtypes_):
if k in seen:
continue
argtypes += (t,)
seen.add(k)
return argtypes
class ParLoop(petsc_base.ParLoop):
def set_nbytes(self, args):
nbytes = 0
seen = set()
for arg in args:
if arg.access is INC:
nbytes += arg.data.nbytes * 2
else:
nbytes += arg.data.nbytes
for map_ in arg.map_tuple:
if map_ is None:
continue
for k in map_._kernel_args_:
if k in seen:
continue
nbytes += map_.values.nbytes
seen.add(k)
self.nbytes = nbytes
def prepare_arglist(self, iterset, *args):
arglist = iterset._kernel_args_
for arg in args:
arglist += arg._kernel_args_
seen = set()
for arg in args:
maps = arg.map_tuple
for map_ in maps:
if map_ is None:
continue
for k in map_._kernel_args_:
if k in seen:
continue
arglist += (k,)
seen.add(k)
return arglist
@cached_property
def _jitmodule(self):
return JITModule(self.kernel, self.iterset, *self.args,
iterate=self.iteration_region,
pass_layer_arg=self._pass_layer_arg)
@cached_property
def _compute_event(self):
return timed_region("ParLoop_{0}_{1}".format(self.iterset.name, self._jitmodule._wrapper_name))
@collective
def _compute(self, part, fun, *arglist):
with self._compute_event:
self.log_flops(part.size * self.num_flops)
fun(part.offset, part.offset + part.size, *arglist)
def generate_single_cell_wrapper(iterset, args, forward_args=(), kernel_name=None, wrapper_name=None):
"""Generates wrapper for a single cell. No iteration loop, but cellwise data is extracted.
Cell is expected as an argument to the wrapper. For extruded, the numbering of the cells
is columnwise continuous, bottom to top.
:param iterset: The iteration set
:param args: :class:`Arg`s
:param forward_args: To forward unprocessed arguments to the kernel via the wrapper,
give an iterable of strings describing their C types.
:param kernel_name: Kernel function name
:param wrapper_name: Wrapper function name
:return: string containing the C code for the single-cell wrapper
"""
from pyop2.codegen.builder import WrapperBuilder
from pyop2.codegen.rep2loopy import generate
from loopy.types import OpaqueType
forward_arg_types = [OpaqueType(fa) for fa in forward_args]
empty_kernel = Kernel("", kernel_name)
builder = WrapperBuilder(kernel=empty_kernel,
iterset=iterset, single_cell=True,
forward_arg_types=forward_arg_types)
for arg in args:
builder.add_argument(arg)
wrapper = generate(builder, wrapper_name)
code = loopy.generate_code_v2(wrapper)
return code.device_code()