Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from devito.operator.operator import rcompile
from devito.passes import stream_dimensions
from devito.passes.clusters import (
Lift, blocking, buffering, cire, cse, factorize, fission, fuse, optimize_hyperplanes,
optimize_pows
Lift, apply_par_tiles, blocking, buffering, cire, cse, factorize, fission, fuse,
optimize_hyperplanes, optimize_pows
)
from devito.passes.equations import collect_derivatives
from devito.passes.iet import (
Expand Down Expand Up @@ -67,6 +67,7 @@ def _normalize_kwargs(cls, **kwargs):
reduce=oo.pop('par-tile-reduce', None))

# CIRE
o['cire-block-temps'] = oo.pop('cire-block-temps', cls.CIRE_BLOCK_TEMPS)
o['min-storage'] = oo.pop('min-storage', False)
o['cire-rotate'] = oo.pop('cire-rotate', False)
o['cire-maxpar'] = oo.pop('cire-maxpar', False)
Expand Down Expand Up @@ -198,6 +199,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
if options['blocklazy']:
clusters = blocking(clusters, sregistry, options)

# Unfold the `par-tile`s, if any
clusters = apply_par_tiles(clusters, **kwargs)

return clusters

@classmethod
Expand Down
12 changes: 9 additions & 3 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from devito.operator.operator import rcompile
from devito.passes import is_on_device, stream_dimensions
from devito.passes.clusters import (
Lift, blocking, buffering, cire, cse, factorize, fission, fuse, memcpy_prefetch,
optimize_pows, tasking
Lift, apply_par_tiles, blocking, buffering, cire, cse, factorize, fission, fuse,
memcpy_prefetch, optimize_pows, tasking
)
from devito.passes.equations import collect_derivatives
from devito.passes.iet import (
Expand Down Expand Up @@ -38,7 +38,9 @@

class DeviceOperatorMixin:

# Overrides the default values in the main Operator class
BLOCK_LEVELS = 0
CIRE_BLOCK_TEMPS = False

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it intended to be a default or enforced?

MPI_MODES = (True, 'basic',)

GPU_FIT = 'all-fallback'
Expand Down Expand Up @@ -76,9 +78,10 @@ def _normalize_kwargs(cls, **kwargs):
o['skewing'] = oo.pop('skewing', False)

# CIRE
o['cire-block-temps'] = oo.pop('cire-block-temps', cls.CIRE_BLOCK_TEMPS)
o['min-storage'] = False
o['cire-rotate'] = False
o['cire-maxpar'] = oo.pop('cire-maxpar', True)
o['cire-maxpar'] = oo.pop('cire-maxpar', 'basic')
o['cire-ftemps'] = oo.pop('cire-ftemps', False)
o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
Expand Down Expand Up @@ -239,6 +242,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
if options['blocklazy']:
clusters = blocking(clusters, sregistry, options)

# Unfold the `par-tile`s, if any
clusters = apply_par_tiles(clusters, **kwargs)

return clusters

@classmethod
Expand Down
10 changes: 10 additions & 0 deletions devito/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ class BasicOperator(Operator):
situations where the performance impact might be detrimental.
"""

CIRE_BLOCK_TEMPS = True
"""
If an aliasing expression is computed within a blocked loop nest, all CIRE-
generated temporaries will inherit the block shape. If set to False, the
temporaries' shape will always be defined by the root Dimensions.
"""

CIRE_MINGAIN = 10
"""
Minimum operation count reduction for a redundant expression to be optimized
Expand Down Expand Up @@ -240,6 +247,9 @@ def _check_kwargs(cls, **kwargs):
if oo['mpi'] and oo['mpi'] not in cls.MPI_MODES:
raise InvalidOperator(f"Unsupported MPI mode `{oo['mpi']}`")

if oo['cire-maxpar'] not in (False, 'basic', 'compact'):
raise InvalidOperator("Illegal `cire-maxpar` value")

if oo['cse-algo'] not in ('basic', 'smartsort', 'advanced'):
raise InvalidOperator("Illegal `cse-algo` value")

Expand Down
12 changes: 10 additions & 2 deletions devito/finite_differences/differentiable.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,9 @@ def value(self, idx):
class IndexDerivative(IndexSum):

__rargs__ = ('expr', 'mapper')
__rkwargs__ = IndexSum.__rkwargs__ + ('deriv_order',)

def __new__(cls, expr, mapper, **kwargs):
def __new__(cls, expr, mapper, deriv_order=None, **kwargs):
dimensions = as_tuple(set(mapper.values()))

# Detect the Weights among the arguments
Expand All @@ -1008,6 +1009,8 @@ def __new__(cls, expr, mapper, **kwargs):
obj._weights = weights
obj._mapper = frozendict(mapper)

obj._deriv_order = deriv_order

return obj

def _hashable_content(self):
Expand Down Expand Up @@ -1036,6 +1039,10 @@ def weights(self):
def mapper(self):
return self._mapper

@property
def deriv_order(self):
return self._deriv_order

@property
def depth(self):
iderivs = self.expr.find(IndexDerivative)
Expand Down Expand Up @@ -1212,7 +1219,8 @@ def _diff2sympy(obj):

# Handle special objects
if isinstance(obj, DiffDerivative):
return IndexDerivative(*args, obj.mapper), True
return IndexDerivative(*args, obj.mapper,
deriv_order=obj.deriv_order), True

# Handle generic objects such as arithmetic operations
try:
Expand Down
4 changes: 3 additions & 1 deletion devito/finite_differences/finite_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ def make_derivative(expr, dim, fd_order, deriv_order, side, matvec, x0, coeffici
with suppress(AttributeError):
expr = expr._evaluate(expand=False)

deriv = DiffDerivative(expr*weights, {dim: indices.free_dim})
deriv = DiffDerivative(
expr*weights, {dim: indices.free_dim}, deriv_order=deriv_order
)
else:
terms = []
for i, c in zip(indices, weights, strict=True):
Expand Down
37 changes: 36 additions & 1 deletion devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from devito.mpi.halo_scheme import HaloScheme, HaloTouch
from devito.mpi.reduction_scheme import DistReduce
from devito.symbolics import estimate_cost
from devito.symbolics import estimate_cost, uxreplace
from devito.tools import as_tuple, filter_ordered, flatten, infer_dtype
from devito.types import (
CriticalRegion, Fence, Indexed, PhaseMarker, TensorMove, ThreadArrive, ThreadCommit,
Expand Down Expand Up @@ -128,6 +128,33 @@ def rebuild(self, *args, **kwargs):
syncs=kwargs.get('syncs', self.syncs),
halo_scheme=kwargs.get('halo_scheme', self.halo_scheme))

def subs(self, mapper, compact=()):
"""
Build a new Cluster by applying substitution rules to `self`.
"""
if not mapper:
return self

if self.halo_scheme:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can probably be relaxed to self.halo_scheme.distributed_aindices & mapper.keys()

raise NotImplementedError

key0 = lambda i: i.is_Block
subs0 = {d: self.ispace[d].promote(key0).dim for d in compact}

subs = {**mapper, **subs0}
exprs = [uxreplace(e, subs) for e in self.exprs]

ispace = self.ispace.switch(mapper)
key = lambda i: key0(i) and i in flatten(d._defines for d in subs0)
ispace = ispace.promote(key, mode='total')

guards = self.guards.subs(mapper).promote(subs0)
properties = self.properties.subs(mapper).promote(subs0)
syncs = self.syncs.subs(mapper)

return self.__class__(exprs=exprs, ispace=ispace, guards=guards,
properties=properties, syncs=syncs)

@property
def exprs(self):
return self._exprs
Expand Down Expand Up @@ -591,6 +618,14 @@ def dspace(self):
"""Return the DataSpace of this ClusterGroup."""
return DataSpace.union(*[i.dspace.reset() for i in self])

@property

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this property be cached?

def is_dense(self):
return all(i.is_dense for i in self)

@property
def is_wild(self):
    """
    True if every element of this group reports `is_wild`. An empty group
    yields True, since `all` over no elements is True.
    """
    return all(i.is_wild for i in self)

@property
def is_halo_touch(self):
    """
    True if every element of this group reports `is_halo_touch`. An empty
    group yields True, since `all` over no elements is True.
    """
    return all(i.is_halo_touch for i in self)
Expand Down
21 changes: 12 additions & 9 deletions devito/ir/clusters/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,7 @@ def _make_key(self, cluster, level):
assert self._q_ispace_in_key
ispace = cluster.ispace[:level]

if self._q_guards_in_key:
try:
guards = tuple(cluster.guards.get(i.dim) for i in ispace)
except AttributeError:
# `cluster` is actually a ClusterGroup
assert len(cluster.guards) == 1
guards = tuple(cluster.guards[0].get(i.dim) for i in ispace)
else:
guards = None
guards = self._make_key_guards(cluster, ispace)

if self._q_properties_in_key:
properties = cluster.properties.drop(cluster.ispace[level:].itdims)
Expand All @@ -68,6 +60,17 @@ def _make_key(self, cluster, level):

return (prefix,) + subkey

def _make_key_guards(self, cluster, ispace):
    """
    Build the guards component of the key for `cluster`.

    Return None if `self._q_guards_in_key` is falsy. Otherwise return a tuple
    with one entry per item of `ispace`, in iteration order, obtained by
    looking up each item's `dim` in the guards of `cluster`.

    `cluster.guards` may either expose `get` directly, or be a one-element
    sequence whose only item exposes `get` (the ClusterGroup case).
    """
    if not self._q_guards_in_key:
        return None

    try:
        # The lookup is performed lazily while the tuple is being built, so
        # with an empty `ispace` the guards are never touched and `()` is
        # returned regardless of the type of `cluster`
        return tuple(cluster.guards.get(i.dim) for i in ispace)
    except AttributeError:
        # `cluster` is actually a ClusterGroup
        assert len(cluster.guards) == 1
        return tuple(cluster.guards[0].get(i.dim) for i in ispace)

def _make_key_hook(self, cluster, level):
    """
    Return extra key components for `cluster` at `level`. This
    implementation contributes none and ignores both arguments.
    """
    return ()

Expand Down
30 changes: 21 additions & 9 deletions devito/ir/support/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
)
from devito.tools import (
CacheInstances, Tag, as_mapper, as_tuple, filter_sorted, flatten, is_integer,
memoized_generator, memoized_meth, smart_gt, smart_lt
memoized_generator, memoized_meth, smart_gt, smart_lt, split
)
from devito.types import (
ComponentAccess, CriticalRegion, Dimension, DimensionTuple, Fence, Function, Symbol,
TBArray, Temp, TempArray
TBArray, Temp, TempArray, TensorMove
)

__all__ = ['ExprGeometry', 'IterationInstance', 'Scope', 'TimedAccess']
Expand Down Expand Up @@ -1383,19 +1383,31 @@ def vinf(entries):

def retrieve_accesses(exprs, **kwargs):
"""
Like retrieve_terminals, but ensure that if a ComponentAccess is found,
the ComponentAccess itself is returned, while the wrapped Indexed is discarded.
Similar to `retrieve_terminals`, but with some adjustments:

* ComponentAccesses are retained, but the wrapped Indexed are discarded;
* TensorMoves are upcast to the logical Indexed they represent.
"""
kwargs['mode'] = 'unique'

compaccs = search(exprs, ComponentAccess)
if not compaccs:
return retrieve_terminals(exprs, **kwargs)

subs = {i: Symbol(f'dummy{n}') for n, i in enumerate(compaccs)}
exprs1 = uxreplace(exprs, subs)
if compaccs:
# Handle ComponentAccesses
subs = {i: Symbol(f'dummy{n}') for n, i in enumerate(compaccs)}
exprs1 = uxreplace(exprs, subs)
terms1 = retrieve_terminals(exprs1, **kwargs)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would likely be cheaper to add an `ignore=compaccs` argument to `retrieve_terminals`.


accesses = compaccs | terms1 - set(subs.values())
else:
accesses = retrieve_terminals(exprs, **kwargs)

# Handle TensorMoves
key = lambda i: isinstance(i, TensorMove)
tmovs, other = split(accesses, key)
accesses = {i.access for i in tmovs} | other

return compaccs | retrieve_terminals(exprs1, **kwargs) - set(subs.values())
return accesses


def disjoint_test(e0, e1, d, it):
Expand Down
25 changes: 25 additions & 0 deletions devito/ir/support/guards.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,31 @@ def as_map(self, d, cls):

return dict(i.args for i in search(self.get(d), cls))

def subs(self, mapper):
    """
    Build a new Guards by applying `mapper` to both keys and values.

    Each key `d` is replaced by `mapper.get(d, d)`, so keys absent from
    `mapper` are kept as they are. Each guard has `mapper` applied through
    `xreplace`.
    """
    m = {mapper.get(d, d): v.xreplace(mapper) for d, v in self.items()}

    return Guards(m)

def promote(self, subs):
    """
    Build a new Guards in which, for each `d -> v` entry in `subs`, the
    guard found among `d._defines` is rewritten in terms of `v` and imposed
    on `v`. The keys of `subs` are then dropped via `popany`.

    Raises
    ------
    NotImplementedError
        If more than one distinct non-`true` guard is found among
        `d._defines`.
    """
    m = self
    for d, v in subs.items():
        # Collect the distinct guards over `d._defines`, ignoring `true`.
        # The lookup is always against `self`, not the partially built `m`
        guards = {self.get(i) for i in d._defines} - {true}
        if len(guards) > 1:
            raise NotImplementedError(
                f"Cannot promote {d} to {v} due to multiple guards: {guards}"
            )
        elif len(guards) == 0:
            # Nothing to promote for `d`
            continue

        guard = guards.pop()
        guard = guard.xreplace({d: v})

        m = m.impose(v, guard)

    # NOTE(review): placed after the loop since it takes the whole `subs`
    # mapping -- confirm against the upstream file
    m = m.popany(subs)

    return m


class GuardExpr(LocalObject, BooleanFunction):

Expand Down
Loading
Loading