Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ jobs:
run: |
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
# if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "fft" ]]; then
# DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@pyvkfft
# fi
if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "e2p" ]]; then
DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@e2p
fi
test_downstream "$DOWNSTREAM_PROJECT"

# vim: sw=4
6 changes: 4 additions & 2 deletions sumpy/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,11 @@ def generate_preambles(self, target):

def register_bessel_callables(loopy_knl):
    """Register the Bessel and Hankel callables with *loopy_knl*.

    A callable is registered only when its name is not already present in
    ``loopy_knl.callables_table``, so applying this function more than once
    does not register the same name again.

    :arg loopy_knl: the :mod:`loopy` object to register the callables with.
    :returns: *loopy_knl* with ``bessel_jvvp1`` and ``hank1_01`` available.
    """
    from sumpy.codegen import BesselJvvp1, Hankel1_01

    for name, callable_cls in (
            ("bessel_jvvp1", BesselJvvp1),
            ("hank1_01", Hankel1_01)):
        # Skip names that are already registered.
        if name not in loopy_knl.callables_table:
            loopy_knl = lp.register_callable(
                loopy_knl, name, callable_cls(name))

    return loopy_knl

Expand Down
175 changes: 84 additions & 91 deletions sumpy/e2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@

import numpy as np
import loopy as lp
import sumpy.symbolic as sym

from sumpy.tools import KernelCacheMixin
from sumpy.tools import KernelCacheMixin, gather_loopy_arguments
from loopy.version import MOST_RECENT_LANGUAGE_VERSION


Expand Down Expand Up @@ -82,55 +81,32 @@ def __init__(self, ctx, expansion, kernels,
def default_name(self):
pass

def get_loopy_insns_and_result_names(self):
from sumpy.symbolic import make_sym_vector
bvec = make_sym_vector("b", self.dim)

import sumpy.symbolic as sp
rscale = sp.Symbol("rscale")

from sumpy.assignment_collection import SymbolicAssignmentCollection
sac = SymbolicAssignmentCollection()

coeff_exprs = [
sym.Symbol(f"coeff{i}")
for i in range(len(self.expansion.get_coefficient_identifiers()))]

result_names = [
sac.assign_unique(f"result_{i}_p",
self.expansion.evaluate(knl, coeff_exprs, bvec, rscale, sac=sac))
for i, knl in enumerate(self.kernels)
]

sac.run_global_cse()
def get_cache_key(self):
    """Return the key tuple for this object.

    :returns: a tuple of the concrete class name, ``self.expansion`` and
        ``self.kernels`` converted to a tuple.
    """
    return (type(self).__name__, self.expansion, tuple(self.kernels))

from sumpy.codegen import to_loopy_insns
loopy_insns = to_loopy_insns(
sac.assignments.items(),
vector_names={"b"},
pymbolic_expr_maps=[
knl.get_code_transformer() for knl in self.kernels],
retain_names=result_names,
complex_dtype=np.complex128 # FIXME
)
def add_loopy_eval_callable(
        self, loopy_knl: lp.TranslationUnit) -> lp.TranslationUnit:
    """Merge the expansion's evaluator into *loopy_knl* and inline it.

    :arg loopy_knl: a translation unit containing a call to ``e2p``.
    :returns: the translation unit with the ``e2p`` callable inlined, each
        kernel's ``prepare_loopy_kernel`` applied, and the ``targets``
        array axes tagged ``"sep,C"``.
    """
    # Bring the evaluator kernel into the same translation unit so that
    # the call named "e2p" can be inlined.
    inner_knl = self.expansion.get_loopy_evaluator(self.kernels)
    loopy_knl = lp.merge([loopy_knl, inner_knl])
    loopy_knl = lp.inline_callable_kernel(loopy_knl, "e2p")
    loopy_knl = lp.remove_unused_inames(loopy_knl)

    for kernel in self.kernels:
        loopy_knl = kernel.prepare_loopy_kernel(loopy_knl)

    # NOTE(review): the flattened text does not show whether this call sat
    # inside the loop above; it is applied once here -- confirm.
    loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")
    return loopy_knl

return loopy_insns, result_names
def get_loopy_args(self):
    """Return the result of ``gather_loopy_arguments`` for the expansion
    followed by all kernels.
    """
    return gather_loopy_arguments((self.expansion, *self.kernels))

def get_kernel_scaling_assignment(self):
from sumpy.symbolic import SympyToPymbolicMapper
from sumpy.tools import ScalingAssignmentTag
sympy_conv = SympyToPymbolicMapper()
return [lp.Assignment(id=None,
return [lp.Assignment(id="kernel_scaling",
assignee="kernel_scaling",
expression=sympy_conv(
self.expansion.kernel.get_global_scaling_const()),
temp_var_type=lp.Optional(None),
tags=frozenset([ScalingAssignmentTag()]),
)]

def get_cache_key(self):
return (type(self).__name__, self.expansion, tuple(self.kernels))

# }}}


Expand All @@ -143,14 +119,15 @@ def default_name(self):

def get_kernel(self):
ncoeffs = len(self.expansion)

loopy_insns, result_names = self.get_loopy_insns_and_result_names()
loopy_args = self.get_loopy_args()

loopy_knl = lp.make_kernel(
[
"{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
"{[itgt,idim]: itgt_start<=itgt<itgt_end and 0<=idim<dim}",
],
"{[icoeff]: 0<=icoeff<ncoeffs}",
"{[iknl]: 0<=iknl<nresults}",
],
self.get_kernel_scaling_assignment()
+ ["""
for itgt_box
Expand All @@ -160,27 +137,32 @@ def get_kernel(self):

<> center[idim] = centers[idim, tgt_ibox] {id=fetch_center}

"""] + ["""
<> coeff{coeffidx} = \
src_expansions[tgt_ibox - src_base_ibox, {coeffidx}]
""".format(coeffidx=i) for i in range(ncoeffs)] + ["""
<> coeffs[icoeff] = \
src_expansions[tgt_ibox - src_base_ibox, icoeff] \
{id=fetch_coeffs}

for itgt
<> b[idim] = targets[idim, itgt] - center[idim] {dup=idim}

"""] + loopy_insns + ["""

result[{resultidx},itgt] = \
kernel_scaling * result_{resultidx}_p \
{{id_prefix=write_result}}
""".format(resultidx=i) for i in range(len(result_names))
] + ["""
<> tgt[idim] = targets[idim, itgt] {id=fetch_tgt,dup=idim}
<> result_temp[iknl] = 0 {id=init_result,dup=iknl}
[iknl]: result_temp[iknl] = e2p(
[iknl]: result_temp[iknl],
[icoeff]: coeffs[icoeff],
[idim]: center[idim],
[idim]: tgt[idim],
rscale,
itgt,
ntargets,
targets,
""" + ",".join(arg.name for arg in loopy_args) + """
) {dep=fetch_coeffs:fetch_center:init_result:fetch_tgt,\
id=update_result}
result[iknl, itgt] = result_temp[iknl] * kernel_scaling \
{id=write_result,dep=update_result}
end
end
"""],
[
lp.GlobalArg("targets", None, shape=(self.dim, "ntargets"),
dim_tags="sep,C"),
lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")),
lp.GlobalArg("box_target_starts,box_target_counts_nonchild",
None, shape=None),
lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
Expand All @@ -192,26 +174,28 @@ def get_kernel(self):
lp.ValueArg("nsrc_level_boxes,naligned_boxes", np.int32),
lp.ValueArg("src_base_ibox", np.int32),
lp.ValueArg("ntargets", np.int32),
*loopy_args,
"..."
] + [arg.loopy_arg for arg in self.expansion.get_args()],
],
name=self.name,
assumptions="ntgt_boxes>=1",
silenced_warnings="write_race(write_result*)",
silenced_warnings="write_race(*_result)",
default_offset=lp.auto,
fixed_parameters={"dim": self.dim, "nresults": len(result_names)},
fixed_parameters={"dim": self.dim, "nresults": len(self.kernels),
"ncoeffs": ncoeffs},
lang_version=MOST_RECENT_LANGUAGE_VERSION)

loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
for knl in self.kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
loopy_knl = lp.tag_inames(loopy_knl, "iknl*:unr")
loopy_knl = self.add_loopy_eval_callable(loopy_knl)

return loopy_knl

def get_optimized_kernel(self):
# FIXME
knl = self.get_kernel()
knl = lp.tag_inames(knl, {"itgt_box": "g.0"})
knl = self._allow_redundant_execution_of_knl_scaling(knl)
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
knl = lp.set_options(knl,
enforce_variable_access_ordered="no_check")

Expand Down Expand Up @@ -247,16 +231,17 @@ def default_name(self):

def get_kernel(self):
ncoeffs = len(self.expansion)

loopy_insns, result_names = self.get_loopy_insns_and_result_names()
loopy_args = self.get_loopy_args()

loopy_knl = lp.make_kernel(
[
"{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
"{[itgt]: itgt_start<=itgt<itgt_end}",
"{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_end }",
"{[idim]: 0<=idim<dim}",
],
"{[icoeff]: 0<=icoeff<ncoeffs}",
"{[iknl]: 0<=iknl<nresults}",
],
self.get_kernel_scaling_assignment()
+ ["""
for itgt_box
Expand All @@ -265,35 +250,40 @@ def get_kernel(self):
<> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox]

for itgt
<> tgt[idim] = targets[idim,itgt]
<> tgt[idim] = targets[idim,itgt] {id=fetch_tgt,dup=idim}

<> isrc_box_start = source_box_starts[itgt_box]
<> isrc_box_end = source_box_starts[itgt_box+1]

<> result_temp[iknl] = 0 {id=init_result,dup=iknl}
for isrc_box
<> src_ibox = source_box_lists[isrc_box]
"""] + ["""
<> coeff{coeffidx} = \
src_expansions[src_ibox - src_base_ibox, {coeffidx}]
""".format(coeffidx=i) for i in range(ncoeffs)] + ["""

<> center[idim] = centers[idim, src_ibox] {dup=idim}
<> b[idim] = tgt[idim] - center[idim] {dup=idim}

"""] + loopy_insns + ["""
<> coeffs[icoeff] = \
src_expansions[src_ibox - src_base_ibox, icoeff] \
{id=fetch_coeffs,dup=icoeff}
<> center[idim] = centers[idim, src_ibox] \
{dup=idim,id=fetch_center}
[iknl]: result_temp[iknl] = e2p(
[iknl]: result_temp[iknl],
[icoeff]: coeffs[icoeff],
[idim]: center[idim],
[idim]: tgt[idim],
rscale,
itgt,
ntargets,
targets,
""" + ",".join(arg.name for arg in loopy_args) + """
) {id=update_result, \
dep=fetch_coeffs:fetch_center:fetch_tgt:init_result}
end
"""] + ["""
result[{resultidx}, itgt] = result[{resultidx}, itgt] + \
kernel_scaling * simul_reduce(sum, isrc_box,
result_{resultidx}_p) {{id_prefix=write_result}}
""".format(resultidx=i) for i in range(len(result_names))]
+ ["""
result[iknl, itgt] = result[iknl, itgt] + result_temp[iknl] \
* kernel_scaling \
{dep=update_result:init_result,id=write_result,dup=iknl}
end
end
"""],
[
lp.GlobalArg("targets", None, shape=(self.dim, "ntargets"),
dim_tags="sep,C"),
lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")),
lp.GlobalArg("box_target_starts,box_target_counts_nonchild",
None, shape=None),
lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
Expand All @@ -306,29 +296,32 @@ def get_kernel(self):
dim_tags="sep,C"),
lp.GlobalArg("source_box_starts, source_box_lists,",
None, shape=None, offset=lp.auto),
*loopy_args,
"..."
] + [arg.loopy_arg for arg in self.expansion.get_args()],
],
name=self.name,
assumptions="ntgt_boxes>=1",
silenced_warnings="write_race(write_result*)",
silenced_warnings="write_race(*_result)",
default_offset=lp.auto,
fixed_parameters={
"ncoeffs": ncoeffs,
"dim": self.dim,
"nresults": len(result_names)},
"nresults": len(self.kernels)},
lang_version=MOST_RECENT_LANGUAGE_VERSION)

loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.tag_inames(loopy_knl, "iknl*:unr")
loopy_knl = lp.prioritize_loops(loopy_knl, "itgt_box,itgt,isrc_box")
for knl in self.kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
loopy_knl = self.add_loopy_eval_callable(loopy_knl)
loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")

return loopy_knl

def get_optimized_kernel(self):
    """Return the kernel from :meth:`get_kernel` with tags and options set.

    The ``itgt_box`` iname is tagged ``"g.0"``, ``itgt_box`` is added to
    the instruction with id ``kernel_scaling``, and the
    ``enforce_variable_access_ordered`` option is set to ``"no_check"``.
    """
    # FIXME
    knl = self.get_kernel()
    knl = lp.tag_inames(knl, {"itgt_box": "g.0"})
    knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
    knl = lp.set_options(knl, enforce_variable_access_ordered="no_check")
    return knl
Expand Down
25 changes: 24 additions & 1 deletion sumpy/expansion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
"""

from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, Hashable, List, Optional, Tuple, Type
from typing import (
Any, ClassVar, Dict, Hashable, List, Optional, Sequence, Tuple, Type)

from pytools import memoize_method
import loopy as lp

import sumpy.symbolic as sym
from sumpy.kernel import Kernel
Expand Down Expand Up @@ -63,7 +65,9 @@ class ExpansionBase(ABC):
.. automethod:: get_coefficient_identifiers
.. automethod:: coefficients_from_source
.. automethod:: coefficients_from_source_vec
.. automethod:: get_loopy_expansion_formation
.. automethod:: evaluate
.. automethod:: get_loopy_evaluator

.. automethod:: with_kernel
.. automethod:: copy
Expand Down Expand Up @@ -159,6 +163,17 @@ def coefficients_from_source_vec(self,
result[i] += weight * coeffs[i]
return result

def get_loopy_expansion_formation(
        self, kernels: Sequence[Kernel],
        strength_usage: Sequence[int], nstrengths: int) -> lp.TranslationUnit:
    """
    :arg kernels: the kernels for which the expansion is formed.
    :arg strength_usage: for each kernel, the strength it uses.
    :arg nstrengths: forwarded unchanged to
        ``make_p2e_loopy_kernel``; presumably the number of strengths
        (confirm against that function).
    :returns: a :mod:`loopy` kernel that returns the coefficients
        for the expansion given by *kernels* with each kernel using
        the strength given by *strength_usage*.
    """
    # Imported here rather than at module level, as in the original.
    from sumpy.expansion.loopy import make_p2e_loopy_kernel
    return make_p2e_loopy_kernel(self, kernels, strength_usage, nstrengths)

@abstractmethod
def evaluate(self, kernel, coeffs, bvec, rscale, sac=None):
"""
Expand All @@ -167,6 +182,14 @@ def evaluate(self, kernel, coeffs, bvec, rscale, sac=None):
in *coeffs*.
"""

def get_loopy_evaluator(self, kernels: Sequence[Kernel]) -> lp.TranslationUnit:
    """
    :arg kernels: the kernels whose target transforms are evaluated.
    :returns: a :mod:`loopy` kernel that returns the evaluated
        target transforms of the potential given by *kernels*.
    """
    # Imported here rather than at module level, as in the original.
    from sumpy.expansion.loopy import make_e2p_loopy_kernel
    return make_e2p_loopy_kernel(self, kernels)

# }}}

# {{{ copy
Expand Down
Loading