diff --git a/pytential/linalg/__init__.py b/pytential/linalg/__init__.py
index 9b04cf0b9..bfbd4d82e 100644
--- a/pytential/linalg/__init__.py
+++ b/pytential/linalg/__init__.py
@@ -23,18 +23,27 @@
 from pytential.linalg.utils import (
         IndexList, TargetAndSourceClusterList,
         make_index_list, make_index_cluster_cartesian_product,
+        interp_decomp,
         )
 from pytential.linalg.proxy import (
-        ProxyClusterGeometryData,
+        ProxyClusterGeometryData, ProxyPointTarget, ProxyPointSource,
         ProxyGeneratorBase, ProxyGenerator, QBXProxyGenerator,
         partition_by_nodes, gather_cluster_neighbor_points,
         )
+from pytential.linalg.skeletonization import (
+        SkeletonizationWrangler, make_skeletonization_wrangler,
+        SkeletonizationResult, skeletonize_by_proxy,
+        )
 
 __all__ = (
     "IndexList", "TargetAndSourceClusterList",
     "make_index_list", "make_index_cluster_cartesian_product",
+    "interp_decomp",
 
-    "ProxyClusterGeometryData",
+    "ProxyClusterGeometryData", "ProxyPointTarget", "ProxyPointSource",
     "ProxyGeneratorBase", "ProxyGenerator", "QBXProxyGenerator",
     "partition_by_nodes", "gather_cluster_neighbor_points",
+
+    "SkeletonizationWrangler", "make_skeletonization_wrangler",
+    "SkeletonizationResult", "skeletonize_by_proxy",
 )
diff --git a/pytential/linalg/direct_solver_symbolic.py b/pytential/linalg/direct_solver_symbolic.py
index 7c7e6786e..0215e694a 100644
--- a/pytential/linalg/direct_solver_symbolic.py
+++ b/pytential/linalg/direct_solver_symbolic.py
@@ -20,6 +20,8 @@
 THE SOFTWARE.
 """
 
+from pytools.obj_array import make_obj_array
+
 from pytential.symbolic.mappers import (
         IdentityMapper, OperatorCollector, LocationTagger)
 
@@ -30,6 +32,39 @@
 """
 
 
+# {{{ utils
+
+class PROXY_SKELETONIZATION_SOURCE:             # noqa: N801
+    pass
+
+
+class PROXY_SKELETONIZATION_TARGET:             # noqa: N801
+    pass
+
+
+def prepare_expr(places, exprs, auto_where=None):
+    from pytential.symbolic.execution import _prepare_expr
+    return make_obj_array([
+        _prepare_expr(places, expr, auto_where=auto_where)
+        for expr in exprs])
+
+
+def prepare_proxy_expr(places, exprs, auto_where=None):
+    def _prepare_expr(expr):
+        # remove all diagonal / non-operator terms in the expression
+        expr = IntGTermCollector()(expr)
+        # ensure all IntGs remove all the kernel derivatives
+        expr = KernelTransformationRemover()(expr)
+        # ensure all IntGs have their source and targets set
+        expr = DOFDescriptorReplacer(auto_where[0], auto_where[1])(expr)
+
+        return expr
+
+    return make_obj_array([_prepare_expr(expr) for expr in exprs])
+
+# }}}
+
+
 # {{{ KernelTransformationRemover
 
 class KernelTransformationRemover(IdentityMapper):
diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py
index 99cea778f..642879316 100644
--- a/pytential/linalg/proxy.py
+++ b/pytential/linalg/proxy.py
@@ -33,6 +33,9 @@
 from pytential import GeometryCollection, bind, sym
 from pytential.symbolic.dof_desc import DOFDescriptorLike
 from pytential.linalg.utils import IndexList
+from pytential.source import PointPotentialSource
+from pytential.target import PointsTarget
+from pytential.qbx import QBXLayerPotentialSource
 
 import loopy as lp
 from loopy.version import MOST_RECENT_LANGUAGE_VERSION
@@ -44,7 +47,10 @@
 
 .. currentmodule:: pytential.linalg
 
+.. autoclass:: ProxyPointSource
+.. autoclass:: ProxyPointTarget
 .. autoclass:: ProxyClusterGeometryData
+
 .. autoclass:: ProxyGeneratorBase
 .. autoclass:: ProxyGenerator
 .. autoclass:: QBXProxyGenerator
@@ -122,28 +128,49 @@ def partition_by_nodes(
 # }}}
 
 
-# {{{ proxy point generator
-
-def _generate_unit_sphere(ambient_dim: int, approx_npoints: int) -> np.ndarray:
-    """Generate uniform points on a unit sphere.
+# {{{ proxy points
 
-    :arg ambient_dim: dimension of the ambient space.
-    :arg approx_npoints: approximate number of points to generate. If the
-        ambient space is 3D, this will not generate the exact number of points.
-    :return: array of shape ``(ambient_dim, npoints)``, where ``npoints``
-        will not generally be the same as ``approx_npoints``.
+class ProxyPointSource(PointPotentialSource):
+    """
+    .. automethod:: get_expansion_for_qbx_direct_eval
     """
 
-    if ambient_dim == 2:
-        t = np.linspace(0.0, 2.0 * np.pi, approx_npoints)
-        points = np.vstack([np.cos(t), np.sin(t)])
-    elif ambient_dim == 3:
-        from pytools import sphere_sample_equidistant
-        points = sphere_sample_equidistant(approx_npoints, r=1)
-    else:
-        raise ValueError("ambient_dim > 3 not supported.")
+    def __init__(self,
+            lpot_source: QBXLayerPotentialSource,
+            proxies: np.ndarray) -> None:
+        """
+        :arg lpot_source: the layer potential for which the proxy are constructed.
+        :arg proxies: an array of shape ``(ambient_dim, nproxies)`` containing
+            the proxy points.
+        """
+        assert proxies.shape[0] == lpot_source.ambient_dim
+
+        super().__init__(proxies)
+        self.lpot_source = lpot_source
+
+    def get_expansion_for_qbx_direct_eval(self, base_kernel, target_kernels):
+        """Wrapper around
+        ``pytential.qbx.QBXLayerPotentialSource.get_expansion_for_qbx_direct_eval``
+        to allow this class to be used by the matrix builders.
+        """
+        return self.lpot_source.get_expansion_for_qbx_direct_eval(
+                base_kernel, target_kernels)
 
-    return points
+
+class ProxyPointTarget(PointsTarget):
+    def __init__(self,
+            lpot_source: QBXLayerPotentialSource,
+            proxies: np.ndarray) -> None:
+        """
+        :arg lpot_source: the layer potential for which the proxy are constructed.
+            This argument is kept for symmetry with :class:`ProxyPointSource`.
+        :arg proxies: an array of shape ``(ambient_dim, nproxies)`` containing
+            the proxy points.
+        """
+        assert proxies.shape[0] == lpot_source.ambient_dim
+
+        super().__init__(proxies)
+        self.lpot_source = lpot_source
 
 
 @dataclass(frozen=True)
@@ -176,6 +203,8 @@ class ProxyClusterGeometryData:
 
     .. automethod:: __init__
     .. automethod:: to_numpy
+    .. automethod:: as_sources
+    .. automethod:: as_targets
     """
 
     places: GeometryCollection
@@ -188,17 +217,65 @@ class ProxyClusterGeometryData:
     centers: np.ndarray
     radii: np.ndarray
 
+    _cluster_radii: Optional[np.ndarray] = None
+
     @property
     def nclusters(self) -> int:
         return self.srcindex.nclusters
 
+    @property
+    def discr(self):
+        return self.places.get_discretization(
+                self.dofdesc.geometry,
+                self.dofdesc.discr_stage)
+
     def to_numpy(self, actx: PyOpenCLArrayContext) -> "ProxyClusterGeometryData":
         from arraycontext import to_numpy
+        if self._cluster_radii is not None:
+            cluster_radii = to_numpy(self._cluster_radii, actx)
+        else:
+            cluster_radii = None
+
         from dataclasses import replace
         return replace(self,
-                points=to_numpy(self.points, actx),
-                centers=to_numpy(self.centers, actx),
-                radii=to_numpy(self.radii, actx))
+                points=np.stack(to_numpy(self.points, actx)),
+                centers=np.stack(to_numpy(self.centers, actx)),
+                radii=to_numpy(self.radii, actx),
+                _cluster_radii=cluster_radii)
+
+    def as_sources(self) -> ProxyPointSource:
+        lpot_source = self.places.get_geometry(self.dofdesc.geometry)
+        return ProxyPointSource(lpot_source, self.points)
+
+    def as_targets(self) -> ProxyPointTarget:
+        lpot_source = self.places.get_geometry(self.dofdesc.geometry)
+        return ProxyPointTarget(lpot_source, self.points)
+
+# }}}
+
+
+# {{{ proxy point generator
+
+def _generate_unit_sphere(ambient_dim: int, approx_npoints: int) -> np.ndarray:
+    """Generate uniform points on a unit sphere.
+
+    :arg ambient_dim: dimension of the ambient space.
+    :arg approx_npoints: approximate number of points to generate. If the
+        ambient space is 3D, this will not generate the exact number of points.
+    :return: array of shape ``(ambient_dim, npoints)``, where ``npoints``
+        will not generally be the same as ``approx_npoints``.
+    """
+
+    if ambient_dim == 2:
+        t = np.linspace(0.0, 2.0 * np.pi, approx_npoints, endpoint=False)
+        points = np.vstack([np.cos(t), np.sin(t)])
+    elif ambient_dim == 3:
+        from pytools import sphere_sample_equidistant
+        points = sphere_sample_equidistant(approx_npoints, r=1)
+    else:
+        raise ValueError("ambient_dim > 3 not supported.")
+
+    return points
 
 
 def make_compute_cluster_centers_knl(
@@ -346,6 +423,8 @@ def __call__(self,
         discr = self.places.get_discretization(
                 source_dd.geometry, source_dd.discr_stage)
 
+        include_cluster_radii = kwargs.pop("include_cluster_radii", False)
+
         # {{{ get proxy centers and radii
 
         sources = flatten(discr.nodes(), actx, leaf_class=DOFArray)
@@ -365,6 +444,18 @@ def __call__(self,
                 proxy_centers=centers_dev,
                 **kwargs)
 
+        if include_cluster_radii:
+            knl = make_compute_cluster_radii_knl(actx, self.ambient_dim)
+            _, (cluster_radii,) = knl(actx.queue,
+                    sources=sources,
+                    srcindices=dof_index.indices,
+                    srcstarts=dof_index.starts,
+                    radius_factor=1.0,
+                    proxy_centers=centers_dev)
+            cluster_radii = actx.freeze(cluster_radii)
+        else:
+            cluster_radii = None
+
         # }}}
 
         # {{{ build proxy points for each cluster
@@ -398,6 +489,7 @@ def __call__(self,
                 points=actx.freeze(from_numpy(proxies, actx)),
                 centers=actx.freeze(centers_dev),
                 radii=actx.freeze(radii_dev),
+                _cluster_radii=cluster_radii
                 )
 
 
diff --git a/pytential/linalg/skeletonization.py b/pytential/linalg/skeletonization.py
new file mode 100644
index 000000000..ba15ec7ee
--- /dev/null
+++ b/pytential/linalg/skeletonization.py
@@ -0,0 +1,645 @@
+__copyright__ = "Copyright (C) 2018-2022 Alexandru Fikl"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from dataclasses import dataclass
+from typing import (
+        Any, Callable, Dict, Hashable, Iterable, Optional, Tuple, Union)
+
+import numpy as np
+
+from arraycontext import PyOpenCLArrayContext
+
+from pytential import GeometryCollection, sym
+from pytential.linalg.utils import IndexList, TargetAndSourceClusterList
+from pytential.linalg.proxy import ProxyGeneratorBase, ProxyClusterGeometryData
+from pytential.linalg.direct_solver_symbolic import (
+        PROXY_SKELETONIZATION_TARGET, PROXY_SKELETONIZATION_SOURCE,
+        prepare_expr, prepare_proxy_expr)
+
+
+__doc__ = """
+.. currentmodule:: pytential.linalg
+
+Skeletonization
+---------------
+
+.. autoclass:: SkeletonizationWrangler
+.. autoclass:: make_skeletonization_wrangler
+
+.. autoclass:: SkeletonizationResult
+.. autofunction:: skeletonize_by_proxy
+"""
+
+
+# {{{ wrangler
+
+def _approximate_geometry_waa_magnitude(actx, places, nbrindex, domain):
+    from pytools import memoize_in
+    from pytential import bind, sym
+
+    @memoize_in(actx, (_approximate_geometry_waa_magnitude, "mean_over_cluster"))
+    def prg():
+        import loopy as lp
+        knl = lp.make_kernel([
+            "{[icluster]: 0 <= icluster < nclusters}",
+            "{[i]: 0 <= i < npoints}",
+            ],
+            """
+            <> ioffset = starts[icluster]
+            <> npoints = starts[icluster + 1] - ioffset
+            result[icluster] = reduce(sum, i, waa[indices[i + ioffset]]) / npoints
+            """)
+
+        return knl
+
+    waa = bind(places, sym.weights_and_area_elements(
+        places.ambient_dim, dofdesc=domain))(actx)
+    result = actx.zeros((nbrindex.nclusters,), dtype=waa.entry_dtype)
+
+    from arraycontext import flatten
+    _, (waa_per_cluster,) = prg()(actx.queue,         # pylint: disable=not-callable
+            waa=flatten(waa, actx),
+            result=result,
+            indices=nbrindex.indices,
+            starts=nbrindex.starts)
+
+    return waa_per_cluster
+
+
+def _apply_weights(actx, mat, places, tgt_pxy_index, nbrindex, domain):
+    assert tgt_pxy_index.nclusters == nbrindex.nclusters
+    waa = actx.to_numpy(
+            _approximate_geometry_waa_magnitude(actx, places, nbrindex, domain)
+            )
+
+    for i in range(tgt_pxy_index.nclusters):
+        istart, iend = tgt_pxy_index._flat_cluster_starts[i:i + 2]
+        mat[istart:iend] *= waa[i]
+
+    return mat
+
+
+@dataclass(frozen=True)
+class SkeletonizationWrangler:
+    """
+    .. attribute:: exprs
+
+        An :class:`~numpy.ndarray` of expressions (layer potentials)
+        that correspond to the output blocks of the matrix. These expressions
+        are tagged for nearfield neighbor evalution.
+
+    .. attribute:: source_proxy_exprs
+    .. attribute:: target_proxy_exprs
+
+        Like :attr:`exprs`, but stripped down for farfield proxy
+        evaluation.
+
+    .. attribute:: input_exprs
+
+        A :class:`tuple` of densities that correspond to the input blocks
+        of the matrix.
+
+    .. attribute:: domains
+
+        A :class:`tuple` of the same length as *input_exprs* defining the
+        domain of each input.
+
+    .. attribute:: context
+
+        A :class:`dict` with additional parameters required to evaluate the
+        expressions.
+
+    The following attributes and methods are internal and used for skeletonization.
+
+    .. attribute:: weighted_sources
+
+        A flag which if *True* adds a weight to the source to proxy evaluation.
+
+    .. attribute:: weighted_targets
+
+        A flag which if *True* adds a weight to the proxy to target evaluation.
+
+    .. attribute:: proxy_source_cluster_builder
+    .. attribute:: proxy_target_cluster_builder
+
+        A callable that is used to evaluate farfield proxy interactions.
+        This should follow the calling convention of the constructor to
+        :class:`pytential.symbolic.matrix.P2PClusterMatrixBuilder`.
+
+    .. attribute:: neighbor_cluster_builder
+
+        A callable that is used to evaluate nearfield neighbour interactions.
+        This should follow the calling convention of the constructor to
+        :class:`pytential.symbolic.matrix.QBXClusterMatrixBuilder`.
+
+    .. automethod:: evaluate_source_proxy_interaction
+    .. automethod:: evaluate_target_proxy_interaction
+    .. automethod:: evaluate_source_neighbor_interaction
+    .. automethod:: evaluate_target_neighbor_interaction
+    """
+
+    # operator
+    exprs: np.ndarray
+    input_exprs: Tuple[sym.var, ...]
+    domains: Tuple[Hashable, ...]
+    context: Dict[str, Any]
+
+    neighbor_cluster_builder: Callable[..., np.ndarray]
+
+    # target skeletonization
+    weighted_targets: bool
+    target_proxy_exprs: np.ndarray
+    proxy_target_cluster_builder: Callable[..., np.ndarray]
+
+    # source skeletonization
+    weighted_sources: bool
+    source_proxy_exprs: np.ndarray
+    proxy_source_cluster_builder: Callable[..., np.ndarray]
+
+    @property
+    def nrows(self):
+        return len(self.exprs)
+
+    @property
+    def ncols(self):
+        return len(self.input_exprs)
+
+    def _evaluate_expr(self,
+            actx, places, eval_mapper_cls, tgt_src_index, expr, idomain,
+            **kwargs):
+        domain = self.domains[idomain]
+        dep_source = places.get_geometry(domain.geometry)
+        dep_discr = places.get_discretization(domain.geometry, domain.discr_stage)
+
+        return eval_mapper_cls(actx,
+                dep_expr=self.input_exprs[idomain],
+                other_dep_exprs=(
+                    self.input_exprs[:idomain]
+                    + self.input_exprs[idomain+1:]),
+                dep_source=dep_source,
+                dep_discr=dep_discr,
+                places=places,
+                tgt_src_index=tgt_src_index,
+                context=self.context,
+                **kwargs)(expr)
+
+    # {{{ nearfield
+
+    def evaluate_source_neighbor_interaction(self,
+            actx: PyOpenCLArrayContext,
+            places: GeometryCollection,
+            pxy: ProxyClusterGeometryData, nbrindex: IndexList, *,
+            ibrow: int, ibcol: int) -> Tuple[np.ndarray, TargetAndSourceClusterList]:
+        nbr_src_index = TargetAndSourceClusterList(nbrindex, pxy.srcindex)
+
+        eval_mapper_cls = self.neighbor_cluster_builder
+        expr = self.exprs[ibrow]
+        mat = self._evaluate_expr(
+                actx, places, eval_mapper_cls, nbr_src_index, expr,
+                idomain=ibcol, _weighted=self.weighted_sources)
+
+        return mat, nbr_src_index
+
+    def evaluate_target_neighbor_interaction(self,
+            actx: PyOpenCLArrayContext,
+            places: GeometryCollection,
+            pxy: ProxyClusterGeometryData, nbrindex: IndexList, *,
+            ibrow: int, ibcol: int) -> Tuple[np.ndarray, TargetAndSourceClusterList]:
+        tgt_nbr_index = TargetAndSourceClusterList(pxy.srcindex, nbrindex)
+
+        eval_mapper_cls = self.neighbor_cluster_builder
+        expr = self.exprs[ibrow]
+        mat = self._evaluate_expr(
+                actx, places, eval_mapper_cls, tgt_nbr_index, expr,
+                idomain=ibcol, _weighted=self.weighted_targets)
+
+        return mat, tgt_nbr_index
+
+    # }}}
+
+    # {{{ proxy
+
+    def evaluate_source_proxy_interaction(self,
+            actx: PyOpenCLArrayContext,
+            places: GeometryCollection,
+            pxy: ProxyClusterGeometryData, nbrindex: IndexList, *,
+            ibrow: int, ibcol: int) -> Tuple[np.ndarray, TargetAndSourceClusterList]:
+        from pytential.collection import add_geometry_to_collection
+        pxy_src_index = TargetAndSourceClusterList(pxy.pxyindex, pxy.srcindex)
+        places = add_geometry_to_collection(
+                places, {PROXY_SKELETONIZATION_TARGET: pxy.as_targets()}
+                )
+
+        eval_mapper_cls = self.proxy_source_cluster_builder
+        expr = self.source_proxy_exprs[ibrow]
+        mat = self._evaluate_expr(
+                actx, places, eval_mapper_cls, pxy_src_index, expr,
+                idomain=ibcol,
+                _weighted=self.weighted_sources,
+                exclude_self=False)
+
+        return mat, pxy_src_index
+
+    def evaluate_target_proxy_interaction(self,
+            actx: PyOpenCLArrayContext,
+            places: GeometryCollection,
+            pxy: ProxyClusterGeometryData, nbrindex: IndexList, *,
+            ibrow: int, ibcol: int) -> Tuple[np.ndarray, TargetAndSourceClusterList]:
+        from pytential.collection import add_geometry_to_collection
+        tgt_pxy_index = TargetAndSourceClusterList(pxy.srcindex, pxy.pxyindex)
+        places = add_geometry_to_collection(
+                places, {PROXY_SKELETONIZATION_SOURCE: pxy.as_sources()}
+                )
+
+        eval_mapper_cls = self.proxy_target_cluster_builder
+        expr = self.target_proxy_exprs[ibrow]
+        mat = self._evaluate_expr(
+                actx, places, eval_mapper_cls, tgt_pxy_index, expr,
+                idomain=ibcol,
+                _weighted=False,
+                exclude_self=False)
+
+        if self.weighted_targets:
+            mat = _apply_weights(
+                    actx, mat, places,
+                    tgt_pxy_index, nbrindex, self.domains[ibcol])
+
+        return mat, tgt_pxy_index
+
+    # }}}
+
+
+def make_skeletonization_wrangler(
+        places: GeometryCollection,
+        exprs: Union[sym.var, Iterable[sym.var]],
+        input_exprs: Union[sym.var, Iterable[sym.var]], *,
+        domains: Optional[Iterable[Hashable]] = None,
+        context: Optional[Dict[str, Any]] = None,
+        auto_where: Optional[Union[Hashable, Tuple[Hashable, Hashable]]] = None,
+
+        # internal
+        _weighted_proxy: Optional[Union[bool, Tuple[bool, bool]]] = None,
+        _proxy_source_cluster_builder: Optional[Callable[..., np.ndarray]] = None,
+        _proxy_target_cluster_builder: Optional[Callable[..., np.ndarray]] = None,
+        _neighbor_cluster_builder: Optional[Callable[..., np.ndarray]] = None):
+    if context is None:
+        context = {}
+
+    # {{{ setup expressions
+
+    try:
+        exprs = list(exprs)
+    except TypeError:
+        exprs = [exprs]
+
+    try:
+        input_exprs = list(input_exprs)
+    except TypeError:
+        input_exprs = [input_exprs]
+
+    from pytential.symbolic.execution import _prepare_auto_where
+    auto_where = _prepare_auto_where(auto_where, places)
+    from pytential.symbolic.execution import _prepare_domains
+    domains = _prepare_domains(len(input_exprs), places, domains, auto_where[0])
+
+    exprs = prepare_expr(places, exprs, auto_where)
+    source_proxy_exprs = prepare_proxy_expr(
+            places, exprs, (auto_where[0], PROXY_SKELETONIZATION_TARGET))
+    target_proxy_exprs = prepare_proxy_expr(
+            places, exprs, (PROXY_SKELETONIZATION_SOURCE, auto_where[1]))
+
+    # }}}
+
+    # {{{ weighting
+
+    if _weighted_proxy is None:
+        weighted_sources = weighted_targets = True
+    elif isinstance(_weighted_proxy, bool):
+        weighted_sources = weighted_targets = _weighted_proxy
+    elif isinstance(_weighted_proxy, tuple):
+        weighted_sources, weighted_targets = _weighted_proxy
+    else:
+        raise ValueError(f"unknown value for weighting: '{_weighted_proxy}'")
+
+    # }}}
+
+    # {{{ builders
+
+    from pytential.symbolic.matrix import (
+            P2PClusterMatrixBuilder, QBXClusterMatrixBuilder)
+
+    if _neighbor_cluster_builder is None:
+        _neighbor_cluster_builder = QBXClusterMatrixBuilder
+
+    if _proxy_source_cluster_builder is None:
+        _proxy_source_cluster_builder = P2PClusterMatrixBuilder
+
+    if _proxy_target_cluster_builder is None:
+        _proxy_target_cluster_builder = QBXClusterMatrixBuilder
+
+    # }}}
+
+    return SkeletonizationWrangler(
+            # operator
+            exprs=exprs,
+            input_exprs=tuple(input_exprs),
+            domains=tuple(domains),
+            context=context,
+            neighbor_cluster_builder=_neighbor_cluster_builder,
+            # source
+            weighted_sources=weighted_sources,
+            source_proxy_exprs=source_proxy_exprs,
+            proxy_source_cluster_builder=_proxy_source_cluster_builder,
+            # target
+            weighted_targets=weighted_targets,
+            target_proxy_exprs=target_proxy_exprs,
+            proxy_target_cluster_builder=_proxy_target_cluster_builder,
+            )
+
+# }}}
+
+
+# {{{ skeletonize_block_by_proxy
+
+@dataclass(frozen=True)
+class _ProxyNeighborEvaluationResult:
+    pxy: ProxyClusterGeometryData
+
+    pxymat: np.ndarray
+    pxyindex: TargetAndSourceClusterList
+
+    nbrmat: np.ndarray
+    nbrindex: TargetAndSourceClusterList
+
+    def __getitem__(self, i: int) -> Tuple[np.ndarray, np.ndarray]:
+        shape = self.nbrindex.cluster_shape(i, i)
+        nbrmat_i = self.nbrindex.flat_cluster_take(self.nbrmat, i).reshape(*shape)
+
+        shape = self.pxyindex.cluster_shape(i, i)
+        pxymat_i = self.pxyindex.flat_cluster_take(self.pxymat, i).reshape(*shape)
+
+        return [pxymat_i, nbrmat_i]
+
+
+def _make_block_proxy_skeleton(
+        actx, ibrow, ibcol,
+        places, proxy_generator, wrangler, cluster_index,
+        evaluate_proxy, evaluate_neighbor,
+        max_particles_in_box=None):
+    """Builds a block matrix that can be used to skeletonize the
+    rows (targets) or columns (sources) of the symbolic matrix block
+    described by ``(ibrow, ibcol)``.
+    """
+
+    if cluster_index.nclusters == 1:
+        raise ValueError("cannot make a proxy skeleton for a single cluster")
+
+    # {{{ generate proxies
+
+    domain = wrangler.domains[ibcol]
+    pxy = proxy_generator(actx, domain, cluster_index)
+
+    # }}}
+
+    # {{{ evaluate proxy and neighbor interactions
+
+    from pytential.linalg import gather_cluster_neighbor_points
+    nbrindex = gather_cluster_neighbor_points(
+            actx, pxy,
+            max_particles_in_box=max_particles_in_box)
+
+    pxymat, pxy_geo_index = evaluate_proxy(
+            actx, places, pxy, nbrindex, ibrow=ibrow, ibcol=ibcol)
+    nbrmat, nbr_geo_index = evaluate_neighbor(
+            actx, places, pxy, nbrindex, ibrow=ibrow, ibcol=ibcol)
+
+    # }}}
+
+    return _ProxyNeighborEvaluationResult(
+            pxy=pxy,
+            pxymat=pxymat, pxyindex=pxy_geo_index,
+            nbrmat=nbrmat, nbrindex=nbr_geo_index)
+
+
+def _skeletonize_block_by_proxy_with_mats(
+        actx: PyOpenCLArrayContext, ibrow: int, ibcol: int,
+        places: GeometryCollection,
+        proxy_generator: ProxyGeneratorBase,
+        wrangler: SkeletonizationWrangler,
+        tgt_src_index: TargetAndSourceClusterList, *,
+        id_eps: Optional[float] = None, id_rank: Optional[int] = None,
+        max_particles_in_box: Optional[int] = None
+        ):
+    nclusters = tgt_src_index.nclusters
+    if nclusters == 1:
+        raise ValueError("cannot make a proxy skeleton for a single cluster")
+
+    # construct proxy matrices to skeletonize
+    from functools import partial
+    make_proxy_skeleton = partial(_make_block_proxy_skeleton,
+            actx, ibrow, ibcol, places, proxy_generator, wrangler,
+            max_particles_in_box=max_particles_in_box)
+
+    src_result = make_proxy_skeleton(
+            tgt_src_index.sources,
+            evaluate_proxy=wrangler.evaluate_source_proxy_interaction,
+            evaluate_neighbor=wrangler.evaluate_source_neighbor_interaction,
+            )
+    tgt_result = make_proxy_skeleton(
+            tgt_src_index.targets,
+            evaluate_proxy=wrangler.evaluate_target_proxy_interaction,
+            evaluate_neighbor=wrangler.evaluate_target_neighbor_interaction,
+            )
+
+    src_skl_indices = np.empty(nclusters, dtype=object)
+    tgt_skl_indices = np.empty(nclusters, dtype=object)
+    skel_starts = np.zeros(nclusters + 1, dtype=np.int32)
+
+    L = np.full((nclusters, nclusters), 0, dtype=object)
+    R = np.full((nclusters, nclusters), 0, dtype=object)
+
+    from pytential.linalg import interp_decomp
+    for i in range(nclusters):
+        k = id_rank
+        src_mat = np.vstack(src_result[i])
+        tgt_mat = np.hstack(tgt_result[i])
+
+        assert np.all(np.isfinite(tgt_mat)), np.where(np.isfinite(tgt_mat))
+        assert np.all(np.isfinite(src_mat)), np.where(np.isfinite(src_mat))
+
+        # skeletonize target points
+        k, idx, interp = interp_decomp(tgt_mat.T, k, id_eps)
+        assert k > 0
+
+        L[i, i] = interp.T
+        tgt_skl_indices[i] = tgt_src_index.targets.cluster_indices(i)[idx[:k]]
+
+        # skeletonize source points
+        k, idx, interp = interp_decomp(src_mat, k, id_eps)
+        assert k > 0
+
+        R[i, i] = interp
+        src_skl_indices[i] = tgt_src_index.sources.cluster_indices(i)[idx[:k]]
+
+        skel_starts[i + 1] = skel_starts[i] + k
+        assert R[i, i].shape == (k, src_mat.shape[1])
+        assert L[i, i].shape == (tgt_mat.shape[0], k)
+
+    from pytential.linalg import make_index_list
+    src_skl_index = make_index_list(np.hstack(src_skl_indices), skel_starts)
+    tgt_skl_index = make_index_list(np.hstack(tgt_skl_indices), skel_starts)
+    skel_tgt_src_index = TargetAndSourceClusterList(tgt_skl_index, src_skl_index)
+
+    return L, R, skel_tgt_src_index, src_result, tgt_result
+
+# }}}
+
+
+# {{{ skeletonize_by_proxy
+
+@dataclass(frozen=True)
+class SkeletonizationResult:
+    r"""Result of a skeletonization procedure.
+
+    A matrix :math:`A` can be reconstructed using:
+
+    .. math::
+
+        A \approx L S R
+
+    where :math:`S = A_{I, J}` for a subset :math:`I` and :math:`J` of the
+    rows and columns of :math:`A`, respectively. This applies to each cluster
+    in :attr:`tgt_src_index`.
+
+    .. attribute:: nclusters
+
+        Number of clusters that have been skeletonized.
+
+    .. attribute:: L
+
+        A block diagonal :class:`~numpy.ndarray` object array.
+
+    .. attribute:: R
+
+        A block diagonal :class:`~numpy.ndarray` object array.
+
+    .. attribute:: tgt_src_index
+
+        A :class:`~pytential.linalg.TargetAndSourceClusterList` representing the
+        indices in the original matrix :math:`A` that have been skeletonized.
+
+    .. attribute:: skel_tgt_src_index
+
+        A :class:`~pytential.linalg.TargetAndSourceClusterList` representing a
+        subset of :attr:`tgt_src_index` of the matrix :math:`S`, i.e. the
+        skeleton of each cluster of :math:`A`.
+    """
+
+    L: np.ndarray
+    R: np.ndarray
+
+    tgt_src_index: TargetAndSourceClusterList
+    skel_tgt_src_index: TargetAndSourceClusterList
+
+    def __post_init__(self):
+        if __debug__:
+            nclusters = self.tgt_src_index.nclusters
+            shape = (nclusters, nclusters)
+
+            if self.tgt_src_index.nclusters != self.skel_tgt_src_index.nclusters:
+                raise ValueError("'tgt_src_index' and 'skel_tgt_src_index' have "
+                        f"different number of blocks: {self.tgt_src_index.nclusters}"
+                        f" vs {self.skel_tgt_src_index.nclusters}")
+
+            if self.L.shape != shape:
+                raise ValueError(f"'L' has shape {self.L.shape}, expected {shape}")
+
+            if self.R.shape != shape:
+                raise ValueError(f"'R' has shape {self.R.shape}, expected {shape}")
+
+    @property
+    def nclusters(self):
+        return self.tgt_src_index.nclusters
+
+
+def skeletonize_by_proxy(
+        actx: PyOpenCLArrayContext,
+        places: GeometryCollection,
+
+        tgt_src_index: TargetAndSourceClusterList,
+        exprs: Union[sym.var, Iterable[sym.var]],
+        input_exprs: Union[sym.var, Iterable[sym.var]], *,
+        domains: Optional[Iterable[Hashable]] = None,
+        context: Optional[Dict[str, Any]] = None,
+
+        approx_nproxy: Optional[int] = None,
+        proxy_radius_factor: Optional[float] = None,
+
+        id_eps: Optional[float] = None,
+        id_rank: Optional[int] = None,
+        max_particles_in_box: Optional[int] = None) -> np.ndarray:
+    r"""Evaluate and skeletonize a symbolic expression using proxy-based methods.
+
+    :arg tgt_src_index: a :class:`~pytential.linalg.TargetAndSourceClusterList`
+        indicating which indices participate in the skeletonization.
+
+    :arg exprs: see :func:`make_skeletonization_wrangler`.
+    :arg input_exprs: see :func:`make_skeletonization_wrangler`.
+    :arg domains: see :func:`make_skeletonization_wrangler`.
+    :arg context: see :func:`make_skeletonization_wrangler`.
+
+    :arg approx_nproxy: see :class:`~pytential.linalg.ProxyGenerator`.
+    :arg proxy_radius_factor: see :class:`~pytential.linalg.ProxyGenerator`.
+
+    :arg id_eps: a floating point value used as a tolerance in skeletonizing
+        each block in *tgt_src_index*.
+    :arg id_rank: an alternative to *id_eps*, which fixes the rank of each
+        skeletonization.
+    :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder` as necessary.
+
+    :returns: an :class:`~numpy.ndarray` of :class:`SkeletonizationResult` of
+        shape ``(len(exprs), len(input_exprs))``.
+    """
+    from pytential.linalg.proxy import QBXProxyGenerator
+    wrangler = make_skeletonization_wrangler(
+            places, exprs, input_exprs,
+            domains=domains, context=context)
+    proxy = QBXProxyGenerator(places,
+            approx_nproxy=approx_nproxy,
+            radius_factor=proxy_radius_factor)
+
+    skels = np.empty((wrangler.nrows, wrangler.ncols), dtype=object)
+    for ibrow in range(wrangler.nrows):
+        for ibcol in range(wrangler.ncols):
+            L, R, skel_tgt_src_index, _, _ = _skeletonize_block_by_proxy_with_mats(
+                    actx, ibrow, ibcol, places, proxy, wrangler, tgt_src_index,
+                    id_eps=id_eps, id_rank=id_rank,
+                    max_particles_in_box=max_particles_in_box)
+
+            skels[ibrow, ibcol] = SkeletonizationResult(
+                    L=L, R=R,
+                    tgt_src_index=tgt_src_index,
+                    skel_tgt_src_index=skel_tgt_src_index)
+
+    return skels
+
+# }}}
diff --git a/pytential/linalg/utils.py b/pytential/linalg/utils.py
index 4469fda9b..1bb8b2bec 100644
--- a/pytential/linalg/utils.py
+++ b/pytential/linalg/utils.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright (C) 2018-2021 Alexandru Fikl"
+__copyright__ = "Copyright (C) 2018-2022 Alexandru Fikl"
 
 __license__ = """
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -21,13 +21,18 @@
 """
 
 from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import Optional, Tuple, TYPE_CHECKING
 
 import numpy as np
+import numpy.linalg as la
 
 from arraycontext import PyOpenCLArrayContext, Array
 from pytools import memoize_in, memoize_method
 
+if TYPE_CHECKING:
+    from pytential.linalg.skeletonization import SkeletonizationResult
+
+
 __doc__ = """
 Misc
 ~~~~
@@ -307,11 +312,127 @@ def make_flat_cluster_diag(
         diagonal element :math:`(i, i)` is the reshaped slice of *mat* that
         corresponds to the cluster :math:`i`.
     """
-    block_mat = np.full((mindex.nclusters, mindex.nclusters), 0, dtype=object)
+    cluster_mat = np.full((mindex.nclusters, mindex.nclusters), 0, dtype=object)
     for i in range(mindex.nclusters):
         shape = mindex.cluster_shape(i, i)
-        block_mat[i, i] = mindex.flat_cluster_take(mat, i).reshape(*shape)
+        cluster_mat[i, i] = mindex.flat_cluster_take(mat, i).reshape(*shape)
+
+    return cluster_mat
+
+# }}}
+
+
+# {{{ interpolative decomposition
+
+def interp_decomp(
+        A: np.ndarray, rank: Optional[int], eps: Optional[float],
+        ) -> Tuple[int, np.ndarray, np.ndarray]:
+    """Wrapper for :func:`~scipy.linalg.interpolative.interp_decomp` that
+    always has the same output signature.
+
+    :return: a tuple ``(k, idx, interp)`` containing the numerical rank,
+        the column indices and the resulting interpolation matrix.
+    """
+
+    import scipy.linalg.interpolative as sli    # pylint:disable=no-name-in-module
+    if rank is None:
+        k, idx, proj = sli.interp_decomp(A, eps)
+    else:
+        idx, proj = sli.interp_decomp(A, rank)
+        k = rank
+
+    interp = sli.reconstruct_interp_matrix(idx, proj)
+    return k, idx, interp
+
+# }}}
+
+
+# {{{ cluster matrix errors
+
+def cluster_skeletonization_error(
+        mat: np.ndarray, skeleton: "SkeletonizationResult", *,
+        ord: Optional[float] = None,
+        relative: bool = False) -> np.ndarray:
+    from itertools import product
+
+    L = skeleton.L
+    R = skeleton.R
+    skel_tgt_src_index = skeleton.skel_tgt_src_index
+    tgt_src_index = skeleton.tgt_src_index
+    nclusters = skeleton.nclusters
+
+    def mnorm(x: np.ndarray, y: np.ndarray) -> float:
+        result = la.norm(x - y, ord=ord)
+        if relative:
+            result = result / la.norm(x, ord=ord)
+
+        return result
+
+    # {{{ compute cluster-wise errors
+
+    tgt_error = np.zeros((nclusters, nclusters))
+    src_error = np.zeros((nclusters, nclusters))
+
+    for i, j in product(range(nclusters), repeat=2):
+        if i == j:
+            continue
+
+        # full matrix indices
+        f_tgt = tgt_src_index.targets.cluster_indices(i)
+        f_src = tgt_src_index.sources.cluster_indices(j)
+        A = mat[np.ix_(f_tgt, f_src)]
+
+        # skeleton matrix indices
+        s_tgt = skel_tgt_src_index.targets.cluster_indices(i)
+        s_src = skel_tgt_src_index.sources.cluster_indices(j)
+
+        # compute cluster-wise errors
+        S = mat[np.ix_(s_tgt, f_src)]
+        tgt_error[i, j] = mnorm(A, L[i, i] @ S)
+
+        S = mat[np.ix_(f_tgt, s_src)]
+        src_error[i, j] = mnorm(A, S @ R[j, j])
+
+    # }}}
+
+    return tgt_error, src_error
+
+
+def skeletonization_error(
+        mat: np.ndarray, skeleton: "SkeletonizationResult", *,
+        ord: Optional[float] = None,
+        relative: bool = False) -> np.ndarray:
+    L = skeleton.L
+    R = skeleton.R
+    tgt_src_index = skeleton.tgt_src_index
+    skel_tgt_src_index = skeleton.skel_tgt_src_index
+
+    # {{{ contruct matrices
+
+    # NOTE: the diagonal should be the same by definition
+    skl = mat.copy()
+
+    from itertools import product
+    for i, j in product(range(skeleton.nclusters), repeat=2):
+        if i == j:
+            continue
+
+        # full matrix indices
+        f_tgt = tgt_src_index.targets.cluster_indices(i)
+        f_src = tgt_src_index.sources.cluster_indices(j)
+        # skeleton matrix indices
+        s_tgt = skel_tgt_src_index.targets.cluster_indices(i)
+        s_src = skel_tgt_src_index.sources.cluster_indices(j)
+
+        S = mat[np.ix_(s_tgt, s_src)]
+        skl[np.ix_(f_tgt, f_src)] = L[i, i] @ S @ R[j, j]
+
+    # }}}
+
+    result = la.norm(mat - skl, ord=ord)
+    if relative:
+        result = result / la.norm(mat, ord=ord)
 
-    return block_mat
+    return result
 
 # }}}
diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py
index 9e87cfd01..dfee08d2f 100644
--- a/pytential/symbolic/matrix.py
+++ b/pytential/symbolic/matrix.py
@@ -322,11 +322,13 @@ class MatrixBuilderDirectResamplerCacheKey:
 
 class MatrixBuilder(MatrixBuilderBase):
     def __init__(self, actx, dep_expr, other_dep_exprs,
-            dep_source, dep_discr, places, context):
+            dep_source, dep_discr, places, context, _weighted=True):
         super().__init__(
                 actx, dep_expr, other_dep_exprs,
                 dep_source, dep_discr, places, context)
 
+        self.weighted = _weighted
+
     def map_interpolation(self, expr):
         from pytential import sym
 
@@ -385,6 +387,9 @@ def map_int_g(self, expr):
         target_discr = self.places.get_discretization(
                 expr.target.geometry, expr.target.discr_stage)
 
+        actx = self.array_context
+        assert abs(expr.qbx_forced_limit) > 0
+
         result = 0
         for kernel, density in zip(expr.source_kernels, expr.densities):
             rec_density = self.rec(density)
@@ -395,18 +400,8 @@ def map_int_g(self, expr):
             if not self.is_kind_matrix(rec_density):
                 raise NotImplementedError("layer potentials on non-variables")
 
-            actx = self.array_context
-            kernel_args = _get_layer_potential_args(
-                    actx, self.places, expr, context=self.context)
-            local_expn = lpot_source.get_expansion_for_qbx_direct_eval(
-                    kernel.get_base_kernel(), (expr.target_kernel,))
+            # {{{ geometry
 
-            from sumpy.qbx import LayerPotentialMatrixGenerator
-            mat_gen = LayerPotentialMatrixGenerator(actx.context,
-                expansion=local_expn, source_kernels=(kernel,),
-                target_kernels=(expr.target_kernel,))
-
-            assert abs(expr.qbx_forced_limit) > 0
             from pytential import bind, sym
             radii = bind(self.places, sym.expansion_radii(
                 source_discr.ambient_dim,
@@ -416,6 +411,25 @@ def map_int_g(self, expr):
                 expr.qbx_forced_limit,
                 dofdesc=expr.target))(actx)
 
+            # }}}
+
+            # {{{ expansion
+
+            local_expn = lpot_source.get_expansion_for_qbx_direct_eval(
+                    kernel.get_base_kernel(), (expr.target_kernel,))
+
+            from sumpy.qbx import LayerPotentialMatrixGenerator
+            mat_gen = LayerPotentialMatrixGenerator(actx.context,
+                expansion=local_expn, source_kernels=(kernel,),
+                target_kernels=(expr.target_kernel,))
+
+            # }}}
+
+            # {{{ evaluate
+
+            kernel_args = _get_layer_potential_args(
+                    actx, self.places, expr, context=self.context)
+
             _, (mat,) = mat_gen(actx.queue,
                     targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray),
                     sources=flatten(source_discr.nodes(), actx, leaf_class=DOFArray),
@@ -424,10 +438,13 @@ def map_int_g(self, expr):
                     **kernel_args)
             mat = actx.to_numpy(mat)
 
-            waa = bind(self.places, sym.weights_and_area_elements(
-                source_discr.ambient_dim,
-                dofdesc=expr.source))(actx)
-            mat[:, :] *= actx.to_numpy(flatten(waa, actx))
+            if self.weighted:
+                waa = bind(self.places, sym.weights_and_area_elements(
+                    source_discr.ambient_dim,
+                    dofdesc=expr.source))(actx)
+                mat[:, :] *= actx.to_numpy(flatten(waa, actx))
+
+            # }}}
 
             result += mat @ rec_density
 
@@ -441,13 +458,13 @@ def map_int_g(self, expr):
 class P2PMatrixBuilder(MatrixBuilderBase):
     def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context,
-            weighted=False, exclude_self=True):
+            exclude_self=True, _weighted=False):
         super().__init__(actx,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, context)
 
-        self.weighted = weighted
         self.exclude_self = exclude_self
+        self.weighted = _weighted
 
     def map_int_g(self, expr):
         source_discr = self.places.get_discretization(
@@ -455,6 +472,9 @@ def map_int_g(self, expr):
         target_discr = self.places.get_discretization(
                 expr.target.geometry, expr.target.discr_stage)
 
+        actx = self.array_context
+        target_base_kernel = expr.target_kernel.get_base_kernel()
+
         result = 0
         for density, kernel in zip(expr.densities, expr.source_kernels):
             rec_density = self.rec(density)
@@ -465,25 +485,36 @@ def map_int_g(self, expr):
             if not self.is_kind_matrix(rec_density):
                 raise NotImplementedError("layer potentials on non-variables")
 
-            # NOTE: copied from pytential.symbolic.primitives.IntG
+            # {{{ generator
+
             base_kernel = kernel.get_base_kernel()
+
+            from sumpy.p2p import P2PMatrixGenerator
+            mat_gen = P2PMatrixGenerator(actx.context,
+                    source_kernels=(base_kernel,),
+                    target_kernels=(target_base_kernel,),
+                    exclude_self=self.exclude_self)
+
+            # }}}
+
+            # {{{ evaluation
+
+            # {{{ kernel args
+
+            # NOTE: copied from pytential.symbolic.primitives.IntG
             kernel_args = base_kernel.get_args() + base_kernel.get_source_args()
             kernel_args = {arg.loopy_arg.name for arg in kernel_args}
 
-            actx = self.array_context
             kernel_args = _get_layer_potential_args(
                     actx, self.places, expr, context=self.context,
                     include_args=kernel_args)
+
             if self.exclude_self:
                 kernel_args["target_to_source"] = actx.from_numpy(
                         np.arange(0, target_discr.ndofs, dtype=np.int64)
                         )
 
-            from sumpy.p2p import P2PMatrixGenerator
-            mat_gen = P2PMatrixGenerator(actx.context,
-                    source_kernels=(base_kernel,),
-                    target_kernels=(expr.target_kernel.get_base_kernel(),),
-                    exclude_self=self.exclude_self)
+            # }}}
 
             _, (mat,) = mat_gen(actx.queue,
                     targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray),
@@ -491,8 +522,7 @@ def map_int_g(self, expr):
                     **kernel_args)
             mat = actx.to_numpy(mat)
 
-            from meshmode.discretization import Discretization
-            if self.weighted and isinstance(source_discr, Discretization):
+            if self.weighted:
                 from pytential import bind, sym
                 waa = bind(self.places, sym.weights_and_area_elements(
                     source_discr.ambient_dim,
@@ -500,6 +530,8 @@ def map_int_g(self, expr):
 
                 mat[:, :] *= actx.to_numpy(flatten(waa, actx))
 
+            # }}}
+
             result += mat @ rec_density
 
         return result
@@ -511,11 +543,14 @@ def map_int_g(self, expr):
 
 class QBXClusterMatrixBuilder(ClusterMatrixBuilderBase):
     def __init__(self, queue, dep_expr, other_dep_exprs, dep_source, dep_discr,
-            places, tgt_src_index, context):
+            places, tgt_src_index, context,
+            exclude_self=False, _weighted=True):
         super().__init__(queue,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, tgt_src_index, context)
 
+        self.weighted = _weighted
+
     def map_int_g(self, expr):
         lpot_source = self.places.get_geometry(expr.source.geometry)
         source_discr = self.places.get_discretization(
@@ -523,33 +558,28 @@ def map_int_g(self, expr):
         target_discr = self.places.get_discretization(
                 expr.target.geometry, expr.target.discr_stage)
 
-        if source_discr is not target_discr:
+        if self.weighted and source_discr is not target_discr:
             raise NotImplementedError
 
+        actx = self.array_context
+
         result = 0
+        assert abs(expr.qbx_forced_limit) > 0
+
         for kernel, density in zip(expr.source_kernels, expr.densities):
             rec_density = self._inner_mapper.rec(density)
             if is_zero(rec_density):
                 continue
 
             if not np.isscalar(rec_density):
-                raise NotImplementedError
+                raise NotImplementedError("layer potentials on non-variables")
 
-            actx = self.array_context
-            kernel_args = _get_layer_potential_args(
-                    actx, self.places, expr, context=self.context)
-            local_expn = lpot_source.get_expansion_for_qbx_direct_eval(
-                    kernel.get_base_kernel(), (expr.target_kernel,))
+            # {{{ geometry
 
             from pytential.linalg import make_index_cluster_cartesian_product
             tgtindices, srcindices = make_index_cluster_cartesian_product(
                     actx, self.tgt_src_index)
 
-            from sumpy.qbx import LayerPotentialMatrixSubsetGenerator
-            mat_gen = LayerPotentialMatrixSubsetGenerator(actx.context, local_expn,
-                source_kernels=(kernel,), target_kernels=(expr.target_kernel,))
-
-            assert abs(expr.qbx_forced_limit) > 0
             from pytential import bind, sym
             radii = bind(self.places, sym.expansion_radii(
                 source_discr.ambient_dim,
@@ -559,6 +589,24 @@ def map_int_g(self, expr):
                 expr.qbx_forced_limit,
                 dofdesc=expr.target))(actx)
 
+            # }}}
+
+            # {{{ generator
+
+            local_expn = lpot_source.get_expansion_for_qbx_direct_eval(
+                    kernel.get_base_kernel(), (expr.target_kernel,))
+
+            from sumpy.qbx import LayerPotentialMatrixSubsetGenerator
+            mat_gen = LayerPotentialMatrixSubsetGenerator(actx.context, local_expn,
+                source_kernels=(kernel,), target_kernels=(expr.target_kernel,))
+
+            # }}}
+
+            # {{{ evaluate
+
+            kernel_args = _get_layer_potential_args(
+                    actx, self.places, expr, context=self.context)
+
             _, (mat,) = mat_gen(actx.queue,
                     targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray),
                     sources=flatten(source_discr.nodes(), actx, leaf_class=DOFArray),
@@ -568,13 +616,16 @@ def map_int_g(self, expr):
                     srcindices=srcindices,
                     **kernel_args)
 
-            waa = flatten(
-                    bind(self.places,
-                        sym.weights_and_area_elements(
-                            source_discr.ambient_dim,
-                            dofdesc=expr.source))(actx),
-                    actx)
-            mat *= waa[srcindices]
+            if self.weighted:
+                waa = flatten(
+                        bind(self.places,
+                            sym.weights_and_area_elements(
+                                source_discr.ambient_dim,
+                                dofdesc=expr.source))(actx),
+                        actx)
+                mat *= waa[srcindices]
+
+            # }}}
 
             result += actx.to_numpy(mat) * rec_density
 
@@ -584,13 +635,13 @@ def map_int_g(self, expr):
 class P2PClusterMatrixBuilder(ClusterMatrixBuilderBase):
     def __init__(self, queue, dep_expr, other_dep_exprs, dep_source, dep_discr,
             places, tgt_src_index, context,
-            weighted=False, exclude_self=False):
+            exclude_self=False, _weighted=False):
         super().__init__(queue,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, tgt_src_index, context)
 
-        self.weighted = weighted
         self.exclude_self = exclude_self
+        self.weighted = _weighted
 
     def map_int_g(self, expr):
         source_discr = self.places.get_discretization(
@@ -598,6 +649,9 @@ def map_int_g(self, expr):
         target_discr = self.places.get_discretization(
                 expr.target.geometry, expr.target.discr_stage)
 
+        actx = self.array_context
+        target_base_kernel = expr.target_kernel.get_base_kernel()
+
         result = 0
         for kernel, density in zip(expr.source_kernels, expr.densities):
             rec_density = self._inner_mapper.rec(density)
@@ -607,29 +661,44 @@ def map_int_g(self, expr):
             if not np.isscalar(rec_density):
                 raise NotImplementedError
 
-            # NOTE: copied from pytential.symbolic.primitives.IntG
+            # {{{ geometry
+
+            from pytential.linalg import make_index_cluster_cartesian_product
+            tgtindices, srcindices = make_index_cluster_cartesian_product(
+                    actx, self.tgt_src_index)
+
+            # }}}
+
+            # {{{ generator
+
             base_kernel = kernel.get_base_kernel()
+
+            from sumpy.p2p import P2PMatrixSubsetGenerator
+            mat_gen = P2PMatrixSubsetGenerator(actx.context,
+                    source_kernels=(base_kernel,),
+                    target_kernels=(target_base_kernel,),
+                    exclude_self=self.exclude_self)
+
+            # }}}
+
+            # {{{ evaluation
+
+            # {{{ kernel args
+
+            # NOTE: copied from pytential.symbolic.primitives.IntG
             kernel_args = base_kernel.get_args() + base_kernel.get_source_args()
             kernel_args = {arg.loopy_arg.name for arg in kernel_args}
 
-            actx = self.array_context
             kernel_args = _get_layer_potential_args(
                     actx, self.places, expr, context=self.context,
                     include_args=kernel_args)
+
             if self.exclude_self:
                 kernel_args["target_to_source"] = actx.from_numpy(
                         np.arange(0, target_discr.ndofs, dtype=np.int64)
                         )
 
-            from pytential.linalg import make_index_cluster_cartesian_product
-            tgtindices, srcindices = make_index_cluster_cartesian_product(
-                    actx, self.tgt_src_index)
-
-            from sumpy.p2p import P2PMatrixSubsetGenerator
-            mat_gen = P2PMatrixSubsetGenerator(actx.context,
-                    source_kernels=(base_kernel,),
-                    target_kernels=(expr.target_kernel.get_base_kernel(),),
-                    exclude_self=self.exclude_self)
+            # }}}
 
             _, (mat,) = mat_gen(actx.queue,
                     targets=flatten(target_discr.nodes(), actx, leaf_class=DOFArray),
diff --git a/setup.py b/setup.py
index 4a0cd2020..a0ce4fce7 100644
--- a/setup.py
+++ b/setup.py
@@ -33,10 +33,7 @@ def find_git_revision(tree_root):
     )
     (git_rev, _) = p.communicate()
 
-    import sys
-
     git_rev = git_rev.decode()
-
     git_rev = git_rev.rstrip()
 
     retcode = p.returncode
@@ -128,6 +125,8 @@ def write_git_revision(package_name):
         "sumpy>=2020.2beta1",
         "pyfmmlib>=2019.1.1",
 
+        "dataclasses; python_version<'3.7'",
+        "scipy>=1.2.0",
         "immutables",
     ],
 )
diff --git a/test/extra_int_eq_data.py b/test/extra_int_eq_data.py
index f7a188f1c..1f43baea4 100644
--- a/test/extra_int_eq_data.py
+++ b/test/extra_int_eq_data.py
@@ -28,6 +28,8 @@
 from pytential import sym
 from pytools import memoize_method
 from sumpy.kernel import Kernel
+from meshmode.mesh import MeshElementGroup, SimplexElementGroup
+from meshmode.discretization import ElementGroupFactory
 from meshmode.discretization.poly_element import InterpolatoryQuadratureGroupFactory
 
 import logging
@@ -98,7 +100,8 @@ class IntegralEquationTestCase:
     source_ovsmp: int = 4
     target_order: Optional[int] = None
     use_refinement: bool = True
-    group_factory_cls: type = InterpolatoryQuadratureGroupFactory
+    group_cls: Type[MeshElementGroup] = SimplexElementGroup
+    group_factory_cls: ElementGroupFactory = InterpolatoryQuadratureGroupFactory
 
     # fmm
     fmm_backend: str = "sumpy"
@@ -183,7 +186,9 @@ def get_mesh(self, resolution, mesh_order):
 
     def get_discretization(self, actx, resolution, mesh_order):
         mesh = self.get_mesh(resolution, mesh_order)
+        return self._get_discretization(actx, mesh)
 
+    def _get_discretization(self, actx, mesh):
         from meshmode.discretization import Discretization
         return Discretization(actx, mesh,
                 self.group_factory_cls(self.target_order))
@@ -395,7 +400,8 @@ class SphereTestCase(IntegralEquationTestCase):
     def get_mesh(self, resolution, mesh_order):
         from meshmode.mesh.generation import generate_sphere
         return generate_sphere(self.radius, mesh_order,
-                uniform_refinement_rounds=resolution)
+                uniform_refinement_rounds=resolution,
+                group_cls=self.group_cls)
 
 
 @dataclass
@@ -431,6 +437,52 @@ def get_mesh(self, resolution, mesh_order):
         return mesh
 
 
+@dataclass
+class GMSHSphereTestCase(SphereTestCase):
+    name: str = "gmsphere"
+
+    radius: float = 1.5
+    resolutions: List[float] = field(default_factory=lambda: [0.4])
+
+    def get_mesh(self, resolution, mesh_order):
+        from meshmode.mesh.io import ScriptSource
+        from meshmode.mesh import SimplexElementGroup, TensorProductElementGroup
+        if issubclass(self.group_cls, SimplexElementGroup):
+            script = ScriptSource(
+                """
+                Mesh.CharacteristicLengthMax = %(length)g;
+                Mesh.HighOrderOptimize = 1;
+                Mesh.Algorithm = 1;
+
+                SetFactory("OpenCASCADE");
+                Sphere(1) = {0, 0, 0, %(radius)g};
+                """ % {"length": resolution, "radius": self.radius},
+                "geo")
+        elif issubclass(self.group_cls, TensorProductElementGroup):
+            script = ScriptSource(
+                """
+                Mesh.CharacteristicLengthMax = %(length)g;
+                Mesh.HighOrderOptimize = 1;
+                Mesh.Algorithm = 6;
+
+                SetFactory("OpenCASCADE");
+                Sphere(1) = {0, 0, 0, %(radius)g};
+                Recombine Surface "*" = 0.0001;
+                """ % {"length": resolution, "radius": self.radius},
+                "geo")
+        else:
+            raise TypeError
+
+        from meshmode.mesh.io import generate_gmsh
+        return generate_gmsh(
+                script,
+                order=mesh_order,
+                dimensions=2,
+                force_ambient_dim=3,
+                target_unit="MM",
+                )
+
+
 @dataclass
 class TorusTestCase(IntegralEquationTestCase):
     name: str = "torus"
@@ -450,7 +502,8 @@ class TorusTestCase(IntegralEquationTestCase):
 
     def get_mesh(self, resolution, mesh_order):
         from meshmode.mesh.generation import generate_torus
-        mesh = generate_torus(self.r_major, self.r_minor, order=mesh_order)
+        mesh = generate_torus(self.r_major, self.r_minor, order=mesh_order,
+                              group_cls=self.group_cls)
 
         from meshmode.mesh.refinement import refine_uniformly
         return refine_uniformly(mesh, resolution)
diff --git a/test/extra_matrix_data.py b/test/extra_matrix_data.py
index 54e7523b0..62deefd76 100644
--- a/test/extra_matrix_data.py
+++ b/test/extra_matrix_data.py
@@ -1,19 +1,29 @@
 from dataclasses import dataclass
-from typing import Optional
+from typing import Any, Callable, Optional
 
 import numpy as np
 
 from pytools.obj_array import make_obj_array
 
 from pytential import sym
+from pytential.symbolic.dof_desc import DOFGranularities
 
 import extra_int_eq_data as extra
 
 
 # {{{ MatrixTestCase
 
+class _NoArgSentinel:
+    pass
+
+
 @dataclass
 class MatrixTestCaseMixin:
+    # operators
+    op_type: str = "scalar"
+    # disable fmm for matrix tests
+    fmm_backend: Optional[str] = None
+
     # partitioning
     approx_cluster_count: int = 10
     max_particles_in_box: Optional[int] = None
@@ -21,14 +31,17 @@ class MatrixTestCaseMixin:
     index_sparsity_factor: float = 1.0
 
     # proxy
-    proxy_radius_factor: float = 1.1
-    proxy_approx_count: float = 32
+    proxy_radius_factor: float = 1.0
+    proxy_approx_count: Optional[float] = None
 
-    # operators
-    op_type: str = "scalar"
+    # skeletonization
+    id_eps: float = 1.0e-8
+    skel_discr_stage: DOFGranularities = sym.QBX_SOURCE_STAGE2
 
-    # disable fmm for matrix tests
-    fmm_backend: Optional[str] = None
+    weighted_proxy: Optional[bool] = None
+    proxy_source_cluster_builder: Callable[..., Any] = None
+    proxy_target_cluster_builder: Callable[..., Any] = None
+    neighbor_cluster_builder: Callable[..., Any] = None
 
     def get_cluster_index(self, actx, places, dofdesc=None):
         if dofdesc is None:
@@ -68,36 +81,54 @@ def get_tgt_src_cluster_index(self, actx, places, dofdesc=None):
         cindex = self.get_cluster_index(actx, places, dofdesc=dofdesc)
         return TargetAndSourceClusterList(cindex, cindex)
 
-    def get_operator(self, ambient_dim, qbx_forced_limit="avg"):
+    def get_operator(self, ambient_dim, qbx_forced_limit=_NoArgSentinel):
         knl = self.knl_class(ambient_dim)
-        kwargs = self.knl_sym_kwargs.copy()
-        kwargs["qbx_forced_limit"] = qbx_forced_limit
+
+        single_kwargs = self.knl_sym_kwargs.copy()
+        single_kwargs["qbx_forced_limit"] = (
+                +1 if qbx_forced_limit is _NoArgSentinel else qbx_forced_limit)
+        double_kwargs = self.knl_sym_kwargs.copy()
+        double_kwargs["qbx_forced_limit"] = (
+                "avg" if qbx_forced_limit is _NoArgSentinel else qbx_forced_limit)
 
         if self.op_type in ("scalar", "single"):
             sym_u = sym.var("u")
-            sym_op = sym.S(knl, sym_u, **kwargs)
+            sym_op = sym.S(knl, sym_u, **single_kwargs)
+
         elif self.op_type == "double":
             sym_u = sym.var("u")
-            sym_op = sym.D(knl, sym_u, **kwargs)
+            sym_op = sym.D(knl, sym_u, **double_kwargs)
+
+            if double_kwargs["qbx_forced_limit"] == "avg":
+                sym_op = 0.5 * self.side * sym_u + sym_op
+
         elif self.op_type == "scalar_mixed":
             sym_u = sym.var("u")
-            sym_op = sym.S(knl, 0.3 * sym_u, **kwargs) \
-                    + sym.D(knl, 0.5 * sym_u, **kwargs)
+            sym_op = (
+                    sym.S(knl, 0.3 * sym_u, **single_kwargs)
+                    + sym.D(knl, 0.5 * sym_u, **double_kwargs))
+
+            if double_kwargs["qbx_forced_limit"] == "avg":
+                sym_op = 0.25 * self.side * sym_u + sym_op
+
         elif self.op_type == "vector":
             sym_u = sym.make_sym_vector("u", ambient_dim)
-
             sym_op = make_obj_array([
-                sym.Sp(knl, sym_u[0], **kwargs)
-                + sym.D(knl, sym_u[1], **kwargs),
-                sym.S(knl, 0.4 * sym_u[0], **kwargs)
-                + 0.3 * sym.D(knl, sym_u[0], **kwargs)
+                sym.Sp(knl, sym_u[0], **double_kwargs)
+                + sym.D(knl, sym_u[1], **double_kwargs),
+                sym.S(knl, 0.4 * sym_u[0], **single_kwargs)
+                + 0.3 * sym.D(knl, sym_u[0], **double_kwargs)
                 ])
+
+            if double_kwargs["qbx_forced_limit"] == "avg":
+                sym_op = 0.5 * self.side * make_obj_array([
+                    -sym_u[0] + sym_u[1],
+                    0.3 * sym_u[0]
+                    ]) + sym_op
+
         else:
             raise ValueError(f"unknown operator type: '{self.op_type}'")
 
-        if self.side is not None:
-            sym_op = 0.5 * self.side * sym_u + sym_op
-
         return sym_u, sym_op
 
 
@@ -110,4 +141,9 @@ class CurveTestCase(MatrixTestCaseMixin, extra.CurveTestCase):
 class TorusTestCase(MatrixTestCaseMixin, extra.TorusTestCase):
     pass
 
+
+@dataclass
+class GMSHSphereTestCase(MatrixTestCaseMixin, extra.GMSHSphereTestCase):
+    pass
+
 # }}}
diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py
index a0bbfc3fd..3acb87aea 100644
--- a/test/test_linalg_proxy.py
+++ b/test/test_linalg_proxy.py
@@ -69,11 +69,6 @@ def plot_proxy_geometry(
                 flatten(discr.nodes(), actx)
                 ).reshape(ambient_dim, -1)
 
-        if pxy is not None:
-            proxies = np.stack(pxy.points)
-            pxycenters = np.stack(pxy.centers)
-            pxystarts = pxy.pxyindex.starts
-
         if with_qbx_centers:
             ci = actx.to_numpy(flatten(
                 bind(places, sym.expansion_centers(ambient_dim, -1))(actx),
@@ -102,8 +97,11 @@ def plot_proxy_geometry(
                     ax.add_artist(c)
 
             if pxy is not None:
+                pxystarts = pxy.pxyindex.starts
+
                 ipxy = np.s_[pxystarts[i]:pxystarts[i + 1]]
-                pt.plot(proxies[0, ipxy], proxies[1, ipxy], "o", ms=2.0)
+                pt.plot(pxy.points[0, ipxy], pxy.points[1, ipxy], "o", ms=2.0)
+                pt.text(*pxy.centers[:, i], f"{i}", fontsize=18)
 
             if nbrindex is not None:
                 inbr = nbrindex.cluster_indices(i)
@@ -156,12 +154,11 @@ def plot_proxy_geometry(
 
             from meshmode.mesh.generation import generate_sphere
             ref_mesh = generate_sphere(1, 4, uniform_refinement_rounds=1)
-            pxycenters = np.stack(pxy.centers)
 
             for i in range(cindex.nclusters):
                 mesh = affine_map(ref_mesh,
                     A=pxy.radii[i],
-                    b=pxycenters[:, i].reshape(-1))
+                    b=pxy.centers[:, i].reshape(-1))
 
                 mesh = merge_disjoint_meshes([mesh, discr.mesh])
                 discr = Discretization(actx, mesh,
@@ -268,8 +265,6 @@ def test_proxy_generator(actx_factory, case,
             radius_factor=case.proxy_radius_factor)
     pxy = generator(actx, places.auto_source, cindex).to_numpy(actx)
 
-    pxypoints = np.stack(pxy.points)
-    pxycenters = np.stack(pxy.centers)
     sources = actx.to_numpy(
             flatten(density_discr.nodes(), actx)
             ).reshape(places.ambient_dim, -1)
@@ -279,11 +274,11 @@ def test_proxy_generator(actx_factory, case,
         ipxy = pxy.pxyindex.cluster_indices(i)
 
         # check all the proxy points are inside the radius
-        r = la.norm(pxypoints[:, ipxy] - pxycenters[:, i].reshape(-1, 1), axis=0)
+        r = la.norm(pxy.points[:, ipxy] - pxy.centers[:, i].reshape(-1, 1), axis=0)
         p_error = la.norm(r - pxy.radii[i])
 
         # check all sources are inside the radius too
-        r = la.norm(sources[:, isrc] - pxycenters[:, i].reshape(-1, 1), axis=0)
+        r = la.norm(sources[:, isrc] - pxy.centers[:, i].reshape(-1, 1), axis=0)
         n_error = la.norm(r - pxy.radii[i], np.inf)
 
         assert p_error < 1.0e-14, f"cluster {i}"
@@ -345,7 +340,6 @@ def test_neighbor_points(actx_factory, case,
     nbrindex = gather_cluster_neighbor_points(actx, pxy)
 
     pxy = pxy.to_numpy(actx)
-    pxycenters = np.stack(pxy.centers)
     nodes = actx.to_numpy(
             flatten(density_discr.nodes(), actx)
             ).reshape(places.ambient_dim, -1)
@@ -358,7 +352,7 @@ def test_neighbor_points(actx_factory, case,
         assert not np.any(np.isin(inbr, isrc))
 
         # check that the neighbors are all within the proxy radius
-        r = la.norm(nodes[:, inbr] - pxycenters[:, i].reshape(-1, 1), axis=0)
+        r = la.norm(nodes[:, inbr] - pxy.centers[:, i].reshape(-1, 1), axis=0)
         assert np.all((r - pxy.radii[i]) < 0.0)
 
     # }}}
diff --git a/test/test_linalg_skeletonization.py b/test/test_linalg_skeletonization.py
new file mode 100644
index 000000000..292ebba63
--- /dev/null
+++ b/test/test_linalg_skeletonization.py
@@ -0,0 +1,460 @@
+__copyright__ = "Copyright (C) 2018 Alexandru Fikl"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from dataclasses import replace
+from functools import partial
+import pytest
+
+import numpy as np
+import numpy.linalg as la
+
+from pytential import sym
+from pytential import GeometryCollection
+
+from meshmode.mesh.generation import ellipse, NArmedStarfish
+
+from meshmode import _acf           # noqa: F401
+from arraycontext import pytest_generate_tests_for_array_contexts
+from meshmode.array_context import PytestPyOpenCLArrayContextFactory
+
+import extra_matrix_data as extra
+import logging
+logger = logging.getLogger(__name__)
+
+pytest_generate_tests = pytest_generate_tests_for_array_contexts([
+    PytestPyOpenCLArrayContextFactory,
+    ])
+
+SKELETONIZE_TEST_CASES = [
+        extra.CurveTestCase(
+            name="ellipse",
+            op_type="scalar",
+            target_order=7,
+            curve_fn=partial(ellipse, 3.0)),
+        extra.CurveTestCase(
+            name="starfish",
+            op_type="scalar",
+            target_order=4,
+            curve_fn=NArmedStarfish(5, 0.25),
+            resolutions=[32]),
+        extra.TorusTestCase(
+            target_order=4,
+            op_type="scalar",
+            resolutions=[0])
+        ]
+
+
+def _plot_skeleton_with_proxies(name, sources, pxy, srcindex, sklindex):
+    import matplotlib.pyplot as pt
+
+    fig, ax = pt.subplots(1, figsize=(10, 10), dpi=300)
+    ax.plot(sources[0][srcindex.indices], sources[1][srcindex.indices],
+            "ko", alpha=0.5)
+
+    for i in range(srcindex.nclusters):
+        iskl = sklindex.cluster_indices(i)
+        pt.plot(sources[0][iskl], sources[1][iskl], "o")
+
+        c = pt.Circle(pxy.centers[:, i], pxy.radii[i], color="k", alpha=0.1)
+        ax.add_artist(c)
+        ax.text(*pxy.centers[:, i], f"{i}",
+                fontweight="bold", ha="center", va="center")
+
+    ax.set_aspect("equal")
+    ax.set_xlim([-1.5, 1.5])
+    ax.set_ylim([-1.5, 1.5])
+    fig.savefig(f"test_skeletonize_by_proxy_{name}")
+    pt.close(fig)
+
+
+# {{{ test_skeletonize_symbolic
+
+@pytest.mark.parametrize("case", [
+    # Laplace
+    replace(SKELETONIZE_TEST_CASES[0], op_type="single", knl_class_or_helmholtz_k=0),
+    replace(SKELETONIZE_TEST_CASES[0], op_type="double", knl_class_or_helmholtz_k=0),
+    # Helmholz
+    replace(SKELETONIZE_TEST_CASES[0], op_type="single", knl_class_or_helmholtz_k=5),
+    replace(SKELETONIZE_TEST_CASES[0], op_type="double", knl_class_or_helmholtz_k=5),
+    ])
+def test_skeletonize_symbolic(actx_factory, case, visualize=False):
+    """Tests that the symbolic manipulations work for different kernels / IntGs."""
+    actx = actx_factory()
+
+    if visualize:
+        logging.basicConfig(level=logging.INFO)
+    logger.info("\n%s", case)
+
+    # {{{ geometry
+
+    dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage)
+    qbx = case.get_layer_potential(actx, case.resolutions[0], case.target_order)
+    places = GeometryCollection(qbx, auto_where=dd)
+
+    density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
+    tgt_src_index = case.get_tgt_src_cluster_index(actx, places, dd)
+
+    logger.info("nclusters %3d ndofs %7d",
+            tgt_src_index.nclusters, density_discr.ndofs)
+
+    # }}}
+
+    # {{{ wranglers
+
+    from pytential.linalg import QBXProxyGenerator
+    from pytential.linalg.skeletonization import make_skeletonization_wrangler
+    proxy_generator = QBXProxyGenerator(places,
+            radius_factor=case.proxy_radius_factor,
+            approx_nproxy=case.proxy_approx_count)
+
+    sym_u, sym_op = case.get_operator(places.ambient_dim)
+    wrangler = make_skeletonization_wrangler(places, sym_op, sym_u,
+            domains=None,
+            context=case.knl_concrete_kwargs,
+            _weighted_proxy=case.weighted_proxy,
+            _proxy_source_cluster_builder=case.proxy_source_cluster_builder,
+            _proxy_target_cluster_builder=case.proxy_target_cluster_builder,
+            _neighbor_cluster_builder=case.neighbor_cluster_builder)
+
+    # }}}
+
+    from pytential.linalg.skeletonization import (
+            _skeletonize_block_by_proxy_with_mats)
+    _skeletonize_block_by_proxy_with_mats(
+        actx, 0, 0, places, proxy_generator, wrangler, tgt_src_index,
+        id_eps=1.0e-8
+    )
+
+# }}}
+
+
+# {{{ test_skeletonize_by_proxy
+
+
+def run_skeletonize_by_proxy(actx, case, resolution,
+                             places=None, mat=None,
+                             force_assert=True,
+                             suffix="", visualize=False):
+    from pytools import ProcessTimer
+
+    # {{{ geometry
+
+    dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage)
+    if places is None:
+        qbx = case.get_layer_potential(actx, resolution, case.target_order)
+        places = GeometryCollection(qbx, auto_where=dd)
+
+    density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
+    tgt_src_index = case.get_tgt_src_cluster_index(actx, places, dd)
+
+    logger.info("nclusters %3d ndofs %7d",
+            tgt_src_index.nclusters, density_discr.ndofs)
+
+    # }}}
+
+    # {{{ wranglers
+
+    proxy_approx_count = case.proxy_approx_count
+    if proxy_approx_count is None:
+        # FIXME: replace this with an estimate from an error model
+        proxy_approx_count = int(1.5 * np.max(np.diff(tgt_src_index.targets.starts)))
+
+    logger.info("proxy factor %.2f count %7d",
+                case.proxy_radius_factor, proxy_approx_count)
+
+    from pytential.linalg import QBXProxyGenerator
+    from pytential.linalg.skeletonization import make_skeletonization_wrangler
+    proxy_generator = QBXProxyGenerator(places,
+            radius_factor=case.proxy_radius_factor,
+            approx_nproxy=proxy_approx_count)
+
+    sym_u, sym_op = case.get_operator(places.ambient_dim)
+    wrangler = make_skeletonization_wrangler(places, sym_op, sym_u,
+            domains=None,
+            context=case.knl_concrete_kwargs,
+            _weighted_proxy=case.weighted_proxy,
+            _proxy_source_cluster_builder=case.proxy_source_cluster_builder,
+            _proxy_target_cluster_builder=case.proxy_target_cluster_builder,
+            _neighbor_cluster_builder=case.neighbor_cluster_builder)
+
+    # }}}
+
+    # {{{ check proxy id decomposition
+
+    if mat is None:
+        from pytential.symbolic.execution import _prepare_expr
+        expr = _prepare_expr(places, sym_op)
+
+        # dense matrix
+        from pytential.symbolic.matrix import MatrixBuilder
+        with ProcessTimer() as p:
+            mat = MatrixBuilder(
+                actx,
+                dep_expr=sym_u,
+                other_dep_exprs=[],
+                dep_source=qbx,
+                dep_discr=density_discr,
+                places=places,
+                context=case.knl_concrete_kwargs,
+                _weighted=wrangler.weighted_sources)(expr)
+
+        logger.info("[time] dense matrix construction: %s", p)
+
+    # skeleton
+    from pytential.linalg.skeletonization import \
+            _skeletonize_block_by_proxy_with_mats
+    with ProcessTimer() as p:
+        L, R, skel_tgt_src_index, src, tgt = _skeletonize_block_by_proxy_with_mats(
+                actx, 0, 0, places, proxy_generator, wrangler, tgt_src_index,
+                id_eps=case.id_eps,
+                max_particles_in_box=case.max_particles_in_box)
+
+    logger.info("[time] skeletonization by proxy: %s", p)
+
+    for i in range(tgt_src_index.nclusters):
+        # targets (rows)
+        bi = np.searchsorted(
+            tgt_src_index.targets.cluster_indices(i),
+            skel_tgt_src_index.targets.cluster_indices(i),
+            )
+
+        A = np.hstack(tgt[i])
+        S = A[bi, :]
+        tgt_error = la.norm(A - L[i, i] @ S, ord=2) / la.norm(A, ord=2)
+
+        # sources (columns)
+        bj = np.searchsorted(
+            tgt_src_index.sources.cluster_indices(i),
+            skel_tgt_src_index.sources.cluster_indices(i),
+            )
+
+        A = np.vstack(src[i])
+        S = A[:, bj]
+        src_error = la.norm(A - S @ R[i, i], ord=2) / la.norm(A, ord=2)
+
+        logger.info("[%04d] id_eps %.5e src %.5e tgt %.5e rank %d/%d",
+                i, case.id_eps,
+                src_error, tgt_error, R[i, i].shape[0], R[i, i].shape[1])
+
+        if force_assert:
+            assert src_error < 6 * case.id_eps
+            assert tgt_error < 6 * case.id_eps
+
+    # }}}
+
+    # {{{ check skeletonize
+
+    from pytential.linalg import SkeletonizationResult
+    skeleton = SkeletonizationResult(
+            L=L, R=R,
+            tgt_src_index=tgt_src_index,
+            skel_tgt_src_index=skel_tgt_src_index)
+
+    from pytential.linalg.utils import (
+            cluster_skeletonization_error, skeletonization_error)
+
+    with ProcessTimer() as p:
+        blk_err_l, blk_err_r = cluster_skeletonization_error(
+                mat, skeleton, ord=2, relative=True)
+
+        err_l = la.norm(blk_err_l, np.inf)
+        err_r = la.norm(blk_err_r, np.inf)
+
+    logger.info("[time] cluster error: %s", p)
+
+    if density_discr.ndofs > 4096:
+        err_f = max(err_l, err_r)
+    else:
+        with ProcessTimer() as p:
+            err_f = skeletonization_error(mat, skeleton, ord=2, relative=True)
+
+        logger.info("[time] full error: %s", p)
+
+    # FIXME: why is the 3D error so large?
+    rtol = 10**places.ambient_dim * case.id_eps
+
+    logger.info("error: id_eps %.5e R %.5e L %.5e F %.5e (rtol %.5e)",
+            case.id_eps, err_r, err_l, err_f, rtol)
+
+    if force_assert:
+        assert err_l < rtol
+        assert err_r < rtol
+        assert err_f < rtol
+
+    # }}}
+
+    # {{{ visualize
+
+    if visualize:
+        import matplotlib.pyplot as pt
+        pt.imshow(np.log10(blk_err_l + 1.0e-16))
+        pt.colorbar()
+        pt.savefig(f"test_skeletonize_by_proxy_err_l{suffix}")
+        pt.clf()
+
+        pt.imshow(np.log10(blk_err_r + 1.0e-16))
+        pt.colorbar()
+        pt.savefig(f"test_skeletonize_by_proxy_err_r{suffix}")
+        pt.clf()
+
+        if places.ambient_dim == 2:
+            pxy = proxy_generator(
+                    actx, wrangler.domains[0], tgt_src_index.targets
+                    ).to_numpy(actx)
+
+            from arraycontext import flatten
+            sources = actx.to_numpy(
+                    flatten(density_discr.nodes(), actx)
+                    ).reshape(places.ambient_dim, -1)
+
+            _plot_skeleton_with_proxies(f"sources{suffix}", sources, pxy,
+                    tgt_src_index.sources, skel_tgt_src_index.sources)
+            _plot_skeleton_with_proxies(f"targets{suffix}", sources, pxy,
+                    tgt_src_index.targets, skel_tgt_src_index.targets)
+        else:
+            # TODO: would be nice to figure out a way to visualize some of these
+            # skeletonization results in 3D. Probably need to teach the
+            # visualizers to spit out point clouds
+            pass
+
+    # }}}
+
+    return err_f, (places, mat)
+
+
+@pytest.mark.parametrize("case", SKELETONIZE_TEST_CASES)
+def test_skeletonize_by_proxy(actx_factory, case, visualize=False):
+    """Test single-level level skeletonization accuracy."""
+    import scipy.linalg.interpolative as sli    # pylint:disable=no-name-in-module
+    sli.seed(42)
+
+    actx = actx_factory()
+
+    if visualize:
+        logging.basicConfig(level=logging.INFO)
+
+    case = replace(case, approx_cluster_count=6, id_eps=1.0e-8)
+    logger.info("\n%s", case)
+
+    run_skeletonize_by_proxy(actx, case, case.resolutions[0], visualize=visualize)
+
+# }}}
+
+
+# {{{ test_skeletonize_by_proxy_convergence
+
+CONVERGENCE_TEST_CASES = [
+        replace(SKELETONIZE_TEST_CASES[0], resolutions=[256]),
+        replace(SKELETONIZE_TEST_CASES[1], resolutions=[256]),
+        replace(SKELETONIZE_TEST_CASES[2], resolutions=[0]),
+        extra.GMSHSphereTestCase(
+            target_order=8,
+            op_type="scalar",
+            resolutions=[0.4]),
+        ]
+
+
+@pytest.mark.parametrize("case", [
+    CONVERGENCE_TEST_CASES[0],
+    CONVERGENCE_TEST_CASES[1],
+    pytest.param(CONVERGENCE_TEST_CASES[2], marks=pytest.mark.slowtest),
+    pytest.param(CONVERGENCE_TEST_CASES[3], marks=pytest.mark.slowtest),
+    ])
+def test_skeletonize_by_proxy_convergence(
+        actx_factory, case, weighted=True,
+        visualize=False):
+    """Test single-level level skeletonization accuracy."""
+    import scipy.linalg.interpolative as sli    # pylint:disable=no-name-in-module
+    sli.seed(42)
+
+    actx = actx_factory()
+
+    if visualize:
+        logging.basicConfig(level=logging.INFO)
+
+    if case.ambient_dim == 2:
+        nclusters = 6
+    else:
+        nclusters = 12
+
+    case = replace(case, approx_cluster_count=nclusters)
+    logger.info("\n%s", case)
+
+    id_eps = 10.0 ** (-np.arange(2, 16))
+    rec_error = np.zeros_like(id_eps)
+
+    from pytools.convergence import EOCRecorder
+    eoc = EOCRecorder()
+
+    r = case.resolutions[-1]
+    w = "ww" if weighted or weighted is None else "nw"
+    if isinstance(r, int):
+        suffix = f"_{case.name}_{r:06d}_{w}"
+    else:
+        suffix = f"_{case.name}_{r:.3f}_{w}"
+
+    was_zero = False
+    places = mat = None
+    for i in range(id_eps.size):
+        case = replace(case, id_eps=id_eps[i], weighted_proxy=weighted)
+
+        if not was_zero:
+            rec_error[i], (places, mat) = run_skeletonize_by_proxy(
+                actx, case, r, places=places, mat=mat,
+                force_assert=False,
+                suffix=f"{suffix}_{i:04d}", visualize=False)
+
+        was_zero = rec_error[i] == 0.0
+        eoc.add_data_point(id_eps[i], rec_error[i])
+
+    logger.info("\n%s", eoc.pretty_print())
+
+    if visualize:
+        import matplotlib.pyplot as pt
+        fig = pt.figure(figsize=(10, 10), dpi=300)
+        ax = fig.gca()
+
+        ax.loglog(id_eps, id_eps, "k--")
+        ax.loglog(id_eps, rec_error)
+
+        ax.grid(True)
+        ax.set_xlabel(r"$\epsilon_{id}$")
+        ax.set_ylabel("$Error$")
+        ax.set_title(case.name)
+
+        fig.savefig(f"test_skeletonize_by_proxy_convergence{suffix}")
+        pt.close(fig)
+
+    assert eoc.order_estimate() > 0.9
+
+# }}}
+
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: fdm=marker
diff --git a/test/test_matrix.py b/test/test_matrix.py
index 4ae84b340..46fb3dd1e 100644
--- a/test/test_matrix.py
+++ b/test/test_matrix.py
@@ -235,6 +235,10 @@ def test_build_matrix_conditioning(actx_factory, side, op_type, visualize=False)
             places.ambient_dim,
             qbx_forced_limit="avg")
 
+    if case.op_type == "single":
+        # NOTE: add identity to make sure this is well-conditioned too
+        sym_op = 0.5 * case.side * sym_u + sym_op
+
     mat = actx.to_numpy(build_matrix(
         actx, places, sym_op, sym_u,
         context=case.knl_concrete_kwargs
@@ -496,6 +500,7 @@ def test_build_matrix_fixed_stage(actx_factory,
 
     # qbx
     from pytential.symbolic import matrix
+
     mat = matrix.MatrixBuilder(
             actx, **kwargs)(sym_prep_op)
     flat_cluster_mat = matrix.QBXClusterMatrixBuilder(