diff --git a/Project.toml b/Project.toml
index 6615069..88eeacf 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,24 +1,22 @@
 name = "AutoBZCore"
 uuid = "66bd3e16-1600-45cf-8f55-0b550710682b"
 authors = ["lxvm <lorenzo@vanmunoz.com>"]
-version = "0.3.8"
+version = "0.4.0"
 
 [deps]
 AutoSymPTR = "78a0c066-08f1-49a8-82f0-b29cd485e1d3"
 ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
+CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
 FourierSeriesEvaluators = "2a892dea-6eef-4bb5-9d1c-de966c9f6db5"
 FunctionWrappers = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
 HCubature = "19dc6840-f33b-545b-b366-655c7e3ffd49"
 IteratedIntegration = "3ecdc4d6-ee34-4049-885a-a4e3631db98b"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
-Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [weakdeps]
 AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
-HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 Polyhedra = "67491407-f73d-577b-9b50-8179a7c68029"
 SymmetryReduceBZ = "49a35663-c880-4242-bebb-1ec8c0fa8046"
 Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
@@ -26,7 +24,6 @@ WannierIO = "cb1bc77f-5443-4951-af9f-05b616a3e422"
 
 [extensions]
 AtomsBaseExt = "AtomsBase"
-HDF5Ext = "HDF5"
 SymmetryReduceBZExt = ["SymmetryReduceBZ", "Polyhedra"]
 UnitfulExt = "Unitful"
 WannierIOExt = "WannierIO"
@@ -36,17 +33,15 @@ Aqua = "0.7"
 AtomsBase = "0.3"
 AutoSymPTR = "0.4"
 ChunkSplitters = "2"
+CommonSolve = "0.2"
 Elliptic = "1"
 FourierSeriesEvaluators = "1"
 FunctionWrappers = "1"
 GeneralizedGaussianQuadrature = "0.1"
 HCubature = "1.4"
-HDF5 = "0.16.15"
 IteratedIntegration = "0.5"
 LinearAlgebra = "1.9"
-Printf = "1.9"
 QuadGK = "2.6"
-Reexport = "1"
 StaticArrays = "1"
 SymmetryReduceBZ = "0.2"
 WannierIO = "0.1,0.2"
@@ -56,9 +51,8 @@ julia = "1.9"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
 Elliptic = "b305315f-e792-5b7a-8f41-49f472929428"
-HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 GeneralizedGaussianQuadrature = "958e0c08-f14d-42e8-a0ab-84193b3783f2"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 Polyhedra = "67491407-f73d-577b-9b50-8179a7c68029"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -68,4 +62,4 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
 WannierIO = "cb1bc77f-5443-4951-af9f-05b616a3e422"
 
 [targets]
-test = ["Aqua", "Elliptic", "LinearAlgebra", "GeneralizedGaussianQuadrature", "Test", "HDF5", "StaticArrays", "OffsetArrays", "SymmetryReduceBZ", "Polyhedra"]
+test = ["Aqua", "Elliptic", "LinearAlgebra", "GeneralizedGaussianQuadrature", "Test", "StaticArrays", "OffsetArrays", "SymmetryReduceBZ", "Polyhedra"]
diff --git a/aps_example/Project.toml b/aps_example/Project.toml
index fd3a1f7..acc2155 100644
--- a/aps_example/Project.toml
+++ b/aps_example/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 AutoBZCore = "66bd3e16-1600-45cf-8f55-0b550710682b"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+FourierSeriesEvaluators = "2a892dea-6eef-4bb5-9d1c-de966c9f6db5"
 HChebInterp = "78faba9b-a54b-441f-8118-62407cbe4e59"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
diff --git a/aps_example/README.md b/aps_example/README.md
index cae6563..38d6fbe 100644
--- a/aps_example/README.md
+++ b/aps_example/README.md
@@ -1,4 +1,6 @@
-[Scripts updated for AutoBZCore v0.3]
+[Scripts in development for AutoBZCore v0.4]
+
+Warning: this example may not work well using Julia 1.10. 
 
 Hello, to reproduce the code example in these
 [slides](https://web.mit.edu/lxvm/www/slides/autobz_aps.pdf) follow these steps:
diff --git a/aps_example/aps_example.jl b/aps_example/aps_example.jl
index b18a316..cf65d93 100644
--- a/aps_example/aps_example.jl
+++ b/aps_example/aps_example.jl
@@ -20,36 +20,73 @@ for (i, h, n) in zip(hrdat.Rvectors, hrdat.H, hrdat.Rdegens)
     H_R[CartesianIndex(Tuple(i))] = h / n
 end
 
-using AutoBZCore, LinearAlgebra
+using FourierSeriesEvaluators, LinearAlgebra
 
-bz = load_bz(CubicSymIBZ(), "svo.wout")
-# bz = load_bz(IBZ(), "svo.wout") # works with SymmetryReduceBZ.jl installed
 h = FourierSeries(H_R, period=1.0)
 
 η = 1e-2                    # 10 meV (scattering amplitude)
-dos_integrand(h_k::FourierValue, η, ω) = -imag(tr(inv((ω+im*η)*I - h_k.s)))/pi
-integrand = FourierIntegrand(dos_integrand, h, η)
+ω_min = 10
+ω_max = 15
+p0 = (; η, ω=(ω_min + ω_max)/2) # initial parameters
+# BUG cannot redefine this function without breaking functionwrappers in v1.10
+# https://github.com/JuliaLang/julia/issues/52635#issuecomment-2150808569
+greens_function(k, h_k, (; η, ω)) = tr(inv((ω+im*η)*I - h_k))
+prototype = let k = FourierSeriesEvaluators.period(h)
+    greens_function(k, h(k), p0)
+end
+
+using AutoBZCore
+bz = load_bz(CubicSymIBZ(), "svo.wout")
+# bz = load_bz(IBZ(), "svo.wout") # works with SymmetryReduceBZ.jl installed
 
-dos_solver_iai = IntegralSolver(integrand, bz, IAI(); abstol=1e-3)
-dos_solver_ptr = IntegralSolver(integrand, bz, PTR(npt=100); abstol=1e-3)
+integrand = FourierIntegralFunction(greens_function, h, prototype)
+prob_dos = AutoBZProblem(integrand, bz, p0; abstol=1e-3)
 
 using HChebInterp
 
-dos_iai = hchebinterp(dos_solver_iai, 10, 15; atol=1e-2)
-dos_ptr = hchebinterp(dos_solver_ptr, 10, 15; atol=1e-2)
+cheb_order = 15
+
+function dos_solver(prob, alg)
+    solver = init(prob, alg)
+    ω -> begin
+        solver.p = (; solver.p..., ω)
+        solve!(solver).value
+    end
+end
+function threaded_dos_solver(prob, alg; nthreads=min(cheb_order, Threads.nthreads()))
+    solvers = [init(prob, alg) for _ in 1:nthreads]
+    BatchFunction() do ωs
+        out = Vector{typeof(prototype)}(undef, length(ωs))
+        Threads.@threads for i in 1:nthreads
+            solver = solvers[i]
+            for j in i:nthreads:length(ωs)
+                ω = ωs[j]
+                solver.p = (; solver.p..., ω)
+                out[j] = solve!(solver).value
+            end
+        end
+        return out
+    end
+end
+
+dos_solver_iai = dos_solver(prob_dos, IAI(QuadGKJL()))
+@time greens_iai = hchebinterp(dos_solver_iai, ω_min, ω_max; atol=1e-2, order=cheb_order)
+
+dos_solver_ptr = dos_solver(prob_dos, PTR(; npt=100))
+@time greens_ptr = hchebinterp(dos_solver_ptr, ω_min, ω_max; atol=1e-2, order=cheb_order)
 
 using CairoMakie
 
 set_theme!(fontsize=24, linewidth=4)
 
 fig1 = Figure()
-ax1 = Axis(fig1[1,1], limits=((10,15), (0,det(bz.B)*6)), xlabel="ω (eV)", ylabel="SVO DOS (eV⁻¹ Å⁻³)")
-p1 = lines!(ax1, 10:η/100:15, dos_iai; label="IAI(), η=$η")
+ax1 = Axis(fig1[1,1], limits=((10,15), (0,6)), xlabel="ω (eV)", ylabel="SVO DOS (eV⁻¹)")
+p1 = lines!(ax1, 10:η/100:15, ω -> -imag(greens_iai(ω))/pi/det(bz.B); label="IAI, η=$η")
 axislegend(ax1)
 save("iai_svo_dos.pdf", fig1)
 
 fig2 = Figure()
-ax2 = Axis(fig2[1,1], limits=((10,15), (0,det(bz.B)*6)), xlabel="ω (eV)", ylabel="SVO DOS (eV⁻¹ Å⁻³)")
-p2 = lines!(ax2, 10:η/100:15, dos_ptr; label="PTR(), η=$η")
+ax2 = Axis(fig2[1,1], limits=((10,15), (0,6)), xlabel="ω (eV)", ylabel="SVO DOS (eV⁻¹)")
+p2 = lines!(ax2, 10:η/100:15, ω -> -imag(greens_ptr(ω))/pi/det(bz.B); label="PTR, η=$η")
 axislegend(ax2)
 save("ptr_svo_dos.pdf", fig2)
diff --git a/docs/Project.toml b/docs/Project.toml
index dfa65cd..9b8f31f 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,2 +1,5 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+FourierSeriesEvaluators = "2a892dea-6eef-4bb5-9d1c-de966c9f6db5"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
diff --git a/docs/make.jl b/docs/make.jl
index abee2bd..50d83c4 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -23,7 +23,6 @@ makedocs(
         "Algorithms" => "algorithms.md",
         "Reference" => "reference.md",
         "Extensions" => "extensions.md",
-        "Density of States" => "dos.md",
     ],
 )
 
diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md
index 41f0303..ab620af 100644
--- a/docs/src/algorithms.md
+++ b/docs/src/algorithms.md
@@ -1,10 +1,12 @@
-# Integral algorithms
+# Algorithms
+
+## `IntegralProblem` algorithms
 
 ```@docs
 AutoBZCore.IntegralAlgorithm
 ```
 
-## Quadrature
+### Quadrature
 
 ```@docs
 AutoBZCore.QuadratureFunction
@@ -14,7 +16,7 @@ AutoBZCore.ContQuadGKJL
 AutoBZCore.MeroQuadGKJL
 ```
 
-## Cubature
+### Cubature
 
 ```@docs
 AutoBZCore.HCubatureJL
@@ -22,20 +24,17 @@ AutoBZCore.MonkhorstPack
 AutoBZCore.AutoSymPTRJL
 ```
 
-## Meta-algorithms
+### Meta-algorithms
 
 ```@docs
 AutoBZCore.NestedQuad
-AutoBZCore.EvalCounter
-AutoBZCore.AbsoluteEstimate
 ```
 
-# BZ-specific integral algorithms
+## `AutoBZProblem` algorithms
 
 In order to make algorithms domain-agnostic, the BZ loaded from
-[`load_bz`](@ref) can be called with the algorithms below, which are wrappers
-for algorithms above with the additional capability of mapping integrals over
-the IBZ to the FBZ.
+[`load_bz`](@ref) can be called with the algorithms below, which are aliases
+for algorithms above
 
 ```@docs
 AutoBZCore.AutoBZAlgorithm
@@ -43,6 +42,16 @@ AutoBZCore.IAI
 AutoBZCore.TAI
 AutoBZCore.PTR
 AutoBZCore.AutoPTR
-AutoBZCore.PTR_IAI
-AutoBZCore.AutoPTR_IAI
-```
\ No newline at end of file
+```
+
+## `DOSProblem` algorithms
+
+Currently the available algorithms are an initial release and we would like to include
+the following reference algorithms that are also common in the literature in a future release:
+- (Linear) Tetrahedron Method
+- Adaptive Gaussian broadening
+
+```@docs
+AutoBZCore.DOSAlgorithm
+AutoBZCore.GGR
+```
diff --git a/docs/src/dos.md b/docs/src/dos.md
deleted file mode 100644
index 9dfd240..0000000
--- a/docs/src/dos.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Density of States
-
-Computing the density of states (DOS) of a self-adjoint, or Hermitian, operator is a
-related, but distinct problem to the integrals also presented in this package.
-In fact, many DOS algorithms will compute integrals to approximate the DOS of an
-operator that depends on a parameter such as crystal momentum. To solve these
-types of problems, this package defines the following problem type:
-
-```@docs
-AutoBZCore.DOSProblem
-```
-
-## Algorithms
-
-Currently the available algorithms are experimental and we would like to include
-the following reference algorithms that are more common in the literature in a future release:
-- (Linear) Tetrahedron Method
-- Adaptive Gaussian broadening
-
-```@docs
-AutoBZCore.DOSAlgorithm
-AutoBZCore.GGR
-```
-
-## Caching interface
-
-Using the [`AutoBZCore.init`](@ref) and [`AutoBZCore.solve!`](@ref) functions, it is possible to
-construct a cache to solve a [`DOSProblem`](@ref) for several energies or
-several Hamiltonians. As an example of solving for several energies,
-```julia
-using AutoBZCore, StaticArrays
-h = FourierSeries(SMatrix{1,1,Float64,1}.([0.5, 0.0, 0.5]), period=1.0, offset=-2)
-E = 0.3
-bz = load_bz(FBZ(), [2pi;;])
-prob = DOSProblem(h, E, bz)
-alg = GGR(; npt=100)
-cache = AutoBZCore.init(prob, alg)
-Es = range(-2, 2, length=1000)
-data = map(Es) do E
-    cache.domain = E
-    AutoBZCore.solve!(cache).u
-end
-```
-
-As an example of interchanging Hamiltonians, which must remain the same type,
-```julia
-using AutoBZCore, StaticArrays
-
-h = FourierSeries(SMatrix{1,1,Float64,1}.([0.5, 0.0, 0.5]), period=1.0, offset=-2)
-bz = load_bz(FBZ(), [2pi;;])
-prob = DOSProblem(h, 0.0, bz)
-alg = GGR()
-
-cache = AutoBZCore.init(prob, alg)
-sol1 = AutoBZCore.solve!(cache)
-
-h.c .*= 2
-cache.isfresh = true
-
-sol2 = AutoBZCore.solve!(cache)
-
-sol1.u*2 ≈ sol2.u
-```
\ No newline at end of file
diff --git a/docs/src/examples.md b/docs/src/examples.md
index a2e5f1d..a64be6d 100644
--- a/docs/src/examples.md
+++ b/docs/src/examples.md
@@ -2,9 +2,9 @@
 
 The following are several examples of how to use the algorithms and integrands
 provided by AutoBZCore.jl.
-For background on defining integrals see the [Problem definitions](@ref) page
+For background on the essential interface see the [Problem definitions](@ref) page
 
-## Green's function
+## Green's function integration
 
 A common integral appearing in [Dynamical mean-field
 theory](https://en.wikipedia.org/wiki/Dynamical_mean-field_theory) is that of
@@ -14,11 +14,13 @@ G(\omega) = \int d^d \vec{k}\ \operatorname{Tr} \left[ \left( \omega - H \left(
 ```
 
 For simplicity, we take ``\Sigma(\omega) = -i\eta``. We can define the integrand
-as a function of ``\vec{k}`` and ``H`` and (required) parameters ``\eta, \omega``.
-```julia
+as a function of ``\vec{k}`` and ``H`` and parameters ``\eta, \omega``.
+```@example gloc
 using LinearAlgebra
-gloc_integrand(k, h; η, ω) = inv(complex(ω,η)*I-h(k))
+gloc_integrand(k, (; h, η, ω)) = tr(inv(complex(ω,η)*I-h(k)))
 ```
+Here we use named tuple destructuring syntax to unpack a named tuple of
+parameters in the function definition.
 
 Commonly, ``H(\vec{k})`` is evaluated using Wannier interpolation, i.e. as a
 Fourier series. For a simple tight-binding model, the integer lattice, the
@@ -31,33 +33,44 @@ complex Fourier series it becomes easier to use the representation in terms of
 Fourier coefficients. Using the package
 [FourierSeriesEvaluators.jl](https://github.com/lxvm/FourierSeriesEvaluators.jl),
 we can define ``H(k) = \cos(2\pi k)`` by the following:
-```julia
+```@example gloc
 using FourierSeriesEvaluators
 h = FourierSeries([0.5, 0.0, 0.5]; period=1, offset=-2)
 ```
 The coefficient values of ``1/2`` can be determined from Euler's formula, as
-used in the expansion of ``cos`` above, and the value of `offset` is chosen to
+used in the expansion of ``\cos`` above, and the value of `offset` is chosen to
 offset the coefficient array indices, `1:3` since Julia has 1-based indexing, to
 correspond to values of ``n`` in the phase factors ``e^{2\pi i n k}`` used in
-the Fourier series above, i.e. `-1:1`. Now we proceed to the define the integral problem
-```julia
+the Fourier series above, i.e. `-1:1`. Now we proceed to the define the
+[`IntegralProblem`](@ref) and solve it with a generic adaptive
+integration scheme, [`QuadGKJL`](@ref)
+```@example gloc
 using AutoBZCore
-using AutoBZCore: IntegralProblem
-integrand = ParameterIntegrand(gloc_integrand, h, η=0.1)
-prob = IntegralProblem(integrand, 0, 1)
-```
-Here we wrapped our function with two of its arguments, `h, η` as a
-[`ParameterIntegrand`](@ref) that allows us to provide partial arguments so that
-we can solve the integral as a function of the remaining parameters, in this
-case `ω`. We also created an [`AutoBZCore.IntegralProblem`](@ref) to integrate our function
-over its period ``[0,1]``. To solve this problem, we pick any of the package's
-[Integral algorithms](@ref) and the
-tolerance to which we would like the solution. Then we make an
-[`IntegralSolver`](@ref) to evaluate ``G(\omega)`` as a function.
-```julia
+dom = (0, 1)
+p = (; h, η=0.1, ω=0.0)
+prob = IntegralProblem(gloc_integrand, dom, p)
 alg = QuadGKJL()
-gloc = IntegralSolver(prob, alg; abstol=1e-3)
-gloc(ω=0.0) # -2.7755575615628914e-17 - 0.9950375451895513im
+solve(prob, alg; abstol=1e-3).value
+```
+
+## BZ integration
+
+To perform integration over a Brillouin zone, we can load one using the
+[`load_bz`](@ref) function and then construct an [`AutoBZProblem`](@ref) to
+solve. Since the Brillouin zone may be reduced using point group symmetries, a
+common optimization, it is also required to specify the symmetry representation
+of the integrand. Continuing the previous example, the trace of the Green's
+function has no band/orbital degrees of freedom and transforms trivially under
+the point group, so it is a [`TrivialRep`](@ref). The previous calculation can
+be replicated as:
+
+```@example gloc
+using AutoBZCore
+bz = load_bz(FBZ(), 2pi*I(1))
+p = (; h, η=0.1, ω=0.0)
+prob = AutoBZProblem(TrivialRep(), IntegralFunction(gloc_integrand), bz, p)
+alg = TAI()
+solve(prob, alg; abstol=1e-3).value
 ```
 
 Now we proceed to multi-dimensional integrals. In this case, Wannier
@@ -76,37 +89,73 @@ which is comparable to the computational complexity of a [multi-dimensional
 FFT](https://en.wikipedia.org/wiki/Fast_Fourier_transform#Multidimensional_FFTs).
 Since the constants of a FFT may not be trivial, this scheme is competitive.
 
-This capability is provided by [`FourierIntegrand`](@ref).
 Let's use this with a Fourier series corresponding to
 ``H(\vec{k}) = \cos(2\pi k_{x}) + \cos(2\pi k_{y})``
-```julia
+and define a new method of `gloc_integrand` that accepts the (efficiently)
+evaluated Fourier series in the second argument
+```@example gloc
 h = FourierSeries([0.0; 0.5; 0.0;; 0.5; 0.0; 0.5;; 0.0; 0.5; 0.0]; period=1, offset=-2)
-integrand = FourierIntegrand(gloc_integrand, h, η=0.1)
-```
-However, since [`FourierIntegrand`](@ref) evaluates ``H(k)`` for us and gives it
-as a `FourierValue` together with ``k``, we need to define another method for
-our integrand to comply with the interface
-```julia
-gloc_integrand(h_k::FourierValue; η, ω) = inv(complex(ω,η)*I-h_k.s)
+gloc_integrand(k, h_k, (; η, ω)) = tr(inv(complex(ω,η)*I-h_k))
 ```
 Similar to before, we construct an [`AutoBZCore.IntegralProblem`](@ref) and this time we
 take the integration domain to correspond to the full Brillouin zone of a square
-lattice with lattice vectors `2pi*I(2)`. (See the
-[Reference](@ref) for more details on constructing BZs.)
-```julia
+lattice with lattice vectors `2pi*I(2)`.
+```@example gloc
+integrand = FourierIntegralFunction(gloc_integrand, h)
 bz = load_bz(FBZ(2), 2pi*I(2))
-prob = IntegralProblem(integrand, bz)
-```
-This package provides several [BZ-specific integral algorithms](@ref) that we
-can use to solve the multidimensional integral.
-```julia
+p = (; η=0.1, ω=0.0)
+prob = AutoBZProblem(TrivialRep(), integrand, bz, p)
 alg = IAI()
-gloc = IntegralSolver(prob, alg; abstol=1e-3)
-gloc(ω=0.0) # 1.5265566588595902e-16 - 1.3941704019631334im
+solve(prob, alg).value
 ```
-
-## Density of states
+This package provides several [`AutoBZProblem` algorithms](@ref) that we
+can use to solve the multidimensional integral.
 
 The [repo's demo](https://github.com/lxvm/AutoBZCore.jl/tree/main/aps_example)
 on density of states provides a complete example of how to compute and
-interpolate an integral as a function of its parameters.
+interpolate an integral as a function of its parameters using the [`init` and
+`solve!`](@ref) interface
+
+
+## Density of States
+
+Computing the density of states (DOS) of a self-adjoint, or Hermitian, operator is a
+related, but distinct problem to the integrals also presented in this package.
+In fact, many DOS algorithms will compute integrals to approximate the DOS of an
+operator by introducing an artificial broadening.
+To handle the ``T=0^{+}`` limit of the broadening, we implement the well-known
+[Gilat-Raubenheimer method](https://arxiv.org/abs/1711.07993) as an algorithm
+for the [`AutoBZCore.DOSProblem`](@ref)
+
+Using the [`AutoBZCore.init`](@ref) and [`AutoBZCore.solve!`](@ref) functions, it is possible to
+construct a cache to solve a [`DOSProblem`](@ref) for several energies or
+several Hamiltonians. As an example of solving for several energies,
+```@example dos
+using AutoBZCore, FourierSeriesEvaluators, StaticArrays
+h = FourierSeries(SMatrix{1,1,Float64,1}.([0.5, 0.0, 0.5]), period=1.0, offset=-2)
+E = 0.3
+bz = load_bz(FBZ(), [2pi;;])
+prob = DOSProblem(h, E, bz)
+alg = GGR(; npt=100)
+cache = init(prob, alg)
+Es = range(-1, 1, length=10) * 1.1
+data = map(Es) do E
+    cache.domain = E
+    solve!(cache).value
+end
+```
+
+As an example of interchanging Hamiltonians, which must remain the same type, we
+can double the energies, which will halve the DOS
+```@example dos
+cache.domain = E
+sol1 = AutoBZCore.solve!(cache)
+
+h.c .*= 2
+cache.isfresh = true
+cache.domain = 2E
+
+sol2 = AutoBZCore.solve!(cache)
+
+sol1.value ≈ 2sol2.value
+```
\ No newline at end of file
diff --git a/docs/src/extensions.md b/docs/src/extensions.md
index 64b5015..956c14d 100644
--- a/docs/src/extensions.md
+++ b/docs/src/extensions.md
@@ -6,12 +6,6 @@ Loading [SymmetryReduceBZ.jl](https://github.com/jerjorg/SymmetryReduceBZ.jl)
 provides a specialized method of [`load_bz`](@ref) that when provided atom
 species and positions can compute the [`IBZ`](@ref).
 
-## HDF5.jl
-
-Loading [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) provides a specialized
-method of [`batchsolve`](@ref) that accepts an H5 archive or group in the first
-argument and writes the integral results and timings to a dataset.
-
 ## WannierIO.jl
 
 Loading [WannierIO.jl](https://github.com/qiaojunfeng/WannierIO.jl) provides a
diff --git a/docs/src/index.md b/docs/src/index.md
index 654a97b..1619965 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,8 +1,5 @@
 # AutoBZCore.jl
 
-!!! note "Work in progress"
-    We apologize for the inconvenience as we continue improving the documentation.
-
 ```@docs
 AutoBZCore
 ```
\ No newline at end of file
diff --git a/docs/src/integrands.md b/docs/src/integrands.md
index a57abfd..f99f4e5 100644
--- a/docs/src/integrands.md
+++ b/docs/src/integrands.md
@@ -3,39 +3,12 @@
 The design of AutoBZCore.jl uses multiple dispatch to provide multiple
 interfaces for user integrands that allow various optimizations to be compatible
 with a common interface for solvers.
-Unfortunately, not all algorithms support all integrands, since the underlying
-libraries must support the same interface.
-
-# Functions
-
-A user can pass an integrand of the form `f(x,p)` in the same way as in Integrals.jl
-
-# `ParameterIntegrand`
-
-```@docs
-AutoBZCore.ParameterIntegrand
-```
-
-# `InplaceIntegrand`
-
-```@docs
-AutoBZCore.InplaceIntegrand
-```
-
-# `BatchIntegrand`
-
-```@docs
-AutoBZCore.BatchIntegrand
-```
-
-# `FourierIntegrand`
-
-```@docs
-AutoBZCore.FourierIntegrand
-```
-
-# `NestedBatchIntegrand`
 
 ```@docs
-AutoBZCore.NestedBatchIntegrand
+AutoBZCore.IntegralFunction
+AutoBZCore.InplaceIntegralFunction
+AutoBZCore.InplaceBatchIntegralFunction
+AutoBZCore.CommonSolveIntegralFunction
+AutoBZCore.FourierIntegralFunction
+AutoBZCore.CommonSolveFourierIntegralFunction
 ```
\ No newline at end of file
diff --git a/docs/src/problems.md b/docs/src/problems.md
index 6397a9e..346d59b 100644
--- a/docs/src/problems.md
+++ b/docs/src/problems.md
@@ -1,64 +1,71 @@
 # Problem definitions
 
-The design of AutoBZCore.jl is heavily influenced by the
-[SciML](https://sciml.ai/) package
-[Integrals.jl](https://github.com/SciML/Integrals.jl)
-and may eventually become implemented in it.
+The design of AutoBZCore.jl is heavily influenced by
+[SciML](https://sciml.ai/) packages and uses the 
+[CommonSolve.jl](https://github.com/SciML/CommonSolve.jl)
+interface. Eventually, this package may contribute to
+[Integrals.jl](https://github.com/SciML/Integrals.jl).
 
-## SciML interface
+## Problem interface
 
-AutoBZCore.jl replicates the Integrals.jl interface, but does not export it in
-order to avoid name conflicts with other SciML packages.
+AutoBZCore.jl replicates the Integrals.jl interface, using an
+[`IntegralProblem`](@ref) type to setup an integral from an
+integrand, a domain, and parameters.
 
-### Quickstart
+```@example prob
+using AutoBZCore
 
-```julia
-using AutoBZCore: IntegralProblem, init, solve!
-
-prob = IntegralProblem((x,p) -> sin(p*x), 0, 1, 0.3)
-cache = init(prob, QuadGKJL())
-solve!(cache)   # 0.14887836958131329
-
-# solve again at a new parameter
-cache.p = 0.4
-solve!(cache)   # 0.1973475149927873
+f = (x,p) -> sin(p*x)
+dom = (0, 1)
+p = 0.3
+prob = IntegralProblem(f, dom, p)
 ```
 
-### Reference
-
 ```@docs
 AutoBZCore.IntegralProblem
-AutoBZCore.solve
-AutoBZCore.init
-AutoBZCore.solve!
 AutoBZCore.NullParameters
 ```
 
-## Functor interface
+## `solve`
+
+To solve an integral problem, pick an algorithm and call [`solve`](@ref)
 
-As shown in the quickstart of the [`AutoBZCore`](@ref) page, AutoBZCore.jl also defines
-a functor interface to solving integrals
+```@example prob
+alg = QuadGKJL()
+solve(prob, alg)
+```
 
 ```@docs
-AutoBZCore.IntegralSolver
+AutoBZCore.solve
 ```
 
-The functor interface is also extended by [`ParameterIntegrand`](@ref) and
-[`FourierIntegrand`](@ref) to
-allow a more flexible interface for passing (partial) positional and keyword
-arguments to user-defined integrands.
+## `init` and `solve!`
 
-```@docs
-AutoBZCore.MixedParameters
-AutoBZCore.paramzip
-AutoBZCore.paramproduct
+To solve many problems with the same integrand but different domains or
+parameters, use [`init`](@ref) to allocate a solver and
+[`solve!`](@ref) to get the solution
+
+```@example prob
+solver = init(prob, alg)
+solve!(solver).value
 ```
 
-## Batched evaluation
+To solve again, update the parameters of the solver in place and `solve!` again
+```@example prob
+# solve again at a new parameter
+solver.p = 0.4
+solve!(solver).value
+```
+
+
+```@docs
+AutoBZCore.init
+AutoBZCore.solve!
+```
 
-The routine [`batchsolve`](@ref) allows multi-threaded evaluation of an
-[`IntegralSolver`](@ref) at many parameter points.
+## Additional problems
 
 ```@docs
-AutoBZCore.batchsolve
-```
\ No newline at end of file
+AutoBZCore.AutoBZProblem
+AutoBZCore.DOSProblem
+```
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 4d74657..9b84c2b 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -2,18 +2,11 @@
 
 The following symbols are exported by AutoBZCore.jl
 
-## Domains
-
-```@docs
-AutoBZCore.PuncturedInterval
-AutoBZCore.HyperCube
-AutoBZCore.load_bz
-AutoBZCore.SymmetricBZ
-```
-
 ## Brillouin-zone kinds
 
 ```@docs
+AutoBZCore.load_bz
+AutoBZCore.load_bz(::IBZ, A, B, species, positions)
 AutoBZCore.AbstractBZ
 AutoBZCore.FBZ
 AutoBZCore.IBZ
@@ -25,21 +18,20 @@ AutoBZCore.CubicSymIBZ
 
 ```@docs
 AutoBZCore.AbstractSymRep
-AutoBZCore.SymRep
 AutoBZCore.TrivialRep
 AutoBZCore.UnknownRep
-AutoBZCore.symmetrize_
+AutoBZCore.symmetrize
 ```
 
 ## Internal
 
-The following docstrings belong to internal functions that may change between
+The following docstrings belong to internal types and functions that may change between
 versions of AutoBZCore.
 
 ```@docs
+AutoBZCore.PuncturedInterval
+AutoBZCore.HyperCube
+AutoBZCore.SymmetricBZ
 AutoBZCore.trapz
 AutoBZCore.cube_automorphisms
-AutoBZCore.batchparam
-AutoBZCore.symmetrize
-AutoBZCore.FourierValue
 ```
\ No newline at end of file
diff --git a/ext/HDF5Ext.jl b/ext/HDF5Ext.jl
deleted file mode 100644
index 40feaf8..0000000
--- a/ext/HDF5Ext.jl
+++ /dev/null
@@ -1,160 +0,0 @@
-module HDF5Ext
-
-using Printf: @sprintf
-using LinearAlgebra: norm
-
-using AutoBZCore: IntegralSolver, MixedParameters, solver_type, AuxValue, IntegralSolution
-using HDF5: h5open, write_dataset, read_dataset, Group, H5DataStore, create_dataset, create_group, delete_object, name
-import AutoBZCore: batchsolve
-
-
-"""
-    read_h5_to_nt(filename)
-
-Loads the h5 archive from `filename` and reads its datasets into a `NamedTuple`
-and its groups into `NamedTuple`s recursively.
-"""
-read_h5_to_nt(filename) = h5open(read_h5_to_nt_, filename, "r")
-read_h5_to_nt_(h5) = NamedTuple([Pair(Symbol(key), ((val = h5[key]) isa Group) ? read_h5_to_nt_(val) : read_dataset(h5, key)) for key in keys(h5)])
-
-"""
-    write_nt_to_h5(nt::NamedTuple, filename)
-
-Takes a `NamedTuple` and writes its values, which must be arrays, into an h5
-archive at `filename` with dataset names corresponding to the tuple names.
-If a value is a `NamedTuple`, its datasets are written to h5 groups recursively.
-"""
-write_nt_to_h5(nt::NamedTuple, filename) = h5open(filename, "w") do h5
-    write_nt_to_h5_(nt, h5)
-end
-function write_nt_to_h5_(nt::NamedTuple, h5)
-    for key in keys(nt)
-        if (val = nt[key]) isa NamedTuple
-            write_nt_to_h5_(val, create_group(h5, string(key)))
-        else
-            write_dataset(h5, string(key), val)
-        end
-    end
-end
-
-# parallelization
-
-# returns (dset, ax) to allow for array-valued data types
-function autobz_create_dataset(parent, path, T::Type, dims_)
-    dims = ((ndims(T) == 0 ? () : size(T))..., dims_...)
-    ax = ntuple(_-> :, Val(ndims(T)))
-    return create_dataset(parent, path, eltype(T), dims), ax
-end
-function autobz_create_dataset(parent, path, ::Type{AuxValue{T,A}}, dims) where {T, A}
-    # split auxvalue into two groups for easier interoperability with other languages, since
-    # the HDF5 compound type could be a challenge
-    g = create_group(parent, "I")
-    gval, axval = autobz_create_dataset(g, "val", T, dims)
-    gaux, axaux = autobz_create_dataset(g, "aux", A, dims)
-    return (gval, gaux), (axval, axaux)
-end
-
-set_value(g, i::CartesianIndex, val) = g[i.I...] = val
-set_value(parent, ax::Tuple, i::CartesianIndex, sol) = parent[ax...,i.I...] = sol
-function set_value((gval, gaux)::Tuple, (axval, axaux)::Tuple, i::CartesianIndex, sol::AuxValue)
-    set_value(gval, axval, i, sol.val)
-    set_value(gaux, axaux, i, sol.aux)
-    return nothing
-end
-# special cases for 0-d arrays with scalar fields, for which rewrite is not supported
-function set_value(g, ::CartesianIndex{0}, val)
-    p = parent(g)
-    s = basename(name(g))
-    delete_object(p, s)
-    write_dataset(p, s, val)
-    return nothing
-end
-set_value(parent, ::Tuple{}, i::CartesianIndex{0}, sol) = set_value(parent, i, sol)
-
-
-
-param_group(parent, T, dims) = create_dataset(parent, "p", T, dims)
-function param_group(parent, ::Type{T}, dims) where {T<:Tuple}
-    g = create_group(parent, "params")
-    for (i, S) in enumerate(T.parameters)
-        create_dataset(g, string(i), S, dims)
-    end
-    return g
-end
-function param_group(parent, ::Type{MixedParameters{T,NamedTuple{K,V}}}, dims) where {T,K,V}
-    g = create_group(parent, "args")
-    for (i, S) in enumerate(T.parameters)
-        create_dataset(g, string(i), S, dims)
-    end
-    q = create_group(parent, "kwargs")
-    for (key, val) in zip(K,V.parameters)
-        create_dataset(q, string(key), val, dims)
-    end
-    return (g,q)
-end
-function param_record(group, p, i)
-    set_value(group, i, p)
-    return nothing
-end
-function param_record(group, p::Tuple, i)
-    for (j,e) in enumerate(p)
-        set_value(group[string(j)], i, e)
-    end
-    return nothing
-end
-function param_record((g, q), p::MixedParameters, i)
-    for (j,e) in enumerate(getfield(p, :args))
-        set_value(g[string(j)], i, e)
-    end
-    for (k,v) in pairs(getfield(p, :kwargs))
-        set_value(q[string(k)], i, v)
-    end
-    return nothing
-end
-
-const liblock = ReentrantLock()
-
-"""
-    batchsolve(h5::H5DataStore, f::IntegralSolver, ps, [T]; verb=true, nthreads=Threads.nthreads())
-
-Solves the integral `f` at all parameters `ps` in a multi-threaded fashion, using `nthreads`
-threads to parallelize over `ps`, and writes the results to the `h5` archive.
-"""
-function batchsolve(h5::H5DataStore, f::IntegralSolver, ps::AbstractArray, T=solver_type(f, ps[begin]); flush=true, verb=true, nthreads=Threads.nthreads())
-    isconcretetype(T) || throw(ArgumentError("Result type of integrand is abstract or could not be inferred. Please provide the concrete return type to save to HDF5"))
-    len = length(ps)
-    dims = size(ps)
-    gI, ax = autobz_create_dataset(h5, "I", T, dims)
-    gE = create_dataset(h5, "E", Float64, dims)
-    gt = create_dataset(h5, "t", Float64, dims)
-    gr = create_dataset(h5, "retcode", Int32, dims)
-    gn = create_dataset(h5, "numevals", Int, dims)
-    gp = param_group(h5, eltype(ps), dims)
-    flush && Base.flush(h5)
-
-    function h5callback(_, i, n, p, sol, t)
-        lock(liblock)
-        try
-            verb && @info @sprintf "%5i / %i done in %e (s)" n len t
-            set_value(gI, ax, i, sol.u)
-            set_value(gE, i, isnothing(sol.resid) ? NaN : convert(Float64, T<:AuxValue ? sol.resid.val : sol.resid))
-            set_value(gt, i, t)
-            set_value(gr, i, Integer(sol.retcode))
-            set_value(gn, i, sol.numevals)
-            param_record(gp, p, i)
-            flush && Base.flush(h5)
-        finally
-            unlock(liblock)
-        end
-    end
-
-    verb && @info "Started parameter sweep"
-    t = time()
-    sol = batchsolve(f, ps, T; callback=h5callback, nthreads=nthreads)
-    t = time()-t
-    verb && @info @sprintf "Finished parameter sweep in %e (s) CPU time and %e (s) wall clock time" sum(read(gt)) t
-
-    return sol
-end
-
-end
diff --git a/src/AutoBZCore.jl b/src/AutoBZCore.jl
index 45c167c..fc5d408 100644
--- a/src/AutoBZCore.jl
+++ b/src/AutoBZCore.jl
@@ -1,7 +1,7 @@
 """
 A package providing a common interface to integration algorithms intended for applications
 including Brillouin-zone integration and Wannier interpolation. Its design is influenced by
-high-level libraries like Integrals.jl, and it makes use of Julia's multiple dispatch to
+high-level libraries like Integrals.jl to implement the CommonSolve.jl interface, and it makes use of Julia's multiple dispatch to
 provide the same interface for integrands with optimized inplace, batched, and Fourier
 series evaluation.
 
@@ -11,23 +11,24 @@ As a first example, we integrate sine over [0,1] as a function of its period.
 ```
 julia> using AutoBZCore
 
-julia> f = IntegralSolver((x,p) -> sin(p*x), 0, 1, QuadGKJL());
+julia> prob = IntegralProblem((x,p) -> sin(p*x), (0, 1), 0.3);
 
-julia> f(0.3) # solves the integral of sin(p*x) over [0,1] with p=0.3
+julia> solve(prob, QuadGKJL()).value # solves the integral of sin(p*x) over [0,1] with p=0.3
 0.14887836958131329
 ```
-Notice that we construct an [`IntegralSolver`](@ref) object that we can evaluate at
-different parameters with a function-like interface. For more examples, see the
+Notice that we construct an [`IntegralProblem`](@ref) object that we can [`solve`](@ref) at
+with a choice of algorithm. For more examples, see the
 documentation.
 
 ### Features
 
 Special integrand interfaces
-- [`ParameterIntegrand`](@ref): allows user integrand to use keyword arguments
-- [`InplaceIntegrand`](@ref): allows an integrand to write its result inplace to an array
-- [`BatchIntegrand`](@ref): allows user-side parallelization on e.g. shared memory,
+- [`IntegralFunction`](@ref): generic user integrand of the form `f(x, p)`
+- [`InplaceIntegralFunction`](@ref): allows an integrand to write its result inplace to an array
+- [`InplaceBatchIntegralFunction`](@ref): allows user-side parallelization on e.g. shared memory,
   distributed memory, or the gpu
-- [`FourierIntegrand`](@ref): efficient evaluation of Fourier series for cubatures with
+- [`CommonSolveIntegralFunction`](@ref): define an integrand that also solves a problem
+- [`FourierIntegralFunction`](@ref): efficient evaluation of Fourier series for cubatures with
   hierachical grids
 
 Quadrature algorithms:
@@ -38,7 +39,6 @@ Quadrature algorithms:
 
 Meta-Algorithms:
 - Iterated integration: [`NestedQuad`](@ref)
-- Integrand evaluation counter: [`EvalCounter`](@ref)
 
 # Extended help
 
@@ -49,49 +49,45 @@ module AutoBZCore
 
 using LinearAlgebra: I, norm, det, checksquare, isdiag, Diagonal, tr, diag, eigen, Hermitian
 
-using StaticArrays: SVector, SMatrix, pushfirst, sacollect
+using StaticArrays: SVector, SMatrix, sacollect
 using FunctionWrappers: FunctionWrapper
 using ChunkSplitters: chunks, getchunk
-using Reexport
-@reexport using AutoSymPTR
-@reexport using FourierSeriesEvaluators
-@reexport using IteratedIntegration
-@reexport using QuadGK
-@reexport using HCubature
-
+using AutoSymPTR
+using FourierSeriesEvaluators
+using IteratedIntegration
+using QuadGK: quadgk, quadgk!, BatchIntegrand
+using HCubature: hcubature
 using FourierSeriesEvaluators: workspace_allocate, workspace_contract!, workspace_evaluate!, workspace_evaluate, period
 using IteratedIntegration: limit_iterate, interior_point
-using HCubature: hcubature
+using HCubature: hcubature, hquadrature
+using CommonSolve: solve
+import CommonSolve: init, solve!
+export init, solve!, solve
 
-export PuncturedInterval, HyperCube
 include("domains.jl")
 
-export InplaceIntegrand
-include("inplace.jl")
-
-export BatchIntegrand, NestedBatchIntegrand
-include("batch.jl")
-
-# export IntegralProblem, solve, init, solve! # we don't export the SciML interface
-export IntegralSolver, batchsolve
+export IntegralFunction, InplaceIntegralFunction, InplaceBatchIntegralFunction
+export CommonSolveIntegralFunction
+export IntegralProblem
 include("interfaces.jl")
 
 export QuadGKJL, HCubatureJL, QuadratureFunction
+include("algorithms.jl")
 export AuxQuadGKJL, ContQuadGKJL, MeroQuadGKJL
+include("algorithms_iterated.jl")
 export MonkhorstPack, AutoSymPTRJL
-export NestedQuad, AbsoluteEstimate, EvalCounter
-include("algorithms.jl")
+include("algorithms_autosymptr.jl")
+export NestedQuad#, AbsoluteEstimate, EvalCounter
+include("algorithms_meta.jl")
 
 export SymmetricBZ, nsyms
 export load_bz, FBZ, IBZ, InversionSymIBZ, CubicSymIBZ
-export AbstractSymRep, SymRep, UnknownRep, TrivialRep
-export IAI, PTR, AutoPTR, TAI, PTR_IAI, AutoPTR_IAI
+export AbstractSymRep, UnknownRep, TrivialRep
+export AutoBZProblem
+export IAI, PTR, AutoPTR, TAI
 include("brillouin.jl")
 
-export ParameterIntegrand, paramzip, paramproduct
-include("parameters.jl")
-
-export FourierIntegrand, FourierValue
+export FourierIntegralFunction, CommonSolveFourierIntegralFunction
 include("fourier.jl")
 
 export DOSProblem
diff --git a/src/algorithms.jl b/src/algorithms.jl
index 720ee9a..4c70209 100644
--- a/src/algorithms.jl
+++ b/src/algorithms.jl
@@ -1,9 +1,6 @@
 # Methods an algorithm must define
 # - init_cacheval
-# - do_solve
-
-
-# Here we replicate the algorithms provided by SciML
+# - do_integral
 
 """
     QuadGKJL(; order = 7, norm = norm)
@@ -37,64 +34,77 @@ function init_midpoint_scale(a::T, b::T) where {T}
     end
 end
 init_midpoint_scale(dom::PuncturedInterval) = init_midpoint_scale(endpoints(dom)...)
-function init_segbuf(f, dom, p, norm)
-    x, s = init_midpoint_scale(dom)
-    u = x/oneunit(x)
-    TX = typeof(u)
-    fx_s = f(x, p) * s/oneunit(s)
-    TI = typeof(fx_s)
-    TE = typeof(norm(fx_s))
-    return IteratedIntegration.alloc_segbuf(TX, TI, TE)
-end
-function init_segbuf(f::InplaceIntegrand, dom, p, norm)
-    x, s = init_midpoint_scale(dom)
-    u = x/oneunit(x)
-    TX = typeof(u)
-    TI = typeof(f.I *s/oneunit(s))
-    fill!(f.I, zero(eltype(f.I)))
-    TE = typeof(norm(f.I)*s/oneunit(s))
-    return IteratedIntegration.alloc_segbuf(TX, TI, TE)
-end
-function init_segbuf(f::BatchIntegrand, dom, p, norm)
-    x, s = init_midpoint_scale(dom)
+function init_segbuf(prototype, segs, alg)
+    x, s = init_midpoint_scale(segs)
     u = x/oneunit(x)
     TX = typeof(u)
-    fx_s = zero(eltype(f.y)) * s/oneunit(s)    # TODO BatchIntegrand(InplaceIntegrand) should depend on size of result
+    fx_s = prototype * s/oneunit(s)
     TI = typeof(fx_s)
-    TE = typeof(norm(fx_s))
+    TE = typeof(alg.norm(fx_s))
     return IteratedIntegration.alloc_segbuf(TX, TI, TE)
 end
-function init_cacheval(f, dom, p, alg::QuadGKJL)
-    f isa NestedBatchIntegrand && throw(ArgumentError("QuadGK doesn't support nested batching"))
-    f isa BatchIntegrand && throw(ArgumentError("QuadGK doesn't support batched integrands"))
-    return init_segbuf(f, dom, p, alg.norm)
-end
 
-function do_solve(f::F, dom, p, alg::QuadGKJL, cacheval;
-                    reltol = nothing, abstol = nothing, maxiters = typemax(Int)) where {F}
-    segs = segments(dom)
+function init_cacheval(f::IntegralFunction, dom, p, alg::QuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    prototype = get_prototype(f, get_prototype(segs), p)
+    return init_segbuf(prototype, segs, alg)
+end
+function init_cacheval(f::InplaceIntegralFunction, dom, p, alg::QuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    prototype = get_prototype(f, get_prototype(segs), p)
+    return init_segbuf(prototype, segs, alg), similar(prototype)
+end
+function init_cacheval(f::InplaceBatchIntegralFunction, dom, p, alg::QuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    pt = get_prototype(segs)
+    prototype = get_prototype(f, pt, p)
+    prototype isa AbstractVector || throw(ArgumentError("QuadGKJL only supports batch integrands with vector outputs"))
+    pts = zeros(typeof(pt), 2*alg.order+1)
+    upts = pts / pt
+    return init_segbuf(first(prototype), segs, alg), similar(prototype), pts, upts
+end
+function init_cacheval(f::CommonSolveIntegralFunction, dom, p, alg::QuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    x = get_prototype(segs)
+    cache, integrand, prototype = _init_commonsolvefunction(f, dom, p; x)
+    return init_segbuf(prototype, segs, alg), cache, integrand
+end
+function do_integral(f, dom, p, alg::QuadGKJL, cacheval;
+                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
     # we need to strip units from the limits since infinity transformations change the units
     # of the limits, which can break the segbuf
     u = oneunit(eltype(dom))
-    usegs = map(x -> x/u, segs)
-    if f isa InplaceIntegrand
-        g! = (y, x) -> f.f!(y, u*x, p)
-        result = f.I / u
-        val, err = quadgk!(g!, result, usegs..., maxevals = maxiters,
-                        rtol = reltol, atol = isnothing(abstol) ? abstol : abstol/u, order = alg.order, norm = alg.norm, segbuf=cacheval)
-        return IntegralSolution(f.I .= u .* val, u*err, true, -1)
-    else
-        g = x -> f(u*x, p)
-        val, err = quadgk(g, usegs..., maxevals = maxiters,
-                        rtol = reltol, atol = isnothing(abstol) ? abstol : abstol/u, order = alg.order, norm = alg.norm, segbuf=cacheval)
-        return IntegralSolution(u*val, u*err, true, -1)
-    end
+    usegs = map(x -> x/u, dom)
+    atol = isnothing(abstol) ? abstol : abstol/u
+    val, err = call_quadgk(f, p, u, usegs, cacheval; maxevals = maxiters, rtol = reltol, atol, order = alg.order, norm = alg.norm)
+    value = u*val
+    retcode = err < max(something(atol, zero(err)), alg.norm(val)*something(reltol, isnothing(atol) ? sqrt(eps(one(eltype(usegs)))) : 0)) ? Success : Failure
+    stats = (; error=u*err)
+    return IntegralSolution(value, retcode, stats)
+end
+function call_quadgk(f::IntegralFunction, p, u, usegs, cacheval; kws...)
+    quadgk(x -> f.f(u*x, p), usegs...; kws..., segbuf=cacheval)
+end
+function call_quadgk(f::InplaceIntegralFunction, p, u, usegs, cacheval; kws...)
+    # TODO allocate everything in the QuadGK.InplaceIntegrand in the cacheval
+    quadgk!((y, x) -> f.f!(y, u*x, p), cacheval[2], usegs...; kws..., segbuf=cacheval[1])
+end
+function call_quadgk(f::InplaceBatchIntegralFunction, p, u, usegs, cacheval; kws...)
+    pts = cacheval[3]
+    g = BatchIntegrand((y, x) -> f.f!(y, resize!(pts, length(x)) .= u .* x, p), cacheval[2], cacheval[4]; max_batch=f.max_batch)
+    quadgk(g, usegs...; kws..., segbuf=cacheval[1])
+end
+function call_quadgk(f::CommonSolveIntegralFunction, p, u, usegs, cacheval; kws...)
+    # cache = cacheval[2] could call do_solve!(cache, f, x, p) to fully specialize
+    integrand = cacheval[3]
+    quadgk(x -> integrand(u * x, p), usegs...; kws..., segbuf=cacheval[1])
 end
 
+
 """
     HCubatureJL(; norm=norm, initdiv=1)
 
-A copy of `HCubatureJL` from Integrals.jl.
+Multi-dimensional h-adaptive cubature from HCubature.jl.
 """
 struct HCubatureJL{N} <: IntegralAlgorithm
     norm::N
@@ -102,25 +112,30 @@ struct HCubatureJL{N} <: IntegralAlgorithm
 end
 HCubatureJL(; norm=norm, initdiv=1) = HCubatureJL(norm, initdiv)
 
-function init_cacheval(f, dom, p, ::HCubatureJL)
-    f isa NestedBatchIntegrand && throw(ArgumentError("HCubatureJL doesn't support nested batching"))
-    f isa BatchIntegrand && throw(ArgumentError("HCubatureJL doesn't support batching"))
-    return Some(nothing)
+function init_cacheval(f::IntegralFunction, dom, p, ::HCubatureJL; kws...)
+    # TODO utilize hcubature_buffer
+    return
+end
+function init_cacheval(f::CommonSolveIntegralFunction, dom, p, ::HCubatureJL; kws...)
+    cache, integrand, = _init_commonsolvefunction(f, dom, p)
+    return cache, integrand
 end
 
-function do_solve(f, dom, p, alg::HCubatureJL, cacheval;
-                    reltol = 0, abstol = 0, maxiters = typemax(Int))
-
-    g = if f isa InplaceIntegrand
-        fx = f.I / oneunit(eltype(dom))
-        x -> (f.f!(fx, x, p); fx*one(eltype(dom)))
-    else
-        x -> f(x, p)
-    end
+function do_integral(f, dom, p, alg::HCubatureJL, cacheval; reltol = 0, abstol = 0, maxiters = typemax(Int))
     a, b = endpoints(dom)
+    g = hcubature_integrand(f, p, a, b, cacheval)
     routine = a isa Number ? hquadrature : hcubature
-    val, err = routine(g, a, b; norm = alg.norm, initdiv = alg.initdiv, atol=abstol, rtol=reltol, maxevals=maxiters)
-    return IntegralSolution(val, err, true, -1)
+    value, error = routine(g, a, b; norm = alg.norm, initdiv = alg.initdiv, atol=abstol, rtol=reltol, maxevals=maxiters)
+    retcode = error < max(something(abstol, zero(error)), alg.norm(value)*something(reltol, isnothing(abstol) ? sqrt(eps(eltype(a))) : abstol)) ? Success : Failure
+    stats = (; error)
+    return IntegralSolution(value, retcode, stats)
+end
+function hcubature_integrand(f::IntegralFunction, p, a, b, cacheval)
+    x -> f.f(x, p)
+end
+function hcubature_integrand(f::CommonSolveIntegralFunction, p, a, b, cacheval)
+    integrand = cacheval[2]
+    return x -> integrand(x, p)
 end
 
 """
@@ -150,7 +165,7 @@ The default quadrature rule is [`trapz`](@ref), although other packages provide
     alg = QuadratureFunction(fun=gausslegendre, npt=100)
 
 `nthreads` sets the numbers of threads used to parallelize the quadrature only when the
-integrand is a [`BatchIntegrand`](@ref), in which case the user must parallelize the
+integrand is a , in which case the user must parallelize the
 integrand evaluations. For no threading set `nthreads=1`.
 """
 struct QuadratureFunction{F} <: IntegralAlgorithm
@@ -159,27 +174,43 @@ struct QuadratureFunction{F} <: IntegralAlgorithm
     nthreads::Int
 end
 QuadratureFunction(; fun=trapz, npt=50, nthreads=1) = QuadratureFunction(fun, npt, nthreads)
-init_buffer(f, len) = nothing
-init_buffer(f::BatchIntegrand, len) = Vector{eltype(f.y)}(undef, len)
-function init_cacheval(f, dom::PuncturedInterval, p, alg::QuadratureFunction)
-    f isa NestedBatchIntegrand && throw(ArgumentError("QuadratureFunction doesn't support nested batching"))
-    buf = init_buffer(f, alg.nthreads)
+
+function init_rule(dom, alg::QuadratureFunction)
     x, w = alg.fun(alg.npt)
-    return (rule=[(w,x) for (w,x) in zip(w,x)], buffer=buf)
+    return [(w,x) for (w,x) in zip(w,x)]
+end
+function init_autosymptr_cache(f::IntegralFunction, dom, p, bufsize; kws...)
+    return (; buffer=nothing)
+end
+function init_autosymptr_cache(f::InplaceIntegralFunction, dom, p, bufsize; kws...)
+    x = get_prototype(dom)
+    proto = get_prototype(f, x, p)
+    y = similar(proto)
+    ytmp = similar(proto)
+    I = y * prod(x)
+    Itmp = similar(I)
+    return (; buffer=nothing, I, Itmp, y, ytmp)
+end
+function init_autosymptr_cache(f::InplaceBatchIntegralFunction, dom, p, bufsize; kws...)
+    x0 = get_prototype(dom)
+    proto=get_prototype(f, x0, p)
+    return (; buffer=similar(proto, bufsize), y=similar(proto, bufsize), x=Vector{typeof(x0)}(undef, bufsize))
+end
+function init_autosymptr_cache(f::CommonSolveIntegralFunction, dom, p, bufsize; kws...)
+    cache, integrand, = _init_commonsolvefunction(f, dom, p)
+    return (; buffer=nothing, cache, integrand)
+end
+function init_cacheval(f, dom, p, alg::QuadratureFunction; kws...)
+    rule = init_rule(dom, alg)
+    cache = init_autosymptr_cache(f, dom, p, alg.npt; kws...)
+    return (; rule, cache...)
 end
 
-function do_solve(f, dom, p, alg::QuadratureFunction, cacheval;
+function do_integral(f, dom, p, alg::QuadratureFunction, cacheval;
                     reltol = nothing, abstol = nothing, maxiters = typemax(Int))
     rule = cacheval.rule; buffer=cacheval.buffer
     segs = segments(dom)
-    g = if f isa BatchIntegrand
-        xx = eltype(f.x) === Nothing ? typeof((segs[1]+segs[end])/2)[] : f.x
-        AutoSymPTR.BatchIntegrand((y,x) -> f.f!(y,x,p), f.y, xx, max_batch=f.max_batch)
-    elseif f isa InplaceIntegrand
-        AutoSymPTR.InplaceIntegrand((y,x) -> f.f!(y,x,p), f.I)
-    else
-        x -> f(x, p)
-    end
+    g = autosymptr_integrand(f, p, segs, cacheval)
     A = sum(1:length(segs)-1) do i
         a, b = segs[i], segs[i+1]
         s = (b-a)/2
@@ -187,505 +218,18 @@ function do_solve(f, dom, p, alg::QuadratureFunction, cacheval;
         return AutoSymPTR.quadsum(arule, g, s, buffer)
     end
 
-    return IntegralSolution(A, nothing, true, -1)
-end
-
-# Here we put the quadrature algorithms from IteratedIntegration
-
-"""
-    AuxQuadGKJL(; order = 7, norm = norm)
-
-Generalization of the QuadGKJL provided by Integrals.jl that allows for `AuxValue`d
-integrands for auxiliary integration and multi-threaded evaluation with the `batch` argument
-to `IntegralProblem`
-"""
-struct AuxQuadGKJL{F} <: IntegralAlgorithm
-    order::Int
-    norm::F
-end
-function AuxQuadGKJL(; order = 7, norm = norm)
-    return AuxQuadGKJL(order, norm)
-end
-
-function init_cacheval(f, dom, p, alg::AuxQuadGKJL)
-    f isa NestedBatchIntegrand && throw(ArgumentError("AuxQuadGKJL doesn't support nested batching"))
-    return init_segbuf(f, dom, p, alg.norm)
-end
-
-function do_solve(f, dom, p, alg::AuxQuadGKJL, cacheval;
-                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
-
-    segs = segments(dom)
-    u = oneunit(eltype(dom))
-    usegs = map(x -> x/u, segs)
-    if f isa InplaceIntegrand
-        g! = (y, x) -> f.f!(y, u*x, p)
-        result = f.I / u
-        val, err = auxquadgk!(g!, result, usegs, maxevals = maxiters,
-                        rtol = reltol, atol = isnothing(abstol) ? abstol : abstol/u, order = alg.order, norm = alg.norm, segbuf=cacheval)
-        return IntegralSolution(f.I .= u .* val, u*err, true, -1)
-    elseif f isa BatchIntegrand
-        xx = eltype(f.x) === Nothing ? typeof((segs[1]+segs[end])/2)[] : f.x
-        g_ = (y, x) -> (resize!(xx, length(x)); f.f!(y, xx .= u .* x, p))
-        g = IteratedIntegration.AuxQuadGK.BatchIntegrand(g_, f.y, xx/u, max_batch=f.max_batch)
-        val, err = auxquadgk(g, usegs, maxevals = maxiters,
-                        rtol = reltol, atol = isnothing(abstol) ? abstol : abstol/u, order = alg.order, norm = alg.norm, segbuf=cacheval)
-        return IntegralSolution(u*val, u*err, true, -1)
-    else
-        g = x -> f(u*x, p)
-        val, err = auxquadgk(g, usegs, maxevals = maxiters,
-                        rtol = reltol, atol = isnothing(abstol) ? abstol : abstol/u, order = alg.order, norm = alg.norm, segbuf=cacheval)
-        return IntegralSolution(u*val, u*err, true, -1)
-    end
-end
-
-"""
-    ContQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.ContQuadGK.NewtonDeflation())
-
-A 1d contour deformation quadrature scheme for scalar, complex-valued integrands. It
-defaults to regular `quadgk` behavior on the real axis, but if it finds a root of 1/f
-nearby, in the sense of Bernstein ellipse for the standard segment `[-1,1]` with semiaxes
-`cosh(rho)` and `sinh(rho)`, on either the upper/lower half planes, then it dents the
-contour away from the presumable pole.
-"""
-struct ContQuadGKJL{F,M} <: IntegralAlgorithm
-    order::Int
-    norm::F
-    rho::Float64
-    rootmeth::M
-end
-function ContQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.ContQuadGK.NewtonDeflation())
-    return ContQuadGKJL(order, norm, rho, rootmeth)
-end
-
-function init_cacheval(f, dom, p, alg::ContQuadGKJL)
-    f isa NestedBatchIntegrand && throw(ArgumentError("ContQuadGK doesn't support nested batching"))
-    f isa BatchIntegrand && throw(ArgumentError("ContQuadGK doesn't support batching"))
-    f isa InplaceIntegrand && throw(ArgumentError("ContQuadGK doesn't support inplace integrands"))
-
-    a, b = endpoints(dom)
-    x, s = (a+b)/2, (b-a)/2
-    TX = typeof(x)
-    fx_s = one(ComplexF64) * s # currently the integrand is forcibly written to a ComplexF64 buffer
-    TI = typeof(fx_s)
-    TE = typeof(alg.norm(fx_s))
-    r_segbuf = IteratedIntegration.ContQuadGK.PoleSegment{TX,TI,TE}[]
-    fc_s = f(complex(x), p) * complex(s) # the regular evalrule is used on complex segments
-    TCX = typeof(complex(x))
-    TCI = typeof(fc_s)
-    TCE = typeof(alg.norm(fc_s))
-    c_segbuf = IteratedIntegration.ContQuadGK.Segment{TCX,TCI,TCE}[]
-    return (r=r_segbuf, c=c_segbuf)
-end
-
-function do_solve(f, dom, p, alg::ContQuadGKJL, cacheval;
-                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
-
-    segs = segments(dom)
-    g = x -> f(x, p)
-    val, err = contquadgk(g, segs, maxevals = maxiters, rho = alg.rho, rootmeth = alg.rootmeth,
-                    rtol = reltol, atol = abstol, order = alg.order, norm = alg.norm, r_segbuf=cacheval.r, c_segbuf=cacheval.c)
-    return IntegralSolution(val, err, true, -1)
-end
-
-"""
-    MeroQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.MeroQuadGK.NewtonDeflation())
-
-A 1d pole subtraction quadrature scheme for scalar, complex-valued integrands that are
-meromorphic. It defaults to regular `quadgk` behavior on the real axis, but if it finds
-nearby roots of 1/f, in the sense of Bernstein ellipse for the standard segment `[-1,1]`
-with semiaxes `cosh(rho)` and `sinh(rho)`, it attempts pole subtraction on that segment.
-"""
-struct MeroQuadGKJL{F,M} <: IntegralAlgorithm
-    order::Int
-    norm::F
-    rho::Float64
-    rootmeth::M
-end
-function MeroQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.MeroQuadGK.NewtonDeflation())
-    return MeroQuadGKJL(order, norm, rho, rootmeth)
-end
-
-function init_cacheval(f, dom, p, alg::MeroQuadGKJL)
-    f isa NestedBatchIntegrand && throw(ArgumentError("MeroQuadGK doesn't support nested batching"))
-    f isa BatchIntegrand && throw(ArgumentError("MeroQuadGK doesn't support batching"))
-    f isa InplaceIntegrand && throw(ArgumentError("MeroQuadGK doesn't support inplace integrands"))
-    a, b = endpoints(dom)
-    x, s = (a + b)/2, (b-a)/2
-    fx_s = one(ComplexF64) * s # ignore the actual integrand since it is written to CF64 array
-    err = alg.norm(fx_s)
-    return IteratedIntegration.alloc_segbuf(typeof(x), typeof(fx_s), typeof(err))
-end
-
-function do_solve(f, dom, p, alg::MeroQuadGKJL, cacheval;
-                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
-
-    segs = segments(dom)
-    g = x -> f(x, p)
-    val, err = meroquadgk(g, segs, maxevals = maxiters, rho = alg.rho, rootmeth = alg.rootmeth,
-                    rtol = reltol, atol = abstol, order = alg.order, norm = alg.norm, segbuf=cacheval)
-    return IntegralSolution(val, err, true, -1)
-end
-
-# Algorithms from AutoSymPTR.jl
-
-"""
-    MonkhorstPack(; npt=50, syms=nothing, nthreads=1)
-
-Periodic trapezoidal rule with a fixed number of k-points per dimension, `npt`,
-using the `PTR` rule from [AutoSymPTR.jl](https://github.com/lxvm/AutoSymPTR.jl).
-`nthreads` sets the numbers of threads used to parallelize the quadrature only when the
-integrand is a [`BatchIntegrand`](@ref), in which case the user must parallelize the
-integrand evaluations. For no threading set `nthreads=1`.
-**The caller should check that the integral is converged w.r.t. `npt`**.
-"""
-struct MonkhorstPack{S} <: IntegralAlgorithm
-    npt::Int
-    syms::S
-    nthreads::Int
-end
-MonkhorstPack(; npt=50, syms=nothing, nthreads=1) = MonkhorstPack(npt, syms, nthreads)
-function init_rule(dom::Basis, alg::MonkhorstPack)
-    # rule = AutoSymPTR.MonkhorstPackRule(alg.syms, alg.a, alg.nmin, alg.nmax, alg.n₀, alg.Δn)
-    # return rule(eltype(dom), Val(ndims(dom)))
-    if alg.syms === nothing
-        return AutoSymPTR.PTR(eltype(dom), Val(ndims(dom)), alg.npt)
-    else
-        return AutoSymPTR.MonkhorstPack(eltype(dom), Val(ndims(dom)), alg.npt, alg.syms)
-    end
-end
-
-rule_type(::AutoSymPTR.PTR{N,T}) where {N,T} = SVector{N,T}
-rule_type(::AutoSymPTR.MonkhorstPack{N,T}) where {N,T} = SVector{N,T}
-
-function init_cacheval(f, dom::Basis, p, alg::MonkhorstPack)
-    f isa NestedBatchIntegrand && throw(ArgumentError("MonkhorstPack doesn't support nested batching"))
-    rule = init_rule(dom, alg)
-    buf = init_buffer(f, alg.nthreads)
-    return (rule=rule, buffer=buf)
-end
-
-function do_solve(f, dom, p, alg::MonkhorstPack, cacheval;
-                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
-    g = if f isa BatchIntegrand
-        xx = eltype(f.x) === Nothing ? typeof(dom*zero(rule_type(cacheval.rule)))[] : f.x
-        AutoSymPTR.BatchIntegrand((y,x) -> f.f!(y,x,p), f.y, xx, max_batch=f.max_batch)
-    elseif f isa InplaceIntegrand
-        AutoSymPTR.InplaceIntegrand((y,x) -> f.f!(y,x,p), f.I)
-    else
-        x -> f(x, p)
-    end
-    I = cacheval.rule(g, dom, cacheval.buffer)
-    return IntegralSolution(I, nothing, true, -1)
-end
-
-"""
-    AutoSymPTRJL(; norm=norm, a=1.0, nmin=50, nmax=1000, n₀=6, Δn=log(10), keepmost=2, nthreads=1)
-
-Periodic trapezoidal rule with automatic convergence to tolerances passed to the
-solver with respect to `norm` using the routine `autosymptr` from
-[AutoSymPTR.jl](https://github.com/lxvm/AutoSymPTR.jl).
-`nthreads` sets the numbers of threads used to parallelize the quadrature only when the
-integrand is a [`BatchIntegrand`](@ref), in which case the user must parallelize the
-integrand evaluations. For no threading set `nthreads=1`.
-**This algorithm is the most efficient for smooth integrands**.
-"""
-struct AutoSymPTRJL{F,S} <: IntegralAlgorithm
-    norm::F
-    a::Float64
-    nmin::Int
-    nmax::Int
-    n₀::Float64
-    Δn::Float64
-    keepmost::Int
-    syms::S
-    nthreads::Int
-end
-function AutoSymPTRJL(; norm=norm, a=1.0, nmin=50, nmax=1000, n₀=6.0, Δn=log(10), keepmost=2, syms=nothing, nthreads=1)
-    return AutoSymPTRJL(norm, a, nmin, nmax, n₀, Δn, keepmost, syms, nthreads)
-end
-function init_rule(dom::Basis, alg::AutoSymPTRJL)
-    return AutoSymPTR.MonkhorstPackRule(alg.syms, alg.a, alg.nmin, alg.nmax, alg.n₀, alg.Δn)
-end
-function init_cacheval(f, dom::Basis, p, alg::AutoSymPTRJL)
-    f isa NestedBatchIntegrand && throw(ArgumentError("AutoSymPTRJL doesn't support nested batching"))
-    rule = init_rule(dom, alg)
-    cache = AutoSymPTR.alloc_cache(eltype(dom), Val(ndims(dom)), rule)
-    buffer = init_buffer(f, alg.nthreads)
-    return (rule=rule, cache=cache, buffer=buffer)
-end
-
-function do_solve(f, dom, p, alg::AutoSymPTRJL, cacheval;
-                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
-
-    g = if f isa BatchIntegrand
-        xx = eltype(f.x) === Nothing ? typeof(dom*zero(rule_type(cacheval.cache[1])))[] : f.x
-        AutoSymPTR.BatchIntegrand((y,x) -> f.f!(y,x,p), f.y, xx, max_batch=f.max_batch)
-    elseif f isa InplaceIntegrand
-        AutoSymPTR.InplaceIntegrand((y,x) -> f.f!(y,x,p), f.I)
-    else
-        x -> f(x, p)
-    end
-    val, err = autosymptr(g, dom; syms = alg.syms, rule = cacheval.rule, cache = cacheval.cache, keepmost = alg.keepmost,
-        abstol = abstol, reltol = reltol, maxevals = maxiters, norm=alg.norm, buffer=cacheval.buffer)
-    return IntegralSolution(val, err, true, -1)
-end
-
-# Meta-algorithms
-
-"""
-    NestedQuad(alg::IntegralAlgorithm)
-    NestedQuad(algs::IntegralAlgorithm...)
-
-Nested integration by repeating one quadrature algorithm or composing a list of algorithms.
-The domain of integration must be an `AbstractIteratedLimits` from the
-IteratedIntegration.jl package. Analogous to `nested_quad` from IteratedIntegration.jl.
-The integrand should expect `SVector` inputs. Do not use this for very high-dimensional
-integrals, since the compilation time scales very poorly with respect to dimensionality.
-In order to improve the compilation time, FunctionWrappers.jl is used to enforce type
-stability of the integrand, so you should always pick the widest integration limit type so
-that inference works properly. For example, if [`ContQuadGKJL`](@ref) is used as an
-algorithm in the nested scheme, then the limits of integration should be made complex.
-"""
-struct NestedQuad{T} <: IntegralAlgorithm
-    algs::T
-    NestedQuad(alg::IntegralAlgorithm) = new{typeof(alg)}(alg)
-    NestedQuad(algs::Tuple{Vararg{IntegralAlgorithm}}) = new{typeof(algs)}(algs)
-end
-NestedQuad(algs::IntegralAlgorithm...) = NestedQuad(algs)
-
-# this function helps create a tree of the cachevals used by each quadrature
-function nested_cacheval(f::F, p::P, algs, segs, lims, state, x, xs...) where {F,P}
-    dom = PuncturedInterval(segs)
-    a, b = segs[1], segs[2]
-    dim = ndims(lims)
-    alg = algs[dim]
-    mid = (a+b)/2 # sample point that should be safe to evaluate
-    next = limit_iterate(lims, state, mid) # see what the next limit gives
-    if xs isa Tuple{} # the next integral takes us to the inner integral
-        # base case test integrand of the inner integral
-        # we need to pass dummy integrands to all the outer integrals so that they can build
-        # their caches with the right types
-        if f isa BatchIntegrand || f isa NestedBatchIntegrand
-            # Batch integrate the inner integral only
-            cacheval = init_cacheval(BatchIntegrand(nothing, f.y, f.x, max_batch=f.max_batch), dom, p, alg)
-            return (nothing, cacheval, oneunit(eltype(f.y))*mid)
-        elseif f isa InplaceIntegrand
-            # Inplace integrate through the whole nest structure
-            fxi = f.I*mid/oneunit(prod(next))
-            cacheval = init_cacheval(InplaceIntegrand(nothing, fxi), dom, p, alg)
-            return (nothing, cacheval, fxi)
-        else
-            fx = f(next,p)
-            cacheval = init_cacheval((x, p) -> fx, dom, p, alg)
-            return (nothing, cacheval, fx*mid)
-        end
-    elseif f isa NestedBatchIntegrand
-        algs_ = algs[1:dim-1]
-        # numbered names to avoid type instabilities (we are not using dispatch, but the
-        # compiler's optimization for the recursive function's argument types)
-        nest0 = nested_cacheval(f.f[1], p, algs_, next..., x, xs[1:dim-2]...)
-        cacheval = init_cacheval(BatchIntegrand(nothing, f.y, f.x, max_batch=f.max_batch), dom, p, alg)
-        return (ntuple(n -> n == 1 ? nest0 : deepcopy(nest0), Val(length(f.f))), cacheval, nest0[3]*mid)
-    else
-        algs_ = algs[1:dim-1]
-        nest1 = nested_cacheval(f, p, algs_, next..., x, xs[1:dim-2]...)
-        h = nest1[3]
-        hx = h*mid
-        # units may change for outer integral
-        if f isa InplaceIntegrand
-            cacheval = init_cacheval(InplaceIntegrand(nothing, hx), dom, p, alg)
-            return (nest1, cacheval, hx)
-        else
-            cacheval = init_cacheval((x, p) -> h, dom, p, alg)
-            return (nest1, cacheval, hx)
-        end
-    end
-end
-function init_cacheval(f, dom::AbstractIteratedLimits, p, alg::NestedQuad)
-    algs = alg.algs isa IntegralAlgorithm ? ntuple(i -> alg.algs, Val(ndims(dom))) : alg.algs
-    return nested_cacheval(f, p, algs, limit_iterate(dom)..., interior_point(dom)...)
-end
-
-function init_nest(f::F, fxx, dom, p,lims, state, algs, cacheval; kws_...) where {F}
-    kws = NamedTuple(kws_)
-    xx = float(oneunit(eltype(dom)))
-    FX = typeof(fxx/xx)
-    TX = typeof(xx)
-    TP = Tuple{typeof(p),typeof(lims),typeof(state)}
-    if algs isa Tuple{} # inner integral
-        if f isa BatchIntegrand
-            return f
-        elseif f isa NestedBatchIntegrand
-            nchunk = length(f.f)
-            return BatchIntegrand(FunctionWrapper{Nothing,Tuple{typeof(f.y),typeof(f.x),TP}}() do y, x, (p, lims, state)
-                Threads.@threads for ichunk in 1:min(nchunk, length(x))
-                    for (i, j) in zip(getchunk(x, ichunk, nchunk, :scatter), getchunk(y, ichunk, nchunk, :scatter))
-                        xi = x[i]
-                        y[j] = f.f[ichunk](limit_iterate(lims, state, xi), p)
-                    end
-                end
-                return nothing
-            end, f.y, f.x, max_batch=f.max_batch)
-        elseif f isa InplaceIntegrand
-            return InplaceIntegrand(FunctionWrapper{Nothing,Tuple{FX,TX,TP}}() do y, x, (p, lims, state)
-                f.f!(y, limit_iterate(lims, state, x), p)
-                return nothing
-            end, f.I)
-        else
-            return FunctionWrapper{FX,Tuple{TX,TP}}() do x, (p, lims, state)
-                return f(limit_iterate(lims, state, x), p)
-            end
-        end
-    else
-        if f isa InplaceIntegrand
-            return InplaceIntegrand(FunctionWrapper{Nothing,Tuple{FX,TX,TP}}() do y, x, (p, lims, state)
-                segs, lims_, state_ = limit_iterate(lims, state, x)
-                len = segs[end] - segs[1]
-                kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
-                do_solve(InplaceIntegrand(f.f!, y), lims_, NestState(p, segs, state_), NestedQuad(algs), cacheval; kwargs...)
-                return nothing
-            end, f.I)
-        elseif f isa NestedBatchIntegrand
-            nchunks = length(f.f)
-            return BatchIntegrand(FunctionWrapper{Nothing,Tuple{typeof(f.y),typeof(f.x),TP}}() do y, x, (p, lims, state)
-                Threads.@threads for ichunk in 1:min(nchunks, length(x))
-                    for (i, j) in zip(getchunk(x, ichunk, nchunks, :scatter), getchunk(y, ichunk, nchunks, :scatter))
-                        xi = x[i]
-                        segs, lims_, state_ = limit_iterate(lims, state, xi)
-                        len = segs[end] - segs[1]
-                        kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
-                        y[j] = do_solve(f.f[ichunk], lims_, NestState(p, segs, state_), NestedQuad(algs), cacheval[ichunk]; kwargs...).u
-                    end
-                end
-                return nothing
-            end, f.y, f.x, max_batch=f.max_batch)
-        else
-            return FunctionWrapper{FX,Tuple{TX,TP}}() do x, (p, lims, state)
-                segs, lims_, state_ = limit_iterate(lims, state, x)
-                len = segs[end] - segs[1]
-                kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
-                sol = do_solve(f, lims_, NestState(p, segs, state_), NestedQuad(algs), cacheval; kwargs...)
-                return sol.u
-            end
-        end
-    end
+    return IntegralSolution(A, Success, (; numevals = length(cacheval.rule)*(length(segs)-1)))
 end
-
-struct NestState{P,G,S}
-    p::P
-    segs::G
-    state::S
+function autosymptr_integrand(f::IntegralFunction, p, segs, cacheval)
+    x -> f.f(x, p)
 end
-
-function do_solve(f::F, lims::AbstractIteratedLimits, p_, alg::NestedQuad, cacheval; kws...) where {F}
-    g, p, segs, state = if p_ isa NestState
-        gg = if f isa NestedBatchIntegrand
-            fx = eltype(f.x) === Nothing ? float(eltype(p_.segs))[] : f.x
-            NestedBatchIntegrand(f.f, f.y, fx, max_batch=f.max_batch)
-        else
-            f
-        end
-        gg, p_.p, p_.segs, p_.state
-    else
-        seg, lim, sta = limit_iterate(lims)
-        gg = if f isa BatchIntegrand
-            fx = eltype(f.x) === Nothing ? typeof(interior_point(lims))[] : f.x
-            BatchIntegrand(f.y, similar(f.x, eltype(eltype(fx))), max_batch=f.max_batch) do y, xs, (p, lims, state)
-                resize!(fx, length(xs))
-                f.f!(y, map!(x -> limit_iterate(lims, state, x), fx, xs), p)
-            end
-        elseif f isa NestedBatchIntegrand
-            # this should be done recursively at the outermost level, but it is lazy.
-            fx = eltype(f.x) === Nothing ? float(eltype(seg))[] : f.x
-            NestedBatchIntegrand(f.f, f.y, fx, max_batch=f.max_batch)
-        else
-            f
-        end
-        gg, p_, seg, sta
-    end
-    dom = PuncturedInterval(segs)
-    dim = ndims(lims) # constant propagation :)
-    algs = alg.algs isa IntegralAlgorithm ? ntuple(i -> alg.algs, Val(dim)) : alg.algs
-    nest = init_nest(g, cacheval[3], dom, p, lims, state, algs[1:dim-1], cacheval[1]; kws...)
-    return do_solve(nest, dom, (p, lims, state), algs[dim], cacheval[2]; kws...)
+function autosymptr_integrand(f::InplaceIntegralFunction, p, segs, cacheval)
+    AutoSymPTR.InplaceIntegrand((y,x) -> f.f!(y,x,p), cacheval.I, cacheval.Itmp, cacheval.y, cacheval.ytmp)
 end
-
-"""
-    AbsoluteEstimate(est_alg, abs_alg; kws...)
-
-Most algorithms are efficient when using absolute error tolerances, but how do you know the
-size of the integral? One option is to estimate it using second algorithm.
-
-A multi-algorithm to estimate an integral using an `est_alg` to generate a rough estimate of
-the integral that is combined with a user's relative tolerance to re-calculate the integral
-to higher accuracy using the `abs_alg`. The keywords passed to the algorithm may include
-`reltol`, `abstol` and `maxiters` and are given to the `est_alg` solver. They should limit
-the amount of work of `est_alg` so as to only generate an order-of-magnitude estimate of the
-integral. The tolerances passed to `abs_alg` are `abstol=max(abstol,reltol*norm(I))` and
-`reltol=0`.
-"""
-struct AbsoluteEstimate{E<:IntegralAlgorithm,A<:IntegralAlgorithm,F,K<:NamedTuple} <: IntegralAlgorithm
-    est_alg::E
-    abs_alg::A
-    norm::F
-    kws::K
-end
-function AbsoluteEstimate(est_alg, abs_alg; norm=norm, kwargs...)
-    kws = NamedTuple(kwargs)
-    checkkwargs(kws)
-    return AbsoluteEstimate(est_alg, abs_alg, norm, kws)
-end
-
-function init_cacheval(f, dom, p, alg::AbsoluteEstimate)
-    return (est=init_cacheval(f, dom, p, alg.est_alg),
-            abs=init_cacheval(f, dom, p, alg.abs_alg))
-end
-
-function do_solve(f, dom, p, alg::AbsoluteEstimate, cacheval;
-                    abstol=nothing, reltol=nothing, maxiters=typemax(Int))
-    sol = do_solve(f, dom, p, alg.est_alg, cacheval.est; alg.kws...)
-    val = alg.norm(sol.u) # has same units as sol
-    rtol = reltol === nothing ? sqrt(eps(one(val))) : reltol # use the precision of the solution to set the default relative tolerance
-    atol = max(abstol === nothing ? zero(val) : abstol, rtol*val)
-    return do_solve(f, dom, p, alg.abs_alg, cacheval.abs;
-                    abstol=atol, reltol=zero(rtol), maxiters=maxiters)
+function autosymptr_integrand(f::InplaceBatchIntegralFunction, p, segs, cacheval)
+    AutoSymPTR.BatchIntegrand((y,x) -> f.f!(y,x,p), cacheval.y, cacheval.x, max_batch=f.max_batch)
 end
-
-
-"""
-    EvalCounter(::IntegralAlgorithm)
-
-An algorithm which counts the evaluations used by another algorithm.
-The count is stored in the `sol.numevals` field.
-"""
-struct EvalCounter{T<:IntegralAlgorithm} <: IntegralAlgorithm
-    alg::T
-end
-
-function init_cacheval(f, dom, p, alg::EvalCounter)
-    return init_cacheval(f, dom, p, alg.alg)
-end
-
-function do_solve(f, dom, p, alg::EvalCounter, cacheval; kws...)
-    if f isa InplaceIntegrand
-        ni::Int = 0
-        gi = (y, x, p) -> (ni += 1; f.f!(y, x, p))
-        soli = do_solve(InplaceIntegrand(gi, f.I), dom, p, alg.alg, cacheval; kws...)
-        return IntegralSolution(soli.u, soli.resid, soli.retcode, ni)
-    elseif f isa BatchIntegrand
-        nb::Int = 0
-        gb = (y, x, p) -> (nb += length(x); f.f!(y, x, p))
-        solb = do_solve(BatchIntegrand(gb, f.y, f.x, max_batch=f.max_batch), dom, p, alg.alg, cacheval; kws...)
-        return IntegralSolution(solb.u, solb.resid, solb.retcode, nb)
-    elseif f isa NestedBatchIntegrand
-        # TODO allocate a bunch of accumulators associated with the leaves of the nested
-        # integrand or rewrap the algorithms in NestedQuad
-        error("NestedBatchIntegrand not yet supported with EvalCounter")
-    else
-        n::Int = 0
-        g = (x, p) -> (n += 1; f(x, p)) # we need let to prevent Core.Box around the captured variable
-        sol = do_solve(g, dom, p, alg.alg, cacheval; kws...)
-        return IntegralSolution(sol.u, sol.resid, sol.retcode, n)
-    end
+function autosymptr_integrand(f::CommonSolveIntegralFunction, p, segs, cacheval)
+    integrand = cacheval.integrand
+    return x -> integrand(x, p)
 end
diff --git a/src/algorithms_autosymptr.jl b/src/algorithms_autosymptr.jl
new file mode 100644
index 0000000..19aa389
--- /dev/null
+++ b/src/algorithms_autosymptr.jl
@@ -0,0 +1,95 @@
+# We could move these into an extension, although QuadratureFunction also uses AutoSymPTR.jl
+# for evaluation
+
+
+"""
+    MonkhorstPack(; npt=50, syms=nothing, nthreads=1)
+
+Periodic trapezoidal rule with a fixed number of k-points per dimension, `npt`,
+using the `PTR` rule from [AutoSymPTR.jl](https://github.com/lxvm/AutoSymPTR.jl).
+`nthreads` sets the numbers of threads used to parallelize the quadrature only when the
+integrand is a , in which case the user must parallelize the
+integrand evaluations. For no threading set `nthreads=1`.
+**The caller should check that the integral is converged w.r.t. `npt`**.
+"""
+struct MonkhorstPack{S} <: IntegralAlgorithm
+    npt::Int
+    syms::S
+    nthreads::Int
+end
+MonkhorstPack(; npt=50, syms=nothing, nthreads=1) = MonkhorstPack(npt, syms, nthreads)
+function init_rule(dom, alg::MonkhorstPack)
+    # rule = AutoSymPTR.MonkhorstPackRule(alg.syms, alg.a, alg.nmin, alg.nmax, alg.n₀, alg.Δn)
+    # return rule(eltype(dom), Val(ndims(dom)))
+    if alg.syms === nothing
+        return AutoSymPTR.PTR(eltype(dom), Val(ndims(dom)), alg.npt)
+    else
+        return AutoSymPTR.MonkhorstPack(eltype(dom), Val(ndims(dom)), alg.npt, alg.syms)
+    end
+end
+
+function init_cacheval(f, dom, p, alg::MonkhorstPack; kws...)
+    b = get_basis(dom)
+    rule = init_rule(b, alg)
+    cache = init_autosymptr_cache(f, b, p, alg.nthreads; kws...)
+    return (; rule, cache...)
+end
+function do_integral(f, dom, p, alg::MonkhorstPack, cacheval;
+                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
+    b = get_basis(dom)
+    g = autosymptr_integrand(f, p, b, cacheval)
+    value = cacheval.rule(g, b, cacheval.buffer)
+    retcode = Success
+    stats = (; numevals=length(cacheval.rule))
+    return IntegralSolution(value, retcode, stats)
+end
+
+"""
+    AutoSymPTRJL(; norm=norm, a=1.0, nmin=50, nmax=1000, n₀=6, Δn=log(10), keepmost=2, nthreads=1)
+
+Periodic trapezoidal rule with automatic convergence to tolerances passed to the
+solver with respect to `norm` using the routine `autosymptr` from
+[AutoSymPTR.jl](https://github.com/lxvm/AutoSymPTR.jl).
+`nthreads` sets the numbers of threads used to parallelize the quadrature only when the
+integrand is a  in which case the user must parallelize the
+integrand evaluations. For no threading set `nthreads=1`.
+**This algorithm is the most efficient for smooth integrands**.
+"""
+struct AutoSymPTRJL{F,S} <: IntegralAlgorithm
+    norm::F
+    a::Float64
+    nmin::Int
+    nmax::Int
+    n₀::Float64
+    Δn::Float64
+    keepmost::Int
+    syms::S
+    nthreads::Int
+end
+function AutoSymPTRJL(; norm=norm, a=1.0, nmin=50, nmax=1000, n₀=6.0, Δn=log(10), keepmost=2, syms=nothing, nthreads=1)
+    return AutoSymPTRJL(norm, a, nmin, nmax, n₀, Δn, keepmost, syms, nthreads)
+end
+function init_rule(dom, alg::AutoSymPTRJL)
+    return AutoSymPTR.MonkhorstPackRule(alg.syms, alg.a, alg.nmin, alg.nmax, alg.n₀, alg.Δn)
+end
+
+
+function init_cacheval(f, dom, p, alg::AutoSymPTRJL; kws...)
+    b = get_basis(dom)
+    rule = init_rule(dom, alg)
+    rule_cache = AutoSymPTR.alloc_cache(eltype(dom), Val(ndims(dom)), rule)
+    cache = init_autosymptr_cache(f, b, p, alg.nthreads; kws...)
+    return (; rule, rule_cache, cache...)
+end
+
+function do_integral(f, dom, p, alg::AutoSymPTRJL, cacheval;
+                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
+
+    g = autosymptr_integrand(f, p, dom, cacheval)
+    bas = get_basis(dom)
+    value, error = autosymptr(g, bas; syms = alg.syms, rule = cacheval.rule, cache = cacheval.rule_cache, keepmost = alg.keepmost,
+        abstol = abstol, reltol = reltol, maxevals = maxiters, norm=alg.norm, buffer=cacheval.buffer)
+    retcode = error < max(something(abstol, zero(error)), alg.norm(value)*something(reltol, isnothing(abstol) ? sqrt(eps(eltype(bas))) : abstol)) ? Success : Failure
+    stats = (; error)
+    return IntegralSolution(value, retcode, stats)
+end
diff --git a/src/algorithms_iterated.jl b/src/algorithms_iterated.jl
new file mode 100644
index 0000000..6bd1a69
--- /dev/null
+++ b/src/algorithms_iterated.jl
@@ -0,0 +1,191 @@
+"""
+    AuxQuadGKJL(; order = 7, norm = norm)
+
+Generalization of the QuadGKJL provided by Integrals.jl that allows for `AuxValue`d
+integrands for auxiliary integration and multi-threaded evaluation with the `batch` argument
+to `IntegralProblem`
+"""
+struct AuxQuadGKJL{F} <: IntegralAlgorithm
+    order::Int
+    norm::F
+end
+function AuxQuadGKJL(; order = 7, norm = norm)
+    return AuxQuadGKJL(order, norm)
+end
+
+function init_cacheval(f::IntegralFunction, dom, p, alg::AuxQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    prototype = get_prototype(f, get_prototype(segs), p)
+    return init_segbuf(prototype, segs, alg)
+end
+function init_cacheval(f::InplaceIntegralFunction, dom, p, alg::AuxQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    prototype = get_prototype(f, get_prototype(segs), p)
+    return init_segbuf(prototype, segs, alg), similar(prototype)
+end
+function init_cacheval(f::InplaceBatchIntegralFunction, dom, p, alg::AuxQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    pt = get_prototype(segs)
+    prototype = get_prototype(f, pt, p)
+    prototype isa AbstractVector || throw(ArgumentError("QuadGKJL only supports batch integrands with vector outputs"))
+    pts = zeros(typeof(pt), 2*alg.order+1)
+    upts = pts / pt
+    return init_segbuf(first(prototype), segs, alg), similar(prototype), pts, upts
+end
+function init_cacheval(f::CommonSolveIntegralFunction, dom, p, alg::AuxQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    x = get_prototype(segs)
+    cache, integrand, prototype = _init_commonsolvefunction(f, dom, p; x)
+    return init_segbuf(prototype, segs, alg), cache, integrand
+end
+function do_integral(f, dom, p, alg::AuxQuadGKJL, cacheval;
+                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
+    # we need to strip units from the limits since infinity transformations change the units
+    # of the limits, which can break the segbuf
+    u = oneunit(eltype(dom))
+    usegs = map(x -> x/u, dom)
+    atol = isnothing(abstol) ? abstol : abstol/u
+    val, err = call_auxquadgk(f, p, u, usegs, cacheval; maxevals = maxiters, rtol = reltol, atol, order = alg.order, norm = alg.norm)
+    value = u*val
+    retcode = err < max(something(atol, zero(err)), alg.norm(val)*something(reltol, isnothing(atol) ? sqrt(eps(one(eltype(usegs)))) : 0)) ? Success : Failure
+    stats = (; error=u*err)
+    return IntegralSolution(value, retcode, stats)
+end
+function call_auxquadgk(f::IntegralFunction, p, u, usegs, cacheval; kws...)
+    auxquadgk(x -> f.f(u*x, p), usegs...; kws..., segbuf=cacheval)
+end
+function call_auxquadgk(f::InplaceIntegralFunction, p, u, usegs, cacheval; kws...)
+    # TODO allocate everything in the AuxQuadGK.InplaceIntegrand in the cacheval
+    auxquadgk!((y, x) -> f.f!(y, u*x, p), cacheval[2], usegs...; kws..., segbuf=cacheval[1])
+end
+function call_auxquadgk(f::InplaceBatchIntegralFunction, p, u, usegs, cacheval; kws...)
+    pts = cacheval[3]
+    g = IteratedIntegration.AuxQuadGK.BatchIntegrand((y, x) -> f.f!(y, resize!(pts, length(x)) .= u .* x, p), cacheval[2], cacheval[4]; max_batch=f.max_batch)
+    auxquadgk(g, usegs...; kws..., segbuf=cacheval[1])
+end
+function call_auxquadgk(f::CommonSolveIntegralFunction, p, u, usegs, cacheval; kws...)
+    # cache = cacheval[2] could call do_solve!(cache, f, x, p) to fully specialize
+    integrand = cacheval[3]
+    auxquadgk(x -> integrand(u*x, p), usegs...; kws..., segbuf=cacheval[1])
+end
+
+"""
+    ContQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.ContQuadGK.NewtonDeflation())
+
+A 1d contour deformation quadrature scheme for scalar, complex-valued integrands. It
+defaults to regular `quadgk` behavior on the real axis, but if it finds a root of 1/f
+nearby, in the sense of Bernstein ellipse for the standard segment `[-1,1]` with semiaxes
+`cosh(rho)` and `sinh(rho)`, on either the upper/lower half planes, then it dents the
+contour away from the presumable pole.
+"""
+struct ContQuadGKJL{F,M} <: IntegralAlgorithm
+    order::Int
+    norm::F
+    rho::Float64
+    rootmeth::M
+end
+function ContQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.ContQuadGK.NewtonDeflation())
+    return ContQuadGKJL(order, norm, rho, rootmeth)
+end
+
+function init_csegbuf(prototype, dom, alg::ContQuadGKJL)
+    segs = PuncturedInterval(dom)
+    a, b = endpoints(segs)
+    x, s = (a+b)/2, (b-a)/2
+    TX = typeof(x)
+    convert(ComplexF64, prototype)
+    fx_s = one(ComplexF64) * s # currently the integrand is forcibly written to a ComplexF64 buffer
+    TI = typeof(fx_s)
+    TE = typeof(alg.norm(fx_s))
+    r_segbuf = IteratedIntegration.ContQuadGK.PoleSegment{TX,TI,TE}[]
+    fc_s = prototype * complex(s) # the regular evalrule is used on complex segments
+    TCX = typeof(complex(x))
+    TCI = typeof(fc_s)
+    TCE = typeof(alg.norm(fc_s))
+    c_segbuf = IteratedIntegration.ContQuadGK.Segment{TCX,TCI,TCE}[]
+    return (r=r_segbuf, c=c_segbuf)
+end
+function init_cacheval(f::IntegralFunction, dom, p, alg::ContQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    prototype = get_prototype(f, get_prototype(segs), p)
+    init_csegbuf(prototype, dom, alg)
+end
+function init_cacheval(f::CommonSolveIntegralFunction, dom, p, alg::ContQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    cache, integrand, prototype = _init_commonsolvefunction(f, dom, p; x=get_prototype(segs))
+    segbufs = init_csegbuf(prototype, dom, alg)
+    return (; segbufs..., cache, integrand)
+end
+
+function do_integral(f, dom, p, alg::ContQuadGKJL, cacheval;
+                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
+
+    value, err = call_contquadgk(f, p, dom, cacheval;  maxevals = maxiters, rho = alg.rho, rootmeth = alg.rootmeth,
+                    rtol = reltol, atol = abstol, order = alg.order, norm = alg.norm, r_segbuf=cacheval.r, c_segbuf=cacheval.c)
+    retcode = err < max(something(abstol, zero(err)), alg.norm(value)*something(reltol, isnothing(abstol) ? sqrt(eps(one(eltype(dom)))) : 0)) ? Success : Failure
+    stats = (; error=err)
+    return IntegralSolution(value, retcode, stats)
+end
+function call_contquadgk(f::IntegralFunction, p, segs, cacheval; kws...)
+    contquadgk(x -> f.f(x, p), segs; kws...)
+end
+function call_contquadgk(f::CommonSolveIntegralFunction, p, segs, cacheval; kws...)
+    integrand = cacheval.integrand
+    contquadgk(x -> integrand(x, p), segs...; kws...)
+end
+
+"""
+    MeroQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.MeroQuadGK.NewtonDeflation())
+
+A 1d pole subtraction quadrature scheme for scalar, complex-valued integrands that are
+meromorphic. It defaults to regular `quadgk` behavior on the real axis, but if it finds
+nearby roots of 1/f, in the sense of Bernstein ellipse for the standard segment `[-1,1]`
+with semiaxes `cosh(rho)` and `sinh(rho)`, it attempts pole subtraction on that segment.
+"""
+struct MeroQuadGKJL{F,M} <: IntegralAlgorithm
+    order::Int
+    norm::F
+    rho::Float64
+    rootmeth::M
+end
+function MeroQuadGKJL(; order = 7, norm = norm, rho = 1.0, rootmeth = IteratedIntegration.MeroQuadGK.NewtonDeflation())
+    return MeroQuadGKJL(order, norm, rho, rootmeth)
+end
+
+function init_msegbuf(prototype, dom, alg::MeroQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    a, b = endpoints(segs)
+    x, s = (a + b)/2, (b-a)/2
+    convert(ComplexF64, prototype)
+    fx_s = one(ComplexF64) * s # ignore the actual integrand since it is written to CF64 array
+    err = alg.norm(fx_s)
+    return IteratedIntegration.alloc_segbuf(typeof(x), typeof(fx_s), typeof(err))
+end
+function init_cacheval(f::IntegralFunction, dom, p, alg::MeroQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    prototype = get_prototype(f, get_prototype(segs), p)
+    segbuf = init_msegbuf(prototype, dom, alg)
+    return (; segbuf)
+end
+function init_cacheval(f::CommonSolveIntegralFunction, dom, p, alg::MeroQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    cache, integrand, prototype = _init_commonsolvefunction(f, dom, p; x=get_prototype(segs))
+    segbuf = init_msegbuf(prototype, dom, alg)
+    return (; segbuf, cache, integrand)
+end
+
+function do_integral(f, dom, p, alg::MeroQuadGKJL, cacheval;
+                    reltol = nothing, abstol = nothing, maxiters = typemax(Int))
+    value, err = call_meroquadgk(f, p, dom, cacheval;  maxevals = maxiters, rho = alg.rho, rootmeth = alg.rootmeth,
+            rtol = reltol, atol = abstol, order = alg.order, norm = alg.norm, segbuf=cacheval.segbuf)
+    retcode = err < max(something(abstol, zero(err)), alg.norm(value)*something(reltol, isnothing(abstol) ? sqrt(eps(one(eltype(dom)))) : 0)) ? Success : Failure
+    stats = (; error=err)
+    return IntegralSolution(value, retcode, stats)
+end
+function call_meroquadgk(f::IntegralFunction, p, segs, cacheval; kws...)
+    meroquadgk(x -> f.f(x, p), segs; kws...)
+end
+function call_meroquadgk(f::CommonSolveIntegralFunction, p, segs, cacheval; kws...)
+    integrand = cacheval.integrand
+    meroquadgk(x -> integrand(x, p), segs...; kws...)
+end
diff --git a/src/algorithms_meta.jl b/src/algorithms_meta.jl
new file mode 100644
index 0000000..c769c06
--- /dev/null
+++ b/src/algorithms_meta.jl
@@ -0,0 +1,163 @@
+"""
+    NestedQuad(alg::IntegralAlgorithm)
+    NestedQuad(algs::IntegralAlgorithm...)
+
+Nested integration by repeating one quadrature algorithm or composing a list of algorithms.
+The domain of integration must be an `AbstractIteratedLimits` from the
+IteratedIntegration.jl package. Analogous to `nested_quad` from IteratedIntegration.jl.
+The integrand should expect `SVector` inputs. Do not use this for very high-dimensional
+integrals, since the compilation time scales very poorly with respect to dimensionality.
+In order to improve the compilation time, FunctionWrappers.jl is used to enforce type
+stability of the integrand, so you should always pick the widest integration limit type so
+that inference works properly. For example, if [`ContQuadGKJL`](@ref) is used as an
+algorithm in the nested scheme, then the limits of integration should be made complex.
+"""
+struct NestedQuad{T,S} <: IntegralAlgorithm
+    algs::T
+    specialize::S
+    NestedQuad(alg::IntegralAlgorithm, specialize::AbstractSpecialization=FunctionWrapperSpecialize()) = new{typeof(alg),typeof(specialize)}(alg, specialize)
+    NestedQuad(algs::Tuple{Vararg{IntegralAlgorithm}}, specialize::Tuple{Vararg{AbstractSpecialization}}=ntuple(_->FunctionWrapperSpecialize(), length(algs))) = new{typeof(algs),typeof(specialize)}(algs, specialize)
+end
+NestedQuad(algs::IntegralAlgorithm...) = NestedQuad(algs)
+# TODO add a parallelization option for use when it is safe to do so
+
+function _update!(cache, x, (; p, lims_state))
+    segs, lims, state = limit_iterate(lims_state..., x)
+    len = segs[end] - segs[begin]
+    kws = cache.kwargs
+    cache.p = p
+    cache.cacheval.dom = segs
+    cache.cacheval.kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
+    cache.cacheval.p = (; cache.cacheval.p..., lims_state=(lims, state))
+    return
+end
+_postsolve(sol, x, p) = sol.value
+function init_cacheval(f, nextdom, p, alg::NestedQuad; kws...)
+    x0, (segs, lims, state) = if nextdom isa AbstractIteratedLimits
+        interior_point(nextdom), limit_iterate(nextdom)
+    else
+        nextdom
+    end
+    algs = alg.algs isa IntegralAlgorithm ? ntuple(i -> alg.algs, Val(ndims(lims))) : alg.algs
+    spec = alg.specialize isa AbstractSpecialization ? ntuple(i -> alg.specialize, Val(ndims(lims))) : alg.specialize
+    if ndims(lims) == 1
+        func, ws = inner_integralfunction(f, x0, p)
+    else
+        integrand, ws, update!, postsolve = outer_integralfunction(f, x0, p)
+        proto = get_prototype(integrand, x0, p)
+        a, b, = segs
+        x = (a+b)/2
+        next = (x0[begin:end-1], limit_iterate(lims, state, x))
+        kws = NamedTuple(kws)
+        len = segs[end] - segs[begin]
+        kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
+        subprob = IntegralProblem(integrand, next, p; kwargs...)
+        func = CommonSolveIntegralFunction(subprob, NestedQuad(algs[1:ndims(lims)-1], spec[1:ndims(lims)-1]), update!, postsolve, proto*x^(ndims(lims)-1), spec[ndims(lims)])
+    end
+    prob = IntegralProblem(func, segs, (; p, lims_state=(lims, state), ws); kws...)
+    return init(prob, algs[ndims(lims)])
+    # the order of updates is somewhat tricky. I think some could be simplified if instead
+    # we use an IntegralProblem modified to contain lims_state, instead of passing the
+    # parameter as well
+end
+
+
+function do_integral(f, dom, p, alg::NestedQuad, cacheval; kws...)
+    cacheval.p = (; cacheval.p..., p)
+    cacheval.kwargs = (; cacheval.kwargs..., kws...)
+    return solve!(cacheval)
+end
+function inner_integralfunction(f::IntegralFunction, x0, p)
+    proto = get_prototype(f, x0, p)
+    func = IntegralFunction(proto) do x, (; p, lims_state)
+        f.f(limit_iterate(lims_state..., x), p)
+    end
+    ws = nothing
+    return func, ws
+end
+function outer_integralfunction(f::IntegralFunction, x0, p)
+    proto = get_prototype(f, x0, p)
+    func = IntegralFunction(f.f, proto)
+    ws = nothing
+    return func, ws, _update!, _postsolve
+end
+#=
+"""
+    AbsoluteEstimate(est_alg, abs_alg; kws...)
+
+Most algorithms are efficient when using absolute error tolerances, but how do you know the
+size of the integral? One option is to estimate it using second algorithm.
+
+A multi-algorithm to estimate an integral using an `est_alg` to generate a rough estimate of
+the integral that is combined with a user's relative tolerance to re-calculate the integral
+to higher accuracy using the `abs_alg`. The keywords passed to the algorithm may include
+`reltol`, `abstol` and `maxiters` and are given to the `est_alg` solver. They should limit
+the amount of work of `est_alg` so as to only generate an order-of-magnitude estimate of the
+integral. The tolerances passed to `abs_alg` are `abstol=max(abstol,reltol*norm(I))` and
+`reltol=0`.
+"""
+struct AbsoluteEstimate{E<:IntegralAlgorithm,A<:IntegralAlgorithm,F,K<:NamedTuple} <: IntegralAlgorithm
+    est_alg::E
+    abs_alg::A
+    norm::F
+    kws::K
+end
+function AbsoluteEstimate(est_alg, abs_alg; norm=norm, kwargs...)
+    kws = NamedTuple(kwargs)
+    checkkwargs(kws)
+    return AbsoluteEstimate(est_alg, abs_alg, norm, kws)
+end
+
+function init_cacheval(f, dom, p, alg::AbsoluteEstimate)
+    return (est=init_cacheval(f, dom, p, alg.est_alg),
+            abs=init_cacheval(f, dom, p, alg.abs_alg))
+end
+
+function do_solve(f, dom, p, alg::AbsoluteEstimate, cacheval;
+                    abstol=nothing, reltol=nothing, maxiters=typemax(Int))
+    sol = do_solve(f, dom, p, alg.est_alg, cacheval.est; alg.kws...)
+    val = alg.norm(sol.u) # has same units as sol
+    rtol = reltol === nothing ? sqrt(eps(one(val))) : reltol # use the precision of the solution to set the default relative tolerance
+    atol = max(abstol === nothing ? zero(val) : abstol, rtol*val)
+    return do_solve(f, dom, p, alg.abs_alg, cacheval.abs;
+                    abstol=atol, reltol=zero(rtol), maxiters=maxiters)
+end
+
+
+"""
+    EvalCounter(::IntegralAlgorithm)
+
+An algorithm which counts the evaluations used by another algorithm.
+The count is stored in the `sol.numevals` field.
+"""
+struct EvalCounter{T<:IntegralAlgorithm} <: IntegralAlgorithm
+    alg::T
+end
+
+function init_cacheval(f, dom, p, alg::EvalCounter)
+    return init_cacheval(f, dom, p, alg.alg)
+end
+
+function do_solve(f, dom, p, alg::EvalCounter, cacheval; kws...)
+    if f isa InplaceIntegrand
+        ni::Int = 0
+        gi = (y, x, p) -> (ni += 1; f.f!(y, x, p))
+        soli = do_solve(InplaceIntegrand(gi, f.I), dom, p, alg.alg, cacheval; kws...)
+        return IntegralSolution(soli.u, soli.resid, soli.retcode, ni)
+    elseif f isa BatchIntegrand
+        nb::Int = 0
+        gb = (y, x, p) -> (nb += length(x); f.f!(y, x, p))
+        solb = do_solve(BatchIntegrand(gb, f.y, f.x, max_batch=f.max_batch), dom, p, alg.alg, cacheval; kws...)
+        return IntegralSolution(solb.u, solb.resid, solb.retcode, nb)
+    elseif f isa NestedBatchIntegrand
+        # TODO allocate a bunch of accumulators associated with the leaves of the nested
+        # integrand or rewrap the algorithms in NestedQuad
+        error("NestedBatchIntegrand not yet supported with EvalCounter")
+    else
+        n::Int = 0
+        g = (x, p) -> (n += 1; f(x, p)) # we need let to prevent Core.Box around the captured variable
+        sol = do_solve(g, dom, p, alg.alg, cacheval; kws...)
+        return IntegralSolution(sol.u, sol.resid, sol.retcode, n)
+    end
+end
+=#
diff --git a/src/batch.jl b/src/batch.jl
deleted file mode 100644
index 1642c82..0000000
--- a/src/batch.jl
+++ /dev/null
@@ -1,77 +0,0 @@
-"""
-    BatchIntegrand(f!, y::AbstractArray, x::AbstractVector, max_batch=typemax(Int))
-
-Constructor for a `BatchIntegrand` accepting an integrand of the form `f!(y,x,p) = y .= f!.(x, Ref(p))`
-that can evaluate the integrand at multiple quadrature nodes using, for example, threads,
-the GPU, or distributed-memory. The `max_batch` keyword is a soft limit on the number of
-nodes passed to the integrand. The buffers `y,x` must both be `resize!`-able since the
-number of evaluation points may vary between calls to `f!`.
-"""
-struct BatchIntegrand{F,Y,X}
-    # in-place function f!(y, x, p) that takes an array of x values and outputs an array of results in-place
-    f!::F
-    y::Y
-    x::X
-    max_batch::Int # maximum number of x to supply in parallel
-    function BatchIntegrand(f!, y::AbstractArray, x::AbstractVector, max_batch::Integer=typemax(Int))
-        max_batch > 0 || throw(ArgumentError("maximum batch size must be positive"))
-        return new{typeof(f!),typeof(y),typeof(x)}(f!, y, x, max_batch)
-    end
-end
-
-
-"""
-    BatchIntegrand(f!, y, x; max_batch=typemax(Int))
-
-Constructor for a `BatchIntegrand` with pre-allocated buffers.
-"""
-BatchIntegrand(f!, y, x; max_batch::Integer=typemax(Int)) =
-    BatchIntegrand(f!, y, x, max_batch)
-
-"""
-    BatchIntegrand(f!, y::Type, x::Type=Nothing; max_batch=typemax(Int))
-
-Constructor for a `BatchIntegrand` whose range type is known. The domain type is optional.
-Array buffers for those types are allocated internally.
-"""
-BatchIntegrand(f!, Y::Type, X::Type=Nothing; max_batch::Integer=typemax(Int)) =
-    BatchIntegrand(f!, Y[], X[], max_batch)
-
-
-"""
-    NestedBatchIntegrand(f::Tuple, y::AbstractVector, x::AbstractVector, max_batch::Integer)
-
-An integrand type intended for multi-threaded evaluation of [`NestedQuad`](@ref). The caller
-provides a tuple `f` of worker functions that can evaluate the same integrand on different
-threads, so as to avoid race conditions. These workers can also be `NestedBatchIntegrand`s
-depending on if the user wants to parallelize the integration at multiple levels of nesting.
-The other arguments are the same as for [`BatchIntegrand`](@ref).
-"""
-struct NestedBatchIntegrand{F,T,Y<:AbstractVector,X<:AbstractVector}
-    f::T
-    y::Y
-    x::X
-    max_batch::Int
-    function NestedBatchIntegrand(f::NTuple, y::Y, x::X, max_batch::Integer) where {Y,X}
-        if eltype(f) <: NestedBatchIntegrand
-            return new{_nesttype(eltype(f)),typeof(f),Y,X}(f, y, x, max_batch)
-        else
-            return new{eltype(f),typeof(f),Y,X}(f, y, x, max_batch)
-        end
-    end
-    function NestedBatchIntegrand(f::AbstractArray{F}, y::Y, x::X, max_batch::Integer) where {F,Y,X}
-        return new{F,typeof(f),Y,X}(f, y, x, max_batch)
-    end
-    function NestedBatchIntegrand(f::AbstractArray{T}, y::Y, x::X, max_batch::Integer) where {F,T<:NestedBatchIntegrand{F},Y,X}
-        return new{F,typeof(f),Y,X}(f, y, x, max_batch)
-    end
-end
-
-_nesttype(::Type{<:NestedBatchIntegrand{F}}) where {F} = F
-function NestedBatchIntegrand(f, y, x; max_batch::Integer=typemax(Int))
-    return NestedBatchIntegrand(f, y, x, max_batch)
-end
-
-function NestedBatchIntegrand(f, ::Type{Y}, ::Type{X}=Nothing; kws...) where {Y,X}
-    return NestedBatchIntegrand(f, Y[], X[]; kws...)
-end
diff --git a/src/brillouin.jl b/src/brillouin.jl
index 84b1b71..8472624 100644
--- a/src/brillouin.jl
+++ b/src/brillouin.jl
@@ -47,6 +47,9 @@ nsyms(::FullBZ) = 1
 
 Base.summary(bz::SymmetricBZ) = string(checksquare(bz.A), "-dimensional Brillouin zone with ", bz isa FullBZ ? "trivial" : nsyms(bz), " symmetries")
 Base.show(io::IO, bz::SymmetricBZ) = print(io, summary(bz))
+Base.ndims(::SymmetricBZ{S,L,d}) where {S,L,d} = d
+Base.eltype(::Type{<:SymmetricBZ{S,L,d,TA,TB}}) where {S,L,d,TA,TB} = TB
+get_prototype(bz::SymmetricBZ) = interior_point(bz.lims)
 
 # Define traits for symmetrization based on symmetry representations
 
@@ -61,6 +64,7 @@ abstract type AbstractSymRep end
     UnknownRep()
 
 Fallback symmetry representation for array types without a user-defined `SymRep`.
+Will perform FBZ integration regardless of available BZ symmetries.
 """
 struct UnknownRep <: AbstractSymRep end
 
@@ -71,47 +75,20 @@ Symmetry representation of objects with trivial transformation under the group.
 """
 struct TrivialRep <: AbstractSymRep end
 
-
-"""
-    SymRep(f)
-
-`SymRep` specifies the symmetry representation of the integral of the function
-`f`. When you define a new integrand, you can choose to implement this trait to
-specify how the integral is transformed under the symmetries of the lattice in
-order to map the integral of `f` on the IBZ to the result for the FBZ.
-
-New types for `SymRep` should also extend a corresponding method for
-[`AutoBZCore.symmetrize_`](@ref).
 """
-SymRep(::Any) = UnknownRep()
-const TrivialRepType = Union{Number,AbstractArray{<:Any,0}}
+    symmetrize(rep::AbstractSymRep, ::SymmetricBZ, x)
 
+Transform `x` by the representation of the symmetries of the point group used to reduce the
+domain, thus mapping the value of `x` on to the full Brillouin zone.
 """
-    symmetrize(f, ::SymmetricBZ, xs...)
-    symmetrize(f, ::SymmetricBZ, x::Union{Number,AbstractArray{<:Any,0}})
-
-Transform `x` by the symmetries of the parametrization used to reduce the
-domain, thus mapping the value of `x` on the parametrization to the full domain.
-"""
-symmetrize(f, bz, xs...) = map(x -> symmetrize(f, bz, x), xs)
-symmetrize(f, bz, x) = symmetrize_(f isa AbstractSymRep ? f : SymRep(f), bz, x)
-symmetrize(f, bz, x::TrivialRepType) =
-    symmetrize_(TrivialRep(), bz, x)
-
-"""
-    symmetrize_(rep::AbstractSymRep, bz::SymmetricBZ, x)
-
-Transform `x` under representation `rep` using the symmetries in `bz` to obtain
-the result of an integral on the FBZ from `x`, which was calculated on the IBZ.
-"""
-symmetrize_(::TrivialRep, bz::SymmetricBZ, x) = nsyms(bz)*x
-symmetrize_(::UnknownRep, ::SymmetricBZ, x) = x
-
+symmetrize(rep, bz::SymmetricBZ, x) = symmetrize_(rep, bz, x)
 symmetrize(_, ::FullBZ, x) = x
-symmetrize(_, ::FullBZ, x::TrivialRepType) = x
 
-symmetrize(f, bz, x::AuxValue) = AuxValue(symmetrize(f, bz, x.val, x.aux)...)
-symmetrize(_, ::FullBZ, x::AuxValue) = x
+symmetrize_(rep, bz, x) = symmetrize__(rep, bz, x)
+symmetrize_(rep, bz, x::AuxValue) = AuxValue(symmetrize__(rep, bz, x.val), symmetrize__(rep, bz, x.aux))
+
+symmetrize__(::TrivialRep, bz, x) = nsyms(bz)*x
+symmetrize__(::UnknownRep, bz, x) = error("unknown representation cannot be symmetrized")
 
 struct SymmetricRule{R,U,B}
     rule::R
@@ -123,10 +100,10 @@ Base.getindex(r::SymmetricRule, i) = getindex(r.rule, i)
 Base.eltype(::Type{SymmetricRule{R,U,B}}) where {R,U,B} = eltype(R)
 Base.length(r::SymmetricRule) = length(r.rule)
 Base.iterate(r::SymmetricRule, args...) = iterate(r.rule, args...)
-rule_type(r::SymmetricRule) = rule_type(r.rule)
 function (r::SymmetricRule)(f::F, args...) where {F}
     out = r.rule(f, args...)
-    return symmetrize(r.rep, r.bz, out)
+    val = symmetrize(r.rep, r.bz, out)
+    return val
 end
 
 struct SymmetricRuleDef{R,U,B}
@@ -167,7 +144,8 @@ Interface to loading Brillouin zones.
 - `B::AbstractMatrix`: a ``d \\times d`` matrix whose columns are the reciprocal-space
   lattice vectors of a ``d``-dimensional Brillouin zone (default: `A' \\ 2πI`)
 
-!!! note "Assumptions" `AutoBZCore` assumes that all calculations occur in the reciprocal
+!!! note "Assumptions"
+    `AutoBZCore` assumes that all calculations occur in the reciprocal
     lattice basis, since that is the basis in which Wannier interpolants are most
     efficiently described. See [`SymmetricBZ`](@ref) for details. We also assume that the
     integrands are cheap to evaluate, which is why we provide adaptive methods in the first
@@ -320,38 +298,100 @@ These algorithms also use the symmetries of the Brillouin zone and the integrand
 """
 abstract type AutoBZAlgorithm <: IntegralAlgorithm end
 
-function init_cacheval(f, bz::SymmetricBZ, p, bzalg::AutoBZAlgorithm)
-    _, dom, alg = bz_to_standard(bz, bzalg)
-    return init_cacheval(f, dom, p, alg)
-end
+"""
+    AutoBZProblem([rep], f, bz, [p]; kwargs...)
+
+Construct a BZ integration problem.
+
+## Arguments
+- `rep::AbstractSymRep`: The symmetry representation of `f` (default: `UnknownRep()`)
+- `f::AbstractIntegralFunction`: The integrand
+- `bz::SymmetricBZ`: The Brillouin zone to integrate over
+- `p`: parameters for the integrand (default: `NullParameters()`)
 
-function do_solve(f, bz::SymmetricBZ, p, bzalg::AutoBZAlgorithm, cacheval; kws...)
-    do_solve_autobz(bz_to_standard, f, bz, p, bzalg, cacheval; kws...)
+## Keywords
+Additional keywords are passed directly to the solver
+"""
+struct AutoBZProblem{R<:AbstractSymRep,F<:AbstractIntegralFunction,BZ<:SymmetricBZ,P,K<:NamedTuple}
+    rep::R
+    f::F
+    bz::BZ
+    p::P
+    kwargs::K
 end
 
 const WARN_UNKNOWN_SYMMETRY = """
 A symmetric BZ was used with an integrand whose symmetry representation is unknown.
-For correctness, the calculation will be repeated on the full BZ.
-However, it is better either to integrate without symmetries or to use symmetries by extending SymRep for your type.
+For correctness, the calculation will proceed on the full BZ, i.e. without symmetry.
+To integrate with symmetry, define an AbstractSymRep for your integrand.
 """
-function do_solve_autobz(bz_to_standard, f, bz, p, bzalg::AutoBZAlgorithm, cacheval; _kws...)
-    bz_, dom, alg = bz_to_standard(bz, bzalg)
-
-    j = abs(det(bz_.B))  # rescale tolerance to (I)BZ coordinate and get the right number of digits
-    kws = NamedTuple(_kws)
-    kws_ = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol / (j * nsyms(bz_)),)) : kws
 
-    sol = do_solve(f, dom, p, alg, cacheval; kws_...)
-    # TODO find a way to throw a warning when constructing the problem instead of after a solve
-    SymRep(f) isa UnknownRep && !(bz_ isa FullBZ) && !(sol.u isa TrivialRepType) && begin
+function AutoBZProblem(rep::AbstractSymRep, f::AbstractIntegralFunction, bz::SymmetricBZ, p=NullParameters(); kws...)
+    proto = get_prototype(f, get_prototype(bz), p)
+    if rep isa UnknownRep && !(bz isa FullBZ)
         @warn WARN_UNKNOWN_SYMMETRY
-        fbz = SymmetricBZ(bz_.A, bz_.B, lattice_bz_limits(bz_.B), nothing)
-        _cacheval = init_cacheval(f, fbz, p, bzalg)
-        return do_solve(f, fbz, p, bzalg, _cacheval; _kws...)
+        fbz = SymmetricBZ(bz.A, bz.B, lattice_bz_limits(bz.B), nothing)
+        return AutoBZProblem(rep, f, fbz, p, NamedTuple(kws))
+    else
+        return AutoBZProblem(rep, f, bz, p, NamedTuple(kws))
     end
-    val = j*symmetrize(f, bz_, sol.u)
-    err = sol.resid === nothing ? nothing : j*symmetrize(f, bz_, sol.resid)
-    return IntegralSolution(val, err, sol.retcode, sol.numevals)
+end
+function AutoBZProblem(f::AbstractIntegralFunction, bz::SymmetricBZ, p=NullParameters(); kws...)
+    return AutoBZProblem(UnknownRep(), f, bz, p; kws...)
+end
+function AutoBZProblem(f, bz::SymmetricBZ, p=NullParameters(); kws...)
+    return AutoBZProblem(IntegralFunction(f), bz, p; kws...)
+end
+
+mutable struct AutoBZCache{R,F,BZ,P,A,C,K}
+    rep::R
+    f::F
+    bz::BZ
+    p::P
+    alg::A
+    cacheval::C
+    kwargs::K
+end
+
+function init(prob::AutoBZProblem, alg::AutoBZAlgorithm; kwargs...)
+    rep = prob.rep; f = prob.f; bz = prob.bz; p = prob.p
+    kws = (; prob.kwargs..., kwargs...)
+    checkkwargs(kws)
+    cacheval = init_cacheval(rep, f, bz, p, alg; kws...)
+    return AutoBZCache(rep, f, bz, p, alg, cacheval, kws)
+end
+
+"""
+    solve(::AutoBZProblem, ::AutoBZAlgorithm; kws...)::IntegralSolution
+"""
+solve(prob::AutoBZProblem, alg::AutoBZAlgorithm; kwargs...)
+
+"""
+    solve!(::IntegralCache)::IntegralSolution
+
+Compute the solution to an [`IntegralProblem`](@ref) constructed from [`init`](@ref).
+"""
+function solve!(c::AutoBZCache)
+    return do_solve_autobz(c.rep, c.f, c.bz, c.p, c.alg, c.cacheval; c.kwargs...)
+end
+
+function init_cacheval(rep, f, bz::SymmetricBZ, p, bzalg::AutoBZAlgorithm; kws...)
+    prob, alg = bz_to_standard(f, bz, p, bzalg; kws...)
+    return init(prob, alg)
+end
+
+function do_solve_autobz(rep, f, bz, p, bzalg::AutoBZAlgorithm, cacheval; _kws...)
+    j = abs(det(bz.B))  # rescale tolerance to (I)BZ coordinate and get the right number of digits
+    kws = NamedTuple(_kws)
+    cacheval.f = f
+    cacheval.p = p
+    cacheval.kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol / (j * nsyms(bz)),)) : kws
+
+    sol = solve!(cacheval)
+    value = j*symmetrize(rep, bz, sol.value)
+    stats = (; sol.stats...)
+    # err = sol.resid === nothing ? nothing : j*symmetrize(f, bz_, sol.resid)
+    return IntegralSolution(value, sol.retcode, stats)
 end
 
 # AutoBZAlgorithms must implement:
@@ -365,15 +405,16 @@ Iterated-adaptive integration using `nested_quad` from
 [IteratedIntegration.jl](https://github.com/lxvm/IteratedIntegration.jl).
 **This algorithm is the most efficient for localized integrands**.
 """
-struct IAI{T} <: AutoBZAlgorithm
+struct IAI{T,S} <: AutoBZAlgorithm
     algs::T
-    IAI(alg::IntegralAlgorithm=AuxQuadGKJL()) = new{typeof(alg)}(alg)
-    IAI(algs::Tuple{Vararg{IntegralAlgorithm}}) = new{typeof(algs)}(algs)
+    specialize::S
+    IAI(alg::IntegralAlgorithm=AuxQuadGKJL(), specialize::AbstractSpecialization=FunctionWrapperSpecialize()) = new{typeof(alg),typeof(specialize)}(alg, specialize)
+    IAI(algs::Tuple{Vararg{IntegralAlgorithm}}, specialize::Tuple{Vararg{AbstractSpecialization}}=ntuple(_->FunctionWrapperSpecialize(),length(algs))) = new{typeof(algs),typeof(specialize)}(algs, specialize)
 end
 IAI(algs::IntegralAlgorithm...) = IAI(algs)
 
-function bz_to_standard(bz::SymmetricBZ, alg::IAI)
-    return bz, bz.lims, NestedQuad(alg.algs)
+function bz_to_standard(f, bz, p, bzalg::IAI; kws...)
+    return IntegralProblem(f, bz.lims, p; kws...), NestedQuad(bzalg.algs, bzalg.specialize)
 end
 
 """
@@ -389,8 +430,8 @@ struct PTR <: AutoBZAlgorithm
 end
 PTR(; npt=50, nthreads=1) = PTR(npt, nthreads)
 
-function bz_to_standard(bz::SymmetricBZ, alg::PTR)
-     return bz, canonical_ptr_basis(bz.B), MonkhorstPack(npt=alg.npt, syms=bz.syms, nthreads=alg.nthreads)
+function bz_to_standard(f, bz, p, alg::PTR; kws...)
+    return IntegralProblem(f, canonical_ptr_basis(bz.B), p; kws...), MonkhorstPack(npt=alg.npt, syms=bz.syms, nthreads=alg.nthreads)
 end
 
 
@@ -415,34 +456,44 @@ end
 function AutoPTR(; norm=norm, a=1.0, nmin=50, nmax=1000, n₀=6.0, Δn=log(10), keepmost=2, nthreads=1)
     return AutoPTR(norm, a, nmin, nmax, n₀, Δn, keepmost, nthreads)
 end
-function bz_to_standard(bz::SymmetricBZ, alg::AutoPTR)
-    return bz, canonical_ptr_basis(bz.B), AutoSymPTRJL(norm=alg.norm, a=alg.a, nmin=alg.nmin, nmax=alg.nmax, n₀=alg.n₀, Δn=alg.Δn, keepmost=alg.keepmost, syms=bz.syms, nthreads=alg.nthreads)
+
+
+struct RepBZ{R,B}
+    rep::R
+    bz::B
+end
+Base.ndims(dom::RepBZ) = ndims(dom.bz)
+Base.eltype(::Type{RepBZ{R,B}}) where {R,B} = eltype(B)
+get_prototype(dom::RepBZ) = get_prototype(dom.bz)
+
+
+function init_cacheval(rep, f, bz::SymmetricBZ, p, bzalg::AutoPTR; kws...)
+    prob = IntegralProblem(f, RepBZ(rep, bz), p; kws...)
+    alg = AutoSymPTRJL(norm=bzalg.norm, a=bzalg.a, nmin=bzalg.nmin, nmax=bzalg.nmax, n₀=bzalg.n₀, Δn=bzalg.Δn, keepmost=bzalg.keepmost, syms=bz.syms, nthreads=bzalg.nthreads)
+    return init(prob, alg)
 end
-function init_cacheval(f, bz::SymmetricBZ, p, bzalg::AutoPTR)
-    bz_, dom, alg = bz_to_standard(bz, bzalg)
-    f isa NestedBatchIntegrand && throw(ArgumentError("AutoSymPTRJL doesn't support nested batching"))
-    rule = SymmetricRuleDef(init_rule(dom, alg), SymRep(f), bz_)
-    cache = AutoSymPTR.alloc_cache(eltype(dom), Val(ndims(dom)), rule)
-    buffer = init_buffer(f, alg.nthreads)
-    return (rule=rule, cache=cache, buffer=buffer)
+get_basis(dom::RepBZ) = canonical_ptr_basis(dom.bz.B)
+function init_rule(dom::RepBZ, alg::AutoSymPTRJL)
+    B = get_basis(dom)
+    rule = init_rule(B, alg)
+    return SymmetricRuleDef(rule, dom.rep, dom.bz)
 end
-function do_solve_autobz(bz_to_standard, f, bz, p, bzalg::AutoPTR, cacheval; _kws...)
-    bz_, dom, alg = bz_to_standard(bz, bzalg)
-    j = abs(det(bz_.B))  # rescale tolerance to (I)BZ coordinate and get the right number of digits
+# The spectral convergence of the PTR for integrands with non-trivial symmetry action
+# requires symmetrizing inside the quadrature
+function do_solve_autobz(rep, f, bz, p, bzalg::AutoPTR, cacheval; _kws...)
+    j = abs(det(bz.B))  # rescale tolerance to (I)BZ coordinate and get the right number of digits
     kws = NamedTuple(_kws)
-    kws_ = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol / j,)) : kws
+    cacheval.f = f
+    cacheval.p = p
+    cacheval.kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol / j,)) : kws
 
-    sol = do_solve(f, dom, p, alg, cacheval; kws_...)
-    # TODO find a way to throw a warning when constructing the problem instead of after a solve
-    SymRep(f) isa UnknownRep && !(bz_ isa FullBZ) && !(sol.u isa TrivialRepType) && begin
-        @warn WARN_UNKNOWN_SYMMETRY
-        fbz = SymmetricBZ(bz_.A, bz_.B, lattice_bz_limits(bz_.B), nothing)
-        _cacheval = init_cacheval(f, fbz, p, bzalg)
-        return do_solve(f, fbz, p, bzalg, _cacheval; _kws...)
-    end
-    return IntegralSolution(sol.u * j, sol.resid * j, sol.retcode, sol.numevals)
+    sol = solve!(cacheval)
+    value = j*sol.value
+    stats = (; sol.stats..., error=sol.stats.error*j)
+    return IntegralSolution(value, sol.retcode, stats)
 end
 
+
 """
     TAI(; norm=norm, initdivs=1)
 
@@ -456,13 +507,12 @@ struct TAI{N} <: AutoBZAlgorithm
 end
 TAI(; norm=norm, initdiv=1) = TAI(norm, initdiv)
 
-function bz_to_standard(bz::SymmetricBZ, alg::TAI)
-    bz_ = bz.lims isa CubicLimits ? bz : SymmetricBZ(bz.A, bz.B, lattice_bz_limits(bz.B), nothing)
-    l = bz_.lims
-    return bz_, HyperCube(l.a, l.b), HCubatureJL(norm=alg.norm, initdiv = alg.initdiv)
+function bz_to_standard(f, bz, p, alg::TAI; kws...)
+    @assert bz.lims isa CubicLimits "TAI can only integrate rectangular regions"
+    return IntegralProblem(f, HyperCube(bz.lims.a, bz.lims.b), p; kws...), HCubatureJL(norm=alg.norm, initdiv = alg.initdiv)
 end
 
-
+#=
 """
     PTR_IAI(; ptr=PTR(), iai=IAI())
 
@@ -497,3 +547,4 @@ end
 function do_solve(f, bz::SymmetricBZ, p, alg::EvalCounter{<:AutoBZAlgorithm}, cacheval; kws...)
     return do_solve_autobz(count_bz_to_standard, f, bz, p, alg.alg, cacheval; kws...)
 end
+=#
diff --git a/src/domains.jl b/src/domains.jl
index c58abef..cc725b0 100644
--- a/src/domains.jl
+++ b/src/domains.jl
@@ -1,3 +1,18 @@
+endpoints(dom) = (first(dom), last(dom))
+breakpoints(dom) = dom[begin+1:end-1] # or Iterators.drop(Iterators.take(dom, length(dom)-1), 1)
+segments(dom) = dom
+function get_prototype(dom)
+    a, b, = dom
+    return (a+b)/2
+end
+
+function get_prototype(B::Basis)
+    return B * zero(SVector{ndims(B),float(eltype(B))})
+end
+
+get_basis(B::Basis) = B
+get_basis(B::AbstractMatrix) = Basis(B)
+
 """
     PuncturedInterval(s)
 
@@ -10,9 +25,14 @@ struct PuncturedInterval{T,S}
     PuncturedInterval(s::S) where {N,S<:NTuple{N}} = new{eltype(s),S}(s)
     PuncturedInterval(s::S) where {T,S<:AbstractVector{T}} = new{T,S}(s)
 end
+PuncturedInterval(s::PuncturedInterval) = s
 Base.eltype(::Type{PuncturedInterval{T,S}}) where {T,S} = T
 segments(p::PuncturedInterval) = p.s
 endpoints(p::PuncturedInterval) = (p.s[begin], p.s[end])
+function get_prototype(p::PuncturedInterval)
+    a, b, = segments(p)
+    return (a + b)/2
+end
 
 """
     HyperCube(a, b)
@@ -31,3 +51,9 @@ HyperCube(a, b) = HyperCube(promote(a...), promote(b...))
 Base.eltype(::Type{HyperCube{d,T}}) where {d,T} = T
 
 endpoints(c::HyperCube) = (c.a, c.b)
+function get_prototype(p::HyperCube)
+    a, b = endpoints(p)
+    return (a + b)/2
+end
+
+get_prototype(l::AbstractIteratedLimits) = interior_point(l)
diff --git a/src/dos_ggr.jl b/src/dos_ggr.jl
index d3384ab..2a4f6e6 100644
--- a/src/dos_ggr.jl
+++ b/src/dos_ggr.jl
@@ -52,7 +52,7 @@ function dos_solve(h, domain, p, alg::GGR, cacheval;
 
     A = sum_ggr(ndims(bz.lims), alg.npt, E, cacheval...)
 
-    return DOSSolution(A, nothing, true, -1)
+    return DOSSolution(A, Success, (;))
 end
 
 function sum_ggr(ndim, npt, E, weights, energies, velocities)
diff --git a/src/dos_interfaces.jl b/src/dos_interfaces.jl
index e6b0f63..cf47f9c 100644
--- a/src/dos_interfaces.jl
+++ b/src/dos_interfaces.jl
@@ -30,30 +30,32 @@ where ``E \\in \\text{domain}`` and ``\\delta`` is the Dirac Delta distribution.
   of freedom) or continuous (e.g. for `H` a Hamiltonian parameterized by crystal
   momentum).
 """
-struct DOSProblem{H,D,P}
+struct DOSProblem{H,D,P,K<:NamedTuple}
     H::H
     domain::D
     p::P
+    kwargs::K
+end
+function DOSProblem(H, domain, p=NullParameters(); kws...)
+    return DOSProblem(H, domain, p, NamedTuple(kws))
 end
-DOSProblem(H, domain) = DOSProblem(H, domain, NullParameters())
 
-struct DOSSolution{U,E}
-    u::U
-    err::E
-    retcode::Bool
-    numevals::Int
+struct DOSSolution{V,S}
+    value::V
+    retcode::ReturnCode
+    stats::S
 end
 
 # store the data in a mutable cache so that the user can update the cache and
 # compute the result again without setting up new problems.
 mutable struct DOSCache{H,D,P,A,C,K}
-  H::H
-  domain::D
-  p::P
-  alg::A
-  cacheval::C
-  isfresh::Bool # true if H has been replaced/modified, otherwise false
-  kwargs::K
+    H::H
+    domain::D
+    p::P
+    alg::A
+    cacheval::C
+    isfresh::Bool # true if H has been replaced/modified, otherwise false
+    kwargs::K
 end
 
 function Base.setproperty!(cache::DOSCache, name::Symbol, item)
@@ -66,11 +68,6 @@ end
 # by default, algorithms won't have anything in the cache
 init_cacheval(h, dom, p, ::DOSAlgorithm) = nothing
 
-function make_cache(h, dom, p, alg::DOSAlgorithm; kwargs...)
-  cacheval = init_cacheval(h, dom, p, alg)
-  return DOSCache(h, dom, p, alg, cacheval, false, NamedTuple(kwargs))
-end
-
 # check same keywords as for integral problems: abstol, reltol, maxiters
 checkkwargs_dos(kws) = checkkwargs(kws)
 
@@ -81,19 +78,10 @@ Create a cache of the data used by an algorithm to solve the given problem.
 """
 function init(prob::DOSProblem, alg::DOSAlgorithm; kwargs...)
     h = prob.H; dom = prob.domain; p = prob.p
-    checkkwargs_dos(NamedTuple(kwargs))
-    return make_cache(h, dom, p, alg; kwargs...)
-end
-
-"""
-    solve(::DOSProblem, ::DOSAlgorithm; kwargs...)::DOSSolution
-
-Compute the solution to a [`DOSProblem`](@ref) using the given algorithm.
-The keyword arguments to the solver can be `abstol`, `reltol`, and `maxiters`.
-"""
-function solve(prob::DOSProblem, alg::DOSAlgorithm; kwargs...)
-    cache = init(prob, alg; kwargs...)
-    return solve!(cache)
+    kws = (; prob.kwargs..., kwargs...)
+    checkkwargs_dos(kws)
+    cacheval = init_cacheval(h, dom, p, alg)
+    return DOSCache(h, dom, p, alg, cacheval, false, kws)
 end
 
 """
@@ -112,3 +100,8 @@ function solve!(c::DOSCache)
 end
 
 function dos_solve end
+
+"""
+    solve(::DOSProblem, ::DOSAlgorithm; kws...)::DOSSolution
+"""
+solve(prob::DOSProblem, alg::DOSAlgorithm; kwargs...)
diff --git a/src/fourier.jl b/src/fourier.jl
index 98a0457..f5d28da 100644
--- a/src/fourier.jl
+++ b/src/fourier.jl
@@ -18,45 +18,100 @@
 # We use the pattern of allowing the user to pass a container with the integrand, Fourier
 # series and workspace, and use dispatch to enable the optimizations
 
-# the nested batched integrand is optional, but when included it allows for thread-safe parallelization
-struct FourierIntegrand{F,P,W,N}
-    f::ParameterIntegrand{F,P}
-    w::W
-    nest::N
-    function FourierIntegrand(f::ParameterIntegrand{F,P}, w::FourierWorkspace) where {F,P}
-        return new{F,P,typeof(w),Nothing}(f, w, nothing)
-    end
-    function FourierIntegrand(f::ParameterIntegrand{F,P}, w::FourierWorkspace, nest::NestedBatchIntegrand{<:ParameterIntegrand{F}}) where {F,P}
-        return new{F,P,typeof(w),typeof(nest)}(f, w, nest)
-    end
-    function FourierIntegrand(f::ParameterIntegrand{F,P}, w::FourierWorkspace, nest::ParameterIntegrand{F}) where {F,P}
-        return new{F,P,typeof(w),typeof(nest)}(f, w, nest)
-    end
-end
+# the nested batched integrand is optional, but when included it allows for thread-safe
+# parallelization
+
+abstract type AbstractFourierIntegralFunction <: AbstractIntegralFunction end
 
 """
-    FourierIntegrand(f, w::FourierWorkspace, args...; kws...)
+    FourierIntegralFunction(f, s, [prototype=nothing]; alias=false)
 
-Constructs an integrand of the form `f(FourierValue(x,w(x)), args...; kws...)` where the
-Fourier series in `w` is evaluated efficiently, i.e. one dimension at a time, with
-compatible algorithms. `f` should accept parameters as arguments and keywords, similar to a
-[`ParameterIntegrand`](@ref) although the first argument to `f` will always be a
-[`FourierValue`](@ref).
+## Arguments
+- `f`: The integrand, accepting inputs `f(x, s(x), p)`
+- `s::AbstractFourierSeries`: The Fourier series to evaluate
+- `prototype`:
+- `alias::Bool`: whether to `deepcopy` the series (false) or use the series as-is (true)
 """
-function FourierIntegrand(f, w::FourierWorkspace, args...; kws...)
-    return FourierIntegrand(ParameterIntegrand(f, args...; kws...), w)
+struct FourierIntegralFunction{F,S,P} <: AbstractFourierIntegralFunction
+    f::F
+    s::S
+    prototype::P
+    alias::Bool
 end
+FourierIntegralFunction(f, s, p=nothing; alias=false) = FourierIntegralFunction(f, s, p, alias)
 
-"""
-    FourierIntegrand(f, s::AbstractFourierSeries, args...; kws...)
+function get_prototype(f::FourierIntegralFunction, x, ws, p)
+    f.prototype === nothing ? f.f(x, ws(x), p) : f.prototype
+end
+get_prototype(f::FourierIntegralFunction, x, p) = get_prototype(f, x, f.s, p)
+
+function get_fourierworkspace(f::AbstractFourierIntegralFunction)
+    f.s isa FourierWorkspace ? f.s : FourierSeriesEvaluators.workspace_allocate(f.alias ? f.s : deepcopy(f.s), FourierSeriesEvaluators.period(f.s))
+end
 
-Outer constructor for `FourierIntegrand` that wraps the Fourier series `s` into a
-single-threaded `FourierWorkspace`.
+# TODO implement FourierInplaceIntegrand FourierInplaceBatchIntegrand
+
+"""
+    CommonSolveFourierIntegralFunction(prob, alg, update!, postsolve, s, [prototype, specialize]; alias=false, kws...)
+
+Constructor for an integrand that solves a problem defined with the CommonSolve.jl
+interface, `prob`, which is instantiated using `init(prob, alg; kws...)`. Helper functions
+include: `update!(cache, x, s(x), p)` is called before
+`solve!(cache)`, followed by `postsolve(sol, x, s(x), p)`, which should return the value of the solution.
+The `prototype` argument can help control how much to `specialize` on the type of the
+problem, which defaults to `FullSpecialize()` so that run times are improved. However
+`FunctionWrapperSpecialize()` may help reduce compile times.
 """
-function FourierIntegrand(f, s::AbstractFourierSeries, args...; kws...)
-    return FourierIntegrand(f, workspace_allocate_vec(s, period(s)), args...; kws...)
+struct CommonSolveFourierIntegralFunction{P,A,S,K,U,PS,T,M<:AbstractSpecialization} <: AbstractFourierIntegralFunction
+    prob::P
+    alg::A
+    s::S
+    kwargs::K
+    update!::U
+    postsolve::PS
+    prototype::T
+    specialize::M
+    alias::Bool
+end
+function CommonSolveFourierIntegralFunction(prob, alg, update!, postsolve, s, prototype=nothing, specialize=FullSpecialize(); alias=false, kws...)
+    return CommonSolveFourierIntegralFunction(prob, alg, s, NamedTuple(kws), update!, postsolve, prototype, specialize, alias)
+end
+
+function do_solve!(cache, f::CommonSolveFourierIntegralFunction, x, s, p)
+    f.update!(cache, x, s, p)
+    sol = solve!(cache)
+    return f.postsolve(sol, x, s, p)
+end
+function get_prototype(f::CommonSolveFourierIntegralFunction, x, ws, p)
+    if isnothing(f.prototype)
+        cache = init(f.prob, f.alg; f.kwargs...)
+        do_solve!(cache, f, x, ws(x), p)
+    else
+        f.prototype
+    end
+end
+get_prototype(f::CommonSolveFourierIntegralFunction, x, p) = get_prototype(f, x, f.s, p)
+
+function init_specialized_fourierintegrand(cache, f, dom, p; x=get_prototype(dom), ws=f.s, s = ws(x), prototype=f.prototype)
+    proto = prototype === nothing ? do_solve!(cache, f, x, s, p) : prototype
+    func = (x, s, p) -> do_solve!(cache, f, x, s, p)
+    integrand = if f.specialize isa FullSpecialize
+        func
+    elseif f.specialize isa FunctionWrapperSpecialize
+        FunctionWrapper{typeof(prototype), typeof((x, s, p))}(func)
+    else
+        throw(ArgumentError("$(f.specialize) is not implemented"))
+    end
+    return integrand, proto
+end
+function _init_commonsolvefourierfunction(f, dom, p; kws...)
+    cache = init(f.prob, f.alg; f.kwargs...)
+    integrand, prototype = init_specialized_fourierintegrand(cache, f, dom, p; kws...)
+    return cache, integrand, prototype
 end
 
+# TODO implement CommonSolveFourierInplaceIntegrand CommonSolveFourierInplaceBatchIntegrand
+
 # similar to workspace_allocate, but more type-stable because of loop unrolling and vector types
 function workspace_allocate_vec(s::AbstractFourierSeries{N}, x::NTuple{N,Any}, len::NTuple{N,Integer}=ntuple(one,Val(N))) where {N}
     # Only the top-level workspace has an AbstractFourierSeries in the series field
@@ -73,53 +128,191 @@ function workspace_allocate_vec(s::AbstractFourierSeries{N}, x::NTuple{N,Any}, l
     else
         c = FourierSeriesEvaluators.allocate(s, x[N], dim)
         t = FourierSeriesEvaluators.contract!(c, s, x[N], dim)
-        c_ = FourierWorkspace(c, workspace_allocate_vec(t, x[1:N-1], len[1:N-1]).cache)
+        c_ = FourierWorkspace(c, FourierSeriesEvaluators.workspace_allocate(t, x[1:N-1], len[1:N-1]).cache)
         ws = Vector{typeof(c_)}(undef, len[N])
         ws[1] = c_
         for n in 2:len[N]
             _c = FourierSeriesEvaluators.allocate(s, x[N], dim)
             _t = FourierSeriesEvaluators.contract!(_c, s, x[N], dim)
-            ws[n] = FourierWorkspace(_c, workspace_allocate_vec(_t, x[1:N-1], len[1:N-1]).cache)
+            ws[n] = FourierWorkspace(_c, FourierSeriesEvaluators.workspace_allocate(_t, x[1:N-1], len[1:N-1]).cache)
         end
     end
     return FourierWorkspace(s, ws)
 end
 
-
-function (s::IntegralSolver{<:FourierIntegrand})(args...; kwargs...)
-    p = MixedParameters(args...; kwargs...)
-    sol = solve_p(s, p)
-    return sol.u
-end
-
-function remake_cache(f::FourierIntegrand, dom, p, alg, cacheval, kwargs)
-    # TODO decide what to do with the nest, since it may have allocated
-    fp = ParameterIntegrand(f.f.f)
-    new = f.nest === nothing ? FourierIntegrand(fp, f.w) : FourierIntegrand(fp, f.w, f.nest)
-    return remake_integrand_cache(new, dom, merge(f.f.p, p), alg, cacheval, kwargs)
-end
-
-# FourierIntegrands should expect a FourierValue as input
-
-"""
-    FourierValue(x, s)
-
-A container used by [`FourierIntegrand`](@ref) to pass a point, `x`, and the value of a
-Fourier series evaluated at the point, `s`, to integrands. The properties `x` and `s` of a
-`FourierValue` store the point and evaluated series, respectively.
-"""
 struct FourierValue{X,S}
     x::X
     s::S
 end
-Base.convert(::Type{T}, f::FourierValue) where {T<:FourierValue} = T(f.x,f.s)
+@inline AutoSymPTR.mymul(w, x::FourierValue) = FourierValue(AutoSymPTR.mymul(w, x.x), x.s)
+@inline AutoSymPTR.mymul(::AutoSymPTR.One, x::FourierValue) = x
 
-Base.zero(::Type{FourierValue{X,S}}) where {X,S} = FourierValue(zero(X), zero(S))
-Base.:*(B::AbstractMatrix, f::FourierValue) = FourierValue(B*f.x, f.s)
+function init_cacheval(f::FourierIntegralFunction, dom, p, alg::QuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    prototype = get_prototype(f, get_prototype(segs), ws, p)
+    return init_segbuf(prototype, segs, alg), ws
+end
+function init_cacheval(f::CommonSolveFourierIntegralFunction, dom, p, alg::QuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    x = get_prototype(segs)
+    ws = get_fourierworkspace(f)
+    cache, integrand, prototype = _init_commonsolvefourierfunction(f, dom, p; x, ws)
+    return init_segbuf(prototype, segs, alg), ws, cache, integrand
+end
+function call_quadgk(f::FourierIntegralFunction, p, u, usegs, cacheval; kws...)
+    segbuf, ws = cacheval
+    quadgk(x -> (ux =
+     u*x; f.f(ux, ws(ux), p)), usegs...; kws..., segbuf)
+end
+function call_quadgk(f::CommonSolveFourierIntegralFunction, p, u, usegs, cacheval; kws...)
+    segbuf, ws, _, integrand = cacheval
+    quadgk(x -> (ux = u*x; integrand(ux, ws(ux), p)), usegs...; kws..., segbuf)
+end
 
-(f::FourierIntegrand)(x::FourierValue, p) = f.f(x, p)
-# fallback evaluator of FourierIntegrand for algorithms without specialized rules
-(f::FourierIntegrand)(x, p) = f(FourierValue(x, f.w(x)), p)
+function init_cacheval(f::FourierIntegralFunction, dom, p, ::HCubatureJL; kws...)
+    # TODO utilize hcubature_buffer
+    ws = get_fourierworkspace(f)
+    return ws
+end
+function hcubature_integrand(f::FourierIntegralFunction, p, a, b, ws)
+    x -> f.f(x, ws(x), p)
+end
+
+function init_autosymptr_cache(f::FourierIntegralFunction, dom, p, bufsize; kws...)
+    ws = get_fourierworkspace(f)
+    return (; buffer=nothing, ws)
+end
+function init_autosymptr_cache(f::CommonSolveFourierIntegralFunction, dom, p, bufsize; kws...)
+    ws = get_fourierworkspace(f)
+    cache, integrand, = _init_commonsolvefourierfunction(f, dom, p; ws)
+    return (; buffer=nothing, ws, cache, integrand)
+end
+function autosymptr_integrand(f::FourierIntegralFunction, p, segs, cacheval)
+    ws = cacheval.ws
+    x -> x isa FourierValue ? f.f(x.x, x.s, p) : f.f(x, ws(x), p)
+end
+function autosymptr_integrand(f::CommonSolveFourierIntegralFunction, p, segs, cacheval)
+    integrand = cacheval.integrand
+    ws = cacheval.ws
+    return x -> x isa FourierValue ? integrand(x.x, x.s, p) : integrand(x, ws(x), p)
+end
+
+
+function init_cacheval(f::FourierIntegralFunction, dom, p, alg::AuxQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    prototype = get_prototype(f, get_prototype(segs), ws, p)
+    return init_segbuf(prototype, segs, alg), ws
+end
+function init_cacheval(f::CommonSolveFourierIntegralFunction, dom, p, alg::AuxQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    x = get_prototype(segs)
+    cache, integrand, prototype = _init_commonsolvefourierfunction(f, dom, p; x, ws)
+    return init_segbuf(prototype, segs, alg), ws, cache, integrand
+end
+function call_auxquadgk(f::FourierIntegralFunction, p, u, usegs, cacheval; kws...)
+    segbuf, ws = cacheval
+    auxquadgk(x -> (ux=u*x; f.f(ux, ws(ux), p)), usegs...; kws..., segbuf)
+end
+function call_auxquadgk(f::CommonSolveFourierIntegralFunction, p, u, usegs, cacheval; kws...)
+    # cache = cacheval[2] could call do_solve!(cache, f, x, p) to fully specialize
+    segbuf, ws, _, integrand = cacheval
+    auxquadgk(x -> (ux=u*x; integrand(ux, ws(ux), p)), usegs...; kws..., segbuf)
+end
+
+
+function init_cacheval(f::FourierIntegralFunction, dom, p, alg::ContQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    prototype = get_prototype(f, get_prototype(segs), ws, p)
+    segbufs = init_csegbuf(prototype, dom, alg)
+    return (; segbufs..., ws)
+end
+function init_cacheval(f::CommonSolveFourierIntegralFunction, dom, p, alg::ContQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    cache, integrand, prototype = _init_commonsolvefourierfunction(f, dom, p; ws, x=get_prototype(segs))
+    segbufs = init_csegbuf(prototype, dom, alg)
+    return (; segbufs..., ws, cache, integrand)
+end
+function call_contquadgk(f::FourierIntegralFunction, p, segs, cacheval; kws...)
+    ws = cacheval.ws
+    contquadgk(x -> f.f(x, ws(x), p), segs; kws...)
+end
+function call_contquadgk(f::CommonSolveFourierIntegralFunction, p, segs, cacheval; kws...)
+    integrand = cacheval.integrand
+    ws = cacheval.ws
+    contquadgk(x -> integrand(x, ws(x), p), segs...; kws...)
+end
+
+function init_cacheval(f::FourierIntegralFunction, dom, p, alg::MeroQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    prototype = get_prototype(f, get_prototype(segs), ws, p)
+    segbuf = init_msegbuf(prototype, dom, alg)
+    return (; segbuf, ws)
+end
+function init_cacheval(f::CommonSolveFourierIntegralFunction, dom, p, alg::MeroQuadGKJL; kws...)
+    segs = PuncturedInterval(dom)
+    ws = get_fourierworkspace(f)
+    cache, integrand, prototype = _init_commonsolvefourierfunction(f, dom, p; ws, x=get_prototype(segs))
+    segbuf = init_msegbuf(prototype, dom, alg)
+    return (; segbuf, ws, cache, integrand)
+end
+function call_meroquadgk(f::FourierIntegralFunction, p, segs, cacheval; kws...)
+    ws = cacheval.ws
+    meroquadgk(x -> f.f(x, ws(x), p), segs; kws...)
+end
+function call_meroquadgk(f::CommonSolveFourierIntegralFunction, p, segs, cacheval; kws...)
+    integrand = cacheval.integrand
+    ws = cacheval.ws
+    meroquadgk(x -> integrand(x, ws(x), p), segs...; kws...)
+end
+
+function _fourier_update!(cache, x, p)
+    _update!(cache, x, p)
+    s = workspace_contract!(p.ws, x)
+    cache.cacheval.p = (; cache.cacheval.p..., ws=s)
+    return
+end
+function inner_integralfunction(f::FourierIntegralFunction, x0, p)
+    ws = get_fourierworkspace(f)
+    proto = get_prototype(f, x0, ws, p)
+    func = IntegralFunction(proto) do x, (; p, ws, lims_state)
+        f.f(limit_iterate(lims_state..., x), workspace_evaluate!(ws, x), p)
+    end
+    return func, ws
+end
+function outer_integralfunction(f::FourierIntegralFunction, x0, p)
+    ws = get_fourierworkspace(f)
+    proto = get_prototype(f, x0, ws, p)
+    s = workspace_contract!(ws, x0[end])
+    func = FourierIntegralFunction(f.f, s, proto; alias=true)
+    return func, ws, _fourier_update!, _postsolve
+end
+# TODO it would be desirable to allow the inner integralfunction to be of the
+# same type as f, which requires moving workspace out of the parameters into
+# some kind of mutable storage
+function inner_integralfunction(f::CommonSolveFourierIntegralFunction, x0, p)
+    ws = get_fourierworkspace(f)
+    proto = get_prototype(f, x0, ws, p)
+    cache = init(f.prob, f.alg; f.kwargs...)
+    func = IntegralFunction(proto) do x, (; p, ws, lims_state)
+        y = limit_iterate(lims_state..., x)
+        s = workspace_evaluate!(ws, x)
+        do_solve!(cache, f, y, s, p)
+    end
+    return func, ws
+end
+function outer_integralfunction(f::CommonSolveFourierIntegralFunction, x0, p)
+    ws = get_fourierworkspace(f)
+    proto = get_prototype(f, x0, ws, p)
+    s = workspace_contract!(ws, x0[end])
+    func = CommonSolveFourierIntegralFunction(f.prob, f.alg, f.update!, f.postsolve, s, proto, f.specialize; alias=true, f.kwargs...)
+    return func, ws, _fourier_update!, _postsolve
+end
 
 # PTR rules
 
@@ -227,7 +420,7 @@ function _fourier_symptr!(vals::AbstractVector, w::FourierWorkspace, x::Abstract
         @inbounds wi = wsym[i, idx...]
         iszero(wi) && continue
         @inbounds xi = x[i]
-        vals[o+(n+=1)] = (wi, FourierValue((xi, coord...), workspace_evaluate!(w, t*xi)))
+        vals[o+(n+=1)] = (wi, FourierValue(SVector(xi, coord...), workspace_evaluate!(w, t*xi)))
     end
     return vals
 end
@@ -284,8 +477,6 @@ Base.eltype(::Type{FourierMonkhorstPack{d,W,T,S}}) where {d,W,T,S} = Tuple{W,Fou
 Base.length(r::FourierMonkhorstPack) = length(r.wxs)
 Base.iterate(rule::FourierMonkhorstPack, args...) = iterate(rule.wxs, args...)
 
-rule_type(::FourierMonkhorstPack{d,W,T,S}) where {d,W,T,S} = FourierValue{SVector{d,T},S}
-
 function (rule::FourierMonkhorstPack{d})(f::F, B::Basis, buffer=nothing) where {d,F}
     arule = AutoSymPTR.AffineQuad(rule, B)
     return AutoSymPTR.quadsum(arule, f, arule.vol / (rule.npt^d * rule.nsyms), buffer)
@@ -322,12 +513,11 @@ end
 
 # dispatch on PTR algorithms
 
-function init_buffer(f::FourierIntegrand, len)
-    return f.nest isa NestedBatchIntegrand ? Vector{eltype(f.nest.y)}(undef, len) : nothing
-end
+# function init_buffer(f::FourierIntegrand, len)
+#     return f.nest isa NestedBatchIntegrand ? Vector{eltype(f.nest.y)}(undef, len) : nothing
+# end
 
-rule_type(::FourierPTR{N,T,S}) where {N,T,S} = FourierValue{SVector{N,T},S}
-function init_fourier_rule(w::FourierWorkspace, dom::Basis, alg::MonkhorstPack)
+function init_fourier_rule(w::FourierWorkspace, dom, alg::MonkhorstPack)
     @assert ndims(w.series) == ndims(dom)
     if alg.syms === nothing
         return FourierPTR(w, eltype(dom), Val(ndims(dom)), alg.npt)
@@ -335,196 +525,26 @@ function init_fourier_rule(w::FourierWorkspace, dom::Basis, alg::MonkhorstPack)
         return FourierMonkhorstPack(w, eltype(dom), Val(ndims(dom)), alg.npt, alg.syms)
     end
 end
-function init_cacheval(f::FourierIntegrand, dom::Basis , p, alg::MonkhorstPack)
-    rule = init_fourier_rule(f.w, dom, alg)
-    buf = init_buffer(f, alg.nthreads)
-    return (rule=rule, buffer=buf)
+function init_cacheval(f::AbstractFourierIntegralFunction, dom , p, alg::MonkhorstPack; kws...)
+    cache = init_autosymptr_cache(f, dom, p, alg.nthreads; kws...)
+    ws = cache.ws
+    rule = init_fourier_rule(ws, dom, alg)
+    return (; rule, buffer=nothing, ws, cache...)
 end
 
-function init_fourier_rule(w::FourierWorkspace, dom::Basis, alg::AutoSymPTRJL)
+function init_fourier_rule(w::FourierWorkspace, dom, alg::AutoSymPTRJL)
     @assert ndims(w.series) == ndims(dom)
     return FourierMonkhorstPackRule(w, alg.syms, alg.a, alg.nmin, alg.nmax, alg.n₀, alg.Δn)
 end
-function init_cacheval(f::FourierIntegrand, dom::Basis, p, alg::AutoSymPTRJL)
-    rule = init_fourier_rule(f.w, dom, alg)
-    cache = AutoSymPTR.alloc_cache(eltype(dom), Val(ndims(dom)), rule)
-    buffer = init_buffer(f, alg.nthreads)
-    return (rule=rule, cache=cache, buffer=buffer)
-end
-function init_cacheval(f::FourierIntegrand, bz::SymmetricBZ, p, bzalg::AutoPTR)
-    bz_, dom, alg = bz_to_standard(bz, bzalg)
-    rule = SymmetricRuleDef(init_fourier_rule(f.w, dom, alg), SymRep(f), bz_)
-    cache = AutoSymPTR.alloc_cache(eltype(dom), Val(ndims(dom)), rule)
-    buffer = init_buffer(f, alg.nthreads)
-    return (rule=rule, cache=cache, buffer=buffer)
-end
-function init_fourier_rule(s::AbstractFourierSeries, bz::SymmetricBZ, alg::PTR)
-    dom = Basis(bz.B)
-    return FourierMonkhorstPack(s, eltype(dom), Val(ndims(dom)), alg.npt, bz.syms)
-end
-
-function nested_to_batched(f::FourierIntegrand, dom::Basis, rule)
-    ys = f.nest.y/prod(ntuple(n -> oneunit(eltype(dom)), Val(ndims(dom))))
-    xs = rule_type(rule)[]
-    return BatchIntegrand(ys, xs, max_batch=f.nest.max_batch) do y, x, p
-        # would be better to fully unwrap the nested structure, but this is one level
-        nchunk = length(f.nest.f)
-        Threads.@threads for ichunk in 1:min(nchunk, length(x))
-            for (i, j) in zip(getchunk(x, ichunk, nchunk, :batch), getchunk(y, ichunk, nchunk, :batch))
-                y[j] = FourierIntegrand(f.nest.f[ichunk], FourierWorkspace(nothing,nothing))(x[i], p)
-            end
-        end
-        return nothing
-    end
-end
-
-function do_solve(f::FourierIntegrand, dom, p, alg::MonkhorstPack, cacheval; kws...)
-    g = f.nest isa NestedBatchIntegrand ? nested_to_batched(f, dom, cacheval.rule) : f.f
-    return do_solve(g, dom, p, alg, cacheval; kws...)
-end
-
-function do_solve(f::FourierIntegrand, dom, p, alg::AutoSymPTRJL, cacheval; kws...)
-    g = f.nest isa NestedBatchIntegrand ? nested_to_batched(f, dom, cacheval.cache[1]) : f.f
-    return do_solve(g, dom, p, alg, cacheval; kws...)
-end
-
-
-# dispatch on IAI algorithms
-
-function nested_cacheval(f::FourierIntegrand, p::P, algs, segs, lims, state, x, xs...) where {P}
-    dom = PuncturedInterval(segs)
-    a, b = segs[1], segs[2]
-    dim = ndims(lims)
-    alg = algs[dim]
-    mid = (a+b)/2 # sample point that should be safe to evaluate
-    next = limit_iterate(lims, state, mid) # see what the next limit gives
-    if xs isa Tuple{} # the next integral takes us to the inner integral
-        # base case test integrand of the inner integral
-        # we need to pass dummy integrands to all the outer integrals so that they can build
-        # their caches with the right types
-        if f.nest isa NestedBatchIntegrand
-            # Batch integrate the inner integral only
-            cacheval = init_cacheval(BatchIntegrand(nothing, f.nest.y, f.nest.x, max_batch=f.nest.max_batch), dom, p, alg)
-            return (nothing, cacheval, zero(eltype(f.nest.y))*mid)
-        else
-            fx = f(next,p)
-            cacheval = init_cacheval((x, p) -> fx, dom, p, alg)
-            return (nothing, cacheval, fx*mid)
-        end
-    elseif f.nest isa NestedBatchIntegrand
-        algs_ = algs[1:dim-1]
-        # numbered names to avoid type instabilities (we are not using dispatch, but the
-        # compiler's optimization for the recursive function's argument types)
-        nest0 = nested_cacheval(FourierIntegrand(f.f, f.w, f.nest.f[1]), p, algs_, next..., x, xs[1:dim-2]...)
-        cacheval = init_cacheval(BatchIntegrand(nothing, f.nest.y, f.nest.x, max_batch=f.nest.max_batch), dom, p, alg)
-        return (ntuple(n -> n == 1 ? nest0 : deepcopy(nest0), Val(length(f.nest.f))), cacheval, nest0[3]*mid)
-    else
-        algs_ = algs[1:dim-1]
-        nest1 = nested_cacheval(f, p, algs_, next..., x, xs[1:dim-2]...)
-        h = nest1[3]
-        hx = h*mid
-        # units may change for outer integral
-        cacheval = init_cacheval((x, p) -> h, dom, p, alg)
-        return (nest1, cacheval, hx)
-    end
-end
-
-function init_nest(f::FourierIntegrand, fxx, dom, p,lims, state, algs, cacheval; kws_...)
-    kws = NamedTuple(kws_)
-    xx = float(oneunit(eltype(dom)))
-    FX = typeof(fxx/xx)
-    TX = typeof(xx)
-    TP = Tuple{typeof(p),typeof(lims),typeof(state)}
-    if algs isa Tuple{} # inner integral
-        if f.nest isa NestedBatchIntegrand
-            nchunk = min(length(f.nest.f), length(f.w.cache))
-            return BatchIntegrand(f.nest.y, f.nest.x, max_batch=f.nest.max_batch) do y, x, (p, lims, state)
-                Threads.@threads for ichunk in 1:min(nchunk, length(x))
-                    for (i, j) in zip(getchunk(x, ichunk, nchunk, :scatter), getchunk(y, ichunk, nchunk, :scatter))
-                        xi = x[i]
-                        v = FourierValue(limit_iterate(lims, state, xi), workspace_evaluate!(f.w, xi, ichunk))
-                        y[j] = f.nest.f[ichunk](v, p)
-                    end
-                end
-                return nothing
-            end
-        else
-            return (x, (p, lims, state)) -> begin
-            # return FunctionWrapper{FX,Tuple{TX,TP}}() do x, (p, lims, state)
-                v = FourierValue(limit_iterate(lims, state, x), workspace_evaluate!(f.w, x))
-                return f.f(v, p)
-            end
-        end
-    else
-        if f.nest isa NestedBatchIntegrand
-            nchunks = min(length(f.nest.f), length(f.w.cache))
-            return BatchIntegrand(FunctionWrapper{Nothing,Tuple{typeof(f.nest.y),typeof(f.nest.x),TP}}() do y, x, (p, lims, state)
-                Threads.@threads for ichunk in 1:min(nchunks, length(x))
-                    for (i, j) in zip(getchunk(x, ichunk, nchunks, :scatter), getchunk(y, ichunk, nchunks, :scatter))
-                        xi = x[i]
-                        segs, lims_, state_ = limit_iterate(lims, state, xi)
-                        len = segs[end] - segs[1]
-                        kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
-                        fx = FourierIntegrand(f.f, workspace_contract!(f.w, xi, ichunk), f.nest.f[ichunk])
-                        y[j] = do_solve(fx, lims_, NestState(p, segs, state_), NestedQuad(algs), cacheval[ichunk]; kwargs...).u
-                    end
-                end
-                return nothing
-            end, f.nest.y, f.nest.x, max_batch=f.nest.max_batch)
-        else
-            g = f.nest === nothing ? f.f : f.nest
-            return FunctionWrapper{FX,Tuple{TX,TP}}() do x, (p, lims, state)
-                segs, lims_, state_ = limit_iterate(lims, state, x)
-                fx = FourierIntegrand(g, workspace_contract!(f.w, x))
-                len = segs[end] - segs[1]
-                kwargs = haskey(kws, :abstol) ? merge(kws, (abstol=kws.abstol/len,)) : kws
-                sol = do_solve(fx, lims_, NestState(p, segs, state_), NestedQuad(algs), cacheval; kwargs...)
-                return sol.u
-            end
-        end
-    end
-end
-
-function init_cacheval(f::FourierIntegrand, dom::AbstractIteratedLimits, p, alg::NestedQuad)
-    algs = alg.algs isa IntegralAlgorithm ? ntuple(i -> alg.algs, Val(ndims(dom))) : alg.algs
-    return nested_cacheval(f, p, algs, limit_iterate(dom)..., interior_point(dom)...)
-end
-
-function do_solve(f::FourierIntegrand, lims::AbstractIteratedLimits, p_, alg::NestedQuad, cacheval; kws...)
-    p, segs, state = if p_ isa NestState
-        p_.p, p_.segs, p_.state
-    else
-        seg, lim, sta = limit_iterate(lims)
-        p_, seg, sta
-    end
-    dom = PuncturedInterval(segs)
-    g = if f.nest isa NestedBatchIntegrand && eltype(f.nest.x) === Nothing
-        FourierIntegrand(f.f, f.w, NestedBatchIntegrand(f.nest.f, f.nest.y, float(eltype(dom))[], max_batch=f.nest.max_batch))
-    else
-        f
-    end
-    (dim = ndims(lims)) == ndims(f.w.series) || throw(ArgumentError("variables in Fourier series don't match domain"))
-    algs = alg.algs isa IntegralAlgorithm ? ntuple(i -> alg.algs, Val(dim)) : alg.algs
-    nest = init_nest(g, cacheval[3], dom, p, lims, state, algs[1:dim-1], cacheval[1]; kws...)
-    return do_solve(nest, dom, (p, lims, state), algs[dim], cacheval[2]; kws...)
-end
-
-function do_solve(f::FourierIntegrand, dom, p, alg::EvalCounter, cacheval; kws...)
-    if f.nest isa NestedBatchIntegrand
-        error("NestedBatchIntegrand not yet supported with EvalCounter")
-    else
-        n::Int = 0
-        function g(args...; kwargs...)
-            n += 1
-            return f.f.f(args...; kwargs...)
-        end
-        h = FourierIntegrand(ParameterIntegrand{typeof(g)}(g, f.f.p), f.w)
-        sol = do_solve(h, dom, p, alg.alg, cacheval; kws...)
-        return IntegralSolution(sol.u, sol.resid, true, n)
-    end
-end
-
-# method needed to resolve ambiguities
-function do_solve(f::FourierIntegrand, bz::SymmetricBZ, p, alg::EvalCounter{<:AutoBZAlgorithm}, cacheval; kws...)
-    return do_solve_autobz(count_bz_to_standard, f, bz, p, alg.alg, cacheval; kws...)
+function init_fourier_rule(w::FourierWorkspace, dom::RepBZ, alg::AutoSymPTRJL)
+    B = get_basis(dom)
+    rule = init_fourier_rule(w, B, alg)
+    return SymmetricRuleDef(rule, dom.rep, dom.bz)
+end
+function init_cacheval(f::AbstractFourierIntegralFunction, dom, p, alg::AutoSymPTRJL; kws...)
+    cache = init_autosymptr_cache(f, dom, p, alg.nthreads; kws...)
+    ws = cache.ws
+    rule = init_fourier_rule(ws, dom, alg)
+    rule_cache = AutoSymPTR.alloc_cache(eltype(dom), Val(ndims(dom)), rule)
+    return (; rule, rule_cache, cache...)
 end
diff --git a/src/inplace.jl b/src/inplace.jl
deleted file mode 100644
index 6e5c375..0000000
--- a/src/inplace.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-    InplaceIntegrand(f!, result::AbstractArray)
-
-Constructor for a `InplaceIntegrand` accepting an integrand of the form `f!(y,x,p)`. The
-caller also provides an output array needed to store the result of the quadrature.
-Intermediate `y` arrays are allocated during the calculation, and the final result is
-may or may not be written to `result`, so use the IntegralSolution immediately after the
-calculation to read the result, and don't expect it to persist if the same integrand is used
-for another calculation.
-"""
-struct InplaceIntegrand{F,T<:AbstractArray}
-    # in-place function f!(y, x, p) that takes one x value and outputs an array of results in-place
-    f!::F
-    I::T
-end
diff --git a/src/interfaces.jl b/src/interfaces.jl
index f5cb728..df8d748 100644
--- a/src/interfaces.jl
+++ b/src/interfaces.jl
@@ -1,8 +1,127 @@
-# we recreate a lot of the SciML Integrals.jl functionality, but only for our algorithms
-# the features we omit are: inplace integrands, infinite limit transformations, nout and
-# batch keywords. Otherwise, there is a correspondence between.
-# solve -> solve!
-# init -> init
+abstract type AbstractIntegralFunction end
+# should have at least two fields:
+# - f
+# - integrand_prototype
+
+"""
+    IntegralFunction(f, [prototype=nothing])
+
+Constructor for an out-of-place integrand of the form `f(x, p)`.
+Optionally, a `prototype` can be provided for the output of the function.
+"""
+struct IntegralFunction{F,P} <: AbstractIntegralFunction
+    f::F
+    prototype::P
+end
+IntegralFunction(f) = IntegralFunction(f, nothing)
+
+function get_prototype(f::IntegralFunction, x, p)
+    f.prototype === nothing ? f.f(x, p) : f.prototype
+end
+
+"""
+    InplaceIntegralFunction(f!, prototype::AbstractArray)
+
+Constructor for an inplace integrand of the form `f!(y, x, p)`.
+A `prototype` array is required to store the same type and size as the result, `y`.
+"""
+struct InplaceIntegralFunction{F,P<:AbstractArray} <: AbstractIntegralFunction
+    # in-place function f!(y, x, p) that takes one x value and outputs an array of results in-place
+    f!::F
+    prototype::P
+end
+
+function get_prototype(f::InplaceIntegralFunction, x, p)
+    # iip is required to have a prototype array
+    f.prototype
+end
+
+"""
+    InplaceBatchIntegralFunction(f!, prototype; max_batch::Integer=typemax(Int))
+
+Constructor for an inplace, batched integrand of the form `f!(y, x, p)` that accepts an
+array `x` containing a batch of evaluation points stored along the last axis of the array.
+A `prototype` array is required to store the same type and size as the result, `y`, however
+the last axis, which is reserved for batching, which should contain at least one element.
+The `max_batch` keyword sets a soft limit on the number of points batched simultaneously.
+"""
+struct InplaceBatchIntegralFunction{F,P<:AbstractArray} <: AbstractIntegralFunction
+    f!::F
+    prototype::P
+    max_batch::Int
+end
+
+function InplaceBatchIntegralFunction(f!, p::AbstractArray; max_batch::Integer=typemax(Int))
+    return InplaceBatchIntegralFunction(f!, p, max_batch)
+end
+
+function get_prototype(f::InplaceBatchIntegralFunction, x, p)
+    # iip is required to have a prototype array
+    f.prototype
+end
+
+abstract type AbstractSpecialization end
+struct NoSpecialize <: AbstractSpecialization end
+struct FunctionWrapperSpecialize <: AbstractSpecialization end
+struct FullSpecialize <: AbstractSpecialization end
+
+"""
+    CommonSolveIntegralFunction(prob, alg, update!, postsolve, [prototype, specialize]; kws...)
+
+Constructor for an integrand that solves a problem defined with the CommonSolve.jl
+interface, `prob`, which is instantiated using `init(prob, alg; kws...)`. Helper functions
+include: `update!(cache, x, p)` is called before
+`solve!(cache)`, followed by `postsolve(sol, x, p)`, which should return the value of the solution.
+The `prototype` argument can help control how much to `specialize` on the type of the
+problem, which defaults to `FullSpecialize()` so that run times are improved. However
+`FunctionWrapperSpecialize()` may help reduce compile times.
+"""
+struct CommonSolveIntegralFunction{P,A,K,U,S,T,M<:AbstractSpecialization} <: AbstractIntegralFunction
+    prob::P
+    alg::A
+    kwargs::K
+    update!::U
+    postsolve::S
+    prototype::T
+    specialize::M
+end
+function CommonSolveIntegralFunction(prob, alg, update!, postsolve, prototype=nothing, specialize=FullSpecialize(); kws...)
+    return CommonSolveIntegralFunction(prob, alg, NamedTuple(kws), update!, postsolve, prototype, specialize)
+end
+
+function do_solve!(cache, f::CommonSolveIntegralFunction, x, p)
+    f.update!(cache, x, p)
+    sol = solve!(cache)
+    return f.postsolve(sol, x, p)
+end
+function get_prototype(f::CommonSolveIntegralFunction, x, p)
+    if isnothing(f.prototype)
+        cache = init(f.prob, f.alg; f.kwargs...)
+        do_solve!(cache, f, x, p)
+    else
+        f.prototype
+    end
+end
+function init_specialized_integrand(cache, f, dom, p; x=get_prototype(dom), prototype=f.prototype)
+    proto = prototype === nothing ? do_solve!(cache, f, x, p) : prototype
+    func = (x, p) -> do_solve!(cache, f, x, p)
+    integrand = if f.specialize isa FullSpecialize
+        func
+    elseif f.specialize isa FunctionWrapperSpecialize
+        FunctionWrapper{typeof(prototype), typeof((x, p))}(func)
+    else
+        throw(ArgumentError("$(f.specialize) is not implemented"))
+    end
+    return integrand, proto
+end
+function _init_commonsolvefunction(f, dom, p; kws...)
+    cache = init(f.prob, f.alg; f.kwargs...)
+    integrand, prototype = init_specialized_integrand(cache, f, dom, p; kws...)
+    return cache, integrand, prototype
+end
+
+# TODO add InplaceCommonSolveIntegralFunction and InplaceBatchCommonSolveIntegralFunction
+# TODO add ThreadedCommonSolveIntegralFunction and DistributedCommonSolveIntegralFunction
 
 """
     IntegralAlgorithm
@@ -11,10 +130,6 @@ Abstract supertype for integration algorithms.
 """
 abstract type IntegralAlgorithm end
 
-# Methods an algorithm must define
-# - init_cacheval
-# - solve!
-
 """
     NullParameters()
 
@@ -23,31 +138,30 @@ A singleton type representing absent parameters
 struct NullParameters end
 
 """
-    IntegralProblem(f, domain, [p=NullParameters])
-    IntegralProblem(f, a::T, b::T, [p=NullParameters]) where {T}
+    IntegralProblem(f, domain, [p=NullParameters]; kwargs...)
 
-Collects the data need to define an integral of a function `f(x, p)` over a `domain`
-containing the points, `x`, and set with parameters `p` (default: [`NullParameters`](@ref)).
-If the domain is an interval or hypercube, it can also be specified by its endpoints `a, b`,
-and it gets converted to a [`PuncturedInterval`](@ref) or [`HyperCube`](@ref).
+## Arguments
+- `f::AbstractIntegralFunction`: The function to integrate
+- `domain`: The domain to integrate over, e.g. `(lb, ub)`
+- `p`: Parameters to pass to the integrand
+
+## Keywords
+Additional keywords are passed directly to the solver
 """
-struct IntegralProblem{F,D,P}
+struct IntegralProblem{F<:AbstractIntegralFunction,D,P,K<:NamedTuple}
     f::F
     dom::D
     p::P
-    function IntegralProblem{F,D,P}(f::F, dom::D, p::P) where {F,D,P}
-        return new{F,D,P}(f, dom, p)
-    end
+    kwargs::K
 end
-function IntegralProblem(f::F, dom::D, p::P=NullParameters()) where {F,D,P}
-    return IntegralProblem{F,D,P}(f, dom, p)
+function IntegralProblem(f::AbstractIntegralFunction, dom, p=NullParameters(); kws...)
+    return IntegralProblem(f, dom, p, NamedTuple(kws))
 end
-function IntegralProblem(f::F, a::T, b::T, p::P=NullParameters()) where {F,T,P}
-    dom = T <: Number ? PuncturedInterval((a, b)) : HyperCube(a, b)
-    return IntegralProblem{F,typeof(dom),P}(f, dom, p)
+function IntegralProblem(f, dom, p=NullParameters(); kws...)
+    return IntegralProblem(IntegralFunction(f), dom, p; kws...)
 end
 
-mutable struct IntegralCache{F,D,P,A,C,K}
+mutable struct IntegralSolver{F,D,P,A,C,K}
     f::F
     dom::D
     p::P
@@ -56,11 +170,6 @@ mutable struct IntegralCache{F,D,P,A,C,K}
     kwargs::K
 end
 
-function make_cache(f, dom, p, alg::IntegralAlgorithm; kwargs...)
-    cacheval = init_cacheval(f, dom, p, alg)
-    return IntegralCache(f, dom, p, alg, cacheval, NamedTuple(kwargs))
-end
-
 function checkkwargs(kwargs)
     for key in keys(kwargs)
         key in (:abstol, :reltol, :maxiters) || throw(ArgumentError("keyword $key unrecognized"))
@@ -69,16 +178,18 @@ function checkkwargs(kwargs)
 end
 
 """
-    init(::IntegralProblem, ::IntegralAlgorithm; kws...)::IntegralCache
+    init(::IntegralProblem, ::IntegralAlgorithm; kws...)::IntegralSolver
 
 Construct a cache for an [`IntegralProblem`](@ref), [`IntegralAlgorithm`](@ref), and the
 keyword arguments to the solver (i.e. `abstol`, `reltol`, or `maxiters`) that can be reused
 for solving the problem for multiple different parameters of the same type.
 """
 function init(prob::IntegralProblem, alg::IntegralAlgorithm; kwargs...)
-    checkkwargs(NamedTuple(kwargs))
     f = prob.f; dom = prob.dom; p = prob.p
-    return make_cache(f, dom, p, alg; kwargs...)
+    kws = (; prob.kwargs..., kwargs...)
+    checkkwargs(kws)
+    cacheval = init_cacheval(f, dom, p, alg; kws...)
+    return IntegralSolver(f, dom, p, alg, cacheval, kws)
 end
 
 """
@@ -103,141 +214,25 @@ tolerance can be used in combination with a smaller-than necessary absolute tole
 that the solution is resolved up to the requested significant digits, unless the integral is
 smaller than the absolute tolerance.
 """
-function solve(prob::IntegralProblem, alg::IntegralAlgorithm; kwargs...)
-    cache = init(prob, alg; kwargs...)
-    return solve!(cache)
-end
+solve(prob::IntegralProblem, alg::IntegralAlgorithm; kwargs...)
 
 """
-    solve!(::IntegralCache)::IntegralSolution
+    solve!(::IntegralSolver)::IntegralSolution
 
 Compute the solution to an [`IntegralProblem`](@ref) constructed from [`init`](@ref).
 """
-function solve!(c::IntegralCache)
-    return do_solve(c.f, c.dom, c.p, c.alg, c.cacheval; c.kwargs...)
-end
-
-struct IntegralSolution{T,E}
-    u::T
-    resid::E
-    retcode::Bool
-    numevals::Int
-end
-# a value of numevals < 0 means it was undefined
-
-
-"""
-    IntegralSolver(f, dom, alg; [abstol, reltol, maxiters])
-    IntegralSolver(f, lb, ub, alg::AbstractIntegralAlgorithm; [abstol, reltol, maxiters])
-
-Returns a functor, `fun`, that accepts input parameters `p` and solves the corresponding
-integral `fun(p) -> solve(IntegralProblem(f, lb, ub, p), alg).u`. See [`solve`](@ref) for
-details on the keywords.
-
-If `f` is a [`ParameterIntegrand`](@ref) or [`FourierIntegrand`](@ref), then the functor
-interface is modified to accept parameters as function arguments, and the following is done:
-`fun(args...; kwargs...) ->  solve(IntegralProblem(f, lb, ub, merge(f.p,
-MixedParameters(args...; kwargs...))), alg).u` where `f.p` are the preset parameters of `f`.
-"""
-struct IntegralSolver{F,D,A,T,K} <: Function
-    f::F
-    dom::D
-    alg::A
-    cacheval::T
-    kwargs::K
-end
-
-# some algorithms can already allocate a rule based on `dom`, but we leave extending this
-# method as an optimization to the caller
-init_solver_cacheval(f, dom, alg) = nothing
-
-function IntegralSolver(f, dom, alg::IntegralAlgorithm; kwargs...)
-    checkkwargs(NamedTuple(kwargs))
-    cacheval = init_solver_cacheval(f, dom, alg)
-    return IntegralSolver(f, dom, alg, cacheval, NamedTuple(kwargs))
-end
-
-function IntegralSolver(f, a::T, b::T, alg::IntegralAlgorithm; kwargs...) where {T}
-    dom = T <: Number ? PuncturedInterval((a, b)) : HyperCube(a, b)
-    return IntegralSolver(f, dom, alg; kwargs...)
+function solve!(c::IntegralSolver)
+    return do_integral(c.f, c.dom, c.p, c.alg, c.cacheval; c.kwargs...)
 end
 
-function IntegralSolver(prob::IntegralProblem, alg::IntegralAlgorithm; kwargs...)
-    return IntegralSolver(prob.f, prob.dom, alg; kwargs...)
+@enum ReturnCode begin
+    Success
+    Failure
+    MaxIters
 end
 
-
-# layer to intercept the problem parameters & algorithm and transform them
-remake_cache(args...) = IntegralCache(args...)
-remake_cache(c, p) = remake_cache(c.f, c.dom, p, c.alg, c.cacheval, c.kwargs)
-
-function solve_p(s::IntegralSolver, p)
-    c = if s.cacheval===nothing
-        prob = IntegralProblem(s.f, s.dom, p)
-        init(prob, s.alg; s.kwargs...)
-    else
-        remake_cache(s, p)
-    end
-    return solve!(c)
-end
-
-function (s::IntegralSolver)(p)
-    sol = solve_p(s, p)
-    return sol.u
-end
-
-# parallelization
-
-"""
-    batchparam(ps, nthreads)
-
-If the cost of a calculation smoothly varies with the parameters `ps`, then
-batch `ps` into `nthreads` groups where the `i`th element of group `j` is
-`ps[j+(i-1)*nthreads]` along the longest axis of `ps`. We assume that multidimensional
-arrays of parameters have smoothest cost along their longest axis
-"""
-function batchparam(xs::AbstractArray{T,N}, nthreads) where {T,N}
-    (s = size(xs)) === () && return (((CartesianIndex(()), only(xs)),),)
-    @assert nthreads >= 1
-    len, dim = findmax(s)
-    batches = [Tuple{CartesianIndex{N},T}[] for _ in 1:min(nthreads, len)]
-    for i in CartesianIndices(xs)
-        push!(batches[mod(i[dim]-1, nthreads)+1], (i, xs[i]))
-    end
-    return batches
-end
-
-function batchsolve!(out, f, ps, nthreads, callback)
-    n = Threads.Atomic{Int}(0)
-    Threads.@threads for batch in batchparam(ps, nthreads)
-        f_ = Threads.threadid() == 1 ? f : deepcopy(f) # avoid data races for in place integrators
-        for (i, p) in batch
-            t = time()
-            sol = solve_p(f_, p)
-            callback(f, i, Threads.atomic_add!(n, 1) + 1, p, sol, time() - t)
-            out[i] = sol.u
-        end
-    end
-    return out
-end
-
-solver_type(::F, ::P) where {F,P} = Base.promote_op((f, p) -> solve_p(f, p).u, F, P)
-
-"""
-    batchsolve(f::IntegralSolver, ps::AbstractArray, [T]; nthreads=Threads.nthreads())
-
-Evaluate the [`IntegralSolver`](@ref) `f` at each of the parameters `ps` in
-parallel. Returns an array similar to `ps` containing the evaluated integrals `I`. This is
-a form of multithreaded broadcasting. Providing the return type `f(eltype(ps))::T` is
-optional, but will help in case inference of that type fails.
-"""
-function batchsolve(f::IntegralSolver, ps::AbstractArray, T=solver_type(f, ps[begin]); nthreads=Threads.nthreads(), callback=(x...)->nothing)
-    solver = if f.cacheval === nothing
-        prob = IntegralProblem(f.f, f.dom, ps[begin])
-        cache = init(prob, f.alg; f.kwargs...)
-        IntegralSolver(f.f, f.dom, f.alg, cache.cacheval, f.kwargs)
-    else
-        f
-    end
-    return batchsolve!(similar(ps, T), solver, ps, nthreads, callback)
+struct IntegralSolution{T,S}
+    value::T
+    retcode::ReturnCode
+    stats::S
 end
diff --git a/src/parameters.jl b/src/parameters.jl
deleted file mode 100644
index fa19f0d..0000000
--- a/src/parameters.jl
+++ /dev/null
@@ -1,126 +0,0 @@
-"""
-    MixedParameters(args::Tuple, kwargs::NamedTuple)
-    MixedParameters(args...; kwargs...)
-
-A struct to store the arguments and keyword arguments to a function. Indicies access
-`args`, i.e. `MixedParameters(args...; kwargs...)[i] == args[i]` and properties access
-`kwargs`, i.e. `MixedParameters(args...; kwargs...).name == kwargs.name`.
-
-Used internally to store partially complete collections of function arguments or parameters.
-"""
-struct MixedParameters{A<:Tuple,K<:NamedTuple}
-    args::A
-    kwargs::K
-end
-MixedParameters(args...; kwargs...) = MixedParameters(args, NamedTuple(kwargs))
-
-# parameter fusion and iteration utilities
-
-Base.getindex(p::MixedParameters, i::Int) = getindex(getfield(p, :args), i)
-Base.getproperty(p::MixedParameters, name::Symbol) = getproperty(getfield(p, :kwargs), name)
-
-Base.merge(p::MixedParameters, q) =
-    MixedParameters((getfield(p, :args)..., q), getfield(p, :kwargs))
-Base.merge(q, p::MixedParameters) =
-    MixedParameters((q, getfield(p, :args)...), getfield(p, :kwargs))
-Base.merge(p::MixedParameters, q::NamedTuple) =
-    MixedParameters(getfield(p, :args), merge(getfield(p, :kwargs), q))
-Base.merge(q::NamedTuple, p::MixedParameters) =
-    MixedParameters(getfield(p, :args), merge(q, getfield(p, :kwargs)))
-Base.merge(p::MixedParameters, q::Tuple) =
-    MixedParameters((getfield(p, :args)..., q...), getfield(p, :kwargs))
-Base.merge(q::Tuple, p::MixedParameters) =
-    MixedParameters((q..., getfield(p, :args)...), getfield(p, :kwargs))
-Base.merge(p::MixedParameters, q::MixedParameters) =
-    MixedParameters((getfield(p, :args)..., getfield(q, :args)...), merge(getfield(p, :kwargs), getfield(q, :kwargs)))
-
-function paramzip_(::Tuple{}, ::NamedTuple{(), Tuple{}})
-    MixedParameters{Tuple{},NamedTuple{(), Tuple{}}}[]
-end
-function paramzip_(args::Tuple, ::NamedTuple{(), Tuple{}})
-    [MixedParameters(arg, NamedTuple()) for arg in zip(args...)]
-end
-function paramzip_(::Tuple{}, kwargs::NamedTuple)
-    [MixedParameters((), NamedTuple{keys(kwargs)}(val)) for val in zip(values(kwargs)...)]
-end
-function paramzip_(args::Tuple, kwargs::NamedTuple)
-    [MixedParameters(arg, NamedTuple{keys(kwargs)}(val)) for (arg, val) in zip(zip(args...), zip(values(kwargs)...))]
-end
-"""
-    paramzip(args...; kwargs...)
-
-Behaves similarly to `zip(zip(args...), zip(kwargs...))` with [`MixedParameters`](@ref)
-return values so that `paramzip(args...; kwargs...)[i][j] == args[j][i]` and
-`paramzip(args...; kwargs...)[i].name == kwargs.name[i]`.
-"""
-paramzip(args...; kwargs...) = paramzip_(args, NamedTuple(kwargs))
-
-function paramproduct_(args::Tuple, kwargs::NamedTuple)
-    [MixedParameters(item[1:length(args)], NamedTuple{keys(kwargs)}(item[length(args)+1:end])) for item in Iterators.product(args..., values(kwargs)...)]
-end
-
-"""
-    paramproduct(args...; kwargs...)
-
-Behaves similarly to `product(args..., kwargs...)` with [`MixedParameters`](@ref) return
-values so that `paramzip(args...; kwargs...)[i1, ...,ij, il, ...in] ==
-MixedParameters(args[begin][i1], ..., args[end][ij]; kwargs[begin][il], ..., kwargs[end][in])`
-"""
-paramproduct(args...; kwargs...) = paramproduct_(args, NamedTuple(kwargs))
-
-"""
-    ParameterIntegrand(f, args...; kwargs...)
-
-Represent an integrand with a partial collection of parameters `p`. When the
-`ParameterIntegrand` is invoked with one argument, e.g. `int(x)`, it evaluates `f(x,
-p...; kwargs...)`. However when invoked with two arguments, as in an `IntegralProblem`,
-e.g. `int(x, p2)`, it evaluates the union of parameters `f(x, p..., p2...; kwargs...)`.
-This allows for convenient parametrization of the integrand.
-"""
-struct ParameterIntegrand{F,P}
-    f::F
-    p::P
-    function ParameterIntegrand{F}(f::F, p::P) where {F,P<:MixedParameters}
-        return new{F,P}(f, p)
-    end
-end
-
-function ParameterIntegrand(f, args...; kwargs...)
-    p = MixedParameters(args...; kwargs...)
-    return ParameterIntegrand{typeof(f)}(f, p)
-end
-
-# provide Integrals.jl interface
-function (f::ParameterIntegrand)(x, q)
-    p = merge(f.p, q)
-    return f.f(x, getfield(p, :args)...; getfield(p, :kwargs)...)
-end
-(f::ParameterIntegrand)(x, ::NullParameters) = f(x, ())
-
-# move all parameters from f.p to p for convenience
-remake_integrand_cache(args...) = IntegralCache(args...)
-function remake_cache(f::ParameterIntegrand, dom, p, alg, cacheval, kwargs)
-    new = ParameterIntegrand(f.f)
-    return remake_integrand_cache(new, dom, merge(f.p, p), alg, cacheval, kwargs)
-end
-
-function (s::IntegralSolver{<:ParameterIntegrand})(args...; kwargs...)
-    p = MixedParameters(args...; kwargs...)
-    sol = solve_p(s, p)
-    return sol.u
-end
-
-function do_solve(f::ParameterIntegrand, dom, p, alg::EvalCounter, cacheval; kws...)
-    n::Int = 0
-    function g(x, args...; kwargs...)
-        n += 1
-        return f.f(x, args...; kwargs...)
-    end
-    sol = do_solve(ParameterIntegrand{typeof(g)}(g, f.p), dom, p, alg.alg, cacheval; kws...)
-    return IntegralSolution(sol.u, sol.resid, true, n)
-end
-
-# ambiguity
-function do_solve(f::ParameterIntegrand, bz::SymmetricBZ, p, alg::EvalCounter{<:AutoBZAlgorithm}, cacheval; kws...)
-    return do_solve(f, bz, p, AutoBZEvalCounter(bz_to_standard(bz, alg.alg)...), cacheval; kws...)
-end
diff --git a/test/brillouin.jl b/test/brillouin.jl
index 5745092..3cbbd80 100644
--- a/test/brillouin.jl
+++ b/test/brillouin.jl
@@ -1,7 +1,6 @@
 using Test
 using LinearAlgebra
 using AutoBZCore
-using AutoBZCore: IntegralProblem, solve, MixedParameters
 using AutoBZCore: PuncturedInterval, HyperCube, segments, endpoints
 
 @testset "domains" begin
@@ -35,77 +34,10 @@ end
     A = I(dims)
     vol = (2π)^dims
     for bz in (load_bz(FBZ(), A), load_bz(InversionSymIBZ(), A))
-        ip = IntegralProblem((x,p) -> 1.0, bz)  # unit measure
+        ip = AutoBZProblem((x,p) -> 1.0, bz)  # unit measure
         for alg in (IAI(), TAI(), PTR(), AutoPTR())
-            @test @inferred(solve(ip, alg)).u ≈ vol
+            solver = init(ip, alg)
+            @test @inferred(solve!(solver)).value ≈ vol
         end
-        @test @inferred(solve(IntegralProblem((x, p) -> exp(-x^2), -Inf, Inf), QuadGKJL())).u ≈ sqrt(pi)
-    end
-end
-
-@testset "interfaces" begin
-    @testset "MixedParameters" begin
-        args = (1, 2)
-        kwargs = (a = "a", b = "b")
-        p = MixedParameters(args...)
-        q = MixedParameters(; kwargs...)
-        # we only merge non-MixedParameters in the right argument
-        for pq = (merge(p, q), merge(p, kwargs), merge(q, args))
-            @test args[1] == pq[1]
-            @test args[2] == pq[2]
-            @test kwargs.a == pq.a
-            @test kwargs.b == pq.b
-        end
-        @test 3 == merge(p, 3)[3] == merge(q, 3)[1] # generic types are appended
-        @test "c" == merge(p, (a="c",)).a == merge(q, (a="c",)).a # keywords overwritten
-    end
-    @testset "IntegralSolver" begin
-        f = (x,p) -> p
-        p = 0.81
-        # SciML interface: (a, b, alg)
-        a = 0; b = 1
-        prob = IntegralProblem(f, a, b, 33) # ordinary integrands get parameters replaced
-        solver = IntegralSolver(prob, QuadGKJL())
-        @test solver(p) == solve(IntegralProblem(f, a, b, p), QuadGKJL()).u
-        # AutoBZ interface: (bz, alg)
-        dims = 3
-        A = I(dims)
-        B = AutoBZCore.canonical_reciprocal_basis(A)
-        bz = load_bz(FBZ(), A, B)
-        prob = IntegralProblem(f, bz, p)
-        solver = IntegralSolver(IntegralProblem(f, bz), IAI())
-        @test solver(p) == solve(prob, IAI()).u # use the plain interface
-        # g = (x,p) -> sum(x)*p[1]+p.a
-        # solver2 = IntegralSolver(ParameterIntegrand(g), bz, IAI()) # use the MixedParameters interface
-        # prob2 = IntegralProblem(g, bz, MixedParameters(12.0, a=1.0))
-        # @test solver2(12.0, a=1.0) == solve(prob2, IAI()).u
-    end
-    @testset "ParameterIntegrand" begin
-        # AutoBZ interface user function: f(x, args...; kwargs...) where args & kwargs
-        # stored in MixedParameters
-        f(x, a; b) = a*x+b
-        # SciML interface for ParameterIntegrand: f(x, p) (# and parameters can be preloaded and
-        # p is merged with MixedParameters)
-        @test f(6.7, 1.3, b=4.2) == ParameterIntegrand(f, 1.3, b=4.2)(6.7, AutoBZCore.NullParameters()) == ParameterIntegrand(f)(6.7, MixedParameters(1.3, b=4.2))
-        # A ParameterIntegrand merges its parameters with the problem's
-        prob = IntegralProblem(ParameterIntegrand(f, 1.3, b=4.2), 0, 1)
-        u = IntegralSolver(prob, QuadGKJL())()
-        v = IntegralSolver(ParameterIntegrand(f), 0, 1, QuadGKJL())(1.3, b=4.2)
-        w = IntegralSolver(ParameterIntegrand(f, b=4.2), 0, 1, QuadGKJL())(1.3)
-        @test u == v == w
-        @test solve(prob, EvalCounter(QuadGKJL(order=7))).numevals == 15
-    end
-    @testset "batchsolve" begin
-        # SciML interface: iterable of parameters
-        prob = IntegralProblem((x, p) -> p, 0, 1)
-        solver = IntegralSolver(prob, QuadGKJL())
-        params = range(1, 2, length=3)
-        @test [solver(p) for p in params] == batchsolve(solver, params)
-        # AutoBZ interface: array of MixedParameters
-        f(x, a; b) = a*x+b
-        solver = IntegralSolver(ParameterIntegrand(f), 0, 1, QuadGKJL())
-        as = rand(3); bs = rand(3)
-        @test [solver(a, b=b) for (a,b) in Iterators.zip(as, bs)] == batchsolve(solver, paramzip(as, b=bs))
-        @test [solver(a, b=b) for (a,b) in Iterators.product(as, bs)] == batchsolve(solver, paramproduct(as, b=bs))
     end
 end
diff --git a/test/dos.jl b/test/dos.jl
index 357b48f..87cb9bc 100644
--- a/test/dos.jl
+++ b/test/dos.jl
@@ -1,5 +1,6 @@
 using Test, AutoBZCore, LinearAlgebra, StaticArrays, OffsetArrays, Elliptic
 using GeneralizedGaussianQuadrature: generalizedquadrature
+using FourierSeriesEvaluators, QuadGK
 
 # test set of known DOS examples
 
@@ -105,15 +106,15 @@ for (model, solution, bandwidth, bzkind) in (
         cache = AutoBZCore.init(prob, alg)
         for e in E
             cache.domain = e
-            @test AutoBZCore.solve!(cache).u ≈ solution(e) atol=1e-2
+            @test AutoBZCore.solve!(cache).value ≈ solution(e) atol=1e-2
         end
     end
 end
 
 # test caching behavior
-let h = FourierSeries(SMatrix{1,1,Float64,1}.([0.5, 0.0, 0.5]), period=1.0, offset=-2)
+let h = FourierSeries(SMatrix{1,1,Float64,1}.([0.5, 0.0, 0.5]), period=1.0, offset=-2), E = 0.1
     bz = load_bz(FBZ(), [2pi;;])
-    prob = DOSProblem(h, 0.0, bz)
+    prob = DOSProblem(h, E, bz)
     alg = GGR()
 
     cache = AutoBZCore.init(prob, alg)
@@ -121,12 +122,14 @@ let h = FourierSeries(SMatrix{1,1,Float64,1}.([0.5, 0.0, 0.5]), period=1.0, offs
 
     h.c .*= 2
     cache.isfresh = true
+    cache.domain = 2E
 
     sol2 = AutoBZCore.solve!(cache)
 
-    @test sol1.u*2 ≈ sol2.u
+    @test sol1.value ≈ 2sol2.value
 
     cache.H = FourierSeries(2*h.c, period=h.t, offset=h.o)
+    cache.domain = 4E
     sol3 = AutoBZCore.solve!(cache)
-    @test sol2.u*2 ≈ sol3.u
+    @test sol2.value ≈ 2sol3.value
 end
diff --git a/test/fourier.jl b/test/fourier.jl
index 150fcc8..1d5abc4 100644
--- a/test/fourier.jl
+++ b/test/fourier.jl
@@ -1,11 +1,108 @@
 using Test
 using LinearAlgebra
 using StaticArrays
+using FourierSeriesEvaluators
 using AutoBZCore
-using AutoBZCore: IntegralProblem, solve, MixedParameters
+using AutoBZCore: CubicLimits
 using AutoBZCore: PuncturedInterval, HyperCube, segments, endpoints
 
+@testset "FourierIntegralFunction" begin
+    @testset "quadrature" begin
+        a = 0
+        b = 1
+        p = 0.0
+        t = 1.0
+        s = FourierSeries([1, 0, 1]/2; period=t, offset=-2)
+        int(x, s, p) = x * s + p
+        ref = (b-a)*p + t*(b*sin(b/t*2pi) + t*cos(b/t*2pi) - (a*sin(a/t*2pi) + t*cos(a/t*2pi)))
+        abstol = 1e-5
+        prob = IntegralProblem(FourierIntegralFunction(int, s), (a, b), p; abstol)
+        for alg in (QuadGKJL(), QuadratureFunction(), AuxQuadGKJL(), ContQuadGKJL(), MeroQuadGKJL())
+            @test solve(prob, alg).value ≈ ref atol=abstol
+        end
+    end
+    @testset "commonproblem" begin
+        a = 0
+        b = 1
+        p = 0.0
+        t = 1.0
+        update! = (cache, x, s, p) -> cache.p = (x, s, p)
+        postsolve = (sol, x, s, p) -> sol.value
+        s = FourierSeries([1, 0, 1]/2; period=t, offset=-2)
+        f = (x, (y, s, p)) -> x * s + p + y
+        subprob = IntegralProblem(f, (a, b), ((a+b)/2, s((a+b)/2), 1.0))
+        abstol = 1e-5
+        prob = IntegralProblem(CommonSolveFourierIntegralFunction(subprob, QuadGKJL(), update!, postsolve, s), (a, b), p; abstol)
+        for alg in (QuadGKJL(), QuadratureFunction(), AuxQuadGKJL(), ContQuadGKJL(), MeroQuadGKJL())
+            cache = init(prob, alg)
+            for p in [3.0, 4.0]
+                ref = (b-a)*(t*p + (b-a)^2/2) + (b-a)^2/2*t*(b*sin(b/t*2pi) + t*cos(b/t*2pi) - (a*sin(a/t*2pi) + t*cos(a/t*2pi)))
+                cache.p = p
+                sol = solve!(cache)
+                @test ref ≈ sol.value atol=abstol
+            end
+        end
+        f = (x, (y, s, p)) -> x * s + p
+        subprob = IntegralProblem(f, (a, b), ([(a+b)/2], s((a+b)/2), 1.0))
+        abstol = 1e-5
+        prob = IntegralProblem(CommonSolveFourierIntegralFunction(subprob, QuadGKJL(), update!, postsolve, s), AutoBZCore.Basis(t*I(1)), p; abstol)
+        for alg in (MonkhorstPack(), AutoSymPTRJL(),)
+            cache = init(prob, alg)
+            for p in [3.0, 4.0]
+                ref = (b-a)*(t*p) + (b-a)^2/2*t*(b*sin(b/t*2pi) + t*cos(b/t*2pi) - (a*sin(a/t*2pi) + t*cos(a/t*2pi)))
+                cache.p = p
+                sol = solve!(cache)
+                @test ref ≈ sol.value atol=abstol
+            end
+        end
+    end
+    @testset "cubature" for dim in 2:3
+        a = zeros(dim)
+        b = ones(dim)
+        p = 0.0
+        t = 1.0
+        s = FourierSeries([prod(x) for x in Iterators.product([(0.1, 0.5, 0.3) for i in 1:dim]...)]; period=t, offset=-2)
+        f = (x, s, p) -> prod(x) * s + p
+        abstol = 1e-4
+        prob = IntegralProblem(FourierIntegralFunction(f, s), (a, b), p; abstol)
+        refprob = IntegralProblem(IntegralFunction(let f=f; (x, p) -> f(x, s(x), p); end), (a, b), p; abstol)
+        for alg in (HCubatureJL(),)
+            @test solve(prob, alg).value ≈ solve(refprob, alg).value atol=abstol
+        end
+        p=1.3
+        f = (x, s, p) -> inv(s + im*p)
+        prob = IntegralProblem(FourierIntegralFunction(f, s), AutoBZCore.Basis(t*I(dim)), p; abstol)
+        refprob = IntegralProblem(IntegralFunction(let f=f; (x, p) -> f(x, s(x), p); end), AutoBZCore.Basis(t*I(dim)), p; abstol)
+        for alg in (MonkhorstPack(), AutoSymPTRJL(),)
+            @test solve(prob, alg).value ≈ solve(refprob, alg).value atol=abstol
+        end
+    end
+    @testset "meta-algorithms" for dim in 2:3
+        # NestedQuad
+        a = zeros(dim)
+        b = ones(dim)
+        p0 = 0.0
+        t = 1.0
+        s = FourierSeries([prod(x) for x in Iterators.product([(0.1, 0.5, 0.3) for i in 1:dim]...)]; period=t, offset=-2)
+        int(x, s, p) = prod(x) * s + p
+        abstol = 1e-4
+        prob = IntegralProblem(FourierIntegralFunction(int, s), CubicLimits(a, b), p0; abstol)
+        refprob = IntegralProblem(FourierIntegralFunction(int, s), (a, b), p0; abstol)
+        for alg in (QuadGKJL(), AuxQuadGKJL())
+            cache = init(prob, NestedQuad(alg))
+            refcache = init(refprob, HCubatureJL())
+            for p in [5.0, 6.0]
+                cache.p = p
+                refcache.p = p
+                @test solve!(cache).value ≈ solve!(refcache).value atol=abstol
+            end
+        end
+        # TODO implement CommonSolveFourierInplaceIntegralFunction
+        # TODO implement CommonSolveFourierInplaceBatchIntegralFunction
+    end
+end
 
+#=
 @testset "FourierIntegrand" begin
     for dims in 1:3
         s = FourierSeries(integer_lattice(dims), period=1)
@@ -54,3 +151,4 @@ end
         end
     end
 end
+=#
diff --git a/test/hdf5ext.jl b/test/hdf5ext.jl
deleted file mode 100644
index 565aae9..0000000
--- a/test/hdf5ext.jl
+++ /dev/null
@@ -1,56 +0,0 @@
-using Test
-using LinearAlgebra
-using StaticArrays
-using HDF5
-using AutoBZCore
-
-fn = tempname()
-h5open(fn, "w") do io
-    g1 = create_group(io, "Number")
-    @testset "Number" begin
-        prob   = IntegralProblem((x, p) -> p, 0, 1)
-        solver = IntegralSolver(prob, QuadGKJL())
-        params = 1:1.0:10
-        values = batchsolve(g1, solver, params, verb=false)
-        @test g1["I"][:] ≈ values
-    end
-    g2 = create_group(io, "SArray")
-    @testset "SArray" begin
-        f(x,p) = ((s,c) = sincos(p*x) ; SHermitianCompact{2,Float64,3}([s, c, -s]))
-        prob   = IntegralProblem(f, 0.0, 1pi)
-        solver = IntegralSolver(prob, QuadGKJL())
-        params = [0.8, 0.9, 1.0]
-        values = batchsolve(g2, solver, params, verb=false)
-        # Arrays of SArray are flattened to multidimensional arrays
-        @test g2["I"][:,:,:] ≈ reshape(collect(Iterators.flatten(values)), 2, 2, :)
-    end
-    g3 = create_group(io, "AuxValue")
-    @testset "AuxValue" begin
-        f(x,p) = ((re,im) = reim(inv(complex(cos(x), p))) ; AuxValue(re, im))
-        prob   = IntegralProblem(f, 0.0, 2pi)
-        solver = IntegralSolver(prob, QuadGKJL(), abstol=1e-3)
-        params = [2.0, 1.0, 0.5]
-        values = batchsolve(g3, solver, params, verb=false)
-        @test g3["I/val"][:] ≈ getproperty.(values, :val)
-        @test g3["I/aux"][:] ≈ getproperty.(values, :aux)
-    end
-    g4 = create_group(io, "0d")
-    g5 = create_group(io, "3d")
-    @testset "parameter dimensions" begin
-        prob   = IntegralProblem((x, p) -> p[1] + p[2] + p[3], 0, 1)
-        solver = IntegralSolver(prob, QuadGKJL())
-        # 0-d parameters
-        params = paramzip(0, 1, 2)
-        values = batchsolve(g4, solver, params, verb=false)
-        @test g4["I"][] ≈ values[] ≈ 3
-        @test g4["args/1"][] ≈ 0
-        @test g4["args/2"][] ≈ 1
-        @test g4["args/3"][] ≈ 2
-        # 3-d parameters
-        params = paramproduct(1:2, 1:2, 1:2)
-        values = batchsolve(g5, solver, params, verb=false)
-        @test g5["I"][1,1,1] ≈ values[1] ≈ 3
-        @test g5["I"][2,2,2] ≈ values[8] ≈ 6
-    end
-end
-rm(fn)
diff --git a/test/interface_tests.jl b/test/interface_tests.jl
index cef2e26..cc99c8f 100644
--- a/test/interface_tests.jl
+++ b/test/interface_tests.jl
@@ -1,8 +1,8 @@
 using Test
 using LinearAlgebra
 using AutoBZCore
-using AutoBZCore: IntegralProblem, solve, MixedParameters
 using AutoBZCore: PuncturedInterval, HyperCube, segments, endpoints
+using AutoBZCore: CubicLimits
 
 @testset "domains" begin
     # PuncturedInterval
@@ -25,19 +25,57 @@ using AutoBZCore: PuncturedInterval, HyperCube, segments, endpoints
 end
 
 @testset "quadrature" begin
-    # QuadratureFunction QuadGKJL AuxQuadGKJL ContQuadGKJL MeroQuadGKJL
     a = 0.0
     b = 2pi
     abstol=1e-5
-    p = 3.0
+    p=3.0
+    # QuadratureFunction QuadGKJL AuxQuadGKJL ContQuadGKJL MeroQuadGKJL
     for (f, ref) in (
         ((x,p) -> p*sin(x), 0.0),
         ((x,p) -> p*one(x), p*(b-a)),
         ((x,p) -> inv(p-cos(x)), (b-a)/sqrt(p^2-1)),
     )
-        prob = IntegralProblem(f, a, b, p)
+        prob = IntegralProblem(f, (a, b), p; abstol)
         for alg in (QuadratureFunction(), QuadGKJL(), AuxQuadGKJL(), ContQuadGKJL(), MeroQuadGKJL())
-            @test ref ≈ solve(prob, alg, abstol=abstol).u atol=abstol
+            sol = solve(prob, alg)
+            @test ref ≈ sol.value atol=abstol
+        end
+    end
+    @test @inferred(solve(IntegralProblem((x, p) -> exp(-x^2), (-Inf, Inf)), QuadGKJL())).value ≈ sqrt(pi)
+end
+
+@testset "commonproblem" begin
+    a = 1.0
+    b = 2pi
+    abstol=1e-5
+    p0=3.0
+    # QuadratureFunction QuadGKJL AuxQuadGKJL ContQuadGKJL MeroQuadGKJL
+    update! = (cache, x, p) -> cache.p = (x, p)
+    postsolve = (sol, x, p) -> sol.value
+    f = (x, (y, p)) -> p*(y + x)
+    subprob = IntegralProblem(f, (a, b), ((a+b)/2, p0); abstol)
+    integrand = CommonSolveIntegralFunction(subprob, QuadGKJL(), update!, postsolve)
+    prob = IntegralProblem(integrand, (a, b), p0; abstol)
+    for alg in (QuadratureFunction(), QuadGKJL(), HCubatureJL(), AuxQuadGKJL(), ContQuadGKJL(), MeroQuadGKJL())
+        cache = init(prob, alg)
+        for p in [3.0, 4.0]
+            ref = p*(b-a)*(b^2-a^2)
+            cache.p = p
+            sol = solve!(cache)
+            @test ref ≈ sol.value atol=abstol
+        end
+    end
+    f = (x, (y, p)) -> p*(sin(only(y))^2 + x)
+    subprob = IntegralProblem(f, (a, b), ([b/2], p0); abstol)
+    integrand = CommonSolveIntegralFunction(subprob, QuadGKJL(), update!, postsolve)
+    prob = IntegralProblem(integrand, AutoBZCore.Basis(b*I(1)), p0; abstol)
+    for alg in (MonkhorstPack(), AutoSymPTRJL(),)
+        cache = init(prob, alg)
+        for p in [3.0, 4.0]
+            ref = p*((b-a)+(b^2-a^2))*b/2
+            cache.p = p
+            sol = solve!(cache)
+            @test ref ≈ sol.value atol=abstol
         end
     end
 end
@@ -53,13 +91,13 @@ end
         ((x,p) -> p*one(eltype(x)), p*(b-a)^dim),
         ((x,p) -> prod(y -> inv(p-cos(y)), x), ((b-a)/sqrt(p^2-1))^dim),
     )
-        prob = IntegralProblem(f, fill(a, dim), fill(b, dim), p)
+        prob = IntegralProblem(f, (fill(a, dim), fill(b, dim)), p; abstol)
         for alg in (HCubatureJL(),)
-            @test ref ≈ solve(prob, alg, abstol=abstol).u atol=abstol
+            @test ref ≈ solve(prob, alg).value atol=abstol
         end
-        prob = IntegralProblem(f, Basis(b*I(dim)), p)
+        prob = IntegralProblem(f, AutoBZCore.Basis(b*I(dim)), p; abstol)
         for alg in (MonkhorstPack(), AutoSymPTRJL(),)
-            @test ref ≈ solve(prob, alg, abstol=abstol).u atol=abstol
+            @test ref ≈ solve(prob, alg).value atol=abstol
         end
     end
 end
@@ -75,18 +113,19 @@ end
         ((y,x,p) -> y .= p*one(only(x)), [p*(b-a)]),
         ((y,x,p) -> y .= inv(p-cos(only(x))), [(b-a)/sqrt(p^2-1)]),
     )
-        integrand = InplaceIntegrand(f, [0.0])
-        inplaceprob = IntegralProblem(integrand, a, b, p)
-        for alg in (QuadratureFunction(), QuadGKJL(), AuxQuadGKJL(), HCubatureJL(),)
-            @test ref ≈ solve(inplaceprob, alg, abstol=abstol).u atol=abstol
+        integrand = InplaceIntegralFunction(f, [0.0])
+        inplaceprob = IntegralProblem(integrand, (a, b), p; abstol)
+        for alg in (QuadGKJL(), QuadratureFunction(), QuadGKJL(), AuxQuadGKJL())
+            @test ref ≈ solve(inplaceprob, alg).value atol=abstol
         end
-        inplaceprob = IntegralProblem(integrand, Basis([b;;]), p)
+        inplaceprob = IntegralProblem(integrand, AutoBZCore.Basis([b;;]), p; abstol)
         for alg in (MonkhorstPack(), AutoSymPTRJL())
-            @test ref ≈ solve(inplaceprob, alg, abstol=abstol).u atol=abstol
+            @test ref ≈ solve(inplaceprob, alg).value atol=abstol
         end
     end
 end
 
+
 @testset "batch" begin
     # QuadratureFunction AuxQuadGKJL MonkhorstPack AutoSymPTRJL
     a = 0.0
@@ -98,37 +137,40 @@ end
         ((y,x,p) -> y .= p .* one.(only.(x)), p*(b-a)),
         ((y,x,p) -> y .= inv.(p .- cos.(only.(x))), (b-a)/sqrt(p^2-1)),
     )
-        integrand = BatchIntegrand(f, Float64)
-        batchprob = IntegralProblem(integrand, a, b, p)
-        for alg in (QuadratureFunction(), AuxQuadGKJL())
-            @test ref ≈ solve(batchprob, alg, abstol=abstol).u atol=abstol
+        integrand = InplaceBatchIntegralFunction(f, zeros(1))
+        batchprob = IntegralProblem(integrand, (a, b), p; abstol)
+        for alg in (QuadGKJL(), QuadratureFunction(), AuxQuadGKJL())
+            @test ref ≈ solve(batchprob, alg).value atol=abstol
         end
-        batchprob = IntegralProblem(integrand, Basis([b;;]), p)
+        batchprob = IntegralProblem(integrand, AutoBZCore.Basis([b;;]), p; abstol)
         for alg in (MonkhorstPack(), AutoSymPTRJL())
-            @test ref ≈ solve(batchprob, alg, abstol=abstol).u atol=abstol
+            @test ref ≈ solve(batchprob, alg).value atol=abstol
         end
     end
 end
-
 @testset "multi-algorithms" begin
     # NestedQuad
-    f(x, p) = 1.0 + p*sum(cos, x)
-    p = 7.0
+    f(x, p) = 1.0 + p*sum(abs2 ∘ cos, x)
     abstol=1e-3
-    for dim in 1:3, alg in (QuadratureFunction(), AuxQuadGKJL())
-        ref = (2pi)^dim
+    p0 = 0.0
+    for dim in 1:3, alg in (QuadratureFunction(), QuadGKJL(), AuxQuadGKJL())
         dom = CubicLimits(zeros(dim), 2pi*ones(dim))
-        prob = IntegralProblem(f, dom, p)
+        prob = IntegralProblem(f, dom, p0; abstol)
         ndalg = NestedQuad(alg)
-        @test ref ≈ solve(prob, ndalg, abstol=abstol).u atol=abstol
-        inplaceprob = IntegralProblem(InplaceIntegrand((y,x,p) -> y .= f(x,p), [0.0]), dom, p)
-        @test [ref] ≈ solve(inplaceprob, ndalg, abstol=abstol).u atol=abstol
-        batchprob = IntegralProblem(BatchIntegrand((y,x,p) -> y .= f.(x,Ref(p)), Float64), dom, p)
-        @test ref ≈ solve(batchprob, ndalg, abstol=abstol).u atol=abstol
-        nestedbatchprob = IntegralProblem(NestedBatchIntegrand(ntuple(n -> f, 3), Float64), dom, p)
-        @test ref ≈ solve(nestedbatchprob, ndalg, abstol=abstol).u atol=abstol
+        cache = init(prob, ndalg)
+        for p in [5.0, 7.0]
+            cache.p = p
+            ref = (2pi)^dim + dim*p*pi*(2pi)^(dim-1)
+            @test ref ≈ solve!(cache).value atol=abstol
+            # TODO implement CommonSolveInplaceIntegralFunction
+            inplaceprob = IntegralProblem(InplaceIntegralFunction((y,x,p) -> y .= f(x,p), [0.0]), dom, p)
+            @test_broken [ref] ≈ solve(inplaceprob, ndalg, abstol=abstol).value atol=abstol
+            # TODO implement CommonSolveInplaceBatchIntegralFunction
+            batchprob = IntegralProblem(InplaceBatchIntegralFunction((y,x,p) -> y .= f.(x,Ref(p)), zeros(Float64, 1)), dom, p)
+            @test_broken ref ≈ solve(batchprob, ndalg, abstol=abstol).value atol=abstol
+        end
     end
-
+    #=
     # AbsoluteEstimate
     est_alg = QuadratureFunction()
     abs_alg = QuadGKJL()
@@ -137,7 +179,7 @@ end
     f2(x, p) = inv(complex(p...) - cos(x))
     prob = IntegralProblem(f2, 0.0, 2pi, (0.5, 1e-3))
     abstol = 1e-5; reltol=1e-5
-    @test solve(prob, alg, reltol=reltol).u ≈ solve(prob, ref_alg, abstol=abstol).u atol=abstol
+    @test solve(prob, alg, reltol=reltol).value ≈ solve(prob, ref_alg, abstol=abstol).value atol=abstol
 
     # EvalCounter
     for prob in (
@@ -156,4 +198,5 @@ end
             @test solve(prob, EvalCounter(alg)).numevals == numevals
         end
     end
+    =#
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index d4318b4..538b731 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,7 +5,6 @@ include("utils.jl")
 @testset "interface" include("interface_tests.jl")
 @testset "brillouin" include("brillouin.jl")
 @testset "fourier" include("fourier.jl")
-@testset "HDF5Ext" include("hdf5ext.jl")
 @testset "SymmetryReduceBZExt" include("test_ibz.jl")
 # @testset "AtomsBaseExt" include("atomsbaseext.jl")
 # @testset "WannierIOExt" include("wannierioext.jl")
diff --git a/test/test_ibz.jl b/test/test_ibz.jl
index e8ce4f1..4661e15 100644
--- a/test/test_ibz.jl
+++ b/test/test_ibz.jl
@@ -5,6 +5,7 @@ import SymmetryReduceBZ.Symmetry: calc_bz, calc_ibz
 import SymmetryReduceBZ.Utilities: volume, vertices, get_uniquefacets
 using Polyhedra: Polyhedron
 using AutoBZCore
+using IteratedIntegration: nested_quad
 using LinearAlgebra
 using Test