diff --git a/src/MatrixFields/matrix_multiplication.jl b/src/MatrixFields/matrix_multiplication.jl index 19d1d6962b..9e66915725 100644 --- a/src/MatrixFields/matrix_multiplication.jl +++ b/src/MatrixFields/matrix_multiplication.jl @@ -347,7 +347,7 @@ boundary_modified_ud(::BottomRightMatrixCorner, ud, column_space, i) = # matrix field broadcast expressions to take roughly 3 or 4 times longer to # evaluate, but this is less significant than the decrease in compilation time. # matrix-matrix multiplication -function multiply_matrix_at_index( +Base.@propagate_inbounds function multiply_matrix_at_index( loc, space, idx, @@ -385,20 +385,33 @@ function multiply_matrix_at_index( # of as a map from boundary_modified_ld1 to boundary_modified_ud1. For # simplicity, use zero padding for rows that are outside the matrix. # Wrap the rows in a BandMatrixRow so that they can be easily indexed. - matrix2_rows = unrolled_map((ld1:ud1...,)) do d + N_mr = ud1 - ld1 + 1 + nt_mr = ntuple(ξ -> ld1 + ξ - 1, Val(N_mr)) + row_type = eltype(matrix2) + matrix2_rows = unrolled_map(nt_mr) do d # TODO: Use @propagate_inbounds_meta instead of @inline_meta. Base.@_inline_meta - if isnothing(bc) || boundary_modified_ld1 <= d <= boundary_modified_ud1 - @inbounds Operators.getidx(space, matrix2, loc, idx + d, hidx) + if isnothing(bc) || + boundary_modified_ld1 <= + d <= + boundary_modified_ud1 + @inbounds Operators.getidx( + space, + matrix2, + loc, + idx + d, + hidx, + )::row_type else - zero(eltype(matrix2)) # This row is outside the matrix. + zero(row_type) # This row is outside the matrix. end - end + end::NTuple{N_mr, row_type} matrix2_rows_wrapper = BandMatrixRow{ld1}(matrix2_rows...) # Precompute the zero value to avoid inference issues caused by passing # prod_type into the function closure below. - zero_value = rzero(eltype(prod_type)) + prod_entry_type = eltype(prod_type) + zero_value = rzero(prod_entry_type)::prod_entry_type # Compute the entries of the product matrix row. To avoid inference # issues at boundary points, this is implemented as a padded map from @@ -406,18 +419,35 @@ function multiply_matrix_at_index( # to boundary_modified_prod_ud. For simplicity, use zero padding for # entries that are outside the matrix. Wrap the entries in a # BandMatrixRow before returning them. - prod_entries = map((prod_ld:prod_ud...,)) do prod_d + N_pe = prod_ud - prod_ld + 1 + nt_pe = ntuple(ζ -> prod_ld + ζ - 1, Val(N_pe)) + prod_entries = unrolled_map(nt_pe) do prod_d # TODO: Use @propagate_inbounds_meta instead of @inline_meta. Base.@_inline_meta if isnothing(bc) || - boundary_modified_prod_ld <= prod_d <= boundary_modified_prod_ud - prod_entry = zero_value - min_d = max(boundary_modified_ld1, prod_d - ud2) - max_d = min(boundary_modified_ud1, prod_d - ld2) + boundary_modified_prod_ld <= + prod_d <= + boundary_modified_prod_ud + prod_entry = + zero_value::prod_entry_type + min_d = max( + boundary_modified_ld1, + prod_d - ud2, + ) + max_d = min( + boundary_modified_ud1, + prod_d - ld2, + ) @inbounds for d in min_d:max_d value1 = matrix1_row[d] - value2 = matrix2_rows_wrapper[d][prod_d - d] - value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) + value2 = + matrix2_rows_wrapper[d][prod_d - d] + value2_lg = + Geometry.LocalGeometry( + space, + idx + d, + hidx, + ) prod_entry = radd( prod_entry, rmul_with_projection(value1, value2, value2_lg), @@ -427,11 +457,11 @@ function multiply_matrix_at_index( else zero_value # This entry is outside the matrix. end - end + end::NTuple{N_pe, prod_entry_type} return BandMatrixRow{prod_ld}(prod_entries...) end # matrix-vector multiplication -function multiply_matrix_at_index( +Base.@propagate_inbounds function multiply_matrix_at_index( loc, space, idx, @@ -455,7 +485,7 @@ function multiply_matrix_at_index( matrix1_row = @inbounds Operators.getidx(space, matrix1, loc, idx, hidx) vector = arg - prod_value = rzero(prod_type) + prod_value = rzero(prod_type)::prod_type @inbounds for d in boundary_modified_ld1:boundary_modified_ud1 value1 = matrix1_row[d] value2 = Operators.getidx(space, vector, loc, idx + d, hidx) diff --git a/src/Operators/operator2stencil.jl b/src/Operators/operator2stencil.jl index 111a7e4f25..6afe715d5b 100644 --- a/src/Operators/operator2stencil.jl +++ b/src/Operators/operator2stencil.jl @@ -98,8 +98,7 @@ right_interior_idx( args..., ) = right_interior_idx(space, op.op, bc, args...) -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:Union{InterpolateF2C, InterpolateC2F}}, loc, space, @@ -123,8 +122,7 @@ function stencil_left_boundary( T = eltype(arg) return StencilCoefs{-half, half}((zero(T), zero(T))) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:InterpolateC2F}, bc::SetValue, loc, @@ -136,8 +134,8 @@ function stencil_right_boundary( T = eltype(arg) return StencilCoefs{-half, half}((zero(T), zero(T))) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:InterpolateC2F}, bc::Union{SetGradient, Extrapolate}, loc, @@ -149,8 +147,8 @@ function stencil_left_boundary( val⁺ = getidx(space, arg, loc, idx + half, hidx) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:InterpolateC2F}, bc::Union{SetGradient, Extrapolate}, loc, @@ -164,8 +162,8 @@ function stencil_right_boundary( end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:Union{LeftBiasedF2C, LeftBiasedC2F}}, loc, space, @@ -187,8 +185,8 @@ stencil_left_boundary( ) = StencilCoefs{-half, -half}((zero(eltype(arg)),)) -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:Union{RightBiasedF2C, RightBiasedC2F}}, loc, space, @@ -210,8 +208,8 @@ stencil_right_boundary( ) = StencilCoefs{half, half}((zero(eltype(arg)),)) -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:AdvectionC2C}, loc, space, @@ -235,8 +233,8 @@ function stencil_interior( val⁺ = RecursiveApply.rdiv(w³⁺ ⊠ (θ⁺ ⊟ θ), 2) return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:AdvectionC2C}, bc::SetValue, loc, @@ -261,8 +259,8 @@ function stencil_left_boundary( val⁺ = RecursiveApply.rdiv(w³⁺ ⊠ (θ⁺ ⊟ θ), 2) return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:AdvectionC2C}, bc::SetValue, loc, @@ -287,8 +285,8 @@ function stencil_right_boundary( val⁺ = w³⁺ ⊠ (θ⁺ ⊟ θ) return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:AdvectionC2C}, bc::Extrapolate, loc, @@ -307,8 +305,8 @@ function stencil_left_boundary( val⁺ = w³⁺ ⊠ (θ⁺ ⊟ θ) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:AdvectionC2C}, bc::Extrapolate, loc, @@ -329,8 +327,8 @@ function stencil_right_boundary( end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:Union{FluxCorrectionC2C, FluxCorrectionF2F}}, loc, space, @@ -354,8 +352,8 @@ function stencil_interior( val⁺ = abs(w³⁺) ⊠ (θ⁺ ⊟ θ) return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:Union{FluxCorrectionC2C, FluxCorrectionF2F}}, bc::Extrapolate, loc, @@ -374,8 +372,8 @@ function stencil_left_boundary( val⁺ = abs(w³⁺) ⊠ (θ⁺ ⊟ θ) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:Union{FluxCorrectionC2C, FluxCorrectionF2F}}, bc::Extrapolate, loc, @@ -396,8 +394,8 @@ function stencil_right_boundary( end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:Union{GradientF2C, GradientC2F}}, loc, space, @@ -412,8 +410,8 @@ function stencil_interior( Geometry.Covariant3Vector(1) ⊗ getidx(space, arg, loc, idx + half, hidx) return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:GradientF2C}, bc::SetValue, loc, @@ -426,8 +424,8 @@ function stencil_left_boundary( Geometry.Covariant3Vector(1) ⊗ getidx(space, arg, loc, idx + half, hidx) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:GradientF2C}, bc::SetValue, loc, @@ -459,8 +457,8 @@ stencil_right_boundary( hidx, arg, ) = extrapolation_increases_bandwidth_error(GradientF2C) -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:GradientC2F}, bc::SetValue, loc, @@ -473,8 +471,8 @@ function stencil_left_boundary( Geometry.Covariant3Vector(2) ⊗ getidx(space, arg, loc, idx + half, hidx) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:GradientC2F}, bc::SetValue, loc, @@ -514,8 +512,8 @@ function stencil_right_boundary( end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:Union{DivergenceF2C, DivergenceC2F}}, loc, space, @@ -536,8 +534,8 @@ function stencil_interior( val⁺ = Ju³⁺ ⊠ invJ return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:DivergenceF2C}, bc::SetValue, loc, @@ -554,8 +552,8 @@ function stencil_left_boundary( val⁺ = Ju³⁺ ⊠ invJ return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:DivergenceF2C}, bc::SetValue, loc, @@ -590,8 +588,8 @@ stencil_right_boundary( hidx, arg, ) = extrapolation_increases_bandwidth_error(DivergenceF2C) -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:DivergenceC2F}, bc::SetValue, loc, @@ -608,8 +606,8 @@ function stencil_left_boundary( val⁺ = Ju³⁺ ⊠ (2 * invJ) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:DivergenceC2F}, bc::SetValue, loc, @@ -661,8 +659,8 @@ fd3_curl⁺(::Geometry.Covariant3Vector, invJ) = fd3_curl⁺(u::Geometry.Covariant12Vector, invJ) = Geometry.Contravariant12Vector(-u.u₂ * invJ, u.u₁ * invJ) -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior( + +Base.@propagate_inbounds function stencil_interior( ::Operator2Stencil{<:CurlC2F}, loc, space, @@ -677,8 +675,8 @@ function stencil_interior( val⁺ = fd3_curl⁺(u₊, invJ) return StencilCoefs{-half, half}((val⁻, val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( + +Base.@propagate_inbounds function stencil_left_boundary( ::Operator2Stencil{<:CurlC2F}, bc::SetValue, loc, @@ -692,8 +690,8 @@ function stencil_left_boundary( val⁺ = fd3_curl⁺(u₊, 2 * invJ) return StencilCoefs{-half, half}((zero(val⁺), val⁺)) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( + +Base.@propagate_inbounds function stencil_right_boundary( ::Operator2Stencil{<:CurlC2F}, bc::SetValue, loc, diff --git a/src/Operators/pointwisestencil.jl b/src/Operators/pointwisestencil.jl index 4060e5e335..829dd2f768 100644 --- a/src/Operators/pointwisestencil.jl +++ b/src/Operators/pointwisestencil.jl @@ -435,11 +435,19 @@ return_eltype(::ApplyStencil, stencil, arg) = eltype(eltype(stencil)) return_space(::ApplyStencil, stencil_space, arg_space) = stencil_space -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function apply_stencil_at_idx(i_vals, stencil, arg, loc, space, idx, hidx) +Base.@propagate_inbounds function apply_stencil_at_idx( + i_vals, + stencil, + arg, + loc, + space, + idx, + hidx, +) coefs = getidx(space, stencil, loc, idx, hidx) lbw = bandwidths(eltype(stencil))[1] - val = zero(eltype(eltype(stencil))) + val_type = eltype(eltype(stencil)) + val = zero(val_type)::val_type @inbounds for j in 1:length(i_vals) i = i_vals[j] val = val ⊞ coefs[i - lbw + 1] ⊠ getidx(space, arg, loc, idx + i, hidx) @@ -447,15 +455,21 @@ function apply_stencil_at_idx(i_vals, stencil, arg, loc, space, idx, hidx) return val end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_interior(::ApplyStencil, loc, space, idx, hidx, stencil, arg) +Base.@propagate_inbounds function stencil_interior( + ::ApplyStencil, + loc, + space, + idx, + hidx, + stencil, + arg, +) lbw, ubw = bandwidths(eltype(stencil)) i_vals = lbw:ubw return apply_stencil_at_idx(i_vals, stencil, arg, loc, space, idx, hidx) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_left_boundary( +Base.@propagate_inbounds function stencil_left_boundary( ::ApplyStencil, ::LeftStencilBoundary, loc, @@ -471,8 +485,7 @@ function stencil_left_boundary( return apply_stencil_at_idx(i_vals, stencil, arg, loc, space, idx, hidx) end -# TODO: find out why using Base.@propagate_inbounds blows up compilation time -function stencil_right_boundary( +Base.@propagate_inbounds function stencil_right_boundary( ::ApplyStencil, ::RightStencilBoundary, loc, @@ -537,7 +550,7 @@ function compose_stencils_at_idx( ntup = ( ntuple(Val(n)) do j Base.@_inline_meta - val = zero(zeroT) + val = zero(zeroT)::zeroT for i in get_range(ir_type, space, stencil1, stencil2, idx, j) val = val ⊞