Skip to content

lazied_dicg #532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
08e2412
added is_inface_feasible function
WenjieXiao-2022 Nov 15, 2024
f72e008
filtered in-face vertex in precomputed set
WenjieXiao-2022 Nov 15, 2024
67172a2
format
WenjieXiao-2022 Nov 15, 2024
9e66dbb
added lazification for blended
WenjieXiao-2022 Nov 15, 2024
bad0595
lazified blended DICG
WenjieXiao-2022 Nov 15, 2024
c48c319
formatted
WenjieXiao-2022 Nov 15, 2024
da58370
tols changed for inface check
WenjieXiao-2022 Nov 15, 2024
8192bb9
removed tabs
WenjieXiao-2022 Nov 28, 2024
720531b
renamed arguments
WenjieXiao-2022 Nov 28, 2024
8ed9db6
removed duplicate code in lazy_dicg_step
WenjieXiao-2022 Nov 28, 2024
ab3b818
format
WenjieXiao-2022 Nov 28, 2024
b2d3cf0
added tol for gamma_max comput and inface away vertex
WenjieXiao-2022 Dec 4, 2024
888af8a
rename
WenjieXiao-2022 Dec 4, 2024
a3d0768
vectorize x
WenjieXiao-2022 Dec 4, 2024
2e4aaaa
Merge branch 'ZIB-IOL:master' into dicg
WenjieXiao-2022 Dec 4, 2024
c4361d3
in-face checked for pre_computed_set
WenjieXiao-2022 Dec 4, 2024
d309e92
Merge branch 'ZIB-IOL:master' into dicg
WenjieXiao-2022 Dec 13, 2024
282566b
removed the second in-face function and formatted
WenjieXiao-2022 Dec 17, 2024
2a2f078
formatted
WenjieXiao-2022 Dec 17, 2024
fa891ca
check if extra vertex storage empty for lazification
WenjieXiao-2022 Jan 2, 2025
4cd7fc4
move computed dirction
WenjieXiao-2022 Jan 3, 2025
1111ae7
formatted
WenjieXiao-2022 Jan 3, 2025
85547df
added inface feasibility check for ZeroOne cube
WenjieXiao-2022 Jan 3, 2025
0c05deb
added inface feasibility check for the Birkhoff Polytope
WenjieXiao-2022 Jan 3, 2025
10af11b
added inface feasibility check functions
WenjieXiao-2022 Jan 3, 2025
cf7286a
typo
WenjieXiao-2022 Jan 4, 2025
ea9d743
added lazification functions
WenjieXiao-2022 Jan 4, 2025
498da61
Update dicg.jl
WenjieXiao-2022 Feb 25, 2025
7bdd294
updated strong lazification parameter
WenjieXiao-2022 Feb 25, 2025
7b98755
Update dicg.jl
WenjieXiao-2022 Feb 25, 2025
d12c92e
resolved conflicts
WenjieXiao-2022 Feb 25, 2025
74eaa23
renamed
WenjieXiao-2022 Feb 25, 2025
16e013e
Merge branch 'master' into dicg
WenjieXiao-2022 Feb 25, 2025
a67e820
added some note
WenjieXiao-2022 Feb 25, 2025
d2f1bbc
changed to !=
WenjieXiao-2022 Feb 25, 2025
c96021b
removed indent
WenjieXiao-2022 Feb 25, 2025
0fa245c
removed indent
WenjieXiao-2022 Feb 25, 2025
deec99d
Update dicg.jl
WenjieXiao-2022 Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 85 additions & 27 deletions src/dicg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,15 @@ function blended_decomposition_invariant_conditional_gradient(
phi = primal
gamma = one(phi)

if lazy
if extra_vertex_storage === nothing
v = compute_extreme_point(lmo, gradient, lazy = lazy)
pre_computed_set = [v]
else
pre_computed_set = extra_vertex_storage
end
end

if linesearch_workspace === nothing
linesearch_workspace = build_linesearch_workspace(line_search, x, gradient)
end
Expand Down Expand Up @@ -393,7 +402,17 @@ function blended_decomposition_invariant_conditional_gradient(
end

if lazy
error("not implemented yet")
d, v, v_index, a, away_index, phi, step_type =
lazy_dicg_step(
x,
gradient,
lmo,
pre_computed_set,
phi,
epsilon,
d;
variant = "blended",
)
else # non-lazy, call the simple and modified
a = compute_inface_extreme_point(lmo, NegatingArray(gradient), x; lazy=lazy)
v_inface = compute_inface_extreme_point(lmo, gradient, x; lazy=lazy)
Expand All @@ -412,6 +431,11 @@ function blended_decomposition_invariant_conditional_gradient(
gamma_max = one(phi)
end
end
if step_type == ST_REGULAR
gamma_max = one(phi)
else
gamma_max = dicg_maximum_step(lmo, d, x)
end
gamma = perform_line_search(
line_search,
t,
Expand Down Expand Up @@ -491,52 +515,86 @@ function lazy_dicg_step(
phi,
epsilon,
d;
use_extra_vertex_storage=false,
extra_vertex_storage=nothing,
lazy_tolerance=2.0,
memory_mode::MemoryEmphasis=InplaceEmphasis(),
variant = "standard",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you only have two variants it might be cleaner to use a boolean flag here. For example, using_blended which would be true if blended_dicg was used and false otherwise.

strong_lazification = false,
use_extra_vertex_storage = false,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a specific function for the lazy version of dicg, we always use the extra vertex storage, no? In which case, the default value for use_extra_vertex_storage should be true.

extra_vertex_storage = nothing,
lazy_tolerance = 2.0,
memory_mode::MemoryEmphasis = InplaceEmphasis(),
)
v_local, v_local_loc, val, a_local, a_local_loc, valM =
pre_computed_set_argminmax(pre_computed_set, gradient)
step_type = ST_REGULAR
pre_computed_set_argminmax(lmo, pre_computed_set, gradient, x; strong_lazification = strong_lazification)
step_type = ST_PAIRWISE
away_index = nothing
fw_index = nothing
grad_dot_x = fast_dot(x, gradient)
grad_dot_a_local = valM

# Do lazy pairwise step
grad_dot_lazy_fw_vertex = val

if grad_dot_a_local - grad_dot_lazy_fw_vertex >= phi / lazy_tolerance &&
grad_dot_a_local - grad_dot_lazy_fw_vertex >= epsilon
if strong_lazification
a_taken = a_local
grad_dot_a_taken = grad_dot_a_local
else
a_taken = compute_inface_extreme_point(lmo, NegatingArray(gradient), x)
grad_dot_a_taken = fast_dot(gradient, a_taken)
end
# Do lazy pairwise step
if grad_dot_a_taken - grad_dot_lazy_fw_vertex >= phi / lazy_tolerance &&
grad_dot_a_taken - grad_dot_lazy_fw_vertex >= epsilon
step_type = ST_LAZY
v = v_local
a = a_local
a = a_taken
d = muladd_memory_mode(memory_mode, d, a, v)
fw_index = v_local_loc
else
v = compute_extreme_point(lmo, gradient)
grad_dot_v = fast_dot(gradient, v)
# Do lazy inface_point
if grad_dot_a_local - grad_dot_v >= phi / lazy_tolerance &&
grad_dot_a_local - grad_dot_v >= epsilon
step_type = ST_LAZY
a = a_local
away_index = a_local_loc
dual_gap = grad_dot_x - grad_dot_v
phi = dual_gap

if variant == "standard"
v_taken = v
grad_dot_v_taken = grad_dot_v
else
a = compute_inface_extreme_point(lmo, NegatingArray(gradient), x)
v_taken = compute_inface_extreme_point(lmo, gradient, x;)
grad_dot_v_taken = fast_dot(gradient, v_taken)

end

# Real dual gap promises enough progress.
grad_dot_fw_vertex = fast_dot(v, gradient)
dual_gap = grad_dot_x - grad_dot_fw_vertex

# Do lazy inface_point
if dual_gap >= phi / lazy_tolerance
d = muladd_memory_mode(memory_mode, d, a, v)
#Lower our expectation for progress.
if (grad_dot_a_taken - grad_dot_v_taken >= phi / lazy_tolerance &&
grad_dot_a_taken - grad_dot_v_taken >= epsilon) || !strong_lazification
if !strong_lazification
a = a_taken
d = muladd_memory_mode(memory_mode, d, a, v_taken)
step_type = ST_PAIRWISE
away_index = -1
else
a = a_taken
d = muladd_memory_mode(memory_mode, d, a, v_taken)
step_type = ST_PAIRWISE
away_index = a_local_loc
end
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only thing that is different is away_index, isn't it? That can be done cleaner as

away_index = strong_lazification ? a_local_loc : -1

else
a = compute_inface_extreme_point(lmo, NegatingArray(gradient), x)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You seem to be calling this twice! For Blended in line 559 and for non strong lazification in line 538.

if variant == "standard"
d = muladd_memory_mode(memory_mode, d, a, v)
else
grad_dot_a = fast_dot(gradient, a)
inface_gap = grad_dot_a - grad_dot_v_taken
if inface_gap >= phi / lazy_tolerance
step_type = ST_PAIRWISE
d = muladd_memory_mode(memory_mode, d, a, v)
else # global FW step
step_type = ST_REGULAR
d = muladd_memory_mode(memory_mode, d, x, v)
end
end
end
else
d = muladd_memory_mode(memory_mode, d, a, v)
step_type = ST_DUALSTEP
phi = min(dual_gap, phi / 2.0)
a = a_taken
end
end
return d, v, fw_index, a, away_index, phi, step_type
Expand Down
56 changes: 43 additions & 13 deletions src/moi_oracle.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,14 @@ function compute_inface_extreme_point(lmo::MathOptLMO{OT}, direction, x; solve_d
end

# function barrier for performance
function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F}, ::Type{S}, valvar) where {OT,F,S}
function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F}, ::Type{S}, valvar;atol=1e-6) where {OT,F,S}
const_list = MOI.get(lmo.o, MOI.ListOfConstraintIndices{F,S}())
for c_idx in const_list
func = MOI.get(lmo.o, MOI.ConstraintFunction(), c_idx)
val = MOIU.eval_variables(valvar, func)
set = MOI.get(lmo.o, MOI.ConstraintSet(), c_idx)
if S <: MOI.GreaterThan
if set.lowerval
if isapprox(set.lower, val; atol = atol)
MOI.delete(lmo.o, c_idx)
if F <: MOI.VariableIndex
check_cidx = MOI.ConstraintIndex{F,MOI.LessThan{Float64}}(c_idx.value)
Expand All @@ -191,7 +191,7 @@ function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F},
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.lower))
end
elseif S <: MOI.LessThan
if set.upperval
if isapprox(set.upper, val; atol = atol)
MOI.delete(lmo.o, c_idx)
if F <: MOI.VariableIndex
check_cidx = MOI.ConstraintIndex{F,MOI.GreaterThan{Float64}}(c_idx.value)
Expand All @@ -213,10 +213,10 @@ function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F},
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.upper))
end
elseif S <: MOI.Interval
if set.upperval
if isapprox(set.upper, val; atol = atol)
MOI.delete(lmo.o, c_idx)
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.upper))
elseif set.lowerval
elseif isapprox(set.lower, val; atol = atol)
MOI.delete(lmo.o, c_idx)
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.lower))
end
Expand All @@ -232,17 +232,17 @@ function compute_inface_extreme_point(
kwargs...,
) where {OT, T <: Real}
n = size(direction, 1)
a = compute_inface_extreme_point(lmo, vec(direction), x)
a = compute_inface_extreme_point(lmo, vec(direction), vec(x))
return reshape(a, n, n)
end

# Fast way to compute gamma_max.
# Check every constraint and compute the corresponding gamma_upper_bound.
function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x) where {OT}
function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x;tol=1e-6) where {OT}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to have a doc string here, i.e.

"""
Description of the function.
"""

This would also be good for the compute_inface_extreme_point as well as is_inface_feasible function.

gamma_less_than = Float64[]
for (F, S) in MOI.get(lmo.o, MOI.ListOfConstraintTypesPresent())
valvar(f) = x[f.value]
valvar_(f) = direction[f.value]
valvar_d(f) = direction[f.value]
const_list = MOI.get(lmo.o, MOI.ListOfConstraintIndices{F,S}())

# Constraints need to satisfy g(x+γ*d) ∈ ConstraintSet.
Expand All @@ -252,28 +252,28 @@ function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x) where {OT}
# Compute g(x).
val = MOIU.eval_variables(valvar, func)
# Compute g(d).
val_d = MOIU.eval_variables(valvar_, func)
val_d = MOIU.eval_variables(valvar_d, func)
set = MOI.get(lmo.o, MOI.ConstraintSet(), c_idx)
if S <: MOI.Interval
if val_d < 0.0
if val_d < -tol
upper_bound_gamma = (val - set.upper) / val_d
push!(gamma_less_than, upper_bound_gamma)
end
if val_d > 0.0
if val_d > tol
upper_bound_gamma = (val - set.lower) / val_d
push!(gamma_less_than, upper_bound_gamma)
end
end

if S <: MOI.LessThan
if val_d < 0.0
if val_d < -tol
upper_bound_gamma = (val - set.upper) / val_d
push!(gamma_less_than, upper_bound_gamma)
end
end

if S <: MOI.GreaterThan
if val_d > 0.0
if val_d > tol
upper_bound_gamma = (val - set.lower) / val_d
push!(gamma_less_than, upper_bound_gamma)
end
Expand All @@ -292,6 +292,36 @@ function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x) where {OT}
end
end

function is_inface_feasible(lmo::MathOptLMO{OT}, a, x;) where {OT}
variables = MOI.get(lmo.o, MOI.ListOfVariableIndices())
valvar(f) = x[f.value]
valvar_away(f) = a[f.value]
for (F, S) in MOI.get(lmo.o, MOI.ListOfConstraintTypesPresent())
const_list = MOI.get(lmo.o, MOI.ListOfConstraintIndices{F, S}())
for c_idx in const_list
func = MOI.get(lmo.o, MOI.ConstraintFunction(), c_idx)
val = MOIU.eval_variables(valvar, func)
val_away = MOIU.eval_variables(valvar_away, func)
set = MOI.get(lmo.o, MOI.ConstraintSet(), c_idx)
if S <: MOI.GreaterThan || S <: MOI.Interval
if isapprox(set.lower, val; atol = 1e-15, rtol = 1e-5)
if !isapprox(set.lower, val_away; atol = 1e-15, rtol = 1e-5)
return false
end
end
end
if S <: MOI.LessThan || S <: MOI.Interval
if isapprox(set.upper, val; atol = 1e-15, rtol = sqrt(eps()))
if !isapprox(set.upper, val_; atol = 1e-15, rtol = 1e-5)
return false
end
end
end
end
end
return true
end

function Base.copy(lmo::MathOptLMO{OT}; ensure_identity=true) where {OT}
opt = OT() # creates the empty optimizer
index_map = MOI.copy_to(opt, lmo.o)
Expand Down
46 changes: 24 additions & 22 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -382,28 +382,30 @@ Base.length(storage::DeletedVertexStorage) = length(storage.storage)
Computes the linear minimizer in the direction on the precomputed_set.
Precomputed_set stores the vertices computed as extreme points v in each iteration.
"""
function pre_computed_set_argminmax(pre_computed_set, direction)
val = convert(eltype(direction), Inf)
valM = convert(eltype(direction), -Inf)
idx = -1
idxM = -1
for i in eachindex(pre_computed_set)
temp_val = fast_dot(pre_computed_set[i], direction)
if temp_val < val
val = temp_val
idx = i
end
if valM < temp_val
valM = temp_val
idxM = i
end
end
if idx == -1 || idxM == -1
error("Infinite minimum $val or maximum $valM in the precomputed set. Does the gradient contain invalid (NaN / Inf) entries?")
end
v_local = pre_computed_set[idx]
a_local = pre_computed_set[idxM]
return (v_local, idx, val, a_local, idxM, valM)
function pre_computed_set_argminmax(lmo, pre_computed_set, direction, x; strong_lazification = false)
val = convert(eltype(direction), Inf)
valM = convert(eltype(direction), -Inf)
idx = -1
idxM = -1
for i in eachindex(pre_computed_set)
temp_val = fast_dot(pre_computed_set[i], direction)
if temp_val < val
val = temp_val
idx = i
end
if strong_lazification
if is_inface_feasible(lmo, pre_computed_set[i], x) && temp_val > valM
valM = temp_val
idxM = i
end
end
end
if idx == -1
error("Infinite minimum $val in the precomputed set. Does the gradient contain invalid (NaN / Inf) entries?")
end
v_local = pre_computed_set[idx]
a_local = idxM != -1 ? pre_computed_set[idxM] : nothing
return (v_local, idx, val, a_local, idxM, valM)
end

"""
Expand Down
Loading