-
Notifications
You must be signed in to change notification settings - Fork 22
lazied_dicg #532
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
lazied_dicg #532
Changes from 17 commits
08e2412
f72e008
67172a2
9e66dbb
bad0595
c48c319
da58370
8192bb9
720531b
8ed9db6
ab3b818
b2d3cf0
888af8a
a3d0768
2e4aaaa
c4361d3
d309e92
282566b
2a2f078
fa891ca
4cd7fc4
1111ae7
85547df
0c05deb
10af11b
cf7286a
ea9d743
498da61
7bdd294
7b98755
d12c92e
74eaa23
16e013e
a67e820
d2f1bbc
c96021b
0fa245c
deec99d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -361,6 +361,15 @@ function blended_decomposition_invariant_conditional_gradient( | |
phi = primal | ||
gamma = one(phi) | ||
|
||
if lazy | ||
if extra_vertex_storage === nothing | ||
v = compute_extreme_point(lmo, gradient, lazy = lazy) | ||
pre_computed_set = [v] | ||
else | ||
pre_computed_set = extra_vertex_storage | ||
end | ||
end | ||
|
||
if linesearch_workspace === nothing | ||
linesearch_workspace = build_linesearch_workspace(line_search, x, gradient) | ||
end | ||
|
@@ -393,7 +402,17 @@ function blended_decomposition_invariant_conditional_gradient( | |
end | ||
|
||
if lazy | ||
error("not implemented yet") | ||
d, v, v_index, a, away_index, phi, step_type = | ||
lazy_dicg_step( | ||
x, | ||
gradient, | ||
lmo, | ||
pre_computed_set, | ||
phi, | ||
epsilon, | ||
d; | ||
variant = "blended", | ||
) | ||
else # non-lazy, call the simple and modified | ||
a = compute_inface_extreme_point(lmo, NegatingArray(gradient), x; lazy=lazy) | ||
v_inface = compute_inface_extreme_point(lmo, gradient, x; lazy=lazy) | ||
|
@@ -412,6 +431,11 @@ function blended_decomposition_invariant_conditional_gradient( | |
gamma_max = one(phi) | ||
end | ||
end | ||
if step_type == ST_REGULAR | ||
gamma_max = one(phi) | ||
else | ||
gamma_max = dicg_maximum_step(lmo, d, x) | ||
end | ||
gamma = perform_line_search( | ||
line_search, | ||
t, | ||
|
@@ -491,52 +515,86 @@ function lazy_dicg_step( | |
phi, | ||
epsilon, | ||
d; | ||
use_extra_vertex_storage=false, | ||
extra_vertex_storage=nothing, | ||
lazy_tolerance=2.0, | ||
memory_mode::MemoryEmphasis=InplaceEmphasis(), | ||
variant = "standard", | ||
strong_lazification = false, | ||
use_extra_vertex_storage = false, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is a specific function for the lazy version of dicg, we always use the extra vertex storage, no? In which case, the default value for |
||
extra_vertex_storage = nothing, | ||
lazy_tolerance = 2.0, | ||
memory_mode::MemoryEmphasis = InplaceEmphasis(), | ||
) | ||
v_local, v_local_loc, val, a_local, a_local_loc, valM = | ||
pre_computed_set_argminmax(pre_computed_set, gradient) | ||
step_type = ST_REGULAR | ||
pre_computed_set_argminmax(lmo, pre_computed_set, gradient, x; strong_lazification = strong_lazification) | ||
step_type = ST_PAIRWISE | ||
away_index = nothing | ||
fw_index = nothing | ||
grad_dot_x = fast_dot(x, gradient) | ||
grad_dot_a_local = valM | ||
|
||
# Do lazy pairwise step | ||
grad_dot_lazy_fw_vertex = val | ||
|
||
if grad_dot_a_local - grad_dot_lazy_fw_vertex >= phi / lazy_tolerance && | ||
grad_dot_a_local - grad_dot_lazy_fw_vertex >= epsilon | ||
if strong_lazification | ||
a_taken = a_local | ||
grad_dot_a_taken = grad_dot_a_local | ||
else | ||
a_taken = compute_inface_extreme_point(lmo, NegatingArray(gradient), x) | ||
grad_dot_a_taken = fast_dot(gradient, a_taken) | ||
end | ||
# Do lazy pairwise step | ||
if grad_dot_a_taken - grad_dot_lazy_fw_vertex >= phi / lazy_tolerance && | ||
grad_dot_a_taken - grad_dot_lazy_fw_vertex >= epsilon | ||
step_type = ST_LAZY | ||
v = v_local | ||
a = a_local | ||
a = a_taken | ||
d = muladd_memory_mode(memory_mode, d, a, v) | ||
fw_index = v_local_loc | ||
else | ||
v = compute_extreme_point(lmo, gradient) | ||
grad_dot_v = fast_dot(gradient, v) | ||
# Do lazy inface_point | ||
if grad_dot_a_local - grad_dot_v >= phi / lazy_tolerance && | ||
grad_dot_a_local - grad_dot_v >= epsilon | ||
step_type = ST_LAZY | ||
a = a_local | ||
away_index = a_local_loc | ||
dual_gap = grad_dot_x - grad_dot_v | ||
phi = dual_gap | ||
|
||
if variant == "standard" | ||
v_taken = v | ||
grad_dot_v_taken = grad_dot_v | ||
else | ||
a = compute_inface_extreme_point(lmo, NegatingArray(gradient), x) | ||
v_taken = compute_inface_extreme_point(lmo, gradient, x;) | ||
grad_dot_v_taken = fast_dot(gradient, v_taken) | ||
|
||
end | ||
|
||
# Real dual gap promises enough progress. | ||
grad_dot_fw_vertex = fast_dot(v, gradient) | ||
dual_gap = grad_dot_x - grad_dot_fw_vertex | ||
|
||
# Do lazy inface_point | ||
if dual_gap >= phi / lazy_tolerance | ||
d = muladd_memory_mode(memory_mode, d, a, v) | ||
#Lower our expectation for progress. | ||
if (grad_dot_a_taken - grad_dot_v_taken >= phi / lazy_tolerance && | ||
grad_dot_a_taken - grad_dot_v_taken >= epsilon) || !strong_lazification | ||
if !strong_lazification | ||
a = a_taken | ||
d = muladd_memory_mode(memory_mode, d, a, v_taken) | ||
step_type = ST_PAIRWISE | ||
away_index = -1 | ||
else | ||
a = a_taken | ||
d = muladd_memory_mode(memory_mode, d, a, v_taken) | ||
step_type = ST_PAIRWISE | ||
away_index = a_local_loc | ||
end | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only thing that is different is
|
||
else | ||
a = compute_inface_extreme_point(lmo, NegatingArray(gradient), x) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You seem to be calling this twice! For Blended in line 559 and for non strong lazification in line 538. |
||
if variant == "standard" | ||
d = muladd_memory_mode(memory_mode, d, a, v) | ||
else | ||
grad_dot_a = fast_dot(gradient, a) | ||
inface_gap = grad_dot_a - grad_dot_v_taken | ||
if inface_gap >= phi / lazy_tolerance | ||
step_type = ST_PAIRWISE | ||
d = muladd_memory_mode(memory_mode, d, a, v) | ||
else # global FW step | ||
step_type = ST_REGULAR | ||
d = muladd_memory_mode(memory_mode, d, x, v) | ||
end | ||
end | ||
end | ||
else | ||
d = muladd_memory_mode(memory_mode, d, a, v) | ||
step_type = ST_DUALSTEP | ||
phi = min(dual_gap, phi / 2.0) | ||
a = a_taken | ||
end | ||
end | ||
return d, v, fw_index, a, away_index, phi, step_type | ||
|
WenjieXiao-2022 marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -157,14 +157,14 @@ function compute_inface_extreme_point(lmo::MathOptLMO{OT}, direction, x; solve_d | |
end | ||
|
||
# function barrier for performance | ||
WenjieXiao-2022 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F}, ::Type{S}, valvar) where {OT,F,S} | ||
function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F}, ::Type{S}, valvar;atol=1e-6) where {OT,F,S} | ||
const_list = MOI.get(lmo.o, MOI.ListOfConstraintIndices{F,S}()) | ||
for c_idx in const_list | ||
func = MOI.get(lmo.o, MOI.ConstraintFunction(), c_idx) | ||
val = MOIU.eval_variables(valvar, func) | ||
set = MOI.get(lmo.o, MOI.ConstraintSet(), c_idx) | ||
if S <: MOI.GreaterThan | ||
if set.lower ≈ val | ||
if isapprox(set.lower, val; atol = atol) | ||
MOI.delete(lmo.o, c_idx) | ||
if F <: MOI.VariableIndex | ||
check_cidx = MOI.ConstraintIndex{F,MOI.LessThan{Float64}}(c_idx.value) | ||
|
@@ -191,7 +191,7 @@ function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F}, | |
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.lower)) | ||
end | ||
elseif S <: MOI.LessThan | ||
if set.upper ≈ val | ||
if isapprox(set.upper, val; atol = atol) | ||
MOI.delete(lmo.o, c_idx) | ||
if F <: MOI.VariableIndex | ||
check_cidx = MOI.ConstraintIndex{F,MOI.GreaterThan{Float64}}(c_idx.value) | ||
|
@@ -213,10 +213,10 @@ function compute_inface_extreme_point_subroutine(lmo::MathOptLMO{OT}, ::Type{F}, | |
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.upper)) | ||
end | ||
elseif S <: MOI.Interval | ||
if set.upper ≈ val | ||
if isapprox(set.upper, val; atol = atol) | ||
MOI.delete(lmo.o, c_idx) | ||
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.upper)) | ||
elseif set.lower ≈ val | ||
elseif isapprox(set.lower, val; atol = atol) | ||
MOI.delete(lmo.o, c_idx) | ||
MOI.add_constraint(lmo.o, func, MOI.EqualTo(set.lower)) | ||
end | ||
|
@@ -232,17 +232,17 @@ function compute_inface_extreme_point( | |
kwargs..., | ||
) where {OT, T <: Real} | ||
n = size(direction, 1) | ||
a = compute_inface_extreme_point(lmo, vec(direction), x) | ||
a = compute_inface_extreme_point(lmo, vec(direction), vec(x)) | ||
WenjieXiao-2022 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return reshape(a, n, n) | ||
end | ||
|
||
# Fast way to compute gamma_max. | ||
# Check every constraint and compute the corresponding gamma_upper_bound. | ||
function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x) where {OT} | ||
function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x;tol=1e-6) where {OT} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to have a doc string here, i.e.
This would also be good for the |
||
gamma_less_than = Float64[] | ||
for (F, S) in MOI.get(lmo.o, MOI.ListOfConstraintTypesPresent()) | ||
valvar(f) = x[f.value] | ||
valvar_(f) = direction[f.value] | ||
valvar_d(f) = direction[f.value] | ||
const_list = MOI.get(lmo.o, MOI.ListOfConstraintIndices{F,S}()) | ||
|
||
# Constraints need to satisfy g(x+γ*d) ∈ ConstraintSet. | ||
|
@@ -252,28 +252,28 @@ function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x) where {OT} | |
# Compute g(x). | ||
val = MOIU.eval_variables(valvar, func) | ||
# Compute g(d). | ||
val_d = MOIU.eval_variables(valvar_, func) | ||
val_d = MOIU.eval_variables(valvar_d, func) | ||
set = MOI.get(lmo.o, MOI.ConstraintSet(), c_idx) | ||
if S <: MOI.Interval | ||
if val_d < 0.0 | ||
if val_d < -tol | ||
upper_bound_gamma = (val - set.upper) / val_d | ||
push!(gamma_less_than, upper_bound_gamma) | ||
end | ||
if val_d > 0.0 | ||
if val_d > tol | ||
upper_bound_gamma = (val - set.lower) / val_d | ||
push!(gamma_less_than, upper_bound_gamma) | ||
end | ||
end | ||
|
||
if S <: MOI.LessThan | ||
if val_d < 0.0 | ||
if val_d < -tol | ||
upper_bound_gamma = (val - set.upper) / val_d | ||
push!(gamma_less_than, upper_bound_gamma) | ||
end | ||
end | ||
|
||
if S <: MOI.GreaterThan | ||
if val_d > 0.0 | ||
if val_d > tol | ||
upper_bound_gamma = (val - set.lower) / val_d | ||
push!(gamma_less_than, upper_bound_gamma) | ||
end | ||
|
@@ -292,6 +292,36 @@ function dicg_maximum_step(lmo::MathOptLMO{OT}, direction, x) where {OT} | |
end | ||
end | ||
|
||
function is_inface_feasible(lmo::MathOptLMO{OT}, a, x;) where {OT} | ||
variables = MOI.get(lmo.o, MOI.ListOfVariableIndices()) | ||
valvar(f) = x[f.value] | ||
valvar_away(f) = a[f.value] | ||
for (F, S) in MOI.get(lmo.o, MOI.ListOfConstraintTypesPresent()) | ||
const_list = MOI.get(lmo.o, MOI.ListOfConstraintIndices{F, S}()) | ||
for c_idx in const_list | ||
func = MOI.get(lmo.o, MOI.ConstraintFunction(), c_idx) | ||
val = MOIU.eval_variables(valvar, func) | ||
val_away = MOIU.eval_variables(valvar_away, func) | ||
set = MOI.get(lmo.o, MOI.ConstraintSet(), c_idx) | ||
if S <: MOI.GreaterThan || S <: MOI.Interval | ||
if isapprox(set.lower, val; atol = 1e-15, rtol = 1e-5) | ||
if !isapprox(set.lower, val_away; atol = 1e-15, rtol = 1e-5) | ||
return false | ||
end | ||
end | ||
end | ||
if S <: MOI.LessThan || S <: MOI.Interval | ||
if isapprox(set.upper, val; atol = 1e-15, rtol = sqrt(eps())) | ||
if !isapprox(set.upper, val_; atol = 1e-15, rtol = 1e-5) | ||
return false | ||
end | ||
end | ||
end | ||
end | ||
end | ||
return true | ||
end | ||
|
||
function Base.copy(lmo::MathOptLMO{OT}; ensure_identity=true) where {OT} | ||
opt = OT() # creates the empty optimizer | ||
index_map = MOI.copy_to(opt, lmo.o) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since you only have two variants it might be cleaner to use a boolean flag here. For example,
using_blended
which would be true if blended_dicg was used and false otherwise.