@@ -24,23 +24,23 @@ where ``C_j(u)`` is a diagonal matrix of curvatures.
 
 This OGM method uses a majorize-minimize (MM) line search.
 
-in
-- `B` array of ``J`` blocks ``B_1,...,B_J``
-- `gradf` array of ``J`` functions return gradients of ``f_1,...,f_J``
-- `curvf` array of ``J`` functions `z -> curv(z)` that return a scalar
-  or a vector of curvature values for each element of ``z``
-- `x0` initial guess; need `length(x) == size(B[j],2)` for ``j=1...J``
-
-option
-- `niter` # number of outer iterations; default 50
-- `ninner` # number of inner iterations of MM line search; default 5
-- `fun` User-defined function to be evaluated with two arguments (x,iter).
+# in
+- `B` vector of ``J`` blocks ``B_1,...,B_J``
+- `gradf` vector of ``J`` functions that return gradients of ``f_1,...,f_J``
+- `curvf` vector of ``J`` functions `z -> curv(z)` that return a scalar
+  or a vector of curvature values for each element of ``z``
+- `x0` initial guess; need `length(x0) == size(B[j],2)` for ``j=1...J``
+
+# option
+- `niter` number of outer iterations; default 50
+- `ninner` number of inner iterations of MM line search; default 5
+- `fun` user-defined function to be evaluated with two arguments `(x,iter)`.
  * It is evaluated at (x0,0) and then after each iteration.
 
-out
-- `x` final iterate
-- `out` `[niter+1] (fun(x0,0), fun(x1,1), ..., fun(x_niter,niter))`
-  * (all 0 by default). This is an array of length `niter+1`
+# output
+- `x` final iterate
+- `out` `(niter+1) (fun(x0,0), fun(x1,1), ..., fun(x_niter,niter))`
+  * (all 0 by default). This is a vector of length `niter+1`.
 """
 function ogm_ls(
 	B::AbstractVector{<:Any},
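
As a usage sketch of the block interface documented in the hunk above (not part of this commit): a two-block least-squares problem where each ``f_j(u) = \|u - y_j\|^2/2``, so each `gradf[j]` is `u -> u - y_j` and the curvature is the constant 1. The names `A1`, `A2`, `y1`, `y2` and the argument ordering `ogm_ls(B, gradf, curvf, x0; ...)` are illustrative assumptions inferred from the docstring.

```julia
using LinearAlgebra
# Hypothetical call for f(x) = ‖A1*x - y1‖²/2 + ‖A2*x - y2‖²/2;
# A1, A2, y1, y2 are made-up data, not from this commit.
A1 = randn(30, 10); y1 = randn(30)
A2 = randn(20, 10); y2 = randn(20)
B = [A1, A2] # J = 2 blocks
gradf = [u -> u - y1, u -> u - y2] # gradient of each f_j
curvf = [u -> 1, u -> 1] # each f_j is quadratic, so unit curvature
x0 = zeros(10)
cost = x -> (norm(A1*x - y1)^2 + norm(A2*x - y2)^2) / 2
x, out = ogm_ls(B, gradf, curvf, x0; niter = 50, ninner = 5,
	fun = (x,iter) -> cost(x)) # `out` then holds the cost at each iteration
```
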
@@ -52,6 +52,8 @@ function ogm_ls(
 	fun::Function = (x,iter) -> 0,
 )
 
+Base.require_one_based_indexing(B, gradf, curvf)
+
 out = Array{Any}(undef, niter+1)
 out[1] = fun(x0, 0)
 
@@ -65,12 +67,12 @@ grad_sum = zeros(size(x0))
 ti = 1
 thetai = 1
 
-B0 = [B[j] * x for j=1:J]
+B0 = [B[j] * x for j in 1:J]
 Bx = copy(B0)
 By = copy(B0)
-grad = (Bx) -> sum([B[j]' * gradf[j](Bx[j]) for j=1:J])
+grad = (Bx) -> sum([B[j]' * gradf[j](Bx[j]) for j in 1:J])
 
-for iter = 1:niter
+for iter in 1:niter
 	grad_new = grad(Bx) # gradient of x_{iter-1}
 	grad_sum += ti * grad_new # sum_{j=0}^{iter-1} t_j * gradient_j
 
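
The `grad` closure in this hunk encodes the chain rule for the composite cost: ``\nabla_x \sum_j f_j(B_j x) = \sum_j B_j' \nabla f_j(B_j x)``. A small self-contained check of that identity for one block (illustrative names only, not from this commit):

```julia
using LinearAlgebra
# Finite-difference check of the chain rule used by the `grad` closure:
# d/dx f(B*x) = B' * ∇f(B*x), here with f(u) = ‖u - y‖²/2 so ∇f(u) = u - y.
Bj = randn(5, 3); y = randn(5)
f(u) = norm(u - y)^2 / 2
gf(u) = u - y
x = randn(3)
g = Bj' * gf(Bj * x) # gradient w.r.t. x via the chain rule
h = 1e-6; e1 = [h, 0, 0] # perturb the first coordinate
fd = (f(Bj * (x + e1)) - f(Bj * (x - e1))) / (2h) # central difference
@assert isapprox(g[1], fd; atol = 1e-4)
```
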
@@ -80,21 +82,21 @@ for iter = 1:niter
 	tt = (iter < niter) ? ti : thetai # use theta_i factor for last iteration
 	yi = (1 - 1/tt) * x + (1/tt) * x0
 
-	for j=1:J # update Bj * yi
+	for j in 1:J # update Bj * yi
 		By[j] = (1 - 1/tt) * Bx[j] + (1/tt) * B0[j]
 	end
 
 	dir = -(1 - 1/tt) * grad_new - (2/tt) * grad_sum # -d_i
 
 	# MM-based line search for step size alpha
 	# using h(a) = sum_j f_j(By_j + a * Bd_j)
-	Bd = [B[j] * dir for j=1:J]
+	Bd = [B[j] * dir for j in 1:J]
 
 	alf = 0
-	for ii=1:ninner
+	for ii in 1:ninner
 		derh = 0 # derivative of h(a)
 		curv = 0
-		for j=1:J
+		for j in 1:J
 			tmp = By[j] + alf * Bd[j]
 			derh += real(dot(Bd[j], gradf[j](tmp)))
 			curv += sum(curvf[j](tmp) .* abs2.(Bd[j]))
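
For intuition: this inner loop accumulates ``h'(\alpha)`` (`derh`) and a majorizing curvature (`curv`) for the 1D function ``h(\alpha) = \sum_j f_j(B_j y_i + \alpha B_j d)``; the update the diff elides is presumably the majorizer minimizer ``\alpha \leftarrow \alpha - h'(\alpha)/c``. A standalone sketch of that recurrence on a toy two-term ``h`` (assumed update, not copied from this file):

```julia
# MM line-search sketch, assuming the elided update is alf -= derh/curv.
# Toy h(a) = a²/2 + √(1 + (a-3)²): the quadratic term has curvature 1 and
# the hyperbola term has the classic majorizer curvature 1/√(1 + (a-3)²).
function mm_linesearch_demo(ninner::Int = 5)
	alf = 0.0
	for ii in 1:ninner
		s = sqrt(1 + (alf - 3)^2)
		derh = alf + (alf - 3) / s # h'(alf), summed over the two terms
		curv = 1 + 1 / s # sum of majorizer curvatures
		alf -= derh / curv # minimize the quadratic majorizer at alf
	end
	return alf
end
```
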
@@ -109,20 +111,20 @@ for iter = 1:niter
 	end
 
 ## derivative of h(a) = cost(x + a * dir) where \alpha is real
-# dh = alf -> real(sum([Bd[j]' * gradf[j](By[j] + alf * Bd[j]) for j=1:J]))
-# Ldh = sum([Lgf[j] * norm(Bd[j])^2 for j=1:J]) # Lipschitz constant for dh
+# dh = alf -> real(sum([Bd[j]' * gradf[j](By[j] + alf * Bd[j]) for j in 1:J]))
+# Ldh = sum([Lgf[j] * norm(Bd[j])^2 for j in 1:J]) # Lipschitz constant for dh
 # (alf,) = gd(dh, Ldh, 0, niter=ninner) # GD-based line search
 # todo
 
 	x = yi + alf * dir
 
 	if iter < niter
-		for j=1:J # update Bj * x
+		for j in 1:J # update Bj * x
 			Bx[j] = By[j] + alf * Bd[j]
 		end
 	end
 
-#	for j=1:J # recursive update Bj * yi ???
+#	for j in 1:J # recursive update Bj * yi ???
 #		By[j] = (1 - 1/ti) * (By[j] + alf * Bd[j]) + (1/ti) * B0[j]
 #	end
 
@@ -136,10 +138,11 @@ end
136138"""
137139 (x,out) = ogm_ls(grad, curv, x0, ...)
138140
139- special case of `ogm_ls` (OGM with line search) for minimizing a cost function
141+ Special case of `ogm_ls` (OGM with line search)
142+ for minimizing a cost function
140143whose gradient is `grad(x)`
141144and that has a quadratic majorizer with diagonal Hessian given by `curv(x)`.
142- Typically `curv = (x) -> L` where `L` is the Lipschitz constant of `grad`
145+ Typically `curv = (x) -> L` where `L` is the Lipschitz constant of `grad`.
143146"""
144147function ogm_ls (
145148 grad:: Function ,
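
And a hypothetical use of this single-function special case (illustrative data, not from this commit): for least squares, ``\nabla f(x) = A'(Ax - y)`` has Lipschitz constant ``L = \|A\|_2^2``, which `opnorm` provides.

```julia
using LinearAlgebra
# Hypothetical call of the special-case form: minimize ‖A*x - y‖²/2.
A = randn(40, 16); y = randn(40) # made-up data
L = opnorm(A)^2 # Lipschitz constant of the gradient (squared spectral norm)
grad = x -> A' * (A * x - y)
curv = x -> L # constant diagonal majorizer
x0 = zeros(16)
x, out = ogm_ls(grad, curv, x0; niter = 100,
	fun = (x,iter) -> norm(A*x - y)) # record the residual norm per iteration
```
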