-
Notifications
You must be signed in to change notification settings - Fork 696
NonlinearMinimizer using Projection and Proximal Operators #364
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
fb74645
a7ee059
679bb5f
3d80b31
3781e37
536886d
f98bd80
ae795b6
51b4224
ee697bf
ce8638f
bbc3edd
f85ff86
e3a61a9
928de32
91f2e17
33d28ff
6cba897
9bef354
18c7789
43794c0
e2c1db8
defaff5
610027f
8c6a6c8
b4d86e8
3a6fc97
8533ada
a0bbd33
40a45a8
7308c7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…ction code in BFGS using PowerMethod; Fixes in NonlinearMinimizer skeleton
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package breeze.optimize.linear | ||
|
|
||
| import breeze.linalg.operators.OpMulMatrix | ||
| import breeze.math.MutableInnerProductVectorSpace | ||
| import breeze.numerics.abs | ||
| import breeze.util.SerializableLogging | ||
| import breeze.linalg.norm | ||
| import breeze.util.Implicits._ | ||
|
|
||
/**
 * Power iteration for estimating the dominant eigenvalue of a linear operator `A`,
 * together with a unit-norm vector in the direction of the associated eigenvector.
 *
 * Created by debasish83 on 2/3/15.
 *
 * @param maxIterations upper bound on the number of update steps performed by [[iterations]]
 * @param tolerance     iteration stops once two successive eigenvalue estimates differ by
 *                      at most this amount
 * @param space         vector-space operations (dot product, norm, in-place scaling) for `T`
 * @param mult          implementation of the product `A * y`
 * @tparam T vector type
 * @tparam M operator (matrix) type
 */
class PowerMethod[T, M](maxIterations: Int = 10, tolerance: Double = 1E-5)
                       (implicit space: MutableInnerProductVectorSpace[T, Double],
                        mult: OpMulMatrix.Impl2[M, T, T]) extends SerializableLogging {

  import space._

  /**
   * Snapshot of the iteration.
   *
   * @param eigenValue  current estimate `v' A v`, computed from the previous unit-norm iterate `v`
   * @param eigenVector current unit-norm iterate
   * @param iter        number of update steps applied after the initial state
   * @param converged   true once the eigenvalue estimate moved by at most `tolerance`
   *                    or the `maxIterations` budget has been used up
   */
  case class State(eigenValue: Double, eigenVector: T, iter: Int, converged: Boolean)

  /**
   * Builds the starting state from the seed vector `y`.
   *
   * NOTE: `y` is modified in place (normalised, then overwritten with the normalised
   * `A * y`) and is the very object stored in the returned state. `y` must be non-zero.
   *
   * @param y seed vector; mutated by this call
   * @param A operator whose dominant eigenvalue is sought
   */
  def initialState(y: T, A: M): State = {
    // Force y to be a vector of unit norm
    y *= 1.0 / norm(y)
    val ay = mult(A, y)
    val lambda = y dot ay

    y := ay
    // Normalise by the reciprocal of the norm (multiplying by the norm itself would
    // leave the iterate with length norm(ay)^2 instead of 1).
    y *= 1.0 / norm(ay)
    // Keep a consistent orientation when the dominant eigenvalue is negative.
    if (lambda < 0.0) y *= -1.0
    State(lambda, y, 0, false)
  }

  /**
   * Lazily produces the sequence of power-iteration states, ending with (and including)
   * the first state flagged as converged.
   *
   * @param y seed vector; mutated in place by [[initialState]]
   * @param A operator whose dominant eigenvalue is sought
   */
  def iterations(y: T, A: M): Iterator[State] =
    Iterator.iterate(initialState(y, A)) { state =>
      import state._
      // Advance from the previous iterate held in the state, not from the seed `y`,
      // otherwise every step would recompute the same product.
      val ay = mult(A, eigenVector)
      val lambda = eigenVector dot ay
      ay *= 1.0 / norm(ay)
      if (lambda < 0.0) ay *= -1.0

      // The iterate is stored normalised in both cases so that the next `v' A v`
      // is evaluated on a unit vector; `iter + 1` is the count including this step.
      val finished = abs(lambda - eigenValue) <= tolerance || iter + 1 >= maxIterations
      State(lambda, ay, iter + 1, finished)
    }.takeUpToWhere(_.converged)

  /** Runs the iteration to completion and returns the final state. */
  def iterateAndReturnState(y: T, A: M): State = {
    iterations(y, A).last
  }

  /** Runs the iteration to completion and returns only the eigenvalue estimate. */
  def eigen(y: T, A: M): Double = {
    iterateAndReturnState(y, A).eigenValue
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,11 +17,14 @@ | |
|
|
||
| package breeze.optimize.proximal | ||
|
|
||
| import breeze.linalg.{DenseVector, norm} | ||
| import breeze.linalg.{DenseMatrix, DenseVector, norm} | ||
| import breeze.numerics._ | ||
| import breeze.optimize.{DiffFunction, LBFGS} | ||
| import breeze.stats.distributions.Rand | ||
|
|
||
| import breeze.optimize.proximal.Constraint._ | ||
| import scala.math._ | ||
| import scala.math.pow | ||
| import scala.math.sqrt | ||
|
|
||
| /** | ||
| * Created by debasish83 on 12/11/14. | ||
|
|
@@ -31,25 +34,25 @@ import scala.math._ | |
| * It solves the problem that has the following structure | ||
| * minimize f(x) + g(x) | ||
| * | ||
| * | ||
| * g(x) represents the following constraints | ||
| * | ||
| * 1. x >= 0 | ||
| * 2. lb <= x <= ub | ||
| * 3. L1(x) | ||
| * 4. Aeq*x = beq | ||
| * 5. aeq'x = beq | ||
| * 6. 1'x = 1, x >= 0 which is called ProbabilitySimplex from the following reference | ||
| * Proximal Algorithms by Boyd et al. | ||
| * 6. 1'x = 1, x >= 0 ProbabilitySimplex from the reference Proximal Algorithms by Boyd et al. | ||
| * | ||
| * f(x) can be either a convex or a non-linear function. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what you mean by "convex" or "non-linear function", doesn't the union of those two sets include all functions?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For convex / quadratic function bfgs + line search is good...my guess is for nonlinear like sigmoid we will need a cg based solution....may be use tron with empirical hessian...have not tested any non-convex yet...let me say that f(x) is convex
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still don't understand the comment. If f(x) can be a convex function, or a non-linear function, then can't f literally be any function? since linear functions are a subset of convex functions...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes...I cleaned it...TODO here is to experiment with TRON for non-convex functions like sigmoid activation and extend PQN to Proximal operators... |
||
| * | ||
| * For convex functions we will use B matrix with 7/10 columns | ||
| * | ||
| * For non-linear functions we will experiment with B matrix with 1 column to mimic a CG solver | ||
| * TO DO : For non-linear functions we will experiment with TRON-like Primal solver | ||
| */ | ||
|
|
||
| class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance: Double=1E-9) { | ||
| class NonlinearMinimizer(ndim: Int, | ||
| proximal: Proximal = null, | ||
| maxIters: Int = -1, m: Int = 10, tolerance: Double=1E-4) { | ||
| type BDV = DenseVector[Double] | ||
|
|
||
| case class State(x: BDV, u: BDV, z: BDV, iterations: Int, converged: Boolean) | ||
|
|
@@ -64,39 +67,21 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance: | |
| rho: Double) extends DiffFunction[DenseVector[Double]] { | ||
| override def calculate(x: DenseVector[Double]) = { | ||
| val (f, g) = primal.calculate(x) | ||
| val proxObj = f + u.dot(x - z) + 0.5 * rho * norm(x - z, 2) | ||
| val proxObj = f + u.dot(x - z) + 0.5 * rho * pow(norm(x - z), 2) | ||
| val proxGrad = g + u + (x - z):*rho | ||
| (proxObj, proxGrad) | ||
| } | ||
| } | ||
|
|
||
| //TO DO : alpha needs to be scaled based on Nesterov's acceleration | ||
| val alpha: Double = 1.0 | ||
| val rho: Double = 1.0 | ||
|
|
||
| val ABSTOL = 1e-8 | ||
| val RELTOL = 1e-4 | ||
|
|
||
| var proximal: Proximal = null | ||
|
|
||
| //TO DO : This can take a proximal function as input | ||
| //TO DO : alpha needs to be scaled based on Nesterov's acceleration | ||
| def setProximal(proximal: Proximal): NonlinearMinimizer = { | ||
| this.proximal = proximal | ||
| this | ||
| } | ||
|
|
||
| var lambda: Double = 1.0 | ||
|
|
||
| /*Regularization for Elastic Net */ | ||
| def setLambda(lambda: Double): NonlinearMinimizer = { | ||
| this.lambda = lambda | ||
| this | ||
| } | ||
|
|
||
| val innerIters = 10 | ||
|
|
||
| def iterations(primal: DiffFunction[DenseVector[Double]]) : State = { | ||
|
|
||
| def iterations(primal: DiffFunction[DenseVector[Double]], | ||
| rho: Double = 1.0) : State = { | ||
| val iters = if (proximal == null) maxIters else innerIters | ||
| val lbfgs = new LBFGS[DenseVector[Double]](iters, m, tolerance) | ||
| val init = DenseVector.rand[Double](ndim, Rand.gaussian(0, 1)) | ||
|
|
@@ -137,7 +122,7 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance: | |
| z += u | ||
|
|
||
| //Apply proximal operator | ||
| proximal.prox(z, lambda/rho) | ||
| proximal.prox(z, rho) | ||
|
|
||
| //z has proximal(x_hat) | ||
|
|
||
|
|
@@ -184,25 +169,70 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance: | |
|
|
||
| object NonlinearMinimizer { | ||
| def apply(ndim: Int, constraint: Constraint, lambda: Double): NonlinearMinimizer = { | ||
| val minimizer = new NonlinearMinimizer(ndim) | ||
| constraint match { | ||
| case POSITIVE => minimizer.setProximal(ProjectPos()) | ||
| case BOUNDS => { | ||
| case SMOOTH => new NonlinearMinimizer(ndim) | ||
| case POSITIVE => new NonlinearMinimizer(ndim, ProjectPos()) | ||
| case BOX => { | ||
| val lb = DenseVector.zeros[Double](ndim) | ||
| val ub = DenseVector.ones[Double](ndim) | ||
| minimizer.setProximal(ProjectBox(lb, ub)) | ||
| new NonlinearMinimizer(ndim, ProjectBox(lb, ub)) | ||
| } | ||
| case EQUALITY => { | ||
| val aeq = DenseVector.ones[Double](ndim) | ||
| val beq = 1.0 | ||
| minimizer.setProximal(ProjectHyperPlane(aeq, beq)) | ||
| new NonlinearMinimizer(ndim, ProjectHyperPlane(aeq, 1.0)) | ||
| } | ||
| case SPARSE => minimizer.setProximal(ProximalL1()) | ||
| case SPARSE => new NonlinearMinimizer(ndim, ProximalL1().setLambda(lambda)) | ||
| //TO DO: ProximalSimplex : for PLSA | ||
| } | ||
| } | ||
|
|
||
/**
 * Command-line driver comparing NonlinearMinimizer against reference solvers on a
 * randomly generated quadratic problem.
 *
 * Expected arguments: `n m lambda beta`
 *  - n: problem dimension
 *  - m: number of equalities passed to the problem generator
 *  - lambda, beta: elastic-net parameters; the L1 weight is `lambda * beta` and the
 *    L2 weight is `lambda * (1 - beta)`
 *
 * Prints solution differences, objective values and wall-clock timings; exits with
 * status 1 when fewer than four arguments are supplied.
 */
def main(args: Array[String]): Unit = {
  if (args.length < 4) {
    println("Usage: NonlinearMinimizer n m lambda beta")
    println("Test NonlinearMinimizer with a quadratic function of dimension n and m equalities with lambda beta for elasticNet")
    sys.exit(1)
  }

  val problemSize = args(0).toInt
  val nequalities = args(1).toInt

  val lambda = args(2).toDouble
  val beta = args(3).toDouble

  println(s"Generating randomized QPs with rank ${problemSize} equalities ${nequalities}")
  val (aeq, b, bl, bu, q, h) = QpGenerator(problemSize, nequalities)

  // Unconstrained quadratic: QuadraticMinimizer is the reference solution.
  val qpSolver = new QuadraticMinimizer(problemSize)
  val qpStart = System.nanoTime()
  val qpResult = qpSolver.minimize(h, q)
  val qpTime = System.nanoTime() - qpStart

  val nlStart = System.nanoTime()
  val nlResult = NonlinearMinimizer(problemSize, SMOOTH, 0.0).minimize(QuadraticMinimizer.Cost(h, q))
  val nlTime = System.nanoTime() - nlStart

  println(s"||qp - nl|| norm ${norm(qpResult - nlResult, 2)} max-norm ${norm(qpResult - nlResult, inf)}")

  val qpObj = QuadraticMinimizer.computeObjective(h, q, qpResult)
  val nlObj = QuadraticMinimizer.computeObjective(h, q, nlResult)
  println(s"Objective qp $qpObj nl $nlObj")

  println(s"dim ${problemSize} qp ${qpTime/1e6} ms nl ${nlTime/1e6} ms")

  // Elastic net: the L2 part is folded into the gram matrix, the L1 part is handled
  // by the SPARSE constraint and compared against OWLQN.
  val lambdaL1 = lambda * beta
  val lambdaL2 = lambda * (1 - beta)

  val regularizedGram = h + (DenseMatrix.eye[Double](h.rows) :* lambdaL2)

  val nlSparseStart = System.nanoTime()
  val nlSparseResult = NonlinearMinimizer(problemSize, SPARSE, lambdaL1).iterations(QuadraticMinimizer.Cost(regularizedGram,q))
  val nlSparseTime = System.nanoTime() - nlSparseStart

  val owlqnStart = System.nanoTime()
  val owlqnResult = QuadraticMinimizer.optimizeWithOWLQN(DenseVector.rand[Double](problemSize), regularizedGram, q, lambdaL1)
  val owlqnTime = System.nanoTime() - owlqnStart

  println(s"||owlqn - sparseqp|| norm ${norm(owlqnResult.x - nlSparseResult.x, 2)} inf-norm ${norm(owlqnResult.x - nlSparseResult.x, inf)}")
  println(s"nlSparse ${nlSparseTime/1e6} ms iters ${nlSparseResult.iterations} owlqn ${owlqnTime/1e6} ms iters ${owlqnResult.iter}")
}
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also, definitely not licensed to the ASF. Just put your name (c) 2015 like how I (usually) do it.