Merged
Changes from 1 commit (31 commits total)
fb74645 Initial commit and skeleton for NonlinearMinimizer (Jan 30, 2015)
a7ee059 Merge branch 'qp' of https://github.com/debasish83/breeze into nlqp (Jan 31, 2015)
679bb5f Skeleton for approximate eigen value calculation (Feb 2, 2015)
3d80b31 Copyright message for NonlinearMinimizer (Feb 2, 2015)
3781e37 merge with qp branch; NOTICE file updated (Feb 3, 2015)
536886d Initial checkin for PowerMethod and PowerMethodTest;Eigen value extra… (Feb 3, 2015)
f98bd80 Compilation fixes to LBFGS eigenMin and eigenMax (Feb 4, 2015)
ae795b6 Power Method merged; NonlinearMinimizer now supports preserving histo… (Feb 11, 2015)
51b4224 Generating PQN.CompactHessian from BFGS.ApproximateInverseHessian not… (Feb 11, 2015)
ee697bf Linear Regression formulation added for comparisons (Feb 11, 2015)
ce8638f Fixed LBFGS.maxEigen using power law on CompactHessian (Feb 12, 2015)
bbc3edd Merge branch 'qp' of https://github.com/debasish83/breeze into nlqp (Feb 17, 2015)
f85ff86 Merge branch 'qp' of https://github.com/debasish83/breeze into nlqp (Feb 22, 2015)
e3a61a9 Added a proximal interface to ProjectQuasiNewton solver; Added projec… (Feb 23, 2015)
928de32 probability simplex benchmark (Feb 24, 2015)
91f2e17 After experimentation NonlinearMinimizer now users PQN/OWLQN and supp… (Feb 28, 2015)
33d28ff Add testcases for Least square variants (Mar 1, 2015)
6cba897 merge with upstream (Mar 1, 2015)
9bef354 I dunno. (dlwh, Mar 1, 2015)
18c7789 PQN fixes from David's fix_pqn branch; added strong wolfe line search… (Mar 2, 2015)
43794c0 Unused import from FirstOrderMinimizer; PQN migrated to Strong Wolfe … (Mar 5, 2015)
e2c1db8 Used BacktrackingLineSearch in SPG and PQN; Updated NonlinearMinimize… (Mar 5, 2015)
defaff5 NonlinearMinimizer println changed to nl from pqn (Mar 5, 2015)
610027f Updated with cforRange in proximal operations (Mar 7, 2015)
8c6a6c8 BacktrackingLineSearch takes an initfval;OWLQN, PQN and SPG updated t… (Mar 7, 2015)
b4d86e8 Merge branch 'master' of https://github.com/scalanlp/breeze into nlqp (Mar 7, 2015)
3a6fc97 infiniteIteration API in FirstOrderMinimizer takes initialState;PQN b… (Mar 11, 2015)
8533ada migrate LBFGS Eigen calculation to https://github.com/debasish83/bree… (Mar 11, 2015)
a0bbd33 cleaned up minEigen call from QuadraticMinimizer (Mar 11, 2015)
40a45a8 NonlinearMinimizer inner iterations through BFGS cleaned (Mar 12, 2015)
7308c7a Updated contributions in README.md (Mar 12, 2015)
Initial checkin for PowerMethod and PowerMethodTest; Eigen value extraction code in BFGS using PowerMethod; Fixes in NonlinearMinimizer skeleton
Debasish Das committed Feb 3, 2015
commit 536886d4a706ae78053de5cfff0fceeda5d42e80
28 changes: 20 additions & 8 deletions math/src/main/scala/breeze/optimize/LBFGS.scala
@@ -19,6 +19,9 @@ package breeze.optimize
import breeze.linalg._
import breeze.linalg.operators.OpMulMatrix
import breeze.math.MutableInnerProductModule
import breeze.optimize.linear.PowerMethod
import breeze.optimize.proximal.QuadraticMinimizer
import breeze.stats.distributions.Rand
import breeze.util.SerializableLogging

/**
@@ -85,23 +88,34 @@ class LBFGS[T](maxIter: Int = -1, m: Int=10, tolerance: Double=1E-9)
* specified through DiffFunction, by power iteration on the ApproximateInverseHessian;
* get its largest eigenvalue e and return 1/e
*
* @param n the size of the problem, can be inferred from memStep/memGradDelta
* @param state the current state of the optimization
* @return maximum eigen value
* @return minimum eigen value
*/
protected def minEigen(state: State) : Double = {
???
protected def minEigen(n: Int, state: State) : Double = {
val pm = new PowerMethod[DenseVector[Double], LBFGS.ApproximateInverseHessian[T]]
val init = DenseVector.rand[Double](n, Rand.gaussian(0, 1))
val eigenMax = pm.eigen(init, state.history)
1.0/eigenMax
}
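(Gloss, not in the diff:) why 1/e is the minimum: if the Hessian H has eigenvalues 0 < lambda_min <= ... <= lambda_max, then H^-1 has eigenvalues 1/lambda_max <= ... <= 1/lambda_min, so the dominant eigenvalue e that power iteration finds on the inverse Hessian is e = 1/lambda_min, and 1/e recovers lambda_min. For example, H = diag(2, 5) gives e = 0.5 and 1/e = 2.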

/**
* Get the maximum approximate eigen value of the quadratic approximation convex function specified
* through DiffFunction, by running power iteration on the CompactHessian representation
* generated from the ApproximateInverseHessian history; return the largest eigenvalue e
*
* @param n the size of the problem, can be inferred from memStep/memGradDelta
* @param state the current state of the optimization
* @param init random start vector for the power iteration
* @return minimum eigen value
* @return maximum eigen value
*/
protected def maxEigen(state: State) : Double = {
???
protected def maxEigen(n: Int, state: State, init: DenseVector[Double]) : Double = {
val memStep = state.history.memStep.asInstanceOf[Seq[DenseVector[Double]]]
val memGradDelta = state.history.memGradDelta.asInstanceOf[Seq[DenseVector[Double]]]
//CompactHessian.updated returns a new instance, so fold the curvature history into it
val compactHessian = (0 until state.history.historyLength).foldLeft(new CompactHessian(state.history.m)) {
  (h, i) => h.updated(memStep(i), memGradDelta(i))
}
val pm = new PowerMethod[DenseVector[Double], CompactHessian]
pm.eigen(init, compactHessian)
}
}
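A hedged sketch (not from this commit) of the same fold-then-power-iterate pattern on a standalone curvature history; it assumes, as the diff above does, that CompactHessian exposes updated(step, gradDelta) and a matrix-vector multiply instance that PowerMethod can use:

import breeze.linalg.DenseVector
import breeze.optimize.CompactHessian
import breeze.optimize.linear.PowerMethod
import breeze.stats.distributions.Rand

// Two hypothetical curvature pairs (step s_i, gradient change y_i) in dimension 3
val steps = Seq(DenseVector(0.1, 0.0, 0.2), DenseVector(0.0, 0.3, 0.1))
val gradDeltas = Seq(DenseVector(0.2, 0.1, 0.4), DenseVector(0.1, 0.6, 0.3))
// Fold the pairs into a compact (direct) Hessian approximation with memory 10
val b = steps.zip(gradDeltas).foldLeft(new CompactHessian(10)) {
  case (h, (s, y)) => h.updated(s, y)
}
val pm = new PowerMethod[DenseVector[Double], CompactHessian]
val maxEig = pm.eigen(DenseVector.rand[Double](3, Rand.gaussian(0, 1)), b)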

@@ -162,12 +176,10 @@ object LBFGS {
}
}


implicit def multiplyInverseHessian[T](implicit vspace: MutableInnerProductModule[T, Double]):OpMulMatrix.Impl2[ApproximateInverseHessian[T], T, T] = {
new OpMulMatrix.Impl2[ApproximateInverseHessian[T], T, T] {
def apply(a: ApproximateInverseHessian[T], b: T): T = a * b
}

}

}
@@ -2,7 +2,7 @@ package breeze.optimize

import breeze.linalg._
import breeze.collection.mutable.RingBuffer
import breeze.math.{MutableInnerProductModule, MutableVectorField}
import breeze.math.{MutableInnerProductModule}
import breeze.util.SerializableLogging

// Compact representation of an n x n Hessian, maintained via L-BFGS updates
70 changes: 70 additions & 0 deletions math/src/main/scala/breeze/optimize/linear/PowerMethod.scala
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Member: also, definitely not licensed to the ASF. Just put your name (c) 2015 like how I (usually) do it.

* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package breeze.optimize.linear

import breeze.linalg.operators.OpMulMatrix
import breeze.math.MutableInnerProductVectorSpace
import breeze.numerics.abs
import breeze.util.SerializableLogging
import breeze.linalg.norm
import breeze.util.Implicits._

/**
* Created by debasish83 on 2/3/15.
*/
class PowerMethod[T, M](maxIterations: Int = 10, tolerance: Double = 1E-5)
(implicit space: MutableInnerProductVectorSpace[T, Double], mult: OpMulMatrix.Impl2[M, T, T]) extends SerializableLogging {

import space._

case class State(eigenValue: Double, eigenVector: T, iter: Int, converged: Boolean)

def initialState(y: T, A: M): State = {
//Force y to be a vector of unit norm
val normInit = norm(y)
y *= 1.0 / normInit
val ay = mult(A, y)
val lambda = y dot ay

//Next iterate is the normalized A*y; flip sign to track a negative dominant eigenvalue
y := ay
y *= 1.0 / norm(ay)
if (lambda < 0.0) y *= -1.0
State(lambda, y, 0, false)
}

def iterations(y: T,
A: M): Iterator[State] = Iterator.iterate(initialState(y, A)) { state =>
import state._
val ay = mult(A, y)
//Rayleigh quotient estimate of the dominant eigenvalue (y has unit norm)
val lambda = y dot ay
val norm1 = norm(ay)
ay *= 1.0 / norm1
if (lambda < 0.0) ay *= -1.0

val valDiff = abs(lambda - eigenValue)
if (valDiff <= tolerance || iter > maxIterations) State(lambda, ay, iter + 1, true)
else State(lambda, ay, iter + 1, false)
}.takeUpToWhere(_.converged)

def iterateAndReturnState(y: T, A: M): State = {
iterations(y, A).last
}

def eigen(y: T, A: M): Double = {
iterateAndReturnState(y, A).eigenValue
}
}
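A minimal usage sketch (mine, not in the diff): estimating the dominant eigenvalue of a plain symmetric DenseMatrix, assuming breeze's standard DenseMatrix-DenseVector multiply and DenseVector vector-space implicits satisfy the context bounds:

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.optimize.linear.PowerMethod
import breeze.stats.distributions.Rand

// Symmetric matrix with eigenvalues 3 and 1; power iteration should approach 3.0
val a = DenseMatrix((2.0, 1.0), (1.0, 2.0))
val pm = new PowerMethod[DenseVector[Double], DenseMatrix[Double]](maxIterations = 50)
val y0 = DenseVector.rand[Double](2, Rand.gaussian(0, 1))
println(pm.eigen(y0, a))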
106 changes: 68 additions & 38 deletions math/src/main/scala/breeze/optimize/proximal/NonlinearMinimizer.scala
@@ -17,11 +17,14 @@

package breeze.optimize.proximal

import breeze.linalg.{DenseVector, norm}
import breeze.linalg.{DenseMatrix, DenseVector, norm}
import breeze.numerics._
import breeze.optimize.{DiffFunction, LBFGS}
import breeze.stats.distributions.Rand

import breeze.optimize.proximal.Constraint._
import scala.math._
import scala.math.pow
import scala.math.sqrt

/**
* Created by debasish83 on 12/11/14.
@@ -31,25 +34,25 @@ import scala.math._
* It solves the problem that has the following structure
* minimize f(x) + g(x)
*
*
* g(x) represents the following constraints
*
* 1. x >= 0
* 2. lb <= x <= ub
* 3. L1(x)
* 4. Aeq*x = beq
* 5. aeq'x = beq
* 6. 1'x = 1, x >= 0 which is called ProbabilitySimplex from the following reference
* Proximal Algorithms by Boyd et al.
* 6. 1'x = 1, x >= 0 ProbabilitySimplex from the reference Proximal Algorithms by Boyd et al.
*
* f(x) can be either a convex or a non-linear function.
Member: I don't know what you mean by "convex" or "non-linear function", doesn't the union of those two sets include all functions?

Contributor (author): For convex / quadratic function bfgs + line search is good...my guess is for nonlinear like sigmoid we will need a cg based solution....may be use tron with empirical hessian...have not tested any non-convex yet...let me say that f(x) is convex

Member: I still don't understand the comment. If f(x) can be a convex function, or a non-linear function, then can't f literally be any function? since linear functions are a subset of convex functions...

Contributor (author): Yes...I cleaned it...TODO here is to experiment with TRON for non-convex functions like sigmoid activation and extend PQN to Proximal operators...

*
* For convex functions we will use B matrix with 7/10 columns
*
* For non-linear functions we will experiment with B matrix with 1 column to mimic a CG solver
* TO DO : For non-linear functions we will experiment with TRON-like Primal solver
*/

class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance: Double=1E-9) {
class NonlinearMinimizer(ndim: Int,
proximal: Proximal = null,
maxIters: Int = -1, m: Int = 10, tolerance: Double=1E-4) {
type BDV = DenseVector[Double]

case class State(x: BDV, u: BDV, z: BDV, iterations: Int, converged: Boolean)
@@ -64,39 +67,21 @@
rho: Double) extends DiffFunction[DenseVector[Double]] {
override def calculate(x: DenseVector[Double]) = {
val (f, g) = primal.calculate(x)
val proxObj = f + u.dot(x - z) + 0.5 * rho * norm(x - z, 2)
val proxObj = f + u.dot(x - z) + 0.5 * rho * pow(norm(x - z), 2)
val proxGrad = g + u + (x - z):*rho
(proxObj, proxGrad)
}
}
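(Gloss, not in the diff:) ProximalPrimal is the x-subproblem of ADMM applied to minimize f(x) + g(z) subject to x = z: the objective above is the augmented Lagrangian L_rho(x, z, u) = f(x) + u'(x - z) + (rho/2)||x - z||^2, and its gradient in x is grad f(x) + u + rho(x - z), exactly the (proxObj, proxGrad) pair returned by calculate.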

//TO DO : alpha needs to be scaled based on Nesterov's acceleration
val alpha: Double = 1.0
val rho: Double = 1.0

val ABSTOL = 1e-8
val RELTOL = 1e-4

var proximal: Proximal = null

//TO DO : This can take a proximal function as input
//TO DO : alpha needs to be scaled based on Nesterov's acceleration
def setProximal(proximal: Proximal): NonlinearMinimizer = {
this.proximal = proximal
this
}

var lambda: Double = 1.0

/*Regularization for Elastic Net */
def setLambda(lambda: Double): NonlinearMinimizer = {
this.lambda = lambda
this
}

val innerIters = 10

def iterations(primal: DiffFunction[DenseVector[Double]]) : State = {

def iterations(primal: DiffFunction[DenseVector[Double]],
rho: Double = 1.0) : State = {
val iters = if (proximal == null) maxIters else innerIters
val lbfgs = new LBFGS[DenseVector[Double]](iters, m, tolerance)
val init = DenseVector.rand[Double](ndim, Rand.gaussian(0, 1))
@@ -137,7 +122,7 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance:
z += u

//Apply proximal operator
proximal.prox(z, lambda/rho)
proximal.prox(z, rho)

//z has proximal(x_hat)

@@ -184,25 +169,70 @@

object NonlinearMinimizer {
def apply(ndim: Int, constraint: Constraint, lambda: Double): NonlinearMinimizer = {
val minimizer = new NonlinearMinimizer(ndim)
constraint match {
case POSITIVE => minimizer.setProximal(ProjectPos())
case BOUNDS => {
case SMOOTH => new NonlinearMinimizer(ndim)
case POSITIVE => new NonlinearMinimizer(ndim, ProjectPos())
case BOX => {
val lb = DenseVector.zeros[Double](ndim)
val ub = DenseVector.ones[Double](ndim)
minimizer.setProximal(ProjectBox(lb, ub))
new NonlinearMinimizer(ndim, ProjectBox(lb, ub))
}
case EQUALITY => {
val aeq = DenseVector.ones[Double](ndim)
val beq = 1.0
minimizer.setProximal(ProjectHyperPlane(aeq, beq))
new NonlinearMinimizer(ndim, ProjectHyperPlane(aeq, 1.0))
}
case SPARSE => minimizer.setProximal(ProximalL1())
case SPARSE => new NonlinearMinimizer(ndim, ProximalL1().setLambda(lambda))
//TO DO: ProximalSimplex : for PLSA
}
}
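A hedged usage sketch (mine, mirroring main() below): solving a small box-constrained quadratic through the BOX constraint; h and q are a hypothetical positive-definite Gram matrix and linear term:

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.optimize.proximal.Constraint._

// minimize 0.5 x'Hx + q'x subject to 0 <= x <= 1
val h = DenseMatrix((2.0, 0.5), (0.5, 1.0))
val q = DenseVector(-1.0, -1.0)
val boxSolver = NonlinearMinimizer(2, BOX, 0.0)
val state = boxSolver.iterations(QuadraticMinimizer.Cost(h, q))
println(s"converged ${state.converged} x ${state.x}")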

def main(args: Array[String]) {
???
if (args.length < 4) {
println("Usage: NonlinearMinimizer n m lambda beta")
println("Test NonlinearMinimizer with a quadratic function of dimenion n and m equalities with lambda beta for elasticNet")
sys.exit(1)
}

val problemSize = args(0).toInt
val nequalities = args(1).toInt

val lambda = args(2).toDouble
val beta = args(3).toDouble

println(s"Generating randomized QPs with rank ${problemSize} equalities ${nequalities}")
val (aeq, b, bl, bu, q, h) = QpGenerator(problemSize, nequalities)

val qpSolver = new QuadraticMinimizer(problemSize)
val qpStart = System.nanoTime()
val qpResult = qpSolver.minimize(h, q)
val qpTime = System.nanoTime() - qpStart

val nlStart = System.nanoTime()
val nlResult = NonlinearMinimizer(problemSize, SMOOTH, 0.0).minimize(QuadraticMinimizer.Cost(h, q))
val nlTime = System.nanoTime() - nlStart

println(s"||qp - nl|| norm ${norm(qpResult - nlResult, 2)} max-norm ${norm(qpResult - nlResult, inf)}")

val qpObj = QuadraticMinimizer.computeObjective(h, q, qpResult)
val nlObj = QuadraticMinimizer.computeObjective(h, q, nlResult)
println(s"Objective qp $qpObj nl $nlObj")

println(s"dim ${problemSize} qp ${qpTime/1e6} ms nl ${nlTime/1e6} ms")

val lambdaL1 = lambda * beta
val lambdaL2 = lambda * (1 - beta)

val regularizedGram = h + (DenseMatrix.eye[Double](h.rows) :* lambdaL2)

val nlSparseStart = System.nanoTime()
val nlSparseResult = NonlinearMinimizer(problemSize, SPARSE, lambdaL1).iterations(QuadraticMinimizer.Cost(regularizedGram,q))
val nlSparseTime = System.nanoTime() - nlSparseStart

val owlqnStart = System.nanoTime()
val owlqnResult = QuadraticMinimizer.optimizeWithOWLQN(DenseVector.rand[Double](problemSize), regularizedGram, q, lambdaL1)
val owlqnTime = System.nanoTime() - owlqnStart

println(s"||owlqn - sparseqp|| norm ${norm(owlqnResult.x - nlSparseResult.x, 2)} inf-norm ${norm(owlqnResult.x - nlSparseResult.x, inf)}")
println(s"nlSparse ${nlSparseTime/1e6} ms iters ${nlSparseResult.iterations} owlqn ${owlqnTime/1e6} ms iters ${owlqnResult.iter}")
}
}
@@ -370,6 +370,7 @@ object QuadraticMinimizer
constraint: Constraint,
lambda: Double): QuadraticMinimizer = {
constraint match {
case SMOOTH => new QuadraticMinimizer(rank)
case POSITIVE => new QuadraticMinimizer(rank, ProjectPos())
case BOX => {
//Direct QP with bounds
@@ -392,16 +393,26 @@
res
}

case class Cost(H: DenseMatrix[Double],
q: DenseVector[Double]) extends DiffFunction[DenseVector[Double]] {
def calculate(x: DenseVector[Double]) = {
(computeObjective(H, q, x), H * x + q)
}
}
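(Gloss, not in the diff:) Cost encodes the quadratic objective f(x) = 0.5 x'Hx + q'x, whose gradient is Hx + q; defining it once lets optimizeWithLBFGS and optimizeWithOWLQN below share a single DiffFunction instead of duplicating anonymous classes.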

def optimizeWithLBFGS(init: DenseVector[Double],
H: DenseMatrix[Double],
q: DenseVector[Double]) = {
val lbfgs = new LBFGS[DenseVector[Double]](-1, 7)
val f = new DiffFunction[DenseVector[Double]] {
def calculate(x: DenseVector[Double]) = {
(computeObjective(H, q, x), H * x + q)
}
}
lbfgs.minimize(f, init)
lbfgs.minimize(Cost(H, q), init)
}

def optimizeWithOWLQN(init: DenseVector[Double],
regularizedGram: DenseMatrix[Double],
q: DenseVector[Double],
lambdaL1: Double) = {
val owlqn = new OWLQN[Int, DenseVector[Double]](-1, 7, lambdaL1, 1e-6)
owlqn.minimizeAndReturnState(Cost(regularizedGram, q), init)
}
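A hedged call sketch (mine), following the elastic-net pattern in main() below: the L2 part of the penalty is folded into the Gram matrix and OWLQN handles the L1 part; h, q, lambda and beta are hypothetical inputs:

import breeze.linalg.{DenseMatrix, DenseVector}

val h = DenseMatrix((2.0, 0.0), (0.0, 3.0)) // hypothetical Gram matrix
val q = DenseVector(-1.0, -2.0)
val (lambda, beta) = (0.1, 0.5)
val lambdaL1 = lambda * beta
val lambdaL2 = lambda * (1 - beta)
val regularizedGram = h + (DenseMatrix.eye[Double](h.rows) :* lambdaL2)
val owlqnState = optimizeWithOWLQN(DenseVector.rand[Double](h.rows), regularizedGram, q, lambdaL1)
println(s"owlqn converged in ${owlqnState.iter} iterations")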

def main(args: Array[String]) {
@@ -458,24 +469,13 @@

val regularizedGram = h + (DenseMatrix.eye[Double](h.rows) :* lambdaL2)

val owlqn = new OWLQN[Int, DenseVector[Double]](-1, 7, lambdaL1)

def optimizeWithOWLQN(init: DenseVector[Double]) = {
val f = new DiffFunction[DenseVector[Double]] {
def calculate(x: DenseVector[Double]) = {
(computeObjective(regularizedGram, q, x), regularizedGram * x + q)
}
}
owlqn.minimizeAndReturnState(f, init)
}

val sparseQp = QuadraticMinimizer(h.rows, SPARSE, lambdaL1)
val sparseQpStart = System.nanoTime()
val sparseQpResult = sparseQp.minimizeAndReturnState(regularizedGram, q)
val sparseQpTime = System.nanoTime() - sparseQpStart

val startOWLQN = System.nanoTime()
val owlqnResult = optimizeWithOWLQN(DenseVector.rand[Double](problemSize))
val owlqnResult = optimizeWithOWLQN(DenseVector.rand[Double](problemSize), regularizedGram, q, lambdaL1)
val owlqnTime = System.nanoTime() - startOWLQN

println(s"||owlqn - sparseqp|| norm ${norm(owlqnResult.x - sparseQpResult.x, 2)} inf-norm ${norm(owlqnResult.x - sparseQpResult.x, inf)}")