Initial checkin for PowerMethod and PowerMethodTest;Eigen value extra…

…ction code in BFGS using PowerMethod; Fixes in NonlinearMinimizer skeleton
scalanlp · dlwh · Mar 12, 2015 · Jan 30, 2015 · Jan 31, 2015 · Feb 2, 2015
commit 536886d4a706ae78053de5cfff0fceeda5d42e80
diff --git a/math/src/main/scala/breeze/optimize/LBFGS.scala b/math/src/main/scala/breeze/optimize/LBFGS.scala
@@ -19,6 +19,9 @@ package breeze.optimize
 import breeze.linalg._
 import breeze.linalg.operators.OpMulMatrix
 import breeze.math.MutableInnerProductModule
+import breeze.optimize.linear.PowerMethod
+import breeze.optimize.proximal.QuadraticMinimizer
+import breeze.stats.distributions.Rand
 import breeze.util.SerializableLogging
 
 /**
@@ -85,23 +88,34 @@ class LBFGS[T](maxIter: Int = -1, m: Int=10, tolerance: Double=1E-9)
    * specified through DiffFunction through power iteration on ApproximateInverseHessian,
    * get the largest eigenvalue e and return 1/e
    *
+   * @param n the size of the problem, can be infered from memStep/memGradDelta
    * @param state the current state of the optimization
-   * @return maximum eigen value
+   * @return minimium eigen eigen value
    */
-  protected def minEigen(state: State) : Double = {
-    ???
+  protected def minEigen(n: Int, state: State) : Double = {
+    val pm = new PowerMethod[DenseVector[Double], LBFGS.ApproximateInverseHessian[T]]
+    val init = DenseVector.rand[Double](n, Rand.gaussian(0, 1))
+    val eigenMax = pm.eigen(init, state.history)
+    1.0/eigenMax
   }
 
   /**
    * Get the maximum approximate eigen value of the quadratic approximation convex function specified
    * through DiffFunction by doing a inverse power iteration using the CompactHessian representation
    * generated from ApproximateInverseHessian, get the largest eigenvalue e
    *
+   * @param n the size of the problem, can be infered from memStep/memGradDelta
    * @param state the current state of the optimization
-   * @return minimum eigen value
+   * @return maximum eigen value
    */
-  protected def maxEigen(state: State) : Double = {
-    ???
+  protected def maxEigen(n: Int, state: State, init: DenseVector[Double]) : Double = {
+    val compactHessian = new CompactHessian(state.history.m)
+    val memStep = state.history.memStep.asInstanceOf[Seq[DenseVector[Double]]]
+    val memGradDelta = state.history.memGradDelta.asInstanceOf[Seq[DenseVector[Double]]]
+    (0 to state.history.historyLength).map{i => compactHessian.updated(memStep(i), memGradDelta(i))}
+    val init = DenseVector.rand[Double](n, Rand.gaussian(0, 1))
+    val pm = new PowerMethod[DenseVector[Double], CompactHessian]
+    pm.eigen(init, compactHessian)
   }
 }
 
@@ -162,12 +176,10 @@ object LBFGS {
     }
   }
 
-
   implicit def multiplyInverseHessian[T](implicit vspace: MutableInnerProductModule[T, Double]):OpMulMatrix.Impl2[ApproximateInverseHessian[T], T, T] = {
     new OpMulMatrix.Impl2[ApproximateInverseHessian[T], T, T] {
       def apply(a: ApproximateInverseHessian[T], b: T): T = a * b
     }
-
   }
 
 }

diff --git a/math/src/main/scala/breeze/optimize/ProjectedQuasiNewton.scala b/math/src/main/scala/breeze/optimize/ProjectedQuasiNewton.scala
@@ -2,7 +2,7 @@ package breeze.optimize
 
 import breeze.linalg._
 import breeze.collection.mutable.RingBuffer
-import breeze.math.{MutableInnerProductModule, MutableVectorField}
+import breeze.math.{MutableInnerProductModule}
 import breeze.util.SerializableLogging
 
 // Compact representation of an n x n Hessian, maintained via L-BFGS updates

diff --git a/math/src/main/scala/breeze/optimize/linear/PowerMethod.scala b/math/src/main/scala/breeze/optimize/linear/PowerMethod.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package breeze.optimize.linear
+
+import breeze.linalg.operators.OpMulMatrix
+import breeze.math.MutableInnerProductVectorSpace
+import breeze.numerics.abs
+import breeze.util.SerializableLogging
+import breeze.linalg.norm
+import breeze.util.Implicits._
+
+/**
+ * Created by debasish83 on 2/3/15.
+ */
+class PowerMethod[T, M](maxIterations: Int = 10,tolerance: Double = 1E-5)
+                       (implicit space: MutableInnerProductVectorSpace[T, Double], mult: OpMulMatrix.Impl2[M, T, T]) extends SerializableLogging {
+
+  import space._
+
+  case class State(eigenValue: Double, eigenVector: T, iter: Int, converged: Boolean)
+
+  def initialState(y: T, A: M): State = {
+    //Force y to be a vector of unit norm
+    val normInit = norm(y)
+    y *= 1.0 / normInit
+    val ay = mult(A, y)
+    val lambda = y dot ay
+
+    y := ay
+    y *= norm(ay)
+    if (lambda < 0.0) y *= -1.0
+    State(lambda, y, 0, false)
+  }
+
+  def iterations(y: T,
+                 A: M): Iterator[State] = Iterator.iterate(initialState(y, A)) { state =>
+    import state._
+    val ay = mult(A, y)
+    val lambda = y dot ay
+    val norm1 = norm(ay)
+    ay *= 1.0 / norm1
+    if (lambda < 0.0) ay *= -1.0
+
+    val val_dif = abs(lambda - eigenValue)
+    if (val_dif <= tolerance || iter > maxIterations) State(lambda, ay, iter + 1, true)
+    else State(lambda, ay/lambda, iter + 1, false)
+  }.takeUpToWhere(_.converged)
+
+  def iterateAndReturnState(y: T, A: M): State = {
+    iterations(y, A).last
+  }
+
+  def eigen(y: T, A: M): Double = {
+    iterateAndReturnState(y, A).eigenValue
+  }
+}
diff --git a/math/src/main/scala/breeze/optimize/proximal/NonlinearMinimizer.scala b/math/src/main/scala/breeze/optimize/proximal/NonlinearMinimizer.scala
@@ -17,11 +17,14 @@
 
 package breeze.optimize.proximal
 
-import breeze.linalg.{DenseVector, norm}
+import breeze.linalg.{DenseMatrix, DenseVector, norm}
+import breeze.numerics._
 import breeze.optimize.{DiffFunction, LBFGS}
 import breeze.stats.distributions.Rand
-
+import breeze.optimize.proximal.Constraint._
 import scala.math._
+import scala.math.pow
+import scala.math.sqrt
 
 /**
  * Created by debasish83 on 12/11/14.
@@ -31,25 +34,25 @@ import scala.math._
  * It solves the problem that has the following structure
  * minimize f(x) + g(x)
  *
- *
  * g(x) represents the following constraints
  *
  * 1. x >= 0
  * 2. lb <= x <= ub
  * 3. L1(x)
  * 4. Aeq*x = beq
  * 5. aeq'x = beq
- * 6. 1'x = 1, x >= 0 which is called ProbabilitySimplex from the following reference
- *    Proximal Algorithms by Boyd et al.
+ * 6. 1'x = 1, x >= 0 ProbabilitySimplex from the reference Proximal Algorithms by Boyd et al.
  *
  * f(x) can be either a convex or a non-linear function.
  *
  * For convex functions we will use B matrix with 7/10 columns
  *
- * For non-linear functions we will experiment with B matrix with 1 column to mimic a CG solver
+ * TO DO : For non-linear functions we will experiment with TRON-like Primal solver
  */
 
-class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance: Double=1E-9) {
+class NonlinearMinimizer(ndim: Int,
+                         proximal: Proximal = null,
+                         maxIters: Int = -1, m: Int = 10, tolerance: Double=1E-4) {
   type BDV = DenseVector[Double]
 
   case class State(x: BDV, u: BDV, z: BDV, iterations: Int, converged: Boolean)
@@ -64,39 +67,21 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance:
                             rho: Double) extends DiffFunction[DenseVector[Double]] {
     override def calculate(x: DenseVector[Double]) = {
       val (f, g) = primal.calculate(x)
-      val proxObj = f + u.dot(x - z) + 0.5 * rho * norm(x - z, 2)
+      val proxObj = f + u.dot(x - z) + 0.5 * rho * pow(norm(x - z), 2)
       val proxGrad = g + u + (x - z):*rho
       (proxObj, proxGrad)
     }
   }
 
+  //TO DO : alpha needs to be scaled based on Nesterov's acceleration
   val alpha: Double = 1.0
-  val rho: Double = 1.0
 
   val ABSTOL = 1e-8
   val RELTOL = 1e-4
-
-  var proximal: Proximal = null
-
-  //TO DO : This can take a proximal function as input
-  //TO DO : alpha needs to be scaled based on Nesterov's acceleration
-  def setProximal(proximal: Proximal): NonlinearMinimizer = {
-    this.proximal = proximal
-    this
-  }
-
-  var lambda: Double = 1.0
-
-  /*Regularization for Elastic Net */
-  def setLambda(lambda: Double): NonlinearMinimizer = {
-    this.lambda = lambda
-    this
-  }
-
   val innerIters = 10
 
-  def iterations(primal: DiffFunction[DenseVector[Double]]) : State = {
-
+  def iterations(primal: DiffFunction[DenseVector[Double]],
+                 rho: Double = 1.0) : State = {
     val iters = if (proximal == null) maxIters else innerIters
     val lbfgs = new LBFGS[DenseVector[Double]](iters, m, tolerance)
     val init = DenseVector.rand[Double](ndim, Rand.gaussian(0, 1))
@@ -137,7 +122,7 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance:
       z += u
 
       //Apply proximal operator
-      proximal.prox(z, lambda/rho)
+      proximal.prox(z, rho)
 
       //z has proximal(x_hat)
 
@@ -184,25 +169,70 @@ class NonlinearMinimizer(ndim: Int, maxIters: Int = -1, m: Int = 10, tolerance:
 
 object NonlinearMinimizer {
   def apply(ndim: Int, constraint: Constraint, lambda: Double): NonlinearMinimizer = {
-    val minimizer = new NonlinearMinimizer(ndim)
     constraint match {
-      case POSITIVE => minimizer.setProximal(ProjectPos())
-      case BOUNDS => {
+      case SMOOTH => new NonlinearMinimizer(ndim)
+      case POSITIVE => new NonlinearMinimizer(ndim, ProjectPos())
+      case BOX => {
         val lb = DenseVector.zeros[Double](ndim)
         val ub = DenseVector.ones[Double](ndim)
-        minimizer.setProximal(ProjectBox(lb, ub))
+        new NonlinearMinimizer(ndim, ProjectBox(lb, ub))
       }
       case EQUALITY => {
         val aeq = DenseVector.ones[Double](ndim)
-        val beq = 1.0
-        minimizer.setProximal(ProjectHyperPlane(aeq, beq))
+        new NonlinearMinimizer(ndim, ProjectHyperPlane(aeq, 1.0))
       }
-      case SPARSE => minimizer.setProximal(ProximalL1())
+      case SPARSE => new NonlinearMinimizer(ndim, ProximalL1().setLambda(lambda))
       //TO DO: ProximalSimplex : for PLSA
     }
   }
 
   def main(args: Array[String]) {
-    ???
+    if (args.length < 4) {
+      println("Usage: NonlinearMinimizer n m lambda beta")
+      println("Test NonlinearMinimizer with a quadratic function of dimenion n and m equalities with lambda beta for elasticNet")
+      sys.exit(1)
+    }
+
+    val problemSize = args(0).toInt
+    val nequalities = args(1).toInt
+
+    val lambda = args(2).toDouble
+    val beta = args(3).toDouble
+
+    println(s"Generating randomized QPs with rank ${problemSize} equalities ${nequalities}")
+    val (aeq, b, bl, bu, q, h) = QpGenerator(problemSize, nequalities)
+
+    val qpSolver = new QuadraticMinimizer(problemSize)
+    val qpStart = System.nanoTime()
+    val qpResult = qpSolver.minimize(h, q)
+    val qpTime = System.nanoTime() - qpStart
+
+    val nlStart = System.nanoTime()
+    val nlResult = NonlinearMinimizer(problemSize, SMOOTH, 0.0).minimize(QuadraticMinimizer.Cost(h, q))
+    val nlTime = System.nanoTime() - nlStart
+
+    println(s"||qp - nl|| norm ${norm(qpResult - nlResult, 2)} max-norm ${norm(qpResult - nlResult, inf)}")
+
+    val qpObj = QuadraticMinimizer.computeObjective(h, q, qpResult)
+    val nlObj = QuadraticMinimizer.computeObjective(h, q, nlResult)
+    println(s"Objective qp $qpObj nl $nlObj")
+
+    println(s"dim ${problemSize} qp ${qpTime/1e6} ms nl ${nlTime/1e6} ms")
+
+    val lambdaL1 = lambda * beta
+    val lambdaL2 = lambda * (1 - beta)
+
+    val regularizedGram = h + (DenseMatrix.eye[Double](h.rows) :* lambdaL2)
+
+    val nlSparseStart = System.nanoTime()
+    val nlSparseResult = NonlinearMinimizer(problemSize, SPARSE, lambdaL1).iterations(QuadraticMinimizer.Cost(regularizedGram,q))
+    val nlSparseTime = System.nanoTime() - nlSparseStart
+
+    val owlqnStart = System.nanoTime()
+    val owlqnResult = QuadraticMinimizer.optimizeWithOWLQN(DenseVector.rand[Double](problemSize), regularizedGram, q, lambdaL1)
+    val owlqnTime = System.nanoTime() - owlqnStart
+
+    println(s"||owlqn - sparseqp|| norm ${norm(owlqnResult.x - nlSparseResult.x, 2)} inf-norm ${norm(owlqnResult.x - nlSparseResult.x, inf)}")
+    println(s"nlSparse ${nlSparseTime/1e6} ms iters ${nlSparseResult.iterations} owlqn ${owlqnTime/1e6} ms iters ${owlqnResult.iter}")
   }
 }
diff --git a/math/src/main/scala/breeze/optimize/proximal/QuadraticMinimizer.scala b/math/src/main/scala/breeze/optimize/proximal/QuadraticMinimizer.scala
@@ -370,6 +370,7 @@ object QuadraticMinimizer {
             constraint: Constraint,
             lambda: Double): QuadraticMinimizer = {
     constraint match {
+      case SMOOTH => new QuadraticMinimizer(rank)
       case POSITIVE => new QuadraticMinimizer(rank, ProjectPos())
       case BOX => {
         //Direct QP with bounds
@@ -392,16 +393,26 @@ object QuadraticMinimizer {
     res
   }
 
+  case class Cost(H: DenseMatrix[Double],
+                  q: DenseVector[Double]) extends DiffFunction[DenseVector[Double]] {
+    def calculate(x: DenseVector[Double]) = {
+      (computeObjective(H, q, x), H * x + q)
+    }
+  }
+
   def optimizeWithLBFGS(init: DenseVector[Double],
                          H: DenseMatrix[Double],
                          q: DenseVector[Double]) = {
     val lbfgs = new LBFGS[DenseVector[Double]](-1, 7)
-    val f = new DiffFunction[DenseVector[Double]] {
-      def calculate(x: DenseVector[Double]) = {
-        (computeObjective(H, q, x), H * x + q)
-      }
-    }
-    lbfgs.minimize(f, init)
+    lbfgs.minimize(Cost(H, q), init)
+  }
+
+  def optimizeWithOWLQN(init: DenseVector[Double],
+                        regularizedGram: DenseMatrix[Double],
+                        q: DenseVector[Double],
+                        lambdaL1: Double) = {
+    val owlqn = new OWLQN[Int, DenseVector[Double]](-1, 7, lambdaL1, 1e-6)
+    owlqn.minimizeAndReturnState(Cost(regularizedGram, q), init)
   }
 
   def main(args: Array[String]) {
@@ -458,24 +469,13 @@ object QuadraticMinimizer {
 
     val regularizedGram = h + (DenseMatrix.eye[Double](h.rows) :* lambdaL2)
 
-    val owlqn = new OWLQN[Int, DenseVector[Double]](-1, 7, lambdaL1)
-
-    def optimizeWithOWLQN(init: DenseVector[Double]) = {
-      val f = new DiffFunction[DenseVector[Double]] {
-        def calculate(x: DenseVector[Double]) = {
-          (computeObjective(regularizedGram, q, x), regularizedGram * x + q)
-        }
-      }
-      owlqn.minimizeAndReturnState(f, init)
-    }
-
     val sparseQp = QuadraticMinimizer(h.rows, SPARSE, lambdaL1)
     val sparseQpStart = System.nanoTime()
     val sparseQpResult = sparseQp.minimizeAndReturnState(regularizedGram, q)
     val sparseQpTime = System.nanoTime() - sparseQpStart
 
     val startOWLQN = System.nanoTime()
-    val owlqnResult = optimizeWithOWLQN(DenseVector.rand[Double](problemSize))
+    val owlqnResult = optimizeWithOWLQN(DenseVector.rand[Double](problemSize), regularizedGram, q, lambdaL1)
     val owlqnTime = System.nanoTime() - startOWLQN
 
     println(s"||owlqn - sparseqp|| norm ${norm(owlqnResult.x - sparseQpResult.x, 2)} inf-norm ${norm(owlqnResult.x - sparseQpResult.x, inf)}")