NonlinearMinimizer using Projection and Proximal Operators #364
Changes to `ProjectedQuasiNewton.scala` (package `breeze.optimize`):

```diff
@@ -1,8 +1,24 @@
 package breeze.optimize

+/*
+ Copyright 2015 David Hall, Debasish Das
+
+ Licensed under the Apache License, Version 2.0 (the "License")
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
 import breeze.linalg._
 import breeze.collection.mutable.RingBuffer
-import breeze.math.{MutableInnerProductModule, MutableVectorField}
+import breeze.math.MutableInnerProductModule
 import breeze.util.SerializableLogging

 // Compact representation of an n x n Hessian, maintained via L-BFGS updates
@@ -66,32 +82,32 @@ class ProjectedQuasiNewton(tolerance: Double = 1e-6,
                            val m: Int = 10,
                            val initFeas: Boolean = false,
                            val testOpt: Boolean = true,
-                           val maxNumIt: Int = 500,
+                           maxIter: Int = -1,
                            val maxSrchIt: Int = 50,
                            val gamma: Double = 1e-4,
                            val projection: DenseVector[Double] => DenseVector[Double] = identity)
                           (implicit space: MutableInnerProductModule[DenseVector[Double],Double])
-  extends FirstOrderMinimizer[DenseVector[Double], DiffFunction[DenseVector[Double]]](maxIter = maxNumIt, tolerance = tolerance) with Projecting[DenseVector[Double]] with SerializableLogging {
-  val innerOptimizer = new SpectralProjectedGradient[DenseVector[Double], DiffFunction[DenseVector[Double]]](
-    testOpt = true,
+  extends FirstOrderMinimizer[DenseVector[Double], DiffFunction[DenseVector[Double]]](maxIter = maxIter, tolerance = tolerance) with Projecting[DenseVector[Double]] with SerializableLogging {
+  type BDV = DenseVector[Double]
+
+  val innerOptimizer = new SpectralProjectedGradient[BDV](
     tolerance = tolerance,
-    maxIter = 500,
+    maxIter = 50,
     bbMemory = 5,
     initFeas = true,
     minImprovementWindow = 10,
     projection = projection
   )

   type History = CompactHessian

   protected def initialHistory(f: DiffFunction[DenseVector[Double]], init: DenseVector[Double]): History = {
     new CompactHessian(m)
   }

-  override protected def adjust(newX: DenseVector[Double], newGrad: DenseVector[Double], newVal: Double):(Double,DenseVector[Double]) = (newVal,-projectedVector(newX, -newGrad))
+  override protected def adjust(newX: DenseVector[Double], newGrad: DenseVector[Double], newVal: Double):(Double,DenseVector[Double]) = (newVal,projectedVector(newX, -newGrad))

   private def computeGradient(x: DenseVector[Double], g: DenseVector[Double]): DenseVector[Double] = projectedVector(x, -g)
   private def computeGradientNorm(x: DenseVector[Double], g: DenseVector[Double]): Double = norm(computeGradient(x, g),Double.PositiveInfinity)

   protected def chooseDescentDirection(state: State, fn: DiffFunction[DenseVector[Double]]): DenseVector[Double] = {
     import state._
@@ -101,72 +117,54 @@ class ProjectedQuasiNewton(tolerance: Double = 1e-6,
       // Update the limited-memory BFGS approximation to the Hessian
       //B.update(y, s)
       // Solve subproblem; we use the current iterate x as a guess
-      val subprob = new ProjectedQuasiNewton.QuadraticSubproblem(fn, state.adjustedValue, x, grad, history)
-      val p = innerOptimizer.minimize(new CachedDiffFunction(subprob), x)
-      p - x
+      val subprob = new ProjectedQuasiNewton.QuadraticSubproblem(state.adjustedValue, x, grad, history)
+      val spgResult = innerOptimizer.minimizeAndReturnState(new CachedDiffFunction(subprob), x)
+      logger.info(f"ProjectedQuasiNewton: outerIter ${state.iter} innerIters ${spgResult.iter}")
+      spgResult.x - x
      // time += subprob.time
    }
  }

-  protected def determineStepSize(state: State, fn: DiffFunction[DenseVector[Double]], dir: DenseVector[Double]): Double = {
-    if (state.iter == 0)
-      return scala.math.min(1.0, 1.0 / norm(state.grad,1.0))
-    val dirnorm = norm(dir, Double.PositiveInfinity)
-    if(dirnorm < 1E-10) return 0.0
-    import state._
-    // Backtracking line-search
-    var accepted = false
-    var lambda = 1.0
-    val gTd = grad dot dir
-    var srchit = 0
-
-    do {
-      val candx = x + dir * lambda
-      val candf = fn.valueAt(candx)
-      val suffdec = gamma * lambda * gTd
-
-      if (testOpt && srchit > 0) {
-        logger.debug(f"PQN: SrchIt $srchit%4d: f $candf%-10.4f t $lambda%-10.4f\n")
-      }
-
-      if (candf < state.adjustedValue + suffdec) {
-        accepted = true
-      } else if (srchit >= maxSrchIt) {
-        accepted = true
-      } else {
-        lambda *= 0.5
-        srchit = srchit + 1
-      }
-    } while (!accepted)
-
-    if (srchit >= maxSrchIt) {
-      logger.info("PQN: Line search cannot make further progress")
-      throw new LineSearchFailed(norm(state.grad,Double.PositiveInfinity), norm(dir, Double.PositiveInfinity))
-    }
-    lambda
+  /**
+   * Given a direction, perform a Strong Wolfe Line Search
+   *
+   * TO DO: Compare performance with Cubic Interpolation based line search from Mark's PQN paper
+   *
+   * @param state the current state
+   * @param f The objective
+   * @param dir The step direction
+   * @return stepSize
+   */
+  protected def determineStepSize(state: State, f: DiffFunction[DenseVector[Double]], dir: DenseVector[Double]) = {
+    val x = state.x
+    val grad = state.grad
+
+    val ff = LineSearch.functionFromSearchDirection(f, x, dir)
```
Member: do we not need to project inside the line search?

Contributor (Author): Matlab code and the paper did not project inside PQN line search...

Member: ah. Right, alpha is probably always <= 1, so it's safe.
```diff
+    val search = new BacktrackingLineSearch(state.value, maxIterations = maxSrchIt, shrinkStep= if(state.iter < 1) 0.1 else 0.5)
+    var alpha = if(state.iter == 0.0) min(1.0, 1.0/norm(dir)) else 1.0
+    alpha = search.minimize(ff, alpha)
+
+    if(alpha * norm(grad) < 1E-10) throw new StepSizeUnderflow
+
+    alpha
   }

   protected def takeStep(state: State, dir: DenseVector[Double], stepSize: Double): DenseVector[Double] = {
     projection(state.x + dir * stepSize)
   }

   protected def updateHistory(newX: DenseVector[Double], newGrad: DenseVector[Double], newVal: Double, f: DiffFunction[DenseVector[Double]], oldState: State): History = {
     import oldState._
-    val s = newX - oldState.x
-    val y = newGrad - oldState.grad
+    val s = newX - x
+    val y = newGrad - grad
     oldState.history.updated(y, s)
   }

 }

-object ProjectedQuasiNewton {
+object ProjectedQuasiNewton extends SerializableLogging {
   // Forms a quadratic model around fun, the argmin of which is then a feasible
   // quasi-Newton descent direction
-  class QuadraticSubproblem(fun: DiffFunction[DenseVector[Double]],
-                            fk: Double,
+  class QuadraticSubproblem(fk: Double,
                             xk: DenseVector[Double],
                             gk: DenseVector[Double],
                             B: CompactHessian) extends DiffFunction[DenseVector[Double]] {
```
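For orientation, here is a minimal usage sketch of the class as it appears in this diff. The quadratic objective, the nonnegativity projection, and the chosen parameter values are illustrative assumptions, not code from the PR:

```scala
import breeze.linalg._
import breeze.optimize.{DiffFunction, ProjectedQuasiNewton}

object PQNUsageSketch {
  def main(args: Array[String]): Unit = {
    // Smooth objective f(x) = ||x - c||^2; c has negative entries, so the
    // unconstrained minimizer violates a nonnegativity constraint.
    val c = DenseVector(-1.0, 2.0, -3.0, 4.0)
    val f = new DiffFunction[DenseVector[Double]] {
      def calculate(x: DenseVector[Double]) = {
        val r = x - c
        (r dot r, r * 2.0)
      }
    }

    // Constraint expressed as a Euclidean projection onto the nonnegative orthant.
    val projectNonNegative =
      (x: DenseVector[Double]) => x.map(v => math.max(v, 0.0))

    // Constructor arguments follow the signature shown in the diff above.
    val pqn = new ProjectedQuasiNewton(projection = projectNonNegative, maxIter = 100)
    val xopt = pqn.minimize(f, DenseVector.zeros[Double](c.length))
    println(s"constrained solution: $xopt") // roughly (0, 2, 0, 4)
  }
}
```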
FWIW, "all rights reserved" has no legal meaning in modern copyright law.
PQN can do the simplex projection, but it does not separate f(x) and g(z)... what we really want to do is solve f(x) through blocked cyclic coordinate descent using BFGS and satisfy g(z) through a proximal operator... that's our version of a parameter server (distributed solver)... I am still thinking about whether we can replace coordinate descent with distributed BFGS using some tricks... if I use OWLQN or PQN I have to change code in all of them...
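To make the f(x) + g(z) splitting concrete, here is a rough, hypothetical sketch of that idea in breeze terms: the smooth part f is minimized with breeze's LBFGS on an augmented objective, and g is enforced through a proximal operator in an ADMM-style loop. The object and parameter names and the scaled-dual formulation are my assumptions, not the PR's NonlinearMinimizer:

```scala
import breeze.linalg._
import breeze.optimize.{DiffFunction, LBFGS}

// Hypothetical sketch of the f(x) + g(z) splitting described above:
// the smooth block f is handled by a quasi-Newton solver (LBFGS here),
// the constraint/regularizer g by a proximal operator, tied together
// with ADMM-style updates on a scaled dual variable u.
object SplittingSketch {
  // proximal operator: v => argmin_z { g(z) + (rho / 2) * ||z - v||^2 }
  type Prox = (DenseVector[Double], Double) => DenseVector[Double]

  def minimize(f: DiffFunction[DenseVector[Double]],
               prox: Prox,
               init: DenseVector[Double],
               rho: Double = 1.0,
               outerIters: Int = 50): DenseVector[Double] = {
    val lbfgs = new LBFGS[DenseVector[Double]](maxIter = 100, m = 7)
    var x = init.copy
    var z = init.copy
    var u = DenseVector.zeros[Double](init.length)

    for (_ <- 0 until outerIters) {
      // x-update: smooth subproblem f(x) + (rho / 2) * ||x - z + u||^2
      val augmented = new DiffFunction[DenseVector[Double]] {
        def calculate(xx: DenseVector[Double]) = {
          val (fx, gx) = f.calculate(xx)
          val r = xx - z + u
          (fx + 0.5 * rho * (r dot r), gx + r * rho)
        }
      }
      x = lbfgs.minimize(augmented, x)
      // z-update: proximal step that enforces g
      z = prox(x + u, rho)
      // dual update
      u = u + x - z
    }
    z
  }
}
```

In a distributed setting, the x-update is the piece that would be split across blocks or partitions, which is where the coordinate-descent versus distributed-BFGS question above comes in.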
Also, I looked at the paper... that's the same algorithm I am implementing... it is a proximal algorithm... maybe we can plug it into PQN as well... I don't know PQN that well...
I read the PQN paper and it is possible to stick all the proximal operators inside PQN... I have tested bounds with PQN already and it works well compared to the default ADMM.
If it holds up for ProximalL1() compared to OWLQN, then I am good to use PQN as the core of NonlinearMinimizer... PQN right now accepts a closure of the form DenseVector[Double] => DenseVector[Double]. Would it be fine if I change the signature to (x: DenseVector[Double], rho: Double) => DenseVector[Double]?
This is in tune with generic proximal algorithms:
minimize g(x) + rho * ||x - v||_2^2
g(x) can be a constraint here: x in C
g(x) can be L1 here as well, through soft-thresholding I think
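For concreteness, two hypothetical operators matching that proposed (v, rho) signature, with the threshold worked out from the g(x) + rho * ||x - v||_2^2 form above (the names are mine, not from the PR):

```scala
import breeze.linalg.DenseVector

// Hypothetical illustrations of the proposed closure signature
//   (v: DenseVector[Double], rho: Double) => DenseVector[Double]
// returning argmin_x { g(x) + rho * ||x - v||_2^2 }.

// g = indicator of the nonnegative orthant: the prox is plain projection,
// independent of rho.
val projectNonNegative: (DenseVector[Double], Double) => DenseVector[Double] =
  (v, _) => v.map(x => math.max(x, 0.0))

// g(x) = lambda * ||x||_1: the prox is soft-thresholding; with the scaling
// above the threshold works out to lambda / (2 * rho).
def softThreshold(lambda: Double): (DenseVector[Double], Double) => DenseVector[Double] =
  (v, rho) => {
    val t = lambda / (2.0 * rho)
    v.map(x => math.signum(x) * math.max(math.abs(x) - t, 0.0))
  }
```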
I'm totally open to that, but I'm not quite sure how it will be used. PQN won't manipulate it, right? Why not have your driver specify it in a new instance of PQN?
-- David
On Tue, Feb 17, 2015 at 12:01 PM, Debasish Das [email protected] wrote:
Why not an interface like:

```scala
trait SeparableDiffFunction[T] {
  def apply(x: T): IndexedSeq[(Double, T)]
}
```

? Maybe with a method to turn it into a normal DiffFunction given an OpAdd for T?
On Sat, Feb 21, 2015 at 7:53 AM, Debasish Das [email protected] wrote:
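One hedged reading of that suggestion, specialized to DenseVector[Double] so the blocks can simply be summed (a generic version would take the addition, the OpAdd for T, as an implicit); the method names here are hypothetical:

```scala
import breeze.linalg.DenseVector
import breeze.optimize.DiffFunction

// Sketch of the suggested interface: each block contributes its own
// (value, gradient) pair, and the pieces are summed to recover an
// ordinary DiffFunction.
trait SeparableDiffFunction {
  // one (value, gradient) contribution per block/partition
  // (plays the role of `apply` in the suggestion above)
  def blocks(x: DenseVector[Double]): IndexedSeq[(Double, DenseVector[Double])]

  // Collapse the blocks into a normal DiffFunction by summing values and gradients.
  def toDiffFunction: DiffFunction[DenseVector[Double]] =
    new DiffFunction[DenseVector[Double]] {
      def calculate(x: DenseVector[Double]): (Double, DenseVector[Double]) = {
        val parts = blocks(x)
        val totalValue = parts.map(_._1).sum
        val totalGrad = parts.map(_._2).reduce(_ + _)
        (totalValue, totalGrad)
      }
    }
}
```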
Let me add the experimental version with the RDD in it, and you can help define the clean interface... I feel we will need different interfaces for separable and non-separable functions.
I added the PQN driver and the projection operators for L1(x) <= c and ProbabilitySimplex... Somehow, in my experiments I am unable to reproduce the results from Figure 1 of the PQN paper... OWLQN is run first to extract the parameter lambda_L1(x_), which generates the constraint L1(x) <= c for PQN.

runMain breeze.optimize.proximal.NonlinearMinimizer 500 0.4 0.99

Elastic Net with L1 and L2 regularization.

Linear Regression:
Issues:
owlqn 678.072 ms, iters 173
pqnSparseTime 30600.365 ms, iters 500
owlqnObj -145.38669700980395, pqnSparseObj -135.15057488775597

Logistic Regression:
Cons:
owlqn 187.894 ms, iters 74; pqn 758.073 ms, iters 28
objective owlqnLogistic 52.32713379333781, pqnLogistic 81.37098398138012

I am debugging the code further, but any pointers would be great. I don't think this is the expected behavior from PQN on the L1/ProbabilitySimplex constraints as per the paper.
Random question: does it seem to be line searching a lot (relative to OWLQN, or to runs with a box constraint)?
On Mon, Feb 23, 2015 at 2:09 PM, Debasish Das [email protected] wrote:
I think my L1 projection has bugs in it... L1(x) is built using ProbabilitySimplex, and PQN is beating naive ADMM and is on par with QuadraticMinimizer for ProbabilitySimplex.
I will take a closer look at the L1 projection for bugs, but for ADMM-based multicore/distributed consensus I will most likely choose OWLQN for elastic net and PQN for the other constraints.
Here are the results with ProbabilitySimplex:

minimize f(x) s.t. 1'x = c, x >= 0, c = 1

Linear Regression with ProbabilitySimplex, f(x) = ||Ax - b||^2
Objective: pqn 85.34613832959954, nl 84.95320179604967, qp 85.33114863196366
Constraint: pqn 0.9999999999999997, nl 1.001707509961072, qp 1.000000000000004
Time: pqn 150.552 ms, nl 15847.125 ms, qp 96.105 ms

Logistic Regression with ProbabilitySimplex, f(x) = logistic loss from mllib
Objective: pqn 257.6058563777358, nl 257.4025971846134
Constraint: pqn 0.9999999999999998, nl 1.0007230450802203
Time: pqn 94.19 ms, nl 25160.317 ms