add scala example and fix error prompt in include_example
yinxusen committed Jun 30, 2016
commit 4fa23b4fb74e252bc9b8cd9cda0f1453752639bd
8 changes: 4 additions & 4 deletions docs/_plugins/include_example.rb
@@ -85,20 +85,20 @@ def select_lines(code)
.select { |l, i| l.include? "$example off#{@snippet_label}$" }
.map { |l, i| i }

raise "Start indices amount is not equal to end indices amount, see #{@file}." \
raise "Start indices amount is not equal to end indices amount, see #{@file}, #{@snippet_label}." \
Review comment (Contributor):

I'd prefer

... see #{@file} [labeled=#{@snippet_label}].

Otherwise the label itself might be mistaken for a file path.

unless startIndices.size == endIndices.size

raise "No code is selected by include_example, see #{@file}." \
raise "No code is selected by include_example, see #{@file}, #{@snippet_label}." \
if startIndices.size == 0

# Select and join code blocks together, with a space line between each of two continuous
# blocks.
lastIndex = -1
result = ""
startIndices.zip(endIndices).each do |start, endline|
raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
raise "Overlapping between two example code blocks are not allowed, see #{@file}, #{@snippet_label}." \
if start <= lastIndex
raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
raise "$example on$ should not be in the same line with $example off$, see #{@file}, #{@snippet_label}." \
if start == endline
lastIndex = endline
range = Range.new(start + 1, endline - 1)
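For reference, the markers this plugin parses pair up in an example source like the following (hypothetical label `my_label`); the errors above fire when an `$example on$` lacks a matching `$example off$`, when two pairs overlap, when both markers share a line, or when no pair with the requested label exists:

// $example on:my_label$
val data = Seq(1.0, 2.0, 3.0)  // only the lines between the markers are extracted
// $example off:my_label$

A documentation page then pulls the snippet in with `{% include_example my_label path/to/Example.scala %}`.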
96 changes: 8 additions & 88 deletions docs/mllib-data-types.md
@@ -35,16 +35,7 @@ using the factory methods implemented in

Refer to the [`Vector` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and [`Vectors` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
-
-// Create a dense vector (1.0, 0.0, 3.0).
-val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries.
-val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
-val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
-{% endhighlight %}
+{% include_example local-vector scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}

***Note:***
Scala imports `scala.collection.immutable.Vector` by default, so you have to import
`org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`.
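For illustration, a minimal snippet showing the explicit import:

{% highlight scala %}
import org.apache.spark.mllib.linalg.{Vector, Vectors}

// After this import, `Vector` refers to MLlib's type rather than Scala's.
val v: Vector = Vectors.dense(1.0, 0.0, 3.0)
{% endhighlight %}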
@@ -127,16 +118,8 @@ A labeled point is represented by the case class

Refer to the [`LabeledPoint` Scala docs](api/scala/index.html#org.apache.spark.mllib.regression.LabeledPoint) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.regression.LabeledPoint
+{% include_example labeled-point scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Create a labeled point with a positive label and a dense feature vector.
-val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
-
-// Create a labeled point with a negative label and a sparse feature vector.
-val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -201,13 +184,8 @@ examples stored in LIBSVM format.

Refer to the [`MLUtils` Scala docs](api/scala/index.html#org.apache.spark.mllib.util.MLUtils$) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.rdd.RDD
+{% include_example libsvm scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -266,15 +244,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order.

Refer to the [`Matrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrix) and [`Matrices` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.{Matrix, Matrices}
+{% include_example local-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
-
-// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
-val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -369,21 +340,8 @@ For [singular value decomposition (SVD)](https://en.wikipedia.org/wiki/Singular_value_decomposition)

Refer to the [`RowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-
-val rows: RDD[Vector] = ... // an RDD of local vectors
-// Create a RowMatrix from an RDD[Vector].
-val mat: RowMatrix = new RowMatrix(rows)
+{% include_example row-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// QR decomposition
-val qrResult = mat.tallSkinnyQR(true)
-{% endhighlight %}
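For illustration, `RowMatrix` also provides column summary statistics and the SVD mentioned above; a minimal sketch, assuming `mat` from the example:

{% highlight scala %}
// Column-wise summary statistics (mean, variance, nonzero counts, ...).
val summary = mat.computeColumnSummaryStatistics()
println(summary.mean)

// Singular value decomposition, keeping at most 2 singular values.
val svd = mat.computeSVD(2, computeU = true)
val singularValues = svd.s
{% endhighlight %}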
</div>

<div data-lang="java" markdown="1">
@@ -456,20 +414,8 @@ its row indices.

Refer to the [`IndexedRowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
+{% include_example indexed-row-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-val rows: RDD[IndexedRow] = ... // an RDD of indexed rows
-// Create an IndexedRowMatrix from an RDD[IndexedRow].
-val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// Drop its row indices.
-val rowMat: RowMatrix = mat.toRowMatrix()
-{% endhighlight %}
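For illustration, an `IndexedRowMatrix` can also be converted to the other distributed matrix types; a minimal sketch, assuming `mat` from the example:

{% highlight scala %}
// Convert to a CoordinateMatrix of (rowIndex, colIndex, value) entries.
val coordMat = mat.toCoordinateMatrix()
// Convert to a BlockMatrix for block-wise operations.
val blockMat = mat.toBlockMatrix()
{% endhighlight %}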
</div>

<div data-lang="java" markdown="1">
@@ -562,20 +508,8 @@ with sparse rows by calling `toIndexedRowMatrix`. Other computations for `CoordinateMatrix` are not currently supported.

Refer to the [`CoordinateMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.CoordinateMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
+{% include_example coordinate-row-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-val entries: RDD[MatrixEntry] = ... // an RDD of matrix entries
-// Create a CoordinateMatrix from an RDD[MatrixEntry].
-val mat: CoordinateMatrix = new CoordinateMatrix(entries)
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// Convert it to an IndexRowMatrix whose rows are sparse vectors.
-val indexedRowMatrix = mat.toIndexedRowMatrix()
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -670,22 +604,8 @@ Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`.

Refer to the [`BlockMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
-
-val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries
-// Create a CoordinateMatrix from an RDD[MatrixEntry].
-val coordMat: CoordinateMatrix = new CoordinateMatrix(entries)
-// Transform the CoordinateMatrix to a BlockMatrix
-val matA: BlockMatrix = coordMat.toBlockMatrix().cache()
-
-// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid.
-// Nothing happens if it is valid.
-matA.validate()
+{% include_example block-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Calculate A^T A.
-val ata = matA.transpose.multiply(matA)
-{% endhighlight %}
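For illustration of the block-size note above: the no-argument `toBlockMatrix()` uses 1024 x 1024 blocks by default, and custom sizes can be supplied explicitly. A minimal sketch, assuming `coordMat` from the example:

{% highlight scala %}
// Use 2 x 2 blocks instead of the default 1024 x 1024.
val matB: BlockMatrix = coordMat.toBlockMatrix(2, 2).cache()
matB.validate()
{% endhighlight %}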
</div>

<div data-lang="java" markdown="1">
179 changes: 179 additions & 0 deletions examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala
@@ -0,0 +1,179 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.{Matrices, Matrix}
// $example on:local-vector$
import org.apache.spark.mllib.linalg.{Vector, Vectors}
// $example off:local-vector$
import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD


object DataTypesExamples {

def localVectorExample(): Unit = {
// $example on:local-vector$
// Create a dense vector (1.0, 0.0, 3.0).
val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
// nonzero entries.
val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
// $example off:local-vector$
}

def labeledPointExample(): Unit = {
// $example on:labeled-point$
// Create a labeled point with a positive label and a dense feature vector.
val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))

// Create a labeled point with a negative label and a sparse feature vector.
val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
// $example off:labeled-point$
}

def libsvmExample(): Unit = {
val sc = SparkContext.getOrCreate()
// $example on:libsvm$
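// Each line of the input file has the form "label index1:value1 index2:value2 ...",
// where the indices are one-based and in ascending order.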
val examples: RDD[LabeledPoint] =
MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
// $example off:libsvm$
}

def localMatrixExample(): Unit = {
// $example on:local-matrix$
// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))

// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
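// in CSC format, with arguments (numRows, numCols, colPtrs, rowIndices, values)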
val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
// $example off:local-matrix$
}

def rowMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()
// $example on:row-matrix$
val v1 = Vectors.dense(1.0, 10.0, 100.0)
val v2 = Vectors.dense(2.0, 20.0, 200.0)
val v3 = Vectors.dense(3.0, 30.0, 300.0)

val rows: RDD[Vector] = sc.parallelize(Seq(v1, v2, v3)) // an RDD of local vectors
// Create a RowMatrix from an RDD[Vector].
val mat: RowMatrix = new RowMatrix(rows)

// Get its size.
val m = mat.numRows()
val n = mat.numCols()

// QR decomposition
val qrResult = mat.tallSkinnyQR(true)
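// With computeQ = true, qrResult.Q is a RowMatrix with orthonormal columns
// and qrResult.R is an upper triangular local Matrix, such that mat = Q * R.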
// $example off:row-matrix$
}

def indexedRowMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()

// $example on:indexed-row-matrix$
val r0 = IndexedRow(0, Vectors.dense(1, 2, 3))
val r1 = IndexedRow(1, Vectors.dense(4, 5, 6))
val r2 = IndexedRow(2, Vectors.dense(7, 8, 9))
val r3 = IndexedRow(3, Vectors.dense(10, 11, 12))

val rows: RDD[IndexedRow] = sc.parallelize(Seq(r0, r1, r2, r3)) // an RDD of indexed rows
// Create an IndexedRowMatrix from an RDD[IndexedRow].
val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)

// Get its size.
val m = mat.numRows()
val n = mat.numCols()

// Drop its row indices.
val rowMat: RowMatrix = mat.toRowMatrix()
// $example off:indexed-row-matrix$
}

def coordinateMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()

// $example on:coordinate-row-matrix$
val me1 = MatrixEntry(0, 0, 1.2)
val me2 = MatrixEntry(1, 0, 2.1)
val me3 = MatrixEntry(6, 1, 3.7)

val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3)) // an RDD of matrix entries
// Create a CoordinateMatrix from an RDD[MatrixEntry].
val mat: CoordinateMatrix = new CoordinateMatrix(entries)

// Get its size.
val m = mat.numRows()
val n = mat.numCols()

// Convert it to an IndexedRowMatrix whose rows are sparse vectors.
val indexedRowMatrix = mat.toIndexedRowMatrix()
// $example off:coordinate-row-matrix$
}

def blockMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()

// $example on:block-matrix$
val me1 = MatrixEntry(0, 0, 1.2)
val me2 = MatrixEntry(1, 0, 2.1)
val me3 = MatrixEntry(6, 1, 3.7)

// an RDD of (i, j, v) matrix entries
val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3))
// Create a CoordinateMatrix from an RDD[MatrixEntry].
val coordMat: CoordinateMatrix = new CoordinateMatrix(entries)
// Transform the CoordinateMatrix to a BlockMatrix
val matA: BlockMatrix = coordMat.toBlockMatrix().cache()

// Validate whether the BlockMatrix is set up properly.
// Throws an Exception when it is not valid.
// Nothing happens if it is valid.
matA.validate()

// Calculate A^T A.
val ata = matA.transpose.multiply(matA)
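// transpose and multiply are distributed block-wise operations,
// so ata is itself a BlockMatrix.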
// $example off:block-matrix$
}

def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("DataTypesExamples")
val sc = new SparkContext(conf)

localVectorExample()
labeledPointExample()
libsvmExample()
localMatrixExample()
rowMatrixExample()
indexedRowMatrixExample()
coordinateMatrixExample()
blockMatrixExample()

sc.stop()
}
}
// scalastyle:on println