add scala example and fix error prompt in include_example
yinxusen committed Jun 30, 2016
commit 4fa23b4fb74e252bc9b8cd9cda0f1453752639bd
8 changes: 4 additions & 4 deletions docs/_plugins/include_example.rb
@@ -85,20 +85,20 @@ def select_lines(code)
.select { |l, i| l.include? "$example off#{@snippet_label}$" }
.map { |l, i| i }

raise "Start indices amount is not equal to end indices amount, see #{@file}." \
raise "Start indices amount is not equal to end indices amount, see #{@file}, #{@snippet_label}." \
Review comment (Contributor):

I'd prefer

... see #{@file} [labeled=#{@snippet_label}].

Otherwise the label itself might be mistaken for a file path.

unless startIndices.size == endIndices.size

raise "No code is selected by include_example, see #{@file}." \
raise "No code is selected by include_example, see #{@file}, #{@snippet_label}." \
if startIndices.size == 0

# Select and join code blocks together, with a space line between each of two continuous
# blocks.
lastIndex = -1
result = ""
startIndices.zip(endIndices).each do |start, endline|
raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
raise "Overlapping between two example code blocks are not allowed, see #{@file}, #{@snippet_label}." \
if start <= lastIndex
raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
raise "$example on$ should not be in the same line with $example off$, see #{@file}, #{@snippet_label}." \
if start == endline
lastIndex = endline
range = Range.new(start + 1, endline - 1)
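For reference, the markers this plugin parses pair up in an example source like the following (hypothetical label `my_label`); the errors above fire when an `$example on$` lacks a matching `$example off$`, when two pairs overlap, when both markers share a line, or when no pair with the requested label exists:

// $example on:my_label$
val data = Seq(1.0, 2.0, 3.0)  // only the lines between the markers are extracted
// $example off:my_label$

A documentation page then pulls the snippet in with `{% include_example my_label path/to/Example.scala %}`.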
96 changes: 8 additions & 88 deletions docs/mllib-data-types.md
@@ -35,16 +35,7 @@ using the factory methods implemented in

Refer to the [`Vector` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and [`Vectors` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
-
-// Create a dense vector (1.0, 0.0, 3.0).
-val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries.
-val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
-// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
-val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
-{% endhighlight %}
+{% include_example local-vector scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}

***Note:***
Scala imports `scala.collection.immutable.Vector` by default, so you have to import
`org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`.
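For illustration, a minimal snippet showing the explicit import:

{% highlight scala %}
import org.apache.spark.mllib.linalg.{Vector, Vectors}

// After this import, `Vector` refers to MLlib's type rather than Scala's.
val v: Vector = Vectors.dense(1.0, 0.0, 3.0)
{% endhighlight %}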
@@ -127,16 +118,8 @@ A labeled point is represented by the case class

Refer to the [`LabeledPoint` Scala docs](api/scala/index.html#org.apache.spark.mllib.regression.LabeledPoint) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.regression.LabeledPoint
+{% include_example labeled-point scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Create a labeled point with a positive label and a dense feature vector.
-val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
-
-// Create a labeled point with a negative label and a sparse feature vector.
-val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -201,13 +184,8 @@ examples stored in LIBSVM format.

Refer to the [`MLUtils` Scala docs](api/scala/index.html#org.apache.spark.mllib.util.MLUtils$) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.rdd.RDD
+{% include_example libsvm scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -266,15 +244,8 @@ matrices. Remember, local matrices in MLlib are stored in column-major order.

Refer to the [`Matrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrix) and [`Matrices` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.{Matrix, Matrices}
+{% include_example local-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
-
-// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
-val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -369,21 +340,8 @@ For [singular value decomposition (SVD)](https://en.wikipedia.org/wiki/Singular_value_decomposition)

Refer to the [`RowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-
-val rows: RDD[Vector] = ... // an RDD of local vectors
-// Create a RowMatrix from an RDD[Vector].
-val mat: RowMatrix = new RowMatrix(rows)
+{% include_example row-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// QR decomposition
-val qrResult = mat.tallSkinnyQR(true)
-{% endhighlight %}
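For illustration, `RowMatrix` also provides column summary statistics and the SVD mentioned above; a minimal sketch, assuming `mat` from the example:

{% highlight scala %}
// Column-wise summary statistics (mean, variance, nonzero counts, ...).
val summary = mat.computeColumnSummaryStatistics()
println(summary.mean)

// Singular value decomposition, keeping at most 2 singular values.
val svd = mat.computeSVD(2, computeU = true)
val singularValues = svd.s
{% endhighlight %}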
</div>

<div data-lang="java" markdown="1">
@@ -456,20 +414,8 @@ its row indices.

Refer to the [`IndexedRowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
+{% include_example indexed-row-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-val rows: RDD[IndexedRow] = ... // an RDD of indexed rows
-// Create an IndexedRowMatrix from an RDD[IndexedRow].
-val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// Drop its row indices.
-val rowMat: RowMatrix = mat.toRowMatrix()
-{% endhighlight %}
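For illustration, an `IndexedRowMatrix` can also be converted to the other distributed matrix types; a minimal sketch, assuming `mat` from the example:

{% highlight scala %}
// Convert to a CoordinateMatrix of (rowIndex, colIndex, value) entries.
val coordMat = mat.toCoordinateMatrix()
// Convert to a BlockMatrix for block-wise operations.
val blockMat = mat.toBlockMatrix()
{% endhighlight %}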
</div>

<div data-lang="java" markdown="1">
@@ -562,20 +508,8 @@ with sparse rows by calling `toIndexedRowMatrix`. Other computations for `CoordinateMatrix` are not currently supported.

Refer to the [`CoordinateMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.CoordinateMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
+{% include_example coordinate-row-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-val entries: RDD[MatrixEntry] = ... // an RDD of matrix entries
-// Create a CoordinateMatrix from an RDD[MatrixEntry].
-val mat: CoordinateMatrix = new CoordinateMatrix(entries)
-
-// Get its size.
-val m = mat.numRows()
-val n = mat.numCols()
-
-// Convert it to an IndexRowMatrix whose rows are sparse vectors.
-val indexedRowMatrix = mat.toIndexedRowMatrix()
-{% endhighlight %}
</div>

<div data-lang="java" markdown="1">
@@ -670,22 +604,8 @@ Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`.

Refer to the [`BlockMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
-
-val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries
-// Create a CoordinateMatrix from an RDD[MatrixEntry].
-val coordMat: CoordinateMatrix = new CoordinateMatrix(entries)
-// Transform the CoordinateMatrix to a BlockMatrix
-val matA: BlockMatrix = coordMat.toBlockMatrix().cache()
-
-// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid.
-// Nothing happens if it is valid.
-matA.validate()
+{% include_example block-matrix scala/org/apache/spark/examples/mllib/DataTypesExamples.scala %}
-
-// Calculate A^T A.
-val ata = matA.transpose.multiply(matA)
-{% endhighlight %}
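For illustration of the block-size note above: the no-argument `toBlockMatrix()` uses 1024 x 1024 blocks by default, and custom sizes can be supplied explicitly. A minimal sketch, assuming `coordMat` from the example:

{% highlight scala %}
// Use 2 x 2 blocks instead of the default 1024 x 1024.
val matB: BlockMatrix = coordMat.toBlockMatrix(2, 2).cache()
matB.validate()
{% endhighlight %}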
</div>

<div data-lang="java" markdown="1">
179 changes: 179 additions & 0 deletions examples/src/main/scala/org/apache/spark/examples/mllib/DataTypesExamples.scala
@@ -0,0 +1,179 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.{Matrices, Matrix}
// $example on:local-vector$
import org.apache.spark.mllib.linalg.{Vector, Vectors}
// $example off:local-vector$
import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD


object DataTypesExamples {

def localVectorExample(): Unit = {
// $example on:local-vector$
// Create a dense vector (1.0, 0.0, 3.0).
val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
// nonzero entries.
val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.
val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))
// $example off:local-vector$
}

def labeledPointExample(): Unit = {
// $example on:labeled-point$
// Create a labeled point with a positive label and a dense feature vector.
val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))

// Create a labeled point with a negative label and a sparse feature vector.
val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))
// $example off:labeled-point$
}

def libsvmExample(): Unit = {
val sc = SparkContext.getOrCreate()
// $example on:libsvm$
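// Each line of the input file has the form "label index1:value1 index2:value2 ...",
// where the indices are one-based and in ascending order.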
val examples: RDD[LabeledPoint] =
MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
// $example off:libsvm$
}

def localMatrixExample(): Unit = {
// $example on:local-matrix$
// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))

// Create a sparse matrix ((9.0, 0.0), (0.0, 8.0), (0.0, 6.0))
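// in CSC format, with arguments (numRows, numCols, colPtrs, rowIndices, values)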
val sm: Matrix = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 2, 1), Array(9, 6, 8))
// $example off:local-matrix$
}

def rowMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()
// $example on:row-matrix$
val v1 = Vectors.dense(1.0, 10.0, 100.0)
val v2 = Vectors.dense(2.0, 20.0, 200.0)
val v3 = Vectors.dense(3.0, 30.0, 300.0)

val rows: RDD[Vector] = sc.parallelize(Seq(v1, v2, v3)) // an RDD of local vectors
// Create a RowMatrix from an RDD[Vector].
val mat: RowMatrix = new RowMatrix(rows)

// Get its size.
val m = mat.numRows()
val n = mat.numCols()

// QR decomposition
val qrResult = mat.tallSkinnyQR(true)
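// With computeQ = true, qrResult.Q is a RowMatrix with orthonormal columns
// and qrResult.R is an upper triangular local Matrix, such that mat = Q * R.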
// $example off:row-matrix$
}

def indexedRowMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()

// $example on:indexed-row-matrix$
val r0 = IndexedRow(0, Vectors.dense(1, 2, 3))
val r1 = IndexedRow(1, Vectors.dense(4, 5, 6))
val r2 = IndexedRow(2, Vectors.dense(7, 8, 9))
val r3 = IndexedRow(3, Vectors.dense(10, 11, 12))

val rows: RDD[IndexedRow] = sc.parallelize(Seq(r0, r1, r2, r3)) // an RDD of indexed rows
// Create an IndexedRowMatrix from an RDD[IndexedRow].
val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)

// Get its size.
val m = mat.numRows()
val n = mat.numCols()

// Drop its row indices.
val rowMat: RowMatrix = mat.toRowMatrix()
// $example off:indexed-row-matrix$
}

def coordinateMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()

// $example on:coordinate-row-matrix$
val me1 = MatrixEntry(0, 0, 1.2)
val me2 = MatrixEntry(1, 0, 2.1)
val me3 = MatrixEntry(6, 1, 3.7)

val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3)) // an RDD of matrix entries
// Create a CoordinateMatrix from an RDD[MatrixEntry].
val mat: CoordinateMatrix = new CoordinateMatrix(entries)

// Get its size.
val m = mat.numRows()
val n = mat.numCols()

// Convert it to an IndexedRowMatrix whose rows are sparse vectors.
val indexedRowMatrix = mat.toIndexedRowMatrix()
// $example off:coordinate-row-matrix$
}

def blockMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()

// $example on:block-matrix$
val me1 = MatrixEntry(0, 0, 1.2)
val me2 = MatrixEntry(1, 0, 2.1)
val me3 = MatrixEntry(6, 1, 3.7)

// an RDD of (i, j, v) matrix entries
val entries: RDD[MatrixEntry] = sc.parallelize(Seq(me1, me2, me3))
// Create a CoordinateMatrix from an RDD[MatrixEntry].
val coordMat: CoordinateMatrix = new CoordinateMatrix(entries)
// Transform the CoordinateMatrix to a BlockMatrix
val matA: BlockMatrix = coordMat.toBlockMatrix().cache()

// Validate whether the BlockMatrix is set up properly.
// Throws an Exception when it is not valid.
// Nothing happens if it is valid.
matA.validate()

// Calculate A^T A.
val ata = matA.transpose.multiply(matA)
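// transpose and multiply are distributed block-wise operations,
// so ata is itself a BlockMatrix.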
// $example off:block-matrix$
}

def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("DataTypesExamples")
val sc = new SparkContext(conf)

localVectorExample()
labeledPointExample()
libsvmExample()
localMatrixExample()
rowMatrixExample()
indexedRowMatrixExample()
coordinateMatrixExample()
blockMatrixExample()

sc.stop()
}
}
// scalastyle:on println