add sc
yinxusen committed Jun 30, 2016
commit b0e74e34accd768d2d16e4afc41d46a7cde4da8b
@@ -170,7 +170,6 @@ private static void blockMatrixExample() {
}

public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("JavaDataTypesExample");
SparkContext sc = new SparkContext(conf);

20 changes: 14 additions & 6 deletions examples/src/main/python/mllib/datatypes_examples.py
@@ -17,10 +17,7 @@

from __future__ import print_function

from pyspark import SparkContext


def __local_vector_example():
@@ -54,6 +51,8 @@ def __labeled_point_example():


def __libsvm_example():
sc = SparkContext.getOrCreate()
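# getOrCreate() returns the already-active SparkContext when one exists
# (e.g. the one created in main()) and only constructs a new one otherwise,
# so each example function can also run standalone.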

# $example on:libsvm$
from pyspark.mllib.util import MLUtils

@@ -74,6 +73,8 @@ def __local_matrix_example():


def __row_matrix_example():
sc = SparkContext.getOrCreate()

# $example on:row_matrix$
from pyspark.mllib.linalg.distributed import RowMatrix

@@ -93,6 +94,8 @@ def __row_matrix_example():


def __indexed_row_matrix_example():
sc = SparkContext.getOrCreate()

# $example on:indexed_row_matrix$
from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix
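
The construction itself is folded; a minimal sketch, assuming the IndexedRow and IndexedRowMatrix API named in the import:

# Rows paired with explicit long indices.
indexedRows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
                              IndexedRow(1, [4, 5, 6]),
                              IndexedRow(2, [7, 8, 9])])
mat = IndexedRowMatrix(indexedRows)
m = mat.numRows()  # 3
n = mat.numCols()  # 3
# Dropping the row indices yields a plain RowMatrix.
rowMat = mat.toRowMatrix()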

@@ -122,12 +125,15 @@ def __indexed_row_matrix_example():


def __coordinate_matrix_example():
sc = SparkContext.getOrCreate()

# $example on:coordinate_matrix$
from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry

# Create an RDD of coordinate entries.
# - This can be done explicitly with the MatrixEntry class:
entries =\
sc.parallelize([MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), MatrixEntry(6, 1, 3.7)])
# - or using (long, long, float) tuples:
entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (6, 1, 3.7)])
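
The matrix construction is folded out of the diff; a minimal sketch, assuming the CoordinateMatrix API:

# Wrap the entries RDD in a CoordinateMatrix.
mat = CoordinateMatrix(entries)
m = mat.numRows()  # 7 (largest row index is 6)
n = mat.numCols()  # 2
# The entries are recoverable as an RDD of MatrixEntry.
entriesRDD = mat.entries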

@@ -153,6 +159,8 @@ def __coordinate_matrix_example():


def __block_matrix():
sc = SparkContext.getOrCreate()

# $example on:block_matrix$
from pyspark.mllib.linalg import Matrices
from pyspark.mllib.linalg.distributed import BlockMatrix
@@ -165,8 +173,8 @@ def __block_matrix():
mat = BlockMatrix(blocks, 3, 2)

# Get its size.
m = mat.numRows()  # 6
n = mat.numCols()  # 2

# Get the blocks as an RDD of sub-matrix blocks.
blocksRDD = mat.blocks
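
Beyond reading back the blocks, a BlockMatrix can be converted to the other distributed layouts; a short sketch, assuming the standard conversion methods:

localMat = mat.toLocalMatrix()            # collect into a single local Matrix
indexedRowMat = mat.toIndexedRowMatrix()  # per-row layout
coordMat = mat.toCoordinateMatrix()       # per-entry layout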
@@ -19,21 +19,14 @@
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}

object DataTypesExamples {

private def localVectorExample(): Unit = {
// $example on:local_vector$
import org.apache.spark.mllib.linalg.{Vector, Vectors}

// Create a dense vector (1.0, 0.0, 3.0).
val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to
@@ -46,6 +39,9 @@ object DataTypesExamples {

private def labeledPointExample(): Unit = {
// $example on:labeled_point$
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

// Create a labeled point with a positive label and a dense feature vector.
val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))

@@ -57,13 +53,19 @@ object DataTypesExamples {
private def libsvmExample(): Unit = {
val sc = SparkContext.getOrCreate()
// $example on:libsvm$
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD

val examples: RDD[LabeledPoint] =
MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
// $example off:libsvm$
}

private def localMatrixExample(): Unit = {
// $example on:local_matrix$
import org.apache.spark.mllib.linalg.{Matrix, Matrices}

// Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
val dm: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))

@@ -75,6 +77,10 @@ object DataTypesExamples {
private def rowMatrixExample(): Unit = {
val sc = SparkContext.getOrCreate()
// $example on:row_matrix$
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.rdd.RDD

val v1 = Vectors.dense(1.0, 10.0, 100.0)
val v2 = Vectors.dense(2.0, 20.0, 200.0)
val v3 = Vectors.dense(3.0, 30.0, 300.0)
@@ -96,6 +102,10 @@ object DataTypesExamples {
val sc = SparkContext.getOrCreate()

// $example on:indexed_row_matrix$
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix}
import org.apache.spark.rdd.RDD

val r0 = IndexedRow(0, Vectors.dense(1, 2, 3))
val r1 = IndexedRow(1, Vectors.dense(4, 5, 6))
val r2 = IndexedRow(2, Vectors.dense(7, 8, 9))
@@ -118,6 +128,9 @@ object DataTypesExamples {
val sc = SparkContext.getOrCreate()

// $example on:coordinate_matrix$
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
import org.apache.spark.rdd.RDD

val me1 = MatrixEntry(0, 0, 1.2)
val me2 = MatrixEntry(1, 0, 2.1)
val me3 = MatrixEntry(6, 1, 3.7)
@@ -139,6 +152,9 @@ object DataTypesExamples {
val sc = SparkContext.getOrCreate()

// $example on:block_matrix$
import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}
import org.apache.spark.rdd.RDD

val me1 = MatrixEntry(0, 0, 1.2)
val me2 = MatrixEntry(1, 0, 2.1)
val me3 = MatrixEntry(6, 1, 3.7)
@@ -161,7 +177,7 @@ object DataTypesExamples {
}

def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("DataTypeExamples")
val conf = new SparkConf().setAppName("DataTypesExamples")
val sc = new SparkContext(conf)

localVectorExample()