Skip to content

Commit 1fdd2df

Browse files
committed
Merge branch 'SPARK-10117' of github.com:Lewuathe/spark into SPARK-10117
2 parents ba3657c + 0ea1c1c commit 1fdd2df

File tree

5 files changed: +11 −39 lines changed

mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ private[ml] class LibSVMRelation(val path: String, val numFeatures: Int, val vec
 64  64
 65  65   }
 66  66
     67 + /**
     68 +  * This is used for creating DataFrame from LibSVM format file.
     69 +  * The LibSVM file path must be specified to DefaultSource.
     70 +  */
 67  71   class DefaultSource extends RelationProvider with DataSourceRegister {
 68  72
 69  73     override def shortName(): String = "libsvm"

mllib/src/main/scala/org/apache/spark/ml/source/libsvm/package.scala

Lines changed: 0 additions & 33 deletions
This file was deleted.

mllib/src/test/java/org/apache/spark/ml/source/JavaLibSVMRelationSuite.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ public void tearDown() {
 68  68
 69  69     @Test
 70  70     public void verifyLibSVMDF() {
 71     -     dataset = jsql.read().format("org.apache.spark.ml.source.libsvm").option("vectorType", "dense")
 72     -       .load(path.getPath());
     71 +     dataset = jsql.read().format("libsvm").option("vectorType", "dense").load(path.getPath());
 73  72       Assert.assertEquals("label", dataset.columns()[0]);
 74  73       Assert.assertEquals("features", dataset.columns()[1]);
 75  74       Row r = dataset.first();

mllib/src/test/scala/org/apache/spark/ml/source/LibSVMRelationSuite.scala

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
 45  45   }
 46  46
 47  47   test("select as sparse vector") {
 48     -   val df = sqlContext.read.libsvm(path)
     48 +   val df = sqlContext.read.format("libsvm").load(path)
 49  49     assert(df.columns(0) == "label")
 50  50     assert(df.columns(1) == "features")
 51  51     val row1 = df.first()
@@ -55,8 +55,8 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
 55  55   }
 56  56
 57  57   test("select as dense vector") {
 58     -   val df = sqlContext.read.options(Map("vectorType" -> "dense"))
 59     -     .libsvm(path)
     58 +   val df = sqlContext.read.format("libsvm").options(Map("vectorType" -> "dense"))
     59 +     .load(path)
 60  60     assert(df.columns(0) == "label")
 61  61     assert(df.columns(1) == "features")
 62  62     assert(df.count() == 3)
@@ -75,7 +75,8 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
 75  75     val tempDir = Utils.createTempDir()
 76  76     val file = new File(tempDir.getPath, "part-00001")
 77  77     Files.write(lines, file, Charsets.US_ASCII)
 78     -   val df = sqlContext.read.option("numFeatures", "100").libsvm(tempDir.toURI.toString)
     78 +   val df = sqlContext.read.option("numFeatures", "100").format("libsvm")
     79 +     .load(tempDir.toURI.toString)
 79  80     val row1 = df.first()
 80  81     val v = row1.getAs[SparseVector](1)
 81  82     assert(v == Vectors.sparse(100, Seq((0, 1.0), (9, 2.0), (19, 3.0), (29, 4.0), (39, 5.0),
(filename missing from capture — presumably the data-source registration list, META-INF/services/org.apache.spark.sql.sources.DataSourceRegister; verify against the original commit)

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
 1  1  org.apache.spark.sql.execution.datasources.jdbc.DefaultSource
 2  2  org.apache.spark.sql.execution.datasources.json.DefaultSource
 3  3  org.apache.spark.sql.execution.datasources.parquet.DefaultSource
    4 + org.apache.spark.ml.source.libsvm.DefaultSource

0 commit comments

Comments (0)