Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Applying review comments.
  • Loading branch information
attilapiros committed Mar 9, 2018
commit 80b9c8bb4712ae9914b2b9f429ddec04cb25dfac
Original file line number Diff line number Diff line change
Expand Up @@ -34,44 +34,44 @@ class NGramSuite extends MLTest with DefaultReadWriteTest {
val nGram = new NGram()
.setInputCol("inputTokens")
.setOutputCol("nGrams")
val dataFrame = Seq(NGramTestData(
val dataset = Seq(NGramTestData(
Array("Test", "for", "ngram", "."),
Array("Test for", "for ngram", "ngram .")
)).toDF()
testNGram(nGram, dataFrame)
testNGram(nGram, dataset)
}

// Configures an NGram with n = 4 (input column "inputTokens", output column
// "nGrams") and passes it, together with a one-row DataFrame, to testNGram
// (defined outside this view). The row holds five tokens and the two 4-grams
// "a b c d" and "b c d e" — presumably the expected output; verify against
// NGramTestData's field definitions.
// NOTE(review): this text looks like a rendered diff in which the old and new
// versions of each changed line appear back to back (`dataFrame` vs `dataset`);
// confirm against the real file before treating it as compilable Scala.
test("NGramLength=4 yields length 4 n-grams") {
val nGram = new NGram()
.setInputCol("inputTokens")
.setOutputCol("nGrams")
.setN(4)
val dataFrame = Seq(NGramTestData(
val dataset = Seq(NGramTestData(
Array("a", "b", "c", "d", "e"),
Array("a b c d", "b c d e")
)).toDF()
testNGram(nGram, dataFrame)
testNGram(nGram, dataset)
}

// Configures an NGram with n = 4 and passes it to testNGram (defined outside
// this view) with a single row whose two arrays are both empty.
// NOTE(review): the `dataFrame` pair of lines followed by the `dataset` pair
// looks like the removed and added sides of a rendered diff — confirm against
// the real file; only one pair is expected to exist there.
test("empty input yields empty output") {
val nGram = new NGram()
.setInputCol("inputTokens")
.setOutputCol("nGrams")
.setN(4)
val dataFrame = Seq(NGramTestData(Array(), Array())).toDF()
testNGram(nGram, dataFrame)
val dataset = Seq(NGramTestData(Array(), Array())).toDF()
testNGram(nGram, dataset)
}

// Configures an NGram with n = 6 and passes it to testNGram (defined outside
// this view) with a single row holding five tokens and an empty second array,
// i.e. fewer tokens than n.
// NOTE(review): the adjacent `dataFrame` / `dataset` lines look like the old
// and new sides of a rendered diff — confirm against the real file.
test("input array < n yields empty output") {
val nGram = new NGram()
.setInputCol("inputTokens")
.setOutputCol("nGrams")
.setN(6)
val dataFrame = Seq(NGramTestData(
val dataset = Seq(NGramTestData(
Array("a", "b", "c", "d", "e"),
Array()
)).toDF()
testNGram(nGram, dataFrame)
testNGram(nGram, dataset)
}

test("read/write") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,38 @@ class NormalizerSuite extends MLTest with DefaultReadWriteTest {

import testImplicits._

// Shared input vectors for the normalizer tests: six vectors of size 3, mixing
// sparse and dense forms, including an all-zero dense vector and a sparse
// vector with no stored entries.
// NOTE(review): `data` is declared twice in this span (an initialized `val`
// here and an uninitialized `var` below). This looks like the removed and
// added sides of a rendered diff — confirm which declaration the real file keeps.
@transient val data: Seq[Vector] = Seq(
Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
Vectors.dense(0.0, 0.0, 0.0),
Vectors.dense(0.6, -1.1, -3.0),
Vectors.sparse(3, Seq((1, 0.91), (2, 3.2))),
Vectors.sparse(3, Seq((0, 5.7), (1, 0.72), (2, 2.7))),
Vectors.sparse(3, Seq()))
// Fixtures left uninitialized here and assigned in beforeAll().
@transient var data: Array[Vector] = _
@transient var l1Normalized: Array[Vector] = _
@transient var l2Normalized: Array[Vector] = _

// Calls super.beforeAll() and then assigns the three shared fixtures: the raw
// input vectors and two arrays of expected results, index-aligned with `data`.
override def beforeAll(): Unit = {
super.beforeAll()

// Raw inputs: six vectors of size 3 in a mix of sparse and dense forms.
data = Array(
Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
Vectors.dense(0.0, 0.0, 0.0),
Vectors.dense(0.6, -1.1, -3.0),
Vectors.sparse(3, Seq((1, 0.91), (2, 3.2))),
Vectors.sparse(3, Seq((0, 5.7), (1, 0.72), (2, 2.7))),
Vectors.sparse(3, Seq())
)
// Expected results paired with `data` in the test that calls setP(1); the
// values match each input divided by the sum of its absolute values
// (e.g. 0.6 / 4.7 = 0.12765957). The fifth entry is dense although the
// corresponding input is sparse.
l1Normalized = Array(
Vectors.sparse(3, Seq((0, -0.465116279), (1, 0.53488372))),
Vectors.dense(0.0, 0.0, 0.0),
Vectors.dense(0.12765957, -0.23404255, -0.63829787),
Vectors.sparse(3, Seq((1, 0.22141119), (2, 0.7785888))),
Vectors.dense(0.625, 0.07894737, 0.29605263),
Vectors.sparse(3, Seq())
)
// Expected results paired with `data` in the default-parameter test; the
// values match each input divided by its Euclidean length
// (e.g. -2.0 / sqrt(4 + 5.29) = -0.65617871).
l2Normalized = Array(
Vectors.sparse(3, Seq((0, -0.65617871), (1, 0.75460552))),
Vectors.dense(0.0, 0.0, 0.0),
Vectors.dense(0.184549876, -0.3383414, -0.922749378),
Vectors.sparse(3, Seq((1, 0.27352993), (2, 0.96186349))),
Vectors.dense(0.897906166, 0.113419726, 0.42532397),
Vectors.sparse(3, Seq())
)
}

def assertTypeOfVector(lhs: Vector, rhs: Vector): Unit = {
assert((lhs, rhs) match {
Expand All @@ -48,16 +73,8 @@ class NormalizerSuite extends MLTest with DefaultReadWriteTest {
}

test("Normalization with default parameter") {
val expected = Seq(
Vectors.sparse(3, Seq((0, -0.65617871), (1, 0.75460552))),
Vectors.dense(0.0, 0.0, 0.0),
Vectors.dense(0.184549876, -0.3383414, -0.922749378),
Vectors.sparse(3, Seq((1, 0.27352993), (2, 0.96186349))),
Vectors.dense(0.897906166, 0.113419726, 0.42532397),
Vectors.sparse(3, Seq())
)
val dataFrame: DataFrame = data.zip(expected).seq.toDF("features", "expected")
val normalizer = new Normalizer().setInputCol("features").setOutputCol("normalized")
val dataFrame: DataFrame = data.zip(l2Normalized).seq.toDF("features", "expected")

testTransformer[(Vector, Vector)](dataFrame, normalizer, "features", "normalized", "expected") {
case Row(features: Vector, normalized: Vector, expected: Vector) =>
Expand All @@ -67,15 +84,7 @@ class NormalizerSuite extends MLTest with DefaultReadWriteTest {
}

test("Normalization with setter") {
val expected = Seq(
Vectors.sparse(3, Seq((0, -0.465116279), (1, 0.53488372))),
Vectors.dense(0.0, 0.0, 0.0),
Vectors.dense(0.12765957, -0.23404255, -0.63829787),
Vectors.sparse(3, Seq((1, 0.22141119), (2, 0.7785888))),
Vectors.dense(0.625, 0.07894737, 0.29605263),
Vectors.sparse(3, Seq())
)
val dataFrame: DataFrame = data.zip(expected).seq.toDF("features", "expected")
val dataFrame: DataFrame = data.zip(l1Normalized).seq.toDF("features", "expected")
val normalizer = new Normalizer().setInputCol("features").setOutputCol("normalized").setP(1)

testTransformer[(Vector, Vector)](dataFrame, normalizer, "features", "normalized", "expected") {
Expand Down