@@ -185,34 +185,33 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
185185 >>> from pyspark.sql import Row
186186 >>> from pyspark.ml.linalg import Vectors
187187 >>> bdf = sc.parallelize([
188- ... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
189- ... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], []))]).toDF()
190- >>> blor = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight")
188+ ... Row(label=1.0, weight=1.0, features=Vectors.dense(0.0, 5.0)),
189+ ... Row(label=0.0, weight=2.0, features=Vectors.dense(1.0, 2.0)),
190+ ... Row(label=1.0, weight=3.0, features=Vectors.dense(2.0, 1.0)),
191+ ... Row(label=0.0, weight=4.0, features=Vectors.dense(3.0, 3.0))]).toDF()
192+ >>> blor = LogisticRegression(regParam=0.01, weightCol="weight")
191193 >>> blorModel = blor.fit(bdf)
192194 >>> blorModel.coefficients
193- DenseVector([5.4...])
195+ DenseVector([-1.080..., -0.646...])
194196 >>> blorModel.intercept
195- -2.63...
196- >>> mdf = sc.parallelize([
197- ... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
198- ... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], [])),
199- ... Row(label=2.0, weight=2.0, features=Vectors.dense(3.0))]).toDF()
200- >>> mlor = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight",
201- ... family="multinomial")
197+ 3.112...
198+ >>> data_path = "data/mllib/sample_multiclass_classification_data.txt"
199+ >>> mdf = spark.read.format("libsvm").load(data_path)
200+ >>> mlor = LogisticRegression(regParam=0.1, elasticNetParam=1.0, family="multinomial")
202201 >>> mlorModel = mlor.fit(mdf)
203202 >>> mlorModel.coefficientMatrix
204- DenseMatrix(3, 1, [-2.3..., 0.2..., 2.1...], 1)
203+ SparseMatrix(3, 4, [0, 1, 2, 3], [3, 2, 1], [1.87..., -2.75..., -0.50...], 1)
205204 >>> mlorModel.interceptVector
206- DenseVector([2.1..., 0.6..., -2.8...])
207- >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
205+ DenseVector([0.04..., -0.42..., 0.37...])
206+ >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 1.0))]).toDF()
208207 >>> result = blorModel.transform(test0).head()
209208 >>> result.prediction
210- 0.0
209+ 1.0
211210 >>> result.probability
212- DenseVector([0.99..., 0.00...])
211+ DenseVector([0.02..., 0.97...])
213212 >>> result.rawPrediction
214- DenseVector([8.12..., -8.12...])
215- >>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF()
213+ DenseVector([-3.54..., 3.54...])
214+ >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
216215 >>> blorModel.transform(test1).head().prediction
217216 1.0
218217 >>> blor.setParams("vector")
@@ -222,8 +221,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
222221 >>> lr_path = temp_path + "/lr"
223222 >>> blor.save(lr_path)
224223 >>> lr2 = LogisticRegression.load(lr_path)
225- >>> lr2.getMaxIter()
226- 5
224+ >>> lr2.getRegParam()
225+ 0.01
227226 >>> model_path = temp_path + "/lr_model"
228227 >>> blorModel.save(model_path)
229228 >>> model2 = LogisticRegressionModel.load(model_path)
@@ -1480,31 +1479,33 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
14801479
14811480 >>> from pyspark.sql import Row
14821481 >>> from pyspark.ml.linalg import Vectors
1483- >>> df = sc.parallelize([
1484- ... Row(label=0.0, features=Vectors.dense(1.0, 0.8)),
1485- ... Row(label=1.0, features=Vectors.sparse(2, [], [])),
1486- ... Row(label=2.0, features=Vectors.dense(0.5, 0.5))]).toDF()
1487- >>> lr = LogisticRegression(maxIter=5, regParam=0.01)
1482+ >>> data_path = "data/mllib/sample_multiclass_classification_data.txt"
1483+ >>> df = spark.read.format("libsvm").load(data_path)
1484+ >>> lr = LogisticRegression(regParam=0.01)
14881485 >>> ovr = OneVsRest(classifier=lr)
14891486 >>> model = ovr.fit(df)
1490- >>> [x.coefficients for x in model.models]
1491- [DenseVector([4.9791, 2.426]), DenseVector([-4.1198, -5.9326]), DenseVector([-3.314, 5.2423])]
1487+ >>> model.models[0].coefficients
1488+ DenseVector([0.5..., -1.0..., 3.4..., 4.2...])
1489+ >>> model.models[1].coefficients
1490+ DenseVector([-2.1..., 3.1..., -2.6..., -2.3...])
1491+ >>> model.models[2].coefficients
1492+ DenseVector([0.3..., -3.4..., 1.0..., -1.1...])
14921493 >>> [x.intercept for x in model.models]
1493- [-5.06544..., 2.30341..., -1.29133...]
1494- >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0))]).toDF()
1494+ [-2.7..., -2.5..., -1.3...]
1495+ >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0, 1.0, 1.0))]).toDF()
14951496 >>> model.transform(test0).head().prediction
1496- 1.0
1497- >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
1498- >>> model.transform(test1).head().prediction
14991497 0.0
1500- >>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4))]).toDF()
1501- >>> model.transform(test2).head().prediction
1498+ >>> test1 = sc.parallelize([Row(features=Vectors.sparse(4, [0], [1.0]))]).toDF()
1499+ >>> model.transform(test1).head().prediction
15021500 2.0
1501+ >>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4, 0.3, 0.2))]).toDF()
1502+ >>> model.transform(test2).head().prediction
1503+ 0.0
15031504 >>> model_path = temp_path + "/ovr_model"
15041505 >>> model.save(model_path)
15051506 >>> model2 = OneVsRestModel.load(model_path)
15061507 >>> model2.transform(test0).head().prediction
1507- 1.0
1508+ 0.0
15081509
15091510 .. versionadded:: 2.0.0
15101511 """
0 commit comments