Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Updated documentation
  • Loading branch information
Niketan Pansare committed Aug 8, 2016
commit ca671346e4e16134e0485ebf37de6d79e1254d30
14 changes: 14 additions & 0 deletions docs/algorithms-classification.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,15 @@ Eqs. (1) and (2).

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import SystemML as sml
# C = 1/reg
logistic = sml.mllearn.LogisticRegression(sqlCtx, fit_intercept=True, max_iter=100, max_inner_iter=0, tol=0.000001, C=1.0)
# X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames
y_test = logistic.fit(X_train, y_train).predict(X_test)
# df_train is a DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features".
y_test = logistic.fit(df_train).transform(df_test)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f MultiLogReg.dml
Expand Down Expand Up @@ -224,6 +226,7 @@ SystemML Language Reference for details.

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
# Scikit-learn way
from sklearn import datasets, neighbors
import SystemML as sml
Expand Down Expand Up @@ -272,6 +275,7 @@ test = sqlCtx.createDataFrame([
(15L, "apache hadoop")], ["id", "text"])
prediction = model.transform(test)
prediction.show()
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f MultiLogReg.dml
Expand Down Expand Up @@ -453,13 +457,15 @@ support vector machine (`y` with domain size `2`).

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import SystemML as sml
# C = 1/reg
svm = sml.mllearn.SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=False)
# X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames
y_test = svm.fit(X_train, y_train)
# df_train is a DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features".
y_test = svm.fit(df_train)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f l2-svm.dml
Expand Down Expand Up @@ -497,10 +503,12 @@ y_test = svm.fit(df_train)

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
# X_test can be a NumPy matrix or a Pandas DataFrame
y_test = svm.predict(X_test)
# df_test is a DataFrame that contains the column "features" of type Vector
y_test = svm.transform(df_test)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f l2-svm-predict.dml
Expand Down Expand Up @@ -705,13 +713,15 @@ class labels.

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import SystemML as sml
# C = 1/reg
svm = sml.mllearn.SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=True)
# X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames
y_test = svm.fit(X_train, y_train)
# df_train is a DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features".
y_test = svm.fit(df_train)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f m-svm.dml
Expand Down Expand Up @@ -749,10 +759,12 @@ y_test = svm.fit(df_train)

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
# X_test can be a NumPy matrix or a Pandas DataFrame
y_test = svm.predict(X_test)
# df_test is a DataFrame that contains the column "features" of type Vector
y_test = svm.transform(df_test)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f m-svm-predict.dml
Expand Down Expand Up @@ -837,6 +849,7 @@ SystemML Language Reference for details.

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
# Scikit-learn way
from sklearn import datasets, neighbors
import SystemML as sml
Expand Down Expand Up @@ -885,6 +898,7 @@ test = sqlCtx.createDataFrame([
(15L, "apache hadoop")], ["id", "text"])
prediction = model.transform(test)
prediction.show()
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f m-svm.dml
Expand Down
8 changes: 8 additions & 0 deletions docs/algorithms-regression.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,15 @@ efficient when the number of features $m$ is relatively small

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import SystemML as sml
# C = 1/reg
lr = sml.mllearn.LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='direct-solve')
# X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames
y_test = lr.fit(X_train, y_train)
# df_train is a DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features".
y_test = lr.fit(df_train)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f LinearRegDS.dml
Expand Down Expand Up @@ -121,13 +123,15 @@ y_test = lr.fit(df_train)

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import SystemML as sml
# C = 1/reg
lr = sml.mllearn.LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='newton-cg')
# X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames
y_test = lr.fit(X_train, y_train)
# df_train is a DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features".
y_test = lr.fit(df_train)
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f LinearRegCG.dml
Expand Down Expand Up @@ -215,6 +219,7 @@ SystemML Language Reference for details.

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import numpy as np
from sklearn import datasets
import SystemML as sml
Expand All @@ -235,6 +240,7 @@ regr = sml.mllearn.LinearRegression(sqlCtx, solver='direct-solve')
regr.fit(diabetes_X_train, diabetes_y_train)
# The mean squared error
print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2))
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f LinearRegDS.dml
Expand Down Expand Up @@ -268,6 +274,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) -

<div class="codetabs">
<div data-lang="Python" markdown="1">
{% highlight python %}
import numpy as np
from sklearn import datasets
import SystemML as sml
Expand All @@ -288,6 +295,7 @@ regr = sml.mllearn.LinearRegression(sqlCtx, solver='newton-cg')
regr.fit(diabetes_X_train, diabetes_y_train)
# The mean squared error
print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2))
{% endhighlight %}
</div>
<div data-lang="Hadoop" markdown="1">
hadoop jar SystemML.jar -f LinearRegCG.dml
Expand Down