updates to automl tutorial

rastala · rastala · commit 9b701ebaebe5 · 2018-10-02T14:33:28.000-04:00
diff --git a/tutorials/03.auto-train-models.ipynb b/tutorials/03.auto-train-models.ipynb
@@ -15,7 +15,7 @@
       "source": [
         "# Tutorial: Train a classification model with automated machine learning\n",
         "\n",
-        "In this tutorial, you'll learn how to generate a  machine learning model using automated machine learning (automated ML).  Azure Machine Learning can perform data preprocessing, algorithm selection and hyperparameter selection in an automated way for you. The final model can then be deployed following the workflow in the [Deploy a model](02.deploy-models.ipynb) tutorial.\n",
+        "In this tutorial, you'll learn how to generate a machine learning model using automated machine learning (automated ML). Azure Machine Learning can automatically perform algorithm selection and hyperparameter selection for you. The final model can then be deployed following the workflow in the [Deploy a model](02.deploy-models.ipynb) tutorial.\n",
         "\n",
         "[flow diagram](./imgs/flow2.png)\n",
         "\n",
@@ -133,8 +133,8 @@
         "digits = datasets.load_digits()\n",
         "\n",
         "# Exclude the first 100 rows from training so that they can be used for test.\n",
-        "X_digits = digits.data[100:,:]\n",
-        "y_digits = digits.target[100:]"
+        "X_train = digits.data[100:,:]\n",
+        "y_train = digits.target[100:]"
       ]
     },
     {
@@ -155,13 +155,13 @@
         "count = 0\n",
         "sample_size = 30\n",
         "plt.figure(figsize = (16, 6))\n",
-        "for i in np.random.permutation(X_digits.shape[0])[:sample_size]:\n",
+        "for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n",
         "    count = count + 1\n",
         "    plt.subplot(1, sample_size, count)\n",
         "    plt.axhline('')\n",
         "    plt.axvline('')\n",
-        "    plt.text(x = 2, y = -2, s = y_digits[i], fontsize = 18)\n",
-        "    plt.imshow(X_digits[i].reshape(8, 8), cmap = plt.cm.Greys)\n",
+        "    plt.text(x = 2, y = -2, s = y_train[i], fontsize = 18)\n",
+        "    plt.imshow(X_train[i].reshape(8, 8), cmap = plt.cm.Greys)\n",
         "plt.show()"
       ]
     },
@@ -187,8 +187,7 @@
         "|**max_time_sec**|12,000|Time limit in seconds for each iteration|\n",
         "|**iterations**|20|Number of iterations. In each iteration, the model trains with the data with a specific pipeline|\n",
         "|**n_cross_validations**|3|Number of cross validation splits|\n",
-        "|**preprocess**|False| *True/False* Enables experiment to perform preprocessing on the input.  Preprocessing handles *missing data*, and performs some common *feature extraction*|\n",
-        "|**exit_score**|0.995|*double* value indicating the target for *primary_metric*. Once the target is surpassed the run terminates|\n",
+        "|**exit_score**|0.9985|*double* value indicating the target for *primary_metric*. Once the target is surpassed, the run terminates|\n",
         "|**blacklist_algos**|['kNN','LinearSVM']|*Array* of *strings* indicating algorithms to ignore.\n"
       ]
     },
@@ -210,11 +209,10 @@
         "                             max_time_sec = 12000,\n",
         "                             iterations = 20,\n",
         "                             n_cross_validations = 3,\n",
-        "                             preprocess = False,\n",
         "                             exit_score = 0.9985,\n",
         "                             blacklist_algos = ['kNN','LinearSVM'],\n",
-        "                             X = X_digits,\n",
-        "                             y = y_digits,\n",
+        "                             X = X_train,\n",
+        "                             y = y_train,\n",
         "                             path=project_folder)"
       ]
     },
@@ -351,8 +349,10 @@
       "source": [
         "# find 30 random samples from test set\n",
         "n = 30\n",
-        "sample_indices = np.random.permutation(X_digits.shape[0])[0:n]\n",
-        "test_samples = X_digits[sample_indices]\n",
+        "X_test = digits.data[:100, :]\n",
+        "y_test = digits.target[:100]\n",
+        "sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
+        "test_samples = X_test[sample_indices]\n",
         "\n",
         "\n",
         "# predict using the  model\n",
@@ -368,11 +368,11 @@
         "    plt.axvline('')\n",
         "    \n",
         "    # use different color for misclassified sample\n",
-        "    font_color = 'red' if y_digits[s] != result[i] else 'black'\n",
-        "    clr_map = plt.cm.gray if y_digits[s] != result[i] else plt.cm.Greys\n",
+        "    font_color = 'red' if y_test[s] != result[i] else 'black'\n",
+        "    clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
         "    \n",
         "    plt.text(x = 2, y = -2, s = result[i], fontsize = 18, color = font_color)\n",
-        "    plt.imshow(X_digits[s].reshape(8, 8), cmap = clr_map)\n",
+        "    plt.imshow(X_test[s].reshape(8, 8), cmap = clr_map)\n",
         "    \n",
         "    i = i + 1\n",
         "plt.show()"
@@ -393,7 +393,7 @@
         "> * Review training results\n",
         "> * Register the best model\n",
         "\n",
-        "Learn more about [how to configure settings for automatic training]() or [how to use automatic training on a remote resource]()."
+        "Learn more about [how to configure settings for automated machine learning](https://aka.ms/aml-how-configure-auto) or [how to use automated machine learning on a remote resource](https://aka.ms/aml-how-to-auto-remote)."
       ]
     }
   ],