Update notebooks

Azure · rastala · Sep 24, 2018 · Sep 24, 2018 · Sep 24, 2018 · a6c6e63451d53f0ed9635c45a4aaab1ab2ddddc9
commit a6c6e63451d53f0ed9635c45a4aaab1ab2ddddc9
diff --git a/00.configuration.ipynb b/00.configuration.ipynb
@@ -18,7 +18,7 @@
     "## Prerequisites:\n",
     "\n",
     "### 1. Install Azure ML SDK\n",
-    "Follow [SDK installation instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment).\n",
+    "Follow [SDK installation instructions](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-environment).\n",
     "\n",
     "### 2. Install some additional packages\n",
     "This Notebook requires some additional libraries. In the conda environment, run below commands: \n",
@@ -185,35 +185,11 @@
    },
    "outputs": [],
    "source": [
-    "# load workspace configuratio from ./aml_config/config.json file.ß\n",
+    "# load workspace configuration from ./aml_config/config.json file.\n",
     "my_workspace = Workspace.from_config()\n",
     "my_workspace.get_details()"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create a folder to host all sample projects\n",
-    "Lastly, create a folder where all the sample projects will be hosted."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "sample_projects_folder = './sample_projects'\n",
-    "\n",
-    "if not os.path.isdir(sample_projects_folder):\n",
-    "    os.mkdir(sample_projects_folder)\n",
-    "    \n",
-    "print('Sample projects will be created in {}.'.format(sample_projects_folder))"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -225,9 +201,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.6",
    "language": "python",
-   "name": "python3"
+   "name": "python36"
   },
   "language_info": {
    "codemirror_mode": {
@@ -239,7 +215,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.4"
   }
  },
  "nbformat": 4,

diff --git a/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb b/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb
@@ -277,6 +277,16 @@
     "        os.remove(path=model_name)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# now let's take a look at the experiment in Azure portal.\n",
+    "experiment"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -778,9 +788,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.6",
    "language": "python",
-   "name": "python3"
+   "name": "python36"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/01.getting-started/02.train-on-local/02.train-on-local.ipynb b/01.getting-started/02.train-on-local/02.train-on-local.ipynb
@@ -88,27 +88,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Create a folder to store the training script."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "script_folder = './samples/train-on-local'\n",
-    "os.makedirs(script_folder, exist_ok=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create `train.py`\n",
+    "## View `train.py`\n",
     "\n",
-    "Use `%%writefile` magic to write training code to `train.py` file under your script folder."
+    "`train.py` is already created for you."
    ]
   },
   {
@@ -117,59 +99,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%writefile $script_folder/train.py\n",
-    "\n",
-    "import os\n",
-    "from sklearn.datasets import load_diabetes\n",
-    "from sklearn.linear_model import Ridge\n",
-    "from sklearn.metrics import mean_squared_error\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from azureml.core.run import Run\n",
-    "from sklearn.externals import joblib\n",
-    "\n",
-    "# example of referencing another script\n",
-    "import mylib\n",
-    "\n",
-    "X, y = load_diabetes(return_X_y=True)\n",
-    "\n",
-    "run = Run.get_submitted_run()\n",
-    "\n",
-    "X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)\n",
-    "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
-    "        \"test\": {\"X\": X_test, \"y\": y_test}}\n",
-    "\n",
-    "# example of referencing another script\n",
-    "alphas = mylib.get_alphas()\n",
-    "\n",
-    "for alpha in alphas:\n",
-    "    # Use Ridge algorithm to create a regression model\n",
-    "    reg = Ridge(alpha=alpha)\n",
-    "    reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
-    "\n",
-    "    preds = reg.predict(data[\"test\"][\"X\"])\n",
-    "    mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
-    "    run.log('alpha', alpha)\n",
-    "    run.log('mse', mse)\n",
-    "\n",
-    "    model_file_name='ridge_{0:.2f}.pkl'.format(alpha)\n",
-    "    # save model in the outputs folder so it automatically get uploaded\n",
-    "    with open(model_file_name, \"wb\") as file:\n",
-    "        joblib.dump(value=reg, filename=model_file_name)\n",
-    "        \n",
-    "    # upload the model file explicitly into artifacts \n",
-    "    run.upload_file(name=model_file_name, path_or_stream=model_file_name)\n",
-    "    \n",
-    "    # register the model\n",
-    "    run.register_model(model_name='diabetes-model', model_path=model_file_name)\n",
-    "\n",
-    "    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
+    "with open('./train.py', 'r') as f:\n",
+    "    print(f.read())"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`train.py` also references a `mylib.py` file. So let's create that too."
+    "Note `train.py` also references a `mylib.py` file."
    ]
   },
   {
@@ -178,12 +116,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%writefile $script_folder/mylib.py\n",
-    "import numpy as np\n",
-    "\n",
-    "def get_alphas():\n",
-    "    # list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
-    "    return np.arange(0.0, 1.0, 0.05)"
+    "with open('./mylib.py', 'r') as f:\n",
+    "    print(f.read())"
    ]
   },
   {
@@ -209,7 +143,7 @@
     "run_config_user_managed.environment.python.user_managed_dependencies = True\n",
     "\n",
     "# You can choose a specific Python environment by pointing to a Python path \n",
-    "#run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'"
+    "#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
    ]
   },
   {
@@ -228,9 +162,8 @@
    "source": [
     "from azureml.core import ScriptRunConfig\n",
     "\n",
-    "src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_user_managed)\n",
-    "run = exp.submit(src)\n",
-    "run.wait_for_completion(show_output=True)"
+    "src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
+    "run = exp.submit(src)"
    ]
   },
   {
@@ -249,6 +182,22 @@
     "run"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Block and wait until the run finishes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run.wait_for_completion(show_output=True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -290,9 +239,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_system_managed)\n",
-    "run = exp.submit(src)\n",
-    "run.wait_for_completion(show_output = True)"
+    "src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
+    "run = exp.submit(src)"
    ]
   },
   {
@@ -311,12 +259,30 @@
     "run"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Block and wait until the run finishes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run.wait_for_completion(show_output = True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### Docker-based execution\n",
-    "**NOTE** You must have Docker engine installed locally in order to use this execution mode. You can also ask the system to pull down a Docker image and execute your scripts in it."
+    "**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
+    "\n",
+    "You can also ask the system to pull down a Docker image and execute your scripts in it."
    ]
   },
   {
@@ -356,7 +322,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_docker)\n",
+    "src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)\n",
     "run = exp.submit(src)"
    ]
   },
@@ -376,7 +342,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "run.wait_for_completion(show_output = True)"
+    "run.wait_for_completion(show_output=True)"
    ]
   },
   {
@@ -455,7 +421,7 @@
    "outputs": [],
    "source": [
     "# supply a model name, and the full path to the serialized model file.\n",
-    "model = run.register_model(model_name='best_ridge_model', model_path='ridge_0.40.pkl')"
+    "model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
    ]
   },
   {
@@ -477,9 +443,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [default]",
+   "display_name": "Python 3.6",
    "language": "python",
-   "name": "python3"
+   "name": "python36"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/01.getting-started/02.train-on-local/mylib.py b/01.getting-started/02.train-on-local/mylib.py
@@ -0,0 +1,9 @@
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license.
+
+import numpy as np
+
+
+def get_alphas():
+    # alpha values starting at 0.0 in steps of 0.05; the stop value 1.0 itself is not included
+    return np.arange(0.0, 1.0, 0.05)
diff --git a/01.getting-started/02.train-on-local/train.py b/01.getting-started/02.train-on-local/train.py
@@ -1,24 +1,30 @@
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license.
+
 from sklearn.datasets import load_diabetes
 from sklearn.linear_model import Ridge
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from azureml.core.run import Run
 from sklearn.externals import joblib
-
+import os
 import numpy as np
+import mylib
 
-# os.makedirs('./outputs', exist_ok = True)
+os.makedirs('./outputs', exist_ok=True)
 
 X, y = load_diabetes(return_X_y=True)
 
 run = Run.get_submitted_run()
 
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.2,
+                                                    random_state=0)
 data = {"train": {"X": X_train, "y": y_train},
         "test": {"X": X_test, "y": y_test}}
 
 # list of numbers from 0.0 to 1.0 with a 0.05 interval
-alphas = np.arange(0.0, 1.0, 0.05)
+alphas = mylib.get_alphas()
 
 for alpha in alphas:
     # Use Ridge algorithm to create a regression model
@@ -33,13 +39,7 @@
     model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
     # save model in the outputs folder so it automatically gets uploaded
     with open(model_file_name, "wb") as file:
-        joblib.dump(value=reg, filename=model_file_name)
-
-    # upload the model file explicitly into artifacts
-    run.upload_file(name=model_file_name, path_or_stream=model_file_name)
-
-    # register the model
-    # commented out for now until a bug is fixed
-    # run.register_model(file_name = model_file_name)
+        joblib.dump(value=reg, filename=os.path.join('./outputs/',
+                                                     model_file_name))
 
     print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))