Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Update notebooks
  • Loading branch information
rastala committed Sep 24, 2018
commit a6c6e63451d53f0ed9635c45a4aaab1ab2ddddc9
34 changes: 5 additions & 29 deletions 00.configuration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"## Prerequisites:\n",
"\n",
"### 1. Install Azure ML SDK\n",
"Follow [SDK installation instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment).\n",
"Follow [SDK installation instructions](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-environment).\n",
"\n",
"### 2. Install some additional packages\n",
"This notebook requires some additional libraries. In the conda environment, run the commands below: \n",
Expand Down Expand Up @@ -185,35 +185,11 @@
},
"outputs": [],
"source": [
"# load workspace configuratio from ./aml_config/config.json file.ß\n",
"# load workspace configuration from ./aml_config/config.json file.\n",
"my_workspace = Workspace.from_config()\n",
"my_workspace.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a folder to host all sample projects\n",
"Lastly, create a folder where all the sample projects will be hosted."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"sample_projects_folder = './sample_projects'\n",
"\n",
"if not os.path.isdir(sample_projects_folder):\n",
" os.mkdir(sample_projects_folder)\n",
" \n",
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -225,9 +201,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -239,7 +215,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.4"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,16 @@
" os.remove(path=model_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# now let's take a look at the experiment in Azure portal.\n",
"experiment"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -778,9 +788,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {
Expand Down
138 changes: 52 additions & 86 deletions 01.getting-started/02.train-on-local/02.train-on-local.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -88,27 +88,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a folder to store the training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"script_folder = './samples/train-on-local'\n",
"os.makedirs(script_folder, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create `train.py`\n",
"## View `train.py`\n",
"\n",
"Use `%%writefile` magic to write training code to `train.py` file under your script folder."
"`train.py` is already created for you."
]
},
{
Expand All @@ -117,59 +99,15 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/train.py\n",
"\n",
"import os\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from azureml.core.run import Run\n",
"from sklearn.externals import joblib\n",
"\n",
"# example of referencing another script\n",
"import mylib\n",
"\n",
"X, y = load_diabetes(return_X_y=True)\n",
"\n",
"run = Run.get_submitted_run()\n",
"\n",
"X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
"\n",
"# example of referencing another script\n",
"alphas = mylib.get_alphas()\n",
"\n",
"for alpha in alphas:\n",
" # Use Ridge algorithm to create a regression model\n",
" reg = Ridge(alpha=alpha)\n",
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
"\n",
" preds = reg.predict(data[\"test\"][\"X\"])\n",
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
" run.log('alpha', alpha)\n",
" run.log('mse', mse)\n",
"\n",
" model_file_name='ridge_{0:.2f}.pkl'.format(alpha)\n",
" # save model in the outputs folder so it automatically get uploaded\n",
" with open(model_file_name, \"wb\") as file:\n",
" joblib.dump(value=reg, filename=model_file_name)\n",
" \n",
" # upload the model file explicitly into artifacts \n",
" run.upload_file(name=model_file_name, path_or_stream=model_file_name)\n",
" \n",
" # register the model\n",
" run.register_model(model_name='diabetes-model', model_path=model_file_name)\n",
"\n",
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
"with open('./train.py', 'r') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`train.py` also references a `mylib.py` file. So let's create that too."
"Note that `train.py` also references a `mylib.py` file."
]
},
{
Expand All @@ -178,12 +116,8 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/mylib.py\n",
"import numpy as np\n",
"\n",
"def get_alphas():\n",
" # list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
" return np.arange(0.0, 1.0, 0.05)"
"with open('./mylib.py', 'r') as f:\n",
" print(f.read())"
]
},
{
Expand All @@ -209,7 +143,7 @@
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
"\n",
"# You can choose a specific Python environment by pointing to a Python path \n",
"#run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'"
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
]
},
{
Expand All @@ -228,9 +162,8 @@
"source": [
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_user_managed)\n",
"run = exp.submit(src)\n",
"run.wait_for_completion(show_output=True)"
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
"run = exp.submit(src)"
]
},
{
Expand All @@ -249,6 +182,22 @@
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block and wait until the run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -290,9 +239,8 @@
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_system_managed)\n",
"run = exp.submit(src)\n",
"run.wait_for_completion(show_output = True)"
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
"run = exp.submit(src)"
]
},
{
Expand All @@ -311,12 +259,30 @@
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block and wait until the run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Docker-based execution\n",
"**NOTE** You must have Docker engine installed locally in order to use this execution mode. You can also ask the system to pull down a Docker image and execute your scripts in it."
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
"\n",
"You can also ask the system to pull down a Docker image and execute your scripts in it."
]
},
{
Expand Down Expand Up @@ -356,7 +322,7 @@
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_docker)\n",
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)\n",
"run = exp.submit(src)"
]
},
Expand All @@ -376,7 +342,7 @@
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
"run.wait_for_completion(show_output=True)"
]
},
{
Expand Down Expand Up @@ -455,7 +421,7 @@
"outputs": [],
"source": [
"# supply a model name, and the full path to the serialized model file.\n",
"model = run.register_model(model_name='best_ridge_model', model_path='ridge_0.40.pkl')"
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
]
},
{
Expand All @@ -477,9 +443,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {
Expand Down
9 changes: 9 additions & 0 deletions 01.getting-started/02.train-on-local/mylib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

import numpy as np


def get_alphas():
    """Return the candidate alpha values that train.py sweeps over.

    Returns:
        numpy.ndarray: 20 evenly spaced floats starting at 0.0 with a step
        of 0.05 (0.0, 0.05, ..., 0.95). The stop value 1.0 is not part of
        the result.
    """
    # np.arange builds a half-open range [start, stop), so the last value
    # produced here is 0.95, not 1.0.
    return np.arange(0.0, 1.0, 0.05)
24 changes: 12 additions & 12 deletions 01.getting-started/02.train-on-local/train.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run
from sklearn.externals import joblib

import os
import numpy as np
import mylib

# os.makedirs('./outputs', exist_ok = True)
os.makedirs('./outputs', exist_ok=True)

X, y = load_diabetes(return_X_y=True)

run = Run.get_submitted_run()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2,
random_state=0)
data = {"train": {"X": X_train, "y": y_train},
"test": {"X": X_test, "y": y_test}}

# list of numbers from 0.0 to 1.0 with a 0.05 interval
alphas = np.arange(0.0, 1.0, 0.05)
alphas = mylib.get_alphas()

for alpha in alphas:
# Use Ridge algorithm to create a regression model
Expand All @@ -33,13 +39,7 @@
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
# save model in the outputs folder so it automatically gets uploaded
with open(model_file_name, "wb") as file:
joblib.dump(value=reg, filename=model_file_name)

# upload the model file explicitly into artifacts
run.upload_file(name=model_file_name, path_or_stream=model_file_name)

# register the model
# commented out for now until a bug is fixed
# run.register_model(file_name = model_file_name)
joblib.dump(value=reg, filename=os.path.join('./outputs/',
model_file_name))

print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
Loading