Skip to content

Commit ecf8564

Browse files
authored
Merge pull request Azure#18 from rastala/master
Update notebooks
2 parents 867538c + a6c6e63 commit ecf8564

File tree

49 files changed

+357
-2848
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+357
-2848
lines changed

00.configuration.ipynb

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"## Prerequisites:\n",
1919
"\n",
2020
"### 1. Install Azure ML SDK\n",
21-
"Follow [SDK installation instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment).\n",
21+
"Follow [SDK installation instructions](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-environment).\n",
2222
"\n",
2323
"### 2. Install some additional packages\n",
2424
"This Notebook requires some additional libraries. In the conda environment, run below commands: \n",
@@ -185,35 +185,11 @@
185185
},
186186
"outputs": [],
187187
"source": [
188-
"# load workspace configuratio from ./aml_config/config.json file.ß\n",
188+
"# load workspace configuration from ./aml_config/config.json file.\n",
189189
"my_workspace = Workspace.from_config()\n",
190190
"my_workspace.get_details()"
191191
]
192192
},
193-
{
194-
"cell_type": "markdown",
195-
"metadata": {},
196-
"source": [
197-
"## Create a folder to host all sample projects\n",
198-
"Lastly, create a folder where all the sample projects will be hosted."
199-
]
200-
},
201-
{
202-
"cell_type": "code",
203-
"execution_count": null,
204-
"metadata": {},
205-
"outputs": [],
206-
"source": [
207-
"import os\n",
208-
"\n",
209-
"sample_projects_folder = './sample_projects'\n",
210-
"\n",
211-
"if not os.path.isdir(sample_projects_folder):\n",
212-
" os.mkdir(sample_projects_folder)\n",
213-
" \n",
214-
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
215-
]
216-
},
217193
{
218194
"cell_type": "markdown",
219195
"metadata": {},
@@ -225,9 +201,9 @@
225201
],
226202
"metadata": {
227203
"kernelspec": {
228-
"display_name": "Python 3",
204+
"display_name": "Python 3.6",
229205
"language": "python",
230-
"name": "python3"
206+
"name": "python36"
231207
},
232208
"language_info": {
233209
"codemirror_mode": {
@@ -239,7 +215,7 @@
239215
"name": "python",
240216
"nbconvert_exporter": "python",
241217
"pygments_lexer": "ipython3",
242-
"version": "3.6.5"
218+
"version": "3.6.4"
243219
}
244220
},
245221
"nbformat": 4,

01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,16 @@
277277
" os.remove(path=model_name)"
278278
]
279279
},
280+
{
281+
"cell_type": "code",
282+
"execution_count": null,
283+
"metadata": {},
284+
"outputs": [],
285+
"source": [
286+
"# now let's take a look at the experiment in Azure portal.\n",
287+
"experiment"
288+
]
289+
},
280290
{
281291
"cell_type": "markdown",
282292
"metadata": {},
@@ -778,9 +788,9 @@
778788
],
779789
"metadata": {
780790
"kernelspec": {
781-
"display_name": "Python 3",
791+
"display_name": "Python 3.6",
782792
"language": "python",
783-
"name": "python3"
793+
"name": "python36"
784794
},
785795
"language_info": {
786796
"codemirror_mode": {

01.getting-started/02.train-on-local/02.train-on-local.ipynb

Lines changed: 52 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -88,27 +88,9 @@
8888
"cell_type": "markdown",
8989
"metadata": {},
9090
"source": [
91-
"Create a folder to store the training script."
92-
]
93-
},
94-
{
95-
"cell_type": "code",
96-
"execution_count": null,
97-
"metadata": {},
98-
"outputs": [],
99-
"source": [
100-
"import os\n",
101-
"script_folder = './samples/train-on-local'\n",
102-
"os.makedirs(script_folder, exist_ok=True)"
103-
]
104-
},
105-
{
106-
"cell_type": "markdown",
107-
"metadata": {},
108-
"source": [
109-
"## Create `train.py`\n",
91+
"## View `train.py`\n",
11092
"\n",
111-
"Use `%%writefile` magic to write training code to `train.py` file under your script folder."
93+
"`train.py` is already created for you."
11294
]
11395
},
11496
{
@@ -117,59 +99,15 @@
11799
"metadata": {},
118100
"outputs": [],
119101
"source": [
120-
"%%writefile $script_folder/train.py\n",
121-
"\n",
122-
"import os\n",
123-
"from sklearn.datasets import load_diabetes\n",
124-
"from sklearn.linear_model import Ridge\n",
125-
"from sklearn.metrics import mean_squared_error\n",
126-
"from sklearn.model_selection import train_test_split\n",
127-
"from azureml.core.run import Run\n",
128-
"from sklearn.externals import joblib\n",
129-
"\n",
130-
"# example of referencing another script\n",
131-
"import mylib\n",
132-
"\n",
133-
"X, y = load_diabetes(return_X_y=True)\n",
134-
"\n",
135-
"run = Run.get_submitted_run()\n",
136-
"\n",
137-
"X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)\n",
138-
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
139-
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
140-
"\n",
141-
"# example of referencing another script\n",
142-
"alphas = mylib.get_alphas()\n",
143-
"\n",
144-
"for alpha in alphas:\n",
145-
" # Use Ridge algorithm to create a regression model\n",
146-
" reg = Ridge(alpha=alpha)\n",
147-
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
148-
"\n",
149-
" preds = reg.predict(data[\"test\"][\"X\"])\n",
150-
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
151-
" run.log('alpha', alpha)\n",
152-
" run.log('mse', mse)\n",
153-
"\n",
154-
" model_file_name='ridge_{0:.2f}.pkl'.format(alpha)\n",
155-
" # save model in the outputs folder so it automatically get uploaded\n",
156-
" with open(model_file_name, \"wb\") as file:\n",
157-
" joblib.dump(value=reg, filename=model_file_name)\n",
158-
" \n",
159-
" # upload the model file explicitly into artifacts \n",
160-
" run.upload_file(name=model_file_name, path_or_stream=model_file_name)\n",
161-
" \n",
162-
" # register the model\n",
163-
" run.register_model(model_name='diabetes-model', model_path=model_file_name)\n",
164-
"\n",
165-
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
102+
"with open('./train.py', 'r') as f:\n",
103+
" print(f.read())"
166104
]
167105
},
168106
{
169107
"cell_type": "markdown",
170108
"metadata": {},
171109
"source": [
172-
"`train.py` also references a `mylib.py` file. So let's create that too."
110+
"Note `train.py` also references a `mylib.py` file."
173111
]
174112
},
175113
{
@@ -178,12 +116,8 @@
178116
"metadata": {},
179117
"outputs": [],
180118
"source": [
181-
"%%writefile $script_folder/mylib.py\n",
182-
"import numpy as np\n",
183-
"\n",
184-
"def get_alphas():\n",
185-
" # list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
186-
" return np.arange(0.0, 1.0, 0.05)"
119+
"with open('./mylib.py', 'r') as f:\n",
120+
" print(f.read())"
187121
]
188122
},
189123
{
@@ -209,7 +143,7 @@
209143
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
210144
"\n",
211145
"# You can choose a specific Python environment by pointing to a Python path \n",
212-
"#run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'"
146+
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
213147
]
214148
},
215149
{
@@ -228,9 +162,8 @@
228162
"source": [
229163
"from azureml.core import ScriptRunConfig\n",
230164
"\n",
231-
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_user_managed)\n",
232-
"run = exp.submit(src)\n",
233-
"run.wait_for_completion(show_output=True)"
165+
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
166+
"run = exp.submit(src)"
234167
]
235168
},
236169
{
@@ -249,6 +182,22 @@
249182
"run"
250183
]
251184
},
185+
{
186+
"cell_type": "markdown",
187+
"metadata": {},
188+
"source": [
189+
"Block to wait till run finishes."
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": null,
195+
"metadata": {},
196+
"outputs": [],
197+
"source": [
198+
"run.wait_for_completion(show_output=True)"
199+
]
200+
},
252201
{
253202
"cell_type": "markdown",
254203
"metadata": {},
@@ -290,9 +239,8 @@
290239
"metadata": {},
291240
"outputs": [],
292241
"source": [
293-
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_system_managed)\n",
294-
"run = exp.submit(src)\n",
295-
"run.wait_for_completion(show_output = True)"
242+
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
243+
"run = exp.submit(src)"
296244
]
297245
},
298246
{
@@ -311,12 +259,30 @@
311259
"run"
312260
]
313261
},
262+
{
263+
"cell_type": "markdown",
264+
"metadata": {},
265+
"source": [
266+
"Block and wait till run finishes."
267+
]
268+
},
269+
{
270+
"cell_type": "code",
271+
"execution_count": null,
272+
"metadata": {},
273+
"outputs": [],
274+
"source": [
275+
"run.wait_for_completion(show_output = True)"
276+
]
277+
},
314278
{
315279
"cell_type": "markdown",
316280
"metadata": {},
317281
"source": [
318282
"### Docker-based execution\n",
319-
"**NOTE** You must have Docker engine installed locally in order to use this execution mode. You can also ask the system to pull down a Docker image and execute your scripts in it."
283+
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
284+
"\n",
285+
"You can also ask the system to pull down a Docker image and execute your scripts in it."
320286
]
321287
},
322288
{
@@ -356,7 +322,7 @@
356322
"metadata": {},
357323
"outputs": [],
358324
"source": [
359-
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_docker)\n",
325+
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)\n",
360326
"run = exp.submit(src)"
361327
]
362328
},
@@ -376,7 +342,7 @@
376342
"metadata": {},
377343
"outputs": [],
378344
"source": [
379-
"run.wait_for_completion(show_output = True)"
345+
"run.wait_for_completion(show_output=True)"
380346
]
381347
},
382348
{
@@ -455,7 +421,7 @@
455421
"outputs": [],
456422
"source": [
457423
"# supply a model name, and the full path to the serialized model file.\n",
458-
"model = run.register_model(model_name='best_ridge_model', model_path='ridge_0.40.pkl')"
424+
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
459425
]
460426
},
461427
{
@@ -477,9 +443,9 @@
477443
],
478444
"metadata": {
479445
"kernelspec": {
480-
"display_name": "Python [default]",
446+
"display_name": "Python 3.6",
481447
"language": "python",
482-
"name": "python3"
448+
"name": "python36"
483449
},
484450
"language_info": {
485451
"codemirror_mode": {
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Copyright (c) Microsoft. All rights reserved.
2+
# Licensed under the MIT license.
3+
4+
import numpy as np
5+
6+
7+
def get_alphas():
8+
# list of numbers from 0.0 up to (but not including) 1.0, in steps of 0.05
9+
return np.arange(0.0, 1.0, 0.05)
Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,30 @@
1+
# Copyright (c) Microsoft. All rights reserved.
2+
# Licensed under the MIT license.
3+
14
from sklearn.datasets import load_diabetes
25
from sklearn.linear_model import Ridge
36
from sklearn.metrics import mean_squared_error
47
from sklearn.model_selection import train_test_split
58
from azureml.core.run import Run
69
from sklearn.externals import joblib
7-
10+
import os
811
import numpy as np
12+
import mylib
913

10-
# os.makedirs('./outputs', exist_ok = True)
14+
os.makedirs('./outputs', exist_ok=True)
1115

1216
X, y = load_diabetes(return_X_y=True)
1317

1418
run = Run.get_submitted_run()
1519

16-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
20+
X_train, X_test, y_train, y_test = train_test_split(X, y,
21+
test_size=0.2,
22+
random_state=0)
1723
data = {"train": {"X": X_train, "y": y_train},
1824
"test": {"X": X_test, "y": y_test}}
1925

2026
# list of numbers from 0.0 up to (but not including) 1.0, in steps of 0.05
21-
alphas = np.arange(0.0, 1.0, 0.05)
27+
alphas = mylib.get_alphas()
2228

2329
for alpha in alphas:
2430
# Use Ridge algorithm to create a regression model
@@ -33,13 +39,7 @@
3339
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
3440
# save the model in the outputs folder so it automatically gets uploaded
3541
with open(model_file_name, "wb") as file:
36-
joblib.dump(value=reg, filename=model_file_name)
37-
38-
# upload the model file explicitly into artifacts
39-
run.upload_file(name=model_file_name, path_or_stream=model_file_name)
40-
41-
# register the model
42-
# commented out for now until a bug is fixed
43-
# run.register_model(file_name = model_file_name)
42+
joblib.dump(value=reg, filename=os.path.join('./outputs/',
43+
model_file_name))
4444

4545
print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))

0 commit comments

Comments
 (0)