diff --git a/.amlignore b/.amlignore
new file mode 100644
index 000000000..616345c4d
--- /dev/null
+++ b/.amlignore
@@ -0,0 +1,7 @@
+.ipynb_checkpoints
+azureml-logs
+.azureml
+.git
+outputs
+azureml-setup
+docs
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..94ae80d7a
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "C:\\Users\\sgilley\\.azureml\\envs\\jan3\\python.exe"
+}
\ No newline at end of file
diff --git a/aml_config/conda_dependencies.yml b/aml_config/conda_dependencies.yml
new file mode 100644
index 000000000..5e49a89d6
--- /dev/null
+++ b/aml_config/conda_dependencies.yml
@@ -0,0 +1,15 @@
+# Conda environment specification. The dependencies defined in this file will
+# be automatically provisioned for runs with userManagedDependencies=False.
+
+# Details about the Conda environment file format:
+# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
+
+name: project_environment
+dependencies:
+  # The python interpreter version.
+  # Currently Azure ML only supports 3.5.2 and later.
+- python=3.6.2
+
+- pip:
+    # Required packages for AzureML execution, history, and data preparation.
+  - azureml-defaults
diff --git a/aml_config/docker.runconfig b/aml_config/docker.runconfig
new file mode 100644
index 000000000..d79398c8c
--- /dev/null
+++ b/aml_config/docker.runconfig
@@ -0,0 +1,118 @@
+# The script to run.
+script: train.py
+# The arguments to the script file.
+arguments: []
+# The name of the compute target to use for this run.
+target: local
+# Framework to execute inside. Allowed values are "Python", "PySpark", "CNTK", "TensorFlow", and "PyTorch".
+framework: PySpark
+# Communicator for the given framework. Allowed values are "None", "ParameterServer", "OpenMpi", and "IntelMpi".
+communicator: None
+# Automatically prepare the run environment as part of the run itself.
+autoPrepareEnvironment: true
+# Maximum allowed duration for the run.
+maxRunDurationSeconds:
+# Number of nodes to use for running the job.
+nodeCount: 1
+# Environment details.
+environment:
+# Environment variables set for the run.
+  environmentVariables:
+    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
+# Python details
+  python:
+# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
+    userManagedDependencies: false
+# The python interpreter path
+    interpreterPath: python
+# Path to the conda dependencies file to use for this run. If a project
+# contains multiple programs with different sets of dependencies, it may be
+# convenient to manage those environments with separate files.
+    condaDependenciesFile: aml_config/conda_dependencies.yml
+# Docker details
+  docker:
+# Set True to perform this run inside a Docker container.
+    enabled: true
+# Base image used for Docker-based runs.
+    baseImage: mcr.microsoft.com/azureml/base:0.2.0
+# Set False if necessary to work around shared volume bugs.
+    sharedVolumes: true
+# Run with NVIDIA Docker extension to support GPUs.
+    gpuSupport: false
+# Extra arguments to the Docker run command.
+    arguments: []
+# Image registry that contains the base image.
+    baseImageRegistry:
+# DNS name or IP address of the Azure Container Registry (ACR).
+      address:
+# The username for ACR.
+      username:
+# The password for ACR.
+      password:
+# Spark details
+  spark:
+# List of Spark repositories.
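+# Each entry under "packages" below is a Maven coordinate; for example, the
+# mmlspark entry resolves to com.microsoft.ml.spark:mmlspark_2.11:0.12,
+# fetched from the listed repositories when the environment is prepared.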
+    repositories:
+      - https://mmlspark.azureedge.net/maven
+    packages:
+      - group: com.microsoft.ml.spark
+        artifact: mmlspark_2.11
+        version: '0.12'
+    precachePackages: true
+# Databricks details
+  databricks:
+# List of Maven libraries.
+    mavenLibraries: []
+# List of PyPi libraries.
+    pypiLibraries: []
+# List of RCran libraries.
+    rcranLibraries: []
+# List of JAR libraries.
+    jarLibraries: []
+# List of Egg libraries.
+    eggLibraries: []
+# History details.
+history:
+# Enable history tracking -- this allows status, logs, metrics, and outputs
+# to be collected for a run.
+  outputCollection: true
+# Whether to take snapshots for history.
+  snapshotProject: true
+# Spark configuration details.
+spark:
+  configuration:
+    spark.app.name: Azure ML Experiment
+    spark.yarn.maxAppAttempts: 1
+# HDI details.
+hdi:
+# Yarn deploy mode. Options are cluster and client.
+  yarnDeployMode: cluster
+# TensorFlow details.
+tensorflow:
+# The number of worker tasks.
+  workerCount: 1
+# The number of parameter server tasks.
+  parameterServerCount: 1
+# MPI details.
+mpi:
+# When using MPI, number of processes per node.
+  processCountPerNode: 1
+# Data reference configuration details.
+dataReferences: {}
+# Project share datastore reference.
+sourceDirectoryDataStore:
+# AmlCompute details.
+amlcompute:
+# VM size of the cluster to be created. Allowed values are Azure VM sizes; the list is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
+  vmSize:
+# VM priority of the cluster to be created. Allowed values are "dedicated" and "lowpriority".
+  vmPriority:
+# A bool that indicates if the cluster has to be retained after job completion.
+  retainCluster: false
+# Name of the cluster to be created. If not specified, runId will be used as the cluster name.
+  name:
+# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
+  clusterMaxNodeCount: 1
diff --git a/aml_config/local.runconfig b/aml_config/local.runconfig
new file mode 100644
index 000000000..ccfa6195b
--- /dev/null
+++ b/aml_config/local.runconfig
@@ -0,0 +1,119 @@
+# The script to run.
+script: train.py
+# The arguments to the script file.
+arguments: []
+# The name of the compute target to use for this run.
+target: local
+# Framework to execute inside. Allowed values are "Python", "PySpark", "CNTK", "TensorFlow", and "PyTorch".
+framework: Python
+# Communicator for the given framework. Allowed values are "None", "ParameterServer", "OpenMpi", and "IntelMpi".
+communicator: None
+# Automatically prepare the run environment as part of the run itself.
+autoPrepareEnvironment: true
+# Maximum allowed duration for the run.
+maxRunDurationSeconds:
+# Number of nodes to use for running the job.
+nodeCount: 1
+# Environment details.
+environment:
+# Environment variables set for the run.
+  environmentVariables:
+    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
+# Python details
+  python:
+# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
+    userManagedDependencies: false
+# The python interpreter path
+    interpreterPath: python
+# Path to the conda dependencies file to use for this run. If a project
+# contains multiple programs with different sets of dependencies, it may be
+# convenient to manage those environments with separate files.
+    condaDependenciesFile: aml_config/conda_dependencies.yml
+# Docker details
+  docker:
+# Set True to perform this run inside a Docker container.
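+# (Unlike docker.runconfig above, the local configuration leaves Docker off,
+# so the script runs directly in a locally managed conda environment.)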
+    enabled: false
+# Base image used for Docker-based runs.
+    baseImage: mcr.microsoft.com/azureml/base:0.2.0
+# Set False if necessary to work around shared volume bugs.
+    sharedVolumes: true
+# Run with NVIDIA Docker extension to support GPUs.
+    gpuSupport: false
+# Extra arguments to the Docker run command.
+    arguments: []
+# Image registry that contains the base image.
+    baseImageRegistry:
+# DNS name or IP address of the Azure Container Registry (ACR).
+      address:
+# The username for ACR.
+      username:
+# The password for ACR.
+      password:
+# Spark details
+  spark:
+# List of Spark repositories.
+    repositories:
+      - https://mmlspark.azureedge.net/maven
+    packages:
+      - group: com.microsoft.ml.spark
+        artifact: mmlspark_2.11
+        version: '0.12'
+    precachePackages: true
+# Databricks details
+  databricks:
+# List of Maven libraries.
+    mavenLibraries: []
+# List of PyPi libraries.
+    pypiLibraries: []
+# List of RCran libraries.
+    rcranLibraries: []
+# List of JAR libraries.
+    jarLibraries: []
+# List of Egg libraries.
+    eggLibraries: []
+# History details.
+history:
+# Enable history tracking -- this allows status, logs, metrics, and outputs
+# to be collected for a run.
+  outputCollection: true
+# Whether to take snapshots for history.
+  snapshotProject: true
+# Spark configuration details.
+spark:
+  configuration:
+    spark.app.name: Azure ML Experiment
+    spark.yarn.maxAppAttempts: 1
+# HDI details.
+hdi:
+# Yarn deploy mode. Options are cluster and client.
+  yarnDeployMode: cluster
+# TensorFlow details.
+tensorflow:
+# The number of worker tasks.
+  workerCount: 1
+# The number of parameter server tasks.
+  parameterServerCount: 1
+# MPI details.
+mpi:
+# When using MPI, number of processes per node.
+  processCountPerNode: 1
+# Data reference configuration details.
+dataReferences: {}
+# Project share datastore reference.
+sourceDirectoryDataStore:
+# AmlCompute details.
+amlcompute:
+# VM size of the cluster to be created. Allowed values are Azure VM sizes; the list is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
+  vmSize:
+# VM priority of the cluster to be created. Allowed values are "dedicated" and "lowpriority".
+  vmPriority:
+# A bool that indicates if the cluster has to be retained after job completion.
+  retainCluster: false
+# Name of the cluster to be created. If not specified, runId will be used as the cluster name.
+  name:
+# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
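+# (Because the minimum is pinned at 0, the cluster scales down to zero nodes
+# when idle and back up to this maximum while runs are queued.)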
+  clusterMaxNodeCount: 1
diff --git a/aml_config/project.json b/aml_config/project.json
new file mode 100644
index 000000000..dfedb75a2
--- /dev/null
+++ b/aml_config/project.json
@@ -0,0 +1 @@
+{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}
\ No newline at end of file
diff --git a/ignore/doc-qa/how-to-deploy-to-aci/how-to-deploy-to-aci.py b/ignore/doc-qa/how-to-deploy-to-aci/how-to-deploy-to-aci.py
new file mode 100644
index 000000000..76ec5f8f5
--- /dev/null
+++ b/ignore/doc-qa/how-to-deploy-to-aci/how-to-deploy-to-aci.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import azureml.core
+print('SDK version: ' + azureml.core.VERSION)
+
+# PREREQ: load workspace info
+# import azureml.core
+
+#
+from azureml.core import Workspace
+ws = Workspace.from_config()
+#
+
+scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n    global model\n    # retrieve the path to the model file using the model name\n    model_path = Model.get_model_path('sklearn_mnist')\n    model = joblib.load(model_path)\n\ndef run(raw_data):\n    data = np.array(json.loads(raw_data)['data'])\n    # make prediction\n    y_hat = model.predict(data)\n    return json.dumps(y_hat.tolist())"
+print(scorepy_content)
+with open("score.py", "w") as f:
+    f.write(scorepy_content)
+
+
+# PREREQ: create environment file
+from azureml.core.conda_dependencies import CondaDependencies
+
+myenv = CondaDependencies()
+myenv.add_conda_package("scikit-learn")
+
+with open("myenv.yml", "w") as f:
+    f.write(myenv.serialize_to_string())
+
+#
+from azureml.core.image import ContainerImage
+
+image_config = ContainerImage.image_configuration(execution_script = "score.py",
+                                                  runtime = "python",
+                                                  conda_file = "myenv.yml",
+                                                  description = "Image with mnist model",
+                                                  tags = {"data": "mnist", "type": "classification"}
+                                                  )
+#
+
+#
+from azureml.core.webservice import AciWebservice
+
+aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,
+                                               memory_gb = 1,
+                                               tags = {"data": "mnist", "type": "classification"},
+                                               description = 'Handwriting recognition')
+#
+
+#
+from azureml.core.model import Model
+
+model_name = "sklearn_mnist"
+model = Model.register(model_path = "sklearn_mnist_model.pkl",
+                       model_name = model_name,
+                       tags = {"data": "mnist", "type": "classification"},
+                       description = "Mnist handwriting recognition",
+                       workspace = ws)
+#
+
+#
+from azureml.core.model import Model
+
+model_name = "sklearn_mnist"
+model = Model(ws, model_name)
+#
+
+
+# ## DEPLOY FROM REGISTERED MODEL
+
+#
+from azureml.core.webservice import Webservice
+
+service_name = 'aci-mnist-2'
+service = Webservice.deploy_from_model(deployment_config = aciconfig,
+                                       image_config = image_config,
+                                       models = [model],  # this is the registered model object
+                                       name = service_name,
+                                       workspace = ws)
+service.wait_for_deployment(show_output = True)
+print(service.state)
+#
+
+service.delete()
+
+# ## DEPLOY FROM IMAGE
+
+
+#
+from azureml.core.image import ContainerImage
+
+image = ContainerImage.create(name = "myimage1",
+                              models = [model],  # this is the registered model object
+                              image_config = image_config,
+                              workspace = ws)
+
+image.wait_for_creation(show_output = True)
+#
+
+#
+from azureml.core.webservice import Webservice
+
+service_name = 'aci-mnist-13'
+service = Webservice.deploy_from_image(deployment_config = aciconfig,
+                                       image = image,
+                                       name = service_name,
+                                       workspace = ws)
+service.wait_for_deployment(show_output = True)
+print(service.state)
+#
+
+service.delete()
+
+
+# ## DEPLOY FROM MODEL FILE
+# First change score.py!
+
+
+
+scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n    global model\n    # retrieve the path to the model file using the model name\n    model_path = Model.get_model_path('sklearn_mnist_model.pkl')\n    model = joblib.load(model_path)\n\ndef run(raw_data):\n    data = np.array(json.loads(raw_data)['data'])\n    # make prediction\n    y_hat = model.predict(data)\n    return json.dumps(y_hat.tolist())"
+with open("score.py", "w") as f:
+    f.write(scorepy_content)
+
+
+
+#
+from azureml.core.webservice import Webservice
+
+service_name = 'aci-mnist-1'
+service = Webservice.deploy(deployment_config = aciconfig,
+                            image_config = image_config,
+                            model_paths = ['sklearn_mnist_model.pkl'],
+                            name = service_name,
+                            workspace = ws)
+
+service.wait_for_deployment(show_output = True)
+print(service.state)
+#
+
+#
+# Load Data
+import os
+import urllib.request
+
+os.makedirs('./data', exist_ok = True)
+
+urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/test-images.gz')
+
+from utils import load_data
+X_test = load_data('./data/test-images.gz', False) / 255.0
+
+from sklearn import datasets
+import numpy as np
+import json
+
+# find 5 random samples from the test set
+n = 5
+sample_indices = np.random.permutation(X_test.shape[0])[0:n]
+
+test_samples = json.dumps({"data": X_test[sample_indices].tolist()})
+test_samples = bytes(test_samples, encoding = 'utf8')
+
+# predict using the deployed model
+prediction = service.run(input_data = test_samples)
+print(prediction)
+#
+
+#
+service.delete()
+#
+
+
+
+
diff --git a/ignore/doc-qa/how-to-deploy-to-aci/sklearn_mnist_model.pkl b/ignore/doc-qa/how-to-deploy-to-aci/sklearn_mnist_model.pkl
new file mode 100644
index 000000000..135dd09ec
Binary files /dev/null and b/ignore/doc-qa/how-to-deploy-to-aci/sklearn_mnist_model.pkl differ
diff --git a/ignore/doc-qa/how-to-deploy-to-aci/utils.py b/ignore/doc-qa/how-to-deploy-to-aci/utils.py
new file mode 100644
index 000000000..98170adae
--- /dev/null
+++ b/ignore/doc-qa/how-to-deploy-to-aci/utils.py
@@ -0,0 +1,31 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
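+
+# Helpers for reading the gzipped MNIST "idx" files: the headers are
+# big-endian 32-bit integers, hence the '>I' struct unpacking below.
+# Typical use (see how-to-deploy-to-aci.py): load_data('./data/test-images.gz', False) / 255.0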
+
+import gzip
+import numpy as np
+import struct
+
+
+# load compressed MNIST gz files and return numpy arrays
+def load_data(filename, label=False):
+    with gzip.open(filename) as gz:
+        struct.unpack('I', gz.read(4))  # discard the magic number
+        n_items = struct.unpack('>I', gz.read(4))
+        if not label:
+            n_rows = struct.unpack('>I', gz.read(4))[0]
+            n_cols = struct.unpack('>I', gz.read(4))[0]
+            res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
+            res = res.reshape(n_items[0], n_rows * n_cols)
+        else:
+            res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
+            res = res.reshape(n_items[0], 1)
+    return res
+
+
+# one-hot encode a 1-D array
+def one_hot_encode(array, num_of_classes):
+    return np.eye(num_of_classes)[array.reshape(-1)]
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/Local.py b/ignore/doc-qa/how-to-set-up-training-targets/Local.py
new file mode 100644
index 000000000..d9ccfce56
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/Local.py
@@ -0,0 +1,39 @@
+# Code for the Local computer and Submit training run sections
+
+# Check core SDK version number
+import azureml.core
+
+print("SDK version:", azureml.core.VERSION)
+
+#
+from azureml.core.runconfig import RunConfiguration
+
+# Edit a run configuration property on the fly.
+run_local = RunConfiguration()
+
+run_local.environment.python.user_managed_dependencies = True
+#
+
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+
+# Set up an experiment
+#
+from azureml.core import Experiment
+experiment_name = 'my_experiment'
+
+exp = Experiment(workspace=ws, name=experiment_name)
+#
+
+# Submit the experiment using the run configuration
+#
+from azureml.core import ScriptRunConfig
+import os
+
+script_folder = os.getcwd()
+src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_local)
+run = exp.submit(src)
+run.wait_for_completion(show_output = True)
+#
+
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/amlcompute.py b/ignore/doc-qa/how-to-set-up-training-targets/amlcompute.py
new file mode 100644
index 000000000..4658aab1a
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/amlcompute.py
@@ -0,0 +1,48 @@
+# Code for Azure Machine Learning Compute - Run-based creation
+
+# Check core SDK version number
+import azureml.core
+
+print("SDK version:", azureml.core.VERSION)
+
+
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+
+# Set up an experiment
+from azureml.core import Experiment
+experiment_name = 'my-experiment'
+script_folder = "./"
+
+exp = Experiment(workspace=ws, name=experiment_name)
+
+
+#
+from azureml.core.compute import ComputeTarget, AmlCompute
+
+# First, list the supported VM families for Azure Machine Learning Compute
+print(AmlCompute.supported_vmsizes(workspace=ws))
+
+from azureml.core.runconfig import RunConfiguration
+# Create a new runconfig object
+run_temp_compute = RunConfiguration()
+
+# Signal that you want to use AmlCompute to execute the script
+run_temp_compute.target = "amlcompute"
+
+# AmlCompute is created in the same region as your workspace
+# Set the VM size for AmlCompute from the list of supported_vmsizes
+run_temp_compute.amlcompute.vm_size = 'STANDARD_D2_V2'
+#
+
+
+# Submit the experiment using the run configuration
+from azureml.core import ScriptRunConfig
+
+src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_temp_compute)
+run = exp.submit(src)
+run.wait_for_completion(show_output = True)
+
+
+
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/amlcompute2.py b/ignore/doc-qa/how-to-set-up-training-targets/amlcompute2.py
new file mode 100644
index 000000000..bffe44328
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/amlcompute2.py
@@ -0,0 +1,69 @@
+# Code for Azure Machine Learning Compute - Persistent compute
+
+# Check core SDK version number
+import azureml.core
+
+print("SDK version:", azureml.core.VERSION)
+
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+
+# Set up an experiment
+from azureml.core import Experiment
+experiment_name = 'my-experiment'
+script_folder = "./"
+
+exp = Experiment(workspace=ws, name=experiment_name)
+
+#
+from azureml.core.compute import ComputeTarget, AmlCompute
+from azureml.core.compute_target import ComputeTargetException
+
+# Choose a name for your CPU cluster
+cpu_cluster_name = "cpucluster"
+
+# Verify that the cluster does not already exist
+try:
+    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
+    print('Found existing cluster, use it.')
+except ComputeTargetException:
+    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
+                                                           max_nodes=4)
+    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
+
+cpu_cluster.wait_for_completion(show_output=True)
+#
+
+#
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+from azureml.core.runconfig import DEFAULT_CPU_IMAGE
+
+# Create a new runconfig object
+run_amlcompute = RunConfiguration()
+
+# Use the cpu_cluster you created above.
+run_amlcompute.target = cpu_cluster
+
+# Enable Docker
+run_amlcompute.environment.docker.enabled = True
+
+# Set Docker base image to the default CPU-based image
+run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE
+
+# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
+run_amlcompute.environment.python.user_managed_dependencies = False
+
+# Specify a CondaDependencies object and add the necessary packages
+run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
+#
+
+# Submit the experiment using the run configuration
+#
+from azureml.core import ScriptRunConfig
+
+src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_amlcompute)
+run = exp.submit(src)
+run.wait_for_completion(show_output = True)
+#
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/dsvm.py b/ignore/doc-qa/how-to-set-up-training-targets/dsvm.py
new file mode 100644
index 000000000..9033d618f
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/dsvm.py
@@ -0,0 +1,26 @@
+# Code for Remote virtual machines
+
+compute_target_name = "sheri-linuxvm"
+
+#
+import azureml.core
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+run_dsvm = RunConfiguration(framework = "python")
+
+# Set the compute target to the Linux DSVM
+run_dsvm.target = compute_target_name
+
+# Use Docker in the remote VM
+run_dsvm.environment.docker.enabled = True
+
+# Use the CPU base image
+# To use a GPU in the DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
+run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
+print('Base Docker image is:', run_dsvm.environment.docker.base_image)
+
+# Specify the CondaDependencies object
+run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
+#
+print(run_dsvm)
\ No newline at end of file
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/hdi.py b/ignore/doc-qa/how-to-set-up-training-targets/hdi.py
new file mode 100644
index 000000000..ea8bc8ff3
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/hdi.py
@@ -0,0 +1,27 @@
+
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+from azureml.core.compute import ComputeTarget
+
+# refers to an existing compute resource attached to the workspace!
+hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
+
+
+#
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+
+# Use the PySpark framework
+run_hdi = RunConfiguration(framework="pyspark")
+
+# Set the compute target to the HDI cluster
+run_hdi.target = hdi_compute.name
+
+# Specify a CondaDependencies object to have the system install numpy
+cd = CondaDependencies()
+cd.add_conda_package('numpy')
+run_hdi.environment.python.conda_dependencies = cd
+#
+print(run_hdi)
\ No newline at end of file
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/mylib.py b/ignore/doc-qa/how-to-set-up-training-targets/mylib.py
new file mode 100644
index 000000000..08e4d1f4a
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/mylib.py
@@ -0,0 +1,9 @@
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license.
+
+import numpy as np
+
+
+def get_alphas():
+    # numbers from 0.0 up to (but not including) 1.0, in steps of 0.05
+    return np.arange(0.0, 1.0, 0.05)
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/remote.py b/ignore/doc-qa/how-to-set-up-training-targets/remote.py
new file mode 100644
index 000000000..b46fdc597
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/remote.py
@@ -0,0 +1,55 @@
+# Code for Remote virtual machines
+
+compute_target_name = "attach-dsvm"
+
+#
+import azureml.core
+from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
+from azureml.core.conda_dependencies import CondaDependencies
+
+run_dsvm = RunConfiguration(framework = "python")
+
+# Set the compute target to the Linux DSVM
+run_dsvm.target = compute_target_name
+
+# Use Docker in the remote VM
+run_dsvm.environment.docker.enabled = True
+
+# Use the CPU base image
+# To use a GPU in the DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
+run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
+print('Base Docker image is:', run_dsvm.environment.docker.base_image)
+
+# Prepare the Docker and conda environment automatically when they're used for the first time
+run_dsvm.prepare_environment = True
+
+# Specify the CondaDependencies object
+run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
+#
+# Stand-in for an HDInsight compute target attached to the workspace (see hdi.py)
+from types import SimpleNamespace
+hdi_compute = SimpleNamespace(name="blah")
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+
+# Use the PySpark framework
+hdi_run_config = RunConfiguration(framework="pyspark")
+
+# Set the compute target to the HDI cluster
+hdi_run_config.target = hdi_compute.name
+
+# Specify a CondaDependencies object to have the system install numpy
+cd = CondaDependencies()
+cd.add_conda_package('numpy')
+hdi_run_config.environment.python.conda_dependencies = cd
+
+#
+from azureml.core.runconfig import RunConfiguration
+# Configure the HDInsight run
+# Load the runconfig object from the myhdi.runconfig file generated in the previous attach operation
+# (assumes `project` is the azureml Project object for this directory)
+run_hdi = RunConfiguration.load(project_object = project, run_name = 'myhdi')
+
+# Ask the system to prepare the conda environment automatically when it's used for the first time
+run_hdi.auto_prepare_environment = True
\ No newline at end of file
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/runconfig.py b/ignore/doc-qa/how-to-set-up-training-targets/runconfig.py
new file mode 100644
index 000000000..d0f4423ce
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/runconfig.py
@@ -0,0 +1,25 @@
+# Code for What's a run configuration
+
+#
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+run_system_managed = RunConfiguration()
+
+# Specify the conda dependencies with scikit-learn
+run_system_managed.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
+#
+print(run_system_managed)
+
+
+#
+from azureml.core.runconfig import RunConfiguration
+
+run_user_managed = RunConfiguration()
+run_user_managed.environment.python.user_managed_dependencies = True
+
+# Choose a specific Python environment by pointing to a Python path. For example:
+# run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'
+#
+print(run_user_managed)
+
diff --git a/ignore/doc-qa/how-to-set-up-training-targets/train.py b/ignore/doc-qa/how-to-set-up-training-targets/train.py
new file mode 100644
index 000000000..42da5a6d4
--- /dev/null
+++ b/ignore/doc-qa/how-to-set-up-training-targets/train.py
@@ -0,0 +1,43 @@
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license.
+
+from sklearn.datasets import load_diabetes
+from sklearn.linear_model import Ridge
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from azureml.core.run import Run
+from sklearn.externals import joblib
+import os
+import numpy as np
+import mylib
+
+os.makedirs('./outputs', exist_ok=True)
+
+X, y = load_diabetes(return_X_y=True)
+
+run = Run.get_context()
+
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.2,
+                                                    random_state=0)
+data = {"train": {"X": X_train, "y": y_train},
+        "test": {"X": X_test, "y": y_test}}
+
+# numbers from 0.0 up to (but not including) 1.0, in steps of 0.05
+alphas = mylib.get_alphas()
+
+for alpha in alphas:
+    # Use the Ridge algorithm to create a regression model
+    reg = Ridge(alpha=alpha)
+    reg.fit(data["train"]["X"], data["train"]["y"])
+
+    preds = reg.predict(data["test"]["X"])
+    mse = mean_squared_error(preds, data["test"]["y"])
+    run.log('alpha', alpha)
+    run.log('mse', mse)
+
+    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
+    # save the model in the outputs folder so it automatically gets uploaded
+    joblib.dump(value=reg, filename=os.path.join('./outputs/', model_file_name))
+
+    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
diff --git a/ignore/doc-qa/quickstart-create-workspace-with-python/quickstart.py b/ignore/doc-qa/quickstart-create-workspace-with-python/quickstart.py
new file mode 100644
index 000000000..54528c116
--- /dev/null
+++ b/ignore/doc-qa/quickstart-create-workspace-with-python/quickstart.py
@@ -0,0 +1,63 @@
+# code snippets for the quickstart-create-workspace-with-python article
+#
+import azureml.core
+print(azureml.core.VERSION)
+#
+
+# this is NOT a snippet. If this code changes, go fix it in the article!
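+# Workspace.create provisions the workspace and its dependent resources
+# (deleted later via delete_dependent_resources); subscription_id is intentionally left blank here.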
+from azureml.core import Workspace
+ws = Workspace.create(name='myworkspace',
+                      subscription_id='',
+                      resource_group='myresourcegroup',
+                      create_resource_group=True,
+                      location='eastus2'  # or other supported Azure region
+                      )
+
+#
+ws.get_details()
+#
+
+#
+# Create the configuration file.
+ws.write_config()
+
+# Use this code to load the workspace from
+# other scripts and notebooks in this directory.
+# ws = Workspace.from_config()
+#
+
+#
+from azureml.core import Experiment
+
+# Create a new experiment in your workspace.
+exp = Experiment(workspace=ws, name='myexp')
+
+# Start a run and start the logging service.
+run = exp.start_logging()
+
+# Log a single number.
+run.log('my magic number', 42)
+
+# Log a list (Fibonacci numbers).
+run.log_list('my list', [1, 1, 2, 3, 5, 8, 13, 21, 34, 55])
+
+# Finish the run.
+run.complete()
+#
+
+#
+print(run.get_portal_url())
+#
+
+
+#
+ws.delete(delete_dependent_resources=True)
+#
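+
+# Not shown in the article: before the ws.delete call above, everything
+# logged can be read back with run.get_metrics(), e.g.:
+# metrics = run.get_metrics()
+# print(metrics['my magic number'])   # 42
+# print(metrics['my list'])           # [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]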