Merge branch 'master' into jenns/splitpipeline

j-so · j-so · commit f58e0df643f8 · 2020-06-18T14:38:51.000-07:00
diff --git a/.pipelines/diabetes_regression-variables-template.yml b/.pipelines/diabetes_regression-variables-template.yml
@@ -1,8 +1,7 @@
 # Pipeline template that defines common runtime environment variables.
 variables:
-
   # Source Config
-    # The directory containing the scripts for training, evaluating, and registering the model
+  # The directory containing the scripts for training, evaluating, and registering the model
   - name: SOURCES_DIR_TRAIN
     value: diabetes_regression
     # The path to the model training script under SOURCES_DIR_TRAIN
@@ -23,7 +22,7 @@ variables:
     value: mlopspython
   - name: DATASET_NAME
     value: diabetes_ds
-  # Uncomment DATASTORE_NAME if you have configured non default datastore to point to your data   
+  # Uncomment DATASTORE_NAME if you have configured a non-default datastore to point to your data
   # - name: DATASTORE_NAME
   #   value: datablobstore
   - name: DATASET_VERSION
@@ -50,16 +49,16 @@ variables:
   # The name for the (docker/webapp) scoring image
   - name: IMAGE_NAME
     value: "diabetestrained"
- 
+
     # Optional. Used by a training pipeline with R on Databricks
   - name: DB_CLUSTER_ID
     value: ""
 
   # These are the default values set in ml_service\util\env_variables.py. Uncomment and override if desired.
-    # Set to false to disable the evaluation step in the ML pipeline and register the newly trained model unconditionally.
+  # Set to false to disable the evaluation step in the ML pipeline and register the newly trained model unconditionally.
   # - name: RUN_EVALUATION
   #   value: "true"
-    # Set to false to register the model regardless of the outcome of the evaluation step in the ML pipeline.
+  # Set to false to register the model regardless of the outcome of the evaluation step in the ML pipeline.
   # - name: ALLOW_RUN_CANCEL
   #   value: "true"
 
diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -1,9 +1,8 @@
-
 # Getting Started with MLOpsPython <!-- omit in toc -->
 
-This guide shows how to get MLOpsPython working with a sample ML project ***diabetes_regression***. The project creates a linear regression model to predict diabetes and has CI/CD DevOps practices enabled for model training and serving when these steps are completed in this getting started guide.
+This guide shows how to get MLOpsPython working with a sample ML project **_diabetes_regression_**. The project creates a linear regression model to predict diabetes and has CI/CD DevOps practices enabled for model training and serving when these steps are completed in this getting started guide.
 
-If you would like to bring your own model code to use this template structure, follow the [custom model](custom_model.md) guide. We recommend completing this getting started guide  with the diabetes model through ACI deployment first to ensure everything is working in your environment before converting the template to use your own model code.
+If you would like to bring your own model code to use this template structure, follow the [custom model](custom_model.md) guide. We recommend completing this getting started guide with the diabetes model through ACI deployment first to ensure everything is working in your environment before converting the template to use your own model code.
 
 - [Setting up Azure DevOps](#setting-up-azure-devops)
   - [Install the Azure Machine Learning extension](#install-the-azure-machine-learning-extension)
@@ -46,13 +45,13 @@ We recommend using the [repository template](https://github.com/microsoft/MLOpsP
 
 ## Create a Variable Group for your Pipeline
 
-MLOpsPython requires some variables to be set before you can run any pipelines. You'll need to create a *variable group* in Azure DevOps to store values that are reused across multiple pipelines or pipeline stages. Either store the values directly in [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) or connect to an Azure Key Vault in your subscription. Check out the [Add & use variable groups](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=yaml#use-a-variable-group) documentation to learn more about how to create a variable group and link it to your pipeline.
+MLOpsPython requires some variables to be set before you can run any pipelines. You'll need to create a _variable group_ in Azure DevOps to store values that are reused across multiple pipelines or pipeline stages. Either store the values directly in [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) or connect to an Azure Key Vault in your subscription. Check out the [Add & use variable groups](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=yaml#use-a-variable-group) documentation to learn more about how to create a variable group and link it to your pipeline.
 
 Navigate to **Library** in the **Pipelines** section as indicated below:
 
 ![Library Variable Groups](./images/library_variable_groups.png)
 
-Create a variable group named **``devopsforai-aml-vg``**. The YAML pipeline definitions in this repository refer to this variable group by name.
+Create a variable group named **`devopsforai-aml-vg`**. The YAML pipeline definitions in this repository refer to this variable group by name.
 
 The variable group should contain the following required variables. **Azure resources that don't exist yet will be created in the [Provisioning resources using Azure Pipelines](#provisioning-resources-using-azure-pipelines) step below.**
 
@@ -80,7 +79,7 @@ More variables are available for further tweaking, but the above variables are a
 
 **WORKSPACE_NAME** is used for creating the Azure Machine Learning Workspace. You can provide an existing Azure ML Workspace here if you've got one.
 
-**AZURE_RM_SVC_CONNECTION** is used by the [Azure Pipeline]((../environment_setup/iac-create-environment-pipeline.yml)) in Azure DevOps that creates the Azure ML workspace and associated resources through Azure Resource Manager. You'll create the connection in a [step below](#create-an-azure-devops-service-connection-for-the-azure-resource-manager).
+**AZURE_RM_SVC_CONNECTION** is used by the [Azure Pipeline](../environment_setup/iac-create-environment-pipeline.yml) in Azure DevOps that creates the Azure ML workspace and associated resources through Azure Resource Manager. You'll create the connection in a [step below](#create-an-azure-devops-service-connection-for-the-azure-resource-manager).
 
 **WORKSPACE_SVC_CONNECTION** is used to reference a [service connection for the Azure ML workspace](#create-an-azure-devops-azure-ml-workspace-service-connection). You'll create the connection after [provisioning the workspace](#provisioning-resources-using-azure-pipelines) in the [Create an Azure DevOps Service Connection for the Azure ML Workspace](#create-an-azure-devops-service-connection-for-the-azure-ml-workspace) section below.
 
@@ -92,11 +91,11 @@ The easiest way to create all required Azure resources (Resource Group, Azure ML
 
 ### Create an Azure DevOps Service Connection for the Azure Resource Manager
 
-The [IaC provisioning pipeline]((../environment_setup/iac-create-environment-pipeline.yml)) requires an **Azure Resource Manager** [service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#create-a-service-connection).
+The [IaC provisioning pipeline](../environment_setup/iac-create-environment-pipeline.yml) requires an **Azure Resource Manager** [service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#create-a-service-connection).
 
 ![Create service connection](./images/create-rm-service-connection.png)
 
-Leave the **``Resource Group``** field empty.
+Leave the **`Resource Group`** field empty.
 
 **Note:** Creating the Azure Resource Manager service connection scope requires 'Owner' or 'User Access Administrator' permissions on the subscription.
 You'll also need sufficient permissions to register an application with your Azure AD tenant, or you can get the ID and secret of a service principal from your Azure AD Administrator. That principal must have 'Contributor' permissions on the subscription.
@@ -144,7 +143,7 @@ These pipelines use a Docker container on the Azure Pipelines agents to accompli
 
 ### Set up the Model CI, training, evaluation, and registration pipeline
 
-In your Azure DevOps project, create and run a new build pipeline based on the  [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml)
+In your Azure DevOps project, create and run a new build pipeline based on the [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml)
 pipeline definition in your forked repository.
 
 If you plan to use the release deployment pipeline (in the next section), you will need to rename this pipeline to `Model-Train-Register-CI`.
@@ -175,15 +174,15 @@ The pipeline stages are summarized below:
 
 - Linting (code quality analysis)
 - Unit tests and code coverage analysis
-- Build and publish *ML Training Pipeline* in an *ML Workspace*
+- Build and publish _ML Training Pipeline_ in an _ML Workspace_
 
 #### Train model
 
-- Determine the ID of the *ML Training Pipeline* published in the previous stage.
-- Trigger the *ML Training Pipeline* and waits for it to complete.
+- Determine the ID of the _ML Training Pipeline_ published in the previous stage.
+- Trigger the _ML Training Pipeline_ and wait for it to complete.
   - This is an **agentless** job. The CI pipeline can wait for ML pipeline completion for hours or even days without using agent resources.
-- Determine if a new model was registered by the *ML Training Pipeline*.
-  - If the model evaluation determines that the new model doesn't perform any better than the previous one, the new model won't register and the *ML Training Pipeline* will be **canceled**. In this case, you'll see a message in the 'Train Model' job under the 'Determine if evaluation succeeded and new model is registered' step saying '**Model was not registered for this run.**'
+- Determine if a new model was registered by the _ML Training Pipeline_.
+  - If the model evaluation determines that the new model doesn't perform any better than the previous one, the new model won't register and the _ML Training Pipeline_ will be **canceled**. In this case, you'll see a message in the 'Train Model' job under the 'Determine if evaluation succeeded and new model is registered' step saying '**Model was not registered for this run.**'
   - See [evaluate_model.py](../diabetes_regression/evaluate/evaluate_model.py#L118) for the evaluation logic.
   - [Additional Variables and Configuration](#additional-variables-and-configuration) for configuring this and other behavior.
 
@@ -249,7 +248,9 @@ MLOpsPython also can deploy to [Azure Kubernetes Service](https://azure.microsof
 
 Creating a cluster on Azure Kubernetes Service is out of scope of this tutorial, but you can find set up information on the [Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using the Azure portal](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough-portal#create-an-aks-cluster) page.
 
-**Note:** If your target deployment environment is a Kubernetes cluster and you want to implement Canary and/or A/B testing deployment strategies, check out this [tutorial](./canary_ab_deployment.md).
+> **_Note_**
+>
+> If your target deployment environment is a Kubernetes cluster and you want to implement Canary and/or A/B testing deployment strategies, check out this [tutorial](./canary_ab_deployment.md).
 
 Keep the Azure Container Instances deployment active because it's a lightweight way to validate changes before deploying to Azure Kubernetes Service.
 
@@ -260,7 +261,7 @@ In the Variables tab, edit your variable group (`devopsforai-aml-vg`). In the va
 | AKS_COMPUTE_NAME    | aks             |
 | AKS_DEPLOYMENT_NAME | mlops-aks       |
 
-Set **AKS_COMPUTE_NAME** to the *Compute name* of the Inference Cluster that references the Azure Kubernetes Service cluster in your Azure ML Workspace.
+Set **AKS_COMPUTE_NAME** to the _Compute name_ of the Inference Cluster that references the Azure Kubernetes Service cluster in your Azure ML Workspace.
 
 After successfully deploying to Azure Container Instances, the next stage will deploy the model to Kubernetes and run a smoke test.
 
@@ -296,18 +297,18 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta
 
 The build pipeline also supports building and publishing Azure ML pipelines using R to train a model. You can enable it by changing the `build-train-script` pipeline variable to either of the following values:
 
-* `diabetes_regression_build_train_pipeline_with_r.py` to train a model with R on Azure ML Compute. You'll also need to uncomment (include) the `r-essentials` Conda packages in the environment definition YAML `diabetes_regression/conda_dependencies.yml`.
-* `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` to train a model with R on Databricks. You'll need to manually create a Databricks cluster and attach it to the Azure ML Workspace as a compute resource. Set the DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables in your variable group.
+- `diabetes_regression_build_train_pipeline_with_r.py` to train a model with R on Azure ML Compute. You'll also need to uncomment (include) the `r-essentials` Conda packages in the environment definition YAML `diabetes_regression/conda_dependencies.yml`.
+- `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` to train a model with R on Databricks. You'll need to manually create a Databricks cluster and attach it to the Azure ML Workspace as a compute resource. Set the DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables in your variable group.
 
 Example ML pipelines using R have a single step to train a model. They don't demonstrate how to evaluate and register a model. The evaluation and registering techniques are shown only in the Python implementation.
 
 ### Observability and Monitoring
 
 You can explore aspects of model observability in the solution, such as:
 
-* **Logging**: Navigate to the Application Insights instance linked to the Azure ML Portal, then go to the Logs (Analytics) pane. The following sample query correlates HTTP requests with custom logs generated in `score.py`. This can be used, for example, to analyze query duration vs. scoring batch size:
+- **Logging**: Navigate to the Application Insights instance linked to the Azure ML Portal, then go to the Logs (Analytics) pane. The following sample query correlates HTTP requests with custom logs generated in `score.py`. This can be used, for example, to analyze query duration vs. scoring batch size:
 
-  ```
+  ```kusto
   let Traceinfo=traces
   | extend d=parse_json(tostring(customDimensions.Content))
   | project workspace=customDimensions.["Workspace Name"],
@@ -321,17 +322,17 @@ You can explore aspects of model observability in the solution, such as:
   | project-away id1
   ```
 
-* **Distributed tracing**: The smoke test client code sets an HTTP `traceparent` header (per the [W3C Trace Context proposed specification](https://www.w3.org/TR/trace-context-1)), and the `score.py` code logs the header. The query above shows how to surface this value. You can adapt it to your tracing framework.
-* **Monitoring**: You can use [Azure Monitor for containers](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/container-insights-overview) to monitor the Azure ML scoring containers' performance.
+- **Distributed tracing**: The smoke test client code sets an HTTP `traceparent` header (per the [W3C Trace Context proposed specification](https://www.w3.org/TR/trace-context-1)), and the `score.py` code logs the header. The query above shows how to surface this value. You can adapt it to your tracing framework.
+- **Monitoring**: You can use [Azure Monitor for containers](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/container-insights-overview) to monitor the Azure ML scoring containers' performance.
 
 ### Clean up the example resources
 
 To remove the resources created for this project, use the [/environment_setup/iac-remove-environment-pipeline.yml](../environment_setup/iac-remove-environment-pipeline.yml) definition or you can just delete the resource group in the [Azure Portal](https://portal.azure.com).
 
 ## Next Steps: Integrating your project
 
-* The [custom model](custom_model.md) guide includes information on bringing your own code to this repository template.
-* Consider using [Azure Pipelines self-hosted agents](https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/agents?view=azure-devops&tabs=browser#install) to speed up your Azure ML pipeline execution. The Docker container image for the Azure ML pipeline is sizable, and having it cached on the agent between runs can trim several minutes from your runs.
+- The [custom model](custom_model.md) guide includes information on bringing your own code to this repository template.
+- Consider using [Azure Pipelines self-hosted agents](https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/agents?view=azure-devops&tabs=browser#install) to speed up your Azure ML pipeline execution. The Docker container image for the Azure ML pipeline is sizable, and having it cached on the agent between runs can trim several minutes from your runs.
 
 ### Additional Variables and Configuration
 
@@ -341,7 +342,7 @@ There are more variables used in the project. They're defined in two places: one
 
 For using Azure Pipelines, all other variables are stored in the file `.pipelines/diabetes_regression-variables-template.yml`. Using the default values as a starting point, adjust the variables to suit your requirements.
 
-In that folder, you'll also find the `parameters.json` file that we recommend using to provide parameters for training, evaluation, and scoring scripts. The sample parameter that `diabetes_regression` uses is the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). We don't provide any serializers for this config file.
+In that folder, you'll also find the `parameters.json` file that we recommend using to provide parameters for training, evaluation, and scoring scripts. The sample parameter that `diabetes_regression` uses is the ridge regression [_alpha_ hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). We don't provide any serializers for this config file.
 
 #### Local configuration
 
diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json
@@ -45,6 +45,17 @@
     "acr": {
       "type": "string",
       "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]"
+    },
+    "sku": {
+      "type": "string",
+      "defaultValue": "basic",
+      "allowedValues": [
+        "basic",
+        "enterprise"
+      ],
+      "metadata": {
+        "description": "Specifies the sku, also referred to as 'edition', of the Azure Machine Learning workspace."
+      }
     }
   },
   "variables": {
@@ -92,7 +103,8 @@
           "name": "standard",
           "family": "A"
         },
-        "accessPolicies": []
+        "accessPolicies": [
+        ]
       }
     },
     {
@@ -131,13 +143,17 @@
       "identity": {
         "type": "systemAssigned"
       },
+      "sku": {
+        "tier": "[parameters('sku')]",
+        "name": "[parameters('sku')]"
+      },
       "properties": {
         "friendlyName": "[variables('amlWorkspaceName')]",
         "keyVault": "[resourceId('Microsoft.KeyVault/vaults',variables('keyVaultName'))]",
         "applicationInsights": "[resourceId('Microsoft.Insights/components',variables('applicationInsightsName'))]",
         "containerRegistry": "[resourceId('Microsoft.ContainerRegistry/registries',variables('containerRegistryName'))]",
         "storageAccount": "[resourceId('Microsoft.Storage/storageAccounts/',variables('storageAccountName'))]"
       }
-    }    
+    }
   ]
 }
diff --git a/environment_setup/iac-create-environment-pipeline-arm.yml b/environment_setup/iac-create-environment-pipeline-arm.yml