Skip to content

Commit bb61306

Browse files
author
j-so
committed
Merge branch 'master' into jenns/splitpipeline
2 parents f58e0df + bcdac5c commit bb61306

19 files changed

+1327
-94
lines changed

.env.example

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,33 @@
11
# Azure Subscription Variables
22
SUBSCRIPTION_ID = ''
3-
LOCATION = 'westeurope'
3+
LOCATION = ''
44
TENANT_ID = ''
55
BASE_NAME = ''
66
SP_APP_ID = ''
77
SP_APP_SECRET = ''
8-
RESOUCE_GROUP = 'mlops-rg'
8+
RESOURCE_GROUP = 'mlops-RG'
99

1010
# Mock build/release ID for local testing
1111
BUILD_BUILDID = '001'
1212

1313
# Azure ML Workspace Variables
14-
WORKSPACE_NAME = 'aml-workspace'
15-
EXPERIMENT_NAME = ''
14+
WORKSPACE_NAME = 'mlops-aml-ws'
15+
EXPERIMENT_NAME = 'mlopspython'
1616

1717
# AML Compute Cluster Config
1818
AML_ENV_NAME='diabetes_regression_training_env'
19+
AML_ENV_TRAIN_CONDA_DEP_FILE="conda_dependencies.yml"
1920
AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
2021
AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
2122
AML_CLUSTER_MAX_NODES = '4'
2223
AML_CLUSTER_MIN_NODES = '0'
2324
AML_CLUSTER_PRIORITY = 'lowpriority'
2425
# Training Config
25-
MODEL_NAME = 'sklearn_regression_model.pkl'
26+
MODEL_NAME = 'diabetes_regression_model.pkl'
2627
MODEL_VERSION = '1'
2728
TRAIN_SCRIPT_PATH = 'training/train.py'
29+
30+
2831
# AML Pipeline Config
2932
TRAINING_PIPELINE_NAME = 'Training Pipeline'
3033
MODEL_PATH = ''
@@ -51,3 +54,28 @@ ALLOW_RUN_CANCEL = 'true'
5154

5255
# Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yml.
5356
AML_REBUILD_ENVIRONMENT = 'false'
57+
58+
59+
60+
USE_GPU_FOR_SCORING = "false"
61+
AML_ENV_SCORE_CONDA_DEP_FILE="conda_dependencies_scoring.yml"
62+
AML_ENV_SCORECOPY_CONDA_DEP_FILE="conda_dependencies_scorecopy.yml"
63+
# AML Compute Cluster Config for parallel batch scoring
64+
AML_ENV_NAME_SCORING='diabetes_regression_scoring_env'
65+
AML_ENV_NAME_SCORE_COPY='diabetes_regression_score_copy_env'
66+
AML_COMPUTE_CLUSTER_NAME_SCORING = 'score-cluster'
67+
AML_COMPUTE_CLUSTER_CPU_SKU_SCORING = 'STANDARD_DS2_V2'
68+
AML_CLUSTER_MAX_NODES_SCORING = '4'
69+
AML_CLUSTER_MIN_NODES_SCORING = '0'
70+
AML_CLUSTER_PRIORITY_SCORING = 'lowpriority'
71+
AML_REBUILD_ENVIRONMENT_SCORING = 'true'
72+
BATCHSCORE_SCRIPT_PATH = 'scoring/parallel_batchscore.py'
73+
BATCHSCORE_COPY_SCRIPT_PATH = 'scoring/parallel_batchscore_copyoutput.py'
74+
75+
76+
SCORING_DATASTORE_INPUT_CONTAINER = 'input'
77+
SCORING_DATASTORE_INPUT_FILENAME = 'diabetes_scoring_input.csv'
78+
SCORING_DATASTORE_OUTPUT_CONTAINER = 'output'
79+
SCORING_DATASTORE_OUTPUT_FILENAME = 'diabetes_scoring_output.csv'
80+
SCORING_DATASET_NAME = 'diabetes_scoring_ds'
81+
SCORING_PIPELINE_NAME = 'diabetes-scoring-pipeline'
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Continuous Integration (CI) pipeline that orchestrates the batch scoring of the diabetes_regression model.
2+
3+
# Runtime parameters to select artifacts
4+
parameters:
5+
- name : artifactBuildId
6+
displayName: Model Train CI Build ID. Default is 'latest'.
7+
type: string
8+
default: latest
9+
10+
pr: none
11+
12+
# Trigger this pipeline on model-train pipeline completion
13+
resources:
14+
containers:
15+
- container: mlops
16+
image: mcr.microsoft.com/mlops/python:latest
17+
pipelines:
18+
- pipeline: model-train-ci
19+
source: Model-Train-Register-CI # Name of the triggering pipeline
20+
trigger:
21+
branches:
22+
include:
23+
- master
24+
25+
trigger:
26+
branches:
27+
include:
28+
- master
29+
paths:
30+
include:
31+
- diabetes_regression/scoring/parallel_batchscore.py
32+
- ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py
33+
- ml_service/pipelines/run_parallel_batchscore_pipeline.py
34+
35+
variables:
36+
- template: diabetes_regression-variables-template.yml
37+
- group: devopsforai-aml-vg
38+
39+
pool:
40+
vmImage: ubuntu-latest
41+
42+
stages:
43+
- stage: 'Batch_Scoring_Pipeline_CI'
44+
displayName: 'Batch Scoring Pipeline CI'
45+
jobs:
46+
- job: "Build_Batch_Scoring_Pipeline"
47+
displayName: "Build Batch Scoring Pipeline"
48+
container: mlops
49+
timeoutInMinutes: 0
50+
steps:
51+
- download: none
52+
- template: code-quality-template.yml
53+
- task: AzureCLI@1
54+
name: publish_batchscore
55+
inputs:
56+
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
57+
scriptLocation: inlineScript
58+
workingDirectory: $(Build.SourcesDirectory)
59+
inlineScript: |
60+
set -e # fail on error
61+
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
62+
# Invoke the Python module that builds and publishes the batch scoring pipeline
63+
python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline
64+
65+
- job: "Run_Batch_Score_Pipeline"
66+
displayName: "Run Batch Scoring Pipeline"
67+
dependsOn: "Build_Batch_Scoring_Pipeline"
68+
timeoutInMinutes: 240
69+
pool: server
70+
variables:
71+
pipeline_id: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['publish_batchscore.pipeline_id']]
72+
steps:
73+
- download: none
74+
- template: diabetes_regression-get-model-id-artifact-template.yml
75+
parameters:
76+
projectId: '$(resources.pipeline.model-train-ci.projectID)'
77+
pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)'
78+
artifactBuildId: ${{ parameters.artifactBuildId }}
79+
- task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
80+
displayName: 'Invoke Batch Scoring pipeline'
81+
inputs:
82+
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
83+
PipelineId: '$(pipeline_id)'
84+
ExperimentName: '$(EXPERIMENT_NAME)'
85+
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}'
86+

.pipelines/diabetes_regression-variables-template.yml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ variables:
1616
# The path to the model scoring script relative to SOURCES_DIR_TRAIN
1717
- name: SCORE_SCRIPT
1818
value: scoring/score.py
19+
1920

2021
# Azure ML Variables
2122
- name: EXPERIMENT_NAME
@@ -35,6 +36,8 @@ variables:
3536
# AML Compute Cluster Config
3637
- name: AML_ENV_NAME
3738
value: diabetes_regression_training_env
39+
- name: AML_ENV_TRAIN_CONDA_DEP_FILE
40+
value: "conda_dependencies.yml"
3841
- name: AML_COMPUTE_CLUSTER_CPU_SKU
3942
value: STANDARD_DS2_V2
4043
- name: AML_COMPUTE_CLUSTER_NAME
@@ -65,3 +68,62 @@ variables:
6568
# Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yml.
6669
# - name: AML_REBUILD_ENVIRONMENT
6770
# value: "false"
71+
72+
# Variables below are used for controlling various aspects of batch scoring
73+
- name: USE_GPU_FOR_SCORING
74+
value: False
75+
# Conda dependencies for the batch scoring step
76+
- name: AML_ENV_SCORE_CONDA_DEP_FILE
77+
value: "conda_dependencies_scoring.yml"
78+
# Conda dependencies for the score copying step
79+
- name: AML_ENV_SCORECOPY_CONDA_DEP_FILE
80+
value: "conda_dependencies_scorecopy.yml"
81+
# AML Compute Cluster Config for parallel batch scoring
82+
- name: AML_ENV_NAME_SCORING
83+
value: diabetes_regression_scoring_env
84+
- name: AML_ENV_NAME_SCORE_COPY
85+
value: diabetes_regression_score_copy_env
86+
- name: AML_COMPUTE_CLUSTER_CPU_SKU_SCORING
87+
value: STANDARD_DS2_V2
88+
- name: AML_COMPUTE_CLUSTER_NAME_SCORING
89+
value: score-cluster
90+
- name: AML_CLUSTER_MIN_NODES_SCORING
91+
value: 0
92+
- name: AML_CLUSTER_MAX_NODES_SCORING
93+
value: 4
94+
- name: AML_CLUSTER_PRIORITY_SCORING
95+
value: lowpriority
96+
# The path to the batch scoring script relative to SOURCES_DIR_TRAIN
97+
- name: BATCHSCORE_SCRIPT_PATH
98+
value: scoring/parallel_batchscore.py
99+
- name: BATCHSCORE_COPY_SCRIPT_PATH
100+
value: scoring/parallel_batchscore_copyoutput.py
101+
# Flag to allow rebuilding the AML Environment after it was built for the first time.
102+
# This enables dependency updates from the conda dependencies yaml for scoring activities.
103+
- name: AML_REBUILD_ENVIRONMENT_SCORING
104+
value: "true"
105+
106+
# Datastore config for scoring
107+
# The storage account name and key are supplied as variables in a variable group
108+
# in the Azure Pipelines library for this project. Please refer to repo docs for
109+
# more details
110+
111+
# Blob container where the input data for scoring can be found
112+
- name: SCORING_DATASTORE_INPUT_CONTAINER
113+
value: "input"
114+
# Blob name for the input data - include any applicable path in the string
115+
- name: SCORING_DATASTORE_INPUT_FILENAME
116+
value: "diabetes_scoring_input.csv"
117+
# Blob container where the output data for scoring can be found
118+
- name: SCORING_DATASTORE_OUTPUT_CONTAINER
119+
value: "output"
120+
# Blob name for the output data - include any applicable path in the string
121+
- name: SCORING_DATASTORE_OUTPUT_FILENAME
122+
value: "diabetes_scoring_output.csv"
123+
# Dataset name for input data for scoring
124+
- name: SCORING_DATASET_NAME
125+
value: "diabetes_scoring_ds"
126+
# Scoring pipeline name
127+
- name: SCORING_PIPELINE_NAME
128+
value: "diabetes-scoring-pipeline"
129+

bootstrap/bootstrap.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,11 @@ def replace_project_name(project_dir, project_name, rename_name):
9090
r".pipelines/diabetes_regression-ci-image.yml",
9191
r".pipelines/diabetes_regression-publish-model-artifact-template.yml", # NOQA: E501
9292
r".pipelines/diabetes_regression-get-model-id-artifact-template.yml", # NOQA: E501
93+
r".pipelines/diabetes_regression-batchscoring-ci.yml",
9394
r".pipelines/diabetes_regression-variables-template.yml",
9495
r"environment_setup/Dockerfile",
9596
r"environment_setup/install_requirements.sh",
97+
r"ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py", # NOQA: E501
9698
r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py", # NOQA: E501
9799
r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py", # NOQA: E501
98100
r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py", # NOQA: E501
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Conda environment specification. The dependencies defined in this file will
2+
# be automatically provisioned for managed runs. These include runs against
3+
# the localdocker, remotedocker, and cluster compute targets.
4+
5+
# Note that this file is NOT used to automatically manage dependencies for the
6+
# local compute target. To provision these dependencies locally, run:
7+
# conda env update --file conda_dependencies_scorecopy.yml
8+
9+
# Details about the Conda environment file format:
10+
# https://conda.io/docs/using/envs.html#create-environment-file-by-hand
11+
12+
# For managing Spark packages and configuration, see spark_dependencies.yml.
13+
# Version of this configuration file's structure and semantics in AzureML.
14+
# This directive is stored in a comment to preserve the Conda file structure.
15+
# [AzureMlVersion] = 2
16+
17+
# These dependencies are used to create the environment used by the batch score
18+
# copy pipeline step
19+
name: diabetes_regression_score_copy_env
20+
dependencies:
21+
# The python interpreter version.
22+
# Currently Azure ML Workbench only supports 3.5.2 and later.
23+
- python=3.7.*
24+
- pip
25+
26+
- pip:
27+
# Base AzureML SDK
28+
- azureml-sdk==1.6.*
29+
30+
# Score copying deps
31+
- azure-storage-blob
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Conda environment specification. The dependencies defined in this file will
2+
# be automatically provisioned for managed runs. These include runs against
3+
# the localdocker, remotedocker, and cluster compute targets.
4+
5+
# Note that this file is NOT used to automatically manage dependencies for the
6+
# local compute target. To provision these dependencies locally, run:
7+
# conda env update --file conda_dependencies_scoring.yml
8+
9+
# Details about the Conda environment file format:
10+
# https://conda.io/docs/using/envs.html#create-environment-file-by-hand
11+
12+
# For managing Spark packages and configuration, see spark_dependencies.yml.
13+
# Version of this configuration file's structure and semantics in AzureML.
14+
# This directive is stored in a comment to preserve the Conda file structure.
15+
# [AzureMlVersion] = 2
16+
17+
# These dependencies are used to create the environment used by the batch score
18+
# pipeline step
19+
name: diabetes_regression_scoring_env
20+
dependencies:
21+
# The python interpreter version.
22+
# Currently Azure ML Workbench only supports 3.5.2 and later.
23+
- python=3.7.*
24+
- pip
25+
26+
- pip:
27+
# Base AzureML SDK
28+
- azureml-sdk==1.6.*
29+
30+
# Scoring deps
31+
- scikit-learn
32+
- pandas

0 commit comments

Comments
 (0)