radhakrishnang
diff --git a/‎.env.example‎
Lines changed: 33 additions & 5 deletions b/‎.env.example‎
Lines changed: 33 additions & 5 deletions
diff --git a/‎.pipelines/diabetes_regression-batchscoring-ci.yml‎
Lines changed: 86 additions & 0 deletions b/‎.pipelines/diabetes_regression-batchscoring-ci.yml‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎.pipelines/diabetes_regression-variables-template.yml‎
Lines changed: 62 additions & 0 deletions b/‎.pipelines/diabetes_regression-variables-template.yml‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎bootstrap/bootstrap.py‎
Lines changed: 2 additions & 0 deletions b/‎bootstrap/bootstrap.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎diabetes_regression/conda_dependencies_scorecopy.yml‎
Lines changed: 31 additions & 0 deletions b/‎diabetes_regression/conda_dependencies_scorecopy.yml‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎diabetes_regression/conda_dependencies_scoring.yml‎
Lines changed: 32 additions & 0 deletions b/‎diabetes_regression/conda_dependencies_scoring.yml‎
Lines changed: 32 additions & 0 deletions
@@ -1,30 +1,33 @@
 # Azure Subscription Variables
 SUBSCRIPTION_ID = ''
-LOCATION = 'westeurope'
+LOCATION = ''
 TENANT_ID = ''
 BASE_NAME = ''
 SP_APP_ID = ''
 SP_APP_SECRET = ''
-RESOUCE_GROUP = 'mlops-rg'
+RESOURCE_GROUP = 'mlops-RG'
 
 # Mock build/release ID for local testing
 BUILD_BUILDID = '001'
 
 # Azure ML Workspace Variables
-WORKSPACE_NAME = 'aml-workspace'
-EXPERIMENT_NAME = ''
+WORKSPACE_NAME = 'mlops-aml-ws'
+EXPERIMENT_NAME = 'mlopspython'
 
 # AML Compute Cluster Config
 AML_ENV_NAME='diabetes_regression_training_env'
+AML_ENV_TRAIN_CONDA_DEP_FILE="conda_dependencies.yml"
 AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
 AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
 AML_CLUSTER_MAX_NODES = '4'
 AML_CLUSTER_MIN_NODES = '0'
 AML_CLUSTER_PRIORITY = 'lowpriority'
 # Training Config
-MODEL_NAME = 'sklearn_regression_model.pkl'
+MODEL_NAME = 'diabetes_regression_model.pkl'
 MODEL_VERSION = '1'
 TRAIN_SCRIPT_PATH = 'training/train.py'
+
+
 # AML Pipeline Config
 TRAINING_PIPELINE_NAME = 'Training Pipeline'
 MODEL_PATH = ''
@@ -51,3 +54,28 @@ ALLOW_RUN_CANCEL = 'true'
 
 # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yaml.
 AML_REBUILD_ENVIRONMENT = 'false'
+
+# Batch scoring: GPU toggle and the conda dependency files for the
+# scoring and score-copy environments
+USE_GPU_FOR_SCORING = 'false'
+AML_ENV_SCORE_CONDA_DEP_FILE = 'conda_dependencies_scoring.yml'
+AML_ENV_SCORECOPY_CONDA_DEP_FILE = 'conda_dependencies_scorecopy.yml'
+# AML environments and compute cluster used for parallel batch scoring
+AML_ENV_NAME_SCORING = 'diabetes_regression_scoring_env'
+AML_ENV_NAME_SCORE_COPY = 'diabetes_regression_score_copy_env'
+AML_COMPUTE_CLUSTER_NAME_SCORING = 'score-cluster'
+AML_COMPUTE_CLUSTER_CPU_SKU_SCORING = 'STANDARD_DS2_V2'
+AML_CLUSTER_MAX_NODES_SCORING = '4'
+AML_CLUSTER_MIN_NODES_SCORING = '0'
+AML_CLUSTER_PRIORITY_SCORING = 'lowpriority'
+AML_REBUILD_ENVIRONMENT_SCORING = 'true'
+BATCHSCORE_SCRIPT_PATH = 'scoring/parallel_batchscore.py'
+BATCHSCORE_COPY_SCRIPT_PATH = 'scoring/parallel_batchscore_copyoutput.py'
+
+# Scoring datastore containers/blobs, dataset name and pipeline name
+SCORING_DATASTORE_INPUT_CONTAINER = 'input'
+SCORING_DATASTORE_INPUT_FILENAME = 'diabetes_scoring_input.csv'
+SCORING_DATASTORE_OUTPUT_CONTAINER = 'output'
+SCORING_DATASTORE_OUTPUT_FILENAME = 'diabetes_scoring_output.csv'
+SCORING_DATASET_NAME = 'diabetes_scoring_ds'
+SCORING_PIPELINE_NAME = 'diabetes-scoring-pipeline'
@@ -0,0 +1,86 @@
+# Continuous Integration (CI) pipeline that orchestrates the batch scoring of the diabetes_regression model.
+
+# Runtime parameter: which Model Train CI build to take artifacts from
+parameters:
+- name: artifactBuildId
+  displayName: "Model Train CI Build ID. Default is 'latest'."
+  type: string
+  default: 'latest'
+
+pr: none
+
+# Resources: the job container image, plus the model-train pipeline whose completion on master triggers this pipeline
+resources:
+  containers:
+  - container: mlops  # alias referenced by 'container: mlops' on the build job
+    image: mcr.microsoft.com/mlops/python:latest
+  pipelines:
+  - pipeline: model-train-ci  # alias used in the $(resources.pipeline.model-train-ci.*) references
+    source: Model-Train-Register-CI # Name of the triggering pipeline
+    trigger:
+      branches:
+        include:
+        - master
+
+trigger:  # CI trigger: commits to master that touch one of the paths below
+  branches:
+    include:
+    - master
+  paths:
+    include:  # NOTE(review): parallel_batchscore_copyoutput.py is not listed here - confirm whether that is intentional
+    - diabetes_regression/scoring/parallel_batchscore.py
+    - ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py
+    - ml_service/pipelines/run_parallel_batchscore_pipeline.py
+
+variables:
+- template: diabetes_regression-variables-template.yml  # shared pipeline variable definitions
+- group: devopsforai-aml-vg  # variable group; presumably supplies WORKSPACE_SVC_CONNECTION - confirm
+
+pool:
+  vmImage: ubuntu-latest  # default agent image; the run job overrides this with 'pool: server'
+
+stages:
+- stage: 'Batch_Scoring_Pipeline_CI'  # single stage: build/publish the scoring pipeline, then invoke it
+  displayName: 'Batch Scoring Pipeline CI'
+  jobs:
+  - job: "Build_Batch_Scoring_Pipeline"
+    displayName: "Build Batch Scoring Pipeline"
+    container: mlops
+    timeoutInMinutes: 0  # 0 = use the maximum timeout allowed for the job
+    steps:
+    - download: none
+    - template: code-quality-template.yml
+    - task: AzureCLI@1
+      name: publish_batchscore  # step name; the run job reads its 'pipeline_id' output variable
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        workingDirectory: $(Build.SourcesDirectory)
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          # Invoke the Python building and publishing a batch scoring pipeline
+          python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline
+
+  - job: "Run_Batch_Score_Pipeline"
+    displayName: "Run Batch Scoring Pipeline"
+    dependsOn: "Build_Batch_Scoring_Pipeline"
+    timeoutInMinutes: 240
+    pool: server  # agentless job. NOTE(review): confirm the download/template steps below are supported on a server pool
+    variables:
+      pipeline_id: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['publish_batchscore.pipeline_id']]
+    steps:
+    - download: none
+    - template: diabetes_regression-get-model-id-artifact-template.yml
+      parameters:
+        projectId: '$(resources.pipeline.model-train-ci.projectID)'
+        pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)'
+        artifactBuildId: ${{ parameters.artifactBuildId }}
+    - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
+      displayName: 'Invoke Batch Scoring pipeline'
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        PipelineId: '$(pipeline_id)'  # id published by the build job (see 'variables' above)
+        ExperimentName: '$(EXPERIMENT_NAME)'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}'
+      
@@ -16,6 +16,7 @@ variables:
     # The path to the model scoring script relative to SOURCES_DIR_TRAIN
   - name: SCORE_SCRIPT
     value: scoring/score.py
+    
 
   # Azure ML Variables
   - name: EXPERIMENT_NAME
@@ -35,6 +36,8 @@ variables:
   # AML Compute Cluster Config
   - name: AML_ENV_NAME
     value: diabetes_regression_training_env
+  - name: AML_ENV_TRAIN_CONDA_DEP_FILE
+    value: "conda_dependencies.yml"
   - name: AML_COMPUTE_CLUSTER_CPU_SKU
     value: STANDARD_DS2_V2
   - name: AML_COMPUTE_CLUSTER_NAME
@@ -65,3 +68,62 @@ variables:
   # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yaml.
   # - name: AML_REBUILD_ENVIRONMENT
   #  value: "false"
+
+  # Variables below are used for controlling various aspects of batch scoring
+  - name: USE_GPU_FOR_SCORING
+    value: "false"
+  # Conda dependencies for the batch scoring step
+  - name: AML_ENV_SCORE_CONDA_DEP_FILE
+    value: "conda_dependencies_scoring.yml"
+  # Conda dependencies for the score copying step
+  - name: AML_ENV_SCORECOPY_CONDA_DEP_FILE
+    value: "conda_dependencies_scorecopy.yml"
+  # AML environment names and compute cluster config for parallel batch scoring
+  - name: AML_ENV_NAME_SCORING
+    value: diabetes_regression_scoring_env
+  - name: AML_ENV_NAME_SCORE_COPY
+    value: diabetes_regression_score_copy_env
+  - name: AML_COMPUTE_CLUSTER_CPU_SKU_SCORING
+    value: STANDARD_DS2_V2
+  - name: AML_COMPUTE_CLUSTER_NAME_SCORING
+    value: score-cluster
+  - name: AML_CLUSTER_MIN_NODES_SCORING
+    value: 0
+  - name: AML_CLUSTER_MAX_NODES_SCORING
+    value: 4
+  - name: AML_CLUSTER_PRIORITY_SCORING
+    value: lowpriority
+  # Paths to the batch scoring and score copy scripts, relative to SOURCES_DIR_TRAIN
+  - name: BATCHSCORE_SCRIPT_PATH
+    value: scoring/parallel_batchscore.py
+  - name: BATCHSCORE_COPY_SCRIPT_PATH
+    value: scoring/parallel_batchscore_copyoutput.py
+  # Flag to allow rebuilding the AML Environment after it was built for the first time.
+  # This enables dependency updates from the conda dependencies yaml for scoring activities.
+  - name: AML_REBUILD_ENVIRONMENT_SCORING
+    value: "true"
+
+  # Datastore config for scoring
+  # The storage account name and key are supplied as variables in a variable group
+  # in the Azure Pipelines library for this project. Please refer to repo docs for
+  # more details
+
+  # Blob container where the input data for scoring can be found
+  - name: SCORING_DATASTORE_INPUT_CONTAINER
+    value: "input"
+  # Blob name for the input data - include any applicable path in the string
+  - name: SCORING_DATASTORE_INPUT_FILENAME
+    value: "diabetes_scoring_input.csv"
+  # Blob container where the output data for scoring can be found
+  - name: SCORING_DATASTORE_OUTPUT_CONTAINER
+    value: "output"
+  # Blob name for the output data - include any applicable path in the string
+  - name: SCORING_DATASTORE_OUTPUT_FILENAME
+    value: "diabetes_scoring_output.csv"
+  # Dataset name for input data for scoring
+  - name: SCORING_DATASET_NAME
+    value: "diabetes_scoring_ds"
+  # Scoring pipeline name
+  - name: SCORING_PIPELINE_NAME
+    value: "diabetes-scoring-pipeline"
+    
@@ -90,9 +90,11 @@ def replace_project_name(project_dir, project_name, rename_name):
             r".pipelines/diabetes_regression-ci-image.yml",
             r".pipelines/diabetes_regression-publish-model-artifact-template.yml",  # NOQA: E501
             r".pipelines/diabetes_regression-get-model-id-artifact-template.yml",  # NOQA: E501
+            r".pipelines/diabetes_regression-batchscoring-ci.yml",
             r".pipelines/diabetes_regression-variables-template.yml",
             r"environment_setup/Dockerfile",
             r"environment_setup/install_requirements.sh",
+            r"ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py",  # NOQA: E501
 
@@ -0,0 +1,31 @@
+# Conda environment specification. The dependencies defined in this file will
+# be automatically provisioned for managed runs. These include runs against
+# the localdocker, remotedocker, and cluster compute targets.
+
+# Note that this file is NOT used to automatically manage dependencies for the
+# local compute target. To provision these dependencies locally, run:
+# conda env update --file conda_dependencies_scorecopy.yml
+
+# Details about the Conda environment file format:
+# https://conda.io/docs/using/envs.html#create-environment-file-by-hand
+
+# For managing Spark packages and configuration, see spark_dependencies.yml.
+# Version of this configuration file's structure and semantics in AzureML.
+# This directive is stored in a comment to preserve the Conda file structure.
+# [AzureMlVersion] = 2
+
+# These dependencies are used to create the environment used by the batch score
+# copy pipeline step
+name: diabetes_regression_score_copy_env
+dependencies:
+  # The python interpreter version.
+  # Currently Azure ML Workbench only supports 3.5.2 and later.
+  - python=3.7.*
+  - pip
+
+  - pip:
+      # Base AzureML SDK
+      - azureml-sdk==1.6.*
+      
+      # Score copying deps
+      - azure-storage-blob  # NOTE(review): unpinned, unlike azureml-sdk above - consider pinning for reproducible builds
@@ -0,0 +1,32 @@
+# Conda environment specification. The dependencies defined in this file will
+# be automatically provisioned for managed runs. These include runs against
+# the localdocker, remotedocker, and cluster compute targets.
+
+# Note that this file is NOT used to automatically manage dependencies for the
+# local compute target. To provision these dependencies locally, run:
+# conda env update --file conda_dependencies_scoring.yml
+
+# Details about the Conda environment file format:
+# https://conda.io/docs/using/envs.html#create-environment-file-by-hand
+
+# For managing Spark packages and configuration, see spark_dependencies.yml.
+# Version of this configuration file's structure and semantics in AzureML.
+# This directive is stored in a comment to preserve the Conda file structure.
+# [AzureMlVersion] = 2
+
+# These dependencies are used to create the environment used by the batch score
+# pipeline step
+name: diabetes_regression_scoring_env
+dependencies:
+  # The python interpreter version.
+  # Currently Azure ML Workbench only supports 3.5.2 and later.
+  - python=3.7.*
+  - pip
+
+  - pip:
+      # Base AzureML SDK
+      - azureml-sdk==1.6.*
+      
+      # Scoring deps
+      - scikit-learn  # NOTE(review): unpinned - presumably must be compatible with the version used to train the model; confirm and pin
+      - pandas  # NOTE(review): unpinned - consider pinning for reproducible builds