Skip to content

Commit 4bfa69b

Browse files
author
j-so
committed
use model version for batch scoring
1 parent c6167eb commit 4bfa69b

File tree

7 files changed

+51
-91
lines changed

7 files changed

+51
-91
lines changed

.pipelines/diabetes_regression-batchscoring-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,5 +80,5 @@ stages:
8080
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
8181
PipelineId: '$(pipeline_id)'
8282
ExperimentName: '$(EXPERIMENT_NAME)'
83-
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}'
83+
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "model_version": "$(MODEL_VERSION)"}'
8484

diabetes_regression/evaluate/evaluate_model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@
109109
tag_name = 'experiment_name'
110110

111111
model = get_latest_model(
112-
model_name, tag_name, exp.name, ws)
112+
model_name=model_name,
113+
tag_name=tag_name,
114+
tag_value=exp.name,
115+
aml_workspace=ws)
113116

114117
if (model is not None):
115118
production_model_mse = 10000

diabetes_regression/scoring/parallel_batchscore.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import joblib
3030
import sys
3131
from typing import List
32-
from util.model_helper import get_latest_model
32+
from util.model_helper import get_model
3333

3434
model = None
3535

@@ -59,6 +59,19 @@ def parse_args() -> List[str]:
5959

6060
model_name = model_name_param[0][1]
6161

62+
model_version_param = [
63+
(sys.argv[idx], sys.argv[idx + 1])
64+
for idx, itm in enumerate(sys.argv)
65+
if itm == "--model_version"
66+
]
67+
68+
if len(model_version_param) == 0:
69+
raise ValueError(
70+
"Model name is required but no model name parameter was passed to the script" # NOQA: E501
71+
)
72+
73+
model_version = model_version_param[0][1]
74+
6275
model_tag_name_param = [
6376
(sys.argv[idx], sys.argv[idx + 1])
6477
for idx, itm in enumerate(sys.argv)
@@ -83,7 +96,7 @@ def parse_args() -> List[str]:
8396
else model_tag_value_param[0][1]
8497
)
8598

86-
return [model_name, model_tag_name, model_tag_value]
99+
return [model_name, model_version, model_tag_name, model_tag_value]
87100

88101

89102
def init():
@@ -95,12 +108,14 @@ def init():
95108
print("Initializing batch scoring script...")
96109

97110
model_filter = parse_args()
98-
amlmodel = get_latest_model(
99-
model_filter[0], model_filter[1], model_filter[2]
100-
) # NOQA: E501
111+
amlmodel = get_model(
112+
model_name=env.model_filter[0],
113+
model_version=model_filter[1],
114+
tag_name=model_filter[2],
115+
tag_value=model_filter[3])
101116

102117
global model
103-
modelpath = amlmodel.get_model_path(model_name=model_filter[0])
118+
modelpath = Model.get_model_path(model_name=model_filter[0])
104119
model = joblib.load(modelpath)
105120
print("Loaded model {}".format(model_filter[0]))
106121
except Exception as ex:

diabetes_regression/util/model_helper.py

Lines changed: 16 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ def get_current_workspace() -> Workspace:
2222
return experiment.workspace
2323

2424

25-
def get_latest_model(
25+
def get_model(
2626
model_name: str,
27+
model_version: int = None, # If none, return latest model
2728
tag_name: str = None,
2829
tag_value: str = None,
2930
aml_workspace: Workspace = None
@@ -35,53 +36,25 @@ def get_latest_model(
3536
Parameters:
3637
aml_workspace (Workspace): aml.core Workspace that the model lives.
3738
model_name (str): name of the model we are looking for
39+
(optional) model_version (str): version of the model. Returns latest if not provided.
3840
(optional) tag (str): the tag value & name the model was registered under.
3941
4042
Return:
4143
A single aml model from the workspace that matches the name and tag.
4244
"""
43-
try:
44-
# Validate params. cannot be None.
45-
if model_name is None:
46-
raise ValueError("model_name[:str] is required")
47-
48-
if aml_workspace is None:
45+
if aml_workspace is None:
4946
print("No workspace defined - using current experiment workspace.")
5047
aml_workspace = get_current_workspace()
5148

52-
model_list = None
53-
tag_ext = ""
54-
55-
# Get lastest model
56-
# True: by name and tags
57-
if tag_name is not None and tag_value is not None:
58-
model_list = AMLModel.list(
59-
aml_workspace, name=model_name,
60-
tags=[[tag_name, tag_value]], latest=True
61-
)
62-
tag_ext = f"tag_name: {tag_name}, tag_value: {tag_value}."
63-
# False: Only by name
64-
else:
65-
model_list = AMLModel.list(
66-
aml_workspace, name=model_name, latest=True)
67-
68-
# latest should only return 1 model, but if it does,
69-
# then maybe sdk or source code changed.
70-
71-
# define the error messages
72-
too_many_model_message = ("Found more than one latest model. "
73-
f"Models found: {model_list}. "
74-
f"{tag_ext}")
75-
76-
no_model_found_message = (f"No Model found with name: {model_name}. "
77-
f"{tag_ext}")
78-
79-
if len(model_list) > 1:
80-
raise ValueError(too_many_model_message)
81-
if len(model_list) == 1:
82-
return model_list[0]
83-
else:
84-
print(no_model_found_message)
85-
return None
86-
except Exception:
87-
raise
49+
if tagname is not None and tagvalue is not None:
50+
model = Model(aml_workspace, name=model_name, version=model_version, tags=[[tag_name, tag_value]])
51+
elif (tagname is None and tagvalue is not None) or (
52+
tagvalue is None and tagname is not None
53+
):
54+
raise ValueError(
55+
"model_tag_name and model_tag_value should both be supplied"
56+
+ "or excluded" # NOQA: E501
57+
)
58+
else:
59+
model = Model(aml_workspace, name=env.model_name, version=env.model_version)
60+
return model

ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,37 +61,6 @@ def parse_args() -> Namespace:
6161
args = parser.parse_args()
6262
return args
6363

64-
65-
def get_model(
66-
ws: Workspace, env: Env, tagname: str = None, tagvalue: str = None
67-
) -> Model:
68-
"""
69-
Gets a model from the models registered with the AML workspace.
70-
If a tag/value pair is supplied, uses it to filter.
71-
72-
:param ws: Current AML workspace
73-
:param env: Environment variables
74-
:param tagname: Optional tag name, default is None
75-
:param tagvalue: Optional tag value, default is None
76-
77-
:returns: Model
78-
79-
:raises: ValueError
80-
"""
81-
if tagname is not None and tagvalue is not None:
82-
model = Model(ws, name=env.model_name, tags=[[tagname, tagvalue]])
83-
elif (tagname is None and tagvalue is not None) or (
84-
tagvalue is None and tagname is not None
85-
):
86-
raise ValueError(
87-
"model_tag_name and model_tag_value should both be supplied"
88-
+ "or excluded" # NOQA: E501
89-
)
90-
else:
91-
model = Model(ws, name=env.model_name)
92-
return model
93-
94-
9564
def get_or_create_datastore(
9665
datastorename: str, ws: Workspace, env: Env, input: bool = True
9766
) -> Datastore:
@@ -331,7 +300,6 @@ def get_run_configs(
331300

332301

333302
def get_scoring_pipeline(
334-
model: Model,
335303
scoring_dataset: Dataset,
336304
output_loc: PipelineData,
337305
score_run_config: ParallelRunConfig,
@@ -362,6 +330,9 @@ def get_scoring_pipeline(
362330
model_name_param = PipelineParameter(
363331
"model_name", default_value=env.model_name
364332
) # NOQA: E501
333+
model_version_param = PipelineParameter(
334+
"model_version", default_value=env.model_version
335+
) # NOQA: E501
365336
model_tag_name_param = PipelineParameter(
366337
"model_tag_name", default_value=" "
367338
) # NOQA: E501
@@ -376,6 +347,8 @@ def get_scoring_pipeline(
376347
arguments=[
377348
"--model_name",
378349
model_name_param,
350+
"--model_version",
351+
model_version_param,
379352
"--model_tag_name",
380353
model_tag_name_param,
381354
"--model_tag_value",
@@ -450,12 +423,7 @@ def build_batchscore_pipeline():
450423
aml_workspace, aml_compute_score, env
451424
)
452425

453-
trained_model = get_model(
454-
aml_workspace, env, args.model_tag_name, args.model_tag_value
455-
)
456-
457426
scoring_pipeline = get_scoring_pipeline(
458-
trained_model,
459427
input_dataset,
460428
output_location,
461429
scoring_runconfig,

ml_service/pipelines/diabetes_regression_verify_train_pipeline.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
from azureml.core import Run, Experiment, Workspace
55
from ml_service.util.env_variables import Env
6-
from diabetes_regression.util.model_helper import get_latest_model
6+
from diabetes_regression.util.model_helper import get_model
77

88

99
def main():
@@ -53,8 +53,8 @@ def main():
5353

5454
try:
5555
tag_name = 'BuildId'
56-
model = get_latest_model(
57-
model_name, tag_name, build_id, exp.workspace)
56+
model = get_model(
57+
model_name=model_name, tag_name=tag_name, tag_value=build_id, aml_workspace=exp.workspace)
5858
if (model is not None):
5959
print("Model was registered for this build.")
6060
if (model is None):

ml_service/pipelines/run_parallel_batchscore_pipeline.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def run_batchscore_pipeline():
115115
scoringpipeline,
116116
pipeline_parameters={
117117
"model_name": env.model_name,
118+
"model_version": env.model_version,
118119
"model_tag_name": " ",
119120
"model_tag_value": " ",
120121
},

0 commit comments

Comments
 (0)