diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_blob_storage_helper.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_blob_storage_helper.py index d3273540c8e6..4af026ecddea 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_blob_storage_helper.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_blob_storage_helper.py @@ -93,7 +93,7 @@ def upload( file_size, _ = get_directory_size(source, ignore_file=ignore_file) file_size_in_mb = file_size / 10**6 cloud = _get_cloud_details() - cloud_endpoint = cloud['storage_endpoint'] # make sure proper cloud endpoint is used + cloud_endpoint = cloud["storage_endpoint"] # make sure proper cloud endpoint is used full_storage_url = f"https://{self.account_name}.blob.{cloud_endpoint}/{self.container}/{dest}" if file_size_in_mb > 100: module_logger.warning(FILE_SIZE_WARNING.format(source=source, destination=full_storage_url)) @@ -231,9 +231,9 @@ def download( # check if total size of download has exceeded 100 MB # make sure proper cloud endpoint is used cloud = _get_cloud_details() - cloud_endpoint = cloud['storage_endpoint'] + cloud_endpoint = cloud["storage_endpoint"] full_storage_url = f"https://{self.account_name}.blob.{cloud_endpoint}/{self.container}/{starts_with}" - download_size_in_mb += (blob_content.size / 10**6) + download_size_in_mb += blob_content.size / 10**6 if download_size_in_mb > 100: module_logger.warning(FILE_SIZE_WARNING.format(source=full_storage_url, destination=destination)) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_gen2_storage_helper.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_gen2_storage_helper.py index 6e53bf1142bb..9458c493349f 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_gen2_storage_helper.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_gen2_storage_helper.py @@ -85,7 +85,7 @@ def upload( file_size_in_mb = file_size / 10**6 cloud = _get_cloud_details() - cloud_endpoint = cloud['storage_endpoint'] # make sure proper cloud endpoint is used + cloud_endpoint = cloud["storage_endpoint"] # make sure proper cloud endpoint is used full_storage_url = f"https://{self.account_name}.dfs.{cloud_endpoint}/{self.file_system}/{dest}" if file_size_in_mb > 100: module_logger.warning(FILE_SIZE_WARNING.format(source=source, destination=full_storage_url)) @@ -185,9 +185,9 @@ def download(self, starts_with: str, destination: str = Path.home()) -> None: # check if total size of download has exceeded 100 MB cloud = _get_cloud_details() - cloud_endpoint = cloud['storage_endpoint'] # make sure proper cloud endpoint is used + cloud_endpoint = cloud["storage_endpoint"] # make sure proper cloud endpoint is used full_storage_url = f"https://{self.account_name}.dfs.{cloud_endpoint}/{self.file_system}/{starts_with}" - download_size_in_mb += (file_client.get_file_properties().size / 10**6) + download_size_in_mb += file_client.get_file_properties().size / 10**6 if download_size_in_mb > 100: module_logger.warning(FILE_SIZE_WARNING.format(source=full_storage_url, destination=destination)) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_azure_environments.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_azure_environments.py index c42317356990..f08de6c9bb6a 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_azure_environments.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_azure_environments.py @@ -15,7 +15,6 @@ from azure.mgmt.core import ARMPipelineClient - module_logger = logging.getLogger(__name__) @@ -63,10 +62,11 @@ class EndpointURLS: # pylint: disable=too-few-public-methods,no-init _requests_pipeline = None + def _get_cloud(cloud: str): 
if cloud in _environments: return _environments[cloud] - arm_url = os.environ.get(ArmConstants.METADATA_URL_ENV_NAME,ArmConstants.DEFAULT_URL) + arm_url = os.environ.get(ArmConstants.METADATA_URL_ENV_NAME, ArmConstants.DEFAULT_URL) arm_clouds = _get_clouds_by_metadata_url(arm_url) try: new_cloud = arm_clouds[cloud] @@ -75,6 +75,7 @@ def _get_cloud(cloud: str): except KeyError: raise Exception('Unknown cloud environment "{0}".'.format(cloud)) + def _get_default_cloud_name(): """Return AzureCloud as the default cloud.""" return os.getenv(AZUREML_CLOUD_ENV_NAME, AzureEnvironments.ENV_DEFAULT) @@ -209,53 +210,56 @@ def _resource_to_scopes(resource): scope = resource + "/.default" return [scope] + def _get_registry_discovery_url(cloud, cloud_suffix=""): """Get or generate the registry discovery url - :param cloud: configuration of the cloud to get the registry_discovery_url from - :param cloud_suffix: the suffix to use for the cloud, in the case that the registry_discovery_url - must be generated - :return: string of discovery url + :param cloud: configuration of the cloud to get the registry_discovery_url from + :param cloud_suffix: the suffix to use for the cloud, in the case that the registry_discovery_url + must be generated + :return: string of discovery url """ cloud_name = cloud["name"] if cloud_name in _environments: return _environments[cloud_name].registry_url registry_discovery_region = os.environ.get( - ArmConstants.REGISTRY_DISCOVERY_REGION_ENV_NAME, - ArmConstants.REGISTRY_DISCOVERY_DEFAULT_REGION + ArmConstants.REGISTRY_DISCOVERY_REGION_ENV_NAME, ArmConstants.REGISTRY_DISCOVERY_DEFAULT_REGION ) registry_discovery_region_default = "https://{}{}.api.azureml.{}/".format( - cloud_name.lower(), - registry_discovery_region, - cloud_suffix + cloud_name.lower(), registry_discovery_region, cloud_suffix ) return os.environ.get(ArmConstants.REGISTRY_ENV_URL, registry_discovery_region_default) + def _get_clouds_by_metadata_url(metadata_url): """Get all the clouds by the specified metadata url - :return: list of the clouds + :return: list of the clouds """ try: - module_logger.debug('Start : Loading cloud metadata from the url specified by %s', metadata_url) + module_logger.debug("Start : Loading cloud metadata from the url specified by %s", metadata_url) client = ARMPipelineClient(base_url=metadata_url, policies=[]) HttpRequest("GET", metadata_url) with client.send_request(HttpRequest("GET", metadata_url)) as meta_response: arm_cloud_dict = meta_response.json() cli_cloud_dict = _convert_arm_to_cli(arm_cloud_dict) - module_logger.debug('Finish : Loading cloud metadata from the url specified by %s', metadata_url) + module_logger.debug("Finish : Loading cloud metadata from the url specified by %s", metadata_url) return cli_cloud_dict except Exception as ex: # pylint: disable=broad-except - module_logger.warning("Error: Azure ML was unable to load cloud metadata from the url specified by %s. %s. " - "This may be due to a misconfiguration of networking controls. Azure Machine Learning Python " - "SDK requires outbound access to Azure Resource Manager. Please contact your networking team " - "to configure outbound access to Azure Resource Manager on both Network Security Group and " - "Firewall. For more details on required configurations, see " - "https://docs.microsoft.com/azure/machine-learning/how-to-access-azureml-behind-firewall.", - metadata_url, ex) + module_logger.warning( + "Error: Azure ML was unable to load cloud metadata from the url specified by %s. %s. 
" + "This may be due to a misconfiguration of networking controls. Azure Machine Learning Python " + "SDK requires outbound access to Azure Resource Manager. Please contact your networking team " + "to configure outbound access to Azure Resource Manager on both Network Security Group and " + "Firewall. For more details on required configurations, see " + "https://docs.microsoft.com/azure/machine-learning/how-to-access-azureml-behind-firewall.", + metadata_url, + ex, + ) return {} + def _convert_arm_to_cli(arm_cloud_metadata): cli_cloud_metadata_dict = {} if isinstance(arm_cloud_metadata, dict): @@ -265,7 +269,7 @@ def _convert_arm_to_cli(arm_cloud_metadata): try: cloud_name = cloud["name"] portal_endpoint = cloud["portal"] - cloud_suffix = ".".join(portal_endpoint.split('.')[2:]).replace("/", "") + cloud_suffix = ".".join(portal_endpoint.split(".")[2:]).replace("/", "") registry_discovery_url = _get_registry_discovery_url(cloud, cloud_suffix) cli_cloud_metadata_dict[cloud_name] = { EndpointURLS.AZURE_PORTAL_ENDPOINT: cloud["portal"], @@ -273,7 +277,7 @@ def _convert_arm_to_cli(arm_cloud_metadata): EndpointURLS.ACTIVE_DIRECTORY_ENDPOINT: cloud["authentication"]["loginEndpoint"], EndpointURLS.AML_RESOURCE_ID: "https://ml.azure.{}".format(cloud_suffix), EndpointURLS.STORAGE_ENDPOINT: cloud["suffixes"]["storage"], - EndpointURLS.REGISTRY_DISCOVERY_ENDPOINT: registry_discovery_url + EndpointURLS.REGISTRY_DISCOVERY_ENDPOINT: registry_discovery_url, } except KeyError as ex: module_logger.warning("Property on cloud not found in arm cloud metadata: %s", ex) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/component.py index 71c49a588779..a13ac8cf4ccd 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/component.py @@ -119,6 +119,7 @@ def _serialize(self, obj, *, many: bool = False): def add_param_overrides(self, data, **kwargs): source_path = self.context.pop(SOURCE_PATH_CONTEXT_KEY, None) if isinstance(data, dict) and source_path and os.path.isfile(source_path): + def should_node_overwritten(_root, _parts): parts = _parts.copy() parts.pop() @@ -135,9 +136,7 @@ def should_node_overwritten(_root, _parts): ("inputs.*.enum", should_node_overwritten), ]: for dot_key in get_valid_dot_keys_with_wildcard( - origin_data, - dot_key_wildcard, - validate_func=condition_func + origin_data, dot_key_wildcard, validate_func=condition_func ): pydash.set_(data, dot_key, pydash.get(origin_data, dot_key)) return super().add_param_overrides(data, **kwargs) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/input_output.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/input_output.py index e0f0082f079e..057087ae82b7 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/input_output.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/input_output.py @@ -18,7 +18,7 @@ "float", "Float", "double", - "Double" + "Double", ] diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/__init__.py index a065b8489c06..13d8db80f83b 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/__init__.py @@ -4,6 +4,4 @@ from ._yaml_utils import yaml_safe_load_with_base_resolver -__all__ = [ - "yaml_safe_load_with_base_resolver" -] +__all__ = ["yaml_safe_load_with_base_resolver"] diff --git 
a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_utils.py index eb5b3680e8b8..272c162c6f4c 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_utils.py @@ -11,6 +11,7 @@ def _map_internal_output_type(_meta): """Map component output type to valid pipeline output type.""" + def _map_primitive_type(_type): """Convert double and float to number type.""" _type = _type.lower() diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_yaml_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_yaml_utils.py index a0dd70cee2c0..29c2d8491f8d 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_yaml_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_utils/_yaml_utils.py @@ -18,6 +18,7 @@ class _SafeLoaderWithBaseLoader(strictyaml.ruamel.SafeLoader): from the inheritance list. Instead, we overwrite add_version_implicit_resolver method to make _SafeLoaderWithBaseLoader._version_implicit_resolver empty. Then the resolver will act like a BaseResolver. """ + def fetch_comment(self, comment): pass diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py index 0ebb927265c9..6116381edbed 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py @@ -95,11 +95,7 @@ def _copy(src: Path, dst: Path, *, ignore_file=None) -> None: # for same folder, the expected behavior is merging # ignore will be also applied during this process for name in src.glob("*"): - _AdditionalIncludes._copy( - name, - dst / name.name, - ignore_file=ignore_file.merge(name) - ) + _AdditionalIncludes._copy(name, dst / name.name, ignore_file=ignore_file.merge(name)) @staticmethod def _is_folder_to_compress(path: Path) -> bool: @@ -182,9 +178,7 @@ def resolve(self) -> None: skip_ignore_file=True, ) self._copy( - Path(self._code_path), - tmp_folder_path / Path(self._code_path).name, - ignore_file=root_ignore_file + Path(self._code_path), tmp_folder_path / Path(self._code_path).name, ignore_file=root_ignore_file ) else: # current implementation of ignore file is based on absolute path, so it cannot be shared diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/data_collector_schema.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/data_collector_schema.py index 2ca0d037afd1..0c4fb1965623 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/data_collector_schema.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/data_collector_schema.py @@ -23,7 +23,7 @@ class DataCollectorSchema(metaclass=PatchedSchemaMeta): collections = fields.Mapping(fields.Str, NestedField(DeploymentCollectionSchema)) rolling_rate = StringTransformedEnum( required=False, - allowed_values=[ RollingRate.MINUTE, RollingRate.DAY, RollingRate.HOUR], + allowed_values=[RollingRate.MINUTE, RollingRate.DAY, RollingRate.HOUR], ) destination = NestedField(DestinationSchema) sampling_rate = fields.Float() diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/deployment_collection_schema.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/deployment_collection_schema.py index fb7dad0f5064..41b624b9ace4 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/deployment_collection_schema.py +++ 
b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/deployment_collection_schema.py @@ -15,10 +15,7 @@ class DeploymentCollectionSchema(metaclass=PatchedSchemaMeta): - enabled = StringTransformedEnum( - required= True, - allowed_values=[Boolean.TRUE, Boolean.FALSE] - ) + enabled = StringTransformedEnum(required=True, allowed_values=[Boolean.TRUE, Boolean.FALSE]) data = NestedField(DataAssetSchema) # pylint: disable=unused-argument,no-self-use diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/payload_response_schema.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/payload_response_schema.py index ed4c19f24827..e1dd1ae267a5 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/payload_response_schema.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_deployment/online/payload_response_schema.py @@ -14,10 +14,7 @@ class PayloadResponseSchema(metaclass=PatchedSchemaMeta): - enabled = StringTransformedEnum( - required= True, - allowed_values=[Boolean.TRUE, Boolean.FALSE] - ) + enabled = StringTransformedEnum(required=True, allowed_values=[Boolean.TRUE, Boolean.FALSE]) # pylint: disable=unused-argument,no-self-use @post_load diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_endpoint/endpoint.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_endpoint/endpoint.py index e0b896b40f7c..1ff43338bc74 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_endpoint/endpoint.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_endpoint/endpoint.py @@ -18,10 +18,7 @@ class EndpointSchema(PathAwareSchema): id = fields.Str() - name = fields.Str( - required=True, - validate=validate.Regexp(EndpointConfigurations.NAME_REGEX_PATTERN) - ) + name = fields.Str(required=True, validate=validate.Regexp(EndpointConfigurations.NAME_REGEX_PATTERN)) description = fields.Str(metadata={"description": "Description of the inference endpoint."}) tags = fields.Dict() provisioning_state = fields.Str(metadata={"description": "Provisioning state for the endpoint."}) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/assets/environment.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/assets/environment.py index 273ff477765a..b3034bff5539 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/assets/environment.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/assets/environment.py @@ -75,9 +75,7 @@ class _BaseEnvironmentSchema(AssetSchema): ) build = NestedField( BuildContextSchema, - metadata={ - "description": "Docker build context to create the environment. Mutually exclusive with image" - }, + metadata={"description": "Docker build context to create the environment. Mutually exclusive with image"}, ) image = fields.Str() conda_file = UnionField([fields.Raw(), fields.Str()]) @@ -101,9 +99,7 @@ def pre_load(self, data, **kwargs): # validates that "channels" and "dependencies" are not included in the data creation. 
# These properties should only be on environment conda files not in the environment creation file if "channels" in data or "dependencies" in data: - environmentMessage = CREATE_ENVIRONMENT_ERROR_MESSAGE.format( - YAMLRefDocLinks.ENVIRONMENT - ) + environmentMessage = CREATE_ENVIRONMENT_ERROR_MESSAGE.format(YAMLRefDocLinks.ENVIRONMENT) raise ValidationError(environmentMessage) return data diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/__init__.py index 9d7867c513e7..ca2d9f01ca26 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/__init__.py @@ -9,11 +9,18 @@ from .import_component import AnonymousImportComponentSchema, ImportComponentFileRefField, ImportComponentSchema from .parallel_component import AnonymousParallelComponentSchema, ParallelComponentFileRefField, ParallelComponentSchema from .spark_component import AnonymousSparkComponentSchema, SparkComponentFileRefField, SparkComponentSchema -from .data_transfer_component import AnonymousDataTransferCopyComponentSchema, DataTransferCopyComponentFileRefField, \ - DataTransferCopyComponentSchema, AnonymousDataTransferImportComponentSchema, \ - DataTransferImportComponentFileRefField, DataTransferImportComponentSchema, \ - AnonymousDataTransferExportComponentSchema, DataTransferExportComponentFileRefField, \ - DataTransferExportComponentSchema +from .data_transfer_component import ( + AnonymousDataTransferCopyComponentSchema, + DataTransferCopyComponentFileRefField, + DataTransferCopyComponentSchema, + AnonymousDataTransferImportComponentSchema, + DataTransferImportComponentFileRefField, + DataTransferImportComponentSchema, + AnonymousDataTransferExportComponentSchema, + DataTransferExportComponentFileRefField, + DataTransferExportComponentSchema, +) + __all__ = [ "ComponentSchema", "CommandComponentSchema", diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/data_transfer_component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/data_transfer_component.py index 68ebce3c3f1b..e0e569ea9187 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/data_transfer_component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/component/data_transfer_component.py @@ -15,8 +15,13 @@ from azure.ai.ml._schema.core.schema_meta import PatchedSchemaMeta from azure.ai.ml._schema.core.fields import FileRefField, StringTransformedEnum, NestedField from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, AssetTypes -from azure.ai.ml.constants._component import ComponentSource, NodeType, DataTransferTaskType, DataCopyMode, \ - ExternalDataType +from azure.ai.ml.constants._component import ( + ComponentSource, + NodeType, + DataTransferTaskType, + DataCopyMode, + ExternalDataType, +) class DataTransferComponentSchemaMixin(ComponentSchema): @@ -25,8 +30,9 @@ class DataTransferComponentSchemaMixin(ComponentSchema): class DataTransferCopyComponentSchema(DataTransferComponentSchemaMixin): task = StringTransformedEnum(allowed_values=[DataTransferTaskType.COPY_DATA], required=True) - data_copy_mode = StringTransformedEnum(allowed_values=[DataCopyMode.MERGE_WITH_OVERWRITE, - DataCopyMode.FAIL_IF_CONFLICT]) + data_copy_mode = StringTransformedEnum( + allowed_values=[DataCopyMode.MERGE_WITH_OVERWRITE, DataCopyMode.FAIL_IF_CONFLICT] + ) inputs = fields.Dict( keys=fields.Str(), values=NestedField(InputPortSchema), @@ -34,24 +40,25 @@ class 
DataTransferCopyComponentSchema(DataTransferComponentSchemaMixin): class SinkSourceSchema(metaclass=PatchedSchemaMeta): - type = StringTransformedEnum(allowed_values=[ExternalDataType.FILE_SYSTEM, ExternalDataType.DATABASE], - required=True) + type = StringTransformedEnum( + allowed_values=[ExternalDataType.FILE_SYSTEM, ExternalDataType.DATABASE], required=True + ) class SourceInputsSchema(metaclass=PatchedSchemaMeta): """ For export task in DataTransfer, inputs type only support uri_file for database and uri_folder for filesystem. """ - type = StringTransformedEnum(allowed_values=[AssetTypes.URI_FOLDER, AssetTypes.URI_FILE], - required=True) + + type = StringTransformedEnum(allowed_values=[AssetTypes.URI_FOLDER, AssetTypes.URI_FILE], required=True) class SinkOutputsSchema(metaclass=PatchedSchemaMeta): """ For import task in DataTransfer, outputs type only support mltable for database and uri_folder for filesystem; """ - type = StringTransformedEnum(allowed_values=[AssetTypes.MLTABLE, AssetTypes.URI_FOLDER], - required=True) + + type = StringTransformedEnum(allowed_values=[AssetTypes.MLTABLE, AssetTypes.URI_FOLDER], required=True) class DataTransferImportComponentSchema(DataTransferComponentSchemaMixin): @@ -64,14 +71,15 @@ class DataTransferImportComponentSchema(DataTransferComponentSchemaMixin): @validates("inputs") def inputs_key(self, value): - raise ValidationError(f"inputs field is not a valid filed in task type " - f"{DataTransferTaskType.IMPORT_DATA}.") + raise ValidationError(f"inputs field is not a valid filed in task type " f"{DataTransferTaskType.IMPORT_DATA}.") @validates("outputs") def outputs_key(self, value): if len(value) != 1 or value and list(value.keys())[0] != "sink": - raise ValidationError(f"outputs field only support one output called sink in task type " - f"{DataTransferTaskType.IMPORT_DATA}.") + raise ValidationError( + f"outputs field only support one output called sink in task type " + f"{DataTransferTaskType.IMPORT_DATA}." + ) class DataTransferExportComponentSchema(DataTransferComponentSchemaMixin): @@ -85,13 +93,16 @@ class DataTransferExportComponentSchema(DataTransferComponentSchemaMixin): @validates("inputs") def inputs_key(self, value): if len(value) != 1 or value and list(value.keys())[0] != "source": - raise ValidationError(f"inputs field only support one input called source in task type " - f"{DataTransferTaskType.EXPORT_DATA}.") + raise ValidationError( + f"inputs field only support one input called source in task type " + f"{DataTransferTaskType.EXPORT_DATA}." + ) @validates("outputs") def outputs_key(self, value): - raise ValidationError(f"outputs field is not a valid filed in task type " - f"{DataTransferTaskType.EXPORT_DATA}.") + raise ValidationError( + f"outputs field is not a valid filed in task type " f"{DataTransferTaskType.EXPORT_DATA}." 
+ ) class RestDataTransferCopyComponentSchema(DataTransferCopyComponentSchema): diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/aml_compute.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/aml_compute.py index 83b65aebfb3f..f3ba226ab2bf 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/aml_compute.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/aml_compute.py @@ -29,9 +29,7 @@ def make(self, data, **kwargs): class AmlComputeSchema(ComputeSchema): type = StringTransformedEnum(allowed_values=[ComputeType.AMLCOMPUTE], required=True) size = fields.Str() - tier = StringTransformedEnum( - allowed_values=[ComputeTier.LOWPRIORITY, ComputeTier.DEDICATED] - ) + tier = StringTransformedEnum(allowed_values=[ComputeTier.LOWPRIORITY, ComputeTier.DEDICATED]) min_instances = fields.Int() max_instances = fields.Int() idle_time_before_scale_down = fields.Int() @@ -40,7 +38,5 @@ class AmlComputeSchema(ComputeSchema): network_settings = NestedField(NetworkSettingsSchema) identity = NestedField(IdentitySchema) enable_node_public_ip = fields.Bool( - metadata={ - "description": "Enable or disable node public IP address provisioning." - } + metadata={"description": "Enable or disable node public IP address provisioning."} ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/compute_instance.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/compute_instance.py index 344a60adc874..f7cd68bacb0c 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/compute_instance.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/compute/compute_instance.py @@ -52,9 +52,7 @@ def make(self, data, **kwargs): class ComputeInstanceSchema(ComputeSchema): - type = StringTransformedEnum( - allowed_values=[ComputeType.COMPUTEINSTANCE], required=True - ) + type = StringTransformedEnum(allowed_values=[ComputeType.COMPUTEINSTANCE], required=True) size = fields.Str() network_settings = NestedField(NetworkSettingsSchema) create_on_behalf_of = NestedField(CreateOnBehalfOfSchema) @@ -62,20 +60,14 @@ class ComputeInstanceSchema(ComputeSchema): ssh_public_access_enabled = fields.Bool(dump_default=None) state = fields.Str(dump_only=True) last_operation = fields.Dict(keys=fields.Str(), values=fields.Str(), dump_only=True) - services = fields.List( - fields.Dict(keys=fields.Str(), values=fields.Str()), dump_only=True - ) + services = fields.List(fields.Dict(keys=fields.Str(), values=fields.Str()), dump_only=True) schedules = NestedField(ComputeSchedulesSchema) identity = ExperimentalField(NestedField(IdentitySchema)) idle_time_before_shutdown = ExperimentalField(fields.Str()) idle_time_before_shutdown_minutes = ExperimentalField(fields.Int()) custom_applications = ExperimentalField(fields.List(NestedField(CustomApplicationsSchema))) setup_scripts = ExperimentalField(NestedField(SetupScriptsSchema)) - os_image_metadata = ExperimentalField( - NestedField(OsImageMetadataSchema, dump_only=True) - ) + os_image_metadata = ExperimentalField(NestedField(OsImageMetadataSchema, dump_only=True)) enable_node_public_ip = fields.Bool( - metadata={ - "description": "Enable or disable node public IP address provisioning." 
- } + metadata={"description": "Enable or disable node public IP address provisioning."} ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/core/fields.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/core/fields.py index b23ceb90e1f3..684190b99a8a 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/core/fields.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/core/fields.py @@ -83,24 +83,14 @@ def _jsonschema_type_mapping(self): def _serialize(self, value, attr, obj, **kwargs): if not value: return - if ( - isinstance(value, str) - and self.casing_transform(value) in self.allowed_values - ): + if isinstance(value, str) and self.casing_transform(value) in self.allowed_values: return value if self.pass_original else self.casing_transform(value) - raise ValidationError( - f"Value {value!r} passed is not in set {self.allowed_values}" - ) + raise ValidationError(f"Value {value!r} passed is not in set {self.allowed_values}") def _deserialize(self, value, attr, data, **kwargs): - if ( - isinstance(value, str) - and self.casing_transform(value) in self.allowed_values - ): + if isinstance(value, str) and self.casing_transform(value) in self.allowed_values: return value if self.pass_original else self.casing_transform(value) - raise ValidationError( - f"Value {value!r} passed is not in set {self.allowed_values}" - ) + raise ValidationError(f"Value {value!r} passed is not in set {self.allowed_values}") class DumpableEnumField(StringTransformedEnum): @@ -167,9 +157,7 @@ def _validate(self, value): path = self._resolve_path(value) if (self._allow_dir and path.is_dir()) or (self._allow_file and path.is_file()): return - raise self.make_error( - "path_not_exist", path=path.as_posix(), allow_type=self.allowed_path_type - ) + raise self.make_error("path_not_exist", path=path.as_posix(), allow_type=self.allowed_path_type) def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[str]: # do not block serializing None even if required or not allow_none. @@ -177,10 +165,7 @@ def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[str]: return None self._validate(value) # always dump path as absolute path in string as base_path will be dropped after serialization - return super(LocalPathField, self)._serialize( - self._resolve_path(value).as_posix(), attr, obj, **kwargs - ) - + return super(LocalPathField, self)._serialize(self._resolve_path(value).as_posix(), attr, obj, **kwargs) class SerializeValidatedUrl(fields.Url): @@ -273,9 +258,7 @@ def _validate(self, value): try: from_iso_datetime(value) except Exception: - raise ValidationError( - f"Not a valid ISO8601-formatted datetime string: {value}" - ) + raise ValidationError(f"Not a valid ISO8601-formatted datetime string: {value}") class ArmStr(Field): @@ -317,9 +300,7 @@ def _deserialize(self, value, attr, data, **kwargs): if self.azureml_type is not None: azureml_type_suffix = self.azureml_type else: - azureml_type_suffix = ( - "" + "//)" - ) + azureml_type_suffix = "" + "//)" raise ValidationError( f"In order to specify an existing {self.azureml_type if self.azureml_type is not None else 'asset'}, " "please provide either of the following prefixed with 'azureml:':\n" @@ -363,9 +344,7 @@ def _deserialize(self, value, attr, data, **kwargs): if not (label or version): if self.allow_default_version: return name - raise ValidationError( - f"Either version or label is not provided for {attr} or the id is not valid." 
- ) + raise ValidationError(f"Either version or label is not provided for {attr} or the id is not valid.") if version: return f"{name}:{version}" @@ -452,16 +431,12 @@ def __init__(self, union_fields: List[fields.Field], is_strict=False, **kwargs): try: # add the validation and make sure union_fields must be subclasses or instances of # marshmallow.base.FieldABC - self._union_fields = [ - resolve_field_instance(cls_or_instance) - for cls_or_instance in union_fields - ] + self._union_fields = [resolve_field_instance(cls_or_instance) for cls_or_instance in union_fields] # TODO: make serialization/de-serialization work in the same way as json schema when is_strict is True self.is_strict = is_strict # S\When True, combine fields with oneOf instead of anyOf at schema generation except FieldInstanceResolutionError as error: raise ValueError( - 'Elements of "union_fields" must be subclasses or ' - "instances of marshmallow.base.FieldABC." + 'Elements of "union_fields" must be subclasses or ' "instances of marshmallow.base.FieldABC." ) from error @property @@ -521,13 +496,9 @@ def _deserialize(self, value, attr, data, **kwargs): and isinstance(schema.schema, PathAwareSchema) ): # use old base path to recover original base path - schema.schema.context[ - BASE_PATH_CONTEXT_KEY - ] = schema.schema.old_base_path + schema.schema.context[BASE_PATH_CONTEXT_KEY] = schema.schema.old_base_path # recover base path of parent schema - schema.context[BASE_PATH_CONTEXT_KEY] = schema.schema.context[ - BASE_PATH_CONTEXT_KEY - ] + schema.context[BASE_PATH_CONTEXT_KEY] = schema.schema.context[BASE_PATH_CONTEXT_KEY] raise ValidationError(errors, field_name=attr) @@ -565,8 +536,7 @@ def __init__( for type_name, type_sensitive_fields in type_sensitive_fields_dict.items(): union_fields.extend(type_sensitive_fields) self._type_sensitive_fields_dict[type_name] = [ - resolve_field_instance(cls_or_instance) - for cls_or_instance in type_sensitive_fields + resolve_field_instance(cls_or_instance) for cls_or_instance in type_sensitive_fields ] super(TypeSensitiveUnionField, self).__init__(union_fields, **kwargs) @@ -578,9 +548,7 @@ def _bind_to_schema(self, field_name, schema): type_name, type_sensitive_fields, ) in self._type_sensitive_fields_dict.items(): - self._type_sensitive_fields_dict[type_name] = self._create_bind_fields( - type_sensitive_fields, field_name - ) + self._type_sensitive_fields_dict[type_name] = self._create_bind_fields(type_sensitive_fields, field_name) @property def type_field_name(self) -> str: @@ -602,18 +570,14 @@ def _raise_simplified_error_base_on_type(self, e, value, attr): & its type doesn't match any allowed types, raise "Value {} not in set {}"; If value has type & its type matches at least 1 field, return the first matched error message;""" - value_type = try_get_non_arbitrary_attr_for_potential_attr_dict( - value, self.type_field_name - ) + value_type = try_get_non_arbitrary_attr_for_potential_attr_dict(value, self.type_field_name) if value_type is None: # if value has no type field, raise original error raise e if value_type not in self.allowed_types: # if value has type field but its value doesn't match any allowed value, raise ValidationError directly raise ValidationError( - message={ - self.type_field_name: f"Value {value_type!r} passed is not in set {self.allowed_types}" - }, + message={self.type_field_name: f"Value {value_type!r} passed is not in set {self.allowed_types}"}, field_name=attr, ) # if value has type field and its value match at least 1 allowed value, raise first matched @@ 
-630,9 +594,7 @@ def _raise_simplified_error_base_on_type(self, e, value, attr): def _serialize(self, value, attr, obj, **kwargs): union_fields = self._union_fields[:] - value_type = try_get_non_arbitrary_attr_for_potential_attr_dict( - value, self.type_field_name - ) + value_type = try_get_non_arbitrary_attr_for_potential_attr_dict(value, self.type_field_name) if value_type is not None and value_type in self.allowed_types: target_fields = self._type_sensitive_fields_dict[value_type] if len(target_fields) == 1: @@ -640,9 +602,7 @@ def _serialize(self, value, attr, obj, **kwargs): self._union_fields = target_fields try: - return super(TypeSensitiveUnionField, self)._serialize( - value, attr, obj, **kwargs - ) + return super(TypeSensitiveUnionField, self)._serialize(value, attr, obj, **kwargs) except ValidationError as e: self._raise_simplified_error_base_on_type(e, value, attr) finally: @@ -670,9 +630,7 @@ def _try_load_from_yaml(self, value): def _deserialize(self, value, attr, data, **kwargs): try: - return super(TypeSensitiveUnionField, self)._deserialize( - value, attr, data, **kwargs - ) + return super(TypeSensitiveUnionField, self)._deserialize(value, attr, data, **kwargs) except ValidationError as e: if isinstance(value, str) and self._allow_load_from_yaml: value = self._try_load_from_yaml(value) @@ -711,9 +669,7 @@ def CodeField(**kwargs): # put arm versioned string at last order as it can deserialize any string into "azureml:" ArmVersionedStr(azureml_type=AzureMLResourceType.CODE), ], - metadata={ - "description": "A local path or http:, https:, azureml: url pointing to a remote location." - }, + metadata={"description": "A local path or http:, https:, azureml: url pointing to a remote location."}, **kwargs, ) @@ -777,9 +733,7 @@ def _deserialize(self, value, attr, data, **kwargs) -> str: class DumpableIntegerField(fields.Integer): - def _serialize( - self, value, attr, obj, **kwargs - ) -> typing.Optional[typing.Union[str, _T]]: + def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[typing.Union[str, _T]]: if self.strict and not isinstance(value, int): # this implementation can serialize bool to bool raise self.make_error("invalid", input=value) @@ -803,16 +757,12 @@ def _validated(self, value): raise self.make_error("invalid", input=value) return super()._validated(value) - def _serialize( - self, value, attr, obj, **kwargs - ) -> typing.Optional[typing.Union[str, _T]]: + def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[typing.Union[str, _T]]: return super()._serialize(self._validated(value), attr, obj, **kwargs) class DumpableStringField(fields.String): - def _serialize( - self, value, attr, obj, **kwargs - ) -> typing.Optional[typing.Union[str, _T]]: + def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[typing.Union[str, _T]]: if not isinstance(value, str): raise ValidationError("Given value is not a string") return super()._serialize(value, attr, obj, **kwargs) @@ -826,8 +776,7 @@ def __init__(self, experimental_field: fields.Field, **kwargs): self.required = experimental_field.required except FieldInstanceResolutionError as error: raise ValueError( - '"experimental_field" must be subclasses or ' - "instances of marshmallow.base.FieldABC." + '"experimental_field" must be subclasses or ' "instances of marshmallow.base.FieldABC." ) from error # This sets the parent for the schema and also handles nesting. 
@@ -842,9 +791,7 @@ def _serialize(self, value, attr, obj, **kwargs): def _deserialize(self, value, attr, data, **kwargs): if value is not None: - message = "Field '{0}': {1} {2}".format( - attr, EXPERIMENTAL_FIELD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE - ) + message = "Field '{0}': {1} {2}".format(attr, EXPERIMENTAL_FIELD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE) if not _is_warning_cached(message): module_logger.warning(message) @@ -953,6 +900,4 @@ def _serialize(self, value, attr, obj, **kwargs): def _deserialize(self, value, attr, data, **kwargs): if isinstance(value, str) and value.startswith("git+"): return value - raise ValidationError( - "In order to specify a git path, please provide the correct path prefixed with 'git+\n" - ) + raise ValidationError("In order to specify a git path, please provide the correct path prefixed with 'git+\n") diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/data_transfer_job.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/data_transfer_job.py index 48a4aa32e47e..2bc56df1777d 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/data_transfer_job.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/data_transfer_job.py @@ -16,8 +16,9 @@ class DataTransferCopyJobSchema(BaseJobSchema): type = StringTransformedEnum(required=True, allowed_values=JobType.DATA_TRANSFER) task = StringTransformedEnum(allowed_values=[DataTransferTaskType.COPY_DATA], required=True) - data_copy_mode = StringTransformedEnum(allowed_values=[DataCopyMode.MERGE_WITH_OVERWRITE, - DataCopyMode.FAIL_IF_CONFLICT]) + data_copy_mode = StringTransformedEnum( + allowed_values=[DataCopyMode.MERGE_WITH_OVERWRITE, DataCopyMode.FAIL_IF_CONFLICT] + ) compute = ComputeField() inputs = InputsField() outputs = OutputsField() @@ -25,7 +26,7 @@ class DataTransferCopyJobSchema(BaseJobSchema): class DataTransferImportJobSchema(BaseJobSchema): type = StringTransformedEnum(required=True, allowed_values=JobType.DATA_TRANSFER) - task = StringTransformedEnum(allowed_values=[DataTransferTaskType.IMPORT_DATA], required=True) + task = StringTransformedEnum(allowed_values=[DataTransferTaskType.IMPORT_DATA], required=True) compute = ComputeField() outputs = fields.Dict( keys=fields.Str(), @@ -37,13 +38,15 @@ class DataTransferImportJobSchema(BaseJobSchema): @validates("outputs") def outputs_key(self, value): # pylint: disable=no-self-use if len(value) != 1 or list(value.keys())[0] != "sink": - raise ValidationError(f"outputs field only support one output called sink in task type " - f"{DataTransferTaskType.IMPORT_DATA}.") + raise ValidationError( + f"outputs field only support one output called sink in task type " + f"{DataTransferTaskType.IMPORT_DATA}." 
+ ) class DataTransferExportJobSchema(BaseJobSchema): type = StringTransformedEnum(required=True, allowed_values=JobType.DATA_TRANSFER) - task = StringTransformedEnum(allowed_values=[DataTransferTaskType.EXPORT_DATA], required=True) + task = StringTransformedEnum(allowed_values=[DataTransferTaskType.EXPORT_DATA], required=True) compute = ComputeField() inputs = InputsField(allow_none=False) sink = UnionField([NestedField(DatabaseSchema), NestedField(FileSystemSchema)], required=True, allow_none=False) @@ -51,5 +54,7 @@ class DataTransferExportJobSchema(BaseJobSchema): @validates("inputs") def inputs_key(self, value): # pylint: disable=no-self-use if len(value) != 1 or list(value.keys())[0] != "source": - raise ValidationError(f"inputs field only support one input called source in task type " - f"{DataTransferTaskType.EXPORT_DATA}.") + raise ValidationError( + f"inputs field only support one input called source in task type " + f"{DataTransferTaskType.EXPORT_DATA}." + ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_entry.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_entry.py index f27e8d8dcc15..3e8aa99542ba 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_entry.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_entry.py @@ -51,7 +51,9 @@ def generate_path_property(azureml_type): fields.Str(metadata={"pattern": r"^(http(s)?):.*"}), fields.Str(metadata={"pattern": r"^(wasb(s)?):.*"}), LocalPathField(pattern=r"^file:.*"), - LocalPathField(pattern=r"^(?!(azureml|http(s)?|wasb(s)?|file):).*",), + LocalPathField( + pattern=r"^(?!(azureml|http(s)?|wasb(s)?|file):).*", + ), ], is_strict=True, ) @@ -104,7 +106,6 @@ class DataInputSchema(InputSchema): datastore = generate_datastore_property() - class MLTableInputSchema(InputSchema): mode = StringTransformedEnum( allowed_values=[ @@ -121,7 +122,6 @@ class MLTableInputSchema(InputSchema): datastore = generate_datastore_property() - class InputLiteralValueSchema(metaclass=PatchedSchemaMeta): value = UnionField([fields.Str(), fields.Bool(), fields.Int(), fields.Float()]) @@ -191,17 +191,10 @@ def check_dict(self, data, **kwargs): class DatabaseSchema(metaclass=PatchedSchemaMeta): - type = StringTransformedEnum( - allowed_values=[ - ExternalDataType.DATABASE - ], - required=True - ) + type = StringTransformedEnum(allowed_values=[ExternalDataType.DATABASE], required=True) table_name = fields.Str() query = fields.Str( - metadata={ - "description": "The sql query command." 
- }, + metadata={"description": "The sql query command."}, ) stored_procedure = fields.Str() stored_procedure_params = fields.List(NestedField(StoredProcedureParamsSchema)) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_fields_provider.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_fields_provider.py index c59ec0dd4411..7fb2e8e014d6 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_fields_provider.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/input_output_fields_provider.py @@ -37,12 +37,7 @@ def InputsField(*, support_databinding: bool = False, **kwargs): return fields.Dict( keys=fields.Str(), - values=UnionField( - value_fields, - metadata={"description": "Inputs to a job."}, - is_strict=True, - **kwargs - ), + values=UnionField(value_fields, metadata={"description": "Inputs to a job."}, is_strict=True, **kwargs), ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/__init__.py index b4598f6f4643..a19931cd30e6 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/__init__.py @@ -4,7 +4,14 @@ # pylint: disable=unused-import __path__ = __import__("pkgutil").extend_path(__path__, __name__) -from .component_job import CommandSchema, ImportSchema, ParallelSchema, SparkSchema, DataTransferCopySchema, \ - DataTransferImportSchema, DataTransferExportSchema +from .component_job import ( + CommandSchema, + ImportSchema, + ParallelSchema, + SparkSchema, + DataTransferCopySchema, + DataTransferImportSchema, + DataTransferExportSchema, +) from .pipeline_job import PipelineJobSchema from .settings import PipelineJobSettingsSchema diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/component_job.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/component_job.py index 75e61f857b0a..a06135e81aa8 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/component_job.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/component_job.py @@ -440,21 +440,20 @@ class DataTransferImportSchema(BaseNodeSchema): compute = ComputeField() source = UnionField([NestedField(DatabaseSchema), NestedField(FileSystemSchema)], required=True, allow_none=False) outputs = fields.Dict( - keys=fields.Str(), - values=UnionField([OutputBindingStr, NestedField(OutputSchema)]), - allow_none=False + keys=fields.Str(), values=UnionField([OutputBindingStr, NestedField(OutputSchema)]), allow_none=False ) @validates("inputs") def inputs_key(self, value): - raise ValidationError(f"inputs field is not a valid filed in task type " - f"{DataTransferTaskType.IMPORT_DATA}.") + raise ValidationError(f"inputs field is not a valid filed in task type " f"{DataTransferTaskType.IMPORT_DATA}.") @validates("outputs") def outputs_key(self, value): if len(value) != 1 or list(value.keys())[0] != "sink": - raise ValidationError(f"outputs field only support one output called sink in task type " - f"{DataTransferTaskType.IMPORT_DATA}.") + raise ValidationError( + f"outputs field only support one output called sink in task type " + f"{DataTransferTaskType.IMPORT_DATA}." 
+ ) @post_load def make(self, data, **kwargs) -> "DataTransferImport": @@ -504,13 +503,16 @@ class DataTransferExportSchema(BaseNodeSchema): @validates("inputs") def inputs_key(self, value): if len(value) != 1 or list(value.keys())[0] != "source": - raise ValidationError(f"inputs field only support one input called source in task type " - f"{DataTransferTaskType.EXPORT_DATA}.") + raise ValidationError( + f"inputs field only support one input called source in task type " + f"{DataTransferTaskType.EXPORT_DATA}." + ) @validates("outputs") def outputs_key(self, value): - raise ValidationError(f"outputs field is not a valid filed in task type " - f"{DataTransferTaskType.EXPORT_DATA}.") + raise ValidationError( + f"outputs field is not a valid filed in task type " f"{DataTransferTaskType.EXPORT_DATA}." + ) @post_load def make(self, data, **kwargs) -> "DataTransferExport": diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/control_flow_job.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/control_flow_job.py index 69661d289333..4f47960864d9 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/control_flow_job.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/control_flow_job.py @@ -106,7 +106,7 @@ def load_items(self, data, **kwargs): # pylint: disable=no-self-use, unused-arg if isinstance(items, str): items = json.loads(items) data["items"] = items - except Exception: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except pass return data diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_component.py index 1bc5b6ee3bb1..ba529d1862ae 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_component.py @@ -41,8 +41,11 @@ from azure.ai.ml._schema.pipeline.pipeline_import_job import PipelineImportJobSchema from azure.ai.ml._schema.pipeline.pipeline_parallel_job import PipelineParallelJobSchema from azure.ai.ml._schema.pipeline.pipeline_spark_job import PipelineSparkJobSchema -from azure.ai.ml._schema.pipeline.pipeline_datatransfer_job import PipelineDataTransferCopyJobSchema, \ - PipelineDataTransferImportJobSchema, PipelineDataTransferExportJobSchema +from azure.ai.ml._schema.pipeline.pipeline_datatransfer_job import ( + PipelineDataTransferCopyJobSchema, + PipelineDataTransferImportJobSchema, + PipelineDataTransferExportJobSchema, +) from azure.ai.ml._utils.utils import is_private_preview_enabled from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, AzureMLResourceType from azure.ai.ml.constants._component import ( @@ -133,9 +136,7 @@ def _post_load_pipeline_jobs(context, data: dict) -> dict: job_type = job_instance.get("type") if job_type == ControlFlowType.IF_ELSE: # Convert to if-else node. - job_instance = ConditionNode._create_instance_from_schema_dict( - loaded_data=job_instance - ) + job_instance = ConditionNode._create_instance_from_schema_dict(loaded_data=job_instance) elif job_instance.get("type") == ControlFlowType.DO_WHILE: # Convert to do-while node. 
job_instance = DoWhile._create_instance_from_schema_dict( @@ -156,8 +157,9 @@ def _post_load_pipeline_jobs(context, data: dict) -> dict: context=context, pipeline_job_dict=data, ) - if not (job_instance.type == NodeType.DATA_TRANSFER and job_instance.task != - DataTransferTaskType.COPY_DATA): + if not ( + job_instance.type == NodeType.DATA_TRANSFER and job_instance.task != DataTransferTaskType.COPY_DATA + ): job_instance.component._source = ComponentSource.YAML_JOB job_instance._source = job_instance.component._source jobs[key] = job_instance diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_datatransfer_job.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_datatransfer_job.py index ffe4b6c394ec..b20b949e4ec6 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_datatransfer_job.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_datatransfer_job.py @@ -11,8 +11,11 @@ from azure.ai.ml._schema.core.fields import NestedField, UnionField from azure.ai.ml._schema.job.input_output_entry import OutputSchema -from azure.ai.ml._schema.job.data_transfer_job import DataTransferCopyJobSchema, DataTransferImportJobSchema, \ - DataTransferExportJobSchema +from azure.ai.ml._schema.job.data_transfer_job import ( + DataTransferCopyJobSchema, + DataTransferImportJobSchema, + DataTransferExportJobSchema, +) module_logger = logging.getLogger(__name__) @@ -44,7 +47,6 @@ def make(self, data: Any, **kwargs: Any): class PipelineDataTransferExportJobSchema(DataTransferExportJobSchema): - @post_load def make(self, data: Any, **kwargs: Any): from azure.ai.ml.entities._job.data_transfer.data_transfer_job import DataTransferExportJob diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_job_io.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_job_io.py index 93b080a0db9f..ecfc02e9a22b 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_job_io.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_schema/pipeline/pipeline_job_io.py @@ -40,8 +40,13 @@ def _serialize(self, value, attr, obj, **kwargs): raise ValidationError(f"Invalid output binding string '{value}' passed") def _deserialize(self, value, attr, data, **kwargs): - if isinstance(value, dict) and "path" in value and "mode" not in value and \ - 'name' not in value and 'version' not in value: + if ( + isinstance(value, dict) + and "path" in value + and "mode" not in value + and "name" not in value + and "version" not in value + ): value = value["path"] if isinstance(value, str) and re.match(ComponentJobConstants.OUTPUT_PATTERN, value): return value diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_appinsights_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_appinsights_utils.py index 67d5ffd33556..2d6bd0914e4c 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_appinsights_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_appinsights_utils.py @@ -69,7 +69,9 @@ def get_default_resource_group_deployment(deployment_name: str, location: str, s "properties": { "mode": "Incremental", "template": { - "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "$schema": ( + "https://schema.management.azure.com/schemas/2018-05-01/" + "subscriptionDeploymentTemplate.json#" + ), "contentVersion": "1.0.0.1", "parameters": {}, "variables": {}, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py index f4185346bb1e..7f4d58f51c92 100644 --- 
a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py @@ -439,8 +439,7 @@ def get_directory_size(root: os.PathLike, ignore_file: IgnoreFile = IgnoreFile(N # ensure we're counting the size of the linked file # os.readlink returns a file path relative to dirpath, and must be # re-joined to get a workable result - path_size = os.path.getsize(os.path.join(dirpath, - os.readlink(convert_windows_path_to_unix(full_path)))) + path_size = os.path.getsize(os.path.join(dirpath, os.readlink(convert_windows_path_to_unix(full_path)))) size_list[full_path] = path_size total_size += path_size return total_size, size_list @@ -709,6 +708,7 @@ def _get_next_version_from_container( version = "1" return version + def _get_latest_version_from_container( asset_name: str, container_operation: Any, @@ -740,21 +740,22 @@ def _get_latest_version_from_container( f"Asset {asset_name} does not exist in registry {registry_name}." if registry_name else f"Asset {asset_name} does not exist in workspace {workspace_name}." - ) + ) no_personal_data_message = ( "Asset {asset_name} does not exist in registry {registry_name}." if registry_name else "Asset {asset_name} does not exist in workspace {workspace_name}." - ) + ) raise ValidationException( message=message, no_personal_data_message=no_personal_data_message, target=ErrorTarget.ASSET, error_category=ErrorCategory.USER_ERROR, - error_type=ValidationErrorType.RESOURCE_NOT_FOUND + error_type=ValidationErrorType.RESOURCE_NOT_FOUND, ) return version + def _get_latest( asset_name: str, version_operation: Any, @@ -801,12 +802,12 @@ def _get_latest( f"Asset {asset_name} does not exist in registry {registry_name}." if registry_name else f"Asset {asset_name} does not exist in workspace {workspace_name}." - ) + ) no_personal_data_message = ( "Asset {asset_name} does not exist in registry {registry_name}." if registry_name else "Asset {asset_name} does not exist in workspace {workspace_name}." 
- ) + ) raise ValidationException( message=message, no_personal_data_message=no_personal_data_message, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_cache_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_cache_utils.py index 545edf03966b..463ea88983a0 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_cache_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_cache_utils.py @@ -15,8 +15,12 @@ from typing import List, Dict, Optional, Union, Callable from azure.ai.ml._utils._asset_utils import get_object_hash -from azure.ai.ml._utils.utils import is_on_disk_cache_enabled, is_concurrent_component_registration_enabled, \ - is_private_preview_enabled, write_to_shared_file +from azure.ai.ml._utils.utils import ( + is_on_disk_cache_enabled, + is_concurrent_component_registration_enabled, + is_private_preview_enabled, + write_to_shared_file, +) from azure.ai.ml.constants._common import AzureMLResourceType, AZUREML_COMPONENT_REGISTRATION_MAX_WORKERS from azure.ai.ml.entities import Component from azure.ai.ml.entities._builders import BaseNode @@ -98,9 +102,7 @@ def __init__( self._cache: Dict[str, _CacheContent] = {} self._nodes_to_resolve: List[BaseNode] = [] - self._client_hash = self._get_client_hash( - subscription_id, resource_group_name, workspace_name, registry_name - ) + self._client_hash = self._get_client_hash(subscription_id, resource_group_name, workspace_name, registry_name) # the same client share 1 lock self._lock = _node_resolution_lock[self._client_hash] @@ -141,7 +143,7 @@ def _get_component_registration_max_workers(): "Please reset the value to an integer.", AZUREML_COMPONENT_REGISTRATION_MAX_WORKERS, os.environ.get(AZUREML_COMPONENT_REGISTRATION_MAX_WORKERS), - default_max_workers + default_max_workers, ) max_workers = default_max_workers return max_workers @@ -208,6 +210,7 @@ def _get_on_disk_hash_for_component(component: Component, in_memory_hash: str) - def _on_disk_cache_dir(self) -> Path: """Get the base path for on disk cache.""" from azure.ai.ml._version import VERSION + return Path(tempfile.gettempdir()).joinpath( ".azureml", "azure-ai-ml", @@ -254,8 +257,7 @@ def _save_to_on_disk_cache(self, on_disk_hash: str, arm_id: str) -> None: ) def _resolve_cache_contents(self, cache_contents_to_resolve: List[_CacheContent], resolver): - """Resolve all components to resolve and save the results in cache. - """ + """Resolve all components to resolve and save the results in cache.""" _components = list(map(lambda x: x.component_ref, cache_contents_to_resolve)) _map_func = partial(resolver, azureml_type=AzureMLResourceType.COMPONENT) @@ -297,10 +299,7 @@ def _prepare_items_to_resolve(self): self._nodes_to_resolve.clear() return dict_of_nodes_to_resolve, cache_contents_to_resolve - def _resolve_cache_contents_from_disk( - self, - cache_contents_to_resolve: List[_CacheContent] - ) -> List[_CacheContent]: + def _resolve_cache_contents_from_disk(self, cache_contents_to_resolve: List[_CacheContent]) -> List[_CacheContent]: """Check on-disk cache to resolve cache contents in cache_contents_to_resolve and return unresolved cache contents. 
""" @@ -308,8 +307,7 @@ def _resolve_cache_contents_from_disk( # we can't assume that the code folder won't change among dependency resolution for cache_content in cache_contents_to_resolve: cache_content.on_disk_hash = self._get_on_disk_hash_for_component( - cache_content.component_ref, - cache_content.in_memory_hash + cache_content.component_ref, cache_content.in_memory_hash ) left_cache_contents_to_resolve = [] @@ -342,16 +340,14 @@ def _resolve_nodes(self): self._fill_back_component_to_nodes(dict_of_nodes_to_resolve) def register_node_for_lazy_resolution(self, node: BaseNode): - """Register a node with its component to resolve. - """ + """Register a node with its component to resolve.""" component = node._component # pylint: disable=protected-access # directly resolve node and skip registration if the resolution involves no remote call # so that all node will be skipped when resolving a subgraph recursively if isinstance(component, str): node._component = self._resolver( # pylint: disable=protected-access - component, - azureml_type=AzureMLResourceType.COMPONENT + component, azureml_type=AzureMLResourceType.COMPONENT ) return if component.id is not None: diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_func_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_func_utils.py index de8775b0388a..1d263085566f 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_func_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_func_utils.py @@ -18,7 +18,7 @@ class PersistentLocalsFunctionBuilder(abc.ABC): "not_callable": "func must be a function or a callable object", "conflict_argument": "Injected param name __self conflicts with function args {args}", "not_all_template_separators_used": "Not all template separators are used, " - "please switch to a compatible version of Python.", + "please switch to a compatible version of Python.", } injected_param = "__self" @@ -43,10 +43,10 @@ def call(self, func, _all_kwargs) -> Tuple[Any, dict]: """ if isinstance(func, (FunctionType, MethodType)): pass - elif hasattr(func, '__call__'): + elif hasattr(func, "__call__"): func = func.__call__ else: - raise TypeError(self.make_error('not_callable')) + raise TypeError(self.make_error("not_callable")) if self.injected_param in func.__code__.co_varnames: raise ValueError(self.make_error("conflict_argument", args=list(func.__code__.co_varnames))) @@ -94,7 +94,6 @@ def _call(self, func, _all_kwargs): try: from bytecode import Bytecode, Instr - class PersistentLocalsFunction(object): def __init__(self, _func, *, _self: Optional[Any] = None, skip_locals: Optional[List[str]] = None): """ @@ -121,18 +120,15 @@ def __call__(__self, *args, **kwargs): # pylint: disable=no-self-argument for skip_local in __self._skip_locals: __self.locals.pop(skip_local, None) - def _source_template_func(mock_arg): return mock_arg - def _target_template_func(__self, mock_arg): try: return mock_arg finally: __self.locals = locals().copy() - class PersistentLocalsFunctionBytecodeBuilder(PersistentLocalsFunctionBuilder): def __init__(self): self._template_separators = self.get_instructions(_source_template_func) @@ -178,12 +174,7 @@ def _create_code(self, instructions: List[Instr], base_func: Union[FunctionType, # endregion - def _split_instructions( - self, - instructions, - *, - skip_body_instr=False - ) -> List[List[Any]]: + def _split_instructions(self, instructions, *, skip_body_instr=False) -> List[List[Any]]: """Split instructions into several pieces by template separators. 
For example, in Python 3.11, the template separators will be: [ @@ -247,12 +238,9 @@ def func(__self, *func_args): generated_instructions = [] for template_piece, input_piece, separator in zip( - self._template_body, - self._split_instructions( - self.get_instructions(func), - skip_body_instr=True - ), - self._template_separators, + self._template_body, + self._split_instructions(self.get_instructions(func), skip_body_instr=True), + self._template_separators, ): generated_instructions.extend(template_piece) generated_instructions.extend(input_piece) @@ -265,7 +253,7 @@ def func(__self, *func_args): func.__globals__, func.__name__, func.__defaults__, - func.__closure__ + func.__closure__, ) return PersistentLocalsFunction( generated_func, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/azure_resource_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/azure_resource_utils.py index 006854b866a9..80409062df5c 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/azure_resource_utils.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/azure_resource_utils.py @@ -9,8 +9,9 @@ from azure.core.credentials import TokenCredential -def get_resources_from_subscriptions(strQuery: str, credential: TokenCredential, - subscription_list: Optional[List[str]] = None): +def get_resources_from_subscriptions( + strQuery: str, credential: TokenCredential, subscription_list: Optional[List[str]] = None +): # If a subscription list is passed in, use it. Otherwise, get all subscriptions subsList = [] @@ -19,7 +20,7 @@ def get_resources_from_subscriptions(strQuery: str, credential: TokenCredential, else: subsClient = SubscriptionClient(credential) for sub in subsClient.subscriptions.list(): - subsList.append(sub.as_dict().get('subscription_id')) + subsList.append(sub.as_dict().get("subscription_id")) # Create Azure Resource Graph client and set options argClient = arg.ResourceGraphClient(credential) @@ -35,8 +36,9 @@ def get_resources_from_subscriptions(strQuery: str, credential: TokenCredential, return argClient.resources(argQuery) -def get_virtual_clusters_from_subscriptions(credential: TokenCredential, - subscription_list: Optional[List[str]] = None) -> List[Dict]: +def get_virtual_clusters_from_subscriptions( + credential: TokenCredential, subscription_list: Optional[List[str]] = None +) -> List[Dict]: # cspell:ignore tolower strQuery = """resources @@ -47,16 +49,19 @@ def get_virtual_clusters_from_subscriptions(credential: TokenCredential, return get_resources_from_subscriptions(strQuery, credential, subscription_list).data -def get_generic_resource_by_id(arm_id: str, credential: TokenCredential, - subscription_id: str, api_version: Optional[str] = None) -> Dict: +def get_generic_resource_by_id( + arm_id: str, credential: TokenCredential, subscription_id: str, api_version: Optional[str] = None +) -> Dict: resource_client = ResourceManagementClient(credential, subscription_id) generic_resource = resource_client.resources.get_by_id(arm_id, api_version) return generic_resource.as_dict() -def get_virtual_cluster_by_id(name: str, resource_group: str, - subscription_id: str, credential: TokenCredential) -> Dict: + +def get_virtual_cluster_by_id( + name: str, resource_group: str, subscription_id: str, credential: TokenCredential +) -> Dict: arm_id = ( f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}" diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/utils.py index e096fdb806e7..e77a824af9e7 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/utils.py +++ 
b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/utils.py @@ -927,12 +927,7 @@ def write_to_shared_file(file_path: Union[str, PathLike], content: str): def _get_valid_dot_keys_with_wildcard_impl( - left_reversed_parts, - root, - *, - validate_func=None, - cur_node=None, - processed_parts=None + left_reversed_parts, root, *, validate_func=None, cur_node=None, processed_parts=None ): if len(left_reversed_parts) == 0: if validate_func is None or validate_func(root, processed_parts): @@ -953,12 +948,14 @@ def _get_valid_dot_keys_with_wildcard_impl( if not isinstance(next_key, str): continue processed_parts.append(next_key) - result.extend(_get_valid_dot_keys_with_wildcard_impl( - left_reversed_parts, - root, - validate_func=validate_func, - cur_node=cur_node[next_key], - processed_parts=processed_parts) + result.extend( + _get_valid_dot_keys_with_wildcard_impl( + left_reversed_parts, + root, + validate_func=validate_func, + cur_node=cur_node[next_key], + processed_parts=processed_parts, + ) ) processed_parts.pop() elif key in cur_node: @@ -968,13 +965,14 @@ def _get_valid_dot_keys_with_wildcard_impl( root, validate_func=validate_func, cur_node=cur_node[key], - processed_parts=processed_parts + processed_parts=processed_parts, ) processed_parts.pop() left_reversed_parts.append(key) return result + def get_valid_dot_keys_with_wildcard( root: Dict[str, Any], dot_key_wildcard: str, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_vendor/azure_resources/flatten_json/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_vendor/azure_resources/flatten_json/__init__.py index bf53b8fa9889..d180645c4bc6 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_vendor/azure_resources/flatten_json/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_vendor/azure_resources/flatten_json/__init__.py @@ -28,10 +28,7 @@ def check_if_numbers_are_consecutive(list_): :param list_: list of integers :return: Boolean """ - return all( - True if second - first == 1 else False - for first, second in zip(list_[:-1], list_[1:]) - ) + return all(True if second - first == 1 else False for first, second in zip(list_[:-1], list_[1:])) def _construct_key(previous_key, separator, new_key, replace_separators=None): @@ -48,16 +45,12 @@ def _construct_key(previous_key, separator, new_key, replace_separators=None): if replace_separators is not None: new_key = str(new_key).replace(separator, replace_separators) if previous_key: - return u"{}{}{}".format(previous_key, separator, new_key) + return "{}{}{}".format(previous_key, separator, new_key) else: return new_key -def flatten( - nested_dict, - separator="_", - root_keys_to_ignore=None, - replace_separators=None): +def flatten(nested_dict, separator="_", root_keys_to_ignore=None, replace_separators=None): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy @@ -102,20 +95,11 @@ def _flatten(object_, key): if not (not key and object_key in root_keys_to_ignore): _flatten( object_[object_key], - _construct_key( - key, - separator, - object_key, - replace_separators=replace_separators)) + _construct_key(key, separator, object_key, replace_separators=replace_separators), + ) elif isinstance(object_, (list, set, tuple)): for index, item in enumerate(object_): - _flatten( - item, - _construct_key( - key, - separator, - index, - replace_separators=replace_separators)) + _flatten(item, _construct_key(key, separator, index, replace_separators=replace_separators)) # Anything left take as is else: flattened_dict[key] = object_ @@ -127,10 +111,9 @@ def _flatten(object_, key): flatten_json = flatten 
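The flatten_json hunks above are formatting-only, so the vendored helpers keep their documented behavior: flatten joins nested keys with a separator and indexes list items, while unflatten/unflatten_list rebuild the hierarchy. A minimal usage sketch follows (not part of the patch; it assumes the module is importable at the vendored path from the diff header and that behavior matches the function bodies shown):

# Minimal sketch of the vendored flatten_json helpers reformatted above.
# Assumption: the module is importable at the vendored path shown in the diff header.
from azure.ai.ml._vendor.azure_resources.flatten_json import flatten, unflatten_list

nested = {"properties": {"sku": "standard"}, "tags": ["ml", "prod"]}

# Nested dict keys are joined with the separator; list items contribute their index.
flat = flatten(nested, separator="_")
# flat == {"properties_sku": "standard", "tags_0": "ml", "tags_1": "prod"}

# unflatten_list first unflattens assuming no lists, then converts dicts whose keys
# are the consecutive integers 0..n-1 back into lists.
restored = unflatten_list(flat, separator="_")
assert restored == nested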
-def flatten_preserve_lists(nested_dict, separator="_", - root_keys_to_ignore=None, - max_list_index=3, max_depth=3, - replace_separators=None): +def flatten_preserve_lists( + nested_dict, separator="_", root_keys_to_ignore=None, max_list_index=3, max_depth=3, replace_separators=None +): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy @@ -153,8 +136,7 @@ def flatten_preserve_lists(nested_dict, separator="_", """ assert isinstance(nested_dict, dict), "flatten requires a dictionary input" - assert isinstance(separator, six.string_types), \ - "separator must be a string" + assert isinstance(separator, six.string_types), "separator must be a string" if root_keys_to_ignore is None: root_keys_to_ignore = set() @@ -191,16 +173,12 @@ def _flatten(object_, key): if not (not key and object_key in root_keys_to_ignore): _flatten( object_[object_key], - _construct_key( - key, - separator, - object_key, - replace_separators=replace_separators)) + _construct_key(key, separator, object_key, replace_separators=replace_separators), + ) elif isinstance(object_, (list, set, tuple)): for index, item in enumerate(object_): - key = _construct_key(key, separator, index, - replace_separators=replace_separators) + key = _construct_key(key, separator, index, replace_separators=replace_separators) _flatten(item, key) else: @@ -221,16 +199,14 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): # write latest child as value if max_depth exceeded if cur_depth > max_depth_inner: - global_max_record = int(max(list( - list_prebuilt_flattened_dict.keys()))) + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_ else: # Empty object can't be iterated, take as is if not object_: - global_max_record = int(max(list( - list_prebuilt_flattened_dict.keys()))) + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_ @@ -240,54 +216,40 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): first_key = list(object_.keys())[0] # if only 1 child value, and child value # not a dict or list, flatten immediately - if len(object_) == 1 \ - and not (isinstance(object_[first_key], dict) - or isinstance(object_[first_key], list)): - global_max_record = int(max(list( - list_prebuilt_flattened_dict.keys()))) - - for d in list_prebuilt_flattened_dict[ - str(global_max_record) - ]: + if len(object_) == 1 and not ( + isinstance(object_[first_key], dict) or isinstance(object_[first_key], list) + ): + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + + for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_[first_key] else: - for object_key, val in \ - sorted(object_.items(), - key=lambda x: - (str(type(x[1])), len(str(x[1]))), - reverse=False): + for object_key, val in sorted( + object_.items(), key=lambda x: (str(type(x[1])), len(str(x[1]))), reverse=False + ): if not (not key and object_key in root_keys_to_ignore): _flatten_low_entropy( object_[object_key], - _construct_key( - key, - separator, - object_key, - replace_separators=replace_separators), + _construct_key(key, separator, object_key, replace_separators=replace_separators), cur_depth, - max_depth_inner) + max_depth_inner, + ) # lists could go into rows, like in a relational database elif isinstance(object_, list) or isinstance(object_, set): if debug: - print("\nparent key of 
list:", - key, "| length: ", - str(len(object_))) + print("\nparent key of list:", key, "| length: ", str(len(object_))) # need to remember global list state when we entered # this recursion - global_max_record_start = int(max(list( - list_prebuilt_flattened_dict.keys()))) - entry = copy.deepcopy(list_prebuilt_flattened_dict[ - str(global_max_record_start) - ]) + global_max_record_start = int(max(list(list_prebuilt_flattened_dict.keys()))) + entry = copy.deepcopy(list_prebuilt_flattened_dict[str(global_max_record_start)]) for index, item in enumerate(object_): if debug: - print(" list key:", key, - " index: " + str(index), "vals: ", item) + print(" list key:", key, " index: " + str(index), "vals: ", item) sub = -1 if isinstance(item, dict): @@ -298,25 +260,20 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): if not isnan(sub) and index < max_list_index: # start from second element, 1st element is like column if index > 0: - global_max_record = int(max(list( - list_prebuilt_flattened_dict.keys()))) + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) - list_prebuilt_flattened_dict[ - str(global_max_record + 1) - ] = copy.deepcopy(entry) + list_prebuilt_flattened_dict[str(global_max_record + 1)] = copy.deepcopy(entry) - _flatten_low_entropy(item, key, cur_depth, - max_depth_inner) + _flatten_low_entropy(item, key, cur_depth, max_depth_inner) else: pass - list_prebuilt_flattened_dict['0'] = \ - [subel for k, v in - sorted(list_prebuilt_flattened_dict.items()) - for idx, subel in enumerate(v)] + list_prebuilt_flattened_dict["0"] = [ + subel for k, v in sorted(list_prebuilt_flattened_dict.items()) for idx, subel in enumerate(v) + ] for key in list(sorted(list_prebuilt_flattened_dict.keys())): - if key != '0': + if key != "0": del list_prebuilt_flattened_dict[key] if debug: print("collapsed global list") @@ -326,8 +283,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): # in this case, there may be # a list of prebuilt_flattened_dict by now # so need to update them all. - global_max_record = int(max(list( - list_prebuilt_flattened_dict.keys()))) + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_ @@ -340,34 +296,34 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): # get unique column names, without the integers # TODO: potential issue: what if column names have digits naturally? 
reskeys = list(flattened_dict.keys()) - unique_integers = list(set([separator + char for key - in reskeys for char in key if char.isdigit()])) - regex = '|'.join(unique_integers) + unique_integers = list(set([separator + char for key in reskeys for char in key if char.isdigit()])) + regex = "|".join(unique_integers) regex += "|" + regex.replace(".", "") - unique_columns = list(set([re.sub("(" + regex + ")", "", key) - for key in reskeys])) + unique_columns = list(set([re.sub("(" + regex + ")", "", key) for key in reskeys])) # create global dict, now with unique column names prebuilt_flattened_dict = {column: None for column in unique_columns} # initialize global record list - list_prebuilt_flattened_dict = {'0': [prebuilt_flattened_dict]} + list_prebuilt_flattened_dict = {"0": [prebuilt_flattened_dict]} - _flatten_low_entropy(nested_dict, None, cur_depth=0, - max_depth_inner=max_depth) + _flatten_low_entropy(nested_dict, None, cur_depth=0, max_depth_inner=max_depth) - return list_prebuilt_flattened_dict['0'] + return list_prebuilt_flattened_dict["0"] def _unflatten_asserts(flat_dict, separator): assert isinstance(flat_dict, dict), "un_flatten requires dictionary input" assert isinstance(separator, six.string_types), "separator must be string" - assert all((not value or not isinstance(value, Iterable) or - isinstance(value, six.string_types) - for value in flat_dict.values())), "provided dict is not flat" + assert all( + ( + not value or not isinstance(value, Iterable) or isinstance(value, six.string_types) + for value in flat_dict.values() + ) + ), "provided dict is not flat" -def unflatten(flat_dict, separator='_'): +def unflatten(flat_dict, separator="_"): """ Creates a hierarchical dictionary from a flattened dictionary Assumes no lists are present @@ -392,18 +348,16 @@ def _unflatten(dic, keys, value): split_key = item.split(separator) next_split_key = list_keys[i + 1].split(separator) if not split_key == next_split_key[:-1]: - _unflatten(unflattened_dict, item.split(separator), - flat_dict[item]) + _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) else: pass # if key contained in next key, json will be invalid. 
else: # last element - _unflatten(unflattened_dict, item.split(separator), - flat_dict[item]) + _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) return unflattened_dict -def unflatten_list(flat_dict, separator='_'): +def unflatten_list(flat_dict, separator="_"): """ Unflattens a dictionary, first assuming no lists exist and then tries to identify lists and replaces them @@ -435,10 +389,13 @@ def _convert_dict_to_list(object_, parent_object, parent_object_key): keys = [] keys_len = len(keys) - if (keys_len > 0 and sum(keys) == - int(((keys_len - 1) * keys_len) / 2) and keys[0] == 0 and - keys[-1] == keys_len - 1 and - check_if_numbers_are_consecutive(keys)): + if ( + keys_len > 0 + and sum(keys) == int(((keys_len - 1) * keys_len) / 2) + and keys[0] == 0 + and keys[-1] == keys_len - 1 + and check_if_numbers_are_consecutive(keys) + ): # The dictionary looks like a list so we're going to replace it parent_object[parent_object_key] = [] @@ -446,9 +403,9 @@ def _convert_dict_to_list(object_, parent_object, parent_object_key): parent_object[parent_object_key].append(object_[str(key)]) # The list item we just added might be a list itself # https://github.com/amirziai/flatten/issues/15 - _convert_dict_to_list(parent_object[parent_object_key][-1], - parent_object[parent_object_key], - key_index) + _convert_dict_to_list( + parent_object[parent_object_key][-1], parent_object[parent_object_key], key_index + ) _convert_dict_to_list(unflattened_dict, None, None) return unflattened_dict @@ -458,9 +415,9 @@ def cli(input_stream=sys.stdin, output_stream=sys.stdout): raw = input_stream.read() input_json = json.loads(raw) output = json.dumps(flatten(input_json)) - output_stream.write('{}\n'.format(output)) + output_stream.write("{}\n".format(output)) output_stream.flush() -if __name__ == '__main__': - cli() \ No newline at end of file +if __name__ == "__main__": + cli() diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py index 3b75b7701907..191a1602fec8 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py @@ -597,6 +597,7 @@ class IdentityType: USER_IDENTITY = "user_identity" MANAGED_IDENTITY = "managed_identity" + class Boolean: TRUE = "true" FALSE = "false" diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_component.py index c757cae8eeec..056d094220de 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_component.py @@ -37,7 +37,6 @@ class ControlFlowType(object): CONTROL_FLOW_TYPES = [getattr(ControlFlowType, k) for k in dir(ControlFlowType) if k.isupper()] - class DataTransferTaskType(object): COPY_DATA = "copy_data" IMPORT_DATA = "import_data" diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_compute.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_compute.py index 0b95b37ee779..2b1d2e22cd39 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_compute.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_compute.py @@ -32,6 +32,7 @@ class ComputeDefaults: IDLE_TIME = 1800 PRIORITY = "Dedicated" + class CustomApplicationDefaults: TARGET_PORT = "target_port" PUBLISHED_PORT = "published_port" @@ -40,5 +41,6 @@ class CustomApplicationDefaults: DOCKER = "docker" ENDPOINT_NAME = "connect" + DUPLICATE_APPLICATION_ERROR = "Value of {} must be unique across all custom applications." 
INVALID_VALUE_ERROR = "Value of {} must be between {} and {}." diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_deployment.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_deployment.py index 3197cf9d53cb..2be1552ee755 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_deployment.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_deployment.py @@ -24,4 +24,5 @@ class EndpointDeploymentLogContainerType(object): INFERENCE_SERVER = "inference-server" STORAGE_INITIALIZER = "storage-initializer" + SmallSKUs = ["standard_ds1_v2", "standard_ds2_v2"] diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/data_transfer/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/data_transfer/__init__.py index 41fe13ef90e4..60323726e663 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/data_transfer/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/data_transfer/__init__.py @@ -4,10 +4,4 @@ from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem from azure.ai.ml.entities._builders.data_transfer_func import copy_data, import_data, export_data -__all__ = [ - "import_data", - "export_data", - "copy_data", - "Database", - "FileSystem" -] +__all__ = ["import_data", "export_data", "copy_data", "Database", "FileSystem"] diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_component_func.py b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_component_func.py index d3f99def4c18..067cccd836e8 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_component_func.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_component_func.py @@ -52,6 +52,7 @@ def to_component_func(entity: ComponentEntity, component_creation_func) -> Calla flattened_group_keys = [] # Flatten all group parameters, for function parameter validation. from azure.ai.ml.entities._inputs_outputs import GroupInput + for name, item in entity.inputs.items(): if isinstance(item, GroupInput): flattened_group_keys.extend(list(item.flatten(group_parameter_name=name).keys())) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_dynamic.py b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_dynamic.py index fa1cc79d3ea9..2e1c5ff545c1 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_dynamic.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_dynamic.py @@ -123,9 +123,7 @@ def create_kw_function_from_parameters( def f(**kwargs): # We need to make sure all keys of kwargs are valid. # Merge valid group keys with original keys. - _assert_arg_valid( - kwargs, [*list(default_kwargs.keys()), *flattened_group_keys], func_name=func_name - ) + _assert_arg_valid(kwargs, [*list(default_kwargs.keys()), *flattened_group_keys], func_name=func_name) # We need to put the default args to the kwargs before invoking the original function. 
_update_dct_if_not_exist(kwargs, default_kwargs) return func(**kwargs) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_mldesigner/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_mldesigner/__init__.py index f3ca8ca911d0..08fcefc98a98 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_mldesigner/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_mldesigner/__init__.py @@ -13,10 +13,10 @@ from azure.ai.ml.entities._component.component_factory import component_factory from azure.ai.ml.entities._job.pipeline._load_component import _generate_component_function from azure.ai.ml.entities._inputs_outputs import _get_param_with_standard_annotation -from azure.ai.ml._internal.entities._additional_includes import _AdditionalIncludes # pylint: disable=unused-import +from azure.ai.ml._internal.entities._additional_includes import _AdditionalIncludes # pylint: disable=unused-import from azure.ai.ml._utils._asset_utils import get_ignore_file from azure.ai.ml._utils.utils import try_enable_internal_components -from azure.ai.ml._internal.entities import InternalComponent # pylint: disable=unused-import +from azure.ai.ml._internal.entities import InternalComponent # pylint: disable=unused-import from azure.ai.ml.dsl._condition import condition from azure.ai.ml.dsl._do_while import do_while from azure.ai.ml.dsl._group_decorator import group @@ -31,7 +31,6 @@ "condition", "do_while", "group", - # must keep "get_ignore_file", "_get_param_with_standard_annotation", diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_pipeline_component_builder.py b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_pipeline_component_builder.py index b90b2ee6bbf2..39e517e88e60 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_pipeline_component_builder.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/dsl/_pipeline_component_builder.py @@ -203,7 +203,7 @@ def build( pipeline_component._outputs = self._build_pipeline_outputs(outputs) return pipeline_component - def _validate_group_annotation(self, name:str, val:GroupInput): + def _validate_group_annotation(self, name: str, val: GroupInput): for k, v in val.values.items(): if isinstance(v, GroupInput): self._validate_group_annotation(k, v) @@ -261,8 +261,10 @@ def _build_pipeline_outputs(self, outputs: typing.Dict[str, NodeOutput]): # Note: Here we set PipelineOutput as Pipeline's output definition as we need output binding. output_meta = Output( - type=_map_internal_output_type(meta), description=meta.description, - mode=meta.mode, is_control=meta.is_control + type=_map_internal_output_type(meta), + description=meta.description, + mode=meta.mode, + is_control=meta.is_control, ) pipeline_output = PipelineOutput( port_name=key, @@ -429,7 +431,7 @@ def _get_output_annotation(cls, func): if not isinstance(val, Output): raise UserErrorException( message="Invalid output annotation. " - f"Only Output annotation in return annotation is supported. Got {type(val)}." + f"Only Output annotation in return annotation is supported. Got {type(val)}." 
) output_annotations[key] = val._to_dict() return output_annotations @@ -475,16 +477,22 @@ def _validate_keyword_in_node_io(node: Union[BaseNode, AutoMLJob]): if has_attr_safe(node, "inputs"): for input_name in set(node.inputs) & COMPONENT_IO_KEYWORDS: module_logger.warning( - "Reserved word \"%s\" is used as input name in node \"%s\", " + 'Reserved word "%s" is used as input name in node "%s", ' "can only be accessed with '%s.inputs[\"%s\"]'", - input_name, node.name, node.name, input_name + input_name, + node.name, + node.name, + input_name, ) if has_attr_safe(node, "outputs"): for output_name in set(node.outputs) & COMPONENT_IO_KEYWORDS: module_logger.warning( - "Reserved word \"%s\" is used as output name in node \"%s\", " + 'Reserved word "%s" is used as output name in node "%s", ' "can only be accessed with '%s.outputs[\"%s\"]'", - output_name, node.name, node.name, output_name + output_name, + node.name, + node.name, + output_name, ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/__init__.py index ad0508389fba..62d4ce621e6b 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/__init__.py @@ -79,7 +79,7 @@ from ._job.job import Job from ._job.job_limits import CommandJobLimits from ._job.job_resource_configuration import JobResourceConfiguration -from ._job.job_service import JobService, SshJobService,JupyterLabJobService, TensorBoardJobService, VsCodeJobService +from ._job.job_service import JobService, SshJobService, JupyterLabJobService, TensorBoardJobService, VsCodeJobService from ._job.parallel.parallel_task import ParallelTask from ._job.parallel.retry_settings import RetrySettings from ._job.parameterized_command import ParameterizedCommand diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_assets/_artifacts/data.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_assets/_artifacts/data.py index 6225fdfbd048..f5ae03e9f09e 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_assets/_artifacts/data.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_assets/_artifacts/data.py @@ -213,7 +213,7 @@ def _update_path(self, asset_artifact: ArtifactStorageInfo) -> None: # datastore_arm_id is null for registry scenario, so capture the full_storage_path if not asset_artifact.datastore_arm_id and asset_artifact.full_storage_path: self.path = asset_artifact.full_storage_path - else : + else: groups = re.search(regex, asset_artifact.datastore_arm_id) if groups: datastore_name = groups.group(1) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/__init__.py index 3ca6987aa5bb..95dfca0a7af4 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/__init__.py @@ -11,5 +11,18 @@ from .sweep import Sweep from .data_transfer import DataTransfer, DataTransferCopy, DataTransferImport, DataTransferExport -__all__ = ["BaseNode", "Sweep", "Parallel", "Command", "Import", "Spark", "Pipeline", "parse_inputs_outputs", - "DoWhile", "DataTransfer", "DataTransferCopy", "DataTransferImport", "DataTransferExport"] +__all__ = [ + "BaseNode", + "Sweep", + "Parallel", + "Command", + "Import", + "Spark", + "Pipeline", + "parse_inputs_outputs", + "DoWhile", + "DataTransfer", + "DataTransferCopy", + "DataTransferImport", + "DataTransferExport", +] diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/base_node.py 
b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/base_node.py index 915b6857212d..2cfe658f2d6d 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/base_node.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/base_node.py @@ -365,6 +365,7 @@ def _from_rest_object(cls, obj: dict) -> "BaseNode": obj[CommonYamlFields.TYPE] = NodeType.COMMAND from azure.ai.ml.entities._job.pipeline._load_component import pipeline_node_factory + # todo: refine Hard code for now to support different task type for DataTransfer node _type = obj[CommonYamlFields.TYPE] if _type == NodeType.DATA_TRANSFER: diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/condition_node.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/condition_node.py index d71d65c9f7a7..912d78d22935 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/condition_node.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/condition_node.py @@ -71,15 +71,17 @@ def _validate_params(self, raise_error=True) -> MutableValidationResult: # check if condition is valid binding if isinstance(self.condition, str) and not is_data_binding_expression( - self.condition, ["parent"], is_singular=False): + self.condition, ["parent"], is_singular=False + ): error_tail = "for example, ${{parent.jobs.xxx.outputs.output}}" validation_result.append_error( yaml_path="condition", message=f"'condition' of dsl.condition has invalid binding expression: {self.condition}, {error_tail}", ) - error_msg = "{!r} of dsl.condition node must be an instance of " \ - f"{BaseNode}, {AutoMLJob} or {str}," "got {!r}." + error_msg = ( + "{!r} of dsl.condition node must be an instance of " f"{BaseNode}, {AutoMLJob} or {str}," "got {!r}." + ) if self.true_block is not None and not isinstance(self.true_block, (BaseNode, AutoMLJob, str)): validation_result.append_error( yaml_path="true_block", message=error_msg.format("true_block", type(self.true_block)) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer.py index 958fd5c5e53f..3d1366dbb2f4 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer.py @@ -10,15 +10,25 @@ from marshmallow import Schema from azure.ai.ml._restclient.v2022_10_01_preview.models import JobBase -from azure.ai.ml._schema.job.data_transfer_job import DataTransferCopyJobSchema, DataTransferImportJobSchema, \ - DataTransferExportJobSchema -from azure.ai.ml.constants._component import NodeType, ExternalDataType, DataTransferTaskType -from azure.ai.ml.entities._component.datatransfer_component import DataTransferCopyComponent, \ - DataTransferImportComponent, DataTransferExportComponent, DataTransferComponent +from azure.ai.ml._schema.job.data_transfer_job import ( + DataTransferCopyJobSchema, + DataTransferImportJobSchema, + DataTransferExportJobSchema, +) +from azure.ai.ml.constants._component import NodeType, ExternalDataType, DataTransferTaskType +from azure.ai.ml.entities._component.datatransfer_component import ( + DataTransferCopyComponent, + DataTransferImportComponent, + DataTransferExportComponent, + DataTransferComponent, +) from azure.ai.ml.entities._component.component import Component from azure.ai.ml.entities._inputs_outputs import Input, Output -from azure.ai.ml.entities._job.data_transfer.data_transfer_job import DataTransferCopyJob, DataTransferImportJob, \ - DataTransferExportJob +from 
azure.ai.ml.entities._job.data_transfer.data_transfer_job import ( + DataTransferCopyJob, + DataTransferImportJob, + DataTransferExportJob, +) from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem @@ -52,8 +62,9 @@ def _build_source_sink(io_dict: Union[Dict, Database, FileSystem]): msg = "Source or sink only support type {} and {}, currently got {}." raise ValidationException( message=msg.format(ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, data_type), - no_personal_data_message=msg.format(ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, - "data_type"), + no_personal_data_message=msg.format( + ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, "data_type" + ), target=ErrorTarget.COMPONENT, error_category=ErrorCategory.USER_ERROR, error_type=ValidationErrorType.INVALID_VALUE, @@ -68,6 +79,7 @@ class DataTransfer(BaseNode): You should not instantiate this class directly. """ + def __init__( self, *, @@ -194,10 +206,7 @@ def _picked_fields_from_dict_to_rest_object(cls) -> List[str]: def _to_rest_object(self, **kwargs) -> dict: rest_obj = super()._to_rest_object(**kwargs) - for key, value in { - "componentId": self._get_component_id(), - "data_copy_mode": self.data_copy_mode - }.items(): + for key, value in {"componentId": self._get_component_id(), "data_copy_mode": self.data_copy_mode}.items(): if value is not None: rest_obj[key] = value return convert_ordered_dict_to_dict(rest_obj) @@ -225,7 +234,7 @@ def _to_job(self) -> DataTransferCopyJob: services=self.services, compute=self.compute, task=self.task, - data_copy_mode=self.data_copy_mode + data_copy_mode=self.data_copy_mode, ) def __call__(self, *args, **kwargs) -> "DataTransferCopy": @@ -251,8 +260,7 @@ def __call__(self, *args, **kwargs) -> "DataTransferCopy": # Pass through the display name only if the display name is not system generated. node.display_name = self.display_name if self.display_name != self.name else None return node - msg = "copy_data can be called as a function only when referenced component is {}, " \ - "currently got {}." + msg = "copy_data can be called as a function only when referenced component is {}, currently got {}." 
raise ValidationException( message=msg.format(type(Component), self._component), no_personal_data_message=msg.format(type(Component), "self._component"), diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer_func.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer_func.py index bbbbd5e255e3..2c572202d9fa 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer_func.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/data_transfer_func.py @@ -5,11 +5,18 @@ from typing import Optional, Dict, Union, Callable, Tuple -from azure.ai.ml.entities._component.datatransfer_component import DataTransferCopyComponent, \ - DataTransferImportComponent, DataTransferExportComponent +from azure.ai.ml.entities._component.datatransfer_component import ( + DataTransferCopyComponent, + DataTransferImportComponent, + DataTransferExportComponent, +) from azure.ai.ml.constants._common import AssetTypes, LegacyAssetTypes -from azure.ai.ml.constants._component import ComponentSource, ExternalDataType, DataTransferBuiltinComponentUri, \ - DataTransferTaskType +from azure.ai.ml.constants._component import ( + ComponentSource, + ExternalDataType, + DataTransferBuiltinComponentUri, + DataTransferTaskType, +) from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem from azure.ai.ml.entities._inputs_outputs import Output, Input from azure.ai.ml.entities._job.pipeline._io import PipelineInput, NodeOutput @@ -44,7 +51,7 @@ def _parse_input(input_value): if input_type in SUPPORTED_INPUTS: job_input = Input(**input_value) component_input = Input(**input_value) - elif isinstance(input_value, str): + elif isinstance(input_value, str): # Input bindings component_input = ComponentTranslatableMixin._to_input_builder_function(input_value) job_input = input_value @@ -57,8 +64,10 @@ def _parse_input(input_value): component_input, _ = _parse_input(data) job_input = input_value else: - msg = f"Unsupported input type: {type(input_value)}, only Input, dict, str, PipelineInput and NodeOutput are " \ - f"supported." + msg = ( + f"Unsupported input type: {type(input_value)}, only Input, dict, str, PipelineInput and NodeOutput are " + f"supported." + ) raise ValidationException( message=msg, no_personal_data_message=msg, @@ -105,19 +114,19 @@ def _parse_inputs_outputs(io_dict: Dict, parse_func: Callable) -> Tuple[Dict, Di def copy_data( - *, - name: Optional[str] = None, - description: Optional[str] = None, - tags: Optional[Dict] = None, - display_name: Optional[str] = None, - experiment_name: Optional[str] = None, - compute: Optional[str] = None, - inputs: Optional[Dict] = None, - outputs: Optional[Dict] = None, - is_deterministic: bool = True, - task: Optional[str] = DataTransferTaskType.COPY_DATA, - data_copy_mode: Optional[str] = None, - **kwargs, + *, + name: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[Dict] = None, + display_name: Optional[str] = None, + experiment_name: Optional[str] = None, + compute: Optional[str] = None, + inputs: Optional[Dict] = None, + outputs: Optional[Dict] = None, + is_deterministic: bool = True, + task: Optional[str] = DataTransferTaskType.COPY_DATA, + data_copy_mode: Optional[str] = None, + **kwargs, ) -> DataTransferCopy: """Create a DataTransferCopy object which can be used inside dsl.pipeline as a function. 
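For context on the builder re-indented here: copy_data is the dsl-facing factory for DataTransferCopy nodes, and its docstring above notes it can be called inside dsl.pipeline like a component function. A hedged sketch of that usage (illustration only, not part of the patch; the compute name and data_copy_mode value are placeholders, not taken from this diff):

# Illustrative sketch only: builds a DataTransferCopy node inside a dsl.pipeline.
# "adf_compute" and the data_copy_mode string are assumptions, not from this diff.
from azure.ai.ml import dsl, Output
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.data_transfer import copy_data


@dsl.pipeline(description="copy a folder between datastores")
def copy_pipeline(src_folder):
    copy_node = copy_data(
        compute="adf_compute",                  # assumed compute target
        inputs={"folder": src_folder},
        outputs={"output_folder": Output(type=AssetTypes.URI_FOLDER)},
        data_copy_mode="merge_with_overwrite",  # assumed mode string
    )
    return {"copied": copy_node.outputs.output_folder}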
@@ -187,18 +196,18 @@ def copy_data( @pipeline_node_decorator def import_data( - *, - name: Optional[str] = None, - description: Optional[str] = None, - tags: Optional[Dict] = None, - display_name: Optional[str] = None, - experiment_name: Optional[str] = None, - compute: Optional[str] = None, - source: Optional[Union[Dict, Database, FileSystem]] = None, - outputs: Optional[Dict] = None, - is_deterministic: bool = True, - task: Optional[str] = DataTransferTaskType.IMPORT_DATA, - **kwargs, + *, + name: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[Dict] = None, + display_name: Optional[str] = None, + experiment_name: Optional[str] = None, + compute: Optional[str] = None, + source: Optional[Union[Dict, Database, FileSystem]] = None, + outputs: Optional[Dict] = None, + is_deterministic: bool = True, + task: Optional[str] = DataTransferTaskType.IMPORT_DATA, + **kwargs, ) -> DataTransferImport: """Create a DataTransferImport object which can be used inside dsl.pipeline. @@ -229,7 +238,7 @@ def import_data( :type task: str """ source = _build_source_sink(source) - outputs = outputs or {'sink': Output(type=AssetTypes.MLTABLE)} + outputs = outputs or {"sink": Output(type=AssetTypes.MLTABLE)} # # job inputs can not be None # job_inputs = {k: v for k, v in job_inputs.items() if v is not None} component_outputs, job_outputs = _parse_inputs_outputs(outputs, parse_func=_parse_output) @@ -271,18 +280,18 @@ def import_data( @pipeline_node_decorator def export_data( - *, - name: Optional[str] = None, - description: Optional[str] = None, - tags: Optional[Dict] = None, - display_name: Optional[str] = None, - experiment_name: Optional[str] = None, - compute: Optional[str] = None, - sink: Optional[Union[Dict, Database, FileSystem]] = None, - inputs: Optional[Dict] = None, - is_deterministic: bool = True, - task: Optional[str] = DataTransferTaskType.EXPORT_DATA, - **kwargs, + *, + name: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[Dict] = None, + display_name: Optional[str] = None, + experiment_name: Optional[str] = None, + compute: Optional[str] = None, + sink: Optional[Union[Dict, Database, FileSystem]] = None, + inputs: Optional[Dict] = None, + is_deterministic: bool = True, + task: Optional[str] = DataTransferTaskType.EXPORT_DATA, + **kwargs, ) -> DataTransferExport: """Create a DataTransferExport object which can be used inside dsl.pipeline. 
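import_data and export_data, whose signatures are re-wrapped above, accept a Database or FileSystem (or an equivalent dict) as source/sink, and import_data defaults its outputs to a single MLTABLE "sink". A hedged sketch of the import side (illustration only; the Database keyword names and the compute value are assumptions not shown in this diff):

# Illustrative sketch only: the Database keyword names (connection, query) and the
# compute value are assumptions; the source handling and the default MLTABLE "sink"
# output come from the import_data body above.
from azure.ai.ml.data_transfer import import_data, Database

snowflake_source = Database(
    connection="azureml:my_snowflake_connection",  # assumed workspace connection reference
    query="select * from my_table",                # assumed query field name
)

# outputs is omitted, so the builder falls back to {"sink": Output(type=AssetTypes.MLTABLE)}.
import_node = import_data(
    source=snowflake_source,
    compute="data_factory_compute",  # assumed compute target
)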
diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/parallel_for.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/parallel_for.py index 37bd784ce4b3..29047497b898 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/parallel_for.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/parallel_for.py @@ -125,7 +125,7 @@ def _to_rest_item(cls, item: dict) -> dict: **cls._input_entity_to_rest_inputs(input_entity=asset_inputs), # primitive inputs has primitive type value like this # {"int_param": 1} - **primitive_inputs + **primitive_inputs, } @classmethod @@ -153,10 +153,7 @@ def _to_rest_object(self, **kwargs) -> dict: # pylint: disable=unused-argument rest_node = super(ParallelFor, self)._to_rest_object(**kwargs) # convert items to rest object rest_items = self._to_rest_items(items=self.items) - rest_node.update(dict( - items=rest_items, - outputs=self._to_rest_outputs() - )) + rest_node.update(dict(items=rest_items, outputs=self._to_rest_outputs())) return convert_ordered_dict_to_dict(rest_node) @classmethod @@ -168,10 +165,7 @@ def _from_rest_item(cls, rest_item): asset_inputs[key] = val else: primitive_inputs[key] = val - return { - **cls._from_rest_inputs(inputs=asset_inputs), - **primitive_inputs - } + return {**cls._from_rest_inputs(inputs=asset_inputs), **primitive_inputs} @classmethod def _from_rest_items(cls, rest_items: str) -> Union[dict, list, str]: @@ -247,7 +241,7 @@ def _validate_items(cls, items, raise_error=True, body_component=None): if not is_data_binding_expression(items, ["parent"]): validation_result.append_error( yaml_path="items", - message=f"Items is neither a valid JSON string due to {e} or a binding string." + message=f"Items is neither a valid JSON string due to {e} or a binding string.", ) if isinstance(items, dict): # Validate dict keys @@ -256,9 +250,7 @@ def _validate_items(cls, items, raise_error=True, body_component=None): if len(items) > 0: cls._validate_items_list(items, validation_result, body_component=body_component) else: - validation_result.append_error( - yaml_path="items", - message="Items is an empty list/dict.") + validation_result.append_error(yaml_path="items", message="Items is an empty list/dict.") else: validation_result.append_error( yaml_path="items", @@ -280,7 +272,7 @@ def _validate_items_list(cls, items: list, validation_result, body_component=Non if not isinstance(item, dict): validation_result.append_error( yaml_path="items", - message=f"Items has to be list/dict of dict as value, " f"but got {type(item)} for {item}." + message=f"Items has to be list/dict of dict as value, " f"but got {type(item)} for {item}.", ) else: # item has to have matched meta @@ -289,17 +281,11 @@ def _validate_items_list(cls, items: list, validation_result, body_component=Non meta = item else: msg = f"Items should have same keys with body inputs, but got {item.keys()} and {meta.keys()}." - validation_result.append_error( - yaml_path="items", - message=msg - ) + validation_result.append_error(yaml_path="items", message=msg) # items' keys should appear in body's inputs if isinstance(body_component, Component) and (not item.keys() <= body_component.inputs.keys()): msg = f"Item {item} got unmatched inputs with loop body component inputs {body_component.inputs}." 
- validation_result.append_error( - yaml_path="items", - message=msg - ) + validation_result.append_error(yaml_path="items", message=msg) # validate item value type cls._validate_item_value_type(item=item, validation_result=validation_result) @@ -313,7 +299,7 @@ def _validate_item_value_type(cls, item: dict, validation_result): yaml_path="items", message="Unsupported type {} in parallel_for items. Supported types are: {}".format( type(val), supported_types - ) + ), ) if isinstance(val, Input): cls._validate_input_item_value(entry=val, validation_result=validation_result) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component.py index de2111c8a0f6..74b896cc1b23 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component.py @@ -515,11 +515,15 @@ def __call__(self, *args, **kwargs) -> [..., Union["Command", "Parallel"]]: if args: # raise clear error message for unsupported positional args if self._func._has_parameters: - msg = f"Component function doesn't support positional arguments, got {args} for {self.name}. " \ - f"Please use keyword arguments like: {self._func._func_calling_example}." + msg = ( + f"Component function doesn't support positional arguments, got {args} for {self.name}. " + f"Please use keyword arguments like: {self._func._func_calling_example}." + ) else: - msg = "Component function doesn't has any parameters, " \ - f"please make sure component {self.name} has inputs. " + msg = ( + "Component function doesn't has any parameters, " + f"please make sure component {self.name} has inputs. " + ) raise ValidationException( message=msg, target=ErrorTarget.COMPONENT, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component_factory.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component_factory.py index 8a859802e118..7551d230fe15 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component_factory.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component_factory.py @@ -18,8 +18,11 @@ from azure.ai.ml.entities._component.parallel_component import ParallelComponent from azure.ai.ml.entities._component.pipeline_component import PipelineComponent from azure.ai.ml.entities._component.spark_component import SparkComponent -from azure.ai.ml.entities._component.datatransfer_component import DataTransferCopyComponent, \ - DataTransferImportComponent, DataTransferExportComponent +from azure.ai.ml.entities._component.datatransfer_component import ( + DataTransferCopyComponent, + DataTransferImportComponent, + DataTransferExportComponent, +) from azure.ai.ml.entities._util import get_type_from_spec diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/datatransfer_component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/datatransfer_component.py index 151a6055d324..03ab93a70d2e 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/datatransfer_component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/datatransfer_component.py @@ -6,8 +6,11 @@ from marshmallow import Schema -from azure.ai.ml._schema.component.data_transfer_component import DataTransferCopyComponentSchema, \ - DataTransferImportComponentSchema, DataTransferExportComponentSchema +from azure.ai.ml._schema.component.data_transfer_component import ( + DataTransferCopyComponentSchema, + DataTransferImportComponentSchema, + DataTransferExportComponentSchema, +) from 
azure.ai.ml.constants._common import COMPONENT_TYPE, AssetTypes from azure.ai.ml.constants._component import NodeType, DataTransferTaskType, ExternalDataType from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException, ValidationErrorType @@ -91,8 +94,9 @@ def _build_source_sink(cls, io_dict: Union[Dict, Database, FileSystem]): msg = "Source or sink only support type {} and {}, currently got {}." raise ValidationException( message=msg.format(ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, data_type), - no_personal_data_message=msg.format(ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, - "data_type"), + no_personal_data_message=msg.format( + ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, "data_type" + ), target=ErrorTarget.COMPONENT, error_category=ErrorCategory.USER_ERROR, error_type=ValidationErrorType.INVALID_VALUE, @@ -163,8 +167,9 @@ def _validate_input_output_mapping(self): outputs_count = len(self.outputs) if outputs_count != 1: msg = "Only support single output in {}, but there're {} outputs." - validation_result.append_error(message=msg.format(DataTransferTaskType.COPY_DATA, outputs_count), - yaml_path="outputs") + validation_result.append_error( + message=msg.format(DataTransferTaskType.COPY_DATA, outputs_count), yaml_path="outputs" + ) else: input_type = None output_type = None @@ -175,18 +180,18 @@ def _validate_input_output_mapping(self): output_type = output_data.type if input_type is None or output_type is None or input_type != output_type: msg = "Input type {} doesn't exactly match with output type {} in task {}" - validation_result.append_error(message=msg.format(input_type, output_type, - DataTransferTaskType.COPY_DATA), - yaml_path="outputs") + validation_result.append_error( + message=msg.format(input_type, output_type, DataTransferTaskType.COPY_DATA), yaml_path="outputs" + ) elif inputs_count > 1: for _, output_data in self.outputs.items(): output_type = output_data.type if output_type is None or output_type != AssetTypes.URI_FOLDER: msg = "output type {} need to be {} in task {}" - validation_result.append_error(message=msg.format(output_type, - AssetTypes.URI_FOLDER, - DataTransferTaskType.COPY_DATA), - yaml_path="outputs") + validation_result.append_error( + message=msg.format(output_type, AssetTypes.URI_FOLDER, DataTransferTaskType.COPY_DATA), + yaml_path="outputs", + ) else: msg = "Inputs must be set in task {}." validation_result.append_error(message=msg.format(DataTransferTaskType.COPY_DATA), yaml_path="outputs") @@ -215,7 +220,7 @@ def __init__( **kwargs, ): - outputs = outputs or {'sink': Output(type=AssetTypes.MLTABLE)} + outputs = outputs or {"sink": Output(type=AssetTypes.MLTABLE)} super().__init__( task=task, outputs=outputs, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/pipeline_component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/pipeline_component.py index 450b14f0f005..2672000e2143 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/pipeline_component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/pipeline_component.py @@ -307,16 +307,19 @@ def _get_anonymous_hash(self) -> str: # command component), so we just use rest object to generate hash for pipeline component, # which doesn't have reuse issue. 
component_interface_dict = self._to_rest_object().properties.component_spec - hash_value = hash_dict(component_interface_dict, keys_to_omit=[ - # omit name since anonymous component will have same name - "name", - # omit _source since it doesn't impact component's uniqueness - "_source", - # omit id since it will be set after component is registered - "id", - # omit version since it will be set to this hash later - "version" - ]) + hash_value = hash_dict( + component_interface_dict, + keys_to_omit=[ + # omit name since anonymous component will have same name + "name", + # omit _source since it doesn't impact component's uniqueness + "_source", + # omit id since it will be set after component is registered + "id", + # omit version since it will be set to this hash later + "version", + ], + ) return hash_value def _get_flattened_inputs(self): @@ -354,7 +357,7 @@ def _resolve_sub_nodes(cls, rest_jobs): # TODO: Remove this ad-hoc fix after unified arm id format in object component_id = node.get("componentId", "") if isinstance(component_id, str) and re.match(ASSET_ARM_ID_REGEX_FORMAT, component_id): - node["componentId"] = component_id[len(ARM_ID_PREFIX):] + node["componentId"] = component_id[len(ARM_ID_PREFIX) :] if not LoopNode._is_loop_node_dict(node): # skip resolve LoopNode first since it may reference other nodes # use node factory instead of BaseNode._from_rest_object here as AutoMLJob is not a BaseNode diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/_custom_applications.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/_custom_applications.py index 626f0f344229..7f339fc1e576 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/_custom_applications.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/_custom_applications.py @@ -55,9 +55,7 @@ class EndpointsSettings: """ def __init__(self, *, target: int, published: int): - EndpointsSettings._validate_endpoint_settings( - target=target, published=published - ) + EndpointsSettings._validate_endpoint_settings(target=target, published=published) self.target = target self.published = published @@ -165,9 +163,7 @@ def _to_rest_object(self): environment_variables = None if self.environment_variables: environment_variables = { - name: RestEnvironmentVariable( - type=RestEnvironmentVariableType.LOCAL, value=value - ) + name: RestEnvironmentVariable(type=RestEnvironmentVariableType.LOCAL, value=value) for name, value in self.environment_variables.items() } @@ -181,11 +177,7 @@ def _to_rest_object(self): endpoints=endpoints, environment_variables=environment_variables, volumes=volumes, - docker=( - Docker(privileged=True) - if self.type == CustomApplicationDefaults.DOCKER - else None - ), + docker=(Docker(privileged=True) if self.type == CustomApplicationDefaults.DOCKER else None), additional_properties=self.additional_properties, ) @@ -228,9 +220,7 @@ def validate_custom_applications(custom_apps: List[CustomApplications]): error_category=ErrorCategory.USER_ERROR, ) - published_ports = [ - endpoint.published for app in custom_apps for endpoint in app.endpoints - ] + published_ports = [endpoint.published for app in custom_apps for endpoint in app.endpoints] if len(set(published_ports)) != len(published_ports): raise ValidationException( diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/compute_instance.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/compute_instance.py index 53ebd73d8f0e..dadc529e5d51 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/compute_instance.py +++ 
b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/compute_instance.py @@ -243,9 +243,7 @@ def _to_rest_object(self) -> ComputeResource: subnet_resource = ResourceId(id=self.subnet) else: subnet_resource = None - if self.ssh_public_access_enabled and not ( - self.ssh_settings and self.ssh_settings.ssh_key_value - ): + if self.ssh_public_access_enabled and not (self.ssh_settings and self.ssh_settings.ssh_key_value): msg = "ssh_key_value is required when ssh_public_access_enabled = True." raise ValidationException( message=msg, @@ -259,9 +257,7 @@ def _to_rest_object(self) -> ComputeResource: admin_public_key=self.ssh_settings.ssh_key_value, ) if self.ssh_public_access_enabled is not None: - ssh_settings.ssh_public_access = ( - "Enabled" if self.ssh_public_access_enabled else "Disabled" - ) + ssh_settings.ssh_public_access = "Enabled" if self.ssh_public_access_enabled else "Disabled" else: ssh_settings.ssh_public_access = "NotSpecified" personal_compute_instance_settings = None @@ -292,12 +288,8 @@ def _to_rest_object(self) -> ComputeResource: idle_time_before_shutdown=idle_time_before_shutdown, enable_node_public_ip=self.enable_node_public_ip, ) - compute_instance_prop.schedules = ( - self.schedules._to_rest_object() if self.schedules else None - ) - compute_instance_prop.setup_scripts = ( - self.setup_scripts._to_rest_object() if self.setup_scripts else None - ) + compute_instance_prop.schedules = self.schedules._to_rest_object() if self.schedules else None + compute_instance_prop.setup_scripts = self.setup_scripts._to_rest_object() if self.setup_scripts else None if self.custom_applications: validate_custom_applications(self.custom_applications) compute_instance_prop.custom_services = [] @@ -311,9 +303,7 @@ def _to_rest_object(self) -> ComputeResource: return ComputeResource( location=self.location, properties=compute_instance, - identity=( - self.identity._to_compute_rest_object() if self.identity else None - ), + identity=(self.identity._to_compute_rest_object() if self.identity else None), tags=self.tags, ) @@ -361,12 +351,10 @@ def _load_from_rest(cls, rest_obj: ComputeResource) -> "ComputeInstance": network_settings = NetworkSettings( subnet=prop.properties.subnet.id if prop.properties.subnet else None, public_ip_address=prop.properties.connectivity_endpoints.public_ip_address - if prop.properties.connectivity_endpoints - and prop.properties.connectivity_endpoints.public_ip_address + if prop.properties.connectivity_endpoints and prop.properties.connectivity_endpoints.public_ip_address else None, private_ip_address=prop.properties.connectivity_endpoints.private_ip_address - if prop.properties.connectivity_endpoints - and prop.properties.connectivity_endpoints.private_ip_address + if prop.properties.connectivity_endpoints and prop.properties.connectivity_endpoints.private_ip_address else None, ) os_image_metadata = None @@ -376,12 +364,8 @@ def _load_from_rest(cls, rest_obj: ComputeResource) -> "ComputeInstance": is_latest_os_image_version=metadata.is_latest_os_image_version if metadata.is_latest_os_image_version is not None else None, - current_image_version=metadata.current_image_version - if metadata.current_image_version - else None, - latest_image_version=metadata.latest_image_version - if metadata.latest_image_version - else None, + current_image_version=metadata.current_image_version if metadata.current_image_version else None, + latest_image_version=metadata.latest_image_version if metadata.latest_image_version else None, ) idle_time_before_shutdown = None @@ -393,9 +377,7 @@ 
def _load_from_rest(cls, rest_obj: ComputeResource) -> "ComputeInstance": pattern=idle_time_before_shutdown_pattern, string=idle_time_before_shutdown, ) - idle_time_before_shutdown_minutes = ( - int(idle_time_match[1]) if idle_time_match else None - ) + idle_time_before_shutdown_minutes = int(idle_time_match[1]) if idle_time_match else None custom_applications = None if prop.properties and prop.properties.custom_services: custom_applications = [] @@ -425,23 +407,13 @@ def _load_from_rest(cls, rest_obj: ComputeResource) -> "ComputeInstance": create_on_behalf_of=create_on_behalf_of, network_settings=network_settings, ssh_settings=ssh_settings, - ssh_public_access_enabled=_ssh_public_access_to_bool( - prop.properties.ssh_settings.ssh_public_access - ) - if ( - prop.properties - and prop.properties.ssh_settings - and prop.properties.ssh_settings.ssh_public_access - ) + ssh_public_access_enabled=_ssh_public_access_to_bool(prop.properties.ssh_settings.ssh_public_access) + if (prop.properties and prop.properties.ssh_settings and prop.properties.ssh_settings.ssh_public_access) else None, schedules=ComputeSchedules._from_rest_object(prop.properties.schedules) - if prop.properties - and prop.properties.schedules - and prop.properties.schedules.compute_start_stop - else None, - identity=IdentityConfiguration._from_compute_rest_object(rest_obj.identity) - if rest_obj.identity + if prop.properties and prop.properties.schedules and prop.properties.schedules.compute_start_stop else None, + identity=IdentityConfiguration._from_compute_rest_object(rest_obj.identity) if rest_obj.identity else None, setup_scripts=SetupScripts._from_rest_object(prop.properties.setup_scripts) if prop.properties and prop.properties.setup_scripts else None, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/synapsespark_compute.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/synapsespark_compute.py index 233f4019e427..4a8349d240c7 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/synapsespark_compute.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/synapsespark_compute.py @@ -119,13 +119,7 @@ def __init__( **kwargs, ): kwargs[TYPE] = ComputeType.SYNAPSESPARK - super().__init__( - name=name, - description=description, - location=kwargs.pop("location", None), - tags=tags, - **kwargs - ) + super().__init__(name=name, description=description, location=kwargs.pop("location", None), tags=tags, **kwargs) self.identity = identity self.node_count = node_count self.node_family = node_family diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/destination.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/destination.py index 6121cfa6818f..a205e5540671 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/destination.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/destination.py @@ -20,10 +20,7 @@ class Destination: """ # pylint: disable=unused-argument,no-self-use - def __init__( - self, path: Optional[str] = None, - event_hub: Optional[EventHub] = None, - **kwargs): + def __init__(self, path: Optional[str] = None, event_hub: Optional[EventHub] = None, **kwargs): self.path = path self.event_hub = event_hub diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/event_hub.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/event_hub.py index dbb8977e1ef5..efb23b6e8414 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/event_hub.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/event_hub.py @@ -21,10 +21,7 @@ class 
EventHub: # pylint: disable=unused-argument,no-self-use def __init__( - self, - namespace: Optional[str] = None, - oversize_data_config: Optional[OversizeDataConfig] = None, - **kwargs + self, namespace: Optional[str] = None, oversize_data_config: Optional[OversizeDataConfig] = None, **kwargs ): self.namespace = namespace self.oversize_data_config = oversize_data_config diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/online_deployment.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/online_deployment.py index 799f8496eacc..7ac89d23cca3 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/online_deployment.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_deployment/online_deployment.py @@ -53,7 +53,7 @@ module_logger = logging.getLogger(__name__) -#pylint: disable=too-many-instance-attributes +# pylint: disable=too-many-instance-attributes class OnlineDeployment(Deployment): """Online endpoint deployment entity @@ -327,9 +327,8 @@ def _filter_datastore_to_rest_object(self): @classmethod def _filter_datastore_from_rest_object( - cls, - entity: "OnlineDeployment", - deployment: RestOnlineDeploymentDetails) -> "OnlineDeployment": + cls, entity: "OnlineDeployment", deployment: RestOnlineDeploymentDetails + ) -> "OnlineDeployment": # Data collector is private preview. If Private Preview environment variable is not enable # data collector will be removed from tags. Data Collector values will be stored in tags # until data collector is added to the contract. diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_endpoint/_endpoint_helpers.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_endpoint/_endpoint_helpers.py index fc00729ca245..4abbe98750d5 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_endpoint/_endpoint_helpers.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_endpoint/_endpoint_helpers.py @@ -15,10 +15,7 @@ def validate_endpoint_or_deployment_name(name: str, is_deployment: bool = False) """ type_str = "a deployment" if is_deployment else "an endpoint" target = ErrorTarget.DEPLOYMENT if is_deployment else ErrorTarget.ENDPOINT - if ( - len(name) < EndpointConfigurations.MIN_NAME_LENGTH - or len(name) > EndpointConfigurations.MAX_NAME_LENGTH - ): + if len(name) < EndpointConfigurations.MIN_NAME_LENGTH or len(name) > EndpointConfigurations.MAX_NAME_LENGTH: msg = f"The name for {type_str} must be at least 3 and at most 32 characters long (inclusive of both limits)." 
raise ValidationException( message=msg, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/__init__.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/__init__.py index fd1444cb7431..062c162cf470 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/__init__.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/__init__.py @@ -61,5 +61,13 @@ def some_pipeline( from .utils import _get_param_with_standard_annotation, is_group from .external_data import Database, FileSystem -__all__ = ["Input", "Output", "EnumInput", "GroupInput", "is_group", "_get_param_with_standard_annotation", - "Database", "FileSystem"] +__all__ = [ + "Input", + "Output", + "EnumInput", + "GroupInput", + "is_group", + "_get_param_with_standard_annotation", + "Database", + "FileSystem", +] diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/external_data.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/external_data.py index 1324a9dfbe71..10be493fefd2 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/external_data.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/external_data.py @@ -27,7 +27,7 @@ def __init__( *, name: Optional[str] = None, value: Optional[str] = None, - type: Optional[str] = None, # pylint: disable=redefined-builtin + type: Optional[str] = None, # pylint: disable=redefined-builtin ): self.type = type self.name = name @@ -141,6 +141,7 @@ class FileSystem(DictMixin, RestTranslatableMixin): # pylint: disable=too-many- :raises ~azure.ai.ml.exceptions.ValidationException: Raised if Source cannot be successfully validated. Details will be provided in the error message. """ + _EMPTY = Parameter.empty def __init__( diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/output.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/output.py index 5523ae8c06cd..c672c10f73c3 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/output.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_inputs_outputs/output.py @@ -88,8 +88,8 @@ def __init__(self, *, type=AssetTypes.URI_FOLDER, path=None, mode=None, descript super(Output, self).__init__(type=type) # As an annotation, it is not allowed to initialize the _port_name. self._port_name = None - self.name = kwargs.pop('name', None) - self.version = kwargs.pop('version', None) + self.name = kwargs.pop("name", None) + self.version = kwargs.pop("version", None) self._is_primitive_type = self.type in IOConstants.PRIMITIVE_STR_2_TYPE self.description = description self.path = path @@ -108,8 +108,7 @@ def _get_hint(self, new_line_style=False): def _to_dict(self): """Convert the Output object to a dict.""" - keys = ["name", "version", "path", "type", "mode", - "description", "is_control", "early_available"] + keys = ["name", "version", "path", "type", "mode", "description", "is_control", "early_available"] result = {key: getattr(self, key) for key in keys} return _remove_empty_values(result) @@ -142,6 +141,6 @@ def _assert_name_and_version(self): raise UserErrorException( f"The output name {self.name} can only contain alphanumeric characters, dashes and underscores, " f"with a limit of 255 characters." 
- ) + ) if self.version and not self.name: raise UserErrorException("Output name is required when output version is specified.") diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py index 8868b245f899..f8398c7bca93 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py @@ -9,12 +9,19 @@ from typing import Dict, Optional, Union from azure.ai.ml._restclient.v2022_10_01_preview.models import JobBase -from azure.ai.ml._schema.job.data_transfer_job import DataTransferCopyJobSchema, DataTransferImportJobSchema, \ - DataTransferExportJobSchema +from azure.ai.ml._schema.job.data_transfer_job import ( + DataTransferCopyJobSchema, + DataTransferImportJobSchema, + DataTransferExportJobSchema, +) from azure.ai.ml.constants import JobType from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE -from azure.ai.ml.constants._component import ExternalDataType, DataTransferBuiltinComponentUri, ComponentSource, \ - DataTransferTaskType +from azure.ai.ml.constants._component import ( + ExternalDataType, + DataTransferBuiltinComponentUri, + ComponentSource, + DataTransferTaskType, +) from azure.ai.ml.entities._inputs_outputs import Input, Output from azure.ai.ml.entities._util import load_from_dict from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException @@ -113,8 +120,9 @@ def _build_source_sink(cls, io_dict: Union[Dict, Database, FileSystem]): msg = "Source or sink only support type {} and {}, currently got {}." raise ValidationException( message=msg.format(ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, data_type), - no_personal_data_message=msg.format(ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, - "data_type"), + no_personal_data_message=msg.format( + ExternalDataType.DATABASE, ExternalDataType.FILE_SYSTEM, "data_type" + ), target=ErrorTarget.COMPONENT, error_category=ErrorCategory.USER_ERROR, error_type=ValidationErrorType.INVALID_VALUE, @@ -169,7 +177,7 @@ def _to_component(self, context: Optional[Dict] = None, **kwargs): inputs=self._to_inputs(inputs=self.inputs, pipeline_job_dict=pipeline_job_dict), outputs=self._to_outputs(outputs=self.outputs, pipeline_job_dict=pipeline_job_dict), task=self.task, - data_copy_mode=self.data_copy_mode + data_copy_mode=self.data_copy_mode, ) def _to_node(self, context: Optional[Dict] = None, **kwargs): diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/base.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/base.py index 182f3922a9cf..35ee44948877 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/base.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/base.py @@ -418,7 +418,7 @@ def name(self, name): raise UserErrorException( f"We support self._data of Input, Output, InputOutputBase, NodeOutput and NodeInput," f"but got type: {type(self._data)}." - ) + ) @property def version(self) -> str: @@ -440,7 +440,7 @@ def version(self, version): raise UserErrorException( f"We support self._data of Input, Output, InputOutputBase, NodeOutput and NodeInput," f"but got type: {type(self._data)}." 
- ) + ) @property def path(self) -> Optional[str]: @@ -470,7 +470,7 @@ def _assert_name_and_version(self): raise UserErrorException( f"The output name {self.name} can only contain alphanumeric characters, dashes and underscores, " f"with a limit of 255 characters." - ) + ) if self.version and not self.name: raise UserErrorException("Output name is required when output version is specified.") @@ -513,7 +513,7 @@ def _to_job_output(self): is_control=is_control, name=self.name, version=self.version, - description=self.description + description=self.description, ) else: msg = "Got unexpected type for output: {}." diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/mixin.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/mixin.py index bd4425da6eb7..4b4e50bcecb9 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/mixin.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_io/mixin.py @@ -207,10 +207,10 @@ def _to_rest_outputs(self) -> Dict[str, Dict]: rest_output_bindings[key].update({"version": binding["version"]}) def _rename_name_and_version(output_dict): - if 'asset_name' in output_dict.keys(): - output_dict['name'] = output_dict.pop('asset_name') - if 'asset_version' in output_dict.keys(): - output_dict['version'] = output_dict.pop('asset_version') + if "asset_name" in output_dict.keys(): + output_dict["name"] = output_dict.pop("asset_name") + if "asset_version" in output_dict.keys(): + output_dict["version"] = output_dict.pop("asset_version") return output_dict rest_data_outputs = {name: _rename_name_and_version(val.as_dict()) for name, val in rest_data_outputs.items()} diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_load_component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_load_component.py index fa76aadae2ba..648dda99d2fc 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_load_component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_load_component.py @@ -15,8 +15,17 @@ from azure.ai.ml.constants._compute import ComputeType from azure.ai.ml.dsl._component_func import to_component_func from azure.ai.ml.dsl._overrides_definition import OverrideDefinition -from azure.ai.ml.entities._builders import BaseNode, Command, Import, Parallel, Spark, Sweep, DataTransferCopy, \ - DataTransferImport, DataTransferExport +from azure.ai.ml.entities._builders import ( + BaseNode, + Command, + Import, + Parallel, + Spark, + Sweep, + DataTransferCopy, + DataTransferImport, + DataTransferExport, +) from azure.ai.ml.entities._builders.condition_node import ConditionNode from azure.ai.ml.entities._builders.control_flow_node import ControlFlowNode from azure.ai.ml.entities._builders.do_while import DoWhile diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py index cf41ee29b295..49f9d543b506 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_pipeline_job_helpers.py @@ -45,8 +45,8 @@ def process_sdk_component_job_io( if isinstance(io_value, (Input, Output)) and isinstance(io_value.path, str): mode = io_value.mode path = io_value.path - name = io_value.name if hasattr(io_value, 'name') else None - version = io_value.version if hasattr(io_value, 'version') else None + name = io_value.name if hasattr(io_value, "name") else None + version = io_value.version if 
hasattr(io_value, "version") else None if any([re.match(item, path) for item in io_binding_regex_list]): # Yaml syntax requires using ${{}} to enclose inputs and outputs bindings # io_bindings[io_name] = io_value @@ -127,7 +127,7 @@ def from_dict_to_rest_io( io_bindings[key].update({"mode": INPUT_MOUNT_MAPPING_FROM_REST[io_mode]}) # add name and version for binding input if io_name or io_version: - assert rest_object_class.__name__ == 'JobOutput' + assert rest_object_class.__name__ == "JobOutput" # current code only support dump name and version for JobOutput # this assert can be deleted if we need to dump name/version for JobInput if io_name: @@ -137,13 +137,13 @@ def from_dict_to_rest_io( if not io_mode and not io_name and not io_version: io_bindings[key] = io_value else: - if rest_object_class.__name__ == 'JobOutput': + if rest_object_class.__name__ == "JobOutput": # current code only support dump name and version for JobOutput # this condition can be deleted if we need to dump name/version for JobInput - if 'name' in val.keys(): - val['asset_name'] = val.pop('name') - if 'version' in val.keys(): - val['asset_version'] = val.pop('version') + if "name" in val.keys(): + val["asset_name"] = val.pop("name") + if "version" in val.keys(): + val["asset_version"] = val.pop("version") rest_obj = rest_object_class.from_dict(val) rest_io_objects[key] = rest_obj else: diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_schedule/trigger.py b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_schedule/trigger.py index 4386a388af19..1d406a4a5414 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_schedule/trigger.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/entities/_schedule/trigger.py @@ -8,10 +8,10 @@ from typing import List, Optional, Union from azure.ai.ml._restclient.v2022_12_01_preview.models import CronTrigger as RestCronTrigger -from azure.ai.ml._restclient.v2022_12_01_preview.models import RecurrenceSchedule as RestRecurrencePattern +from azure.ai.ml._restclient.v2022_12_01_preview.models import RecurrenceSchedule as RestRecurrencePattern from azure.ai.ml._restclient.v2022_12_01_preview.models import RecurrenceTrigger as RestRecurrenceTrigger -from azure.ai.ml._restclient.v2022_12_01_preview.models import TriggerBase as RestTriggerBase -from azure.ai.ml._restclient.v2022_12_01_preview.models import TriggerType as RestTriggerType +from azure.ai.ml._restclient.v2022_12_01_preview.models import TriggerBase as RestTriggerBase +from azure.ai.ml._restclient.v2022_12_01_preview.models import TriggerType as RestTriggerType from azure.ai.ml._utils.utils import camel_to_snake, snake_to_camel from azure.ai.ml.constants import TimeZone from azure.ai.ml.entities._mixins import RestTranslatableMixin diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/exceptions.py b/sdk/ml/azure-ai-ml/azure/ai/ml/exceptions.py index 658f2bcf09b3..43afd3ffd591 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/exceptions.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/exceptions.py @@ -487,7 +487,7 @@ def __init__( message, no_personal_data_message=None, error_category=ErrorCategory.USER_ERROR, - target: ErrorTarget = ErrorTarget.PIPELINE + target: ErrorTarget = ErrorTarget.PIPELINE, ): super().__init__( message=message, diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_batch_deployment_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_batch_deployment_operations.py index 777b5f856e7d..08be83c0cc9b 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_batch_deployment_operations.py +++ 
b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_batch_deployment_operations.py @@ -277,21 +277,23 @@ def _validate_component(self, deployment: Deployment, orchestrators: OperationOr deployment.job_definition.name = name deployment.job_definition.component_id = component_id elif isinstance(deployment.job_definition.job, str): - job_component = PipelineComponent(source_job_id= deployment.job_definition.job) + job_component = PipelineComponent(source_job_id=deployment.job_definition.job) component = self._component_operations.create_or_update( - name = job_component.name, + name=job_component.name, resource_group_name=self._resource_group_name, workspace_name=self._workspace_name, - body = job_component._to_rest_object(), - version= job_component.version, - **self._init_kwargs + body=job_component._to_rest_object(), + version=job_component.version, + **self._init_kwargs, ) if not deployment.job_definition.description and component.properties.description: deployment.job_definition.description = component.properties.description if not deployment.job_definition.tags and component.properties.tags: deployment.job_definition.tags = component.properties.tags # pylint: disable=line-too-long - if isinstance(deployment.job_definition.job, str) or isinstance(deployment.job_definition.component, PipelineComponent): + if isinstance(deployment.job_definition.job, str) or isinstance( + deployment.job_definition.component, PipelineComponent + ): deployment.job_definition.component = None deployment.job_definition.job = None deployment.job_definition.component_id = component.id diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py index 7c28f31773b7..2f43b265f2a3 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py @@ -477,9 +477,7 @@ def _try_resolve_environment_for_component(cls, component, _: str, resolver: Cal not isinstance(component.environment, dict) and not type(component.environment).__name__ == "InternalEnvironment" ): - component.environment = resolver( - component.environment, azureml_type=AzureMLResourceType.ENVIRONMENT - ) + component.environment = resolver(component.environment, azureml_type=AzureMLResourceType.ENVIRONMENT) def _resolve_arm_id_or_upload_dependencies(self, component: Component) -> None: if isinstance(component, AutoMLComponent): @@ -577,6 +575,7 @@ def _try_resolve_node_level_task_for_parallel_node(cls, node: BaseNode, _: str, The ideal solution should be done after PRS team decides how to handle parallel.task.code """ from azure.ai.ml.entities import Parallel, ParallelComponent + if not isinstance(node, Parallel): return component = node._component # pylint: disable=protected-access @@ -592,9 +591,7 @@ def _try_resolve_node_level_task_for_parallel_node(cls, node: BaseNode, _: str, ) node.task.code = component.code if node.task.environment: - node.task.environment = resolver( - component.environment, azureml_type=AzureMLResourceType.ENVIRONMENT - ) + node.task.environment = resolver(component.environment, azureml_type=AzureMLResourceType.ENVIRONMENT) @classmethod def _set_default_display_name_for_anonymous_component_in_node(cls, node: BaseNode, default_name: str): @@ -610,12 +607,12 @@ def _set_default_display_name_for_anonymous_component_in_node(cls, node: BaseNod # TODO: the same anonymous component with different node name will have different anonymous hash # as their display name will 
be different. if ( - isinstance(component, Component) - # check if component is anonymous and not created based on its id. We can't directly check - # node._component._is_anonymous as it will be set to True on component creation, - # which is later than this check - and not component.id - and not component.display_name + isinstance(component, Component) + # check if component is anonymous and not created based on its id. We can't directly check + # node._component._is_anonymous as it will be set to True on component creation, + # which is later than this check + and not component.id + and not component.display_name ): component.display_name = default_name @@ -634,11 +631,7 @@ def _try_resolve_compute_for_node(cls, node: BaseNode, _: str, resolver): node.compute_name = resolver(node.compute_name, azureml_type=AzureMLResourceType.COMPUTE) @classmethod - def _divide_nodes_to_resolve_into_layers( - cls, - component: PipelineComponent, - extra_operations: List[Callable] - ): + def _divide_nodes_to_resolve_into_layers(cls, component: PipelineComponent, extra_operations: List[Callable]): """Traverse the pipeline component and divide nodes to resolve into layers. For example, for below pipeline component, assuming that all nodes need to be resolved: A @@ -676,7 +669,7 @@ def _divide_nodes_to_resolve_into_layers( if isinstance(job_instance, BaseNode) and isinstance(job_instance._component, PipelineComponent): if cur_layer + 1 == len(layers): layers.append([]) - layers[cur_layer+1].extend(job_instance.component.jobs.items()) + layers[cur_layer + 1].extend(job_instance.component.jobs.items()) if cur_layer_head == len(layers[cur_layer]): cur_layer += 1 @@ -689,11 +682,7 @@ def _divide_nodes_to_resolve_into_layers( return layers def _resolve_dependencies_for_pipeline_component_jobs( - self, - component: Union[Component, str], - resolver: Callable, - *, - resolve_inputs: bool = True + self, component: Union[Component, str], resolver: Callable, *, resolve_inputs: bool = True ): """Resolve dependencies for pipeline component jobs. Will directly return if component is not a pipeline component. @@ -725,7 +714,7 @@ def _resolve_dependencies_for_pipeline_component_jobs( partial(self._try_resolve_environment_for_component, resolver=resolver), partial(self._try_resolve_compute_for_node, resolver=resolver), # should we resolve code here after we do extra operations concurrently? 
- ] + ], ) # cache anonymous component only for now diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_compute_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_compute_operations.py index 1ca829a67c80..c80487191f5d 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_compute_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_compute_operations.py @@ -115,7 +115,7 @@ def begin_create_or_update(self, compute: Compute) -> LROPoller[Compute]: module_logger.warning( "Warning: 'Location' is not supported for compute type %s and will not be used.", compute.type, - ) + ) compute.location = self._get_workspace_location() if not compute.location: diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py index c1d612695f86..5b704780c9e8 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py @@ -22,7 +22,9 @@ from azure.ai.ml._scope_dependent_operations import OperationConfig, OperationScope, _ScopeDependentOperations from azure.ai.ml._restclient.v2021_10_01_dataplanepreview import ( - AzureMachineLearningWorkspaces as ServiceClient102021Dataplane, ) + AzureMachineLearningWorkspaces as ServiceClient102021Dataplane, +) + # from azure.ai.ml._telemetry import ActivityType, monitor_with_activity from azure.ai.ml._utils._asset_utils import ( _archive_or_restore, @@ -62,13 +64,11 @@ class DataOperations(_ScopeDependentOperations): - def __init__( self, operation_scope: OperationScope, operation_config: OperationConfig, - service_client: Union[ServiceClient102022, - ServiceClient102021Dataplane], + service_client: Union[ServiceClient102022, ServiceClient102021Dataplane], datastore_operations: DatastoreOperations, **kwargs: Dict, ): @@ -103,65 +103,76 @@ def list( :rtype: ~azure.core.paging.ItemPaged[Data] """ if name: - return (self._operation.list( - name=name, + return ( + self._operation.list( + name=name, + registry_name=self._registry_name, + cls=lambda objs: [Data._from_rest_object(obj) for obj in objs], + list_view_type=list_view_type, + **self._scope_kwargs, + ) + if self._registry_name + else self._operation.list( + name=name, + workspace_name=self._workspace_name, + cls=lambda objs: [Data._from_rest_object(obj) for obj in objs], + list_view_type=list_view_type, + **self._scope_kwargs, + ) + ) + return ( + self._container_operation.list( registry_name=self._registry_name, - cls=lambda objs: [Data._from_rest_object(obj) for obj in objs], + cls=lambda objs: [Data._from_container_rest_object(obj) for obj in objs], list_view_type=list_view_type, **self._scope_kwargs, - ) if self._registry_name else self._operation.list( - name=name, + ) + if self._registry_name + else self._container_operation.list( workspace_name=self._workspace_name, - cls=lambda objs: [Data._from_rest_object(obj) for obj in objs], + cls=lambda objs: [Data._from_container_rest_object(obj) for obj in objs], list_view_type=list_view_type, **self._scope_kwargs, - )) - return (self._container_operation.list( - registry_name=self._registry_name, - cls=lambda objs: - [Data._from_container_rest_object(obj) for obj in objs], - list_view_type=list_view_type, - **self._scope_kwargs, - ) if self._registry_name else self._container_operation.list( - workspace_name=self._workspace_name, - cls=lambda objs: - [Data._from_container_rest_object(obj) for obj in objs], - list_view_type=list_view_type, - **self._scope_kwargs, - )) + ) + ) def _get(self, name: 
str, version: Optional[str] = None) -> Data: if version: - return (self._operation.get( + return ( + self._operation.get( + name=name, + version=version, + registry_name=self._registry_name, + **self._scope_kwargs, + **self._init_kwargs, + ) + if self._registry_name + else self._operation.get( + resource_group_name=self._resource_group_name, + workspace_name=self._workspace_name, + name=name, + version=version, + **self._init_kwargs, + ) + ) + return ( + self._container_operation.get( name=name, - version=version, registry_name=self._registry_name, **self._scope_kwargs, **self._init_kwargs, - ) if self._registry_name else self._operation.get( + ) + if self._registry_name + else self._container_operation.get( resource_group_name=self._resource_group_name, workspace_name=self._workspace_name, name=name, - version=version, **self._init_kwargs, - )) - return (self._container_operation.get( - name=name, - registry_name=self._registry_name, - **self._scope_kwargs, - **self._init_kwargs, - ) if self._registry_name else self._container_operation.get( - resource_group_name=self._resource_group_name, - workspace_name=self._workspace_name, - name=name, - **self._init_kwargs, - )) + ) + ) # @monitor_with_activity(logger, "Data.Get", ActivityType.PUBLICAPI) - def get(self, - name: str, - version: Optional[str] = None, - label: Optional[str] = None) -> Data: + def get(self, name: str, version: Optional[str] = None, label: Optional[str] = None) -> Data: """Get the specified data asset. :param name: Name of data asset. @@ -241,22 +252,18 @@ def create_or_update(self, data: Data) -> Data: version=version, resource_group=self._resource_group_name, registry=self._registry_name, - body=get_asset_body_for_registry_storage( - self._registry_name, "data", name, version), + body=get_asset_body_for_registry_storage(self._registry_name, "data", name, version), ) if not sas_uri: - module_logger.debug( - "Getting the existing asset name: %s, version: %s", - name, version) + module_logger.debug("Getting the existing asset name: %s, version: %s", name, version) return self.get(name=name, version=version) referenced_uris = self._validate(data) if referenced_uris: data._referenced_uris = referenced_uris - data, _ = _check_and_upload_path(artifact=data, - asset_operations=self, - sas_uri=sas_uri, - artifact_type=ErrorTarget.DATA) + data, _ = _check_and_upload_path( + artifact=data, asset_operations=self, sas_uri=sas_uri, artifact_type=ErrorTarget.DATA + ) data_version_resource = data._to_rest_object() auto_increment_version = data._auto_increment_version @@ -271,20 +278,23 @@ def create_or_update(self, data: Data) -> Data: **self._init_kwargs, ) else: - result = (self._operation.begin_create_or_update( - name=name, - version=version, - registry_name=self._registry_name, - body=data_version_resource, - **self._scope_kwargs, - ).result() if self._registry_name else - self._operation.create_or_update( - name=name, - version=version, - workspace_name=self._workspace_name, - body=data_version_resource, - **self._scope_kwargs, - )) + result = ( + self._operation.begin_create_or_update( + name=name, + version=version, + registry_name=self._registry_name, + body=data_version_resource, + **self._scope_kwargs, + ).result() + if self._registry_name + else self._operation.create_or_update( + name=name, + version=version, + workspace_name=self._workspace_name, + body=data_version_resource, + **self._scope_kwargs, + ) + ) if not result and self._registry_name: result = self._get(name=name, version=version) @@ -431,12 +441,8 @@ def 
_get_latest_version(self, name: str) -> Data: recently updated. """ latest_version = _get_latest_version_from_container( - name, - self._container_operation, - self._resource_group_name, - self._workspace_name, - self._registry_name - ) + name, self._container_operation, self._resource_group_name, self._workspace_name, self._registry_name + ) return self.get(name, version=latest_version) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_datastore_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_datastore_operations.py index 78fa5afe5cb1..8b66e83e986e 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_datastore_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_datastore_operations.py @@ -13,7 +13,7 @@ from azure.ai.ml._restclient.v2022_10_01.models import ( Datastore as DatastoreData, DatastoreSecrets, - NoneDatastoreCredentials + NoneDatastoreCredentials, ) from azure.ai.ml._scope_dependent_operations import OperationConfig, OperationScope, _ScopeDependentOperations @@ -41,8 +41,7 @@ def __init__( serviceclient_2022_10_01: ServiceClient2022_10_01, **kwargs: Dict ): - super(DatastoreOperations, self).__init__( - operation_scope, operation_config) + super(DatastoreOperations, self).__init__(operation_scope, operation_config) # ops_logger.update_info(kwargs) self._operation = serviceclient_2022_10_01.datastores self._credential = serviceclient_2022_10_01._config.credential @@ -66,8 +65,7 @@ def _list_helper(datastore_resource, include_secrets: bool): return self._operation.list( resource_group_name=self._operation_scope.resource_group_name, workspace_name=self._workspace_name, - cls=lambda objs: [_list_helper( - obj, include_secrets) for obj in objs], + cls=lambda objs: [_list_helper(obj, include_secrets) for obj in objs], **self._init_kwargs ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_online_deployment_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_online_deployment_operations.py index 1cc4c704e162..e7976c8be4e0 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_online_deployment_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_online_deployment_operations.py @@ -126,12 +126,12 @@ def begin_create_or_update( deployment=deployment, local_endpoint_mode=self._get_local_endpoint_mode(vscode_debug), ) - if (deployment and deployment.instance_type and deployment.instance_type.lower() in SmallSKUs): + if deployment and deployment.instance_type and deployment.instance_type.lower() in SmallSKUs: module_logger.warning( - "Instance type %s may be too small for compute resources. " # pylint: disable=line-too-long - "Minimum recommended compute SKU is Standard_DS3_v2 for general purpose endpoints. Learn more about SKUs here: " # pylint: disable=line-too-long + "Instance type %s may be too small for compute resources. " # pylint: disable=line-too-long + "Minimum recommended compute SKU is Standard_DS3_v2 for general purpose endpoints. 
Learn more about SKUs here: " # pylint: disable=line-too-long "https://learn.microsoft.com/en-us/azure/machine-learning/referencemanaged-online-endpoints-vm-sku-list", - deployment.instance_type # pylint: disable=line-too-long + deployment.instance_type, # pylint: disable=line-too-long ) if ( not skip_script_validation diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py index dba998f3c822..454e75e7aaa5 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py @@ -144,9 +144,10 @@ def get_asset_arm_id( if azureml_type == AzureMLResourceType.ENVIRONMENT: azureml_prefix = "azureml:" # return the same value if resolved result is passed in - _asset = asset[len(azureml_prefix):] if asset.startswith(azureml_prefix) else asset + _asset = asset[len(azureml_prefix) :] if asset.startswith(azureml_prefix) else asset if _asset.startswith(CURATED_ENV_PREFIX) or re.match( - REGISTRY_VERSION_PATTERN, f"{azureml_prefix}{_asset}"): + REGISTRY_VERSION_PATTERN, f"{azureml_prefix}{_asset}" + ): return f"{azureml_prefix}{_asset}" name, label = parse_name_label(asset) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_virtual_cluster_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_virtual_cluster_operations.py index a301d22fe3dc..be355b3a6b19 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_virtual_cluster_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_virtual_cluster_operations.py @@ -18,7 +18,7 @@ module_logger = ops_logger.module_logger -class VirtualClusterOperations(): +class VirtualClusterOperations: """VirtualClusterOperations. You should not instantiate this class directly. Instead, you should @@ -70,6 +70,9 @@ def get(self, name: str) -> Dict: :rtype: Dict """ - - return get_virtual_cluster_by_id(name=name, resource_group=self._resource_group_name, - subscription_id=self._subscription_id, credential=self._credentials) + return get_virtual_cluster_by_id( + name=name, + resource_group=self._resource_group_name, + subscription_id=self._subscription_id, + credential=self._credentials, + ) diff --git a/sdk/ml/azure-ai-ml/samples/ml_samples_authentication_sovereign_cloud.py b/sdk/ml/azure-ai-ml/samples/ml_samples_authentication_sovereign_cloud.py index bc4191028dae..436b9bb1d180 100644 --- a/sdk/ml/azure-ai-ml/samples/ml_samples_authentication_sovereign_cloud.py +++ b/sdk/ml/azure-ai-ml/samples/ml_samples_authentication_sovereign_cloud.py @@ -25,7 +25,6 @@ class MLClientSamples(object): - def ml_auth_azure_default_credential(self): # [START create_ml_client_default_credential] # Get a credential for authentication @@ -48,14 +47,14 @@ def ml_auth_azure_default_credential(self): # AzureAuthorityHosts.AZURE_CHINA or AzureAuthorityHosts.AZURE_GOVERNMENT # credential = DefaultAzureCredential(authority=AzureAuthorityHosts.AZURE_CHINA) credential = DefaultAzureCredential(authority=AzureAuthorityHosts.AZURE_PUBLIC_CLOUD) - + # When using sovereign domains (that is, any cloud other than AZURE_PUBLIC_CLOUD), # you must pass in the cloud name in kwargs. 
Default cloud is AzureCloud kwargs = {"cloud": "AzureCloud"} # get a handle to the subscription ml_client = MLClient(credential, subscription_id, resource_group, **kwargs) # [END create_ml_client_default_credential] - + from azure.ai.ml.entities import Workspace # Get a list of workspaces in a resource group @@ -63,6 +62,6 @@ def ml_auth_azure_default_credential(self): print(ws.name, ":", ws.location, ":", ws.description) -if __name__ == '__main__': +if __name__ == "__main__": sample = MLClientSamples() sample.ml_auth_azure_default_credential() diff --git a/sdk/ml/azure-ai-ml/scripts/regenerate_restclient.py b/sdk/ml/azure-ai-ml/scripts/regenerate_restclient.py index e36bb82b5965..2bf4e554ddf9 100644 --- a/sdk/ml/azure-ai-ml/scripts/regenerate_restclient.py +++ b/sdk/ml/azure-ai-ml/scripts/regenerate_restclient.py @@ -131,7 +131,6 @@ def regenerate_restclient(api_tag, verbose): ) - if __name__ == "__main__": parser = ArgumentParser() diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification.py index 0b428ef68426..4647c039590f 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification.py @@ -23,10 +23,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestAutoMLImageClassification(AzureRecordedTestCase): def _create_jsonl_multiclass(self, client, train_path, val_path): @@ -137,4 +134,6 @@ def test_image_classification_multiclass_run( assert_final_job_status(submitted_job_sweep, client, ImageClassificationJob, JobStatus.COMPLETED, deadline=3600) # Assert completion of Automode job - assert_final_job_status(submitted_job_automode, client, ImageClassificationJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + submitted_job_automode, client, ImageClassificationJob, JobStatus.COMPLETED, deadline=3600 + ) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification_multilabel.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification_multilabel.py index 53666586a977..e0743407c504 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification_multilabel.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_classification_multilabel.py @@ -27,10 +27,7 @@ "mock_asset_name", "mock_code_hash", ) -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestAutoMLImageClassificationMultilabel(AzureRecordedTestCase): def _create_jsonl_multilabel(self, client: MLClient, train_path: str, val_path: str): src_images = "./multilabelFridgeObjects/" @@ -144,7 +141,11 @@ def test_image_classification_multilabel_run( submitted_job_automode = client.jobs.create_or_update(image_classification_multilabel_job_automode) # Assert completion of regular sweep job - assert_final_job_status(submitted_job_sweep, client, ImageClassificationMultilabelJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + 
submitted_job_sweep, client, ImageClassificationMultilabelJob, JobStatus.COMPLETED, deadline=3600 + ) # Assert completion of Automode job - assert_final_job_status(submitted_job_automode, client, ImageClassificationMultilabelJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + submitted_job_automode, client, ImageClassificationMultilabelJob, JobStatus.COMPLETED, deadline=3600 + ) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_object_detection.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_object_detection.py index b244ebacfb49..99a4c17e767f 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_object_detection.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_object_detection.py @@ -23,10 +23,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestAutoMLImageObjectDetection(AzureRecordedTestCase): def _create_jsonl_object_detection(self, client, train_path, val_path): import xml.etree.ElementTree as ET @@ -168,7 +165,11 @@ def test_image_object_detection_run( submitted_job_automode = client.jobs.create_or_update(image_object_detection_job_automode) # Assert completion of regular sweep job - assert_final_job_status(submitted_job_sweep, client, ImageObjectDetectionJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + submitted_job_sweep, client, ImageObjectDetectionJob, JobStatus.COMPLETED, deadline=3600 + ) # Assert completion of Automode job - assert_final_job_status(submitted_job_automode, client, ImageObjectDetectionJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + submitted_job_automode, client, ImageObjectDetectionJob, JobStatus.COMPLETED, deadline=3600 + ) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_segmentation.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_segmentation.py index edcd100acc19..605dde03f616 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_segmentation.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_automl_image_segmentation.py @@ -24,7 +24,7 @@ @pytest.mark.usefixtures("recorded_test") @pytest.mark.skipif( condition=not is_live() or platform.python_implementation() == "PyPy", - reason="Datasets downloaded by test are too large to record reliably" + reason="Datasets downloaded by test are too large to record reliably", ) class TestAutoMLImageSegmentation(AzureRecordedTestCase): def _create_jsonl_segmentation(self, client, train_path, val_path): @@ -36,6 +36,7 @@ def _create_jsonl_segmentation(self, client, train_path, val_path): data_path_uri = client.data.create_or_update(fridge_data) import os + train_annotations_file = os.path.join(train_path, "train_annotations.jsonl") validation_annotations_file = os.path.join(val_path, "validation_annotations.jsonl") @@ -60,7 +61,7 @@ def _update_jsonl_path(self, remote_path, file_path): # Update image url json_line["image_url"] = remote_path + old_url[result + len(data_path) :] jsonl_file_write.write(json.dumps(json_line) + "\n") - + def test_image_segmentation_run(self, image_segmentation_dataset: Tuple[Input, Input], client: MLClient) -> None: # Note: this test launches two jobs in order to avoid calling the dataset fixture more 
than once. Ideally, it # would have sufficed to mark the fixture with session scope, but pytest-xdist breaks this functionality: @@ -118,7 +119,11 @@ def test_image_segmentation_run(self, image_segmentation_dataset: Tuple[Input, I submitted_job_automode = client.jobs.create_or_update(image_instance_segmentation_job_automode) # Assert completion of regular sweep job - assert_final_job_status(submitted_job_sweep, client, ImageInstanceSegmentationJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + submitted_job_sweep, client, ImageInstanceSegmentationJob, JobStatus.COMPLETED, deadline=3600 + ) # Assert completion of Automode job - assert_final_job_status(submitted_job_automode, client, ImageInstanceSegmentationJob, JobStatus.COMPLETED, deadline=3600) + assert_final_job_status( + submitted_job_automode, client, ImageInstanceSegmentationJob, JobStatus.COMPLETED, deadline=3600 + ) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_classification.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_classification.py index 8128606c50a0..7d6e27f3e50e 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_classification.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_classification.py @@ -1,4 +1,3 @@ - # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- @@ -18,10 +17,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestAutoMLClassification(AzureRecordedTestCase): def get_classification_task( self, dataset: Tuple[Input, Input, str], experiment_name: str, add_validation: bool = False diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_forecasting.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_forecasting.py index 8a4acc0d8821..1643a1b20543 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_forecasting.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_forecasting.py @@ -17,10 +17,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestAutoMLForecasting(AzureRecordedTestCase): def get_forecasting_task( self, diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_regression.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_regression.py index 6d67a7ca3316..9e6e77807cd7 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_regression.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_regression.py @@ -17,10 +17,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestAutoMLRegression(AzureRecordedTestCase): def get_regression_task( self, dataset: Tuple[Input, str], experiment_name: str, 
add_validation: bool = False diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification.py index 7b05d2bea898..24c5c38be9b4 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification.py @@ -17,10 +17,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestTextClassification(AzureRecordedTestCase): def test_remote_run_text_classification( self, diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification_multilabel.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification_multilabel.py index 738c6ada295c..26c2af9a03e0 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification_multilabel.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_classification_multilabel.py @@ -17,10 +17,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestTextClassificationMultilabel(AzureRecordedTestCase): def test_remote_run_text_classification_multilabel( self, paper_categorization: Tuple[Input, Input, str], client: MLClient diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_ner.py b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_ner.py index 2d0996cf32a3..4297815e08ca 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_ner.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/e2etests/test_remote_text_ner.py @@ -17,10 +17,7 @@ @pytest.mark.automl_test @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="Datasets downloaded by test are too large to record reliably" -) +@pytest.mark.skipif(condition=not is_live(), reason="Datasets downloaded by test are too large to record reliably") class TestTextNer(AzureRecordedTestCase): def test_remote_run_text_ner( self, diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_schema.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_schema.py index f56a6edf6787..3961b759102c 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_schema.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_schema.py @@ -164,8 +164,7 @@ def expected_image_search_space_settings() -> List[RestImageClassificationSearch learning_rate="uniform(0.005,0.05)", model_name="choice('vitb16r224','vits16r224')", number_of_epochs="choice(15,30)", - ams_gradient="choice(True,False)" - + ams_gradient="choice(True,False)", ), RestImageClassificationSearchSpace( learning_rate="uniform(0.005,0.05)", @@ -173,7 +172,7 @@ def expected_image_search_space_settings() -> List[RestImageClassificationSearch training_crop_size="choice(224,256)", validation_crop_size="choice(224,256)", validation_resize_size="choice(288,320,352)", - ams_gradient="False" + ams_gradient="False", ), ] @@ 
-691,13 +690,18 @@ def test_image_classification_schema_validation(self, tmp_path: Path): with pytest.raises(ValidationError, match="Value 'random_lr_scheduler1' passed is not in set"): load_job(test_yaml_path) - test_config_copy["search_space"][0]["learning_rate_scheduler"] = f"{camel_to_snake(LearningRateScheduler.WARMUP_COSINE)}" + test_config_copy["search_space"][0][ + "learning_rate_scheduler" + ] = f"{camel_to_snake(LearningRateScheduler.WARMUP_COSINE)}" dump_yaml_to_file(test_yaml_path, test_config_copy) assert isinstance(load_job(test_yaml_path), image_classification_job.ImageClassificationJob) test_config_copy["search_space"][0]["learning_rate_scheduler"] = { "type": "choice", - "values": [f"{camel_to_snake(LearningRateScheduler.WARMUP_COSINE)}", f"{camel_to_snake(LearningRateScheduler.STEP)}"], + "values": [ + f"{camel_to_snake(LearningRateScheduler.WARMUP_COSINE)}", + f"{camel_to_snake(LearningRateScheduler.STEP)}", + ], } dump_yaml_to_file(test_yaml_path, test_config_copy) assert isinstance(load_job(test_yaml_path), image_classification_job.ImageClassificationJob) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_sweep_setting.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_sweep_setting.py index e0fe5113e2cc..ad9ac47b18a6 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_sweep_setting.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_image_sweep_setting.py @@ -23,15 +23,15 @@ class TestImageSweepSettings: _EARLY_TERMINATION_POLICY_OPTIONS = [ - None, + None, EarlyTerminationPolicyType.BANDIT, - EarlyTerminationPolicyType.MEDIAN_STOPPING, - EarlyTerminationPolicyType.TRUNCATION_SELECTION + EarlyTerminationPolicyType.MEDIAN_STOPPING, + EarlyTerminationPolicyType.TRUNCATION_SELECTION, ] _SAMPLING_ALGORITHM_OPTIONS = [ - SamplingAlgorithmType.GRID, - SamplingAlgorithmType.BAYESIAN, - SamplingAlgorithmType.RANDOM + SamplingAlgorithmType.GRID, + SamplingAlgorithmType.BAYESIAN, + SamplingAlgorithmType.RANDOM, ] _EARLY_TERM_POLICY_AND_SAMPLING_ALG_OPTIONS = list( product(_EARLY_TERMINATION_POLICY_OPTIONS, _SAMPLING_ALGORITHM_OPTIONS) @@ -41,95 +41,82 @@ class TestImageSweepSettings: "early_termination_name,sampling_algorithm_name", _EARLY_TERM_POLICY_AND_SAMPLING_ALG_OPTIONS ) def test_image_sweep_settings_from_rest( - self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType, ) -> None: - image_sweep_settings_rest = self._get_rest_obj( - early_termination_name, sampling_algorithm_name - ) - expected_image_sweep_settings_obj = self._get_entity_obj( - early_termination_name, sampling_algorithm_name - ) + image_sweep_settings_rest = self._get_rest_obj(early_termination_name, sampling_algorithm_name) + expected_image_sweep_settings_obj = self._get_entity_obj(early_termination_name, sampling_algorithm_name) image_sweep_settings_obj = ImageSweepSettings._from_rest_object(image_sweep_settings_rest) - assert image_sweep_settings_obj == expected_image_sweep_settings_obj, \ - f"actual: {image_sweep_settings_obj}, expected: {expected_image_sweep_settings_obj}" + assert ( + image_sweep_settings_obj == expected_image_sweep_settings_obj + ), f"actual: {image_sweep_settings_obj}, expected: {expected_image_sweep_settings_obj}" @pytest.mark.parametrize( "early_termination_name,sampling_algorithm_name", 
_EARLY_TERM_POLICY_AND_SAMPLING_ALG_OPTIONS ) def test_image_sweep_settings_to_rest( - self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType, ) -> None: - image_sweep_settings_obj = self._get_entity_obj( - early_termination_name, sampling_algorithm_name - ) - expected_image_sweep_settings_rest = self._get_rest_obj( - early_termination_name, sampling_algorithm_name - ) + image_sweep_settings_obj = self._get_entity_obj(early_termination_name, sampling_algorithm_name) + expected_image_sweep_settings_rest = self._get_rest_obj(early_termination_name, sampling_algorithm_name) image_sweep_settings_rest = image_sweep_settings_obj._to_rest_object() - assert image_sweep_settings_rest == expected_image_sweep_settings_rest, \ - f"actual: {image_sweep_settings_rest}, expected: {expected_image_sweep_settings_rest}" + assert ( + image_sweep_settings_rest == expected_image_sweep_settings_rest + ), f"actual: {image_sweep_settings_rest}, expected: {expected_image_sweep_settings_rest}" @pytest.mark.parametrize( "early_termination_name,sampling_algorithm_name", _EARLY_TERM_POLICY_AND_SAMPLING_ALG_OPTIONS ) def test_equality( - self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType, ) -> None: image_sweep_settings_obj = self._get_entity_obj(early_termination_name, sampling_algorithm_name) # serialize and deserialize to ensure equality image_sweep_settings_rest = ImageSweepSettings._to_rest_object(image_sweep_settings_obj) image_sweep_settings_obj_2 = ImageSweepSettings._from_rest_object(image_sweep_settings_rest) - assert image_sweep_settings_obj == image_sweep_settings_obj_2, \ - f"actual: {image_sweep_settings_obj}, expected: {image_sweep_settings_obj_2}" + assert ( + image_sweep_settings_obj == image_sweep_settings_obj_2 + ), f"actual: {image_sweep_settings_obj}, expected: {image_sweep_settings_obj_2}" def _get_rest_obj( - self, + self, early_termination_name: Optional[EarlyTerminationPolicyType] = None, - sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID + sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID, ) -> RestImageSweepSettings: if early_termination_name == EarlyTerminationPolicyType.BANDIT: rest_early_termination_name = RestBanditPolicy(evaluation_interval=10, slack_factor=0.2) elif early_termination_name == EarlyTerminationPolicyType.MEDIAN_STOPPING: - rest_early_termination_name = RestMedianStoppingPolicy(delay_evaluation = 5, evaluation_interval = 1) + rest_early_termination_name = RestMedianStoppingPolicy(delay_evaluation=5, evaluation_interval=1) elif early_termination_name == EarlyTerminationPolicyType.TRUNCATION_SELECTION: rest_early_termination_name = RestTruncationSelectionPolicy( - evaluation_interval=1, - truncation_percentage=20, - delay_evaluation=5 + evaluation_interval=1, truncation_percentage=20, delay_evaluation=5 ) else: rest_early_termination_name = None - + return RestImageSweepSettings( - sampling_algorithm=sampling_algorithm_name, - early_termination=rest_early_termination_name + sampling_algorithm=sampling_algorithm_name, early_termination=rest_early_termination_name ) def _get_entity_obj( - self, + self, early_termination_name: 
Optional[EarlyTerminationPolicyType] = None, - sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID + sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID, ) -> ImageSweepSettings: if early_termination_name == EarlyTerminationPolicyType.BANDIT: early_termination_name = BanditPolicy(evaluation_interval=10, slack_factor=0.2) elif early_termination_name == EarlyTerminationPolicyType.MEDIAN_STOPPING: - early_termination_name = MedianStoppingPolicy(delay_evaluation = 5, evaluation_interval = 1) + early_termination_name = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=1) elif early_termination_name == EarlyTerminationPolicyType.TRUNCATION_SELECTION: early_termination_name = TruncationSelectionPolicy( - evaluation_interval=1, - truncation_percentage=20, - delay_evaluation=5 + evaluation_interval=1, truncation_percentage=20, delay_evaluation=5 ) else: early_termination_name = None - - return ImageSweepSettings( - sampling_algorithm=sampling_algorithm_name, - early_termination=early_termination_name - ) + + return ImageSweepSettings(sampling_algorithm=sampling_algorithm_name, early_termination=early_termination_name) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_schema.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_schema.py index 5d168db02449..6afe697a168d 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_schema.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_schema.py @@ -81,7 +81,7 @@ def nlp_sweep_settings_expected() -> RestNlpSweepSettings: early_termination=RestBanditPolicy( slack_amount=0.02, evaluation_interval=10, - ) + ), ) @@ -99,13 +99,13 @@ def nlp_search_space_expected() -> List[RestNlpParameterSubspace]: RestNlpParameterSubspace( model_name="choice('bert-base-cased','bert-base-uncased')", learning_rate="uniform(0.000005,0.00005)", - learning_rate_scheduler="choice('linear','cosine_with_restarts')" + learning_rate_scheduler="choice('linear','cosine_with_restarts')", ), RestNlpParameterSubspace( model_name="choice('roberta-base','roberta-large')", learning_rate="uniform(0.000002,0.000008)", - gradient_accumulation_steps="choice(1,2,3)" - ) + gradient_accumulation_steps="choice(1,2,3)", + ), ] @@ -245,9 +245,8 @@ def _get_rest_automl_job(automl_task, name, compute_id): @pytest.fixture def loaded_text_classification_job( - mock_machinelearning_client: OperationScope, - run_type: str, - tmp_path: Path) -> AutoMLJob: + mock_machinelearning_client: OperationScope, run_type: str, tmp_path: Path +) -> AutoMLJob: return _load_automl_job_from_path( mock_machinelearning_client, @@ -259,9 +258,8 @@ def loaded_text_classification_job( @pytest.fixture def loaded_text_classification_multilabel_job( - mock_machinelearning_client: OperationScope, - run_type: str, - tmp_path: Path) -> AutoMLJob: + mock_machinelearning_client: OperationScope, run_type: str, tmp_path: Path +) -> AutoMLJob: return _load_automl_job_from_path( mock_machinelearning_client, run_type, @@ -271,10 +269,7 @@ def loaded_text_classification_multilabel_job( @pytest.fixture -def loaded_text_ner_job( - mock_machinelearning_client: OperationScope, - run_type: str, - tmp_path: Path) -> AutoMLJob: +def loaded_text_ner_job(mock_machinelearning_client: OperationScope, run_type: str, tmp_path: Path) -> AutoMLJob: return _load_automl_job_from_path( mock_machinelearning_client, run_type, @@ -284,10 +279,8 @@ def loaded_text_ner_job( def _load_automl_job_from_path( - 
mock_machinelearning_client: OperationScope, - run_type: str, - tmp_path: Path, - schema_path: Path) -> AutoMLJob: + mock_machinelearning_client: OperationScope, run_type: str, tmp_path: Path, schema_path: Path +) -> AutoMLJob: test_config = load_yaml(schema_path) if run_type == "single": test_config["limits"]["max_trials"] = 1 diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_sweep_settings.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_sweep_settings.py index 53f9abd0262d..6fac96a39a5d 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_sweep_settings.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_automl_nlp_sweep_settings.py @@ -31,46 +31,49 @@ class TestNlpSweepSettings: EARLY_TERM_SAMPL_ALG_PAIRS = list(product(ALL_TERMINATION_POLICIES, ALL_SAMPLING_ALGORITHMS)) - @pytest.mark.parametrize( - "early_termination_name,sampling_algorithm_name", EARLY_TERM_SAMPL_ALG_PAIRS - ) - def test_nlp_sweep_settings_from_rest(self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType) -> None: + @pytest.mark.parametrize("early_termination_name,sampling_algorithm_name", EARLY_TERM_SAMPL_ALG_PAIRS) + def test_nlp_sweep_settings_from_rest( + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType, + ) -> None: nlp_sweep_settings_rest = self._get_rest_obj(early_termination_name, sampling_algorithm_name) expected_nlp_sweep_settings_obj = self._get_entity_obj(early_termination_name, sampling_algorithm_name) nlp_sweep_settings_obj = NlpSweepSettings._from_rest_object(nlp_sweep_settings_rest) - assert nlp_sweep_settings_obj == expected_nlp_sweep_settings_obj, \ - f"expected: {expected_nlp_sweep_settings_obj}, actual: {nlp_sweep_settings_obj}" + assert ( + nlp_sweep_settings_obj == expected_nlp_sweep_settings_obj + ), f"expected: {expected_nlp_sweep_settings_obj}, actual: {nlp_sweep_settings_obj}" - @pytest.mark.parametrize( - "early_termination_name,sampling_algorithm_name", EARLY_TERM_SAMPL_ALG_PAIRS - ) - def test_nlp_sweep_settings_to_rest(self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType) -> None: + @pytest.mark.parametrize("early_termination_name,sampling_algorithm_name", EARLY_TERM_SAMPL_ALG_PAIRS) + def test_nlp_sweep_settings_to_rest( + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType, + ) -> None: expected_nlp_sweep_settings_rest = self._get_rest_obj(early_termination_name, sampling_algorithm_name) nlp_sweep_settings_obj = self._get_entity_obj(early_termination_name, sampling_algorithm_name) nlp_sweep_settings_rest = nlp_sweep_settings_obj._to_rest_object() - assert nlp_sweep_settings_rest == expected_nlp_sweep_settings_rest, \ - f"expected: {expected_nlp_sweep_settings_rest}, actual: {nlp_sweep_settings_rest}" + assert ( + nlp_sweep_settings_rest == expected_nlp_sweep_settings_rest + ), f"expected: {expected_nlp_sweep_settings_rest}, actual: {nlp_sweep_settings_rest}" - @pytest.mark.parametrize( - "early_termination_name,sampling_algorithm_name", EARLY_TERM_SAMPL_ALG_PAIRS - ) - def test_nlp_sweep_settings_round_trip(self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType) -> None: + @pytest.mark.parametrize("early_termination_name,sampling_algorithm_name", EARLY_TERM_SAMPL_ALG_PAIRS) + def 
test_nlp_sweep_settings_round_trip( + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType, + ) -> None: expected_nlp_sweep_settings_obj = self._get_entity_obj(early_termination_name, sampling_algorithm_name) rest_sweep_settings_obj = expected_nlp_sweep_settings_obj._to_rest_object() round_trip_nlp_sweep_settings_obj = NlpSweepSettings._from_rest_object(rest_sweep_settings_obj) - assert round_trip_nlp_sweep_settings_obj == expected_nlp_sweep_settings_obj, \ - f"expected: {expected_nlp_sweep_settings_obj}, actual: {round_trip_nlp_sweep_settings_obj}" + assert ( + round_trip_nlp_sweep_settings_obj == expected_nlp_sweep_settings_obj + ), f"expected: {expected_nlp_sweep_settings_obj}, actual: {round_trip_nlp_sweep_settings_obj}" def _get_entity_obj( - self, - early_termination_name: Optional[EarlyTerminationPolicyType], - sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID + self, + early_termination_name: Optional[EarlyTerminationPolicyType], + sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID, ) -> NlpSweepSettings: early_termination_policy = None if early_termination_name == EarlyTerminationPolicyType.BANDIT: @@ -78,24 +81,25 @@ def _get_entity_obj( elif early_termination_name == EarlyTerminationPolicyType.MEDIAN_STOPPING: early_termination_policy = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=1) elif early_termination_name == EarlyTerminationPolicyType.TRUNCATION_SELECTION: - early_termination_policy = TruncationSelectionPolicy(evaluation_interval=1, - truncation_percentage=20, - delay_evaluation=5) - return NlpSweepSettings(early_termination=early_termination_policy, - sampling_algorithm=sampling_algorithm_name) + early_termination_policy = TruncationSelectionPolicy( + evaluation_interval=1, truncation_percentage=20, delay_evaluation=5 + ) + return NlpSweepSettings(early_termination=early_termination_policy, sampling_algorithm=sampling_algorithm_name) def _get_rest_obj( - self, - early_termination_name: Optional[EarlyTerminationPolicyType] = None, - sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID) -> RestNlpSweepSettings: + self, + early_termination_name: Optional[EarlyTerminationPolicyType] = None, + sampling_algorithm_name: SamplingAlgorithmType = SamplingAlgorithmType.GRID, + ) -> RestNlpSweepSettings: early_termination_policy = None if early_termination_name == EarlyTerminationPolicyType.BANDIT: early_termination_policy = RestBanditPolicy(evaluation_interval=10, slack_factor=0.2) elif early_termination_name == EarlyTerminationPolicyType.MEDIAN_STOPPING: early_termination_policy = RestMedianStoppingPolicy(delay_evaluation=5, evaluation_interval=1) elif early_termination_name == EarlyTerminationPolicyType.TRUNCATION_SELECTION: - early_termination_policy = RestTruncationSelectionPolicy(evaluation_interval=1, - truncation_percentage=20, - delay_evaluation=5) - return RestNlpSweepSettings(sampling_algorithm=sampling_algorithm_name, - early_termination=early_termination_policy) + early_termination_policy = RestTruncationSelectionPolicy( + evaluation_interval=1, truncation_percentage=20, delay_evaluation=5 + ) + return RestNlpSweepSettings( + sampling_algorithm=sampling_algorithm_name, early_termination=early_termination_policy + ) diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_job.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_job.py index 2cd9eb8fddc3..0a9916e4e13e 
100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_job.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_job.py @@ -81,16 +81,17 @@ def test_automl_nlp_text_classification_init(self, run_type): ) if run_type == "sweep": - job.set_limits(max_concurrent_trials=2, - max_trials=1, - timeout_minutes=30, - trial_timeout_minutes=10, - max_nodes=4) + job.set_limits( + max_concurrent_trials=2, max_trials=1, timeout_minutes=30, trial_timeout_minutes=10, max_nodes=4 + ) early_termination_policy = BanditPolicy(evaluation_interval=10, slack_amount=0.02) - job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=early_termination_policy) - job.extend_search_space([SearchSpace(model_name="bert-base-cased", learning_rate=Uniform(5e-6, 5e-5)), - SearchSpace(model_name="bert-large-cased", number_of_epochs=Choice([3, 4, 5]))]) + job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, early_termination=early_termination_policy) + job.extend_search_space( + [ + SearchSpace(model_name="bert-base-cased", learning_rate=Uniform(5e-6, 5e-5)), + SearchSpace(model_name="bert-large-cased", number_of_epochs=Choice([3, 4, 5])), + ] + ) else: job.set_limits(timeout_minutes=30) job.set_training_parameters(training_batch_size=16) @@ -180,21 +181,27 @@ def test_automl_nlp_text_classification_to_rest_object(self, run_type): primary_metric=primary_metric, log_verbosity=log_verbosity, ) - job.set_limits(max_concurrent_trials=max_concurrent_trials, - max_trials=max_trials, - max_nodes=max_nodes, - timeout_minutes=timeout) + job.set_limits( + max_concurrent_trials=max_concurrent_trials, + max_trials=max_trials, + max_nodes=max_nodes, + timeout_minutes=timeout, + ) job.set_featurization(dataset_language=dataset_language) job.set_training_parameters(weight_decay=0.01) rest_sweep = None rest_search_space = None if run_type == "sweep": - job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2)) + job.set_sweep( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) job.extend_search_space([SearchSpace(model_name=Choice(["bert-base-cased", "distilbert-base-cased"]))]) - rest_sweep = NlpSweepSettings(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2)) + rest_sweep = NlpSweepSettings( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) rest_search_space = [NlpParameterSubspace(model_name="choice('bert-base-cased','distilbert-base-cased')")] expected = TextClassification( @@ -207,7 +214,7 @@ def test_automl_nlp_text_classification_to_rest_object(self, run_type): max_concurrent_trials=max_concurrent_trials, max_trials=max_trials, max_nodes=max_nodes, - timeout=to_iso_duration_format_mins(timeout) + timeout=to_iso_duration_format_mins(timeout), ), fixed_parameters=NlpFixedParameters(weight_decay=0.01), sweep_settings=rest_sweep, @@ -265,23 +272,29 @@ def test_automl_nlp_text_classification_from_rest_object(self, run_type): tags={"foo_tag": "bar"}, identity=identity, ) - expected_job.set_limits(max_concurrent_trials=max_concurrent_trials, - max_trials=max_trials, - max_nodes=max_nodes, - timeout_minutes=timeout) + expected_job.set_limits( + max_concurrent_trials=max_concurrent_trials, + max_trials=max_trials, + max_nodes=max_nodes, + 
timeout_minutes=timeout, + ) expected_job.set_featurization(dataset_language=dataset_language) expected_job.set_training_parameters(weight_decay=0.01) rest_sweep = None rest_search_space = None if run_type == "sweep": - expected_job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2)) - expected_job.extend_search_space([SearchSpace(model_name=Choice(["bert-base-cased", - "distilbert-base-cased"]))]) - rest_sweep = NlpSweepSettings(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=RestBanditPolicy(slack_factor=0.2, - evaluation_interval=2)) + expected_job.set_sweep( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) + expected_job.extend_search_space( + [SearchSpace(model_name=Choice(["bert-base-cased", "distilbert-base-cased"]))] + ) + rest_sweep = NlpSweepSettings( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) rest_search_space = [NlpParameterSubspace(model_name="choice(bert-base-cased, distilbert-base-cased)")] task_details = TextClassification( @@ -299,7 +312,7 @@ def test_automl_nlp_text_classification_from_rest_object(self, run_type): featurization_settings=NlpVerticalFeaturizationSettings(dataset_language=dataset_language), fixed_parameters=NlpFixedParameters(weight_decay=0.01), sweep_settings=rest_sweep, - search_space=rest_search_space + search_space=rest_search_space, ) job_data = JobBase(properties=RestAutoMLJob(task_details=task_details, identity=identity._to_job_rest_object())) # Test converting REST object to Job diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_multilabel_job.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_multilabel_job.py index c2e9d78f3ab4..90d0e97e147e 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_multilabel_job.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_classification_multilabel_job.py @@ -82,16 +82,17 @@ def test_automl_nlp_text_classification_multilabel_init(self, run_type): ) if run_type == "sweep": - job.set_limits(max_concurrent_trials=2, - max_trials=1, - timeout_minutes=30, - trial_timeout_minutes=10, - max_nodes=4) + job.set_limits( + max_concurrent_trials=2, max_trials=1, timeout_minutes=30, trial_timeout_minutes=10, max_nodes=4 + ) early_termination_policy = BanditPolicy(evaluation_interval=10, slack_amount=0.02) - job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=early_termination_policy) - job.extend_search_space([SearchSpace(model_name="bert-base-cased", learning_rate=Uniform(5e-6, 5e-5)), - SearchSpace(model_name="bert-large-cased", number_of_epochs=Choice([3, 4, 5]))]) + job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, early_termination=early_termination_policy) + job.extend_search_space( + [ + SearchSpace(model_name="bert-base-cased", learning_rate=Uniform(5e-6, 5e-5)), + SearchSpace(model_name="bert-large-cased", number_of_epochs=Choice([3, 4, 5])), + ] + ) else: job.set_limits(timeout_minutes=30) job.set_training_parameters(training_batch_size=16) @@ -107,7 +108,7 @@ def test_automl_nlp_text_classification_multilabel_init(self, run_type): assert job.sweep.sampling_algorithm == SamplingAlgorithmType.GRID assert job.sweep.early_termination == early_termination_policy - assert job.search_space[0].model_name == 
'bert-base-cased' + assert job.search_space[0].model_name == "bert-base-cased" assert job.search_space[0].learning_rate == Uniform(5e-6, 5e-5) assert job.search_space[1].model_name == "bert-large-cased" @@ -181,21 +182,27 @@ def test_automl_nlp_text_classification_multilabel_to_rest_object(self, run_type training_data=Input(type=AssetTypes.MLTABLE, path=training_data_uri), validation_data=Input(type=AssetTypes.MLTABLE, path=validation_data_uri), ) - job.set_limits(max_concurrent_trials=max_concurrent_trials, - max_trials=max_trials, - max_nodes=max_nodes, - timeout_minutes=timeout) + job.set_limits( + max_concurrent_trials=max_concurrent_trials, + max_trials=max_trials, + max_nodes=max_nodes, + timeout_minutes=timeout, + ) job.set_featurization(dataset_language=dataset_language) job.set_training_parameters(weight_decay=0.01) rest_sweep = None rest_search_space = None if run_type == "sweep": - job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2)) + job.set_sweep( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) job.extend_search_space([SearchSpace(model_name=Choice(["bert-base-cased", "distilbert-base-cased"]))]) - rest_sweep = NlpSweepSettings(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2)) + rest_sweep = NlpSweepSettings( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) rest_search_space = [NlpParameterSubspace(model_name="choice('bert-base-cased','distilbert-base-cased')")] expected = TextClassificationMultilabel( @@ -208,7 +215,7 @@ def test_automl_nlp_text_classification_multilabel_to_rest_object(self, run_type max_concurrent_trials=max_concurrent_trials, max_trials=max_trials, max_nodes=max_nodes, - timeout=to_iso_duration_format_mins(timeout) + timeout=to_iso_duration_format_mins(timeout), ), fixed_parameters=NlpFixedParameters(weight_decay=0.01), sweep_settings=rest_sweep, @@ -265,23 +272,29 @@ def test_automl_nlp_text_classification_multilabel_from_rest_object(self, run_ty tags={"foo_tag": "bar"}, identity=identity, ) - expected_job.set_limits(max_concurrent_trials=max_concurrent_trials, - max_trials=max_trials, - max_nodes=max_nodes, - timeout_minutes=timeout) + expected_job.set_limits( + max_concurrent_trials=max_concurrent_trials, + max_trials=max_trials, + max_nodes=max_nodes, + timeout_minutes=timeout, + ) expected_job.set_featurization(dataset_language=dataset_language) expected_job.set_training_parameters(weight_decay=0.01) rest_sweep = None rest_search_space = None if run_type == "sweep": - expected_job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2)) - expected_job.extend_search_space([SearchSpace(model_name=Choice(["bert-base-cased", - "distilbert-base-cased"]))]) - rest_sweep = NlpSweepSettings(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=RestBanditPolicy(slack_factor=0.2, - evaluation_interval=2)) + expected_job.set_sweep( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) + expected_job.extend_search_space( + [SearchSpace(model_name=Choice(["bert-base-cased", "distilbert-base-cased"]))] + ) + rest_sweep = NlpSweepSettings( + sampling_algorithm=SamplingAlgorithmType.GRID, + 
early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) rest_search_space = [NlpParameterSubspace(model_name="choice(bert-base-cased, distilbert-base-cased)")] task_details = TextClassificationMultilabel( @@ -293,12 +306,12 @@ def test_automl_nlp_text_classification_multilabel_from_rest_object(self, run_ty max_concurrent_trials=max_concurrent_trials, max_trials=max_trials, max_nodes=max_nodes, - timeout=to_iso_duration_format_mins(timeout) + timeout=to_iso_duration_format_mins(timeout), ), featurization_settings=NlpVerticalFeaturizationSettings(dataset_language=dataset_language), fixed_parameters=NlpFixedParameters(weight_decay=0.01), sweep_settings=rest_sweep, - search_space=rest_search_space + search_space=rest_search_space, ) job_data = JobBase(properties=RestAutoMLJob(task_details=task_details, identity=identity._to_job_rest_object())) # Test converting REST object to Job diff --git a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_ner_job.py b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_ner_job.py index 723a42eff9cc..f4b8e2edbeb6 100644 --- a/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_ner_job.py +++ b/sdk/ml/azure-ai-ml/tests/automl_job/unittests/test_text_ner_job.py @@ -74,16 +74,17 @@ def test_automl_nlp_text_ner_init(self, run_type): ) if run_type == "sweep": - job.set_limits(max_concurrent_trials=2, - max_trials=1, - timeout_minutes=30, - trial_timeout_minutes=10, - max_nodes=4) + job.set_limits( + max_concurrent_trials=2, max_trials=1, timeout_minutes=30, trial_timeout_minutes=10, max_nodes=4 + ) early_termination_policy = BanditPolicy(evaluation_interval=10, slack_amount=0.02) - job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=early_termination_policy) - job.extend_search_space([SearchSpace(model_name="bert-base-cased", learning_rate=Uniform(5e-6, 5e-5)), - SearchSpace(model_name="bert-large-cased", number_of_epochs=Choice([3, 4, 5]))]) + job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, early_termination=early_termination_policy) + job.extend_search_space( + [ + SearchSpace(model_name="bert-base-cased", learning_rate=Uniform(5e-6, 5e-5)), + SearchSpace(model_name="bert-large-cased", number_of_epochs=Choice([3, 4, 5])), + ] + ) else: job.set_limits(timeout_minutes=30) job.set_training_parameters(training_batch_size=16) @@ -169,21 +170,27 @@ def test_automl_nlp_text_ner_to_rest_object(self, run_type): log_verbosity=log_verbosity, identity=identity, ) - job.set_limits(max_concurrent_trials=max_concurrent_trials, - max_trials=max_trials, - max_nodes=max_nodes, - timeout_minutes=timeout) + job.set_limits( + max_concurrent_trials=max_concurrent_trials, + max_trials=max_trials, + max_nodes=max_nodes, + timeout_minutes=timeout, + ) job.set_featurization(dataset_language=dataset_language) job.set_training_parameters(weight_decay=0.01) rest_sweep = None rest_search_space = None if run_type == "sweep": - job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2)) + job.set_sweep( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) job.extend_search_space([SearchSpace(model_name=Choice(["bert-base-cased", "distilbert-base-cased"]))]) - rest_sweep = NlpSweepSettings(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2)) + rest_sweep = NlpSweepSettings( + 
sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) rest_search_space = [NlpParameterSubspace(model_name="choice('bert-base-cased','distilbert-base-cased')")] expected = TextNer( @@ -195,7 +202,7 @@ def test_automl_nlp_text_ner_to_rest_object(self, run_type): max_concurrent_trials=max_concurrent_trials, max_trials=max_trials, max_nodes=max_nodes, - timeout=to_iso_duration_format_mins(timeout) + timeout=to_iso_duration_format_mins(timeout), ), fixed_parameters=NlpFixedParameters(weight_decay=0.01), sweep_settings=rest_sweep, @@ -250,23 +257,29 @@ def test_automl_nlp_text_ner_from_rest_object(self, run_type): tags={"foo_tag": "bar"}, identity=identity, ) - expected_job.set_limits(max_concurrent_trials=max_concurrent_trials, - max_trials=max_trials, - max_nodes=max_nodes, - timeout_minutes=timeout) + expected_job.set_limits( + max_concurrent_trials=max_concurrent_trials, + max_trials=max_trials, + max_nodes=max_nodes, + timeout_minutes=timeout, + ) expected_job.set_featurization(dataset_language=dataset_language) expected_job.set_training_parameters(weight_decay=0.01) rest_sweep = None rest_search_space = None if run_type == "sweep": - expected_job.set_sweep(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2)) - expected_job.extend_search_space([SearchSpace(model_name=Choice(["bert-base-cased", - "distilbert-base-cased"]))]) - rest_sweep = NlpSweepSettings(sampling_algorithm=SamplingAlgorithmType.GRID, - early_termination=RestBanditPolicy(slack_factor=0.2, - evaluation_interval=2)) + expected_job.set_sweep( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=BanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) + expected_job.extend_search_space( + [SearchSpace(model_name=Choice(["bert-base-cased", "distilbert-base-cased"]))] + ) + rest_sweep = NlpSweepSettings( + sampling_algorithm=SamplingAlgorithmType.GRID, + early_termination=RestBanditPolicy(slack_factor=0.2, evaluation_interval=2), + ) rest_search_space = [NlpParameterSubspace(model_name="choice(bert-base-cased, distilbert-base-cased)")] task_details = TextNer( @@ -277,12 +290,12 @@ def test_automl_nlp_text_ner_from_rest_object(self, run_type): max_concurrent_trials=max_concurrent_trials, max_trials=max_trials, max_nodes=max_nodes, - timeout=to_iso_duration_format_mins(timeout) + timeout=to_iso_duration_format_mins(timeout), ), featurization_settings=NlpVerticalFeaturizationSettings(dataset_language=dataset_language), fixed_parameters=NlpFixedParameters(weight_decay=0.01), sweep_settings=rest_sweep, - search_space=rest_search_space + search_space=rest_search_space, ) job_data = JobBase(properties=RestAutoMLJob(task_details=task_details, identity=identity._to_job_rest_object())) # Test converting REST object to Job diff --git a/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_deployment.py b/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_deployment.py index cc011e66f886..cc3c07aa3f48 100644 --- a/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_deployment.py +++ b/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_deployment.py @@ -72,7 +72,13 @@ def test_batch_deployment(self, client: MLClient, data_with_2_versions: str) -> ) client.batch_endpoints.begin_delete(name=endpoint.name) - def test_batch_deployment_dependency_label_resolution(self, client: MLClient, randstr: Callable[[], str], rand_batch_name: Callable[[], str], 
rand_batch_deployment_name: Callable[[], str]) -> None: + def test_batch_deployment_dependency_label_resolution( + self, + client: MLClient, + randstr: Callable[[], str], + rand_batch_name: Callable[[], str], + rand_batch_deployment_name: Callable[[], str], + ) -> None: endpoint_yaml = "./tests/test_configs/endpoints/batch/batch_endpoint_mlflow_new.yaml" name = rand_batch_name("name") deployment_yaml = "./tests/test_configs/deployments/batch/batch_deployment_mlflow_new.yaml" @@ -129,7 +135,13 @@ def test_batch_deployment_dependency_label_resolution(self, client: MLClient, ra ) assert resolved_model.asset_name == model_name and resolved_model.asset_version == model_versions[-1] - def test_batch_job_download(self, client: MLClient, tmp_path: Path, rand_batch_name: Callable[[], str], rand_batch_deployment_name: Callable[[], str]) -> str: + def test_batch_job_download( + self, + client: MLClient, + tmp_path: Path, + rand_batch_name: Callable[[], str], + rand_batch_deployment_name: Callable[[], str], + ) -> str: endpoint_name = rand_batch_name("name") endpoint = load_batch_endpoint( "./tests/test_configs/endpoints/batch/batch_endpoint_mlflow_new.yaml", diff --git a/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_endpoint.py b/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_endpoint.py index ef9dcd7f390c..c2f9346d5d06 100644 --- a/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_endpoint.py +++ b/sdk/ml/azure-ai-ml/tests/batch_services/e2etests/test_batch_endpoint.py @@ -12,9 +12,7 @@ @pytest.mark.usefixtures("recorded_test") @pytest.mark.production_experiences_test class TestBatchEndpoint(AzureRecordedTestCase): - def test_batch_endpoint_create( - self, client: MLClient, rand_batch_name: Callable[[], str] - ) -> None: + def test_batch_endpoint_create(self, client: MLClient, rand_batch_name: Callable[[], str]) -> None: endpoint_yaml = "./tests/test_configs/endpoints/batch/batch_endpoint.yaml" name = rand_batch_name("name") # Bug in MFE that batch endpoint properties are not preserved, uncomment below after it's fixed in MFE @@ -41,9 +39,10 @@ def test_batch_endpoint_create( raise Exception(f"Batch endpoint {name} is supposed to be deleted.") - @pytest.mark.usefixtures("light_gbm_model") - def test_mlflow_batch_endpoint_create_and_update(self, client: MLClient, rand_batch_name: Callable[[], str]) -> None: + def test_mlflow_batch_endpoint_create_and_update( + self, client: MLClient, rand_batch_name: Callable[[], str] + ) -> None: # light_gbm_model fixture is not used directly, but it makes sure the model being used by the batch endpoint exists endpoint_yaml = "./tests/test_configs/endpoints/batch/batch_endpoint_mlflow.yaml" @@ -68,8 +67,9 @@ def test_mlflow_batch_endpoint_create_and_update(self, client: MLClient, rand_ba raise Exception(f"Batch endpoint {name} is supposed to be deleted.") - - def test_batch_invoke(self, client: MLClient, rand_batch_name: Callable[[], str], rand_batch_deployment_name: Callable[[], str]) -> None: + def test_batch_invoke( + self, client: MLClient, rand_batch_name: Callable[[], str], rand_batch_deployment_name: Callable[[], str] + ) -> None: endpoint_yaml = "./tests/test_configs/endpoints/batch/simple_batch_endpoint.yaml" endpoint_name = rand_batch_name("endpoint_name") endpoint = load_batch_endpoint(endpoint_yaml) @@ -90,29 +90,27 @@ def test_batch_invoke(self, client: MLClient, rand_batch_name: Callable[[], str] # Invoke using inputs: Dict[str, Input] input_1 = Input( type="uri_folder", - 
path='https://pipelinedata.blob.core.windows.net/sampledata/mnist', + path="https://pipelinedata.blob.core.windows.net/sampledata/mnist", ) batchjob = client.batch_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - inputs = {"input1": input_1} + endpoint_name=endpoint_name, deployment_name=deployment_name, inputs={"input1": input_1} ) assert batchjob # Invoke using deprecated input: Input batchjob_input = client.batch_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - input = input_1 + endpoint_name=endpoint_name, deployment_name=deployment_name, input=input_1 ) assert batchjob_input @pytest.mark.skipif( condition=not is_live(), - reason="Update operation is not valid. If we use the same endpoint/deployment this will throw an error" + reason="Update operation is not valid. If we use the same endpoint/deployment this will throw an error", ) - def test_batch_component(self, client: MLClient, rand_batch_name: Callable[[], str], rand_batch_deployment_name: Callable[[], str]) -> None: + def test_batch_component( + self, client: MLClient, rand_batch_name: Callable[[], str], rand_batch_deployment_name: Callable[[], str] + ) -> None: endpoint_yaml = "./tests/test_configs/endpoints/batch/batch_endpoint_deployment_component.yaml" endpoint_name = rand_batch_name("endpoint_name") endpoint = load_batch_endpoint(endpoint_yaml) @@ -130,21 +128,27 @@ def test_batch_component(self, client: MLClient, rand_batch_name: Callable[[], s endpoint = client.batch_endpoints.begin_create_or_update(endpoint).result() # create a deployment client.batch_deployments.begin_create_or_update(deployment).result() - + # Batch endpoint invoke using different supported inputs inputs_dict = { "input_1": Input(path="azureml:list_data_v2_test:2", type="uri_folder"), - "input_2": Input(path="azureml:list_data_v2_test:2", type="uri_folder") + "input_2": Input(path="azureml:list_data_v2_test:2", type="uri_folder"), } job = client.batch_endpoints.invoke( endpoint_name=endpoint.name, deployment_name=deployment.name, - inputs = inputs_dict, + inputs=inputs_dict, ) assert job - def test_batch_invoke_outputs(self, client: MLClient, rand_batch_name: Callable[[], str], rand_batch_deployment_name: Callable[[], str], randstr: Callable[[str], str]) -> None: + def test_batch_invoke_outputs( + self, + client: MLClient, + rand_batch_name: Callable[[], str], + rand_batch_deployment_name: Callable[[], str], + randstr: Callable[[str], str], + ) -> None: endpoint_yaml = "./tests/test_configs/endpoints/batch/simple_batch_endpoint.yaml" endpoint_name = rand_batch_name("endpoint_name") endpoint = load_batch_endpoint(endpoint_yaml) @@ -165,23 +169,25 @@ def test_batch_invoke_outputs(self, client: MLClient, rand_batch_name: Callable[ # Invoke using inputs: Dict[str, Input] input_1 = Input( type="uri_folder", - path='https://pipelinedata.blob.core.windows.net/sampledata/mnist', + path="https://pipelinedata.blob.core.windows.net/sampledata/mnist", ) # Invoke using outputs: Dict[str, Output] output_file_name = randstr("output_file") output_1 = Output( type="uri_file", - path='azureml://datastores/workspaceblobstore/paths/batchendpointinvoke/mnistOutput/' + output_file_name +'.csv', + path="azureml://datastores/workspaceblobstore/paths/batchendpointinvoke/mnistOutput/" + + output_file_name + + ".csv", ) batchjob = client.batch_endpoints.invoke( endpoint_name=endpoint_name, deployment_name=deployment_name, - inputs = {"input1": input_1}, - outputs = {"output": output_1} + inputs={"input1": 
input_1}, + outputs={"output": output_1}, ) assert batchjob delete_res = client.batch_endpoints.begin_delete(name=endpoint_name) - delete_res = delete_res.result() \ No newline at end of file + delete_res = delete_res.result() diff --git a/sdk/ml/azure-ai-ml/tests/code_asset/e2etests/test_code.py b/sdk/ml/azure-ai-ml/tests/code_asset/e2etests/test_code.py index 4640032df6a5..205a34573749 100644 --- a/sdk/ml/azure-ai-ml/tests/code_asset/e2etests/test_code.py +++ b/sdk/ml/azure-ai-ml/tests/code_asset/e2etests/test_code.py @@ -56,10 +56,7 @@ def test_asset_path_update( code_entity.path = code_asset_path client._code.create_or_update(code_entity) - @pytest.mark.skipif( - condition=not is_live(), - reason="registry tests do not record properly. Investigate later." - ) + @pytest.mark.skipif(condition=not is_live(), reason="registry tests do not record properly. Investigate later.") def test_create_and_get_from_registry( self, registry_client: MLClient, diff --git a/sdk/ml/azure-ai-ml/tests/command_job/e2etests/test_command_job.py b/sdk/ml/azure-ai-ml/tests/command_job/e2etests/test_command_job.py index 86b389dc9f81..892d336b2f80 100644 --- a/sdk/ml/azure-ai-ml/tests/command_job/e2etests/test_command_job.py +++ b/sdk/ml/azure-ai-ml/tests/command_job/e2etests/test_command_job.py @@ -420,18 +420,16 @@ def test_command_job_parsing_error(self, randstr: Callable[[], str]) -> None: assert "Error while parsing yaml file" in e.value.message @pytest.mark.e2etest - def test_command_job_register_output( - self, randstr: Callable[[str], str], client: MLClient - ) -> None: + def test_command_job_register_output(self, randstr: Callable[[str], str], client: MLClient) -> None: job: CommandJob = load_job( source="./tests/test_configs/command_job/command_job_register_output.yml", params_override=[{"name": randstr("job_name")}], ) job = client.jobs.create_or_update(job=job) - assert job.outputs.test2.name == 'test2_output' - assert job.outputs.test2.version == '2' - assert job.outputs.test3.name == 'test3_output' - assert job.outputs.test3.version == '3' + assert job.outputs.test2.name == "test2_output" + assert job.outputs.test2.version == "2" + assert job.outputs.test3.name == "test3_output" + assert job.outputs.test3.version == "3" def check_tid_in_url(client: MLClient, job: Job) -> None: diff --git a/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py b/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py index a13505f1c619..036bfd5bf214 100644 --- a/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py +++ b/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py @@ -81,6 +81,7 @@ def create_tensorflow_distribution(has_strs: bool = False): return create_tensorflow_distribution + # previous bodiless_matcher fixture doesn't take effect because of typo, please add it in method level if needed @@ -143,7 +144,7 @@ def test_command_component(self, client: MLClient, randstr: Callable[[str], str] "optional": True, "type": "number", }, - "component_in_path": {"description": "A path", "type": "uri_folder", 'optional': False}, + "component_in_path": {"description": "A path", "type": "uri_folder", "optional": False}, }, "is_deterministic": True, "outputs": {"component_out_path": {"type": "uri_folder"}}, @@ -169,11 +170,17 @@ def test_parallel_component(self, client: MLClient, randstr: Callable[[str], str "error_threshold": 10, "input_data": "${{inputs.score_input}}", "inputs": { - "label": {"description": "Other reference data for batch scoring, " "e.g. 
labels.", - "type": "uri_file", 'optional': False}, - "score_input": {"description": "The data to be split and scored in " "parallel.", "type": "mltable", - 'optional': False}, - "score_model": {"description": "The model for batch score.", "type": "custom_model", 'optional': False}, + "label": { + "description": "Other reference data for batch scoring, " "e.g. labels.", + "type": "uri_file", + "optional": False, + }, + "score_input": { + "description": "The data to be split and scored in " "parallel.", + "type": "mltable", + "optional": False, + }, + "score_model": {"description": "The model for batch score.", "type": "custom_model", "optional": False}, }, "is_deterministic": True, "max_concurrency_per_instance": 12, @@ -232,21 +239,23 @@ def test_automl_component(self, client: MLClient, registry_client: MLClient, ran def test_spark_component(self, client: MLClient, randstr: Callable[[], str]) -> None: expected_dict = { - '$schema': 'https://azuremlschemas.azureedge.net/latest/sparkComponent.schema.json', - 'args': '--file_input ${{inputs.file_input}} --output ${{outputs.output}}', - 'conf': {'spark.driver.cores': 2, - 'spark.driver.memory': '1g', - 'spark.executor.cores': 1, - 'spark.executor.instances': 1, - 'spark.executor.memory': '1g'}, - 'description': 'Aml Spark dataset test module', - 'display_name': 'Aml Spark dataset test module', - 'entry': {'file': 'kmeans_example.py'}, - 'inputs': {'file_input': {'type': 'uri_file', 'optional': False}}, - 'is_deterministic': True, - 'outputs': {'output': {'type': 'uri_folder'}}, - 'type': 'spark', - 'version': '1' + "$schema": "https://azuremlschemas.azureedge.net/latest/sparkComponent.schema.json", + "args": "--file_input ${{inputs.file_input}} --output ${{outputs.output}}", + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.instances": 1, + "spark.executor.memory": "1g", + }, + "description": "Aml Spark dataset test module", + "display_name": "Aml Spark dataset test module", + "entry": {"file": "kmeans_example.py"}, + "inputs": {"file_input": {"type": "uri_file", "optional": False}}, + "is_deterministic": True, + "outputs": {"output": {"type": "uri_folder"}}, + "type": "spark", + "version": "1", } assert_component_basic_workflow( client=client, @@ -263,10 +272,10 @@ def test_datatransfer_copy_urifolder_component(self, client: MLClient, randstr: "display_name": "Data Transfer Component copy-files", "type": "data_transfer", "task": "copy_data", - 'inputs': {'folder1': {'type': 'uri_folder', 'optional': False}}, - 'outputs': {'output_folder': {'type': 'uri_folder'}}, - 'is_deterministic': True, - 'version': '1' + "inputs": {"folder1": {"type": "uri_folder", "optional": False}}, + "outputs": {"output_folder": {"type": "uri_folder"}}, + "is_deterministic": True, + "version": "1", } assert_component_basic_workflow( client=client, @@ -283,10 +292,10 @@ def test_datatransfer_copy_urifile_component(self, client: MLClient, randstr: Ca "display_name": "Data Transfer Component copy uri files", "type": "data_transfer", "task": "copy_data", - 'inputs': {'folder1': {'type': 'uri_file', 'optional': False}}, - 'outputs': {'output_folder': {'type': 'uri_file'}}, - 'is_deterministic': True, - 'version': '1' + "inputs": {"folder1": {"type": "uri_file", "optional": False}}, + "outputs": {"output_folder": {"type": "uri_file"}}, + "is_deterministic": True, + "version": "1", } assert_component_basic_workflow( client=client, @@ -303,11 +312,13 @@ def test_datatransfer_copy_2urifolder_component(self, 
client: MLClient, randstr: "display_name": "Data Transfer Component merge-files", "type": "data_transfer", "task": "copy_data", - 'inputs': {'folder1': {'type': 'uri_folder', 'optional': False}, - 'folder2': {'type': 'uri_folder', 'optional': False}}, - 'outputs': {'output_folder': {'type': 'uri_folder'}}, - 'is_deterministic': True, - 'version': '1' + "inputs": { + "folder1": {"type": "uri_folder", "optional": False}, + "folder2": {"type": "uri_folder", "optional": False}, + }, + "outputs": {"output_folder": {"type": "uri_folder"}}, + "is_deterministic": True, + "version": "1", } assert_component_basic_workflow( client=client, @@ -324,12 +335,14 @@ def test_datatransfer_copy_mixtype_component(self, client: MLClient, randstr: Ca "display_name": "Data Transfer Component merge mix type files", "type": "data_transfer", "task": "copy_data", - 'inputs': {'input1': {'type': 'uri_file', 'optional': False}, - 'input2': {'type': 'uri_file', 'optional': False}, - 'input3': {'type': 'mltable', 'optional': False}}, - 'outputs': {'output_folder': {'type': 'uri_folder'}}, - 'is_deterministic': True, - 'version': '1' + "inputs": { + "input1": {"type": "uri_file", "optional": False}, + "input2": {"type": "uri_file", "optional": False}, + "input3": {"type": "mltable", "optional": False}, + }, + "outputs": {"output_folder": {"type": "uri_folder"}}, + "is_deterministic": True, + "version": "1", } assert_component_basic_workflow( client=client, @@ -367,8 +380,9 @@ def test_command_component_create_input_output_types( # server side will remove \n from the code now. Skip them given it's not targeted to check in this test # server side will return optional False for optional None input omit_fields = ["id", "command", "environment", "inputs.*.optional"] - assert omit_with_wildcard(component_entity._to_dict(), *omit_fields) == \ - omit_with_wildcard(target_entity._to_dict(), *omit_fields) + assert omit_with_wildcard(component_entity._to_dict(), *omit_fields) == omit_with_wildcard( + target_entity._to_dict(), *omit_fields + ) def test_command_component_with_code(self, client: MLClient, randstr: Callable[[str], str]) -> None: component_name = randstr("component_name") @@ -422,7 +436,7 @@ def test_component_update(self, client: MLClient, randstr: Callable[[str], str]) "creation_context", "resources", "id", - "inputs.component_in_path.optional" # backend will return component inputs as optional:False + "inputs.component_in_path.optional", # backend will return component inputs as optional:False ) expected_dict = pydash.omit( dict(target_entity._to_dict()), @@ -806,9 +820,7 @@ def test_simple_pipeline_component_create(self, client: MLClient, randstr: Calla ) # Assert binding on compute not changed after resolve dependencies client.components._resolve_dependencies_for_pipeline_component_jobs( - component, - resolver=client.components._orchestrators.get_asset_arm_id, - resolve_inputs=False + component, resolver=client.components._orchestrators.get_asset_arm_id, resolve_inputs=False ) assert component.jobs["component_a_job"].compute == "${{parent.inputs.node_compute}}" # Assert E2E @@ -884,8 +896,11 @@ def test_helloworld_nested_pipeline_component(self, client: MLClient, randstr: C "display_name": "Hello World Pipeline Component", "is_deterministic": False, "inputs": { - "component_in_path": {"type": "uri_folder", "description": "A path for pipeline component", - "optional": False}, + "component_in_path": { + "type": "uri_folder", + "description": "A path for pipeline component", + "optional": False, + }, 
"component_in_number": { "type": "number", "optional": True, @@ -943,25 +958,28 @@ def test_command_component_with_properties_e2e_flow(self, client: MLClient, rand source="./tests/test_configs/components/helloworld_component_with_properties.yml", ) expected_dict = { - '$schema': 'https://azuremlschemas.azureedge.net/development/commandComponent.schema.json', - '_source': 'YAML.COMPONENT', - 'command': 'echo Hello World & echo $[[${{inputs.component_in_number}}]] & ' - 'echo ${{inputs.component_in_path}} & echo ' - '${{outputs.component_out_path}} > ' - '${{outputs.component_out_path}}/component_in_number', - 'description': 'This is the basic command component', - 'display_name': 'CommandComponentBasic', - 'inputs': {'component_in_number': {'default': '10.99', - 'description': 'A number', - 'optional': True, - 'type': 'number'}, - 'component_in_path': {'description': 'A path', - 'type': 'uri_folder'}}, - 'is_deterministic': True, - 'outputs': {'component_out_path': {'type': 'uri_folder'}}, - 'properties': {'azureml.pipelines.dynamic': 'true'}, - 'tags': {'owner': 'sdkteam', 'tag': 'tagvalue'}, - 'type': 'command', + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json", + "_source": "YAML.COMPONENT", + "command": "echo Hello World & echo $[[${{inputs.component_in_number}}]] & " + "echo ${{inputs.component_in_path}} & echo " + "${{outputs.component_out_path}} > " + "${{outputs.component_out_path}}/component_in_number", + "description": "This is the basic command component", + "display_name": "CommandComponentBasic", + "inputs": { + "component_in_number": { + "default": "10.99", + "description": "A number", + "optional": True, + "type": "number", + }, + "component_in_path": {"description": "A path", "type": "uri_folder"}, + }, + "is_deterministic": True, + "outputs": {"component_out_path": {"type": "uri_folder"}}, + "properties": {"azureml.pipelines.dynamic": "true"}, + "tags": {"owner": "sdkteam", "tag": "tagvalue"}, + "type": "command", } omit_fields = ["name", "creation_context", "id", "code", "environment", "version"] rest_component = pydash.omit( diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_command_component_entity.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_command_component_entity.py index 960e09169be9..3704a1131533 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_command_component_entity.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_command_component_entity.py @@ -384,13 +384,19 @@ def test_sweep_early_termination_setter(self): sampling_algorithm="random", ) sweep_job1.early_termination = { - 'type': "bandit", 'evaluation_interval': 100, 'delay_evaluation': 200, 'slack_factor': 40.0 - } + "type": "bandit", + "evaluation_interval": 100, + "delay_evaluation": 200, + "slack_factor": 40.0, + } from azure.ai.ml.entities._job.sweep.early_termination_policy import BanditPolicy + assert isinstance(sweep_job1.early_termination, BanditPolicy) - assert [sweep_job1.early_termination.evaluation_interval, - sweep_job1.early_termination.delay_evaluation, - sweep_job1.early_termination.slack_factor] == [100, 200, 40.0] + assert [ + sweep_job1.early_termination.evaluation_interval, + sweep_job1.early_termination.delay_evaluation, + sweep_job1.early_termination.slack_factor, + ] == [100, 200, 40.0] def test_invalid_component_inputs(self) -> None: yaml_path = "./tests/test_configs/components/invalid/helloworld_component_conflict_input_names.yml" @@ -506,9 +512,7 @@ def 
test_component_code_asset_ignoring_pycache(self) -> None: with build_temp_folder( source_base_dir="./tests/test_configs/components", relative_files_to_copy=["helloworld_component.yml"], - extra_files_to_create={ - "__pycache__/a.pyc": None - } + extra_files_to_create={"__pycache__/a.pyc": None}, ) as temp_dir: # resolve and test for ignore_file's is_file_excluded component.code = temp_dir @@ -524,12 +528,12 @@ def test_component_code_asset_ignoring_pycache(self) -> None: def test_normalized_arm_id_in_component_dict(self): component_dict = { "code": "azureml:/subscriptions/123ABC_+-=/resourceGroups/123ABC_+-=/providers/Microsoft.MachineLearningServices/workspaces/123ABC_+-=/codes/xxx", - "environment": "azureml:/subscriptions/123ABC_+-=/resourceGroups/123ABC_+-=/providers/Microsoft.MachineLearningServices/workspaces/123ABC_+-=/environments/xxx" + "environment": "azureml:/subscriptions/123ABC_+-=/resourceGroups/123ABC_+-=/providers/Microsoft.MachineLearningServices/workspaces/123ABC_+-=/environments/xxx", } normalized_arm_id_in_object(component_dict) expected_dict = { - 'code': 'azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/xxx', - 'environment': 'azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/environments/xxx' + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/xxx", + "environment": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/environments/xxx", } assert component_dict == expected_dict diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py index f9cb245843f7..78286ad635e7 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py @@ -23,7 +23,7 @@ def mock_component_operation( mock_workspace_scope: OperationScope, mock_operation_config: OperationConfig, mock_aml_services_2022_05_01: Mock, - mock_machinelearning_client: Mock + mock_machinelearning_client: Mock, ) -> ComponentOperations: yield ComponentOperations( operation_scope=mock_workspace_scope, diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_schema.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_schema.py index c68b109599df..77efa5a57007 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_schema.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_schema.py @@ -315,7 +315,7 @@ def test_component_factory(self): data=component_entity._to_dict(), context={ "source_path": test_path, - } + }, ) assert recreated_component._to_dict() == component_entity._to_dict() diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_data_transfer_component_entity.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_data_transfer_component_entity.py index 0b3985639072..b5f376791880 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_data_transfer_component_entity.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_data_transfer_component_entity.py @@ -3,8 +3,11 @@ from azure.ai.ml import MLClient from azure.ai.ml import load_component -from 
azure.ai.ml.entities._component.datatransfer_component import DataTransferCopyComponent, \ - DataTransferImportComponent, DataTransferExportComponent +from azure.ai.ml.entities._component.datatransfer_component import ( + DataTransferCopyComponent, + DataTransferImportComponent, + DataTransferExportComponent, +) from azure.ai.ml.constants._component import DataCopyMode, DataTransferTaskType from .test_component_schema import load_component_entity_from_rest_json, load_component_entity_from_yaml from .._util import _COMPONENT_TIMEOUT_SECOND @@ -16,8 +19,9 @@ class TestDataTransferComponentEntity: def test_serialize_deserialize_copy_task_component(self, mock_machinelearning_client: MLClient): test_path = "./tests/test_configs/components/data_transfer/copy_files.yaml" - component_entity = load_component_entity_from_yaml(test_path, mock_machinelearning_client, - _type="data_transfer") + component_entity = load_component_entity_from_yaml( + test_path, mock_machinelearning_client, _type="data_transfer" + ) assert isinstance(component_entity, DataTransferCopyComponent) rest_path = "./tests/test_configs/components/data_transfer/copy_files.json" @@ -28,7 +32,7 @@ def test_serialize_deserialize_copy_task_component(self, mock_machinelearning_cl display_name="Data Transfer Component copy-files", inputs={"folder1": {"type": "uri_folder"}}, outputs={"output_folder": {"type": "uri_folder"}}, - data_copy_mode= DataCopyMode.MERGE_WITH_OVERWRITE, + data_copy_mode=DataCopyMode.MERGE_WITH_OVERWRITE, base_path="./tests/test_configs/components/data_transfer", ) @@ -43,8 +47,9 @@ def test_serialize_deserialize_copy_task_component(self, mock_machinelearning_cl def test_serialize_deserialize_merge_task_component(self, mock_machinelearning_client: MLClient): test_path = "./tests/test_configs/components/data_transfer/merge_files.yaml" - component_entity = load_component_entity_from_yaml(test_path, mock_machinelearning_client, - _type="data_transfer") + component_entity = load_component_entity_from_yaml( + test_path, mock_machinelearning_client, _type="data_transfer" + ) assert isinstance(component_entity, DataTransferCopyComponent) rest_path = "./tests/test_configs/components/data_transfer/merge_files.json" rest_entity = load_component_entity_from_rest_json(rest_path) @@ -54,14 +59,22 @@ def test_serialize_deserialize_merge_task_component(self, mock_machinelearning_c display_name="Data Transfer Component merge-files", inputs={"folder1": {"type": "uri_folder"}, "folder2": {"type": "uri_folder"}}, outputs={"output_folder": {"type": "uri_folder"}}, - data_copy_mode= DataCopyMode.MERGE_WITH_OVERWRITE, + data_copy_mode=DataCopyMode.MERGE_WITH_OVERWRITE, base_path="./tests/test_configs/components/data_transfer", ) # data_copy_mode is a run time config, cannot be decided in registering progress. So won't be returned from # backend. 
- omit_fields = ["name", "id", "$schema", "data_copy_mode", "inputs.folder1.optional", "version", - "inputs.folder2.optional", "inputs.folder3.optional"] + omit_fields = [ + "name", + "id", + "$schema", + "data_copy_mode", + "inputs.folder1.optional", + "version", + "inputs.folder2.optional", + "inputs.folder3.optional", + ] yaml_dict = pydash.omit(dict(component_entity._to_dict()), *omit_fields) rest_dict = pydash.omit(dict(rest_entity._to_dict()), *omit_fields) sdk_dict = pydash.omit(dict(data_transfer_copy_component._to_dict()), *omit_fields) @@ -70,8 +83,9 @@ def test_serialize_deserialize_merge_task_component(self, mock_machinelearning_c def test_serialize_deserialize_import_task_component(self, mock_machinelearning_client: MLClient): test_path = "./tests/test_configs/components/data_transfer/import_file_to_blob.yaml" - component_entity = load_component_entity_from_yaml(test_path, mock_machinelearning_client, - _type="data_transfer") + component_entity = load_component_entity_from_yaml( + test_path, mock_machinelearning_client, _type="data_transfer" + ) assert isinstance(component_entity, DataTransferImportComponent) data_transfer_copy_component = DataTransferImportComponent( @@ -90,8 +104,9 @@ def test_serialize_deserialize_import_task_component(self, mock_machinelearning_ def test_serialize_deserialize_export_task_component(self, mock_machinelearning_client: MLClient): test_path = "./tests/test_configs/components/data_transfer/export_blob_to_database.yaml" - component_entity = load_component_entity_from_yaml(test_path, mock_machinelearning_client, - _type="data_transfer") + component_entity = load_component_entity_from_yaml( + test_path, mock_machinelearning_client, _type="data_transfer" + ) assert isinstance(component_entity, DataTransferExportComponent) data_transfer_copy_component = DataTransferExportComponent( @@ -119,7 +134,7 @@ def test_copy_task_component_entity(self): outputs={ "output_folder": {"type": "uri_folder"}, }, - data_copy_mode= DataCopyMode.MERGE_WITH_OVERWRITE, + data_copy_mode=DataCopyMode.MERGE_WITH_OVERWRITE, base_path="./tests/test_configs/components/data_transfer", ) omit_fields = [ @@ -136,4 +151,3 @@ def test_copy_task_component_entity(self): yaml_component_dict = pydash.omit(yaml_component_dict, *omit_fields) assert component_dict == yaml_component_dict - diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_parallel_component_operations.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_parallel_component_operations.py index 0da1ce479284..0fed40889e7e 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_parallel_component_operations.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_parallel_component_operations.py @@ -62,9 +62,7 @@ def test_create(self, mock_component_operation: ComponentOperations) -> None: workspace_name=mock_component_operation._workspace_name, ) - def test_create_autoincrement( - self, mock_component_operation: ComponentOperations - ) -> None: + def test_create_autoincrement(self, mock_component_operation: ComponentOperations) -> None: task = { "type": "run_function", "model": {"name": "sore_model", "type": "mlflow_model"}, @@ -83,8 +81,8 @@ def test_create_autoincrement( ) assert component._auto_increment_version with patch.object(ComponentOperations, "_resolve_arm_id_or_upload_dependencies") as mock_thing, patch( - "azure.ai.ml.operations._component_operations.Component._from_rest_object", - return_value=component, + "azure.ai.ml.operations._component_operations.Component._from_rest_object", + 
return_value=component, ): mock_component_operation.create_or_update(component) mock_thing.assert_called_once() diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_pipeline_component_entity.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_pipeline_component_entity.py index 0c02c012c9ec..619cecd81b3a 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_pipeline_component_entity.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_pipeline_component_entity.py @@ -401,30 +401,37 @@ def test_simple_jobs_from_rest(self) -> None: test_path = "./tests/test_configs/components/pipeline_component_jobs_rest_data.json" with open(test_path, "r") as f: json_in_file = yaml.safe_load(f) - job_dict = copy.deepcopy(json_in_file['properties']['component_spec']['jobs']) + job_dict = copy.deepcopy(json_in_file["properties"]["component_spec"]["jobs"]) jobs = PipelineComponent._resolve_sub_nodes(job_dict) - node_dict = {key: node._to_rest_object() for key, node in jobs.items()}['component_a_job'] - assert node_dict['computeId'] == '${{parent.inputs.node_compute}}' - assert node_dict['outputs'] == { - 'output_binding': {'type': 'literal', 'value': '${{parent.outputs.output}}'}, - 'output_binding2': {'type': 'literal', 'value': '${{parent.outputs.output}}'}, - 'output_data': {'job_output_type': 'uri_folder', 'mode': 'Upload'}, - 'output_data_legacy': {'job_output_type': 'uri_folder', 'mode': 'Upload'}} - assert node_dict['inputs'] == { - 'binding_input': {'job_input_type': 'literal', 'value': '${{parent.inputs.component_in_path}}'}, - 'data_input': {'job_input_type': 'uri_file', - 'mode': 'Download', - 'uri': 'https://my-blob/path/to/data'}, - 'data_input_legacy': {'job_input_type': 'uri_file', - 'mode': 'Download', - 'uri': 'https://my-blob/path/to/data'}, - 'literal_input': {'job_input_type': 'literal', 'value': '11'}, - 'literal_input2': {'job_input_type': 'literal', 'value': '12'}} - assert node_dict['resources'] == {'instance_count': 1, 'properties': { - 'target_selector': {'my_resource_only': 'false', 'allow_spot_vm': 'true'}}, 'shm_size': '2g'} + node_dict = {key: node._to_rest_object() for key, node in jobs.items()}["component_a_job"] + assert node_dict["computeId"] == "${{parent.inputs.node_compute}}" + assert node_dict["outputs"] == { + "output_binding": {"type": "literal", "value": "${{parent.outputs.output}}"}, + "output_binding2": {"type": "literal", "value": "${{parent.outputs.output}}"}, + "output_data": {"job_output_type": "uri_folder", "mode": "Upload"}, + "output_data_legacy": {"job_output_type": "uri_folder", "mode": "Upload"}, + } + assert node_dict["inputs"] == { + "binding_input": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + "data_input": {"job_input_type": "uri_file", "mode": "Download", "uri": "https://my-blob/path/to/data"}, + "data_input_legacy": { + "job_input_type": "uri_file", + "mode": "Download", + "uri": "https://my-blob/path/to/data", + }, + "literal_input": {"job_input_type": "literal", "value": "11"}, + "literal_input2": {"job_input_type": "literal", "value": "12"}, + } + assert node_dict["resources"] == { + "instance_count": 1, + "properties": {"target_selector": {"my_resource_only": "false", "allow_spot_vm": "true"}}, + "shm_size": "2g", + } rest_obj = ComponentVersionData.from_dict(json.loads(json.dumps(json_in_file))) pipeline_component = Component._from_rest_object(rest_obj) assert pipeline_component.jobs - obj_node_dict = {key: node._to_rest_object() for key, node in 
pipeline_component.jobs.items()}['component_a_job'] + obj_node_dict = {key: node._to_rest_object() for key, node in pipeline_component.jobs.items()}[ + "component_a_job" + ] assert obj_node_dict == node_dict diff --git a/sdk/ml/azure-ai-ml/tests/compute/e2etests/test_compute.py b/sdk/ml/azure-ai-ml/tests/compute/e2etests/test_compute.py index f722e2d73430..4bc60198bde4 100644 --- a/sdk/ml/azure-ai-ml/tests/compute/e2etests/test_compute.py +++ b/sdk/ml/azure-ai-ml/tests/compute/e2etests/test_compute.py @@ -77,7 +77,7 @@ def test_compute_instance_create_and_delete( "Test takes 5 minutes in automation. " "Already have unit tests verifying correct _restclient method is called. " "Can be validated in live build only." - ) + ), ) def test_compute_instance_stop_start_restart( self, client: MLClient, rand_compute_name: Callable[[str], str] diff --git a/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_entity.py b/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_entity.py index cd7b0e9e33d7..ba69bab41748 100644 --- a/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_entity.py +++ b/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_entity.py @@ -26,7 +26,9 @@ def test_compute_from_rest(self): with open("tests/test_configs/compute/compute-kubernetes.yaml", "r") as f: data = yaml.safe_load(f) resource_id = "/subscriptions/dummy/resourceGroups/dummy/providers/Microsoft.Kubernetes/connectedClusters/dummy" - uai_resource_id = "/subscriptions/dummy/resourceGroups/dummy/providers/Microsoft.ManagedIdentity/userAssignedIdentities/dummy" + uai_resource_id = ( + "/subscriptions/dummy/resourceGroups/dummy/providers/Microsoft.ManagedIdentity/userAssignedIdentities/dummy" + ) context = { "base_path": "./", "params_override": [ @@ -45,9 +47,7 @@ def test_compute_from_rest(self): compute._to_rest_object() assert compute.type == "kubernetes" assert compute.identity.type == "user_assigned" - assert ( - compute.identity.user_assigned_identities[0].resource_id == uai_resource_id - ) + assert compute.identity.user_assigned_identities[0].resource_id == uai_resource_id def _test_loaded_compute(self, compute: AmlCompute): assert compute.name == "banchaml" @@ -64,10 +64,7 @@ def test_compute_from_yaml(self): rest_intermediate = compute._to_rest_object() assert rest_intermediate.properties.compute_type == "AmlCompute" - assert ( - rest_intermediate.properties.properties.user_account_credentials.admin_user_name - == "azureuser" - ) + assert rest_intermediate.properties.properties.user_account_credentials.admin_user_name == "azureuser" assert rest_intermediate.properties.properties.enable_node_public_ip assert rest_intermediate.location == compute.location assert rest_intermediate.tags is not None @@ -84,51 +81,28 @@ def test_compute_from_yaml(self): def test_compute_vm_from_yaml(self): resource_id = "/subscriptions/13e50845-67bc-4ac5-94db-48d493a6d9e8/resourceGroups/myrg/providers/Microsoft.Compute/virtualMachines/myvm" fake_key = "myfakekey" - compute: VirtualMachineCompute = load_compute( - "tests/test_configs/compute/compute-vm.yaml" - ) + compute: VirtualMachineCompute = load_compute("tests/test_configs/compute/compute-vm.yaml") assert compute.name == "banchcivm" assert compute.ssh_settings.admin_username == "azureuser" assert compute.ssh_settings.admin_password == "azureuserpassword" assert compute.ssh_settings.ssh_port == 8888 assert compute.resource_id == resource_id - assert ( - compute.ssh_settings.ssh_private_key_file - == "tests/test_configs/compute/ssh_fake_key.txt" - ) + assert 
compute.ssh_settings.ssh_private_key_file == "tests/test_configs/compute/ssh_fake_key.txt" rest_intermediate = compute._to_rest_object() assert rest_intermediate.properties.resource_id == resource_id assert rest_intermediate.properties.properties.ssh_port == 8888 - assert ( - rest_intermediate.properties.properties.administrator_account.password - == "azureuserpassword" - ) - assert ( - rest_intermediate.properties.properties.administrator_account.username - == "azureuser" - ) - assert ( - rest_intermediate.properties.properties.administrator_account.private_key_data - == fake_key - ) + assert rest_intermediate.properties.properties.administrator_account.password == "azureuserpassword" + assert rest_intermediate.properties.properties.administrator_account.username == "azureuser" + assert rest_intermediate.properties.properties.administrator_account.private_key_data == fake_key serializer = Serializer({"ComputeResource": ComputeResource}) body = serializer.body(rest_intermediate, "ComputeResource") assert body["properties"]["resourceId"] == resource_id assert body["properties"]["properties"]["sshPort"] == 8888 - assert ( - body["properties"]["properties"]["administratorAccount"]["username"] - == "azureuser" - ) - assert ( - body["properties"]["properties"]["administratorAccount"]["password"] - == "azureuserpassword" - ) - assert ( - body["properties"]["properties"]["administratorAccount"]["privateKeyData"] - == fake_key - ) + assert body["properties"]["properties"]["administratorAccount"]["username"] == "azureuser" + assert body["properties"]["properties"]["administratorAccount"]["password"] == "azureuserpassword" + assert body["properties"]["properties"]["administratorAccount"]["privateKeyData"] == fake_key def test_compute_from_constructor(self): compute = ComputeInstance(name="comp", type="computeinstance") @@ -138,9 +112,7 @@ def test_compute_from_constructor(self): compute._to_dict() assert compute.type == "kubernetes" - def _uai_list_to_dict( - self, value: List[ManagedIdentityConfiguration] - ) -> Union[str, ManagedIdentityConfiguration]: + def _uai_list_to_dict(self, value: List[ManagedIdentityConfiguration]) -> Union[str, ManagedIdentityConfiguration]: uai_dict = {} for item in value: @@ -151,15 +123,13 @@ def _uai_list_to_dict( def test_compute_instance_load_from_rest(self): compute_instance: ComputeInstance = load_compute( source="tests/test_configs/compute/compute-ci-unit.yaml", - params_override=[{"tags.test1": "test"}, {"tags.test2":"true"}, {"tags.test3":"0"}] + params_override=[{"tags.test1": "test"}, {"tags.test2": "true"}, {"tags.test3": "0"}], ) - + compute_instance._set_full_subnet_name("subscription_id", "resource_group_name") compute_resource = compute_instance._to_rest_object() - compute_instance2: ComputeInstance = ComputeInstance._load_from_rest( - compute_resource - ) + compute_instance2: ComputeInstance = ComputeInstance._load_from_rest(compute_resource) assert compute_instance2.last_operation == compute_instance.last_operation assert compute_instance2.services == compute_instance.services assert compute_instance2.tags is not None @@ -173,81 +143,38 @@ def test_compute_instance_with_image_metadata(self): latest_image_version="22.08.20", is_latest_os_image_version=False, ) - compute_instance: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-unit.yaml" - ) + compute_instance: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-unit.yaml") compute_resource = compute_instance._to_rest_object() 
compute_resource.properties.properties.os_image_metadata = os_image_metadata - compute_instance2: ComputeInstance = ComputeInstance._load_from_rest( - compute_resource - ) + compute_instance2: ComputeInstance = ComputeInstance._load_from_rest(compute_resource) os_image_metadata2 = compute_instance2.os_image_metadata assert os_image_metadata2 is not None - assert ( - os_image_metadata2.current_image_version - == os_image_metadata.current_image_version - ) - assert ( - os_image_metadata2.latest_image_version - == os_image_metadata.latest_image_version - ) - assert ( - os_image_metadata2.is_latest_os_image_version - == os_image_metadata.is_latest_os_image_version - ) + assert os_image_metadata2.current_image_version == os_image_metadata.current_image_version + assert os_image_metadata2.latest_image_version == os_image_metadata.latest_image_version + assert os_image_metadata2.is_latest_os_image_version == os_image_metadata.is_latest_os_image_version def test_compute_instance_schedules_from_yaml(self): - compute_instance: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-schedules.yaml" - ) + compute_instance: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-schedules.yaml") assert len(compute_instance.schedules.compute_start_stop) == 2 compute_resource = compute_instance._to_rest_object() - compute_instance2: ComputeInstance = ComputeInstance._load_from_rest( - compute_resource - ) + compute_instance2: ComputeInstance = ComputeInstance._load_from_rest(compute_resource) assert len(compute_instance2.schedules.compute_start_stop) == 2 assert compute_instance2.schedules.compute_start_stop[0].action == "stop" assert compute_instance2.schedules.compute_start_stop[0].trigger.type == "Cron" - assert ( - compute_instance2.schedules.compute_start_stop[0].trigger.start_time - == "2021-03-10T21:21:07" - ) - assert ( - compute_instance2.schedules.compute_start_stop[0].trigger.time_zone - == "Pacific Standard Time" - ) - assert ( - compute_instance2.schedules.compute_start_stop[0].trigger.expression - == "0 18 * * *" - ) + assert compute_instance2.schedules.compute_start_stop[0].trigger.start_time == "2021-03-10T21:21:07" + assert compute_instance2.schedules.compute_start_stop[0].trigger.time_zone == "Pacific Standard Time" + assert compute_instance2.schedules.compute_start_stop[0].trigger.expression == "0 18 * * *" assert compute_instance2.schedules.compute_start_stop[1].action == "start" - assert ( - compute_instance2.schedules.compute_start_stop[1].trigger.type - == "Recurrence" - ) - assert ( - compute_instance2.schedules.compute_start_stop[1].trigger.start_time - == "2021-03-10T21:21:07" - ) - assert ( - compute_instance2.schedules.compute_start_stop[1].trigger.time_zone - == "Pacific Standard Time" - ) - assert ( - compute_instance2.schedules.compute_start_stop[1].trigger.frequency - == "week" - ) + assert compute_instance2.schedules.compute_start_stop[1].trigger.type == "Recurrence" + assert compute_instance2.schedules.compute_start_stop[1].trigger.start_time == "2021-03-10T21:21:07" + assert compute_instance2.schedules.compute_start_stop[1].trigger.time_zone == "Pacific Standard Time" + assert compute_instance2.schedules.compute_start_stop[1].trigger.frequency == "week" assert compute_instance2.schedules.compute_start_stop[1].trigger.interval == 1 - assert ( - compute_instance2.schedules.compute_start_stop[1].trigger.schedule - is not None - ) + assert compute_instance2.schedules.compute_start_stop[1].trigger.schedule is not None def 
test_compute_instance_idle_shutdown_from_yaml(self): - compute_instance: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-idle-shutdown.yaml" - ) + compute_instance: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-idle-shutdown.yaml") assert compute_instance.idle_time_before_shutdown == "PT20M" assert compute_instance.idle_time_before_shutdown_minutes == 15 @@ -257,46 +184,26 @@ def test_compute_instance_idle_shutdown_from_yaml(self): == f"PT{compute_instance.idle_time_before_shutdown_minutes}M" ) - compute_instance2: ComputeInstance = ComputeInstance._load_from_rest( - compute_resource - ) - assert ( - compute_instance2.idle_time_before_shutdown - == f"PT{compute_instance.idle_time_before_shutdown_minutes}M" - ) - assert ( - compute_instance2.idle_time_before_shutdown_minutes - == compute_instance.idle_time_before_shutdown_minutes - ) + compute_instance2: ComputeInstance = ComputeInstance._load_from_rest(compute_resource) + assert compute_instance2.idle_time_before_shutdown == f"PT{compute_instance.idle_time_before_shutdown_minutes}M" + assert compute_instance2.idle_time_before_shutdown_minutes == compute_instance.idle_time_before_shutdown_minutes def test_compute_instance_setup_scripts_from_yaml(self): - loaded_instance: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-setup-scripts.yaml" - ) + loaded_instance: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-setup-scripts.yaml") compute_resource: ComputeResource = loaded_instance._to_rest_object() - compute_instance: ComputeInstance = ComputeInstance._load_from_rest( - compute_resource - ) + compute_instance: ComputeInstance = ComputeInstance._load_from_rest(compute_resource) assert compute_instance.setup_scripts is not None assert compute_instance.setup_scripts.creation_script is not None - assert ( - compute_instance.setup_scripts.creation_script.path - == "Users/test/creation-script.sh" - ) + assert compute_instance.setup_scripts.creation_script.path == "Users/test/creation-script.sh" assert compute_instance.setup_scripts.creation_script.timeout_minutes == "20" assert compute_instance.setup_scripts.startup_script is not None - assert ( - compute_instance.setup_scripts.startup_script.path - == "Users/test/startup-script.sh" - ) + assert compute_instance.setup_scripts.startup_script.path == "Users/test/startup-script.sh" assert compute_instance.setup_scripts.startup_script.command == "ls" assert compute_instance.setup_scripts.startup_script.timeout_minutes == "15" def test_compute_instance_uai_from_yaml(self): - compute: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-uai.yaml" - ) + compute: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-uai.yaml") assert compute.name == "banchci" assert compute.type == "computeinstance" assert compute.identity.type == "user_assigned" @@ -313,8 +220,7 @@ def test_compute_instance_uai_from_yaml(self): assert len(compute_resource.identity.user_assigned_identities) == 1 for k in compute_resource.identity.user_assigned_identities.keys(): assert ( - k - == "/subscriptions/4faaaf21-663f-4391-96fd-47197c630979/resourceGroups/test-rg" + k == "/subscriptions/4faaaf21-663f-4391-96fd-47197c630979/resourceGroups/test-rg" "-centraluseuap-v2-t-2021W35/providers/Microsoft.ManagedIdentity" "/userAssignedIdentities/x" ) @@ -331,9 +237,7 @@ def test_compute_instance_uai_from_yaml(self): ) def test_compute_instance_sai_from_yaml(self): - compute: ComputeInstance = load_compute( - 
"tests/test_configs/compute/compute-ci.yaml" - ) + compute: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci.yaml") assert compute.name == "banchci" assert compute.type == "computeinstance" assert compute.identity.type == "system_assigned" @@ -346,13 +250,9 @@ def test_compute_instance_sai_from_yaml(self): assert compute_from_rest.identity.type == "system_assigned" def test_compute_no_public_ip_from_yaml(self): - compute_instance: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-no-public-ip.yaml" - ) + compute_instance: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-no-public-ip.yaml") - aml_compute: AmlCompute = load_compute( - "tests/test_configs/compute/compute-aml-no-public-ip.yaml" - ) + aml_compute: AmlCompute = load_compute("tests/test_configs/compute/compute-aml-no-public-ip.yaml") def validate_no_public_ip(compute: Compute): assert compute.enable_node_public_ip == False @@ -363,11 +263,9 @@ def validate_no_public_ip(compute: Compute): validate_no_public_ip(compute=compute_instance) validate_no_public_ip(compute=aml_compute) - + def test_compute_instance_with_custom_app(self): - compute_instance: ComputeInstance = load_compute( - "tests/test_configs/compute/compute-ci-custom-app.yaml" - ) + compute_instance: ComputeInstance = load_compute("tests/test_configs/compute/compute-ci-custom-app.yaml") assert compute_instance.custom_applications is not None assert len(compute_instance.custom_applications) == 2 @@ -425,9 +323,7 @@ def test_synapse_compute_from_rest(self): assert compute.type == "synapsespark" def test_synapsespark_compute_from_yaml(self): - compute: SynapseSparkCompute = load_compute( - "tests/test_configs/compute/compute-synapsespark-identity.yaml" - ) + compute: SynapseSparkCompute = load_compute("tests/test_configs/compute/compute-synapsespark-identity.yaml") assert compute.name == "testidentity" assert compute.identity.type == "user_assigned" diff --git a/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_operations.py b/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_operations.py index 1aebc2420ab5..2e80dfdb6e21 100644 --- a/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_operations.py +++ b/sdk/ml/azure-ai-ml/tests/compute/unittests/test_compute_operations.py @@ -21,7 +21,9 @@ @pytest.fixture def mock_compute_operation( - mock_workspace_scope: OperationScope, mock_operation_config: OperationConfig, mock_aml_services_2022_10_01_preview: Mock + mock_workspace_scope: OperationScope, + mock_operation_config: OperationConfig, + mock_aml_services_2022_10_01_preview: Mock, ) -> ComputeOperations: yield ComputeOperations( operation_scope=mock_workspace_scope, @@ -46,9 +48,7 @@ def test_list(self, mock_compute_operation: ComputeOperations) -> None: sys.version_info[1] == 11, reason=f"This test is not compatible with Python 3.11, skip in CI.", ) - def test_create_compute_instance( - self, mock_compute_operation: ComputeOperations, mocker: MockFixture - ) -> None: + def test_create_compute_instance(self, mock_compute_operation: ComputeOperations, mocker: MockFixture) -> None: mocker.patch( "azure.ai.ml._restclient.v2022_10_01_preview.workspaces.get", return_value=funny(), @@ -66,9 +66,7 @@ def test_create_compute_instance( sys.version_info[1] == 11, reason=f"This test is not compatible with Python 3.11, skip in CI.", ) - def test_create_aml_compute( - self, mock_compute_operation: ComputeOperations, mocker: MockFixture - ) -> None: + def test_create_aml_compute(self, 
mock_compute_operation: ComputeOperations, mocker: MockFixture) -> None: mocker.patch("azure.ai.ml._restclient.v2022_10_01_preview.workspaces.get", return_value=funny()) compute = load_compute("./tests/test_configs/compute/compute-aml.yaml") mock_compute_operation.begin_create_or_update(compute=compute) @@ -94,9 +92,7 @@ def test_restart(self, mock_compute_operation: ComputeOperations) -> None: mock_compute_operation.begin_restart("randstr") mock_compute_operation._operation.begin_restart.assert_called_once() - def test_update_aml_compute( - self, mock_compute_operation: ComputeOperations, mocker: MockFixture - ) -> None: + def test_update_aml_compute(self, mock_compute_operation: ComputeOperations, mocker: MockFixture) -> None: compute = AmlCompute( name="name", tags={"key1": "value1", "key2": "value2"}, diff --git a/sdk/ml/azure-ai-ml/tests/conftest.py b/sdk/ml/azure-ai-ml/tests/conftest.py index 3391abcfc90e..39bef1a6b3f8 100644 --- a/sdk/ml/azure-ai-ml/tests/conftest.py +++ b/sdk/ml/azure-ai-ml/tests/conftest.py @@ -219,10 +219,12 @@ def mock_aml_services_2021_10_01_dataplanepreview(mocker: MockFixture) -> Mock: def mock_aml_services_2022_10_01_preview(mocker: MockFixture) -> Mock: return mocker.patch("azure.ai.ml._restclient.v2022_10_01_preview") + @pytest.fixture def mock_aml_services_2022_12_01_preview(mocker: MockFixture) -> Mock: return mocker.patch("azure.ai.ml._restclient.v2022_12_01_preview") + @pytest.fixture def mock_aml_services_run_history(mocker: MockFixture) -> Mock: return mocker.patch("azure.ai.ml._restclient.runhistory") @@ -343,6 +345,7 @@ def registry_client(e2e_ws_scope: OperationScope, auth: ClientSecretCredential) registry_name="testFeed", ) + @pytest.fixture def data_asset_registry_client(e2e_ws_scope: OperationScope, auth: ClientSecretCredential) -> MLClient: """return a machine learning client using default e2e testing workspace""" diff --git a/sdk/ml/azure-ai-ml/tests/dataset/e2etests/test_data.py b/sdk/ml/azure-ai-ml/tests/dataset/e2etests/test_data.py index d04e80cf4b77..adf514216b47 100644 --- a/sdk/ml/azure-ai-ml/tests/dataset/e2etests/test_data.py +++ b/sdk/ml/azure-ai-ml/tests/dataset/e2etests/test_data.py @@ -175,20 +175,15 @@ def test_create_mltable(self, client: MLClient, tmp_path: Path, randstr: Callabl assert data_version.id == generate_data_arm_id(client._operation_scope, name, version) assert data_version.path.endswith("/tmp_folder/") - @pytest.mark.skipif(condition=not is_live(), - reason="Auth issue in Registry") + @pytest.mark.skipif(condition=not is_live(), reason="Auth issue in Registry") def test_create_data_asset_in_registry( - self, data_asset_registry_client: MLClient, - randstr: Callable[[], str]) -> None: + self, data_asset_registry_client: MLClient, randstr: Callable[[], str] + ) -> None: name = randstr("name") version = "1" data_asset = load_data( source="./tests/test_configs/dataset/data_file.yaml", - params_override=[{ - "name": name - }, { - "version": version - }], + params_override=[{"name": name}, {"version": version}], ) sleep_if_live(3) obj = data_asset_registry_client.data.create_or_update(data_asset) @@ -210,16 +205,13 @@ def test_list(self, client: MLClient, data_with_2_versions: str) -> None: # use a set since ordering of elements returned from list isn't guaranteed assert {"1", "2"} == {data.version for data in data_list} - @pytest.mark.skipif(condition=not is_live(), - reason="Auth issue in Registry") - def test_list_data_in_registry( - self, data_asset_registry_client: MLClient) -> None: + 
@pytest.mark.skipif(condition=not is_live(), reason="Auth issue in Registry") + def test_list_data_in_registry(self, data_asset_registry_client: MLClient) -> None: data_iterator = data_asset_registry_client.data.list() assert data_iterator assert isinstance(data_iterator, ItemPaged) - def test_data_get_latest_label(self, client: MLClient, - randstr: Callable[[], str]) -> None: + def test_data_get_latest_label(self, client: MLClient, randstr: Callable[[], str]) -> None: name = randstr("name") versions = ["foo", "bar", "baz", "foobar"] @@ -227,35 +219,28 @@ def test_data_get_latest_label(self, client: MLClient, client.data.create_or_update( load_data( source="./tests/test_configs/dataset/data_file.yaml", - params_override=[{ - "name": name - }, { - "version": version - }], - )) + params_override=[{"name": name}, {"version": version}], + ) + ) sleep_if_live(3) assert client.data.get(name, label="latest").version == version - @pytest.mark.skipif(condition=not is_live(), - reason="Auth issue in Registry") + @pytest.mark.skipif(condition=not is_live(), reason="Auth issue in Registry") def test_data_get_latest_label_in_registry( - self, data_asset_registry_client: MLClient, randstr: Callable[[], - str]) -> None: + self, data_asset_registry_client: MLClient, randstr: Callable[[], str] + ) -> None: name = randstr("name") versions = ["foo", "bar", "baz", "foobar"] for version in versions: data_asset_registry_client.data.create_or_update( load_data( source="./tests/test_configs/dataset/data_file.yaml", - params_override=[{ - "name": name - }, { - "version": version - }], - )) + params_override=[{"name": name}, {"version": version}], + ) + ) sleep_if_live(3) - assert data_asset_registry_client.data.get(name, - label="latest").version == version + assert data_asset_registry_client.data.get(name, label="latest").version == version + @pytest.mark.e2etest def test_data_archive_restore_version(self, client: MLClient, randstr: Callable[[], str]) -> None: name = randstr("name") diff --git a/sdk/ml/azure-ai-ml/tests/dataset/unittests/test_data_operations.py b/sdk/ml/azure-ai-ml/tests/dataset/unittests/test_data_operations.py index 75e8fb639abb..60033355b559 100644 --- a/sdk/ml/azure-ai-ml/tests/dataset/unittests/test_data_operations.py +++ b/sdk/ml/azure-ai-ml/tests/dataset/unittests/test_data_operations.py @@ -29,9 +29,8 @@ @pytest.fixture def mock_datastore_operation( - mock_workspace_scope: OperationScope, - mock_operation_config: OperationConfig, - mock_aml_services_2022_10_01: Mock) -> DatastoreOperations: + mock_workspace_scope: OperationScope, mock_operation_config: OperationConfig, mock_aml_services_2022_10_01: Mock +) -> DatastoreOperations: yield DatastoreOperations( operation_scope=mock_workspace_scope, operation_config=mock_operation_config, @@ -79,8 +78,7 @@ def mock_artifact_storage(_one, _two, _three, **kwargs) -> Mock: name="testFileData", version="3", relative_path="path", - datastore_arm_id= - "/subscriptions/mock/resourceGroups/mock/providers/Microsoft.MachineLearningServices/workspaces/mock/datastores/datastore_id", + datastore_arm_id="/subscriptions/mock/resourceGroups/mock/providers/Microsoft.MachineLearningServices/workspaces/mock/datastores/datastore_id", container_name="containerName", ) @@ -88,37 +86,27 @@ def mock_artifact_storage(_one, _two, _three, **kwargs) -> Mock: # @pytest.fixture def mock_sas_uri(**kwargs) -> Mock: return "test_sas_uri" + + @pytest.mark.unittest -@patch("azure.ai.ml._artifacts._artifact_utilities._upload_to_datastore", - new=mock_artifact_storage) 
-@patch("azure.ai.ml._utils._registry_utils.get_sas_uri_for_registry_asset", - new=mock_sas_uri) +@patch("azure.ai.ml._artifacts._artifact_utilities._upload_to_datastore", new=mock_artifact_storage) +@patch("azure.ai.ml._utils._registry_utils.get_sas_uri_for_registry_asset", new=mock_sas_uri) @patch.object(Data, "_from_rest_object", new=Mock()) @patch.object(Data, "_from_container_rest_object", new=Mock()) @pytest.mark.data_experiences_test class TestDataOperations: - def test_list(self, mock_data_operations: DataOperations) -> None: - mock_data_operations._operation.list.return_value = [ - Mock(Data) for _ in range(10) - ] - mock_data_operations._container_operation.list.return_value = [ - Mock(Data) for _ in range(10) - ] + mock_data_operations._operation.list.return_value = [Mock(Data) for _ in range(10)] + mock_data_operations._container_operation.list.return_value = [Mock(Data) for _ in range(10)] result = mock_data_operations.list() assert isinstance(result, Iterable) mock_data_operations._container_operation.list.assert_called_once() mock_data_operations.list(name="random_name") mock_data_operations._operation.list.assert_called_once() - def test_list_in_registry( - self, mock_data_operations_in_registry: DataOperations) -> None: - mock_data_operations_in_registry._operation.list.return_value = [ - Mock(Data) for _ in range(10) - ] - mock_data_operations_in_registry._container_operation.list.return_value = [ - Mock(Data) for _ in range(10) - ] + def test_list_in_registry(self, mock_data_operations_in_registry: DataOperations) -> None: + mock_data_operations_in_registry._operation.list.return_value = [Mock(Data) for _ in range(10)] + mock_data_operations_in_registry._container_operation.list.return_value = [Mock(Data) for _ in range(10)] mock_data_operations_in_registry.list(name="random_name") mock_data_operations_in_registry._operation.list.assert_called_once_with( name="random_name", @@ -128,14 +116,9 @@ def test_list_in_registry( cls=ANY, ) - def test_list_in_registry_no_name( - self, mock_data_operations_in_registry: DataOperations) -> None: - mock_data_operations_in_registry._operation.list.return_value = [ - Mock(Data) for _ in range(10) - ] - mock_data_operations_in_registry._container_operation.list.return_value = [ - Mock(Data) for _ in range(10) - ] + def test_list_in_registry_no_name(self, mock_data_operations_in_registry: DataOperations) -> None: + mock_data_operations_in_registry._operation.list.return_value = [Mock(Data) for _ in range(10)] + mock_data_operations_in_registry._container_operation.list.return_value = [Mock(Data) for _ in range(10)] mock_data_operations_in_registry.list() mock_data_operations_in_registry._container_operation.list.assert_called_once_with( resource_group_name=Test_Resource_Group, @@ -144,40 +127,27 @@ def test_list_in_registry_no_name( cls=ANY, ) - def test_get_with_version(self, - mock_data_operations: DataOperations) -> None: + def test_get_with_version(self, mock_data_operations: DataOperations) -> None: name_only = "some_name" version = "1" data_asset = Data(name=name_only, version=version) - with patch.object(ItemPaged, - "next"), patch.object(Data, - "_from_rest_object", - return_value=data_asset): + with patch.object(ItemPaged, "next"), patch.object(Data, "_from_rest_object", return_value=data_asset): mock_data_operations.get(name_only, version) mock_data_operations._operation.get.assert_called_once_with( - name=name_only, - version=version, - resource_group_name=Test_Resource_Group, - workspace_name=Test_Workspace_Name) + 
name=name_only, version=version, resource_group_name=Test_Resource_Group, workspace_name=Test_Workspace_Name + ) - def test_get_in_registry_with_version( - self, mock_data_operations_in_registry: DataOperations) -> None: + def test_get_in_registry_with_version(self, mock_data_operations_in_registry: DataOperations) -> None: name_only = "some_name" version = "1" data_asset = Data(name=name_only, version=version) - with patch.object(ItemPaged, - "next"), patch.object(Data, - "_from_rest_object", - return_value=data_asset): + with patch.object(ItemPaged, "next"), patch.object(Data, "_from_rest_object", return_value=data_asset): mock_data_operations_in_registry.get(name_only, version) mock_data_operations_in_registry._operation.get.assert_called_once_with( - name=name_only, - version=version, - resource_group_name=Test_Resource_Group, - registry_name=Test_Registry_Name) + name=name_only, version=version, resource_group_name=Test_Resource_Group, registry_name=Test_Registry_Name + ) - def test_get_no_version(self, - mock_data_operations: DataOperations) -> None: + def test_get_no_version(self, mock_data_operations: DataOperations) -> None: name = "random_name" with pytest.raises(Exception) as ex: mock_data_operations.get(name=name) @@ -190,18 +160,17 @@ def test_create_with_spec_file( ) -> None: data_path = "./tests/test_configs/dataset/data_local_path.yaml" with patch( - "azure.ai.ml._artifacts._artifact_utilities._upload_to_datastore", - return_value=ArtifactStorageInfo( - name="testFileData", - version="3", - relative_path="path", - datastore_arm_id= - "/subscriptions/mock/resourceGroups/mock/providers/Microsoft.MachineLearningServices/workspaces/mock/datastores/datastore_id", - container_name="containerName", - ), + "azure.ai.ml._artifacts._artifact_utilities._upload_to_datastore", + return_value=ArtifactStorageInfo( + name="testFileData", + version="3", + relative_path="path", + datastore_arm_id="/subscriptions/mock/resourceGroups/mock/providers/Microsoft.MachineLearningServices/workspaces/mock/datastores/datastore_id", + container_name="containerName", + ), ) as mock_thing, patch( - "azure.ai.ml.operations._data_operations.Data._from_rest_object", - return_value=None, + "azure.ai.ml.operations._data_operations.Data._from_rest_object", + return_value=None, ): data = load_data(source=data_path) path = Path(data._base_path, data.path).resolve() @@ -220,8 +189,7 @@ def test_create_with_spec_file( ignore_file=None, ) mock_data_operations._operation.create_or_update.assert_called_once() - assert "version='1'" in str( - mock_data_operations._operation.create_or_update.call_args) + assert "version='1'" in str(mock_data_operations._operation.create_or_update.call_args) def test_create_or_update_in_registry( self, @@ -233,14 +201,11 @@ def test_create_or_update_in_registry( data_path = "./tests/test_configs/dataset/data_local_path.yaml" data = load_data(source=data_path) with patch( - "azure.ai.ml.operations._data_operations._check_and_upload_path", - return_value=(data, "indicatorfile.txt"), - ), patch( - "azure.ai.ml.operations._data_operations.Data._from_rest_object", - return_value=data - ), patch( - "azure.ai.ml.operations._data_operations.get_sas_uri_for_registry_asset", - return_value="test_sas_uri") as mock_sas_uri: + "azure.ai.ml.operations._data_operations._check_and_upload_path", + return_value=(data, "indicatorfile.txt"), + ), patch("azure.ai.ml.operations._data_operations.Data._from_rest_object", return_value=data), patch( + 
"azure.ai.ml.operations._data_operations.get_sas_uri_for_registry_asset", return_value="test_sas_uri" + ) as mock_sas_uri: mock_data_operations_in_registry.create_or_update(data) mock_sas_uri.assert_called_once() mock_data_operations_in_registry._operation.begin_create_or_update.assert_called_once_with( @@ -248,7 +213,8 @@ def test_create_or_update_in_registry( version="1", registry_name=Test_Registry_Name, resource_group_name=Test_Resource_Group, - body=ANY) + body=ANY, + ) def test_create_with_mltable_pattern_path( self, @@ -304,7 +270,9 @@ def test_create_or_update_missing_path(self, mock_data_operations: DataOperation with pytest.raises(Exception) as ex: load_data("tests/test_configs/dataset/data_missing_path_test.yml") - assert REF_DOC_YAML_SCHEMA_ERROR_MSG_FORMAT.format(YAMLRefDocSchemaNames.DATA, YAMLRefDocLinks.DATA) in str(ex.value) + assert REF_DOC_YAML_SCHEMA_ERROR_MSG_FORMAT.format(YAMLRefDocSchemaNames.DATA, YAMLRefDocLinks.DATA) in str( + ex.value + ) @patch("azure.ai.ml.operations._data_operations.read_local_mltable_metadata_contents") @patch("azure.ai.ml.operations._data_operations.read_remote_mltable_metadata_contents") diff --git a/sdk/ml/azure-ai-ml/tests/datastore/unittests/test_datastore_operations.py b/sdk/ml/azure-ai-ml/tests/datastore/unittests/test_datastore_operations.py index 1d13154bd783..caf56f32e401 100644 --- a/sdk/ml/azure-ai-ml/tests/datastore/unittests/test_datastore_operations.py +++ b/sdk/ml/azure-ai-ml/tests/datastore/unittests/test_datastore_operations.py @@ -29,22 +29,16 @@ def test_list(self, mock_from_rest, mock_datastore_operation: DatastoreOperation mock_datastore_operation._operation.list.assert_called_once() mock_datastore_operation._operation.list_secrets.assert_not_called() - def test_delete( - self, mock_from_rest, mock_datastore_operation: DatastoreOperations - ) -> None: + def test_delete(self, mock_from_rest, mock_datastore_operation: DatastoreOperations) -> None: mock_datastore_operation.delete("random_name") mock_datastore_operation._operation.delete.assert_called_once() - def test_get_no_secrets( - self, mock_from_rest, mock_datastore_operation: DatastoreOperations - ) -> None: + def test_get_no_secrets(self, mock_from_rest, mock_datastore_operation: DatastoreOperations) -> None: mock_datastore_operation.get("random_name") mock_datastore_operation._operation.get.assert_called_once() mock_datastore_operation._operation.list_secrets.assert_not_called() - def test_get_no_secrets_with_secrets( - self, mock_from_rest, mock_datastore_operation: DatastoreOperations - ) -> None: + def test_get_no_secrets_with_secrets(self, mock_from_rest, mock_datastore_operation: DatastoreOperations) -> None: mock_datastore_operation.get("random_name", include_secrets=True) mock_datastore_operation._operation.get.assert_called_once() mock_datastore_operation._operation.list_secrets.assert_called_once() diff --git a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_controlflow_pipeline.py b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_controlflow_pipeline.py index d7d0ccacb1b0..f24530a10089 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_controlflow_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_controlflow_pipeline.py @@ -71,7 +71,7 @@ def condition_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { "conditionnode": { - '_source': 'DSL', + "_source": "DSL", "condition": "${{parent.jobs.result.outputs.output}}", "false_block": 
"${{parent.jobs.node1}}", "true_block": "${{parent.jobs.node2}}", @@ -116,22 +116,25 @@ def condition_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(rest_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'conditionnode': { - '_source': 'DSL', - 'condition': True, - 'false_block': '${{parent.jobs.node1}}', - 'true_block': '${{parent.jobs.node2}}', - 'type': 'if_else'}, - 'node1': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}}, - 'name': 'node1', - 'type': 'command'}, - 'node2': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '2'}}, - 'name': 'node2', - 'type': 'command'} + "conditionnode": { + "_source": "DSL", + "condition": True, + "false_block": "${{parent.jobs.node1}}", + "true_block": "${{parent.jobs.node2}}", + "type": "if_else", + }, + "node1": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "1"}}, + "name": "node1", + "type": "command", + }, + "node2": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "2"}}, + "name": "node2", + "type": "command", + }, } def test_dsl_condition_pipeline_with_one_branch(self, client: MLClient): @@ -152,16 +155,18 @@ def condition_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(rest_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'conditionnode': { - '_source': 'DSL', - 'condition': True, - 'false_block': '${{parent.jobs.node1}}', - 'type': 'if_else'}, - 'node1': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}}, - 'name': 'node1', - 'type': 'command'} + "conditionnode": { + "_source": "DSL", + "condition": True, + "false_block": "${{parent.jobs.node1}}", + "type": "if_else", + }, + "node1": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "1"}}, + "name": "node1", + "type": "command", + }, } def test_registered_component_is_control(self, client: MLClient): @@ -177,7 +182,8 @@ def test_registered_component_is_control(self, client: MLClient): "bool_param_output": {"type": "boolean", "is_control": True, "early_available": True}, "int_param_output": {"type": "integer", "is_control": True}, "float_param_output": {"type": "number", "is_control": True}, - "str_param_output": {"type": "string", "is_control": True}} + "str_param_output": {"type": "string", "is_control": True}, + } assert rest_dict["outputs"] == expected_dict # Assert on pipeline component @@ -192,8 +198,7 @@ class ControlOutputGroup: @pipeline() def test_pipeline_component_control_output() -> ControlOutputGroup: node = primitive_component_with_normal_input_output_v2( - input_data=test_input, parambool=True, - paramint=2, paramfloat=2.2, paramstr="test" + input_data=test_input, parambool=True, paramint=2, paramfloat=2.2, paramstr="test" ) return node.outputs @@ -219,7 +224,8 @@ def test_pipeline(input_data, int_param, bool_param, float_param, str_param): int_param=int_param, bool_param=bool_param, float_param=float_param, - str_param=str_param) + str_param=str_param, + ) do_while( body=do_while_body_func, @@ -240,64 +246,81 @@ def test_pipeline(input_data, int_param, bool_param, float_param, str_param): 
parambool=do_while_body_func.outputs.bool_param_output, paramint=do_while_body_func.outputs.int_param_output, paramfloat=do_while_body_func.outputs.float_param_output, - paramstr=do_while_body_func.outputs.str_param_output) + paramstr=do_while_body_func.outputs.str_param_output, + ) condition(condition=do_while_body_func.outputs.condition, true_block=primitive_output_component_true) - pipeline_job = test_pipeline(input_data=test_input, int_param=4, bool_param=True, float_param=22.0, - str_param="string_param_no_space") + pipeline_job = test_pipeline( + input_data=test_input, int_param=4, bool_param=True, float_param=22.0, str_param="string_param_no_space" + ) with include_private_preview_nodes_in_pipeline(): rest_job = assert_job_cancel(pipeline_job, client) dsl_pipeline_job_dict = omit_with_wildcard(rest_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'conditionnode': { - '_source': 'DSL', - 'condition': '${{parent.jobs.do_while_body_func.outputs.condition}}', - 'true_block': '${{parent.jobs.primitive_output_component_true}}', - 'type': 'if_else'}, - 'do_while_body_func': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'bool_param': {'job_input_type': 'literal', - 'value': '${{parent.inputs.bool_param}}'}, - 'float_param': {'job_input_type': 'literal', - 'value': '${{parent.inputs.float_param}}'}, - 'input_1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.input_data}}'}, - 'input_2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.input_data}}'}, - 'int_param': {'job_input_type': 'literal', - 'value': '${{parent.inputs.int_param}}'}, - 'str_param': {'job_input_type': 'literal', - 'value': '${{parent.inputs.str_param}}'}}, - 'name': 'do_while_body_func', - 'type': 'command'}, - 'dowhile': { - '_source': 'DSL', - 'body': '${{parent.jobs.do_while_body_func}}', - 'condition': 'condition', - 'limits': {'max_iteration_count': 3}, - 'mapping': {'bool_param_output': ['bool_param'], - 'float_param_output': ['float_param'], - 'int_param_output': ['int_param'], - 'output_1': ['input_1'], - 'output_2': ['input_2'], - 'str_param_output': ['str_param']}, - 'type': 'do_while'}, - 'primitive_output_component_true': { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'input_data': {'job_input_type': 'literal', - 'value': '${{parent.jobs.do_while_body_func.outputs.output_1}}'}, - 'parambool': {'job_input_type': 'literal', - 'value': '${{parent.jobs.do_while_body_func.outputs.bool_param_output}}'}, - 'paramfloat': {'job_input_type': 'literal', - 'value': '${{parent.jobs.do_while_body_func.outputs.float_param_output}}'}, - 'paramint': {'job_input_type': 'literal', - 'value': '${{parent.jobs.do_while_body_func.outputs.int_param_output}}'}, - 'paramstr': {'job_input_type': 'literal', - 'value': '${{parent.jobs.do_while_body_func.outputs.str_param_output}}'}}, - 'name': 'primitive_output_component_true', - 'type': 'command'} + "conditionnode": { + "_source": "DSL", + "condition": "${{parent.jobs.do_while_body_func.outputs.condition}}", + "true_block": "${{parent.jobs.primitive_output_component_true}}", + "type": "if_else", + }, + "do_while_body_func": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": { + "bool_param": {"job_input_type": "literal", "value": "${{parent.inputs.bool_param}}"}, + "float_param": {"job_input_type": "literal", "value": "${{parent.inputs.float_param}}"}, + "input_1": {"job_input_type": "literal", "value": "${{parent.inputs.input_data}}"}, + "input_2": {"job_input_type": "literal", 
"value": "${{parent.inputs.input_data}}"}, + "int_param": {"job_input_type": "literal", "value": "${{parent.inputs.int_param}}"}, + "str_param": {"job_input_type": "literal", "value": "${{parent.inputs.str_param}}"}, + }, + "name": "do_while_body_func", + "type": "command", + }, + "dowhile": { + "_source": "DSL", + "body": "${{parent.jobs.do_while_body_func}}", + "condition": "condition", + "limits": {"max_iteration_count": 3}, + "mapping": { + "bool_param_output": ["bool_param"], + "float_param_output": ["float_param"], + "int_param_output": ["int_param"], + "output_1": ["input_1"], + "output_2": ["input_2"], + "str_param_output": ["str_param"], + }, + "type": "do_while", + }, + "primitive_output_component_true": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": { + "input_data": { + "job_input_type": "literal", + "value": "${{parent.jobs.do_while_body_func.outputs.output_1}}", + }, + "parambool": { + "job_input_type": "literal", + "value": "${{parent.jobs.do_while_body_func.outputs.bool_param_output}}", + }, + "paramfloat": { + "job_input_type": "literal", + "value": "${{parent.jobs.do_while_body_func.outputs.float_param_output}}", + }, + "paramint": { + "job_input_type": "literal", + "value": "${{parent.jobs.do_while_body_func.outputs.int_param_output}}", + }, + "paramstr": { + "job_input_type": "literal", + "value": "${{parent.jobs.do_while_body_func.outputs.str_param_output}}", + }, + }, + "name": "primitive_output_component_true", + "type": "command", + }, } @@ -308,9 +331,7 @@ def test_pipeline(input_data, int_param, bool_param, float_param, str_param): # ) class TestParallelForPipeline(TestControlFlowPipeline): def test_simple_dsl_parallel_for_pipeline(self, client: MLClient): - hello_world_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) + hello_world_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") @pipeline def parallel_for_pipeline(): @@ -320,7 +341,7 @@ def parallel_for_pipeline(): items=[ {"component_in_number": 1}, {"component_in_number": 2}, - ] + ], ) after_node = hello_world_component( component_in_path=parallel_node.outputs.component_out_path, @@ -335,31 +356,39 @@ def parallel_for_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'after_node': { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'computeId': 'cpu-cluster', - 'inputs': {'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.jobs.parallel_node.outputs.component_out_path}}'}}, - 'name': 'after_node', - 'type': 'command'}, - 'parallel_body': { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'component_in_path': {'job_input_type': 'uri_file', - 'uri': 'https://dprepdata.blob.core.windows.net/demo/Titanic.csv'}}, - 'name': 'parallel_body', - 'type': 'command'}, - 'parallel_node': { - '_source': 'DSL', - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, ' - '{"component_in_number": 2}]', - 'type': 'parallel_for'} + "after_node": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "computeId": "cpu-cluster", + "inputs": { + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.parallel_node.outputs.component_out_path}}", + } + }, + "name": "after_node", + "type": "command", + }, + "parallel_body": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": { + "component_in_path": { + "job_input_type": "uri_file", + "uri": 
"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", + } + }, + "name": "parallel_body", + "type": "command", + }, + "parallel_node": { + "_source": "DSL", + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, ' '{"component_in_number": 2}]', + "type": "parallel_for", + }, } def test_dsl_parallel_for_pipeline_unprovided_input(self, client: MLClient): - hello_world_component = load_component( - source="./tests/test_configs/components/helloworld_component_alt1.yml" - ) + hello_world_component = load_component(source="./tests/test_configs/components/helloworld_component_alt1.yml") @pipeline def parallel_for_pipeline(): @@ -369,11 +398,10 @@ def parallel_for_pipeline(): items=[ {"component_in_number": 1}, {"component_in_number": 2}, - ] + ], ) after_node = hello_world_component( - component_in_path=parallel_node.outputs.component_out_path, - component_in_number=1 + component_in_path=parallel_node.outputs.component_out_path, component_in_number=1 ) after_node.compute = "cpu-cluster" @@ -384,40 +412,45 @@ def parallel_for_pipeline(): pipeline_job = assert_job_cancel(pipeline_job, client) dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'after_node': { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'computeId': 'cpu-cluster', - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.jobs.parallel_node.outputs.component_out_path}}'}}, - 'name': 'after_node', - 'type': 'command'}, - 'parallel_body': { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'component_in_path': {'job_input_type': 'uri_file', - 'uri': 'https://dprepdata.blob.core.windows.net/demo/Titanic.csv'}}, - 'name': 'parallel_body', - 'type': 'command'}, - 'parallel_node': { - '_source': 'DSL', - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, ' - '{"component_in_number": 2}]', - 'type': 'parallel_for'} + "after_node": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "computeId": "cpu-cluster", + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.parallel_node.outputs.component_out_path}}", + }, + }, + "name": "after_node", + "type": "command", + }, + "parallel_body": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": { + "component_in_path": { + "job_input_type": "uri_file", + "uri": "https://dprepdata.blob.core.windows.net/demo/Titanic.csv", + } + }, + "name": "parallel_body", + "type": "command", + }, + "parallel_node": { + "_source": "DSL", + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, ' '{"component_in_number": 2}]', + "type": "parallel_for", + }, } def test_parallel_for_pipeline_with_subgraph(self, client: MLClient): - hello_world_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) + hello_world_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") @pipeline def sub_graph(component_in_number: int = 10): node = hello_world_component(component_in_path=test_input, component_in_number=component_in_number) - return { - "component_out_path": node.outputs.component_out_path - } + return {"component_out_path": node.outputs.component_out_path} @pipeline def parallel_for_pipeline(): @@ -427,7 +460,7 @@ def 
parallel_for_pipeline(): items=[ {"component_in_number": 1}, {"component_in_number": 2}, - ] + ], ) after_node = hello_world_component( component_in_path=parallel_node.outputs.component_out_path, @@ -442,35 +475,34 @@ def parallel_for_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'after_node': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'computeId': 'cpu-cluster', - 'inputs': {'component_in_path': { - 'job_input_type': 'literal', - 'value': '${{parent.jobs.parallel_node.outputs.component_out_path}}'}}, - 'name': 'after_node', - 'type': 'command'}, - 'parallel_body': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'name': 'parallel_body', - 'type': 'pipeline'}, - 'parallel_node': { - '_source': 'DSL', - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, ' - '{"component_in_number": 2}]', - 'type': 'parallel_for'} + "after_node": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "computeId": "cpu-cluster", + "inputs": { + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.parallel_node.outputs.component_out_path}}", + } + }, + "name": "after_node", + "type": "command", + }, + "parallel_body": {"_source": "REMOTE.WORKSPACE.COMPONENT", "name": "parallel_body", "type": "pipeline"}, + "parallel_node": { + "_source": "DSL", + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, ' '{"component_in_number": 2}]', + "type": "parallel_for", + }, } def test_parallel_for_pipeline_subgraph_unprovided_input(self, client: MLClient): - hello_world_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) + hello_world_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") @pipeline def sub_graph(component_in_number: int): node = hello_world_component(component_in_path=test_input, component_in_number=component_in_number) - return { - "component_out_path": node.outputs.component_out_path - } + return {"component_out_path": node.outputs.component_out_path} @pipeline def parallel_for_pipeline(): @@ -480,7 +512,7 @@ def parallel_for_pipeline(): items=[ {"component_in_number": 1}, {"component_in_number": 2}, - ] + ], ) after_node = hello_world_component( component_in_path=parallel_node.outputs.component_out_path, @@ -495,34 +527,39 @@ def parallel_for_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'after_node': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'computeId': 'cpu-cluster', - 'inputs': {'component_in_path': { - 'job_input_type': 'literal', - 'value': '${{parent.jobs.parallel_node.outputs.component_out_path}}'}}, - 'name': 'after_node', - 'type': 'command'}, - 'parallel_body': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'name': 'parallel_body', - 'type': 'pipeline'}, - 'parallel_node': { - '_source': 'DSL', - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, ' - '{"component_in_number": 2}]', - 'type': 'parallel_for'} + "after_node": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "computeId": "cpu-cluster", + "inputs": { + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.parallel_node.outputs.component_out_path}}", + } + }, + "name": "after_node", + "type": "command", + }, + "parallel_body": {"_source": "REMOTE.WORKSPACE.COMPONENT", 
"name": "parallel_body", "type": "pipeline"}, + "parallel_node": { + "_source": "DSL", + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, ' '{"component_in_number": 2}]', + "type": "parallel_for", + }, } def test_parallel_for_pipeline_with_port_outputs(self, client: MLClient): hello_world_component = load_component( source="./tests/test_configs/components/helloworld_component.yml", params_override=[ - {"outputs": { - "component_out_path": {"type": "uri_folder"}, - "component_out_file": {"type": "uri_file"}, - "component_out_table": {"type": "mltable"}, - }} - ] + { + "outputs": { + "component_out_path": {"type": "uri_folder"}, + "component_out_file": {"type": "uri_file"}, + "component_out_table": {"type": "mltable"}, + } + } + ], ) @pipeline @@ -533,7 +570,7 @@ def parallel_for_pipeline(): items=[ {"component_in_number": 3}, {"component_in_number": 4}, - ] + ], ) return { "component_out_path": parallel_node.outputs.component_out_path, @@ -549,33 +586,33 @@ def parallel_for_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'parallel_body': {'_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'component_in_path': { - 'job_input_type': 'uri_file', - 'uri': 'https://dprepdata.blob.core.windows.net/demo/Titanic.csv'}}, - 'name': 'parallel_body', - 'type': 'command'}, - 'parallel_node': { - '_source': 'DSL', - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 3}, ' - '{"component_in_number": 4}]', - 'type': 'parallel_for', - 'outputs': {'component_out_file': {'type': 'literal', - 'value': '${{parent.outputs.component_out_file}}'}, - 'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}, - 'component_out_table': {'type': 'literal', - 'value': '${{parent.outputs.component_out_table}}'}}, - } + "parallel_body": { + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": { + "component_in_path": { + "job_input_type": "uri_file", + "uri": "https://dprepdata.blob.core.windows.net/demo/Titanic.csv", + } + }, + "name": "parallel_body", + "type": "command", + }, + "parallel_node": { + "_source": "DSL", + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 3}, ' '{"component_in_number": 4}]', + "type": "parallel_for", + "outputs": { + "component_out_file": {"type": "literal", "value": "${{parent.outputs.component_out_file}}"}, + "component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"}, + "component_out_table": {"type": "literal", "value": "${{parent.outputs.component_out_table}}"}, + }, + }, } assert dsl_pipeline_job_dict["properties"]["outputs"] == { - 'component_out_file': {'job_output_type': 'mltable', - 'mode': 'ReadWriteMount'}, - 'component_out_path': {'job_output_type': 'mltable', - 'mode': 'ReadWriteMount'}, - 'component_out_table': {'job_output_type': 'mltable', - 'mode': 'ReadWriteMount'} + "component_out_file": {"job_output_type": "mltable", "mode": "ReadWriteMount"}, + "component_out_path": {"job_output_type": "mltable", "mode": "ReadWriteMount"}, + "component_out_table": {"job_output_type": "mltable", "mode": "ReadWriteMount"}, } # parallel for pipeline component is correctly generated @@ -588,21 +625,20 @@ def parent_pipeline(): rest_pipeline_component = pipeline_job.jobs["parallel_for_pipeline"].component._to_rest_object().as_dict() assert rest_pipeline_component["properties"]["component_spec"]["outputs"] == { 
- 'component_out_file': {'type': 'mltable'}, - 'component_out_path': {'type': 'mltable'}, - 'component_out_table': {'type': 'mltable'} + "component_out_file": {"type": "mltable"}, + "component_out_path": {"type": "mltable"}, + "component_out_table": {"type": "mltable"}, } assert rest_pipeline_component["properties"]["component_spec"]["jobs"]["parallel_node"] == { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 3}, {"component_in_number": 4}]', - 'outputs': {'component_out_file': {'type': 'literal', - 'value': '${{parent.outputs.component_out_file}}'}, - 'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}, - 'component_out_table': {'type': 'literal', - 'value': '${{parent.outputs.component_out_table}}'}}, - 'type': 'parallel_for', - '_source': 'DSL', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 3}, {"component_in_number": 4}]', + "outputs": { + "component_out_file": {"type": "literal", "value": "${{parent.outputs.component_out_file}}"}, + "component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"}, + "component_out_table": {"type": "literal", "value": "${{parent.outputs.component_out_table}}"}, + }, + "type": "parallel_for", + "_source": "DSL", } with include_private_preview_nodes_in_pipeline(): @@ -612,12 +648,14 @@ def test_parallel_for_pipeline_with_primitive_outputs(self, client: MLClient): hello_world_component = load_component( source="./tests/test_configs/components/helloworld_component.yml", params_override=[ - {"outputs": { - "component_out_path": {"type": "uri_folder"}, - "component_out_number": {"type": "number"}, - "component_out_boolean": {"type": "boolean", "is_control": True}, - }} - ] + { + "outputs": { + "component_out_path": {"type": "uri_folder"}, + "component_out_number": {"type": "number"}, + "component_out_boolean": {"type": "boolean", "is_control": True}, + } + } + ], ) @pipeline @@ -628,7 +666,7 @@ def parallel_for_pipeline(): items=[ {"component_in_number": 1}, {"component_in_number": 2}, - ] + ], ) return { "component_out_path": parallel_node.outputs.component_out_path, @@ -645,22 +683,21 @@ def parent_pipeline(): rest_pipeline_component = pipeline_job.jobs["parallel_for_pipeline"].component._to_rest_object().as_dict() assert rest_pipeline_component["properties"]["component_spec"]["outputs"] == { - 'component_out_boolean': {'is_control': True, 'type': 'string'}, - 'component_out_number': {'type': 'string'}, - 'component_out_path': {'type': 'mltable'} + "component_out_boolean": {"is_control": True, "type": "string"}, + "component_out_number": {"type": "string"}, + "component_out_path": {"type": "mltable"}, } assert rest_pipeline_component["properties"]["component_spec"]["jobs"]["parallel_node"] == { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, {"component_in_number": 2}]', - 'outputs': {'component_out_boolean': {'type': 'literal', - 'value': '${{parent.outputs.component_out_boolean}}'}, - 'component_out_number': {'type': 'literal', - 'value': '${{parent.outputs.component_out_number}}'}, - 'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}}, - 'type': 'parallel_for', - '_source': 'DSL', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, {"component_in_number": 2}]', + "outputs": { + "component_out_boolean": {"type": "literal", "value": "${{parent.outputs.component_out_boolean}}"}, + "component_out_number": 
{"type": "literal", "value": "${{parent.outputs.component_out_number}}"}, + "component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"}, + }, + "type": "parallel_for", + "_source": "DSL", } # parallel for pipeline component is correctly generated @@ -678,10 +715,7 @@ def parallel_for_pipeline(): foreach_config = {} for i in range(10): foreach_config[f"silo_{i}"] = {} - parallel_node = parallel_for( - body=parallel_body, - items=foreach_config - ) + parallel_node = parallel_for(body=parallel_body, items=foreach_config) return { "component_out_path": parallel_node.outputs.component_out_path, } @@ -694,25 +728,23 @@ def parallel_for_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"] == { - 'parallel_body': {'_source': 'YAML.COMPONENT', - 'name': 'parallel_body', - 'type': 'command'}, - 'parallel_node': { - '_source': 'DSL', - 'body': '${{parent.jobs.parallel_body}}', - 'items': '{"silo_0": {}, "silo_1": {}, "silo_2": {}, ' - '"silo_3": {}, "silo_4": {}, "silo_5": {}, ' - '"silo_6": {}, "silo_7": {}, "silo_8": {}, ' - '"silo_9": {}}', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}}, - 'type': 'parallel_for'} + "parallel_body": {"_source": "YAML.COMPONENT", "name": "parallel_body", "type": "command"}, + "parallel_node": { + "_source": "DSL", + "body": "${{parent.jobs.parallel_body}}", + "items": '{"silo_0": {}, "silo_1": {}, "silo_2": {}, ' + '"silo_3": {}, "silo_4": {}, "silo_5": {}, ' + '"silo_6": {}, "silo_7": {}, "silo_8": {}, ' + '"silo_9": {}}', + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"} + }, + "type": "parallel_for", + }, } def test_parallel_for_pipeline_with_asset_items(self, client: MLClient): - hello_world_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) + hello_world_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") @pipeline def parallel_for_pipeline(): @@ -722,7 +754,7 @@ def parallel_for_pipeline(): items=[ {"component_in_number": 1, "component_in_path": test_input}, {"component_in_number": 2, "component_in_path": test_input}, - ] + ], ) after_node = hello_world_component( component_in_path=parallel_node.outputs.component_out_path, @@ -736,15 +768,15 @@ def parallel_for_pipeline(): dsl_pipeline_job_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict(), *omit_fields) assert dsl_pipeline_job_dict["properties"]["jobs"]["parallel_node"] == { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_path": {"uri": ' - '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' - '"job_input_type": "uri_file"}, ' - '"component_in_number": 1}, {"component_in_path": ' - '{"uri": ' - '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' - '"job_input_type": "uri_file"}, ' - '"component_in_number": 2}]', - 'type': 'parallel_for', - '_source': 'DSL', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_path": {"uri": ' + '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' + '"job_input_type": "uri_file"}, ' + '"component_in_number": 1}, {"component_in_path": ' + '{"uri": ' + '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' + '"job_input_type": "uri_file"}, ' + '"component_in_number": 2}]', + "type": "parallel_for", + "_source": "DSL", } diff --git 
a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py index 3ec51f967987..600ec37c79fb 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py @@ -645,8 +645,7 @@ def sample_pipeline(job_in_file, sample_rate): }, "name": "node1", "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output}}"}}, - 'resources': {'instance_type': 'standard_e4s_v3', - 'runtime_version': '3.1.0'}, + "resources": {"instance_type": "standard_e4s_v3", "runtime_version": "3.1.0"}, "type": "spark", } }, @@ -991,7 +990,7 @@ def pipeline_missing_type( in e.value.message ) - @dsl.pipeline(non_pipeline_inputs=['param']) + @dsl.pipeline(non_pipeline_inputs=["param"]) def pipeline_with_non_pipeline_inputs( required_input: Input, required_param: str, @@ -1004,14 +1003,13 @@ def pipeline_with_non_pipeline_inputs( with pytest.raises(ValidationException) as e: client.components.create_or_update(pipeline_with_non_pipeline_inputs) - assert "Cannot register pipeline component 'pipeline_with_non_pipeline_inputs' with non_pipeline_inputs." in e.value.message + assert ( + "Cannot register pipeline component 'pipeline_with_non_pipeline_inputs' with non_pipeline_inputs." + in e.value.message + ) @dsl.pipeline() - def pipeline_with_variable_inputs( - required_input: Input, - required_param: str, - *args, **kwargs - ): + def pipeline_with_variable_inputs(required_input: Input, required_param: str, *args, **kwargs): default_optional_func( required_input=required_input, required_param=required_param, @@ -1019,7 +1017,10 @@ def pipeline_with_variable_inputs( with pytest.raises(ValidationException) as e: client.components.create_or_update(pipeline_with_variable_inputs) - assert "Cannot register the component pipeline_with_variable_inputs with variable inputs ['args', 'kwargs']" in e.value.message + assert ( + "Cannot register the component pipeline_with_variable_inputs with variable inputs ['args', 'kwargs']" + in e.value.message + ) def test_create_pipeline_component_by_dsl(self, caplog, client: MLClient): default_optional_func = load_component(source=str(components_dir / "default_optional_component.yml")) @@ -1049,9 +1050,7 @@ def valid_pipeline_func( # Assert binding on compute not changed after resolve dependencies client.components._resolve_dependencies_for_pipeline_component_jobs( - component, - resolver=client.components._orchestrators.get_asset_arm_id, - resolve_inputs=False + component, resolver=client.components._orchestrators.get_asset_arm_id, resolve_inputs=False ) assert component.jobs["node2"].compute == "${{parent.inputs.node_compute}}" @@ -1487,7 +1486,7 @@ def pipeline_distribution_components(job_in_number, job_in_path): # continue_on_step_failure can't be set in create_or_update assert created_job.settings.continue_on_step_failure is False assert created_job.jobs["hello_world_component_mpi"].component.startswith(ANONYMOUS_COMPONENT_NAME) - assert created_job.jobs["helloworld_component"].component == 'microsoftsamples_command_component_basic:0.0.1' + assert created_job.jobs["helloworld_component"].component == "microsoftsamples_command_component_basic:0.0.1" assert hello_world_func._is_anonymous is False assert origin_id == hello_world_func.id @@ -1818,7 +1817,8 @@ def parallel_in_pipeline(job_data_path): assert expected_job == actual_job def test_multi_parallel_components_with_file_input_pipeline_output( - self, client: MLClient, randstr: 
Callable[[str], str]) -> None: + self, client: MLClient, randstr: Callable[[str], str] + ) -> None: components_dir = tests_root_dir / "test_configs/dsl_pipeline/parallel_component_with_file_input" batch_inference1 = load_component(source=str(components_dir / "score.yml")) batch_inference2 = load_component(source=str(components_dir / "score.yml")) @@ -2318,27 +2318,36 @@ def pipeline_with_group(group: ParamClass): component_in_boolean=group.bool_param, component_in_ranged_integer=group.sub.int_param, ) + component = client.components.create_or_update(pipeline_with_group) # Assert key not exists - match = "(.*)unexpected keyword argument 'group.not_exist'(.*)valid keywords: " \ - "'group', 'group.sub.int_param', 'group.str_param', 'group.bool_param', 'group.number_param'" + match = ( + "(.*)unexpected keyword argument 'group.not_exist'(.*)valid keywords: " + "'group', 'group.sub.int_param', 'group.str_param', 'group.bool_param', 'group.number_param'" + ) with pytest.raises(UnexpectedKeywordError, match=match): - component(**{ - "group.number_param": 4.0, "group.str_param": "testing", - "group.sub.int_param": 4, "group.not_exist": 4, - }) + component( + **{ + "group.number_param": 4.0, + "group.str_param": "testing", + "group.sub.int_param": 4, + "group.not_exist": 4, + } + ) # Assert conflict assignment with pytest.raises(Exception, match="Conflict parameter key 'group' and 'group.number_param'"): - pipeline = component(**{ - "group.number_param": 4.0, "group.str_param": "testing", - "group.sub.int_param": 4, "group": ParamClass(sub=SubParamClass(int_param=1)) - }) + pipeline = component( + **{ + "group.number_param": 4.0, + "group.str_param": "testing", + "group.sub.int_param": 4, + "group": ParamClass(sub=SubParamClass(int_param=1)), + } + ) pipeline.settings.default_compute = "cpu-cluster" client.jobs.create_or_update(pipeline) # Assert happy path - inputs = { - "group.number_param": 4.0, "group.str_param": "testing", "group.sub.int_param": 4 - } + inputs = {"group.number_param": 4.0, "group.str_param": "testing", "group.sub.int_param": 4} pipeline = component(**inputs) pipeline.settings.default_compute = "cpu-cluster" rest_pipeline_job = client.jobs.create_or_update(pipeline) @@ -2372,50 +2381,47 @@ def test_pipeline_node_identity_with_component(self, client: MLClient): @dsl.pipeline def pipeline_func(component_in_path): - node1 = component_func( - component_in_number=1, component_in_path=component_in_path - ) + node1 = component_func(component_in_number=1, component_in_path=component_in_path) node1.identity = AmlTokenConfiguration() - node2 = component_func( - component_in_number=1, component_in_path=component_in_path - ) + node2 = component_func(component_in_number=1, component_in_path=component_in_path) node2.identity = UserIdentityConfiguration() - node3 = component_func( - component_in_number=1, component_in_path=component_in_path - ) + node3 = component_func(component_in_number=1, component_in_path=component_in_path) node3.identity = ManagedIdentityConfiguration() pipeline = pipeline_func(component_in_path=job_input) pipeline_job = client.jobs.create_or_update(pipeline, compute="cpu-cluster") - omit_fields = [ - "jobs.*.componentId", - "jobs.*._source" - ] + omit_fields = ["jobs.*.componentId", "jobs.*._source"] actual_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict()["properties"], *omit_fields) assert actual_dict["jobs"] == { - 'node1': {'identity': {'type': 'aml_token'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 
'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}'}}, - 'name': 'node1', - 'type': 'command'}, - 'node2': {'identity': {'type': 'user_identity'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}'}}, - 'name': 'node2', - 'type': 'command'}, - 'node3': {'identity': {'type': 'managed_identity'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}'}}, - 'name': 'node3', - 'type': 'command'} + "node1": { + "identity": {"type": "aml_token"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, + "name": "node1", + "type": "command", + }, + "node2": { + "identity": {"type": "user_identity"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, + "name": "node2", + "type": "command", + }, + "node3": { + "identity": {"type": "managed_identity"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, + "name": "node3", + "type": "command", + }, } def test_default_pipeline_job_services(self, client: MLClient, randstr: Callable[[str], str]) -> None: @@ -2464,8 +2470,8 @@ def my_pipeline() -> Outputs: pipeline_job = my_pipeline() # overwrite group outputs mode will appear in pipeline job&component level - expected_outputs = {'output1': {'description': 'new description', 'type': 'uri_folder'}} - expected_job_outputs = {'output1': {'description': 'new description', 'job_output_type': 'uri_folder'}} + expected_outputs = {"output1": {"description": "new description", "type": "uri_folder"}} + expected_job_outputs = {"output1": {"description": "new description", "job_output_type": "uri_folder"}} rest_job_dict = pipeline_job._to_rest_object().as_dict() # assert pipeline job level mode overwrite @@ -2498,8 +2504,8 @@ def my_pipeline() -> Outputs: pipeline_job = my_pipeline() # overwrite group outputs mode will appear in pipeline job&component level - expected_job_outputs = {'output1': {'mode': 'Upload', 'job_output_type': 'uri_folder'}} - expected_outputs = {'output1': {'mode': 'upload', 'type': 'uri_folder'}} + expected_job_outputs = {"output1": {"mode": "Upload", "job_output_type": "uri_folder"}} + expected_outputs = {"output1": {"mode": "upload", "type": "uri_folder"}} rest_job_dict = pipeline_job._to_rest_object().as_dict() # assert pipeline job level mode overwrite assert rest_job_dict["properties"]["outputs"] == expected_job_outputs @@ -2528,28 +2534,44 @@ def my_pipeline(): assert_job_cancel(pipeline_job, client) def test_register_output_sdk(self, client: MLClient): - from azure.ai.ml.sweep import BanditPolicy, Choice, LogNormal, LogUniform, Normal, QLogNormal, QLogUniform, QNormal, QUniform, Randint, Uniform + from azure.ai.ml.sweep import ( + BanditPolicy, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, + ) component = load_component(source="./tests/test_configs/components/helloworld_component.yml") - 
component_input = Input(type='uri_file', path='https://dprepdata.blob.core.windows.net/demo/Titanic.csv') + component_input = Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv") spark_component = load_component(source="./tests/test_configs/spark_component/component.yml") - parallel_component = load_component(source="./tests/test_configs/components/parallel_component_with_file_input.yml") + parallel_component = load_component( + source="./tests/test_configs/components/parallel_component_with_file_input.yml" + ) sweep_component = load_component(source="./tests/test_configs/components/helloworld_component_for_sweep.yml") @dsl.pipeline() def register_node_output(): node = component(component_in_path=component_input) - node.outputs.component_out_path.name = 'a_output' - node.outputs.component_out_path.version = '1' + node.outputs.component_out_path.name = "a_output" + node.outputs.component_out_path.version = "1" spark_node = spark_component(file_input=component_input) spark_node.compute = "cpu-cluster" - spark_node.outputs.output.name = 'spark_output' - spark_node.outputs.output.version = '1' + spark_node.outputs.output.name = "spark_output" + spark_node.outputs.output.version = "1" - parallel_node = parallel_component(job_data_path=Input(type='mltable', path='https://dprepdata.blob.core.windows.net/demo/Titanic.csv')) + parallel_node = parallel_component( + job_data_path=Input(type="mltable", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv") + ) parallel_node.outputs.job_output_path.name = "parallel_output" - parallel_node.outputs.job_output_path.version = '123_parallel' + parallel_node.outputs.job_output_path.version = "123_parallel" cmd_node1 = sweep_component( batch_size=Choice([25, 35]), @@ -2576,62 +2598,58 @@ def register_node_output(): sweep_node.compute = "cpu-cluster" sweep_node.set_limits(max_total_trials=2, max_concurrent_trials=3, timeout=600) sweep_node.early_termination = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=1) - sweep_node.outputs.trained_model_dir.name = 'sweep_output' - sweep_node.outputs.trained_model_dir.version = 'sweep_2' + sweep_node.outputs.trained_model_dir.name = "sweep_output" + sweep_node.outputs.trained_model_dir.version = "sweep_2" pipeline = register_node_output() pipeline.settings.default_compute = "azureml:cpu-cluster" pipeline_job = assert_job_cancel(pipeline, client) - output = pipeline_job.jobs['node'].outputs.component_out_path - assert output.name == 'a_output' - assert output.version == '1' - output = pipeline_job.jobs['spark_node'].outputs.output - assert output.name == 'spark_output' - assert output.version == '1' - output = pipeline_job.jobs['parallel_node'].outputs.job_output_path - assert output.name == 'parallel_output' - assert output.version == '123_parallel' - output = pipeline_job.jobs['sweep_node'].outputs.trained_model_dir - assert output.name == 'sweep_output' - assert output.version == 'sweep_2' + output = pipeline_job.jobs["node"].outputs.component_out_path + assert output.name == "a_output" + assert output.version == "1" + output = pipeline_job.jobs["spark_node"].outputs.output + assert output.name == "spark_output" + assert output.version == "1" + output = pipeline_job.jobs["parallel_node"].outputs.job_output_path + assert output.name == "parallel_output" + assert output.version == "123_parallel" + output = pipeline_job.jobs["sweep_node"].outputs.trained_model_dir + assert output.name == "sweep_output" + assert output.version == "sweep_2" @dsl.pipeline() 
def register_pipeline_output(): node = component(component_in_path=component_input) - return { - 'pipeine_a_output': node.outputs.component_out_path - } + return {"pipeine_a_output": node.outputs.component_out_path} pipeline = register_pipeline_output() - pipeline.outputs.pipeine_a_output.name = 'a_output' - pipeline.outputs.pipeine_a_output.version = '1' + pipeline.outputs.pipeine_a_output.name = "a_output" + pipeline.outputs.pipeine_a_output.version = "1" pipeline.settings.default_compute = "azureml:cpu-cluster" pipeline_job = assert_job_cancel(pipeline, client) output = pipeline_job.outputs.pipeine_a_output - assert output.name == 'a_output' - assert output.version == '1' + assert output.name == "a_output" + assert output.version == "1" @dsl.pipeline() def register_both_output(): node = component(component_in_path=component_input) - node.outputs.component_out_path.name = 'a_output' - node.outputs.component_out_path.version = '1' - return { - 'pipeine_a_output': node.outputs.component_out_path - } + node.outputs.component_out_path.name = "a_output" + node.outputs.component_out_path.version = "1" + return {"pipeine_a_output": node.outputs.component_out_path} pipeline = register_both_output() - pipeline.outputs.pipeine_a_output.name = 'b_output' - pipeline.outputs.pipeine_a_output.version = '2' + pipeline.outputs.pipeine_a_output.name = "b_output" + pipeline.outputs.pipeine_a_output.version = "2" pipeline.settings.default_compute = "azureml:cpu-cluster" pipeline_job = assert_job_cancel(pipeline, client) pipeline_output = pipeline_job.outputs.pipeine_a_output - assert pipeline_output.name == 'b_output' - assert pipeline_output.version == '2' - node_output = pipeline_job.jobs['node'].outputs.component_out_path - assert node_output.name == 'a_output' - assert node_output.version == '1' + assert pipeline_output.name == "b_output" + assert pipeline_output.version == "2" + node_output = pipeline_job.jobs["node"].outputs.component_out_path + assert node_output.name == "a_output" + assert node_output.version == "1" @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") def test_dsl_pipeline_with_data_transfer_copy_2urifolder(self, client: MLClient) -> None: @@ -2646,27 +2664,29 @@ def test_dsl_pipeline_with_data_transfer_copy_2urifolder(self, client: MLClient) actual_job = omit_with_wildcard(pipeline_job._to_rest_object().properties.as_dict(), *common_omit_fields) expected_job = { - 'description': 'submit a pipeline with data transfer copy job', - 'inputs': {'cosmos_folder': {'job_input_type': 'uri_folder', - 'mode': 'ReadOnlyMount'}, - 'cosmos_folder_dup': {'job_input_type': 'uri_folder', - 'mode': 'ReadOnlyMount'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'merge_files': {'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder_dup}}'}}, - 'name': 'merge_files', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer'}}, - 'outputs': {'merged_blob': {'job_output_type': 'uri_folder', - 'mode': 'ReadWriteMount'}}, - 'settings': {'default_compute': 'adftest'}, - 'tags': {} + "description": "submit a pipeline with data transfer copy job", + "inputs": { + "cosmos_folder": {"job_input_type": "uri_folder", "mode": "ReadOnlyMount"}, + "cosmos_folder_dup": {"job_input_type": "uri_folder", 
"mode": "ReadOnlyMount"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "merge_files": { + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder_dup}}"}, + }, + "name": "merge_files", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", + } + }, + "outputs": {"merged_blob": {"job_output_type": "uri_folder", "mode": "ReadWriteMount"}}, + "settings": {"default_compute": "adftest"}, + "tags": {}, } assert expected_job == actual_job @@ -2686,14 +2706,15 @@ def pipeline(): pipeline_job = assert_job_cancel(pipeline_job, client) job_dict = pipeline_job._to_dict() expected_node_output_dict = { - 'component_out_path': '${{parent.outputs.component_out_path}}', + "component_out_path": "${{parent.outputs.component_out_path}}", } expected_pipeline_output_dict = { - 'component_out_path': { + "component_out_path": { # default mode added by mt, default type added by SDK - 'mode': 'rw_mount', 'type': 'uri_folder', + "mode": "rw_mount", + "type": "uri_folder", # node level config will be copied to pipeline level - 'path': 'azureml://datastores/workspaceblobstore/paths/outputs/1', + "path": "azureml://datastores/workspaceblobstore/paths/outputs/1", } } assert job_dict["jobs"]["node1"]["outputs"] == expected_node_output_dict @@ -2720,13 +2741,13 @@ def outer_pipeline(): pipeline_job = outer_pipeline() pipeline_job_dict = pipeline_job._to_dict() assert pipeline_job_dict["outputs"] == { - 'component_out_path': {'path': 'azureml://datastores/workspaceblobstore/paths/outputs/1'} + "component_out_path": {"path": "azureml://datastores/workspaceblobstore/paths/outputs/1"} } pipeline_component = pipeline_job.jobs["node1"].component pipeline_component_dict = pipeline_component._to_dict() - assert pipeline_component_dict["outputs"] == {'component_out_path': {'type': 'uri_folder'}} + assert pipeline_component_dict["outputs"] == {"component_out_path": {"type": "uri_folder"}} assert pipeline_component_dict["jobs"]["node1"]["outputs"] == { - 'component_out_path': '${{parent.outputs.component_out_path}}' + "component_out_path": "${{parent.outputs.component_out_path}}" } pipeline_job.settings.default_compute = "cpu-cluster" @@ -2734,55 +2755,50 @@ def outer_pipeline(): client.jobs.begin_cancel(pipeline_job.name) job_dict = pipeline_job._to_dict() # outer pipeline's node1 should have the output setting - assert job_dict["jobs"]["node1"]["outputs"] == { - 'component_out_path': '${{parent.outputs.component_out_path}}' - } + assert job_dict["jobs"]["node1"]["outputs"] == {"component_out_path": "${{parent.outputs.component_out_path}}"} assert job_dict["outputs"] == { - 'component_out_path': { - 'mode': 'rw_mount', 'type': 'uri_folder', + "component_out_path": { + "mode": "rw_mount", + "type": "uri_folder", # node level config will be copied to pipeline level - 'path': 'azureml://datastores/workspaceblobstore/paths/outputs/1', + "path": "azureml://datastores/workspaceblobstore/paths/outputs/1", } } @pytest.mark.disable_mock_code_hash def test_register_output_sdk_succeed(self, client: MLClient): component = load_component(source="./tests/test_configs/components/helloworld_component.yml") - component_input = Input(type='uri_file', path='https://dprepdata.blob.core.windows.net/demo/Titanic.csv') + component_input = Input(type="uri_file", 
path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv") @dsl.pipeline() def sub_pipeline(): node = component(component_in_path=component_input) - node.outputs.component_out_path.name = 'sub_pipeline_output' - node.outputs.component_out_path.version = 'v1' - return { - 'sub_pipeine_a_output': node.outputs.component_out_path - } + node.outputs.component_out_path.name = "sub_pipeline_output" + node.outputs.component_out_path.version = "v1" + return {"sub_pipeine_a_output": node.outputs.component_out_path} @dsl.pipeline() def register_both_output(): # register NodeOutput which is binding to PipelineOutput node = component(component_in_path=component_input) - node.outputs.component_out_path.name = 'n1_output' - node.outputs.component_out_path.version = 'v1' + node.outputs.component_out_path.name = "n1_output" + node.outputs.component_out_path.version = "v1" # register NodeOutput which isn't binding to PipelineOutput node_2 = component(component_in_path=component_input) - node_2.outputs.component_out_path.name = 'n2_output' - node_2.outputs.component_out_path.version = 'v1' + node_2.outputs.component_out_path.name = "n2_output" + node_2.outputs.component_out_path.version = "v1" # register NodeOutput of subgraph sub_node = sub_pipeline() - sub_node.outputs.sub_pipeine_a_output.name = 'sub_pipeline' - sub_node.outputs.sub_pipeine_a_output.version = 'v1' + sub_node.outputs.sub_pipeine_a_output.name = "sub_pipeline" + sub_node.outputs.sub_pipeine_a_output.version = "v1" - return { - 'pipeine_a_output': node.outputs.component_out_path - } + return {"pipeine_a_output": node.outputs.component_out_path} pipeline = register_both_output() - pipeline.outputs.pipeine_a_output.name = 'p1_output' - pipeline.outputs.pipeine_a_output.version = 'v1' + pipeline.outputs.pipeine_a_output.name = "p1_output" + pipeline.outputs.pipeine_a_output.version = "v1" pipeline.settings.default_compute = "cpu-cluster" pipeline_job = client.jobs.create_or_update(pipeline) client.jobs.stream(pipeline_job.name) @@ -2792,6 +2808,10 @@ def check_name_version_and_register_succeed(output, output_name, output_version) assert output.version == output_version assert client.data.get(name=output_name, version=output_version) - check_name_version_and_register_succeed(pipeline_job.outputs.pipeine_a_output, 'p1_output', 'v1') - check_name_version_and_register_succeed(pipeline_job.jobs['node_2'].outputs.component_out_path, 'n2_output', 'v1') - check_name_version_and_register_succeed(pipeline_job.jobs['sub_node'].outputs.sub_pipeine_a_output, 'sub_pipeline', 'v1') + check_name_version_and_register_succeed(pipeline_job.outputs.pipeine_a_output, "p1_output", "v1") + check_name_version_and_register_succeed( + pipeline_job.jobs["node_2"].outputs.component_out_path, "n2_output", "v1" + ) + check_name_version_and_register_succeed( + pipeline_job.jobs["sub_node"].outputs.sub_pipeine_a_output, "sub_pipeline", "v1" + ) diff --git a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_on_registry.py b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_on_registry.py index 4f97e2616578..e4947a8a97aa 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_on_registry.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_on_registry.py @@ -30,7 +30,7 @@ def score_pipeline(model_input, test_data): class TestDSLPipelineOnRegistry(AzureRecordedTestCase): test_data = Input( type=AssetTypes.URI_FILE, - path="./tests/test_configs/pipeline_jobs/job_with_registry_model_as_input/data/sample1.csv" + 
path="./tests/test_configs/pipeline_jobs/job_with_registry_model_as_input/data/sample1.csv", ) def test_pipeline_job_create_with_registered_component_on_registry(self, pipelines_registry_client: MLClient): @@ -86,9 +86,7 @@ def test_pipeline_with_registry_component_and_model_as_input( ): # load_component score_component_name, component_version = "score_component", "2" - score_func = pipelines_registry_client.components.get( - name=score_component_name, version=component_version - ) + score_func = pipelines_registry_client.components.get(name=score_component_name, version=component_version) pipeline_score_model = Input( type="mlflow_model", path="azureml://registries/sdk-test/models/iris_model/versions/1" diff --git a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_samples.py b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_samples.py index 5d4f7322c1fe..69488ead6425 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_samples.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_samples.py @@ -332,21 +332,28 @@ def test_spark_job_with_multiple_node_in_pipeline(self, client: MLClient) -> Non assert_job_cancel(pipeline, client) @pytest.mark.e2etest - def test_spark_job_with_builder_in_pipeline_without_entry(self, client: MLClient,) -> None: + def test_spark_job_with_builder_in_pipeline_without_entry( + self, + client: MLClient, + ) -> None: from test_configs.dsl_pipeline.spark_job_in_pipeline.invalid_pipeline import ( generate_dsl_pipeline_from_builder_without_entry as spark_job_in_pipeline, ) + pipeline = spark_job_in_pipeline() with pytest.raises(Exception) as ex: created_job = client.jobs.create_or_update(pipeline) - assert '{\n "result": "Failed",\n "errors": [\n {\n "message": "Missing data for required field.",' \ - '\n "path": "jobs.add_greeting_column.component.entry",\n "value": null\n }\n ]\n}' == str(ex.value) - + assert ( + '{\n "result": "Failed",\n "errors": [\n {\n "message": "Missing data for required field.",' + '\n "path": "jobs.add_greeting_column.component.entry",\n "value": null\n }\n ]\n}' + == str(ex.value) + ) + validation_result = client.jobs.validate(pipeline) assert validation_result.passed is False assert validation_result.error_messages == { - "jobs.add_greeting_column.component.entry": 'Missing data for required field.', + "jobs.add_greeting_column.component.entry": "Missing data for required field.", } @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") @@ -365,6 +372,7 @@ def test_data_transfer_copy_2urifolder_job_with_builder_in_pipeline(self, client from test_configs.dsl_pipeline.data_transfer_job_in_pipeline.copy_data.pipeline import ( generate_dsl_pipeline_from_builder as data_transfer_job_in_pipeline, ) + pipeline = data_transfer_job_in_pipeline() assert_job_cancel(pipeline, client) diff --git a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_with_specific_nodes.py b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_with_specific_nodes.py index 35f82e4d9a8e..bd96388f43e1 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_with_specific_nodes.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline_with_specific_nodes.py @@ -10,8 +10,12 @@ from pytest_mock import MockFixture from azure.ai.ml.operations._operation_orchestrator import OperationOrchestrator -from test_utilities.utils import _PYTEST_TIMEOUT_METHOD, assert_job_cancel, submit_and_cancel_new_dsl_pipeline, \ - omit_with_wildcard +from test_utilities.utils import ( + 
_PYTEST_TIMEOUT_METHOD, + assert_job_cancel, + submit_and_cancel_new_dsl_pipeline, + omit_with_wildcard, +) from azure.ai.ml import ( Input, @@ -79,15 +83,20 @@ def pipeline_leaf(component_in_path: Input): component_func1 = load_component(source=path) component_func1(component_in_path=component_in_path, component_in_number=1) - component_func2 = load_component(source=path, params_override=[{ - "name": "another_component_name", - "version": "another_component_version", - }]) + component_func2 = load_component( + source=path, + params_override=[ + { + "name": "another_component_name", + "version": "another_component_version", + } + ], + ) component_func2(component_in_path=component_in_path, component_in_number=1) - component_func3 = load_component(source=path, params_override=[{ - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:2" - }]) + component_func3 = load_component( + source=path, params_override=[{"environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:2"}] + ) component_func3(component_in_path=component_in_path, component_in_number=1) component_func4 = load_component(source=path) @@ -103,6 +112,7 @@ def pipeline_mid(job_in_path: Input): def pipeline_root(job_in_path: Input): pipeline_mid(job_in_path) pipeline_mid(job_in_path) + return pipeline_root @staticmethod @@ -221,9 +231,7 @@ def test_dsl_pipeline_component_cache_in_resolver(self, client: MLClient) -> Non pipeline_root = self._generate_multi_layer_pipeline_func() _submit_and_cancel = partial( - submit_and_cancel_new_dsl_pipeline, - client=client, - job_in_path=Input(path=input_data_path) + submit_and_cancel_new_dsl_pipeline, client=client, job_in_path=Input(path=input_data_path) ) def _mock_get_component_arm_id(_component: Component) -> str: @@ -236,18 +244,14 @@ def _mock_get_component_arm_id(_component: Component) -> str: return _component.id with mock.patch.object( - OperationOrchestrator, - "_get_component_arm_id", - side_effect=_mock_get_component_arm_id + OperationOrchestrator, "_get_component_arm_id", side_effect=_mock_get_component_arm_id ) as mock_resolve: _submit_and_cancel(pipeline_root) # pipeline_leaf, pipeline_mid and 3 command components will be resolved assert mock_resolve.call_count == 5 with mock.patch.object( - OperationOrchestrator, - "_get_component_arm_id", - side_effect=_mock_get_component_arm_id + OperationOrchestrator, "_get_component_arm_id", side_effect=_mock_get_component_arm_id ) as mock_resolve: _submit_and_cancel(pipeline_root) # no more requests to resolve components as local cache is hit @@ -258,9 +262,7 @@ def _mock_get_component_arm_id(_component: Component) -> str: leaf_subgraph = pipeline_job.jobs["pipeline_mid"].component.jobs["pipeline_leaf"].component leaf_subgraph.jobs["another_component_name"].component.command += " & echo updated2" with mock.patch.object( - OperationOrchestrator, - "_get_component_arm_id", - side_effect=_mock_get_component_arm_id + OperationOrchestrator, "_get_component_arm_id", side_effect=_mock_get_component_arm_id ) as mock_resolve: assert_job_cancel(pipeline_job, client) # updated command component and its parents (pipeline_leaf and pipeline_mid) will be resolved @@ -293,11 +295,11 @@ def test_dsl_pipeline_concurrent_component_registration(self, client: MLClient, # TODO: test with multiple pipelines after server-side return jobs for pipeline component for _ in range(0, 0): assert omit_with_wildcard(base._to_dict(), *omit_fields) == omit_with_wildcard( - treat._to_dict(), *omit_fields) + treat._to_dict(), *omit_fields + ) base = 
_get_component_in_first_child(base, client) treat = _get_component_in_first_child(treat, client) # The last layer contains the command components omit_fields.pop() - assert omit_with_wildcard(base._to_dict(), *omit_fields) == omit_with_wildcard( - treat._to_dict(), *omit_fields) + assert omit_with_wildcard(base._to_dict(), *omit_fields) == omit_with_wildcard(treat._to_dict(), *omit_fields) diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_command_builder.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_command_builder.py index d784c531ed83..972e580c8cee 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_command_builder.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_command_builder.py @@ -20,7 +20,13 @@ from azure.ai.ml.dsl import pipeline from azure.ai.ml.entities import CommandJobLimits, JobResourceConfiguration from azure.ai.ml.entities._builders import Command -from azure.ai.ml.entities._job.job_service import JobService, JupyterLabJobService, SshJobService, TensorBoardJobService, VsCodeJobService +from azure.ai.ml.entities._job.job_service import ( + JobService, + JupyterLabJobService, + SshJobService, + TensorBoardJobService, + VsCodeJobService, +) from azure.ai.ml.entities._job.pipeline._component_translatable import ComponentTranslatableMixin from azure.ai.ml.exceptions import JobException, ValidationException @@ -744,9 +750,9 @@ def test_executor_instances_is_mandatory_when_dynamic_allocation_disabled(self): ) node._to_rest_object() assert ( - ve.message - == "spark.driver.cores, spark.driver.memory, spark.executor.cores, spark.executor.memory and " - "spark.executor.instances are mandatory fields." + ve.message + == "spark.driver.cores, spark.driver.memory, spark.executor.cores, spark.executor.memory and " + "spark.executor.instances are mandatory fields." 
) def test_executor_instances_is_specified_as_min_executor_if_unset(self): @@ -921,9 +927,7 @@ def test_command_services(self) -> None: def test_command_services_subtypes(self) -> None: services = { "my_ssh": SshJobService(), - "my_tensorboard": TensorBoardJobService( - log_dir="~/tblog" - ), + "my_tensorboard": TensorBoardJobService(log_dir="~/tblog"), "my_jupyterlab": JupyterLabJobService(), "my_vscode": VsCodeJobService(), } @@ -936,7 +940,7 @@ def test_command_services_subtypes(self) -> None: }, }, "my_jupyterlab": {"job_service_type": "JupyterLab"}, - "my_vscode": { "job_service_type": "VSCode"}, + "my_vscode": {"job_service_type": "VSCode"}, } node = command( name="interactive-command-job", @@ -953,7 +957,6 @@ def test_command_services_subtypes(self) -> None: assert isinstance(node_services.get("my_jupyterlab"), JupyterLabJobService) assert isinstance(node_services.get("my_vscode"), VsCodeJobService) - command_job_services = node._to_job().services assert isinstance(command_job_services.get("my_ssh"), SshJobService) assert isinstance(command_job_services.get("my_tensorboard"), TensorBoardJobService) @@ -1023,15 +1026,17 @@ def my_pipeline(): def test_set_identity(self, test_command): from azure.ai.ml.entities._credentials import AmlTokenConfiguration + node1 = test_command() node2 = node1() node2.identity = AmlTokenConfiguration() node3 = node1() - node3.identity = {'type': 'AMLToken'} + node3.identity = {"type": "AMLToken"} assert node2.identity == node3.identity def test_sweep_set_search_space(self, test_command): from azure.ai.ml.entities._job.sweep.search_space import Choice + node1 = test_command() command_node_to_sweep_1 = node1() sweep_node_1 = command_node_to_sweep_1.sweep( @@ -1039,7 +1044,7 @@ def test_sweep_set_search_space(self, test_command): goal="maximize", sampling_algorithm="random", ) - sweep_node_1.search_space = {'batch_size': {'type': 'choice', 'values': [25, 35]}} + sweep_node_1.search_space = {"batch_size": {"type": "choice", "values": [25, 35]}} command_node_to_sweep_2 = node1() sweep_node_2 = command_node_to_sweep_2.sweep( @@ -1047,13 +1052,15 @@ def test_sweep_set_search_space(self, test_command): goal="maximize", sampling_algorithm="random", ) - sweep_node_2.search_space = {'batch_size': Choice(values=[25, 35])} + sweep_node_2.search_space = {"batch_size": Choice(values=[25, 35])} assert sweep_node_1.search_space == sweep_node_2.search_space def test_unsupported_positional_args(self, test_command): with pytest.raises(ValidationException) as e: test_command(1) - msg = "Component function doesn't support positional arguments, got (1,) " \ - "for my_job. Please use keyword arguments like: " \ - "component_func(float=xxx, integer=xxx, string=xxx, boolean=xxx, uri_folder=xxx, uri_file=xxx)." + msg = ( + "Component function doesn't support positional arguments, got (1,) " + "for my_job. Please use keyword arguments like: " + "component_func(float=xxx, integer=xxx, string=xxx, boolean=xxx, uri_folder=xxx, uri_file=xxx)." + ) assert msg in str(e.value) diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_component_func.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_component_func.py index d6e8c9f90e12..277888516ec0 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_component_func.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_component_func.py @@ -69,9 +69,7 @@ def test_generate_component_function(self) -> None: # hint user when component func don't take any parameters. 
with pytest.raises(ValidationException) as error_info: new_func(10) - assert ( - "Component function doesn't has any parameters" - ) in str(error_info.value) + assert ("Component function doesn't has any parameters") in str(error_info.value) def test_required_component_inputs_missing(self): component_func = load_component(source="./tests/test_configs/components/helloworld_component.yml") @@ -183,9 +181,8 @@ def test_component_outputs(self): # non-existent output with pytest.raises( - UnexpectedAttributeError, - match="Got an unexpected attribute 'component_out_path_non', " - "valid attributes: 'component_out_path'." + UnexpectedAttributeError, + match="Got an unexpected attribute 'component_out_path_non', " "valid attributes: 'component_out_path'.", ): component.outputs["component_out_path_non"].path = test_output_path @@ -196,7 +193,9 @@ def test_component_outputs(self): assert component._build_outputs() == {"component_out_path": output_data} # set output via output binding - component.outputs.component_out_path._data = PipelineOutput(port_name="pipeline_output", owner="pipeline", meta=None) + component.outputs.component_out_path._data = PipelineOutput( + port_name="pipeline_output", owner="pipeline", meta=None + ) assert component._build_outputs() == { "component_out_path": Output(path="${{parent.outputs.pipeline_output}}", type="uri_folder", mode=None) } diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_controlflow_pipeline.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_controlflow_pipeline.py index a624b8a464f2..8fa267fdc4da 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_controlflow_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_controlflow_pipeline.py @@ -12,7 +12,7 @@ @pytest.mark.usefixtures( "enable_pipeline_private_preview_features", "enable_private_preview_schema_features", - "enable_private_preview_pipeline_node_types" + "enable_private_preview_pipeline_node_types", ) @pytest.mark.timeout(_DSL_TIMEOUT_SECOND) @pytest.mark.unittest @@ -29,7 +29,8 @@ class TestParallelForPipelineUT(TestControlFlowPipelineUT): def test_dsl_parallel_for_pipeline_illegal_cases(self): # body unsupported parallel_component = load_component( - source="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/score.yml") + source="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/score.yml" + ) basic_component = load_component( source="./tests/test_configs/components/component_with_conditional_output/spec.yaml" @@ -40,19 +41,15 @@ def invalid_pipeline(test_path1, test_path2): body = parallel_component() parallel_for( body=body, - items=[ - {"job_data_path": test_path1}, - {"job_data_path": test_path2} - ], + items=[{"job_data_path": test_path1}, {"job_data_path": test_path2}], ) with pytest.raises(ValidationException) as e: - invalid_pipeline( - test_path1=Input(path="test_path1"), - test_path2=Input(path="test_path2") - ) - assert "Expecting (, " \ - ") for body" in str(e.value) + invalid_pipeline(test_path1=Input(path="test_path1"), test_path2=Input(path="test_path2")) + assert ( + "Expecting (, " + ") for body" in str(e.value) + ) # items with invalid type @@ -73,64 +70,58 @@ def invalid_pipeline(): "items, error_message", [ ( - # items with invalid content type - {"a": 1}, - "but got for 1.", + # items with invalid content type + {"a": 1}, + "but got for 1.", ), ( - # items with empty dict as content - [], - "Items is an empty list/dict" - + # items with empty dict as content + [], + "Items is an empty list/dict", ), ( - # item meta not 
match - [ - {"component_in_path": "test_path1"}, - {"component_in_path": "test_path2", "component_in_number": 1} - ], - "Items should have same keys with body inputs, but got " + # item meta not match + [{"component_in_path": "test_path1"}, {"component_in_path": "test_path2", "component_in_number": 1}], + "Items should have same keys with body inputs, but got ", ), ( - # item inputs not exist in body - [ - {"job_data_path": "test_path1"}, - ], - "Item {\'job_data_path\': \'test_path1\'} got unmatched inputs " + # item inputs not exist in body + [ + {"job_data_path": "test_path1"}, + ], + "Item {'job_data_path': 'test_path1'} got unmatched inputs ", ), ( - # invalid JSON string items - '[{"component_in_number": 1}, {}]', - "Items should have same keys with body inputs" + # invalid JSON string items + '[{"component_in_number": 1}, {}]', + "Items should have same keys with body inputs", ), ( - # unsupported item value type - [ - {"component_in_number": CustomizedObject()}, - ], - "Unsupported type" + # unsupported item value type + [ + {"component_in_number": CustomizedObject()}, + ], + "Unsupported type", ), ( - # local file input - [{"component_in_path": Input(path="./tests/test_configs/components/helloworld_component.yml")}], - "Local file input" + # local file input + [{"component_in_path": Input(path="./tests/test_configs/components/helloworld_component.yml")}], + "Local file input", ), ( - # empty path - [{"component_in_path": Input(path=None)}], - "Input path not provided" + # empty path + [{"component_in_path": Input(path=None)}], + "Input path not provided", ), ( - # dict Input - [{"component_in_path": {"job_input_path": "azureml://path/to/file"}}], - "Unsupported type" + # dict Input + [{"component_in_path": {"job_input_path": "azureml://path/to/file"}}], + "Unsupported type", ), ], ) def test_dsl_parallel_for_pipeline_illegal_items_content(self, items, error_message): - basic_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) + basic_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") @pipeline def invalid_pipeline(): @@ -153,9 +144,7 @@ def invalid_pipeline(): ), ) def test_dsl_parallel_for_pipeline_legal_items_content(self, items): - basic_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) + basic_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") @pipeline def valid_pipeline(): @@ -170,12 +159,8 @@ def valid_pipeline(): def test_dsl_parallel_for_pipeline_items(self): # TODO: submit those pipelines - basic_component = load_component( - source="./tests/test_configs/components/helloworld_component.yml" - ) - complex_component = load_component( - source="./tests/test_configs/components/input_types_component.yml" - ) + basic_component = load_component(source="./tests/test_configs/components/helloworld_component.yml") + complex_component = load_component(source="./tests/test_configs/components/input_types_component.yml") # binding in items @@ -184,32 +169,22 @@ def my_pipeline(test_path1, test_path2): body = basic_component() parallel_for( body=body, - items=[ - {"component_in_path": test_path1}, - {"component_in_path": test_path2} - ], + items=[{"component_in_path": test_path1}, {"component_in_path": test_path2}], ) - my_job = my_pipeline( - test_path1=Input(path="test_path1"), - test_path2=Input(path="test_path2") - ) + my_job = my_pipeline(test_path1=Input(path="test_path1"), 
test_path2=Input(path="test_path2")) rest_job = my_job._to_rest_object().as_dict() rest_items = rest_job["properties"]["jobs"]["parallelfor"]["items"] - assert rest_items == '[{"component_in_path": "${{parent.inputs.test_path1}}"}, ' \ - '{"component_in_path": "${{parent.inputs.test_path2}}"}]' + assert ( + rest_items == '[{"component_in_path": "${{parent.inputs.test_path1}}"}, ' + '{"component_in_path": "${{parent.inputs.test_path2}}"}]' + ) # dict items @pipeline def my_pipeline(): body = basic_component(component_in_path=Input(path="test_path1")) - parallel_for( - body=body, - items={ - "iter1": {"component_in_number": 1}, - "iter2": {"component_in_number": 2} - } - ) + parallel_for(body=body, items={"iter1": {"component_in_number": 1}, "iter2": {"component_in_number": 2}}) my_job = my_pipeline() rest_job = my_job._to_rest_object().as_dict() @@ -220,10 +195,7 @@ def my_pipeline(): @pipeline def my_pipeline(pipeline_input: str): body = basic_component(component_in_path=Input(path="test_path1")) - parallel_for( - body=body, - items=pipeline_input - ) + parallel_for(body=body, items=pipeline_input) my_job = my_pipeline(pipeline_input='[{"component_in_number": 1}, {"component_in_number": 2}]') rest_job = my_job._to_rest_object().as_dict() @@ -234,10 +206,7 @@ def my_pipeline(pipeline_input: str): @pipeline def my_pipeline(): body = basic_component(component_in_path=Input(path="test_path1")) - parallel_for( - body=body, - items='[{"component_in_number": 1}, {"component_in_number": 2}]' - ) + parallel_for(body=body, items='[{"component_in_number": 1}, {"component_in_number": 2}]') my_job = my_pipeline() rest_job = my_job._to_rest_object().as_dict() @@ -256,25 +225,25 @@ def my_pipeline(): component_in_ranged_integer=10, component_in_enum="world", component_in_boolean=True, - component_in_ranged_number=5.5), - ] + component_in_ranged_number=5.5, + ), + ], ) my_job = my_pipeline() rest_job = my_job._to_rest_object().as_dict() rest_items = rest_job["properties"]["jobs"]["parallelfor"]["items"] - assert rest_items == '[{"component_in_string": "component_in_string", ' \ - '"component_in_ranged_integer": 10, "component_in_enum": "world", ' \ - '"component_in_boolean": true, "component_in_ranged_number": 5.5}]' + assert ( + rest_items == '[{"component_in_string": "component_in_string", ' + '"component_in_ranged_integer": 10, "component_in_enum": "world", ' + '"component_in_boolean": true, "component_in_ranged_number": 5.5}]' + ) # JSON string items @pipeline def my_pipeline(): body = basic_component(component_in_path=Input(path="test_path1")) - parallel_for( - body=body, - items='[{"component_in_number": 1}, {"component_in_number": 2}]' - ) + parallel_for(body=body, items='[{"component_in_number": 1}, {"component_in_number": 2}]') my_job = my_pipeline() rest_job = my_job._to_rest_object().as_dict() @@ -284,25 +253,23 @@ def my_pipeline(): @pytest.mark.parametrize( "output_dict, pipeline_out_dict, component_out_dict, check_pipeline_job", [ - ({"type": "uri_file"}, {'job_output_type': 'mltable'}, {'type': 'mltable'}, True), - ({"type": "uri_folder"}, {'job_output_type': 'mltable'}, {'type': 'mltable'}, True), - ({"type": "mltable"}, {'job_output_type': 'mltable'}, {'type': 'mltable'}, True), - ({"type": "mlflow_model"}, {'job_output_type': 'mltable'}, {'type': 'mltable'}, True), - ({"type": "triton_model"}, {'job_output_type': 'mltable'}, {'type': 'mltable'}, True), - ({"type": "custom_model"}, {'job_output_type': 'mltable'}, {'type': 'mltable'}, True), - ({"type": "path"}, {'job_output_type': 
'mltable'}, {'type': 'mltable'}, True), - ({"type": "number"}, {}, {'type': 'string'}, False), - ({"type": "string", "is_control": True}, {}, {'type': 'string', "is_control": True}, False), - ({"type": "boolean", "is_control": True}, {}, {'type': 'string', "is_control": True}, False), - ({"type": "integer"}, {}, {'type': 'string'}, False), - ] + ({"type": "uri_file"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "uri_folder"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "mltable"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "mlflow_model"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "triton_model"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "custom_model"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "path"}, {"job_output_type": "mltable"}, {"type": "mltable"}, True), + ({"type": "number"}, {}, {"type": "string"}, False), + ({"type": "string", "is_control": True}, {}, {"type": "string", "is_control": True}, False), + ({"type": "boolean", "is_control": True}, {}, {"type": "string", "is_control": True}, False), + ({"type": "integer"}, {}, {"type": "string"}, False), + ], ) def test_parallel_for_outputs(self, output_dict, pipeline_out_dict, component_out_dict, check_pipeline_job): basic_component = load_component( source="./tests/test_configs/components/helloworld_component.yml", - params_override=[ - {"outputs.component_out_path": output_dict} - ] + params_override=[{"outputs.component_out_path": output_dict}], ) @pipeline @@ -310,26 +277,20 @@ def my_pipeline(): body = basic_component(component_in_path=Input(path="test_path1")) foreach_node = parallel_for( - body=body, - items={ - "iter1": {"component_in_number": 1}, - "iter2": {"component_in_number": 2} - } + body=body, items={"iter1": {"component_in_number": 1}, "iter2": {"component_in_number": 2}} ) - return { - "output": foreach_node.outputs.component_out_path - } + return {"output": foreach_node.outputs.component_out_path} my_job = my_pipeline() if check_pipeline_job: rest_job = my_job._to_rest_object().as_dict() rest_outputs = rest_job["properties"]["outputs"] - assert rest_outputs == {'output': pipeline_out_dict} + assert rest_outputs == {"output": pipeline_out_dict} pipeline_component = my_job.component rest_component = pipeline_component._to_rest_object().as_dict() - assert rest_component["properties"]["component_spec"]["outputs"] == {'output': component_out_dict} + assert rest_component["properties"]["component_spec"]["outputs"] == {"output": component_out_dict} def test_parallel_for_source(self): basic_component = load_component( @@ -341,11 +302,7 @@ def my_pipeline(): body = basic_component(component_in_path=Input(path="test_path1")) foreach_node = parallel_for( - body=body, - items={ - "iter1": {"component_in_number": 1}, - "iter2": {"component_in_number": 2} - } + body=body, items={"iter1": {"component_in_number": 1}, "iter2": {"component_in_number": 2}} ) my_job = my_pipeline() diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_group.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_group.py index 726b646308a9..18831f266f50 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_group.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_group.py @@ -16,10 +16,7 @@ from .._util import _DSL_TIMEOUT_SECOND -@pytest.mark.usefixtures( - "enable_pipeline_private_preview_features", - "enable_private_preview_schema_features" -) 
+@pytest.mark.usefixtures("enable_pipeline_private_preview_features", "enable_private_preview_schema_features") @pytest.mark.timeout(_DSL_TIMEOUT_SECOND) @pytest.mark.unittest @pytest.mark.pipeline_test @@ -80,9 +77,8 @@ class MixedGroup: sys.stdout = stdout_str_IO = StringIO() help(MixedGroup.__init__) assert ( - "__init__(self,*,int_param:int=None,str_param:str=None,enum_param:str=None," - "str_default_param:str='test',optional_int_param:int=5)->None" in stdout_str_IO.getvalue().replace(" ", - "") + "__init__(self,*,int_param:int=None,str_param:str=None,enum_param:str=None," + "str_default_param:str='test',optional_int_param:int=5)->None" in stdout_str_IO.getvalue().replace(" ", "") ) sys.stdout = original_out @@ -91,8 +87,8 @@ class MixedGroup: int_param=1, str_param="test-str", enum_param=EnumOps.Option1, str_default_param="op2", optional_int_param=4 ) assert ( - "MixedGroup(int_param=1,str_param='test-str',enum_param=," - "str_default_param='op2',optional_int_param=4)".replace(" ", "") in var.__repr__().replace(" ", "") + "MixedGroup(int_param=1,str_param='test-str',enum_param=," + "str_default_param='op2',optional_int_param=4)".replace(" ", "") in var.__repr__().replace(" ", "") ) # __set_attribute__ func test @@ -131,6 +127,7 @@ class SubGroup: param: int = 1 with pytest.raises(ValueError) as e: + @group class ItemGroup: group_param: SubGroup = "str" @@ -156,6 +153,7 @@ class Group: param: Input(type="uri_folder") with pytest.raises(UserErrorException) as e: + @pipeline def my_pipeline(param: Group): pass @@ -167,18 +165,22 @@ class Group: param: Output with pytest.raises(UserErrorException) as e: + @pipeline def my_pipeline(param: Group): pass + assert "Output annotation cannot be used in @pipeline." in str(e.value) class CustomizedObj: pass with pytest.raises(UserErrorException) as e: + @group class Group: param: CustomizedObj + assert "Unsupported annotation type" in str(e.value) def test_group_inherit(self): @@ -342,8 +344,8 @@ def str_param_pipeline(str_param, group: ParamClass): pipeline_job.inputs.str_param = ParamClass() # Note: this is the expected behavior. assert ( - pipeline_job._to_dict()["inputs"]["str_param"] - == "TestDSLGroup.test_assign_group_invalid..ParamClass(str_param='string_by_default')" + pipeline_job._to_dict()["inputs"]["str_param"] + == "TestDSLGroup.test_assign_group_invalid..ParamClass(str_param='string_by_default')" ) pipeline_job.inputs.str_param = "test" @@ -360,9 +362,11 @@ class PortOutputs: output2: Output(type="uri_folder") with pytest.raises(UserErrorException) as e: + @pipeline def my_pipeline(my_inputs: PortOutputs): pass + assert "Output annotation cannot be used in @pipeline." in str(e.value) @group @@ -373,9 +377,11 @@ class PrimitiveOutputs: output4: Output(type="boolean", is_control=True) with pytest.raises(UserErrorException) as e: + @pipeline def my_pipeline(my_inputs: PrimitiveOutputs): pass + assert "Output annotation cannot be used in @pipeline." 
in str(e.value) def test_group_port_inputs(self): @@ -407,14 +413,14 @@ class MixedGroup: sys.stdout = stdout_str_IO = StringIO() help(MixedGroup.__init__) assert ( - "__init__(self,*," - "int_param:int=None," - "str_default_param:str='test'," - "str_param:str=None," - "input_folder:{'type':'uri_folder'}=None," - "optional_int_param:int=5," - "output_folder:{'type':'uri_folder'}=None)" - "->None" in stdout_str_IO.getvalue().replace(" ", "") + "__init__(self,*," + "int_param:int=None," + "str_default_param:str='test'," + "str_param:str=None," + "input_folder:{'type':'uri_folder'}=None," + "optional_int_param:int=5," + "output_folder:{'type':'uri_folder'}=None)" + "->None" in stdout_str_IO.getvalue().replace(" ", "") ) sys.stdout = original_out @@ -423,13 +429,13 @@ class MixedGroup: int_param=1, str_param="test-str", input_folder=Input(path="input"), output_folder=Output(path="output") ) assert ( - "MixedGroup(" - "int_param=1," - "str_default_param='test'," - "str_param='test-str'," - "input_folder={'type':'uri_folder','path':'input'}," - "optional_int_param=5," - "output_folder={'type':'uri_folder','path':'output'})" in var.__repr__().replace(" ", "") + "MixedGroup(" + "int_param=1," + "str_default_param='test'," + "str_param='test-str'," + "input_folder={'type':'uri_folder','path':'input'}," + "optional_int_param=5," + "output_folder={'type':'uri_folder','path':'output'})" in var.__repr__().replace(" ", "") ) # __set_attribute__ func test @@ -439,17 +445,21 @@ class MixedGroup: def test_group_port_defaults(self): # input with pytest.raises(UserErrorException) as e: + @group class SubGroup: int_param0: Input int_param1: Input = Input(path="in1") + assert "Default value of Input 'int_param1' cannot be set" in str(e.value) with pytest.raises(UserErrorException) as e: + @group class SubGroup: out_param0: Output out_param1: Output = Output(path="out2") + assert "Default value of Output 'out_param1' cannot be set" in str(e.value) @pytest.mark.skip(reason="Input group item .result() is not supported currently.") @@ -478,14 +488,9 @@ def pipeline_with_group(group: ParamClass, int_param: int): component_in_path=group.str_param.result(), component_in_number=int_param.result() ) - return { - "output1": node1.outputs.component_out_path, - "output2": node2.outputs.component_out_path - } + return {"output1": node1.outputs.component_out_path, "output2": node2.outputs.component_out_path} - pipeline_job1 = pipeline_with_group( - group=ParamClass(str_param="str_1"), int_param=1 - ) + pipeline_job1 = pipeline_with_group(group=ParamClass(str_param="str_1"), int_param=1) common_omit_fields = [ "jobs.*.componentId", @@ -493,18 +498,18 @@ def pipeline_with_group(group: ParamClass, int_param: int): "jobs.*.properties", ] - rest_pipeline_job = omit_with_wildcard(pipeline_job1._to_rest_object().properties.as_dict(), - *common_omit_fields) + rest_pipeline_job = omit_with_wildcard( + pipeline_job1._to_rest_object().properties.as_dict(), *common_omit_fields + ) expected_pipeline_job1 = {} assert rest_pipeline_job == expected_pipeline_job1 - pipeline_job2 = pipeline_with_group( - group=ParamClass(str_param="str_2"), int_param=1 - ) + pipeline_job2 = pipeline_with_group(group=ParamClass(str_param="str_2"), int_param=1) - rest_pipeline_job = omit_with_wildcard(pipeline_job2._to_rest_object().properties.as_dict(), - *common_omit_fields) + rest_pipeline_job = omit_with_wildcard( + pipeline_job2._to_rest_object().properties.as_dict(), *common_omit_fields + ) expected_pipeline_job2 = {} assert rest_pipeline_job == 
expected_pipeline_job2 @@ -521,10 +526,7 @@ class PortOutputs: def my_pipeline_dict_return() -> PortOutputs: node1 = hello_world_component_func(component_in_number=1, component_in_path=Input(path="/a/path/on/ds")) node2 = hello_world_component_func(component_in_number=1, component_in_path=Input(path="/a/path/on/ds")) - return { - "output1": node1.outputs.component_out_path, - "output2": node2.outputs.component_out_path - } + return {"output1": node1.outputs.component_out_path, "output2": node2.outputs.component_out_path} pipeline_job1 = my_pipeline_dict_return() @@ -569,7 +571,7 @@ def pipeline_type_mismatch() -> SinglePortOutput: with pytest.raises(UserErrorException) as e: pipeline_type_mismatch() - assert "{\'type\': \'uri_folder\'} != annotation output {\'type\': \'mltable\'}" in str(e.value) + assert "{'type': 'uri_folder'} != annotation output {'type': 'mltable'}" in str(e.value) basic_component = load_component( source="./tests/test_configs/components/component_with_conditional_output/spec.yaml" @@ -595,7 +597,7 @@ def pipeline_is_control_mismatch() -> PrimitiveOutputs1: with pytest.raises(UserErrorException) as e: pipeline_is_control_mismatch() - assert "{\'type\': \'boolean\', 'is_control': True} != annotation output" in str(e.value) + assert "{'type': 'boolean', 'is_control': True} != annotation output" in str(e.value) def test_group_outputs_unsupported_annotation(self): @group @@ -611,18 +613,22 @@ class ParentOutputs: hello_world_component_func = load_component(source=hello_world_component_yaml) with pytest.raises(UserErrorException) as e: + @pipeline def my_pipeline() -> ParentOutputs: node1 = hello_world_component_func(component_in_number=1) return { "output1": node1.outputs.component_out_path, } + assert "Nested group annotation is not supported in pipeline output." 
in str(e.value) with pytest.raises(UserErrorException) as e: + @group class GroupOutputs: output1: PipelineInput + assert "Unsupported annotation type" in str(e.value) def test_input_in_output_group(self): @@ -641,5 +647,4 @@ def my_pipeline() -> Outputs: ) pipeline_job = my_pipeline() - assert pipeline_job._to_dict()["outputs"] == {'output1': {'type': 'uri_folder'}} - + assert pipeline_job._to_dict()["outputs"] == {"output1": {"type": "uri_folder"}} diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py index f58a94cbef38..f1b121acff90 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py @@ -54,10 +54,7 @@ components_dir = tests_root_dir / "test_configs/components/" -@pytest.mark.usefixtures( - "enable_pipeline_private_preview_features", - "enable_private_preview_schema_features" -) +@pytest.mark.usefixtures("enable_pipeline_private_preview_features", "enable_private_preview_schema_features") @pytest.mark.timeout(_DSL_TIMEOUT_SECOND) @pytest.mark.unittest @pytest.mark.pipeline_test @@ -230,18 +227,15 @@ def pipeline(number, path): "component_in_path": Input(path="${{parent.inputs.path}}", type="uri_folder", mode=None), } - @pytest.mark.parametrize( - "output_type", - ["uri_file", "mltable", "mlflow_model", "triton_model", "custom_model"] - ) + @pytest.mark.parametrize("output_type", ["uri_file", "mltable", "mlflow_model", "triton_model", "custom_model"]) def test_dsl_pipeline_output_type(self, output_type): yaml_file = "./tests/test_configs/components/helloworld_component.yml" @dsl.pipeline() def pipeline(number, path): - component_func = load_component(source=yaml_file, params_override=[ - {"outputs.component_out_path.type": output_type} - ]) + component_func = load_component( + source=yaml_file, params_override=[{"outputs.component_out_path.type": output_type}] + ) node1 = component_func(component_in_number=number, component_in_path=path) return {"pipeline_output": node1.outputs.component_out_path} @@ -254,21 +248,21 @@ def test_dsl_pipeline_complex_input_output(self) -> None: @dsl.pipeline() def pipeline( - job_in_data_name_version_def_mode, - job_in_data_name_version_mode_mount, - job_in_data_name_version_mode_download, - job_in_data_by_name, - job_in_data_by_armid, - job_in_data_by_store_path, - job_in_data_by_path_default_store, - job_in_data_by_store_path_and_mount, - job_in_data_by_store_path_and_download, - job_in_data_by_blob_dir, - job_in_data_by_blob_file, - job_in_data_local_dir, - job_in_data_local_file, - job_in_data_local_yaml_definition, - job_in_data_uri, + job_in_data_name_version_def_mode, + job_in_data_name_version_mode_mount, + job_in_data_name_version_mode_download, + job_in_data_by_name, + job_in_data_by_armid, + job_in_data_by_store_path, + job_in_data_by_path_default_store, + job_in_data_by_store_path_and_mount, + job_in_data_by_store_path_and_download, + job_in_data_by_blob_dir, + job_in_data_by_blob_file, + job_in_data_local_dir, + job_in_data_local_file, + job_in_data_local_yaml_definition, + job_in_data_uri, ): component_func = load_component(source=yaml_file) multiple_data_component = component_func( @@ -782,8 +776,8 @@ def pipeline(component_in_number, component_in_path): with pytest.raises(UserErrorException) as ex: pipeline(10, test_job_input) assert ( - "Pipeline input expected an azure.ai.ml.Input or primitive types (str, bool, int or float), but got type ." 
- in ex.__str__() + "Pipeline input expected an azure.ai.ml.Input or primitive types (str, bool, int or float), but got type ." + in ex.__str__() ) def test_dsl_pipeline_multi_times(self): @@ -818,8 +812,8 @@ def mock_add_to_builder(component): _add_component_to_current_definition_builder(component) with mock.patch( - "azure.ai.ml.dsl._pipeline_component_builder._add_component_to_current_definition_builder", - side_effect=mock_add_to_builder, + "azure.ai.ml.dsl._pipeline_component_builder._add_component_to_current_definition_builder", + side_effect=mock_add_to_builder, ) as mocker: # DSL yaml_file = "./tests/test_configs/components/helloworld_component.yml" @@ -923,12 +917,12 @@ def mock_from_rest(*args, **kwargs): component_names = set() with mock.patch( - "azure.ai.ml.operations._operation_orchestrator.OperationOrchestrator.get_asset_arm_id", - side_effect=mock_arm_id, + "azure.ai.ml.operations._operation_orchestrator.OperationOrchestrator.get_asset_arm_id", + side_effect=mock_arm_id, ): with mock.patch( - "azure.ai.ml._restclient.v2022_10_01.operations.ComponentVersionsOperations.create_or_update", - side_effect=mock_create, + "azure.ai.ml._restclient.v2022_10_01.operations.ComponentVersionsOperations.create_or_update", + side_effect=mock_create, ): with mock.patch.object(Component, "_from_rest_object", side_effect=mock_from_rest): for _, job in pipeline.jobs.items(): @@ -1099,56 +1093,56 @@ def pipeline(number, path): with patch("sys.stdout", new=StringIO()) as std_out: print(pipeline1) assert ( - "display_name: pipeline\ntype: pipeline\ninputs:\n number: 10\n path:\n type: uri_folder" - in std_out.getvalue() + "display_name: pipeline\ntype: pipeline\ninputs:\n number: 10\n path:\n type: uri_folder" + in std_out.getvalue() ) @pytest.mark.parametrize( "target_yml, target_dsl_pipeline", [ ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_basic.yml", - data_binding_expression.input_basic(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_basic.yml", + data_binding_expression.input_basic(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_literal_cross_type.yml", - data_binding_expression.input_literal_cross_type(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_literal_cross_type.yml", + data_binding_expression.input_literal_cross_type(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_literal_meta.yml", - data_binding_expression.input_literal_meta(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_literal_meta.yml", + data_binding_expression.input_literal_meta(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_path_concatenate.yml", - data_binding_expression.input_path_concatenate(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_path_concatenate.yml", + data_binding_expression.input_path_concatenate(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_reason_expression.yml", - data_binding_expression.input_reason_expression(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_reason_expression.yml", + data_binding_expression.input_reason_expression(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_string_concatenate.yml", - data_binding_expression.input_string_concatenate(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_string_concatenate.yml", + data_binding_expression.input_string_concatenate(), ), ( - 
"./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_compute.yml", - data_binding_expression.run_settings_compute(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_compute.yml", + data_binding_expression.run_settings_compute(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/input_path.yml", - data_binding_expression.input_path(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/input_path.yml", + data_binding_expression.input_path(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_choice.yml", - data_binding_expression.run_settings_sweep_choice(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_choice.yml", + data_binding_expression.run_settings_sweep_choice(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_limits.yml", - data_binding_expression.run_settings_sweep_limits(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_limits.yml", + data_binding_expression.run_settings_sweep_limits(), ), ( - "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_literal.yml", - data_binding_expression.run_settings_sweep_literal(), + "./tests/test_configs/dsl_pipeline/data_binding_expression/run_settings_sweep_literal.yml", + data_binding_expression.run_settings_sweep_literal(), ), ], ) @@ -1360,7 +1354,7 @@ def test_dsl_pipeline_with_only_setting_binding_node(self) -> None: }, } }, - "outputs": {"trained_model": {"job_output_type": "uri_folder", 'mode': 'Upload'}}, + "outputs": {"trained_model": {"job_output_type": "uri_folder", "mode": "Upload"}}, "settings": {}, } } @@ -1515,7 +1509,7 @@ def root_pipeline(component_in_number: int, component_in_path: str): "type": "pipeline", }, }, - 'outputs': {'sub_pipeline_out': {'type': 'uri_folder'}} + "outputs": {"sub_pipeline_out": {"type": "uri_folder"}}, } actual_dict = pipeline._to_dict() actual_dict = pydash.omit(actual_dict, *omit_fields) @@ -1704,9 +1698,9 @@ def concatenation_in_pipeline(str_param: str): pipeline = concatenation_in_pipeline(str_param="string value") for node_name, expected_value in ( - ("microsoft_samples_echo_string", "${{parent.inputs.str_param}} right"), - ("microsoft_samples_echo_string_1", "left ${{parent.inputs.str_param}}"), - ("microsoft_samples_echo_string_2", "${{parent.inputs.str_param}}${{parent.inputs.str_param}}"), + ("microsoft_samples_echo_string", "${{parent.inputs.str_param}} right"), + ("microsoft_samples_echo_string_1", "left ${{parent.inputs.str_param}}"), + ("microsoft_samples_echo_string_2", "${{parent.inputs.str_param}}${{parent.inputs.str_param}}"), ): assert pipeline.jobs[node_name].inputs.component_in_string._data == expected_value @@ -1787,7 +1781,7 @@ def root_pipeline(component_in_number: int, component_in_path: str): "type": "pipeline", }, }, - 'outputs': {'sub_pipeline_out': {'type': 'uri_folder'}} + "outputs": {"sub_pipeline_out": {"type": "uri_folder"}}, } actual_dict = pydash.omit(pipeline._to_dict(), *omit_fields) assert actual_dict == expected_root_dict @@ -1863,7 +1857,7 @@ def root_pipeline(component_in_number: int, component_in_path: str): "type": "pipeline", }, }, - 'outputs': {'sub_pipeline_out': {'type': 'uri_folder'}} + "outputs": {"sub_pipeline_out": {"type": "uri_folder"}}, } actual_dict = pydash.omit( pipeline._to_dict(), @@ -1935,50 +1929,47 @@ def test_pipeline_node_identity_with_component(self): @dsl.pipeline def pipeline_func(component_in_path): - node1 
= component_func( - component_in_number=1, component_in_path=component_in_path - ) + node1 = component_func(component_in_number=1, component_in_path=component_in_path) node1.identity = AmlTokenConfiguration() - node2 = component_func( - component_in_number=1, component_in_path=component_in_path - ) + node2 = component_func(component_in_number=1, component_in_path=component_in_path) node2.identity = UserIdentityConfiguration() - node3 = component_func( - component_in_number=1, component_in_path=component_in_path - ) + node3 = component_func(component_in_number=1, component_in_path=component_in_path) node3.identity = ManagedIdentityConfiguration() pipeline = pipeline_func(component_in_path=Data(name="test", version="1", type=AssetTypes.MLTABLE)) - omit_fields = [ - "jobs.*.componentId", - "jobs.*._source" - ] + omit_fields = ["jobs.*.componentId", "jobs.*._source"] actual_dict = omit_with_wildcard(pipeline._to_rest_object().as_dict()["properties"], *omit_fields) assert actual_dict["jobs"] == { - 'node1': {'identity': {'type': 'aml_token'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}'}}, - 'name': 'node1', - 'type': 'command'}, - 'node2': {'identity': {'type': 'user_identity'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}'}}, - 'name': 'node2', - 'type': 'command'}, - 'node3': {'identity': {'type': 'managed_identity'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}'}}, - 'name': 'node3', - 'type': 'command'} + "node1": { + "identity": {"type": "aml_token"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, + "name": "node1", + "type": "command", + }, + "node2": { + "identity": {"type": "user_identity"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, + "name": "node2", + "type": "command", + }, + "node3": { + "identity": {"type": "managed_identity"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, + "name": "node3", + "type": "command", + }, } def test_pipeline_with_non_pipeline_inputs(self): @@ -1986,10 +1977,17 @@ def test_pipeline_with_non_pipeline_inputs(self): component_func1 = load_component(source=component_yaml, params_override=[{"name": "component_name_1"}]) component_func2 = load_component(source=component_yaml, params_override=[{"name": "component_name_2"}]) - @dsl.pipeline(non_pipeline_inputs=["other_params", "is_add_component", - "param_with_annotation", "param_with_default"]) - def pipeline_func(job_in_number, job_in_path, other_params, is_add_component, - param_with_annotation: Dict[str, str], param_with_default: int = 1): + @dsl.pipeline( + non_pipeline_inputs=["other_params", "is_add_component", "param_with_annotation", "param_with_default"] + ) + def pipeline_func( + job_in_number, + job_in_path, + other_params, + is_add_component, + 
param_with_annotation: Dict[str, str], + param_with_default: int = 1, + ): assert param_with_default == 1 assert param_with_annotation == {"mock": "dict"} component_func1(component_in_number=job_in_number, component_in_path=job_in_path) @@ -2017,7 +2015,6 @@ def pipeline_func(job_in_number, job_in_path, other_params, is_add_component): assert len(pipeline.jobs) == 3 def test_pipeline_with_invalid_non_pipeline_inputs(self): - @dsl.pipeline(non_pipeline_inputs=[123]) def pipeline_func(): pass @@ -2032,7 +2029,10 @@ def pipeline_func(): with pytest.raises(ParamValueNotExistsError) as error_info: pipeline_func() - assert "pipeline_func() got unexpected params in non_pipeline_inputs ['non_exist_param1', 'non_exist_param2']" in str(error_info) + assert ( + "pipeline_func() got unexpected params in non_pipeline_inputs ['non_exist_param1', 'non_exist_param2']" + in str(error_info) + ) def test_component_func_as_non_pipeline_inputs(self): component_yaml = components_dir / "helloworld_component.yml" @@ -2045,9 +2045,8 @@ def pipeline_func(job_in_number, job_in_path, component_func): component_func(component_in_number=job_in_number, component_in_path=job_in_path) pipeline = pipeline_func( - job_in_number=10, - job_in_path=Input(path="/a/path/on/ds"), - component_func=component_func2) + job_in_number=10, job_in_path=Input(path="/a/path/on/ds"), component_func=component_func2 + ) assert len(pipeline.jobs) == 2 assert component_func2.name in pipeline.jobs @@ -2058,8 +2057,9 @@ def test_pipeline_with_variable_inputs(self): @dsl.pipeline def pipeline_with_variable_args(**kwargs): - node_kwargs = component_func1(component_in_number=kwargs["component_in_number1"], - component_in_path=kwargs["component_in_path1"]) + node_kwargs = component_func1( + component_in_number=kwargs["component_in_number1"], component_in_path=kwargs["component_in_path1"] + ) @dsl.pipeline def root_pipeline(component_in_number: int, component_in_path: Input, **kwargs): @@ -2076,21 +2076,19 @@ def root_pipeline(component_in_number: int, component_in_path: Input, **kwargs): args_0: args_0 description """ node = component_func1(component_in_number=component_in_number, component_in_path=component_in_path) - node_kwargs = component_func1(component_in_number=kwargs["component_in_number1"], - component_in_path=kwargs["component_in_path1"]) + node_kwargs = component_func1( + component_in_number=kwargs["component_in_number1"], component_in_path=kwargs["component_in_path1"] + ) node_with_arg_kwarg = pipeline_with_variable_args(**kwargs) pipeline = root_pipeline(10, data, component_in_number1=12, component_in_path1=data) - assert pipeline.component.inputs['component_in_number'].description == "component_in_number description" - assert pipeline.component.inputs['component_in_path'].description == "component_in_path description" - assert pipeline.component.inputs['component_in_number1'].description == "component_in_number1 description" - assert pipeline.component.inputs['component_in_path1'].description == "component_in_path1 description" + assert pipeline.component.inputs["component_in_number"].description == "component_in_number description" + assert pipeline.component.inputs["component_in_path"].description == "component_in_path description" + assert pipeline.component.inputs["component_in_number1"].description == "component_in_number1 description" + assert pipeline.component.inputs["component_in_path1"].description == "component_in_path1 description" - omit_fields = [ - "jobs.*.componentId", - "jobs.*._source" - ] + omit_fields = 
["jobs.*.componentId", "jobs.*._source"] actual_dict = omit_with_wildcard(pipeline._to_rest_object().as_dict()["properties"], *omit_fields) assert actual_dict["inputs"] == { @@ -2100,50 +2098,51 @@ def root_pipeline(component_in_number: int, component_in_path: Input, **kwargs): "component_in_path1": {"uri": "test:1", "job_input_type": "mltable"}, } assert actual_dict["jobs"] == { - 'node': { - 'name': 'node', 'type': 'command', 'inputs': { - 'component_in_number': { - 'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_number}}' + "node": { + "name": "node", + "type": "command", + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_number}}", }, - 'component_in_path': { - 'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path}}' - } - } + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.component_in_path}}"}, + }, }, - 'node_kwargs': { - 'name': 'node_kwargs', - 'type': 'command', - 'inputs': { - 'component_in_number': { - 'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_number1}}' + "node_kwargs": { + "name": "node_kwargs", + "type": "command", + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_number1}}", }, - 'component_in_path': { - 'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path1}}' - } - } + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_path1}}", + }, + }, }, - 'node_with_arg_kwarg': { - 'name': 'node_with_arg_kwarg', - 'type': 'pipeline', - 'inputs': { - 'component_in_number1': { - 'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_number1}}' + "node_with_arg_kwarg": { + "name": "node_with_arg_kwarg", + "type": "pipeline", + "inputs": { + "component_in_number1": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_number1}}", }, - 'component_in_path1': { - 'job_input_type': 'literal', - 'value': '${{parent.inputs.component_in_path1}}' - } - } - } + "component_in_path1": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_path1}}", + }, + }, + }, } - with pytest.raises(UnsupportedParameterKindError, - match="dsl pipeline does not accept \*custorm_args as parameters\."): + with pytest.raises( + UnsupportedParameterKindError, match="dsl pipeline does not accept \*custorm_args as parameters\." + ): + @dsl.pipeline def pipeline_with_variable_args(*custorm_args): pass @@ -2151,18 +2150,19 @@ def pipeline_with_variable_args(*custorm_args): pipeline_with_variable_args(1, 2, 3) with mock.patch("azure.ai.ml.dsl._pipeline_decorator.is_private_preview_enabled", return_value=False): - with pytest.raises(UnsupportedParameterKindError, - match="dsl pipeline does not accept \*args or \*\*kwargs as parameters\."): + with pytest.raises( + UnsupportedParameterKindError, match="dsl pipeline does not accept \*args or \*\*kwargs as parameters\." + ): root_pipeline(10, data, 11, data, component_in_number1=11, component_in_path1=data) def test_pipeline_with_dumplicate_variable_inputs(self): - @dsl.pipeline def pipeline_with_variable_args(key_1: int, **kargs): pass - with pytest.raises(MultipleValueError, - match="pipeline_with_variable_args\(\) got multiple values for argument 'key_1'\."): + with pytest.raises( + MultipleValueError, match="pipeline_with_variable_args\(\) got multiple values for argument 'key_1'\." 
+ ): pipeline_with_variable_args(10, key_1=10) def test_pipeline_with_output_binding_in_dynamic_args(self): @@ -2199,7 +2199,7 @@ def pipeline_func_consume_expression(int_param: int): pipeline_job = pipeline_func_consume_expression(int_param=1) assert pipeline_job.jobs["control_node"]._to_rest_object() == { - '_source': 'DSL', + "_source": "DSL", "type": "if_else", "condition": "${{parent.jobs.expression_component.outputs.output}}", "true_block": "${{parent.jobs.node1}}", @@ -2326,9 +2326,7 @@ def spark_pipeline_from_yaml(iris_data): } def test_dsl_pipeline_with_data_transfer_copy_node(self) -> None: - merge_files = load_component( - "./tests/test_configs/components/data_transfer/copy_files.yaml" - ) + merge_files = load_component("./tests/test_configs/components/data_transfer/copy_files.yaml") @dsl.pipeline(description="submit a pipeline with data transfer copy job") def data_transfer_copy_pipeline_from_yaml(folder1): @@ -2353,38 +2351,42 @@ def data_transfer_copy_pipeline_from_yaml(folder1): data_transfer_copy_node_dict_from_rest = regenerated_data_transfer_copy_node._to_dict() omit_fields = [] - assert pydash.omit(data_transfer_copy_node_dict, *omit_fields) == pydash.omit(data_transfer_copy_node_dict_from_rest, *omit_fields) + assert pydash.omit(data_transfer_copy_node_dict, *omit_fields) == pydash.omit( + data_transfer_copy_node_dict_from_rest, *omit_fields + ) omit_fields = [ "jobs.copy_files_node.componentId", ] actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) assert actual_job == { - 'description': 'submit a pipeline with data transfer copy job', - 'display_name': 'data_transfer_copy_pipeline_from_yaml', - 'inputs': {'folder1': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'copy_files_node': {'_source': 'YAML.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder1}}'}}, - 'name': 'copy_files_node', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.output}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer'}}, - 'outputs': {'output': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/my_blob/paths/merged_blob'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {} + "description": "submit a pipeline with data transfer copy job", + "display_name": "data_transfer_copy_pipeline_from_yaml", + "inputs": { + "folder1": {"job_input_type": "uri_folder", "uri": "azureml://datastores/my_cosmos/paths/source_cosmos"} + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "copy_files_node": { + "_source": "YAML.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": {"folder1": {"job_input_type": "literal", "value": "${{parent.inputs.folder1}}"}}, + "name": "copy_files_node", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.output}}"}}, + "task": "copy_data", + "type": "data_transfer", + } + }, + "outputs": { + "output": {"job_output_type": "uri_folder", "uri": "azureml://datastores/my_blob/paths/merged_blob"} + }, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, } def test_dsl_pipeline_with_data_transfer_merge_node(self) -> None: - merge_files = load_component( - "./tests/test_configs/components/data_transfer/merge_files.yaml" - ) + merge_files = 
load_component("./tests/test_configs/components/data_transfer/merge_files.yaml") @dsl.pipeline(description="submit a pipeline with data transfer copy job") def data_transfer_copy_pipeline_from_yaml(folder1, folder2): @@ -2413,36 +2415,48 @@ def data_transfer_copy_pipeline_from_yaml(folder1, folder2): data_transfer_copy_node_dict_from_rest = regenerated_data_transfer_copy_node._to_dict() omit_fields = [] - assert pydash.omit(data_transfer_copy_node_dict, *omit_fields) == pydash.omit(data_transfer_copy_node_dict_from_rest, *omit_fields) + assert pydash.omit(data_transfer_copy_node_dict, *omit_fields) == pydash.omit( + data_transfer_copy_node_dict_from_rest, *omit_fields + ) omit_fields = [ "jobs.merge_files_node.componentId", ] actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) assert actual_job == { - 'description': 'submit a pipeline with data transfer copy job', - 'display_name': 'data_transfer_copy_pipeline_from_yaml', - 'inputs': {'folder1': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}, - 'folder2': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'merge_files_node': {'_source': 'YAML.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder1}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder2}}'}}, - 'name': 'merge_files_node', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.output}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer'}}, - 'outputs': {'output': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/my_blob/paths/merged_blob'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {} + "description": "submit a pipeline with data transfer copy job", + "display_name": "data_transfer_copy_pipeline_from_yaml", + "inputs": { + "folder1": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + "folder2": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "merge_files_node": { + "_source": "YAML.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.folder1}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.folder2}}"}, + }, + "name": "merge_files_node", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.output}}"}}, + "task": "copy_data", + "type": "data_transfer", + } + }, + "outputs": { + "output": {"job_output_type": "uri_folder", "uri": "azureml://datastores/my_blob/paths/merged_blob"} + }, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, } def test_node_sweep_with_optional_input(self) -> None: @@ -2488,8 +2502,7 @@ def pipeline_func(required_input: int, optional_input: int = 2): pipeline_job.settings.default_compute = "cpu-cluster" validate_result = pipeline_job._validate() assert validate_result.error_messages == { - 'inputs.required_input': "Required input 'required_input' for pipeline " - "'pipeline_func' not provided." + "inputs.required_input": "Required input 'required_input' for pipeline " "'pipeline_func' not provided." 
} validate_result = pipeline_job.component._validate() @@ -2497,8 +2510,8 @@ def pipeline_func(required_input: int, optional_input: int = 2): # pipeline component has required inputs assert pipeline_job.component._to_dict()["inputs"] == { - 'optional_input': {'default': '2', 'optional': True, 'type': 'integer'}, - 'required_input': {'type': 'integer'} + "optional_input": {"default": "2", "optional": True, "type": "integer"}, + "required_input": {"type": "integer"}, } # setting _validate_required_input_not_provided to False will skip the unprovided input check @@ -2532,8 +2545,10 @@ def pipeline_func(optional_input: Input(optional=True, type="uri_file")): # optional pipeline parameter binding to optional node parameter @dsl.pipeline() - def pipeline_func(optional_param: Input(optional=True, type="string"), - optional_param_duplicate: Input(optional=True, type="string")): + def pipeline_func( + optional_param: Input(optional=True, type="string"), + optional_param_duplicate: Input(optional=True, type="string"), + ): component_func( required_input=Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv"), required_param="def", @@ -2562,14 +2577,15 @@ def pipeline_func(required_input: Input(optional=False, type="uri_file")): pipeline_job.settings.default_compute = "cpu-cluster" validate_result = pipeline_job._validate() assert validate_result.error_messages == { - 'inputs.required_input': "Required input 'required_input' for pipeline " - "'pipeline_func' not provided." + "inputs.required_input": "Required input 'required_input' for pipeline " "'pipeline_func' not provided." } # required pipeline parameter binding to optional node parameter @dsl.pipeline() - def pipeline_func(required_param: Input(optional=False, type="string"), - required_param_duplicate: Input(optional=False, type="string")): + def pipeline_func( + required_param: Input(optional=False, type="string"), + required_param_duplicate: Input(optional=False, type="string"), + ): component_func( required_input=Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv"), required_param="def", @@ -2581,10 +2597,9 @@ def pipeline_func(required_param: Input(optional=False, type="string"), pipeline_job.settings.default_compute = "cpu-cluster" validate_result = pipeline_job._validate() assert validate_result.error_messages == { - 'inputs.required_param': "Required input 'required_param' for pipeline " - "'pipeline_func' not provided.", - 'inputs.required_param_duplicate': "Required input 'required_param_duplicate' for pipeline " - "'pipeline_func' not provided." 
+ "inputs.required_param": "Required input 'required_param' for pipeline " "'pipeline_func' not provided.", + "inputs.required_param_duplicate": "Required input 'required_param_duplicate' for pipeline " + "'pipeline_func' not provided.", } # required pipeline parameter with default value binding to optional node parameter @@ -2616,8 +2631,7 @@ def subgraph_pipeline(optional_input: Input(optional=True, type="uri_file")): @dsl.pipeline() def root_pipeline(): - subgraph_node = subgraph_pipeline( - ) + subgraph_node = subgraph_pipeline() pipeline_job = root_pipeline() pipeline_job.settings.default_compute = "cpu-cluster" @@ -2626,8 +2640,10 @@ def root_pipeline(): # optional pipeline parameter binding to optional node parameter @dsl.pipeline() - def subgraph_pipeline(optional_parameter: Input(optional=True, type="string"), - optional_parameter_duplicate: Input(optional=True, type="string")): + def subgraph_pipeline( + optional_parameter: Input(optional=True, type="string"), + optional_parameter_duplicate: Input(optional=True, type="string"), + ): component_func( required_input=Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv"), required_param="def", @@ -2637,8 +2653,7 @@ def subgraph_pipeline(optional_parameter: Input(optional=True, type="string"), @dsl.pipeline() def root_pipeline(): - subgraph_node = subgraph_pipeline( - ) + subgraph_node = subgraph_pipeline() pipeline_job = root_pipeline() pipeline_job.settings.default_compute = "cpu-cluster" @@ -2654,33 +2669,30 @@ def subgraph_pipeline(required_input: Input(optional=False, type="uri_file")): component_func( required_input=Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv"), required_param="def", - optional_input=required_input + optional_input=required_input, ) @dsl.pipeline() def root_pipeline(): - subgraph_node = subgraph_pipeline( - ) + subgraph_node = subgraph_pipeline() pipeline_job = root_pipeline() pipeline_job.settings.default_compute = "cpu-cluster" validate_result = pipeline_job._validate() assert validate_result.error_messages == { - 'jobs.subgraph_node.inputs.required_input': "Required input 'required_input' for component 'subgraph_node'" - " not provided." + "jobs.subgraph_node.inputs.required_input": "Required input 'required_input' for component 'subgraph_node'" + " not provided." } @dsl.pipeline() def root_pipeline(required_input: Input(optional=False, type="uri_file")): - subgraph_node = subgraph_pipeline( - required_input=required_input - ) + subgraph_node = subgraph_pipeline(required_input=required_input) pipeline_job = root_pipeline() pipeline_job.settings.default_compute = "cpu-cluster" validate_result = pipeline_job._validate() assert validate_result.error_messages == { - 'inputs.required_input': "Required input 'required_input' for pipeline 'root_pipeline' not provided." + "inputs.required_input": "Required input 'required_input' for pipeline 'root_pipeline' not provided." 
} # required pipeline parameter binding to optional node parameter @@ -2689,20 +2701,19 @@ def subgraph_pipeline(required_parameter: Input(optional=False, type="string")): component_func( required_input=Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv"), required_param="def", - optional_param=required_parameter + optional_param=required_parameter, ) @dsl.pipeline() def root_pipeline(): - subgraph_node = subgraph_pipeline( - ) + subgraph_node = subgraph_pipeline() pipeline_job = root_pipeline() pipeline_job.settings.default_compute = "cpu-cluster" validate_result = pipeline_job._validate() assert validate_result.error_messages == { - 'jobs.subgraph_node.inputs.required_parameter': "Required input 'required_parameter' for component " - "'subgraph_node' not provided." + "jobs.subgraph_node.inputs.required_parameter": "Required input 'required_parameter' for component " + "'subgraph_node' not provided." } # required pipeline parameter with default value binding to optional node parameter @@ -2711,13 +2722,12 @@ def subgraph_pipeline(required_parameter: Input(optional=False, type="string", d component_func( required_input=Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv"), required_param="def", - optional_param=required_parameter + optional_param=required_parameter, ) @dsl.pipeline() def root_pipeline(): - subgraph_node = subgraph_pipeline( - ) + subgraph_node = subgraph_pipeline() pipeline_job = root_pipeline() pipeline_job.settings.default_compute = "cpu-cluster" @@ -2734,9 +2744,9 @@ def my_pipeline() -> Output(type="uri_folder", description="new description", mo return {"output": node.outputs.component_out_path} pipeline_job = my_pipeline() - expected_outputs = {'output': { - 'description': 'new description', 'job_output_type': 'uri_folder', 'mode': 'Upload' - }} + expected_outputs = { + "output": {"description": "new description", "job_output_type": "uri_folder", "mode": "Upload"} + } assert pipeline_job._to_rest_object().as_dict()["properties"]["outputs"] == expected_outputs def test_dsl_pipeline_run_settings(self) -> None: @@ -2757,30 +2767,28 @@ def my_pipeline() -> Output(type="uri_folder", description="new description", mo PipelineConstants.DEFAULT_COMPUTE: "cpu-cluster", PipelineConstants.CONTINUE_ON_STEP_FAILURE: True, PipelineConstants.CONTINUE_RUN_ON_FAILED_OPTIONAL_INPUT: False, - "_source": "DSL" + "_source": "DSL", } def test_register_output_without_name_sdk(self): component = load_component(source="./tests/test_configs/components/helloworld_component.yml") - component_input = Input(type='uri_file', path='https://dprepdata.blob.core.windows.net/demo/Titanic.csv') + component_input = Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv") @dsl.pipeline() def register_node_output(): node = component(component_in_path=component_input) node.outputs.component_out_path.version = 1 - + pipeline = register_node_output() pipeline.settings.default_compute = "azureml:cpu-cluster" - with pytest.raises(UserErrorException) as e: + with pytest.raises(UserErrorException) as e: pipeline._validate() assert "Output name is required when output version is specified." 
in str(e.value) @dsl.pipeline() def register_pipeline_output(): node = component(component_in_path=component_input) - return { - 'pipeine_a_output': node.outputs.component_out_path - } + return {"pipeine_a_output": node.outputs.component_out_path} pipeline = register_pipeline_output() pipeline.outputs.pipeine_a_output.version = 1 @@ -2791,19 +2799,22 @@ def register_pipeline_output(): def test_register_output_with_invalid_name_sdk(self): component = load_component(source="./tests/test_configs/components/helloworld_component.yml") - component_input = Input(type='uri_file', path='https://dprepdata.blob.core.windows.net/demo/Titanic.csv') + component_input = Input(type="uri_file", path="https://dprepdata.blob.core.windows.net/demo/Titanic.csv") @dsl.pipeline() def register_node_output(): node = component(component_in_path=component_input) - node.outputs.component_out_path.name = '@' - node.outputs.component_out_path.version = '1' + node.outputs.component_out_path.name = "@" + node.outputs.component_out_path.version = "1" pipeline = register_node_output() pipeline.settings.default_compute = "azureml:cpu-cluster" with pytest.raises(UserErrorException) as e: pipeline._validate() - assert 'The output name @ can only contain alphanumeric characters, dashes and underscores, with a limit of 255 characters.' in str(e.value) + assert ( + "The output name @ can only contain alphanumeric characters, dashes and underscores, with a limit of 255 characters." + in str(e.value) + ) def test_pipeline_output_settings_copy(self): component_yaml = components_dir / "helloworld_component.yml" @@ -2860,9 +2871,7 @@ def outer_pipeline(): def test_node_output_type_promotion(self): component_yaml = components_dir / "helloworld_component.yml" - params_override = [{"outputs": { - "component_out_path": {"type": "uri_file"} - }}] + params_override = [{"outputs": {"component_out_path": {"type": "uri_file"}}}] component_func1 = load_component(source=component_yaml, params_override=params_override) # without node level setting, node should have same type with component @@ -2908,9 +2917,7 @@ def my_pipeline(): def test_node_output_mode_promotion(self): component_yaml = components_dir / "helloworld_component.yml" - params_override = [{"outputs": { - "component_out_path": {"mode": "mount", "type": "uri_file"} - }}] + params_override = [{"outputs": {"component_out_path": {"mode": "mount", "type": "uri_file"}}}] component_func1 = load_component(source=component_yaml, params_override=params_override) # without node level setting, node should have same type with component @@ -2966,7 +2973,7 @@ def test_validate_pipeline_node_io_name_has_keyword(self, caplog): assert pipeline_job._customized_validate().passed warning_template = ( - "Reserved word \"{io_name}\" is used as {io} name in node \"{node_name}\", " + 'Reserved word "{io_name}" is used as {io} name in node "{node_name}", ' "can only be accessed with '{node_name}.{io}s[\"{io_name}\"]'" ) assert caplog.messages == [ diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_component.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_component.py index 08a08c6957cf..49797aabeabd 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_component.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_component.py @@ -10,10 +10,7 @@ components_dir = tests_root_dir / "test_configs/components/" -@pytest.mark.usefixtures( - "enable_pipeline_private_preview_features", - "enable_private_preview_schema_features" -) 
+@pytest.mark.usefixtures("enable_pipeline_private_preview_features", "enable_private_preview_schema_features") @pytest.mark.timeout(_DSL_TIMEOUT_SECOND) @pytest.mark.unittest @pytest.mark.pipeline_test @@ -39,12 +36,12 @@ def pipeline_func(job_in_path): ) assert not validation_result.passed assert validation_result.error_messages == { - "inputs.component_in_path": 'Parameter type unknown, ' - 'please add type annotation or specify input default value.', + "inputs.component_in_path": "Parameter type unknown, " + "please add type annotation or specify input default value.", } assert mock_machinelearning_client.jobs.validate(pipeline_job).error_messages == { - 'jobs.pipeline_no_arg.inputs.component_in_path': 'Parameter type unknown, please add type annotation' - ' or specify input default value.', - 'jobs.pipeline_no_arg.jobs.microsoftsamples_command_component_basic.compute': 'Compute not set' + "jobs.pipeline_no_arg.inputs.component_in_path": "Parameter type unknown, please add type annotation" + " or specify input default value.", + "jobs.pipeline_no_arg.jobs.microsoftsamples_command_component_basic.compute": "Compute not set", } diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py index 1fa36edaf721..82d5ef63646c 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py @@ -15,8 +15,11 @@ from azure.ai.ml.entities._builders import Command, Parallel, Spark, Sweep, DataTransferImport from azure.ai.ml.entities._component.parallel_component import ParallelComponent from azure.ai.ml.entities._job.automl.tabular import ClassificationJob -from azure.ai.ml.entities._job.data_transfer.data_transfer_job import DataTransferCopyJob, DataTransferImportJob, \ - DataTransferExportJob +from azure.ai.ml.entities._job.data_transfer.data_transfer_job import ( + DataTransferCopyJob, + DataTransferImportJob, + DataTransferExportJob, +) from azure.ai.ml.entities._job.job_service import ( JobService, JupyterLabJobService, @@ -222,17 +225,16 @@ def train_with_sweep_in_pipeline(raw_data): pytorch_node_rest = pytorch_node._to_rest_object() omit_fields = ["trial"] pydash.omit(pytorch_node_rest, *omit_fields) == { - 'limits': {'max_total_trials': 10}, - 'sampling_algorithm': 'random', - 'objective': {'goal': 'maximize', 'primary_metric': 'AUC'}, - 'search_space': {'component_in_number': {'values': [2, 3, 4, 5], 'type': 'choice'}}, - 'name': 'sweep_job', - 'type': 'sweep', - 'computeId': 'test-aks-large', - 'inputs': {'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.raw_data}}'}}, - '_source': 'YAML.COMPONENT', - 'resources': {'instance_type': 'cpularge'}, + "limits": {"max_total_trials": 10}, + "sampling_algorithm": "random", + "objective": {"goal": "maximize", "primary_metric": "AUC"}, + "search_space": {"component_in_number": {"values": [2, 3, 4, 5], "type": "choice"}}, + "name": "sweep_job", + "type": "sweep", + "computeId": "test-aks-large", + "inputs": {"component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.raw_data}}"}}, + "_source": "YAML.COMPONENT", + "resources": {"instance_type": "cpularge"}, } def test_dsl_pipeline_with_parallel(self) -> None: @@ -313,54 +315,63 @@ def spark_pipeline_from_yaml(iris_data): ] actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) assert 
actual_job == { - 'description': 'submit a pipeline with spark job', - 'display_name': 'spark_pipeline_from_yaml', - 'inputs': {'iris_data': {'job_input_type': 'uri_file', - 'mode': 'Direct', - 'uri': 'https://azuremlexamples.blob.core.windows.net/datasets/iris.csv'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'add_greeting_column': {'_source': 'YAML.COMPONENT', - 'args': '--file_input ${{inputs.file_input}}', - 'computeId': 'spark31', - 'conf': {'spark.driver.cores': 2, - 'spark.driver.memory': '1g', - 'spark.executor.cores': 1, - 'spark.executor.instances': 1, - 'spark.executor.memory': '1g'}, - 'entry': {'file': 'add_greeting_column.py', - 'spark_job_entry_type': 'SparkJobPythonEntry'}, - 'files': ['my_files.txt'], - 'identity': {'identity_type': 'Managed'}, - 'inputs': {'file_input': {'job_input_type': 'literal', - 'value': '${{parent.inputs.iris_data}}'}}, - 'name': 'add_greeting_column', - 'py_files': ['utils.zip'], - 'type': 'spark'}, - 'count_by_row': {'_source': 'YAML.COMPONENT', - 'args': '--file_input ${{inputs.file_input}} ' - '--output ${{outputs.output}}', - 'computeId': 'spark31', - 'conf': {'spark.driver.cores': 2, - 'spark.driver.memory': '1g', - 'spark.executor.cores': 1, - 'spark.executor.instances': 1, - 'spark.executor.memory': '1g'}, - 'entry': {'file': 'count_by_row.py', - 'spark_job_entry_type': 'SparkJobPythonEntry'}, - 'files': ['my_files.txt'], - 'identity': {'identity_type': 'Managed'}, - 'inputs': {'file_input': {'job_input_type': 'literal', - 'value': '${{parent.inputs.iris_data}}'}}, - 'jars': ['scalaproj.jar'], - 'name': 'count_by_row', - 'outputs': {'output': {'type': 'literal', - 'value': '${{parent.outputs.output}}'}}, - 'type': 'spark'}}, - 'outputs': {'output': {'job_output_type': 'uri_folder', 'mode': 'Direct'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "description": "submit a pipeline with spark job", + "display_name": "spark_pipeline_from_yaml", + "inputs": { + "iris_data": { + "job_input_type": "uri_file", + "mode": "Direct", + "uri": "https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", + } + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "add_greeting_column": { + "_source": "YAML.COMPONENT", + "args": "--file_input ${{inputs.file_input}}", + "computeId": "spark31", + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.instances": 1, + "spark.executor.memory": "1g", + }, + "entry": {"file": "add_greeting_column.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "files": ["my_files.txt"], + "identity": {"identity_type": "Managed"}, + "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}}, + "name": "add_greeting_column", + "py_files": ["utils.zip"], + "type": "spark", + }, + "count_by_row": { + "_source": "YAML.COMPONENT", + "args": "--file_input ${{inputs.file_input}} " "--output ${{outputs.output}}", + "computeId": "spark31", + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.instances": 1, + "spark.executor.memory": "1g", + }, + "entry": {"file": "count_by_row.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "files": ["my_files.txt"], + "identity": {"identity_type": "Managed"}, + "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}}, + "jars": ["scalaproj.jar"], + "name": "count_by_row", + "outputs": {"output": {"type": "literal", "value": 
"${{parent.outputs.output}}"}}, + "type": "spark", + }, + }, + "outputs": {"output": {"job_output_type": "uri_folder", "mode": "Direct"}}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } def test_pipeline_with_command_function(self): # component func @@ -425,59 +436,75 @@ def pipeline(number, path): pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'experiment_name': 'test_pipeline_with_command_function', - 'inputs': {'number': {'job_input_type': 'literal', - 'value': '10'}, - 'path': {'job_input_type': 'uri_folder', - 'mode': 'ReadOnlyMount', - 'uri': '/a/path/on/ds'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'YAML.COMPONENT', - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.path}}'}}, - 'name': 'node1', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output1}}'}}, - 'type': 'command'}, - 'node2': {'_source': 'CLASS', - 'distribution': {'distribution_type': 'PyTorch', - 'process_count_per_instance': 2}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node1.outputs.component_out_path}}'}}, - 'name': 'node2', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output2}}'}}, - 'resources': {'instance_count': 2}, - 'type': 'command'}, - 'node3': {'_source': 'BUILDER', - 'display_name': 'my-evaluate-job', - 'distribution': {'distribution_type': 'PyTorch', - 'process_count_per_instance': 2}, - 'environment_variables': {'key': 'val'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node2.outputs.component_out_path}}'}}, - 'name': 'node3', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output3}}'}}, - 'resources': {'instance_count': 2}, - 'type': 'command'}}, - 'outputs': {'pipeline_output1': {'job_output_type': 'uri_folder'}, - 'pipeline_output2': {'job_output_type': 'mlflow_model'}, - 'pipeline_output3': {'job_output_type': 'mlflow_model', 'mode': 'ReadWriteMount'}}, - 'properties': {}, - 'settings': {}, - 'tags': {} - } + "properties": { + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_command_function", + "inputs": { + "number": {"job_input_type": "literal", "value": "10"}, + "path": {"job_input_type": "uri_folder", "mode": "ReadOnlyMount", "uri": "/a/path/on/ds"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "YAML.COMPONENT", + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, + }, + "name": "node1", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} + }, + "type": "command", + }, + "node2": { + "_source": "CLASS", + "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, + "inputs": { + "component_in_number": {"job_input_type": "literal", 
"value": "${{parent.inputs.number}}"}, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.node1.outputs.component_out_path}}", + }, + }, + "name": "node2", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} + }, + "resources": {"instance_count": 2}, + "type": "command", + }, + "node3": { + "_source": "BUILDER", + "display_name": "my-evaluate-job", + "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, + "environment_variables": {"key": "val"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.node2.outputs.component_out_path}}", + }, + }, + "name": "node3", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"} + }, + "resources": {"instance_count": 2}, + "type": "command", + }, + }, + "outputs": { + "pipeline_output1": {"job_output_type": "uri_folder"}, + "pipeline_output2": {"job_output_type": "mlflow_model"}, + "pipeline_output3": {"job_output_type": "mlflow_model", "mode": "ReadWriteMount"}, + }, + "properties": {}, + "settings": {}, + "tags": {}, } + } def test_pipeline_with_data_transfer_copy_function(self): # component func @@ -533,62 +560,84 @@ def pipeline(folder1, folder2): pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'folder1': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}, - 'folder2': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'YAML.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder1}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder2}}'}}, - 'name': 'node1', - 'task': 'copy_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'CLASS', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node1.outputs.output_folder}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node1.outputs.output_folder}}'}}, - 'name': 'node2', - 'task': 'copy_data', - 'type': 'data_transfer'}, - 'node3': {'_source': 'BUILDER', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node2.outputs.output}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node2.outputs.output}}'}}, - 'name': 'node3', - 'outputs': {'output': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer'}}, - 'outputs': {'pipeline_output': { - 'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/my_blob/paths/merged_blob' - }}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}}} + "properties": { + "display_name": "pipeline", + "inputs": { + "folder1": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + "folder2": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + }, + 
"is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "YAML.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.folder1}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.folder2}}"}, + }, + "name": "node1", + "task": "copy_data", + "type": "data_transfer", + }, + "node2": { + "_source": "CLASS", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": { + "job_input_type": "literal", + "value": "${{parent.jobs.node1.outputs.output_folder}}", + }, + "folder2": { + "job_input_type": "literal", + "value": "${{parent.jobs.node1.outputs.output_folder}}", + }, + }, + "name": "node2", + "task": "copy_data", + "type": "data_transfer", + }, + "node3": { + "_source": "BUILDER", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output}}"}, + }, + "name": "node3", + "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.pipeline_output}}"}}, + "task": "copy_data", + "type": "data_transfer", + }, + }, + "outputs": { + "pipeline_output": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/my_blob/paths/merged_blob", + } + }, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } + } def test_pipeline_with_data_transfer_import_database_function(self): - query_source_snowflake = 'SELECT * FROM my_table' - connection_target_azuresql = 'azureml:my_azuresql_connection' + query_source_snowflake = "SELECT * FROM my_table" + connection_target_azuresql = "azureml:my_azuresql_connection" outputs = {"sink": Output(type=AssetTypes.MLTABLE)} - source = {'type': 'database', 'connection': 'azureml:my_snowflake_connection', - 'query': query_source_snowflake} + source = {"type": "database", "connection": "azureml:my_snowflake_connection", "query": query_source_snowflake} data_transfer_job = DataTransferImportJob( source=source, outputs=outputs, task=DataTransferTaskType.IMPORT_DATA, - compute='adf_compute1', + compute="adf_compute1", ) data_transfer_job_func = to_component(job=data_transfer_job) @@ -607,56 +656,79 @@ def pipeline(query_source_snowflake, connection_target_azuresql): pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target_azuresql': {'job_input_type': 'literal', - 'value': 'azureml:my_azuresql_connection'}, - 'query_source_snowflake': {'job_input_type': 'literal', - 'value': 'SELECT * FROM ' - 'my_table'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'name': 'node1', - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': 'SELECT * FROM my_table', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'name': 'node2', - 'outputs': {'sink': {'job_output_type': 'mltable'}}, - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': 'SELECT * FROM my_table', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node3': {'_source': 'BUILTIN', - 'name': 'node3', - 'source': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 
'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node4': {'_source': 'BUILTIN', - 'name': 'node4', - 'outputs': {'sink': {'job_output_type': 'mltable'}}, - 'source': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target_azuresql": { + "job_input_type": "literal", + "value": "azureml:my_azuresql_connection", + }, + "query_source_snowflake": {"job_input_type": "literal", "value": "SELECT * FROM " "my_table"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "name": "node1", + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "SELECT * FROM my_table", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "name": "node2", + "outputs": {"sink": {"job_output_type": "mltable"}}, + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "SELECT * FROM my_table", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node3": { + "_source": "BUILTIN", + "name": "node3", + "source": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node4": { + "_source": "BUILTIN", + "name": "node4", + "outputs": {"sink": {"job_output_type": "mltable"}}, + "source": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } for key, _ in pipeline1.jobs.items(): data_transfer_import_node = pipeline1.jobs[key] data_transfer_import_node_dict = data_transfer_import_node._to_dict() data_transfer_import_node_rest_obj = data_transfer_import_node._to_rest_object() - regenerated_data_transfer_import_node = DataTransferImport._from_rest_object(data_transfer_import_node_rest_obj) + regenerated_data_transfer_import_node = DataTransferImport._from_rest_object( + data_transfer_import_node_rest_obj + ) data_transfer_import_node_dict_from_rest = regenerated_data_transfer_import_node._to_dict() @@ -664,16 +736,22 @@ def pipeline(query_source_snowflake, connection_target_azuresql): # regenerated_data_transfer_import_node will only keep component id when call _to_rest_object() omit_fields = ["component"] - assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == \ - pydash.omit(data_transfer_import_node_dict_from_rest, *omit_fields) + assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == pydash.omit( + data_transfer_import_node_dict_from_rest, *omit_fields + ) def test_pipeline_with_data_transfer_import_stored_database_function(self): - stored_procedure = 'SelectEmployeeByJobAndDepartment' - stored_procedure_params = [{'name': 'job', 'value': 'Engineer', 'type': 'String'}, - {'name': 'department', 'value': 'Engineering', 'type': 'String'}] + stored_procedure = "SelectEmployeeByJobAndDepartment" + stored_procedure_params = [ + {"name": "job", "value": "Engineer", "type": "String"}, + {"name": 
"department", "value": "Engineering", "type": "String"}, + ] outputs = {"sink": Output(type=AssetTypes.MLTABLE)} - source = {'type': 'database', 'stored_procedure': stored_procedure, - 'stored_procedure_params': stored_procedure_params} + source = { + "type": "database", + "stored_procedure": stored_procedure, + "stored_procedure_params": stored_procedure_params, + } data_transfer_job = DataTransferImportJob( source=source, outputs=outputs, @@ -691,47 +769,56 @@ def pipeline(): pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'name': 'node1', - 'source': {'stored_procedure': 'SelectEmployeeByJobAndDepartment', - 'stored_procedure_params': [{'name': 'job', - 'type': 'String', - 'value': 'Engineer'}, - {'name': 'department', - 'type': 'String', - 'value': 'Engineering'}], - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'name': 'node2', - 'outputs': {'sink': {'job_output_type': 'mltable'}}, - 'source': {'stored_procedure': 'SelectEmployeeByJobAndDepartment', - 'stored_procedure_params': [{'name': 'job', - 'type': 'String', - 'value': 'Engineer'}, - {'name': 'department', - 'type': 'String', - 'value': 'Engineering'}], - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": {}, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "name": "node1", + "source": { + "stored_procedure": "SelectEmployeeByJobAndDepartment", + "stored_procedure_params": [ + {"name": "job", "type": "String", "value": "Engineer"}, + {"name": "department", "type": "String", "value": "Engineering"}, + ], + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "name": "node2", + "outputs": {"sink": {"job_output_type": "mltable"}}, + "source": { + "stored_procedure": "SelectEmployeeByJobAndDepartment", + "stored_procedure_params": [ + {"name": "job", "type": "String", "value": "Engineer"}, + {"name": "department", "type": "String", "value": "Engineering"}, + ], + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } for key, _ in pipeline1.jobs.items(): data_transfer_import_node = pipeline1.jobs[key] data_transfer_import_node_dict = data_transfer_import_node._to_dict() data_transfer_import_node_rest_obj = data_transfer_import_node._to_rest_object() - regenerated_data_transfer_import_node = DataTransferImport._from_rest_object(data_transfer_import_node_rest_obj) + regenerated_data_transfer_import_node = DataTransferImport._from_rest_object( + data_transfer_import_node_rest_obj + ) data_transfer_import_node_dict_from_rest = regenerated_data_transfer_import_node._to_dict() @@ -739,14 +826,15 @@ def pipeline(): # regenerated_data_transfer_import_node will only keep component id when call _to_rest_object() omit_fields = ["component"] - assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == \ - pydash.omit(data_transfer_import_node_dict_from_rest, *omit_fields) + assert 
pydash.omit(data_transfer_import_node_dict, *omit_fields) == pydash.omit( + data_transfer_import_node_dict_from_rest, *omit_fields + ) def test_pipeline_with_data_transfer_import_file_system_function(self): - path_source_s3 = 's3://my_bucket/my_folder' - connection_target = 'azureml:my_s3_connection' + path_source_s3 = "s3://my_bucket/my_folder" + connection_target = "azureml:my_s3_connection" outputs = {"sink": Output(type=AssetTypes.URI_FOLDER, path="azureml://datastores/managed/paths/some_path")} - source = {'type': 'file_system', 'connection': connection_target, 'path': path_source_s3} + source = {"type": "file_system", "connection": connection_target, "path": path_source_s3} data_transfer_job = DataTransferImportJob( source=source, outputs=outputs, @@ -759,7 +847,6 @@ def pipeline(path_source_s3, connection_target): node1 = data_transfer_job_func(source=FileSystem(**source)) node2 = import_data(source=FileSystem(**source), outputs=outputs) - source_snowflake = FileSystem(path=path_source_s3, connection=connection_target) node3 = data_transfer_job_func(source=source_snowflake) node4 = import_data(source=source_snowflake, outputs=outputs) @@ -770,57 +857,86 @@ def pipeline(path_source_s3, connection_target): pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target': {'job_input_type': 'literal', - 'value': 'azureml:my_s3_connection'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 's3://my_bucket/my_folder'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'name': 'node1', - 'source': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'name': 'node2', - 'outputs': {'sink': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/managed/paths/some_path'}}, - 'source': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node3': {'_source': 'BUILTIN', - 'name': 'node3', - 'source': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node4': {'_source': 'BUILTIN', - 'name': 'node4', - 'outputs': {'sink': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/managed/paths/some_path'}}, - 'source': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target": {"job_input_type": "literal", "value": "azureml:my_s3_connection"}, + "path_source_s3": {"job_input_type": "literal", "value": "s3://my_bucket/my_folder"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "name": "node1", + "source": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "name": "node2", + "outputs": { + 
"sink": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/managed/paths/some_path", + } + }, + "source": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node3": { + "_source": "BUILTIN", + "name": "node3", + "source": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node4": { + "_source": "BUILTIN", + "name": "node4", + "outputs": { + "sink": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/managed/paths/some_path", + } + }, + "source": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } for key, _ in pipeline1.jobs.items(): data_transfer_import_node = pipeline1.jobs[key] data_transfer_import_node_dict = data_transfer_import_node._to_dict() data_transfer_import_node_rest_obj = data_transfer_import_node._to_rest_object() - regenerated_data_transfer_import_node = DataTransferImport._from_rest_object(data_transfer_import_node_rest_obj) + regenerated_data_transfer_import_node = DataTransferImport._from_rest_object( + data_transfer_import_node_rest_obj + ) data_transfer_import_node_dict_from_rest = regenerated_data_transfer_import_node._to_dict() @@ -828,15 +944,16 @@ def pipeline(path_source_s3, connection_target): # regenerated_data_transfer_import_node will only keep component id when call _to_rest_object() omit_fields = ["component"] - assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == \ - pydash.omit(data_transfer_import_node_dict_from_rest, *omit_fields) + assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == pydash.omit( + data_transfer_import_node_dict_from_rest, *omit_fields + ) def test_pipeline_with_data_transfer_export_database_function(self): - connection_target_azuresql = 'azureml:my_azuresql_connection' + connection_target_azuresql = "azureml:my_azuresql_connection" table_name = "merged_table" cosmos_folder = Input(type=AssetTypes.URI_FILE, path="azureml://datastores/my_cosmos/paths/source_cosmos") inputs = {"source": cosmos_folder} - sink = {'type': 'database', 'connection': connection_target_azuresql, 'table_name': table_name} + sink = {"type": "database", "connection": connection_target_azuresql, "table_name": table_name} data_transfer_job = DataTransferExportJob( inputs=inputs, sink=sink, @@ -861,61 +978,101 @@ def pipeline(table_name, connection_target_azuresql): pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target_azuresql': {'job_input_type': 'literal', - 'value': 'azureml:my_azuresql_connection'}, - 'table_name': {'job_input_type': 'literal', - 'value': 'merged_table'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node1', - 'sink': {'connection': 'azureml:my_azuresql_connection', - 'table_name': 'merged_table', - 'type': 'database'}, - 'task': 'export_data', - 
'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node2', - 'sink': {'connection': 'azureml:my_azuresql_connection', - 'table_name': 'merged_table', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node3': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node3', - 'sink': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'table_name': '${{parent.inputs.table_name}}', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node4': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node4', - 'sink': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'table_name': '${{parent.inputs.table_name}}', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target_azuresql": { + "job_input_type": "literal", + "value": "azureml:my_azuresql_connection", + }, + "table_name": {"job_input_type": "literal", "value": "merged_table"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_file", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node1", + "sink": { + "connection": "azureml:my_azuresql_connection", + "table_name": "merged_table", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_file", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node2", + "sink": { + "connection": "azureml:my_azuresql_connection", + "table_name": "merged_table", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node3": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_file", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node3", + "sink": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "table_name": "${{parent.inputs.table_name}}", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node4": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_file", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node4", + "sink": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "table_name": "${{parent.inputs.table_name}}", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } for key, _ in pipeline1.jobs.items(): data_transfer_import_node = pipeline1.jobs[key] data_transfer_import_node_dict = data_transfer_import_node._to_dict() data_transfer_import_node_rest_obj = data_transfer_import_node._to_rest_object() - regenerated_data_transfer_import_node = DataTransferImport._from_rest_object(data_transfer_import_node_rest_obj) + regenerated_data_transfer_import_node = 
DataTransferImport._from_rest_object( + data_transfer_import_node_rest_obj + ) data_transfer_import_node_dict_from_rest = regenerated_data_transfer_import_node._to_dict() @@ -923,16 +1080,17 @@ def pipeline(table_name, connection_target_azuresql): # regenerated_data_transfer_import_node will only keep component id when call _to_rest_object() omit_fields = ["component"] - assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == \ - pydash.omit(data_transfer_import_node_dict_from_rest, *omit_fields) + assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == pydash.omit( + data_transfer_import_node_dict_from_rest, *omit_fields + ) def test_pipeline_with_data_transfer_export_file_system_function(self): - path_source_s3 = 's3://my_bucket/my_folder' - connection_target = 'azureml:my_s3_connection' + path_source_s3 = "s3://my_bucket/my_folder" + connection_target = "azureml:my_s3_connection" my_cosmos_folder = Input(type=AssetTypes.URI_FOLDER, path="azureml://datastores/my_cosmos/paths/source_cosmos") inputs = {"source": my_cosmos_folder} - sink = {'type': 'file_system', 'connection': connection_target, 'path': path_source_s3} + sink = {"type": "file_system", "connection": connection_target, "path": path_source_s3} data_transfer_job = DataTransferExportJob( inputs=inputs, sink=sink, @@ -944,7 +1102,7 @@ def test_pipeline_with_data_transfer_export_file_system_function(self): def pipeline(path_source_s3, connection_target, cosmos_folder, my_cosmos_folder): node1 = data_transfer_job_func(source=my_cosmos_folder) node1.sink = sink - node2 = export_data(inputs = {"source": my_cosmos_folder}, sink=sink) + node2 = export_data(inputs={"source": my_cosmos_folder}, sink=sink) source_snowflake = FileSystem(path=path_source_s3, connection=connection_target) node3 = data_transfer_job_func(source=cosmos_folder) @@ -957,64 +1115,90 @@ def pipeline(path_source_s3, connection_target, cosmos_folder, my_cosmos_folder) pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target': {'job_input_type': 'literal', - 'value': 'azureml:my_s3_connection'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 's3://my_bucket/my_folder'}, - 'cosmos_folder': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}, - }, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.my_cosmos_folder}}'}}, - 'name': 'node1', - 'sink': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.my_cosmos_folder}}'}}, - 'name': 'node2', - 'sink': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node3': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'name': 'node3', - 'sink': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node4': {'_source': 'BUILTIN', - 
'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'name': 'node4', - 'sink': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target": {"job_input_type": "literal", "value": "azureml:my_s3_connection"}, + "path_source_s3": {"job_input_type": "literal", "value": "s3://my_bucket/my_folder"}, + "cosmos_folder": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.my_cosmos_folder}}"} + }, + "name": "node1", + "sink": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.my_cosmos_folder}}"} + }, + "name": "node2", + "sink": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node3": { + "_source": "BUILTIN", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"} + }, + "name": "node3", + "sink": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node4": { + "_source": "BUILTIN", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"} + }, + "name": "node4", + "sink": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } for key, _ in pipeline1.jobs.items(): data_transfer_import_node = pipeline1.jobs[key] data_transfer_import_node_dict = data_transfer_import_node._to_dict() data_transfer_import_node_rest_obj = data_transfer_import_node._to_rest_object() - regenerated_data_transfer_import_node = DataTransferImport._from_rest_object(data_transfer_import_node_rest_obj) + regenerated_data_transfer_import_node = DataTransferImport._from_rest_object( + data_transfer_import_node_rest_obj + ) data_transfer_import_node_dict_from_rest = regenerated_data_transfer_import_node._to_dict() @@ -1022,8 +1206,9 @@ def pipeline(path_source_s3, connection_target, cosmos_folder, my_cosmos_folder) # regenerated_data_transfer_import_node will only keep component id when call _to_rest_object() omit_fields = ["component"] - assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == \ - pydash.omit(data_transfer_import_node_dict_from_rest, *omit_fields) + assert pydash.omit(data_transfer_import_node_dict, *omit_fields) == pydash.omit( + data_transfer_import_node_dict_from_rest, *omit_fields + ) def test_pipeline_with_spark_function(self): # component func @@ -1102,91 +1287,104 @@ def pipeline(iris_data, sample_rate): 
pipeline_job1 = pipeline1._to_rest_object().as_dict() pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'experiment_name': 'test_pipeline_with_spark_function', - 'inputs': {'iris_data': {'job_input_type': 'uri_file', - 'mode': 'Direct', - 'uri': './tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt'}, - 'sample_rate': {'job_input_type': 'literal', - 'value': '0.01'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'YAML.COMPONENT', - 'args': '--input1 ${{inputs.input1}} ' - '--output2 ${{outputs.output1}} ' - '--my_sample_rate ' - '${{inputs.sample_rate}}', - 'computeId': 'rezas-synapse-10', - 'conf': {'spark.driver.cores': 1, - 'spark.driver.memory': '2g', - 'spark.dynamicAllocation.enabled': True, - 'spark.dynamicAllocation.maxExecutors': 4, - 'spark.dynamicAllocation.minExecutors': 1, - 'spark.executor.cores': 2, - 'spark.executor.instances': 1, - 'spark.executor.memory': '2g'}, - 'entry': {'file': 'sampleword.py', - 'spark_job_entry_type': 'SparkJobPythonEntry'}, - 'identity': {'identity_type': 'Managed'}, - 'inputs': {'input1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.iris_data}}'}, - 'sample_rate': {'job_input_type': 'literal', - 'value': '${{parent.inputs.sample_rate}}'}}, - 'name': 'node1', - 'outputs': {'output1': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output1}}'}}, - 'type': 'spark'}, - 'node2': {'_source': 'CLASS', - 'args': '--input1 ${{inputs.input1}} ' - '--output2 ${{outputs.output1}} ' - '--my_sample_rate ' - '${{inputs.sample_rate}}', - 'computeId': 'rezas-synapse-10', - 'conf': {'spark.driver.cores': 2, - 'spark.driver.memory': '1g', - 'spark.executor.cores': 1, - 'spark.executor.instances': 1, - 'spark.executor.memory': '1g'}, - 'entry': {'file': 'sampleword.py', - 'spark_job_entry_type': 'SparkJobPythonEntry'}, - 'identity': {'identity_type': 'Managed'}, - 'inputs': {'input1': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node1.outputs.output1}}'}, - 'sample_rate': {'job_input_type': 'literal', - 'value': '${{parent.inputs.sample_rate}}'}}, - 'name': 'node2', - 'outputs': {'output1': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output2}}'}}, - 'type': 'spark'}, - 'node3': {'_source': 'BUILDER', - 'args': '--input1 ${{inputs.input1}} ' - '--output2 ${{outputs.output1}} ' - '--my_sample_rate ' - '${{inputs.sample_rate}}', - 'computeId': 'rezas-synapse-10', - 'conf': {'spark.driver.cores': 2, - 'spark.driver.memory': '1g', - 'spark.executor.cores': 1, - 'spark.executor.instances': 1, - 'spark.executor.memory': '1g'}, - 'entry': {'file': 'sampleword.py', - 'spark_job_entry_type': 'SparkJobPythonEntry'}, - 'identity': {'identity_type': 'Managed'}, - 'inputs': {'input1': {'job_input_type': 'literal', - 'value': '${{parent.jobs.node2.outputs.output1}}'}, - 'sample_rate': {'job_input_type': 'literal', - 'value': '${{parent.inputs.sample_rate}}'}}, - 'name': 'node3', - 'outputs': {'output1': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output3}}'}}, - 'type': 'spark'}}, - 'outputs': {'pipeline_output1': {'job_output_type': 'uri_file', 'mode': 'Direct'}, - 'pipeline_output2': {'job_output_type': 'uri_folder'}, - 'pipeline_output3': {'job_output_type': 'uri_folder', 'mode': 'Direct'}}, - 'properties': {}, - 'settings': {}, - 'tags': {} + "properties": { + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_spark_function", + "inputs": { + 
"iris_data": { + "job_input_type": "uri_file", + "mode": "Direct", + "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + }, + "sample_rate": {"job_input_type": "literal", "value": "0.01"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "YAML.COMPONENT", + "args": "--input1 ${{inputs.input1}} " + "--output2 ${{outputs.output1}} " + "--my_sample_rate " + "${{inputs.sample_rate}}", + "computeId": "rezas-synapse-10", + "conf": { + "spark.driver.cores": 1, + "spark.driver.memory": "2g", + "spark.dynamicAllocation.enabled": True, + "spark.dynamicAllocation.maxExecutors": 4, + "spark.dynamicAllocation.minExecutors": 1, + "spark.executor.cores": 2, + "spark.executor.instances": 1, + "spark.executor.memory": "2g", + }, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "identity": {"identity_type": "Managed"}, + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "name": "node1", + "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, + "type": "spark", + }, + "node2": { + "_source": "CLASS", + "args": "--input1 ${{inputs.input1}} " + "--output2 ${{outputs.output1}} " + "--my_sample_rate " + "${{inputs.sample_rate}}", + "computeId": "rezas-synapse-10", + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.instances": 1, + "spark.executor.memory": "1g", + }, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "identity": {"identity_type": "Managed"}, + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node1.outputs.output1}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "name": "node2", + "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"}}, + "type": "spark", + }, + "node3": { + "_source": "BUILDER", + "args": "--input1 ${{inputs.input1}} " + "--output2 ${{outputs.output1}} " + "--my_sample_rate " + "${{inputs.sample_rate}}", + "computeId": "rezas-synapse-10", + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.instances": 1, + "spark.executor.memory": "1g", + }, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "identity": {"identity_type": "Managed"}, + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output1}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "name": "node3", + "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"}}, + "type": "spark", + }, + }, + "outputs": { + "pipeline_output1": {"job_output_type": "uri_file", "mode": "Direct"}, + "pipeline_output2": {"job_output_type": "uri_folder"}, + "pipeline_output3": {"job_output_type": "uri_folder", "mode": "Direct"}, + }, + "properties": {}, + "settings": {}, + "tags": {}, } } @@ -1362,9 +1560,9 @@ def pipeline(iris_data, sample_rate): }, }, "outputs": { - "pipeline_output1": {"job_output_type": "uri_file", 'mode': 'Direct'}, + "pipeline_output1": {"job_output_type": "uri_file", "mode": "Direct"}, "pipeline_output2": {"job_output_type": "uri_folder"}, - "pipeline_output3": 
{"job_output_type": "uri_folder", 'mode': 'Direct'}, + "pipeline_output3": {"job_output_type": "uri_folder", "mode": "Direct"}, }, "properties": {}, "settings": {}, @@ -1560,6 +1758,7 @@ def pipeline(folder1, folder2): return { "pipeline_output": data_transfer_node.outputs.output, } + pipeline1 = pipeline(folder1, folder2) pipeline_rest_obj = pipeline1._to_rest_object() pipeline_job1 = pipeline_rest_obj.as_dict() @@ -1568,46 +1767,53 @@ def pipeline(folder1, folder2): pipeline1_dict = pipeline1._to_dict() assert pipeline1_dict == pipeline_regenerated_from_rest._to_dict() - omit_fields = [ - "properties.jobs.data_transfer_node.componentId", - "properties.experiment_name" - ] + omit_fields = ["properties.jobs.data_transfer_node.componentId", "properties.experiment_name"] pipeline_job1 = pydash.omit(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'folder1': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}, - 'folder2': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'data_transfer_node': {'_source': 'CLASS', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder1}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.folder2}}'}}, - 'name': 'data_transfer_node', - 'outputs': {'output': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer'}}, - 'outputs': {'pipeline_output': {'job_output_type': 'uri_folder'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}}} + "properties": { + "display_name": "pipeline", + "inputs": { + "folder1": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + "folder2": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "data_transfer_node": { + "_source": "CLASS", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.folder1}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.folder2}}"}, + }, + "name": "data_transfer_node", + "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.pipeline_output}}"}}, + "task": "copy_data", + "type": "data_transfer", + } + }, + "outputs": {"pipeline_output": {"job_output_type": "uri_folder"}}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } + } def test_pipeline_with_data_transfer_import_database_job(self): - query_source_snowflake = 'SELECT * FROM my_table' - connection_target_azuresql = 'azureml:my_azuresql_connection' + query_source_snowflake = "SELECT * FROM my_table" + connection_target_azuresql = "azureml:my_azuresql_connection" # outputs = {"sink": Output(type=AssetTypes.MLTABLE)} - source = {'type': 'database', 'connection': 'azureml:my_snowflake_connection', - 'query': query_source_snowflake} + source = {"type": "database", "connection": "azureml:my_snowflake_connection", "query": query_source_snowflake} data_transfer_job = DataTransferImportJob( source=source, task=DataTransferTaskType.IMPORT_DATA, - compute='adf_compute1', + compute="adf_compute1", ) data_transfer_job_func = to_component(job=data_transfer_job) 
@@ -1631,54 +1837,70 @@ def pipeline(query_source_snowflake, connection_target_azuresql): # pipeline_regenerated_from_rest will only keep component id after call _to_rest_object() omit_fields = ["jobs.*.component"] pipeline1_dict = omit_with_wildcard(pipeline1._to_dict(), *omit_fields) - pipeline_regenerated_from_rest_dict = omit_with_wildcard(pipeline_regenerated_from_rest._to_dict(), *omit_fields) + pipeline_regenerated_from_rest_dict = omit_with_wildcard( + pipeline_regenerated_from_rest._to_dict(), *omit_fields + ) assert pipeline1_dict == pipeline_regenerated_from_rest_dict - omit_fields = [ - "properties.jobs.*.componentId", - "properties.experiment_name" - ] + omit_fields = ["properties.jobs.*.componentId", "properties.experiment_name"] pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target_azuresql': {'job_input_type': 'literal', - 'value': 'azureml:my_azuresql_connection'}, - 'query_source_snowflake': {'job_input_type': 'literal', - 'value': 'SELECT * FROM ' - 'my_table'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'name': 'node1', - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': 'SELECT * FROM my_table', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'name': 'node2', - 'outputs': {'sink': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output}}'}}, - 'source': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {'pipeline_output': {'job_output_type': 'mltable'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target_azuresql": { + "job_input_type": "literal", + "value": "azureml:my_azuresql_connection", + }, + "query_source_snowflake": {"job_input_type": "literal", "value": "SELECT * FROM " "my_table"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "name": "node1", + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "SELECT * FROM my_table", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "name": "node2", + "outputs": {"sink": {"type": "literal", "value": "${{parent.outputs.pipeline_output}}"}}, + "source": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {"pipeline_output": {"job_output_type": "mltable"}}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } def test_pipeline_with_data_transfer_import_stored_database_job(self): - stored_procedure = 'SelectEmployeeByJobAndDepartment' - stored_procedure_params = [{'name': 'job', 'value': 'Engineer', 'type': 'String'}, - {'name': 'department', 'value': 'Engineering', 'type': 'String'}] + stored_procedure = "SelectEmployeeByJobAndDepartment" + stored_procedure_params = [ + {"name": "job", "value": "Engineer", "type": "String"}, + {"name": "department", "value": "Engineering", "type": "String"}, + ] outputs = {"sink": Output(type=AssetTypes.MLTABLE)} - source = {'type': 
'database', 'stored_procedure': stored_procedure, - 'stored_procedure_params': stored_procedure_params} + source = { + "type": "database", + "stored_procedure": stored_procedure, + "stored_procedure_params": stored_procedure_params, + } data_transfer_job = DataTransferImportJob( source=source, outputs=outputs, @@ -1704,47 +1926,50 @@ def pipeline(query_source_snowflake, connection_target_azuresql): # pipeline_regenerated_from_rest will only keep component id after call _to_rest_object() omit_fields = ["jobs.*.component"] pipeline1_dict = omit_with_wildcard(pipeline1._to_dict(), *omit_fields) - pipeline_regenerated_from_rest_dict = omit_with_wildcard(pipeline_regenerated_from_rest._to_dict(), *omit_fields) + pipeline_regenerated_from_rest_dict = omit_with_wildcard( + pipeline_regenerated_from_rest._to_dict(), *omit_fields + ) assert pipeline1_dict == pipeline_regenerated_from_rest_dict - omit_fields = [ - "properties.jobs.*.componentId", - "properties.experiment_name" - ] + omit_fields = ["properties.jobs.*.componentId", "properties.experiment_name"] pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'name': 'node1', - 'outputs': {'sink': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output}}'}}, - 'source': {'stored_procedure': 'SelectEmployeeByJobAndDepartment', - 'stored_procedure_params': [{'name': 'job', - 'type': 'String', - 'value': 'Engineer'}, - {'name': 'department', - 'type': 'String', - 'value': 'Engineering'}], - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {'pipeline_output': {'job_output_type': 'mltable'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": {}, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "name": "node1", + "outputs": {"sink": {"type": "literal", "value": "${{parent.outputs.pipeline_output}}"}}, + "source": { + "stored_procedure": "SelectEmployeeByJobAndDepartment", + "stored_procedure_params": [ + {"name": "job", "type": "String", "value": "Engineer"}, + {"name": "department", "type": "String", "value": "Engineering"}, + ], + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + } + }, + "outputs": {"pipeline_output": {"job_output_type": "mltable"}}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } def test_pipeline_with_data_transfer_export_database_job(self): - connection_target_azuresql = 'azureml:my_azuresql_connection' + connection_target_azuresql = "azureml:my_azuresql_connection" table_name = "merged_table" cosmos_folder = Input(type=AssetTypes.URI_FILE, path="azureml://datastores/my_cosmos/paths/source_cosmos") inputs = {"source": cosmos_folder} - sink = {'type': 'database', 'connection': connection_target_azuresql, 'table_name': table_name} + sink = {"type": "database", "connection": connection_target_azuresql, "table_name": table_name} data_transfer_job = DataTransferExportJob( inputs=inputs, sink=sink, @@ -1771,53 +1996,74 @@ def pipeline(table_name, connection_target_azuresql): # pipeline_regenerated_from_rest will only keep component id after call _to_rest_object() omit_fields = ["jobs.*.component"] pipeline1_dict = omit_with_wildcard(pipeline1._to_dict(), *omit_fields) - pipeline_regenerated_from_rest_dict = 
omit_with_wildcard(pipeline_regenerated_from_rest._to_dict(), *omit_fields) + pipeline_regenerated_from_rest_dict = omit_with_wildcard( + pipeline_regenerated_from_rest._to_dict(), *omit_fields + ) assert pipeline1_dict == pipeline_regenerated_from_rest_dict - omit_fields = [ - "properties.jobs.*.componentId", - "properties.experiment_name" - ] + omit_fields = ["properties.jobs.*.componentId", "properties.experiment_name"] pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target_azuresql': {'job_input_type': 'literal', - 'value': 'azureml:my_azuresql_connection'}, - 'table_name': {'job_input_type': 'literal', - 'value': 'merged_table'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node1', - 'sink': {'connection': 'azureml:my_azuresql_connection', - 'table_name': 'merged_table', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node2', - 'sink': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'table_name': '${{parent.inputs.table_name}}', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target_azuresql": { + "job_input_type": "literal", + "value": "azureml:my_azuresql_connection", + }, + "table_name": {"job_input_type": "literal", "value": "merged_table"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_file", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node1", + "sink": { + "connection": "azureml:my_azuresql_connection", + "table_name": "merged_table", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_file", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node2", + "sink": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "table_name": "${{parent.inputs.table_name}}", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } def test_pipeline_with_data_transfer_import_file_system_job(self): - path_source_s3 = 's3://my_bucket/my_folder' - connection_target = 'azureml:my_s3_connection' + path_source_s3 = "s3://my_bucket/my_folder" + connection_target = "azureml:my_s3_connection" outputs = {"sink": Output(type=AssetTypes.URI_FOLDER, path="azureml://datastores/managed/paths/some_path")} - source = {'type': 'file_system', 'connection': connection_target, 'path': path_source_s3} + source = {"type": "file_system", "connection": connection_target, "path": path_source_s3} data_transfer_job = DataTransferImportJob( source=source, outputs=outputs, @@ -1844,52 +2090,61 @@ def pipeline(path_source_s3, connection_target): # pipeline_regenerated_from_rest will only keep component id after 
call _to_rest_object() omit_fields = ["jobs.*.component"] pipeline1_dict = omit_with_wildcard(pipeline1._to_dict(), *omit_fields) - pipeline_regenerated_from_rest_dict = omit_with_wildcard(pipeline_regenerated_from_rest._to_dict(), *omit_fields) + pipeline_regenerated_from_rest_dict = omit_with_wildcard( + pipeline_regenerated_from_rest._to_dict(), *omit_fields + ) assert pipeline1_dict == pipeline_regenerated_from_rest_dict - omit_fields = [ - "properties.jobs.*.componentId", - "properties.experiment_name" - ] + omit_fields = ["properties.jobs.*.componentId", "properties.experiment_name"] pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target': {'job_input_type': 'literal', - 'value': 'azureml:my_s3_connection'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 's3://my_bucket/my_folder'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'name': 'node1', - 'source': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'name': 'node2', - 'outputs': {'sink': {'type': 'literal', - 'value': '${{parent.outputs.pipeline_output}}'}}, - 'source': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {'pipeline_output': {'job_output_type': 'uri_folder'}}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target": {"job_input_type": "literal", "value": "azureml:my_s3_connection"}, + "path_source_s3": {"job_input_type": "literal", "value": "s3://my_bucket/my_folder"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "name": "node1", + "source": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "name": "node2", + "outputs": {"sink": {"type": "literal", "value": "${{parent.outputs.pipeline_output}}"}}, + "source": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {"pipeline_output": {"job_output_type": "uri_folder"}}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } def test_pipeline_with_data_transfer_export_file_system_job(self): - path_source_s3 = 's3://my_bucket/my_folder' - connection_target = 'azureml:my_s3_connection' + path_source_s3 = "s3://my_bucket/my_folder" + connection_target = "azureml:my_s3_connection" my_cosmos_folder = Input(type=AssetTypes.URI_FOLDER, path="azureml://datastores/my_cosmos/paths/source_cosmos") inputs = {"source": my_cosmos_folder} - sink = {'type': 'file_system', 'connection': connection_target, 'path': path_source_s3} + sink = {"type": "file_system", "connection": connection_target, "path": path_source_s3} data_transfer_job = DataTransferExportJob( inputs=inputs, @@ -1917,48 +2172,65 @@ def pipeline(path_source_s3, connection_target, cosmos_folder): # pipeline_regenerated_from_rest will only keep component id after call 
_to_rest_object() omit_fields = ["jobs.*.component"] pipeline1_dict = omit_with_wildcard(pipeline1._to_dict(), *omit_fields) - pipeline_regenerated_from_rest_dict = omit_with_wildcard(pipeline_regenerated_from_rest._to_dict(), *omit_fields) + pipeline_regenerated_from_rest_dict = omit_with_wildcard( + pipeline_regenerated_from_rest._to_dict(), *omit_fields + ) assert pipeline1_dict == pipeline_regenerated_from_rest_dict - omit_fields = [ - "properties.jobs.*.componentId", - "properties.experiment_name" - ] + omit_fields = ["properties.jobs.*.componentId", "properties.experiment_name"] pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) assert pipeline_job1 == { - 'properties': { - 'display_name': 'pipeline', - 'inputs': {'connection_target': {'job_input_type': 'literal', - 'value': 'azureml:my_s3_connection'}, - 'cosmos_folder': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 's3://my_bucket/my_folder'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'node1': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 'node1', - 'sink': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'node2': {'_source': 'BUILTIN', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'name': 'node2', - 'sink': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'DSL'}, - 'tags': {}} + "properties": { + "display_name": "pipeline", + "inputs": { + "connection_target": {"job_input_type": "literal", "value": "azureml:my_s3_connection"}, + "cosmos_folder": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + "path_source_s3": {"job_input_type": "literal", "value": "s3://my_bucket/my_folder"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILTIN", + "inputs": { + "source": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "node1", + "sink": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + "node2": { + "_source": "BUILTIN", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"} + }, + "name": "node2", + "sink": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } } def test_pipeline_with_parallel_job(self): @@ -2197,8 +2469,8 @@ def pipeline(path): }, }, "outputs": { - "pipeline_output1": {"job_output_type": "uri_folder", 'mode': 'ReadWriteMount'}, - "pipeline_output2": {"job_output_type": "uri_folder", 'mode': 'ReadWriteMount'}, + "pipeline_output1": {"job_output_type": "uri_folder", "mode": "ReadWriteMount"}, + "pipeline_output2": {"job_output_type": "uri_folder", "mode": 
"ReadWriteMount"}, }, "properties": {}, "settings": {"_source": "DSL"}, @@ -2294,8 +2566,8 @@ def pipeline(number, path): }, }, "outputs": { - "pipeline_output1": {"job_output_type": "mlflow_model", 'mode': 'ReadWriteMount'}, - "pipeline_output2": {"job_output_type": "mlflow_model", 'mode': 'ReadWriteMount'}, + "pipeline_output1": {"job_output_type": "mlflow_model", "mode": "ReadWriteMount"}, + "pipeline_output2": {"job_output_type": "mlflow_model", "mode": "ReadWriteMount"}, }, "properties": {}, "settings": {"_source": "DSL"}, diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_io_builder.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_io_builder.py index c4d7c63daf6b..a5215bd5bd75 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_io_builder.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_io_builder.py @@ -63,63 +63,60 @@ def my_pipeline(job_in_number, job_in_path): node1 = component_func(component_in_number=job_in_number, component_in_path=job_in_path) # calling result() will convert pipeline input to actual value node2 = component_func(component_in_number=job_in_number.result(), component_in_path=job_in_path.result()) - return { - "output1": node1.outputs.component_out_path, - "output2": node2.outputs.component_out_path - } + return {"output1": node1.outputs.component_out_path, "output2": node2.outputs.component_out_path} - pipeline_job1 = my_pipeline( - job_in_number=1, job_in_path=Input(path="fake_path1") - ) + pipeline_job1 = my_pipeline(job_in_number=1, job_in_path=Input(path="fake_path1")) - rest_pipeline_job = omit_with_wildcard(pipeline_job1._to_rest_object().properties.as_dict(), - *common_omit_fields) + rest_pipeline_job = omit_with_wildcard( + pipeline_job1._to_rest_object().properties.as_dict(), *common_omit_fields + ) expected_pipeline_job1 = { - 'node1': { - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'node1', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.output1}}'}}, - 'type': 'command'}, - 'node2': { - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}, - 'component_in_path': {'job_input_type': 'uri_folder', - 'uri': 'fake_path1'}}, - 'name': 'node2', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.output2}}'}}, - 'type': 'command'} + "node1": { + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "node1", + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.output1}}"}}, + "type": "command", + }, + "node2": { + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": {"job_input_type": "uri_folder", "uri": "fake_path1"}, + }, + "name": "node2", + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.output2}}"}}, + "type": "command", + }, } assert rest_pipeline_job["jobs"] == expected_pipeline_job1 - pipeline_job2 = my_pipeline( - job_in_number=2, job_in_path=Input(path="fake_path2") - ) + pipeline_job2 = my_pipeline(job_in_number=2, job_in_path=Input(path="fake_path2")) - rest_pipeline_job = omit_with_wildcard(pipeline_job2._to_rest_object().properties.as_dict(), - *common_omit_fields) + 
rest_pipeline_job = omit_with_wildcard( + pipeline_job2._to_rest_object().properties.as_dict(), *common_omit_fields + ) expected_pipeline_job2 = { - 'node1': {'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'node1', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.output1}}'}}, - 'type': 'command'}, - 'node2': {'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '2'}, - 'component_in_path': {'job_input_type': 'uri_folder', - 'uri': 'fake_path2'}}, - 'name': 'node2', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.output2}}'}}, - 'type': 'command'} + "node1": { + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "node1", + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.output1}}"}}, + "type": "command", + }, + "node2": { + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "2"}, + "component_in_path": {"job_input_type": "uri_folder", "uri": "fake_path2"}, + }, + "name": "node2", + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.output2}}"}}, + "type": "command", + }, } assert rest_pipeline_job["jobs"] == expected_pipeline_job2 @@ -127,8 +124,9 @@ def my_pipeline(job_in_number, job_in_path): pipeline_job1.jobs["node2"].inputs["component_in_number"] == 1 pipeline_job1.jobs["node2"].inputs["component_in_path"].path == "fake_path1" - rest_pipeline_job = omit_with_wildcard(pipeline_job1._to_rest_object().properties.as_dict(), - *common_omit_fields) + rest_pipeline_job = omit_with_wildcard( + pipeline_job1._to_rest_object().properties.as_dict(), *common_omit_fields + ) assert rest_pipeline_job["jobs"] == expected_pipeline_job1 def test_pipeline_input_result_multiple_levle(self): @@ -153,28 +151,29 @@ def my_pipeline_level_2(job_in_number, job_in_path): my_pipeline_level_1(job_in_number=job_in_number, job_in_path=job_in_path) component_func(component_in_number=job_in_number, component_in_path=job_in_path) - pipeline_job2 = my_pipeline_level_2( - job_in_number=2, job_in_path=Input(path="fake_path2") - ) + pipeline_job2 = my_pipeline_level_2(job_in_number=2, job_in_path=Input(path="fake_path2")) - rest_pipeline_job = omit_with_wildcard(pipeline_job2._to_rest_object().properties.as_dict(), - *common_omit_fields) + rest_pipeline_job = omit_with_wildcard( + pipeline_job2._to_rest_object().properties.as_dict(), *common_omit_fields + ) expected_pipeline_job = { - 'microsoftsamples_command_component_basic': { - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'microsoftsamples_command_component_basic', - 'type': 'command'}, - 'my_pipeline_level_1': { - 'inputs': {'job_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_number}}'}, - 'job_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'my_pipeline_level_1', - 'type': 'pipeline'} + "microsoftsamples_command_component_basic": { + "inputs": { + "component_in_number": 
{"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "microsoftsamples_command_component_basic", + "type": "command", + }, + "my_pipeline_level_1": { + "inputs": { + "job_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, + "job_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "my_pipeline_level_1", + "type": "pipeline", + }, } assert rest_pipeline_job["jobs"] == expected_pipeline_job diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_mldesigner_imports.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_mldesigner_imports.py index 09200bfb97d1..664c25b94b9c 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_mldesigner_imports.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_mldesigner_imports.py @@ -1,12 +1,12 @@ import pytest from azure.ai.ml import Input -from azure.ai.ml.entities import( +from azure.ai.ml.entities import ( Component, CommandComponent, PipelineComponent, ValidationResult, ) -from azure.ai.ml.dsl._mldesigner import( +from azure.ai.ml.dsl._mldesigner import ( _AdditionalIncludes, InternalComponent, ) @@ -24,6 +24,7 @@ class TestMldesignerImports: The attributes are needed for a certain version of mldesigner package, modifying or deleting any of them will cause compatibility issues. If there are new dependencies for mldesigner package, add new assertions in this file. """ + def test_necessay_attributes(self): assert hasattr(Component, "_customized_validate") assert hasattr(Component, "_source_path") @@ -63,4 +64,4 @@ def test_class_names(self): assert GroupInput.__name__ == "GroupInput" assert PipelineInput.__name__ == "PipelineInput" assert NodeInput.__name__ == "NodeInput" - assert NodeOutput.__name__ == "NodeOutput" \ No newline at end of file + assert NodeOutput.__name__ == "NodeOutput" diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_pipeline_builder.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_pipeline_builder.py index ae9973db94de..cc9caaf6f846 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_pipeline_builder.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_pipeline_builder.py @@ -20,7 +20,7 @@ def pipeline_func(required_input: int, optional_input: int = 2): named_step = component_func(required_input=required_input, optional_input=optional_input) pipeline_job = pipeline_func(1, 2) - assert 'named_step' in pipeline_job.jobs + assert "named_step" in pipeline_job.jobs def test_raise_exception(self): @dsl.pipeline @@ -45,9 +45,9 @@ def pipeline_func(self, required_input: int, optional_input: int = 2): mock_obj = MockClass("./some/path") pipeline_job = mock_obj.pipeline_func(1, 2) - assert 'named_step' in pipeline_job.jobs - assert 'self' in pipeline_job.inputs - assert pipeline_job.inputs['self'].path == "./some/path" + assert "named_step" in pipeline_job.jobs + assert "self" in pipeline_job.inputs + assert pipeline_job.inputs["self"].path == "./some/path" def test_node_as_input_output(self): component_yaml = components_dir / "helloworld_component.yml" @@ -57,7 +57,7 @@ def test_node_as_input_output(self): def base_pipeline_func(input_path: Input, input_number: float = 0.5): node1 = component_func(component_in_path=input_path, component_in_number=input_number) node2 = component_func( - component_in_path=node1.outputs['component_out_path'], + component_in_path=node1.outputs["component_out_path"], 
component_in_number=input_number, ) # return { @@ -78,8 +78,10 @@ def pipeline_func(input_path: Input, input_number: float = 0.5): base_pipeline_job = base_pipeline_func(Input(path="./tests/test_configs/data"), 0.5) pipeline_job = pipeline_func(Input(path="./tests/test_configs/data"), 0.5) pipeline_job.display_name = base_pipeline_job.display_name - assert pipeline_job._to_rest_object().properties.as_dict() == \ - base_pipeline_job._to_rest_object().properties.as_dict() + assert ( + pipeline_job._to_rest_object().properties.as_dict() + == base_pipeline_job._to_rest_object().properties.as_dict() + ) def test_node_as_sub_pipeline_input(self): component_yaml = components_dir / "helloworld_component.yml" @@ -89,18 +91,18 @@ def test_node_as_sub_pipeline_input(self): def sub_pipeline_func(input_path: Input, input_number: float = 0.5): node1 = component_func(component_in_path=input_path, component_in_number=input_number) node2 = component_func( - component_in_path=node1.outputs['component_out_path'], + component_in_path=node1.outputs["component_out_path"], component_in_number=input_number, ) return { - 'component_out_path': node2.outputs['component_out_path'], + "component_out_path": node2.outputs["component_out_path"], } @dsl.pipeline def base_pipeline_func(input_path: Input, input_number: float = 0.5): node1 = component_func(component_in_path=input_path, component_in_number=input_number) node2 = sub_pipeline_func( - input_path=node1.outputs['component_out_path'], + input_path=node1.outputs["component_out_path"], input_number=input_number, ) # return { @@ -121,8 +123,10 @@ def pipeline_func(input_path: Input, input_number: float = 0.5): base_pipeline_job = base_pipeline_func(Input(path="./tests/test_configs/data"), 0.5) pipeline_job = pipeline_func(Input(path="./tests/test_configs/data"), 0.5) pipeline_job.display_name = base_pipeline_job.display_name - assert pipeline_job._to_rest_object().properties.as_dict() == \ - base_pipeline_job._to_rest_object().properties.as_dict() + assert ( + pipeline_job._to_rest_object().properties.as_dict() + == base_pipeline_job._to_rest_object().properties.as_dict() + ) def test_node_as_sub_pipeline_input_error(self): single_output_component_func = load_component(components_dir / "helloworld_component.yml") @@ -132,11 +136,11 @@ def test_node_as_sub_pipeline_input_error(self): def sub_pipeline_func(input_path: Input, input_number: float = 0.5): node1 = single_output_component_func(component_in_path=input_path, component_in_number=input_number) node2 = single_output_component_func( - component_in_path=node1.outputs['component_out_path'], + component_in_path=node1.outputs["component_out_path"], component_in_number=input_number, ) return { - 'component_out_path': node2.outputs['component_out_path'], + "component_out_path": node2.outputs["component_out_path"], } @dsl.pipeline @@ -151,7 +155,6 @@ def pipeline_func(input_path: Input, input_number: float = 0.5): # return node2 with pytest.raises( - ValueError, - match="Provided input input_path is not a single output node, cannot be used as a node input." + ValueError, match="Provided input input_path is not a single output node, cannot be used as a node input." 
): pipeline_func(Input(path="./tests/test_configs/data"), 0.5) diff --git a/sdk/ml/azure-ai-ml/tests/environment/unittests/test_env_entity.py b/sdk/ml/azure-ai-ml/tests/environment/unittests/test_env_entity.py index 0b07bf8f1541..71487decddbc 100644 --- a/sdk/ml/azure-ai-ml/tests/environment/unittests/test_env_entity.py +++ b/sdk/ml/azure-ai-ml/tests/environment/unittests/test_env_entity.py @@ -51,7 +51,7 @@ def test_rest_object(self) -> None: assert env == env_from_rest_object assert env.properties == env_from_rest_object.properties assert env != diff_env - + def test_conda_file_deserialize_and_serialize(self) -> None: # Tests that conda file is deserialized same way if using load_environment() or Environment() conda_file_path = "tests/test_configs/environment/environment_files/environment.yml" @@ -110,17 +110,17 @@ def test_anonymous_environment_version_changes_with_inference_config(self): inference_conf_obj = json.loads(inference_conf) env_no_inference_config = Environment( - conda_file=tests_root_dir / "test_configs/deployments/model-1/environment/conda.yml", - image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1" - ) + conda_file=tests_root_dir / "test_configs/deployments/model-1/environment/conda.yml", + image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1", + ) env_with_inference_config = Environment( - conda_file=tests_root_dir / "test_configs/deployments/model-1/environment/conda.yml", - image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1", - inference_config=inference_conf_obj - ) + conda_file=tests_root_dir / "test_configs/deployments/model-1/environment/conda.yml", + image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1", + inference_config=inference_conf_obj, + ) assert env_no_inference_config.name == env_no_inference_config.name == ANONYMOUS_ENV_NAME assert env_no_inference_config.version != env_with_inference_config.version assert env_no_inference_config.version == "71fccbc128a554b5c3e23330ded8963b" - assert env_with_inference_config.version == "f223fcd33d34c386cf763b856300f3ce" \ No newline at end of file + assert env_with_inference_config.version == "f223fcd33d34c386cf763b856300f3ce" diff --git a/sdk/ml/azure-ai-ml/tests/import_job/e2etests/test_import_job.py b/sdk/ml/azure-ai-ml/tests/import_job/e2etests/test_import_job.py index dc24e23e22e1..c6d1dcdf6b29 100644 --- a/sdk/ml/azure-ai-ml/tests/import_job/e2etests/test_import_job.py +++ b/sdk/ml/azure-ai-ml/tests/import_job/e2etests/test_import_job.py @@ -69,11 +69,7 @@ def check_before_cancel(_import_job: ImportJob) -> True: assert import_job_2.status in [JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.RUNNING, JobStatus.STARTING] return True - import_job = assert_job_cancel( - job, - client, - check_before_cancelled=check_before_cancel - ) + import_job = assert_job_cancel(job, client, check_before_cancelled=check_before_cancel) assert import_job.type == JobType.IMPORT assert import_job.source.type == "azuresqldb" @@ -122,9 +118,7 @@ def sample_pipeline(): self.validate_test_import_pipepine_submit_cancel(pipeline, client, is_dsl=True) @classmethod - def validate_test_import_pipepine_submit_cancel( - cls, pipeline: PipelineJob, client: MLClient, is_dsl: bool - ) -> None: + def validate_test_import_pipepine_submit_cancel(cls, pipeline: PipelineJob, client: MLClient, is_dsl: bool) -> None: import_pipeline: PipelineJob = assert_job_cancel(job=pipeline, client=client) diff --git a/sdk/ml/azure-ai-ml/tests/internal/_utils.py 
b/sdk/ml/azure-ai-ml/tests/internal/_utils.py index 76db2aecd666..f7e972d89d82 100644 --- a/sdk/ml/azure-ai-ml/tests/internal/_utils.py +++ b/sdk/ml/azure-ai-ml/tests/internal/_utils.py @@ -185,10 +185,14 @@ ] # this is to shorten the test name -TEST_CASE_NAME_ENUMERATE = list(enumerate(map( - lambda params: Path(params[0]).name, - PARAMETERS_TO_TEST, -))) +TEST_CASE_NAME_ENUMERATE = list( + enumerate( + map( + lambda params: Path(params[0]).name, + PARAMETERS_TO_TEST, + ) + ) +) def get_expected_runsettings_items(runsettings_dict, client=None): @@ -208,11 +212,13 @@ def get_expected_runsettings_items(runsettings_dict, client=None): expected_values[dot_key] = "PT5M" # hack: compute_name for hdinsight will be transformed into arm str if dot_key == "compute_name" and client is not None: - expected_values[dot_key] = f"/subscriptions/{client.subscription_id}/" \ - f"resourceGroups/{client.resource_group_name}/" \ - f"providers/Microsoft.MachineLearningServices/" \ - f"workspaces/{client.workspace_name}/" \ - f"computes/{expected_values[dot_key]}" + expected_values[dot_key] = ( + f"/subscriptions/{client.subscription_id}/" + f"resourceGroups/{client.resource_group_name}/" + f"providers/Microsoft.MachineLearningServices/" + f"workspaces/{client.workspace_name}/" + f"computes/{expected_values[dot_key]}" + ) return expected_values.items() @@ -220,12 +226,12 @@ def get_expected_runsettings_items(runsettings_dict, client=None): ( "simple-command/powershell_copy.yaml", # Please DO NOT change the expected snapshot id unless you are sure you have changed the component spec - "75c43313-4777-b2e9-fe3a-3b98cabfaa77" + "75c43313-4777-b2e9-fe3a-3b98cabfaa77", ), ( "additional-includes/component_spec.yaml", # Please DO NOT change the expected snapshot id unless you are sure you have changed the component spec - "a0083afd-fee4-9c0d-65c2-ec75d0d5f048" + "1dc8271a-9184-df03-c9a5-afac8dcdcf26", ), # TODO(2076035): skip tests related to zip additional includes for now # ( diff --git a/sdk/ml/azure-ai-ml/tests/internal/e2etests/test_component.py b/sdk/ml/azure-ai-ml/tests/internal/e2etests/test_component.py index 9146820cd7c8..513b53082594 100644 --- a/sdk/ml/azure-ai-ml/tests/internal/e2etests/test_component.py +++ b/sdk/ml/azure-ai-ml/tests/internal/e2etests/test_component.py @@ -46,6 +46,7 @@ def load_registered_component( component_rest_object = component_entity._to_rest_object() return pydash.omit(component_rest_object.properties.component_spec, *omit_fields) + # previous bodiless_matcher fixture doesn't take effect because of typo, please add it in method level if needed @@ -93,7 +94,7 @@ def test_component_load( loaded_dict = load_registered_component(client, component_name, component_resource.version, omit_fields) base_dir = "./tests/test_configs/internal" - json_path = (yaml_path.rsplit(".", 1)[0] + ".json") + json_path = yaml_path.rsplit(".", 1)[0] + ".json" json_path = os.path.join(base_dir, "loaded_from_rest", os.path.relpath(json_path, base_dir)) os.makedirs(os.path.dirname(json_path), exist_ok=True) if not os.path.isfile(json_path): @@ -105,9 +106,7 @@ def test_component_load( # default value for datatransfer if expected_dict["type"] == "DataTransferComponent" and "datatransfer" not in expected_dict: - expected_dict["datatransfer"] = { - 'allow_overwrite': 'True' - } + expected_dict["datatransfer"] = {"allow_overwrite": "True"} # TODO: check if loaded environment is expected to be an ordered dict assert pydash.omit(loaded_dict, *omit_fields) == pydash.omit(expected_dict, *omit_fields) diff --git 
a/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py b/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py index 0034ba064fab..ddc51179811a 100644 --- a/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py +++ b/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py @@ -32,6 +32,7 @@ class AdditionalIncludesCheckFunc(enum.Enum): """Enum for additional includes check function""" + SKIP = 0 SELF_IS_FILE = 1 PARENT_EXISTS = 2 @@ -50,12 +51,15 @@ def test_load_v2_component(self): def test_validate_internal_component(self): yaml_path = r"./tests/test_configs/internal/component_with_code/component_spec.yaml" from azure.ai.ml.entities._validate_funcs import validate_component + validation_result = validate_component(yaml_path) assert validation_result.passed, repr(validation_result) def test_component_inputs_with_bool_and_date_value(self): - yaml_path = r"tests/test_configs/internal/command-component/command-linux/" \ - r"component_with_bool_and_data_input/component.yaml" + yaml_path = ( + r"tests/test_configs/internal/command-component/command-linux/" + r"component_with_bool_and_data_input/component.yaml" + ) component = load_component(yaml_path) assert component.inputs["bool_input"].default == "true" assert component.inputs["enum_input"].default == "true" @@ -159,12 +163,12 @@ def test_load_from_registered_internal_scope_component_rest_obj(self): "inputs": { "TextData": { "type": "AnyFile", - 'optional': False, + "optional": False, "description": "relative path on ADLS storage", }, "ExtractionClause": { "type": "string", - 'optional': False, + "optional": False, "description": 'the extraction clause,something like "column1:string, column2:int"', }, }, @@ -221,11 +225,22 @@ def test_component_serialization(self, yaml_path): for input_port_name in expected_dict.get("inputs", {}): input_port = expected_dict["inputs"][input_port_name] # enum will be transformed to string - if isinstance(input_port["type"], str) and input_port["type"].lower() in ["string", "enum", "float", "integer"]: + if isinstance(input_port["type"], str) and input_port["type"].lower() in [ + "string", + "enum", + "float", + "integer", + ]: if "enum" in input_port: - input_port["enum"] = list(map(lambda x: str(x).lower() if isinstance(x, bool) else str(x), input_port["enum"])) + input_port["enum"] = list( + map(lambda x: str(x).lower() if isinstance(x, bool) else str(x), input_port["enum"]) + ) if "default" in input_port: - input_port["default"] = str(input_port["default"]).lower() if isinstance(input_port["default"], bool) else str(input_port["default"]) + input_port["default"] = ( + str(input_port["default"]).lower() + if isinstance(input_port["default"], bool) + else str(input_port["default"]) + ) # code will be dumped as absolute path if "code" in expected_dict: @@ -251,7 +266,7 @@ def test_component_serialization(self, yaml_path): [ ("preview_command_component.yaml", "1-preview"), ("legacy_distributed_component.yaml", "1-legacy"), - ] + ], ) def test_command_mode_command_component(self, yaml_path: str, label: str): component = load_component("./tests/test_configs/internal/command-mode/{}".format(yaml_path)) @@ -389,8 +404,16 @@ def test_additional_includes(self) -> None: [ pytest.param( [ - ("component_with_additional_includes/.amlignore", "test_ignore/*\nlibrary1/ignore.py", AdditionalIncludesCheckFunc.SELF_IS_FILE), - ("component_with_additional_includes/test_ignore/a.py", None, AdditionalIncludesCheckFunc.NO_PARENT), + ( + "component_with_additional_includes/.amlignore", + 
"test_ignore/*\nlibrary1/ignore.py", + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), + ( + "component_with_additional_includes/test_ignore/a.py", + None, + AdditionalIncludesCheckFunc.NO_PARENT, + ), # will be saved to library1/ignore.py, should be ignored ("additional_includes/library1/ignore.py", None, AdditionalIncludesCheckFunc.NOT_EXISTS), # will be saved to library1/ignore.py/a.py, should be ignored @@ -404,9 +427,21 @@ def test_additional_includes(self) -> None: pytest.param( [ # additional_includes for other spec, should be kept - ("component_with_additional_includes/x.additional_includes", None, AdditionalIncludesCheckFunc.SELF_IS_FILE), - ("additional_includes/library1/x.additional_includes", None, AdditionalIncludesCheckFunc.SELF_IS_FILE), - ("additional_includes/library1/test/x.additional_includes", None, AdditionalIncludesCheckFunc.SELF_IS_FILE), + ( + "component_with_additional_includes/x.additional_includes", + None, + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), + ( + "additional_includes/library1/x.additional_includes", + None, + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), + ( + "additional_includes/library1/test/x.additional_includes", + None, + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), # additional_includes in a different level, should be kept ( "component_with_additional_includes/library2/helloworld_additional_includes.additional_includes", @@ -430,17 +465,32 @@ def test_additional_includes(self) -> None: pytest.param( [ ("component_with_additional_includes/hello.py", None, AdditionalIncludesCheckFunc.SELF_IS_FILE), - ("component_with_additional_includes/test_code/.amlignore", "hello.py", AdditionalIncludesCheckFunc.SELF_IS_FILE), - ("component_with_additional_includes/test_code/hello.py", None, AdditionalIncludesCheckFunc.NOT_EXISTS), + ( + "component_with_additional_includes/test_code/.amlignore", + "hello.py", + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), + ( + "component_with_additional_includes/test_code/hello.py", + None, + AdditionalIncludesCheckFunc.NOT_EXISTS, + ), # shall we keep the empty folder? 
- ("component_with_additional_includes/test_code/a/hello.py", None, AdditionalIncludesCheckFunc.NO_PARENT), + ( + "component_with_additional_includes/test_code/a/hello.py", + None, + AdditionalIncludesCheckFunc.NO_PARENT, + ), ], id="amlignore_subfolder", ), pytest.param( [ - ("additional_includes/library1/.amlignore", "test_ignore\nignore.py", - AdditionalIncludesCheckFunc.SELF_IS_FILE), + ( + "additional_includes/library1/.amlignore", + "test_ignore\nignore.py", + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), # will be saved to library1/ignore.py, should be ignored ("additional_includes/library1/ignore.py", None, AdditionalIncludesCheckFunc.NOT_EXISTS), # will be saved to library1/ignore.py/a.py, should be ignored @@ -453,38 +503,57 @@ def test_additional_includes(self) -> None: ), pytest.param( [ - ("additional_includes/library1/test_ignore/.amlignore", "ignore.py", - AdditionalIncludesCheckFunc.SELF_IS_FILE), + ( + "additional_includes/library1/test_ignore/.amlignore", + "ignore.py", + AdditionalIncludesCheckFunc.SELF_IS_FILE, + ), # will be saved to library1/ignore.py, should be ignored - ("additional_includes/library1/test_ignore/ignore.py", None, AdditionalIncludesCheckFunc.NOT_EXISTS), ( - "additional_includes/library1/test_ignore/ignore.py", None, AdditionalIncludesCheckFunc.NOT_EXISTS), + "additional_includes/library1/test_ignore/ignore.py", + None, + AdditionalIncludesCheckFunc.NOT_EXISTS, + ), + ( + "additional_includes/library1/test_ignore/ignore.py", + None, + AdditionalIncludesCheckFunc.NOT_EXISTS, + ), ], id="amlignore_in_additional_includes_subfolder", ), pytest.param( [ - ("component_with_additional_includes/__pycache__/a.pyc", None, - AdditionalIncludesCheckFunc.NO_PARENT), - ("component_with_additional_includes/test/__pycache__/a.pyc", None, - AdditionalIncludesCheckFunc.NO_PARENT), + ( + "component_with_additional_includes/__pycache__/a.pyc", + None, + AdditionalIncludesCheckFunc.NO_PARENT, + ), + ( + "component_with_additional_includes/test/__pycache__/a.pyc", + None, + AdditionalIncludesCheckFunc.NO_PARENT, + ), ("additional_includes/library1/__pycache__/a.pyc", None, AdditionalIncludesCheckFunc.NO_PARENT), - ("additional_includes/library1/test/__pycache__/a.pyc", None, AdditionalIncludesCheckFunc.NO_PARENT), + ( + "additional_includes/library1/test/__pycache__/a.pyc", + None, + AdditionalIncludesCheckFunc.NO_PARENT, + ), ], id="pycache", ), - ] + ], ) def test_additional_includes_advanced(self, test_files) -> None: with build_temp_folder( source_base_dir="./tests/test_configs/internal/", - relative_dirs_to_copy=[ - "component_with_additional_includes", - "additional_includes" - ], - extra_files_to_create={file: content for file, content, _ in test_files} + relative_dirs_to_copy=["component_with_additional_includes", "additional_includes"], + extra_files_to_create={file: content for file, content, _ in test_files}, ) as test_configs_dir: - yaml_path = Path(test_configs_dir) / "component_with_additional_includes" / "helloworld_additional_includes.yml" + yaml_path = ( + Path(test_configs_dir) / "component_with_additional_includes" / "helloworld_additional_includes.yml" + ) component: InternalComponent = load_component(source=yaml_path) @@ -563,8 +632,9 @@ def test_additional_includes_with_code_specified(self, yaml_path: str, has_addit assert code_path.resolve() == specified_code_path.resolve() def test_docker_file_in_additional_includes(self): - yaml_path = "./tests/test_configs/internal/component_with_dependency_" \ - "in_additional_includes/with_docker_file.yml" + 
yaml_path = ( + "./tests/test_configs/internal/component_with_dependency_" "in_additional_includes/with_docker_file.yml" + ) docker_file_path = "./tests/test_configs/internal/additional_includes/docker/DockerFile" with open(docker_file_path, "r") as docker_file: @@ -584,8 +654,9 @@ def test_docker_file_in_additional_includes(self): } def test_conda_pip_in_additional_includes(self): - yaml_path = "./tests/test_configs/internal/component_with_dependency_" \ - "in_additional_includes/with_conda_pip.yml" + yaml_path = ( + "./tests/test_configs/internal/component_with_dependency_" "in_additional_includes/with_conda_pip.yml" + ) conda_file_path = "./tests/test_configs/internal/env-conda-dependencies/conda.yaml" with open(conda_file_path, "r") as conda_file: @@ -616,27 +687,42 @@ def mock_get_artifacts(**kwargs): (artifact / f"file_{version}").touch(exist_ok=True) return str(artifact) - mocker.patch("azure.ai.ml._internal.entities._artifact_cache.ArtifactCache.get", - side_effect=mock_get_artifacts) + mocker.patch( + "azure.ai.ml._internal.entities._artifact_cache.ArtifactCache.get", side_effect=mock_get_artifacts + ) from azure.ai.ml._internal.entities._artifact_cache import ArtifactCache + yaml_path = "./tests/test_configs/internal/component_with_additional_includes/with_artifacts.yml" component: InternalComponent = load_component(source=yaml_path) assert component._validate().passed, repr(component._validate()) with component._resolve_local_code() as code: code_path = code.path assert code_path.is_dir() - for path in ['version_1/', 'version_1/file', 'version_2/', 'version_2/file', - 'file_version_1', 'file_version_2', 'DockerFile']: + for path in [ + "version_1/", + "version_1/file", + "version_2/", + "version_2/file", + "file_version_1", + "file_version_2", + "DockerFile", + ]: assert (code_path / path).exists() - yaml_path = "./tests/test_configs/internal/component_with_additional_includes/" \ - "artifacts_additional_includes_with_conflict.yml" + yaml_path = ( + "./tests/test_configs/internal/component_with_additional_includes/" + "artifacts_additional_includes_with_conflict.yml" + ) component: InternalComponent = load_component(source=yaml_path) validation_result = component._validate() assert validation_result.passed is False assert "There are conflict files in additional include" in validation_result.error_messages["*"] - assert 'test_additional_include:version_1 in component-sdk-test-feed' in validation_result.error_messages["*"] - assert 'test_additional_include:version_3 in component-sdk-test-feed' in validation_result.error_messages["*"] + assert ( + "test_additional_include:version_1 in component-sdk-test-feed" in validation_result.error_messages["*"] + ) + assert ( + "test_additional_include:version_3 in component-sdk-test-feed" in validation_result.error_messages["*"] + ) @pytest.mark.parametrize( "yaml_path,expected_error_msg_prefix", @@ -713,13 +799,13 @@ def test_component_input_with_attrs(self) -> None: "param_string_with_quote_yes_value": {"default": "yes", "type": "string"}, }, "outputs": { - 'output_data_path': { - 'datastore_mode': 'mount', - 'description': 'Path to the data', - 'is_link_mode': True, - 'type': 'path' + "output_data_path": { + "datastore_mode": "mount", + "description": "Path to the data", + "is_link_mode": True, + "type": "path", } - } + }, } assert component._to_rest_object().properties.component_spec["inputs"] == expected_inputs["inputs"] assert component._to_rest_object().properties.component_spec["outputs"] == expected_inputs["outputs"] @@ -737,12 
+823,12 @@ def test_component_output_with_attrs(self) -> None: expected_outputs = { "path_with_optional": { # unknown field optional will be ignored - "type": 'AnyDirectory', + "type": "AnyDirectory", }, "primitive_is_control": { "is_control": True, "type": "boolean", - } + }, } assert component._to_rest_object().properties.component_spec["outputs"] == expected_outputs assert component._validate().passed is True, repr(component._validate()) @@ -785,8 +871,7 @@ def test_anonymous_component_reuse(self, relative_yaml_path: str, expected_snaps code.name = expected_snapshot_id with pytest.raises( - AttributeError, - match="InternalCode name are calculated based on its content and cannot be changed.*" + AttributeError, match="InternalCode name are calculated based on its content and cannot be changed.*" ): code.name = expected_snapshot_id + "1" @@ -794,8 +879,7 @@ def test_snapshot_id_calculation(self): origin_test_configs_dir = Path("./tests/test_configs/internal/") with tempfile.TemporaryDirectory() as test_configs_dir: shutil.copytree( - origin_test_configs_dir / "component-reuse/simple-command", - Path(test_configs_dir) / "simple-command" + origin_test_configs_dir / "component-reuse/simple-command", Path(test_configs_dir) / "simple-command" ) yaml_path = Path(test_configs_dir) / "simple-command" / "powershell_copy.yaml" @@ -817,34 +901,19 @@ def test_component_serialization_corner_case(self): assert component rest_object = component._to_rest_object() assert rest_object.properties.component_spec == { - '$schema': 'https://componentsdk.azureedge.net/jsonschema/CommandComponent.json', - '_source': 'YAML.COMPONENT', - 'command': 'echo {inputs.input_float} && echo {inputs.delimiter}', - 'display_name': 'Hello Command', - 'environment': {'name': 'AzureML-Designer', 'os': 'Linux'}, - 'inputs': { - 'input_float': { - 'default': '1', # previously this is 1.0 - 'optional': False, - 'type': 'Float' - }, - 'input_boolean': { - 'default': 'False', - 'optional': False, - 'type': 'Boolean' - }, - 'delimiter': { - 'default': '\t', - 'optional': True, - 'type': 'String' - }, + "$schema": "https://componentsdk.azureedge.net/jsonschema/CommandComponent.json", + "_source": "YAML.COMPONENT", + "command": "echo {inputs.input_float} && echo {inputs.delimiter}", + "display_name": "Hello Command", + "environment": {"name": "AzureML-Designer", "os": "Linux"}, + "inputs": { + "input_float": {"default": "1", "optional": False, "type": "Float"}, # previously this is 1.0 + "input_boolean": {"default": "False", "optional": False, "type": "Boolean"}, + "delimiter": {"default": "\t", "optional": True, "type": "String"}, }, - 'is_deterministic': True, - 'name': 'hello_command', - 'type': 'CommandComponent', - 'version': '0.10', # previously this is 0.1 - "datatransfer": { - "cloud_type": "aether" - } + "is_deterministic": True, + "name": "hello_command", + "type": "CommandComponent", + "version": "0.10", # previously this is 0.1 + "datatransfer": {"cloud_type": "aether"}, } - diff --git a/sdk/ml/azure-ai-ml/tests/internal/unittests/test_pipeline_job.py b/sdk/ml/azure-ai-ml/tests/internal/unittests/test_pipeline_job.py index 52d444e94962..aea328d81b90 100644 --- a/sdk/ml/azure-ai-ml/tests/internal/unittests/test_pipeline_job.py +++ b/sdk/ml/azure-ai-ml/tests/internal/unittests/test_pipeline_job.py @@ -280,9 +280,7 @@ def test_gjd_internal_component_in_pipeline(self): } def test_elastic_component_in_pipeline(self): - yaml_path = ( - "./tests/test_configs/internal/command-component-ls/ls_command_component.yaml" # itp & elastic 
are based on CommandComponent - ) + yaml_path = "./tests/test_configs/internal/command-component-ls/ls_command_component.yaml" # itp & elastic are based on CommandComponent node_func: CommandComponent = load_component(yaml_path) node = node_func() configuration = ITPConfiguration( @@ -337,9 +335,7 @@ def test_elastic_component_in_pipeline(self): } def test_singularity_component_in_pipeline(self): - yaml_path = ( - "./tests/test_configs/internal/command-component-ls/ls_command_component.yaml" # singularity is based on CommandComponent - ) + yaml_path = "./tests/test_configs/internal/command-component-ls/ls_command_component.yaml" # singularity is based on CommandComponent node_func: CommandComponent = load_component(yaml_path) node = node_func() configuration = AISuperComputerConfiguration( @@ -567,6 +563,7 @@ def pipeline_func(compute_name: str = "cpu-cluster", environment_name: str = "Az ) node.compute = compute_name node.environment = environment_name + pipeline_job = pipeline_func() assert pipeline_job._validate().passed, repr(pipeline_job._validate()) rest_object = pipeline_job._to_rest_object().properties.jobs["node"] diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/e2etests/test_upload_download.py b/sdk/ml/azure-ai-ml/tests/internal_utils/e2etests/test_upload_download.py index 981da3eaaa72..91f05acc20ed 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/e2etests/test_upload_download.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/e2etests/test_upload_download.py @@ -90,10 +90,7 @@ def artifact_path_dir(tmpdir_factory, variable_recorder) -> str: # type: ignore @pytest.mark.e2etest @pytest.mark.usefixtures("recorded_test") -@pytest.mark.skipif( - condition=not is_live(), - reason="test are flaky in playback" -) +@pytest.mark.skipif(condition=not is_live(), reason="test are flaky in playback") @pytest.mark.core_sdk_test class TestUpload(AzureRecordedTestCase): def test_upload_file_blob( diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cache_utils.py b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cache_utils.py index b9c1fab95938..949ed5fde1d7 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cache_utils.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cache_utils.py @@ -76,4 +76,4 @@ def test_on_disk_cache_share_among_users(self, mock_machinelearning_client: MLCl self.register_target_node_and_resolve(resolver) assert target_cache_path.exists() - assert stat.filemode(target_cache_path.stat().st_mode) == '-rw-rw-rw-' + assert stat.filemode(target_cache_path.stat().st_mode) == "-rw-rw-rw-" diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cloud_environments.py b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cloud_environments.py index 93f0df644dee..9666e937e13d 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cloud_environments.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_cloud_environments.py @@ -15,52 +15,46 @@ _get_storage_endpoint_from_metadata, _set_cloud, ) -from azure.ai.ml.constants._common import ArmConstants, AZUREML_CLOUD_ENV_NAME +from azure.ai.ml.constants._common import ArmConstants, AZUREML_CLOUD_ENV_NAME from azure.mgmt.core import ARMPipelineClient + def mocked_send_request_get(*args, **kwargs): class MockResponse: def __init__(self): self.status_code = 201 + def __enter__(self): return self + def __exit__(self, exc_type, exc_value, traceback): return + def json(self): return [ { - "name": "TEST_ENV", - "portal": "testportal.azure.com", + 
"name": "TEST_ENV", + "portal": "testportal.azure.com", "resourceManager": "testresourcemanager.azure.com", - "authentication": { - "loginEndpoint": "testdirectoryendpoint.azure.com" - }, - "suffixes": { - "storage": "teststorageendpoint" - } + "authentication": {"loginEndpoint": "testdirectoryendpoint.azure.com"}, + "suffixes": {"storage": "teststorageendpoint"}, }, { - "name": "TEST_ENV2", - "portal": "testportal.azure.windows.net", + "name": "TEST_ENV2", + "portal": "testportal.azure.windows.net", "resourceManager": "testresourcemanager.azure.com", - "authentication": { - "loginEndpoint": "testdirectoryendpoint.azure.com" - }, - "suffixes": { - "storage": "teststorageendpoint" - } + "authentication": {"loginEndpoint": "testdirectoryendpoint.azure.com"}, + "suffixes": {"storage": "teststorageendpoint"}, }, - { - "name": "MISCONFIGURED" - } + {"name": "MISCONFIGURED"}, ] + return MockResponse() @pytest.mark.unittest @pytest.mark.core_sdk_test class TestCloudEnvironments: - @mock.patch.dict(os.environ, {AZUREML_CLOUD_ENV_NAME: AzureEnvironments.ENV_DEFAULT}, clear=True) def test_set_valid_cloud_details_china(self): cloud_environment = AzureEnvironments.ENV_CHINA @@ -112,13 +106,13 @@ def test_get_default_cloud(self): with mock.patch("os.environ", {AZUREML_CLOUD_ENV_NAME: "yadadada"}): cloud_name = _get_default_cloud_name() assert cloud_name == "yadadada" - + def test_get_registry_endpoint_from_public(self): cloud_environment = AzureEnvironments.ENV_DEFAULT _set_cloud(cloud_environment) base_url = _get_registry_discovery_endpoint_from_metadata(cloud_environment) assert "https://eastus.api.azureml.ms/" in base_url - + def test_get_registry_endpoint_from_china(self): cloud_environment = AzureEnvironments.ENV_CHINA _set_cloud(cloud_environment) @@ -134,8 +128,8 @@ def test_get_registry_endpoint_from_us_gov(self): @mock.patch.dict(os.environ, {}, clear=True) @mock.patch("azure.mgmt.core.ARMPipelineClient.send_request", side_effect=mocked_send_request_get) def test_get_cloud_from_arm(self, mock_arm_pipeline_client_send_request): - - _set_cloud('TEST_ENV') + + _set_cloud("TEST_ENV") cloud_details = _get_cloud_information_from_metadata("TEST_ENV") assert cloud_details.get("cloud") == "TEST_ENV" @@ -143,7 +137,7 @@ def test_get_cloud_from_arm(self, mock_arm_pipeline_client_send_request): @mock.patch("azure.mgmt.core.ARMPipelineClient.send_request", side_effect=mocked_send_request_get) def test_all_endpointurls_used(self, mock_get): cloud_details = _get_cloud_details("TEST_ENV") - endpoint_urls = [a for a in dir(EndpointURLS) if not a.startswith('__')] + endpoint_urls = [a for a in dir(EndpointURLS) if not a.startswith("__")] for url in endpoint_urls: try: cloud_details[EndpointURLS.__dict__[url]] @@ -155,8 +149,11 @@ def test_all_endpointurls_used(self, mock_get): @mock.patch("azure.mgmt.core.ARMPipelineClient.send_request", side_effect=mocked_send_request_get) def test_metadata_registry_endpoint(self, mock_get): cloud_details = _get_cloud_details("TEST_ENV2") - assert cloud_details.get(EndpointURLS.REGISTRY_DISCOVERY_ENDPOINT) == "https://test_env2west.api.azureml.windows.net/" - + assert ( + cloud_details.get(EndpointURLS.REGISTRY_DISCOVERY_ENDPOINT) + == "https://test_env2west.api.azureml.windows.net/" + ) + @mock.patch.dict(os.environ, {}, clear=True) @mock.patch("azure.mgmt.core.ARMPipelineClient.send_request", side_effect=mocked_send_request_get) def test_arm_misconfigured(self, mock_get): diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_ml_client.py 
b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_ml_client.py index d734e7c3f805..a1e78f481e5e 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_ml_client.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_ml_client.py @@ -421,7 +421,9 @@ def test_ml_client_with_invalid_cloud(self, mock_credential): assert ml_client._kwargs["cloud"] == "SomeInvalidCloudName" assert "Unknown cloud environment supplied" in str(e) - def test_ml_client_with_both_workspace_registry_names_throws(self, e2e_ws_scope: OperationScope, auth: ClientSecretCredential) -> None: + def test_ml_client_with_both_workspace_registry_names_throws( + self, e2e_ws_scope: OperationScope, auth: ClientSecretCredential + ) -> None: with pytest.raises(ValidationException) as exception: MLClient( credential=auth, @@ -429,7 +431,4 @@ def test_ml_client_with_both_workspace_registry_names_throws(self, e2e_ws_scope: registry_name="testfeed", ) message = exception.value.args[0] - assert ( - message - == "Both workspace_name and registry_name cannot be used together, for the ml_client." - ) + assert message == "Both workspace_name and registry_name cannot be used together, for the ml_client." diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_operation_orchestrator.py b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_operation_orchestrator.py index 601d9104fa60..cd16e1b25bc3 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_operation_orchestrator.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_operation_orchestrator.py @@ -187,8 +187,8 @@ def create_yaml_inline_model(tmp_path: Path, resource_group_name: str) -> Path: @pytest.mark.core_sdk_test class TestOperationOrchestration: def test_registry_environment(self, operation_orchestrator: OperationOrchestrator) -> None: - test_input = '//registries/my-registry/environments/conda_name_version_e2e/versions/1.0.2' - expected = 'azureml://registries/my-registry/environments/conda_name_version_e2e/versions/1.0.2' + test_input = "//registries/my-registry/environments/conda_name_version_e2e/versions/1.0.2" + expected = "azureml://registries/my-registry/environments/conda_name_version_e2e/versions/1.0.2" actual = operation_orchestrator.get_asset_arm_id(test_input, azureml_type=AzureMLResourceType.ENVIRONMENT) assert actual == expected @@ -197,8 +197,10 @@ def test_get_asset_arm_id_when_model_already_created(self, operation_orchestrato model = Model(id=test_id, name="some_name", version="1") actual = operation_orchestrator.get_asset_arm_id(model, azureml_type=AzureMLResourceType.MODEL) assert actual == test_id - - def test_get_asset_arm_id_when_environment_already_created(self, operation_orchestrator: OperationOrchestrator) -> None: + + def test_get_asset_arm_id_when_environment_already_created( + self, operation_orchestrator: OperationOrchestrator + ) -> None: test_id = "azureml://registries/my-registry/environments/env-base/versions/1" environment = Environment(id=test_id, name="some_name", version="1") actual = operation_orchestrator.get_asset_arm_id(environment, azureml_type=AzureMLResourceType.ENVIRONMENT) diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_persistent_locals.py b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_persistent_locals.py index aa6fd038fb10..4db1d36ece0e 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_persistent_locals.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_persistent_locals.py @@ -2,8 +2,10 @@ import pytest 
-from azure.ai.ml._utils._func_utils import PersistentLocalsFunctionBytecodeBuilder, \ - PersistentLocalsFunctionProfilerBuilder +from azure.ai.ml._utils._func_utils import ( + PersistentLocalsFunctionBytecodeBuilder, + PersistentLocalsFunctionProfilerBuilder, +) class MockClass: @@ -47,12 +49,12 @@ def mock_function(mock_arg): outputs, _locals = self.get_outputs_and_locals(mock_function, {"mock_arg": 1}) assert outputs == (1, 1) - assert set(_locals.keys()) == {'mock_arg', 'mock_local_variable'} + assert set(_locals.keys()) == {"mock_arg", "mock_local_variable"} def test_func_with_named_self_argument(self): outputs, _locals = self.get_outputs_and_locals(mock_function_with_self, {"self": 1}) assert outputs == 1 - assert set(_locals.keys()) == {'self'} + assert set(_locals.keys()) == {"self"} def test_raise_exception(self): def mock_error_exception(): @@ -66,28 +68,28 @@ def test_instance_func(self): mock_obj = MockClass(1) outputs, _locals = self.get_outputs_and_locals(mock_obj.mock_instance_func, {"arg": 1}) assert outputs == 2 - assert set(_locals.keys()) == {'result', 'arg', 'self'} + assert set(_locals.keys()) == {"result", "arg", "self"} def test_class_method(self): mock_obj = MockClass(1) outputs, _locals = self.get_outputs_and_locals(mock_obj.mock_class_method, {"arg": 1}) assert outputs == 3 - assert set(_locals.keys()) == {'result', 'arg', 'cls'} + assert set(_locals.keys()) == {"result", "arg", "cls"} def test_instance_call(self): mock_obj = MockClass(1) outputs, _locals = self.get_outputs_and_locals(mock_obj, {"arg": 1}) assert outputs == 2 - assert set(_locals.keys()) == {'result', 'arg', 'self'} + assert set(_locals.keys()) == {"result", "arg", "self"} def test_invalid_passed_func(self): - with pytest.raises(TypeError, match='func must be a function or a callable object'): + with pytest.raises(TypeError, match="func must be a function or a callable object"): self.get_outputs_and_locals(1, {"arg": 1}) def test_param_conflict(self): with pytest.raises( ValueError, - match=re.escape('Injected param name __self conflicts with function args [\'__self\']'), + match=re.escape("Injected param name __self conflicts with function args ['__self']"), ): self.get_outputs_and_locals(mock_conflict_function, {"arg": 1}) @@ -100,6 +102,8 @@ def get_outputs_and_locals(self, func, injected_params): import bytecode except ImportError: import subprocess - subprocess.check_call(['pip', 'install', 'bytecode']) + + subprocess.check_call(["pip", "install", "bytecode"]) import bytecode + return PersistentLocalsFunctionBytecodeBuilder().call(func, injected_params) diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_storage_utils.py b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_storage_utils.py index 7a09a6161d01..4b1a71cf86fd 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_storage_utils.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_storage_utils.py @@ -1,4 +1,3 @@ - import pytest from azure.ai.ml._utils._storage_utils import get_ds_name_and_path_prefix @@ -10,36 +9,35 @@ def test_storage_uri_to_prefix( self, ) -> None: # These are the asset storage patterns supported for download - reg_uri_1 = 'https://ccccccccddddd345.blob.core.windows.net/demoregist-16d33653-20bf-549b-a3c1-17d975359581/ExperimentRun/dcid.5823bbb4-bb28-497c-b9f2-1ff3a0778b10/model' - reg_uri_2 = 'https://ccccccccccc1978ccc.blob.core.windows.net/demoregist-b46fb119-d3f8-5994-a971-a9c730227846/LocalUpload/0c225a0230907e61c00ea33eac35a54d/model.pkl' - reg_uri_3 = 
'https://ccccccccddr546ddd.blob.core.windows.net/some-reg-9717e928-33c2-50c2-90f5-f410b12b8727/sklearn_regression_model.pkl' - workspace_uri_1 = 'azureml://subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/000000000000000/workspaces/some_test_3/datastores/workspaceblobstore/paths/LocalUpload/26960525964086056a7301dd061fb9be/lightgbm_mlflow_model' - - assert get_ds_name_and_path_prefix(reg_uri_1, "registry_name") == (None,'ExperimentRun/dcid.5823bbb4-bb28-497c-b9f2-1ff3a0778b10/model') - assert get_ds_name_and_path_prefix(reg_uri_2, "registry_name") == (None, 'LocalUpload/0c225a0230907e61c00ea33eac35a54d/model.pkl') - assert get_ds_name_and_path_prefix(reg_uri_3, "registry_name") == (None, 'sklearn_regression_model.pkl') - assert get_ds_name_and_path_prefix(workspace_uri_1) == ('workspaceblobstore','LocalUpload/26960525964086056a7301dd061fb9be/lightgbm_mlflow_model') + reg_uri_1 = "https://ccccccccddddd345.blob.core.windows.net/demoregist-16d33653-20bf-549b-a3c1-17d975359581/ExperimentRun/dcid.5823bbb4-bb28-497c-b9f2-1ff3a0778b10/model" + reg_uri_2 = "https://ccccccccccc1978ccc.blob.core.windows.net/demoregist-b46fb119-d3f8-5994-a971-a9c730227846/LocalUpload/0c225a0230907e61c00ea33eac35a54d/model.pkl" + reg_uri_3 = "https://ccccccccddr546ddd.blob.core.windows.net/some-reg-9717e928-33c2-50c2-90f5-f410b12b8727/sklearn_regression_model.pkl" + workspace_uri_1 = "azureml://subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/000000000000000/workspaces/some_test_3/datastores/workspaceblobstore/paths/LocalUpload/26960525964086056a7301dd061fb9be/lightgbm_mlflow_model" + + assert get_ds_name_and_path_prefix(reg_uri_1, "registry_name") == ( + None, + "ExperimentRun/dcid.5823bbb4-bb28-497c-b9f2-1ff3a0778b10/model", + ) + assert get_ds_name_and_path_prefix(reg_uri_2, "registry_name") == ( + None, + "LocalUpload/0c225a0230907e61c00ea33eac35a54d/model.pkl", + ) + assert get_ds_name_and_path_prefix(reg_uri_3, "registry_name") == (None, "sklearn_regression_model.pkl") + assert get_ds_name_and_path_prefix(workspace_uri_1) == ( + "workspaceblobstore", + "LocalUpload/26960525964086056a7301dd061fb9be/lightgbm_mlflow_model", + ) - def test_storage_uri_to_prefix_malformed( self, ) -> None: - reg_uri_bad = 'https://ccccccccddd4512d.blob.core.windows.net/5823bbb4-bb28-497c-b9f2-1ff3a0778b10' - workspace_uri_bad = 'azureml://subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/000000000000000/workspaces/some_test_3/datastores/workspaceblobstore/path/LocalUpload/26960525964086056a7301dd061fb9be/lightgbm_mlflow_model' + reg_uri_bad = "https://ccccccccddd4512d.blob.core.windows.net/5823bbb4-bb28-497c-b9f2-1ff3a0778b10" + workspace_uri_bad = "azureml://subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/000000000000000/workspaces/some_test_3/datastores/workspaceblobstore/path/LocalUpload/26960525964086056a7301dd061fb9be/lightgbm_mlflow_model" with pytest.raises(Exception) as e: get_ds_name_and_path_prefix(reg_uri_bad, "registry_name") - assert ( - 'Registry asset URI could not be parsed.' - in str(e.value) - ) + assert "Registry asset URI could not be parsed." in str(e.value) with pytest.raises(Exception) as e: get_ds_name_and_path_prefix(workspace_uri_bad) - assert ( - 'Workspace asset URI could not be parsed.' - in str(e.value) - ) - - - \ No newline at end of file + assert "Workspace asset URI could not be parsed." 
in str(e.value) diff --git a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_utils.py b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_utils.py index 2d3a9ce9fc0d..2be815c7df65 100644 --- a/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_utils.py +++ b/sdk/ml/azure-ai-ml/tests/internal_utils/unittests/test_utils.py @@ -81,6 +81,7 @@ def test_open_with_int_mode(self): def get_int_mode(file_path: str) -> str: int_mode = os.stat(file_path).st_mode & 0o777 return oct(int_mode) + with tempfile.TemporaryDirectory() as temp_dir: target_file_path = temp_dir + "/test.txt" with open(target_file_path, "w") as f: @@ -103,7 +104,7 @@ def test_get_valid_dot_keys_with_wildcard(self): "l1_2": { "l2": 3, }, - } + }, } assert get_valid_dot_keys_with_wildcard(root, "simple") == ["simple"] assert get_valid_dot_keys_with_wildcard(root, "deep.l1.l2") == ["deep.l1.l2"] diff --git a/sdk/ml/azure-ai-ml/tests/job_common/unittests/test_job_operations.py b/sdk/ml/azure-ai-ml/tests/job_common/unittests/test_job_operations.py index d8f949dc92a4..847a8a7c38b0 100644 --- a/sdk/ml/azure-ai-ml/tests/job_common/unittests/test_job_operations.py +++ b/sdk/ml/azure-ai-ml/tests/job_common/unittests/test_job_operations.py @@ -189,14 +189,16 @@ def test_user_identity_get_aml_token(self, mock_method, mock_job_operation: JobO with patch.object(mock_job_operation._credential, "get_token") as mock_get_token: mock_get_token.return_value = AccessToken( - token=jwt.encode({"aud": aml_resource_id}, key="utf-8"), expires_on=1234) + token=jwt.encode({"aud": aml_resource_id}, key="utf-8"), expires_on=1234 + ) mock_job_operation.create_or_update(job=job) mock_job_operation._operation_2022_12_preview.create_or_update.assert_called_once() mock_job_operation._credential.get_token.assert_called_once_with(azure_ml_scopes[0]) with patch.object(mock_job_operation._credential, "get_token") as mock_get_token: mock_get_token.return_value = AccessToken( - token=jwt.encode({"aud": "https://management.azure.com"}, key="utf-8"), expires_on=1234) + token=jwt.encode({"aud": "https://management.azure.com"}, key="utf-8"), expires_on=1234 + ) with pytest.raises(Exception): mock_job_operation.create_or_update(job=job) diff --git a/sdk/ml/azure-ai-ml/tests/local_endpoint/unittests/test_local_endpoint_validator.py b/sdk/ml/azure-ai-ml/tests/local_endpoint/unittests/test_local_endpoint_validator.py index a9c3f305de68..0c3fdeac4068 100644 --- a/sdk/ml/azure-ai-ml/tests/local_endpoint/unittests/test_local_endpoint_validator.py +++ b/sdk/ml/azure-ai-ml/tests/local_endpoint/unittests/test_local_endpoint_validator.py @@ -52,9 +52,7 @@ def test_environment_does_not_contain_local_docker_fails(self): def test_environment_contains_base_image_succeeds(self): environment = Environment(docker_image="ubuntu:latest") deployment = ManagedOnlineDeployment(name="deployment", environment=environment) - (base_image, dockerfile) = get_local_environment_artifacts( - endpoint_name="test-endpoint", deployment=deployment - ) + (base_image, dockerfile) = get_local_environment_artifacts(endpoint_name="test-endpoint", deployment=deployment) assert "ubuntu:latest" == base_image assert dockerfile is None @@ -64,9 +62,7 @@ def test_environment_contains_dockerfile_succeeds(self): name="deployment", environment=environment, ) - (base_image, dockerfile) = get_local_environment_artifacts( - endpoint_name="test-endpoint", deployment=deployment - ) + (base_image, dockerfile) = get_local_environment_artifacts(endpoint_name="test-endpoint", deployment=deployment) assert 
base_image is None assert "file:./Dockerfile" == dockerfile diff --git a/sdk/ml/azure-ai-ml/tests/model/e2etests/test_model.py b/sdk/ml/azure-ai-ml/tests/model/e2etests/test_model.py index b553021e2ead..87d445f49e27 100644 --- a/sdk/ml/azure-ai-ml/tests/model/e2etests/test_model.py +++ b/sdk/ml/azure-ai-ml/tests/model/e2etests/test_model.py @@ -29,6 +29,7 @@ def artifact_path(tmpdir_factory) -> str: # type: ignore file_name.write("content") return str(file_name) + # previous bodiless_matcher fixture doesn't take effect because of typo, please add it in method level if needed @@ -145,10 +146,7 @@ def get_model_list(): client.models.restore(name=name) assert name in get_model_list() - @pytest.mark.skipif( - condition=not is_live(), - reason="Registry uploads do not record well. Investigate later" - ) + @pytest.mark.skipif(condition=not is_live(), reason="Registry uploads do not record well. Investigate later") def test_create_get_download_model_registry(self, registry_client: MLClient, randstr: Callable[[], str]) -> None: model_path = Path("./tests/test_configs/model/model_full.yml") model_name = randstr("model_name") @@ -175,10 +173,7 @@ def test_create_get_download_model_registry(self, registry_client: MLClient, ran assert os.path.exists(wd) assert os.path.exists(f"{wd}/lightgbm_mlflow_model/MLmodel") - @pytest.mark.skipif( - condition=not is_live(), - reason="Registry uploads do not record well. Investigate later" - ) + @pytest.mark.skipif(condition=not is_live(), reason="Registry uploads do not record well. Investigate later") def test_list_model_registry(self, registry_client: MLClient, randstr: Callable[[], str]) -> None: model_path = Path("./tests/test_configs/model/model_full.yml") model_name = randstr("model_name") @@ -197,10 +192,7 @@ def test_list_model_registry(self, registry_client: MLClient, randstr: Callable[ model_list = [m.name for m in model_list if m is not None] assert model.name in model_list - @pytest.mark.skipif( - condition=not is_live(), - reason="Registry uploads do not record well. Investigate later" - ) + @pytest.mark.skipif(condition=not is_live(), reason="Registry uploads do not record well. Investigate later") def test_promote_model(self, randstr: Callable[[], str], client: MLClient, registry_client: MLClient) -> None: # Create model in workspace model_path = Path("./tests/test_configs/model/model_full.yml") @@ -221,4 +213,4 @@ def test_promote_model(self, randstr: Callable[[], str], client: MLClient, regis # 4. Check that model has been promoted model = registry_client.models.get(name=model_name, version=model_version) assert model.name == model_name - assert model.version == model_version \ No newline at end of file + assert model.version == model_version diff --git a/sdk/ml/azure-ai-ml/tests/online_services/e2etests/test_online_deployment.py b/sdk/ml/azure-ai-ml/tests/online_services/e2etests/test_online_deployment.py index bd954abe6b46..b5b6638c0559 100644 --- a/sdk/ml/azure-ai-ml/tests/online_services/e2etests/test_online_deployment.py +++ b/sdk/ml/azure-ai-ml/tests/online_services/e2etests/test_online_deployment.py @@ -14,7 +14,9 @@ class TestOnlineDeployment(AzureRecordedTestCase): @pytest.mark.skip( reason="Tests failing in internal automation due to lack of quota. Cannot record or run in live mode." 
) - def test_online_deployment(self, client: MLClient, rand_online_name: Callable[[], str], rand_online_deployment_name: Callable[[], str]) -> None: + def test_online_deployment( + self, client: MLClient, rand_online_name: Callable[[], str], rand_online_deployment_name: Callable[[], str] + ) -> None: endpoint_yaml = "tests/test_configs/deployments/online/simple_online_endpoint_mir.yaml" deployment_yaml = "tests/test_configs/deployments/online/online_deployment_1.yaml" name = rand_online_name("name") @@ -52,7 +54,9 @@ def test_online_deployment(self, client: MLClient, rand_online_name: Callable[[] client.online_endpoints.begin_delete(name=endpoint.name) @pytest.mark.skip(reason="Known failure") - def test_online_deployment_skip_script_validation(self, client: MLClient, rand_online_name: Callable[[], str], rand_online_deployment_name: Callable[[], str]) -> None: + def test_online_deployment_skip_script_validation( + self, client: MLClient, rand_online_name: Callable[[], str], rand_online_deployment_name: Callable[[], str] + ) -> None: online_endpoint_name = rand_online_name("online_endpoint_name") online_deployment_name = rand_online_deployment_name("online_deployment_name") @@ -88,4 +92,4 @@ def test_online_deployment_skip_script_validation(self, client: MLClient, rand_o except Exception as ex: raise ex finally: - client.online_endpoints.begin_delete(name=online_endpoint_name) \ No newline at end of file + client.online_endpoints.begin_delete(name=online_endpoint_name) diff --git a/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_deployments.py b/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_deployments.py index d642b2067458..d18300338c17 100644 --- a/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_deployments.py +++ b/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_deployments.py @@ -139,7 +139,7 @@ def mock_online_deployment_operations( class TestOnlineDeploymentOperations: @pytest.mark.skipif( condition=platform.python_implementation == "PyPy", - reason="writing dumped entity back to file does not work on PyPy" + reason="writing dumped entity back to file does not work on PyPy", ) def test_online_deployment_k8s_create( self, diff --git a/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_endpoints.py b/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_endpoints.py index 6ab93d260819..4cbeb221ca52 100644 --- a/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_endpoints.py +++ b/sdk/ml/azure-ai-ml/tests/online_services/unittests/test_online_endpoints.py @@ -393,20 +393,16 @@ def test_online_invoke( mock_online_endpoint_operations._online_operation.get.assert_called_once() mock_online_endpoint_operations._online_operation.list_keys.assert_called_once() - def test_create_no_file_throw_exception( - self, mock_online_endpoint_operations: OnlineEndpointOperations - ) -> None: + def test_create_no_file_throw_exception(self, mock_online_endpoint_operations: OnlineEndpointOperations) -> None: with pytest.raises(Exception): mock_online_endpoint_operations.begin_create(name="random_name", file=None) - def test_create_no_type_throw_exception( - self, mock_online_endpoint_operations: OnlineEndpointOperations - ) -> None: + def test_create_no_type_throw_exception(self, mock_online_endpoint_operations: OnlineEndpointOperations) -> None: with pytest.raises(Exception): mock_online_endpoint_operations.begin_create(name="random_name", file=None) def test_create_no_type_in_file_throw_exception( - self, 
mock_online_endpoint_operations: OnlineEndpointOperations, create_yaml_no_type + self, mock_online_endpoint_operations: OnlineEndpointOperations, create_yaml_no_type ) -> None: with pytest.raises(Exception): mock_online_endpoint_operations.begin_create(name="random_name", file=None) @@ -430,9 +426,7 @@ def test_online_regenerate_keys( mock_online_endpoint_operations._online_operation.begin_regenerate_keys.assert_called_once() mock_online_endpoint_operations._online_operation.get.assert_called_once() - def test_regenerate_invalid_key_type( - self, mock_online_endpoint_operations: OnlineEndpointOperations - ) -> None: + def test_regenerate_invalid_key_type(self, mock_online_endpoint_operations: OnlineEndpointOperations) -> None: with pytest.raises(Exception): mock_online_endpoint_operations.begin_regenerate_keys(name="random_name", key_type="invalid key type") diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/_util.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/_util.py index e0c96844b71e..9a3b9d42e487 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/_util.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/_util.py @@ -52,10 +52,14 @@ ] # this is to shorten the test name -DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE = list(enumerate(map( - lambda params: Path(params[0]).name, - DATABINDING_EXPRESSION_TEST_CASES, -))) +DATABINDING_EXPRESSION_TEST_CASE_ENUMERATE = list( + enumerate( + map( + lambda params: Path(params[0]).name, + DATABINDING_EXPRESSION_TEST_CASES, + ) + ) +) SERVERLESS_COMPUTE_TEST_PARAMETERS = [ diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_control_flow_pipeline.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_control_flow_pipeline.py index 94c372f2f97c..5b003f07e68a 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_control_flow_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_control_flow_pipeline.py @@ -53,19 +53,23 @@ def test_happy_path_if_else(self, client: MLClient, randstr: Callable[[str], str pipeline_job_dict = omit_with_wildcard(pipeline_job_dict, *omit_fields) assert pipeline_job_dict["properties"]["jobs"] == { - 'conditionnode': {'condition': '${{parent.jobs.result.outputs.output}}', - 'false_block': '${{parent.jobs.node1}}', - 'true_block': '${{parent.jobs.node2}}', - 'type': 'if_else'}, - 'node1': {'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}}, - 'name': 'node1', - 'type': 'command'}, - 'node2': {'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '2'}}, - 'name': 'node2', - 'type': 'command'}, - 'result': {'name': 'result', 'type': 'command'} + "conditionnode": { + "condition": "${{parent.jobs.result.outputs.output}}", + "false_block": "${{parent.jobs.node1}}", + "true_block": "${{parent.jobs.node2}}", + "type": "if_else", + }, + "node1": { + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "1"}}, + "name": "node1", + "type": "command", + }, + "node2": { + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "2"}}, + "name": "node2", + "type": "command", + }, + "result": {"name": "result", "type": "command"}, } def test_if_else_one_branch(self, client: MLClient, randstr: Callable[[str], str]) -> None: @@ -80,14 +84,17 @@ def test_if_else_one_branch(self, client: MLClient, randstr: Callable[[str], str pipeline_job_dict = omit_with_wildcard(pipeline_job_dict, *omit_fields) assert pipeline_job_dict["properties"]["jobs"] == { - 'conditionnode': {'condition': '${{parent.jobs.result.outputs.output}}', - 
'true_block': '${{parent.jobs.node1}}', - 'type': 'if_else'}, - 'node1': {'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}}, - 'name': 'node1', - 'type': 'command'}, - 'result': {'name': 'result', 'type': 'command'} + "conditionnode": { + "condition": "${{parent.jobs.result.outputs.output}}", + "true_block": "${{parent.jobs.node1}}", + "type": "if_else", + }, + "node1": { + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "1"}}, + "name": "node1", + "type": "command", + }, + "result": {"name": "result", "type": "command"}, } def test_if_else_literal_condition(self, client: MLClient, randstr: Callable[[str], str]) -> None: @@ -102,13 +109,12 @@ def test_if_else_literal_condition(self, client: MLClient, randstr: Callable[[st pipeline_job_dict = omit_with_wildcard(pipeline_job_dict, *omit_fields) assert pipeline_job_dict["properties"]["jobs"] == { - 'conditionnode': {'condition': True, - 'true_block': '${{parent.jobs.node1}}', - 'type': 'if_else'}, - 'node1': {'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '1'}}, - 'name': 'node1', - 'type': 'command'} + "conditionnode": {"condition": True, "true_block": "${{parent.jobs.node1}}", "type": "if_else"}, + "node1": { + "inputs": {"component_in_number": {"job_input_type": "literal", "value": "1"}}, + "name": "node1", + "type": "command", + }, } def test_if_else_invalid_case(self, client: MLClient) -> None: @@ -180,10 +186,10 @@ class TestParallelFor(TestConditionalNodeInPipeline): def test_simple_foreach_string_item(self, client: MLClient, randstr: Callable): source = "./tests/test_configs/pipeline_jobs/helloworld_parallel_for_pipeline_job.yaml" expected_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, {"component_in_number": 2}]', - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, {"component_in_number": 2}]', + "type": "parallel_for", + "_source": "YAML.JOB", } assert_foreach(client, randstr("job_name"), source, expected_node) @@ -191,93 +197,89 @@ def test_simple_foreach_string_item(self, client: MLClient, randstr: Callable): def test_simple_foreach_list_item(self, client: MLClient, randstr: Callable): source = "./tests/test_configs/pipeline_jobs/helloworld_parallel_for_pipeline_job_list_input.yaml" expected_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, {"component_in_number": 2}]', - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, {"component_in_number": 2}]', + "type": "parallel_for", + "_source": "YAML.JOB", } assert_foreach(client, randstr("job_name"), source, expected_node) def test_simple_foreach_dict_item(self, client: MLClient, randstr: Callable): source = "./tests/test_configs/pipeline_jobs/helloworld_parallel_for_pipeline_job_dict_input.yaml" expected_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '{"branch1": {"component_in_number": 1}, "branch2": ' - '{"component_in_number": 2}}', - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": '{"branch1": {"component_in_number": 1}, "branch2": ' '{"component_in_number": 2}}', + "type": "parallel_for", + "_source": "YAML.JOB", } assert_foreach(client, randstr("job_name"), source, expected_node) def test_output_binding_foreach_node(self, client: MLClient, randstr: Callable): source = 
"./tests/test_configs/pipeline_jobs/helloworld_parallel_for_pipeline_job_output_binding.yaml" expected_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, {"component_in_number": 2}]', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}}, - 'type': 'parallel_for', - '_source': 'YAML.JOB' + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, {"component_in_number": 2}]', + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"}}, + "type": "parallel_for", + "_source": "YAML.JOB", } yaml_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_number": 1}, {"component_in_number": 2}]', - 'outputs': {'component_out_path': '${{parent.outputs.component_out_path}}'}, - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_number": 1}, {"component_in_number": 2}]', + "outputs": {"component_out_path": "${{parent.outputs.component_out_path}}"}, + "type": "parallel_for", + "_source": "YAML.JOB", } assert_foreach(client, randstr("job_name"), source, expected_node, yaml_node) def test_assets_in_items(self, client: MLClient, randstr: Callable): source = "./tests/test_configs/pipeline_jobs/control_flow/parallel_for/assets_items.yaml" expected_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_path": {"uri": ' - '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' - '"job_input_type": "uri_file"}}, {"component_in_path": {"uri": ' - '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' - '"job_input_type": "uri_file"}}]', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}}, - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_path": {"uri": ' + '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' + '"job_input_type": "uri_file"}}, {"component_in_path": {"uri": ' + '"https://dprepdata.blob.core.windows.net/demo/Titanic.csv", ' + '"job_input_type": "uri_file"}}]', + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"}}, + "type": "parallel_for", + "_source": "YAML.JOB", } yaml_node = { - 'body': '${{parent.jobs.parallel_body}}', + "body": "${{parent.jobs.parallel_body}}", # items will become json string when dump to avoid removal of empty inputs - 'items': '[{"component_in_path": "{\'type\': \'uri_file\', \'path\': ' - '\'https://dprepdata.blob.core.windows.net/demo/Titanic.csv\'}"}, ' - '{"component_in_path": "{\'type\': \'uri_file\', \'path\': ' - '\'https://dprepdata.blob.core.windows.net/demo/Titanic.csv\'}"}]', - 'outputs': {'component_out_path': '${{parent.outputs.component_out_path}}'}, - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "items": "[{\"component_in_path\": \"{'type': 'uri_file', 'path': " + "'https://dprepdata.blob.core.windows.net/demo/Titanic.csv'}\"}, " + "{\"component_in_path\": \"{'type': 'uri_file', 'path': " + "'https://dprepdata.blob.core.windows.net/demo/Titanic.csv'}\"}]", + "outputs": {"component_out_path": "${{parent.outputs.component_out_path}}"}, + "type": "parallel_for", + "_source": "YAML.JOB", } assert_foreach(client, randstr("job_name"), source, expected_node, yaml_node) def test_path_on_datastore_in_items(self, client: MLClient, randstr: Callable): source = 
"./tests/test_configs/pipeline_jobs/control_flow/parallel_for/path_on_ds_items.yaml" expected_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_path": {"uri": ' - '"azureml://datastores/workspaceblobstore/paths/path/on/datastore/1", ' - '"job_input_type": "uri_folder"}}, {"component_in_path": {"uri": ' - '"azureml://datastores/workspaceblobstore/paths/path/on/datastore/2", ' - '"job_input_type": "uri_folder"}}]', - 'outputs': {'component_out_path': {'type': 'literal', - 'value': '${{parent.outputs.component_out_path}}'}}, - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": '[{"component_in_path": {"uri": ' + '"azureml://datastores/workspaceblobstore/paths/path/on/datastore/1", ' + '"job_input_type": "uri_folder"}}, {"component_in_path": {"uri": ' + '"azureml://datastores/workspaceblobstore/paths/path/on/datastore/2", ' + '"job_input_type": "uri_folder"}}]', + "outputs": {"component_out_path": {"type": "literal", "value": "${{parent.outputs.component_out_path}}"}}, + "type": "parallel_for", + "_source": "YAML.JOB", } yaml_node = { - 'body': '${{parent.jobs.parallel_body}}', - 'items': '[{"component_in_path": "{\'type\': \'uri_folder\', \'path\': ' - '\'azureml://datastores/workspaceblobstore/paths/path/on/datastore/1\'}"}, ' - '{"component_in_path": "{\'type\': \'uri_folder\', \'path\': ' - '\'azureml://datastores/workspaceblobstore/paths/path/on/datastore/2\'}"}]', - 'outputs': {'component_out_path': '${{parent.outputs.component_out_path}}'}, - 'type': 'parallel_for', - '_source': 'YAML.JOB', + "body": "${{parent.jobs.parallel_body}}", + "items": "[{\"component_in_path\": \"{'type': 'uri_folder', 'path': " + "'azureml://datastores/workspaceblobstore/paths/path/on/datastore/1'}\"}, " + "{\"component_in_path\": \"{'type': 'uri_folder', 'path': " + "'azureml://datastores/workspaceblobstore/paths/path/on/datastore/2'}\"}]", + "outputs": {"component_out_path": "${{parent.outputs.component_out_path}}"}, + "type": "parallel_for", + "_source": "YAML.JOB", } assert_foreach(client, randstr("job_name"), source, expected_node, yaml_node) @@ -300,22 +302,20 @@ def test_if_else(self, client: MLClient, randstr: Callable[[], str]): assert_control_flow_in_pipeline_component( client=client, component_path="./if_else/simple_pipeline.yml", - pipeline_path="./tests/test_configs/pipeline_jobs/control_flow/control_flow_with_pipeline_component.yml" + pipeline_path="./tests/test_configs/pipeline_jobs/control_flow/control_flow_with_pipeline_component.yml", ) - @pytest.mark.skip( - reason="TODO(2177353): check why recorded tests failure." 
- ) + @pytest.mark.skip(reason="TODO(2177353): check why recorded tests failure.") def test_do_while(self, client: MLClient, randstr: Callable[[], str]): assert_control_flow_in_pipeline_component( client=client, component_path="./do_while/pipeline_component.yml", - pipeline_path="./tests/test_configs/pipeline_jobs/control_flow/control_flow_with_pipeline_component.yml" + pipeline_path="./tests/test_configs/pipeline_jobs/control_flow/control_flow_with_pipeline_component.yml", ) def test_foreach(self, client: MLClient, randstr: Callable[[], str]): assert_control_flow_in_pipeline_component( client=client, component_path="./parallel_for/simple_pipeline.yml", - pipeline_path="./tests/test_configs/pipeline_jobs/control_flow/control_flow_with_pipeline_component.yml" + pipeline_path="./tests/test_configs/pipeline_jobs/control_flow/control_flow_with_pipeline_component.yml", ) diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py index b5253ef37d23..5585ee9cf5a4 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py @@ -77,9 +77,7 @@ def test_pipeline_job_create( assert updated_job.tags[new_tag_name] == new_tag_value @pytest.mark.skipif(condition=not is_live(), reason="registry test, may fail in playback mode") - def test_pipeline_job_create_with_registries( - self, client: MLClient, randstr: Callable[[str], str] - ) -> None: + def test_pipeline_job_create_with_registries(self, client: MLClient, randstr: Callable[[str], str]) -> None: params_override = [{"name": randstr("name")}] pipeline_job = load_job( source="./tests/test_configs/pipeline_jobs/hello_pipeline_job_with_registries.yml", @@ -1364,341 +1362,320 @@ def test_pipeline_job_with_singularity_compute(self, client: MLClient, randstr: assert created_pipeline_job.settings.default_compute == singularity_compute_id assert created_pipeline_job.jobs["hello_job"].compute == singularity_compute_id - def test_register_output_yaml(self, client: MLClient, randstr: Callable[[str], str],): + def test_register_output_yaml( + self, + client: MLClient, + randstr: Callable[[str], str], + ): # only register pipeline output - register_pipeline_output_path = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_output_name_version.yaml" + register_pipeline_output_path = ( + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_output_name_version.yaml" + ) pipeline = load_job(source=register_pipeline_output_path) pipeline_job = assert_job_cancel(pipeline, client) output = pipeline_job.outputs.component_out_path - assert output.name == 'pipeline_output' - assert output.version == '1' + assert output.name == "pipeline_output" + assert output.version == "1" # only register node output - register_node_output_path = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_node_output_name_version.yaml" + register_node_output_path = ( + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_node_output_name_version.yaml" + ) pipeline = load_job(source=register_node_output_path) pipeline_job = assert_job_cancel(pipeline, client) - output = pipeline_job.jobs['parallel_body'].outputs.component_out_path - assert output.name == 'node_output' - assert output.version == '1' + output = pipeline_job.jobs["parallel_body"].outputs.component_out_path + assert output.name == "node_output" + assert output.version == 
"1" # register node output and pipeline output while the node output isn't binding to pipeline output - register_both_output_path = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_and_node_output.yaml" + register_both_output_path = ( + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_and_node_output.yaml" + ) pipeline = load_job(source=register_both_output_path) pipeline_job = assert_job_cancel(pipeline, client) pipeline_output = pipeline_job.outputs.pipeline_out_path - assert pipeline_output.name == 'pipeline_output' - assert pipeline_output.version == '2' - node_output = pipeline_job.jobs['parallel_body'].outputs.component_out_path - assert node_output.name == 'node_output' - assert node_output.version == '1' + assert pipeline_output.name == "pipeline_output" + assert pipeline_output.version == "2" + node_output = pipeline_job.jobs["parallel_body"].outputs.component_out_path + assert node_output.name == "node_output" + assert node_output.version == "1" # register node output and pipeline output while the node output is binding to pipeline output - register_both_output_binding_path = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_and_node_binding_output.yaml" + register_both_output_binding_path = ( + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_and_node_binding_output.yaml" + ) pipeline = load_job(source=register_both_output_binding_path) pipeline_job = assert_job_cancel(pipeline, client) pipeline_output = pipeline_job.outputs.pipeline_out_path - assert pipeline_output.name == 'pipeline_output' - assert pipeline_output.version == '2' - node_output = pipeline_job.jobs['parallel_body'].outputs.component_out_path - assert node_output.name == 'node_output' - assert node_output.version == '1' + assert pipeline_output.name == "pipeline_output" + assert pipeline_output.version == "2" + node_output = pipeline_job.jobs["parallel_body"].outputs.component_out_path + assert node_output.name == "node_output" + assert node_output.version == "1" # register spark node output - register_spark_output_path = "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/pipeline_inline_job_register_output.yml" + register_spark_output_path = ( + "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/pipeline_inline_job_register_output.yml" + ) pipeline = load_job(source=register_spark_output_path) pipeline_job = assert_job_cancel(pipeline, client) - node_output = pipeline_job.jobs['count_by_row'].outputs.output - assert node_output.name == 'spark_output' - assert node_output.version == '12' + node_output = pipeline_job.jobs["count_by_row"].outputs.output + assert node_output.name == "spark_output" + assert node_output.version == "12" # register sweep node output - register_sweep_output_path = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_sweep_node_register_output.yml" - pipeline = load_job(source=register_sweep_output_path, params_override=[{'name': randstr("job_name")}]) + register_sweep_output_path = ( + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_sweep_node_register_output.yml" + ) + pipeline = load_job(source=register_sweep_output_path, params_override=[{"name": randstr("job_name")}]) pipeline_job = assert_job_cancel(pipeline, client) - node_output = pipeline_job.jobs['hello_sweep_inline_file_trial'].outputs.trained_model_dir - assert node_output.name == 'sweep_output' - assert node_output.version == '123_sweep' + node_output = 
pipeline_job.jobs["hello_sweep_inline_file_trial"].outputs.trained_model_dir + assert node_output.name == "sweep_output" + assert node_output.version == "123_sweep" # register parallel node output - register_parallel_output_path ="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/pipeline_register_output.yml" + register_parallel_output_path = ( + "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/pipeline_register_output.yml" + ) pipeline = load_job(source=register_parallel_output_path) pipeline_job = assert_job_cancel(pipeline, client) - node_output = pipeline_job.jobs['convert_data_node'].outputs.file_output_data - assert node_output.name == 'convert_data_node_output' - assert node_output.version == '1' + node_output = pipeline_job.jobs["convert_data_node"].outputs.file_output_data + assert node_output.name == "convert_data_node_output" + assert node_output.version == "1" @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") - def test_pipeline_job_with_data_transfer_copy_urifolder( - self, client: MLClient, randstr: Callable[[str], str] - ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/copy_files.yaml" - ) + def test_pipeline_job_with_data_transfer_copy_urifolder(self, client: MLClient, randstr: Callable[[str], str]): + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/copy_files.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["copy_files"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["copy_files"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": {"folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}}, + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") - def test_pipeline_job_with_data_transfer_copy_urifile( - self, client: MLClient, randstr: Callable[[str], str] - ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/copy_uri_files.yaml" - ) + def test_pipeline_job_with_data_transfer_copy_urifile(self, client: MLClient, randstr: Callable[[str], str]): + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/copy_uri_files.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["copy_files"], fields_to_omit - ) + actual_dict = 
pydash.omit(pipeline_dict["properties"]["jobs"]["copy_files"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'data_copy_mode': 'fail_if_conflict', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "data_copy_mode": "fail_if_conflict", + "inputs": {"folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}}, + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") - def test_pipeline_job_with_data_transfer_copy_2urifolder( - self, client: MLClient, randstr: Callable[[str], str] - ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/merge_files.yaml" - ) + def test_pipeline_job_with_data_transfer_copy_2urifolder(self, client: MLClient, randstr: Callable[[str], str]): + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/merge_files.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["merge_files"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["merge_files"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder_dup}}'} - }, - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder_dup}}"}, + }, + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") def test_pipeline_job_with_inline_data_transfer_copy_2urifolder( self, client: MLClient, randstr: Callable[[str], str] ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/merge_files_job.yaml" - ) + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/merge_files_job.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["merge_files_job"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["merge_files_job"], 
fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder_dup}}'} - }, - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder_dup}}"}, + }, + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } @pytest.mark.skipif(condition=is_live(), reason="need worskspace with datafactory compute") def test_pipeline_job_with_inline_data_transfer_copy_mixtype_file( self, client: MLClient, randstr: Callable[[str], str] ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/merge_mixtype_files.yaml" - ) + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/merge_mixtype_files.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["merge_files"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["merge_files"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'input1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.input1}}'}, - 'input2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.input2}}'}, - 'input3': {'job_input_type': 'literal', - 'value': '${{parent.inputs.input3}}'} - }, - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.inputs.input1}}"}, + "input2": {"job_input_type": "literal", "value": "${{parent.inputs.input2}}"}, + "input3": {"job_input_type": "literal", "value": "${{parent.inputs.input3}}"}, + }, + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } @pytest.mark.skip(reason="need worskspace with datafactory compute, and builtin components") - def test_pipeline_job_with_data_transfer_import_filesystem( - self, client: MLClient, randstr: Callable[[str], str] - ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/import_file_system_to_blob.yaml" - ) + def test_pipeline_job_with_data_transfer_import_filesystem(self, client: MLClient, randstr: Callable[[str], str]): + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/import_file_system_to_blob.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = 
created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["s3_blob"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["s3_blob"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'outputs': {'sink': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/workspaceblobstore/paths/importjob/${{name}}/output_dir/s3//'}}, - 'source': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "outputs": { + "sink": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/workspaceblobstore/paths/importjob/${{name}}/output_dir/s3//", + } + }, + "source": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", } @pytest.mark.skip(reason="need worskspace with datafactory compute, and builtin components") - def test_pipeline_job_with_data_transfer_import_sql_database( - self, client: MLClient, randstr: Callable[[str], str] - ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/import_sql_database_to_blob.yaml" - ) + def test_pipeline_job_with_data_transfer_import_sql_database(self, client: MLClient, randstr: Callable[[str], str]): + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/import_sql_database_to_blob.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["snowflake_blob"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["snowflake_blob"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'computeId': 'adftest', - 'outputs': {'sink': {'job_output_type': 'mltable'}}, - 'source': {'connection': 'azureml:my_azuresqldb_connection', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "computeId": "adftest", + "outputs": {"sink": {"job_output_type": "mltable"}}, + "source": { + "connection": "azureml:my_azuresqldb_connection", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", } @pytest.mark.skip(reason="need worskspace with datafactory compute, and builtin components") def test_pipeline_job_with_data_transfer_import_snowflake_database( self, client: MLClient, randstr: Callable[[str], str] ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/import_database_to_blob.yaml" - ) + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/import_database_to_blob.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", 
"properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["snowflake_blob"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["snowflake_blob"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'computeId': 'adftest', - 'outputs': {'sink': {'job_output_type': 'mltable', - 'uri': 'azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/'}}, - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "computeId": "adftest", + "outputs": { + "sink": { + "job_output_type": "mltable", + "uri": "azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/", + } + }, + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", } @pytest.mark.skip(reason="need worskspace with datafactory compute, and builtin components") - def test_pipeline_job_with_data_transfer_export_sql_database( - self, client: MLClient, randstr: Callable[[str], str] - ): - test_path = ( - "./tests/test_configs/pipeline_jobs/data_transfer/export_database_to_blob.yaml" - ) + def test_pipeline_job_with_data_transfer_export_sql_database(self, client: MLClient, randstr: Callable[[str], str]): + test_path = "./tests/test_configs/pipeline_jobs/data_transfer/export_database_to_blob.yaml" pipeline: PipelineJob = load_job(source=test_path, params_override=[{"name": randstr("name")}]) created_pipeline = assert_job_cancel(pipeline, client) pipeline_dict = created_pipeline._to_rest_object().as_dict() fields_to_omit = ["name", "display_name", "experiment_name", "properties", "componentId"] - actual_dict = pydash.omit( - pipeline_dict["properties"]["jobs"]["blob_azuresql"], fields_to_omit - ) + actual_dict = pydash.omit(pipeline_dict["properties"]["jobs"]["blob_azuresql"], fields_to_omit) assert actual_dict == { - '_source': 'REMOTE.WORKSPACE.COMPONENT', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'sink': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'table_name': '${{parent.inputs.table_name}}', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer' + "_source": "REMOTE.WORKSPACE.COMPONENT", + "inputs": {"source": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}}, + "sink": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "table_name": "${{parent.inputs.table_name}}", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", } - - def test_register_output_yaml_succeed(self, client: MLClient, randstr: Callable[[str], str],): - register_pipeline_path = "./tests/test_configs/dsl_pipeline/pipeline_with_pipeline_component/pipeline_register_output.yml" + def test_register_output_yaml_succeed( + self, + client: MLClient, + randstr: Callable[[str], str], + ): + register_pipeline_path = ( + "./tests/test_configs/dsl_pipeline/pipeline_with_pipeline_component/pipeline_register_output.yml" + ) pipeline = load_job(source=register_pipeline_path) # overwrite version random_version = randstr("version") pipeline.outputs.pipeline_job_best_model.version = random_version - 
pipeline.jobs['train_and_evaludate_model1'].outputs.trained_model.version = random_version - pipeline.jobs['compare'].outputs.best_model.version = random_version - pipeline.jobs['compare'].outputs.best_result.version = random_version - pipeline.jobs['compare_2'].outputs.best_model.version = random_version - pipeline.jobs['compare_2'].outputs.best_result.version = random_version + pipeline.jobs["train_and_evaludate_model1"].outputs.trained_model.version = random_version + pipeline.jobs["compare"].outputs.best_model.version = random_version + pipeline.jobs["compare"].outputs.best_result.version = random_version + pipeline.jobs["compare_2"].outputs.best_model.version = random_version + pipeline.jobs["compare_2"].outputs.best_result.version = random_version pipeline_job = client.jobs.create_or_update(pipeline) client.jobs.stream(pipeline_job.name) @@ -1708,14 +1685,16 @@ def check_name_version_and_register_succeed(output, asset_name): assert output.version == random_version assert client.data.get(name=asset_name, version=random_version) - check_name_version_and_register_succeed(pipeline_job.outputs.pipeline_job_best_model, 'pipeline_output_a') - check_name_version_and_register_succeed(pipeline_job.jobs['train_and_evaludate_model1'].outputs.trained_model, 'model1_output') - check_name_version_and_register_succeed(pipeline_job.jobs['compare_2'].outputs.best_model, 'best_model_2') - check_name_version_and_register_succeed(pipeline_job.jobs['compare_2'].outputs.best_result, 'best_result_2') + check_name_version_and_register_succeed(pipeline_job.outputs.pipeline_job_best_model, "pipeline_output_a") + check_name_version_and_register_succeed( + pipeline_job.jobs["train_and_evaludate_model1"].outputs.trained_model, "model1_output" + ) + check_name_version_and_register_succeed(pipeline_job.jobs["compare_2"].outputs.best_model, "best_model_2") + check_name_version_and_register_succeed(pipeline_job.jobs["compare_2"].outputs.best_result, "best_result_2") # name and version are not rewritten, but the display content in page is the PipelineOutput - assert pipeline_job.jobs['compare'].outputs.best_model.name == 'best_model' - assert pipeline_job.jobs['compare'].outputs.best_model.version == random_version + assert pipeline_job.jobs["compare"].outputs.best_model.name == "best_model" + assert pipeline_job.jobs["compare"].outputs.best_model.version == random_version @pytest.mark.usefixtures("enable_pipeline_private_preview_features") @@ -1725,6 +1704,7 @@ def check_name_version_and_register_succeed(output, asset_name): @pytest.mark.timeout(timeout=_PIPELINE_JOB_LONG_RUNNING_TIMEOUT_SECOND, method=_PYTEST_TIMEOUT_METHOD) class TestPipelineJobLongRunning: """Long-running tests that require pipeline job completed.""" + def test_pipeline_job_get_child_run(self, client: MLClient, randstr: Callable[[str], str]): pipeline_job = load_job( source="./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_quick_with_output.yml", @@ -1742,9 +1722,7 @@ def test_pipeline_job_get_child_run(self, client: MLClient, randstr: Callable[[s assert isinstance(retrieved_child_run, Job) assert retrieved_child_run.name == child_job.name - def test_pipeline_job_download( - self, client: MLClient, randstr: Callable[[str], str], tmp_path: Path - ) -> None: + def test_pipeline_job_download(self, client: MLClient, randstr: Callable[[str], str], tmp_path: Path) -> None: job = client.jobs.create_or_update( load_job( source="./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_quick_with_output.yml", diff --git 
a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_controlflow_pipeline_job.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_controlflow_pipeline_job.py index 03a1f9133e28..3026402ecc77 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_controlflow_pipeline_job.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_controlflow_pipeline_job.py @@ -64,7 +64,6 @@ def test_do_while_pipeline_illegal_cases(self): class TestParallelForPipelineJobUT(TestControlFlowPipelineJobUT): - @pytest.mark.parametrize( "exception_cls, yaml_path, msg, location", [ @@ -72,52 +71,52 @@ class TestParallelForPipelineJobUT(TestControlFlowPipelineJobUT): ( ValidationError, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/items_invalid_value_type.yml", - 'Not a valid mapping type.', - '"path": "jobs.parallelfor.items",' + "Not a valid mapping type.", + '"path": "jobs.parallelfor.items",', ), # # items with empty dict as content ( ValidationException, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/items_empty.yml", "Items is an empty list/dict.", - '"path": "jobs.parallelfor.items",' + '"path": "jobs.parallelfor.items",', ), # item meta not match ( ValidationException, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/items_meta_mismatch.yml", '"message": "Items should have same keys', - '"path": "jobs.parallelfor.items"' + '"path": "jobs.parallelfor.items"', ), # items not exist ( ValidationException, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/items_not_exist.yml", - 'got unmatched inputs with loop body component', - '"path": "jobs.parallelfor.items"' + "got unmatched inputs with loop body component", + '"path": "jobs.parallelfor.items"', ), # items invalid json ( ValidationException, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/items_invalid_json.yml", '"message": "Items is neither a valid JSON', - '"path": "jobs.parallelfor.items"' + '"path": "jobs.parallelfor.items"', ), # required field unprovided ( ValidationError, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/items_unprovided.yml", '"message": "Missing data for required field', - 'items_unprovided.yml#line 7' + "items_unprovided.yml#line 7", ), # body unsupported ( ValidationException, "./tests/test_configs/pipeline_jobs/invalid/parallel_for/body_not_supported.yml", " got instead.", - '' + "", ), - ] + ], ) def test_dsl_parallel_for_pipeline_illegal_cases(self, exception_cls, yaml_path, msg, location): with pytest.raises(exception_cls) as e: diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_entity.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_entity.py index d71ba14b30f5..8d19ab53d42e 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_entity.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_entity.py @@ -162,16 +162,16 @@ def test_automl_node_in_pipeline_forecasting(self, mock_machinelearning_client: "rest_job_file, node_name", [ ( - "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_regression.json", - "hello_automl_regression", + "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_regression.json", + "hello_automl_regression", ), ( - "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/rest_pipeline_with_automl_output_binding.json", - "classification_node", + "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/rest_pipeline_with_automl_output_binding.json", + "classification_node", ), ( - 
"./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/rest_pipeline_with_automl_output.json", - "hello_automl_regression", + "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/rest_pipeline_with_automl_output.json", + "hello_automl_regression", ), ], ) @@ -209,9 +209,9 @@ def test_command_job_with_invalid_mode_type_in_pipeline_deserialize(self): "hello_python_world_job": { "inputs": { "sample_input_data": { - 'mode': "ro_mount", + "mode": "ro_mount", "type": "uri_folder", - 'path': "azureml://datastores/workspaceblobstore/paths/LocalUpload/22fd2a62-9759-4843-ab92-5bd79c35f6f0/data/", + "path": "azureml://datastores/workspaceblobstore/paths/LocalUpload/22fd2a62-9759-4843-ab92-5bd79c35f6f0/data/", }, "sample_input_string": { "mode": "ro_mount", @@ -219,7 +219,7 @@ def test_command_job_with_invalid_mode_type_in_pipeline_deserialize(self): }, }, "outputs": {"sample_output_data": "${{parent.outputs.pipeline_sample_output_data}}"}, - 'component': "azureml:/subscriptions/96aede12-2f73-41cb-b983-6d11a904839b/resourceGroups/chenyin-test-eastus/providers/Microsoft.MachineLearningServices/workspaces/sdk_vnext_cli/components/azureml_anonymous/versions/9904ff48-9cb2-4733-ad1c-eb1eb9940a19", + "component": "azureml:/subscriptions/96aede12-2f73-41cb-b983-6d11a904839b/resourceGroups/chenyin-test-eastus/providers/Microsoft.MachineLearningServices/workspaces/sdk_vnext_cli/components/azureml_anonymous/versions/9904ff48-9cb2-4733-ad1c-eb1eb9940a19", "type": "command", "compute": "azureml:cpu-cluster", } @@ -335,7 +335,7 @@ def test_pipeline_job_automl_regression_output(self, mock_machinelearning_client } def test_automl_node_in_pipeline_text_classification( - self, mock_machinelearning_client: MLClient, mocker: MockFixture + self, mock_machinelearning_client: MLClient, mocker: MockFixture ): test_path = "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_text_classification.yml" job = load_job(source=test_path) @@ -367,7 +367,7 @@ def test_automl_node_in_pipeline_text_classification( } def test_automl_node_in_pipeline_text_classification_multilabel( - self, mock_machinelearning_client: MLClient, mocker: MockFixture + self, mock_machinelearning_client: MLClient, mocker: MockFixture ): test_path = ( "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_text_classification_multilabel.yml" @@ -431,11 +431,11 @@ def test_automl_node_in_pipeline_text_ner(self, mock_machinelearning_client: MLC @pytest.mark.parametrize("run_type", ["single", "sweep", "automode"]) def test_automl_node_in_pipeline_image_multiclass_classification( - self, - mock_machinelearning_client: MLClient, - mocker: MockFixture, - run_type: str, - tmp_path: Path, + self, + mock_machinelearning_client: MLClient, + mocker: MockFixture, + run_type: str, + tmp_path: Path, ): test_path = "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_image_multiclass_classification.yml" @@ -508,11 +508,11 @@ def test_automl_node_in_pipeline_image_multiclass_classification( @pytest.mark.parametrize("run_type", ["single", "sweep", "automode"]) def test_automl_node_in_pipeline_image_multilabel_classification( - self, - mock_machinelearning_client: MLClient, - mocker: MockFixture, - run_type: str, - tmp_path: Path, + self, + mock_machinelearning_client: MLClient, + mocker: MockFixture, + run_type: str, + tmp_path: Path, ): test_path = "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_image_multilabel_classification.yml" @@ -585,7 +585,7 @@ def 
test_automl_node_in_pipeline_image_multilabel_classification( @pytest.mark.parametrize("run_type", ["single", "sweep", "automode"]) def test_automl_node_in_pipeline_image_object_detection( - self, mock_machinelearning_client: MLClient, mocker: MockFixture, run_type: str, tmp_path: Path + self, mock_machinelearning_client: MLClient, mocker: MockFixture, run_type: str, tmp_path: Path ): test_path = "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_image_object_detection.yml" @@ -659,11 +659,11 @@ def test_automl_node_in_pipeline_image_object_detection( @pytest.mark.parametrize("run_type", ["single", "sweep", "automode"]) def test_automl_node_in_pipeline_image_instance_segmentation( - self, - mock_machinelearning_client: MLClient, - mocker: MockFixture, - run_type: str, - tmp_path: Path, + self, + mock_machinelearning_client: MLClient, + mocker: MockFixture, + run_type: str, + tmp_path: Path, ): test_path = ( "./tests/test_configs/pipeline_jobs/jobs_with_automl_nodes/onejob_automl_image_instance_segmentation.yml" @@ -773,7 +773,7 @@ def test_spark_node_in_pipeline(self, mock_machinelearning_client: MLClient, moc "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}}, "name": "add_greeting_column", "py_files": ["utils.zip"], - 'resources': {'instance_type': 'standard_e4s_v3', 'runtime_version': '3.1.0'}, + "resources": {"instance_type": "standard_e4s_v3", "runtime_version": "3.1.0"}, "type": "spark", } assert actual_dict == expected_dict @@ -799,7 +799,7 @@ def test_spark_node_in_pipeline(self, mock_machinelearning_client: MLClient, moc "jars": ["scalaproj.jar"], "name": "count_by_row", "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.output}}"}}, - 'resources': {'instance_type': 'standard_e4s_v3', 'runtime_version': '3.1.0'}, + "resources": {"instance_type": "standard_e4s_v3", "runtime_version": "3.1.0"}, "type": "spark", } assert actual_dict == expected_dict @@ -829,14 +829,12 @@ def test_data_transfer_copy_node_in_pipeline(self, mock_machinelearning_client: "_source": "YAML.COMPONENT", "componentId": "", "computeId": "", - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'name': 'copy_files', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "data_copy_mode": "merge_with_overwrite", + "inputs": {"folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}}, + "name": "copy_files", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } assert actual_dict == expected_dict @@ -865,20 +863,21 @@ def test_data_transfer_merge_node_in_pipeline(self, mock_machinelearning_client: "_source": "YAML.COMPONENT", "componentId": "", "computeId": "", - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder_dup}}'}}, - 'name': 'merge_files', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer' + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}, + 
"folder2": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder_dup}}"}, + }, + "name": "merge_files", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": "data_transfer", } assert actual_dict == expected_dict - def test_inline_data_transfer_merge_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): + def test_inline_data_transfer_merge_node_in_pipeline( + self, mock_machinelearning_client: MLClient, mocker: MockFixture + ): test_path = "./tests/test_configs/pipeline_jobs/data_transfer/merge_files_job.yaml" job = load_job(test_path) @@ -900,23 +899,24 @@ def test_inline_data_transfer_merge_node_in_pipeline(self, mock_machinelearning_ actual_dict = pydash.omit(rest_job_dict["properties"]["jobs"]["merge_files_job"], *omit_fields) expected_dict = { - 'data_copy_mode': 'merge_with_overwrite', + "data_copy_mode": "merge_with_overwrite", "_source": "YAML.JOB", "componentId": "", "computeId": "", - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder_dup}}'}}, - 'name': 'merge_files_job', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'type': 'data_transfer', - 'task': 'copy_data', + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"}, + "folder2": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder_dup}}"}, + }, + "name": "merge_files_job", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "type": "data_transfer", + "task": "copy_data", } assert actual_dict == expected_dict - def test_inline_data_transfer_import_database_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): + def test_inline_data_transfer_import_database_node_in_pipeline( + self, mock_machinelearning_client: MLClient, mocker: MockFixture + ): test_path = "./tests/test_configs/pipeline_jobs/data_transfer/import_database_to_blob.yaml" job = load_job(test_path) @@ -940,41 +940,65 @@ def test_inline_data_transfer_import_database_node_in_pipeline(self, mock_machin rest_job_dict = pydash.omit(job._to_rest_object().as_dict(), *omit_fields) assert rest_job_dict == { - 'properties': { - 'compute_id': '', - 'description': 'pipeline with data transfer components', - 'inputs': {'query_source_snowflake': {'job_input_type': 'literal', - 'value': 'select * from TPCH_SF1000.PARTSUPP limit 10'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'snowflake_blob': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 'snowflake_blob', - 'outputs': {'sink': {'job_output_type': 'mltable', - 'uri': 'azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/'}}, - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'snowflake_blob_node_input': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 'snowflake_blob_node_input', - 'outputs': {'sink': {'job_output_type': 'mltable', - 'uri': 'azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/'}}, - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': 'select * from TPCH_SF1000.PARTSUPP limit 10', - 'type': 'database'}, 
- 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'YAML.JOB', - 'default_compute': '', - 'default_datastore': ''}, - 'tags': {}}} - - def test_inline_data_transfer_import_stored_database_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): + "properties": { + "compute_id": "", + "description": "pipeline with data transfer components", + "inputs": { + "query_source_snowflake": { + "job_input_type": "literal", + "value": "select * from TPCH_SF1000.PARTSUPP limit 10", + } + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "snowflake_blob": { + "_source": "BUILTIN", + "computeId": "", + "name": "snowflake_blob", + "outputs": { + "sink": { + "job_output_type": "mltable", + "uri": "azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/", + } + }, + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + "snowflake_blob_node_input": { + "_source": "BUILTIN", + "computeId": "", + "name": "snowflake_blob_node_input", + "outputs": { + "sink": { + "job_output_type": "mltable", + "uri": "azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/", + } + }, + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "select * from TPCH_SF1000.PARTSUPP limit 10", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "YAML.JOB", "default_compute": "", "default_datastore": ""}, + "tags": {}, + } + } + + def test_inline_data_transfer_import_stored_database_node_in_pipeline( + self, mock_machinelearning_client: MLClient, mocker: MockFixture + ): test_path = "./tests/test_configs/pipeline_jobs/data_transfer/import_stored_database_to_blob.yaml" job = load_job(test_path) @@ -997,36 +1021,41 @@ def test_inline_data_transfer_import_stored_database_node_in_pipeline(self, mock rest_job_dict = pydash.omit(job._to_rest_object().as_dict(), *omit_fields) assert rest_job_dict == { - 'properties': { - 'compute_id': '', - 'description': 'pipeline with data transfer components', - 'inputs': {}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'snowflake_blob': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 'snowflake_blob', - 'outputs': {'sink': {'job_output_type': 'mltable'}}, - 'source': {'connection': 'azureml:my_sql_connection', - 'stored_procedure': 'SelectEmployeeByJobAndDepartment', - 'stored_procedure_params': [{'name': 'job', - 'type': 'String', - 'value': 'Engineer'}, - {'name': 'department', - 'type': 'String', - 'value': 'Engineering'}], - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'YAML.JOB', - 'default_compute': '', - 'default_datastore': ''}, - 'tags': {}} + "properties": { + "compute_id": "", + "description": "pipeline with data transfer components", + "inputs": {}, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "snowflake_blob": { + "_source": "BUILTIN", + "computeId": "", + "name": "snowflake_blob", + "outputs": {"sink": {"job_output_type": "mltable"}}, + "source": { + "connection": "azureml:my_sql_connection", + "stored_procedure": "SelectEmployeeByJobAndDepartment", + "stored_procedure_params": [ + {"name": "job", "type": "String", 
"value": "Engineer"}, + {"name": "department", "type": "String", "value": "Engineering"}, + ], + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + } + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "YAML.JOB", "default_compute": "", "default_datastore": ""}, + "tags": {}, + } } - def test_inline_data_transfer_import_file_system_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): + def test_inline_data_transfer_import_file_system_node_in_pipeline( + self, mock_machinelearning_client: MLClient, mocker: MockFixture + ): test_path = "./tests/test_configs/pipeline_jobs/data_transfer/import_file_system_to_blob.yaml" job = load_job(test_path) @@ -1050,44 +1079,59 @@ def test_inline_data_transfer_import_file_system_node_in_pipeline(self, mock_mac rest_job_dict = pydash.omit(job._to_rest_object().as_dict(), *omit_fields) assert rest_job_dict == { - 'properties': { - 'compute_id': '', - 'description': 'pipeline with data transfer components', - 'inputs': {'connection_target': {'job_input_type': 'literal', - 'value': 'azureml:my-s3-connection'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 'test1/*'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'s3_blob': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 's3_blob', - 'outputs': {'sink': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/workspaceblobstore/paths/importjob/${{name}}/output_dir/s3//'}}, - 'source': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 's3_blob_input': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 's3_blob_input', - 'outputs': {'sink': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/workspaceblobstore/paths/importjob/${{name}}/output_dir/s3//'}}, - 'source': {'connection': 'azureml:my-s3-connection', - 'path': 'test1/*', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'YAML.JOB', - 'default_compute': '', - 'default_datastore': ''}, - 'tags': {}} + "properties": { + "compute_id": "", + "description": "pipeline with data transfer components", + "inputs": { + "connection_target": {"job_input_type": "literal", "value": "azureml:my-s3-connection"}, + "path_source_s3": {"job_input_type": "literal", "value": "test1/*"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "s3_blob": { + "_source": "BUILTIN", + "computeId": "", + "name": "s3_blob", + "outputs": { + "sink": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/workspaceblobstore/paths/importjob/${{name}}/output_dir/s3//", + } + }, + "source": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + "s3_blob_input": { + "_source": "BUILTIN", + "computeId": "", + "name": "s3_blob_input", + "outputs": { + "sink": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/workspaceblobstore/paths/importjob/${{name}}/output_dir/s3//", + } + }, + "source": {"connection": "azureml:my-s3-connection", "path": "test1/*", "type": "file_system"}, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "YAML.JOB", "default_compute": "", "default_datastore": ""}, + 
"tags": {}, + } } - def test_inline_data_transfer_export_database_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): + def test_inline_data_transfer_export_database_node_in_pipeline( + self, mock_machinelearning_client: MLClient, mocker: MockFixture + ): test_path = "./tests/test_configs/pipeline_jobs/data_transfer/export_database_to_blob.yaml" job = load_job(test_path) @@ -1111,46 +1155,59 @@ def test_inline_data_transfer_export_database_node_in_pipeline(self, mock_machin rest_job_dict = pydash.omit(job._to_rest_object().as_dict(), *omit_fields) assert rest_job_dict == { - 'properties': { - 'compute_id': '', - 'description': 'pipeline with data transfer components', - 'inputs': {'connection_target_azuresql': {'job_input_type': 'literal', - 'value': 'azureml:my_export_azuresqldb_connection'}, - 'cosmos_folder': {'job_input_type': 'uri_file', - 'uri': 'yyy'}, - 'table_name': {'job_input_type': 'literal', - 'value': 'dbo.Persons'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'blob_azuresql': {'_source': 'BUILTIN', - 'computeId': '', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'name': 'blob_azuresql', - 'sink': {'connection': '${{parent.inputs.connection_target_azuresql}}', - 'table_name': '${{parent.inputs.table_name}}', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'blob_azuresql_node_input': {'_source': 'BUILTIN', - 'computeId': '', - 'inputs': {'source': {'job_input_type': 'uri_file', - 'uri': 'yyy'}}, - 'name': 'blob_azuresql_node_input', - 'sink': {'connection': 'azureml:my_export_azuresqldb_connection', - 'table_name': 'dbo.Persons', - 'type': 'database'}, - 'task': 'export_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'YAML.JOB', - 'default_compute': '', - 'default_datastore': ''}, - 'tags': {}} + "properties": { + "compute_id": "", + "description": "pipeline with data transfer components", + "inputs": { + "connection_target_azuresql": { + "job_input_type": "literal", + "value": "azureml:my_export_azuresqldb_connection", + }, + "cosmos_folder": {"job_input_type": "uri_file", "uri": "yyy"}, + "table_name": {"job_input_type": "literal", "value": "dbo.Persons"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "blob_azuresql": { + "_source": "BUILTIN", + "computeId": "", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"} + }, + "name": "blob_azuresql", + "sink": { + "connection": "${{parent.inputs.connection_target_azuresql}}", + "table_name": "${{parent.inputs.table_name}}", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + "blob_azuresql_node_input": { + "_source": "BUILTIN", + "computeId": "", + "inputs": {"source": {"job_input_type": "uri_file", "uri": "yyy"}}, + "name": "blob_azuresql_node_input", + "sink": { + "connection": "azureml:my_export_azuresqldb_connection", + "table_name": "dbo.Persons", + "type": "database", + }, + "task": "export_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "YAML.JOB", "default_compute": "", "default_datastore": ""}, + "tags": {}, + } } - def test_inline_data_transfer_export_file_system_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): + def test_inline_data_transfer_export_file_system_node_in_pipeline( + self, mock_machinelearning_client: MLClient, mocker: 
MockFixture + ): test_path = "./tests/test_configs/pipeline_jobs/data_transfer/export_file_system_to_blob.yaml" job = load_job(test_path) @@ -1174,43 +1231,59 @@ def test_inline_data_transfer_export_file_system_node_in_pipeline(self, mock_mac rest_job_dict = pydash.omit(job._to_rest_object().as_dict(), *omit_fields) assert rest_job_dict == { - 'properties': { - 'compute_id': '', - 'description': 'pipeline with data transfer components', - 'inputs': {'connection_target': {'job_input_type': 'literal', - 'value': 'azureml:my_s3_connection'}, - 'cosmos_folder': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 's3://my_bucket/my_folder'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'s3_blob': {'_source': 'BUILTIN', - 'computeId': '', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.inputs.cosmos_folder}}'}}, - 'name': 's3_blob', - 'sink': {'connection': '${{parent.inputs.connection_target}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 's3_blob_input': {'_source': 'BUILTIN', - 'computeId': '', - 'inputs': {'source': {'job_input_type': 'uri_folder', - 'uri': 'azureml://datastores/my_cosmos/paths/source_cosmos'}}, - 'name': 's3_blob_input', - 'sink': {'connection': 'azureml:my_s3_connection', - 'path': 's3://my_bucket/my_folder', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}}, - 'outputs': {}, - 'properties': {}, - 'settings': {'_source': 'YAML.JOB', - 'default_compute': '', - 'default_datastore': ''}, - 'tags': {}} + "properties": { + "compute_id": "", + "description": "pipeline with data transfer components", + "inputs": { + "connection_target": {"job_input_type": "literal", "value": "azureml:my_s3_connection"}, + "cosmos_folder": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + }, + "path_source_s3": {"job_input_type": "literal", "value": "s3://my_bucket/my_folder"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "s3_blob": { + "_source": "BUILTIN", + "computeId": "", + "inputs": { + "source": {"job_input_type": "literal", "value": "${{parent.inputs.cosmos_folder}}"} + }, + "name": "s3_blob", + "sink": { + "connection": "${{parent.inputs.connection_target}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + "s3_blob_input": { + "_source": "BUILTIN", + "computeId": "", + "inputs": { + "source": { + "job_input_type": "uri_folder", + "uri": "azureml://datastores/my_cosmos/paths/source_cosmos", + } + }, + "name": "s3_blob_input", + "sink": { + "connection": "azureml:my_s3_connection", + "path": "s3://my_bucket/my_folder", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"_source": "YAML.JOB", "default_compute": "", "default_datastore": ""}, + "tags": {}, + } } def test_data_transfer_multi_node_in_pipeline(self, mock_machinelearning_client: MLClient, mocker: MockFixture): @@ -1237,65 +1310,94 @@ def test_data_transfer_multi_node_in_pipeline(self, mock_machinelearning_client: actual_dict = omit_with_wildcard(rest_job_dict, *omit_fields) assert actual_dict == { - 'properties': { - 'compute_id': '', - 'description': 'pipeline with data transfer components', - 'inputs': 
{'connection_target_s3': {'job_input_type': 'literal', - 'value': 'azureml:my_s3_connection'}, - 'path_source_s3': {'job_input_type': 'literal', - 'value': 's3://my_bucket/my_folder'}, - 'query_source_snowflake': {'job_input_type': 'literal', - 'value': 'SELECT * FROM my_table'}}, - 'is_archived': False, - 'job_type': 'Pipeline', - 'jobs': {'blob_s3': {'_source': 'BUILTIN', - 'computeId': '', - 'inputs': {'source': {'job_input_type': 'literal', - 'value': '${{parent.jobs.merge_files.outputs.output_folder}}'}}, - 'name': 'blob_s3', - 'sink': {'connection': '${{parent.inputs.connection_target_s3}}', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'export_data', - 'type': 'data_transfer'}, - 'merge_files': {'_source': 'YAML.COMPONENT', - 'computeId': '', - 'data_copy_mode': 'merge_with_overwrite', - 'inputs': {'folder1': {'job_input_type': 'literal', - 'value': '${{parent.jobs.s3_blob.outputs.sink}}'}, - 'folder2': {'job_input_type': 'literal', - 'value': '${{parent.jobs.snowflake_blob.outputs.sink}}'}}, - 'name': 'merge_files', - 'outputs': {'output_folder': {'type': 'literal', - 'value': '${{parent.outputs.merged_blob}}'}}, - 'task': 'copy_data', - 'type': 'data_transfer'}, - 's3_blob': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 's3_blob', - 'outputs': {'sink': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/managed/paths/some_path'}}, - 'source': {'connection': 'azureml:my_s3_connection', - 'path': '${{parent.inputs.path_source_s3}}', - 'type': 'file_system'}, - 'task': 'import_data', - 'type': 'data_transfer'}, - 'snowflake_blob': {'_source': 'BUILTIN', - 'computeId': '', - 'name': 'snowflake_blob', - 'outputs': {'sink': {'job_output_type': 'mltable'}}, - 'source': {'connection': 'azureml:my_snowflake_connection', - 'query': '${{parent.inputs.query_source_snowflake}}', - 'type': 'database'}, - 'task': 'import_data', - 'type': 'data_transfer'}}, - 'outputs': {'merged_blob': {'job_output_type': 'uri_folder', - 'uri': 'azureml://datastores/my_blob/paths/merged_blob'}}, - 'properties': {}, - 'settings': {'_source': 'YAML.JOB', - 'default_compute': '', - 'default_datastore': ''}, - 'tags': {}}} + "properties": { + "compute_id": "", + "description": "pipeline with data transfer components", + "inputs": { + "connection_target_s3": {"job_input_type": "literal", "value": "azureml:my_s3_connection"}, + "path_source_s3": {"job_input_type": "literal", "value": "s3://my_bucket/my_folder"}, + "query_source_snowflake": {"job_input_type": "literal", "value": "SELECT * FROM my_table"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "blob_s3": { + "_source": "BUILTIN", + "computeId": "", + "inputs": { + "source": { + "job_input_type": "literal", + "value": "${{parent.jobs.merge_files.outputs.output_folder}}", + } + }, + "name": "blob_s3", + "sink": { + "connection": "${{parent.inputs.connection_target_s3}}", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "export_data", + "type": "data_transfer", + }, + "merge_files": { + "_source": "YAML.COMPONENT", + "computeId": "", + "data_copy_mode": "merge_with_overwrite", + "inputs": { + "folder1": {"job_input_type": "literal", "value": "${{parent.jobs.s3_blob.outputs.sink}}"}, + "folder2": { + "job_input_type": "literal", + "value": "${{parent.jobs.snowflake_blob.outputs.sink}}", + }, + }, + "name": "merge_files", + "outputs": {"output_folder": {"type": "literal", "value": "${{parent.outputs.merged_blob}}"}}, + "task": "copy_data", + "type": 
"data_transfer", + }, + "s3_blob": { + "_source": "BUILTIN", + "computeId": "", + "name": "s3_blob", + "outputs": { + "sink": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/managed/paths/some_path", + } + }, + "source": { + "connection": "azureml:my_s3_connection", + "path": "${{parent.inputs.path_source_s3}}", + "type": "file_system", + }, + "task": "import_data", + "type": "data_transfer", + }, + "snowflake_blob": { + "_source": "BUILTIN", + "computeId": "", + "name": "snowflake_blob", + "outputs": {"sink": {"job_output_type": "mltable"}}, + "source": { + "connection": "azureml:my_snowflake_connection", + "query": "${{parent.inputs.query_source_snowflake}}", + "type": "database", + }, + "task": "import_data", + "type": "data_transfer", + }, + }, + "outputs": { + "merged_blob": { + "job_output_type": "uri_folder", + "uri": "azureml://datastores/my_blob/paths/merged_blob", + } + }, + "properties": {}, + "settings": {"_source": "YAML.JOB", "default_compute": "", "default_datastore": ""}, + "tags": {}, + } + } def test_default_user_identity_if_empty_identity_input(self): test_path = "./tests/test_configs/pipeline_jobs/shakespear_sample/pipeline.yml" @@ -1332,15 +1434,14 @@ def test_default_user_identity_if_empty_identity_input(self): "input1": {"job_input_type": "literal", "value": "${{parent.jobs.sample_word.outputs.output1}}"} }, "name": "count_word", - 'resources': {'instance_type': 'standard_e4s_v3', - 'runtime_version': '3.1.0'}, + "resources": {"instance_type": "standard_e4s_v3", "runtime_version": "3.1.0"}, "type": "spark", }, "sample_word": { "_source": "YAML.JOB", "args": "--input1 ${{inputs.input1}} --output2 " - "${{outputs.output1}} --my_sample_rate " - "${{inputs.sample_rate}}", + "${{outputs.output1}} --my_sample_rate " + "${{inputs.sample_rate}}", "conf": { "spark.driver.cores": 1, "spark.driver.memory": "2g", @@ -1359,8 +1460,7 @@ def test_default_user_identity_if_empty_identity_input(self): }, "name": "sample_word", "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.output1}}"}}, - 'resources': {'instance_type': 'standard_e4s_v3', - 'runtime_version': '3.1.0'}, + "resources": {"instance_type": "standard_e4s_v3", "runtime_version": "3.1.0"}, "type": "spark", }, }, @@ -1371,7 +1471,7 @@ def test_default_user_identity_if_empty_identity_input(self): } def test_spark_node_in_pipeline_with_dynamic_allocation_disabled( - self, + self, ): test_path = "./tests/test_configs/pipeline_jobs/invalid/pipeline_job_with_spark_job_with_dynamic_allocation_disabled.yml" job = load_job(test_path) @@ -1380,90 +1480,90 @@ def test_spark_node_in_pipeline_with_dynamic_allocation_disabled( assert ve.message == "Should not specify min or max executors when dynamic allocation is disabled." 
def test_spark_node_in_pipeline_with_invalid_code( - self, + self, ): test_path = "./tests/test_configs/pipeline_jobs/invalid/pipeline_job_with_spark_job_with_invalid_code.yml" job = load_job(test_path) result = job._validate() - assert 'jobs.hello_world.component.entry' in result.error_messages + assert "jobs.hello_world.component.entry" in result.error_messages def test_spark_node_in_pipeline_with_git_code( - self, + self, ): test_path = "./tests/test_configs/pipeline_jobs/invalid/pipeline_job_with_spark_job_with_git_code.yml" job = load_job(test_path) job._validate() def test_infer_pipeline_output_type_as_node_type( - self, + self, ) -> None: pipeline_job = load_job( source="./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_defaults_with_parallel_job_tabular_input_e2e.yml", ) assert ( - pipeline_job.jobs["hello_world_inline_parallel_tabular_job_1"].outputs["job_output_file"].type - == AssetTypes.URI_FILE + pipeline_job.jobs["hello_world_inline_parallel_tabular_job_1"].outputs["job_output_file"].type + == AssetTypes.URI_FILE ) @pytest.mark.parametrize( "pipeline_job_path, expected_type, expected_components", [ ( - "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_registered_component_literal_output_binding_to_inline_job_input.yml", - "uri_folder", - { - "score_job": { - "_source": "YAML.JOB", - "command": 'echo "hello" && echo "world" && echo "train" > world.txt', - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - "inputs": {"model_input": {"type": "uri_folder"}, "test_data": {"type": "uri_folder"}}, - "is_deterministic": True, - "outputs": {"score_output": {"type": "uri_folder"}}, - "type": "command", - "version": "1", - }, + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_registered_component_literal_output_binding_to_inline_job_input.yml", + "uri_folder", + { + "score_job": { + "_source": "YAML.JOB", + "command": 'echo "hello" && echo "world" && echo "train" > world.txt', + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + "inputs": {"model_input": {"type": "uri_folder"}, "test_data": {"type": "uri_folder"}}, + "is_deterministic": True, + "outputs": {"score_output": {"type": "uri_folder"}}, + "type": "command", + "version": "1", }, + }, ), ( - "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_registered_component_literal_output_binding_to_inline_job_input2.yml", - "mltable", - { - "score_job": { - "_source": "YAML.JOB", - "command": 'echo "hello" && echo "world" && echo "train" > world.txt', - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - "inputs": {"model_input": {"type": "mltable"}, "test_data": {"type": "uri_folder"}}, - "is_deterministic": True, - "outputs": {"score_output": {"type": "uri_folder"}}, - "type": "command", - "version": "1", - }, + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_registered_component_literal_output_binding_to_inline_job_input2.yml", + "mltable", + { + "score_job": { + "_source": "YAML.JOB", + "command": 'echo "hello" && echo "world" && echo "train" > world.txt', + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + "inputs": {"model_input": {"type": "mltable"}, "test_data": {"type": "uri_folder"}}, + "is_deterministic": True, + "outputs": {"score_output": {"type": "uri_folder"}}, + "type": "command", + "version": "1", }, + }, ), ( - "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_registered_component_output_binding_to_inline_job_input.yml", - "uri_folder", - { - "score_job": 
{ - "_source": "YAML.JOB", - "command": 'echo "hello" && echo "world" && echo "train" > world.txt', - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - "inputs": {"model_input": {"type": "uri_folder"}, "test_data": {"type": "uri_folder"}}, - "is_deterministic": True, - "outputs": {"score_output": {"type": "uri_folder"}}, - "type": "command", - "version": "1", - }, + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_registered_component_output_binding_to_inline_job_input.yml", + "uri_folder", + { + "score_job": { + "_source": "YAML.JOB", + "command": 'echo "hello" && echo "world" && echo "train" > world.txt', + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + "inputs": {"model_input": {"type": "uri_folder"}, "test_data": {"type": "uri_folder"}}, + "is_deterministic": True, + "outputs": {"score_output": {"type": "uri_folder"}}, + "type": "command", + "version": "1", }, + }, ), ], ) def test_pipeline_job_with_inline_command_job_input_binding_to_registered_component_job_output( - self, - client: MLClient, - pipeline_job_path: str, - expected_type, - expected_components, + self, + client: MLClient, + pipeline_job_path: str, + expected_type, + expected_components, ) -> None: pipeline_job = load_job( source=pipeline_job_path, @@ -1534,7 +1634,7 @@ def test_pipeline_without_setting_binding_node(self, mock_machinelearning_client } def test_pipeline_with_only_setting_pipeline_level( - self, mock_machinelearning_client: MLClient, mocker: MockFixture + self, mock_machinelearning_client: MLClient, mocker: MockFixture ): test_path = "./tests/test_configs/dsl_pipeline/pipeline_with_set_binding_output_input/pipeline_with_only_setting_pipeline_level.yml" job = load_job(source=test_path) @@ -1651,7 +1751,7 @@ def test_pipeline_with_only_setting_binding_node(self, mock_machinelearning_clie } def test_pipeline_with_setting_binding_node_and_pipeline_level( - self, mock_machinelearning_client: MLClient, mocker: MockFixture + self, mock_machinelearning_client: MLClient, mocker: MockFixture ): test_path = "./tests/test_configs/dsl_pipeline/pipeline_with_set_binding_output_input/pipeline_with_setting_binding_node_and_pipeline_level.yml" job = load_job(source=test_path) @@ -1713,7 +1813,7 @@ def test_pipeline_with_setting_binding_node_and_pipeline_level( } def test_pipeline_with_inline_job_setting_binding_node_and_pipeline_level( - self, mock_machinelearning_client: MLClient, mocker: MockFixture + self, mock_machinelearning_client: MLClient, mocker: MockFixture ): test_path = "./tests/test_configs/dsl_pipeline/pipeline_with_set_binding_output_input/pipeline_with_inline_job_setting_binding_node_and_pipeline_level.yml" job = load_job(source=test_path) @@ -1891,39 +1991,45 @@ def test_pipeline_node_with_identity(self): test_path = "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_identity.yml" pipeline_job: PipelineJob = load_job(source=test_path) - omit_fields = [ - "jobs.*.componentId", - "jobs.*._source" - ] + omit_fields = ["jobs.*.componentId", "jobs.*._source"] actual_dict = omit_with_wildcard(pipeline_job._to_rest_object().as_dict()["properties"], *omit_fields) assert actual_dict["jobs"] == { - 'hello_world_component': { - 'computeId': 'cpu-cluster', - 'identity': {'type': 'user_identity'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'hello_world_component', - 
'type': 'command'}, - 'hello_world_component_2': { - 'computeId': 'cpu-cluster', - 'identity': {'type': 'aml_token'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_other_number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'hello_world_component_2', - 'type': 'command'}, - 'hello_world_component_3': { - 'computeId': 'cpu-cluster', - 'identity': {'type': 'user_identity'}, - 'inputs': {'component_in_number': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_other_number}}'}, - 'component_in_path': {'job_input_type': 'literal', - 'value': '${{parent.inputs.job_in_path}}'}}, - 'name': 'hello_world_component_3', - 'type': 'command'} + "hello_world_component": { + "computeId": "cpu-cluster", + "identity": {"type": "user_identity"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "hello_world_component", + "type": "command", + }, + "hello_world_component_2": { + "computeId": "cpu-cluster", + "identity": {"type": "aml_token"}, + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.job_in_other_number}}", + }, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "hello_world_component_2", + "type": "command", + }, + "hello_world_component_3": { + "computeId": "cpu-cluster", + "identity": {"type": "user_identity"}, + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.job_in_other_number}}", + }, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "name": "hello_world_component_3", + "type": "command", + }, } def test_pipeline_parameter_with_empty_value(self, client: MLClient) -> None: @@ -1939,18 +2045,22 @@ class InputGroup: default_compute="cpu-cluster", description="This is the basic pipeline with empty_value", ) - def empty_value_pipeline(integer: int, boolean: bool, number: float, - str_param: str, empty_str: str, input_group: InputGroup): - input_types_func(component_in_string=str_param, - component_in_ranged_integer=integer, - component_in_boolean=boolean, - component_in_ranged_number=number) + def empty_value_pipeline( + integer: int, boolean: bool, number: float, str_param: str, empty_str: str, input_group: InputGroup + ): + input_types_func( + component_in_string=str_param, + component_in_ranged_integer=integer, + component_in_boolean=boolean, + component_in_ranged_number=number, + ) input_types_func(component_in_string=empty_str) input_types_func(component_in_string=input_group.group_empty_str) input_types_func(component_in_string=input_group.group_none_str) - pipeline = empty_value_pipeline(integer=0, boolean=False, number=0, - str_param="str_param", empty_str="", input_group=InputGroup()) + pipeline = empty_value_pipeline( + integer=0, boolean=False, number=0, str_param="str_param", empty_str="", input_group=InputGroup() + ) rest_obj = pipeline._to_rest_object() # Currently MFE not support pass empty str or None as pipeline input. 
assert len(rest_obj.properties.inputs) == 4 @@ -1966,19 +2076,19 @@ def test_pipeline_input_as_runsettings_value(self, client: MLClient) -> None: default_compute="cpu-cluster", description="Set pipeline input to runsettings", ) - def empty_value_pipeline(integer: int, boolean: bool, number: float, - str_param: str, shm_size: str): - component = input_types_func(component_in_string=str_param, - component_in_ranged_integer=integer, - component_in_boolean=boolean, - component_in_ranged_number=number) + def empty_value_pipeline(integer: int, boolean: bool, number: float, str_param: str, shm_size: str): + component = input_types_func( + component_in_string=str_param, + component_in_ranged_integer=integer, + component_in_boolean=boolean, + component_in_ranged_number=number, + ) component.resources = JobResourceConfiguration( instance_count=integer, shm_size=shm_size, ) - pipeline = empty_value_pipeline(integer=0, boolean=False, number=0, - str_param="str_param", shm_size="20g") + pipeline = empty_value_pipeline(integer=0, boolean=False, number=0, str_param="str_param", shm_size="20g") rest_obj = pipeline._to_rest_object() - expect_resource = {'instance_count': '${{parent.inputs.integer}}', 'shm_size': '${{parent.inputs.shm_size}}'} - assert rest_obj.properties.jobs["component"]["resources"] == expect_resource \ No newline at end of file + expect_resource = {"instance_count": "${{parent.inputs.integer}}", "shm_size": "${{parent.inputs.shm_size}}"} + assert rest_obj.properties.jobs["component"]["resources"] == expect_resource diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_schema.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_schema.py index c2bb7f98b3b0..d4e8bd29c210 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_schema.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_schema.py @@ -34,7 +34,13 @@ validate_pipeline_input_key_contains_allowed_characters, ) from azure.ai.ml.entities._job.automl.search_space_utils import _convert_sweep_dist_dict_to_str_dict -from azure.ai.ml.entities._job.job_service import JobService, JupyterLabJobService, SshJobService, TensorBoardJobService, VsCodeJobService +from azure.ai.ml.entities._job.job_service import ( + JobService, + JupyterLabJobService, + SshJobService, + TensorBoardJobService, + VsCodeJobService, +) from azure.ai.ml.entities._job.pipeline._io import PipelineInput, PipelineOutput from azure.ai.ml.exceptions import UserErrorException, ValidationException @@ -586,8 +592,9 @@ def _check_data_output_from_rest_formatting(self, rest_output_data: RestJobOutpu assert from_rest_output.mode == rest_output_data.mode def assert_inline_component(self, component_job, component_dict): - assert isinstance(component_job.component, (CommandComponent, ParallelComponent, SparkComponent, - DataTransferComponent)) + assert isinstance( + component_job.component, (CommandComponent, ParallelComponent, SparkComponent, DataTransferComponent) + ) component = component_job.component or component_job.trial assert component._is_anonymous # hash will be generated before create_or_update, so can't check it in unit tests @@ -623,18 +630,14 @@ def test_pipeline_job_inline_component_file(self): job = load_job(test_path) # make sure inline component is parsed into component entity spark_component = job.jobs["merge_files_job"] - component_dict = load_yaml( - "./tests/test_configs/components/data_transfer/merge_files.yaml" - ) + component_dict = 
load_yaml("./tests/test_configs/components/data_transfer/merge_files.yaml") self.assert_inline_component(spark_component, component_dict) test_path = "./tests/test_configs/pipeline_jobs/data_transfer/copy_files.yaml" job = load_job(test_path) # make sure inline component is parsed into component entity spark_component = job.jobs["copy_files"] - component_dict = load_yaml( - "./tests/test_configs/components/data_transfer/copy_files.yaml" - ) + component_dict = load_yaml("./tests/test_configs/components/data_transfer/copy_files.yaml") self.assert_inline_component(spark_component, component_dict) def test_pipeline_job_inline_component_file_with_complex_path(self): @@ -1149,9 +1152,13 @@ def test_dump_distribution(self): # msrest has been removed from public interface distribution_obj = TensorFlowDistribution(**distribution_dict) - with pytest.raises(ValidationError, match=r"Cannot dump non-PyTorchDistribution object into PyTorchDistributionSchema"): + with pytest.raises( + ValidationError, match=r"Cannot dump non-PyTorchDistribution object into PyTorchDistributionSchema" + ): _ = PyTorchDistributionSchema(context={"base_path": "./"}).dump(distribution_dict) - with pytest.raises(ValidationError, match=r"Cannot dump non-PyTorchDistribution object into PyTorchDistributionSchema"): + with pytest.raises( + ValidationError, match=r"Cannot dump non-PyTorchDistribution object into PyTorchDistributionSchema" + ): _ = PyTorchDistributionSchema(context={"base_path": "./"}).dump(distribution_obj) after_dump_correct = TensorFlowDistributionSchema(context={"base_path": "./"}).dump(distribution_obj) diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_validate.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_validate.py index 6186905f3db9..edb1b6c99e7c 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_validate.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/unittests/test_pipeline_job_validate.py @@ -53,8 +53,8 @@ class TestPipelineJobValidate: ), ( "./tests/test_configs/pipeline_jobs/invalid/invalid_pipeline_referencing_component_file.yml", - "In order to specify an existing components, please provide the correct registry" - ) + "In order to specify an existing components, please provide the correct registry", + ), ], ) def test_pipeline_job_validation_on_load(self, pipeline_job_path: str, expected_error: str) -> None: @@ -148,7 +148,7 @@ def test_pipeline_job_type_sensitive_error_message(self): { "path": "jobs.hello_world_no_env.trial", "value": None, - } + }, ], "result": "Failed", } @@ -254,15 +254,15 @@ def test_pipeline_job_diagnostics_location_resolution(self, mock_machinelearning [ ( "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_output_without_name.yaml", - "Output name is required when output version is specified." + "Output name is required when output version is specified.", ), ( "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_node_output_without_name.yaml", - "Output name is required when output version is specified." + "Output name is required when output version is specified.", ), ( "tests/test_configs/pipeline_jobs/helloworld_pipeline_job_register_pipeline_output_with_invalid_name.yaml", - "The output name pipeline_output@ can only contain alphanumeric characters, dashes and underscores, with a limit of 255 characters." 
+ "The output name pipeline_output@ can only contain alphanumeric characters, dashes and underscores, with a limit of 255 characters.", ), ], ) @@ -678,7 +678,7 @@ def pipeline_with_compute_binding(compute_name: str): node1.compute = compute_name sub_pipeline_with_compute_binding(compute_name) - pipeline_job = pipeline_with_compute_binding('cpu-cluster') + pipeline_job = pipeline_with_compute_binding("cpu-cluster") # Assert compute binding validate not raise error when validate assert pipeline_job._validate().passed diff --git a/sdk/ml/azure-ai-ml/tests/registry/e2etests/test_registry.py b/sdk/ml/azure-ai-ml/tests/registry/e2etests/test_registry.py index 96d62b8e3023..37b1289203d7 100644 --- a/sdk/ml/azure-ai-ml/tests/registry/e2etests/test_registry.py +++ b/sdk/ml/azure-ai-ml/tests/registry/e2etests/test_registry.py @@ -48,7 +48,9 @@ def test_registry_operations( # Some values are assigned by registries, but hidden in the local representation to avoid confusing users. # Double check that they're set properly by examining the raw registry format. - rest_registry = crud_registry_client.registries._operation.get(resource_group_name=crud_registry_client.resource_group_name, registry_name=reg_name) + rest_registry = crud_registry_client.registries._operation.get( + resource_group_name=crud_registry_client.resource_group_name, registry_name=reg_name + ) assert rest_registry # don't do a standard dictionary equality check to avoid being surprised by auto-set tags assert rest_registry.tags["one"] == "two" @@ -82,7 +84,7 @@ def test_registry_operations_with_storage_replication( registry = crud_registry_client.registries.begin_create(registry=reg).result( timeout=LROConfigurations.POLLING_TIMEOUT ) - + assert registry.name == reg_name assert registry.replication_locations[0].storage_config.replication_count == 3 assert registry.replication_locations[0].storage_config.storage_account_hns == False @@ -96,7 +98,9 @@ def test_registry_operations_with_storage_replication( # Some values are assigned by registries, but hidden in the local representation to avoid confusing users. # Double check that they're set properly by examining the raw registry format. - rest_registry = crud_registry_client.registries._operation.get(resource_group_name=crud_registry_client.resource_group_name, registry_name=reg_name) + rest_registry = crud_registry_client.registries._operation.get( + resource_group_name=crud_registry_client.resource_group_name, registry_name=reg_name + ) assert rest_registry # ensure that the underlying data behind the replicated storage looks reasonable. 
diff --git a/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_entity.py b/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_entity.py index 52126daf829a..7b98eb9bcd4a 100644 --- a/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_entity.py +++ b/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_entity.py @@ -75,7 +75,11 @@ def test_deserialize_from_autorest_object(self) -> None: RestRegistryRegionArmDetails( location=loc_2, acr_details=[ - AcrDetails(user_created_acr_account=UserCreatedAcrAccount(arm_resource_id=RestArmResourceId(resource_id=acr_id_1))), + AcrDetails( + user_created_acr_account=UserCreatedAcrAccount( + arm_resource_id=RestArmResourceId(resource_id=acr_id_1) + ) + ), AcrDetails( system_created_acr_account=RestSystemCreatedAcrAccount( acr_account_sku=sku, arm_resource_id=RestArmResourceId(resource_id=acr_id_2) @@ -187,17 +191,25 @@ def test_system_user_storage_serialization(self): def test_system_managed_storage_serialization(self): - system_storage = SystemCreatedStorageAccount(storage_account_hns=True, storage_account_type=StorageAccountType.PREMIUM_LRS, arm_resource_id="some managed storage id") + system_storage = SystemCreatedStorageAccount( + storage_account_hns=True, + storage_account_type=StorageAccountType.PREMIUM_LRS, + arm_resource_id="some managed storage id", + ) system_details = RegistryRegionDetails(storage_config=system_storage) rest_system_storage = system_details._storage_config_to_rest_object() assert len(rest_system_storage) == 1 assert rest_system_storage[0].system_created_storage_account.storage_account_hns_enabled == True - assert rest_system_storage[0].system_created_storage_account.storage_account_type == StorageAccountType.PREMIUM_LRS + assert ( + rest_system_storage[0].system_created_storage_account.storage_account_type == StorageAccountType.PREMIUM_LRS + ) # Ensure that arm_resource_id is never set by entity->rest converter. assert rest_system_storage[0].system_created_storage_account.arm_resource_id == None # ... 
but still test that ID is set in the other direction - rest_system_storage[0].system_created_storage_account.arm_resource_id = RestArmResourceId(resource_id="another storage id") + rest_system_storage[0].system_created_storage_account.arm_resource_id = RestArmResourceId( + resource_id="another storage id" + ) new_system_storage = RegistryRegionDetails._storage_config_from_rest_object(rest_system_storage) assert new_system_storage.arm_resource_id == "another storage id" @@ -205,12 +217,19 @@ def test_system_managed_storage_serialization(self): assert new_system_storage.storage_account_type == StorageAccountType.PREMIUM_LRS def test_system_replicated_managed_storage_serialization(self): - system_storage = SystemCreatedStorageAccount(storage_account_hns=False, storage_account_type=StorageAccountType.PREMIUM_ZRS, arm_resource_id="some managed storage id", replication_count=3) + system_storage = SystemCreatedStorageAccount( + storage_account_hns=False, + storage_account_type=StorageAccountType.PREMIUM_ZRS, + arm_resource_id="some managed storage id", + replication_count=3, + ) system_details = RegistryRegionDetails(storage_config=system_storage) rest_system_storage = system_details._storage_config_to_rest_object() assert len(rest_system_storage) == 3 assert rest_system_storage[0].system_created_storage_account.storage_account_hns_enabled == False - assert rest_system_storage[0].system_created_storage_account.storage_account_type == StorageAccountType.PREMIUM_ZRS + assert ( + rest_system_storage[0].system_created_storage_account.storage_account_type == StorageAccountType.PREMIUM_ZRS + ) # Ensure that arm_resource_id is never set by entity->rest converter. assert rest_system_storage[0].system_created_storage_account.arm_resource_id == None assert rest_system_storage[0] == rest_system_storage[1] @@ -218,20 +237,26 @@ def test_system_replicated_managed_storage_serialization(self): # ... 
but still test that ID is set in the other direction expected_ids = ["another storage id", "a second storage id", "a third storage id"] - rest_system_storage[0].system_created_storage_account.arm_resource_id = RestArmResourceId(resource_id=expected_ids[0]) + rest_system_storage[0].system_created_storage_account.arm_resource_id = RestArmResourceId( + resource_id=expected_ids[0] + ) # and that replicated ID's are populated properly - rest_system_storage[1].system_created_storage_account.arm_resource_id = RestArmResourceId(resource_id=expected_ids[1]) - rest_system_storage[2].system_created_storage_account.arm_resource_id = RestArmResourceId(resource_id=expected_ids[2]) + rest_system_storage[1].system_created_storage_account.arm_resource_id = RestArmResourceId( + resource_id=expected_ids[1] + ) + rest_system_storage[2].system_created_storage_account.arm_resource_id = RestArmResourceId( + resource_id=expected_ids[2] + ) new_system_storage = RegistryRegionDetails._storage_config_from_rest_object(rest_system_storage) assert new_system_storage.arm_resource_id == expected_ids[0] assert new_system_storage.storage_account_hns == False assert new_system_storage.storage_account_type == StorageAccountType.PREMIUM_ZRS assert new_system_storage.replication_count == 3 - + for expected_id in expected_ids: assert expected_id in new_system_storage.replicated_ids - + system_details.storage_config.replication_count = -1 try: system_details._storage_config_to_rest_object() @@ -241,14 +266,21 @@ def test_system_replicated_managed_storage_serialization(self): pass def test_system_region_details_serialization(self): - region_detail = RegistryRegionDetails(acr_config=[SystemCreatedAcrAccount(acr_account_sku="Premium")], + region_detail = RegistryRegionDetails( + acr_config=[SystemCreatedAcrAccount(acr_account_sku="Premium")], location="USEast2", - storage_config=SystemCreatedStorageAccount(storage_account_hns=False, storage_account_type=StorageAccountType.PREMIUM_LRS)) + storage_config=SystemCreatedStorageAccount( + storage_account_hns=False, storage_account_type=StorageAccountType.PREMIUM_LRS + ), + ) rest_region_detail = region_detail._to_rest_object() assert rest_region_detail.acr_details[0].system_created_acr_account.acr_account_sku == "Premium" assert rest_region_detail.location == "USEast2" - assert rest_region_detail.storage_account_details[0].system_created_storage_account.storage_account_hns_enabled == False + assert ( + rest_region_detail.storage_account_details[0].system_created_storage_account.storage_account_hns_enabled + == False + ) new_region_detail = RegistryRegionDetails._from_rest_object(rest_region_detail) diff --git a/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_operations.py b/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_operations.py index 45ef3286a971..58863fec7095 100644 --- a/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_operations.py +++ b/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_operations.py @@ -54,11 +54,7 @@ def test_check_registry_name(self, mock_registry_operation: RegistryOperations): def test_create(self, mock_registry_operation: RegistryOperations, randstr: Callable[[], str]) -> None: reg_name = f"unittest{randstr('reg_name')}" - params_override = [ - { - "name": reg_name - } - ] + params_override = [{"name": reg_name}] reg = load_registry( source="./tests/test_configs/registry/registry_valid_min.yaml", params_override=params_override ) @@ -68,4 +64,4 @@ def test_create(self, mock_registry_operation: RegistryOperations, 
randstr: Call def test_delete(self, mock_registry_operation: RegistryOperations, randstr: Callable[[], str]) -> None: mock_registry_operation.begin_delete(name="some registry") - mock_registry_operation._operation.begin_delete.assert_called_once() \ No newline at end of file + mock_registry_operation._operation.begin_delete.assert_called_once() diff --git a/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_schema.py b/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_schema.py index fd559eb72f0c..98575c0bd6e6 100644 --- a/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_schema.py +++ b/sdk/ml/azure-ai-ml/tests/registry/unittests/test_registry_schema.py @@ -113,4 +113,4 @@ def test_deserialize_replication_counts(self) -> None: registry = load_from_dict(RegistrySchema, target, context) registry["replication_locations"][0].storage_config.replication_count == 6 registry["replication_locations"][0].storage_config.storage_account_hns == False - registry["replication_locations"][0].storage_config.storage_account_type == StorageAccountType.STANDARD_LRS \ No newline at end of file + registry["replication_locations"][0].storage_config.storage_account_type == StorageAccountType.STANDARD_LRS diff --git a/sdk/ml/azure-ai-ml/tests/schedule/e2etests/test_schedule.py b/sdk/ml/azure-ai-ml/tests/schedule/e2etests/test_schedule.py index 9f607ef06c29..59780a1d6e4e 100644 --- a/sdk/ml/azure-ai-ml/tests/schedule/e2etests/test_schedule.py +++ b/sdk/ml/azure-ai-ml/tests/schedule/e2etests/test_schedule.py @@ -200,4 +200,4 @@ def test_spark_job_schedule(self, client: MLClient, randstr: Callable[[], str]): rest_schedule_job_dict.pop("status", None) schedule_job_dict = schedule._to_dict()["create_job"] # pop job name, empty parameters from local dict - assert schedule_job_dict == rest_schedule_job_dict \ No newline at end of file + assert schedule_job_dict == rest_schedule_job_dict diff --git a/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_entity.py b/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_entity.py index f40395d3a8a0..9fd59125d46f 100644 --- a/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_entity.py +++ b/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_entity.py @@ -136,9 +136,12 @@ def test_schedule_with_command_job(self): rest_schedule_job_dict = schedule._to_rest_object().as_dict()["properties"]["action"]["job_definition"] # assert overwrite values assert rest_schedule_job_dict["environment_variables"] == {"key": "val"} - assert rest_schedule_job_dict["resources"] == {'properties': {}, 'shm_size': '1g'} - assert rest_schedule_job_dict["distribution"] == {'distribution_type': 'PyTorch', 'process_count_per_instance': 1} - assert rest_schedule_job_dict["limits"] == {'job_limits_type': 'Command', 'timeout': 'PT50M'} + assert rest_schedule_job_dict["resources"] == {"properties": {}, "shm_size": "1g"} + assert rest_schedule_job_dict["distribution"] == { + "distribution_type": "PyTorch", + "process_count_per_instance": 1, + } + assert rest_schedule_job_dict["limits"] == {"job_limits_type": "Command", "timeout": "PT50M"} @pytest.mark.usefixtures( "enable_pipeline_private_preview_features", @@ -157,7 +160,13 @@ def test_schedule_entity_with_spark_job(self): schedule = load_schedule(test_path) rest_schedule_job_dict = schedule._to_rest_object().as_dict()["properties"]["action"]["job_definition"] # assert overwrite values - assert rest_schedule_job_dict["conf"] == {'spark.driver.cores': '2', 'spark.driver.memory': '2g', 'spark.executor.cores': '2', 
'spark.executor.memory': '2g', 'spark.executor.instances': '2'} + assert rest_schedule_job_dict["conf"] == { + "spark.driver.cores": "2", + "spark.driver.memory": "2g", + "spark.executor.cores": "2", + "spark.executor.memory": "2g", + "spark.executor.instances": "2", + } assert "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04" in rest_schedule_job_dict["environment_id"] def test_invalid_date_string(self): diff --git a/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_schema.py b/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_schema.py index 9fcc489c0cc3..4a6e927c08fc 100644 --- a/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_schema.py +++ b/sdk/ml/azure-ai-ml/tests/schedule/unittests/test_schedule_schema.py @@ -130,7 +130,7 @@ def test_load_recurrence_schedule_no_pattern(self): actual_trigger_dict = schedule._to_dict()["trigger"] # Remove emtpy key 'schedule': {'hours': [], 'minutes': []} actual_trigger_dict = pydash.omit(actual_trigger_dict, ["schedule"]) - assert actual_trigger_dict == expected_trigger_dict + assert actual_trigger_dict == expected_trigger_dict def test_load_recurrence_schedule_with_pattern(self): test_path = "./tests/test_configs/schedule/hello_recurrence_schedule_with_pattern.yml" diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/batch_setup/light_gbm_examples/python-guide/advanced_example.py b/sdk/ml/azure-ai-ml/tests/test_configs/batch_setup/light_gbm_examples/python-guide/advanced_example.py index 81867b99224b..f3e57f1fda39 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/batch_setup/light_gbm_examples/python-guide/advanced_example.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/batch_setup/light_gbm_examples/python-guide/advanced_example.py @@ -132,7 +132,7 @@ def main(): lgb_train, num_boost_round=10, init_model=gbm, - learning_rates=lambda iter: 0.05 * (0.99 ** iter), + learning_rates=lambda iter: 0.05 * (0.99**iter), valid_sets=lgb_eval, ) diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/components/component_with_conditional_output/entry.py b/sdk/ml/azure-ai-ml/tests/test_configs/components/component_with_conditional_output/entry.py index e37438915eb8..9e18620b2b99 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/components/component_with_conditional_output/entry.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/components/component_with_conditional_output/entry.py @@ -6,9 +6,9 @@ @command_component() def basic_component( - output1: Output(type="boolean", is_control=True), - output2: Output(type="boolean", is_control=True), - output3: Output(type="boolean"), + output1: Output(type="boolean", is_control=True), + output2: Output(type="boolean", is_control=True), + output3: Output(type="boolean"), ) -> Output(type="boolean", is_control=True): """module run logic goes here""" return False diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/components/do_while_test/entry.py b/sdk/ml/azure-ai-ml/tests/test_configs/components/do_while_test/entry.py index d24aaa01385e..2383dbc63557 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/components/do_while_test/entry.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/components/do_while_test/entry.py @@ -2,8 +2,8 @@ import sys from pathlib import Path -os.environ['AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED'] = 'true' -os.environ['AZURE_ML_INTERNAL_COMPONENTS_ENABLED'] = 'true' +os.environ["AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED"] = "true" +os.environ["AZURE_ML_INTERNAL_COMPONENTS_ENABLED"] = "true" from mldesigner import command_component from mldesigner import dsl as designerdsl @@ -17,30 +17,30 @@ 
from azure.identity import DefaultAzureCredential ENVIRONMENT_DICT = dict( - image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04", - conda_file={ - "name": "default_environment", - "channels": ["defaults"], - "dependencies": [ - "python=3.8.12", - "pip=21.2.2", - { - "pip": [ - "--extra-index-url=https://azuremlsdktestpypi.azureedge.net/sdk-cli-v2", - "mldesigner==0.1.0b6", - "mlflow==1.29.0", - "azureml-mlflow==1.45.0", - "azure-ai-ml==1.0.0", - "azure-core==1.26.0", - "azure-common==1.1.28", - "azureml-core==1.45.0.post2", - "azure-ml-component==0.9.13.post1", - "azure-identity==1.11.0" - ] - }, - ], - } - ) + image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04", + conda_file={ + "name": "default_environment", + "channels": ["defaults"], + "dependencies": [ + "python=3.8.12", + "pip=21.2.2", + { + "pip": [ + "--extra-index-url=https://azuremlsdktestpypi.azureedge.net/sdk-cli-v2", + "mldesigner==0.1.0b6", + "mlflow==1.29.0", + "azureml-mlflow==1.45.0", + "azure-ai-ml==1.0.0", + "azure-core==1.26.0", + "azure-common==1.1.28", + "azureml-core==1.45.0.post2", + "azure-ml-component==0.9.13.post1", + "azure-identity==1.11.0", + ] + }, + ], + }, +) def write_text(input, output): @@ -48,24 +48,28 @@ def write_text(input, output): if input_data.is_dir(): files = os.listdir(input_data) for f in files: - lines = (Path(input_data) / f).read_text(encoding='utf-8') + lines = (Path(input_data) / f).read_text(encoding="utf-8") (Path(output) / f).write_text(lines) else: - lines = (Path(input_data)).read_text(encoding='utf-8') + lines = (Path(input_data)).read_text(encoding="utf-8") (Path(output) / Path(input_data).name).write_text(lines) @command_component(environment=ENVIRONMENT_DICT) def primitive_component_with_normal_input_output_v2( - input_data: Input, parambool: bool, paramint: int, paramfloat: float, paramstr: str, + input_data: Input, + parambool: bool, + paramint: int, + paramfloat: float, + paramstr: str, output_data: Output, bool_param_output: Output(type="boolean", is_control=True), int_param_output: Output(type="integer", is_control=True), float_param_output: Output(type="number", is_control=True), - str_param_output: Output(type="string", is_control=True) + str_param_output: Output(type="string", is_control=True), ): if input_data is None or not Path(input_data).exists(): - (Path(output_data) / 'file').write_text('abc') + (Path(output_data) / "file").write_text("abc") return write_text(input_data, output_data) @@ -74,9 +78,13 @@ def primitive_component_with_normal_input_output_v2( int_param_output = paramint float_param_output = paramfloat str_param_output = paramstr - print(f"output params are: bool_param_output: {bool_param_output}, int_param_output: {int_param_output}, float_param_output: {float_param_output}, str_param_output: {str_param_output}") - control_output_content = '{"int_param_output": "%s", "bool_param_output": "%s", "float_param_output": "%s", "str_param_output": "%s"}' % ( - int_param_output, bool_param_output, float_param_output, str_param_output) + print( + f"output params are: bool_param_output: {bool_param_output}, int_param_output: {int_param_output}, float_param_output: {float_param_output}, str_param_output: {str_param_output}" + ) + control_output_content = ( + '{"int_param_output": "%s", "bool_param_output": "%s", "float_param_output": "%s", "str_param_output": "%s"}' + % (int_param_output, bool_param_output, float_param_output, str_param_output) + ) ExecutorBase._write_control_outputs_to_run_history(control_output_content=control_output_content) @@ 
-94,7 +102,7 @@ def do_while_body_component( bool_param_output: Output(type="boolean", is_control=True), int_param_output: Output(type="integer", is_control=True), float_param_output: Output(type="number", is_control=True), - str_param_output: Output(type="string", is_control=True) + str_param_output: Output(type="string", is_control=True), ): if not (input_1 is None or not Path(input_1).exists()): write_text(input_1, output_1) @@ -112,7 +120,8 @@ def do_while_body_component( bool_param_output = bool_param str_param_output = str_param - control_output_content = '{"condition": "%s", "int_param_output": "%s", "bool_param_output": "%s", "float_param_output": "%s", "str_param_output": "%s"}' % ( - str(condition), int_param_output, bool_param_output, float_param_output, str_param_output) + control_output_content = ( + '{"condition": "%s", "int_param_output": "%s", "bool_param_output": "%s", "float_param_output": "%s", "str_param_output": "%s"}' + % (str(condition), int_param_output, bool_param_output, float_param_output, str_param_output) + ) ExecutorBase._write_control_outputs_to_run_history(control_output_content=control_output_content) - diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/deployments/batch/hello-component/src/hello.py b/sdk/ml/azure-ai-ml/tests/test_configs/deployments/batch/hello-component/src/hello.py index e75154b7c390..8cde7829c178 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/deployments/batch/hello-component/src/hello.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/deployments/batch/hello-component/src/hello.py @@ -1 +1 @@ -print("hello world") \ No newline at end of file +print("hello world") diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/copy_data/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/copy_data/pipeline.py index 08e4114499e0..ba2a431a3d68 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/copy_data/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/copy_data/pipeline.py @@ -24,7 +24,8 @@ def data_transfer_copy_pipeline_from_yaml(cosmos_folder, cosmos_folder_dup): type=AssetTypes.URI_FOLDER, ), cosmos_folder_dup=Input( - type=AssetTypes.URI_FOLDER, path=parent_dir + "/data/iris_model", + type=AssetTypes.URI_FOLDER, + path=parent_dir + "/data/iris_model", ), ) @@ -62,8 +63,7 @@ def data_transfer_copy_pipeline_from_builder(cosmos_folder, cosmos_folder_dup): return {"merged_blob": merge_files.outputs.output_folder} pipeline = data_transfer_copy_pipeline_from_builder( - cosmos_folder=cosmos_folder, - cosmos_folder_dup=cosmos_folder_dup + cosmos_folder=cosmos_folder, cosmos_folder_dup=cosmos_folder_dup ) pipeline.settings.default_compute = "adftest" return pipeline diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_database/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_database/pipeline.py index 75f3d2f66601..1ef7037d2306 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_database/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_database/pipeline.py @@ -11,20 +11,23 @@ def generate_dsl_pipeline_from_builder() -> PipelineJob: - connection_target_azuresql = 'azureml:my_export_azuresqldb_connection' + connection_target_azuresql = "azureml:my_export_azuresqldb_connection" 
table_name = "dbo.Persons" my_cosmos_folder = Input(type=AssetTypes.URI_FILE, path=parent_dir + "/data/testFile_ForSqlDB.parquet") inputs = {"source": my_cosmos_folder} - sink = {'type': 'database', 'connection': connection_target_azuresql, 'table_name': table_name} + sink = {"type": "database", "connection": connection_target_azuresql, "table_name": table_name} - @dsl.pipeline(description='submit a pipeline with data transfer export database job') + @dsl.pipeline(description="submit a pipeline with data transfer export database job") def data_transfer_export_database_pipeline_from_builder(table_name, connection_target_azuresql, cosmos_folder): from azure.ai.ml.data_transfer import Database + blob_azuresql_node_input = export_data(inputs=inputs, sink=sink) source_snowflake = Database(table_name=table_name, connection=connection_target_azuresql) blob_azuresql = export_data(inputs={"source": cosmos_folder}, sink=source_snowflake) - pipeline = data_transfer_export_database_pipeline_from_builder(table_name, connection_target_azuresql, my_cosmos_folder) + pipeline = data_transfer_export_database_pipeline_from_builder( + table_name, connection_target_azuresql, my_cosmos_folder + ) pipeline.settings.default_compute = "adftest" return pipeline diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_file_system/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_file_system/pipeline.py index 79c90950b20a..789cd90b432f 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_file_system/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/export_file_system/pipeline.py @@ -11,21 +11,24 @@ def generate_dsl_pipeline_from_builder() -> PipelineJob: - path_source_s3 = 's3://my_bucket/my_folder' - connection_target = 'azureml:my_s3_connection' + path_source_s3 = "s3://my_bucket/my_folder" + connection_target = "azureml:my_s3_connection" my_cosmos_folder = Input(type=AssetTypes.URI_FOLDER, path="azureml://datastores/my_cosmos/paths/source_cosmos") inputs = {"source": my_cosmos_folder} - sink = {'type': 'file_system', 'connection': connection_target, 'path': path_source_s3} + sink = {"type": "file_system", "connection": connection_target, "path": path_source_s3} - @dsl.pipeline(description='submit a pipeline with data transfer export file system job') + @dsl.pipeline(description="submit a pipeline with data transfer export file system job") def data_transfer_export_file_system_pipeline_from_builder(path_source_s3, connection_target, cosmos_folder): from azure.ai.ml.data_transfer import FileSystem + s3_blob_input = export_data(inputs=inputs, sink=sink, task=DataTransferTaskType.EXPORT_DATA) source_snowflake = FileSystem(path=path_source_s3, connection=connection_target) s3_blob = export_data(inputs={"source": cosmos_folder}, sink=source_snowflake) - pipeline = data_transfer_export_file_system_pipeline_from_builder(path_source_s3, connection_target, my_cosmos_folder) + pipeline = data_transfer_export_file_system_pipeline_from_builder( + path_source_s3, connection_target, my_cosmos_folder + ) pipeline.settings.default_compute = "adf_compute" return pipeline diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_database/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_database/pipeline.py index 130b74affb0b..110a24fd469a 100644 --- 
a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_database/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_database/pipeline.py @@ -10,15 +10,20 @@ def generate_dsl_pipeline_from_builder() -> PipelineJob: - query_source_snowflake = 'select * from TPCH_SF1000.PARTSUPP limit 10' - connection_target_azuresql = 'azureml:my_snowflake_connection' - outputs = {"sink": Output(type=AssetTypes.MLTABLE, path="azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/")} - source = {'type': 'database', 'connection': connection_target_azuresql, - 'query': query_source_snowflake} + query_source_snowflake = "select * from TPCH_SF1000.PARTSUPP limit 10" + connection_target_azuresql = "azureml:my_snowflake_connection" + outputs = { + "sink": Output( + type=AssetTypes.MLTABLE, + path="azureml://datastores/workspaceblobstore_sas/paths/importjob/${{name}}/output_dir/snowflake/", + ) + } + source = {"type": "database", "connection": connection_target_azuresql, "query": query_source_snowflake} - @dsl.pipeline(description='submit a pipeline with data transfer import database job') + @dsl.pipeline(description="submit a pipeline with data transfer import database job") def data_transfer_import_database_pipeline_from_builder(query_source_snowflake, connection_target_azuresql): from azure.ai.ml.data_transfer import Database + snowflake_blob_node_input = import_data( source=Database(**source), outputs=outputs, @@ -38,15 +43,15 @@ def data_transfer_import_database_pipeline_from_builder(query_source_snowflake, def generate_dsl_pipeline_from_builder_sql() -> PipelineJob: - query_source_snowflake = 'select top(10) Name from SalesLT.ProductCategory' - connection_target_azuresql = 'azureml:my_azuresqldb_connection' + query_source_snowflake = "select top(10) Name from SalesLT.ProductCategory" + connection_target_azuresql = "azureml:my_azuresqldb_connection" outputs = {"sink": Output(type=AssetTypes.MLTABLE)} - source = {'type': 'database', 'connection': connection_target_azuresql, - 'query': query_source_snowflake} + source = {"type": "database", "connection": connection_target_azuresql, "query": query_source_snowflake} - @dsl.pipeline(description='submit a pipeline with data transfer import database job') + @dsl.pipeline(description="submit a pipeline with data transfer import database job") def data_transfer_import_database_pipeline_from_builder(query_source_snowflake, connection_target_azuresql): from azure.ai.ml.data_transfer import Database + sql_blob_node_input = import_data( source=Database(**source), outputs=outputs, diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_file_system/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_file_system/pipeline.py index dbdaa162d8f8..aa16c066cc54 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_file_system/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_file_system/pipeline.py @@ -11,14 +11,15 @@ def generate_dsl_pipeline_from_builder() -> PipelineJob: - path_source_s3 = 'test1/*' - connection_target = 'azureml:my-s3-connection' + path_source_s3 = "test1/*" + connection_target = "azureml:my-s3-connection" outputs = {"sink": Output(type=AssetTypes.URI_FOLDER)} - source = {'type': 'file_system', 'connection': connection_target, 'path': 
path_source_s3} + source = {"type": "file_system", "connection": connection_target, "path": path_source_s3} - @dsl.pipeline(description='submit a pipeline with data transfer import file system job') + @dsl.pipeline(description="submit a pipeline with data transfer import file system job") def data_transfer_import_file_system_pipeline_from_builder(path_source_s3, connection_target): from azure.ai.ml.data_transfer import FileSystem + s3_blob_input = import_data(source=source, outputs=outputs) source_snowflake = FileSystem(path=path_source_s3, connection=connection_target) diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_stored_database/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_stored_database/pipeline.py index 4d9d48ad9a89..814b27d74fd0 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_stored_database/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/import_stored_database/pipeline.py @@ -11,16 +11,23 @@ def generate_dsl_pipeline_from_builder() -> PipelineJob: - stored_procedure = 'SelectEmployeeByJobAndDepartment' - stored_procedure_params = [{'name': 'job', 'value': 'Engineer', 'type': 'String'}, - {'name': 'department', 'value': 'Engineering', 'type': 'String'}] + stored_procedure = "SelectEmployeeByJobAndDepartment" + stored_procedure_params = [ + {"name": "job", "value": "Engineer", "type": "String"}, + {"name": "department", "value": "Engineering", "type": "String"}, + ] outputs = {"sink": Output(type=AssetTypes.MLTABLE)} - source = {'type': 'database', 'connection': 'azureml:my_sql_connection', 'stored_procedure': stored_procedure, - 'stored_procedure_params': stored_procedure_params} + source = { + "type": "database", + "connection": "azureml:my_sql_connection", + "stored_procedure": stored_procedure, + "stored_procedure_params": stored_procedure_params, + } - @dsl.pipeline(description='submit a pipeline with data transfer import stored database job') + @dsl.pipeline(description="submit a pipeline with data transfer import stored database job") def data_transfer_import_database_pipeline_from_builder(): from azure.ai.ml.data_transfer import Database + snowflake_blob = import_data( source=Database(**source), outputs=outputs, diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/pipeline.py b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/pipeline.py index 313a348de468..af2ba7a5effc 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/dsl_pipeline/data_transfer_job_in_pipeline/pipeline.py @@ -11,27 +11,24 @@ def generate_dsl_pipeline() -> PipelineJob: - path_source_s3 = 'test1/*' - query_source_sql = 'select top(10) Name from SalesLT.ProductCategory' - connection_target_s3 = 'azureml:my-s3-connection' + path_source_s3 = "test1/*" + query_source_sql = "select top(10) Name from SalesLT.ProductCategory" + connection_target_s3 = "azureml:my-s3-connection" merge_files_func = load_component(parent_dir + "/copy_data/merge_files.yaml") @dsl.pipeline(description="submit a pipeline with data transfer job") def data_transfer_copy_pipeline_from_yaml(query_source_sql, path_source_s3, connection_target_s3): - source_snowflake = Database(query=query_source_sql, connection='azureml:my_azuresqldb_connection') - snowflake_blob = 
import_data( - source=source_snowflake, - outputs={"sink": Output(type=AssetTypes.MLTABLE)} - ) - snowflake_blob.compute = 'adftest' + source_snowflake = Database(query=query_source_sql, connection="azureml:my_azuresqldb_connection") + snowflake_blob = import_data(source=source_snowflake, outputs={"sink": Output(type=AssetTypes.MLTABLE)}) + snowflake_blob.compute = "adftest" - source_s3 = FileSystem(path=path_source_s3, connection='azureml:my-s3-connection') - s3_blob = import_data( source=source_s3, outputs={"sink": Output(type=AssetTypes.URI_FOLDER)}) + source_s3 = FileSystem(path=path_source_s3, connection="azureml:my-s3-connection") + s3_blob = import_data(source=source_s3, outputs={"sink": Output(type=AssetTypes.URI_FOLDER)}) merge_files = merge_files_func(folder1=s3_blob.outputs.sink, folder2=snowflake_blob.outputs.sink) - sink_s3 = FileSystem(path='test1/', connection=connection_target_s3) + sink_s3 = FileSystem(path="test1/", connection=connection_target_s3) blob_s3 = export_data(inputs={"source": merge_files.outputs.output_folder}, sink=sink_s3) return {"merged_blob": merge_files.outputs.output_folder} @@ -40,7 +37,7 @@ def data_transfer_copy_pipeline_from_yaml(query_source_sql, path_source_s3, conn query_source_sql=query_source_sql, path_source_s3=path_source_s3, connection_target_s3=connection_target_s3, - ) + ) # pipeline.outputs.merged_blob.path = "azureml://datastores/my_blob/paths/merged_blob" pipeline.settings.default_compute = "adftest" diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library2/greetings.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library2/greetings.py index ee111f2ca417..c8554cc1516a 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library2/greetings.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library2/greetings.py @@ -2,5 +2,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- + def greetings(): - print(f'greetings from {__name__}') \ No newline at end of file + print(f"greetings from {__name__}") diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/__init__.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/__init__.py index 9dd81a39770e..d540fd20468c 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/__init__.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/__init__.py @@ -1,3 +1,3 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
-# --------------------------------------------------------- \ No newline at end of file +# --------------------------------------------------------- diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/hello.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/hello.py index 45581f87d06d..1f1d9e0b0d48 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/hello.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/library3/hello.py @@ -4,4 +4,4 @@ def say_hello(): - print(f'hello from {__name__}') \ No newline at end of file + print(f"hello from {__name__}") diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/__init__.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/__init__.py index 69e3be50dac4..8db66d3d0f0f 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/__init__.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/__init__.py @@ -1 +1 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/hello.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/hello.py index 2103e1a7e54b..a60acfc38bfc 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/hello.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library1/hello.py @@ -1,2 +1,2 @@ def say_hello(): - print(f'hello from {__name__}') + print(f"hello from {__name__}") diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library2/greetings.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library2/greetings.py index eeb6c2703786..184e4a264adb 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library2/greetings.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src/python/library2/greetings.py @@ -1,2 +1,2 @@ def greetings(): - print(f'greetings from {__name__}') + print(f"greetings from {__name__}") diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/__init__.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/__init__.py index 69e3be50dac4..8db66d3d0f0f 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/__init__.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/__init__.py @@ -1 +1 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/__init__.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/__init__.py index 69e3be50dac4..8db66d3d0f0f 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/__init__.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/__init__.py @@ -1 +1 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +__path__ 
= __import__("pkgutil").extend_path(__path__, __name__) diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/salute.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/salute.py index 9811d01382e9..7727b910e7f0 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/salute.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/additional_includes/src1/library1/utils/salute.py @@ -1,2 +1,2 @@ def salute(): - print(f'salute from {__name__}') + print(f"salute from {__name__}") diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes-in-zip/run.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes-in-zip/run.py index 260efb76f0a7..f59980ce7fcd 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes-in-zip/run.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes-in-zip/run.py @@ -12,5 +12,5 @@ # print(sys.path) -if __name__ == '__main__': - say_hello() \ No newline at end of file +if __name__ == "__main__": + say_hello() diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes/run.py b/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes/run.py index 260efb76f0a7..f59980ce7fcd 100644 --- a/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes/run.py +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/component-reuse/additional-includes/run.py @@ -12,5 +12,5 @@ # print(sys.path) -if __name__ == '__main__': - say_hello() \ No newline at end of file +if __name__ == "__main__": + say_hello() diff --git a/sdk/ml/azure-ai-ml/tests/test_utilities/utils.py b/sdk/ml/azure-ai-ml/tests/test_utilities/utils.py index 6b4f0eec8117..b52fb9fa913d 100644 --- a/sdk/ml/azure-ai-ml/tests/test_utilities/utils.py +++ b/sdk/ml/azure-ai-ml/tests/test_utilities/utils.py @@ -349,11 +349,11 @@ def parse_local_path(origin_path, base_path=None): @contextmanager def build_temp_folder( - *, - source_base_dir: Union[str, os.PathLike], - relative_dirs_to_copy: List[str] = None, - relative_files_to_copy: List[str] = None, - extra_files_to_create: Dict[str, Optional[str]] = None, + *, + source_base_dir: Union[str, os.PathLike], + relative_dirs_to_copy: List[str] = None, + relative_files_to_copy: List[str] = None, + extra_files_to_create: Dict[str, Optional[str]] = None, ) -> str: """Build a temporary folder with files and subfolders copied from source_base_dir. 
diff --git a/sdk/ml/azure-ai-ml/tests/virtual_cluster/e2etests/test_vc.py b/sdk/ml/azure-ai-ml/tests/virtual_cluster/e2etests/test_vc.py index 18a50454d0b3..349d73ee0f72 100644 --- a/sdk/ml/azure-ai-ml/tests/virtual_cluster/e2etests/test_vc.py +++ b/sdk/ml/azure-ai-ml/tests/virtual_cluster/e2etests/test_vc.py @@ -1,14 +1,13 @@ - import pytest import re from devtools_testutils import AzureRecordedTestCase from azure.ai.ml import MLClient + @pytest.mark.usefixtures("recorded_test") @pytest.mark.virtual_cluster_test class TestVirtualCluster(AzureRecordedTestCase): - @pytest.mark.e2etest def test_get_and_list(self, client: MLClient) -> None: vc_list = client._virtual_clusters.list() @@ -17,9 +16,7 @@ def test_get_and_list(self, client: MLClient) -> None: test_vc_name = "SingularityTestVC" singularity_test_vc = [vc for vc in vc_list if vc["name"] == test_vc_name][0] - REGEX_PATTERN = ( - "^/?subscriptions/([^/]+)/resourceGroups/([^/]+)/providers/Microsoft.MachineLearningServices/virtualclusters/([^/]+)" - ) + REGEX_PATTERN = "^/?subscriptions/([^/]+)/resourceGroups/([^/]+)/providers/Microsoft.MachineLearningServices/virtualclusters/([^/]+)" match = re.match(REGEX_PATTERN, singularity_test_vc["id"]) subscription_id = match.group(1) resource_group_name = match.group(2) @@ -28,5 +25,3 @@ def test_get_and_list(self, client: MLClient) -> None: vc_get_client = MLClient(client._credential, subscription_id, resource_group_name) vc = vc_get_client._virtual_clusters.get(vc_name) assert test_vc_name == vc["name"] - - \ No newline at end of file diff --git a/sdk/ml/azure-ai-ml/tests/virtual_cluster/unittests/test_vc_operations.py b/sdk/ml/azure-ai-ml/tests/virtual_cluster/unittests/test_vc_operations.py index 74469b68bbe5..597ae47479af 100644 --- a/sdk/ml/azure-ai-ml/tests/virtual_cluster/unittests/test_vc_operations.py +++ b/sdk/ml/azure-ai-ml/tests/virtual_cluster/unittests/test_vc_operations.py @@ -13,18 +13,20 @@ def mock_credential() -> Mock: @pytest.fixture def mock_vc_operation(mock_workspace_scope: OperationScope, mock_credential) -> VirtualClusterOperations: - yield VirtualClusterOperations( - operation_scope=mock_workspace_scope, - credentials=mock_credential - ) + yield VirtualClusterOperations(operation_scope=mock_workspace_scope, credentials=mock_credential) @pytest.mark.unittest @pytest.mark.virtual_cluster_test class TestVCOperations: - @patch("azure.ai.ml.operations._virtual_cluster_operations.get_virtual_clusters_from_subscriptions") - def test_list(self, mock_function, mock_vc_operation: VirtualClusterOperations, mock_workspace_scope: OperationScope, mock_credential: Mock) -> None: + def test_list( + self, + mock_function, + mock_vc_operation: VirtualClusterOperations, + mock_workspace_scope: OperationScope, + mock_credential: Mock, + ) -> None: dummy_vc_list = [ { "id": "id1", @@ -33,14 +35,13 @@ def test_list(self, mock_function, mock_vc_operation: VirtualClusterOperations, { "id": "id2", "name": "name2", - } + }, ] - - + mock_function.return_value = dummy_vc_list result = mock_vc_operation.list() - assert dummy_vc_list == result + assert dummy_vc_list == result mock_function.assert_called_once_with(mock_credential, subscription_list=None) dummy_sub_list = [mock_workspace_scope.subscription_id] @@ -51,7 +52,13 @@ def test_list(self, mock_function, mock_vc_operation: VirtualClusterOperations, assert mock_function.call_count == 2 @patch("azure.ai.ml.operations._virtual_cluster_operations.get_virtual_cluster_by_id") - def test_get(self, mock_function, mock_vc_operation: 
VirtualClusterOperations, mock_workspace_scope: OperationScope, mock_credential: Mock) -> None: + def test_get( + self, + mock_function, + mock_vc_operation: VirtualClusterOperations, + mock_workspace_scope: OperationScope, + mock_credential: Mock, + ) -> None: dummy_vc = { "id": "id1", "name": "name1", @@ -60,5 +67,10 @@ def test_get(self, mock_function, mock_vc_operation: VirtualClusterOperations, m result = mock_vc_operation.get("name1") assert dummy_vc == result - - mock_function.assert_called_once_with(name="name1", resource_group=mock_workspace_scope.resource_group_name, subscription_id=mock_workspace_scope.subscription_id, credential=mock_credential) + + mock_function.assert_called_once_with( + name="name1", + resource_group=mock_workspace_scope.resource_group_name, + subscription_id=mock_workspace_scope.subscription_id, + credential=mock_credential, + ) diff --git a/sdk/ml/azure-ai-ml/tests/workspace/e2etests/test_workspace.py b/sdk/ml/azure-ai-ml/tests/workspace/e2etests/test_workspace.py index 0dd5bf5d0c69..8d9b0c8a2a08 100644 --- a/sdk/ml/azure-ai-ml/tests/workspace/e2etests/test_workspace.py +++ b/sdk/ml/azure-ai-ml/tests/workspace/e2etests/test_workspace.py @@ -174,15 +174,15 @@ def test_uai_workspace_create_update_and_delete( { "identity": { "type": "user_assigned", - "user_assigned_identities":{ + "user_assigned_identities": { user_assigned_identity.id: { "client_id": user_assigned_identity.client_id, - "principal_id": user_assigned_identity.principal_id + "principal_id": user_assigned_identity.principal_id, }, user_assigned_identity2.id: { "client_id": user_assigned_identity2.client_id, - "principal_id": user_assigned_identity2.principal_id - } + "principal_id": user_assigned_identity2.principal_id, + }, }, } }, @@ -271,7 +271,6 @@ def test_update_sai_to_sai_and_uai_workspace_with_uai_deletion( assert workspace.identity.user_assigned_identities == None assert workspace.primary_user_assigned_identity == None - # test updating identity type from system_assgined to system_assigned and user_assigned msi_client = ManagedServiceIdentityClient( credential=client._credential, subscription_id=client._operation_scope.subscription_id @@ -290,15 +289,15 @@ def test_update_sai_to_sai_and_uai_workspace_with_uai_deletion( { "identity": { "type": "system_assigned, user_assigned", - "user_assigned_identities":{ + "user_assigned_identities": { user_assigned_identity.id: { "client_id": user_assigned_identity.client_id, - "principal_id": user_assigned_identity.principal_id + "principal_id": user_assigned_identity.principal_id, }, user_assigned_identity2.id: { "client_id": user_assigned_identity2.client_id, - "principal_id": user_assigned_identity2.principal_id - } + "principal_id": user_assigned_identity2.principal_id, + }, }, } }, @@ -306,13 +305,13 @@ def test_update_sai_to_sai_and_uai_workspace_with_uai_deletion( wps = load_workspace("./tests/test_configs/workspace/workspace_min.yaml", params_override=params_override) workspace_poller = client.workspaces.begin_update( wps, - # primary_user_assigned_identity=user_assigned_identity.id, # uncomment this when sai to sai|uai fixing pr released. + # primary_user_assigned_identity=user_assigned_identity.id, # uncomment this when sai to sai|uai fixing pr released. 
) assert isinstance(workspace_poller, LROPoller) workspace = workspace_poller.result() assert isinstance(workspace, Workspace) assert len(workspace.identity.user_assigned_identities) == 2 - # assert workspace.primary_user_assigned_identity == user_assigned_identity.id # uncomment this when sai to sai|uai fixing pr released. + # assert workspace.primary_user_assigned_identity == user_assigned_identity.id # uncomment this when sai to sai|uai fixing pr released. assert workspace.identity.type == camel_to_snake(ManagedServiceIdentityType.SYSTEM_ASSIGNED_USER_ASSIGNED) ## test uai removal. not supported yet, service returning "Code: FailedIdentityOperation, Removal of all user-assigned identities assigned to resource '...' with type 'SystemAssigned, UserAssigned' is invalid."