diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index d00c8a53f0a8..8235e9440c85 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -9,6 +9,8 @@
 ### Bugs Fixed
 
 ### Other Changes
+- Refined error messages for service-based evaluators and simulators.
+- Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features.
 
 ## 1.0.0b5 (2024-10-28)
diff --git a/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md b/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
index fe0ba312624b..53615797a528 100644
--- a/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
+++ b/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
@@ -6,7 +6,7 @@ This guide walks you through how to investigate failures, common errors in the `
 
 - [Handle Evaluate API Errors](#handle-evaluate-api-errors)
   - [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues)
-  - [Safety Metric Supported Regions](#safety-metric-supported-regions)
+  - [Troubleshoot Safety Evaluator Issues](#troubleshoot-safety-evaluator-issues)
 - [Handle Simulation Errors](#handle-simulation-errors)
   - [Adversarial Simulation Supported Regions](#adversarial-simulation-supported-regions)
 - [Logging](#logging)
@@ -31,9 +31,10 @@ This guide walks you through how to investigate failures, common errors in the `
 
 - Additionally, if you're using a virtual network or private link, and your evaluation run upload fails because of that, check out this [guide](https://docs.microsoft.com/azure/machine-learning/how-to-enable-studio-virtual-network#access-data-using-the-studio).
 
-### Safety Metric Supported Regions
+### Troubleshoot Safety Evaluator Issues
 
-Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
+- Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
+- If you encounter a 403 (Forbidden) error when using safety evaluators, verify that you have the `Contributor` role assigned to your Azure AI project. The `Contributor` role is currently required to run safety evaluations.
 
 ## Handle Simulation Errors
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py
index ca676c9bcdc9..41368e571094 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import os
 import functools
 import inspect
 import logging
@@ -149,6 +150,9 @@ def _get_indentation_size(doc_string: str) -> int:
 def _should_skip_warning():
     skip_warning_msg = False
 
+    if os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true":
+        skip_warning_msg = True
+
     # Cases where we want to suppress the warning:
     # 1. When converting from REST object to SDK object
     for frame in inspect.stack():
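The new opt-out is read at call time by `_should_skip_warning()`, so the variable only needs to be set before an experimental API is exercised. A minimal sketch of the intended usage, relying on nothing beyond the check shown in the hunk above:

```python
import os

# Opt out of the experimental-feature warning. Only the string "true"
# (case-insensitive) disables it, matching the
# os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true"
# check in _should_skip_warning(); any other value leaves the warning enabled.
os.environ["AI_EVALS_DISABLE_EXPERIMENTAL_WARNING"] = "true"
```

Setting the variable in the shell before launching Python (`export AI_EVALS_DISABLE_EXPERIMENTAL_WARNING=true`) works the same way.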
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
index 3d8ad943522d..e359abcca325 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
@@ -83,27 +83,31 @@ async def ensure_service_availability(rai_svc_url: str, token: str, capability:
     async with get_async_http_client() as client:
         response = await client.get(svc_liveness_url, headers=headers)
 
-        if response.status_code != 200:
-            msg = f"RAI service is not available in this region. Status Code: {response.status_code}"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.UNKNOWN,
-                category=ErrorCategory.SERVICE_UNAVAILABLE,
-                blame=ErrorBlame.USER_ERROR,
-            )
-
-        capabilities = response.json()
+    if response.status_code != 200:
+        msg = (
+            f"RAI service is unavailable in this region, or you lack the necessary permissions "
+            f"to access the AI project. Status Code: {response.status_code}"
+        )
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.SERVICE_UNAVAILABLE,
+            blame=ErrorBlame.USER_ERROR,
+            tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
+        )
 
-        if capability and capability not in capabilities:
-            msg = f"Capability '{capability}' is not available in this region"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.RAI_CLIENT,
-                category=ErrorCategory.SERVICE_UNAVAILABLE,
-                blame=ErrorBlame.USER_ERROR,
-            )
+    capabilities = response.json()
+    if capability and capability not in capabilities:
+        msg = f"The needed capability '{capability}' is not supported by the RAI service in this region."
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.SERVICE_UNAVAILABLE,
+            blame=ErrorBlame.USER_ERROR,
+            tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
+        )
 
 
 def generate_payload(normalized_user_text: str, metric: str, annotation_task: str) -> Dict:
@@ -371,13 +375,17 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
         )
 
     if response.status_code != 200:
-        msg = "Failed to retrieve the discovery service URL."
+        msg = (
+            f"Failed to connect to your Azure AI project. Please check if the project scope is configured correctly, "
+            f"and make sure you have the necessary access permissions. "
+            f"Status code: {response.status_code}."
+        )
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
             target=ErrorTarget.RAI_CLIENT,
-            category=ErrorCategory.SERVICE_UNAVAILABLE,
-            blame=ErrorBlame.UNKNOWN,
+            blame=ErrorBlame.USER_ERROR,
+            category=ErrorCategory.PROJECT_ACCESS_ERROR,
+            tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
         )
 
     base_url = urlparse(response.json()["properties"]["discoveryUrl"])
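Because the discovery-path failure is now classified as `ErrorCategory.PROJECT_ACCESS_ERROR` (the enum member is added later in this diff), callers can branch on the category instead of parsing message text. A minimal sketch, assuming `EvaluationException` exposes the `category` it was constructed with and importing from the private `_exceptions` module shown in this diff; the evaluator choice and project values are placeholders:

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import ViolenceEvaluator
from azure.ai.evaluation._exceptions import ErrorCategory, EvaluationException

azure_ai_project = {
    "subscription_id": "<subscription-id>",  # placeholder values
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

try:
    evaluator = ViolenceEvaluator(
        credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project
    )
    result = evaluator(query="What is the capital of France?", response="Paris.")
except EvaluationException as exc:
    if exc.category == ErrorCategory.PROJECT_ACCESS_ERROR:
        # A scope or permission problem rather than a regional outage:
        # re-check the project values above and your role assignments
        # (see the new TROUBLESHOOTING entry on the Contributor role).
        print(f"Project access failed: {exc}")
    else:
        raise
```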
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py
index fc419ed0a6ea..aab2b674c2ff 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py
@@ -68,12 +68,22 @@ def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         run = proxy_run.run.result()
 
         # pylint: disable=protected-access
+        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
+        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
+
+        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
+        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
+            status = "Completed with Errors"
+        else:
+            status = run.status
+
+        # Return the ordered dictionary with the updated status
         return OrderedDict(
             [
-                ("status", run.status),
+                ("status", status),
                 ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")),
-                ("failed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")),
+                ("completed_lines", completed_lines),
+                ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )
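The status-derivation rule in `get_run_summary` is small enough to state on its own; a sketch of the same logic, with names local to the example:

```python
def derive_status(run_status: str, failed_lines: str) -> str:
    """Mirror of the summary logic above: a run that promptflow reports as
    "Completed" is downgraded to "Completed with Errors" when any lines
    failed; "NA" means the system metric was missing, so the original
    status is kept as-is."""
    if run_status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
        return "Completed with Errors"
    return run_status


assert derive_status("Completed", "0") == "Completed"
assert derive_status("Completed", "3") == "Completed with Errors"
assert derive_status("Completed", "NA") == "Completed"
assert derive_status("Failed", "2") == "Failed"
```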
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py
index 191703fb5715..fc81e7f0e41a 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py
@@ -23,6 +23,7 @@ class ErrorCategory(Enum):
     * SERVICE_UNAVAILABLE -> Service is unavailable
     * MISSING_PACKAGE -> Required package is missing
     * FAILED_REMOTE_TRACKING -> Remote tracking failed
+    * PROJECT_ACCESS_ERROR -> Access to project failed
     * UNKNOWN -> Undefined placeholder. Avoid using.
     """
 
@@ -35,6 +36,7 @@ class ErrorCategory(Enum):
     SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
     MISSING_PACKAGE = "MISSING PACKAGE"
     FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
+    PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
     UNKNOWN = "UNKNOWN"
 
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
index f515ce1a08e6..86b4609562e4 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
@@ -74,14 +74,18 @@ def _get_service_discovery_url(self):
             timeout=5,
         )
 
         if response.status_code != 200:
-            msg = "Failed to retrieve the discovery service URL."
+            msg = (
+                f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
+                f"correctly, and make sure you have the necessary access permissions. "
+                f"Status code: {response.status_code}."
+            )
             raise EvaluationException(
                 message=msg,
-                internal_message=msg,
                 target=ErrorTarget.RAI_CLIENT,
-                category=ErrorCategory.SERVICE_UNAVAILABLE,
-                blame=ErrorBlame.UNKNOWN,
+                category=ErrorCategory.PROJECT_ACCESS_ERROR,
+                blame=ErrorBlame.USER_ERROR,
             )
+
         base_url = urlparse(response.json()["properties"]["discoveryUrl"])
         return f"{base_url.scheme}://{base_url.netloc}"
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py
index 7a4d4f1efa91..6a40585972e4 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py
@@ -146,7 +146,8 @@ async def test_ensure_service_availability(self, client_mock):
     async def test_ensure_service_availability_service_unavailable(self, client_mock):
         with pytest.raises(Exception) as exc_info:
             _ = await ensure_service_availability("dummy_url", "dummy_token")
-        assert "RAI service is not available in this region. Status Code: 9001" in str(exc_info._excinfo[1])
+        assert "RAI service is unavailable in this region" in str(exc_info._excinfo[1])
+        assert "Status Code: 9001" in str(exc_info._excinfo[1])
         assert client_mock._mock_await_count == 1
 
     @pytest.mark.asyncio
@@ -154,7 +155,9 @@ async def test_ensure_service_availability_service_unavailable(self, client_mock):
     async def test_ensure_service_availability_exception_capability_unavailable(self, client_mock):
         with pytest.raises(Exception) as exc_info:
             _ = await ensure_service_availability("dummy_url", "dummy_token", capability="does not exist")
-        assert "Capability 'does not exist' is not available in this region" in str(exc_info._excinfo[1])
+        assert "The needed capability 'does not exist' is not supported by the RAI service in this region" in str(
+            exc_info._excinfo[1]
+        )
         assert client_mock._mock_await_count == 1
 
     @pytest.mark.asyncio
@@ -359,7 +362,7 @@ async def test_get_service_discovery_url_exception(self, client_mock):
 
         with pytest.raises(Exception) as exc_info:
             _ = await _get_service_discovery_url(azure_ai_project=azure_ai_project, token=token)
-        assert "Failed to retrieve the discovery service URL" in str(exc_info._excinfo[1])
+        assert "Failed to connect to your Azure AI project." in str(exc_info._excinfo[1])
 
     @pytest.mark.asyncio
     @patch(
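As an aside on the assertion style, `pytest.raises` also accepts a `match=` regex, which checks the message at raise time instead of going through `exc_info._excinfo`. A hedged sketch, not part of this change, assuming a hypothetical method on the same test class with the same patched `client_mock` fixture and imports as the tests above:

```python
import re

import pytest


# Hypothetical alternative to the first test above; client_mock is the
# same patched HTTP-client fixture the surrounding tests use.
@pytest.mark.asyncio
async def test_service_unavailable_message_via_match(self, client_mock):
    # match= interprets its argument as a regex, so escape the literal
    # message fragment before matching.
    with pytest.raises(Exception, match=re.escape("RAI service is unavailable in this region")):
        _ = await ensure_service_availability("dummy_url", "dummy_token")
    assert client_mock._mock_await_count == 1
```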