Fix __call__ overload issues

Azure · needuv · Nov 1, 2024 · Oct 31, 2024 · Oct 31, 2024 · Nov 1, 2024
commit aee710e2d2c626f4993d5f196033c2ed2117fbdf
@@ -93,10 +93,10 @@ def __call__(  # pylint: disable=docstring-missing-param
         or a conversation for a potentially multi-turn evaluation. If the conversation has more than one pair of
         turns, the evaluator will aggregate the results of each turn.
 
+        :keyword query: The query to be evaluated.
+        :paramtype query: str
         :keyword response: The response to be evaluated.
         :paramtype response: Optional[str]
-        :keyword context: The context to be evaluated.
-        :paramtype context: Optional[str]
         :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
             key "messages". Conversation turns are expected
             to be dictionaries with keys "content" and "role".

@@ -55,7 +55,7 @@ def __call__(
         *,
         response: str,
     ) -> Dict[str, Union[str, float]]:
-        """Evaluate fluency in given query/response
+        """Evaluate fluency in given response
 
         :keyword response: The response to be evaluated.
         :paramtype response: str

@@ -180,7 +180,7 @@ def __call__(
         self,
         *,
         conversation: Conversation,
-    ) -> Dict[str, Union[float, Dict[str, List[float]]]]:
+    ) -> Dict[str, Union[float, Dict[str, List[str, float]]]]:
         """Evaluates retrieval for a for a multi-turn evaluation. If the conversation has more than one turn,
         the evaluator will aggregate the results of each turn.
 
@@ -202,7 +202,7 @@ def __call__(self, *args, **kwargs):  # pylint: disable=docstring-missing-param
         :keyword conversation: The conversation to be evaluated.
         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
         :return: The scores for Chat scenario.
-        :rtype: :rtype: Dict[str, Union[float, Dict[str, List[float]]]]
+        :rtype: Dict[str, Union[float, Dict[str, List[str, float]]]]
         """
         query = kwargs.pop("query", None)
         context = kwargs.pop("context", None)

@@ -105,18 +105,18 @@ def __init__(
     def __call__(
         self,
         *,
+        response: str,
+        context: str,
         query: Optional[str] = None,
-        response: Optional[str] = None,
-        context: Optional[str] = None,
     ) -> Dict[str, Union[str, bool]]:
         """Evaluate groundedness for a given query/response/context
 
-        :keyword query: The query to be evaluated.
-        :paramtype query: Optional[str]
         :keyword response: The response to be evaluated.
-        :paramtype response: Optional[str]
+        :paramtype response: str
         :keyword context: The context to be evaluated.
-        :paramtype context: Optional[str]
+        :paramtype context: str
+        :keyword query: The query to be evaluated.
+        :paramtype query: Optional[str]
         :return: The groundedness score.
         :rtype: Dict[str, Union[str, bool]]
         """