Context coverage score
alekszievr committed Mar 10, 2025
commit ec7a0cc16ce78acce0c5cb2bba9fe1ed46dd8ae1
4 changes: 2 additions & 2 deletions cognee/eval_framework/evaluation/deep_eval_adapter.py
@@ -4,7 +4,7 @@
from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
from cognee.eval_framework.evaluation.metrics.exact_match import ExactMatchMetric
from cognee.eval_framework.evaluation.metrics.f1 import F1ScoreMetric
-from cognee.eval_framework.evaluation.metrics.context_match import ContextMatchMetric
+from cognee.eval_framework.evaluation.metrics.context_coverage import ContextCoverageMetric
from typing import Any, Dict, List
from deepeval.metrics import ContextualRelevancyMetric

@@ -16,7 +16,7 @@ def __init__(self):
"EM": ExactMatchMetric(),
"f1": F1ScoreMetric(),
"contextual_relevancy": ContextualRelevancyMetric(),
"context_match": ContextMatchMetric(),
"context_coverage": ContextCoverageMetric(),
}

async def evaluate_answers(
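A minimal usage sketch of the renamed registry key. DeepEvalAdapter, evaluate_answers, and the metric names come from this diff; the shape of the answer dicts and the asyncio entry point are assumptions for illustration:

import asyncio
from cognee.eval_framework.evaluation.deep_eval_adapter import DeepEvalAdapter

async def main():
    adapter = DeepEvalAdapter()
    # Hypothetical answer record; field names follow the dashboard fields
    # touched later in this commit (question/answer/golden_answer plus contexts).
    answers = [{
        "question": "What does cognee index?",
        "answer": "Documents and their relationships.",
        "golden_answer": "Documents and their relationships.",
        "retrieval_context": ["cognee indexes documents and relationships."],
        "golden_context": ["cognee indexes documents and their relationships."],
    }]
    # "context_coverage" now resolves to ContextCoverageMetric; the old
    # "context_match" key is gone.
    results = await adapter.evaluate_answers(answers, ["EM", "f1", "context_coverage"])
    print(results)

asyncio.run(main())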
2 changes: 1 addition & 1 deletion cognee/eval_framework/evaluation/evaluation_executor.py
@@ -23,6 +23,6 @@ def __init__(
async def execute(self, answers: List[Dict[str, str]], evaluator_metrics: Any) -> Any:
if self.evaluate_contexts:
evaluator_metrics.append("contextual_relevancy")
evaluator_metrics.append("context_match")
evaluator_metrics.append("context_coverage")
metrics = await self.eval_adapter.evaluate_answers(answers, evaluator_metrics)
return metrics
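The hunk above flips the opt-in context metrics from "context_match" to "context_coverage". A self-contained sketch of the same flow; the constructor body is an assumption, while execute mirrors the diff:

from typing import Any, Dict, List

class EvaluationExecutorSketch:
    def __init__(self, eval_adapter: Any, evaluate_contexts: bool = False):
        self.eval_adapter = eval_adapter
        self.evaluate_contexts = evaluate_contexts

    async def execute(self, answers: List[Dict[str, str]], evaluator_metrics: Any) -> Any:
        if self.evaluate_contexts:
            # Context metrics are appended only when context evaluation is enabled.
            evaluator_metrics.append("contextual_relevancy")
            evaluator_metrics.append("context_coverage")
        return await self.eval_adapter.evaluate_answers(answers, evaluator_metrics)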
cognee/eval_framework/evaluation/metrics/context_coverage.py
@@ -3,10 +3,12 @@
from deepeval.metrics.summarization.schema import ScoreType
from deepeval.metrics.indicator import metric_progress_indicator
from deepeval.utils import get_or_create_event_loop
+import asyncio
from deepeval.metrics.summarization.template import SummarizationTemplate
from deepeval.metrics.summarization.schema import Reason
from deepeval.metrics.utils import trimAndLoadJson


-class ContextMatchMetric(SummarizationMetric):
+class ContextCoverageMetric(SummarizationMetric):
def measure(
self,
test_case,
@@ -16,7 +18,7 @@ def measure(
input=test_case.context[0],
actual_output=test_case.retrieval_context[0],
)

+self.assessment_questions = None
self.evaluation_cost = 0 if self.using_native_model else None
with metric_progress_indicator(self, _show_indicator=_show_indicator):
if self.async_mode:
@@ -26,7 +28,9 @@
)
else:
self.coverage_verdicts = self._generate_coverage_verdicts(mapped_test_case)
+self.alignment_verdicts = []
self.score = self._calculate_score(ScoreType.COVERAGE)
+self.reason = self._generate_reason()
self.success = self.score >= self.threshold
return self.score

@@ -42,7 +46,9 @@ async def a_measure(
_show_indicator=_show_indicator,
):
self.coverage_verdicts = await self._a_generate_coverage_verdicts(test_case)
+self.alignment_verdicts = []
self.score = self._calculate_score(ScoreType.COVERAGE)
+self.reason = self._generate_reason()
self.success = self.score >= self.threshold
return self.score

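ContextCoverageMetric repurposes deepeval's SummarizationMetric: the golden context is passed as the metric's input (the "original text") and the retrieved context as its actual_output (the "summary"), alignment verdicts are forced to an empty list, and only the coverage side is scored via _calculate_score(ScoreType.COVERAGE). A standalone sketch of that arithmetic; the verdict fields and the yes-ratio scoring are assumptions about deepeval's internals, not code from this commit:

from dataclasses import dataclass
from typing import List

@dataclass
class CoverageVerdict:
    question: str
    original_verdict: str  # can the golden ("original") context answer it? "yes"/"no"
    summary_verdict: str   # can the retrieved context answer it as well? "yes"/"no"

def coverage_score(verdicts: List[CoverageVerdict]) -> float:
    # Of the questions the golden context can answer, count how many the
    # retrieved context also answers.
    answerable = [v for v in verdicts if v.original_verdict.strip().lower() == "yes"]
    if not answerable:
        return 0.0
    covered = sum(1 for v in answerable if v.summary_verdict.strip().lower() == "yes")
    return covered / len(answerable)

print(coverage_score([
    CoverageVerdict("Is indexing mentioned?", "yes", "yes"),
    CoverageVerdict("Are relationships mentioned?", "yes", "no"),
]))  # 0.5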
2 changes: 1 addition & 1 deletion cognee/eval_framework/metrics_dashboard.py
@@ -5,7 +5,7 @@

metrics_fields = {
"contextual_relevancy": ["question", "retrieval_context"],
"context_match": ["question", "retrieval_context", "golden_context"],
"context_coverage": ["question", "retrieval_context", "golden_context"],
}
default_metrics_fields = ["question", "answer", "golden_answer"]

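The metric key is the lookup into metrics_fields, which decides which record fields a dashboard entry displays. A small sketch; the helper function is hypothetical, the two dicts are copied from this file:

metrics_fields = {
    "contextual_relevancy": ["question", "retrieval_context"],
    "context_coverage": ["question", "retrieval_context", "golden_context"],
}
default_metrics_fields = ["question", "answer", "golden_answer"]

def fields_for(metric_name: str) -> list:
    # Context metrics surface the retrieved (and, for coverage, golden) context;
    # all other metrics fall back to the answer-oriented defaults.
    return metrics_fields.get(metric_name, default_metrics_fields)

assert fields_for("context_coverage") == ["question", "retrieval_context", "golden_context"]
assert fields_for("EM") == ["question", "answer", "golden_answer"]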
2 changes: 1 addition & 1 deletion cognee/tests/unit/eval_framework/deepeval_adapter_test.py
@@ -9,7 +9,7 @@
"deepeval": MagicMock(),
"deepeval.metrics": MagicMock(),
"deepeval.test_case": MagicMock(),
"cognee.eval_framework.evaluation.metrics.context_match": MagicMock(),
"cognee.eval_framework.evaluation.metrics.context_coverage": MagicMock(),
},
):
from cognee.eval_framework.evaluation.deep_eval_adapter import DeepEvalAdapter
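The test stubs heavy dependencies in sys.modules so the adapter can be imported without deepeval installed; the patch target must track the module rename. A self-contained reconstruction of the pattern the hunk shows only partially (the patch.dict opener is inferred from the visible closing lines):

import sys
from unittest.mock import MagicMock, patch

with patch.dict(
    sys.modules,
    {
        "deepeval": MagicMock(),
        "deepeval.metrics": MagicMock(),
        "deepeval.test_case": MagicMock(),
        "cognee.eval_framework.evaluation.metrics.context_coverage": MagicMock(),
    },
):
    # With the mocks in place, this import succeeds even in a minimal test env.
    from cognee.eval_framework.evaluation.deep_eval_adapter import DeepEvalAdapter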