Closed
Changes from 24 commits

Commits (27)
5bd5cd5
modal run for regular eval
alekszievr Mar 5, 2025
3c655fc
simplify output name
alekszievr Mar 5, 2025
c767900
Send results to posthog
alekszievr Mar 6, 2025
65b53fb
send results to correct posthog project
alekszievr Mar 6, 2025
caa381e
use all necessary envvars
alekszievr Mar 6, 2025
90a4e2e
set necessary envvars
alekszievr Mar 6, 2025
b219ee4
test
alekszievr Mar 7, 2025
54073eb
test
alekszievr Mar 7, 2025
3169207
test
alekszievr Mar 7, 2025
887482e
test
alekszievr Mar 7, 2025
1d39e7d
test
alekszievr Mar 7, 2025
1893bbe
more logging
alekszievr Mar 10, 2025
547df24
send to dev posthog
alekszievr Mar 10, 2025
4daca96
revert pyproject toml file
alekszievr Mar 10, 2025
b17393c
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 11, 2025
bcd8828
Calculate evaluation metrics and send to PostHog
alekszievr Mar 11, 2025
0686325
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 12, 2025
73053b5
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 12, 2025
ffbff5f
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 12, 2025
05e6470
update Modal files
alekszievr Mar 14, 2025
b9e15ae
Send results to segment instead of posthog
alekszievr Mar 14, 2025
7bb8a73
set sample size in regular eval
alekszievr Mar 14, 2025
7cf30a7
use specific segment key
alekszievr Mar 14, 2025
782353e
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 14, 2025
28b529a
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 19, 2025
21cc39c
add segment analytics to poetry
alekszievr Mar 19, 2025
6a8380c
Merge branch 'dev' into feat/cog-1414-regular-cognee-eval
alekszievr Mar 19, 2025
2 changes: 1 addition & 1 deletion cognee/eval_framework/modal_run_eval.py
@@ -47,7 +47,7 @@ def read_and_combine_metrics(eval_params: dict) -> dict:
        }
    )
    .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
-    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
+    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly", "unstructured")
)


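For orientation, the image object extended in this hunk follows Modal's standard image-builder chain, and it is imported by the new test file below. A minimal sketch of that chain, assuming a Debian slim base and placeholder environment variables; only the poetry and pip lines are confirmed by the diff:

# Hedged reconstruction of the image builder this hunk belongs to.
# Base image and env contents are assumptions; the last two calls mirror the diff.
import modal

image = (
    modal.Image.debian_slim()
    .env({"EXAMPLE_ENV_VAR": "value"})  # assumed: the real file sets the eval env vars here
    .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly", "unstructured")
)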
62 changes: 62 additions & 0 deletions cognee/tests/evaluation/modal_run_regular_eval.py
@@ -0,0 +1,62 @@
from cognee.eval_framework.modal_run_eval import read_and_combine_metrics, image
from cognee.eval_framework.eval_config import EvalConfig
import modal
import logging
from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_builder
from cognee.eval_framework.answer_generation.run_question_answering_module import (
    run_question_answering,
)
from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
import json


logger = logging.getLogger(__name__)

app = modal.App("cognee-regular-eval")


@app.function(image=image, max_containers=2, timeout=1800, retries=3)
async def modal_run_eval(eval_params=None):
"""Runs evaluation pipeline and returns combined metrics results."""

if eval_params is None:
eval_params = EvalConfig().to_dict()

logger.info(f"Running evaluation with params: {eval_params}")

# Run the evaluation pipeline
await run_corpus_builder(eval_params)
await run_question_answering(eval_params)
await run_evaluation(eval_params)

# Early return if metrics calculation wasn't requested
if not eval_params.get("evaluating_answers") or not eval_params.get("calculate_metrics"):
logger.info(
"Skipping metrics collection as either evaluating_answers or calculate_metrics is False"
)
return None

return read_and_combine_metrics(eval_params)


@app.local_entrypoint()
async def main():
    config = EvalConfig(
        task_getter_type="Default",
        benchmark="HotPotQA",
        number_of_samples_in_corpus=50,
        building_corpus_from_scratch=True,
        answering_questions=True,
        qa_engine="cognee_graph_completion",
        evaluating_answers=True,
        calculate_metrics=True,
        dashboard=False,
    )

    results = await modal_run_eval.remote.aio(config.to_dict())

    output_file = "metrics_output.json"
    with open(output_file, "w") as f:
        json.dump(results, f, indent=4)

    logger.info(f"Completed evaluation run. Results saved to {output_file}")
57 changes: 57 additions & 0 deletions cognee/tests/evaluation/send_results_to_segment.py
@@ -0,0 +1,57 @@
import os
import uuid
import logging
import json
from dotenv import load_dotenv
import argparse
from cognee.shared.utils import setup_logging
import analytics
Member commented:
Where do we get analytics from?

Contributor (author) replied:
I modified this to use the newer name of the package and added it to poetry.
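For context on the exchange above: the calls used in this file (write_key, track, flush) match the module-level interface of Segment's Python client. A minimal standalone sketch, assuming the package added to poetry is analytics-python or its renamed successor segment-analytics-python, both of which expose this surface:

# Hedged sketch of the Segment client usage this script relies on.
import analytics  # provided by analytics-python / segment-analytics-python (assumed)

analytics.write_key = "YOUR_SEGMENT_WRITE_KEY"  # normally read from an env var

analytics.track(
    user_id="example_bot",                  # any stable identifier
    event="example_event",
    properties={"mean_correctness": 0.8},   # illustrative payload
)
analytics.flush()  # block until queued events are delivered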

import datetime

load_dotenv()

setup_logging(logging.INFO)

SEGMENT_WRITE_KEY = os.getenv("SEGMENT_WRITE_KEY_EVAL")
analytics.write_key = SEGMENT_WRITE_KEY


def send_event_to_segment(results):
    created_at = datetime.datetime.now(datetime.timezone.utc).isoformat()

    properties = {
        f"mean_{key}": results["aggregate_metrics"][key]["mean"]
        for key in results["aggregate_metrics"].keys()
    }
    properties["created_at"] = created_at

    # Send event to Segment
    analytics.track(
Member commented:
If analytics is some analytics library, then I don't understand why we have this test?

        user_id="evalresults_ingest_bot",  # Unique identifier for the event
        event="cognee_eval_results",
        properties=properties,
    )

    # Ensure all events are sent
    analytics.flush()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        default="metrics_output.json",
        help="The filename of the results to send to Segment.",
    )
    args = parser.parse_args()
    with open(args.filename, "r") as f:
        results = json.load(f)
    logging.info(
        f"results loaded, mean correctness {results['aggregate_metrics']['correctness']['mean']}"
    )
    send_event_to_segment(results)


if __name__ == "__main__":
    main()
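The script assumes metrics_output.json carries the aggregate_metrics structure produced by read_and_combine_metrics. A sketch of the minimal shape it reads; metric names other than correctness, and all values, are illustrative:

# Illustrative input for send_event_to_segment; only the nesting is implied by the code above.
example_results = {
    "aggregate_metrics": {
        "correctness": {"mean": 0.82},
        "f1": {"mean": 0.74},  # assumed additional metric
    }
}
# Flattened into Segment event properties as mean_<key>:
# {"mean_correctness": 0.82, "mean_f1": 0.74, "created_at": "..."}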