diff --git a/cognee/eval_framework/modal_run_eval.py b/cognee/eval_framework/modal_run_eval.py index 77345fe428..8af45c73b1 100644 --- a/cognee/eval_framework/modal_run_eval.py +++ b/cognee/eval_framework/modal_run_eval.py @@ -47,7 +47,7 @@ def read_and_combine_metrics(eval_params: dict) -> dict: } ) .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml") - .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly") + .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly", "unstructured") ) diff --git a/cognee/tests/evaluation/modal_run_regular_eval.py b/cognee/tests/evaluation/modal_run_regular_eval.py new file mode 100644 index 0000000000..c612741f5f --- /dev/null +++ b/cognee/tests/evaluation/modal_run_regular_eval.py @@ -0,0 +1,62 @@ +from cognee.eval_framework.modal_run_eval import read_and_combine_metrics, image +from cognee.eval_framework.eval_config import EvalConfig +import modal +import logging +from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_builder +from cognee.eval_framework.answer_generation.run_question_answering_module import ( + run_question_answering, +) +from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation +import json + + +logger = logging.getLogger(__name__) + +app = modal.App("cognee-regular-eval") + + +@app.function(image=image, max_containers=2, timeout=1800, retries=3) +async def modal_run_eval(eval_params=None): + """Runs evaluation pipeline and returns combined metrics results.""" + + if eval_params is None: + eval_params = EvalConfig().to_dict() + + logger.info(f"Running evaluation with params: {eval_params}") + + # Run the evaluation pipeline + await run_corpus_builder(eval_params) + await run_question_answering(eval_params) + await run_evaluation(eval_params) + + # Early return if metrics calculation wasn't requested + if not eval_params.get("evaluating_answers") or not eval_params.get("calculate_metrics"): + logger.info( + "Skipping metrics collection as 
def send_event_to_segment(results):
    """Send the mean of every aggregate metric to Segment as a single event.

    Args:
        results: Parsed evaluation output. It must contain an
            ``"aggregate_metrics"`` mapping whose values are mappings that
            each carry a ``"mean"`` entry.

    Raises:
        KeyError: If ``"aggregate_metrics"`` or any metric's ``"mean"`` is
            missing from ``results``.
    """
    # isoformat() on an aware UTC datetime already ends with "+00:00".
    # Appending "Z" to that produced the malformed suffix "+00:00Z", so the
    # numeric offset is swapped for the "Z" designator instead.
    created_at = (
        datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
    )

    properties = {
        f"mean_{metric_name}": metric_stats["mean"]
        for metric_name, metric_stats in results["aggregate_metrics"].items()
    }
    properties["created_at"] = created_at

    # Send event to Segment
    analytics.track(
        user_id="evalresults_ingest_bot",  # Unique identifier for the event
        event="cognee_eval_results",
        properties=properties,
    )

    # Ensure all events are sent
    analytics.flush()


def main():
    """Load an evaluation results JSON file and forward its metrics to Segment.

    The file is chosen with ``--filename`` (default ``metrics_output.json``).

    Raises:
        ValueError: If the file does not contain a JSON object with an
            ``"aggregate_metrics"`` entry (for example when it holds ``null``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        default="metrics_output.json",
        help="The filename of the results to send to Segment.",
    )
    args = parser.parse_args()
    with open(args.filename, "r", encoding="utf-8") as f:
        results = json.load(f)

    # Fail with an explicit message rather than an opaque TypeError/KeyError
    # when the results file carries no metrics.
    if not isinstance(results, dict) or "aggregate_metrics" not in results:
        raise ValueError(
            f"{args.filename} does not contain 'aggregate_metrics'; nothing to send."
        )

    logging.info(
        f"results loaded, mean correctness {results['aggregate_metrics']['correctness']['mean']}"
    )
    send_event_to_segment(results)


if __name__ == "__main__":
    main()
(>=0.36.0)", "mypy (==1.10.0)", "pycodest doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.16.5)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +[[package]] +name = "segment-analytics-python" +version = "2.3.3" +description = "The hassle-free way to integrate analytics into any python application." +optional = true +python-versions = ">=3.6.0" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version == \"3.12\" or python_version >= \"3.13\") and extra == \"analytics\"" +files = [ + {file = "segment-analytics-python-2.3.3.tar.gz", hash = "sha256:ce6b3b4387ec9ebc5b55842c44d7dd63b4d4b0b8188e268c4492f909e5eeeed8"}, + {file = "segment_analytics_python-2.3.3-py2.py3-none-any.whl", hash = "sha256:769251706d71f4c96d2039391d119222dbd9faf00308400f7b314ec9fb86cfc7"}, +] + +[package.dependencies] +backoff = ">=2.1,<3.0" +PyJWT = ">=2.8,<3.0" +python-dateutil = ">=2.2,<3.0" +requests = ">=2.7,<3.0" + +[package.extras] +test = ["flake8 (==3.7.9)", "mock (==2.0.0)", "pylint (==2.8.0)"] + [[package]] name = "semver" version = "3.0.4" @@ -11578,6 +11597,7 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] +analytics = ["segment-analytics-python"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] deepeval = ["deepeval"] docs = ["unstructured"] @@ -11605,4 +11625,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "085d7e0eeca17bbb667b0a7b775a8859bf68611983ac665d4a3585f9c59ca68e" +content-hash = 
"b6b3883f5ac24c530666b9c583ceba72bb3e26087c803c14c7338dce7be83659" diff --git a/pyproject.toml b/pyproject.toml index 44e9692ec7..616977ba91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,6 +90,7 @@ pyside6 = {version = "^6.8.2.1", optional = true} qasync = {version = "^0.27.1", optional = true} graphiti-core = {version = "^0.7.0", optional = true} owlready2 = "^0.47" +segment-analytics-python = { version = "2.3.3", optional = true } [tool.poetry.extras] @@ -116,6 +117,8 @@ codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] evals = ["plotly", "gdown"] gui = ["pyside6", "qasync"] graphiti = ["graphiti-core"] +analytics = ["segment-analytics-python"] + [tool.poetry.group.dev.dependencies] pytest = "^7.4.0"