diff --git a/.github/workflows/profiling.yaml b/.github/workflows/profiling.yaml
new file mode 100644
index 0000000000..6ca09f9dc1
--- /dev/null
+++ b/.github/workflows/profiling.yaml
@@ -0,0 +1,153 @@
+name: test | profiling
+
+# Profiles code_graph_pipeline.py at the base and head commits of a pull
+# request and uploads the timing difference as an artifact. Both SHAs are read
+# from github.event.pull_request, which is only populated on pull_request events.
+on:
+  pull_request
+
+jobs:
+  profiler:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Checkout the code from the repository with full history
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Fetch all history so we can checkout any commit
+
+      - name: Check if the sender is a maintainer
+        id: check_permissions
+        uses: actions/github-script@v6
+        with:
+          # Emit the bare role string; the default JSON encoding wraps it in
+          # quotes, which would never match the comparisons in the next step.
+          result-encoding: string
+          script: |
+            const sender = context.payload.sender.login;
+            // Repository-level access can be read with the workflow token.
+            const { data: access } = await github.rest.repos.getCollaboratorPermissionLevel({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              username: sender,
+            }).catch(() => ({ data: { permission: 'none' } }));
+            return access.role_name || access.permission;
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set is_maintainer variable
+        run: |
+          echo "is_maintainer=${{ steps.check_permissions.outputs.result == 'admin' || steps.check_permissions.outputs.result == 'maintain' }}" >> "$GITHUB_ENV"
+
+      # Finishing a step successfully does not end the job, so every later step
+      # is gated on is_maintainer instead of relying on this step's exit code.
+      - name: Stop if not a maintainer
+        if: env.is_maintainer != 'true'
+        run: |
+          echo "User ${{ github.event.sender.login }} is not a maintainer. Exiting."
+
+      # Set up Python environment
+      - name: Set up Python
+        if: env.is_maintainer == 'true'
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install Poetry
+        if: env.is_maintainer == 'true'
+        uses: snok/install-poetry@v1.3.2
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        if: env.is_maintainer == 'true'
+        run: |
+          poetry install --no-interaction --all-extras
+          poetry run pip install pyinstrument
+
+      # Set environment variables for SHAs
+      - name: Set environment variables
+        if: env.is_maintainer == 'true'
+        run: |
+          echo "BASE_SHA=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_ENV"
+          echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_ENV"
+
+      # Run profiler on the base branch
+      - name: Run profiler on base branch
+        if: env.is_maintainer == 'true'
+        env:
+          BASE_SHA: ${{ env.BASE_SHA }}
+        run: |
+          echo "Profiling the base branch for code_graph_pipeline.py"
+          echo "Checking out base SHA: $BASE_SHA"
+          git checkout "$BASE_SHA"
+          echo "This is the working directory: $PWD"
+          # Ensure the script is executable
+          chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
+          # Run pyinstrument
+          poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py
+
+      # Run profiler on head branch
+      - name: Run profiler on head branch
+        if: env.is_maintainer == 'true'
+        env:
+          HEAD_SHA: ${{ env.HEAD_SHA }}
+        run: |
+          echo "Profiling the head branch for code_graph_pipeline.py"
+          echo "Checking out head SHA: $HEAD_SHA"
+          git checkout "$HEAD_SHA"
+          echo "This is the working directory: $PWD"
+          # Ensure the script is executable
+          chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
+          # Run pyinstrument
+          poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
+
+      # Compare profiling results
+      - name: Compare profiling results
+        if: env.is_maintainer == 'true'
+        run: |
+          python -c '
+          import json
+          try:
+              with open("base_results.json") as f:
+                  base = json.load(f)
+              with open("head_results.json") as f:
+                  head = json.load(f)
+              # pyinstrument JSON reports timings in seconds and records no
+              # memory statistics, so compare CPU time and wall-clock duration.
+              cpu_diff = head.get("cpu_time", 0) - base.get("cpu_time", 0)
+              duration_diff = head.get("duration", 0) - base.get("duration", 0)
+              report = "\n".join([
+                  f"CPU Time Difference: {cpu_diff:.3f} s",
+                  f"Duration Difference: {duration_diff:.3f} s",
+              ])
+          except Exception as e:
+              report = f"Error comparing profiling results: {e}"
+          with open("profiling_diff.txt", "w") as f:
+              f.write(report + "\n")
+          print(report)  # Print the report to the terminal as well
+          '
+
+      - name: Upload profiling diff artifact
+        if: env.is_maintainer == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: profiling-diff
+          path: profiling_diff.txt
+
+      # Post results to the pull request
+#      - name: Post profiling results to PR
+#        uses: actions/github-script@v6
+#        with:
+#          script: |
+#            const fs = require('fs');
+#            const diff = fs.readFileSync('profiling_diff.txt', 'utf-8');
+#            github.rest.issues.createComment({
+#              issue_number: context.issue.number,
+#              owner: context.repo.owner,
+#              repo: context.repo.repo,
+#              body: `### Profiling Results for code_graph_pipeline.py\n\`\`\`\n${diff || 'No differences found.'}\n\`\`\``
+#            });
diff --git a/.github/workflows/py_lint.yml b/.github/workflows/py_lint.yml
index 07b6e8803d..713b236243 100644
--- a/.github/workflows/py_lint.yml
+++ b/.github/workflows/py_lint.yml
@@ -13,14 +13,8 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
-
   run_lint:
     name: lint
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
     strategy:
       fail-fast: true
       matrix:
diff --git a/.github/workflows/reusable_notebook.yml b/.github/workflows/reusable_notebook.yml
index e4bcfc14f3..0c0b63ec45 100644
--- a/.github/workflows/reusable_notebook.yml
+++ b/.github/workflows/reusable_notebook.yml
@@ -19,14 +19,9 @@ env:
   RUNTIME__LOG_LEVEL: ERROR
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_notebook_test:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     defaults:
       run:
diff --git a/.github/workflows/reusable_python_example.yml b/.github/workflows/reusable_python_example.yml
index 5a8e47e646..03f928656f 100644
--- a/.github/workflows/reusable_python_example.yml
+++ b/.github/workflows/reusable_python_example.yml
@@ -19,14 +19,9 @@ env:
   RUNTIME__LOG_LEVEL: ERROR
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_notebook_test:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     defaults:
       run:
diff --git a/.github/workflows/test_milvus.yml b/.github/workflows/test_milvus.yml
index d7f401632d..5cad723786 100644
--- a/.github/workflows/test_milvus.yml
+++ b/.github/workflows/test_milvus.yml
@@ -14,14 +14,9 @@ env:
   ENV: 'dev'
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_milvus:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
diff --git a/.github/workflows/test_neo4j.yml b/.github/workflows/test_neo4j.yml
index 934ac653bb..3f3a35e4f5 100644
--- a/.github/workflows/test_neo4j.yml
+++ b/.github/workflows/test_neo4j.yml
@@ -13,14 +13,8 @@ env:
   RUNTIME__LOG_LEVEL: ERROR
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
-
   run_neo4j_integration_test:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
 
     defaults:
diff --git a/.github/workflows/test_pgvector.yml b/.github/workflows/test_pgvector.yml
index bf4db6f9aa..a162d2cb41 100644
--- a/.github/workflows/test_pgvector.yml
+++ b/.github/workflows/test_pgvector.yml
@@ -14,14 +14,9 @@ env:
   RUNTIME__LOG_LEVEL: ERROR
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_pgvector_integration_test:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     defaults:
       run:
diff --git a/.github/workflows/test_python_3_10.yml b/.github/workflows/test_python_3_10.yml
index 30d2663796..39eb4e57a9 100644
--- a/.github/workflows/test_python_3_10.yml
+++ b/.github/workflows/test_python_3_10.yml
@@ -14,14 +14,9 @@ env:
   ENV: 'dev'
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_common:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
diff --git a/.github/workflows/test_python_3_11.yml b/.github/workflows/test_python_3_11.yml
index 1bf8b50f0f..2dd704eb92 100644
--- a/.github/workflows/test_python_3_11.yml
+++ b/.github/workflows/test_python_3_11.yml
@@ -14,14 +14,9 @@ env:
   ENV: 'dev'
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_common:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
diff --git a/.github/workflows/test_python_3_9.yml b/.github/workflows/test_python_3_9.yml
index cac4d0cd5a..99c2b9a7a8 100644
--- a/.github/workflows/test_python_3_9.yml
+++ b/.github/workflows/test_python_3_9.yml
@@ -14,14 +14,9 @@ env:
   ENV: 'dev'
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_common:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
diff --git a/.github/workflows/test_qdrant.yml b/.github/workflows/test_qdrant.yml
index 4eaf035048..f0a2e3d3fa 100644
--- a/.github/workflows/test_qdrant.yml
+++ b/.github/workflows/test_qdrant.yml
@@ -14,14 +14,9 @@ env:
   RUNTIME__LOG_LEVEL: ERROR
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_qdrant_integration_test:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
 
     defaults:
diff --git a/.github/workflows/test_weaviate.yml b/.github/workflows/test_weaviate.yml
index 711bd392f5..b8eb72383f 100644
--- a/.github/workflows/test_weaviate.yml
+++ b/.github/workflows/test_weaviate.yml
@@ -14,14 +14,9 @@ env:
   RUNTIME__LOG_LEVEL: ERROR
 
 jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
 
   run_weaviate_integration_test:
     name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
     runs-on: ubuntu-latest
 
     defaults: