From a61ad40dc9caaf63e05558754dfeef51d593b70e Mon Sep 17 00:00:00 2001 From: Cathy Teng Date: Mon, 14 Jul 2025 15:22:43 -0700 Subject: [PATCH 1/3] add bulk update function to repository_service --- src/sentry/integrations/services/repository/impl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sentry/integrations/services/repository/impl.py b/src/sentry/integrations/services/repository/impl.py index 20045ee07f84b4..ea8ecbe6223882 100644 --- a/src/sentry/integrations/services/repository/impl.py +++ b/src/sentry/integrations/services/repository/impl.py @@ -106,7 +106,6 @@ def update_repositories(self, *, organization_id: int, updates: list[RpcReposito fields_to_update = set(list(update_mapping.values())[0].keys()) with transaction.atomic(router.db_for_write(Repository)): - repositories = Repository.objects.filter( organization_id=organization_id, id__in=update_mapping.keys() ) From 97d0175a604803daabbaeacddaa9df05a03403ba Mon Sep 17 00:00:00 2001 From: Cathy Teng Date: Mon, 14 Jul 2025 15:26:04 -0700 Subject: [PATCH 2/3] refactor link_all_repos to use bulk create_repositories --- .../integrations/bitbucket/repository.py | 7 +- .../bitbucket_server/repository.py | 10 +- src/sentry/integrations/github/repository.py | 5 +- .../github/tasks/link_all_repos.py | 51 +++---- .../github_enterprise/repository.py | 7 +- src/sentry/integrations/gitlab/repository.py | 7 +- src/sentry/integrations/vsts/repository.py | 5 +- .../providers/integration_repository.py | 129 +++++++++++++++--- .../github/tasks/test_link_all_repos.py | 37 ++--- 9 files changed, 175 insertions(+), 83 deletions(-) diff --git a/src/sentry/integrations/bitbucket/repository.py b/src/sentry/integrations/bitbucket/repository.py index e676e183ecedb9..78aa2a539075db 100644 --- a/src/sentry/integrations/bitbucket/repository.py +++ b/src/sentry/integrations/bitbucket/repository.py @@ -1,9 +1,12 @@ +from typing import Any + from sentry.integrations.types import IntegrationProviderSlug from sentry.locks import locks from sentry.models.apitoken import generate_token from sentry.models.options.organization_option import OrganizationOption from sentry.organizations.services.organization.model import RpcOrganization from sentry.plugins.providers import IntegrationRepositoryProvider +from sentry.plugins.providers.integration_repository import RepositoryConfig from sentry.shared_integrations.exceptions import ApiError from sentry.utils.email import parse_email, parse_user_name from sentry.utils.http import absolute_uri @@ -43,7 +46,9 @@ def get_webhook_secret(self, organization): ) return secret - def build_repository_config(self, organization: RpcOrganization, data): + def build_repository_config( + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: installation = self.get_installation(data.get("installation"), organization.id) client = installation.get_client() try: diff --git a/src/sentry/integrations/bitbucket_server/repository.py b/src/sentry/integrations/bitbucket_server/repository.py index 4c947456079757..6b3bab8c6c463d 100644 --- a/src/sentry/integrations/bitbucket_server/repository.py +++ b/src/sentry/integrations/bitbucket_server/repository.py @@ -1,11 +1,15 @@ from datetime import datetime, timezone +from typing import Any from django.core.cache import cache from django.urls import reverse from sentry.integrations.types import IntegrationProviderSlug from sentry.organizations.services.organization.model import RpcOrganization -from sentry.plugins.providers.integration_repository import IntegrationRepositoryProvider +from sentry.plugins.providers.integration_repository import ( + IntegrationRepositoryProvider, + RepositoryConfig, +) from sentry.shared_integrations.exceptions import ApiError from sentry.utils.hashlib import md5_text from sentry.utils.http import absolute_uri @@ -30,7 +34,9 @@ def get_repository_data(self, organization, config): config["repo"] = repo["name"] return config - def build_repository_config(self, organization: RpcOrganization, data): + def build_repository_config( + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: installation = self.get_installation(data.get("installation"), organization.id) client = installation.get_client() diff --git a/src/sentry/integrations/github/repository.py b/src/sentry/integrations/github/repository.py index 37aa178a9d9041..c490512d7f3dc4 100644 --- a/src/sentry/integrations/github/repository.py +++ b/src/sentry/integrations/github/repository.py @@ -12,6 +12,7 @@ from sentry.models.repository import Repository from sentry.organizations.services.organization.model import RpcOrganization from sentry.plugins.providers import IntegrationRepositoryProvider +from sentry.plugins.providers.integration_repository import RepositoryConfig from sentry.shared_integrations.exceptions import ApiError, IntegrationError WEBHOOK_EVENTS = ["push", "pull_request"] @@ -51,8 +52,8 @@ def get_repository_data( return config def build_repository_config( - self, organization: RpcOrganization, data: Mapping[str, Any] - ) -> Mapping[str, Any]: + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: return { "name": data["identifier"], "external_id": data["external_id"], diff --git a/src/sentry/integrations/github/tasks/link_all_repos.py b/src/sentry/integrations/github/tasks/link_all_repos.py index 3df12a32937b15..fb129fc9a06404 100644 --- a/src/sentry/integrations/github/tasks/link_all_repos.py +++ b/src/sentry/integrations/github/tasks/link_all_repos.py @@ -1,4 +1,5 @@ import logging +from typing import Any from sentry.constants import ObjectStatus from sentry.integrations.services.integration import integration_service @@ -18,7 +19,6 @@ from sentry.taskworker.config import TaskworkerConfig from sentry.taskworker.namespaces import integrations_control_tasks from sentry.taskworker.retry import Retry -from sentry.utils import metrics logger = logging.getLogger(__name__) @@ -58,27 +58,11 @@ def link_all_repos( integration_id=integration_id, status=ObjectStatus.ACTIVE ) if not integration: - # TODO: Remove this logger in favor of context manager - logger.error( - "%s.link_all_repos.integration_missing", - integration_key, - extra={"organization_id": organization_id}, - ) - metrics.incr("github.link_all_repos.error", tags={"type": "missing_integration"}) lifecycle.record_failure(str(LinkAllReposHaltReason.MISSING_INTEGRATION)) return rpc_org = organization_service.get(id=organization_id) if rpc_org is None: - logger.error( - "%s.link_all_repos.organization_missing", - integration_key, - extra={"organization_id": organization_id}, - ) - metrics.incr( - f"{integration_key}.link_all_repos.error", - tags={"type": "missing_organization"}, - ) lifecycle.record_failure(str(LinkAllReposHaltReason.MISSING_ORGANIZATION)) return @@ -93,26 +77,31 @@ def link_all_repos( lifecycle.record_halt(str(LinkAllReposHaltReason.RATE_LIMITED)) return - metrics.incr(f"{integration_key}.link_all_repos.api_error") raise integration_repo_provider = get_integration_repository_provider(integration) - # If we successfully create any repositories, we'll set this to True - success = False - + repo_configs: list[dict[str, Any]] = [] + missing_repos = [] for repo in repositories: try: - config = get_repo_config(repo, integration_id) - integration_repo_provider.create_repository( - repo_config=config, organization=rpc_org - ) - success = True + repo_configs.append(get_repo_config(repo, integration_id)) except KeyError: - continue - except RepoExistsError: - metrics.incr("sentry.integration_repo_provider.repo_exists") + missing_repos.append(repo) continue - if not success: - lifecycle.record_halt(str(LinkAllReposHaltReason.REPOSITORY_NOT_CREATED)) + try: + integration_repo_provider.create_repositories( + configs=repo_configs, organization=rpc_org + ) + except RepoExistsError as e: + lifecycle.record_halt( + str(LinkAllReposHaltReason.REPOSITORY_NOT_CREATED), + {"missing_repos": e.repos, "integration_id": integration_id}, + ) + + if missing_repos: + lifecycle.record_halt( + str(LinkAllReposHaltReason.REPOSITORY_NOT_CREATED), + {"missing_repos": missing_repos, "integration_id": integration_id}, + ) diff --git a/src/sentry/integrations/github_enterprise/repository.py b/src/sentry/integrations/github_enterprise/repository.py index 333f8a7a53ab18..5f256206ffd418 100644 --- a/src/sentry/integrations/github_enterprise/repository.py +++ b/src/sentry/integrations/github_enterprise/repository.py @@ -1,7 +1,10 @@ +from typing import Any + from sentry.integrations.github.repository import GitHubRepositoryProvider from sentry.integrations.services.integration import integration_service from sentry.integrations.types import IntegrationProviderSlug from sentry.organizations.services.organization.model import RpcOrganization +from sentry.plugins.providers.integration_repository import RepositoryConfig from sentry.shared_integrations.exceptions import ApiError, IntegrationError WEBHOOK_EVENTS = ["push", "pull_request"] @@ -25,7 +28,9 @@ def _validate_repo(self, client, installation, repo): return repo_data - def build_repository_config(self, organization: RpcOrganization, data): + def build_repository_config( + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: integration = integration_service.get_integration( integration_id=data["integration_id"], provider=self.repo_provider ) diff --git a/src/sentry/integrations/gitlab/repository.py b/src/sentry/integrations/gitlab/repository.py index 2f32b070a7c182..315bc22103a7b1 100644 --- a/src/sentry/integrations/gitlab/repository.py +++ b/src/sentry/integrations/gitlab/repository.py @@ -1,6 +1,9 @@ +from typing import Any + from sentry.integrations.types import IntegrationProviderSlug from sentry.organizations.services.organization.model import RpcOrganization from sentry.plugins.providers import IntegrationRepositoryProvider +from sentry.plugins.providers.integration_repository import RepositoryConfig from sentry.shared_integrations.exceptions import ApiError @@ -31,7 +34,9 @@ def get_repository_data(self, organization, config): ) return config - def build_repository_config(self, organization: RpcOrganization, data): + def build_repository_config( + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: installation = self.get_installation(data.get("installation"), organization.id) client = installation.get_client() diff --git a/src/sentry/integrations/vsts/repository.py b/src/sentry/integrations/vsts/repository.py index b628f66f111f66..f9a9b74007acfa 100644 --- a/src/sentry/integrations/vsts/repository.py +++ b/src/sentry/integrations/vsts/repository.py @@ -9,6 +9,7 @@ from sentry.models.repository import Repository from sentry.organizations.services.organization.model import RpcOrganization from sentry.plugins.providers import IntegrationRepositoryProvider +from sentry.plugins.providers.integration_repository import RepositoryConfig MAX_COMMIT_DATA_REQUESTS = 90 @@ -46,8 +47,8 @@ def get_repository_data( return config def build_repository_config( - self, organization: RpcOrganization, data: Mapping[str, str] - ) -> Mapping[str, Any]: + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: return { "name": data["name"], "external_id": data["external_id"], diff --git a/src/sentry/plugins/providers/integration_repository.py b/src/sentry/plugins/providers/integration_repository.py index f55bb932e670ed..16f8e72c385d6d 100644 --- a/src/sentry/plugins/providers/integration_repository.py +++ b/src/sentry/plugins/providers/integration_repository.py @@ -1,23 +1,20 @@ from __future__ import annotations import logging -from collections.abc import MutableMapping from datetime import timezone -from typing import Any, ClassVar +from typing import Any, ClassVar, TypedDict from dateutil.parser import parse as parse_date -from rest_framework import status from rest_framework.request import Request from rest_framework.response import Response from sentry import analytics -from sentry.api.exceptions import SentryAPIException from sentry.constants import ObjectStatus from sentry.integrations.base import IntegrationInstallation from sentry.integrations.models.integration import Integration from sentry.integrations.services.integration import integration_service from sentry.integrations.services.repository import repository_service -from sentry.integrations.services.repository.model import RpcCreateRepository +from sentry.integrations.services.repository.model import RpcCreateRepository, RpcRepository from sentry.models.repository import Repository from sentry.organizations.services.organization.model import RpcOrganization from sentry.shared_integrations.exceptions import IntegrationError @@ -27,10 +24,22 @@ from sentry.utils import metrics -class RepoExistsError(SentryAPIException): - status_code = status.HTTP_400_BAD_REQUEST - code = "repo_exists" - message = "A repository with that configuration already exists" +class RepositoryConfig(TypedDict): + name: str + external_id: str + url: str + config: dict[str, Any] + integration_id: int + + +class RepoExistsError(Exception): + def __init__(self, repos: list[RepositoryConfig] | None = None): + self.repos = repos + + def __str__(self): + if self.repos: + return f"Repositories already exist: {', '.join(repo['name'] for repo in self.repos)}" + return "Repositories already exist." def get_integration_repository_provider(integration): @@ -83,15 +92,15 @@ def get_installation( def create_repository( self, - repo_config: MutableMapping[str, Any], + repo_config: dict[str, Any], organization: RpcOrganization, ): result = self.build_repository_config(organization=organization, data=repo_config) - integration_id = result.get("integration_id") - external_id = result.get("external_id") - name = result.get("name") - url = result.get("url") + integration_id = result["integration_id"] + external_id = result["external_id"] + name = result["name"] + url = result["url"] # first check if there is an existing hidden repository for the organization and external id repositories = repository_service.get_repositories( @@ -177,10 +186,96 @@ def create_repository( update=repo, ) - raise RepoExistsError + raise RepoExistsError(repos=[result]) return result, repo + def _update_repository(self, repo: RpcRepository, config: RepositoryConfig): + repo.status = ObjectStatus.ACTIVE + + for field_name, field_value in config.items(): + setattr(repo, field_name, field_value) + return repo + + def _update_repositories( + self, + repositories: list[RpcRepository], + external_id_to_repo_config: dict[str, RepositoryConfig], + ) -> list[RpcRepository]: + repos_to_update: list[RpcRepository] = [] + for repo in repositories: + external_id = repo.external_id + if external_id and (repo_config := external_id_to_repo_config.get(external_id)): + repos_to_update.append(self._update_repository(repo, repo_config)) + external_id_to_repo_config.pop(external_id, None) + return repos_to_update + + def create_repositories( + self, + configs: list[dict[str, Any]], + organization: RpcOrganization, + ): + external_id_to_repo_config: dict[str, RepositoryConfig] = {} + for config in configs: + result = self.build_repository_config(organization=organization, data=config) + external_id_to_repo_config[result["external_id"]] = result + + repos_to_update: list[RpcRepository] = [] + + hidden_repos = repository_service.get_repositories( + organization_id=organization.id, + status=ObjectStatus.HIDDEN, + ) + repos_to_update.extend(self._update_repositories(hidden_repos, external_id_to_repo_config)) + + # then check if there are repositories without an integration that matches + repositories = repository_service.get_repositories( + organization_id=organization.id, + has_integration=False, + ) + repos_to_update.extend(self._update_repositories(repositories, external_id_to_repo_config)) + + # create remaining repositories + missing_repos: list[RepositoryConfig] = [] + for external_id, repo_config in external_id_to_repo_config.items(): + integration_id = repo_config["integration_id"] + create_repository = RpcCreateRepository.parse_obj( + {**repo_config, "provider": self.id, "status": ObjectStatus.ACTIVE} + ) + new_repository = repository_service.create_repository( + organization_id=organization.id, create=create_repository + ) + if new_repository is not None: + continue + + missing_repos.append(repo_config) + # Try to delete webhook we just created + try: + self.on_delete_repository( + Repository(organization_id=organization.id, **repo_config) + ) + except IntegrationError: + pass + + # if possible update the repo with matching integration + repositories = repository_service.get_repositories( + organization_id=organization.id, + integration_id=integration_id, + external_id=external_id, + ) + # We anticipate to only update one repository, but we update any duplicates as well. + for repo in repositories: + repos_to_update.append(self._update_repository(repo, repo_config)) + + if repos_to_update: + repository_service.update_repositories( + organization_id=organization.id, + updates=repos_to_update, + ) + + if missing_repos: + raise RepoExistsError(repos=missing_repos) + def dispatch(self, request: Request, organization, **kwargs): try: config = self.get_repository_data(organization, request.data) @@ -242,7 +337,9 @@ def get_repository_data(self, organization, config): """ return config - def build_repository_config(self, organization: RpcOrganization, data): + def build_repository_config( + self, organization: RpcOrganization, data: dict[str, Any] + ) -> RepositoryConfig: """ Builds final dict containing all necessary data to create the repository diff --git a/tests/sentry/integrations/github/tasks/test_link_all_repos.py b/tests/sentry/integrations/github/tasks/test_link_all_repos.py index 5a9bb2abb5a17e..96cd90ec8e01dc 100644 --- a/tests/sentry/integrations/github/tasks/test_link_all_repos.py +++ b/tests/sentry/integrations/github/tasks/test_link_all_repos.py @@ -45,8 +45,7 @@ def _add_responses(self): ) @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - @patch("sentry.integrations.github.tasks.link_all_repos.metrics") - def test_link_all_repos_inactive_integration(self, mock_metrics, mock_record, _): + def test_link_all_repos_inactive_integration(self, mock_record, _): self.integration.update(status=ObjectStatus.DISABLED) link_all_repos( @@ -55,10 +54,6 @@ def test_link_all_repos_inactive_integration(self, mock_metrics, mock_record, _) organization_id=self.organization.id, ) - mock_metrics.incr.assert_called_with( - "github.link_all_repos.error", tags={"type": "missing_integration"} - ) - assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) assert_failure_metric(mock_record, LinkAllReposHaltReason.MISSING_INTEGRATION.value) @@ -123,7 +118,10 @@ def test_link_all_repos_api_response_keyerror(self, mock_record, _): assert repos[0].name == "getsentry/snuba" - assert_slo_metric(mock_record, EventLifecycleOutcome.SUCCESS) + assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED) + assert_halt_metric( + mock_record, LinkAllReposHaltReason.REPOSITORY_NOT_CREATED.value + ) # should be halt because it didn't complete successfully @responses.activate @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") @@ -157,39 +155,30 @@ def test_link_all_repos_api_response_keyerror_single_repo(self, mock_record, _): assert_halt_metric(mock_record, LinkAllReposHaltReason.REPOSITORY_NOT_CREATED.value) @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - @patch("sentry.integrations.github.tasks.link_all_repos.metrics") - def test_link_all_repos_missing_integration(self, mock_metrics, mock_record, _): + def test_link_all_repos_missing_integration(self, mock_record, _): link_all_repos( integration_key=self.key, integration_id=0, organization_id=self.organization.id, ) - mock_metrics.incr.assert_called_with( - "github.link_all_repos.error", tags={"type": "missing_integration"} - ) assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) assert_failure_metric(mock_record, LinkAllReposHaltReason.MISSING_INTEGRATION.value) @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - @patch("sentry.integrations.github.tasks.link_all_repos.metrics") - def test_link_all_repos_missing_organization(self, mock_metrics, mock_record, _): + def test_link_all_repos_missing_organization(self, mock_record, _): link_all_repos( integration_key=self.key, integration_id=self.integration.id, organization_id=0, ) - mock_metrics.incr.assert_called_with( - "github.link_all_repos.error", tags={"type": "missing_organization"} - ) assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) assert_failure_metric(mock_record, LinkAllReposHaltReason.MISSING_ORGANIZATION.value) @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - @patch("sentry.integrations.github.tasks.link_all_repos.metrics") @responses.activate - def test_link_all_repos_api_error(self, mock_metrics, mock_record, _): + def test_link_all_repos_api_error(self, mock_record, _): responses.add( responses.GET, @@ -203,14 +192,12 @@ def test_link_all_repos_api_error(self, mock_metrics, mock_record, _): integration_id=self.integration.id, organization_id=self.organization.id, ) - mock_metrics.incr.assert_called_with("github.link_all_repos.api_error") assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - @patch("sentry.integrations.github.integration.metrics") @responses.activate - def test_link_all_repos_api_error_rate_limited(self, mock_metrics, mock_record, _): + def test_link_all_repos_api_error_rate_limited(self, mock_record, _): responses.add( responses.GET, @@ -227,16 +214,14 @@ def test_link_all_repos_api_error_rate_limited(self, mock_metrics, mock_record, integration_id=self.integration.id, organization_id=self.organization.id, ) - mock_metrics.incr.assert_called_with("github.link_all_repos.rate_limited_error") assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED) assert_halt_metric(mock_record, LinkAllReposHaltReason.RATE_LIMITED.value) @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") @patch("sentry.models.Repository.objects.create") - @patch("sentry.integrations.github.tasks.link_all_repos.metrics") @responses.activate - def test_link_all_repos_repo_creation_error(self, mock_metrics, mock_repo, mock_record, _): + def test_link_all_repos_repo_creation_error(self, mock_repo, mock_record, _): mock_repo.side_effect = IntegrityError self._add_responses() @@ -247,8 +232,6 @@ def test_link_all_repos_repo_creation_error(self, mock_metrics, mock_repo, mock_ organization_id=self.organization.id, ) - mock_metrics.incr.assert_called_with("sentry.integration_repo_provider.repo_exists") - assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED) assert_halt_metric(mock_record, LinkAllReposHaltReason.REPOSITORY_NOT_CREATED.value) From c06c9c7d21dbc1c2c082bb311eed27434c4b6d86 Mon Sep 17 00:00:00 2001 From: Cathy Teng Date: Mon, 14 Jul 2025 16:42:54 -0700 Subject: [PATCH 3/3] put back APIException in RepoExistsError --- .../providers/integration_repository.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/sentry/plugins/providers/integration_repository.py b/src/sentry/plugins/providers/integration_repository.py index 16f8e72c385d6d..cab7abcb016a65 100644 --- a/src/sentry/plugins/providers/integration_repository.py +++ b/src/sentry/plugins/providers/integration_repository.py @@ -5,10 +5,12 @@ from typing import Any, ClassVar, TypedDict from dateutil.parser import parse as parse_date +from rest_framework import status from rest_framework.request import Request from rest_framework.response import Response from sentry import analytics +from sentry.api.exceptions import SentryAPIException from sentry.constants import ObjectStatus from sentry.integrations.base import IntegrationInstallation from sentry.integrations.models.integration import Integration @@ -32,8 +34,20 @@ class RepositoryConfig(TypedDict): integration_id: int -class RepoExistsError(Exception): - def __init__(self, repos: list[RepositoryConfig] | None = None): +class RepoExistsError(SentryAPIException): + status_code = status.HTTP_400_BAD_REQUEST + code = "repo_exists" + message = "A repository with that configuration already exists" + + def __init__( + self, + code=None, + message=None, + detail=None, + repos: list[RepositoryConfig] | None = None, + **kwargs, + ): + super().__init__(code=code, message=message, detail=detail, **kwargs) self.repos = repos def __str__(self):