Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions src/sentry/integrations/source_code_management/repo_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sentry.integrations.services.integration import RpcOrganizationIntegration
from sentry.issues.auto_source_code_config.utils import get_supported_extensions
from sentry.shared_integrations.exceptions import ApiError, IntegrationError
from sentry.utils import metrics
from sentry.utils.cache import cache

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -63,11 +64,13 @@ def integration_name(self) -> str:

def get_trees_for_org(self) -> dict[str, RepoTree]:
trees = {}
repositories = self._populate_repositories()
with metrics.timer("integrations.source_code_management.populate_repositories.duration"):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the root of all slowdowns. I want to verify it.

repositories = self._populate_repositories()
if not repositories:
logger.warning("Fetching repositories returned an empty list.")
else:
trees = self._populate_trees(repositories)
with metrics.timer("integrations.source_code_management.populate_trees.duration"):
trees = self._populate_trees(repositories)

return trees

Expand All @@ -80,7 +83,9 @@ def _populate_repositories(self) -> list[dict[str, str]]:
)
repositories: list[dict[str, str]] = cache.get(cache_key, [])

use_cache = True
if not repositories:
use_cache = False
repositories = [
# Do not use RepoAndBranch so it stores in the cache as a simple dict
{
Expand All @@ -91,6 +96,11 @@ def _populate_repositories(self) -> list[dict[str, str]]:
if not repo_info.get("archived")
]

metrics.incr(
"integrations.source_code_management.populate_repositories",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to calculate how often the cache is hit.

tags={"cached": use_cache, "integration": self.integration_name},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cardinality of self.integration_name is pretty low, right? It'll just be the SCM?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct

)

if repositories:
cache.set(cache_key, repositories, self.CACHE_SECONDS)

Expand All @@ -115,6 +125,11 @@ def _populate_trees(self, repositories: Sequence[dict[str, str]]) -> dict[str, R
"Loading trees from cache. Execution will continue. Check logs.", exc_info=True
)

metrics.incr(
"integrations.source_code_management.populate_trees",
tags={"cached": use_cache, "integration": self.integration_name},
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

)

for index, repo_info in enumerate(repositories):
repo_full_name = repo_info["full_name"]
extra = {"repo_full_name": repo_full_name}
Expand Down
25 changes: 15 additions & 10 deletions src/sentry/issues/auto_source_code_config/code_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.models.repository import Repository
from sentry.utils import metrics
from sentry.utils.event_frames import EventFrame, try_munge_frame_path

from .constants import METRIC_PREFIX
from .integration_utils import InstallationNotFoundError, get_installation
from .utils import PlatformConfig

Expand Down Expand Up @@ -172,16 +174,19 @@ def generate_code_mappings(
self.code_mappings = {}
self.platform = platform

buckets: dict[str, list[FrameInfo]] = self._stacktrace_buckets(frames)

# We reprocess stackframes until we are told that no code mappings were produced
# This is order to reprocess past stackframes in light of newly discovered code mappings
# This allows for idempotency since the order of the stackframes will not matter
# This has no performance issue because stackframes that match an existing code mapping
# will be skipped
while True:
if not self._process_stackframes(buckets):
break
with metrics.timer(
f"{METRIC_PREFIX}.generate_code_mappings.duration", tags={"platform": platform}
):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change in this section can be more easily viewed by hiding the white spaces:
image

buckets: dict[str, list[FrameInfo]] = self._stacktrace_buckets(frames)

# We reprocess stackframes until we are told that no code mappings were produced
# This is order to reprocess past stackframes in light of newly discovered code mappings
# This allows for idempotency since the order of the stackframes will not matter
# This has no performance issue because stackframes that match an existing code mapping
# will be skipped
while True:
if not self._process_stackframes(buckets):
break

return list(self.code_mappings.values())

Expand Down
101 changes: 53 additions & 48 deletions src/sentry/issues/auto_source_code_config/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,58 +176,63 @@ def create_repos_and_code_mappings(
raise InstallationNotFoundError

organization_id = organization_integration.organization_id
for code_mapping in code_mappings:
repository = (
Repository.objects.filter(name=code_mapping.repo.name, organization_id=organization_id)
.order_by("-date_added")
.first()
)

if not repository:
if not dry_run:
repository = Repository.objects.create(
name=code_mapping.repo.name,
organization_id=organization_id,
integration_id=organization_integration.integration_id,
with metrics.timer(
f"{METRIC_PREFIX}.create_configurations.duration", tags={"platform": platform}
):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hiding the white spaces will show that only a line is changed into a multi-line.
image

for code_mapping in code_mappings:
repository = (
Repository.objects.filter(
name=code_mapping.repo.name, organization_id=organization_id
)
metrics.incr(
key=f"{METRIC_PREFIX}.repository.created",
tags={"platform": platform, "dry_run": dry_run},
sample_rate=1.0,
.order_by("-date_added")
.first()
)

extra = {
"project_id": project.id,
"stack_root": code_mapping.stacktrace_root,
"repository_name": code_mapping.repo.name,
}
# The project and stack_root are unique together
existing_code_mappings = RepositoryProjectPathConfig.objects.filter(
project=project, stack_root=code_mapping.stacktrace_root
)
if existing_code_mappings.exists():
logger.warning("Investigate.", extra=extra)
continue

if not dry_run:
if repository is None: # This is mostly to appease the type checker
if not repository:
if not dry_run:
repository = Repository.objects.create(
name=code_mapping.repo.name,
organization_id=organization_id,
integration_id=organization_integration.integration_id,
)
metrics.incr(
key=f"{METRIC_PREFIX}.repository.created",
tags={"platform": platform, "dry_run": dry_run},
sample_rate=1.0,
)

extra = {
"project_id": project.id,
"stack_root": code_mapping.stacktrace_root,
"repository_name": code_mapping.repo.name,
}
# The project and stack_root are unique together
existing_code_mappings = RepositoryProjectPathConfig.objects.filter(
project=project, stack_root=code_mapping.stacktrace_root
)
if existing_code_mappings.exists():
logger.warning("Investigate.", extra=extra)
continue

RepositoryProjectPathConfig.objects.create(
project=project,
stack_root=code_mapping.stacktrace_root,
repository=repository,
organization_integration_id=organization_integration.id,
integration_id=organization_integration.integration_id,
organization_id=organization_integration.organization_id,
source_root=code_mapping.source_path,
default_branch=code_mapping.repo.branch,
automatically_generated=True,
)
if not dry_run:
if repository is None: # This is mostly to appease the type checker
logger.warning("Investigate.", extra=extra)
continue

RepositoryProjectPathConfig.objects.create(
project=project,
stack_root=code_mapping.stacktrace_root,
repository=repository,
organization_integration_id=organization_integration.id,
integration_id=organization_integration.integration_id,
organization_id=organization_integration.organization_id,
source_root=code_mapping.source_path,
default_branch=code_mapping.repo.branch,
automatically_generated=True,
)

metrics.incr(
key=f"{METRIC_PREFIX}.code_mapping.created",
tags={"platform": platform, "dry_run": dry_run},
sample_rate=1.0,
)
metrics.incr(
key=f"{METRIC_PREFIX}.code_mapping.created",
tags={"platform": platform, "dry_run": dry_run},
sample_rate=1.0,
)
Loading