Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 47 additions & 18 deletions src/sentry/api/endpoints/artifact_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
ArtifactBundle,
DebugIdArtifactBundle,
Distribution,
File,
Project,
ProjectArtifactBundle,
Release,
Expand All @@ -45,23 +44,43 @@
class ProjectArtifactLookupEndpoint(ProjectEndpoint):
permission_classes = (ProjectReleasePermission,)

def download_file(self, file_id, project: Project):
def download_file(self, download_id, project: Project):
ty, ty_id = download_id.split("/")

rate_limited = ratelimits.is_limited(
project=project,
key=f"rl:ArtifactLookupEndpoint:download:{file_id}:{project.id}",
key=f"rl:ArtifactLookupEndpoint:download:{download_id}:{project.id}",
limit=10,
)
if rate_limited:
logger.info(
"notification.rate_limited",
extra={"project_id": project.id, "file_id": file_id},
extra={"project_id": project.id, "file_id": download_id},
)
return HttpResponse({"Too many download requests"}, status=429)

file = File.objects.filter(id=file_id).first()
file = None
if ty == "artifact_bundle":
file = (
ArtifactBundle.objects.filter(
id=ty_id,
projectartifactbundle__project_id=project.id,
)
.select_related("file")
.first()
)
elif ty == "release_file":
# NOTE: `ReleaseFile` does have a `project_id`, but that seems to
# be always empty, so using the `organization_id` instead.
file = (
ReleaseFile.objects.filter(id=ty_id, organization_id=project.organization.id)
.select_related("file")
.first()
)

if file is None:
raise Http404
file = file.file

try:
fp = file.getfile()
Expand Down Expand Up @@ -93,9 +112,9 @@ def get(self, request: Request, project: Project) -> Response:

:auth: required
"""
if request.GET.get("download") is not None:
if (download_id := request.GET.get("download")) is not None:
if has_download_permission(request, project):
return self.download_file(request.GET.get("download"), project)
return self.download_file(download_id, project)
else:
return Response(status=403)

Expand All @@ -114,7 +133,7 @@ def get(self, request: Request, project: Project) -> Response:
def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
for (bundle_id, date_added, file_id) in inner_bundles:
used_artifact_bundles[bundle_id] = date_added
bundle_file_ids.add(file_id)
bundle_file_ids.add(("artifact_bundle", bundle_id, file_id))

if debug_id:
bundles = get_artifact_bundles_containing_debug_id(debug_id, project)
Expand All @@ -132,7 +151,8 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):

release, dist = try_resolve_release_dist(project, release_name, dist_name)
if release:
bundle_file_ids |= get_legacy_release_bundles(release, dist)
for (releasefile_id, file_id) in get_legacy_release_bundles(release, dist):
bundle_file_ids.add(("release_file", releasefile_id, file_id))
individual_files = get_legacy_releasefile_by_file_url(release, dist, url)

if options.get("sourcemaps.artifact-bundles.enable-renewal") == 1.0:
Expand All @@ -144,12 +164,16 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
url_constructor = UrlConstructor(request, project)

found_artifacts = []
for file_id in bundle_file_ids:
# NOTE: the reason we use the `file_id` as the `id` we return is because
# downstream symbolicator relies on that for its internal caching.
# We do not want to hard-refresh those caches quite yet, and the `id`
# should also be as unique as possible, which the `file_id` is.
for (ty, ty_id, file_id) in bundle_file_ids:
found_artifacts.append(
{
"id": str(file_id),
"type": "bundle",
"url": url_constructor.url_for_file_id(file_id),
"url": url_constructor.url_for_file_id(ty, ty_id),
}
)

Expand All @@ -158,7 +182,7 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
{
"id": str(release_file.file.id),
"type": "file",
"url": url_constructor.url_for_file_id(release_file.file.id),
"url": url_constructor.url_for_file_id("release_file", release_file.id),
# The `name` is the url/abs_path of the file,
# as in: `"~/path/to/file.min.js"`.
"abs_path": release_file.name,
Expand All @@ -167,6 +191,9 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
}
)

# make sure we have a stable sort order for tests
found_artifacts.sort(key=lambda x: int(x["id"]))

# NOTE: We do not paginate this response, as we have very tight limits on all the individual queries.
return Response(serialize(found_artifacts, request.user))

Expand Down Expand Up @@ -259,10 +286,11 @@ def try_resolve_release_dist(
return release, dist


def get_legacy_release_bundles(release: Release, dist: Optional[Distribution]):
def get_legacy_release_bundles(
release: Release, dist: Optional[Distribution]
) -> Set[Tuple[int, int]]:
return set(
ReleaseFile.objects.select_related("file")
.filter(
ReleaseFile.objects.filter(
release_id=release.id,
dist_id=dist.id if dist else None,
# a `ReleaseFile` with `0` artifacts represents a release archive,
Expand All @@ -271,7 +299,8 @@ def get_legacy_release_bundles(release: Release, dist: Optional[Distribution]):
# similarly the special `type` is also used for release archives.
file__type=RELEASE_BUNDLE_TYPE,
)
.values_list("file_id", flat=True)
.select_related("file")
.values_list("id", "file_id")
# TODO: this `order_by` might be incredibly slow
# we want to have a hard limit on the returned bundles here. and we would
# want to pick the most recently uploaded ones. that should mostly be
Expand Down Expand Up @@ -304,11 +333,11 @@ def __init__(self, request: Request, project: Project):
else:
self.base_url = request.build_absolute_uri(request.path)

def url_for_file_id(self, file_id: int) -> str:
def url_for_file_id(self, ty: str, file_id: int) -> str:
# NOTE: Returning a self-route that requires authentication (via Bearer token)
# is not really forward compatible with a pre-signed URL that does not
# require any authentication or headers whatsoever.
# This also requires a workaround in Symbolicator, as its generic http
# downloader blocks "internal" IPs, whereas the internal Sentry downloader
# is explicitly exempt.
return f"{self.base_url}?download={file_id}"
return f"{self.base_url}?download={ty}/{file_id}"
6 changes: 4 additions & 2 deletions src/sentry/api/endpoints/debug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ def download(self, debug_file_id, project):
"notification.rate_limited",
extra={"project_id": project.id, "project_debug_file_id": debug_file_id},
)
return HttpResponse({"Too many download requests"}, status=403)
return HttpResponse({"Too many download requests"}, status=429)

debug_file = ProjectDebugFile.objects.filter(id=debug_file_id).first()
debug_file = ProjectDebugFile.objects.filter(
id=debug_file_id, project_id=project.id
).first()

if debug_file is None:
raise Http404
Expand Down
Loading