Skip to content

Commit

Permalink
Merge pull request #89 from bento-platform/fix/content-length-downloa…
Browse files Browse the repository at this point in the history
…d-res

fix!: correct range-not-satisfiable errs + content length for partial res
  • Loading branch information
davidlougheed authored Jan 3, 2024
2 parents 010c73c + 77918fb commit b1c0793
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 21 deletions.
9 changes: 8 additions & 1 deletion chord_drs/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from flask import Flask
from flask_cors import CORS
from flask_migrate import Migrate
from werkzeug.exceptions import BadRequest, Forbidden, NotFound
from werkzeug.exceptions import BadRequest, Forbidden, NotFound, RequestedRangeNotSatisfiable

from .authz import authz_middleware
from .backend import close_backend
Expand Down Expand Up @@ -64,6 +64,13 @@
drs_compat=True,
authz=authz_middleware,
)(e))
application.register_error_handler(
RequestedRangeNotSatisfiable,
flask_errors.flask_error_wrap(
flask_errors.flask_range_not_satisfiable_error,
drs_compat=True,
authz=authz_middleware,
))

# Attach the database to the application and run migrations if needed
db.init_app(application)
Expand Down
31 changes: 23 additions & 8 deletions chord_drs/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from sqlalchemy import or_
from urllib.parse import urlparse
from werkzeug.exceptions import BadRequest, Forbidden, NotFound, InternalServerError
from werkzeug.exceptions import BadRequest, Forbidden, NotFound, InternalServerError, RequestedRangeNotSatisfiable

from . import __version__
from .authz import authz_middleware
Expand Down Expand Up @@ -110,6 +110,12 @@ def bad_request_log_mark(err: str) -> BadRequest:
return BadRequest(err)


def range_not_satisfiable_log_mark(description: str, length: int) -> RequestedRangeNotSatisfiable:
authz_middleware.mark_authz_done(request)
current_app.logger.error(f"Requested range not satisfiable: {description}; true length: {length}")
return RequestedRangeNotSatisfiable(description=description, length=length)


def get_drs_base_path() -> str:
parsed_service_url = urlparse(current_app.config["SERVICE_BASE_URL"])
return f"{parsed_service_url.netloc}{parsed_service_url.path}"
Expand Down Expand Up @@ -335,6 +341,8 @@ def object_download(object_id: str):
res.headers["Accept-Ranges"] = "bytes"
return res

drs_end_byte = drs_object.size - 1

logger.debug(f"Found Range header: {range_header}")
range_err = f"Malformatted range header: expected bytes=X-Y or bytes=X-, got {range_header}"

Expand All @@ -346,14 +354,20 @@ def object_download(object_id: str):
logger.debug(f"Retrieving byte range {byte_range}")

try:
start = int(byte_range[0])
end = int(byte_range[1]) if byte_range[1] else None
start: int = int(byte_range[0])
end: int = int(byte_range[1]) if byte_range[1] else drs_end_byte
except (IndexError, ValueError):
raise bad_request_log_mark(range_err)

if end is not None and end < start:
raise bad_request_log_mark(
f"Invalid range header: end cannot be less than start (start={start}, end={end})")
if end > drs_end_byte:
raise range_not_satisfiable_log_mark(
f"End cannot be past last byte ({end} > {drs_end_byte})",
drs_object.size)

if end < start:
raise range_not_satisfiable_log_mark(
f"Invalid range header: end cannot be less than start (start={start}, end={end})",
drs_object.size)

def generate_bytes():
with open(drs_object.location, "rb") as fh2:
Expand All @@ -372,12 +386,13 @@ def generate_bytes():
# If we've hit the end of the file and are reading empty byte strings, or we've reached the
# end of our range (inclusive), then escape the loop.
# This is guaranteed to terminate with a finite-sized file.
if len(data) == 0 or (end is not None and byte_offset > end):
if len(data) == 0 or byte_offset > end:
break

# Stream the bytes of the file or file segment from the generator function
r = current_app.response_class(generate_bytes(), status=206, mimetype=MIME_OCTET_STREAM)
r.headers["Content-Range"] = f"bytes {start}-{end or (drs_object.size - 1)}/{drs_object.size}"
r.headers["Content-Length"] = (end + 1 - start) # byte range is inclusive, so need to add one
r.headers["Content-Range"] = f"bytes {start}-{end}/{drs_object.size}"
r.headers["Content-Disposition"] = \
f"attachment; filename*=UTF-8'{urllib.parse.quote(drs_object.name, encoding='utf-8')}'"
return r
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "chord-drs"
version = "0.14.3"
version = "0.15.0"
description = "An implementation of a data repository system (as per GA4GH's specs) for the Bento platform."
authors = ["David Lougheed <david.lougheed@mail.mcgill.ca>"]
license = "LGPL-3.0"
Expand All @@ -14,7 +14,7 @@ include = [
[tool.poetry.dependencies]
python = "^3.10"
boto3 = ">=1.34.6,<1.35"
bento-lib = {version = "^11.1.1", extras = ["flask"]}
bento-lib = {version = "^11.2.0", extras = ["flask"]}
flask = ">=2.2.5,<2.3"
flask-sqlalchemy = ">=3.0.5,<3.1" # 3.1 drops SQLAlchemy 1 support
flask-migrate = ">=4.0.5,<4.1"
Expand Down
12 changes: 6 additions & 6 deletions tests/test_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,14 @@ def _test_object_and_download(client, obj, test_range=False):
body = res.get_data(as_text=False)
assert len(body) == 5

# - bytes 100-19999
# - bytes 100-1999
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-1999"),))
assert res.status_code == 206
body = res.get_data(as_text=False)
assert len(body) == 1900

# Size is 2455, so these'll run off the end and return the whole thing after 100

res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-19999"),))
assert res.status_code == 206
body = res.get_data(as_text=False)
assert len(body) == 2355
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-"),))
assert res.status_code == 206
body = res.get_data(as_text=False)
Expand All @@ -165,9 +161,13 @@ def _test_object_and_download(client, obj, test_range=False):
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bites=0-4"),))
assert res.status_code == 400

# - cannot request more than what is available
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-19999"),))
assert res.status_code == 416

# - reversed interval
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=4-0"),))
assert res.status_code == 400
assert res.status_code == 416


@responses.activate
Expand Down

0 comments on commit b1c0793

Please sign in to comment.