diff --git a/src/dso_api/dynamic_api/views/mvt.py b/src/dso_api/dynamic_api/views/mvt.py index 1a3532950..5d06e0543 100644 --- a/src/dso_api/dynamic_api/views/mvt.py +++ b/src/dso_api/dynamic_api/views/mvt.py @@ -1,16 +1,19 @@ """Mapbox Vector Tiles (MVT) views of geographic datasets.""" - +import itertools import logging -import time +import mercantile +from django.contrib.gis.db.models import functions from django.core.exceptions import FieldDoesNotExist, PermissionDenied -from django.http import Http404 +from django.db import connection +from django.http import Http404, HttpResponse, StreamingHttpResponse from django.urls.base import reverse +from django.views import View from django.views.generic import TemplateView from schematools.contrib.django.models import Dataset from schematools.exceptions import SchemaObjectNotFound from schematools.types import DatasetTableSchema -from vectortiles.postgis.views import MVTView +from vectortiles.postgis.functions import AsMVTGeom, MakeEnvelope from ..datasets import get_active_datasets from ..permissions import CheckPermissionsMixin @@ -93,9 +96,15 @@ def get_context_data(self, **kwargs): return context -class DatasetMVTView(CheckPermissionsMixin, MVTView): +class DatasetMVTView(CheckPermissionsMixin, View): """An MVT view for a single dataset.""" + _CONTENT_TYPE = "application/vnd.mapbox-vector-tile" + _EXTENT = 4096 # Default extent for MVT. + # Name of temporary MVT row that we add to our queryset. + # No field name starting with an underscore ever occurs in our datasets. + _MVT_ROW = "_geom_prepared_for_mvt" + def setup(self, request, *args, **kwargs): super().setup(request, *args, **kwargs) from ..urls import router @@ -108,58 +117,103 @@ def setup(self, request, *args, **kwargs): except KeyError: raise Http404(f"Invalid table: {dataset_name}.{table_name}") from None + schema: DatasetTableSchema = model.table_schema() + try: + self._main_geo = schema.main_geometry_field.python_name + except SchemaObjectNotFound as e: + raise FieldDoesNotExist(f"No field named '{schema.main_geometry}'") from e + self.model = model self.check_permissions(request, [self.model]) def get(self, request, *args, **kwargs): - kwargs.pop("dataset_name") - kwargs.pop("table_name") - self.z = kwargs["z"] - - t0 = time.perf_counter_ns() - result = super().get(request, *args, **kwargs) - logging.info( - "retrieved tile for %s (%d bytes) in %.3fs", - request.path, - len(result.content), - (time.perf_counter_ns() - t0) * 1e-9, - ) - return result + x, y, z = kwargs["x"], kwargs["y"], kwargs["z"] - def get_queryset(self): - return self.model.objects.all() + tile = self._stream_tile(x, y, z) + try: + chunk = next(tile) + except StopIteration: + return HttpResponse(content=b"", content_type=self._CONTENT_TYPE, status=204) + tile = itertools.chain((chunk,), tile) - @property - def vector_tile_fields(self) -> tuple: + return StreamingHttpResponse( + streaming_content=tile, content_type=self._CONTENT_TYPE, status=200 + ) + + def _stream_tile(self, x: int, y: int, z: int): + qs = self.model.objects.all() + bbox = MakeEnvelope(*mercantile.xy_bounds(x, y, z), 3857) + qs = qs.filter( + **{ + f"{self._main_geo}__intersects": bbox, + } + ) + # Add MVT row and restrict to the fields we want in the response. + qs = qs.annotate( + **{self._MVT_ROW: AsMVTGeom(functions.Transform(self._main_geo, 3857), bbox)} + ) + qs = qs.values(self._MVT_ROW, *self._property_fields(z)) + + sql, params = qs.query.sql_with_params() + with connection.cursor() as cursor: + # This hairy query generates the MVT tile using ST_AsMVT, then breaks it up into rows + # that we can stream to the client. We do this because the tile is a potentially very + # large bytea in PostgreSQL, which psycopg would otherwise consume in its entirety + # before passing it on to us. Since psycopg also needs to keep the hex-encoded + # PostgreSQL wire format representation of the tile in memory while decoding it, it + # needs 3*n memory to decode an n-byte tile, and tiles can be up to hundreds of + # megabytes. + # + # To make matters worse, what psycopg returns is a memoryview, and Django accepts + # memoryviews just fine but casts them to bytes objects, meaning the entire tile gets + # copied again. That happens after the hex version has been decoded, but it still + # means a slow client can keep our memory use at 2*n for the duration of the request. + CHUNK_SIZE = 8192 + cursor.execute( # nosec + f""" + WITH mvt AS ( + SELECT ST_AsMVT(_sub.*, %s, %s, %s) FROM ({sql}) as _sub + ) + SELECT * FROM ( + /* This is SQL for range(1, len(x), CHUNK_SIZE), sort of. */ + WITH RECURSIVE chunk(i) AS ( + VALUES (1) + UNION ALL + SELECT chunk.i+{CHUNK_SIZE} FROM chunk, mvt + WHERE i < octet_length(mvt.ST_AsMVT) + ), + sorted AS (SELECT * FROM chunk ORDER BY i) + SELECT substring(mvt.ST_AsMVT FROM sorted.i FOR {CHUNK_SIZE}) FROM mvt, sorted + ) AS chunked_mvt WHERE octet_length(substring) > 0 + """, + params=["default", self._EXTENT, self._MVT_ROW, *params], + ) + for chunk in cursor: + yield chunk[0] + + def _property_fields(self, z) -> tuple: + """Returns fields to include in the tile as MVT properties alongside the geometry.""" # If we are zoomed far out (low z), only fetch the geometry field for a smaller payload. # The cutoff is arbitrary. Play around with # https://www.maptiler.com/google-maps-coordinates-tile-bounds-projection/#14/4.92/52.37 # to get a feel for the MVT zoom levels and how much detail a single tile should contain. - if self.z < 15: + if z < 15: return () - geom_name = self.vector_tile_geom_name user_scopes = self.request.user_scopes return tuple( f.name for f in self.model._meta.get_fields() - if f.name != geom_name and user_scopes.has_field_access(self.model.get_field_schema(f)) + if f.name != self._main_geo + and user_scopes.has_field_access(self.model.get_field_schema(f)) ) - @property - def vector_tile_geom_name(self) -> str: - schema: DatasetTableSchema = self.model.table_schema() - try: - return schema.main_geometry_field.python_name - except SchemaObjectNotFound as e: - raise FieldDoesNotExist(f"No field named '{schema.main_geometry}'") from e - def check_permissions(self, request, models) -> None: """Override CheckPermissionsMixin to add extra checks""" super().check_permissions(request, models) # Check whether the geometry field can be accessed, otherwise reading MVT is pointless. - model_field = self.model._meta.get_field(self.vector_tile_geom_name) + model_field = self.model._meta.get_field(self._main_geo) field_schema = self.model.get_field_schema(model_field) if not self.request.user_scopes.has_field_access(field_schema): raise PermissionDenied() diff --git a/src/tests/test_dynamic_api/test_views_mvt.py b/src/tests/test_dynamic_api/test_views_mvt.py index 22e41492f..ff190d8d7 100644 --- a/src/tests/test_dynamic_api/test_views_mvt.py +++ b/src/tests/test_dynamic_api/test_views_mvt.py @@ -3,6 +3,7 @@ import mapbox_vector_tile import pytest from django.contrib.gis.geos import Point +from django.http.response import HttpResponse, HttpResponseBase, StreamingHttpResponse from django.utils.timezone import make_aware CONTENT_TYPE = "application/vnd.mapbox-vector-tile" @@ -130,7 +131,7 @@ def test_mvt_content(api_client, afval_container_model, afval_cluster, filled_ro assert response.status_code == 200 assert response["Content-Type"] == CONTENT_TYPE - vt = mapbox_vector_tile.decode(response.content) + vt = decode_mvt(response) assert vt == { "default": { @@ -160,11 +161,10 @@ def test_mvt_content(api_client, afval_container_model, afval_cluster, filled_ro # Try again at a higher zoom level. We should get the same features, but with no properties. url = "/v1/mvt/afvalwegingen/containers/14/8415/5384.pbf" response = api_client.get(url) - # MVT view returns 204 when the tile is empty. assert response.status_code == 200 assert response["Content-Type"] == CONTENT_TYPE - vt = mapbox_vector_tile.decode(response.content) + vt = decode_mvt(response) assert vt == { "default": { @@ -181,6 +181,13 @@ def test_mvt_content(api_client, afval_container_model, afval_cluster, filled_ro } } + # MVT view returns 204 when the tile is empty. + url = "/v1/mvt/afvalwegingen/containers/14/0/0.pbf" + response = api_client.get(url) + assert response.status_code == 204 + assert response["Content-Type"] == CONTENT_TYPE + assert response.content == b"" + @pytest.mark.django_db def test_mvt_forbidden(api_client, geometry_auth_thing, fetch_auth_token, filled_router): @@ -226,7 +233,7 @@ def test_mvt_model_auth(api_client, geometry_auth_model, fetch_auth_token, fille token = fetch_auth_token(["TEST/GEO", "TEST/META"]) response = api_client.get(url, HTTP_AUTHORIZATION=f"Bearer {token}") assert response.status_code == 200 - assert mapbox_vector_tile.decode(response.content) == content + assert decode_mvt(response) == content # With only the GEO scope, we still get a 200 response # but we lose access to the metadata field. @@ -235,4 +242,14 @@ def test_mvt_model_auth(api_client, geometry_auth_model, fetch_auth_token, fille assert response.status_code == 200 del content["default"]["features"][0]["properties"]["metadata"] - assert mapbox_vector_tile.decode(response.content) == content + assert decode_mvt(response) == content + + +def decode_mvt(response: HttpResponseBase) -> bytes: + if isinstance(response, HttpResponse): + content = response.content + elif isinstance(response, StreamingHttpResponse): + content = b"".join(response.streaming_content) + else: + raise TypeError(f"unexpected {type(response)}") + return mapbox_vector_tile.decode(content)