From b538ccfba5fa3f19c9ed3d8224b238a669e908dc Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Wed, 18 Oct 2023 23:03:14 +0200 Subject: [PATCH] remove LIST and some requirements --- CHANGES.md | 7 +++++ README.md | 45 ++++----------------------- pyproject.toml | 3 +- tests/test_cli.py | 10 +++--- tests/test_middleware.py | 2 -- tests/test_reader.py | 2 +- tests/test_viz.py | 1 - tilebench/__init__.py | 56 +++++++++++++--------------------- tilebench/middleware.py | 14 +++------ tilebench/templates/index.html | 8 ++--- tilebench/viz.py | 3 +- 11 files changed, 48 insertions(+), 103 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 421dc67..21828c0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,13 @@ * update requirements - `rio-tiler>=6.0,<7.0` - `fastapi>=0.100.0` + - `rasterio>=1.3.8` + +* remove `wurlitzer` dependency + +* only use `rasterio` logs + +* remove `LIST` information **breaking change** ## 0.10.0 (2023-06-02) diff --git a/README.md b/README.md index c551859..a3c884c 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ --- -Inspect HEAD/LIST/GET requests withing Rasterio. +Inspect HEAD/GET requests withing Rasterio. Note: In GDAL 3.2, logging capabilities for /vsicurl, /vsis3 and the like was added (ref: https://github.com/OSGeo/gdal/pull/2742). @@ -66,7 +66,7 @@ def info(src_path: str): meta = info("https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif") -> 2022-10-25T00:20:24.215385+0200 | TILEBENCH | {"LIST": {"count": 0}, "HEAD": {"count": 1}, "GET": {"count": 1, "bytes": 32768, "ranges": ["0-32767"]}, "Timing": 0.8705799579620361} +> 2023-10-18T23:00:11.184745+0200 | TILEBENCH | {"HEAD": {"count": 1}, "GET": {"count": 1, "bytes": 32768, "ranges": ["0-32767"]}, "Timing": 0.7379939556121826} ``` ```python @@ -85,7 +85,7 @@ img = _read_tile( 15, ) -> 2022-10-25T00:21:32.895752+0200 | TILEBENCH | {"LIST": {"count": 0}, "HEAD": {"count": 1}, "GET": {"count": 2, "bytes": 409600, "ranges": ["0-32767", "32768-409599"]}, "Timing": 1.2970409393310547} +> 2023-10-18T23:01:00.572263+0200 | TILEBENCH | {"HEAD": {"count": 1}, "GET": {"count": 2, "bytes": 409600, "ranges": ["0-32767", "32768-409599"]}, "Timing": 1.0749869346618652} ``` ## Command Line Interface (CLI) @@ -119,9 +119,6 @@ $ tilebench random https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/2022 $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif --tile 15-9114-13215 --config GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR | jq { - "LIST": { - "count": 0 - }, "HEAD": { "count": 1 }, @@ -133,14 +130,11 @@ $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/202 "32768-409599" ] }, - "Timing": 1.2364399433135986 + "Timing": 0.9715230464935303 } $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif --tile 15-9114-13215 --config GDAL_DISABLE_READDIR_ON_OPEN=FALSE | jq { - "LIST": { - "count": 1 - }, "HEAD": { "count": 8 }, @@ -152,7 +146,7 @@ $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/202 "32768-409599" ] }, - "Timing": 2.2018940448760986 + "Timing": 2.1837549209594727 } ``` @@ -227,34 +221,7 @@ docker run \ ## Contribution & Development -Issues and pull requests are more than welcome. - -**dev install** - -```bash -$ git clone https://github.com/developmentseed/tilebench.git -$ cd tilebench -$ pip install -e .[dev] -``` - -**pre-commit** - -This repo is set to use `pre-commit` to run *isort*, *flake8*, *pydocstring*, *black* ("uncompromising Python code formatter") and mypy when committing new code. - -``` -$ pre-commit install - -$ git add . - -$ git commit -m'my change' -isort....................................................................Passed -black....................................................................Passed -Flake8...................................................................Passed -Verifying PEP257 Compliance..............................................Passed -mypy.....................................................................Passed - -$ git push origin -``` +See [CONTRIBUTING.md](https://github.com/developmentseed/tilebench/blob/main/CONTRIBUTING.md) ## License diff --git a/pyproject.toml b/pyproject.toml index d3dc124..0b01bf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,9 +23,8 @@ dependencies = [ "fastapi>=0.100.0", "jinja2>=3.0,<4.0.0", "loguru", - "rasterio>=1.3.0", + "rasterio>=1.3.8", "rio-tiler>=6.0,<7.0", - "wurlitzer", "uvicorn[standard]", ] diff --git a/tests/test_cli.py b/tests/test_cli.py index e58c039..a65b4ec 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -18,7 +18,7 @@ def test_profile(): assert not result.exception assert result.exit_code == 0 log = json.loads(result.output) - assert ["LIST", "HEAD", "GET", "Timing"] == list(log) + assert ["HEAD", "GET", "Timing"] == list(log) # Make sure we didn't cache any request when `--tile` is not provided assert "0-" in log["GET"]["ranges"][0] @@ -38,7 +38,7 @@ def test_profile(): assert not result.exception assert result.exit_code == 0 log = json.loads(result.output) - assert ["LIST", "HEAD", "GET", "Timing"] == list(log) + assert ["HEAD", "GET", "Timing"] == list(log) result = runner.invoke( cli, ["profile", COG_PATH, "--tilesize", 512, "--tile", "16-18229-26433"] @@ -46,7 +46,7 @@ def test_profile(): assert not result.exception assert result.exit_code == 0 log = json.loads(result.output) - assert ["LIST", "HEAD", "GET", "Timing"] == list(log) + assert ["HEAD", "GET", "Timing"] == list(log) result = runner.invoke( cli, ["profile", COG_PATH, "--add-kernels", "--add-stdout", "--add-cprofile"] @@ -55,14 +55,12 @@ def test_profile(): assert result.exit_code == 0 log = json.loads(result.output) assert [ - "LIST", "HEAD", "GET", "WarpKernels", "Timing", "cprofile", - "curl", - "rasterio", + "logs", ] == list(log) diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 998e6a4..fcf4a26 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -41,7 +41,6 @@ def skip(): assert response.headers["Cache-Control"] == "no-cache" assert response.headers["VSI-Stats"] stats = response.headers["VSI-Stats"] - assert "list;count=" in stats assert "head;count=" in stats assert "get;count=" in stats @@ -50,7 +49,6 @@ def skip(): assert response.headers["content-type"] == "application/json" assert response.headers["VSI-Stats"] stats = response.headers["VSI-Stats"] - assert "list;count=" in stats assert "head;count=" in stats assert "get;count=" in stats diff --git a/tests/test_reader.py b/tests/test_reader.py index db1c84a..323265b 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -37,7 +37,7 @@ def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256): assert data.shape assert mask.shape assert stats - assert stats.get("LIST") + assert stats.get("HEAD") assert stats.get("GET") assert stats.get("Timing") assert stats.get("WarpKernels") diff --git a/tests/test_viz.py b/tests/test_viz.py index bc96a0c..2335a87 100644 --- a/tests/test_viz.py +++ b/tests/test_viz.py @@ -28,7 +28,6 @@ def test_viz(): stats = response.headers["VSI-Stats"] assert "head;count=" in stats assert "get;count=" in stats - assert "list;count=" in stats response = client.get("/info.geojson") assert response.status_code == 200 diff --git a/tilebench/__init__.py b/tilebench/__init__.py index f790a8a..0a5bf99 100644 --- a/tilebench/__init__.py +++ b/tilebench/__init__.py @@ -13,34 +13,26 @@ import rasterio from loguru import logger as log -from wurlitzer import pipes fmt = "{time} | TILEBENCH | {message}" log.remove() log.add(sys.stderr, format=fmt) -def parse_logs(rio_lines: List[str], curl_lines: List[str]) -> Dict[str, Any]: +def parse_logs(logs: List[str]) -> Dict[str, Any]: """Parse Rasterio and CURL logs.""" - # LIST - list_requests = [line for line in rio_lines if " VSICURL: GetFileList" in line] - list_summary = { - "count": len(list_requests), - } - # HEAD - curl_head_requests = [line for line in curl_lines if line.startswith("> HEAD")] + head_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: HEAD" in line]) head_summary = { - "count": len(curl_head_requests), + "count": head_requests, } - # CURL GET - # CURL logs failed requests - curl_get_requests = [line for line in curl_lines if line.startswith("> GET")] + # GET + all_get_requests = len( + [line for line in logs if "CURL_INFO_HEADER_OUT: GET" in line] + ) - # Rasterio GET - # Rasterio only log successfull requests - get_requests = [line for line in rio_lines if ": Downloading" in line] + get_requests = [line for line in logs if ": Downloading" in line] get_values = [ map(int, get.split(" Downloading ")[1].split(" ")[0].split("-")) for get in get_requests @@ -49,17 +41,14 @@ def parse_logs(rio_lines: List[str], curl_lines: List[str]) -> Dict[str, Any]: data_transfer = sum([j - i + 1 for i, j in get_values]) get_summary = { - "count": len(curl_get_requests), + "count": all_get_requests, "bytes": data_transfer, "ranges": get_values_str, } - warp_kernel = [ - line.split(" ")[-2:] for line in rio_lines if "GDALWarpKernel" in line - ] + warp_kernel = [line.split(" ")[-2:] for line in logs if "GDALWarpKernel" in line] return { - "LIST": list_summary, "HEAD": head_summary, "GET": get_summary, "WarpKernels": warp_kernel, @@ -88,25 +77,23 @@ def wrapped_f(*args, **kwargs): logger.addHandler(handler) gdal_config = config or {} - gdal_config.update({"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"}) + gdal_config.update({"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "YES"}) - with pipes() as (_, curl_stream): - with rasterio.Env(**gdal_config): - with Timer() as t: - prof = cProfile.Profile() - retval = prof.runcall(func, *args, **kwargs) - profile_stream = StringIO() - ps = pstats.Stats(prof, stream=profile_stream) - ps.strip_dirs().sort_stats("time", "ncalls").print_stats() + with rasterio.Env(**gdal_config): + with Timer() as t: + prof = cProfile.Profile() + retval = prof.runcall(func, *args, **kwargs) + profile_stream = StringIO() + ps = pstats.Stats(prof, stream=profile_stream) + ps.strip_dirs().sort_stats("time", "ncalls").print_stats() logger.removeHandler(handler) handler.close() - rio_lines = rio_stream.getvalue().splitlines() - curl_lines = curl_stream.read().splitlines() + logs = rio_stream.getvalue().splitlines() profile_lines = [p for p in profile_stream.getvalue().splitlines() if p] - results = parse_logs(rio_lines, curl_lines) + results = parse_logs(logs) results["Timing"] = t.elapsed if cprofile: @@ -119,8 +106,7 @@ def wrapped_f(*args, **kwargs): results.pop("WarpKernels") if raw: - results["curl"] = curl_lines - results["rasterio"] = rio_lines + results["logs"] = logs if not quiet: log.info(json.dumps(results)) diff --git a/tilebench/middleware.py b/tilebench/middleware.py index e38f337..2562477 100644 --- a/tilebench/middleware.py +++ b/tilebench/middleware.py @@ -9,7 +9,6 @@ from starlette.middleware.base import BaseHTTPMiddleware from starlette.requests import Request from starlette.types import ASGIApp, Message, Receive, Scope, Send -from wurlitzer import pipes from tilebench import parse_logs @@ -41,27 +40,24 @@ async def dispatch(self, request: Request, call_next): logger.addHandler(handler) gdal_config = {"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"} - with pipes() as (_, curl_stream): - with rasterio.Env(**gdal_config, **self.config): - response = await call_next(request) + with rasterio.Env(**gdal_config, **self.config): + response = await call_next(request) logger.removeHandler(handler) handler.close() - if rio_stream or curl_stream: + if rio_stream: rio_lines = rio_stream.getvalue().splitlines() - curl_lines = curl_stream.read().splitlines() - results = parse_logs(rio_lines, curl_lines) + results = parse_logs(rio_lines) head_results = "head;count={count}".format(**results["HEAD"]) - list_results = "list;count={count}".format(**results["LIST"]) get_results = "get;count={count};size={bytes}".format(**results["GET"]) ranges_results = "ranges; values={}".format( "|".join(results["GET"]["ranges"]) ) response.headers[ "VSI-Stats" - ] = f"{list_results}, {head_results}, {get_results}, {ranges_results}" + ] = f"{head_results}, {get_results}, {ranges_results}" return response diff --git a/tilebench/templates/index.html b/tilebench/templates/index.html index eee5dd2..0794225 100644 --- a/tilebench/templates/index.html +++ b/tilebench/templates/index.html @@ -449,18 +449,14 @@ // Display the key/value pairs const stats = data.get("vsi-stats") if (stats) { - const list = stats.split(',')[0] - const listCount = list.split(';')[1].split('=')[1] - - const head = stats.split(',')[1] + const head = stats.split(',')[0] const headCount = head.split(';')[1].split('=')[1] - const get = stats.split(',')[2] + const get = stats.split(',')[1] const getCount = get.split(';')[1].split('=')[1] const getSize = get.split(';')[2].split('=')[1] html += '' - html += `` html += `` html += `` html += `` diff --git a/tilebench/viz.py b/tilebench/viz.py index 77c48b0..dae7eb2 100644 --- a/tilebench/viz.py +++ b/tilebench/viz.py @@ -223,14 +223,13 @@ def _read_tile(src_path: str, x: int, y: int, z: int): (_, _), stats = _read_tile(self.src_path, x, y, z) head_results = "head;count={count}".format(**stats["HEAD"]) - list_results = "list;count={count}".format(**stats["LIST"]) get_results = "get;count={count};size={bytes}".format(**stats["GET"]) ranges_results = "ranges; values={}".format( "|".join(stats["GET"]["ranges"]) ) response.headers[ "VSI-Stats" - ] = f"{list_results}, {head_results}, {get_results}, {ranges_results}" + ] = f"{head_results}, {get_results}, {ranges_results}" response.headers[ "server-timing"
LIST${listCount}
HEAD${headCount}
GET${getCount}
GET (bytes)${getSize}