Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove LIST and some requirements #22

Merged
merged 1 commit into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
* update requirements
- `rio-tiler>=6.0,<7.0`
- `fastapi>=0.100.0`
- `rasterio>=1.3.8`

* remove `wurlitzer` dependency

* only use `rasterio` logs

* remove `LIST` information **breaking change**

## 0.10.0 (2023-06-02)

Expand Down
45 changes: 6 additions & 39 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
---


Inspect HEAD/LIST/GET requests within Rasterio.
Inspect HEAD/GET requests within Rasterio.

Note: In GDAL 3.2, logging capabilities for /vsicurl, /vsis3 and the like were added (ref: https://github.com/OSGeo/gdal/pull/2742).

Expand Down Expand Up @@ -66,7 +66,7 @@ def info(src_path: str):

meta = info("https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif")

> 2022-10-25T00:20:24.215385+0200 | TILEBENCH | {"LIST": {"count": 0}, "HEAD": {"count": 1}, "GET": {"count": 1, "bytes": 32768, "ranges": ["0-32767"]}, "Timing": 0.8705799579620361}
> 2023-10-18T23:00:11.184745+0200 | TILEBENCH | {"HEAD": {"count": 1}, "GET": {"count": 1, "bytes": 32768, "ranges": ["0-32767"]}, "Timing": 0.7379939556121826}
```

```python
Expand All @@ -85,7 +85,7 @@ img = _read_tile(
15,
)

> 2022-10-25T00:21:32.895752+0200 | TILEBENCH | {"LIST": {"count": 0}, "HEAD": {"count": 1}, "GET": {"count": 2, "bytes": 409600, "ranges": ["0-32767", "32768-409599"]}, "Timing": 1.2970409393310547}
> 2023-10-18T23:01:00.572263+0200 | TILEBENCH | {"HEAD": {"count": 1}, "GET": {"count": 2, "bytes": 409600, "ranges": ["0-32767", "32768-409599"]}, "Timing": 1.0749869346618652}
```

## Command Line Interface (CLI)
Expand Down Expand Up @@ -119,9 +119,6 @@ $ tilebench random https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/2022
$ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif --tile 15-9114-13215 --config GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR | jq
{
"LIST": {
"count": 0
},
"HEAD": {
"count": 1
},
Expand All @@ -133,14 +130,11 @@ $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/202
"32768-409599"
]
},
"Timing": 1.2364399433135986
"Timing": 0.9715230464935303
}
$ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/20221002a_RGB/20221002aC0795145w325100n.tif --tile 15-9114-13215 --config GDAL_DISABLE_READDIR_ON_OPEN=FALSE | jq
{
"LIST": {
"count": 1
},
"HEAD": {
"count": 8
},
Expand All @@ -152,7 +146,7 @@ $ tilebench profile https://noaa-eri-pds.s3.amazonaws.com/2022_Hurricane_Ian/202
"32768-409599"
]
},
"Timing": 2.2018940448760986
"Timing": 2.1837549209594727
}
```

Expand Down Expand Up @@ -227,34 +221,7 @@ docker run \

## Contribution & Development

Issues and pull requests are more than welcome.

**dev install**

```bash
$ git clone https://github.com/developmentseed/tilebench.git
$ cd tilebench
$ pip install -e .[dev]
```

**pre-commit**

This repo is set to use `pre-commit` to run *isort*, *flake8*, *pydocstring*, *black* ("uncompromising Python code formatter") and mypy when committing new code.

```
$ pre-commit install
$ git add .
$ git commit -m'my change'
isort....................................................................Passed
black....................................................................Passed
Flake8...................................................................Passed
Verifying PEP257 Compliance..............................................Passed
mypy.....................................................................Passed
$ git push origin
```
See [CONTRIBUTING.md](https://github.com/developmentseed/tilebench/blob/main/CONTRIBUTING.md)

## License

Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ dependencies = [
"fastapi>=0.100.0",
"jinja2>=3.0,<4.0.0",
"loguru",
"rasterio>=1.3.0",
"rasterio>=1.3.8",
"rio-tiler>=6.0,<7.0",
"wurlitzer",
"uvicorn[standard]",
]

Expand Down
10 changes: 4 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_profile():
assert not result.exception
assert result.exit_code == 0
log = json.loads(result.output)
assert ["LIST", "HEAD", "GET", "Timing"] == list(log)
assert ["HEAD", "GET", "Timing"] == list(log)
# Make sure we didn't cache any request when `--tile` is not provided
assert "0-" in log["GET"]["ranges"][0]

Expand All @@ -38,15 +38,15 @@ def test_profile():
assert not result.exception
assert result.exit_code == 0
log = json.loads(result.output)
assert ["LIST", "HEAD", "GET", "Timing"] == list(log)
assert ["HEAD", "GET", "Timing"] == list(log)

result = runner.invoke(
cli, ["profile", COG_PATH, "--tilesize", 512, "--tile", "16-18229-26433"]
)
assert not result.exception
assert result.exit_code == 0
log = json.loads(result.output)
assert ["LIST", "HEAD", "GET", "Timing"] == list(log)
assert ["HEAD", "GET", "Timing"] == list(log)

result = runner.invoke(
cli, ["profile", COG_PATH, "--add-kernels", "--add-stdout", "--add-cprofile"]
Expand All @@ -55,14 +55,12 @@ def test_profile():
assert result.exit_code == 0
log = json.loads(result.output)
assert [
"LIST",
"HEAD",
"GET",
"WarpKernels",
"Timing",
"cprofile",
"curl",
"rasterio",
"logs",
] == list(log)


Expand Down
2 changes: 0 additions & 2 deletions tests/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def skip():
assert response.headers["Cache-Control"] == "no-cache"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "list;count=" in stats
assert "head;count=" in stats
assert "get;count=" in stats

Expand All @@ -50,7 +49,6 @@ def skip():
assert response.headers["content-type"] == "application/json"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "list;count=" in stats
assert "head;count=" in stats
assert "get;count=" in stats

Expand Down
2 changes: 1 addition & 1 deletion tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
assert data.shape
assert mask.shape
assert stats
assert stats.get("LIST")
assert stats.get("HEAD")
assert stats.get("GET")
assert stats.get("Timing")
assert stats.get("WarpKernels")
1 change: 0 additions & 1 deletion tests/test_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def test_viz():
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats
assert "list;count=" in stats

response = client.get("/info.geojson")
assert response.status_code == 200
Expand Down
56 changes: 21 additions & 35 deletions tilebench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,26 @@

import rasterio
from loguru import logger as log
from wurlitzer import pipes

fmt = "{time} | TILEBENCH | {message}"
log.remove()
log.add(sys.stderr, format=fmt)


def parse_logs(rio_lines: List[str], curl_lines: List[str]) -> Dict[str, Any]:
def parse_logs(logs: List[str]) -> Dict[str, Any]:
"""Parse Rasterio and CURL logs."""
# LIST
list_requests = [line for line in rio_lines if " VSICURL: GetFileList" in line]
list_summary = {
"count": len(list_requests),
}

# HEAD
curl_head_requests = [line for line in curl_lines if line.startswith("> HEAD")]
head_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: HEAD" in line])
head_summary = {
"count": len(curl_head_requests),
"count": head_requests,
}

# CURL GET
# CURL logs failed requests
curl_get_requests = [line for line in curl_lines if line.startswith("> GET")]
# GET
all_get_requests = len(
[line for line in logs if "CURL_INFO_HEADER_OUT: GET" in line]
)

# Rasterio GET
# Rasterio only logs successful requests
get_requests = [line for line in rio_lines if ": Downloading" in line]
get_requests = [line for line in logs if ": Downloading" in line]
get_values = [
map(int, get.split(" Downloading ")[1].split(" ")[0].split("-"))
for get in get_requests
Expand All @@ -49,17 +41,14 @@ def parse_logs(rio_lines: List[str], curl_lines: List[str]) -> Dict[str, Any]:
data_transfer = sum([j - i + 1 for i, j in get_values])

get_summary = {
"count": len(curl_get_requests),
"count": all_get_requests,
"bytes": data_transfer,
"ranges": get_values_str,
}

warp_kernel = [
line.split(" ")[-2:] for line in rio_lines if "GDALWarpKernel" in line
]
warp_kernel = [line.split(" ")[-2:] for line in logs if "GDALWarpKernel" in line]

return {
"LIST": list_summary,
"HEAD": head_summary,
"GET": get_summary,
"WarpKernels": warp_kernel,
Expand Down Expand Up @@ -88,25 +77,23 @@ def wrapped_f(*args, **kwargs):
logger.addHandler(handler)

gdal_config = config or {}
gdal_config.update({"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"})
gdal_config.update({"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "YES"})

with pipes() as (_, curl_stream):
with rasterio.Env(**gdal_config):
with Timer() as t:
prof = cProfile.Profile()
retval = prof.runcall(func, *args, **kwargs)
profile_stream = StringIO()
ps = pstats.Stats(prof, stream=profile_stream)
ps.strip_dirs().sort_stats("time", "ncalls").print_stats()
with rasterio.Env(**gdal_config):
with Timer() as t:
prof = cProfile.Profile()
retval = prof.runcall(func, *args, **kwargs)
profile_stream = StringIO()
ps = pstats.Stats(prof, stream=profile_stream)
ps.strip_dirs().sort_stats("time", "ncalls").print_stats()

logger.removeHandler(handler)
handler.close()

rio_lines = rio_stream.getvalue().splitlines()
curl_lines = curl_stream.read().splitlines()
logs = rio_stream.getvalue().splitlines()
profile_lines = [p for p in profile_stream.getvalue().splitlines() if p]

results = parse_logs(rio_lines, curl_lines)
results = parse_logs(logs)
results["Timing"] = t.elapsed

if cprofile:
Expand All @@ -119,8 +106,7 @@ def wrapped_f(*args, **kwargs):
results.pop("WarpKernels")

if raw:
results["curl"] = curl_lines
results["rasterio"] = rio_lines
results["logs"] = logs

if not quiet:
log.info(json.dumps(results))
Expand Down
14 changes: 5 additions & 9 deletions tilebench/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.types import ASGIApp, Message, Receive, Scope, Send
from wurlitzer import pipes

from tilebench import parse_logs

Expand Down Expand Up @@ -41,27 +40,24 @@ async def dispatch(self, request: Request, call_next):
logger.addHandler(handler)

gdal_config = {"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"}
with pipes() as (_, curl_stream):
with rasterio.Env(**gdal_config, **self.config):
response = await call_next(request)
with rasterio.Env(**gdal_config, **self.config):
response = await call_next(request)

logger.removeHandler(handler)
handler.close()

if rio_stream or curl_stream:
if rio_stream:
rio_lines = rio_stream.getvalue().splitlines()
curl_lines = curl_stream.read().splitlines()

results = parse_logs(rio_lines, curl_lines)
results = parse_logs(rio_lines)
head_results = "head;count={count}".format(**results["HEAD"])
list_results = "list;count={count}".format(**results["LIST"])
get_results = "get;count={count};size={bytes}".format(**results["GET"])
ranges_results = "ranges; values={}".format(
"|".join(results["GET"]["ranges"])
)
response.headers[
"VSI-Stats"
] = f"{list_results}, {head_results}, {get_results}, {ranges_results}"
] = f"{head_results}, {get_results}, {ranges_results}"

return response

Expand Down
8 changes: 2 additions & 6 deletions tilebench/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -449,18 +449,14 @@
// Display the key/value pairs
const stats = data.get("vsi-stats")
if (stats) {
const list = stats.split(',')[0]
const listCount = list.split(';')[1].split('=')[1]

const head = stats.split(',')[1]
const head = stats.split(',')[0]
const headCount = head.split(';')[1].split('=')[1]

const get = stats.split(',')[2]
const get = stats.split(',')[1]
const getCount = get.split(';')[1].split('=')[1]
const getSize = get.split(';')[2].split('=')[1]

html += '<table>'
html += `<tr><td class="align-l">LIST</td><td class="px3 align-r">${listCount}</td></tr>`
html += `<tr><td class="align-l">HEAD</td><td class="px3 align-r">${headCount}</td></tr>`
html += `<tr><td class="align-l">GET</td><td class="px3 align-r">${getCount}</td></tr>`
html += `<tr><td class="align-l">GET (bytes)</td><td class="px3 align-r">${getSize}</td></tr>`
Expand Down
3 changes: 1 addition & 2 deletions tilebench/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,13 @@ def _read_tile(src_path: str, x: int, y: int, z: int):
(_, _), stats = _read_tile(self.src_path, x, y, z)

head_results = "head;count={count}".format(**stats["HEAD"])
list_results = "list;count={count}".format(**stats["LIST"])
get_results = "get;count={count};size={bytes}".format(**stats["GET"])
ranges_results = "ranges; values={}".format(
"|".join(stats["GET"]["ranges"])
)
response.headers[
"VSI-Stats"
] = f"{list_results}, {head_results}, {get_results}, {ranges_results}"
] = f"{head_results}, {get_results}, {ranges_results}"

response.headers[
"server-timing"
Expand Down
Loading