Skip to content

Commit

Permalink
60 handle errors from non http url sources (#61)
Browse files Browse the repository at this point in the history
* generic error handling

* generic error handling

* #60

* #60

* backup downloader added

* add tests for ftp

* fix urls

* fix test

* cleanup

* wget only

* ftp test added

* multiurl updated to handle ftp properly

* qa

* unit tests

* qa

* removing unused test

* clean up and extra download test

* qa

* Mattia comments addressed

---------

Co-authored-by: Mattia Almansi <m.almansi@bopen.eu>
  • Loading branch information
EddyCMWF and malmans2 authored Nov 6, 2023
1 parent 963cb04 commit 7496113
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 14 deletions.
20 changes: 9 additions & 11 deletions cads_adaptors/tools/url_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import jinja2
import multiurl
import requests
import yaml

from . import hcube_tools

Expand All @@ -31,25 +32,22 @@ def requests_to_urls(

def try_download(urls: List[str]) -> List[str]:
    """Download every URL in ``urls`` into the current working directory.

    The local path of each file mirrors the path component of its URL
    (leading ``/`` stripped); missing parent directories are created first.
    A failure on one URL is logged as a warning and does not stop the
    remaining downloads.

    Args:
        urls: URLs to fetch; each one is handed to ``multiurl.download``.

    Returns:
        Relative paths of the files that were downloaded successfully, in the
        same order as their URLs.

    Raises:
        RuntimeError: If none of the URLs could be downloaded.
    """
    paths = []
    for url in urls:
        path = urllib.parse.urlparse(url).path.lstrip("/")
        parent_dir = os.path.dirname(path)
        # os.makedirs("") raises, so only create a directory when the URL
        # path actually contains one.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        try:
            multiurl.download(url, path)
        except Exception as exc:
            # Deliberately broad: sources are not limited to HTTP (e.g. FTP),
            # so failures are not always requests.exceptions.HTTPError.
            logger.warning(f"Failed download for URL: {url}\nTraceback: {exc}")
        else:
            # Only record the path once the download has actually succeeded.
            paths.append(path)

    if not paths:
        raise RuntimeError(
            f"Request empty. At least one of the following:\n{yaml.safe_dump(urls, indent=2)} "
            "must be a valid url from which to download the data. "
        )
    return paths

Expand Down
1 change: 1 addition & 0 deletions ci/environment-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ dependencies:
- types-python-dateutil
- types-pyyaml
- types-requests
- pytest-localftpserver
- pip:
- git+https://github.com/ecmwf-projects/cacholote.git
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- cfgrib
- h5netcdf
- wget
- multiurl>=0.2.3.2
- pyyaml
- pip:
- rooki
- multiurl
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ dependencies = [
"ecmwflibs",
"cfgrib",
"cacholote",
"multiurl",
"multiurl>=0.2.3.2",
"rooki",
"python-dateutil"
"python-dateutil",
"pyyaml"
]
description = "CADS data retrieve utilities to be used by adaptors"
dynamic = ["version"]
Expand Down
50 changes: 50 additions & 0 deletions tests/test_20_url_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os

import pytest

from cads_adaptors.tools import url_tools


@pytest.mark.parametrize(
    ("urls", "expected_nfiles"),
    [
        (
            [
                "https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/test_single.nc"
            ],
            1,
        ),
        (
            [
                "https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/test_single.nc",
                "https://get.ecmwf.int/repository/test-data/earthkit-data/test-data/test_single.grib",
            ],
            2,
        ),
    ],
)
def test_downloaders(tmp_path, monkeypatch, urls, expected_nfiles):
    """Check that try_download returns one path per URL in each case."""
    # try_download writes relative to the working directory, so run it
    # from inside the per-test temporary directory.
    monkeypatch.chdir(tmp_path)
    downloaded = url_tools.try_download(urls)
    assert len(downloaded) == expected_nfiles


@pytest.mark.parametrize(
    "anon",
    (
        True,
        False,
    ),
)
def test_ftp_download(tmp_path, monkeypatch, ftpserver, anon):
    """Download a file served over FTP and compare it with the original.

    Runs once per value of ``anon``, which is forwarded unchanged to
    ``ftpserver.put_files``.
    """
    local_test_file = os.path.join(tmp_path, "testfile.txt")
    with open(local_test_file, "w") as f:
        f.write("This is a test file")

    # NOTE(review): presumably put_files(style="url") returns a list of URLs,
    # since the result is passed straight to try_download -- confirm against
    # the pytest-localftpserver documentation.
    ftp_urls = ftpserver.put_files(local_test_file, style="url", anon=anon)

    work_dir = os.path.join(tmp_path, "work_dir")
    os.makedirs(work_dir)
    # Use monkeypatch.chdir rather than os.chdir so the working directory is
    # restored at teardown and does not leak into other tests.
    monkeypatch.chdir(work_dir)

    local_test_download = url_tools.try_download(ftp_urls)[0]
    with open(local_test_file) as original, open(local_test_download) as downloaded:
        assert original.read() == downloaded.read()

0 comments on commit 7496113

Please sign in to comment.