From 296891a580f61638d80f24b820c0e8bf7576c546 Mon Sep 17 00:00:00 2001 From: Eddy Comyn-Platt <53045993+EddyCMWF@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:11:14 +0000 Subject: [PATCH] fix for URL adaptor (#239) --- cads_adaptors/tools/url_tools.py | 7 ++++++- tests/test_20_url_tools.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/cads_adaptors/tools/url_tools.py b/cads_adaptors/tools/url_tools.py index 77e4a25e..8b034d7c 100644 --- a/cads_adaptors/tools/url_tools.py +++ b/cads_adaptors/tools/url_tools.py @@ -37,7 +37,9 @@ def requests_to_urls( yield {"url": url, "req": req} -def try_download(urls: List[str], context: Context, **kwargs) -> List[str]: +def try_download( + urls: List[str], context: Context, server_suggested_filename=False, **kwargs +) -> List[str]: # Ensure that URLs are unique to prevent downloading the same file multiple times urls = sorted(set(urls)) @@ -48,6 +50,9 @@ def try_download(urls: List[str], context: Context, **kwargs) -> List[str]: kwargs = {"timeout": 3, "maximum_retries": 1, "retry_after": 1, **kwargs} for url in urls: path = urllib.parse.urlparse(url).path.lstrip("/") + if server_suggested_filename: + path = os.path.join(os.path.dirname(path), multiurl.Downloader(url).title()) + dir = os.path.dirname(path) if dir: os.makedirs(dir, exist_ok=True) diff --git a/tests/test_20_url_tools.py b/tests/test_20_url_tools.py index 4753b396..74beeba2 100644 --- a/tests/test_20_url_tools.py +++ b/tests/test_20_url_tools.py @@ -38,6 +38,27 @@ def test_downloaders(tmp_path, monkeypatch, urls, expected_nfiles): assert len(paths) == expected_nfiles +def test_download_with_server_suggested_filename(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) # try_download generates files in the working dir + urls = ["https://gerb.oma.be/c3s/data/ceres-ebaf/tcdr/v4.2/toa_lw_all_mon/2000/07"] + paths_false = url_tools.try_download( + urls, context=url_tools.Context(), server_suggested_filename=False + ) + assert len(paths_false) == 1 + assert os.path.basename(paths_false[0]) == "07" + + paths_true = url_tools.try_download( + urls, context=url_tools.Context(), server_suggested_filename=True + ) + assert len(paths_true) == 1 + assert ( + os.path.basename(paths_true[0]) + == "data_312a_Lot1_ceres-ebaf_tcdr_v4.2_toa_lw_all_mon_2000_07.nc" + ) + + assert os.path.dirname(paths_false[0]) == os.path.dirname(paths_true[0]) + + @pytest.mark.parametrize( "anon", (