From 8efafe1f50284698d32fa4a5e6161b588d5eb6a8 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 7 Oct 2023 21:10:02 -0700 Subject: [PATCH] Fix issue where DEF 14A filings fail to download (#130) --- sec_edgar_downloader/_orchestrator.py | 9 ++++----- tests/conftest.py | 5 +++++ tests/test_end_to_end_integration.py | 18 ++++++++++++++++-- tests/test_orchestrator.py | 22 ++++++++++++---------- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/sec_edgar_downloader/_orchestrator.py b/sec_edgar_downloader/_orchestrator.py index a97b8cb..223f827 100644 --- a/sec_edgar_downloader/_orchestrator.py +++ b/sec_edgar_downloader/_orchestrator.py @@ -76,12 +76,11 @@ def aggregate_filings_to_download( for acc_num, form, doc, f_date in zip( # noqa: B905 accession_numbers, forms, documents, filing_dates ): + is_amend = form.endswith(AMENDS_SUFFIX) + form = form[:-2] if is_amend else form if ( - form.rstrip(AMENDS_SUFFIX) != download_metadata.form - or ( - not download_metadata.include_amends - and form.endswith(AMENDS_SUFFIX) - ) + form != download_metadata.form + or (not download_metadata.include_amends and is_amend) or not within_requested_date_range(download_metadata, f_date) ): continue diff --git a/tests/conftest.py b/tests/conftest.py index b77389c..d6e7751 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,11 @@ def form_10k() -> str: return "10-K" +@pytest.fixture(scope="session") +def form_def_14a() -> str: + return "DEF 14A" + + @pytest.fixture(scope="session") def apple_cik() -> str: return "0000320193" diff --git a/tests/test_end_to_end_integration.py b/tests/test_end_to_end_integration.py index 1115b0f..9053283 100644 --- a/tests/test_end_to_end_integration.py +++ b/tests/test_end_to_end_integration.py @@ -69,12 +69,26 @@ def test_integration_apple_10_k_given_include_amends( dl, dl_path = network_downloader assert directory_is_empty(dl_path) + dl.get(form_10k, apple_cik, before=date(2023, 9, 1), include_amends=True) + + downloaded_file_path = dl_path / ROOT_SAVE_FOLDER_NAME / apple_cik / form_10k + downloaded_acc_nums = downloaded_file_path.glob("*") + forms = downloaded_file_path.glob("*/*.txt") + + assert len(list(downloaded_acc_nums)) == len(list(forms)) == 29 + assert all(form.stat() > 0 for form in forms) + + +# Integration test for issue #129 +def test_integration_apple_def_14a_given_include_amends( + network_downloader, form_def_14a, apple_cik +): dl, dl_path = network_downloader assert directory_is_empty(dl_path) - dl.get(form_10k, apple_cik, before=date(2023, 9, 1), include_amends=True) + dl.get(form_def_14a, apple_cik, before=date(2023, 10, 7), include_amends=True) - downloaded_file_path = dl_path / ROOT_SAVE_FOLDER_NAME / apple_cik / form_10k + downloaded_file_path = dl_path / ROOT_SAVE_FOLDER_NAME / apple_cik / form_def_14a downloaded_acc_nums = downloaded_file_path.glob("*") forms = downloaded_file_path.glob("*/*.txt") diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 54b4b90..3e40a68 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -77,22 +77,24 @@ def test_save_document(tmp_path): not (Path(__file__).parent / "test_data").exists(), reason="test data is required" ) @pytest.mark.parametrize( - "limit,after_date,before_date,include_amends,expected_num_results", + "form,limit,after_date,before_date,include_amends,expected_num_results", [ # Test limit handling - (3, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, False, 3), - (sys.maxsize, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, False, 27), + ("10-K", 3, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, False, 3), + ("10-K", sys.maxsize, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, False, 27), # Test amends handling - (sys.maxsize, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, True, 29), + ("10-K", sys.maxsize, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, True, 29), # Test date range handling - (sys.maxsize, date(2008, 1, 1), date(2012, 1, 1), False, 4), + ("10-K", sys.maxsize, date(2008, 1, 1), date(2012, 1, 1), False, 4), + # Regression test for issue #129 + ("DEF 14A", sys.maxsize, DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE, True, 29), ], ) def test_aggregate_filings_to_download_given_multiple_pages( user_agent, - form_10k, apple_cik, accession_number_to_metadata, + form: str, limit: int, after_date: date, before_date: date, @@ -101,7 +103,7 @@ def test_aggregate_filings_to_download_given_multiple_pages( ): download_metadata = DownloadMetadata( download_folder=Path("."), - form=form_10k, + form=form, cik=apple_cik, limit=limit, after=after_date, @@ -122,8 +124,8 @@ def test_aggregate_filings_to_download_given_multiple_pages( assert len(result) == expected_num_results for td in result: metadata = accession_number_to_metadata[td.accession_number] - assert metadata["form"] == form_10k or ( - include_amends and metadata["form"] == f"{form_10k}/A" + assert metadata["form"] == form or ( + include_amends and metadata["form"] == f"{form}/A" ) assert metadata["filingDate"] >= after_date assert metadata["filingDate"] <= before_date @@ -155,7 +157,7 @@ def test_get_to_download_given_html(apple_cik, accession_number, form_8k_primary assert result.details_doc_suffix == ".html" -# Regression test for https://github.com/jadchaar/sec-edgar-downloader/issues/126 +# Regression test for issue #126 def test_get_to_download_given_cik_with_trailing_zero(): result = get_to_download( "0000312070", "0000950103-23-014290", "dp200734_424b2-5333pps.htm"