From ef68000e401097675e1e91b7e9ddef2076888f9a Mon Sep 17 00:00:00 2001 From: Audiosutras <37882933+Audiosutras@users.noreply.github.com> Date: Sun, 19 Nov 2023 08:19:08 -0500 Subject: [PATCH] Anna's Archive Instance Support (#13) * wip: add logic now working on tests * update ReadMe.md * update ReadMe.md * update: test_main.py * update: test_utils.py * fix: gracefully handle FileNotFoundError for to_filesystem method (#10) * update & resolve dependencies --- docs/ReadMe.md | 40 +++++++++++++++----- poetry.lock | 6 +-- src/getdat/main.py | 18 +++++++-- src/getdat/utils.py | 27 +++++++++++-- tests/test_main.py | 49 +++++++++++++++++++++--- tests/test_utils.py | 92 +++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 200 insertions(+), 32 deletions(-) diff --git a/docs/ReadMe.md b/docs/ReadMe.md index 0611e3c..7d9ba16 100644 --- a/docs/ReadMe.md +++ b/docs/ReadMe.md @@ -13,7 +13,17 @@

-![CI](https://github.com/Audiosutras/getdat/actions/workflows/ci.yml/badge.svg?branch=master) +
+ + PyPI - Downloads + + + CI + + + GitHub issues + +
Table of Contents @@ -97,22 +107,32 @@ Usage: getdat ebook [OPTIONS] [Q]... ex: getdat ebook Options: - -o, --output_dir TEXT Path to ebook's output directory from home directory. - Path must be prefixed by '~' on Unix or '~user' on - Windows. This argument overrides GETDAT_BOOK_DIR env - var if set. Outputs book to working directory if - neither are set. - -e, --ext [epub|pdf] Preferred ebook extension for search results - --help Show this message and exit. + -o, --output_dir TEXT Path to ebook's output directory from home + directory. Path must be prefixed by '~' on Unix + or '~user' on Windows. This argument overrides + GETDAT_BOOK_DIR env var if set. Outputs book to + working directory if neither are set. + -e, --ext [epub|pdf] Preferred ebook extension for search results - + Default: epub + -i, --instance [org|gs|se] The instance of Anna's Archive you would like to + use for your search: https://annas-archive.org, + https://annas-archive.gs, https://annas- + archive.se - Default: org + --help Show this message and exit. + ``` Example: ```bash --> getdat ebook Treasure Island Stevenson --ext=epub --output_dir=~/books/epub/ +-> getdat ebook Treasure Island Stevenson --ext=epub --output_dir=~/books/epub/ --instance=gs +``` +or +```bash +-> getdat ebook "Treasure Island Stevenson" -e epub -o ~/books/epub -i gs ``` or ```bash --> getdat ebook "Treasure Island Stevensonn" -e epub -o ~/books/epub +-> getdat ebook "Treasure Island Stevenson" ``` #### Environment Variable diff --git a/poetry.lock b/poetry.lock index 0a79eca..cf04870 100644 --- a/poetry.lock +++ b/poetry.lock @@ -193,13 +193,13 @@ typing = ["typing-extensions (>=4.8)"] [[package]] name = "identify" -version = "2.5.31" +version = "2.5.32" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.5.31-py2.py3-none-any.whl", hash = "sha256:90199cb9e7bd3c5407a9b7e81b4abec4bb9d249991c79439ec8af740afc6293d"}, - {file = "identify-2.5.31.tar.gz", hash = "sha256:7736b3c7a28233637e3c36550646fc6389bedd74ae84cb788200cc8e2dd60b75"}, + {file = "identify-2.5.32-py2.py3-none-any.whl", hash = "sha256:0b7656ef6cba81664b783352c73f8c24b39cf82f926f78f4550eda928e5e0545"}, + {file = "identify-2.5.32.tar.gz", hash = "sha256:5d9979348ec1a21c768ae07e0a652924538e8bce67313a73cb0f681cf08ba407"}, ] [package.extras] diff --git a/src/getdat/main.py b/src/getdat/main.py index 3e8b4f7..1fdd65c 100644 --- a/src/getdat/main.py +++ b/src/getdat/main.py @@ -39,15 +39,27 @@ def cinema(): "--ext", type=click.Choice(["epub", "pdf"]), default="epub", - help="Preferred ebook extension for search results", + help=("Preferred ebook extension for search results " "- Default: epub"), +) +@click.option( + "-i", + "--instance", + type=click.Choice(AnnasEbook._ANNAS_URLS.keys()), + default=AnnasEbook._ANNAS_ORG_URL, + help=( + "The instance of Anna's Archive you would like to " + "use for your search:\n " + f"{', '.join(AnnasEbook._ANNAS_URLS.values())}\n" + f"- Default: {AnnasEbook._ANNAS_ORG_URL}" + ), ) @click.argument("q", nargs=-1) -def ebook(q, ext, output_dir): +def ebook(q, ext, output_dir, instance): """Search and download an ebook available through Anna's Archive ex: getdat ebook """ if not q: print_help(EBOOK_ERROR_MSG) - ebook = AnnasEbook(q=q, ext=ext, output_dir=output_dir) + ebook = AnnasEbook(q=q, ext=ext, output_dir=output_dir, instance=instance) ebook.run() diff --git a/src/getdat/utils.py b/src/getdat/utils.py index e53cb97..1a10364 100644 --- a/src/getdat/utils.py +++ b/src/getdat/utils.py @@ -1,6 +1,7 @@ import click import os import requests +from typing import Literal from requests.exceptions import ConnectionError, ChunkedEncodingError from requests.models import Response from bs4 import BeautifulSoup @@ -36,10 +37,19 @@ class AnnasEbook: _SOURCE_ANNAS = "Anna's Archive" + _ANNAS_ORG_URL = "org" + _ANNAS_GS_URL = "gs" + _ANNAS_SE_URL = "se" + _ANNAS_URLS = { + _ANNAS_ORG_URL: "https://annas-archive.org", + _ANNAS_GS_URL: "https://annas-archive.gs", + _ANNAS_SE_URL: "https://annas-archive.se", + } + _SOURCE_DICT = { _SOURCE_ANNAS: { "name": _SOURCE_ANNAS, - "url": "https://annas-archive.org", + "url": _ANNAS_URLS.get(_ANNAS_ORG_URL), "search_page_scrape": { "tag": "a", "class": ( @@ -67,13 +77,24 @@ class AnnasEbook: _msg = "Searching Anna's Archive..." _resource_name = "" - def __init__(self, q: tuple, ext: str, output_dir: str): + def __init__( + self, + q: tuple, + ext: str, + output_dir: str, + instance: Literal[*_ANNAS_URLS.keys()] = _ANNAS_ORG_URL, + ): self.q = " ".join(map(str, q)) self.output_dir = output_dir or os.environ.get("GETDAT_BOOK_DIR") self.ext = ext + self.instance = instance def _determine_source(self) -> dict: - return self._SOURCE_DICT.get(self._current_source) + source = self._SOURCE_DICT.get(self._current_source) + if self._current_source == self._SOURCE_ANNAS: + annas_url = self._ANNAS_URLS.get(self.instance) + source.update({"url": annas_url}) + return source def _determine_link(self) -> str: source = self._determine_source() diff --git a/tests/test_main.py b/tests/test_main.py index 0972a55..66761e3 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -71,9 +71,20 @@ def test_no_args_only_output_dir_option_print_help(self, mocker): assert self.get_help_text() in results.output ebook_run_method.assert_not_called() + def test_no_args_only_instance_option_print_help(self, mocker): + ebook_run_method = mocker.patch.object(AnnasEbook, "run") + results = self.runner.invoke(ebook, "--instance=gs") + # assert error message echoed + assert EBOOK_ERROR_MSG in results.output + # assert help text prompt shown + assert self.get_help_text() in results.output + ebook_run_method.assert_not_called() + def test_no_args_only_options_print_help(self, mocker): ebook_run_method = mocker.patch.object(AnnasEbook, "run") - results = self.runner.invoke(ebook, "--ext=pdf --output_dir=~/books") + results = self.runner.invoke( + ebook, "--ext=pdf --output_dir=~/books --instance=gs" + ) # assert error message echoed assert EBOOK_ERROR_MSG in results.output # assert help text prompt shown @@ -90,19 +101,47 @@ def test_many_search_arg_ebook_run(self, mocker): self.runner.invoke(ebook, ["Treasure", "Island", "Stevenson"]) ebook_run_method.assert_called_once() - def test_search_arg_ext_option_ebook_run(self, mocker): + @pytest.mark.parametrize( + "ext_type, expect_error", [("pdf", False), ("epub", False), ("er", True)] + ) + def test_search_arg_ext_option_ebook_run(self, ext_type, expect_error, mocker): ebook_run_method = mocker.patch.object(AnnasEbook, "run") - self.runner.invoke(ebook, "Treasure Island Stevenson --ext=epub") - ebook_run_method.assert_called_once() + self.runner.invoke(ebook, f"Treasure Island Stevenson --ext={ext_type}") + if expect_error: + ebook_run_method.assert_not_called() + else: + ebook_run_method.assert_called_once() def test_search_arg_output_dir_option_ebook_run(self, mocker): ebook_run_method = mocker.patch.object(AnnasEbook, "run") self.runner.invoke(ebook, "Treasure Island Stevenson --output_dir=~/books/") ebook_run_method.assert_called_once() + @pytest.mark.parametrize( + "instance_type, expect_error", + [ + (AnnasEbook._ANNAS_ORG_URL, False), + (AnnasEbook._ANNAS_GS_URL, False), + (AnnasEbook._ANNAS_SE_URL, False), + ("er", True), + ], + ) + def test_search_arg_instance_option_ebook_run( + self, instance_type, expect_error, mocker + ): + ebook_run_method = mocker.patch.object(AnnasEbook, "run") + result = self.runner.invoke( + ebook, f"Treasure Island Stevenson --instance={instance_type}" + ) + if expect_error: + ebook_run_method.assert_not_called() + else: + ebook_run_method.assert_called_once() + def test_search_arg_options_ebook_run(self, mocker): ebook_run_method = mocker.patch.object(AnnasEbook, "run") self.runner.invoke( - ebook, "Treasure Island Stevenson --ext=epub --output_dir=~/books/" + ebook, + "Treasure Island Stevenson --ext=epub --output_dir=~/books/ --instance=gs", ) ebook_run_method.assert_called_once() diff --git a/tests/test_utils.py b/tests/test_utils.py index af39067..bf53502 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -83,13 +83,33 @@ def test_ext(self, ext, expected_ext): ebook.ext = expected_ext @pytest.mark.parametrize( - "source, expected_dict", + "instance", + [ + (AnnasEbook._ANNAS_ORG_URL), + (AnnasEbook._ANNAS_GS_URL), + (AnnasEbook._ANNAS_SE_URL), + (""), + ], + ) + def test_instance(self, instance): + if instance: + ebook = AnnasEbook( + q=self.q, ext=self.ext, output_dir=self.output_dir, instance=instance + ) + assert ebook.instance == instance + else: + ebook = AnnasEbook(q=self.q, ext=self.ext, output_dir=self.output_dir) + assert ebook.instance == AnnasEbook._ANNAS_ORG_URL + + @pytest.mark.parametrize( + "source, instance, expected_dict", [ ( AnnasEbook._SOURCE_ANNAS, + "", { "name": AnnasEbook._SOURCE_ANNAS, - "url": "https://annas-archive.org", + "url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_ORG_URL), "search_page_scrape": { "tag": "a", "class": ( @@ -108,18 +128,74 @@ def test_ext(self, ext, expected_ext): "detail_page_scrape": {"tag": "a", "class": "js-download-link"}, }, ), - (AnnasEbook._LIBGEN_RS, {"download_page_scrape": {"tag": "a"}}), + ( + AnnasEbook._SOURCE_ANNAS, + AnnasEbook._ANNAS_GS_URL, + { + "name": AnnasEbook._SOURCE_ANNAS, + "url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_GS_URL), + "search_page_scrape": { + "tag": "a", + "class": ( + "js-vim-focus custom-a flex items-center " + "relative left-[-10px] w-[calc(100%+20px)] px-[10px] " + "outline-offset-[-2px] outline-2 rounded-[3px] hover:bg-[#00000011] " + "focus:outline" + ), + "title_container": { + "tag": "div", + "class": ( + "line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500" + ), + }, + }, + "detail_page_scrape": {"tag": "a", "class": "js-download-link"}, + }, + ), + ( + AnnasEbook._SOURCE_ANNAS, + AnnasEbook._ANNAS_SE_URL, + { + "name": AnnasEbook._SOURCE_ANNAS, + "url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_SE_URL), + "search_page_scrape": { + "tag": "a", + "class": ( + "js-vim-focus custom-a flex items-center " + "relative left-[-10px] w-[calc(100%+20px)] px-[10px] " + "outline-offset-[-2px] outline-2 rounded-[3px] hover:bg-[#00000011] " + "focus:outline" + ), + "title_container": { + "tag": "div", + "class": ( + "line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500" + ), + }, + }, + "detail_page_scrape": {"tag": "a", "class": "js-download-link"}, + }, + ), + (AnnasEbook._LIBGEN_RS, "", {"download_page_scrape": {"tag": "a"}}), ( AnnasEbook._LIBGEN_LI, + AnnasEbook._ANNAS_GS_URL, {"url": "https://libgen.li/", "download_page_scrape": {"tag": "a"}}, ), - ("Not part of _SOURCE_DICT", None), + ("Not part of _SOURCE_DICT", "", None), ], ) - def test_determine_source(self, source, expected_dict, mocker): - ebook = AnnasEbook(q=self.q, ext=self.ext, output_dir=self.output_dir) - mocker.patch.object(ebook, "_current_source", source) - assert ebook._determine_source() == expected_dict + def test_determine_source(self, source, instance, expected_dict, mocker): + if instance: + ebook = AnnasEbook( + q=self.q, ext=self.ext, output_dir=self.output_dir, instance=instance + ) + mocker.patch.object(ebook, "_current_source", source) + assert ebook._determine_source() == expected_dict + else: + ebook = AnnasEbook(q=self.q, ext=self.ext, output_dir=self.output_dir) + mocker.patch.object(ebook, "_current_source", source) + assert ebook._determine_source() == expected_dict @pytest.mark.parametrize( "selected_result, expected_link",