From ef68000e401097675e1e91b7e9ddef2076888f9a Mon Sep 17 00:00:00 2001
From: Audiosutras <37882933+Audiosutras@users.noreply.github.com>
Date: Sun, 19 Nov 2023 08:19:08 -0500
Subject: [PATCH] Anna's Archive Instance Support (#13)
* wip: add logic now working on tests
* update ReadMe.md
* update ReadMe.md
* update: test_main.py
* update: test_utils.py
* fix: gracefully handle FileNotFoundError for to_filesystem method (#10)
* update & resolve dependencies
---
docs/ReadMe.md | 40 +++++++++++++++-----
poetry.lock | 6 +--
src/getdat/main.py | 18 +++++++--
src/getdat/utils.py | 27 +++++++++++--
tests/test_main.py | 49 +++++++++++++++++++++---
tests/test_utils.py | 92 +++++++++++++++++++++++++++++++++++++++++----
6 files changed, 200 insertions(+), 32 deletions(-)
diff --git a/docs/ReadMe.md b/docs/ReadMe.md
index 0611e3c..7d9ba16 100644
--- a/docs/ReadMe.md
+++ b/docs/ReadMe.md
@@ -13,7 +13,17 @@
-![CI](https://github.com/Audiosutras/getdat/actions/workflows/ci.yml/badge.svg?branch=master)
+
Table of Contents
@@ -97,22 +107,32 @@ Usage: getdat ebook [OPTIONS] [Q]...
ex: getdat ebook
Options:
- -o, --output_dir TEXT Path to ebook's output directory from home directory.
- Path must be prefixed by '~' on Unix or '~user' on
- Windows. This argument overrides GETDAT_BOOK_DIR env
- var if set. Outputs book to working directory if
- neither are set.
- -e, --ext [epub|pdf] Preferred ebook extension for search results
- --help Show this message and exit.
+ -o, --output_dir TEXT Path to ebook's output directory from home
+ directory. Path must be prefixed by '~' on Unix
+ or '~user' on Windows. This argument overrides
+ GETDAT_BOOK_DIR env var if set. Outputs book to
+ working directory if neither are set.
+ -e, --ext [epub|pdf] Preferred ebook extension for search results -
+ Default: epub
+ -i, --instance [org|gs|se] The instance of Anna's Archive you would like to
+ use for your search: https://annas-archive.org,
+ https://annas-archive.gs,
+ https://annas-archive.se - Default: org
+ --help Show this message and exit.
+
```
Example:
```bash
--> getdat ebook Treasure Island Stevenson --ext=epub --output_dir=~/books/epub/
+-> getdat ebook Treasure Island Stevenson --ext=epub --output_dir=~/books/epub/ --instance=gs
+```
+or
+```bash
+-> getdat ebook "Treasure Island Stevenson" -e epub -o ~/books/epub -i gs
```
or
```bash
--> getdat ebook "Treasure Island Stevensonn" -e epub -o ~/books/epub
+-> getdat ebook "Treasure Island Stevenson"
```
#### Environment Variable
diff --git a/poetry.lock b/poetry.lock
index 0a79eca..cf04870 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -193,13 +193,13 @@ typing = ["typing-extensions (>=4.8)"]
[[package]]
name = "identify"
-version = "2.5.31"
+version = "2.5.32"
description = "File identification library for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "identify-2.5.31-py2.py3-none-any.whl", hash = "sha256:90199cb9e7bd3c5407a9b7e81b4abec4bb9d249991c79439ec8af740afc6293d"},
- {file = "identify-2.5.31.tar.gz", hash = "sha256:7736b3c7a28233637e3c36550646fc6389bedd74ae84cb788200cc8e2dd60b75"},
+ {file = "identify-2.5.32-py2.py3-none-any.whl", hash = "sha256:0b7656ef6cba81664b783352c73f8c24b39cf82f926f78f4550eda928e5e0545"},
+ {file = "identify-2.5.32.tar.gz", hash = "sha256:5d9979348ec1a21c768ae07e0a652924538e8bce67313a73cb0f681cf08ba407"},
]
[package.extras]
diff --git a/src/getdat/main.py b/src/getdat/main.py
index 3e8b4f7..1fdd65c 100644
--- a/src/getdat/main.py
+++ b/src/getdat/main.py
@@ -39,15 +39,27 @@ def cinema():
"--ext",
type=click.Choice(["epub", "pdf"]),
default="epub",
- help="Preferred ebook extension for search results",
+ help=("Preferred ebook extension for search results " "- Default: epub"),
+)
+@click.option(
+ "-i",
+ "--instance",
+ type=click.Choice(AnnasEbook._ANNAS_URLS.keys()),
+ default=AnnasEbook._ANNAS_ORG_URL,
+ help=(
+ "The instance of Anna's Archive you would like to "
+ "use for your search:\n "
+ f"{', '.join(AnnasEbook._ANNAS_URLS.values())}\n"
+ f"- Default: {AnnasEbook._ANNAS_ORG_URL}"
+ ),
)
@click.argument("q", nargs=-1)
-def ebook(q, ext, output_dir):
+def ebook(q, ext, output_dir, instance):
"""Search and download an ebook available through Anna's Archive
ex: getdat ebook
"""
if not q:
print_help(EBOOK_ERROR_MSG)
- ebook = AnnasEbook(q=q, ext=ext, output_dir=output_dir)
+ ebook = AnnasEbook(q=q, ext=ext, output_dir=output_dir, instance=instance)
ebook.run()
diff --git a/src/getdat/utils.py b/src/getdat/utils.py
index e53cb97..1a10364 100644
--- a/src/getdat/utils.py
+++ b/src/getdat/utils.py
@@ -1,6 +1,7 @@
import click
import os
import requests
+from typing import Literal
from requests.exceptions import ConnectionError, ChunkedEncodingError
from requests.models import Response
from bs4 import BeautifulSoup
@@ -36,10 +37,19 @@ class AnnasEbook:
_SOURCE_ANNAS = "Anna's Archive"
+ _ANNAS_ORG_URL = "org"
+ _ANNAS_GS_URL = "gs"
+ _ANNAS_SE_URL = "se"
+ _ANNAS_URLS = {
+ _ANNAS_ORG_URL: "https://annas-archive.org",
+ _ANNAS_GS_URL: "https://annas-archive.gs",
+ _ANNAS_SE_URL: "https://annas-archive.se",
+ }
+
_SOURCE_DICT = {
_SOURCE_ANNAS: {
"name": _SOURCE_ANNAS,
- "url": "https://annas-archive.org",
+ "url": _ANNAS_URLS.get(_ANNAS_ORG_URL),
"search_page_scrape": {
"tag": "a",
"class": (
@@ -67,13 +77,24 @@ class AnnasEbook:
_msg = "Searching Anna's Archive..."
_resource_name = ""
- def __init__(self, q: tuple, ext: str, output_dir: str):
+ def __init__(
+ self,
+ q: tuple,
+ ext: str,
+ output_dir: str,
+ # Members are spelled out instead of Literal[*_ANNAS_URLS.keys()]: star-unpacking
+ # inside a subscript is a SyntaxError before Python 3.11, and type checkers only
+ # accept static Literal members. Keep in sync with the keys of _ANNAS_URLS.
+ instance: Literal["org", "gs", "se"] = _ANNAS_ORG_URL,
+ ):
+ """Store the search parameters for an ebook lookup.
+
+ q is a tuple of search terms that is joined with single spaces into one
+ query string. ext is the preferred ebook extension and is stored as given.
+ output_dir falls back to the GETDAT_BOOK_DIR environment variable when it
+ is falsy. instance is a key of _ANNAS_URLS selecting which Anna's Archive
+ instance to use; it defaults to _ANNAS_ORG_URL.
+ """
self.q = " ".join(map(str, q))
self.output_dir = output_dir or os.environ.get("GETDAT_BOOK_DIR")
self.ext = ext
+ self.instance = instance
def _determine_source(self) -> dict:
- return self._SOURCE_DICT.get(self._current_source)
+ """Return the scrape configuration for the current source.
+
+ For Anna's Archive the "url" entry is replaced by the URL of the selected
+ instance; every other source is returned as stored in _SOURCE_DICT.
+ Returns None when the current source is not a key of _SOURCE_DICT.
+ """
+ source = self._SOURCE_DICT.get(self._current_source)
+ if self._current_source == self._SOURCE_ANNAS:
+ annas_url = self._ANNAS_URLS.get(self.instance)
+ # Build a new dict rather than calling update(): _SOURCE_DICT is class-level
+ # state shared by every AnnasEbook, so an in-place write would leak one
+ # instance's URL choice into the shared configuration.
+ source = {**source, "url": annas_url}
+ return source
def _determine_link(self) -> str:
source = self._determine_source()
diff --git a/tests/test_main.py b/tests/test_main.py
index 0972a55..66761e3 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -71,9 +71,20 @@ def test_no_args_only_output_dir_option_print_help(self, mocker):
assert self.get_help_text() in results.output
ebook_run_method.assert_not_called()
+ def test_no_args_only_instance_option_print_help(self, mocker):
+ ebook_run_method = mocker.patch.object(AnnasEbook, "run")
+ results = self.runner.invoke(ebook, "--instance=gs")
+ # assert error message echoed
+ assert EBOOK_ERROR_MSG in results.output
+ # assert help text prompt shown
+ assert self.get_help_text() in results.output
+ ebook_run_method.assert_not_called()
+
def test_no_args_only_options_print_help(self, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
- results = self.runner.invoke(ebook, "--ext=pdf --output_dir=~/books")
+ results = self.runner.invoke(
+ ebook, "--ext=pdf --output_dir=~/books --instance=gs"
+ )
# assert error message echoed
assert EBOOK_ERROR_MSG in results.output
# assert help text prompt shown
@@ -90,19 +101,47 @@ def test_many_search_arg_ebook_run(self, mocker):
self.runner.invoke(ebook, ["Treasure", "Island", "Stevenson"])
ebook_run_method.assert_called_once()
- def test_search_arg_ext_option_ebook_run(self, mocker):
+ @pytest.mark.parametrize(
+ "ext_type, expect_error", [("pdf", False), ("epub", False), ("er", True)]
+ )
+ def test_search_arg_ext_option_ebook_run(self, ext_type, expect_error, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
- self.runner.invoke(ebook, "Treasure Island Stevenson --ext=epub")
- ebook_run_method.assert_called_once()
+ self.runner.invoke(ebook, f"Treasure Island Stevenson --ext={ext_type}")
+ if expect_error:
+ ebook_run_method.assert_not_called()
+ else:
+ ebook_run_method.assert_called_once()
def test_search_arg_output_dir_option_ebook_run(self, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
self.runner.invoke(ebook, "Treasure Island Stevenson --output_dir=~/books/")
ebook_run_method.assert_called_once()
+ @pytest.mark.parametrize(
+ "instance_type, expect_error",
+ [
+ (AnnasEbook._ANNAS_ORG_URL, False),
+ (AnnasEbook._ANNAS_GS_URL, False),
+ (AnnasEbook._ANNAS_SE_URL, False),
+ ("er", True),
+ ],
+ )
+ def test_search_arg_instance_option_ebook_run(
+ self, instance_type, expect_error, mocker
+ ):
+ """Known --instance values run the search; an unknown value is rejected."""
+ ebook_run_method = mocker.patch.object(AnnasEbook, "run")
+ result = self.runner.invoke(
+ ebook, f"Treasure Island Stevenson --instance={instance_type}"
+ )
+ if expect_error:
+ # click rejects a value outside the Choice list with a usage error, so the
+ # command must exit non-zero without ever reaching AnnasEbook.run().
+ assert result.exit_code != 0
+ ebook_run_method.assert_not_called()
+ else:
+ ebook_run_method.assert_called_once()
+
def test_search_arg_options_ebook_run(self, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
self.runner.invoke(
- ebook, "Treasure Island Stevenson --ext=epub --output_dir=~/books/"
+ ebook,
+ "Treasure Island Stevenson --ext=epub --output_dir=~/books/ --instance=gs",
)
ebook_run_method.assert_called_once()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index af39067..bf53502 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -83,13 +83,33 @@ def test_ext(self, ext, expected_ext):
ebook.ext = expected_ext
@pytest.mark.parametrize(
- "source, expected_dict",
+ "instance",
+ [
+ (AnnasEbook._ANNAS_ORG_URL),
+ (AnnasEbook._ANNAS_GS_URL),
+ (AnnasEbook._ANNAS_SE_URL),
+ (""),
+ ],
+ )
+ def test_instance(self, instance):
+ if instance:
+ ebook = AnnasEbook(
+ q=self.q, ext=self.ext, output_dir=self.output_dir, instance=instance
+ )
+ assert ebook.instance == instance
+ else:
+ ebook = AnnasEbook(q=self.q, ext=self.ext, output_dir=self.output_dir)
+ assert ebook.instance == AnnasEbook._ANNAS_ORG_URL
+
+ @pytest.mark.parametrize(
+ "source, instance, expected_dict",
[
(
AnnasEbook._SOURCE_ANNAS,
+ "",
{
"name": AnnasEbook._SOURCE_ANNAS,
- "url": "https://annas-archive.org",
+ "url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_ORG_URL),
"search_page_scrape": {
"tag": "a",
"class": (
@@ -108,18 +128,74 @@ def test_ext(self, ext, expected_ext):
"detail_page_scrape": {"tag": "a", "class": "js-download-link"},
},
),
- (AnnasEbook._LIBGEN_RS, {"download_page_scrape": {"tag": "a"}}),
+ (
+ AnnasEbook._SOURCE_ANNAS,
+ AnnasEbook._ANNAS_GS_URL,
+ {
+ "name": AnnasEbook._SOURCE_ANNAS,
+ "url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_GS_URL),
+ "search_page_scrape": {
+ "tag": "a",
+ "class": (
+ "js-vim-focus custom-a flex items-center "
+ "relative left-[-10px] w-[calc(100%+20px)] px-[10px] "
+ "outline-offset-[-2px] outline-2 rounded-[3px] hover:bg-[#00000011] "
+ "focus:outline"
+ ),
+ "title_container": {
+ "tag": "div",
+ "class": (
+ "line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500"
+ ),
+ },
+ },
+ "detail_page_scrape": {"tag": "a", "class": "js-download-link"},
+ },
+ ),
+ (
+ AnnasEbook._SOURCE_ANNAS,
+ AnnasEbook._ANNAS_SE_URL,
+ {
+ "name": AnnasEbook._SOURCE_ANNAS,
+ "url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_SE_URL),
+ "search_page_scrape": {
+ "tag": "a",
+ "class": (
+ "js-vim-focus custom-a flex items-center "
+ "relative left-[-10px] w-[calc(100%+20px)] px-[10px] "
+ "outline-offset-[-2px] outline-2 rounded-[3px] hover:bg-[#00000011] "
+ "focus:outline"
+ ),
+ "title_container": {
+ "tag": "div",
+ "class": (
+ "line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500"
+ ),
+ },
+ },
+ "detail_page_scrape": {"tag": "a", "class": "js-download-link"},
+ },
+ ),
+ (AnnasEbook._LIBGEN_RS, "", {"download_page_scrape": {"tag": "a"}}),
(
AnnasEbook._LIBGEN_LI,
+ AnnasEbook._ANNAS_GS_URL,
{"url": "https://libgen.li/", "download_page_scrape": {"tag": "a"}},
),
- ("Not part of _SOURCE_DICT", None),
+ ("Not part of _SOURCE_DICT", "", None),
],
)
- def test_determine_source(self, source, expected_dict, mocker):
- ebook = AnnasEbook(q=self.q, ext=self.ext, output_dir=self.output_dir)
- mocker.patch.object(ebook, "_current_source", source)
- assert ebook._determine_source() == expected_dict
+ def test_determine_source(self, source, instance, expected_dict, mocker):
+ """_determine_source returns the expected config for each source/instance pair."""
+ # An empty instance means "omit the argument" so the constructor default
+ # applies; both cases then share the same patch-and-assert path.
+ kwargs = {"instance": instance} if instance else {}
+ ebook = AnnasEbook(
+ q=self.q, ext=self.ext, output_dir=self.output_dir, **kwargs
+ )
+ mocker.patch.object(ebook, "_current_source", source)
+ assert ebook._determine_source() == expected_dict
@pytest.mark.parametrize(
"selected_result, expected_link",