Skip to content

Commit

Permalink
Anna's Archive Instance Support (#13)
Browse files Browse the repository at this point in the history
* wip: add logic now working on tests

* update ReadMe.md

* update ReadMe.md

* update: test_main.py

* update: test_utils.py

* fix: gracefully handle FileNotFoundError for to_filesystem method (#10)

* update & resolve dependencies
  • Loading branch information
Audiosutras authored Nov 19, 2023
1 parent 0a9bfb9 commit ef68000
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 32 deletions.
40 changes: 30 additions & 10 deletions docs/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@
</p>
</div>

![CI](https://github.com/Audiosutras/getdat/actions/workflows/ci.yml/badge.svg?branch=master)
<div class="flex flex-row justify-between flex-wrap">
<a href="https://pypi.org/project/getdat/" alt="Link to PyPi package homepage">
<img class="p-2" alt="PyPI - Downloads" src="https://img.shields.io/pypi/dm/getdat?logo=pypi">
</a>
<a href="https://github.com/Audiosutras/getdat/actions/workflows/ci.yml?query=branch%3Amaster" alt="Link to CI jobs for master branch">
<img class="p-2" alt="CI" src="https://github.com/Audiosutras/getdat/actions/workflows/ci.yml/badge.svg?branch=master">
</a>
<a href="https://github.com/Audiosutras/getdat" alt="Link to GitHub Issues">
<img alt="GitHub issues" src="https://img.shields.io/github/issues/Audiosutras/getdat">
</a>
</div>

Table of Contents

Expand Down Expand Up @@ -97,22 +107,32 @@ Usage: getdat ebook [OPTIONS] [Q]...
ex: getdat ebook <Search>
Options:
-o, --output_dir TEXT Path to ebook's output directory from home directory.
Path must be prefixed by '~' on Unix or '~user' on
Windows. This argument overrides GETDAT_BOOK_DIR env
var if set. Outputs book to working directory if
neither are set.
-e, --ext [epub|pdf] Preferred ebook extension for search results
--help Show this message and exit.
-o, --output_dir TEXT Path to ebook's output directory from home
directory. Path must be prefixed by '~' on Unix
or '~user' on Windows. This argument overrides
GETDAT_BOOK_DIR env var if set. Outputs book to
working directory if neither are set.
-e, --ext [epub|pdf] Preferred ebook extension for search results -
Default: epub
-i, --instance [org|gs|se] The instance of Anna's Archive you would like to
use for your search: https://annas-archive.org,
https://annas-archive.gs, https://annas-
archive.se - Default: org
--help Show this message and exit.
```
Example:
```bash
-> getdat ebook Treasure Island Stevenson --ext=epub --output_dir=~/books/epub/
-> getdat ebook Treasure Island Stevenson --ext=epub --output_dir=~/books/epub/ --instance=gs
```
or
```bash
-> getdat ebook "Treasure Island Stevenson" -e epub -o ~/books/epub -i gs
```
or
```bash
-> getdat ebook "Treasure Island Stevensonn" -e epub -o ~/books/epub
-> getdat ebook "Treasure Island Stevenson"
```
#### Environment Variable
Expand Down
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 15 additions & 3 deletions src/getdat/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,27 @@ def cinema():
"--ext",
type=click.Choice(["epub", "pdf"]),
default="epub",
help="Preferred ebook extension for search results",
help=("Preferred ebook extension for search results " "- Default: epub"),
)
@click.option(
"-i",
"--instance",
type=click.Choice(AnnasEbook._ANNAS_URLS.keys()),
default=AnnasEbook._ANNAS_ORG_URL,
help=(
"The instance of Anna's Archive you would like to "
"use for your search:\n "
f"{', '.join(AnnasEbook._ANNAS_URLS.values())}\n"
f"- Default: {AnnasEbook._ANNAS_ORG_URL}"
),
)
@click.argument("q", nargs=-1)
def ebook(q, ext, output_dir):
def ebook(q, ext, output_dir, instance):
"""Search and download an ebook available through Anna's Archive
ex: getdat ebook <Search>
"""
if not q:
print_help(EBOOK_ERROR_MSG)
ebook = AnnasEbook(q=q, ext=ext, output_dir=output_dir)
ebook = AnnasEbook(q=q, ext=ext, output_dir=output_dir, instance=instance)
ebook.run()
27 changes: 24 additions & 3 deletions src/getdat/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import click
import os
import requests
from typing import Literal
from requests.exceptions import ConnectionError, ChunkedEncodingError
from requests.models import Response
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -36,10 +37,19 @@ class AnnasEbook:

_SOURCE_ANNAS = "Anna's Archive"

# Short keys identifying each supported Anna's Archive instance. Despite the
# ``_URL`` suffix these hold the key ("org", "gs", "se"), not the URL itself;
# the full base URL is looked up through ``_ANNAS_URLS`` below.
_ANNAS_ORG_URL = "org"
_ANNAS_GS_URL = "gs"
_ANNAS_SE_URL = "se"
# Maps each instance key to the base URL of that Anna's Archive instance.
_ANNAS_URLS = {
_ANNAS_ORG_URL: "https://annas-archive.org",
_ANNAS_GS_URL: "https://annas-archive.gs",
_ANNAS_SE_URL: "https://annas-archive.se",
}

_SOURCE_DICT = {
_SOURCE_ANNAS: {
"name": _SOURCE_ANNAS,
"url": "https://annas-archive.org",
"url": _ANNAS_URLS.get(_ANNAS_ORG_URL),
"search_page_scrape": {
"tag": "a",
"class": (
Expand Down Expand Up @@ -67,13 +77,24 @@ class AnnasEbook:
_msg = "Searching Anna's Archive..."
_resource_name = ""

def __init__(self, q: tuple, ext: str, output_dir: str):
def __init__(
    self,
    q: tuple,
    ext: str,
    output_dir: str,
    # The accepted keys are spelled out because ``Literal`` only takes literal
    # values, and star-unpacking inside a subscript is a syntax error before
    # Python 3.11. Keep this list in sync with the keys of ``_ANNAS_URLS``.
    instance: Literal["org", "gs", "se"] = _ANNAS_ORG_URL,
):
    """Store the search parameters for an ebook lookup.

    Args:
        q: Search terms; each item is converted with ``str`` and the
            results are joined with single spaces into ``self.q``.
        ext: Preferred ebook extension, stored as given.
        output_dir: Output directory. When falsy, the value of the
            ``GETDAT_BOOK_DIR`` environment variable is used instead
            (which may itself be ``None``).
        instance: Key into ``_ANNAS_URLS`` selecting which Anna's Archive
            instance to use. Defaults to ``_ANNAS_ORG_URL``.
    """
    self.q = " ".join(map(str, q))
    self.output_dir = output_dir or os.environ.get("GETDAT_BOOK_DIR")
    self.ext = ext
    self.instance = instance

def _determine_source(self) -> dict:
    """Return the scrape configuration for the current source.

    Looks up ``self._current_source`` in ``_SOURCE_DICT``. For the Anna's
    Archive source, the ``"url"`` entry is replaced by the base URL that
    ``_ANNAS_URLS`` holds for ``self.instance``.

    Returns:
        The configuration dict for the current source, or ``None`` when
        ``self._current_source`` is not a key of ``_SOURCE_DICT``.
    """
    source = self._SOURCE_DICT.get(self._current_source)
    if self._current_source == self._SOURCE_ANNAS:
        # Build a new dict rather than calling ``source.update(...)``: the
        # entry lives in the class-level ``_SOURCE_DICT`` shared by every
        # instance, so updating it in place would leak one instance's
        # chosen URL into all others.
        return {**source, "url": self._ANNAS_URLS.get(self.instance)}
    return source

def _determine_link(self) -> str:
source = self._determine_source()
Expand Down
49 changes: 44 additions & 5 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,20 @@ def test_no_args_only_output_dir_option_print_help(self, mocker):
assert self.get_help_text() in results.output
ebook_run_method.assert_not_called()

def test_no_args_only_instance_option_print_help(self, mocker):
    """Passing only ``--instance`` (no search terms) prints the error and help."""
    run_mock = mocker.patch.object(AnnasEbook, "run")
    output = self.runner.invoke(ebook, "--instance=gs").output
    # The error message is echoed to the user...
    assert EBOOK_ERROR_MSG in output
    # ...followed by the command's help text.
    assert self.get_help_text() in output
    # The search itself must never start without a query.
    run_mock.assert_not_called()

def test_no_args_only_options_print_help(self, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
results = self.runner.invoke(ebook, "--ext=pdf --output_dir=~/books")
results = self.runner.invoke(
ebook, "--ext=pdf --output_dir=~/books --instance=gs"
)
# assert error message echoed
assert EBOOK_ERROR_MSG in results.output
# assert help text prompt shown
Expand All @@ -90,19 +101,47 @@ def test_many_search_arg_ebook_run(self, mocker):
self.runner.invoke(ebook, ["Treasure", "Island", "Stevenson"])
ebook_run_method.assert_called_once()

def test_search_arg_ext_option_ebook_run(self, mocker):
@pytest.mark.parametrize(
"ext_type, expect_error", [("pdf", False), ("epub", False), ("er", True)]
)
def test_search_arg_ext_option_ebook_run(self, ext_type, expect_error, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
self.runner.invoke(ebook, "Treasure Island Stevenson --ext=epub")
ebook_run_method.assert_called_once()
self.runner.invoke(ebook, f"Treasure Island Stevenson --ext={ext_type}")
if expect_error:
ebook_run_method.assert_not_called()
else:
ebook_run_method.assert_called_once()

def test_search_arg_output_dir_option_ebook_run(self, mocker):
    """A search combined with ``--output_dir`` runs the ebook workflow once."""
    run_mock = mocker.patch.object(AnnasEbook, "run")
    cli_args = "Treasure Island Stevenson --output_dir=~/books/"
    self.runner.invoke(ebook, cli_args)
    run_mock.assert_called_once()

@pytest.mark.parametrize(
    "instance_type, expect_error",
    [
        (AnnasEbook._ANNAS_ORG_URL, False),
        (AnnasEbook._ANNAS_GS_URL, False),
        (AnnasEbook._ANNAS_SE_URL, False),
        ("er", True),
    ],
)
def test_search_arg_instance_option_ebook_run(
    self, instance_type, expect_error, mocker
):
    """Check ``--instance`` accepts each known key and rejects anything else.

    ``AnnasEbook.run`` is patched out so no search is performed; the test
    only checks whether the command reaches it.
    """
    ebook_run_method = mocker.patch.object(AnnasEbook, "run")
    result = self.runner.invoke(
        ebook, f"Treasure Island Stevenson --instance={instance_type}"
    )
    if expect_error:
        # An invalid choice must be rejected by the CLI before run() is reached.
        assert result.exit_code != 0
        ebook_run_method.assert_not_called()
    else:
        # CliRunner captures exceptions, so check the exit code as well to
        # make sure the command finished cleanly.
        assert result.exit_code == 0
        ebook_run_method.assert_called_once()

def test_search_arg_options_ebook_run(self, mocker):
ebook_run_method = mocker.patch.object(AnnasEbook, "run")
self.runner.invoke(
ebook, "Treasure Island Stevenson --ext=epub --output_dir=~/books/"
ebook,
"Treasure Island Stevenson --ext=epub --output_dir=~/books/ --instance=gs",
)
ebook_run_method.assert_called_once()
92 changes: 84 additions & 8 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,33 @@ def test_ext(self, ext, expected_ext):
ebook.ext = expected_ext

@pytest.mark.parametrize(
"source, expected_dict",
"instance",
[
(AnnasEbook._ANNAS_ORG_URL),
(AnnasEbook._ANNAS_GS_URL),
(AnnasEbook._ANNAS_SE_URL),
(""),
],
)
def test_instance(self, instance):
    """``instance`` is stored as given and defaults to the ``org`` key.

    An empty parametrized value means "do not pass ``instance`` at all".
    """
    init_kwargs = {"q": self.q, "ext": self.ext, "output_dir": self.output_dir}
    expected = AnnasEbook._ANNAS_ORG_URL
    if instance:
        init_kwargs["instance"] = instance
        expected = instance
    ebook = AnnasEbook(**init_kwargs)
    assert ebook.instance == expected

@pytest.mark.parametrize(
"source, instance, expected_dict",
[
(
AnnasEbook._SOURCE_ANNAS,
"",
{
"name": AnnasEbook._SOURCE_ANNAS,
"url": "https://annas-archive.org",
"url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_ORG_URL),
"search_page_scrape": {
"tag": "a",
"class": (
Expand All @@ -108,18 +128,74 @@ def test_ext(self, ext, expected_ext):
"detail_page_scrape": {"tag": "a", "class": "js-download-link"},
},
),
(AnnasEbook._LIBGEN_RS, {"download_page_scrape": {"tag": "a"}}),
(
AnnasEbook._SOURCE_ANNAS,
AnnasEbook._ANNAS_GS_URL,
{
"name": AnnasEbook._SOURCE_ANNAS,
"url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_GS_URL),
"search_page_scrape": {
"tag": "a",
"class": (
"js-vim-focus custom-a flex items-center "
"relative left-[-10px] w-[calc(100%+20px)] px-[10px] "
"outline-offset-[-2px] outline-2 rounded-[3px] hover:bg-[#00000011] "
"focus:outline"
),
"title_container": {
"tag": "div",
"class": (
"line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500"
),
},
},
"detail_page_scrape": {"tag": "a", "class": "js-download-link"},
},
),
(
AnnasEbook._SOURCE_ANNAS,
AnnasEbook._ANNAS_SE_URL,
{
"name": AnnasEbook._SOURCE_ANNAS,
"url": AnnasEbook._ANNAS_URLS.get(AnnasEbook._ANNAS_SE_URL),
"search_page_scrape": {
"tag": "a",
"class": (
"js-vim-focus custom-a flex items-center "
"relative left-[-10px] w-[calc(100%+20px)] px-[10px] "
"outline-offset-[-2px] outline-2 rounded-[3px] hover:bg-[#00000011] "
"focus:outline"
),
"title_container": {
"tag": "div",
"class": (
"line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500"
),
},
},
"detail_page_scrape": {"tag": "a", "class": "js-download-link"},
},
),
(AnnasEbook._LIBGEN_RS, "", {"download_page_scrape": {"tag": "a"}}),
(
AnnasEbook._LIBGEN_LI,
AnnasEbook._ANNAS_GS_URL,
{"url": "https://libgen.li/", "download_page_scrape": {"tag": "a"}},
),
("Not part of _SOURCE_DICT", None),
("Not part of _SOURCE_DICT", "", None),
],
)
def test_determine_source(self, source, expected_dict, mocker):
ebook = AnnasEbook(q=self.q, ext=self.ext, output_dir=self.output_dir)
mocker.patch.object(ebook, "_current_source", source)
assert ebook._determine_source() == expected_dict
def test_determine_source(self, source, instance, expected_dict, mocker):
    """``_determine_source`` returns the expected config for each source.

    A falsy ``instance`` means the constructor is called without the
    ``instance`` argument, so its default applies.
    """
    init_kwargs = {"q": self.q, "ext": self.ext, "output_dir": self.output_dir}
    if instance:
        init_kwargs["instance"] = instance
    ebook = AnnasEbook(**init_kwargs)
    mocker.patch.object(ebook, "_current_source", source)
    assert ebook._determine_source() == expected_dict

@pytest.mark.parametrize(
"selected_result, expected_link",
Expand Down

0 comments on commit ef68000

Please sign in to comment.