From 1468d7e8d870225b8581130e0f4375c0f38f7941 Mon Sep 17 00:00:00 2001 From: kjake Date: Mon, 26 Aug 2024 22:10:13 -0400 Subject: [PATCH] TLS Cipher Workaround - Implements manual TLS cipher selection to work around a server-side configuration issue with bandcamp.com - Used sample code from https://github.com/urllib3/urllib3/issues/3439#issuecomment-2306400349 - thanks to @pquentin! Co-Authored-By: Quentin Pradet <42327+pquentin@users.noreply.github.com> --- bandcamp_dl/__main__.py | 5 ++-- bandcamp_dl/bandcamp.py | 53 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/bandcamp_dl/__main__.py b/bandcamp_dl/__main__.py index 297e931..4de24c0 100644 --- a/bandcamp_dl/__main__.py +++ b/bandcamp_dl/__main__.py @@ -86,7 +86,6 @@ def main(): logger = logging.getLogger(logging_handle) # TODO: Its possible to break bandcamp-dl temporarily by simply erasing a line in the config, catch this and warn. - arguments = config.init_config(parser) logger.debug(f"Config/Args: {arguments}") if not arguments.URL: parser.print_usage() @@ -105,13 +104,15 @@ def main(): elif arguments.artist and arguments.track: urls = Bandcamp.generate_album_url(arguments.artist, arguments.track, "track") elif arguments.artist: - urls = Bandcamp.get_full_discography(arguments.artist, "music") + urls = Bandcamp.get_full_discography(bandcamp, arguments.artist, "music") else: urls = arguments.URL album_list = [] for url in urls: + if "/album/" not in url and "/track/" not in url: + continue logger.debug("\n\tURL: %s", url) album_list.append(bandcamp.parse(url, not arguments.no_art, arguments.embed_lyrics, arguments.debug)) diff --git a/bandcamp_dl/bandcamp.py b/bandcamp_dl/bandcamp.py index c21cb9b..a89f963 100644 --- a/bandcamp_dl/bandcamp.py +++ b/bandcamp_dl/bandcamp.py @@ -5,11 +5,49 @@ import bs4 import requests +from requests.adapters import HTTPAdapter +from urllib3.util import create_urllib3_context from bandcamp_dl import __version__ from 
bandcamp_dl.bandcampjson import BandcampJSON - +class SSLAdapter(HTTPAdapter): + def __init__(self, ssl_context=None, **kwargs): + self.ssl_context = ssl_context + super().__init__(**kwargs) + + def init_poolmanager(self, *args, **kwargs): + kwargs['ssl_context'] = self.ssl_context + return super().init_poolmanager(*args, **kwargs) + + def proxy_manager_for(self, *args, **kwargs): + kwargs['ssl_context'] = self.ssl_context + return super().proxy_manager_for(*args, **kwargs) + +# Create the SSL context with the custom ciphers +ctx = create_urllib3_context() +ctx.load_default_certs() + +DEFAULT_CIPHERS = ":".join( + [ + "ECDHE+AESGCM", + "ECDHE+CHACHA20", + "DHE+AESGCM", + "DHE+CHACHA20", + "ECDH+AESGCM", + "DH+AESGCM", + "ECDH+AES", + "DH+AES", + "RSA+AESGCM", + "RSA+AES", + "!aNULL", + "!eNULL", + "!MD5", + "!DSS", + "!AESCCM", + ] +) +ctx.set_ciphers(DEFAULT_CIPHERS) class Bandcamp: def __init__(self): @@ -18,6 +56,11 @@ def __init__(self): self.soup = None self.tracks = None self.logger = logging.getLogger("bandcamp-dl").getChild("Main") + + # Mount the adapter with the custom SSL context to the session + self.session = requests.Session() + self.adapter = SSLAdapter(ssl_context=ctx) + self.session.mount('https://', self.adapter) def parse(self, url: str, art: bool = True, lyrics: bool = False, debugging: bool = False) -> dict or None: @@ -31,7 +74,7 @@ def parse(self, url: str, art: bool = True, lyrics: bool = False, """ try: - response = requests.get(url, headers=self.headers) + response = self.session.get(url, headers=self.headers) except requests.exceptions.MissingSchema: return None @@ -107,7 +150,7 @@ def parse(self, url: str, art: bool = True, lyrics: bool = False, def get_track_lyrics(self, track_url): self.logger.debug(" Fetching track lyrics..") - track_page = requests.get(track_url, headers=self.headers) + track_page = self.session.get(track_url, headers=self.headers) try: track_soup = bs4.BeautifulSoup(track_page.text, "lxml") except 
bs4.FeatureNotFound: @@ -182,7 +225,7 @@ def get_album_art(self) -> str: except None: pass - def get_full_discography(artist: str, page_type: str) -> list: + def get_full_discography(self, artist: str, page_type: str) -> list: """Generate a list of album and track urls based on the artist name :param artist: artist name @@ -190,7 +233,7 @@ def get_full_discography(artist: str, page_type: str) -> list: hardcoded :return: urls as list of strs """ - html = requests.get(f"https://{artist}.bandcamp.com/{page_type}").text + html = self.session.get(f"https://{artist}.bandcamp.com/{page_type}").text try: soup = bs4.BeautifulSoup(html, "lxml")