-
Notifications
You must be signed in to change notification settings - Fork 3
/
ivoox_scraper_client.py
40 lines (34 loc) · 1.24 KB
/
ivoox_scraper_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
from bs4 import BeautifulSoup
class IvooxScraperClient:
    """Build iVoox category listing URLs and fetch them as parsed HTML.

    Example of a listing page this client targets:
    https://www.ivoox.com/podcast-internet-tecnologia_sc_f445_1.html
    """

    def __init__(self, category_name, category_id, timeout=10):
        """Store the category whose listing pages will be requested.

        Args:
            category_name: Text inserted after ``podcast-`` in the URL.
            category_id: Value inserted after ``_sc_f`` in the URL.
            timeout: Seconds handed to ``requests.get`` as its timeout.
                Pass ``None`` to wait indefinitely.
        """
        self.url_base = "https://www.ivoox.com/podcast-{0}_sc_f{1}_{2}.html"
        self.category_name = category_name
        self.category_id = category_id
        self.timeout = timeout

    def request(self, page):
        """Fetch one listing page and return it as a ``BeautifulSoup`` tree.

        Args:
            page: Page value substituted into the last URL placeholder.

        Returns:
            The parsed document for the requested page.
        """
        url = self.__format_url(page)
        return self.__get_content(url)

    def __format_url(self, page):
        """Private method: fill the URL template for the given page."""
        return self.url_base.format(self.category_name, self.category_id, page)

    def __get_content(self, url):
        """Private method: download ``url`` and parse the body with lxml.

        An HTTP error status is reported on stdout but does not abort the
        call: the response body is still parsed and returned (best effort).

        Raises:
            requests.exceptions.RequestException: If the request itself
                fails (connection error, timeout, ...); not caught here.
        """
        # Without a timeout a stalled server would block the caller forever.
        res = requests.get(url, timeout=self.timeout)
        # raise_for_status() raises HTTPError for 4xx and 5xx responses only.
        try:
            res.raise_for_status()
        except requests.exceptions.HTTPError as exc:
            print('Problem! %s' % (exc))
        return BeautifulSoup(res.text, "lxml")