This repository has been archived by the owner on Jan 26, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
/
process_data.py
54 lines (41 loc) · 1.9 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import requests
import os
import time
def create_dir(path="source"):
    """Create the directory ``path`` (and missing parents) if nothing exists there.

    Args:
        path: Directory to create. Defaults to ``"source"``, resolved
            against the current working directory.

    If ``path`` already exists -- as a directory or as any other kind of
    file-system entry -- the function returns without doing anything.
    """
    if os.path.exists(path):
        return
    # exist_ok=True closes the gap between the check above and the creation:
    # if something else creates the directory in between, makedirs no longer
    # fails with FileExistsError.
    os.makedirs(path, exist_ok=True)
# Browser-like User-Agent request header.
# NOTE(review): nothing in the code visible here passes this dict to
# requests.get -- presumably intended as `headers=header`; confirm with callers.
header = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
class ProcessData:
    """Fetch a book's sections over HTTP and append their HTML to one file.

    The instance stores the credentials/identifiers that are sent as query
    parameters with every request, the two endpoint URLs, and the book name
    from which the output file name is derived.
    """

    # Seconds to wait for the server before giving up, so that a stalled
    # connection cannot block the download forever.
    REQUEST_TIMEOUT = 30
    # Seconds to sleep before every request (presumably to avoid hammering
    # the server -- the original code slept for the same amount).
    REQUEST_DELAY = 1

    def __init__(self, account_id, client_id, uid, token, src, get_url, section_url, book_name, dir_path):
        """Store the request parameters, endpoint URLs and output settings.

        Args:
            account_id: Sent as the ``id`` query parameter.
            client_id: Sent as the ``client_id`` query parameter.
            uid: Sent as the ``uid`` query parameter.
            token: Sent as the ``token`` query parameter.
            src: Sent as the ``src`` query parameter.
            get_url: URL queried by :meth:`get_section_id`.
            section_url: URL queried once per section by
                :meth:`get_content_from_section`.
            book_name: Name of the book; surrounding ``"`` characters are
                stripped when the output file name is built.
            dir_path: Stored on the instance; not used by the methods of
                this class.
        """
        self.account_id = account_id
        self.client_id = client_id
        self.uid = uid
        self.token = token
        self.src = src
        self.get_url = get_url
        self.section_url = section_url
        self.dir_path = dir_path
        self.book_name = book_name

    def _request_params(self, **extra):
        """Return the query parameters common to all requests, plus ``extra``."""
        params = {
            "id": self.account_id,
            "uid": self.uid,
            "token": self.token,
            "client_id": self.client_id,
            "src": self.src,
        }
        params.update(extra)
        return params

    def _output_path(self):
        """Return the output file name: the book name without ``"`` plus ``.html``."""
        return self.book_name.strip('"') + ".html"

    def _fetch_payload(self, url, **extra):
        """Sleep, GET ``url`` and return the ``"d"`` member of the JSON reply."""
        time.sleep(self.REQUEST_DELAY)
        response = requests.get(
            url,
            params=self._request_params(**extra),
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.json()["d"]

    def get_section_id(self):
        """Query ``get_url`` and return the ``["d"]["section"]`` value of the reply.

        Returns:
            Whatever the server put under ``d.section`` -- presumably the
            collection of section ids that :meth:`get_content_from_section`
            iterates over.

        Raises:
            requests.exceptions.Timeout: The server did not answer within
                ``REQUEST_TIMEOUT`` seconds.
            KeyError: The JSON reply lacks ``d`` or ``section``.
        """
        return self._fetch_payload(self.get_url)["section"]

    def get_content_from_section(self, section_list):
        """Append a charset ``<meta>`` tag and every section's HTML to the output file.

        The file is named after the book (see ``_output_path``), is created
        in the current working directory and is opened in append mode, so
        existing content is kept.

        Args:
            section_list: Iterable of section ids; each one is sent as the
                ``sectionId`` query parameter to ``section_url``.

        Raises:
            requests.exceptions.Timeout: The server did not answer within
                ``REQUEST_TIMEOUT`` seconds.
            KeyError: A JSON reply lacks ``d`` or ``html``.
        """
        # One handle for the whole run guarantees the <meta> tag precedes the
        # section content and avoids re-opening the file for each section.
        with open(self._output_path(), 'a', encoding="utf-8") as out:
            out.write('<meta charset="UTF-8">')
            for section in section_list:
                section_html = self._fetch_payload(self.section_url, sectionId=section)["html"]
                if isinstance(section_html, str):
                    out.write(section_html)
                else:
                    # Keep accepting a sequence of strings, as writelines did.
                    out.writelines(section_html)
                # Persist each section as soon as it is downloaded, matching
                # the previous close-after-every-section behaviour.
                out.flush()