diff --git a/demo/bar_test.py b/demo/bar_test.py
new file mode 100644
index 0000000..6694f6c
--- /dev/null
+++ b/demo/bar_test.py
@@ -0,0 +1,29 @@
+
+import time
+from progress.bar import ChargingBar
+import threading
+
+i = 20
+i_ = 20
+def _():
+    global i, i_
+    with ChargingBar('Processing', max=i_) as bar:
+        while True:
+            if i_ - i > 0:
+                for ___ in range(i_ - i):
+
+                    bar.next()
+                i_ = i
+
+def a():
+    global i
+    while True:
+        time.sleep(0.1)
+        i = i - 1
+
+
+
+threading.Thread(target = _).start()
+
+threading.Thread(target = a).start()
+
diff --git a/demo/download_test.py b/demo/download_test.py
new file mode 100644
index 0000000..2f6f218
--- /dev/null
+++ b/demo/download_test.py
@@ -0,0 +1,43 @@
+import requests
+from bs4 import BeautifulSoup
+
+url = 'https://kemono.party/fantia/user/6561/post/784019'
+host = 'https://kemono.party'
+
+
+def DownloadPicOrgin(url):
+    html = requests.get(url).content.decode('utf-8')
+    soup = BeautifulSoup(html, 'lxml')
+    data = soup.select('.post__thumbnail a')
+
+    for trumbpic in data:
+        link = trumbpic['href']
+
+
+    headers = {
+        # ':authority': 'kemono.party',  # HTTP/2 pseudo-headers can't be sent as normal request headers; kept for reference
+        # ':method': 'GET',
+        # ':scheme': 'https',
+        # ':path': link,
+        'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+        'accept-encoding': 'gzip, deflate, br',
+        'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,en;q=0.7,en-US;q=0.6',
+        'dnt': '1',
+        'cookie': '__ddg1_=hiv3SY00KuBX0ApYjacE; _pk_id.1.5bc1=91fc9c392a521176.1655783181.',
+        'referer': url,
+        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"Windows"',
+        'sec-fetch-dest': 'image',
+        'sec-fetch-mode': 'no-cors',
+        'sec-fetch-site': 'same-origin',
+        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
+    }
+
+    data = requests.get(host + link, headers = headers)
+    with open('D:/a.jpg','wb') as f:
+        f.write(data.content)
+
+
+
+DownloadPicOrgin(url)
\ No newline at end of file
diff --git a/demo/quit_test.py b/demo/quit_test.py
new file mode 100644
index 0000000..59b7137
--- /dev/null
+++ b/demo/quit_test.py
@@ -0,0 +1,3 @@
+import os
+print('[main(info)]: All illustrations downloaded!',end = '')
+os.system('pause')  # "Press any key to continue"
\ No newline at end of file
diff --git a/demo/re_test.py b/demo/re_test.py
new file mode 100644
index 0000000..da37c3e
--- /dev/null
+++ b/demo/re_test.py
@@ -0,0 +1,14 @@
+import re
+
+#string = 'https://data8.kemono.party/data/ad/26/ad26917457da07a22f8b2c0adf15b733e3af93d839deaa38a7241797f014397e.png?f=bac77975-f3b2-4388-95db-10ce2787686f.png&%3Aauthority=kemono.party&%3Amethod=GET&%3Ascheme=https&%3Apath=%2Fdata%2Fad%2F26%2Fad26917457da07a22f8b2c0adf15b733e3af93d839deaa38a7241797f014397e.png%3Ff%3Dbac77975-f3b2-4388-95db-10ce2787686f.png&accept=image%2Favif%2Cimage%2Fwebp%2Cimage%2Fapng%2Cimage%2Fsvg%2Bxml%2Cimage%2F%2A%2C%2A%2F%2A%3Bq%3D0.8&accept-encoding=gzip%2C+deflate%2C+br&accept-language=zh-CN%2Czh%3Bq%3D0.9%2Cja%3Bq%3D0.8%2Cen%3Bq%3D0.7%2Cen-US%3Bq%3D0.6&dnt=1&cookie=__ddg1_%3Dhiv3SY00KuBX0ApYjacE%3B+_pk_id.1.5bc1%3D91fc9c392a521176.1655783181.&referer=https%3A%2F%2Fkemono.party%2Ffantia%2Fuser%2F6561%2Fpost%2F784019&sec-ch-ua=%22+Not+A%3BBrand%22%3Bv%3D%2299%22%2C+%22Chromium%22%3Bv%3D%22102%22%2C+%22Google+Chrome%22%3Bv%3D%22102%22&sec-ch-ua-mobile=%3F0&sec-ch-ua-platform=%22Windows%22&sec-fetch-dest=image&sec-fetch-mode=no-cors&sec-fetch-site=same-origin&user-agent=Mozilla%2F5.0+%28Windows+NT+10.0%3B+Win64%3B+x64%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F102.0.0.0+Safari%2F537.36'
+
+#pattern = re.compile(r'.*?(.*?)\?f=')
+#picfile_extensions = re.findall(pattern,string)
+#picfile_extensions = '.' + str(picfile_extensions[0]).split('.')[-1]
+#print(picfile_extensions)
+
+
+string = ['/',':','*','?','<','>','|','\\','"']
+
+
+
diff --git a/demo/requests_test.py b/demo/requests_test.py
new file mode 100644
index 0000000..310279c
--- /dev/null
+++ b/demo/requests_test.py
@@ -0,0 +1,27 @@
+import re
+import requests
+from bs4 import BeautifulSoup
+
+html = requests.get('https://kemono.party/fantia/user/17148/post/1244218').content.decode('utf-8')
+soup = BeautifulSoup(html, 'lxml')
+#data = soup.select('.post__attachment-link')[0].contents[0].replace(' ','')
+data = soup.select('.post__attachment-link')
+
+for dat in data:
+    x = dat.contents[0].replace('\n', '').replace(' ','')
+
+#url = 'url'
+#print('\n [main(info)]: No downloadable resources at this link, skipping... {}'.format(url))
+
+#data = requests.get('https://kemono.party/data/31/8b/318b056fb4479c17dd0bdbf868aa63142ea01b19ec6d8b505212fc3924903065.zip?f=Aqua%28Tier2%29.zip')
+
+
+
+
+
+
+
+
+
+
+
diff --git a/demo/requests_test2.py b/demo/requests_test2.py
new file mode 100644
index 0000000..de16217
--- /dev/null
+++ b/demo/requests_test2.py
@@ -0,0 +1,13 @@
+import requests
+from bs4 import BeautifulSoup
+
+
+html = requests.get('https://kemono.party/fanbox/user/14496985/post/3968336').content.decode('utf-8')
+soup = BeautifulSoup(html, 'lxml')
+data = soup.select('.post__content')[0].contents[0]
+
+print(data)
+
+with open(file = 'a.txt', mode = 'x',encoding = 'utf-8') as f:
+    f.write(data)
+    f.close()  # redundant inside a with-block, but harmless
\ No newline at end of file
diff --git a/demo/requests_test3.py b/demo/requests_test3.py
new file mode 100644
index 0000000..fdfd688
--- /dev/null
+++ b/demo/requests_test3.py
@@ -0,0 +1,11 @@
+import re
+import requests
+from bs4 import BeautifulSoup
+
+
+html = requests.get('https://kemono.party/fantia/user/6561/post/791687').content.decode('utf-8')
+soup = BeautifulSoup(html, 'lxml')
+print(re.findall(re.compile(r'\n+(.*?)\n+'), soup.select('.post__user-name')[0].contents[0])[0].replace(' ', ''))
+
+
+username = re.findall(re.compile(r'\n+(.*?)\n+'), BeautifulSoup(requests.get('https://kemono.party/fantia/user/6561/post/791687').content.decode('utf-8'), 'lxml').select('.post__user-name')[0].contents[0])[0].replace(' ', '')
diff --git a/demo/requests_test4.py b/demo/requests_test4.py
new file mode 100644
index 0000000..4722b17
--- /dev/null
+++ b/demo/requests_test4.py
@@ -0,0 +1,5 @@
+import requests
+
+url = 'https://kemono.party/fantia/user/6561'
+
+l = url.split('/',3)[-1]
diff --git a/demo/split_test.py b/demo/split_test.py
new file mode 100644
index 0000000..2ad1815
--- /dev/null
+++ b/demo/split_test.py
@@ -0,0 +1,63 @@
+# scratch test: split lst into n roughly equal sublists
+from os import remove
+
+
+n = 5
+
+lst = []
+for i in range(125):
+    lst.append(i)
+
+length = len(lst)
+
+def dumper():
+    global n
+    global lst
+    intercept = len(lst) // n
+
+    def foo(lst, intercept):
+        sp_lst = []
+        if len(lst) - intercept * n < intercept:
+            for _ in range(n):
+                sublist = lst[0:intercept]
+                sp_lst.append(sublist)
+                for item in sublist:
+                    lst.remove(item)
+            if lst != []:
+                sp_lst.append(lst)
+        else:
+            intercept = intercept + 1
+            sp_lst = foo(lst, intercept)
+        return sp_lst
+    sp_lst = foo(lst, intercept)
+    if len(sp_lst) > n:
+        m = sp_lst[-1]
+        for n in range(len(m)):
+            sp_lst[-1 - (n + 1)].append(m[0])
+            m.remove(m[0])
+        for p in sp_lst:
+            if p == []:
+                sp_lst.remove(p)
+
+    return sp_lst
+
+print(dumper())
+
+
+    #for _ in range(n):
+    #    sublist = lst[0:intercept]
+
+    #    print(sublist)
+    #    for item in sublist:
+    #        lst.remove(item)
+
+
+
+
+
+
+
+#thread_ = []
+#for i in range(n):
+#    thread = threading.Thread(target=starter).start()
+
diff --git a/demo/str_test.py b/demo/str_test.py
new file mode 100644
index 0000000..3d405f3
--- /dev/null
+++ b/demo/str_test.py
@@ -0,0 +1,12 @@
+lst = [1,2,3]
+def GetFirstElementInList(lst):
+    if type(lst) != list:
+        print('[main(warn)]: wrong variable type, type: {}'.format(type(lst)))
+    else:
+        element = lst[0]
+        lst.remove(element)
+        return element
+
+print(GetFirstElementInList(lst))
+print(lst)
+
diff --git a/demo/tqdm_test.py b/demo/tqdm_test.py
new file mode 100644
index 0000000..dca9e33
--- /dev/null
+++ b/demo/tqdm_test.py
@@ -0,0 +1,8 @@
+from tqdm import tqdm
+from time import sleep
+
+bar = tqdm(['p1','p2','p3','p4','p5'])
+for b in bar:
+    sleep(0.1)
+    bar.set_description("Processing {0}".format(b))
+    
\ No newline at end of file
diff --git a/demo/write_test.py b/demo/write_test.py
new file mode 100644
index 0000000..9dbf173
--- /dev/null
+++ b/demo/write_test.py
@@ -0,0 +1,3 @@
+with open('a.txt', 'w') as f:
+    f.write('a')
+    f.close()  # redundant inside a with-block
\ No newline at end of file
diff --git a/main.exe b/main.exe
new file mode 100644
index 0000000..c39d648
Binary files /dev/null and b/main.exe differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..725d1a2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,501 @@
+# Author: zch9241
+#
+# Copyright: this software (KemonoCrawler) belongs to "zch"; please keep this notice when redistributing. All rights reserved.
+# License: Apache 2.0
+#
+# version: 1.3
+#
+# Changelog:
+# v1.0: first release
+# v1.1: added multi-threaded downloading; added a download progress display
+# v1.2: fixed the progress display; added attachment (file) downloading
+# v1.3: added an optional (default) mode that bundles a post's text (content), pictures and files into a single folder
+#
+#
+
+
+import os
+import re
+import threading
+
+import requests
+from bs4 import BeautifulSoup
+from progress.bar import ChargingBar
+
+
+def GetPageAmount(html):
+    """
+    # Get the total number of listing pages
+    - html: the first listing page
+    - return: total number of listing pages
+    """
+    soup_ = BeautifulSoup(html, 'lxml')
+    data = soup_.select('#paginator-top menu li a')
+    if data == []:
+        print('[main(info)]: 1 listing page in total')  # only one page
+        return 1
+
+    else:
+        lastpage = str(data[-2])  # the element pointing to the last page
+        #print(lastpage)
+        pattern = re.compile(r'(\d+)\n+')  # extract the page number from the element ['(\d+)' captures the digits, '\n+' matches the trailing newlines]
+        pageamount_list = re.findall(pattern = pattern, string = lastpage)
+        pageamount = int(pageamount_list[0])
+        print('[main(info)]: {} listing pages in total'.format(pageamount))
+        return pageamount
+
+def FormatPageLinks(amount):
+    """
+    # Turn the page count into listing-page links
+    - amount: number of listing pages
+    - return: the formatted links
+    """
+    global request_url
+    o_list = []
+    links_list = []
+    for o in range(1, amount + 1):
+        o_ = 25 * (o - 1)
+        o_list.append(o_)
+    for item in o_list:
+        link = request_url + '?o=' + str(item)
+        links_list.append(link)
+    return links_list
+
+def GetEachPage(html_):
+    """
+    # Get the post links on a single listing page
+    - html: one listing page
+    """
+    links = []
+    soup = BeautifulSoup(html_, 'lxml')
+    data = soup.select('.fancy-link[rel="noopener noreferrer"]')
+
+    for link in data:
+        link_orgi = host + link['href']
+        links.append(link_orgi)
+    links.pop()
+    return links
+
+def GetAllpages(links_list):
+    """
+    # Get the post links from every listing page
+    """
+    all_links = []
+    for link in links_list:
+        html = requests.get(link).content.decode('utf-8')
+
+        detail_links = GetEachPage(html_ = html)
+        all_links = all_links + detail_links
+    print('[main(info)]: all post links fetched. count: {}'.format(len(all_links)))
+
+    return all_links
+
+def Downloader():
+    """
+    Download
+    """
+    global all_links
+    global task_done
+    while len(all_links) > 0:
+        url = all_links.pop()
+        html = requests.get(url).content.decode('utf-8')
+        soup = BeautifulSoup(html, 'lxml')
+        data = soup.select('.post__thumbnail a')  # pictures
+        data_file_url = soup.select('.post__attachment-link')  # file URLs
+        title = soup.select('.post__title span')  # e.g. [雫ちゃん!, (Fantia)]
+
+
+        #filedownloader
+        if len(data_file_url) == 1:
+            file_url = host + data_file_url[0]['href']
+            file_name = soup.select('.post__attachment-link')[0].contents[0].replace('\n', '').replace(' ','')
+
+            file_bytes = requests.get(file_url)
+
+            with open(file_path + file_name, 'wb') as f:
+                f.write(file_bytes.content)
+                f.close()
+        elif len(data_file_url) == 0:
+            pass
+        else:
+            file_name_list = []
+            file_url_list = []
+            for file_name_ in data_file_url:
+                file_name = file_name_.contents[0].replace('\n', '').replace(' ','')
+                file_name_list.append(file_name)
+
+            for file_url_ in data_file_url:
+                file_url = host + file_url_['href']
+                file_url_list.append(file_url)
+
+            for i in range(len(file_name_list)):
+                with open(file_path + file_name_list[i], 'wb') as f:
+                    f.write(requests.get(file_url_list[i]).content)
+                    f.close()
+
+
+        #picdownloader
+        if len(title) == 1:
+            picname_ = re.findall(re.compile(r'\+(.*?)\+'), str(title[0]))[0]
+        else:
+            picname_ = ''
+            for subtitle in title:
+                subtitle = str(subtitle)
+                picname = re.findall(re.compile(r'\+(.*?)\+'),subtitle)
+                picname_ = picname_ + picname[0]
+
+        if len(data) == 1:
+            trumbpic = data[0]
+            link = trumbpic['href']
+            headers = {
+                # ':authority': 'kemono.party',  # HTTP/2 pseudo-headers can't be sent as normal request headers; kept for reference
+                # ':method': 'GET',
+                # ':scheme': 'https',
+                # ':path': link,
+                'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+                'accept-encoding': 'gzip, deflate, br',
+                'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,en;q=0.7,en-US;q=0.6',
+                'dnt': '1',
+                'cookie': '__ddg1_=hiv3SY00KuBX0ApYjacE; _pk_id.1.5bc1=91fc9c392a521176.1655783181.',
+                'referer': url,
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"',
+                'sec-fetch-dest': 'image',
+                'sec-fetch-mode': 'no-cors',
+                'sec-fetch-site': 'same-origin',
+                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
+            }
+            data__ = requests.get(host + link, headers = headers)
+
+            picfile_extensions = re.findall(re.compile(r'.*?(.*?)\?f='), data__.url)
+            picfile_extensions = '.' + str(picfile_extensions[0]).split('.')[-1]
+            pic = picname_ + picfile_extensions
+            for item in string:
+                pic = pic.replace(item, '')
+            #print('[main(info)]: about to download {}'.format(pic), end = '')
+
+            with open(pic_path + pic, 'wb') as f:
+                f.write(data__.content)
+                f.close()
+            #print('...done')
+        elif len(data) == 0:  # empty list, i.e. no pictures
+            pass
+        else:
+            i = 0
+            for trumbpic in data:
+                i = i + 1
+                link = trumbpic['href']
+                headers = {
+                    # ':authority': 'kemono.party',  # HTTP/2 pseudo-headers can't be sent as normal request headers; kept for reference
+                    # ':method': 'GET',
+                    # ':scheme': 'https',
+                    # ':path': link,
+                    'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+                    'accept-encoding': 'gzip, deflate, br',
+                    'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,en;q=0.7,en-US;q=0.6',
+                    'dnt': '1',
+                    'cookie': '__ddg1_=hiv3SY00KuBX0ApYjacE; _pk_id.1.5bc1=91fc9c392a521176.1655783181.',
+                    'referer': url,
+                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
+                    'sec-ch-ua-mobile': '?0',
+                    'sec-ch-ua-platform': '"Windows"',
+                    'sec-fetch-dest': 'image',
+                    'sec-fetch-mode': 'no-cors',
+                    'sec-fetch-site': 'same-origin',
+                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
+                }
+                data__ = requests.get(host + link, headers = headers)
+
+                picfile_extensions = re.findall(re.compile(r'.*?(.*?)\?f='), data__.url)
+                picfile_extensions = '.' + str(picfile_extensions[0]).split('.')[-1]
+                pic = picname_ + '_' + str(i) + picfile_extensions
+                for item in string:
+                    pic = pic.replace(item, '')
+                #print('[main(info)]: about to download {}'.format(pic), end = '')
+
+                with open(pic_path + pic, 'wb') as f:
+                    f.write(data__.content)
+                    f.close()
+                #print('...done')
+        if len(data_file_url) == 0 and len(data) == 0:
+            print('\n----[main(info)]: No downloadable resources at this link, skipping... {}'.format(url))
+        # progress counter
+        task_done += 1
+
+def Downloader_(ignore_texts):
+    """
+    Download (each post goes into its own folder)
+    """
+    global all_links
+    global task_done
+
+    if len(all_links) > 0:
+        url = all_links.pop()
+        html = requests.get(url).content.decode('utf-8')
+        soup = BeautifulSoup(html, 'lxml')
+        data = soup.select('.post__thumbnail a')  # pictures
+        data_file_url = soup.select('.post__attachment-link')  # file URLs
+        title = soup.select('.post__title span')  # e.g. [雫ちゃん!, (Fantia)]
+
+
+        if len(title) == 1:
+            foldertitle = re.findall(re.compile(r'\+(.*?)\+'), str(title[0]))[0]
+        else:
+            foldertitle = ''
+            for subtitle in title:
+                subtitle = str(subtitle)
+                title_ = re.findall(re.compile(r'\+(.*?)\+'),subtitle)
+                foldertitle = foldertitle + title_[0]
+
+        # build the absolute save path
+        Absolute_path = file_path + username + '\\' + foldertitle + '\\'
+        try:
+            os.mkdir(Absolute_path)
+        except FileExistsError:
+            pass
+
+        #textdownloader
+        if ignore_texts == False:
+            #data_contents = soup.select('.post__content')[0].contents[0]
+            _data = soup.select('.post__content')
+            if _data == []:
+                pass
+            else:
+                data_contents = _data[0].contents[0]
+
+                with open(file = Absolute_path + 'content.txt', mode = 'w', encoding = 'utf-8') as f:
+                    f.write(data_contents)
+                    f.close()
+
+        #filedownloader
+        if len(data_file_url) == 1:
+            file_url = host + data_file_url[0]['href']
+            file_name = soup.select('.post__attachment-link')[0].contents[0].replace('\n', '').replace(' ','')
+
+            file_bytes = requests.get(file_url)
+
+            with open(Absolute_path + file_name, 'wb') as f:
+                f.write(file_bytes.content)
+                f.close()
+        elif len(data_file_url) == 0:
+            pass
+        else:
+            file_name_list = []
+            file_url_list = []
+            for file_name_ in data_file_url:
+                file_name = file_name_.contents[0].replace('\n', '').replace(' ','')
+                file_name_list.append(file_name)
+
+            for file_url_ in data_file_url:
+                file_url = host + file_url_['href']
+                file_url_list.append(file_url)
+
+            for i in range(len(file_name_list)):
+                with open(Absolute_path + file_name_list[i], 'wb') as f:
+                    f.write(requests.get(file_url_list[i]).content)
+                    f.close()
+
+
+        #picdownloader
+        if len(data) == 1:
+            trumbpic = data[0]
+            link = trumbpic['href']
+            headers = {
+                # ':authority': 'kemono.party',  # HTTP/2 pseudo-headers can't be sent as normal request headers; kept for reference
+                # ':method': 'GET',
+                # ':scheme': 'https',
+                # ':path': link,
+                'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+                'accept-encoding': 'gzip, deflate, br',
+                'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,en;q=0.7,en-US;q=0.6',
+                'dnt': '1',
+                'cookie': '__ddg1_=hiv3SY00KuBX0ApYjacE; _pk_id.1.5bc1=91fc9c392a521176.1655783181.',
+                'referer': url,
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"',
+                'sec-fetch-dest': 'image',
+                'sec-fetch-mode': 'no-cors',
+                'sec-fetch-site': 'same-origin',
+                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
+            }
+            data__ = requests.get(host + link, headers = headers)
+
+            picfile_extensions = re.findall(re.compile(r'.*?(.*?)\?f='), data__.url)
+            picfile_extensions = '.' + str(picfile_extensions[0]).split('.')[-1]
+            pic = '0' + picfile_extensions
+            for item in string:
+                pic = pic.replace(item, '')
+            #print('[main(info)]: about to download {}'.format(pic), end = '')
+
+            with open(Absolute_path + pic, 'wb') as f:
+                f.write(data__.content)
+                f.close()
+            #print('...done')
+        elif len(data) == 0:  # empty list, i.e. no pictures
+            pass
+        else:
+            i = 0
+            for trumbpic in data:
+                i = i + 1
+                link = trumbpic['href']
+                headers = {
+                    # ':authority': 'kemono.party',  # HTTP/2 pseudo-headers can't be sent as normal request headers; kept for reference
+                    # ':method': 'GET',
+                    # ':scheme': 'https',
+                    # ':path': link,
+                    'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+                    'accept-encoding': 'gzip, deflate, br',
+                    'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,en;q=0.7,en-US;q=0.6',
+                    'dnt': '1',
+                    'cookie': '__ddg1_=hiv3SY00KuBX0ApYjacE; _pk_id.1.5bc1=91fc9c392a521176.1655783181.',
+                    'referer': url,
+                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
+                    'sec-ch-ua-mobile': '?0',
+                    'sec-ch-ua-platform': '"Windows"',
+                    'sec-fetch-dest': 'image',
+                    'sec-fetch-mode': 'no-cors',
+                    'sec-fetch-site': 'same-origin',
+                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
+                }
+                data__ = requests.get(host + link, headers = headers)
+
+                picfile_extensions = re.findall(re.compile(r'.*?(.*?)\?f='), data__.url)
+                picfile_extensions = '.' + str(picfile_extensions[0]).split('.')[-1]
+                pic = str(i) + picfile_extensions
+                for item in string:
+                    pic = pic.replace(item, '')
+                #print('[main(info)]: about to download {}'.format(pic), end = '')
+
+                with open(Absolute_path + pic, 'wb') as f:
+                    f.write(data__.content)
+                    f.close()
+                #print('...done')
+
+        # progress counter
+        task_done += 1
+
+def jutils(mode):
+    global file_path
+    project_path = os.getcwd()
+    try:
+        #print('[main(info)]: trying to create the Downloads folder...')
+        os.mkdir(project_path + '\\Downloads\\')
+    except FileExistsError:
+        pass
+
+    if mode == 0:
+        global pic_path
+
+        pic_path = project_path + '\\Downloads\\pics\\'
+        file_path = project_path + '\\Downloads\\file\\'
+
+        try:
+            os.mkdir(pic_path)
+        except FileExistsError:
+            pass
+        try:
+            os.mkdir(file_path)
+        except FileExistsError:
+            pass
+
+    else:  # mode == 1
+        file_path = project_path + '\\Downloads\\'
+        try:
+            os.mkdir(file_path)
+        except:
+            pass
+
+def switcher(mode = None,ignore_texts = None):
+    """
+    - mode = 0: download pictures and files into separate folders
+    - mode = 1: bundle pictures and files per post, and download the post text as well
+    """
+    while all_links:  # exit once every post link has been claimed, so the worker threads can be joined
+        if mode is not None:
+            if mode == 0:
+                Downloader()
+            elif mode == 1:
+                if ignore_texts != None:
+                    if ignore_texts == False:
+                        Downloader_(False)
+                    elif ignore_texts == True:
+                        Downloader_(True)
+                else:
+                    print('[main(warn)]: invalid option ignore_texts = {}'.format(ignore_texts))
+            else:
+                print('[main(warn)]: invalid mode mode = {}'.format(mode))
+        else:
+            Downloader()
+
+def bar_(max):
+    task_done_ = 0
+    with ChargingBar('Download progress ({} links in total): '.format(max), max = max) as bar:
+        while True:
+            if task_done - task_done_ > 0:
+                for _ in range(task_done - task_done_):
+                    bar.next()
+                task_done_ = task_done
+            elif task_done_ == max:
+                break
+
+
+if __name__ == '__main__':
+    #----config----
+    # mode = 0: download pictures and files into separate folders
+    # mode = 1: bundle pictures and files per post
+    mode = 1
+    # ignore_texts: whether to skip downloading the post text (only used when mode = 1)
+    ignore_texts = False
+    # n: number of download threads
+    n = 5
+    #----config_end----
+
+    pic_path = ''
+    file_path = ''
+    task_done = 0
+    string = ['/',':','*','?','<','>','|','\\','"']
+    host = 'https://kemono.party'
+
+    print('[main(info)]: config: mode = {} \n ignore_texts = {} \n n = {}'.format(mode, ignore_texts, n))
+
+    #request_url = 'https://kemono.party/fantia/user/6561'
+    request_url = str(input('[main(input)]: please enter the full creator URL: '))
+    html = requests.get(request_url)
+    status = html.status_code
+
+    # check the network connection
+    if status == 200:
+        print('[main(info)]: connected successfully!')
+        html = html.content.decode('utf-8')
+    else:
+        print('[main(warning)]: please check your network connection, status code: {}'.format(status))
+
+    pageamount = GetPageAmount(html = html)
+    links_list = FormatPageLinks(amount = pageamount)
+    all_links = GetAllpages(links_list = links_list)
+
+    jutils(mode = mode)
+    username = re.findall(re.compile(r'\n+(.*?)\n+'), BeautifulSoup(requests.get(all_links[0]).content.decode('utf-8'), 'lxml').select('.post__user-name')[0].contents[0])[0].replace(' ', '')
+    try:
+        os.mkdir(file_path + username + '\\')
+    except FileExistsError:
+        pass
+
+    threads__ = []
+    for i in range(n):
+        thread = threading.Thread(target = switcher, args = (mode, ignore_texts))
+        threads__.append(thread)
+    thread_n = threading.Thread(target = bar_, args = (len(all_links),))
+
+    for a in threads__:
+        a.start()
+    thread_n.start()
+
+    for b in threads__:
+        b.join()
+    thread_n.join()
+
+    print('\n[main(info)]: All illustrations downloaded!',end = '')
+    os.system('pause')