-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.py
57 lines (53 loc) · 1.75 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import requests
import os
from bs4 import BeautifulSoup
from queue import Queue
from threading import Thread
from multiprocessing import Process,Queue
import queue  # imported by module name: the bare name `Queue` is rebound to multiprocessing.Queue by the imports above

# Root URL of the exposed .git directory listing. "域名" ("domain name") looks
# like a placeholder for the target host -- TODO confirm before running.
base_url = "域名/.git/"

# Work queue of directory paths (relative to base_url) that still have to be
# listed. The workers are threads, so the thread-safe queue.Queue is used;
# multiprocessing.Queue.empty() may still report "empty" right after a put(),
# which can end the crawl before it starts.
q = queue.Queue(maxsize=99999)

# Seed the crawl with the root directory (empty relative path).
q.put("")
def _is_unsafe_href(href):
    """Return True if *href* is absolute or contains a ``..`` path segment."""
    return href.startswith("/") or ".." in href.split("/")


def _is_known_error_page(file_path):
    """Return True if *file_path* exists and its text contains "系统发生错误"."""
    try:
        with open(file_path, "r") as f:
            return "系统发生错误" in f.read()
    except FileNotFoundError:
        return False
    except UnicodeDecodeError:
        # Content that cannot be decoded as text is not treated as an error
        # page, so the caller fetches the file again.
        return False


def _download_file(url, dir_path, href):
    """Fetch ``url + href`` and write the response body to ``dir_path/href``."""
    file_url = "%s%s" % (url, href)
    file_path = os.path.join(dir_path, href)
    # NOTE(review): a local copy that contains the error marker is skipped
    # rather than re-fetched; this mirrors the original logic -- confirm that
    # this is the intended direction of the check.
    if _is_known_error_page(file_path):
        return
    r = requests.get(file_url)
    if r.status_code != 200:
        # Do not store the body of a failed request as if it were the file.
        print("error:%s" % file_url)
        return
    # Create the directory the file is actually written to; exist_ok avoids a
    # FileExistsError when two workers create the same directory concurrently.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    print("download %s" % file_path)
    with open(file_path, 'wb') as f:
        f.write(r.content)


def file_tree(idle_timeout=2.0):
    """Crawl the directory listing under ``base_url`` and mirror its files.

    Repeatedly takes a relative directory path from the module-level queue
    ``q``, requests ``base_url + path``, and walks every ``<a>`` element of
    the returned HTML: an href ending in ``/`` is queued as a sub-directory,
    any other href is downloaded to ``create/.git/<path>/<href>``.

    Args:
        idle_timeout: Seconds to wait for a new queue entry before this
            worker gives up and returns. Waiting (instead of checking
            ``q.empty()`` and then calling a blocking ``q.get()``) keeps idle
            workers available while another worker is still discovering
            directories, and cannot block forever.

    Returns:
        None.
    """
    from queue import Empty  # raised by queue.Queue and multiprocessing.Queue alike

    while True:
        try:
            path = q.get(timeout=idle_timeout)
        except Empty:
            return
        url = "%s%s" % (base_url, path)
        print("file tree:%s" % url)
        res = requests.get(url)
        soup = BeautifulSoup(res.text, 'html.parser')
        dir_path = os.path.join('create/.git/', path)
        for t in soup.find_all('a'):
            href = t.get("href")
            if not href:
                # Anchor without a usable target.
                continue
            if href == "../":
                print("../ continue")
                continue
            if _is_unsafe_href(href):
                # hrefs come from the remote server; never follow or write
                # outside the mirrored tree.
                print("skip unsafe href:%s" % href)
                continue
            if href.endswith("/"):
                q.put("%s%s" % (path, href))
            else:
                _download_file(url, dir_path, href)
# Run the crawl on 16 worker threads that share the module-level queue `q`.
# The result of file_tree() must not be assigned to `q`: the function returns
# None, and rebinding `q` to None makes every worker fail on its first queue
# access.
pool = [Thread(target=file_tree) for _ in range(16)]
for worker in pool:
    worker.start()
# Block until every worker has returned.
for worker in pool:
    worker.join()