Skip to content

Commit

Permalink
Features | 更新部分功能,详情看CHANGELOG
Browse files Browse the repository at this point in the history
  • Loading branch information
panyi committed Feb 12, 2023
1 parent 80a14dd commit c89c356
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 41 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# CHANGELOG 代码变更记录

### 1.2.1

+ 添加`QUESTIONS.md`问题集合文件
+ 增加断点重连机制,重连最大次数为3,提高系统运行稳定性
+ 增加多账户
+ 部分文件的微小更新(fofa_useragent.py、README.md 等)

### 1.2.0

+ 添加CHANGELOG文件
Expand Down
9 changes: 9 additions & 0 deletions QUESTIONS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# 问题集合

> 罗列可能使用过程中遇到的问题
### ERROR: Could not build wheels for opencv-python-headless, which is required to install pyproject.toml-based projects

解决方法

[【Bug】ERROR: Could not build wheels for opencv-python, which is required to install pyproject.toml-ba](https://blog.csdn.net/AugustMe/article/details/126402049)
45 changes: 38 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@

### 简介

基于[fofa_spider-1.0.5](https://github.com/FightingForWhat/fofa_spider-1.0.5) - 非付费会员,fofa数据无限抓取版 的梅开二度,配置普通用户cookie即可使用
基于[fofa_spider-1.0.5](https://github.com/FightingForWhat/fofa_spider-1.0.5) - 非付费会员,fofa数据无限抓取版 的梅开二度,配置普通用户账号密码即可使用

截至 `2023-2-12`,亲测可用;如果项目失效了,欢迎联系我



### 使用

```shell
Expand All @@ -20,9 +18,19 @@ git clone https://github.com/Cl0udG0d/Fofa-hack
pip install -r requirements.txt
```

配置`config.py`中的
+ fofa_username = ""
+ fofa_password = ""
配置`config.py`中的`fofa_account`,支持多账号
```python
fofa_account=[
{
"fofa_username" : "test@email.com",
"fofa_password" : "12345678"
},
{
"fofa_username" : "test1@email.com",
"fofa_password" : "12345678"
}
]
```

也就是你的FOFA账号密码(老版本直接复制cookie的方式已弃用)

Expand All @@ -32,13 +40,36 @@ pip install -r requirements.txt
爬取的结果会存储到`搜索关键字_运行时间戳.txt`文件中


### 参数
<details>
<summary>config参数详情</summary>
<table >
<tr>
<td>参数值</td>
<td>释义</td>
</tr>
<tr>
<td>VERSION_NUM</td>
<td>Fofa-hack 版本号</td>
</tr>
<tr>
<td>MAX_LOGIN_RETRY_NUM</td>
<td>登录最大重试次数</td>
</tr>
<tr>
<td>MAX_MATCH_RETRY_NUM</td>
<td>页面URL获取最大重试次数</td>
</tr>
</table>
</details>

### 测试

输入 搜索关键字 `app="ThinkPHP"`,等待秒数为5的情况下,下载1-50页数据经过测试无问题,经过自动去重之后剩余497条

### 使用问题集合

详情请见[QUESTIONS](QUESTIONS.md)

### 更新日志

Expand Down
13 changes: 10 additions & 3 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
# @Github: https://github.com/Cl0udG0d


fofa_username = ""
fofa_password = ""
fofa_account=[
{
"fofa_username" : "test@email.com",
"fofa_password" : "12345678"
},
]



SearchKEY = ""
Expand Down Expand Up @@ -40,6 +45,8 @@
]

# Fofa-hack 版本号
VERSION_NUM="1.2.0"
VERSION_NUM="1.2.1"
# 登录最大重试次数
MAX_LOGIN_RETRY_NUM=3
# 页面URL获取最大重试次数
MAX_MATCH_RETRY_NUM=3
147 changes: 118 additions & 29 deletions fofa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@

filename=""

# 账户指针
ACCOUNT_INDEX=0





import re, requests
Expand Down Expand Up @@ -48,14 +53,25 @@ def __init__(self):
'''.format(config.VERSION_NUM))

def fofa_captcha(self, src):
"""
Recognize the captcha image shown on the FOFA login page.

Downloads the image at https://i.nosec.org + src through self.session and
passes the response bytes to a ddddocr OCR instance.
:param src: path of the captcha image, appended to https://i.nosec.org
:return: whatever ocr.classification() returns for the downloaded bytes
"""
import ddddocr
# NOTE(review): ocr is assigned twice in a row, so only the second instance
# (created without show_ad) is actually used. In this diff view the two lines
# look like the removed and added versions of one statement - confirm against
# the real file.
ocr = ddddocr.DdddOcr(show_ad=False)
ocr = ddddocr.DdddOcr()

captcha_api = f'https://i.nosec.org{src}'
resp = self.session.get(url=captcha_api, headers=fofa_useragent.getFofaCaptchaHeaders())
return ocr.classification(resp.content)

def fofa_login(self, fofa_username, fofa_password):
"""
使用FOFA账号密码进行登录
:param fofa_username:
:param fofa_password:
:return:
"""
print('尝试登录')
TEMP_RETRY_NUM=0
while TEMP_RETRY_NUM<config.MAX_LOGIN_RETRY_NUM:
Expand Down Expand Up @@ -95,15 +111,20 @@ def fofa_login(self, fofa_username, fofa_password):
with open('fofa_cookie.txt', 'w') as f:
f.write(tempstr)
return self.session.cookies, 1
except:
except Exception as e:
# print(e)
TEMP_RETRY_NUM+=1
print('[-] 第{}次尝试登录'.format(TEMP_RETRY_NUM))
pass
print('[-] FOFA登录失败,请检查相关配置,即将退出程序')
exit(0)
print('[-] FOFA登录失败,即将切换账号进行尝试')
raise

def check_login(self, cookies):

"""
检测cookie是否生效
:param cookies:
:return:
"""
resp = requests.get(url='https://fofa.info/result?qbase64=MQ==&page=2&page_size=10', headers=fofa_useragent.getCheckHeaders(cookies))
tree = etree.HTML(resp.text)
urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
Expand Down Expand Up @@ -136,7 +157,7 @@ def init(self):

def get_page_num(self, search_key,cookie):
# 获取页码
headers_use = self.headers(cookie)
headers_use = fofa_useragent.getFofaPageNumHeaders(cookie)
searchbs64 = base64.b64encode(f'{search_key}'.encode()).decode()
print("[*] 爬取页面为:https://fofa.info/result?qbase64=" + searchbs64)
html = requests.get(url="https://fofa.info/result?qbase64=" + searchbs64, headers=headers_use).text
Expand All @@ -160,28 +181,81 @@ def getTimeList(self, text):
return timelist

def fofa_spider_page(self, page, search_key, searchbs64, headers_use, turn_num):
# Fetch
"""
Fetch the result URLs found on a single FOFA result page.

Requests page `page` of the query encoded in `searchbs64`, extracts the host
links and the timestamps (via self.getTimeList), appends the links to the
file named by the module-level `filename`, and extends the global
host_list / timestamp_list. `search_key` is not referenced in the visible
body; `turn_num` is only used for the progress message.
:rtype: object
"""
global host_list
global timestamp_list

# Failure counter for page requests; it is set once here, before the account loop.
TEMP_RETRY_NUM=0
# Progress message: the page number shown is computed as 5 * turn_num + page.
print("[*] 正在爬取第" + str(5 * int(turn_num) + int(page)) + "页")
# NOTE(review): the statements from here to print(urllist) are repeated further
# down using temp_headers. In this diff view they look like the pre-change lines
# of the hunk - confirm against the real file.
request_url = 'https://fofa.info/result?qbase64=' + searchbs64 + '&full=false&page=' + str(
page) + "&page_size=10"
# print(f'request_url:{request_url}')
rep = requests.get(request_url, headers=headers_use)
tree = etree.HTML(rep.text)
urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
timelist = self.getTimeList(rep.text)
print(urllist)
# Module-level index into config.fofa_account; refresh_cookie() also advances it.
global ACCOUNT_INDEX
# Keep going while ACCOUNT_INDEX still points at a configured account.
while ACCOUNT_INDEX < len(config.fofa_account):
# With ACCOUNT_INDEX == 0 the caller's headers are reused; otherwise they are
# rebuilt from the stored cookie through getNewHeaders().
temp_headers=headers_use if ACCOUNT_INDEX==0 else self.getNewHeaders()
# Retry until config.MAX_MATCH_RETRY_NUM failures have been counted.
while TEMP_RETRY_NUM < config.MAX_MATCH_RETRY_NUM:
try:
request_url = 'https://fofa.info/result?qbase64=' + searchbs64 + '&full=false&page=' + str(
page) + "&page_size=10"
# print(f'request_url:{request_url}')
rep = requests.get(request_url, headers=temp_headers)
tree = etree.HTML(rep.text)
urllist = tree.xpath('//span[@class="hsxa-host"]/a/@href')
timelist = self.getTimeList(rep.text)
print(urllist)

# Append every extracted link to the output file, one per line.
for i in urllist:
with open(filename, 'a+') as f:
f.write(i + "\n")
host_list.extend(urllist)
timestamp_list.extend(timelist)

# Wait config.TimeSleep seconds before returning to the caller.
time.sleep(config.TimeSleep)
return
# NOTE(review): bare except - every kind of exception is counted as a failed attempt.
except:
TEMP_RETRY_NUM+=1
print('[-] 第{}次尝试获取页面URL'.format(TEMP_RETRY_NUM))
pass
# NOTE(review): the value returned by refresh_cookie() is discarded, and
# TEMP_RETRY_NUM is not reset anywhere in the visible lines - check whether
# accounts after the first actually get their own retries.
self.refresh_cookie()

print('[-] FOFA资源获取重试超过最大次数,程序退出')
exit(0)

# NOTE(review): the statements below repeat the write / extend / sleep sequence
# above and follow exit(0); presumably they are pre-change lines of the diff - confirm.
for i in urllist:
with open(filename, 'a+') as f:
f.write(i + "\n")
host_list.extend(urllist)
timestamp_list.extend(timelist)

time.sleep(config.TimeSleep)
return

def refresh_cookie(self):
"""
Switch to the next configured FOFA account when data can no longer be fetched.

Steps performed by the visible code:
1. advance the module-level ACCOUNT_INDEX to the next account;
2. log in with that account through self.fofa_login(), which is relied on to
refresh the stored cookie (the visible part of fofa_login writes
fofa_cookie.txt);
3. read the cookie back with self.cookie_info() and return request headers
built from it.
If no remaining account produces headers, a failure message is printed and
exit(0) is called.
:return: headers from fofa_useragent.getFofaPageNumHeaders(cookie) - note that
this is a headers dict, not the raw cookie
"""
global ACCOUNT_INDEX
# Move past the account that was in use when this method was called.
ACCOUNT_INDEX += 1

while ACCOUNT_INDEX < len(config.fofa_account):
username = config.fofa_account[ACCOUNT_INDEX]["fofa_username"]
password = config.fofa_account[ACCOUNT_INDEX]["fofa_password"]
# print("username:{};password:{}".format(username, password))
try:
# fofa_login() is indexed with [1]; a value of 1 is treated as a successful login.
if self.fofa_login(username, password)[1] == 1:
cookie=self.cookie_info()
return fofa_useragent.getFofaPageNumHeaders(cookie)
except Exception as e:
print(e)
# Login raised: try the next configured account, if there is one.
ACCOUNT_INDEX += 1
if ACCOUNT_INDEX < len(config.fofa_account):
print("[*] 切换账号:{}".format(config.fofa_account[ACCOUNT_INDEX]["fofa_username"]))
pass
# NOTE(review): indentation is lost in this view, so it is unclear whether this
# else belongs to the if above or to the try - confirm against the real file.
else:
break
print("[-] 账号无法登录,程序退出")
# NOTE(review): exits with status 0 even though this is the failure path.
exit(0)

def getNewHeaders(self):
    """
    Build page-request headers from the cookie currently returned by cookie_info().
    :return: the headers produced by fofa_useragent.getFofaPageNumHeaders
    """
    return fofa_useragent.getFofaPageNumHeaders(self.cookie_info())


def fofa_spider(self, search_key, searchbs64, headers_use):
global host_list
Expand Down Expand Up @@ -264,12 +338,27 @@ def main(self):
urllist, cookie = self.check_login(self.cookie_info())
if urllist == 0:
print("未登录")
if self.fofa_login(config.fofa_username, config.fofa_password)[1] == 1:
print('开始搜索')
self.run(self.cookie_info())
print('退出')
else:
exit(0)
global ACCOUNT_INDEX
while ACCOUNT_INDEX < len(config.fofa_account):
username=config.fofa_account[ACCOUNT_INDEX]["fofa_username"]
password = config.fofa_account[ACCOUNT_INDEX]["fofa_password"]
# print("username:{};password:{}".format(username,password))
try:
if self.fofa_login(username,password)[1] == 1:
print('[*] 开始搜索')
self.run(self.cookie_info())
print('[*] 运行结束')
exit(0)
except Exception as e:
print(e)
ACCOUNT_INDEX += 1
if ACCOUNT_INDEX < len(config.fofa_account):
print("[*] 切换账号:{}".format(config.fofa_account[ACCOUNT_INDEX]["fofa_username"]))
pass
else:
break
print("[-] 账号无法登录,程序退出")
exit(0)
else:
print('已经登录')
self.run(cookie)
Expand Down
11 changes: 10 additions & 1 deletion fofa_useragent.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,13 @@ def getCheckHeaders(cookies):
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Cookie': cookies,
}
return check_headers
return check_headers


def getFofaPageNumHeaders(cookie):
    """
    Build the request headers used when fetching FOFA result pages.
    :param cookie: cookie string to send with the request
    :return: dict with the 'User-Agent', 'Accept' and 'cookie' headers
    """
    user_agent = getFakeUserAgent()
    # Re-express the UTF-8 bytes of the cookie as latin-1 text; presumably this
    # lets non-latin-1 characters pass through the HTTP header encoding - confirm.
    cookie_value = cookie.encode("utf-8").decode("latin1")
    return {
        'User-Agent': user_agent,
        'Accept': 'application/json, text/plain, */*',
        "cookie": cookie_value,
    }
Binary file removed images/2.png
Binary file not shown.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ idna==3.4
lxml==4.9.2
requests==2.27.1
urllib3==1.26.13
ddddocr==1.4.7
ddddocr==1.2.0

0 comments on commit c89c356

Please sign in to comment.