diff --git a/OLD_README.md b/OLD_README.md new file mode 100644 index 0000000..eacaf9e --- /dev/null +++ b/OLD_README.md @@ -0,0 +1,182 @@ +# Fofa-hack + +### 简介 + +非付费会员,fofa数据无限抓取版 , 配置FoFa普通用户账号密码即可使用 + +截止至 `2023-2-25` 日 ,亲测可用,如果项目不行了欢迎联系我 + +使用示例 +> python fofa.py --username fofa_hack_test --password Test123456 -k app="ThinkPHP" -t 5 +### 安装 + +```shell +git clone https://github.com/Cl0udG0d/Fofa-hack +``` + +安装运行所需的库文件,国内请加源 https://pypi.tuna.tsinghua.edu.cn/simple + +```shell +pip install -r requirements.txt +``` + +### 配置 +有三种方式配置登录账号 + +#### 1.运行传值 +传入`--username` 和 `--password` 参数 +> Fofa-hack>python fofa.py --username fofa_hack_test --password Test123 + +#### 2.配置config.py +配置`config.py`中的`fofa_account`,支持多账号 +```json +fofa_account=[ + { + "fofa_username" : "test@email.com", + "fofa_password" : "12345678" + }, + { + "fofa_username" : "test1@email.com", + "fofa_password" : "12345678" + } +] +``` + +也就是你的FOFA账号密码 + +#### 3.配置fofa_cookie.txt文件 +将下图中的cookie直接复制到`fofa_cookie.txt`文件里,这样在启动的时候Fofa-hack就会识别到并且直接使用cookie爬取了 + +注意不是`Authorization` + +`cookie`的位置如下 + ![](https://github.com/Cl0udG0d/Fofa-script/blob/master/images/2.png) + +### 运行 + +运行`fofa.py` , `-k`或`--keyword` 参数传入搜索关键字 + +更多参数查看 `--help` + +> python3 fofa.py --help + +```shell +Fofa-hack>python fofa.py --help + + ____ ____ ____ ____ + | ===|/ () \| ===|/ () \ + |__| \____/|__| /__/\__\ + _ _ ____ ____ __ __ + | |_| | / () \ / (__`| |/ / + |_| |_|/__/\__\\____)|__|\__\ V1.3.0 + +usage: fofa.py [-h] [--timesleep TIMESLEEP] --keyword KEYWORD + [--username USERNAME] [--password PASSWORD] [--endpage ENDPAGE] + [--level LEVEL] + +Fofa-hack v1.3.0 使用说明 + +optional arguments: + -h, --help show this help message and exit + --timesleep TIMESLEEP, -t TIMESLEEP + 爬取每一页等待秒数,防止IP被Ban,默认为3 + --keyword KEYWORD, -k KEYWORD + fofa搜索关键字,默认为test + --username USERNAME, -u USERNAME + fofa用户名 + --password PASSWORD, -p PASSWORD + fofa密码 + --endpage ENDPAGE, -e ENDPAGE + 爬取结束页码 + --level LEVEL, -l LEVEL + 爬取等级: 1-3 
,数字越大内容越详细,默认为 1 +``` + +爬取的结果会存储到`md5(搜索关键字)_运行时间戳.txt`文件中 + +### 测试 + +输入 搜索关键字 `app="ThinkPHP"`,等待秒数为5的情况下,下载1-50页数据经过测试无问题,经过自动去重之后剩余497条 + +### 赞赏列表 + +详情请见[SPONSOR](docs/SPONSOR.md) + +### 使用问题集合 + +详情请见[QUESTIONS](docs/QUESTIONS.md) + ++ [ERROR: Could not build wheels for opencv-python-headless, which is required to install pyproject.toml-based projects](docs/QUESTIONS.md#opencv-python错误) ++ [ddddocr错误解决](docs/QUESTIONS.md#ddddocr错误解决) ++ [FOFA综合语法使用](docs/QUESTIONS.md#FOFA综合语法使用) + +### 更新日志 + +详情请见[CHANGELOG](docs/CHANGELOG.md) + +### TODO List +
+TODO + + + + + + + + + + + + + + + + + + + + + + + + + +
名称 | 简介
支持代理池 | 使用代理池的方式防止FOFA断开连接
支持多种导出格式 | 支持json、txt、excel等方式导出结果
编写图形化界面 | 生成可执行文件运行
增加程序稳定性 | 防止程序因为各种情况运行失败或者被ban的情况
内容去重 | 去除重复的url信息
+
+ +### 贡献者 + + + + + + +
+ + Cl0udG0d +
+ 潘一二三 +
+
+ + wanswu +
+ Wans +
+
+ + +### END + +网络乞丐在线乞讨 +
+ PNG +
+ +建了一个微信的安全交流群,欢迎添加我微信备注`进群`,一起来聊天吹水哇,以及一个会发布安全相关内容的公众号,欢迎关注 :) + +
+ GIF + GIF +
diff --git a/README.md b/README.md index 33d5f1e..01e9696 100644 --- a/README.md +++ b/README.md @@ -2,101 +2,15 @@ ### 简介 -非付费会员,fofa数据无限抓取版 , 配置FoFa普通用户账号密码即可使用 +`2023-2-26`因FOFA对于普通用户的限制,停止该项目的更新 +![fofa](./images/fofa.jpg) -截止至 `2023-2-25` 日 ,亲测可用,如果项目不行了欢迎联系我 +结论是 `FOFA-Hack` 对于普通用户每个月最多能获取到3000条数据,在这个数据量内该项目是有效的 -使用示例 -> python fofa.py --username fofa_hack_test --password Test123456 -k app="ThinkPHP" -t 5 -### 安装 +不过对我来说这样 `FOFA-Hack` 的存在就没有意义了,所以在没有找到更好的方式之前,该项目不会再进行更新 +![result](./images/result.jpg) -```shell -git clone https://github.com/Cl0udG0d/Fofa-hack -``` - -安装运行所需的库文件,国内请加源 https://pypi.tuna.tsinghua.edu.cn/simple - -```shell -pip install -r requirements.txt -``` - -### 配置 -有三种方式配置登录账号 - -#### 1.运行传值 -传入`--username` 和 `--password` 参数 -> Fofa-hack>python fofa.py --username fofa_hack_test --password Test123 - -#### 2.配置config.py -配置`config.py`中的`fofa_account`,支持多账号 -```json -fofa_account=[ - { - "fofa_username" : "test@email.com", - "fofa_password" : "12345678" - }, - { - "fofa_username" : "test1@email.com", - "fofa_password" : "12345678" - } -] -``` - -也就是你的FOFA账号密码 - -#### 3.配置fofa_cookie.txt文件 -将下图中的cookie直接复制到`fofa_cookie.txt`文件里,这样在启动的时候Fofa-hack就会识别到并且直接使用cookie爬取了 - -注意不是`Authorization` - -`cookie`的位置如下 - ![](https://github.com/Cl0udG0d/Fofa-script/blob/master/images/2.png) - -### 运行 - -运行`fofa.py` , `-k`或`--keyword` 参数传入搜索关键字 - -更多参数查看 `--help` - -> python3 fofa.py --help - -```shell -Fofa-hack>python fofa.py --help - - ____ ____ ____ ____ - | ===|/ () \| ===|/ () \ - |__| \____/|__| /__/\__\ - _ _ ____ ____ __ __ - | |_| | / () \ / (__`| |/ / - |_| |_|/__/\__\\____)|__|\__\ V1.3.0 - -usage: fofa.py [-h] [--timesleep TIMESLEEP] --keyword KEYWORD - [--username USERNAME] [--password PASSWORD] [--endpage ENDPAGE] - [--level LEVEL] - -Fofa-hack v1.3.0 使用说明 - -optional arguments: - -h, --help show this help message and exit - --timesleep TIMESLEEP, -t TIMESLEEP - 爬取每一页等待秒数,防止IP被Ban,默认为3 - --keyword KEYWORD, -k KEYWORD - fofa搜索关键字,默认为test - 
--username USERNAME, -u USERNAME - fofa用户名 - --password PASSWORD, -p PASSWORD - fofa密码 - --endpage ENDPAGE, -e ENDPAGE - 爬取结束页码 - --level LEVEL, -l LEVEL - 爬取等级: 1-3 ,数字越大内容越详细,默认为 1 -``` - -爬取的结果会存储到`md5(搜索关键字)_运行时间戳.txt`文件中 - -### 测试 - -输入 搜索关键字 `app="ThinkPHP"`,等待秒数为5的情况下,下载1-50页数据经过测试无问题,经过自动去重之后剩余497条 +原来的[README](./OLD_README.md)此处查看 ### 赞赏列表 @@ -130,10 +44,6 @@ optional arguments: 支持多种导出格式 支持json、txt、excel等方式导出结果 - - 支持多种爬取内容 - 添加支持title、status、headers等内容 - 编写图形化界面 生成可执行文件运行 @@ -142,6 +52,10 @@ optional arguments: 增加程序稳定性 防止程序因为各种情况运行失败或者被ban的情况 + + 内容去重 + 去除重复的url信息 + diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f4d19b9..fec4429 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG 代码变更记录 +### 1.3.1 + ++ 支持多种导出方式 txt,json,csv ++ 因FOFA对普通用户的限制停止该项目的更新 ### 1.3.0 + `README`添加示例运行参考 diff --git a/fofa.py b/fofa.py index 0b77133..01f52cd 100644 --- a/fofa.py +++ b/fofa.py @@ -16,6 +16,7 @@ import argparse from tookit.levelData import LevelData +from tookit.outputData import OutputData host_list = [] timestamp_list = [] @@ -159,6 +160,7 @@ def init(self): parser.add_argument('--password', '-p', help='fofa密码') parser.add_argument('--endpage', '-e', help='爬取结束页码') parser.add_argument('--level', '-l', help='爬取等级: 1-3 ,数字越大内容越详细,默认为 1') + parser.add_argument('--output', '-o', help='输出格式:txt、json、csv,默认为txt') args = parser.parse_args() config.TimeSleep = int(args.timesleep) print("[*] 爬取延时: {}s".format(config.TimeSleep)) @@ -179,9 +181,16 @@ def init(self): print("[*] 爬取页码数: {}".format(self.want_page)) self.level=args.level if args.level else "1" self.levelData=LevelData(self.level) + + + self.output = args.output if args.output else "txt" + print("[*] 输出格式为: {}".format(self.output)) + + global filename - filename = "{}_{}.txt".format(unit.md5(config.SearchKEY), int(time.time())) + filename = "{}_{}.{}".format(unit.md5(config.SearchKEY), int(time.time()),self.output) print("[*] 存储文件名: {}".format(filename)) + self.outputData = 
OutputData(filename, pattern=self.output) return def get_page_num(self, search_key,cookie): diff --git a/fofa_cookie.txt b/fofa_cookie.txt index 8cce6c4..e69de29 100644 --- a/fofa_cookie.txt +++ b/fofa_cookie.txt @@ -1 +0,0 @@ -befor_router=; fofa_token=eyJhbGciOiJIUzUxMiIsImtpZCI6Ik5XWTVZakF4TVRkalltSTJNRFZsWXpRM05EWXdaakF3TURVMlkyWTNZemd3TUdRd1pUTmpZUT09IiwidHlwIjoiSldUIn0.eyJpZCI6MjQ4MjI4LCJtaWQiOjEwMDE0MTU1OSwidXNlcm5hbWUiOiJmb2ZhX2hhY2tfdGVzdCIsImV4cCI6MTY3NzU3MjY3MH0.VBTqAd1imJfC9UsnlzW-n64ktBBxdY0FHI0uoB262wavOwzQaSFjdXlvL8MmqpDZ17-JNxTTdU-11NhQkJN_eg; is_flag_login=0; user=%7B%22id%22%3A248228%2C%22mid%22%3A100141559%2C%22is_admin%22%3Afalse%2C%22username%22%3A%22fofa_hack_test%22%2C%22nickname%22%3A%22fofa_hack_test%22%2C%22email%22%3A%22fofa_test%40163.com%22%2C%22avatar_medium%22%3A%22https%3A%2F%2Fnosec.org%2Fmissing.jpg%22%2C%22avatar_thumb%22%3A%22https%3A%2F%2Fnosec.org%2Fmissing.jpg%22%2C%22key%22%3A%2281d94208789ae707007ce80b5e637d7e%22%2C%22rank_name%22%3A%22%E6%B3%A8%E5%86%8C%E7%94%A8%E6%88%B7%22%2C%22rank_level%22%3A0%2C%22company_name%22%3A%22fofa_hack_test%22%2C%22coins%22%3A0%2C%22can_pay_coins%22%3A0%2C%22fofa_point%22%3A0%2C%22credits%22%3A1%2C%22expiration%22%3A%22-%22%2C%22login_at%22%3A1677313470%2C%22data_limit%22%3Anull%7D; _nosec_cas_session=c1NDeVE0WnFXK3R1VWVQMTJ6Tmtrdm5Vd3ZYUjI4ZHBYNDdTQU5lR040UFNjY3FPLzB0NHY1RUE2K2VtNDBGaGFKWGRSMDRaUFBscGpuRkE4Y3RuMXI4ZWhlTG1CNGsvS3JHc003czF6b2hldU1lY0RuY09NQUw1MWY3a1FMRUpqbjVNNEpva1g0ZURCOGJqVk5XNXZJdjdRaERRUTVOV2VldTdza2o0TXdibTkwMDZGS3AyR2M2QXRjSWxDSUdGcUdWbUZsSGNjczlIMmdmYkxCOWx1TkEvQ0g3YmtYdXhJM0pVeEVJVEJzND0tLTUrNVJvRExBTnU1VlF3Z2lydk82WHc9PQ%3D%3D--b113e6e0fc7e89b7f39834cbe7088eb21a5d496b; tgt=TGC-16773134699994-KZOHdbLgQQQUgT4AIs0qy9zkdEXyferuY4JZBPLJ; \ No newline at end of file diff --git a/images/fofa.jpg b/images/fofa.jpg new file mode 100644 index 0000000..41a6026 Binary files /dev/null and b/images/fofa.jpg differ diff --git a/images/result.jpg b/images/result.jpg new file mode 
import datetime
import json
import time


def outputJson(data, path="test.json"):
    """Write ``data`` plus one marker record to ``path`` as a JSON array.

    The caller's list is left untouched; the marker ``{"1": "2"}`` is
    appended to a copy before serialising.

    :param data: list of JSON-serialisable records.
    :param path: destination file; overwritten if it already exists.
    :return: the list that was actually written.
    """
    # Copy first so the experiment does not mutate the caller's list.
    payload = list(data)
    payload.append({"1": "2"})
    with open(path, 'w', encoding="utf-8") as f:
        json.dump(payload, f)
    return payload


def readAllJsonData(path="test.json"):
    """Load the JSON document stored at ``path``, print it and return it.

    :param path: file previously written by :func:`outputJson`.
    :return: the parsed JSON value.
    """
    with open(path, 'r', encoding="utf-8") as load_f:
        load_dict = json.load(load_f)
    print(type(load_dict))
    print(load_dict)
    return load_dict


if __name__ == "__main__":
    # Scratch driver: round-trip a tiny list through test.json.
    data = [
        {"2": "3"}
    ]

    outputJson(data)

    readAllJsonData()
import json


class OutputData:
    """Persist crawled records to a result file in txt, json or csv form.

    ``output(data)`` is the entry point and is expected to be called once
    per crawled page; every call adds ``data`` to the same file.
    """

    # Supported output formats.
    CONST_TXT = "txt"
    CONST_JSON = "json"
    CONST_CSV = "csv"
    STANDARD_LIST = [CONST_TXT, CONST_JSON, CONST_CSV]

    ENCODING_TYPE = "utf-8"

    def __init__(self, filename, pattern="txt"):
        """
        :param filename: path of the result file, with or without the
            format extension.
        :param pattern: one of ``STANDARD_LIST``; anything else falls back
            to ``"txt"``.
        """
        self.filename = filename
        self.pattern = pattern if self.checkPatternStandard(pattern) else "txt"

    def checkPatternStandard(self, pattern):
        """
        Check whether ``pattern`` is a supported output format.

        :param pattern: format name to validate.
        :return: True if ``pattern`` is in ``STANDARD_LIST``.
        """
        return pattern in self.STANDARD_LIST

    def output(self, data):
        """Add ``data`` (an iterable of records) to the result file.

        :param data: records of the current page.
        """
        # Add the extension only when it is missing. Appending it on every
        # call made the name grow (x.txt.txt, x.txt.txt.txt, ...) so each
        # page ended up in a different file.
        suffix = ".{}".format(self.pattern)
        if not self.filename.endswith(suffix):
            self.filename = "{}{}".format(self.filename, suffix)

        if self.pattern == self.CONST_TXT:
            self.outputTxt(data)
        elif self.pattern == self.CONST_JSON:
            self.outputJson(data)
        else:
            self.outputCsv(data)

    def outputTxt(self, data):
        """Append one line per record, using ``str(record)``."""
        # Open once for the whole batch instead of once per record.
        with open(self.filename, 'a+', encoding=self.ENCODING_TYPE) as f:
            f.writelines(str(i) + "\n" for i in data)

    def readAllJsonData(self):
        """Return the JSON value currently stored in the result file.

        :return: the parsed content, or ``[]`` when the file does not exist
            yet or is empty.
        :raises json.JSONDecodeError: if the file holds malformed JSON; this
            is deliberately not swallowed so existing data is never
            silently overwritten.
        """
        try:
            # Read from the same path outputJson writes to.
            with open(self.filename, 'r', encoding=self.ENCODING_TYPE) as load_f:
                content = load_f.read()
        except FileNotFoundError:
            return []
        if not content.strip():
            return []
        return json.loads(content)

    def outputJson(self, newdata):
        """Merge ``newdata`` into the JSON array stored in the result file.

        :param newdata: iterable of JSON-serialisable records.
        """
        listdata = self.readAllJsonData()
        if not isinstance(listdata, list):
            listdata = []
        listdata.extend(newdata)
        # A JSON array cannot be appended to in place, so the whole file is
        # rewritten with the merged list.
        with open(self.filename, 'w', encoding=self.ENCODING_TYPE) as f:
            json.dump(listdata, f)

    def outputCsv(self, data=None):
        """Placeholder for csv export; currently writes nothing.

        :param data: records of the current page (unused for now).
        """
        # TODO: csv export is not implemented yet; the column layout of the
        # records has to be decided first.
        return