From b57660cf42a0f437b08735730620e20ed7516946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A3=8E=E4=B9=8B=E5=87=8C=E6=AE=87?= Date: Fri, 26 Mar 2021 22:03:58 +0800 Subject: [PATCH] =?UTF-8?q?=E5=85=BC=E5=AE=B9=E4=B8=8B=E8=BD=BD=E9=A1=B5?= =?UTF-8?q?=E9=9D=A2=E4=B8=8D=E7=9B=B4=E6=8E=A5=E8=BF=94=E5=9B=9E=E9=A1=B5?= =?UTF-8?q?=E9=9D=A2=E4=BF=A1=E6=81=AF=EF=BC=8C=E8=80=8C=E6=98=AF=E5=85=88?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E4=B8=80=E4=B8=AA=E8=87=AA=E5=8A=A8=E8=AE=A1?= =?UTF-8?q?=E7=AE=97acw=5Fsc=5F=5Fv2=E5=90=8E=E5=8A=A0=E5=85=A5cookie?= =?UTF-8?q?=E7=84=B6=E5=90=8E=E8=87=AA=E5=8A=A8reload=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E6=AD=A3=E5=B8=B8=E9=A1=B5=E9=9D=A2=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lanzou/api/core.py | 10 ++++++++ lanzou/api/utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/lanzou/api/core.py b/lanzou/api/core.py index 31cfe47..dfd3cfb 100644 --- a/lanzou/api/core.py +++ b/lanzou/api/core.py @@ -433,6 +433,16 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail: if not first_page: return FileDetail(LanZouCloud.NETWORK_ERROR, pwd=pwd, url=share_url) + if "var arg1=" in first_page.text: + # 在页面被过多访问或其他情况下,有时候会先返回一个加密的页面,其执行计算出一个acw_sc__v2后放入页面后再重新访问页面才能获得正常页面 + # 若该页面进行了js加密,则进行解密,计算acw_sc__v2,并加入cookie + # 可以测试的链接:https://fzls.lanzous.com/iDBM7nbkzti ,使用隐私模式打开(确保无cookie),会注意到第一次访问时会返回形如下面这样的页面,会计算acw_sc__v2并设置cookie后reload,之后会返回正常的页面 + acw_sc__v2 = calc_acw_sc__v2(first_page.text) + self._session.cookies.set("acw_sc__v2", acw_sc__v2) + first_page = self._get(share_url) # 文件分享页面(第一页) + if not first_page: + return FileDetail(LanZouCloud.NETWORK_ERROR, pwd=pwd, url=share_url) + first_page = remove_notes(first_page.text) # 去除网页里的注释 if '文件取消' in first_page or '文件不存在' in first_page: return FileDetail(LanZouCloud.FILE_CANCELLED, pwd=pwd, url=share_url) diff --git a/lanzou/api/utils.py 
b/lanzou/api/utils.py
index 3ee1a58..8e43674 100644
--- a/lanzou/api/utils.py
+++ b/lanzou/api/utils.py
@@ -12,7 +12,7 @@ import requests
 
 
 __all__ = ['logger', 'remove_notes', 'name_format', 'time_format', 'is_name_valid', 'is_file_url',
-           'is_folder_url', 'big_file_split', 'un_serialize', 'let_me_upload', 'auto_rename']
+           'is_folder_url', 'big_file_split', 'un_serialize', 'let_me_upload', 'auto_rename', 'calc_acw_sc__v2']
 
 # 调试日志设置
 logger = logging.getLogger('lanzou')
@@ -207,3 +207,65 @@ def auto_rename(file_path) -> str:
     while f"{fname_no_ext}({count}){ext}" in flist:
         count += 1
     return fpath + os.sep + fname_no_ext + '(' + str(count) + ')' + ext
+
+
+def calc_acw_sc__v2(html_text: str) -> str:
+    """Compute the ``acw_sc__v2`` cookie value from a challenge page.
+
+    The page is expected to contain ``arg1='...'`` (digits and upper-case
+    letters); that value is shuffled by ``unsbox`` and then XOR-ed with a
+    fixed hex key by ``hexXor``.
+
+    :param html_text: text of the page that contains the ``arg1`` assignment.
+    :return: the XOR result as lower-case hex digits.
+    :raises ValueError: if no ``arg1='...'`` assignment is found.
+    """
+    match = re.search(r"arg1='([0-9A-Z]+)'", html_text)
+    if match is None:
+        # Fail with a clear message instead of AttributeError on None.group().
+        raise ValueError("arg1 not found in page, cannot compute acw_sc__v2")
+    return hexXor(unsbox(match.group(1)), "3000176000856006061501533003690027800375")
+
+
+# Adapted from https://zhuanlan.zhihu.com/p/228507547
+def unsbox(str_arg):
+    """Reorder the characters of ``str_arg`` according to a fixed 40-entry table.
+
+    Output slot ``i`` takes the character at 1-based position ``table[i]`` of
+    the input. Slots whose position lies past the end of a short input are
+    skipped; input characters beyond the 40th are never used.
+    """
+    table = [15, 35, 29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11,
+             39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36]
+    size = len(str_arg)
+    # Direct lookup per slot replaces the former scan of the whole table per character.
+    return ''.join(str_arg[pos - 1] for pos in table if pos <= size)
+
+
+def hexXor(str_arg, args):
+    """XOR two hex strings two digits at a time.
+
+    Only the first ``min(len(str_arg), len(args))`` characters are processed;
+    every XOR-ed pair is written as two lower-case hex digits.
+    """
+    size = min(len(str_arg), len(args))
+    return ''.join(
+        format(int(str_arg[i:i + 2], 16) ^ int(args[i:i + 2], 16), '02x')
+        for i in range(0, size, 2)
+    )
+
+
+def test_acw_sc__v2():
+    # This case should only show up after a link has been downloaded many times.
+    # Minimal stand-in page: arg1 was derived by inverting hexXor/unsbox on `expected`.
+    res = calc_acw_sc__v2("""
+    var arg1='4CC7A7102B9E275F79E7F67DEAEE5F96BF038594';
+    """)
+    expected = "605da825b26c2e61e46c98234f69a6175d4efb96"
+
+    print(f"预期:{expected}")
+    print(f"实际:{res}")
+    print(res == expected)
+
+
+if __name__ == '__main__':
+    test_acw_sc__v2()