Skip to content

Commit

Permalink
Fix #120: 匹配番号时忽略小于指定大小的文件
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuukiy committed Dec 17, 2023
1 parent e349650 commit 9f99515
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 12 deletions.
2 changes: 2 additions & 0 deletions core/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ scan_dir =
media_ext = 3gp;avi;f4v;flv;iso;m2ts;m4v;mkv;mov;mp4;mpeg;rm;rmvb;ts;vob;webm;wmv
# 扫描影片文件时忽略指定的文件夹(以.开头的文件夹不需要设置也会被忽略)
ignore_folder = #recycle;#整理完成;不要扫描
# 匹配番号时忽略小于指定大小的文件(以MiB为单位,0表示禁用此功能)
ignore_video_file_less_than = 232

[Network]
# 是否启用代理
Expand Down
1 change: 1 addition & 0 deletions core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ def norm_int(cfg: Config):
cfg.Network.timeout = cfg.getint('Network', 'timeout')
cfg.NamingRule.max_path_len = min(cfg.getint('NamingRule', 'max_path_len'), 256)
cfg.NamingRule.max_actress_count = max(cfg.getint('NamingRule', 'max_actress_count'), 1)
cfg.File.ignore_video_file_less_than = int(cfg.getfloat('File', 'ignore_video_file_less_than') * 2**20)


def norm_tuples(cfg: Config):
Expand Down
38 changes: 30 additions & 8 deletions core/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,50 @@ def scan_movies(root: str) -> List[Movie]:

# 扫描所有影片文件并获取它们的番号
dic = {} # avid: [abspath1, abspath2...]
failed_path_ls = []
for dirpath, dirnames, filenames in os.walk(root):
for name in dirnames.copy():
if name.startswith('.') or name in cfg.File.ignore_folder:
dirnames.remove(name)
match_videos, unmatch_videos = {}, {}
for file in filenames:
ext = os.path.splitext(file)[1].lower()
if ext in cfg.File.media_ext:
fullpath = os.path.join(dirpath, file)
dvdid = get_id(fullpath)
dvdid = get_id(file)
cid = get_cid(fullpath)
# 如果文件名能匹配到cid,那么将cid视为有效id,因为此时dvdid多半是错的
avid = cid if cid else dvdid
if avid:
if avid in dic:
dic[avid].append(fullpath)
else:
dic[avid] = [fullpath]
match_videos[fullpath] = avid
dic.setdefault(avid, []).append(fullpath)
else:
fail = Movie('无法识别番号')
fail.files = [fullpath]
failed_items.append(fail)
unmatch_videos[fullpath] = None
# 如果一个文件夹内有视频能匹配到番号,同时也有视频无法匹配到番号,则后者很可能是广告
match_cnt, unmatch_cnt = len(match_videos), len(unmatch_videos)
if match_cnt == 0:
# 所有视频都没有匹配到番号,则尝试从文件夹寻找番号并作为所有视频的结果
dvdid = get_id(dirpath)
if dvdid:
for fullpath in unmatch_videos.keys():
dic.setdefault(dvdid, []).append(fullpath)
else:
for fullpath in unmatch_videos.keys():
failed_path_ls.append(fullpath)
logger.error(f"无法提取影片番号: '{fullpath}'")
else:
if unmatch_cnt > 0:
for fullpath in unmatch_videos.keys():
filesize = os.path.getsize(fullpath)
if filesize < cfg.File.ignore_video_file_less_than:
logger.debug(f"忽略匹配不到番号的小文件: '{fullpath}'")
else:
failed_path_ls.append(fullpath)
logger.error(f"无法提取影片番号: '{fullpath}'")
for fullpath in failed_path_ls:
fail = Movie('无法识别番号')
fail.files = [fullpath]
failed_items.append(fail)
# 检查是否有多部影片对应同一个番号
non_slice_dup = {} # avid: [abspath1, abspath2...]
for avid, files in dic.copy().items():
Expand Down
39 changes: 35 additions & 4 deletions unittest/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ def prepare_files(files):
Args:
files (list of tuple): 文件列表,仅接受相对路径
"""
for i in files:
path = os.path.join(tmp_folder, i)
if not isinstance(files, dict):
files = {i:1024 for i in files}
for name, size in files.items():
path = os.path.join(tmp_folder, name)
folder = os.path.split(path)[0]
if folder and (not os.path.exists(folder)):
os.makedirs(folder)
with open(path, 'wt', encoding='utf-8') as f:
f.write(path)
os.system(f'fsutil file createnew "{path}" {size}')
yield
rmtree(tmp_folder)
return
Expand Down Expand Up @@ -172,3 +173,33 @@ def test_scan_movies__mix_data(prepare_files):
basenames = [os.path.basename(i) for i in movies[0].files]
assert basenames[0] == 'movie.mp4'


# The folder is named after the movie ID and contains one ID-bearing video plus several ads
@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': 1, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 1048576, 'ABC-123/Advertisement.mp4': 243269631}])
def test_scan_movies__1_video_with_ad(prepare_files):
    """Scanning must yield exactly one movie, 'ABC-123', holding exactly one file.

    NOTE(review): 243269631 bytes is one byte less than 232 MiB (232 * 2**20),
    presumably chosen to sit just below the configured ignore threshold - confirm
    against the config used by the test run.
    """
    scanned = scan_movies(tmp_folder)
    assert len(scanned) == 1
    only_movie = scanned[0]
    assert only_movie.dvdid == 'ABC-123'
    assert len(only_movie.files) == 1


# The folder is named after the movie ID and contains one ID-bearing video plus ads,
# one of which exceeds the size threshold
@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': 1, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 1048576, 'ABC-123/Advertisement.mp4': 2**30}])
def test_scan_movies__1_video_with_large_ad(prepare_files):
    """Scanning must yield one 'ABC-123' movie and report the 1 GiB ad as a failed item."""
    scanned = scan_movies(tmp_folder)
    assert len(scanned) == 1
    only_movie = scanned[0]
    assert only_movie.dvdid == 'ABC-123'
    assert len(only_movie.files) == 1
    # The failure list lives at module level in core.file, so it is read from there
    import core.file
    unrecognized = core.file.failed_items
    assert len(unrecognized) == 1
    assert len(unrecognized[0].files) == 1
    assert os.path.basename(unrecognized[0].files[0]) == 'Advertisement.mp4'


# A single folder contains several ID-bearing videos together with ads
@pytest.mark.parametrize('files', [{'ABC-123.mp4': 1, 'DEF-456.mp4': 1, '广告1.mp4': 1024, '广告2.mp4': 1048576, 'Advertisement.mp4': 243269631}])
def test_scan_movies__n_video_with_ad(prepare_files):
    """Scanning must yield two movies, 'ABC-123' then 'DEF-456', each holding one file."""
    scanned = scan_movies(tmp_folder)
    assert len(scanned) == 2
    first, second = scanned[0], scanned[1]
    assert first.dvdid == 'ABC-123'
    assert second.dvdid == 'DEF-456'
    for movie in scanned:
        assert len(movie.files) == 1

0 comments on commit 9f99515

Please sign in to comment.