Fix #120: 匹配番号时忽略小于指定大小的文件

Yuukiy · Dec 17, 2023 · 9f99515
1 parent e349650
commit 9f99515
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 12 deletions.
diff --git a/core/config.ini b/core/config.ini
@@ -15,6 +15,8 @@ scan_dir =
 media_ext = 3gp;avi;f4v;flv;iso;m2ts;m4v;mkv;mov;mp4;mpeg;rm;rmvb;ts;vob;webm;wmv
 # 扫描影片文件时忽略指定的文件夹（以.开头的文件夹不需要设置也会被忽略）
 ignore_folder = #recycle;#整理完成;不要扫描
+# 匹配番号时忽略小于指定大小的文件（以MiB为单位，0表示禁用此功能）
+ignore_video_file_less_than = 232
 
 [Network]
 # 是否启用代理

diff --git a/core/config.py b/core/config.py
@@ -233,6 +233,7 @@ def norm_int(cfg: Config):
     cfg.Network.timeout = cfg.getint('Network', 'timeout')
     cfg.NamingRule.max_path_len = min(cfg.getint('NamingRule', 'max_path_len'), 256)
     cfg.NamingRule.max_actress_count = max(cfg.getint('NamingRule', 'max_actress_count'), 1)
+    cfg.File.ignore_video_file_less_than = int(cfg.getfloat('File', 'ignore_video_file_less_than') * 2**20)
 
 
 def norm_tuples(cfg: Config):

diff --git a/core/file.py b/core/file.py
@@ -28,28 +28,50 @@ def scan_movies(root: str) -> List[Movie]:
 
     # 扫描所有影片文件并获取它们的番号
     dic = {}    # avid: [abspath1, abspath2...]
+    failed_path_ls = []
     for dirpath, dirnames, filenames in os.walk(root):
         for name in dirnames.copy():
             if name.startswith('.') or name in cfg.File.ignore_folder:
                 dirnames.remove(name)
+        match_videos, unmatch_videos = {}, {}
         for file in filenames:
             ext = os.path.splitext(file)[1].lower()
             if ext in cfg.File.media_ext:
                 fullpath = os.path.join(dirpath, file)
-                dvdid = get_id(fullpath)
+                dvdid = get_id(file)
                 cid = get_cid(fullpath)
                 # 如果文件名能匹配到cid，那么将cid视为有效id，因为此时dvdid多半是错的
                 avid = cid if cid else dvdid
                 if avid:
-                    if avid in dic:
-                        dic[avid].append(fullpath)
-                    else:
-                        dic[avid] = [fullpath]
+                    match_videos[fullpath] = avid
+                    dic.setdefault(avid, []).append(fullpath)
                 else:
-                    fail = Movie('无法识别番号')
-                    fail.files = [fullpath]
-                    failed_items.append(fail)
+                    unmatch_videos[fullpath] = None
+        # 如果一个文件夹内有视频能匹配到番号，同时也有视频无法匹配到番号，则后者很可能是广告
+        match_cnt, unmatch_cnt = len(match_videos), len(unmatch_videos)
+        if match_cnt == 0:
+            # 所有视频都没有匹配到番号，则尝试从文件夹寻找番号并作为所有视频的结果
+            dvdid = get_id(dirpath)
+            if dvdid:
+                for fullpath in unmatch_videos.keys():
+                    dic.setdefault(dvdid, []).append(fullpath)
+            else:
+                for fullpath in unmatch_videos.keys():
+                    failed_path_ls.append(fullpath)
                     logger.error(f"无法提取影片番号: '{fullpath}'")
+        else:
+            if unmatch_cnt > 0:
+                for fullpath in unmatch_videos.keys():
+                    filesize = os.path.getsize(fullpath)
+                    if filesize < cfg.File.ignore_video_file_less_than:
+                        logger.debug(f"忽略匹配不到番号的小文件: '{fullpath}'")
+                    else:
+                        failed_path_ls.append(fullpath)
+                        logger.error(f"无法提取影片番号: '{fullpath}'")
+    for fullpath in failed_path_ls:
+        fail = Movie('无法识别番号')
+        fail.files = [fullpath]
+        failed_items.append(fail)
     # 检查是否有多部影片对应同一个番号
     non_slice_dup = {}  # avid: [abspath1, abspath2...]
     for avid, files in dic.copy().items():

diff --git a/unittest/test_file.py b/unittest/test_file.py
@@ -19,13 +19,14 @@ def prepare_files(files):
     Args:
         files (list of tuple): 文件列表，仅接受相对路径
     """
-    for i in files:
-        path = os.path.join(tmp_folder, i)
+    if not isinstance(files, dict):
+        files = {i:1024 for i in files}
+    for name, size in files.items():
+        path = os.path.join(tmp_folder, name)
         folder = os.path.split(path)[0]
         if folder and (not os.path.exists(folder)):
             os.makedirs(folder)
-        with open(path, 'wt', encoding='utf-8') as f:
-            f.write(path)
+        os.system(f'fsutil file createnew "{path}" {size}')
     yield
     rmtree(tmp_folder)
     return
@@ -172,3 +173,33 @@ def test_scan_movies__mix_data(prepare_files):
     basenames = [os.path.basename(i) for i in movies[0].files]
     assert basenames[0] == 'movie.mp4'
 
+
+# Folder named after the ID holds one ID-named movie plus three unmatched "ad" videos (1 KiB, 1 MiB, and 243269631 bytes = 232 MiB minus one byte); expects exactly one movie, 'ABC-123', with a single file.
+@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': 1, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 1048576, 'ABC-123/Advertisement.mp4': 243269631}])
+def test_scan_movies__1_video_with_ad(prepare_files):
+    movies = scan_movies(tmp_folder)
+    assert len(movies) == 1
+    assert movies[0].dvdid == 'ABC-123'
+    assert len(movies[0].files) == 1
+
+
+# Folder named after the ID holds one ID-named movie plus ads, one of which is 2**30 bytes (1 GiB); expects one movie 'ABC-123' with a single file, and only 'Advertisement.mp4' recorded in core.file.failed_items.
+@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': 1, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 1048576, 'ABC-123/Advertisement.mp4': 2**30}])
+def test_scan_movies__1_video_with_large_ad(prepare_files):
+    movies = scan_movies(tmp_folder)
+    assert len(movies) == 1
+    assert movies[0].dvdid == 'ABC-123'
+    assert len(movies[0].files) == 1
+    import core.file
+    failed = core.file.failed_items
+    assert len(failed) == 1 and len(failed[0].files) == 1
+    assert os.path.basename(failed[0].files[0]) == 'Advertisement.mp4'
+
+
+# A single folder holds two ID-named movies plus three unmatched "ad" videos (1 KiB, 1 MiB, and 243269631 bytes); expects two movies, 'ABC-123' then 'DEF-456', each with exactly one file.
+@pytest.mark.parametrize('files', [{'ABC-123.mp4': 1, 'DEF-456.mp4': 1, '广告1.mp4': 1024, '广告2.mp4': 1048576, 'Advertisement.mp4': 243269631}])
+def test_scan_movies__n_video_with_ad(prepare_files):
+    movies = scan_movies(tmp_folder)
+    assert len(movies) == 2
+    assert movies[0].dvdid == 'ABC-123' and movies[1].dvdid == 'DEF-456'
+    assert all(len(i.files) == 1 for i in movies)