Skip to content

Commit

Permalink
[test] Parallelize long-running pytest files
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Apr 5, 2024
1 parent 1fc7511 commit c43bc5b
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 57 deletions.
19 changes: 15 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
run: |
echo "uname -a: $( uname -a )"
echo "Shell: $SHELL"
echo "Cores: $( nproc )"
echo "Mount points:"; mount
- uses: msys2/setup-msys2@v2
Expand Down Expand Up @@ -187,11 +188,21 @@ jobs:
- name: Unit Tests
if: ${{ !startsWith( matrix.os, 'macos' ) }}
run: |
python3 -m pip install pytest
python3 -m pip install pytest pytest-xdist
for file in core/tests/test_*.py tests/test_*.py; do
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$file"
case "$file" in
"core/tests/test_AutoMountLayer.py"\
|"core/tests/test_BlockParallelReaders.py"\
|"core/tests/test_LibarchiveMountSource.py"\
|"core/tests/test_SQLiteIndexedTar.py")
echo "$file" # pytest-xdist seems to omit the test file name
pytest -n auto --disable-warnings "$file"
;;
*)
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$file"
esac
done
- name: Regression Tests
Expand Down
20 changes: 20 additions & 0 deletions core/tests/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import contextlib
import os
import shutil
import tempfile


def findTestFile(relativePathOrName, maxLevels=3):
    """Locate a test asset by searching for ``tests/<name>`` upward from the CWD.

    Tries ``tests/<name>``, ``../tests/<name>``, ... up to ``maxLevels`` levels.
    If nothing exists at any of those locations, the input is returned
    unchanged as a fallback, so callers may also pass an already-valid path.

    Parameters
    ----------
    relativePathOrName : str
        File name (or path relative to a ``tests`` folder) to look up.
    maxLevels : int
        Number of directory levels to probe, starting at the current
        directory (default 3, matching the historical behavior).

    Returns
    -------
    str
        The first existing candidate path, or ``relativePathOrName`` itself.
    """
    for level in range(maxLevels):
        candidate = os.path.sep.join([".."] * level + ["tests", relativePathOrName])
        if os.path.exists(candidate):
            return candidate
    # Fallback: let the caller's open() produce the error, or accept a path
    # that is already resolvable as-is.
    return relativePathOrName


@contextlib.contextmanager
def copyTestFile(relativePathOrName):
    """Yield the path of a private, temporary copy of the given test file.

    The file is resolved via :func:`findTestFile` and copied into a fresh
    temporary directory, presumably so that concurrently running tests
    (pytest-xdist) do not interfere via sidecar files written next to the
    archive — TODO confirm against callers.  The directory and the copy are
    deleted when the context exits.

    Parameters
    ----------
    relativePathOrName : str
        Name or relative path of the test asset to copy.
    """
    with tempfile.TemporaryDirectory() as folder:
        path = os.path.join(folder, os.path.basename(relativePathOrName))
        # shutil.copyfile streams the contents in chunks instead of reading
        # the whole file into memory; like the open/read/write original it
        # copies data only, not metadata.
        shutil.copyfile(findTestFile(relativePathOrName), path)
        yield path
20 changes: 8 additions & 12 deletions core/tests/test_AutoMountLayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,15 @@
import stat
import sys

from helpers import copyTestFile

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import pytest # noqa: E402

from ratarmountcore import AutoMountLayer, openMountSource # noqa: E402


def findTestFile(relativePathOrName):
    """Locate a test asset by probing tests/<name>, ../tests/<name>, ../../tests/<name>.

    Returns the first existing candidate; falls back to returning the input
    unchanged when none exists.  NOTE(review): byte-identical duplicate of the
    helper in core/tests/helpers.py (see `from helpers import copyTestFile`
    above) — this commit removes the local copy in favor of the shared one.
    """
    for i in range(3):
        path = os.path.sep.join([".."] * i + ["tests", relativePathOrName])
        if os.path.exists(path):
            return path
    return relativePathOrName


@pytest.mark.parametrize("parallelization", [1, 2, 4])
class TestAutoMountLayer:
@staticmethod
Expand All @@ -34,7 +28,7 @@ def test_regex_mount_point_tar(parallelization):
'transformRecursiveMountPoint': ('.*/([^/]*).tar', r'\1'),
}

with openMountSource(findTestFile("packed-100-times.tar.gz"), **options) as mountSource:
with copyTestFile("packed-100-times.tar.gz") as path, openMountSource(path, **options) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

assert recursivelyMounted.listDir('/')
Expand All @@ -56,7 +50,7 @@ def test_regex_mount_point_tar_gz(parallelization):
# other files and those other files will actually take 10x or more longer than without this test running
# before! It might be that the memory usage makes Python's garbage collector a bottleneck because of too
# many small objects?!
with openMountSource(findTestFile("compressed-100-times.tar.gz"), **options) as mountSource:
with copyTestFile("compressed-100-times.tar.gz") as path, openMountSource(path, **options) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

assert recursivelyMounted.listDir('/')
Expand All @@ -77,7 +71,7 @@ def test_regex_mount_point_gz(parallelization):
# > Recursively mounted: /ufo_805.gz
# > File "core/ratarmountcore/SQLiteIndexedTar.py", line 2085, in _detectTar
# > indexed_gzip.indexed_gzip.ZranError: zran_read returned error: ZRAN_READ_FAIL (file: n/a)
with openMountSource(findTestFile("compressed-100-times.gz"), **options) as mountSource:
with copyTestFile("compressed-100-times.gz") as path, openMountSource(path, **options) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

assert recursivelyMounted.listDir('/')
Expand All @@ -94,7 +88,9 @@ def test_file_versions(parallelization):
'parallelization': parallelization,
}

with openMountSource(findTestFile("tests/double-compressed-nested-tar.tgz.tgz"), **options) as mountSource:
with copyTestFile("tests/double-compressed-nested-tar.tgz.tgz") as path, openMountSource(
path, **options
) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

for folder in ['/', '/nested-tar.tar.gz', '/nested-tar.tar.gz/foo', '/nested-tar.tar.gz/foo/fighter']:
Expand Down
24 changes: 10 additions & 14 deletions core/tests/test_LibarchiveMountSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,19 @@

import pytest

from helpers import copyTestFile, findTestFile

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from ratarmountcore import LibarchiveMountSource # noqa: E402
from ratarmountcore.LibarchiveMountSource import IterableArchive # noqa: E402


def findTestFile(relativePathOrName):
    """Locate a test asset by probing tests/<name>, ../tests/<name>, ../../tests/<name>.

    Returns the first existing candidate; falls back to returning the input
    unchanged when none exists.  NOTE(review): byte-identical duplicate of the
    helper in core/tests/helpers.py (imported a few lines above) — this commit
    removes the local copy in favor of the shared one.
    """
    for i in range(3):
        path = os.path.sep.join([".."] * i + ["tests", relativePathOrName])
        if os.path.exists(path):
            return path
    return relativePathOrName


class TestLibarchiveMountSource:
@staticmethod
@pytest.mark.parametrize('compression', ['7z', 'rar', 'zip'])
def test_simple_usage(compression):
with LibarchiveMountSource(findTestFile('folder-symlink.' + compression)) as mountSource:
with copyTestFile('folder-symlink.' + compression) as path, LibarchiveMountSource(path) as mountSource:
for folder in ['/', '/foo', '/foo/fighter']:
assert mountSource.getFileInfo(folder)
assert mountSource.fileVersions(folder) == 1
Expand Down Expand Up @@ -64,8 +58,8 @@ def test_simple_usage(compression):
# @pytest.mark.parametrize("compression", ["7z", "rar", "zip"])
@pytest.mark.parametrize('compression', ['zip'])
def test_password(compression):
with LibarchiveMountSource(
findTestFile('encrypted-nested-tar.' + compression), passwords=['foo']
with copyTestFile('encrypted-nested-tar.' + compression) as path, LibarchiveMountSource(
path, passwords=['foo']
) as mountSource:
for folder in ['/', '/foo', '/foo/fighter']:
assert mountSource.getFileInfo(folder)
Expand All @@ -82,7 +76,9 @@ def test_password(compression):
@staticmethod
@pytest.mark.parametrize('compression', ['bz2', 'gz', 'lrz', 'lz4', 'lzip', 'lzma', 'lzo', 'xz', 'Z', 'zst'])
def test_stream_compressed(compression):
with LibarchiveMountSource(findTestFile('simple.' + compression), passwords=['foo']) as mountSource:
with copyTestFile('simple.' + compression) as path, LibarchiveMountSource(
path, passwords=['foo']
) as mountSource:
for folder in ['/']:
assert mountSource.getFileInfo(folder)
assert mountSource.fileVersions(folder) == 1
Expand All @@ -107,7 +103,7 @@ def test_stream_compressed(compression):
],
)
def test_file_independence(path, lineSize):
with LibarchiveMountSource(findTestFile(path)) as mountSource:
with copyTestFile(path) as copiedPath, LibarchiveMountSource(copiedPath) as mountSource:
with mountSource.open(mountSource.getFileInfo('zeros-32-MiB.txt')) as fileWithZeros:
expectedZeros = b'0' * (lineSize - 1) + b'\n'
assert fileWithZeros.read(lineSize) == expectedZeros
Expand Down Expand Up @@ -198,7 +194,7 @@ def create_large_file(tarPath, compression, fileCount):
def _test_large_file(path):
t0 = time.time()
fileCount = 0
with LibarchiveMountSource(findTestFile(path)) as mountSource:
with LibarchiveMountSource(path) as mountSource:
t1 = time.time()
print(f"Opening {path} took {time.time() - t0:.3f} s") # ~5 s
# In the worst case, reading all files can take 300k * 5s / 2 = ~9 days.
Expand Down
32 changes: 13 additions & 19 deletions core/tests/test_SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,15 @@

import indexed_bzip2

from helpers import copyTestFile

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import pytest # noqa: E402

from ratarmountcore import RatarmountError, SQLiteIndexedTar # noqa: E402


def findTestFile(relativePathOrName):
    """Locate a test asset by probing tests/<name>, ../tests/<name>, ../../tests/<name>.

    Returns the first existing candidate; falls back to returning the input
    unchanged when none exists.  NOTE(review): byte-identical duplicate of the
    helper in core/tests/helpers.py (see `from helpers import copyTestFile`
    above) — this commit removes the local copy in favor of the shared one.
    """
    for i in range(3):
        path = os.path.sep.join([".."] * i + ["tests", relativePathOrName])
        if os.path.exists(path):
            return path
    return relativePathOrName


@pytest.mark.parametrize("parallelization", [1, 2, 4])
class TestSQLiteIndexedTarParallelized:
@staticmethod
Expand All @@ -48,8 +42,8 @@ def _makeFolder(tarArchive, name):

@staticmethod
def test_context_manager(parallelization):
with SQLiteIndexedTar(
findTestFile('single-file.tar'), writeIndex=False, parallelization=parallelization
with copyTestFile("single-file.tar") as path, SQLiteIndexedTar(
path, writeIndex=False, parallelization=parallelization
) as indexedTar:
assert indexedTar.listDir('/')
assert indexedTar.getFileInfo('/')
Expand All @@ -58,8 +52,8 @@ def test_context_manager(parallelization):

@staticmethod
def test_tar_bz2_with_parallelization(parallelization):
with SQLiteIndexedTar(
findTestFile("2k-recursive-tars.tar.bz2"),
with copyTestFile("2k-recursive-tars.tar.bz2") as path, SQLiteIndexedTar(
path,
clearIndexCache=True,
recursive=False,
parallelization=parallelization,
Expand All @@ -79,8 +73,8 @@ def test_tar_bz2_with_parallelization(parallelization):

@staticmethod
def test_recursive_tar_bz2_with_parallelization(parallelization):
with SQLiteIndexedTar(
findTestFile("2k-recursive-tars.tar.bz2"),
with copyTestFile("2k-recursive-tars.tar.bz2") as path, SQLiteIndexedTar(
path,
clearIndexCache=True,
recursive=True,
parallelization=parallelization,
Expand All @@ -98,8 +92,8 @@ def test_recursive_tar_bz2_with_parallelization(parallelization):

@staticmethod
def test_deep_recursive(parallelization):
with SQLiteIndexedTar(
findTestFile("packed-5-times.tar.gz"),
with copyTestFile("packed-5-times.tar.gz") as path, SQLiteIndexedTar(
path,
clearIndexCache=True,
recursive=True,
parallelization=parallelization,
Expand Down Expand Up @@ -445,9 +439,9 @@ def test_appending_to_large_archive(parallelization, tmpdir):

# Create a TAR large in size as well as file count
tarPath = os.path.join(tmpdir, "foo.tar")
with indexed_bzip2.open(findTestFile("tar-with-300-folders-with-1000-files-0B-files.tar.bz2")) as file, open(
tarPath, 'wb'
) as extracted:
with copyTestFile("tar-with-300-folders-with-1000-files-0B-files.tar.bz2") as path, indexed_bzip2.open(
path
) as file, open(tarPath, 'wb') as extracted:
while True:
data = file.read(1024 * 1024)
if not data:
Expand Down
44 changes: 36 additions & 8 deletions tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1648,16 +1648,44 @@ if [[ -z "$CI" ]]; then

shellcheck tests/*.sh || returnError "$LINENO" 'shellcheck failed!'

# Test runtimes 2024-04-04 on Ryzen 3900X
# core/tests/test_AutoMountLayer.py in 19.05s parallelize
# core/tests/test_BlockParallelReaders.py in 57.95s parallelize
# core/tests/test_LibarchiveMountSource.py in 246.99s parallelize
# core/tests/test_RarMountSource.py in 0.08s
# core/tests/test_SQLiteBlobFile.py in 0.24s
# core/tests/test_SQLiteIndex.py in 0.10s
# core/tests/test_SQLiteIndexedTar.py in 154.08s parallelize
# core/tests/test_StenciledFile.py in 1.91s
# core/tests/test_SubvolumesMountSource.py in 0.12s
# core/tests/test_UnionMountSource.py in 0.12s
# core/tests/test_ZipMountSource.py in 0.09s
# core/tests/test_compressions.py in 0.13s
# core/tests/test_factory.py in 0.36s
# core/tests/test_utils.py in 0.22s
# tests/test_cli.py in 1.43s

# Pytest has serious performance issues. It does collect all tests beforehand and does not free memory
# after tests have finished it seems. Or maybe that memory is a bug with indexed_gzip but the problem is
# that after that all tests after that one outlier also run slower. Maybe because of a Python garbage collector
# bug? For that reason, run each test file separately.
# after tests have finished it seems. Or maybe that memory is a bug with indexed_gzip. But the problem is
# that all tests after that one outlier also run slower! Maybe because of a Python garbage collector bug?
# For that reason, run each test file separately.
for testFile in "${testFiles[@]}"; do
if [[ "${testFile//test_//}" != "$testFile" ]]; then
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$testFile" || returnError "$LINENO" 'pytest failed!'
fi
case "$testFile" in
"core/tests/test_AutoMountLayer.py"\
|"core/tests/test_BlockParallelReaders.py"\
|"core/tests/test_LibarchiveMountSource.py"\
|"core/tests/test_SQLiteIndexedTar.py")
echo "$testFile" # pytest-xdist seems to omit the test file name
pytest -n auto --disable-warnings "$testFile" || returnError "$LINENO" 'pytest failed!'
;;
*)
if [[ "${testFile//test_//}" != "$testFile" ]]; then
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$testFile" || returnError "$LINENO" 'pytest failed!'
fi
;;
esac
done
fi

Expand Down

0 comments on commit c43bc5b

Please sign in to comment.