Skip to content

Commit

Permalink
[test] Parallelize long-running pytest files
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Apr 5, 2024
1 parent 1fc7511 commit c43bc5b
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 57 deletions.
19 changes: 15 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
run: |
echo "uname -a: $( uname -a )"
echo "Shell: $SHELL"
echo "Cores: $( nproc )"
echo "Mount points:"; mount
- uses: msys2/setup-msys2@v2
Expand Down Expand Up @@ -187,11 +188,21 @@ jobs:
- name: Unit Tests
if: ${{ !startsWith( matrix.os, 'macos' ) }}
run: |
python3 -m pip install pytest
python3 -m pip install pytest pytest-xdist
for file in core/tests/test_*.py tests/test_*.py; do
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$file"
case "$file" in
"core/tests/test_AutoMountLayer.py"\
|"core/tests/test_BlockParallelReaders.py"\
|"core/tests/test_LibarchiveMountSource.py"\
|"core/tests/test_SQLiteIndexedTar.py")
echo "$file" # pytest-xdist seems to omit the test file name
pytest -n auto --disable-warnings "$file"
;;
*)
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$file"
esac
done
- name: Regression Tests
Expand Down
20 changes: 20 additions & 0 deletions core/tests/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import contextlib
import os
import shutil
import tempfile


def findTestFile(relativePathOrName, maxLevels=3):
    """Locate a test asset by searching for ``tests/<name>`` upward from the CWD.

    Tries ``tests/<name>``, ``../tests/<name>``, ... up to ``maxLevels`` levels.
    If nothing exists at any of those locations, the input is returned
    unchanged as a fallback, so callers may also pass an already-valid path.

    Parameters
    ----------
    relativePathOrName : str
        File name (or path relative to a ``tests`` folder) to look up.
    maxLevels : int
        Number of directory levels to probe, starting at the current
        directory (default 3, matching the historical behavior).

    Returns
    -------
    str
        The first existing candidate path, or ``relativePathOrName`` itself.
    """
    for level in range(maxLevels):
        candidate = os.path.sep.join([".."] * level + ["tests", relativePathOrName])
        if os.path.exists(candidate):
            return candidate
    # Fallback: let the caller's open() produce the error, or accept a path
    # that is already resolvable as-is.
    return relativePathOrName


@contextlib.contextmanager
def copyTestFile(relativePathOrName):
    """Yield the path of a private, temporary copy of the given test file.

    The file is resolved via :func:`findTestFile` and copied into a fresh
    temporary directory, presumably so that concurrently running tests
    (pytest-xdist) do not interfere via sidecar files written next to the
    archive — TODO confirm against callers.  The directory and the copy are
    deleted when the context exits.

    Parameters
    ----------
    relativePathOrName : str
        Name or relative path of the test asset to copy.
    """
    with tempfile.TemporaryDirectory() as folder:
        path = os.path.join(folder, os.path.basename(relativePathOrName))
        # shutil.copyfile streams the contents in chunks instead of reading
        # the whole file into memory; like the open/read/write original it
        # copies data only, not metadata.
        shutil.copyfile(findTestFile(relativePathOrName), path)
        yield path
20 changes: 8 additions & 12 deletions core/tests/test_AutoMountLayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,15 @@
import stat
import sys

from helpers import copyTestFile

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import pytest # noqa: E402

from ratarmountcore import AutoMountLayer, openMountSource # noqa: E402


def findTestFile(relativePathOrName):
    """Locate a test asset by probing tests/<name>, ../tests/<name>, ../../tests/<name>.

    Returns the first existing candidate; falls back to returning the input
    unchanged when none exists.  NOTE(review): byte-identical duplicate of the
    helper in core/tests/helpers.py (see `from helpers import copyTestFile`
    above) — this commit removes the local copy in favor of the shared one.
    """
    for i in range(3):
        path = os.path.sep.join([".."] * i + ["tests", relativePathOrName])
        if os.path.exists(path):
            return path
    return relativePathOrName


@pytest.mark.parametrize("parallelization", [1, 2, 4])
class TestAutoMountLayer:
@staticmethod
Expand All @@ -34,7 +28,7 @@ def test_regex_mount_point_tar(parallelization):
'transformRecursiveMountPoint': ('.*/([^/]*).tar', r'\1'),
}

with openMountSource(findTestFile("packed-100-times.tar.gz"), **options) as mountSource:
with copyTestFile("packed-100-times.tar.gz") as path, openMountSource(path, **options) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

assert recursivelyMounted.listDir('/')
Expand All @@ -56,7 +50,7 @@ def test_regex_mount_point_tar_gz(parallelization):
# other files and those other files will actually take 10x or more longer than without this test running
# before! It might be that the memory usage makes Python's garbage collector a bottleneck because of too
# many small objects?!
with openMountSource(findTestFile("compressed-100-times.tar.gz"), **options) as mountSource:
with copyTestFile("compressed-100-times.tar.gz") as path, openMountSource(path, **options) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

assert recursivelyMounted.listDir('/')
Expand All @@ -77,7 +71,7 @@ def test_regex_mount_point_gz(parallelization):
# > Recursively mounted: /ufo_805.gz
# > File "core/ratarmountcore/SQLiteIndexedTar.py", line 2085, in _detectTar
# > indexed_gzip.indexed_gzip.ZranError: zran_read returned error: ZRAN_READ_FAIL (file: n/a)
with openMountSource(findTestFile("compressed-100-times.gz"), **options) as mountSource:
with copyTestFile("compressed-100-times.gz") as path, openMountSource(path, **options) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

assert recursivelyMounted.listDir('/')
Expand All @@ -94,7 +88,9 @@ def test_file_versions(parallelization):
'parallelization': parallelization,
}

with openMountSource(findTestFile("tests/double-compressed-nested-tar.tgz.tgz"), **options) as mountSource:
with copyTestFile("tests/double-compressed-nested-tar.tgz.tgz") as path, openMountSource(
path, **options
) as mountSource:
recursivelyMounted = AutoMountLayer(mountSource, **options)

for folder in ['/', '/nested-tar.tar.gz', '/nested-tar.tar.gz/foo', '/nested-tar.tar.gz/foo/fighter']:
Expand Down
24 changes: 10 additions & 14 deletions core/tests/test_LibarchiveMountSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,19 @@

import pytest

from helpers import copyTestFile, findTestFile

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from ratarmountcore import LibarchiveMountSource # noqa: E402
from ratarmountcore.LibarchiveMountSource import IterableArchive # noqa: E402


def findTestFile(relativePathOrName):
    """Locate a test asset by probing tests/<name>, ../tests/<name>, ../../tests/<name>.

    Returns the first existing candidate; falls back to returning the input
    unchanged when none exists.  NOTE(review): byte-identical duplicate of the
    helper in core/tests/helpers.py (imported a few lines above) — this commit
    removes the local copy in favor of the shared one.
    """
    for i in range(3):
        path = os.path.sep.join([".."] * i + ["tests", relativePathOrName])
        if os.path.exists(path):
            return path
    return relativePathOrName


class TestLibarchiveMountSource:
@staticmethod
@pytest.mark.parametrize('compression', ['7z', 'rar', 'zip'])
def test_simple_usage(compression):
with LibarchiveMountSource(findTestFile('folder-symlink.' + compression)) as mountSource:
with copyTestFile('folder-symlink.' + compression) as path, LibarchiveMountSource(path) as mountSource:
for folder in ['/', '/foo', '/foo/fighter']:
assert mountSource.getFileInfo(folder)
assert mountSource.fileVersions(folder) == 1
Expand Down Expand Up @@ -64,8 +58,8 @@ def test_simple_usage(compression):
# @pytest.mark.parametrize("compression", ["7z", "rar", "zip"])
@pytest.mark.parametrize('compression', ['zip'])
def test_password(compression):
with LibarchiveMountSource(
findTestFile('encrypted-nested-tar.' + compression), passwords=['foo']
with copyTestFile('encrypted-nested-tar.' + compression) as path, LibarchiveMountSource(
path, passwords=['foo']
) as mountSource:
for folder in ['/', '/foo', '/foo/fighter']:
assert mountSource.getFileInfo(folder)
Expand All @@ -82,7 +76,9 @@ def test_password(compression):
@staticmethod
@pytest.mark.parametrize('compression', ['bz2', 'gz', 'lrz', 'lz4', 'lzip', 'lzma', 'lzo', 'xz', 'Z', 'zst'])
def test_stream_compressed(compression):
with LibarchiveMountSource(findTestFile('simple.' + compression), passwords=['foo']) as mountSource:
with copyTestFile('simple.' + compression) as path, LibarchiveMountSource(
path, passwords=['foo']
) as mountSource:
for folder in ['/']:
assert mountSource.getFileInfo(folder)
assert mountSource.fileVersions(folder) == 1
Expand All @@ -107,7 +103,7 @@ def test_stream_compressed(compression):
],
)
def test_file_independence(path, lineSize):
with LibarchiveMountSource(findTestFile(path)) as mountSource:
with copyTestFile(path) as copiedPath, LibarchiveMountSource(copiedPath) as mountSource:
with mountSource.open(mountSource.getFileInfo('zeros-32-MiB.txt')) as fileWithZeros:
expectedZeros = b'0' * (lineSize - 1) + b'\n'
assert fileWithZeros.read(lineSize) == expectedZeros
Expand Down Expand Up @@ -198,7 +194,7 @@ def create_large_file(tarPath, compression, fileCount):
def _test_large_file(path):
t0 = time.time()
fileCount = 0
with LibarchiveMountSource(findTestFile(path)) as mountSource:
with LibarchiveMountSource(path) as mountSource:
t1 = time.time()
print(f"Opening {path} took {time.time() - t0:.3f} s") # ~5 s
# In the worst case, reading all files can take 300k * 5s / 2 = ~9 days.
Expand Down
32 changes: 13 additions & 19 deletions core/tests/test_SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,15 @@

import indexed_bzip2

from helpers import copyTestFile

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import pytest # noqa: E402

from ratarmountcore import RatarmountError, SQLiteIndexedTar # noqa: E402


def findTestFile(relativePathOrName):
    """Locate a test asset by probing tests/<name>, ../tests/<name>, ../../tests/<name>.

    Returns the first existing candidate; falls back to returning the input
    unchanged when none exists.  NOTE(review): byte-identical duplicate of the
    helper in core/tests/helpers.py (see `from helpers import copyTestFile`
    above) — this commit removes the local copy in favor of the shared one.
    """
    for i in range(3):
        path = os.path.sep.join([".."] * i + ["tests", relativePathOrName])
        if os.path.exists(path):
            return path
    return relativePathOrName


@pytest.mark.parametrize("parallelization", [1, 2, 4])
class TestSQLiteIndexedTarParallelized:
@staticmethod
Expand All @@ -48,8 +42,8 @@ def _makeFolder(tarArchive, name):

@staticmethod
def test_context_manager(parallelization):
with SQLiteIndexedTar(
findTestFile('single-file.tar'), writeIndex=False, parallelization=parallelization
with copyTestFile("single-file.tar") as path, SQLiteIndexedTar(
path, writeIndex=False, parallelization=parallelization
) as indexedTar:
assert indexedTar.listDir('/')
assert indexedTar.getFileInfo('/')
Expand All @@ -58,8 +52,8 @@ def test_context_manager(parallelization):

@staticmethod
def test_tar_bz2_with_parallelization(parallelization):
with SQLiteIndexedTar(
findTestFile("2k-recursive-tars.tar.bz2"),
with copyTestFile("2k-recursive-tars.tar.bz2") as path, SQLiteIndexedTar(
path,
clearIndexCache=True,
recursive=False,
parallelization=parallelization,
Expand All @@ -79,8 +73,8 @@ def test_tar_bz2_with_parallelization(parallelization):

@staticmethod
def test_recursive_tar_bz2_with_parallelization(parallelization):
with SQLiteIndexedTar(
findTestFile("2k-recursive-tars.tar.bz2"),
with copyTestFile("2k-recursive-tars.tar.bz2") as path, SQLiteIndexedTar(
path,
clearIndexCache=True,
recursive=True,
parallelization=parallelization,
Expand All @@ -98,8 +92,8 @@ def test_recursive_tar_bz2_with_parallelization(parallelization):

@staticmethod
def test_deep_recursive(parallelization):
with SQLiteIndexedTar(
findTestFile("packed-5-times.tar.gz"),
with copyTestFile("packed-5-times.tar.gz") as path, SQLiteIndexedTar(
path,
clearIndexCache=True,
recursive=True,
parallelization=parallelization,
Expand Down Expand Up @@ -445,9 +439,9 @@ def test_appending_to_large_archive(parallelization, tmpdir):

# Create a TAR large in size as well as file count
tarPath = os.path.join(tmpdir, "foo.tar")
with indexed_bzip2.open(findTestFile("tar-with-300-folders-with-1000-files-0B-files.tar.bz2")) as file, open(
tarPath, 'wb'
) as extracted:
with copyTestFile("tar-with-300-folders-with-1000-files-0B-files.tar.bz2") as path, indexed_bzip2.open(
path
) as file, open(tarPath, 'wb') as extracted:
while True:
data = file.read(1024 * 1024)
if not data:
Expand Down
44 changes: 36 additions & 8 deletions tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1648,16 +1648,44 @@ if [[ -z "$CI" ]]; then

shellcheck tests/*.sh || returnError "$LINENO" 'shellcheck failed!'

# Test runtimes 2024-04-04 on Ryzen 3900X
# core/tests/test_AutoMountLayer.py in 19.05s parallelize
# core/tests/test_BlockParallelReaders.py in 57.95s parallelize
# core/tests/test_LibarchiveMountSource.py in 246.99s parallelize
# core/tests/test_RarMountSource.py in 0.08s
# core/tests/test_SQLiteBlobFile.py in 0.24s
# core/tests/test_SQLiteIndex.py in 0.10s
# core/tests/test_SQLiteIndexedTar.py in 154.08s parallelize
# core/tests/test_StenciledFile.py in 1.91s
# core/tests/test_SubvolumesMountSource.py in 0.12s
# core/tests/test_UnionMountSource.py in 0.12s
# core/tests/test_ZipMountSource.py in 0.09s
# core/tests/test_compressions.py in 0.13s
# core/tests/test_factory.py in 0.36s
# core/tests/test_utils.py in 0.22s
# tests/test_cli.py in 1.43s

# Pytest has serious performance issues. It does collect all tests beforehand and does not free memory
# after tests have finished it seems. Or maybe that memory is a bug with indexed_gzip but the problem is
# that after that all tests after that one outlier also run slower. Maybe because of a Python garbage collector
# bug? For that reason, run each test file separately.
# after tests have finished it seems. Or maybe that memory is a bug with indexed_gzip. But the problem is
# that all tests after that one outlier also run slower! Maybe because of a Python garbage collector bug?
# For that reason, run each test file separately.
for testFile in "${testFiles[@]}"; do
if [[ "${testFile//test_//}" != "$testFile" ]]; then
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$testFile" || returnError "$LINENO" 'pytest failed!'
fi
case "$testFile" in
"core/tests/test_AutoMountLayer.py"\
|"core/tests/test_BlockParallelReaders.py"\
|"core/tests/test_LibarchiveMountSource.py"\
|"core/tests/test_SQLiteIndexedTar.py")
echo "$testFile" # pytest-xdist seems to omit the test file name
pytest -n auto --disable-warnings "$testFile" || returnError "$LINENO" 'pytest failed!'
;;
*)
if [[ "${testFile//test_//}" != "$testFile" ]]; then
# Fusepy warns about usage of use_ns because the implicit behavior is deprecated.
# But there has been no development to fusepy for 4 years, so I think it should be fine to ignore.
pytest --disable-warnings "$testFile" || returnError "$LINENO" 'pytest failed!'
fi
;;
esac
done
fi

Expand Down

0 comments on commit c43bc5b

Please sign in to comment.