Skip to content

Commit

Permalink
update perlmutter gpu script
Browse files Browse the repository at this point in the history
  • Loading branch information
alchem0x2A committed Oct 3, 2022
1 parent 9f7a0ba commit 6bec166
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 5 deletions.
16 changes: 15 additions & 1 deletion tests/_common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,29 @@ def get_cpu_cores():
break
return cores

def get_oversubscribe():
    """Tell whether the VASP launch command contains an oversubscribe flag.

    The command string produced by ``VaspInteractive.make_command()`` is split
    on whitespace; the result is True when any token contains the substring
    ``"oversubscribe"`` and False otherwise.
    """
    with VaspInteractive() as test_calc:
        tokens = test_calc.make_command().split()
        found = any("oversubscribe" in token for token in tokens)
    return found

def skip_probe(min_cores=8):

def skip_probe(min_cores=8, skip_oversubscribe=False):
    """Skip the calling test when the machine cannot run it meaningfully.

    Args:
        min_cores: minimum value ``get_cpu_cores()`` must report for the test
            to run.
        skip_oversubscribe: when True, additionally skip if
            ``get_oversubscribe()`` reports an oversubscribed launch command.
    """
    available = get_cpu_cores()
    if not (available >= min_cores):
        pytest.skip(
            f"Skipping test with ncores < {min_cores}", allow_module_level=False
        )
    elif skip_oversubscribe and get_oversubscribe():
        # Only consulted when the core-count requirement is already satisfied.
        pytest.skip("Skipping due to oversubscription", allow_module_level=False)


def skip_slurm(reverse=False):
Expand Down
66 changes: 66 additions & 0 deletions tests/nersc_scripts/job_test_perlmutter_shifter.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash -l
#SBATCH -N 1
#SBATCH -C cpu
#SBATCH -q regular
#SBATCH -A m2755
#SBATCH -t 02:00:00

# Slurm batch job: clone the repository into a fresh scratch directory, run
# every tests/test*.py file through pytest inside the Shifter image for each
# listed VASP build, then report the outcome by dispatching the
# perlmutter_shifter_status.yml GitHub workflow.
#
# Environment inputs:
#   GIT_REF       git ref to test; falls back to "main" when unset or empty
#   SCRATCH       root for the working tree and for temporary files
#   SLURM_JOB_ID  job id, forwarded to the status workflow

CONDA_ROOT="/global/homes/t/ttian20/.conda/envs/vpi"
GIT_REPO="ulissigroup/vasp-interactive"
GIT_REF="${GIT_REF:-main}"
# Put the vpi environment's bin directory first on PATH
# (used here in place of `conda activate vpi`).
export PATH="${CONDA_ROOT}/bin:$PATH"

uid=$(uuidgen)
root="$SCRATCH/vpi-runner/$uid"
# Without a private working directory the clone would land somewhere
# unexpected, so give up immediately if it cannot be created or entered.
{ mkdir -p "$root" && cd "$root"; } || exit 1
jobid="${SLURM_JOB_ID}"
echo "Job ID $jobid"
echo "Running tests under $root"

# res stays "true" only if setup and every test file succeed.
res="true"
if gh repo clone "$GIT_REPO" && cd vasp-interactive && git checkout "$GIT_REF"
then
    echo "Check to $GIT_REF"
else
    # Do not test (and report success for) a tree that is not the requested ref.
    echo "Failed to clone $GIT_REPO or check out $GIT_REF"
    res="false"
fi
MODPATH=$(realpath .)
export MODPATH

export SHIFTER_IMAGE=ulissigroup/kubeflow_vasp:beef_vdw

if [[ $res == "true" ]]
then
    for ver in "vasp.5.4.4.pl2" "vasp.6.3.0_pgi_mkl"
    do
        echo "Testing VaspInteractive on $ver"
        # Depends only on the VASP build, so build it once per version.
        VASP_COMMAND="mpirun -np 32 --bind-to core --oversubscribe /opt/${ver}/bin/vasp_std"
        for f in tests/test*.py
        do
            abs_f=$(realpath "$f")
            # TMPDIR is the variable Python's tempfile module honours; TEMPDIR
            # is still passed so anything relying on the old name keeps working.
            shifter --image="$SHIFTER_IMAGE" \
                --env=VASP_COMMAND="$VASP_COMMAND" \
                --env=PYTHONPATH="$MODPATH" \
                --env=TMPDIR="$SCRATCH" \
                --env=TEMPDIR="$SCRATCH" \
                -- pytest -svv "${abs_f}"
            status=$?
            # Clean up stray VASP processes whether or not the test passed.
            killall vasp_std
            if [[ $status != 0 ]]
            then
                res="false"
                break
            fi
        done
        # NOTE(review): no matching `module load vasp` is visible in this
        # script; kept from the original -- confirm whether it is still needed.
        module unload vasp
        if [[ $res == "false" ]]
        then
            break
        fi
    done
fi

if [[ $res == "true" ]]
then
    echo "All test pass!"
else
    echo "Test fail. See output"
fi


# Report the result together with the job id and working directory.
gh workflow run perlmutter_shifter_status.yml -f signal="$res" -f jobid="$jobid" -f path="$root"

4 changes: 2 additions & 2 deletions tests/test_mpi_pause.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

def test_pause_cpu_percent():
"""Send pause signal to mpi process and see if drops below threshold"""
skip_probe(4)
skip_probe(4, skip_oversubscribe=True)
# skip_slurm()
h2 = h2_root.copy()
threshold_high = 75.0
Expand All @@ -55,7 +55,7 @@ def test_pause_cpu_percent():

def test_pause_context():
"""Context mode"""
skip_probe(4)
skip_probe(4, skip_oversubscribe=True)
# skip_slurm()
h2 = h2_root.copy()
threshold_high = 75.0
Expand Down
4 changes: 2 additions & 2 deletions tests/test_reverse_mpi_pause.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


def test_paused_close():
skip_probe(4)
skip_probe(4, skip_oversubscribe=True)
# skip_slurm()
"""Context mode"""
h2 = h2_root.copy()
Expand All @@ -45,7 +45,7 @@ def test_paused_close():

def test_paused_close_context():
"""Context mode"""
skip_probe(4)
skip_probe(4, skip_oversubscribe=True)
# skip_slurm()
h2 = h2_root.copy()
with tempfile.TemporaryDirectory() as tmpdir:
Expand Down

0 comments on commit 6bec166

Please sign in to comment.