Skip to content

Commit ce3d2bc

Browse files
authored
Merge pull request #99 from aturner-epcc/aturner-epcc/cirrus-ex-utils
Ports R, xthi and container tests to Cirrus EX
2 parents 9e68dfa + cef0485 commit ce3d2bc

File tree

5 files changed

+201
-104
lines changed

5 files changed

+201
-104
lines changed

tests/utils/R/rscript.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class RscriptInstall(RscriptBase):
2929
"""Tests installing packages with R on the login nodes"""
3030

3131
descr = "Tests that R packages can be installed locally. Requires internet access."
32-
valid_systems = ["archer2:login"]
32+
valid_systems = ["archer2:login", "cirrus-ex:login"]
3333
local = True
3434
executable_opts = ["install_benchmark_packages.R"]
3535
libs_path = None
@@ -61,7 +61,7 @@ class RscriptRun(RscriptBase):
6161
Uses packages installed locally
6262
in a previous test.
6363
"""
64-
valid_systems = ["archer2:login", "archer2:compute"]
64+
valid_systems = ["archer2:login", "archer2:compute", "cirrus-ex:login", "cirrus-ex:compute"]
6565
executable_opts = ["run_benchmark.R"]
6666
library = fixture(RscriptInstall, scope="session")
6767
libs_path = None
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Containerised OSU benchmarks
4+
5+
These tests check that containers can be run with MPI. Basic performance checks are also included.
6+
"""
7+
8+
import os
9+
10+
import reframe as rfm
11+
import reframe.utility.sanity as sn
12+
13+
14+
class PullOSUContainerARCHER2(rfm.RunOnlyRegressionTest):
    """Fetch the OSU benchmark container image used by the ARCHER2 test.

    Runs ``singularity pull`` against the ``archer2_osu`` image hosted on
    ghcr.io. The run is considered successful when the string "error" does
    not appear in stderr. No ``@rfm.simple_test`` decorator is applied in
    this file; the class is consumed as a fixture by the ARCHER2 test.
    """

    descr = "Pull an OSU benchmark container from github "

    # Where the pull may run.
    valid_systems = ["archer2:login"]
    valid_prog_environs = ["PrgEnv-gnu"]
    local = True

    # What is pulled, and with which tool. ``image_name`` is also read by
    # the dependent test to locate the resulting .sif file.
    image_name = "archer2_osu"
    executable = "singularity"
    executable_opts = [
        "pull",
        f"docker://ghcr.io/epcced/epcc-reframe/{image_name}",
    ]

    @sanity_function
    def validate_download(self):
        """Pass when stderr contains no occurrence of "error"."""
        return sn.assert_not_found("error", self.stderr)
29+
30+
31+
class PullOSUContainerCirrusEX(rfm.RunOnlyRegressionTest):
    """Fetch the OSU benchmark container image used by the Cirrus EX test.

    Runs ``apptainer pull`` against a version-tagged ``osu-benchmarks``
    image hosted on ghcr.io. The run is considered successful when the
    string "error" does not appear in stderr. No ``@rfm.simple_test``
    decorator is applied in this file; the class is consumed as a fixture
    by the Cirrus EX test.
    """

    descr = "Pull an OSU benchmark container from github "

    # Where the pull may run.
    valid_systems = ["cirrus-ex:login"]
    valid_prog_environs = ["PrgEnv-gnu"]
    local = True

    # What is pulled, and with which tool. ``image_name`` and
    # ``image_version`` are also read by the dependent test to locate the
    # resulting .sif file.
    image_name = "osu-benchmarks"
    image_version = "7.5.1"
    executable = "apptainer"
    executable_opts = [
        "pull",
        f"docker://ghcr.io/epcced/epcc-reframe/{image_name}:{image_version}",
    ]

    @sanity_function
    def validate_download(self):
        """Pass when stderr contains no occurrence of "error"."""
        return sn.assert_not_found("error", self.stderr)
47+
48+
49+
@rfm.simple_test
class OSUContainerTestARCHER2(rfm.RunOnlyRegressionTest):
    """Run the OSU allreduce benchmark from a container on ARCHER2.

    The image comes from the session-scoped ``PullOSUContainerARCHER2``
    fixture. The test executes ``singularity run <image> osu_allreduce`` and
    extracts two performance values from stdout: the number following the
    ``1048576`` row (``latency_big``) and the number following the ``4`` row
    (``latency_small``).
    """

    descr = "OSU benchmarks in a container"
    osu_container = fixture(PullOSUContainerARCHER2, scope="session")
    valid_systems = ["archer2:compute"]
    valid_prog_environs = ["PrgEnv-gnu"]
    num_tasks = 256
    num_tasks_per_node = 128
    num_cpus_per_task = 1
    time_limit = "5m"

    # The long values below are assembled with adjacent string literals
    # rather than backslash continuations inside a single literal, so that
    # source indentation can never leak whitespace into the paths.
    env_vars = {
        "OMP_NUM_THREADS": str(num_cpus_per_task),
        "OMP_PLACES": "cores",
        # Colon-separated library search path exported into the container.
        "SINGULARITYENV_LD_LIBRARY_PATH": (
            "/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib-abi-mpich:"
            "/opt/cray/pe/mpich/8.1.23/gtl/lib:/opt/cray/libfabric/1.12.1.2.2.0.0/lib64:"
            "/opt/cray/pe/gcc-libs:/opt/cray/pe/gcc-libs:/opt/cray/pe/lib64:/opt/cray/pe/lib64:"
            "/opt/cray/xpmem/default/lib64:/usr/lib64/libibverbs:/usr/lib64:/usr/lib64"
        ),
        # Comma-separated host paths bind-mounted into the container.
        # NOTE(review): the second entry joins lib-abi-mpich and gtl/lib with
        # ":" (a src:dest bind) rather than ","; kept exactly as in the
        # original - confirm this is intended.
        "SINGULARITY_BIND": (
            "/opt/cray,/var/spool,"
            "/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib-abi-mpich:"
            "/opt/cray/pe/mpich/8.1.23/gtl/lib,/etc/host.conf,"
            "/etc/libibverbs.d/mlx5.driver,/etc/libnl/classid,"
            "/etc/resolv.conf,/opt/cray/libfabric/1.12.1.2.2.0.0/lib64/libfabric.so.1,"
            "/opt/cray/pe/gcc-libs/libatomic.so.1,/opt/cray/pe/gcc-libs/libgcc_s.so.1,"
            "/opt/cray/pe/gcc-libs/libgfortran.so.5,/opt/cray/pe/gcc-libs/libquadmath.so.0,"
            "/opt/cray/pe/lib64/libpals.so.0,/opt/cray/pe/lib64/libpmi2.so.0,"
            "/opt/cray/pe/lib64/libpmi.so.0,/opt/cray/xpmem/default/lib64/libxpmem.so.0,"
            "/run/munge/munge.socket.2,/usr/lib64/libibverbs/libmlx5-rdmav34.so,"
            "/usr/lib64/libibverbs.so.1,/usr/lib64/libkeyutils.so.1,/usr/lib64/liblnetconfig.so.4,"
            "/usr/lib64/liblustreapi.so,/usr/lib64/libmunge.so.2,/usr/lib64/libnl-3.so.200,"
            "/usr/lib64/libnl-genl-3.so.200,/usr/lib64/libnl-route-3.so.200,/usr/lib64/librdmacm.so.1,"
            "/usr/lib64/libyaml-0.so.2"
        ),
    }

    # Per-system reference tuples: (value, lower threshold, upper threshold, unit).
    reference = {
        "archer2:compute": {"latency_big": (2200, -0.02, 0.30, "us"), "latency_small": (8.4, -0.05, 0.30, "us")}
    }

    @require_deps
    def set_singularity_invoke(self):
        """Build the singularity command line that runs the benchmark.

        The image path is the fixture's stage directory joined with
        ``<image_name>_latest.sif``.
        """
        self.executable = "singularity"

        self.executable_opts = [
            "run",
            os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_latest.sif"),
            "osu_allreduce",
        ]

    @performance_function("us")
    def latency_big(self):
        """Extract the latency reported on the 1048576 row of the OSU output."""
        # The decimal point is escaped so that only "123" or "123.45" style
        # numbers are captured (an unescaped "." would match any character).
        return sn.extractsingle(r"^1048576\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)

    @performance_function("us")
    def latency_small(self):
        """Extract the latency reported on the 4 row of the OSU output."""
        return sn.extractsingle(r"^4\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)

    @sanity_function
    def validate_job_run(self):
        """Check that the benchmark banner appears in stdout."""
        return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout)
114+
115+
116+
@rfm.simple_test
class OSUContainerTestCirrusEX(rfm.RunOnlyRegressionTest):
    """Run the OSU allreduce benchmark from a container on Cirrus EX.

    The image comes from the session-scoped ``PullOSUContainerCirrusEX``
    fixture. The test executes ``apptainer run <image> osu_allreduce`` and
    extracts two performance values from stdout: the number following the
    ``1048576`` row (``latency_big``) and the number following the ``4`` row
    (``latency_small``).
    """

    descr = "OSU benchmarks in a container"
    osu_container = fixture(PullOSUContainerCirrusEX, scope="session")
    valid_systems = ["cirrus-ex:compute"]
    valid_prog_environs = ["PrgEnv-gnu"]
    num_tasks = 576
    num_tasks_per_node = 288
    num_cpus_per_task = 1
    time_limit = "10m"

    # The long values below are assembled with adjacent string literals
    # rather than backslash continuations inside a single literal, so that
    # source indentation can never leak whitespace into the paths.
    env_vars = {
        "OMP_NUM_THREADS": str(num_cpus_per_task),
        "OMP_PLACES": "cores",
        # Colon-separated library search path exported into the container.
        "APPTAINERENV_LD_LIBRARY_PATH": (
            "/opt/cray/pe/mpich/8.1.32/ofi/gnu/11.2/lib-abi-mpich:"
            "/opt/cray/libfabric/1.22.0/lib64:"
            "/opt/cray/pals/1.6/lib:"
            "/opt/cray/pe/lib64:"
            "/opt/xpmem/lib64:/lib64"
        ),
        # Comma-separated host paths bind-mounted into the container.
        "APPTAINER_BIND": (
            "/opt/cray,/var/spool,"
            "/opt/cray/pe/mpich/8.1.32/ofi/gnu/11.2/lib-abi-mpich,"
            "/etc/host.conf,/etc/libibverbs.d/mlx5.driver,"
            "/etc/libnl/classid,"
            "/etc/resolv.conf,"
            "/opt/cray/libfabric/1.22.0/lib64/libfabric.so.1,"
            "/lib64/libatomic.so.1,"
            "/lib64/libgcc_s.so.1,/lib64/libgfortran.so.5,"
            "/lib64/libquadmath.so.0,"
            "/opt/cray/pals/1.6/lib/libpals.so.0,"
            "/opt/cray/pe/lib64/libpmi2.so.0,"
            "/opt/cray/pe/lib64/libpmi.so.0,"
            "/opt/xpmem/lib64/libxpmem.so.0,"
            "/run/munge/munge.socket.2,"
            "/lib64/libmunge.so.2,"
            "/lib64/libnl-3.so.200,"
            "/lib64/libnl-genl-3.so.200,"
            "/lib64/libnl-route-3.so.200,"
            "/lib64/librdmacm.so.1,"
            "/lib64/libcxi.so.1,"
            "/lib64/libm.so.6"
        ),
    }

    # Per-system reference tuples: (value, lower threshold, upper threshold, unit).
    reference = {
        "cirrus-ex:compute": {"latency_big": (1100, -0.02, 0.30, "us"), "latency_small": (9.7, -0.05, 0.30, "us")}
    }

    @require_deps
    def set_singularity_invoke(self):
        """Build the apptainer command line that runs the benchmark.

        The hook keeps its historical ``singularity`` name although the
        executable set here is ``apptainer``. The image path is the fixture's
        stage directory joined with ``<image_name>_<image_version>.sif``.
        """
        self.executable = "apptainer"

        self.executable_opts = [
            "run",
            os.path.join(
                self.osu_container.stagedir,
                self.osu_container.image_name + "_" + self.osu_container.image_version + ".sif",
            ),
            "osu_allreduce",
        ]

    @performance_function("us")
    def latency_big(self):
        """Extract the latency reported on the 1048576 row of the OSU output."""
        # The decimal point is escaped so that only "123" or "123.45" style
        # numbers are captured (an unescaped "." would match any character).
        return sn.extractsingle(r"^1048576\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)

    @performance_function("us")
    def latency_small(self):
        """Extract the latency reported on the 4 row of the OSU output."""
        return sn.extractsingle(r"^4\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)

    @sanity_function
    def validate_job_run(self):
        """Check that the benchmark banner appears in stdout."""
        return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout)

tests/utils/singularity/singularity.py

Lines changed: 0 additions & 95 deletions
This file was deleted.

tests/utils/xthi/hetjob.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ class SharedCommWorldTest(rfm.RunOnlyRegressionTest):
2424

2525
maintainers = ["k.stratford@epcc.ed.ac.uk"]
2626
descr = "SLURM hetjob for xthi shared MPI_COM_WORLD"
27-
valid_systems = ["archer2:compute"]
27+
valid_systems = ["archer2:compute", "cirrus-ex:compute"]
2828
valid_prog_environs = ["*"]
2929
modules = ["xthi"]
3030

3131
# Utter, utter kludge
3232
# 1 + 2 nodes; 8 + 2x4 MPI tasks
33-
hetgroup0 = "--het-group=0 --nodes=1 --ntasks=8 --ntasks-per-node=8 xthi"
34-
hetgroup1 = "--het-group=1 --nodes=2 --ntasks=8 --ntasks-per-node=4 xthi"
33+
hetgroup0 = "--het-group=0 --nodes=1 --ntasks=8 --ntasks-per-node=8 xthi_mpi_mp"
34+
hetgroup1 = "--het-group=1 --nodes=2 --ntasks=8 --ntasks-per-node=4 xthi_mpi_mp"
3535
executable = hetgroup0 + " : " + hetgroup1
3636

3737
time_limit = "2m"
@@ -56,16 +56,16 @@ class SharedCommWorldWithOpenMPTest(rfm.RunOnlyRegressionTest):
5656
"""
5757

5858
descr = "SLURM hetjob for shared MPI_COM_WORLD with OpenMP"
59-
valid_systems = ["archer2:compute"]
59+
valid_systems = ["archer2:compute", "cirrus-ex:compute"]
6060
valid_prog_environs = ["*"]
6161
modules = ["xthi"]
6262

6363
# Two nodes with 8 MPI tasks per node
6464
shared_args = " --nodes=1 --ntasks=8 --tasks-per-node=8 --cpus-per-task=16"
6565
openmp0 = " --export=all,OMP_NUM_THREADS=16"
6666
openmp1 = " --export=all,OMP_NUM_THREADS=1"
67-
hetgroup0 = "--het-group=0" + shared_args + openmp0 + " xthi"
68-
hetgroup1 = "--het-group=1" + shared_args + openmp1 + " xthi"
67+
hetgroup0 = "--het-group=0" + shared_args + openmp0 + " xthi_mpi_mp"
68+
hetgroup1 = "--het-group=1" + shared_args + openmp1 + " xthi_mpi_mp"
6969
executable = hetgroup0 + " : " + hetgroup1
7070

7171
time_limit = "2m"

tests/utils/xthi/xthi.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ class XthiCompilationTest(reframe.CompileOnlyRegressionTest):
1313

1414
maintainers = ["k.straford@epcc.ed.ac.uk"]
1515
descr = "xthi compilation test"
16-
valid_systems = ["archer2:login", "cirrus:login"]
16+
valid_systems = ["archer2:login", "cirrus:login", "cirrus-ex:login"]
1717
valid_prog_environs = [
1818
"PrgEnv-cray",
1919
"PrgEnv-gnu",
2020
"PrgEnv-aocc",
21+
"PrgEnv-intel",
2122
"gcc",
2223
"intel",
2324
]

0 commit comments

Comments
 (0)