Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/tests/ftest/daos_racer/multi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ server_config:

daos_racer:
runtime: 7200
clush_timeout: 10080
timeout: 10080
29 changes: 6 additions & 23 deletions src/tests/ftest/daos_racer/parallel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/python3
"""
(C) Copyright 2021-2022 Intel Corporation.
(C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
Expand All @@ -10,7 +9,6 @@
from daos_racer_utils import DaosRacerCommand
from exception_utils import CommandFailure
from job_manager_utils import get_job_manager
from run_utils import run_remote


class DaosRacerParallelTest(TestWithServers):
Expand All @@ -35,39 +33,24 @@
:avocado: tags=io,daos_racer
:avocado: tags=DaosRacerParallelTest,test_daos_racer_parallel
"""
# DAOS-18236 - Debug missing libdpar_mpi.so
run_remote(
self.log, self.hostlist_clients,
'ls -l /usr/mpi/gcc/openmpi-4.1.7rc1/lib | grep -i libdpar')
run_remote(
self.log, self.hostlist_clients,
'ls -l /usr/mpi/gcc/openmpi-4.1.7rc1/lib64 | grep -i libdpar')
run_remote(
self.log, self.hostlist_clients,
'ls -l /usr/lib | grep -i libdpar')
run_remote(
self.log, self.hostlist_clients,
'ls -l /usr/lib64 | grep -i libdpar')

# Create the daos_racer command
daos_racer = DaosRacerCommand(self.bin, self.hostlist_clients, self.get_dmg_command())
daos_racer.get_params(self)

# Create the orterun command
# Create the mpi command
job_manager = get_job_manager(self)
job_manager.assign_hosts(self.hostlist_clients, self.workdir, None)
job_manager.assign_processes(len(self.hostlist_clients))
job_manager.assign_processes(ppn=self.params.get('ppn', daos_racer.namespace))
job_manager.assign_environment(daos_racer.env)
job_manager.job = daos_racer
job_manager.check_results_list = ["<stderr>"]
job_manager.timeout = daos_racer.clush_timeout.value
self.log.info("Multi-process command: %s", str(job_manager))
job_manager.check_results_list = ["<stderr>", "No MPI found"]
job_manager.timeout = daos_racer.timeout.value

# Run the daos_racer command and check for errors
self.log_step("Run daos_racer with multiple clients")
try:
job_manager.run()

except CommandFailure as error:
self.fail(f"daos_racer failed: {error}")

self.log.info("Test passed!")
self.log_step("Test passed!")

Check warning on line 56 in src/tests/ftest/daos_racer/parallel.py

View workflow job for this annotation

GitHub Actions / Pylint check

missing-final-newline, Final newline missing

Check warning on line 56 in src/tests/ftest/daos_racer/parallel.py

View workflow job for this annotation

GitHub Actions / Flake8 check

W292 no newline at end of file
8 changes: 2 additions & 6 deletions src/tests/ftest/daos_racer/parallel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@ server_config:
log_mask: "ERR"
storage: auto

job_manager:
class_name: Orterun
mpi_type: openmpi
manager_timeout: 630

daos_racer:
ppn: 1
runtime: 600
clush_timeout: 900
timeout: 900
2 changes: 1 addition & 1 deletion src/tests/ftest/daos_racer/simple.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ server_config:

daos_racer:
runtime: 600
clush_timeout: 900
timeout: 900
2 changes: 1 addition & 1 deletion src/tests/ftest/osa/online_extend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ mdtest:

daos_racer:
runtime: 480
clush_timeout: 1000
timeout: 1000

test_obj_class:
oclass:
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ftest/osa/online_parallel_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
(C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -194,7 +194,7 @@ def test_osa_online_parallel_test(self):

:avocado: tags=all,pr,daily_regression
:avocado: tags=hw,medium
:avocado: tags=osa,checksum,osa_parallel
:avocado: tags=osa,checksum,osa_parallel,daos_racer
:avocado: tags=OSAOnlineParallelTest,test_osa_online_parallel_test
"""
self.run_online_parallel_test(1)
2 changes: 1 addition & 1 deletion src/tests/ftest/osa/online_parallel_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,4 @@ ior:

daos_racer:
runtime: 480
clush_timeout: 1000
timeout: 1000
2 changes: 1 addition & 1 deletion src/tests/ftest/osa/online_reintegration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ ior:

daos_racer:
runtime: 480
clush_timeout: 1000
timeout: 1000

mdtest:
api: DFS
Expand Down
50 changes: 11 additions & 39 deletions src/tests/ftest/util/daos_racer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@
from ClusterShell.NodeSet import NodeSet
from command_utils import ExecutableCommand
from command_utils_base import BasicParameter, FormattedParameter
from env_modules import load_mpi
from exception_utils import CommandFailure, MPILoadError
from exception_utils import CommandFailure
from general_utils import get_log_file
from run_utils import run_remote


class DaosRacerCommand(ExecutableCommand):
"""Defines an object representing a daos_racer command."""

def __init__(self, path, hosts, dmg=None):
def __init__(self, path, hosts, dmg=None, namespace="/run/daos_racer/*"):
"""Create a daos_racer command object.

Args:
path (str): path of the daos_racer command
hosts (str/NodeSet): hosts on which to run the daos_racer command
dmg (DmgCommand): a DmgCommand object used to obtain the
configuration file and certificate
namespace (str): yaml namespace (path to parameters). Defaults to "/run/daos_racer/*".
"""
super().__init__("/run/daos_racer/*", "daos_racer", path)
super().__init__(namespace, "daos_racer", path)
if not isinstance(hosts, NodeSet):
hosts = NodeSet(hosts)
self._hosts = NodeSet(hosts)
Expand All @@ -42,15 +42,18 @@ def __init__(self, path, hosts, dmg=None):
dmg.copy_certificates(get_log_file("daosCA/certs"), self._hosts)
dmg.copy_configuration(self._hosts)

# Optional timeout for the clush command running the daos_racer command.
# Optional timeout for running the daos_racer command.
# This should be set greater than the 'runtime' value but less than the
# avocado test timeout value to allow for proper cleanup. Using a value
# of None will result in no timeout being used.
self.clush_timeout = BasicParameter(None)
self.timeout = BasicParameter(None)

# Include bullseye coverage file environment
self.env["COVFILE"] = os.path.join(os.sep, "tmp", "test.cov")

# Use a separate log file by default
self.env["D_LOG_FILE"] = get_log_file(f"{self.command}.log")

def get_str_param_names(self):
"""Get a sorted list of the names of the command attributes.

Expand All @@ -64,33 +67,6 @@ def get_str_param_names(self):
"""
return self.get_attribute_names(FormattedParameter)

def get_params(self, test):
"""Get values for all of the command params from the yaml file.

Also sets default daos_racer environment.

Args:
test (Test): avocado Test object

"""
super().get_params(test)
default_env = {
"D_LOG_FILE": get_log_file("{}_daos.log".format(self.command)),
"OMPI_MCA_btl_openib_warn_default_gid_prefix": "0",
"OMPI_MCA_btl": "tcp,self",
"OMPI_MCA_oob": "tcp",
"OMPI_MCA_pml": "ob1",
"D_LOG_MASK": "ERR"
}
for key, val in default_env.items():
if key not in self.env:
self.env[key] = val

if not load_mpi("openmpi"):
raise MPILoadError("openmpi")

self.env["LD_LIBRARY_PATH"] = os.environ["LD_LIBRARY_PATH"]

def run(self, raise_exception=None):
"""Run the daos_racer command remotely.

Expand All @@ -110,16 +86,12 @@ def run(self, raise_exception=None):
self.log.info(
"Running %s on %s with %s timeout",
str(self), self._hosts,
"no" if self.clush_timeout.value is None else
"a {}s".format(self.clush_timeout.value))
result = run_remote(
self.log, self._hosts, self.with_exports, timeout=self.clush_timeout.value)
"no" if self.timeout.value is None else f"a {self.timeout.value}s")
result = run_remote(self.log, self._hosts, self.with_exports, timeout=self.timeout.value)
if not result.passed:
if result.timeout:
self.log.info("Stopping timed out daos_racer process on %s", result.timeout_hosts)
run_remote(self.log, result.timeout_hosts, "pkill daos_racer", True)

if raise_exception:
raise CommandFailure(f"Error running '{self._command}'")

self.log.info("Test passed!")
4 changes: 1 addition & 3 deletions src/tests/ftest/util/soak_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,10 +1346,8 @@ def create_racer_cmdline(self, job_spec):
# daos_racer needs its own pool; does not run using jobs pool
add_pools(self, ["pool_racer"])
add_containers(self, self.pool[-1], "SX")
racer_namespace = os.path.join(os.sep, "run", job_spec, "*")
daos_racer = DaosRacerCommand(
self.bin, self.hostlist_clients[0])
daos_racer.namespace = racer_namespace
self.bin, self.hostlist_clients[0], namespace=os.path.join(os.sep, "run", job_spec, "*"))
daos_racer.get_params(self)
daos_racer.pool_uuid.update(self.pool[-1].uuid)
daos_racer.cont_uuid.update(self.container[-1].uuid)
Expand Down
3 changes: 1 addition & 2 deletions src/tests/ftest/util/yaml_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
(C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -108,7 +108,6 @@ class YamlUpdater():
("bdev_list", "_storage", list),
("timeout", "_timeout", int),
("timeouts", "_timeout", dict),
("clush_timeout", "_timeout", int),
("ior_timeout", "_timeout", int),
("job_manager_timeout", "_timeout", int),
("pattern_timeout", "_timeout", int),
Expand Down
Loading