diff --git a/src/tests/ftest/daos_racer/multi.yaml b/src/tests/ftest/daos_racer/multi.yaml index 85b1fc983b9..5bdff30e49d 100644 --- a/src/tests/ftest/daos_racer/multi.yaml +++ b/src/tests/ftest/daos_racer/multi.yaml @@ -23,4 +23,4 @@ server_config: daos_racer: runtime: 7200 - clush_timeout: 10080 + timeout: 10080 diff --git a/src/tests/ftest/daos_racer/parallel.py b/src/tests/ftest/daos_racer/parallel.py index 80a70d4f719..d2dd9d9d9b8 100755 --- a/src/tests/ftest/daos_racer/parallel.py +++ b/src/tests/ftest/daos_racer/parallel.py @@ -1,4 +1,3 @@ -#!/usr/bin/python3 """ (C) Copyright 2021-2022 Intel Corporation. (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP @@ -10,7 +9,6 @@ from daos_racer_utils import DaosRacerCommand from exception_utils import CommandFailure from job_manager_utils import get_job_manager -from run_utils import run_remote class DaosRacerParallelTest(TestWithServers): @@ -35,39 +33,24 @@ def test_daos_racer_parallel(self): :avocado: tags=io,daos_racer :avocado: tags=DaosRacerParallelTest,test_daos_racer_parallel """ - # DAOS-18236 - Debug missing libdpar_mpi.so - run_remote( - self.log, self.hostlist_clients, - 'ls -l /usr/mpi/gcc/openmpi-4.1.7rc1/lib | grep -i libdpar') - run_remote( - self.log, self.hostlist_clients, - 'ls -l /usr/mpi/gcc/openmpi-4.1.7rc1/lib64 | grep -i libdpar') - run_remote( - self.log, self.hostlist_clients, - 'ls -l /usr/lib | grep -i libdpar') - run_remote( - self.log, self.hostlist_clients, - 'ls -l /usr/lib64 | grep -i libdpar') - # Create the daos_racer command daos_racer = DaosRacerCommand(self.bin, self.hostlist_clients, self.get_dmg_command()) daos_racer.get_params(self) - # Create the orterun command + # Create the mpi command job_manager = get_job_manager(self) job_manager.assign_hosts(self.hostlist_clients, self.workdir, None) - job_manager.assign_processes(len(self.hostlist_clients)) + job_manager.assign_processes(ppn=self.params.get('ppn', daos_racer.namespace)) job_manager.assign_environment(daos_racer.env) job_manager.job = daos_racer - job_manager.check_results_list = [""] - job_manager.timeout = daos_racer.clush_timeout.value - self.log.info("Multi-process command: %s", str(job_manager)) + job_manager.check_results_list = ["", "No MPI found"] + job_manager.timeout = daos_racer.timeout.value - # Run the daos_racer command and check for errors + self.log_step("Run daos_racer with multiple clients") try: job_manager.run() except CommandFailure as error: self.fail(f"daos_racer failed: {error}") - self.log.info("Test passed!") + self.log_step("Test passed!") diff --git a/src/tests/ftest/daos_racer/parallel.yaml b/src/tests/ftest/daos_racer/parallel.yaml index 9c79b82efb4..95ab606263d 100644 --- a/src/tests/ftest/daos_racer/parallel.yaml +++ b/src/tests/ftest/daos_racer/parallel.yaml @@ -21,11 +21,7 @@ server_config: log_mask: "ERR" storage: auto -job_manager: - class_name: Orterun - mpi_type: openmpi - manager_timeout: 630 - daos_racer: + ppn: 1 runtime: 600 - clush_timeout: 900 + timeout: 900 diff --git a/src/tests/ftest/daos_racer/simple.yaml b/src/tests/ftest/daos_racer/simple.yaml index 6eda8bb011b..5c3ae17d18c 100644 --- a/src/tests/ftest/daos_racer/simple.yaml +++ b/src/tests/ftest/daos_racer/simple.yaml @@ -23,4 +23,4 @@ server_config: daos_racer: runtime: 600 - clush_timeout: 900 + timeout: 900 diff --git a/src/tests/ftest/osa/online_extend.yaml b/src/tests/ftest/osa/online_extend.yaml index deb6a306a8d..d335b4345be 100644 --- a/src/tests/ftest/osa/online_extend.yaml +++ b/src/tests/ftest/osa/online_extend.yaml @@ -92,7 +92,7 @@ mdtest: daos_racer: runtime: 480 - clush_timeout: 1000 + timeout: 1000 test_obj_class: oclass: diff --git a/src/tests/ftest/osa/online_parallel_test.py b/src/tests/ftest/osa/online_parallel_test.py index cf473038f55..34b73017f0f 100644 --- a/src/tests/ftest/osa/online_parallel_test.py +++ b/src/tests/ftest/osa/online_parallel_test.py @@ -1,6 +1,6 @@ """ (C) Copyright 2020-2023 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -194,7 +194,7 @@ def test_osa_online_parallel_test(self): :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium - :avocado: tags=osa,checksum,osa_parallel + :avocado: tags=osa,checksum,osa_parallel,daos_racer :avocado: tags=OSAOnlineParallelTest,test_osa_online_parallel_test """ self.run_online_parallel_test(1) diff --git a/src/tests/ftest/osa/online_parallel_test.yaml b/src/tests/ftest/osa/online_parallel_test.yaml index 4eedc63838f..e27310bc52a 100644 --- a/src/tests/ftest/osa/online_parallel_test.yaml +++ b/src/tests/ftest/osa/online_parallel_test.yaml @@ -63,4 +63,4 @@ ior: daos_racer: runtime: 480 - clush_timeout: 1000 + timeout: 1000 diff --git a/src/tests/ftest/osa/online_reintegration.yaml b/src/tests/ftest/osa/online_reintegration.yaml index af5f1c41866..7e5ea903035 100644 --- a/src/tests/ftest/osa/online_reintegration.yaml +++ b/src/tests/ftest/osa/online_reintegration.yaml @@ -63,7 +63,7 @@ ior: daos_racer: runtime: 480 - clush_timeout: 1000 + timeout: 1000 mdtest: api: DFS diff --git a/src/tests/ftest/util/daos_racer_utils.py b/src/tests/ftest/util/daos_racer_utils.py index fe8bfb83808..f9d97a415a6 100644 --- a/src/tests/ftest/util/daos_racer_utils.py +++ b/src/tests/ftest/util/daos_racer_utils.py @@ -9,8 +9,7 @@ from ClusterShell.NodeSet import NodeSet from command_utils import ExecutableCommand from command_utils_base import BasicParameter, FormattedParameter -from env_modules import load_mpi -from exception_utils import CommandFailure, MPILoadError +from exception_utils import CommandFailure from general_utils import get_log_file from run_utils import run_remote @@ -18,7 +17,7 @@ class DaosRacerCommand(ExecutableCommand): """Defines a object representing a daos_racer command.""" - def __init__(self, path, hosts, dmg=None): + def __init__(self, path, hosts, dmg=None, namespace="/run/daos_racer/*"): """Create a daos_racer command object. Args: @@ -26,8 +25,9 @@ def __init__(self, path, hosts, dmg=None): hosts (str/NodeSet): hosts on which to run the daos_racer command dmg (DmgCommand): a DmgCommand object used to obtain the configuration file and certificate + namespace (str): yaml namespace (path to parameters). Defaults to "/run/daos_racer/*". """ - super().__init__("/run/daos_racer/*", "daos_racer", path) + super().__init__(namespace, "daos_racer", path) if not isinstance(hosts, NodeSet): hosts = NodeSet(hosts) self._hosts = NodeSet(hosts) @@ -42,15 +42,18 @@ def __init__(self, path, hosts, dmg=None): dmg.copy_certificates(get_log_file("daosCA/certs"), self._hosts) dmg.copy_configuration(self._hosts) - # Optional timeout for the clush command running the daos_racer command. + # Optional timeout for running the daos_racer command. # This should be set greater than the 'runtime' value but less than the # avocado test timeout value to allow for proper cleanup. Using a value # of None will result in no timeout being used. - self.clush_timeout = BasicParameter(None) + self.timeout = BasicParameter(None) # Include bullseye coverage file environment self.env["COVFILE"] = os.path.join(os.sep, "tmp", "test.cov") + # Use a separate log file by default + self.env["D_LOG_FILE"] = get_log_file(f"{self.command}.log") + def get_str_param_names(self): """Get a sorted list of the names of the command attributes. @@ -64,33 +67,6 @@ def get_str_param_names(self): """ return self.get_attribute_names(FormattedParameter) - def get_params(self, test): - """Get values for all of the command params from the yaml file. - - Also sets default daos_racer environment. - - Args: - test (Test): avocado Test object - - """ - super().get_params(test) - default_env = { - "D_LOG_FILE": get_log_file("{}_daos.log".format(self.command)), - "OMPI_MCA_btl_openib_warn_default_gid_prefix": "0", - "OMPI_MCA_btl": "tcp,self", - "OMPI_MCA_oob": "tcp", - "OMPI_MCA_pml": "ob1", - "D_LOG_MASK": "ERR" - } - for key, val in default_env.items(): - if key not in self.env: - self.env[key] = val - - if not load_mpi("openmpi"): - raise MPILoadError("openmpi") - - self.env["LD_LIBRARY_PATH"] = os.environ["LD_LIBRARY_PATH"] - def run(self, raise_exception=None): """Run the daos_racer command remotely. @@ -110,10 +86,8 @@ def run(self, raise_exception=None): self.log.info( "Running %s on %s with %s timeout", str(self), self._hosts, - "no" if self.clush_timeout.value is None else - "a {}s".format(self.clush_timeout.value)) - result = run_remote( - self.log, self._hosts, self.with_exports, timeout=self.clush_timeout.value) + "no" if self.timeout.value is None else f"a {self.timeout.value}s") + result = run_remote(self.log, self._hosts, self.with_exports, timeout=self.timeout.value) if not result.passed: if result.timeout: self.log.info("Stopping timed out daos_racer process on %s", result.timeout_hosts) @@ -121,5 +95,3 @@ def run(self, raise_exception=None): if raise_exception: raise CommandFailure(f"Error running '{self._command}'") - - self.log.info("Test passed!") diff --git a/src/tests/ftest/util/soak_utils.py b/src/tests/ftest/util/soak_utils.py index 6d6ad6592d4..6adcaa9be15 100644 --- a/src/tests/ftest/util/soak_utils.py +++ b/src/tests/ftest/util/soak_utils.py @@ -1346,10 +1346,8 @@ def create_racer_cmdline(self, job_spec): # daos_racer needs its own pool; does not run using jobs pool add_pools(self, ["pool_racer"]) add_containers(self, self.pool[-1], "SX") - racer_namespace = os.path.join(os.sep, "run", job_spec, "*") daos_racer = DaosRacerCommand( - self.bin, self.hostlist_clients[0]) - daos_racer.namespace = racer_namespace + self.bin, self.hostlist_clients[0], namespace=os.path.join(os.sep, "run", job_spec, "*")) daos_racer.get_params(self) daos_racer.pool_uuid.update(self.pool[-1].uuid) daos_racer.cont_uuid.update(self.container[-1].uuid) diff --git a/src/tests/ftest/util/yaml_utils.py b/src/tests/ftest/util/yaml_utils.py index cbc7c8f5203..7783c3848de 100644 --- a/src/tests/ftest/util/yaml_utils.py +++ b/src/tests/ftest/util/yaml_utils.py @@ -1,6 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. -(C) Copyright 2025 Hewlett Packard Enterprise Development LP +(C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -108,7 +108,6 @@ class YamlUpdater(): ("bdev_list", "_storage", list), ("timeout", "_timeout", int), ("timeouts", "_timeout", dict), - ("clush_timeout", "_timeout", int), ("ior_timeout", "_timeout", int), ("job_manager_timeout", "_timeout", int), ("pattern_timeout", "_timeout", int),