Merge pull request #155 from RWTH-EBC/154-support-wildcards-in-variable-names [PYPI-RELEASE]

HvanderStok · web-flow · commit 4a8c800ae103 · 2025-05-06T11:20:57.000+02:00
154 support wildcards in variable names
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -118,3 +118,5 @@
   - Add feature to postprocess mat results within the simulate function to avoid memory errors in large studies
 - v0.5.3
   - Improve loading of mat files #150
+- v0.5.4
+  - Add support for wildcard patterns in variable names #154
diff --git a/ebcpy/__init__.py b/ebcpy/__init__.py
@@ -8,4 +8,4 @@
 from .optimization import Optimizer
 
 
-__version__ = '0.5.3'
+__version__ = '0.5.4'
diff --git a/ebcpy/data_types.py b/ebcpy/data_types.py
@@ -16,6 +16,7 @@
 import numpy as np
 import ebcpy.modelica.simres as sr
 from ebcpy import preprocessing
+from ebcpy.utils import get_names
 
 # pylint: disable=I1101
 # pylint: disable=too-many-ancestors
@@ -86,6 +87,8 @@ class TimeSeriesData(pd.DataFrame):
         List of variable names to load from .mat file. If you
         know which variables you want to plot, this may speed up
         loading significantly, and reduce memory size drastically.
+        You can also supply wildcard patterns (e.g. "*wall.layer[*].T")
+        to match multiple variables at once.
 
     Examples:
 
@@ -350,13 +353,33 @@ def _load_df_from_file(self, file):
                 ) from err
         return df
 
-    def get_variable_names(self) -> List[str]:
+    def get_variable_names(self, patterns: Union[str, List[str]] = None) -> List[str]:
         """
-        Return an alphabetically sorted list of all variables
-
-        :return: List[str]
-        """
-        return sorted(self.columns.get_level_values(0).unique())
+        Return an alphabetically sorted list of variable names.
+
+        Without ``patterns``, all names found in the first level of the
+        DataFrame's columns are returned. With ``patterns``, only names
+        matching at least one of the given literal names or glob-style
+        patterns are returned (``*`` matches any sequence of characters).
+
+        :param patterns:
+            A single string or a list of strings. Each entry is either an
+            exact variable name or a pattern containing ``*`` as a wildcard.
+            If None (default), all variable names are returned.
+        :return:
+            List of matching variable names in alphabetical order.
+        :raises KeyError:
+            If any literal name or pattern matches no variable.
+
+        Example:
+            # return all wall temperatures at any layer
+            tsd.get_variable_names("*wall.layer[*].T")
+            # -> ["wall.layer[1].T", "wall.layer[2].T", "wall.layer[3].T"]
+        """
+        all_names = sorted(self.columns.get_level_values(0).unique())
+        if patterns is None:
+            return all_names
+        return get_names(all_names, patterns)
 
     def get_variables_with_multiple_tags(self) -> List[str]:
         """
diff --git a/ebcpy/modelica/simres.py b/ebcpy/modelica/simres.py
@@ -55,6 +55,7 @@
 from scipy.io import loadmat
 import pandas as pd
 import numpy as np
+from ebcpy.utils import get_names
 
 
 # Namedtuple to store the time and value information of each variable
@@ -265,7 +266,9 @@ def mat_to_pandas(fname='dsres.mat',
     :param str fname:
         The mat file to load.
     :param list names:
-        If None (default), then all variables are included.
+        If None (default), then all variables are included. You can also
+        supply wildcard patterns (e.g. "*wall.layer[*].T") to match
+        multiple variables at once.
     :param dict aliases:
         Dictionary of aliases for the variable names
 
@@ -291,15 +294,14 @@ def mat_to_pandas(fname='dsres.mat',
 
     # Create the list of variable names.
     if names:
-        if 'Time' not in names:
-            names = names.copy()
-            names.append('Time')
-        non_existing_variables = list(set(names).difference(_variables.keys()))
-        if non_existing_variables:
-            raise KeyError(f"The following variable names are not in the given .mat file: "
-                           f"{', '.join(non_existing_variables)}")
+        # ensure Time is always included
+        patterns = list(names)
+        if 'Time' not in patterns:
+            patterns.append('Time')
+
+        names = get_names(list(_variables.keys()), patterns)
     else:
-        names = _variables.keys()
+        names = list(_variables.keys())
 
     # Create a dictionary of names and values.
     times = _variables['Time'].values()
diff --git a/ebcpy/utils/__init__.py b/ebcpy/utils/__init__.py
@@ -4,8 +4,9 @@
 """
 import logging
 import os
+import re
 from pathlib import Path
-from typing import Union
+from typing import Union, List
 
 
 def setup_logger(name: str,
@@ -44,3 +45,53 @@ def setup_logger(name: str,
         file_handler.setFormatter(fmt=formatter)
         logger.addHandler(hdlr=file_handler)
     return logger
+
+
+def get_names(all_names: list, patterns: Union[str, List[str]]) -> List[str]:
+    """
+    Select names from ``all_names`` by literal values or glob-style patterns.
+
+    ``patterns`` may be a single string or a list of strings. An entry that
+    contains ``*`` is a wildcard pattern in which ``*`` matches any sequence
+    of characters (including none) and every other character, e.g. ``[``,
+    ``]`` or ``.``, is taken literally. An entry without ``*`` must be equal
+    to a name. Each entry has to match at least one name.
+
+    :param all_names: List of available names to filter.
+    :param patterns: A pattern or list of patterns (with optional ``*``
+                     wildcards) to match against ``all_names``.
+    :return: The names from ``all_names`` matched by any entry of
+             ``patterns``, in the order of ``all_names``.
+    :raises KeyError: If any entry of ``patterns`` matches no name.
+
+    Example:
+        get_names(["wall[1].T", "wall[2].T", "roof.T"], "wall[*].T")
+        # -> ["wall[1].T", "wall[2].T"]
+    """
+    if isinstance(patterns, str):
+        patterns = [patterns]
+
+    # Hash the candidates once so each literal lookup is O(1), not a list scan.
+    available = set(all_names)
+    matched = set()
+    unmatched = []
+    for pat in patterns:
+        if '*' in pat:
+            # Escape all, re-enable '*'. DOTALL + fullmatch: '*' spans any
+            # character and the pattern must cover the whole name.
+            regex = re.compile(re.escape(pat).replace(r'\*', '.*'), re.DOTALL)
+            hits = [name for name in all_names if regex.fullmatch(name)]
+        else:
+            hits = [pat] if pat in available else []
+        if hits:
+            matched.update(hits)
+        else:
+            unmatched.append(pat)
+
+    if unmatched:
+        raise KeyError(
+            "The following variable names/patterns are not in the given .mat file: "
+            + ", ".join(unmatched)
+        )
+    # Preserve the order of all_names.
+    return [name for name in all_names if name in matched]
diff --git a/tests/test_data_types.py b/tests/test_data_types.py
@@ -158,15 +158,16 @@ def test_time_series_data(self):
             time_series_data,
             type(pd.DataFrame()))
         # Load with variable names:
-        variable_names = ["combiTimeTable.y[6]"]
+        variable_names = ["combiTimeTable.timeScale", "*.y[*]"]
         time_series_data = data_types.TimeSeriesData(
             self.example_data_mat_path, variable_names=variable_names
         )
         self.assertIsInstance(
             time_series_data,
             type(pd.DataFrame()))
-        self.assertEqual(len(time_series_data.columns), 1)
-        self.assertEqual(time_series_data.to_df().columns[0], variable_names[0])
+        self.assertEqual(len(time_series_data.columns), 7)
+        self.assertTrue(variable_names[0] in list(time_series_data.to_df().columns))
+        self.assertTrue("combiTimeTable.y[5]" in list(time_series_data.to_df().columns))
         # Test load and set df functions:
         df = time_series_data
         self.assertIsInstance(
@@ -232,6 +233,7 @@ def test_time_series_utils(self):
         """Test the utils for time series"""
         tsd = data_types.TimeSeriesData(self.example_data_mat_path)
         self.assertEqual(len(tsd.get_variable_names()), tsd.shape[1])
+        self.assertEqual(len(tsd.get_variable_names("*.y[*]")), 6)
         self.assertIsNotNone(tsd.get_tags())
         self.assertLessEqual(len(tsd.get_variable_names()), tsd.shape[1])
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -10,7 +10,7 @@
 import pandas as pd
 import scipy.io as spio
 from ebcpy import TimeSeriesData
-from ebcpy.utils import setup_logger, conversion, statistics_analyzer, reproduction
+from ebcpy.utils import setup_logger, conversion, statistics_analyzer, reproduction, get_names
 
 
 class TestConversion(unittest.TestCase):
@@ -323,5 +323,41 @@ def tearDown(self) -> None:
             pass
 
 
+class TestGetNames(unittest.TestCase):
+    """Test get_names with literal names and '*' wildcard patterns."""
+    def test_matches(self):
+        """Literal and wildcard patterns, including brackets and multiple '*'."""
+        # each case: (all_names, patterns, expected result)
+        test_cases = [
+            # literal single match
+            (['alpha', 'beta', 'gamma'], 'beta', ['beta']),
+            # literal list of matches
+            (['alpha', 'beta', 'gamma', 'delta'], ['alpha', 'delta'], ['alpha', 'delta']),
+            # single '*' wildcard
+            (['wall1.T', 'wall2.T', 'floor.T'], 'wall*.T', ['wall1.T', 'wall2.T']),
+            # wildcard inside brackets
+            (['wall[1].T', 'wall[2].T', 'wallX.T'], 'wall[*].T', ['wall[1].T', 'wall[2].T']),
+            # two '*' wildcards ('*' may match nothing, so 'ab' is a hit too)
+            (['a1b2', 'axby', 'ab'], 'a*b*', ['a1b2', 'axby', 'ab']),
+            # mix of wildcard and literal in list
+            (['a1', 'a2', 'b1', 'b2'], ['a*', 'b1'], ['a1', 'a2', 'b1']),
+            # result follows the order of all_names, not the order of patterns
+            (['first', 'second', 'third'], ['third', 'first'], ['first', 'third']),
+        ]
+        for all_names, patterns, expected in test_cases:
+            with self.subTest(patterns=patterns):
+                result = get_names(all_names, patterns)
+                self.assertEqual(result, expected)
+
+    def test_errors(self):
+        """
+        Patterns or literals that match nothing should raise KeyError.
+        """
+        with self.assertRaises(KeyError):
+            get_names(['alpha', 'beta'], 'unknown')
+        with self.assertRaises(KeyError):
+            get_names(['x1', 'x2'], 'y*')
+
+
 if __name__ == "__main__":
     unittest.main()

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -8,4 +8,4 @@` |
| `8` | `8` | `from .optimization import Optimizer` |
| `9` | `9` | |
| `10` | `10` | |
| `11` | | `-__version__ = '0.5.3'` |
| | `11` | `+__version__ = '0.5.4'` |