Skip to content

Commit 4a8c800

Browse files
authored
Merge pull request #155 from RWTH-EBC/154-support-wildcards-in-variable-names [PYPI-RELEASE]
154 support wildcards in variable names
2 parents 8d115bb + 626e388 commit 4a8c800

File tree

7 files changed

+137
-21
lines changed

7 files changed

+137
-21
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,5 @@
118118
- Add feature to postprocess mat results within the simulate function to avoid memory errors in large studies
119119
- v0.5.3
120120
- Improve loading of mat files #150
121+
- v0.5.4
122+
- Add support for wildcard patterns in variable names #154

ebcpy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@
88
from .optimization import Optimizer
99

1010

11-
__version__ = '0.5.3'
11+
__version__ = '0.5.4'

ebcpy/data_types.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import numpy as np
1717
import ebcpy.modelica.simres as sr
1818
from ebcpy import preprocessing
19+
from ebcpy.utils import get_names
1920

2021
# pylint: disable=I1101
2122
# pylint: disable=too-many-ancestors
@@ -86,6 +87,8 @@ class TimeSeriesData(pd.DataFrame):
8687
List of variable names to load from .mat file. If you
8788
know which variables you want to plot, this may speed up
8889
loading significantly, and reduce memory size drastically.
90+
You can also supply wildcard patterns (e.g. "*wall.layer[*].T")
91+
to match multiple variables at once.
8992
9093
Examples:
9194
@@ -350,13 +353,33 @@ def _load_df_from_file(self, file):
350353
) from err
351354
return df
352355

353-
def get_variable_names(self) -> List[str]:
356+
def get_variable_names(self, patterns: Union[str, List[str]] = None) -> List[str]:
354357
"""
355-
Return an alphabetically sorted list of all variables
356-
357-
:return: List[str]
358-
"""
359-
return sorted(self.columns.get_level_values(0).unique())
358+
Return an alphabetically sorted list of variable names, optionally filtered by patterns.
359+
360+
By default, returns all variable names found in the first level of the DataFrame's
361+
column MultiIndex, sorted alphabetically. If `patterns` is provided, only names
362+
matching one or more of the given literal strings or glob-style patterns
363+
(where `*` matches any sequence of characters, including an empty one) will be returned.
364+
365+
:param patterns:
366+
- A single string or list of strings.
367+
- Each entry may be an exact variable name, or a pattern containing `*` as a wildcard.
368+
- If None, all variable names are returned.
369+
:return:
370+
A list of matching variable names, in alphabetical order.
371+
:raises KeyError:
372+
If any literal name or pattern does not match at least one variable in the DataFrame.
373+
374+
Example:
375+
# return all wall temperatures at any layer
376+
tsd.get_variable_names("*wall.layer[*].T")
377+
["wall.layer[1].T", "wall.layer[2].T", "wall.layer[3].T"]
378+
"""
379+
all_names = sorted(self.columns.get_level_values(0).unique())
380+
if patterns is None:
381+
return all_names
382+
return get_names(all_names, patterns)
360383

361384
def get_variables_with_multiple_tags(self) -> List[str]:
362385
"""

ebcpy/modelica/simres.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from scipy.io import loadmat
5656
import pandas as pd
5757
import numpy as np
58+
from ebcpy.utils import get_names
5859

5960

6061
# Namedtuple to store the time and value information of each variable
@@ -265,7 +266,9 @@ def mat_to_pandas(fname='dsres.mat',
265266
:param str fname:
266267
The mat file to load.
267268
:param list names:
268-
If None (default), then all variables are included.
269+
If None (default), then all variables are included. You can also
270+
supply wildcard patterns (e.g. "*wall.layer[*].T") to match
271+
multiple variables at once.
269272
:param dict aliases:
270273
Dictionary of aliases for the variable names
271274
@@ -291,15 +294,14 @@ def mat_to_pandas(fname='dsres.mat',
291294

292295
# Create the list of variable names.
293296
if names:
294-
if 'Time' not in names:
295-
names = names.copy()
296-
names.append('Time')
297-
non_existing_variables = list(set(names).difference(_variables.keys()))
298-
if non_existing_variables:
299-
raise KeyError(f"The following variable names are not in the given .mat file: "
300-
f"{', '.join(non_existing_variables)}")
297+
# ensure Time is always included
298+
patterns = list(names)
299+
if 'Time' not in patterns:
300+
patterns.append('Time')
301+
302+
names = get_names(list(_variables.keys()), patterns)
301303
else:
302-
names = _variables.keys()
304+
names = list(_variables.keys())
303305

304306
# Create a dictionary of names and values.
305307
times = _variables['Time'].values()

ebcpy/utils/__init__.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
"""
55
import logging
66
import os
7+
import re
78
from pathlib import Path
8-
from typing import Union
9+
from typing import Union, List
910

1011

1112
def setup_logger(name: str,
@@ -44,3 +45,53 @@ def setup_logger(name: str,
4445
file_handler.setFormatter(fmt=formatter)
4546
logger.addHandler(hdlr=file_handler)
4647
return logger
48+
49+
50+
def get_names(all_names: list, patterns: Union[str, List[str]]) -> List[str]:
51+
"""
52+
Filter a list of candidate names by literal values or glob-style patterns.
53+
54+
This function returns all names from `all_names` that match the provided
55+
`patterns`. Patterns may be a single string or a list of strings, and may
56+
contain the wildcard `*` to match any (possibly empty) sequence of characters. Literal names
57+
without `*` must match exactly. The matching is performed in two steps:
58+
1. Each pattern is translated to a regular expression if it contains `*`,
59+
otherwise used as a literal match.
60+
2. Any pattern that matches no names in `all_names` raises a `KeyError`.
61+
62+
The returned list preserves the order of `all_names`.
63+
64+
:param all_names: List of available names to filter.
65+
:param patterns: A pattern or list of patterns (with optional `*` wildcards)
66+
to match against `all_names`.
67+
:return: A list of names from `all_names` that match any of the given patterns,
68+
in original order.
69+
:raises KeyError: If any pattern does not match at least one name.
70+
"""
71+
if isinstance(patterns, str):
72+
patterns = [patterns]
73+
74+
matched = set()
75+
unmatched = []
76+
for pat in patterns:
77+
if '*' in pat:
78+
regex = '^' + re.escape(pat).replace(r'\*', '.*') + '$'
79+
hits = [k for k in all_names if re.match(regex, k)]
80+
if hits:
81+
matched.update(hits)
82+
else:
83+
unmatched.append(pat)
84+
else:
85+
if pat in all_names:
86+
matched.add(pat)
87+
else:
88+
unmatched.append(pat)
89+
90+
if unmatched:
91+
raise KeyError(
92+
"The following variable names/patterns are not in the given .mat file: "
93+
+ ", ".join(unmatched)
94+
)
95+
# preserve original order
96+
names = [var for var in all_names if var in matched]
97+
return names

tests/test_data_types.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,15 +158,16 @@ def test_time_series_data(self):
158158
time_series_data,
159159
type(pd.DataFrame()))
160160
# Load with variable names:
161-
variable_names = ["combiTimeTable.y[6]"]
161+
variable_names = ["combiTimeTable.timeScale", "*.y[*]"]
162162
time_series_data = data_types.TimeSeriesData(
163163
self.example_data_mat_path, variable_names=variable_names
164164
)
165165
self.assertIsInstance(
166166
time_series_data,
167167
type(pd.DataFrame()))
168-
self.assertEqual(len(time_series_data.columns), 1)
169-
self.assertEqual(time_series_data.to_df().columns[0], variable_names[0])
168+
self.assertEqual(len(time_series_data.columns), 7)
169+
self.assertTrue(variable_names[0] in list(time_series_data.to_df().columns))
170+
self.assertTrue("combiTimeTable.y[5]" in list(time_series_data.to_df().columns))
170171
# Test load and set df functions:
171172
df = time_series_data
172173
self.assertIsInstance(
@@ -232,6 +233,7 @@ def test_time_series_utils(self):
232233
"""Test the utils for time series"""
233234
tsd = data_types.TimeSeriesData(self.example_data_mat_path)
234235
self.assertEqual(len(tsd.get_variable_names()), tsd.shape[1])
236+
self.assertEqual(len(tsd.get_variable_names("*.y[*]")), 6)
235237
self.assertIsNotNone(tsd.get_tags())
236238
self.assertLessEqual(len(tsd.get_variable_names()), tsd.shape[1])
237239

tests/test_utils.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pandas as pd
1111
import scipy.io as spio
1212
from ebcpy import TimeSeriesData
13-
from ebcpy.utils import setup_logger, conversion, statistics_analyzer, reproduction
13+
from ebcpy.utils import setup_logger, conversion, statistics_analyzer, reproduction, get_names
1414

1515

1616
class TestConversion(unittest.TestCase):
@@ -323,5 +323,41 @@ def tearDown(self) -> None:
323323
pass
324324

325325

326+
class TestGetNames(unittest.TestCase):
327+
def test_matches(self):
328+
"""
329+
Test various literal and wildcard patterns, including brackets and multiple '*' usage.
330+
"""
331+
test_cases = [
332+
# literal single match
333+
(['alpha', 'beta', 'gamma'], 'beta', ['beta']),
334+
# literal list of matches
335+
(['alpha', 'beta', 'gamma', 'delta'], ['alpha', 'delta'], ['alpha', 'delta']),
336+
# single '*' wildcard
337+
(['wall1.T', 'wall2.T', 'floor.T'], 'wall*.T', ['wall1.T', 'wall2.T']),
338+
# wildcard inside brackets
339+
(['wall[1].T', 'wall[2].T', 'wallX.T'], 'wall[*].T', ['wall[1].T', 'wall[2].T']),
340+
# two '*' wildcards
341+
(['a1b2', 'axby', 'ab'], 'a*b*', ['a1b2', 'axby', 'ab']),
342+
# mix of wildcard and literal in list
343+
(['a1', 'a2', 'b1', 'b2'], ['a*', 'b1'], ['a1', 'a2', 'b1']),
344+
# order preservation test
345+
(['first', 'second', 'third'], ['third', 'first'], ['first', 'third']),
346+
]
347+
for all_names, patterns, expected in test_cases:
348+
with self.subTest(patterns=patterns):
349+
result = get_names(all_names, patterns)
350+
self.assertEqual(result, expected)
351+
352+
def test_errors(self):
353+
"""
354+
Patterns or literals that match nothing should raise KeyError.
355+
"""
356+
with self.assertRaises(KeyError):
357+
get_names(['alpha', 'beta'], 'unknown')
358+
with self.assertRaises(KeyError):
359+
get_names(['x1', 'x2'], 'y*')
360+
361+
326362
if __name__ == "__main__":
327363
unittest.main()

0 commit comments

Comments
 (0)