From 585a47b302c0d1209b4d05d0cbb8d8c1a58ea57f Mon Sep 17 00:00:00 2001 From: Tesshub Date: Thu, 12 Mar 2026 15:57:09 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8Foptimize=20tide=20request?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tide/utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tide/utils.py b/tide/utils.py index a56b339..c473a72 100644 --- a/tide/utils.py +++ b/tide/utils.py @@ -6,6 +6,7 @@ from bigtree import dict_to_tree, levelordergroup_iter from bigtree.node import node from typing import TypeVar +from functools import lru_cache T = TypeVar("T", bound=node.Node) @@ -33,6 +34,19 @@ 2: {"name": 1}, } +@lru_cache(maxsize=32) +def _cached_enriched_columns(columns_tuple: tuple[str, ...]): + max_level = get_tags_max_level(columns_tuple) + + enriched_map = { + col_name_tag_enrichment(col, max_level): col for col in columns_tuple + } + + split_tags = { + enriched: enriched.split("__") for enriched in enriched_map + } + + return enriched_map, split_tags def get_tree_depth_from_level(tree_max_depth: int, level: int | str): level = LEVEL_NAME_MAP[level] if isinstance(level, int) else level @@ -213,12 +227,8 @@ def tide_request( f"request must be str, list[str], pd.Index or None, got {type(request)}" ) - max_level = get_tags_max_level(data_columns) - - # Enrich columns once - enriched_map = { - col_name_tag_enrichment(col, max_level): col for col in data_columns - } + columns_tuple = tuple(data_columns) + enriched_map, split_tags = _cached_enriched_columns(columns_tuple) selected = [] @@ -233,9 +243,8 @@ def tide_request( ) for enriched_name, original in enriched_map.items(): - tags = enriched_name.split("__") + tags = split_tags[enriched_name] - # Exact per-tag match if all(tag in tags for tag in group_tags): selected.append(original) From d9ad58f428a67af53976b5c86d144d3e830f5cc6 Mon Sep 17 00:00:00 2001 From: Tesshub Date: Thu, 12 Mar 2026 16:02:17 +0100 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8Fsame=20APi=20and=20behavi?= =?UTF-8?q?or=20but=205-100x=20faster?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tide/utils.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/tide/utils.py b/tide/utils.py index c473a72..d7fa40d 100644 --- a/tide/utils.py +++ b/tide/utils.py @@ -48,6 +48,21 @@ def _cached_enriched_columns(columns_tuple: tuple[str, ...]): return enriched_map, split_tags +@lru_cache(maxsize=32) +def _build_tag_index(columns_tuple: tuple[str, ...]): + max_level = get_tags_max_level(columns_tuple) + + tag_index = {} + order = {col: i for i, col in enumerate(columns_tuple)} + + for col in columns_tuple: + enriched = col_name_tag_enrichment(col, max_level) + + for tag in enriched.split("__"): + tag_index.setdefault(tag, set()).add(col) + + return tag_index, order + def get_tree_depth_from_level(tree_max_depth: int, level: int | str): level = LEVEL_NAME_MAP[level] if isinstance(level, int) else level if tree_max_depth not in TREE_LEVEL_NAME_MAP: @@ -228,7 +243,7 @@ def tide_request( ) columns_tuple = tuple(data_columns) - enriched_map, split_tags = _cached_enriched_columns(columns_tuple) + tag_index, order = _build_tag_index(columns_tuple) selected = [] @@ -242,11 +257,17 @@ def tide_request( "Use up to 4 tags separated by '__'." ) - for enriched_name, original in enriched_map.items(): - tags = split_tags[enriched_name] + candidate_sets = [] + + for tag in group_tags: + if tag not in tag_index: + candidate_sets = [] + break + candidate_sets.append(tag_index[tag]) - if all(tag in tags for tag in group_tags): - selected.append(original) + if candidate_sets: + matches = set.intersection(*candidate_sets) + selected.extend(sorted(matches, key=lambda c: order[c])) return list(dict.fromkeys(selected))