Skip to content

Commit ea43e1e

Browse files
Move _normalize_hive_syntax to DefaultTypeConverter and fix pass stubs
- Move _normalize_hive_syntax from module-level function in parser.py to a staticmethod on DefaultTypeConverter, since it is only used there
- Move Hive syntax tests from test_parser.py to test_converter.py to match the new location
- Replace pass stubs in convert() with self.get(type_)(value) in DefaultPandasTypeConverter, DefaultPandasUnloadTypeConverter, DefaultArrowUnloadTypeConverter, and DefaultPolarsUnloadTypeConverter
- Add tests for the pass stub fix

Closes #692.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 90e67bc commit ea43e1e

7 files changed

Lines changed: 111 additions & 80 deletions

File tree

pyathena/arrow/converter.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,4 +115,5 @@ def __init__(self) -> None:
115115
)
116116

117117
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
118-
pass
118+
converter = self.get(type_)
119+
return converter(value)

pyathena/converter.py

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,20 +3,20 @@
33
import binascii
44
import json
55
import logging
6+
import re
67
from abc import ABCMeta, abstractmethod
78
from collections.abc import Callable
89
from copy import deepcopy
910
from datetime import date, datetime, time
1011
from decimal import Decimal
11-
from typing import Any
12+
from typing import Any, ClassVar
1213

1314
from dateutil.tz import gettz
1415

1516
from pyathena.parser import (
1617
TypedValueConverter,
1718
TypeNode,
1819
TypeSignatureParser,
19-
_normalize_hive_syntax,
2020
_split_array_items,
2121
)
2222
from pyathena.util import strtobool
@@ -551,6 +551,9 @@ class DefaultTypeConverter(Converter):
551551
['1', '2', '3']
552552
"""
553553

554+
_HIVE_SYNTAX_RE: ClassVar[re.Pattern[str]] = re.compile(r"[<>:]")
555+
_HIVE_REPLACEMENTS: ClassVar[dict[str, str]] = {"<": "(", ">": ")", ":": " "}
556+
554557
def __init__(self) -> None:
555558
super().__init__(mappings=deepcopy(_DEFAULT_CONVERTERS), default=_to_default)
556559
self._parser = TypeSignatureParser()
@@ -561,6 +564,25 @@ def __init__(self) -> None:
561564
)
562565
self._parsed_hints: dict[str, TypeNode] = {}
563566

567+
@staticmethod
568+
def _normalize_hive_syntax(type_str: str) -> str:
569+
"""Normalize Hive-style DDL syntax to Trino-style.
570+
571+
Converts angle-bracket notation (``array<struct<a:int>>``) to
572+
parenthesized notation (``array(struct(a int))``).
573+
574+
Args:
575+
type_str: Type signature string, possibly using Hive syntax.
576+
577+
Returns:
578+
Normalized type signature using Trino-style parenthesized notation.
579+
"""
580+
if "<" not in type_str:
581+
return type_str
582+
return DefaultTypeConverter._HIVE_SYNTAX_RE.sub(
583+
lambda m: DefaultTypeConverter._HIVE_REPLACEMENTS[m.group()], type_str
584+
)
585+
564586
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
565587
"""Convert a string value to the appropriate Python type.
566588
@@ -605,7 +627,7 @@ def _parse_type_hint(self, type_hint: str) -> TypeNode:
605627
Returns:
606628
Parsed TypeNode.
607629
"""
608-
normalized = _normalize_hive_syntax(type_hint)
630+
normalized = self._normalize_hive_syntax(type_hint)
609631
if normalized not in self._parsed_hints:
610632
self._parsed_hints[normalized] = self._parser.parse(normalized)
611633
return self._parsed_hints[normalized]

pyathena/pandas/converter.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,8 @@ def _dtypes(self) -> dict[str, type[Any]]:
8181
return self.__dtypes
8282

8383
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
84-
pass
84+
converter = self.get(type_)
85+
return converter(value)
8586

8687

8788
class DefaultPandasUnloadTypeConverter(Converter):
@@ -104,4 +105,5 @@ def __init__(self) -> None:
104105
)
105106

106107
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
107-
pass
108+
converter = self.get(type_)
109+
return converter(value)

pyathena/parser.py

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import json
4-
import re
54
from collections.abc import Callable
65
from dataclasses import dataclass, field
76
from typing import Any
@@ -11,28 +10,6 @@
1110
"int": "integer",
1211
}
1312

14-
# Pattern for normalizing Hive-style type signatures to Trino-style.
15-
# Matches angle brackets and colons used in Hive DDL (e.g., array<struct<a:int>>).
16-
_HIVE_SYNTAX_RE: re.Pattern[str] = re.compile(r"[<>:]")
17-
_HIVE_REPLACEMENTS: dict[str, str] = {"<": "(", ">": ")", ":": " "}
18-
19-
20-
def _normalize_hive_syntax(type_str: str) -> str:
21-
"""Normalize Hive-style DDL syntax to Trino-style.
22-
23-
Converts angle-bracket notation (``array<struct<a:int>>``) to
24-
parenthesized notation (``array(struct(a int))``).
25-
26-
Args:
27-
type_str: Type signature string, possibly using Hive syntax.
28-
29-
Returns:
30-
Normalized type signature using Trino-style parenthesized notation.
31-
"""
32-
if "<" not in type_str:
33-
return type_str
34-
return _HIVE_SYNTAX_RE.sub(lambda m: _HIVE_REPLACEMENTS[m.group()], type_str)
35-
3613

3714
def _split_array_items(inner: str) -> list[str]:
3815
"""Split array items by comma, respecting brace and bracket groupings.

pyathena/polars/converter.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,4 +128,5 @@ def __init__(self) -> None:
128128
)
129129

130130
def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
131-
pass
131+
converter = self.get(type_)
132+
return converter(value)

tests/pyathena/test_converter.py

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
11
import pytest
22

3+
from pyathena.arrow.converter import DefaultArrowUnloadTypeConverter
34
from pyathena.converter import (
45
DefaultTypeConverter,
56
_to_array,
67
_to_map,
78
_to_struct,
89
)
10+
from pyathena.pandas.converter import DefaultPandasTypeConverter, DefaultPandasUnloadTypeConverter
11+
from pyathena.polars.converter import DefaultPolarsUnloadTypeConverter
912

1013

1114
@pytest.mark.parametrize(
@@ -408,3 +411,78 @@ def test_hive_syntax_caching(self):
408411
# Both should normalize to "array(integer)" in the cache
409412
assert "array(integer)" in converter._parsed_hints
410413
assert len(converter._parsed_hints) == 1
414+
415+
def test_normalize_hive_syntax_noop(self):
416+
"""Trino-style input passes through unchanged."""
417+
assert DefaultTypeConverter._normalize_hive_syntax("array(integer)") == "array(integer)"
418+
419+
def test_normalize_hive_syntax_replaces(self):
420+
assert (
421+
DefaultTypeConverter._normalize_hive_syntax("array<struct<a:int>>")
422+
== "array(struct(a int))"
423+
)
424+
425+
def test_normalize_hive_syntax_struct(self):
426+
converter = DefaultTypeConverter()
427+
result = converter.convert(
428+
"row",
429+
"{name=Alice, age=25}",
430+
type_hint="struct<name:varchar,age:int>",
431+
)
432+
assert result == {"name": "Alice", "age": 25}
433+
434+
def test_normalize_hive_syntax_nested(self):
435+
converter = DefaultTypeConverter()
436+
result = converter.convert(
437+
"array",
438+
"[{a=1, b=hello}, {a=2, b=world}]",
439+
type_hint="array<struct<a:int,b:varchar>>",
440+
)
441+
assert result == [{"a": 1, "b": "hello"}, {"a": 2, "b": "world"}]
442+
443+
def test_normalize_hive_syntax_map(self):
444+
converter = DefaultTypeConverter()
445+
result = converter.convert(
446+
"map",
447+
'{"x": 1, "y": 2}',
448+
type_hint="map<string,int>",
449+
)
450+
assert result == {"x": 1, "y": 2}
451+
452+
def test_normalize_hive_syntax_mixed(self):
453+
"""Hive angle brackets wrapping Trino-style parenthesized inner type."""
454+
converter = DefaultTypeConverter()
455+
result = converter.convert(
456+
"array",
457+
"[{a=1, b=hello}]",
458+
type_hint="array<row(a int, b varchar)>",
459+
)
460+
assert result == [{"a": 1, "b": "hello"}]
461+
462+
463+
class TestConverterPassStubFix:
464+
"""Verify that converters previously stubbed with pass now delegate correctly."""
465+
466+
def test_pandas_converter_returns_value(self):
467+
converter = DefaultPandasTypeConverter()
468+
assert converter.convert("boolean", "true") is True
469+
470+
def test_pandas_converter_default_passthrough(self):
471+
converter = DefaultPandasTypeConverter()
472+
assert converter.convert("varchar", "hello") == "hello"
473+
474+
def test_pandas_unload_converter_returns_value(self):
475+
converter = DefaultPandasUnloadTypeConverter()
476+
assert converter.convert("varchar", "hello") == "hello"
477+
478+
def test_arrow_unload_converter_returns_value(self):
479+
converter = DefaultArrowUnloadTypeConverter()
480+
assert converter.convert("varchar", "hello") == "hello"
481+
482+
def test_polars_unload_converter_returns_value(self):
483+
converter = DefaultPolarsUnloadTypeConverter()
484+
assert converter.convert("varchar", "hello") == "hello"
485+
486+
def test_pandas_converter_none_value(self):
487+
converter = DefaultPandasTypeConverter()
488+
assert converter.convert("varchar", None) is None

tests/pyathena/test_parser.py

Lines changed: 0 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
TypedValueConverter,
66
TypeNode,
77
TypeSignatureParser,
8-
_normalize_hive_syntax,
98
)
109

1110

@@ -109,55 +108,6 @@ def test_type_alias_in_complex_type(self):
109108
assert node.type_name == "array"
110109
assert node.children[0].type_name == "integer"
111110

112-
def test_hive_syntax_simple(self):
113-
parser = TypeSignatureParser()
114-
node = parser.parse(_normalize_hive_syntax("array<int>"))
115-
assert node.type_name == "array"
116-
assert node.children[0].type_name == "integer"
117-
118-
def test_hive_syntax_struct(self):
119-
parser = TypeSignatureParser()
120-
node = parser.parse(_normalize_hive_syntax("struct<a:int,b:varchar>"))
121-
assert node.type_name == "struct"
122-
assert node.field_names == ["a", "b"]
123-
assert node.children[0].type_name == "integer"
124-
assert node.children[1].type_name == "varchar"
125-
126-
def test_hive_syntax_nested(self):
127-
parser = TypeSignatureParser()
128-
node = parser.parse(_normalize_hive_syntax("array<struct<a:int,b:varchar>>"))
129-
assert node.type_name == "array"
130-
struct_node = node.children[0]
131-
assert struct_node.type_name == "struct"
132-
assert struct_node.field_names == ["a", "b"]
133-
assert struct_node.children[0].type_name == "integer"
134-
assert struct_node.children[1].type_name == "varchar"
135-
136-
def test_hive_syntax_map(self):
137-
parser = TypeSignatureParser()
138-
node = parser.parse(_normalize_hive_syntax("map<string,int>"))
139-
assert node.type_name == "map"
140-
assert node.children[0].type_name == "string"
141-
assert node.children[1].type_name == "integer"
142-
143-
def test_mixed_syntax(self):
144-
"""Hive angle brackets wrapping Trino-style parenthesized inner type."""
145-
parser = TypeSignatureParser()
146-
node = parser.parse(_normalize_hive_syntax("array<row(a int, b varchar)>"))
147-
assert node.type_name == "array"
148-
row_node = node.children[0]
149-
assert row_node.type_name == "row"
150-
assert row_node.field_names == ["a", "b"]
151-
assert row_node.children[0].type_name == "integer"
152-
assert row_node.children[1].type_name == "varchar"
153-
154-
def test_normalize_hive_syntax_noop(self):
155-
"""Trino-style input passes through unchanged."""
156-
assert _normalize_hive_syntax("array(integer)") == "array(integer)"
157-
158-
def test_normalize_hive_syntax_replaces(self):
159-
assert _normalize_hive_syntax("array<struct<a:int>>") == "array(struct(a int))"
160-
161111
def test_trailing_modifier_after_paren(self):
162112
"""Type with content after closing paren should not break parsing."""
163113
parser = TypeSignatureParser()

0 commit comments

Comments
 (0)