diff --git a/synthbanshee/tts/ssml_types.py b/synthbanshee/tts/ssml_types.py index ef44db1..78468fb 100644 --- a/synthbanshee/tts/ssml_types.py +++ b/synthbanshee/tts/ssml_types.py @@ -65,8 +65,14 @@ class PhraseProsody: char_end: Exclusive end offset. rate: SSML rate value (e.g. ``"+15%"``, ``"slow"``). ``None`` means no rate change. - pitch: SSML pitch value (e.g. ``"+2st"``). ``None`` means no change. - volume: SSML volume value (e.g. ``"+3dB"``). ``None`` means no change. + pitch: SSML pitch value (e.g. ``"+2st"`` or ``"+6%"``). ``None`` + means no change. Mixing ``st`` (inner) with ``%`` (outer) is + tolerated by Azure. + volume: SSML volume value (e.g. ``"+3%"``). ``None`` means no + change. **Must be expressed in ``%``** to match the outer + ``<prosody volume="+N%">`` emitted by ``_volume_to_string``; + nesting ``volume="+NdB"`` inside ``volume="+N%"`` triggers + Azure SSML parser error 0x80045003 (#72). break_before_ms: Milliseconds of silence inserted before the span. break_after_ms: Milliseconds of silence inserted after the span. """ @@ -86,8 +92,10 @@ class PhraseProsody: # --------------------------------------------------------------------------- _HINT_DEFAULTS: dict[str, dict[str, str | int]] = { - # Stressed accusatory phrase — faster, louder, higher pitch - "stress": {"rate": "+15%", "volume": "+3dB", "pitch": "+1st", "break_before_ms": 0}, + # Stressed accusatory phrase — faster, louder, higher pitch. + # `volume` must be in `%` (#72): Azure SSML rejects `volume="+NdB"` + # nested inside the outer `volume="+N%"` emitted by `_volume_to_string`. 
+ "stress": {"rate": "+15%", "volume": "+3%", "pitch": "+1st", "break_before_ms": 0}, # Deliberate command slowing — measured menace "slow": {"rate": "-20%", "break_before_ms": 150}, # Pause before the phrase for dramatic weight diff --git a/tests/unit/test_phrase_prosody.py b/tests/unit/test_phrase_prosody.py index ce61794..6e8ebe7 100644 --- a/tests/unit/test_phrase_prosody.py +++ b/tests/unit/test_phrase_prosody.py @@ -15,6 +15,7 @@ from synthbanshee.tts.ssml_builder import SSMLBuilder, UtteranceSpec, _apply_phrase_prosody from synthbanshee.tts.ssml_types import ( + _HINT_DEFAULTS, PhraseHint, PhraseProsody, _build_offset_map, @@ -106,7 +107,10 @@ def test_hint_defaults_applied_stress(self) -> None: ) result = resolve_phrase_hints([hint], text, text) assert result[0].rate == "+15%" - assert result[0].volume == "+3dB" + # `volume` must be in `%` (#72): Azure SSML rejects `volume="+NdB"` + # nested inside the outer `volume="+N%"` emitted by `_volume_to_string`. + # Don't reintroduce `+3dB` here without also changing the outer emitter. + assert result[0].volume == "+3%" assert result[0].pitch == "+1st" def test_hint_defaults_applied_menace(self) -> None: @@ -190,6 +194,44 @@ def test_negative_char_start_clamped(self) -> None: assert result[0].char_start == 0 +# --------------------------------------------------------------------------- +# Hint defaults — Azure SSML unit invariant +# --------------------------------------------------------------------------- + + +class TestHintDefaultUnits: + """Azure rejects SSML when an inner `<prosody volume="+NdB">` is nested + inside an outer `<prosody volume="+N%">` (#72: `0x80045003 / Connection + was closed by the remote host`). Since `_volume_to_string` always emits + the outer prosody volume in `%`, every nested phrase prosody value must + also be in `%`. 
Reintroducing `dB` here re-breaks the corpus generation + for any scene that emits a `stress` hint (most violent / agitated scenes).""" + + def test_no_hint_default_uses_db_for_volume(self) -> None: + for hint_name, defaults in _HINT_DEFAULTS.items(): + volume = defaults.get("volume") + if volume is None: + continue + assert isinstance(volume, str) + assert not volume.endswith("dB"), ( + f"Hint {hint_name!r} default volume {volume!r} uses dB; " + "must be % to match the outer prosody emitter and avoid #72 " + "(Azure SSML parse error 0x80045003)" + ) + + def test_hint_default_volumes_parse_as_percent(self) -> None: + for hint_name, defaults in _HINT_DEFAULTS.items(): + volume = defaults.get("volume") + if volume is None: + continue + assert isinstance(volume, str) + assert volume.endswith("%"), ( + f"Hint {hint_name!r} default volume {volume!r} must end in '%'" + ) + n = volume.rstrip("%").lstrip("+") + int(n.lstrip("-")) # numeric (no exception => fine) + + # --------------------------------------------------------------------------- # rebase_phrase_prosody # ---------------------------------------------------------------------------