Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions synthbanshee/tts/ssml_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ class PhraseProsody:
char_end: Exclusive end offset.
rate: SSML rate value (e.g. ``"+15%"``, ``"slow"``).
``None`` means no rate change.
pitch: SSML pitch value (e.g. ``"+2st"``). ``None`` means no change.
volume: SSML volume value (e.g. ``"+3dB"``). ``None`` means no change.
pitch: SSML pitch value (e.g. ``"+2st"`` or ``"+6%"``). ``None``
means no change. Mixing ``st`` (inner) with ``%`` (outer) is
tolerated by Azure.
volume: SSML volume value (e.g. ``"+3%"``). ``None`` means no
change. **Must be expressed in ``%``** to match the outer
``<prosody volume="...">`` emitted by ``_volume_to_string``;
nesting ``volume="+NdB"`` inside ``volume="+N%"`` triggers
Azure SSML parser error 0x80045003 (#72).
break_before_ms: Milliseconds of silence inserted before the span.
break_after_ms: Milliseconds of silence inserted after the span.
"""
Expand All @@ -86,8 +92,10 @@ class PhraseProsody:
# ---------------------------------------------------------------------------

_HINT_DEFAULTS: dict[str, dict[str, str | int]] = {
# Stressed accusatory phrase — faster, louder, higher pitch
"stress": {"rate": "+15%", "volume": "+3dB", "pitch": "+1st", "break_before_ms": 0},
# Stressed accusatory phrase — faster, louder, higher pitch.
# `volume` must be in `%` (#72): Azure SSML rejects `volume="+NdB"`
# nested inside the outer `volume="+N%"` emitted by `_volume_to_string`.
"stress": {"rate": "+15%", "volume": "+3%", "pitch": "+1st", "break_before_ms": 0},
# Deliberate command slowing — measured menace
"slow": {"rate": "-20%", "break_before_ms": 150},
# Pause before the phrase for dramatic weight
Expand Down
44 changes: 43 additions & 1 deletion tests/unit/test_phrase_prosody.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from synthbanshee.tts.ssml_builder import SSMLBuilder, UtteranceSpec, _apply_phrase_prosody
from synthbanshee.tts.ssml_types import (
_HINT_DEFAULTS,
PhraseHint,
PhraseProsody,
_build_offset_map,
Expand Down Expand Up @@ -106,7 +107,10 @@ def test_hint_defaults_applied_stress(self) -> None:
)
result = resolve_phrase_hints([hint], text, text)
assert result[0].rate == "+15%"
assert result[0].volume == "+3dB"
# `volume` must be in `%` (#72): Azure SSML rejects `volume="+NdB"`
# nested inside the outer `volume="+N%"` emitted by `_volume_to_string`.
# Don't reintroduce `+3dB` here without also changing the outer emitter.
assert result[0].volume == "+3%"
assert result[0].pitch == "+1st"

def test_hint_defaults_applied_menace(self) -> None:
Expand Down Expand Up @@ -190,6 +194,44 @@ def test_negative_char_start_clamped(self) -> None:
assert result[0].char_start == 0


# ---------------------------------------------------------------------------
# Hint defaults — Azure SSML unit invariant
# ---------------------------------------------------------------------------


class TestHintDefaultUnits:
    """Guard the unit used by every ``volume`` entry in ``_HINT_DEFAULTS``.

    Azure rejects SSML when an inner `<prosody volume="+NdB">` is nested
    inside an outer `<prosody volume="+N%">` (#72: `0x80045003 / Connection
    was closed by the remote host`). Since `_volume_to_string` always emits
    the outer prosody volume in `%`, every nested phrase prosody value must
    also be in `%`. Reintroducing `dB` here re-breaks the corpus generation
    for any scene that emits a `stress` hint (most violent / agitated scenes).
    """

    @staticmethod
    def _default_volumes() -> list[tuple[str, object]]:
        """Return ``(hint_name, volume)`` for each hint default that sets a volume.

        Hints without a ``"volume"`` key (or with ``None``) are skipped so the
        tests only inspect values that would actually be emitted.
        """
        return [
            (hint_name, defaults["volume"])
            for hint_name, defaults in _HINT_DEFAULTS.items()
            if defaults.get("volume") is not None
        ]

    def test_no_hint_default_uses_db_for_volume(self) -> None:
        """No hint default may express its volume in decibels."""
        for hint_name, volume in self._default_volumes():
            assert isinstance(volume, str)
            assert not volume.endswith("dB"), (
                f"Hint {hint_name!r} default volume {volume!r} uses dB; "
                "must be % to match the outer prosody emitter and avoid #72 "
                "(Azure SSML parse error 0x80045003)"
            )

    def test_hint_default_volumes_parse_as_percent(self) -> None:
        """Every hint default volume is an optionally signed integer percent."""
        for hint_name, volume in self._default_volumes():
            assert isinstance(volume, str)
            assert volume.endswith("%"), (
                f"Hint {hint_name!r} default volume {volume!r} must end in '%'"
            )
            # Drop exactly one trailing '%' and at most one leading sign.
            # Stripping whole runs of these characters (rstrip/lstrip) would
            # let malformed values such as "+-3%", "++3%" or "3%%" through.
            magnitude = volume[:-1]
            if magnitude[:1] in ("+", "-"):
                magnitude = magnitude[1:]
            # ASCII digits only: int() would also accept surrounding
            # whitespace, underscores and non-ASCII digits.
            assert magnitude.isascii() and magnitude.isdigit(), (
                f"Hint {hint_name!r} default volume {volume!r} must be an "
                "optionally signed integer followed by a single '%'"
            )


# ---------------------------------------------------------------------------
# rebase_phrase_prosody
# ---------------------------------------------------------------------------
Expand Down
Loading