support speed control in tts tags

This commit is contained in:
Damien Elmes 2020-01-26 14:28:17 +10:00
parent 0480879c11
commit 21cbb5a766
6 changed files with 45 additions and 7 deletions

View file

@ -168,5 +168,6 @@ message TTSTag {
string field_text = 1; string field_text = 1;
string lang = 2; string lang = 2;
repeated string voices = 3; repeated string voices = 3;
repeated string other_args = 4; float speed = 4;
repeated string other_args = 5;
} }

View file

@ -56,6 +56,7 @@ def av_tag_to_native(tag: pb.AVTag) -> AVTag:
lang=tag.tts.lang, lang=tag.tts.lang,
voices=list(tag.tts.voices), voices=list(tag.tts.voices),
other_args=list(tag.tts.other_args), other_args=list(tag.tts.other_args),
speed=tag.tts.speed,
) )

View file

@ -24,6 +24,7 @@ class TTSTag:
field_text: str field_text: str
lang: str lang: str
voices: List[str] voices: List[str]
speed: float
# each arg should be in the form 'foo=bar' # each arg should be in the form 'foo=bar'
other_args: List[str] other_args: List[str]

View file

@ -126,8 +126,11 @@ class MacTTSPlayer(TTSProcessPlayer):
voice = match.voice voice = match.voice
assert isinstance(voice, MacVoice) assert isinstance(voice, MacVoice)
default_wpm = 170
words_per_min = str(int(default_wpm * tag.speed))
self._process = subprocess.Popen( self._process = subprocess.Popen(
["say", "-v", voice.original_name, "-f", "-"], ["say", "-v", voice.original_name, "-r", words_per_min, "-f", "-"],
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
@ -171,8 +174,21 @@ class MacTTSFilePlayer(MacTTSPlayer):
voice = match.voice voice = match.voice
assert isinstance(voice, MacVoice) assert isinstance(voice, MacVoice)
default_wpm = 170
words_per_min = str(int(default_wpm * tag.speed))
self._process = subprocess.Popen( self._process = subprocess.Popen(
["say", "-v", voice.original_name, "-f", "-", "-o", self.tmppath], [
"say",
"-v",
voice.original_name,
"-r",
words_per_min,
"-f",
"-",
"-o",
self.tmppath,
],
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
@ -441,6 +457,7 @@ if isWin:
try: try:
native_voice = voice.handle native_voice = voice.handle
self.speaker.Voice = native_voice self.speaker.Voice = native_voice
self.speaker.Rate = self._rate_for_speed(tag.speed)
self.speaker.Speak(tag.field_text, 1) self.speaker.Speak(tag.field_text, 1)
gui_hooks.av_player_did_begin_playing(self, tag) gui_hooks.av_player_did_begin_playing(self, tag)
@ -454,7 +471,12 @@ if isWin:
self._terminate_flag = False self._terminate_flag = False
def _tidy_name(self, name: str) -> str: def _tidy_name(self, name: str) -> str:
"eg. Microsoft Haruka Desktop -> MS-Haruka." "eg. Microsoft Haruka Desktop -> Microsoft-Haruka."
return re.sub(r"^Microsoft (.+) Desktop$", "Microsoft_\\1", name).replace( return re.sub(r"^Microsoft (.+) Desktop$", "Microsoft_\\1", name).replace(
" ", "_" " ", "_"
) )
def _rate_for_speed(self, speed: float) -> int:
"eg. 1.5 -> 15, 0.5 -> -5"
speed = (speed * 10) - 10
return int(max(-10, min(10, speed)))

View file

@ -195,12 +195,14 @@ impl Backend {
lang, lang,
voices, voices,
other_args, other_args,
speed,
} => pt::AvTag { } => pt::AvTag {
value: Some(pt::av_tag::Value::Tts(pt::TtsTag { value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
field_text, field_text,
lang, lang,
voices, voices,
other_args, other_args,
speed,
})), })),
}, },
}) })

View file

@ -15,6 +15,7 @@ pub enum AVTag {
field_text: String, field_text: String,
lang: String, lang: String,
voices: Vec<String>, voices: Vec<String>,
speed: f32,
other_args: Vec<String>, other_args: Vec<String>,
}, },
} }
@ -102,9 +103,10 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag { fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
let mut other_args = vec![]; let mut other_args = vec![];
let mut split_args = args.split(' '); let mut split_args = args.split_ascii_whitespace();
let lang = split_args.next().unwrap_or(""); let lang = split_args.next().unwrap_or("");
let mut voices = None; let mut voices = None;
let mut speed = 1.0;
for remaining_arg in split_args { for remaining_arg in split_args {
if remaining_arg.starts_with("voices=") { if remaining_arg.starts_with("voices=") {
@ -112,6 +114,13 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
.split('=') .split('=')
.nth(1) .nth(1)
.map(|voices| voices.split(',').map(ToOwned::to_owned).collect()); .map(|voices| voices.split(',').map(ToOwned::to_owned).collect());
} else if remaining_arg.starts_with("speed=") {
speed = remaining_arg
.split('=')
.nth(1)
.unwrap()
.parse()
.unwrap_or(1.0);
} else { } else {
other_args.push(remaining_arg.to_owned()); other_args.push(remaining_arg.to_owned());
} }
@ -121,6 +130,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
field_text: strip_html_for_tts(field_text).into(), field_text: strip_html_for_tts(field_text).into(),
lang: lang.into(), lang: lang.into(),
voices: voices.unwrap_or_else(Vec::new), voices: voices.unwrap_or_else(Vec::new),
speed,
other_args, other_args,
} }
} }
@ -188,7 +198,7 @@ mod test {
#[test] #[test]
fn test_audio() { fn test_audio() {
let s = let s =
"abc[sound:fo&amp;o.mp3]def[anki:tts][en_US voices=Bob,Jane]foo<br>1&gt;2[/anki:tts]gh"; "abc[sound:fo&amp;o.mp3]def[anki:tts][en_US voices=Bob,Jane speed=1.2]foo<br>1&gt;2[/anki:tts]gh";
assert_eq!(strip_av_tags(s), "abcdefgh"); assert_eq!(strip_av_tags(s), "abcdefgh");
let (text, tags) = extract_av_tags(s, true); let (text, tags) = extract_av_tags(s, true);
@ -202,7 +212,8 @@ mod test {
field_text: "foo 1>2".into(), field_text: "foo 1>2".into(),
lang: "en_US".into(), lang: "en_US".into(),
voices: vec!["Bob".into(), "Jane".into()], voices: vec!["Bob".into(), "Jane".into()],
other_args: vec![] other_args: vec![],
speed: 1.2
}, },
] ]
); );