mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 06:22:22 -04:00
support speed control in tts tags
This commit is contained in:
parent
0480879c11
commit
21cbb5a766
6 changed files with 45 additions and 7 deletions
|
@ -168,5 +168,6 @@ message TTSTag {
|
||||||
string field_text = 1;
|
string field_text = 1;
|
||||||
string lang = 2;
|
string lang = 2;
|
||||||
repeated string voices = 3;
|
repeated string voices = 3;
|
||||||
repeated string other_args = 4;
|
float speed = 4;
|
||||||
|
repeated string other_args = 5;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,7 @@ def av_tag_to_native(tag: pb.AVTag) -> AVTag:
|
||||||
lang=tag.tts.lang,
|
lang=tag.tts.lang,
|
||||||
voices=list(tag.tts.voices),
|
voices=list(tag.tts.voices),
|
||||||
other_args=list(tag.tts.other_args),
|
other_args=list(tag.tts.other_args),
|
||||||
|
speed=tag.tts.speed,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ class TTSTag:
|
||||||
field_text: str
|
field_text: str
|
||||||
lang: str
|
lang: str
|
||||||
voices: List[str]
|
voices: List[str]
|
||||||
|
speed: float
|
||||||
# each arg should be in the form 'foo=bar'
|
# each arg should be in the form 'foo=bar'
|
||||||
other_args: List[str]
|
other_args: List[str]
|
||||||
|
|
||||||
|
|
|
@ -126,8 +126,11 @@ class MacTTSPlayer(TTSProcessPlayer):
|
||||||
voice = match.voice
|
voice = match.voice
|
||||||
assert isinstance(voice, MacVoice)
|
assert isinstance(voice, MacVoice)
|
||||||
|
|
||||||
|
default_wpm = 170
|
||||||
|
words_per_min = str(int(default_wpm * tag.speed))
|
||||||
|
|
||||||
self._process = subprocess.Popen(
|
self._process = subprocess.Popen(
|
||||||
["say", "-v", voice.original_name, "-f", "-"],
|
["say", "-v", voice.original_name, "-r", words_per_min, "-f", "-"],
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL,
|
stderr=subprocess.DEVNULL,
|
||||||
|
@ -171,8 +174,21 @@ class MacTTSFilePlayer(MacTTSPlayer):
|
||||||
voice = match.voice
|
voice = match.voice
|
||||||
assert isinstance(voice, MacVoice)
|
assert isinstance(voice, MacVoice)
|
||||||
|
|
||||||
|
default_wpm = 170
|
||||||
|
words_per_min = str(int(default_wpm * tag.speed))
|
||||||
|
|
||||||
self._process = subprocess.Popen(
|
self._process = subprocess.Popen(
|
||||||
["say", "-v", voice.original_name, "-f", "-", "-o", self.tmppath],
|
[
|
||||||
|
"say",
|
||||||
|
"-v",
|
||||||
|
voice.original_name,
|
||||||
|
"-r",
|
||||||
|
words_per_min,
|
||||||
|
"-f",
|
||||||
|
"-",
|
||||||
|
"-o",
|
||||||
|
self.tmppath,
|
||||||
|
],
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL,
|
stderr=subprocess.DEVNULL,
|
||||||
|
@ -441,6 +457,7 @@ if isWin:
|
||||||
try:
|
try:
|
||||||
native_voice = voice.handle
|
native_voice = voice.handle
|
||||||
self.speaker.Voice = native_voice
|
self.speaker.Voice = native_voice
|
||||||
|
self.speaker.Rate = self._rate_for_speed(tag.speed)
|
||||||
self.speaker.Speak(tag.field_text, 1)
|
self.speaker.Speak(tag.field_text, 1)
|
||||||
gui_hooks.av_player_did_begin_playing(self, tag)
|
gui_hooks.av_player_did_begin_playing(self, tag)
|
||||||
|
|
||||||
|
@ -454,7 +471,12 @@ if isWin:
|
||||||
self._terminate_flag = False
|
self._terminate_flag = False
|
||||||
|
|
||||||
def _tidy_name(self, name: str) -> str:
|
def _tidy_name(self, name: str) -> str:
|
||||||
"eg. Microsoft Haruka Desktop -> MS-Haruka."
|
"eg. Microsoft Haruka Desktop -> Microsoft_Haruka."
|
||||||
return re.sub(r"^Microsoft (.+) Desktop$", "Microsoft_\\1", name).replace(
|
return re.sub(r"^Microsoft (.+) Desktop$", "Microsoft_\\1", name).replace(
|
||||||
" ", "_"
|
" ", "_"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _rate_for_speed(self, speed: float) -> int:
|
||||||
|
"eg. 1.5 -> 5, 0.5 -> -5"
|
||||||
|
speed = (speed * 10) - 10
|
||||||
|
return int(max(-10, min(10, speed)))
|
||||||
|
|
|
@ -195,12 +195,14 @@ impl Backend {
|
||||||
lang,
|
lang,
|
||||||
voices,
|
voices,
|
||||||
other_args,
|
other_args,
|
||||||
|
speed,
|
||||||
} => pt::AvTag {
|
} => pt::AvTag {
|
||||||
value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
|
value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
|
||||||
field_text,
|
field_text,
|
||||||
lang,
|
lang,
|
||||||
voices,
|
voices,
|
||||||
other_args,
|
other_args,
|
||||||
|
speed,
|
||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
|
@ -15,6 +15,7 @@ pub enum AVTag {
|
||||||
field_text: String,
|
field_text: String,
|
||||||
lang: String,
|
lang: String,
|
||||||
voices: Vec<String>,
|
voices: Vec<String>,
|
||||||
|
speed: f32,
|
||||||
other_args: Vec<String>,
|
other_args: Vec<String>,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -102,9 +103,10 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
|
||||||
|
|
||||||
fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
||||||
let mut other_args = vec![];
|
let mut other_args = vec![];
|
||||||
let mut split_args = args.split(' ');
|
let mut split_args = args.split_ascii_whitespace();
|
||||||
let lang = split_args.next().unwrap_or("");
|
let lang = split_args.next().unwrap_or("");
|
||||||
let mut voices = None;
|
let mut voices = None;
|
||||||
|
let mut speed = 1.0;
|
||||||
|
|
||||||
for remaining_arg in split_args {
|
for remaining_arg in split_args {
|
||||||
if remaining_arg.starts_with("voices=") {
|
if remaining_arg.starts_with("voices=") {
|
||||||
|
@ -112,6 +114,13 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
||||||
.split('=')
|
.split('=')
|
||||||
.nth(1)
|
.nth(1)
|
||||||
.map(|voices| voices.split(',').map(ToOwned::to_owned).collect());
|
.map(|voices| voices.split(',').map(ToOwned::to_owned).collect());
|
||||||
|
} else if remaining_arg.starts_with("speed=") {
|
||||||
|
speed = remaining_arg
|
||||||
|
.split('=')
|
||||||
|
.nth(1)
|
||||||
|
.unwrap()
|
||||||
|
.parse()
|
||||||
|
.unwrap_or(1.0);
|
||||||
} else {
|
} else {
|
||||||
other_args.push(remaining_arg.to_owned());
|
other_args.push(remaining_arg.to_owned());
|
||||||
}
|
}
|
||||||
|
@ -121,6 +130,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
||||||
field_text: strip_html_for_tts(field_text).into(),
|
field_text: strip_html_for_tts(field_text).into(),
|
||||||
lang: lang.into(),
|
lang: lang.into(),
|
||||||
voices: voices.unwrap_or_else(Vec::new),
|
voices: voices.unwrap_or_else(Vec::new),
|
||||||
|
speed,
|
||||||
other_args,
|
other_args,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -188,7 +198,7 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_audio() {
|
fn test_audio() {
|
||||||
let s =
|
let s =
|
||||||
"abc[sound:fo&o.mp3]def[anki:tts][en_US voices=Bob,Jane]foo<br>1>2[/anki:tts]gh";
|
"abc[sound:fo&o.mp3]def[anki:tts][en_US voices=Bob,Jane speed=1.2]foo<br>1>2[/anki:tts]gh";
|
||||||
assert_eq!(strip_av_tags(s), "abcdefgh");
|
assert_eq!(strip_av_tags(s), "abcdefgh");
|
||||||
|
|
||||||
let (text, tags) = extract_av_tags(s, true);
|
let (text, tags) = extract_av_tags(s, true);
|
||||||
|
@ -202,7 +212,8 @@ mod test {
|
||||||
field_text: "foo 1>2".into(),
|
field_text: "foo 1>2".into(),
|
||||||
lang: "en_US".into(),
|
lang: "en_US".into(),
|
||||||
voices: vec!["Bob".into(), "Jane".into()],
|
voices: vec!["Bob".into(), "Jane".into()],
|
||||||
other_args: vec![]
|
other_args: vec![],
|
||||||
|
speed: 1.2
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
Loading…
Reference in a new issue