mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 22:12:21 -04:00
support speed control in tts tags
This commit is contained in:
parent
0480879c11
commit
21cbb5a766
6 changed files with 45 additions and 7 deletions
|
@ -168,5 +168,6 @@ message TTSTag {
|
|||
string field_text = 1;
|
||||
string lang = 2;
|
||||
repeated string voices = 3;
|
||||
repeated string other_args = 4;
|
||||
float speed = 4;
|
||||
repeated string other_args = 5;
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@ def av_tag_to_native(tag: pb.AVTag) -> AVTag:
|
|||
lang=tag.tts.lang,
|
||||
voices=list(tag.tts.voices),
|
||||
other_args=list(tag.tts.other_args),
|
||||
speed=tag.tts.speed,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ class TTSTag:
|
|||
field_text: str
|
||||
lang: str
|
||||
voices: List[str]
|
||||
speed: float
|
||||
# each arg should be in the form 'foo=bar'
|
||||
other_args: List[str]
|
||||
|
||||
|
|
|
@ -126,8 +126,11 @@ class MacTTSPlayer(TTSProcessPlayer):
|
|||
voice = match.voice
|
||||
assert isinstance(voice, MacVoice)
|
||||
|
||||
default_wpm = 170
|
||||
words_per_min = str(int(default_wpm * tag.speed))
|
||||
|
||||
self._process = subprocess.Popen(
|
||||
["say", "-v", voice.original_name, "-f", "-"],
|
||||
["say", "-v", voice.original_name, "-r", words_per_min, "-f", "-"],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
|
@ -171,8 +174,21 @@ class MacTTSFilePlayer(MacTTSPlayer):
|
|||
voice = match.voice
|
||||
assert isinstance(voice, MacVoice)
|
||||
|
||||
default_wpm = 170
|
||||
words_per_min = str(int(default_wpm * tag.speed))
|
||||
|
||||
self._process = subprocess.Popen(
|
||||
["say", "-v", voice.original_name, "-f", "-", "-o", self.tmppath],
|
||||
[
|
||||
"say",
|
||||
"-v",
|
||||
voice.original_name,
|
||||
"-r",
|
||||
words_per_min,
|
||||
"-f",
|
||||
"-",
|
||||
"-o",
|
||||
self.tmppath,
|
||||
],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
|
@ -441,6 +457,7 @@ if isWin:
|
|||
try:
|
||||
native_voice = voice.handle
|
||||
self.speaker.Voice = native_voice
|
||||
self.speaker.Rate = self._rate_for_speed(tag.speed)
|
||||
self.speaker.Speak(tag.field_text, 1)
|
||||
gui_hooks.av_player_did_begin_playing(self, tag)
|
||||
|
||||
|
@ -454,7 +471,12 @@ if isWin:
|
|||
self._terminate_flag = False
|
||||
|
||||
def _tidy_name(self, name: str) -> str:
|
||||
"eg. Microsoft Haruka Desktop -> MS-Haruka."
|
||||
"eg. Microsoft Haruka Desktop -> Microsoft_Haruka."
|
||||
return re.sub(r"^Microsoft (.+) Desktop$", "Microsoft_\\1", name).replace(
|
||||
" ", "_"
|
||||
)
|
||||
|
||||
def _rate_for_speed(self, speed: float) -> int:
|
||||
"eg. 1.5 -> 15, 0.5 -> -5"
|
||||
speed = (speed * 10) - 10
|
||||
return int(max(-10, min(10, speed)))
|
||||
|
|
|
@ -195,12 +195,14 @@ impl Backend {
|
|||
lang,
|
||||
voices,
|
||||
other_args,
|
||||
speed,
|
||||
} => pt::AvTag {
|
||||
value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
|
||||
field_text,
|
||||
lang,
|
||||
voices,
|
||||
other_args,
|
||||
speed,
|
||||
})),
|
||||
},
|
||||
})
|
||||
|
|
|
@ -15,6 +15,7 @@ pub enum AVTag {
|
|||
field_text: String,
|
||||
lang: String,
|
||||
voices: Vec<String>,
|
||||
speed: f32,
|
||||
other_args: Vec<String>,
|
||||
},
|
||||
}
|
||||
|
@ -102,9 +103,10 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
|
|||
|
||||
fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
||||
let mut other_args = vec![];
|
||||
let mut split_args = args.split(' ');
|
||||
let mut split_args = args.split_ascii_whitespace();
|
||||
let lang = split_args.next().unwrap_or("");
|
||||
let mut voices = None;
|
||||
let mut speed = 1.0;
|
||||
|
||||
for remaining_arg in split_args {
|
||||
if remaining_arg.starts_with("voices=") {
|
||||
|
@ -112,6 +114,13 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
|||
.split('=')
|
||||
.nth(1)
|
||||
.map(|voices| voices.split(',').map(ToOwned::to_owned).collect());
|
||||
} else if remaining_arg.starts_with("speed=") {
|
||||
speed = remaining_arg
|
||||
.split('=')
|
||||
.nth(1)
|
||||
.unwrap()
|
||||
.parse()
|
||||
.unwrap_or(1.0);
|
||||
} else {
|
||||
other_args.push(remaining_arg.to_owned());
|
||||
}
|
||||
|
@ -121,6 +130,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
|||
field_text: strip_html_for_tts(field_text).into(),
|
||||
lang: lang.into(),
|
||||
voices: voices.unwrap_or_else(Vec::new),
|
||||
speed,
|
||||
other_args,
|
||||
}
|
||||
}
|
||||
|
@ -188,7 +198,7 @@ mod test {
|
|||
#[test]
|
||||
fn test_audio() {
|
||||
let s =
|
||||
"abc[sound:fo&o.mp3]def[anki:tts][en_US voices=Bob,Jane]foo<br>1>2[/anki:tts]gh";
|
||||
"abc[sound:fo&o.mp3]def[anki:tts][en_US voices=Bob,Jane speed=1.2]foo<br>1>2[/anki:tts]gh";
|
||||
assert_eq!(strip_av_tags(s), "abcdefgh");
|
||||
|
||||
let (text, tags) = extract_av_tags(s, true);
|
||||
|
@ -202,7 +212,8 @@ mod test {
|
|||
field_text: "foo 1>2".into(),
|
||||
lang: "en_US".into(),
|
||||
voices: vec!["Bob".into(), "Jane".into()],
|
||||
other_args: vec![]
|
||||
other_args: vec![],
|
||||
speed: 1.2
|
||||
},
|
||||
]
|
||||
);
|
||||
|
|
Loading…
Reference in a new issue