mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 14:32:22 -04:00
add a simple TTS field filter
This simply wraps the field in extra text that the frontend will deal with. Also added some helpers for extracting and stripping audio and TTS tags from the rendered text.
This commit is contained in:
parent
17dae51b29
commit
0942ffbff6
8 changed files with 211 additions and 24 deletions
|
@ -16,6 +16,8 @@ message BackendInput {
|
||||||
BrowserRowsIn browser_rows = 20;
|
BrowserRowsIn browser_rows = 20;
|
||||||
RenderCardIn render_card = 21;
|
RenderCardIn render_card = 21;
|
||||||
int64 local_minutes_west = 22;
|
int64 local_minutes_west = 22;
|
||||||
|
string strip_av_tags = 23;
|
||||||
|
string get_av_tags = 24;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +30,8 @@ message BackendOutput {
|
||||||
BrowserRowsOut browser_rows = 20;
|
BrowserRowsOut browser_rows = 20;
|
||||||
RenderCardOut render_card = 21;
|
RenderCardOut render_card = 21;
|
||||||
sint32 local_minutes_west = 22;
|
sint32 local_minutes_west = 22;
|
||||||
|
string strip_av_tags = 23;
|
||||||
|
GetAVTagsOut get_av_tags = 24;
|
||||||
|
|
||||||
BackendError error = 2047;
|
BackendError error = 2047;
|
||||||
}
|
}
|
||||||
|
@ -44,14 +48,6 @@ message InvalidInputError {
|
||||||
string info = 1;
|
string info = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
message PlusOneIn {
|
|
||||||
int32 num = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message PlusOneOut {
|
|
||||||
int32 num = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message TemplateParseError {
|
message TemplateParseError {
|
||||||
string info = 1;
|
string info = 1;
|
||||||
}
|
}
|
||||||
|
@ -150,3 +146,19 @@ message RenderedTemplateReplacement {
|
||||||
string current_text = 2;
|
string current_text = 2;
|
||||||
repeated string filters = 3;
|
repeated string filters = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reply to a get_av_tags request: the audio/video/TTS tags that were
// found in the supplied text.
message GetAVTagsOut {
    repeated AVTag av_tags = 1;
}
|
||||||
|
|
||||||
|
// A single tag extracted from card text. Exactly one of the variants
// below is set.
message AVTag {
    oneof value {
        // filename taken from a [sound:...] tag
        string sound_or_video = 1;
        // contents of an [anki:tts] block
        TTSTag tts = 2;
    }
}
|
||||||
|
|
||||||
|
// The pieces of an [anki:tts] block.
message TTSTag {
    // arguments that were supplied to the tts call
    repeated string args = 1;
    // the field text enclosed by the block
    string text = 2;
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ import ankirspy # pytype: disable=import-error
|
||||||
import anki.backend_pb2 as pb
|
import anki.backend_pb2 as pb
|
||||||
import anki.buildinfo
|
import anki.buildinfo
|
||||||
from anki.models import AllTemplateReqs
|
from anki.models import AllTemplateReqs
|
||||||
|
from anki.sound import AVTag, SoundOrVideoTag, TTSTag
|
||||||
|
|
||||||
assert ankirspy.buildhash() == anki.buildinfo.buildhash
|
assert ankirspy.buildhash() == anki.buildinfo.buildhash
|
||||||
|
|
||||||
|
@ -45,6 +46,14 @@ def proto_template_reqs_to_legacy(
|
||||||
return legacy_reqs
|
return legacy_reqs
|
||||||
|
|
||||||
|
|
||||||
|
def av_tag_to_native(tag: pb.AVTag) -> AVTag:
    """Convert a protobuf AVTag message into its Python dataclass form.

    A message whose `value` oneof is `sound_or_video` becomes a
    SoundOrVideoTag; any other message is read as a TTS tag.
    """
    if tag.WhichOneof("value") != "sound_or_video":
        tts = tag.tts
        # copy the repeated field into a plain list
        return TTSTag(args=list(tts.args), text=tts.text)

    return SoundOrVideoTag(filename=tag.sound_or_video)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TemplateReplacement:
|
class TemplateReplacement:
|
||||||
field_name: str
|
field_name: str
|
||||||
|
@ -143,3 +152,16 @@ class RustBackend:
|
||||||
return self._run_command(
|
return self._run_command(
|
||||||
pb.BackendInput(local_minutes_west=stamp)
|
pb.BackendInput(local_minutes_west=stamp)
|
||||||
).local_minutes_west
|
).local_minutes_west
|
||||||
|
|
||||||
|
def strip_av_tags(self, text: str) -> str:
    """Ask the backend to remove the audio/video/TTS tags from `text`."""
    request = pb.BackendInput(strip_av_tags=text)
    reply = self._run_command(request)
    return reply.strip_av_tags
|
||||||
|
|
||||||
|
def get_av_tags(self, text: str) -> List[AVTag]:
    """Return the audio/video/TTS tags the backend finds in `text`.

    Each protobuf tag in the reply is converted with av_tag_to_native().
    """
    request = pb.BackendInput(get_av_tags=text)
    reply = self._run_command(request).get_av_tags
    return [av_tag_to_native(tag) for tag in reply.av_tags]
|
||||||
|
|
|
@ -1,18 +1,61 @@
|
||||||
# Copyright: Ankitects Pty Ltd and contributors
|
# Copyright: Ankitects Pty Ltd and contributors
|
||||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
import re
|
"""
|
||||||
from typing import List
|
Sound/TTS references extracted from card text.
|
||||||
|
|
||||||
# Shared utils
|
Use collection.backend.strip_av_tags(string) to remove all tags,
|
||||||
|
and collection.backend.get_av_tags(string) to get a list of AVTags.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Union
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TTSTag:
    """Holds the details of one text to speech tag.

    tts.py has more information.
    """

    # arguments that were supplied to the tts call
    args: List[str]
    # the field text enclosed by the tag
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SoundOrVideoTag:
    """Holds the filename found inside a [sound:...] tag.

    The same [sound:...] syntax is used for video files.
    """

    filename: str
|
||||||
|
|
||||||
|
|
||||||
|
# note this does not include image tags, which are handled with HTML.
|
||||||
|
AVTag = Union[SoundOrVideoTag, TTSTag]
|
||||||
|
|
||||||
|
# Legacy utils
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
# these will be removed in the future
|
||||||
|
|
||||||
_soundReg = r"\[sound:(.*?)\]"
|
_soundReg = r"\[sound:(.*?)\]"
|
||||||
|
|
||||||
|
|
||||||
def allSounds(text) -> List:
|
def allSounds(text) -> List:
|
||||||
return re.findall(_soundReg, text)
|
from aqt import mw
|
||||||
|
|
||||||
|
return [
|
||||||
|
x.filename
|
||||||
|
for x in mw.col.backend.get_av_tags(text)
|
||||||
|
if isinstance(x, SoundOrVideoTag)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def stripSounds(text) -> str:
|
def stripSounds(text) -> str:
|
||||||
return re.sub(_soundReg, "", text)
|
from aqt import mw
|
||||||
|
|
||||||
|
return mw.col.backend.strip_av_tags(text)
|
||||||
|
|
|
@ -15,6 +15,7 @@ lazy_static = "1.4.0"
|
||||||
regex = "1.3.3"
|
regex = "1.3.3"
|
||||||
hex = "0.4.0"
|
hex = "0.4.0"
|
||||||
blake3 = "0.1.0"
|
blake3 = "0.1.0"
|
||||||
|
htmlescape = "0.3.1"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
prost-build = "0.5.0"
|
prost-build = "0.5.0"
|
||||||
|
|
|
@ -10,6 +10,7 @@ use crate::template::{
|
||||||
render_card, without_legacy_template_directives, FieldMap, FieldRequirements, ParsedTemplate,
|
render_card, without_legacy_template_directives, FieldMap, FieldRequirements, ParsedTemplate,
|
||||||
RenderedNode,
|
RenderedNode,
|
||||||
};
|
};
|
||||||
|
use crate::text::{av_tags_in_string, strip_av_tags, AVTag};
|
||||||
use prost::Message;
|
use prost::Message;
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
@ -98,6 +99,8 @@ impl Backend {
|
||||||
Value::LocalMinutesWest(stamp) => {
|
Value::LocalMinutesWest(stamp) => {
|
||||||
OValue::LocalMinutesWest(local_minutes_west_for_stamp(stamp))
|
OValue::LocalMinutesWest(local_minutes_west_for_stamp(stamp))
|
||||||
}
|
}
|
||||||
|
Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()),
|
||||||
|
Value::GetAvTags(text) => OValue::GetAvTags(self.get_av_tags(&text)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,6 +181,24 @@ impl Backend {
|
||||||
answer_nodes: rendered_nodes_to_proto(anodes),
|
answer_nodes: rendered_nodes_to_proto(anodes),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_av_tags(&self, text: &str) -> pt::GetAvTagsOut {
|
||||||
|
let tags = av_tags_in_string(text)
|
||||||
|
.map(|avtag| match avtag {
|
||||||
|
AVTag::SoundOrVideo(file) => pt::AvTag {
|
||||||
|
value: Some(pt::av_tag::Value::SoundOrVideo(file.to_string())),
|
||||||
|
},
|
||||||
|
AVTag::TextToSpeech { args, field_text } => pt::AvTag {
|
||||||
|
value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
|
||||||
|
args: args.iter().map(|&s| s.to_string()).collect(),
|
||||||
|
text: field_text.to_string(),
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
pt::GetAvTagsOut { av_tags: tags }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ords_hash_to_set(ords: HashSet<u16>) -> Vec<u32> {
|
fn ords_hash_to_set(ords: HashSet<u16>) -> Vec<u32> {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
use crate::err::{Result, TemplateError};
|
use crate::err::{Result, TemplateError};
|
||||||
use crate::template_filters::apply_filters;
|
use crate::template_filters::apply_filters;
|
||||||
use crate::text::strip_sounds;
|
use crate::text::strip_av_tags;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use nom;
|
use nom;
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
|
@ -443,7 +443,7 @@ pub fn render_card(
|
||||||
// if the question side didn't have any unknown filters, we can pass
|
// if the question side didn't have any unknown filters, we can pass
|
||||||
// FrontSide in now
|
// FrontSide in now
|
||||||
if let [RenderedNode::Text { ref text }] = *qnodes.as_slice() {
|
if let [RenderedNode::Text { ref text }] = *qnodes.as_slice() {
|
||||||
context.front_text = Some(strip_sounds(text));
|
context.front_text = Some(strip_av_tags(text));
|
||||||
}
|
}
|
||||||
|
|
||||||
// answer side
|
// answer side
|
||||||
|
|
|
@ -75,9 +75,13 @@ fn apply_filter<'a>(
|
||||||
// an empty filter name (caused by using two colons) is ignored
|
// an empty filter name (caused by using two colons) is ignored
|
||||||
"" => text.into(),
|
"" => text.into(),
|
||||||
_ => {
|
_ => {
|
||||||
|
if filter_name.starts_with("tts ") {
|
||||||
|
tts_filter(filter_name, text)
|
||||||
|
} else {
|
||||||
// unrecognized filter
|
// unrecognized filter
|
||||||
return (false, None);
|
return (false, None);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
(
|
(
|
||||||
|
@ -285,6 +289,11 @@ return false;">
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wrap `text` in `[anki:tts]` markers for the frontend to act on.
///
/// Whatever follows the first space in `filter_name` is passed through
/// verbatim as the argument string; a name without a space yields empty
/// arguments.
fn tts_filter(filter_name: &str, text: &str) -> Cow<'static, str> {
    let args = match filter_name.find(' ') {
        // skip over the single-byte space itself
        Some(space) => &filter_name[space + 1..],
        None => "",
    };

    let wrapped = format!("[anki:tts][{}]{}[/anki:tts]", args, text);
    Cow::Owned(wrapped)
}
|
||||||
// Tests
|
// Tests
|
||||||
//----------------------------------------
|
//----------------------------------------
|
||||||
|
|
||||||
|
@ -293,7 +302,7 @@ mod test {
|
||||||
use crate::template::RenderContext;
|
use crate::template::RenderContext;
|
||||||
use crate::template_filters::{
|
use crate::template_filters::{
|
||||||
apply_filters, cloze_filter, furigana_filter, hint_filter, kana_filter, kanji_filter,
|
apply_filters, cloze_filter, furigana_filter, hint_filter, kana_filter, kanji_filter,
|
||||||
type_cloze_filter, type_filter,
|
tts_filter, type_cloze_filter, type_filter,
|
||||||
};
|
};
|
||||||
use crate::text::strip_html;
|
use crate::text::strip_html;
|
||||||
|
|
||||||
|
@ -368,4 +377,12 @@ field</a>
|
||||||
ctx.card_ord = 2;
|
ctx.card_ord = 2;
|
||||||
assert_eq!(cloze_filter(text, &ctx).as_ref(), "");
|
assert_eq!(cloze_filter(text, &ctx).as_ref(), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_tts() {
    // the text after "tts " becomes the bracketed argument list
    let rendered = tts_filter("tts lang=en_US", "foo");
    assert_eq!(rendered, "[anki:tts][lang=en_US]foo[/anki:tts]");
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,22 @@
|
||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
|
use htmlescape;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
|
|
||||||
|
/// An audio/video or text to speech reference found in card text.
#[derive(Debug, PartialEq)]
pub enum AVTag<'a> {
    /// The filename from a `[sound:...]` tag.
    SoundOrVideo(Cow<'a, str>),
    /// The contents of an `[anki:tts]` block.
    TextToSpeech {
        /// Arguments to the tts call.
        args: Vec<&'a str>,
        /// The field text enclosed by the block.
        field_text: Cow<'a, str>,
    },
}
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref HTML: Regex = Regex::new(concat!(
|
static ref HTML: Regex = Regex::new(concat!(
|
||||||
"(?si)",
|
"(?si)",
|
||||||
|
@ -22,9 +32,16 @@ lazy_static! {
|
||||||
r#"(?i)<img[^>]+src=["']?([^"'>]+)["']?[^>]*>"#
|
r#"(?i)<img[^>]+src=["']?([^"'>]+)["']?[^>]*>"#
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
static ref SOUND_TAG: Regex = Regex::new(
|
// videos are also in sound tags
|
||||||
r"\[sound:(.*?)\]"
|
static ref AV_TAGS: Regex = Regex::new(
|
||||||
).unwrap();
|
r#"(?xs)
|
||||||
|
\[sound:(.*?)\] # 1 - the filename in a sound tag
|
||||||
|
|
|
||||||
|
\[anki:tts\]
|
||||||
|
\[(.*?)\] # 2 - arguments to tts call
|
||||||
|
(.*?) # 3 - field text
|
||||||
|
\[/anki:tts\]
|
||||||
|
"#).unwrap();
|
||||||
|
|
||||||
static ref CLOZED_TEXT: Regex = Regex::new(
|
static ref CLOZED_TEXT: Regex = Regex::new(
|
||||||
r"(?s)\{\{c(\d+)::.+?\}\}"
|
r"(?s)\{\{c(\d+)::.+?\}\}"
|
||||||
|
@ -35,8 +52,43 @@ pub fn strip_html(html: &str) -> Cow<str> {
|
||||||
HTML.replace_all(html, "")
|
HTML.replace_all(html, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn strip_sounds(html: &str) -> Cow<str> {
|
pub fn decode_entities(html: &str) -> Cow<str> {
|
||||||
SOUND_TAG.replace_all(html, "")
|
if html.contains('&') {
|
||||||
|
match htmlescape::decode_html(html) {
|
||||||
|
Ok(text) => text,
|
||||||
|
Err(e) => format!("{:?}", e),
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
} else {
|
||||||
|
// nothing to do
|
||||||
|
html.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn strip_html_for_tts(html: &str) -> Cow<str> {
|
||||||
|
match HTML.replace_all(html, " ") {
|
||||||
|
Cow::Borrowed(_) => decode_entities(html),
|
||||||
|
Cow::Owned(s) => decode_entities(&s).to_string().into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn strip_av_tags(text: &str) -> Cow<str> {
|
||||||
|
AV_TAGS.replace_all(text, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn av_tags_in_string(text: &str) -> impl Iterator<Item = AVTag> {
|
||||||
|
AV_TAGS.captures_iter(text).map(|caps| {
|
||||||
|
if let Some(av_file) = caps.get(1) {
|
||||||
|
AVTag::SoundOrVideo(decode_entities(av_file.as_str()))
|
||||||
|
} else {
|
||||||
|
let args = caps.get(2).unwrap();
|
||||||
|
let field_text = caps.get(3).unwrap();
|
||||||
|
AVTag::TextToSpeech {
|
||||||
|
args: args.as_str().split(' ').collect(),
|
||||||
|
field_text: strip_html_for_tts(field_text.as_str()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
|
pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
|
||||||
|
@ -64,7 +116,10 @@ pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use crate::text::{cloze_numbers_in_string, strip_html, strip_html_preserving_image_filenames};
|
use crate::text::{
|
||||||
|
av_tags_in_string, cloze_numbers_in_string, strip_av_tags, strip_html,
|
||||||
|
strip_html_preserving_image_filenames, AVTag,
|
||||||
|
};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -95,4 +150,20 @@ mod test {
|
||||||
vec![1, 2].into_iter().collect::<HashSet<u16>>()
|
vec![1, 2].into_iter().collect::<HashSet<u16>>()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_audio() {
    // The input carries HTML entities (&amp; and &gt;) so that the entity
    // decoding applied to sound filenames and TTS field text is exercised;
    // the expected values below are the decoded forms.
    let s =
        "abc[sound:fo&amp;o.mp3]def[anki:tts][lang=en_US voices=Bob,Jane]foo<br>1&gt;2[/anki:tts]gh";

    // stripping removes both the sound tag and the whole tts block
    assert_eq!(strip_av_tags(s), "abcdefgh");

    assert_eq!(
        av_tags_in_string(s).collect::<Vec<_>>(),
        vec![
            AVTag::SoundOrVideo("fo&o.mp3".into()),
            AVTag::TextToSpeech {
                args: vec!["lang=en_US", "voices=Bob,Jane"],
                // <br> is replaced with a space and &gt; is decoded to >
                field_text: "foo 1>2".into()
            },
        ]
    );
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue