New TTS/AV tag handling (#1559)

* Add new `card_rendering` mod

Parses text containing AV/TTS tags and strips or extracts them (a usage sketch follows this commit list).

* Replace old `extract_av_tags` and `strip_av_tags`

... with new `card_rendering` mod

* ressource -> resource

* Add AV prettifier for use in browser table

* Accept String in av tag routines

... and avoid redundant writes if no changes need to be made.

* add benchmarking with criterion; make links test optional (dae)

`cargo install cargo-criterion`, then run `./bench.sh`

* performance comparison: creating HashMap up front (dae)

the previous solution:

anki_tag_parse          time:   [1.8401 us 1.8437 us 1.8476 us]

this solution:

anki_tag_parse          time:   [2.2420 us 2.2447 us 2.2477 us]
                        change: [+21.477% +21.770% +22.066%] (p = 0.00 < 0.05)
                        Performance has regressed.

* Revert "performance comparison: creating HashMap up front" (dae)

This reverts commit f19126a2f1.

* add missing header

* Write error message if tts lang is missing

* `Tag` -> `Directive`
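
Below is a minimal usage sketch of the new public API in the `card_rendering` module, pieced together from the signatures and tests in the diff that follows. The `main` function, the example strings and the `anki::i18n::I18n` import path are illustrative assumptions; `I18n::template_only()` is the untranslated instance the crate's own tests use.

```rust
// Sketch only – not part of the commit. Exercises the three entry points
// added in card_rendering/mod.rs; the expected values mirror the tests below.
use anki::card_rendering::{extract_av_tags, prettify_av_tags, strip_av_tags};
use anki::i18n::I18n; // assumed import path, as used elsewhere in the crate

fn main() {
    let tr = I18n::template_only();

    // [sound:...] and [anki:tts]...[/anki:tts] blocks are stripped entirely:
    assert_eq!(strip_av_tags("foo[sound:bar.mp3]baz"), "foobaz");

    // ...or replaced with [anki:play:<side>:<index>] references, with the
    // extracted tags returned as protobuf AvTag values alongside the text:
    let (text, tags) = extract_av_tags("foo [sound:bar.mp3]", true, &tr);
    assert_eq!(text, "foo [anki:play:q:0]");
    assert_eq!(tags.len(), 1);

    // ...or prettified for display in the browser table:
    assert_eq!(prettify_av_tags("foo [sound:bar.mp3]"), "foo 🔉bar.mp3🔉");

    // Both &str and owned String are accepted; input without any AV/TTS tags
    // is handed back without being rewritten.
    let plain = String::from("no av tags here");
    assert_eq!(strip_av_tags(plain.clone()), plain);
}
```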
Author: RumovZ
Date: 2021-12-17 10:04:42 +01:00 (committed by GitHub)
Parent: 2221d0a520
Commit: 3e0c9dc866
14 changed files with 940 additions and 146 deletions

Cargo.lock (generated)

@ -54,6 +54,7 @@ dependencies = [
"bytes",
"chrono",
"coarsetime",
"criterion",
"env_logger",
"flate2",
"fluent",
@ -222,6 +223,18 @@ dependencies = [
"digest",
]
[[package]]
name = "bstr"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
dependencies = [
"lazy_static",
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.8.0"
@ -240,6 +253,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cast"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
dependencies = [
"rustc_version",
]
[[package]]
name = "cc"
version = "1.0.72"
@ -265,6 +287,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"bitflags",
"textwrap",
"unicode-width",
]
[[package]]
name = "coarsetime"
version = "0.1.20"
@ -335,6 +368,42 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "criterion"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10"
dependencies = [
"atty",
"cast",
"clap",
"criterion-plot",
"csv",
"itertools",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_cbor",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
dependencies = [
"cast",
"itertools",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
@ -345,6 +414,30 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
dependencies = [
"cfg-if",
"crossbeam-utils",
"lazy_static",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
@ -382,6 +475,28 @@ dependencies = [
"syn",
]
[[package]]
name = "csv"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "derivative"
version = "2.2.0"
@ -758,6 +873,12 @@ dependencies = [
"tracing",
]
[[package]]
name = "half"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hashbrown"
version = "0.11.2"
@ -1166,6 +1287,15 @@ version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "mime"
version = "0.3.16"
@ -1343,6 +1473,12 @@ version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "openssl"
version = "0.10.38"
@ -1580,6 +1716,34 @@ version = "0.3.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f"
[[package]]
name = "plotters"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c"
[[package]]
name = "plotters-svg"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9"
dependencies = [
"plotters-backend",
]
[[package]]
name = "ppv-lite86"
version = "0.2.15"
@ -1836,6 +2000,31 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rayon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
@ -1866,6 +2055,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
[[package]]
name = "regex-syntax"
version = "0.6.25"
@ -2123,6 +2318,16 @@ dependencies = [
"serde_json",
]
[[package]]
name = "serde_cbor"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.130"
@ -2423,6 +2628,15 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thin-slice"
version = "0.1.1"
@ -2475,6 +2689,16 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1"
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "tinyvec"
version = "1.5.1"


@ -9,3 +9,8 @@ errors-100-tags-max =
is no need to select child tags if you have selected a parent tag.
errors-multiple-notetypes-selected = Please select notes from only one notetype.
errors-please-check-database = Please use the Check Database action, then try again.
## Card Rendering
errors-bad-directive = Error in directive '{ $directive }': { $error }
errors-option-not-set = '{ $option }' not set


@ -11,6 +11,19 @@ build = "build/main.rs"
name = "anki"
path = "src/lib.rs"
[features]
bench = ["criterion"]
links = ["linkcheck"]
[[test]]
name = "links"
required-features = ["links"]
[[bench]]
name = "benchmark"
harness = false
required-features = ["bench"]
# After updating anything below, run ../cargo/update.py
[build-dependencies]
@ -18,7 +31,6 @@ prost-build = "0.9.0"
[dev-dependencies]
env_logger = "0.9.0"
linkcheck = { git = "https://github.com/ankitects/linkcheck.git", rev = "2f20798ce521cc594d510d4e417e76d5eac04d4b" }
tokio = { version = "1.12.0", features = ["macros"] }
[dependencies]
@ -27,6 +39,9 @@ unicase = "=2.6.0"
anki_i18n = { path="i18n" }
criterion = { version = "0.3.5", optional = true }
linkcheck = { git = "https://github.com/ankitects/linkcheck.git", rev = "2f20798ce521cc594d510d4e417e76d5eac04d4b", optional = true }
nom = "7.0.0"
proc-macro-nested = "0.1.7"
slog-term = "2.8.0"

rslib/bench.sh (new executable file)

@ -0,0 +1,3 @@
#!/bin/bash
cargo criterion --bench benchmark --features bench


@ -0,0 +1,12 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use anki::card_rendering::anki_directive_benchmark;
use criterion::{criterion_group, criterion_main, Criterion};
pub fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("anki_tag_parse", |b| b.iter(|| anki_directive_benchmark()));
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);


@ -5,14 +5,15 @@ use super::Backend;
pub(super) use crate::backend_proto::cardrendering_service::Service as CardRenderingService;
use crate::{
backend_proto as pb,
card_rendering::{extract_av_tags, strip_av_tags},
latex::{extract_latex, extract_latex_expanding_clozes, ExtractedLatex},
markdown::render_markdown,
notetype::{CardTemplateSchema11, RenderCardOutput},
prelude::*,
template::RenderedNode,
text::{
decode_iri_paths, encode_iri_paths, extract_av_tags, sanitize_html_no_images,
strip_av_tags, strip_html, strip_html_preserving_media_filenames, AvTag,
decode_iri_paths, encode_iri_paths, sanitize_html_no_images, strip_html,
strip_html_preserving_media_filenames,
},
};
@ -21,34 +22,10 @@ impl CardRenderingService for Backend {
&self,
input: pb::ExtractAvTagsRequest,
) -> Result<pb::ExtractAvTagsResponse> {
let (text, tags) = extract_av_tags(&input.text, input.question_side);
let pt_tags = tags
.into_iter()
.map(|avtag| match avtag {
AvTag::SoundOrVideo(file) => pb::AvTag {
value: Some(pb::av_tag::Value::SoundOrVideo(file)),
},
AvTag::TextToSpeech {
field_text,
lang,
voices,
other_args,
speed,
} => pb::AvTag {
value: Some(pb::av_tag::Value::Tts(pb::TtsTag {
field_text,
lang,
voices,
speed,
other_args,
})),
},
})
.collect();
let out = extract_av_tags(input.text, input.question_side, self.i18n());
Ok(pb::ExtractAvTagsResponse {
text: text.into(),
av_tags: pt_tags,
text: out.0,
av_tags: out.1,
})
}
@ -140,9 +117,7 @@ impl CardRenderingService for Backend {
}
fn strip_av_tags(&self, input: pb::String) -> Result<pb::String> {
Ok(pb::String {
val: strip_av_tags(&input.val).into(),
})
Ok(strip_av_tags(input.val).into())
}
fn render_markdown(&self, input: pb::RenderMarkdownRequest) -> Result<pb::String> {


@ -9,11 +9,12 @@ use strum::{Display, EnumIter, EnumString, IntoEnumIterator};
use crate::{
backend_proto as pb,
card::{CardQueue, CardType},
card_rendering::prettify_av_tags,
notetype::{CardTemplate, NotetypeKind},
prelude::*,
scheduler::{timespan::time_span, timing::SchedTimingToday},
template::RenderedNode,
text::{extract_av_tags, html_to_text_line},
text::html_to_text_line,
};
#[derive(Debug, PartialEq, Clone, Copy, Display, EnumIter, EnumString)]
@ -270,7 +271,7 @@ impl RenderContext {
} => current_text,
})
.join("");
let question = extract_av_tags(&qnodes_text, true).0.to_string();
let question = prettify_av_tags(qnodes_text);
Ok(RenderContext {
question,
@ -410,7 +411,7 @@ impl RowContext {
} => current_text,
})
.join("");
let answer = extract_av_tags(&answer, false).0;
let answer = prettify_av_tags(answer);
html_to_text_line(
if let Some(stripped) = answer.strip_prefix(&render_context.question) {
stripped


@ -0,0 +1,153 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashMap;
use crate::{backend_proto as pb, prelude::*};
mod parser;
mod writer;
pub fn strip_av_tags<S: Into<String> + AsRef<str>>(txt: S) -> String {
nodes_or_text_only(txt.as_ref())
.map(|nodes| nodes.write_without_av_tags())
.unwrap_or_else(|| txt.into())
}
pub fn extract_av_tags<S: Into<String> + AsRef<str>>(
txt: S,
question_side: bool,
tr: &I18n,
) -> (String, Vec<pb::AvTag>) {
nodes_or_text_only(txt.as_ref())
.map(|nodes| nodes.write_and_extract_av_tags(question_side, tr))
.unwrap_or_else(|| (txt.into(), vec![]))
}
pub fn prettify_av_tags<S: Into<String> + AsRef<str>>(txt: S) -> String {
nodes_or_text_only(txt.as_ref())
.map(|nodes| nodes.write_with_pretty_av_tags())
.unwrap_or_else(|| txt.into())
}
/// Parse `txt` into [CardNodes] and return the result,
/// or [None] if it is only a text node.
fn nodes_or_text_only(txt: &str) -> Option<CardNodes> {
let nodes = CardNodes::parse(txt);
match nodes.0[..] {
[Node::Text(_)] => None,
_ => Some(nodes),
}
}
#[derive(Debug, PartialEq)]
struct CardNodes<'a>(Vec<Node<'a>>);
impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> {
type Item = &'iter Node<'nodes>;
type IntoIter = std::slice::Iter<'iter, Node<'nodes>>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
#[derive(Debug, PartialEq)]
enum Node<'a> {
Text(&'a str),
SoundOrVideo(&'a str),
Directive(Directive<'a>),
}
#[derive(Debug, PartialEq)]
enum Directive<'a> {
Tts(TtsDirective<'a>),
Other(OtherDirective<'a>),
}
#[derive(Debug, PartialEq)]
struct TtsDirective<'a> {
content: &'a str,
lang: &'a str,
voices: Vec<&'a str>,
speed: f32,
blank: Option<&'a str>,
options: HashMap<&'a str, &'a str>,
}
#[derive(Debug, PartialEq)]
struct OtherDirective<'a> {
name: &'a str,
content: &'a str,
options: HashMap<&'a str, &'a str>,
}
#[cfg(feature = "bench")]
#[inline]
pub fn anki_directive_benchmark() {
CardNodes::parse("[anki:foo bar=baz][/anki:foo][anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]");
}
#[cfg(test)]
mod test {
use super::*;
/// Strip av tags and assert equality with input or separately passed output.
macro_rules! assert_av_stripped {
($input:expr) => {
assert_eq!($input, strip_av_tags($input));
};
($input:expr, $output:expr) => {
assert_eq!(strip_av_tags($input), $output);
};
}
#[test]
fn av_stripping() {
assert_av_stripped!("foo [sound:bar] baz", "foo baz");
assert_av_stripped!("[anki:tts bar=baz]spam[/anki:tts]", "");
assert_av_stripped!("[anki:foo bar=baz]spam[/anki:foo]");
}
#[test]
fn av_extracting() {
let tr = I18n::template_only();
let (txt, tags) = extract_av_tags(
"foo [sound:bar.mp3] baz [anki:tts lang=en_US][...][/anki:tts]",
true,
&tr,
);
assert_eq!(
(txt.as_str(), tags),
(
"foo [anki:play:q:0] baz [anki:play:q:1]",
vec![
pb::AvTag {
value: Some(pb::av_tag::Value::SoundOrVideo("bar.mp3".to_string()))
},
pb::AvTag {
value: Some(pb::av_tag::Value::Tts(pb::TtsTag {
field_text: tr.card_templates_blank().to_string(),
lang: "en_US".to_string(),
voices: vec![],
speed: 1.0,
other_args: vec![],
}))
}
],
),
);
assert_eq!(
extract_av_tags("[anki:tts]foo[/anki:tts]", true, &tr),
(
format!(
"[{}]",
tr.errors_bad_directive("anki:tts", tr.errors_option_not_set("lang"))
.to_owned()
),
vec![],
),
);
}
}
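
The "avoid redundant writes" point from the commit message is what `nodes_or_text_only` above enables: when the input parses to a single text node, the routines return `txt.into()`, so an owned `String` keeps its original buffer. A sketch of a test that could sit in the `test` module above to pin that down (not part of the commit; the pointer check relies on `String: Into<String>` being the identity conversion):

```rust
#[test]
fn plain_text_is_returned_unchanged() {
    let input = String::from("no av or tts tags in here");
    let ptr = input.as_ptr();
    let output = strip_av_tags(input);
    // same contents...
    assert_eq!(output, "no av or tts tags in here");
    // ...and the same heap buffer: nothing was rewritten or reallocated
    assert_eq!(output.as_ptr(), ptr);
}
```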


@ -0,0 +1,260 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashMap;
use nom::{
branch::alt,
bytes::complete::{is_not, tag},
character::complete::{anychar, multispace0},
combinator::{map, not, recognize, success, value},
multi::{many0, many1},
sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
};
use super::{CardNodes, Directive, Node, OtherDirective, TtsDirective};
type IResult<'a, O> = nom::IResult<&'a str, O>;
impl<'a> CardNodes<'a> {
pub(super) fn parse(mut txt: &'a str) -> Self {
let mut nodes = Vec::new();
while let Ok((remaining, node)) = node(txt) {
txt = remaining;
nodes.push(node);
}
Self(nodes)
}
}
impl<'a> Directive<'a> {
fn new(name: &'a str, options: Vec<(&'a str, &'a str)>, content: &'a str) -> Self {
match name {
"tts" => {
let mut lang = "";
let mut voices = vec![];
let mut speed = 1.0;
let mut blank = None;
let mut other_options = HashMap::new();
for option in options {
match option.0 {
"lang" => lang = option.1,
"voices" => voices = option.1.split(',').collect(),
"speed" => speed = option.1.parse().unwrap_or(1.0),
"cloze_blank" => blank = Some(option.1),
_ => {
other_options.insert(option.0, option.1);
}
}
}
Self::Tts(TtsDirective {
content,
lang,
voices,
speed,
blank,
options: other_options,
})
}
_ => Self::Other(OtherDirective {
name,
content,
options: options.into_iter().collect(),
}),
}
}
}
/// Consume 0 or more of anything in " \t\r\n" after `parser`.
fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<O>
where
P: FnMut(&'s str) -> IResult<O> + 'parser,
{
terminated(parser, multispace0)
}
/// Parse until char in `arr` is found. Always succeeds.
fn is_not0<'parser, 'arr: 'parser, 's: 'parser>(
arr: &'arr str,
) -> impl FnMut(&'s str) -> IResult<&'s str> + 'parser {
alt((is_not(arr), success("")))
}
fn node(s: &str) -> IResult<Node> {
alt((text_node, sound_node, tag_node))(s)
}
/// A sound tag `[sound:resource]`, where `resource` points to a sound or video file.
fn sound_node(s: &str) -> IResult<Node> {
map(
delimited(tag("[sound:"), is_not("]"), tag("]")),
Node::SoundOrVideo,
)(s)
}
/// An Anki tag `[anki:tag...]...[/anki:tag]`.
fn tag_node(s: &str) -> IResult<Node> {
/// Match the start of an opening tag and return its name.
fn name(s: &str) -> IResult<&str> {
preceded(tag("[anki:"), is_not("] \t\r\n"))(s)
}
/// Return a parser to match an opening `name` tag and return its options.
fn opening_parser<'name, 's: 'name>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<Vec<(&str, &str)>> + 'name {
/// List of whitespace-separated `key=val` tuples, where `val` may be empty.
fn options(s: &str) -> IResult<Vec<(&str, &str)>> {
fn key(s: &str) -> IResult<&str> {
is_not("] \t\r\n=")(s)
}
fn val(s: &str) -> IResult<&str> {
alt((
delimited(tag("\""), is_not0("\""), tag("\"")),
is_not0("] \t\r\n\""),
))(s)
}
many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s)
}
delimited(
pair(tag("[anki:"), trailing_whitespace0(tag(name))),
options,
tag("]"),
)
}
/// Return a parser to match a closing `name` tag.
fn closing_parser<'parser, 'name: 'parser, 's: 'parser>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<()> + 'parser {
value((), tuple((tag("[/anki:"), tag(name), tag("]"))))
}
/// Return a parser to match and return anything until a closing `name` tag is found.
fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<&str> + 'parser {
recognize(many0(pair(not(closing_parser(name)), anychar)))
}
let (_, tag_name) = name(s)?;
map(
terminated(
pair(opening_parser(tag_name), content_parser(tag_name)),
closing_parser(tag_name),
),
|(options, content)| Node::Directive(Directive::new(tag_name, options, content)),
)(s)
}
fn text_node(s: &str) -> IResult<Node> {
map(
recognize(many1(pair(not(alt((sound_node, tag_node))), anychar))),
Node::Text,
)(s)
}
#[cfg(test)]
mod test {
use super::*;
macro_rules! assert_parsed_nodes {
($txt:expr $(, $node:expr)*) => {
assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*]));
}
}
#[test]
fn parsing() {
use Node::*;
// empty
assert_parsed_nodes!("");
// text
assert_parsed_nodes!("foo", Text("foo"));
// broken sound/tags are just text as well
assert_parsed_nodes!("[sound:]", Text("[sound:]"));
assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:][/anki:]"));
assert_parsed_nodes!("[anki:foo][/anki:bar]", Text("[anki:foo][/anki:bar]"));
// sound
assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo"));
assert_parsed_nodes!(
"foo [sound:bar] baz",
Text("foo "),
SoundOrVideo("bar"),
Text(" baz")
);
assert_parsed_nodes!(
"[sound:foo][sound:bar]",
SoundOrVideo("foo"),
SoundOrVideo("bar")
);
// tags
assert_parsed_nodes!(
"[anki:foo]bar[/anki:foo]",
Directive(super::Directive::Other(OtherDirective {
name: "foo",
content: "bar",
options: HashMap::new()
}))
);
assert_parsed_nodes!(
"[anki:foo bar=baz][/anki:foo]",
Directive(super::Directive::Other(OtherDirective {
name: "foo",
content: "",
options: [("bar", "baz")].into_iter().collect(),
}))
);
// unquoted white space separates options, "]" terminates
assert_parsed_nodes!(
"[anki:foo\na=b\tc=d e=f][/anki:foo]",
Directive(super::Directive::Other(OtherDirective {
name: "foo",
content: "",
options: [("a", "b"), ("c", "d"), ("e", "f")].into_iter().collect(),
}))
);
assert_parsed_nodes!(
"[anki:foo a=\"b \t\n c ]\"][/anki:foo]",
Directive(super::Directive::Other(OtherDirective {
name: "foo",
content: "",
options: [("a", "b \t\n c ]")].into_iter().collect(),
}))
);
// tts tags
assert_parsed_nodes!(
"[anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]",
Directive(super::Directive::Tts(TtsDirective {
content: "",
lang: "jp_JP",
voices: vec!["Alice", "Bob"],
speed: 0.5,
blank: Some(""),
options: [("bar", "baz")].into_iter().collect(),
}))
);
assert_parsed_nodes!(
"[anki:tts speed=foo][/anki:tts]",
Directive(super::Directive::Tts(TtsDirective {
content: "",
lang: "",
voices: vec![],
speed: 1.0,
blank: None,
options: HashMap::new(),
}))
);
}
}


@ -0,0 +1,246 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::fmt::Write as _;
use super::{CardNodes, Directive, Node, OtherDirective, TtsDirective};
use crate::prelude::*;
use crate::{
backend_proto as pb,
text::{decode_entities, strip_html_for_tts},
};
impl<'a> CardNodes<'a> {
pub(super) fn write_without_av_tags(&self) -> String {
AvStripper::new().write(self)
}
pub(super) fn write_and_extract_av_tags(
&self,
question_side: bool,
tr: &I18n,
) -> (String, Vec<pb::AvTag>) {
let mut extractor = AvExtractor::new(question_side, tr);
(extractor.write(self), extractor.tags)
}
pub(super) fn write_with_pretty_av_tags(&self) -> String {
AvPrettifier::new().write(self)
}
}
trait Write {
fn write<'iter, 'nodes: 'iter, T>(&mut self, nodes: T) -> String
where
T: IntoIterator<Item = &'iter Node<'nodes>>,
{
let mut buf = String::new();
for node in nodes {
match node {
Node::Text(s) => self.write_text(&mut buf, s),
Node::SoundOrVideo(r) => self.write_sound(&mut buf, r),
Node::Directive(directive) => self.write_directive(&mut buf, directive),
};
}
buf
}
fn write_text(&mut self, buf: &mut String, txt: &str) {
buf.push_str(txt);
}
fn write_sound(&mut self, buf: &mut String, resource: &str) {
write!(buf, "[sound:{}]", resource).unwrap();
}
fn write_directive(&mut self, buf: &mut String, directive: &Directive) {
match directive {
Directive::Tts(directive) => self.write_tts_directive(buf, directive),
Directive::Other(directive) => self.write_other_directive(buf, directive),
};
}
fn write_tts_directive(&mut self, buf: &mut String, directive: &TtsDirective) {
write!(buf, "[anki:tts").unwrap();
for (key, val) in [
("lang", directive.lang),
("voices", &directive.voices.join(",")),
("speed", &directive.speed.to_string()),
] {
self.write_directive_option(buf, key, val);
}
if let Some(blank) = directive.blank {
self.write_directive_option(buf, "cloze_blank", blank);
}
for (key, val) in &directive.options {
self.write_directive_option(buf, key, val);
}
write!(buf, "]{}[/anki:tts]", directive.content).unwrap();
}
fn write_other_directive(&mut self, buf: &mut String, directive: &OtherDirective) {
write!(buf, "[anki:{}", directive.name).unwrap();
for (key, val) in &directive.options {
self.write_directive_option(buf, key, val);
}
buf.push(']');
self.write_directive_content(buf, directive.content);
write!(buf, "[/anki:{}]", directive.name).unwrap();
}
fn write_directive_option(&mut self, buf: &mut String, key: &str, val: &str) {
if val.contains::<&[char]>(&[']', ' ', '\t', '\r', '\n']) {
write!(buf, " {}=\"{}\"", key, val).unwrap();
} else {
write!(buf, " {}={}", key, val).unwrap();
}
}
fn write_directive_content(&mut self, buf: &mut String, content: &str) {
buf.push_str(content);
}
}
struct AvStripper;
impl AvStripper {
fn new() -> Self {
Self {}
}
}
impl Write for AvStripper {
fn write_sound(&mut self, _buf: &mut String, _resource: &str) {}
fn write_tts_directive(&mut self, _buf: &mut String, _directive: &TtsDirective) {}
}
struct AvExtractor<'a> {
side: char,
tags: Vec<pb::AvTag>,
tr: &'a I18n,
}
impl<'a> AvExtractor<'a> {
fn new(question_side: bool, tr: &'a I18n) -> Self {
Self {
side: if question_side { 'q' } else { 'a' },
tags: vec![],
tr,
}
}
fn write_play_tag(&self, buf: &mut String) {
write!(buf, "[anki:play:{}:{}]", self.side, self.tags.len()).unwrap();
}
fn transform_tts_content(&self, directive: &TtsDirective) -> String {
strip_html_for_tts(directive.content).replace(
"[...]",
directive.blank.unwrap_or(&self.tr.card_templates_blank()),
)
}
}
impl Write for AvExtractor<'_> {
fn write_sound(&mut self, buf: &mut String, resource: &str) {
self.write_play_tag(buf);
self.tags.push(pb::AvTag {
value: Some(pb::av_tag::Value::SoundOrVideo(
decode_entities(resource).into(),
)),
});
}
fn write_tts_directive(&mut self, buf: &mut String, directive: &TtsDirective) {
if let Some(error) = directive.error(self.tr) {
write!(buf, "[{}]", error).unwrap();
return;
}
self.write_play_tag(buf);
self.tags.push(pb::AvTag {
value: Some(pb::av_tag::Value::Tts(pb::TtsTag {
field_text: self.transform_tts_content(directive),
lang: directive.lang.into(),
voices: directive.voices.iter().map(ToString::to_string).collect(),
speed: directive.speed,
other_args: directive
.options
.iter()
.map(|(key, val)| format!("{}={}", key, val))
.collect(),
})),
});
}
}
impl TtsDirective<'_> {
fn error(&self, tr: &I18n) -> Option<String> {
if self.lang.is_empty() {
Some(
tr.errors_bad_directive("anki:tts", tr.errors_option_not_set("lang"))
.into(),
)
} else {
None
}
}
}
struct AvPrettifier;
impl AvPrettifier {
fn new() -> Self {
Self {}
}
}
impl Write for AvPrettifier {
fn write_sound(&mut self, buf: &mut String, resource: &str) {
write!(buf, "🔉{}🔉", resource).unwrap();
}
fn write_tts_directive(&mut self, buf: &mut String, directive: &TtsDirective) {
write!(buf, "💬{}💬", directive.content).unwrap();
}
}
#[cfg(test)]
mod test {
use super::*;
struct Writer;
impl Write for Writer {}
impl Writer {
fn new() -> Self {
Self {}
}
}
/// Parse input, write it out, and assert equality with input or separately
/// passed output.
macro_rules! roundtrip {
($input:expr) => {
assert_eq!($input, Writer::new().write(&CardNodes::parse($input)));
};
($input:expr, $output:expr) => {
assert_eq!(Writer::new().write(&CardNodes::parse($input)), $output);
};
}
#[test]
fn writing() {
roundtrip!("foo");
roundtrip!("[sound:foo]");
roundtrip!("[anki:foo bar=baz]spam[/anki:foo]");
// normalizing (not currently exposed)
roundtrip!(
"[anki:foo\nbar=baz ][/anki:foo]",
"[anki:foo bar=baz][/anki:foo]"
);
}
}
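
The `Write` trait above is designed so that its default methods reproduce the input verbatim (the round-trip checked by the `Writer` test), while each concrete writer overrides only the node kinds it cares about, as `AvStripper`, `AvExtractor` and `AvPrettifier` do. As a hypothetical illustration (not in the commit), a writer that collects sound file names while leaving the text untouched would need a single override; it assumes it lives in this file, where `std::fmt::Write` is already imported and the private `Write` trait is in scope:

```rust
struct SoundCollector {
    files: Vec<String>,
}

impl Write for SoundCollector {
    fn write_sound(&mut self, buf: &mut String, resource: &str) {
        // record the referenced file...
        self.files.push(resource.to_string());
        // ...and emit the tag in its verbatim form, like the default methods do
        write!(buf, "[sound:{}]", resource).unwrap();
    }
}
```

Usage mirrors the built-in writers: `let mut c = SoundCollector { files: vec![] }; let text = c.write(&CardNodes::parse(input));`.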


@ -8,6 +8,7 @@ pub mod backend;
mod backend_proto;
pub mod browser_table;
pub mod card;
pub mod card_rendering;
pub mod cloze;
pub mod collection;
pub mod config;


@ -443,7 +443,6 @@ fn render_into(
.as_slice(),
key,
context,
tr,
),
None => {
// unknown field encountered


@ -9,7 +9,6 @@ use regex::{Captures, Regex};
use crate::{
cloze::{cloze_filter, cloze_only_filter},
i18n::I18n,
template::RenderContext,
text::strip_html,
};
@ -26,7 +25,6 @@ pub(crate) fn apply_filters<'a>(
filters: &[&str],
field_name: &str,
context: &RenderContext,
tr: &I18n,
) -> (Cow<'a, str>, Vec<String>) {
let mut text: Cow<str> = text.into();
@ -38,7 +36,7 @@ pub(crate) fn apply_filters<'a>(
};
for (idx, &filter_name) in filters.iter().enumerate() {
match apply_filter(filter_name, text.as_ref(), field_name, context, tr) {
match apply_filter(filter_name, text.as_ref(), field_name, context) {
(true, None) => {
// filter did not change text
}
@ -69,7 +67,6 @@ fn apply_filter<'a>(
text: &'a str,
field_name: &str,
context: &RenderContext,
tr: &I18n,
) -> (bool, Option<String>) {
let output_text = match filter_name {
"text" => strip_html(text),
@ -84,8 +81,8 @@ fn apply_filter<'a>(
// an empty filter name (caused by using two colons) is ignored
"" => text.into(),
_ => {
if filter_name.starts_with("tts ") {
tts_filter(filter_name, text, tr)
if let Some(options) = filter_name.strip_prefix("tts ") {
tts_filter(options, text).into()
} else {
// unrecognized filter
return (false, None);
@ -194,12 +191,10 @@ return false;">
.into()
}
fn tts_filter(filter_name: &str, text: &str, tr: &I18n) -> Cow<'static, str> {
let args = filter_name.split_once(' ').map_or("", |t| t.1);
let text = text.replace("[...]", &tr.card_templates_blank());
format!("[anki:tts][{}]{}[/anki:tts]", args, text).into()
fn tts_filter(options: &str, text: &str) -> String {
format!("[anki:tts lang={}]{}[/anki:tts]", options, text)
}
// Tests
//----------------------------------------
@ -235,7 +230,6 @@ field</a>
#[test]
fn typing() {
let tr = I18n::template_only();
assert_eq!(type_filter("Front"), "[[type:Front]]");
assert_eq!(type_cloze_filter("Front"), "[[type:cloze:Front]]");
let ctx = RenderContext {
@ -245,7 +239,7 @@ field</a>
card_ord: 0,
};
assert_eq!(
apply_filters("ignored", &["cloze", "type"], "Text", &ctx, &tr),
apply_filters("ignored", &["cloze", "type"], "Text", &ctx),
("[[type:cloze:Text]]".into(), vec![])
);
}
@ -280,17 +274,9 @@ field</a>
#[test]
fn tts() {
let tr = I18n::template_only();
assert_eq!(
tts_filter("tts en_US voices=Bob,Jane", "foo", &tr),
"[anki:tts][en_US voices=Bob,Jane]foo[/anki:tts]"
);
assert_eq!(
tts_filter("tts en_US", "foo [...]", &tr),
format!(
"[anki:tts][en_US]foo {}[/anki:tts]",
tr.card_templates_blank()
)
tts_filter("en_US voices=Bob,Jane", "foo"),
"[anki:tts lang=en_US voices=Bob,Jane]foo[/anki:tts]"
);
}
}


@ -175,32 +175,6 @@ pub fn strip_html_for_tts(html: &str) -> Cow<str> {
out
}
pub fn strip_av_tags(text: &str) -> Cow<str> {
AV_TAGS.replace_all(text, "")
}
/// Extract audio tags from string, replacing them with [anki:play] refs
pub fn extract_av_tags(text: &str, question_side: bool) -> (Cow<str>, Vec<AvTag>) {
let mut tags = vec![];
let context = if question_side { 'q' } else { 'a' };
let replaced_text = AV_TAGS.replace_all(text, |caps: &Captures| {
// extract
let tag = if let Some(av_file) = caps.get(1) {
AvTag::SoundOrVideo(decode_entities(av_file.as_str()).into())
} else {
let args = caps.get(2).unwrap();
let field_text = caps.get(3).unwrap();
tts_tag_from_string(field_text.as_str(), args.as_str())
};
tags.push(tag);
// and replace with reference
format!("[anki:play:{}:{}]", context, tags.len() - 1)
});
(replaced_text, tags)
}
#[derive(Debug)]
pub(crate) struct MediaRef<'a> {
pub full_ref: &'a str,
@ -242,40 +216,6 @@ pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
out
}
fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AvTag {
let mut other_args = vec![];
let mut split_args = args.split_ascii_whitespace();
let lang = split_args.next().unwrap_or("");
let mut voices = None;
let mut speed = 1.0;
for remaining_arg in split_args {
if remaining_arg.starts_with("voices=") {
voices = remaining_arg
.split('=')
.nth(1)
.map(|voices| voices.split(',').map(ToOwned::to_owned).collect());
} else if remaining_arg.starts_with("speed=") {
speed = remaining_arg
.split('=')
.nth(1)
.unwrap()
.parse()
.unwrap_or(1.0);
} else {
other_args.push(remaining_arg.to_owned());
}
}
AvTag::TextToSpeech {
field_text: strip_html_for_tts(field_text).into(),
lang: lang.into(),
voices: voices.unwrap_or_else(Vec::new),
speed,
other_args,
}
}
pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> {
let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} ");
let without_html = strip_html(&without_fnames);
@ -497,32 +437,6 @@ mod test {
assert_eq!(strip_html_preserving_media_filenames("<html>"), "");
}
#[test]
fn audio() {
let s = concat!(
"abc[sound:fo&amp;obar.mp3]def[anki:tts][en_US voices=Bob,Jane speed=1.2]",
"foo b<i><b>a</b>r</i><br>1&gt;2[/anki:tts]gh",
);
assert_eq!(strip_av_tags(s), "abcdefgh");
let (text, tags) = extract_av_tags(s, true);
assert_eq!(text, "abc[anki:play:q:0]def[anki:play:q:1]gh");
assert_eq!(
tags,
vec![
AvTag::SoundOrVideo("fo&obar.mp3".into()),
AvTag::TextToSpeech {
field_text: "foo bar 1>2".into(),
lang: "en_US".into(),
voices: vec!["Bob".into(), "Jane".into()],
other_args: vec![],
speed: 1.2
},
]
);
}
#[test]
fn combining() {
assert!(matches!(without_combining("test"), Cow::Borrowed(_)));