diff --git a/Cargo.lock b/Cargo.lock index ef3debf4a..c42bef561 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,6 +54,7 @@ dependencies = [ "bytes", "chrono", "coarsetime", + "criterion", "env_logger", "flate2", "fluent", @@ -222,6 +223,18 @@ dependencies = [ "digest", ] +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.8.0" @@ -240,6 +253,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" +[[package]] +name = "cast" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" +dependencies = [ + "rustc_version", +] + [[package]] name = "cc" version = "1.0.72" @@ -265,6 +287,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + [[package]] name = "coarsetime" version = "0.1.20" @@ -335,6 +368,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-channel" version = "0.5.1" @@ -345,6 +414,30 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.5" @@ -382,6 +475,28 @@ dependencies = [ "syn", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "derivative" version = "2.2.0" @@ -758,6 +873,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "hashbrown" version = "0.11.2" @@ -1166,6 +1287,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" @@ -1343,6 +1473,12 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "openssl" version = "0.10.38" @@ -1580,6 +1716,34 @@ version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" +[[package]] +name = "plotters" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" + +[[package]] +name = "plotters-svg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +dependencies = [ + "plotters-backend", +] + [[package]] name = "ppv-lite86" version = "0.2.15" @@ -1836,6 +2000,31 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.2.10" @@ -1866,6 +2055,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.25" @@ -2123,6 +2318,16 @@ dependencies = [ "serde_json", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.130" @@ -2423,6 +2628,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thin-slice" version = "0.1.1" @@ -2475,6 +2689,16 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1" +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = 
"tinyvec" version = "1.5.1" diff --git a/ftl/core/errors.ftl b/ftl/core/errors.ftl index f1a4c5934..fdab5a2de 100644 --- a/ftl/core/errors.ftl +++ b/ftl/core/errors.ftl @@ -9,3 +9,8 @@ errors-100-tags-max = is no need to select child tags if you have selected a parent tag. errors-multiple-notetypes-selected = Please select notes from only one notetype. errors-please-check-database = Please use the Check Database action, then try again. + +## Card Rendering + +errors-bad-directive = Error in directive '{ $directive }': { $error } +errors-option-not-set = '{ $option }' not set diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index 1b622d6f6..9d6c1e016 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -11,6 +11,19 @@ build = "build/main.rs" name = "anki" path = "src/lib.rs" +[features] +bench = ["criterion"] +links = ["linkcheck"] + +[[test]] +name = "links" +required-features = ["links"] + +[[bench]] +name = "benchmark" +harness = false +required-features = ["bench"] + # After updating anything below, run ../cargo/update.py [build-dependencies] @@ -18,7 +31,6 @@ prost-build = "0.9.0" [dev-dependencies] env_logger = "0.9.0" -linkcheck = { git = "https://github.com/ankitects/linkcheck.git", rev = "2f20798ce521cc594d510d4e417e76d5eac04d4b" } tokio = { version = "1.12.0", features = ["macros"] } [dependencies] @@ -27,6 +39,9 @@ unicase = "=2.6.0" anki_i18n = { path="i18n" } +criterion = { version = "0.3.5", optional = true } +linkcheck = { git = "https://github.com/ankitects/linkcheck.git", rev = "2f20798ce521cc594d510d4e417e76d5eac04d4b", optional = true } + nom = "7.0.0" proc-macro-nested = "0.1.7" slog-term = "2.8.0" diff --git a/rslib/bench.sh b/rslib/bench.sh new file mode 100755 index 000000000..076ac2a04 --- /dev/null +++ b/rslib/bench.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +cargo criterion --bench benchmark --features bench diff --git a/rslib/benches/benchmark.rs b/rslib/benches/benchmark.rs new file mode 100644 index 000000000..90cc64d1e --- /dev/null +++ 
b/rslib/benches/benchmark.rs @@ -0,0 +1,13 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use anki::card_rendering::anki_directive_benchmark; +use criterion::{criterion_group, criterion_main, Criterion}; + +/// Registers the `anki_tag_parse` benchmark, which times parsing a sample string of Anki directives. +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("anki_tag_parse", |b| b.iter(|| anki_directive_benchmark())); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/rslib/src/backend/cardrendering.rs b/rslib/src/backend/cardrendering.rs index 6606c7ed1..3e0de964c 100644 --- a/rslib/src/backend/cardrendering.rs +++ b/rslib/src/backend/cardrendering.rs @@ -5,14 +5,15 @@ use super::Backend; pub(super) use crate::backend_proto::cardrendering_service::Service as CardRenderingService; use crate::{ backend_proto as pb, + card_rendering::{extract_av_tags, strip_av_tags}, latex::{extract_latex, extract_latex_expanding_clozes, ExtractedLatex}, markdown::render_markdown, notetype::{CardTemplateSchema11, RenderCardOutput}, prelude::*, template::RenderedNode, text::{ - decode_iri_paths, encode_iri_paths, extract_av_tags, sanitize_html_no_images, - strip_av_tags, strip_html, strip_html_preserving_media_filenames, AvTag, + decode_iri_paths, encode_iri_paths, sanitize_html_no_images, strip_html, + strip_html_preserving_media_filenames, }, }; @@ -21,34 +22,10 @@ impl CardRenderingService for Backend { &self, input: pb::ExtractAvTagsRequest, ) -> Result { - let (text, tags) = extract_av_tags(&input.text, input.question_side); - let pt_tags = tags - .into_iter() - .map(|avtag| match avtag { - AvTag::SoundOrVideo(file) => pb::AvTag { - value: Some(pb::av_tag::Value::SoundOrVideo(file)), - }, - AvTag::TextToSpeech { - field_text, - lang, - voices, - other_args, - speed, - } => pb::AvTag { - value: Some(pb::av_tag::Value::Tts(pb::TtsTag { - field_text, - lang, - voices, - speed, - other_args, - })), - }, - }) - .collect(); - + let out =
extract_av_tags(input.text, input.question_side, self.i18n()); Ok(pb::ExtractAvTagsResponse { - text: text.into(), - av_tags: pt_tags, + text: out.0, + av_tags: out.1, }) } @@ -140,9 +117,7 @@ impl CardRenderingService for Backend { } fn strip_av_tags(&self, input: pb::String) -> Result { - Ok(pb::String { - val: strip_av_tags(&input.val).into(), - }) + Ok(strip_av_tags(input.val).into()) } fn render_markdown(&self, input: pb::RenderMarkdownRequest) -> Result { diff --git a/rslib/src/browser_table.rs b/rslib/src/browser_table.rs index c1fe4a3cd..9fefe4bf3 100644 --- a/rslib/src/browser_table.rs +++ b/rslib/src/browser_table.rs @@ -9,11 +9,12 @@ use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; use crate::{ backend_proto as pb, card::{CardQueue, CardType}, + card_rendering::prettify_av_tags, notetype::{CardTemplate, NotetypeKind}, prelude::*, scheduler::{timespan::time_span, timing::SchedTimingToday}, template::RenderedNode, - text::{extract_av_tags, html_to_text_line}, + text::html_to_text_line, }; #[derive(Debug, PartialEq, Clone, Copy, Display, EnumIter, EnumString)] @@ -270,7 +271,7 @@ impl RenderContext { } => current_text, }) .join(""); - let question = extract_av_tags(&qnodes_text, true).0.to_string(); + let question = prettify_av_tags(qnodes_text); Ok(RenderContext { question, @@ -410,7 +411,7 @@ impl RowContext { } => current_text, }) .join(""); - let answer = extract_av_tags(&answer, false).0; + let answer = prettify_av_tags(answer); html_to_text_line( if let Some(stripped) = answer.strip_prefix(&render_context.question) { stripped diff --git a/rslib/src/card_rendering/mod.rs b/rslib/src/card_rendering/mod.rs new file mode 100644 index 000000000..bfb4c5ff4 --- /dev/null +++ b/rslib/src/card_rendering/mod.rs @@ -0,0 +1,153 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::collections::HashMap; + +use crate::{backend_proto as pb, prelude::*}; + +mod 
parser; +mod writer; + +pub fn strip_av_tags + AsRef>(txt: S) -> String { + nodes_or_text_only(txt.as_ref()) + .map(|nodes| nodes.write_without_av_tags()) + .unwrap_or_else(|| txt.into()) +} + +pub fn extract_av_tags + AsRef>( + txt: S, + question_side: bool, + tr: &I18n, +) -> (String, Vec) { + nodes_or_text_only(txt.as_ref()) + .map(|nodes| nodes.write_and_extract_av_tags(question_side, tr)) + .unwrap_or_else(|| (txt.into(), vec![])) +} + +pub fn prettify_av_tags + AsRef>(txt: S) -> String { + nodes_or_text_only(txt.as_ref()) + .map(|nodes| nodes.write_with_pretty_av_tags()) + .unwrap_or_else(|| txt.into()) +} + +/// Parse `txt` into [CardNodes] and return the result, +/// or [None] if it is only a text node. +fn nodes_or_text_only(txt: &str) -> Option { + let nodes = CardNodes::parse(txt); + match nodes.0[..] { + [Node::Text(_)] => None, + _ => Some(nodes), + } +} + +#[derive(Debug, PartialEq)] +struct CardNodes<'a>(Vec>); + +impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> { + type Item = &'iter Node<'nodes>; + type IntoIter = std::slice::Iter<'iter, Node<'nodes>>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +#[derive(Debug, PartialEq)] +enum Node<'a> { + Text(&'a str), + SoundOrVideo(&'a str), + Directive(Directive<'a>), +} + +#[derive(Debug, PartialEq)] +enum Directive<'a> { + Tts(TtsDirective<'a>), + Other(OtherDirective<'a>), +} + +#[derive(Debug, PartialEq)] +struct TtsDirective<'a> { + content: &'a str, + lang: &'a str, + voices: Vec<&'a str>, + speed: f32, + blank: Option<&'a str>, + options: HashMap<&'a str, &'a str>, +} + +#[derive(Debug, PartialEq)] +struct OtherDirective<'a> { + name: &'a str, + content: &'a str, + options: HashMap<&'a str, &'a str>, +} + +#[cfg(feature = "bench")] +#[inline] +pub fn anki_directive_benchmark() { + CardNodes::parse("[anki:foo bar=baz][/anki:foo][anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]"); +} + +#[cfg(test)] +mod test { + use super::*; + + /// 
Strip av tags and assert equality with input or separately passed output. + macro_rules! assert_av_stripped { + ($input:expr) => { + assert_eq!($input, strip_av_tags($input)); + }; + ($input:expr, $output:expr) => { + assert_eq!(strip_av_tags($input), $output); + }; + } + + #[test] + fn av_stripping() { + assert_av_stripped!("foo [sound:bar] baz", "foo baz"); + assert_av_stripped!("[anki:tts bar=baz]spam[/anki:tts]", ""); + assert_av_stripped!("[anki:foo bar=baz]spam[/anki:foo]"); + } + + #[test] + fn av_extracting() { + let tr = I18n::template_only(); + let (txt, tags) = extract_av_tags( + "foo [sound:bar.mp3] baz [anki:tts lang=en_US][...][/anki:tts]", + true, + &tr, + ); + assert_eq!( + (txt.as_str(), tags), + ( + "foo [anki:play:q:0] baz [anki:play:q:1]", + vec![ + pb::AvTag { + value: Some(pb::av_tag::Value::SoundOrVideo("bar.mp3".to_string())) + }, + pb::AvTag { + value: Some(pb::av_tag::Value::Tts(pb::TtsTag { + field_text: tr.card_templates_blank().to_string(), + lang: "en_US".to_string(), + voices: vec![], + speed: 1.0, + other_args: vec![], + })) + } + ], + ), + ); + + assert_eq!( + extract_av_tags("[anki:tts]foo[/anki:tts]", true, &tr), + ( + format!( + "[{}]", + tr.errors_bad_directive("anki:tts", tr.errors_option_not_set("lang")) + .to_owned() + ), + vec![], + ), + ); + } +} diff --git a/rslib/src/card_rendering/parser.rs b/rslib/src/card_rendering/parser.rs new file mode 100644 index 000000000..2957384eb --- /dev/null +++ b/rslib/src/card_rendering/parser.rs @@ -0,0 +1,260 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::collections::HashMap; + +use nom::{ + branch::alt, + bytes::complete::{is_not, tag}, + character::complete::{anychar, multispace0}, + combinator::{map, not, recognize, success, value}, + multi::{many0, many1}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, +}; + +use super::{CardNodes, Directive, Node, 
OtherDirective, TtsDirective}; + +type IResult<'a, O> = nom::IResult<&'a str, O>; + +impl<'a> CardNodes<'a> { + pub(super) fn parse(mut txt: &'a str) -> Self { + let mut nodes = Vec::new(); + while let Ok((remaining, node)) = node(txt) { + txt = remaining; + nodes.push(node); + } + + Self(nodes) + } +} + +impl<'a> Directive<'a> { + fn new(name: &'a str, options: Vec<(&'a str, &'a str)>, content: &'a str) -> Self { + match name { + "tts" => { + let mut lang = ""; + let mut voices = vec![]; + let mut speed = 1.0; + let mut blank = None; + let mut other_options = HashMap::new(); + + for option in options { + match option.0 { + "lang" => lang = option.1, + "voices" => voices = option.1.split(',').collect(), + "speed" => speed = option.1.parse().unwrap_or(1.0), + "cloze_blank" => blank = Some(option.1), + _ => { + other_options.insert(option.0, option.1); + } + } + } + + Self::Tts(TtsDirective { + content, + lang, + voices, + speed, + blank, + options: other_options, + }) + } + _ => Self::Other(OtherDirective { + name, + content, + options: options.into_iter().collect(), + }), + } + } +} + +/// Consume 0 or more of anything in " \t\r\n" after `parser`. +fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult +where + P: FnMut(&'s str) -> IResult + 'parser, +{ + terminated(parser, multispace0) +} + +/// Parse until char in `arr` is found. Always succeeds. +fn is_not0<'parser, 'arr: 'parser, 's: 'parser>( + arr: &'arr str, +) -> impl FnMut(&'s str) -> IResult<&'s str> + 'parser { + alt((is_not(arr), success(""))) +} + +fn node(s: &str) -> IResult { + alt((text_node, sound_node, tag_node))(s) +} + +/// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or video file. +fn sound_node(s: &str) -> IResult { + map( + delimited(tag("[sound:"), is_not("]"), tag("]")), + Node::SoundOrVideo, + )(s) +} + +/// An Anki tag `[anki:tag...]...[/anki:tag]`. 
+fn tag_node(s: &str) -> IResult { + /// Match the start of an opening tag and return its name. + fn name(s: &str) -> IResult<&str> { + preceded(tag("[anki:"), is_not("] \t\r\n"))(s) + } + + /// Return a parser to match an opening `name` tag and return its options. + fn opening_parser<'name, 's: 'name>( + name: &'name str, + ) -> impl FnMut(&'s str) -> IResult> + 'name { + /// List of whitespace-separated `key=val` tuples, where `val` may be empty. + fn options(s: &str) -> IResult> { + fn key(s: &str) -> IResult<&str> { + is_not("] \t\r\n=")(s) + } + + fn val(s: &str) -> IResult<&str> { + alt(( + delimited(tag("\""), is_not0("\""), tag("\"")), + is_not0("] \t\r\n\""), + ))(s) + } + + many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s) + } + + delimited( + pair(tag("[anki:"), trailing_whitespace0(tag(name))), + options, + tag("]"), + ) + } + + /// Return a parser to match a closing `name` tag. + fn closing_parser<'parser, 'name: 'parser, 's: 'parser>( + name: &'name str, + ) -> impl FnMut(&'s str) -> IResult<()> + 'parser { + value((), tuple((tag("[/anki:"), tag(name), tag("]")))) + } + + /// Return a parser to match and return anything until a closing `name` tag is found. + fn content_parser<'parser, 'name: 'parser, 's: 'parser>( + name: &'name str, + ) -> impl FnMut(&'s str) -> IResult<&str> + 'parser { + recognize(many0(pair(not(closing_parser(name)), anychar))) + } + + let (_, tag_name) = name(s)?; + map( + terminated( + pair(opening_parser(tag_name), content_parser(tag_name)), + closing_parser(tag_name), + ), + |(options, content)| Node::Directive(Directive::new(tag_name, options, content)), + )(s) +} + +fn text_node(s: &str) -> IResult { + map( + recognize(many1(pair(not(alt((sound_node, tag_node))), anychar))), + Node::Text, + )(s) +} + +#[cfg(test)] +mod test { + use super::*; + + macro_rules! 
assert_parsed_nodes { + ($txt:expr $(, $node:expr)*) => { + assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*])); + } + } + + #[test] + fn parsing() { + use Node::*; + + // empty + assert_parsed_nodes!(""); + + // text + assert_parsed_nodes!("foo", Text("foo")); + // broken sound/tags are just text as well + assert_parsed_nodes!("[sound:]", Text("[sound:]")); + assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:][/anki:]")); + assert_parsed_nodes!("[anki:foo][/anki:bar]", Text("[anki:foo][/anki:bar]")); + + // sound + assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo")); + assert_parsed_nodes!( + "foo [sound:bar] baz", + Text("foo "), + SoundOrVideo("bar"), + Text(" baz") + ); + assert_parsed_nodes!( + "[sound:foo][sound:bar]", + SoundOrVideo("foo"), + SoundOrVideo("bar") + ); + + // tags + assert_parsed_nodes!( + "[anki:foo]bar[/anki:foo]", + Directive(super::Directive::Other(OtherDirective { + name: "foo", + content: "bar", + options: HashMap::new() + })) + ); + assert_parsed_nodes!( + "[anki:foo bar=baz][/anki:foo]", + Directive(super::Directive::Other(OtherDirective { + name: "foo", + content: "", + options: [("bar", "baz")].into_iter().collect(), + })) + ); + // unquoted white space separates options, "]" terminates + assert_parsed_nodes!( + "[anki:foo\na=b\tc=d e=f][/anki:foo]", + Directive(super::Directive::Other(OtherDirective { + name: "foo", + content: "", + options: [("a", "b"), ("c", "d"), ("e", "f")].into_iter().collect(), + })) + ); + assert_parsed_nodes!( + "[anki:foo a=\"b \t\n c ]\"][/anki:foo]", + Directive(super::Directive::Other(OtherDirective { + name: "foo", + content: "", + options: [("a", "b \t\n c ]")].into_iter().collect(), + })) + ); + + // tts tags + assert_parsed_nodes!( + "[anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]", + Directive(super::Directive::Tts(TtsDirective { + content: "", + lang: "jp_JP", + voices: vec!["Alice", "Bob"], + speed: 0.5, + blank: Some(""), + options: [("bar", 
"baz")].into_iter().collect(), + })) + ); + assert_parsed_nodes!( + "[anki:tts speed=foo][/anki:tts]", + Directive(super::Directive::Tts(TtsDirective { + content: "", + lang: "", + voices: vec![], + speed: 1.0, + blank: None, + options: HashMap::new(), + })) + ); + } +} diff --git a/rslib/src/card_rendering/writer.rs b/rslib/src/card_rendering/writer.rs new file mode 100644 index 000000000..4f9126558 --- /dev/null +++ b/rslib/src/card_rendering/writer.rs @@ -0,0 +1,246 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::fmt::Write as _; + +use super::{CardNodes, Directive, Node, OtherDirective, TtsDirective}; +use crate::prelude::*; +use crate::{ + backend_proto as pb, + text::{decode_entities, strip_html_for_tts}, +}; + +impl<'a> CardNodes<'a> { + pub(super) fn write_without_av_tags(&self) -> String { + AvStripper::new().write(self) + } + + pub(super) fn write_and_extract_av_tags( + &self, + question_side: bool, + tr: &I18n, + ) -> (String, Vec) { + let mut extractor = AvExtractor::new(question_side, tr); + (extractor.write(self), extractor.tags) + } + + pub(super) fn write_with_pretty_av_tags(&self) -> String { + AvPrettifier::new().write(self) + } +} + +trait Write { + fn write<'iter, 'nodes: 'iter, T>(&mut self, nodes: T) -> String + where + T: IntoIterator>, + { + let mut buf = String::new(); + for node in nodes { + match node { + Node::Text(s) => self.write_text(&mut buf, s), + Node::SoundOrVideo(r) => self.write_sound(&mut buf, r), + Node::Directive(directive) => self.write_directive(&mut buf, directive), + }; + } + buf + } + + fn write_text(&mut self, buf: &mut String, txt: &str) { + buf.push_str(txt); + } + + fn write_sound(&mut self, buf: &mut String, resource: &str) { + write!(buf, "[sound:{}]", resource).unwrap(); + } + + fn write_directive(&mut self, buf: &mut String, directive: &Directive) { + match directive { + Directive::Tts(directive) => 
self.write_tts_directive(buf, directive), + Directive::Other(directive) => self.write_other_directive(buf, directive), + }; + } + + fn write_tts_directive(&mut self, buf: &mut String, directive: &TtsDirective) { + write!(buf, "[anki:tts").unwrap(); + + for (key, val) in [ + ("lang", directive.lang), + ("voices", &directive.voices.join(",")), + ("speed", &directive.speed.to_string()), + ] { + self.write_directive_option(buf, key, val); + } + if let Some(blank) = directive.blank { + self.write_directive_option(buf, "cloze_blank", blank); + } + for (key, val) in &directive.options { + self.write_directive_option(buf, key, val); + } + + write!(buf, "]{}[/anki:tts]", directive.content).unwrap(); + } + + fn write_other_directive(&mut self, buf: &mut String, directive: &OtherDirective) { + write!(buf, "[anki:{}", directive.name).unwrap(); + for (key, val) in &directive.options { + self.write_directive_option(buf, key, val); + } + buf.push(']'); + self.write_directive_content(buf, directive.content); + write!(buf, "[/anki:{}]", directive.name).unwrap(); + } + + fn write_directive_option(&mut self, buf: &mut String, key: &str, val: &str) { + if val.contains::<&[char]>(&[']', ' ', '\t', '\r', '\n']) { + write!(buf, " {}=\"{}\"", key, val).unwrap(); + } else { + write!(buf, " {}={}", key, val).unwrap(); + } + } + + fn write_directive_content(&mut self, buf: &mut String, content: &str) { + buf.push_str(content); + } +} + +struct AvStripper; + +impl AvStripper { + fn new() -> Self { + Self {} + } +} + +impl Write for AvStripper { + fn write_sound(&mut self, _buf: &mut String, _resource: &str) {} + + fn write_tts_directive(&mut self, _buf: &mut String, _directive: &TtsDirective) {} +} + +struct AvExtractor<'a> { + side: char, + tags: Vec, + tr: &'a I18n, +} + +impl<'a> AvExtractor<'a> { + fn new(question_side: bool, tr: &'a I18n) -> Self { + Self { + side: if question_side { 'q' } else { 'a' }, + tags: vec![], + tr, + } + } + + fn write_play_tag(&self, buf: &mut String) { + 
write!(buf, "[anki:play:{}:{}]", self.side, self.tags.len()).unwrap(); + } + + fn transform_tts_content(&self, directive: &TtsDirective) -> String { + strip_html_for_tts(directive.content).replace( + "[...]", + directive.blank.unwrap_or(&self.tr.card_templates_blank()), + ) + } +} + +impl Write for AvExtractor<'_> { + fn write_sound(&mut self, buf: &mut String, resource: &str) { + self.write_play_tag(buf); + self.tags.push(pb::AvTag { + value: Some(pb::av_tag::Value::SoundOrVideo( + decode_entities(resource).into(), + )), + }); + } + + fn write_tts_directive(&mut self, buf: &mut String, directive: &TtsDirective) { + if let Some(error) = directive.error(self.tr) { + write!(buf, "[{}]", error).unwrap(); + return; + } + + self.write_play_tag(buf); + self.tags.push(pb::AvTag { + value: Some(pb::av_tag::Value::Tts(pb::TtsTag { + field_text: self.transform_tts_content(directive), + lang: directive.lang.into(), + voices: directive.voices.iter().map(ToString::to_string).collect(), + speed: directive.speed, + other_args: directive + .options + .iter() + .map(|(key, val)| format!("{}={}", key, val)) + .collect(), + })), + }); + } +} + +impl TtsDirective<'_> { + fn error(&self, tr: &I18n) -> Option { + if self.lang.is_empty() { + Some( + tr.errors_bad_directive("anki:tts", tr.errors_option_not_set("lang")) + .into(), + ) + } else { + None + } + } +} + +struct AvPrettifier; + +impl AvPrettifier { + fn new() -> Self { + Self {} + } +} + +impl Write for AvPrettifier { + fn write_sound(&mut self, buf: &mut String, resource: &str) { + write!(buf, "🔉{}🔉", resource).unwrap(); + } + + fn write_tts_directive(&mut self, buf: &mut String, directive: &TtsDirective) { + write!(buf, "💬{}💬", directive.content).unwrap(); + } +} + +#[cfg(test)] +mod test { + use super::*; + + struct Writer; + impl Write for Writer {} + impl Writer { + fn new() -> Self { + Self {} + } + } + + /// Parse input, write it out, and assert equality with input or separately + /// passed output. + macro_rules! 
roundtrip { + ($input:expr) => { + assert_eq!($input, Writer::new().write(&CardNodes::parse($input))); + }; + ($input:expr, $output:expr) => { + assert_eq!(Writer::new().write(&CardNodes::parse($input)), $output); + }; + } + + #[test] + fn writing() { + roundtrip!("foo"); + roundtrip!("[sound:foo]"); + roundtrip!("[anki:foo bar=baz]spam[/anki:foo]"); + + // normalizing (not currently exposed) + roundtrip!( + "[anki:foo\nbar=baz ][/anki:foo]", + "[anki:foo bar=baz][/anki:foo]" + ); + } +} diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index e3ae998c8..8a2a46115 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -8,6 +8,7 @@ pub mod backend; mod backend_proto; pub mod browser_table; pub mod card; +pub mod card_rendering; pub mod cloze; pub mod collection; pub mod config; diff --git a/rslib/src/template.rs b/rslib/src/template.rs index 9628033df..876a276e0 100644 --- a/rslib/src/template.rs +++ b/rslib/src/template.rs @@ -443,7 +443,6 @@ fn render_into( .as_slice(), key, context, - tr, ), None => { // unknown field encountered diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs index 81b002668..b799983b3 100644 --- a/rslib/src/template_filters.rs +++ b/rslib/src/template_filters.rs @@ -9,7 +9,6 @@ use regex::{Captures, Regex}; use crate::{ cloze::{cloze_filter, cloze_only_filter}, - i18n::I18n, template::RenderContext, text::strip_html, }; @@ -26,7 +25,6 @@ pub(crate) fn apply_filters<'a>( filters: &[&str], field_name: &str, context: &RenderContext, - tr: &I18n, ) -> (Cow<'a, str>, Vec) { let mut text: Cow = text.into(); @@ -38,7 +36,7 @@ pub(crate) fn apply_filters<'a>( }; for (idx, &filter_name) in filters.iter().enumerate() { - match apply_filter(filter_name, text.as_ref(), field_name, context, tr) { + match apply_filter(filter_name, text.as_ref(), field_name, context) { (true, None) => { // filter did not change text } @@ -69,7 +67,6 @@ fn apply_filter<'a>( text: &'a str, field_name: &str, context: &RenderContext, - tr: &I18n, ) -> 
(bool, Option) { let output_text = match filter_name { "text" => strip_html(text), @@ -84,8 +81,8 @@ fn apply_filter<'a>( // an empty filter name (caused by using two colons) is ignored "" => text.into(), _ => { - if filter_name.starts_with("tts ") { - tts_filter(filter_name, text, tr) + if let Some(options) = filter_name.strip_prefix("tts ") { + tts_filter(options, text).into() } else { // unrecognized filter return (false, None); @@ -194,12 +191,11 @@ return false;"> .into() } -fn tts_filter(filter_name: &str, text: &str, tr: &I18n) -> Cow<'static, str> { - let args = filter_name.split_once(' ').map_or("", |t| t.1); - let text = text.replace("[...]", &tr.card_templates_blank()); - - format!("[anki:tts][{}]{}[/anki:tts]", args, text).into() +/// Wrap `text` in an `[anki:tts]` directive; `options` begins with the language, and leading whitespace is dropped so `lang=` gets a value. +fn tts_filter(options: &str, text: &str) -> String { + format!("[anki:tts lang={}]{}[/anki:tts]", options.trim_start(), text) } + // Tests //---------------------------------------- @@ -235,7 +231,6 @@ field #[test] fn typing() { - let tr = I18n::template_only(); assert_eq!(type_filter("Front"), "[[type:Front]]"); assert_eq!(type_cloze_filter("Front"), "[[type:cloze:Front]]"); let ctx = RenderContext { @@ -245,7 +240,7 @@ field card_ord: 0, }; assert_eq!( - apply_filters("ignored", &["cloze", "type"], "Text", &ctx, &tr), + apply_filters("ignored", &["cloze", "type"], "Text", &ctx), ("[[type:cloze:Text]]".into(), vec![]) ); } @@ -280,17 +275,9 @@ field #[test] fn tts() { - let tr = I18n::template_only(); assert_eq!( - tts_filter("tts en_US voices=Bob,Jane", "foo", &tr), - "[anki:tts][en_US voices=Bob,Jane]foo[/anki:tts]" - ); - assert_eq!( - tts_filter("tts en_US", "foo [...]", &tr), - format!( - "[anki:tts][en_US]foo {}[/anki:tts]", - tr.card_templates_blank() - ) + tts_filter("en_US voices=Bob,Jane", "foo"), + "[anki:tts lang=en_US voices=Bob,Jane]foo[/anki:tts]" ); } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index 02e7d3321..ff2d6ed54 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -175,32 +175,6 @@ pub fn
strip_html_for_tts(html: &str) -> Cow { out } -pub fn strip_av_tags(text: &str) -> Cow { - AV_TAGS.replace_all(text, "") -} - -/// Extract audio tags from string, replacing them with [anki:play] refs -pub fn extract_av_tags(text: &str, question_side: bool) -> (Cow, Vec) { - let mut tags = vec![]; - let context = if question_side { 'q' } else { 'a' }; - let replaced_text = AV_TAGS.replace_all(text, |caps: &Captures| { - // extract - let tag = if let Some(av_file) = caps.get(1) { - AvTag::SoundOrVideo(decode_entities(av_file.as_str()).into()) - } else { - let args = caps.get(2).unwrap(); - let field_text = caps.get(3).unwrap(); - tts_tag_from_string(field_text.as_str(), args.as_str()) - }; - tags.push(tag); - - // and replace with reference - format!("[anki:play:{}:{}]", context, tags.len() - 1) - }); - - (replaced_text, tags) -} - #[derive(Debug)] pub(crate) struct MediaRef<'a> { pub full_ref: &'a str, @@ -242,40 +216,6 @@ pub(crate) fn extract_media_refs(text: &str) -> Vec { out } -fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AvTag { - let mut other_args = vec![]; - let mut split_args = args.split_ascii_whitespace(); - let lang = split_args.next().unwrap_or(""); - let mut voices = None; - let mut speed = 1.0; - - for remaining_arg in split_args { - if remaining_arg.starts_with("voices=") { - voices = remaining_arg - .split('=') - .nth(1) - .map(|voices| voices.split(',').map(ToOwned::to_owned).collect()); - } else if remaining_arg.starts_with("speed=") { - speed = remaining_arg - .split('=') - .nth(1) - .unwrap() - .parse() - .unwrap_or(1.0); - } else { - other_args.push(remaining_arg.to_owned()); - } - } - - AvTag::TextToSpeech { - field_text: strip_html_for_tts(field_text).into(), - lang: lang.into(), - voices: voices.unwrap_or_else(Vec::new), - speed, - other_args, - } -} - pub fn strip_html_preserving_media_filenames(html: &str) -> Cow { let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} "); let without_html = 
strip_html(&without_fnames); @@ -497,32 +437,6 @@ mod test { assert_eq!(strip_html_preserving_media_filenames(""), ""); } - #[test] - fn audio() { - let s = concat!( - "abc[sound:fo&obar.mp3]def[anki:tts][en_US voices=Bob,Jane speed=1.2]", - "foo bar
1>2[/anki:tts]gh", - ); - assert_eq!(strip_av_tags(s), "abcdefgh"); - - let (text, tags) = extract_av_tags(s, true); - assert_eq!(text, "abc[anki:play:q:0]def[anki:play:q:1]gh"); - - assert_eq!( - tags, - vec![ - AvTag::SoundOrVideo("fo&obar.mp3".into()), - AvTag::TextToSpeech { - field_text: "foo bar 1>2".into(), - lang: "en_US".into(), - voices: vec!["Bob".into(), "Jane".into()], - other_args: vec![], - speed: 1.2 - }, - ] - ); - } - #[test] fn combining() { assert!(matches!(without_combining("test"), Cow::Borrowed(_)));