From 939bddd5d622a60b47b4b7567562dce9bbaf40fe Mon Sep 17 00:00:00 2001 From: RumovZ Date: Wed, 15 Dec 2021 08:44:37 +0100 Subject: [PATCH] Add new `card_rendering` mod Parses a text with av/tts tags and strips or extracts tags. --- rslib/src/card_rendering/mod.rs | 112 +++++++++++++ rslib/src/card_rendering/parser.rs | 261 +++++++++++++++++++++++++++++ rslib/src/card_rendering/writer.rs | 210 +++++++++++++++++++++++ rslib/src/lib.rs | 1 + 4 files changed, 584 insertions(+) create mode 100644 rslib/src/card_rendering/mod.rs create mode 100644 rslib/src/card_rendering/parser.rs create mode 100644 rslib/src/card_rendering/writer.rs diff --git a/rslib/src/card_rendering/mod.rs b/rslib/src/card_rendering/mod.rs new file mode 100644 index 000000000..bab4688e8 --- /dev/null +++ b/rslib/src/card_rendering/mod.rs @@ -0,0 +1,112 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::collections::HashMap; + +use crate::backend_proto as pb; +use crate::prelude::*; + +mod parser; +mod writer; + +pub fn strip_av_tags(txt: &str) -> String { + CardNodes::parse(txt).write_without_av_tags() +} + +pub fn extract_av_tags(txt: &str, question_side: bool, tr: &I18n) -> (String, Vec<pb::AvTag>) { + CardNodes::parse(txt).write_and_extract_av_tags(question_side, tr) +} + +#[derive(Debug, PartialEq)] +struct CardNodes<'a>(Vec<Node<'a>>); + +impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> { + type Item = &'iter Node<'nodes>; + type IntoIter = std::slice::Iter<'iter, Node<'nodes>>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +#[derive(Debug, PartialEq)] +enum Node<'a> { + Text(&'a str), + SoundOrVideo(&'a str), + Tag(Tag<'a>), +} + +#[derive(Debug, PartialEq)] +enum Tag<'a> { + Tts(TtsTag<'a>), + Other(OtherTag<'a>), +} + +#[derive(Debug, PartialEq)] +struct TtsTag<'a> { + content: &'a str, + lang: &'a str, + voices: Vec<&'a str>, + speed: f32, + blank: Option<&'a str>, + options:
HashMap<&'a str, &'a str>, +} + +#[derive(Debug, PartialEq)] +struct OtherTag<'a> { + name: &'a str, + content: &'a str, + options: HashMap<&'a str, &'a str>, +} + +#[cfg(test)] +mod test { + use super::*; + + /// Strip av tags and assert equality with input or separately passed output. + macro_rules! assert_av_stripped { + ($input:expr) => { + assert_eq!($input, strip_av_tags($input)); + }; + ($input:expr, $output:expr) => { + assert_eq!(strip_av_tags($input), $output); + }; + } + + #[test] + fn av_stripping() { + assert_av_stripped!("foo [sound:bar] baz", "foo  baz"); + assert_av_stripped!("[anki:tts bar=baz]spam[/anki:tts]", ""); + assert_av_stripped!("[anki:foo bar=baz]spam[/anki:foo]"); + } + + #[test] + fn av_extracting() { + let tr = I18n::template_only(); + let (txt, tags) = extract_av_tags( + "foo [sound:bar.mp3] baz [anki:tts][...][/anki:tts]", + true, + &tr, + ); + assert_eq!( + (txt.as_str(), tags), + ( + "foo [anki:play:q:0] baz [anki:play:q:1]", + vec![ + pb::AvTag { + value: Some(pb::av_tag::Value::SoundOrVideo("bar.mp3".to_string())) + }, + pb::AvTag { + value: Some(pb::av_tag::Value::Tts(pb::TtsTag { + field_text: tr.card_templates_blank().to_string(), + lang: "".to_string(), + voices: vec![], + speed: 1.0, + other_args: vec![], + })) + } + ], + ), + ); + } +} diff --git a/rslib/src/card_rendering/parser.rs b/rslib/src/card_rendering/parser.rs new file mode 100644 index 000000000..5fc32ca68 --- /dev/null +++ b/rslib/src/card_rendering/parser.rs @@ -0,0 +1,261 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::collections::HashMap; + +use nom::{ + branch::alt, + bytes::complete::is_not, + bytes::complete::tag, + character::complete::{anychar, multispace0}, + combinator::{map, not, recognize, success, value}, + multi::{many0, many1}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, +}; + +use super::{CardNodes, Node, OtherTag, Tag,
TtsTag}; + +type IResult<'a, O> = nom::IResult<&'a str, O>; + +impl<'a> CardNodes<'a> { + pub(super) fn parse(mut txt: &'a str) -> Self { + let mut nodes = Vec::new(); + while let Ok((remaining, node)) = node(txt) { + txt = remaining; + nodes.push(node); + } + + Self(nodes) + } +} + +impl<'a> Tag<'a> { + fn new(name: &'a str, options: Vec<(&'a str, &'a str)>, content: &'a str) -> Self { + match name { + "tts" => { + let mut lang = ""; + let mut voices = vec![]; + let mut speed = 1.0; + let mut blank = None; + let mut other_options = HashMap::new(); + + for option in options { + match option.0 { + "lang" => lang = option.1, + "voices" => voices = option.1.split(',').collect(), + "speed" => speed = option.1.parse().unwrap_or(1.0), + "cloze_blank" => blank = Some(option.1), + _ => { + other_options.insert(option.0, option.1); + } + } + } + + Self::Tts(TtsTag { + content, + lang, + voices, + speed, + blank, + options: other_options, + }) + } + _ => Self::Other(OtherTag { + name, + content, + options: options.into_iter().collect(), + }), + } + } +} + +/// Consume 0 or more of anything in " \t\r\n" after `parser`. +fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<O> +where + P: FnMut(&'s str) -> IResult<O> + 'parser, +{ + terminated(parser, multispace0) +} + +/// Parse until char in `arr` is found. Always succeeds. +fn is_not0<'parser, 'arr: 'parser, 's: 'parser>( + arr: &'arr str, +) -> impl FnMut(&'s str) -> IResult<&'s str> + 'parser { + alt((is_not(arr), success(""))) +} + +fn node(s: &str) -> IResult<Node> { + alt((text_node, sound_node, tag_node))(s) +} + +/// A sound tag `[sound:ressource]`, where `ressource` is pointing to a sound or video file. +fn sound_node(s: &str) -> IResult<Node> { + map( + delimited(tag("[sound:"), is_not("]"), tag("]")), + Node::SoundOrVideo, + )(s) +} + +/// An Anki tag `[anki:tag...]...[/anki:tag]`. +fn tag_node(s: &str) -> IResult<Node> { + /// Match the start of an opening tag and return its name.
+ fn name(s: &str) -> IResult<&str> { + preceded(tag("[anki:"), is_not("] \t\r\n"))(s) + } + + /// Return a parser to match an opening `name` tag and return its options. + fn opening_parser<'name, 's: 'name>( + name: &'name str, + ) -> impl FnMut(&'s str) -> IResult<Vec<(&str, &str)>> + 'name { + /// List of whitespace-separated `key=val` tuples, where `val` may be empty. + fn options(s: &str) -> IResult<Vec<(&str, &str)>> { + fn key(s: &str) -> IResult<&str> { + is_not("] \t\r\n=")(s) + } + + fn val(s: &str) -> IResult<&str> { + alt(( + delimited(tag("\""), is_not0("\""), tag("\"")), + is_not0("] \t\r\n\""), + ))(s) + } + + many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s) + } + + delimited( + pair(tag("[anki:"), trailing_whitespace0(tag(name))), + options, + tag("]"), + ) + } + + /// Return a parser to match a closing `name` tag. + fn closing_parser<'parser, 'name: 'parser, 's: 'parser>( + name: &'name str, + ) -> impl FnMut(&'s str) -> IResult<()> + 'parser { + value((), tuple((tag("[/anki:"), tag(name), tag("]")))) + } + + /// Return a parser to match and return anything until a closing `name` tag is found. + fn content_parser<'parser, 'name: 'parser, 's: 'parser>( + name: &'name str, + ) -> impl FnMut(&'s str) -> IResult<&str> + 'parser { + recognize(many0(pair(not(closing_parser(name)), anychar))) + } + + let (_, tag_name) = name(s)?; + map( + terminated( + pair(opening_parser(tag_name), content_parser(tag_name)), + closing_parser(tag_name), + ), + |(options, content)| Node::Tag(Tag::new(tag_name, options, content)), + )(s) +} + +fn text_node(s: &str) -> IResult<Node> { + map( + recognize(many1(pair(not(alt((sound_node, tag_node))), anychar))), + Node::Text, + )(s) +} + +#[cfg(test)] +mod test { + use super::*; + + macro_rules!
assert_parsed_nodes { + ($txt:expr $(, $node:expr)*) => { + assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*])); + } + } + + #[test] + fn parsing() { + use Node::*; + + // empty + assert_parsed_nodes!(""); + + // text + assert_parsed_nodes!("foo", Text("foo")); + // broken sound/tags are just text as well + assert_parsed_nodes!("[sound:]", Text("[sound:]")); + assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:][/anki:]")); + assert_parsed_nodes!("[anki:foo][/anki:bar]", Text("[anki:foo][/anki:bar]")); + + // sound + assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo")); + assert_parsed_nodes!( + "foo [sound:bar] baz", + Text("foo "), + SoundOrVideo("bar"), + Text(" baz") + ); + assert_parsed_nodes!( + "[sound:foo][sound:bar]", + SoundOrVideo("foo"), + SoundOrVideo("bar") + ); + + // tags + assert_parsed_nodes!( + "[anki:foo]bar[/anki:foo]", + Tag(super::Tag::Other(OtherTag { + name: "foo", + content: "bar", + options: HashMap::new() + })) + ); + assert_parsed_nodes!( + "[anki:foo bar=baz][/anki:foo]", + Tag(super::Tag::Other(OtherTag { + name: "foo", + content: "", + options: [("bar", "baz")].into_iter().collect(), + })) + ); + // unquoted white space separates options, "]" terminates + assert_parsed_nodes!( + "[anki:foo\na=b\tc=d e=f][/anki:foo]", + Tag(super::Tag::Other(OtherTag { + name: "foo", + content: "", + options: [("a", "b"), ("c", "d"), ("e", "f")].into_iter().collect(), + })) + ); + assert_parsed_nodes!( + "[anki:foo a=\"b \t\n c ]\"][/anki:foo]", + Tag(super::Tag::Other(OtherTag { + name: "foo", + content: "", + options: [("a", "b \t\n c ]")].into_iter().collect(), + })) + ); + + // tts tags + assert_parsed_nodes!( + "[anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]", + Tag(super::Tag::Tts(TtsTag { + content: "", + lang: "jp_JP", + voices: vec!["Alice", "Bob"], + speed: 0.5, + blank: Some(""), + options: [("bar", "baz")].into_iter().collect(), + })) + ); + assert_parsed_nodes!( + "[anki:tts 
speed=foo][/anki:tts]", + Tag(super::Tag::Tts(TtsTag { + content: "", + lang: "", + voices: vec![], + speed: 1.0, + blank: None, + options: HashMap::new(), + })) + ); + } +} diff --git a/rslib/src/card_rendering/writer.rs b/rslib/src/card_rendering/writer.rs new file mode 100644 index 000000000..7c0c57850 --- /dev/null +++ b/rslib/src/card_rendering/writer.rs @@ -0,0 +1,210 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::fmt::Write as _; + +use super::{CardNodes, Node, OtherTag, Tag, TtsTag}; +use crate::prelude::*; +use crate::{ + backend_proto as pb, + text::{decode_entities, strip_html_for_tts}, +}; + +impl<'a> CardNodes<'a> { + pub(super) fn write_without_av_tags(&self) -> String { + AvStripper::new().write(self) + } + + pub(super) fn write_and_extract_av_tags( + &self, + question_side: bool, + tr: &I18n, + ) -> (String, Vec<pb::AvTag>) { + let mut extractor = AvExtractor::new(question_side, tr); + (extractor.write(self), extractor.tags) + } +} + +trait Write { + fn write<'iter, 'nodes: 'iter, T>(&mut self, nodes: T) -> String + where + T: IntoIterator<Item = &'iter Node<'nodes>>, + { + let mut buf = String::new(); + for node in nodes { + match &node { + Node::Text(s) => self.write_text(&mut buf, s), + Node::SoundOrVideo(r) => self.write_sound(&mut buf, r), + Node::Tag(tag) => self.write_tag(&mut buf, tag), + }; + } + buf + } + + fn write_text(&mut self, buf: &mut String, txt: &str) { + buf.push_str(txt); + } + + fn write_sound(&mut self, buf: &mut String, ressource: &str) { + write!(buf, "[sound:{}]", ressource).unwrap(); + } + + fn write_tag(&mut self, buf: &mut String, tag: &Tag) { + match tag { + Tag::Tts(tag) => self.write_tts_tag(buf, tag), + Tag::Other(tag) => self.write_other_tag(buf, tag), + }; + } + + fn write_tts_tag(&mut self, buf: &mut String, tag: &TtsTag) { + write!(buf, "[anki:tts").unwrap(); + + for (key, val) in [ + ("lang", tag.lang), + ("voices", &tag.voices.join(",")), + ("speed",
&tag.speed.to_string()), + ] { + self.write_tag_option(buf, key, val); + } + if let Some(blank) = tag.blank { + self.write_tag_option(buf, "cloze_blank", blank); + } + for (key, val) in &tag.options { + self.write_tag_option(buf, key, val); + } + + write!(buf, "]{}[/anki:tts]", tag.content).unwrap(); + } + + fn write_other_tag(&mut self, buf: &mut String, tag: &OtherTag) { + write!(buf, "[anki:{}", tag.name).unwrap(); + for (key, val) in &tag.options { + self.write_tag_option(buf, key, val); + } + buf.push(']'); + self.write_tag_content(buf, tag.content); + write!(buf, "[/anki:{}]", tag.name).unwrap(); + } + + fn write_tag_option(&mut self, buf: &mut String, key: &str, val: &str) { + if val.contains::<&[char]>(&[']', ' ', '\t', '\r', '\n']) { + write!(buf, " {}=\"{}\"", key, val).unwrap(); + } else { + write!(buf, " {}={}", key, val).unwrap(); + } + } + + fn write_tag_content(&mut self, buf: &mut String, content: &str) { + buf.push_str(content); + } +} + +struct AvStripper; + +impl AvStripper { + fn new() -> Self { + Self {} + } +} + +impl Write for AvStripper { + fn write_sound(&mut self, _buf: &mut String, _ressource: &str) {} + + fn write_tts_tag(&mut self, _buf: &mut String, _tag: &TtsTag) {} +} + +struct AvExtractor<'a> { + side: char, + tags: Vec<pb::AvTag>, + tr: &'a I18n, +} + +impl<'a> AvExtractor<'a> { + fn new(question_side: bool, tr: &'a I18n) -> Self { + Self { + side: if question_side { 'q' } else { 'a' }, + tags: vec![], + tr, + } + } + + fn write_play_tag(&self, buf: &mut String) { + write!(buf, "[anki:play:{}:{}]", self.side, self.tags.len()).unwrap(); + } + + fn transform_tts_content(&self, tag: &TtsTag) -> String { + strip_html_for_tts(tag.content).replace( + "[...]", + tag.blank.unwrap_or(&self.tr.card_templates_blank()), + ) + } +} + +impl Write for AvExtractor<'_> { + fn write_sound(&mut self, buf: &mut String, ressource: &str) { + self.write_play_tag(buf); + self.tags.push(pb::AvTag { + value: Some(pb::av_tag::Value::SoundOrVideo( +
decode_entities(ressource).into(), + )), + }); + } + + fn write_tts_tag(&mut self, buf: &mut String, tag: &TtsTag) { + self.write_play_tag(buf); + self.tags.push(pb::AvTag { + value: Some(pb::av_tag::Value::Tts(pb::TtsTag { + field_text: self.transform_tts_content(tag), + lang: tag.lang.into(), + voices: tag.voices.iter().map(ToString::to_string).collect(), + speed: tag.speed, + other_args: tag + .options + .iter() + .map(|(key, val)| format!("{}={}", key, val)) + .collect(), + })), + }); + } +} + +#[cfg(test)] +mod test { + use super::*; + + struct Writer; + impl Write for Writer {} + impl Writer { + fn new() -> Self { + Self {} + } + } + + /// Parse input, write it out, and assert equality with input or separately + /// passed output. + macro_rules! roundtrip { + ($input:expr) => { + assert_eq!($input, Writer::new().write(&CardNodes::parse($input))); + }; + ($input:expr, $output:expr) => { + assert_eq!(Writer::new().write(&CardNodes::parse($input)), $output); + }; + } + + #[test] + fn writing() { + roundtrip!("foo"); + roundtrip!("[sound:foo]"); + roundtrip!("[anki:foo bar=baz]spam[/anki:foo]"); + + // normalizing (not currently exposed) + roundtrip!( + "[anki:foo\nbar=baz ][/anki:foo]", + "[anki:foo bar=baz][/anki:foo]" + ); + roundtrip!( + "[anki:tts][/anki:tts]", + "[anki:tts lang= voices= speed=1][/anki:tts]" + ); + } +} diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index e3ae998c8..8a2a46115 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -8,6 +8,7 @@ pub mod backend; mod backend_proto; pub mod browser_table; pub mod card; +pub mod card_rendering; pub mod cloze; pub mod collection; pub mod config;