Add new card_rendering mod

Parses a text with av/tts tags and strips or extracts tags.
This commit is contained in:
RumovZ 2021-12-15 08:44:37 +01:00
parent 0d51b4db1f
commit 939bddd5d6
4 changed files with 584 additions and 0 deletions

View file

@ -0,0 +1,112 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashMap;
use crate::backend_proto as pb;
use crate::prelude::*;
mod parser;
mod writer;
/// Return `txt` with all `[sound:...]` and `[anki:tts]...[/anki:tts]` tags
/// removed. Plain text and any other `[anki:...]` tags are written back out.
pub fn strip_av_tags(txt: &str) -> String {
    let nodes = CardNodes::parse(txt);
    nodes.write_without_av_tags()
}
/// Replace every sound and tts tag in `txt` with an `[anki:play:<side>:<index>]`
/// reference and return the rewritten text together with the extracted tags.
///
/// `<side>` is `q` if `question_side` is true and `a` otherwise; `<index>` is
/// the position of the tag in the returned list. `tr` supplies the default
/// replacement for `[...]` in tts content when the tag sets no `cloze_blank`.
pub fn extract_av_tags(txt: &str, question_side: bool, tr: &I18n) -> (String, Vec<pb::AvTag>) {
    let nodes = CardNodes::parse(txt);
    nodes.write_and_extract_av_tags(question_side, tr)
}
/// Ordered list of the [`Node`]s a text was split into by `CardNodes::parse`.
/// All nodes borrow from the parsed text (`'a`).
#[derive(Debug, PartialEq)]
struct CardNodes<'a>(Vec<Node<'a>>);
impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> {
type Item = &'iter Node<'nodes>;
type IntoIter = std::slice::Iter<'iter, Node<'nodes>>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
/// A single piece of a parsed text.
#[derive(Debug, PartialEq)]
enum Node<'a> {
/// Plain text that is neither a sound tag nor an Anki tag.
Text(&'a str),
/// The resource named inside a `[sound:...]` tag.
SoundOrVideo(&'a str),
/// An `[anki:name ...]...[/anki:name]` tag.
Tag(Tag<'a>),
}
/// An Anki tag, classified by its name (see `Tag::new`).
#[derive(Debug, PartialEq)]
enum Tag<'a> {
/// A tag named `tts`, with its known options split out.
Tts(TtsTag<'a>),
/// A tag with any other name.
Other(OtherTag<'a>),
}
/// Contents of an `[anki:tts ...]...[/anki:tts]` tag.
#[derive(Debug, PartialEq)]
struct TtsTag<'a> {
/// Everything between the opening and the closing tag.
content: &'a str,
/// Value of the `lang` option; empty if the option is absent.
lang: &'a str,
/// Value of the `voices` option, split on `,`; empty if the option is absent.
voices: Vec<&'a str>,
/// Value of the `speed` option; 1.0 if absent or not parsable as a float.
speed: f32,
/// Value of the `cloze_blank` option, if present.
blank: Option<&'a str>,
/// All remaining `key=val` options.
options: HashMap<&'a str, &'a str>,
}
/// Contents of an `[anki:name ...]...[/anki:name]` tag whose name is not `tts`.
#[derive(Debug, PartialEq)]
struct OtherTag<'a> {
/// The tag name following `[anki:`.
name: &'a str,
/// Everything between the opening and the closing tag.
content: &'a str,
/// The `key=val` options of the opening tag.
options: HashMap<&'a str, &'a str>,
}
#[cfg(test)]
mod test {
    use super::*;

    /// Strip av tags and assert equality with input or separately passed output.
    macro_rules! assert_av_stripped {
        ($input:expr) => {
            assert_eq!($input, strip_av_tags($input));
        };
        ($input:expr, $output:expr) => {
            assert_eq!(strip_av_tags($input), $output);
        };
    }

    #[test]
    fn av_stripping() {
        // Only the tag itself is removed: the space before it and the space
        // after it are plain text and both remain in the output.
        assert_av_stripped!("foo [sound:bar] baz", "foo  baz");
        // A tts tag is dropped together with its content.
        assert_av_stripped!("[anki:tts bar=baz]spam[/anki:tts]", "");
        // Tags other than tts are left untouched.
        assert_av_stripped!("[anki:foo bar=baz]spam[/anki:foo]");
    }

    #[test]
    fn av_extracting() {
        let tr = I18n::template_only();
        let (txt, tags) = extract_av_tags(
            "foo [sound:bar.mp3] baz [anki:tts][...][/anki:tts]",
            true,
            &tr,
        );
        assert_eq!(
            (txt.as_str(), tags),
            (
                // Each av tag is replaced by a play reference carrying its index.
                "foo [anki:play:q:0] baz [anki:play:q:1]",
                vec![
                    pb::AvTag {
                        value: Some(pb::av_tag::Value::SoundOrVideo("bar.mp3".to_string()))
                    },
                    pb::AvTag {
                        value: Some(pb::av_tag::Value::Tts(pb::TtsTag {
                            // `[...]` is replaced by the translated default,
                            // as the tag sets no `cloze_blank`.
                            field_text: tr.card_templates_blank().to_string(),
                            lang: "".to_string(),
                            voices: vec![],
                            speed: 1.0,
                            other_args: vec![],
                        }))
                    }
                ],
            ),
        );
    }
}

View file

@ -0,0 +1,261 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashMap;
use nom::{
branch::alt,
bytes::complete::is_not,
bytes::complete::tag,
character::complete::{anychar, multispace0},
combinator::{map, not, recognize, success, value},
multi::{many0, many1},
sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
};
use super::{CardNodes, Node, OtherTag, Tag, TtsTag};
/// Shorthand for `nom::IResult` over `&str` input, used by all parsers in this module.
type IResult<'a, O> = nom::IResult<&'a str, O>;
impl<'a> CardNodes<'a> {
    /// Split `txt` into a sequence of nodes.
    ///
    /// Nodes are parsed off the front of the remaining input one by one until
    /// `node` no longer succeeds (e.g. because the input is exhausted). The
    /// nodes gathered up to that point are returned; this never fails.
    pub(super) fn parse(mut txt: &'a str) -> Self {
        let nodes: Vec<_> = std::iter::from_fn(|| {
            let (rest, parsed) = node(txt).ok()?;
            txt = rest;
            Some(parsed)
        })
        .collect();
        Self(nodes)
    }
}
impl<'a> Tag<'a> {
    /// Build a tag from its `name`, its `key=val` `options` and its `content`.
    ///
    /// Any name other than `tts` yields [`Tag::Other`] with all options kept
    /// as a map. For `tts`, the options `lang`, `voices` (comma-separated),
    /// `speed` (falls back to 1.0 if not a valid float) and `cloze_blank` are
    /// stored in dedicated fields, and everything else goes into `options`.
    /// If a key occurs more than once, the last occurrence wins.
    fn new(name: &'a str, options: Vec<(&'a str, &'a str)>, content: &'a str) -> Self {
        if name != "tts" {
            return Self::Other(OtherTag {
                name,
                content,
                options: options.into_iter().collect(),
            });
        }

        // Start from the defaults and overwrite them with whatever is given.
        let mut tts = TtsTag {
            content,
            lang: "",
            voices: Vec::new(),
            speed: 1.0,
            blank: None,
            options: HashMap::new(),
        };
        for (key, val) in options {
            match key {
                "lang" => tts.lang = val,
                "voices" => tts.voices = val.split(',').collect(),
                "speed" => tts.speed = val.parse().unwrap_or(1.0),
                "cloze_blank" => tts.blank = Some(val),
                _ => {
                    tts.options.insert(key, val);
                }
            }
        }
        Self::Tts(tts)
    }
}
/// Consume 0 or more of anything in " \t\r\n" after `parser`.
///
/// The returned parser yields the output of `parser` unchanged; the
/// whitespace matched after it is consumed and discarded.
fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<O>
where
P: FnMut(&'s str) -> IResult<O> + 'parser,
{
terminated(parser, multispace0)
}
/// Parse until char in `arr` is found. Always succeeds.
///
/// Tries `is_not(arr)` first and, if that fails, falls back to `success("")`,
/// i.e. an empty match that consumes nothing.
fn is_not0<'parser, 'arr: 'parser, 's: 'parser>(
arr: &'arr str,
) -> impl FnMut(&'s str) -> IResult<&'s str> + 'parser {
alt((is_not(arr), success("")))
}
/// Parse a single node from the start of `s`, trying the alternatives in the
/// order text, sound, Anki tag.
fn node(s: &str) -> IResult<Node> {
    let mut any_node = alt((text_node, sound_node, tag_node));
    any_node(s)
}
/// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or
/// video file. The resource must contain at least one character and ends at
/// the first `]`.
fn sound_node(s: &str) -> IResult<Node> {
    let resource = delimited(tag("[sound:"), is_not("]"), tag("]"));
    map(resource, Node::SoundOrVideo)(s)
}
/// An Anki tag `[anki:tag...]...[/anki:tag]`.
///
/// Parsing happens in two steps: the tag name is read from the start of `s`
/// first, and then the complete tag (opening tag with options, content and the
/// closing tag with the same name) is parsed from the start of `s` again.
fn tag_node(s: &str) -> IResult<Node> {
/// Match the start of an opening tag and return its name.
fn name(s: &str) -> IResult<&str> {
preceded(tag("[anki:"), is_not("] \t\r\n"))(s)
}
/// Return a parser to match an opening `name` tag and return its options.
fn opening_parser<'name, 's: 'name>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<Vec<(&str, &str)>> + 'name {
/// List of whitespace-separated `key=val` tuples, where `val` may be empty.
fn options(s: &str) -> IResult<Vec<(&str, &str)>> {
// A key is a non-empty run of characters up to "]", whitespace or "=".
fn key(s: &str) -> IResult<&str> {
is_not("] \t\r\n=")(s)
}
// A value is either enclosed in double quotes (and may then contain "]"
// and whitespace), or an unquoted, possibly empty run of characters up
// to "]", whitespace or a double quote.
fn val(s: &str) -> IResult<&str> {
alt((
delimited(tag("\""), is_not0("\""), tag("\"")),
is_not0("] \t\r\n\""),
))(s)
}
many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s)
}
delimited(
pair(tag("[anki:"), trailing_whitespace0(tag(name))),
options,
tag("]"),
)
}
/// Return a parser to match a closing `name` tag.
fn closing_parser<'parser, 'name: 'parser, 's: 'parser>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<()> + 'parser {
value((), tuple((tag("[/anki:"), tag(name), tag("]"))))
}
/// Return a parser to match and return anything until a closing `name` tag is found.
fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<&str> + 'parser {
// Take one character at a time for as long as no closing tag starts at
// the current position, and return the whole matched span.
recognize(many0(pair(not(closing_parser(name)), anychar)))
}
// Only the name is needed here; the remaining input of this first step is
// dropped, as the full tag is parsed from `s` below.
let (_, tag_name) = name(s)?;
map(
terminated(
pair(opening_parser(tag_name), content_parser(tag_name)),
closing_parser(tag_name),
),
|(options, content)| Node::Tag(Tag::new(tag_name, options, content)),
)(s)
}
/// Plain text: one or more characters, up to the first position at which a
/// sound node or an Anki tag can be parsed.
fn text_node(s: &str) -> IResult<Node> {
    // A character is part of the text as long as neither a sound node nor a
    // tag parses successfully at its position.
    let av_or_tag = alt((sound_node, tag_node));
    let text_char = pair(not(av_or_tag), anychar);
    map(recognize(many1(text_char)), Node::Text)(s)
}
#[cfg(test)]
mod test {
use super::*;
/// Parse the first argument and assert that the result equals the nodes
/// passed as the remaining arguments (none for an empty result).
macro_rules! assert_parsed_nodes {
($txt:expr $(, $node:expr)*) => {
assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*]));
}
}
#[test]
fn parsing() {
use Node::*;
// empty
assert_parsed_nodes!("");
// text
assert_parsed_nodes!("foo", Text("foo"));
// broken sound/tags are just text as well
assert_parsed_nodes!("[sound:]", Text("[sound:]"));
assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:][/anki:]"));
assert_parsed_nodes!("[anki:foo][/anki:bar]", Text("[anki:foo][/anki:bar]"));
// sound
assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo"));
assert_parsed_nodes!(
"foo [sound:bar] baz",
Text("foo "),
SoundOrVideo("bar"),
Text(" baz")
);
assert_parsed_nodes!(
"[sound:foo][sound:bar]",
SoundOrVideo("foo"),
SoundOrVideo("bar")
);
// tags
assert_parsed_nodes!(
"[anki:foo]bar[/anki:foo]",
Tag(super::Tag::Other(OtherTag {
name: "foo",
content: "bar",
options: HashMap::new()
}))
);
assert_parsed_nodes!(
"[anki:foo bar=baz][/anki:foo]",
Tag(super::Tag::Other(OtherTag {
name: "foo",
content: "",
options: [("bar", "baz")].into_iter().collect(),
}))
);
// unquoted white space separates options, "]" terminates
assert_parsed_nodes!(
"[anki:foo\na=b\tc=d e=f][/anki:foo]",
Tag(super::Tag::Other(OtherTag {
name: "foo",
content: "",
options: [("a", "b"), ("c", "d"), ("e", "f")].into_iter().collect(),
}))
);
// a quoted value may contain white space and "]"
assert_parsed_nodes!(
"[anki:foo a=\"b \t\n c ]\"][/anki:foo]",
Tag(super::Tag::Other(OtherTag {
name: "foo",
content: "",
options: [("a", "b \t\n c ]")].into_iter().collect(),
}))
);
// tts tags
assert_parsed_nodes!(
"[anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]",
Tag(super::Tag::Tts(TtsTag {
content: "",
lang: "jp_JP",
voices: vec!["Alice", "Bob"],
speed: 0.5,
blank: Some(""),
options: [("bar", "baz")].into_iter().collect(),
}))
);
// a speed that is not a number falls back to the default of 1.0
assert_parsed_nodes!(
"[anki:tts speed=foo][/anki:tts]",
Tag(super::Tag::Tts(TtsTag {
content: "",
lang: "",
voices: vec![],
speed: 1.0,
blank: None,
options: HashMap::new(),
}))
);
}
}

View file

@ -0,0 +1,210 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::fmt::Write as _;
use super::{CardNodes, Node, OtherTag, Tag, TtsTag};
use crate::prelude::*;
use crate::{
backend_proto as pb,
text::{decode_entities, strip_html_for_tts},
};
impl<'a> CardNodes<'a> {
    /// Write the nodes back to a string, leaving out sound and tts tags.
    pub(super) fn write_without_av_tags(&self) -> String {
        let mut stripper = AvStripper::new();
        stripper.write(self)
    }

    /// Write the nodes back to a string with every sound and tts tag replaced
    /// by a play reference, and return that string along with the tags that
    /// were replaced.
    pub(super) fn write_and_extract_av_tags(
        &self,
        question_side: bool,
        tr: &I18n,
    ) -> (String, Vec<pb::AvTag>) {
        let mut extractor = AvExtractor::new(question_side, tr);
        let text = extractor.write(self);
        (text, extractor.tags)
    }
}
/// Serializes nodes back to text.
///
/// The default methods reproduce each node in its textual form. Implementors
/// override individual `write_*` hooks to change how a kind of node is written.
trait Write {
    /// Write all `nodes` in order into a fresh string and return it.
    fn write<'iter, 'nodes: 'iter, T>(&mut self, nodes: T) -> String
    where
        T: IntoIterator<Item = &'iter Node<'nodes>>,
    {
        let mut out = String::new();
        nodes.into_iter().for_each(|node| match node {
            Node::Text(text) => self.write_text(&mut out, text),
            Node::SoundOrVideo(resource) => self.write_sound(&mut out, resource),
            Node::Tag(tag) => self.write_tag(&mut out, tag),
        });
        out
    }

    /// Append plain text verbatim.
    fn write_text(&mut self, buf: &mut String, txt: &str) {
        *buf += txt;
    }

    /// Append a `[sound:...]` tag for the given resource.
    fn write_sound(&mut self, buf: &mut String, ressource: &str) {
        buf.push_str("[sound:");
        buf.push_str(ressource);
        buf.push(']');
    }

    /// Dispatch to the hook matching the kind of tag.
    fn write_tag(&mut self, buf: &mut String, tag: &Tag) {
        match tag {
            Tag::Tts(tts) => self.write_tts_tag(buf, tts),
            Tag::Other(other) => self.write_other_tag(buf, other),
        }
    }

    /// Append a tts tag. `lang`, `voices` and `speed` are always written,
    /// followed by `cloze_blank` if set, followed by the remaining options.
    fn write_tts_tag(&mut self, buf: &mut String, tag: &TtsTag) {
        buf.push_str("[anki:tts");

        let voices = tag.voices.join(",");
        let speed = tag.speed.to_string();
        self.write_tag_option(buf, "lang", tag.lang);
        self.write_tag_option(buf, "voices", &voices);
        self.write_tag_option(buf, "speed", &speed);
        if let Some(blank) = tag.blank {
            self.write_tag_option(buf, "cloze_blank", blank);
        }
        for (name, value) in tag.options.iter() {
            self.write_tag_option(buf, name, value);
        }

        buf.push(']');
        buf.push_str(tag.content);
        buf.push_str("[/anki:tts]");
    }

    /// Append a non-tts tag with its options and content.
    fn write_other_tag(&mut self, buf: &mut String, tag: &OtherTag) {
        buf.push_str("[anki:");
        buf.push_str(tag.name);
        for (name, value) in tag.options.iter() {
            self.write_tag_option(buf, name, value);
        }
        buf.push(']');
        self.write_tag_content(buf, tag.content);
        write!(buf, "[/anki:{}]", tag.name).unwrap();
    }

    /// Append ` key=val`. The value is wrapped in double quotes if it contains
    /// `]` or whitespace, which would otherwise end an unquoted value.
    fn write_tag_option(&mut self, buf: &mut String, key: &str, val: &str) {
        let needs_quotes = val
            .chars()
            .any(|c| matches!(c, ']' | ' ' | '\t' | '\r' | '\n'));

        buf.push(' ');
        buf.push_str(key);
        buf.push('=');
        if needs_quotes {
            buf.push('"');
            buf.push_str(val);
            buf.push('"');
        } else {
            buf.push_str(val);
        }
    }

    /// Append the content of a non-tts tag verbatim.
    fn write_tag_content(&mut self, buf: &mut String, content: &str) {
        *buf += content;
    }
}
/// Writer that leaves out sound and tts tags.
struct AvStripper;

impl AvStripper {
    /// Create a stripper; it carries no state.
    fn new() -> Self {
        AvStripper
    }
}
impl Write for AvStripper {
    /// Sound tags are dropped: nothing is written.
    fn write_sound(&mut self, _out: &mut String, _resource: &str) {
        // intentionally empty
    }

    /// Tts tags are dropped along with their content: nothing is written.
    fn write_tts_tag(&mut self, _out: &mut String, _tts: &TtsTag) {
        // intentionally empty
    }
}
/// Writer that replaces sound and tts tags with `[anki:play:...]` references
/// and collects the replaced tags.
struct AvExtractor<'a> {
/// Side marker used in play references: 'q' for the question side, 'a' otherwise.
side: char,
/// Tags collected so far; a play reference carries the index of its tag in this list.
tags: Vec<pb::AvTag>,
/// Translations, used for the default replacement of `[...]` in tts content.
tr: &'a I18n,
}
impl<'a> AvExtractor<'a> {
    /// Create an extractor with an empty tag list, writing play references
    /// for the question side (`q`) if `question_side` is true, and for the
    /// answer side (`a`) otherwise.
    fn new(question_side: bool, tr: &'a I18n) -> Self {
        Self {
            side: if question_side { 'q' } else { 'a' },
            tags: vec![],
            tr,
        }
    }

    /// Append a play reference pointing at the tag that is pushed next, i.e.
    /// this must be called before the corresponding tag is added to `tags`.
    fn write_play_tag(&self, buf: &mut String) {
        write!(buf, "[anki:play:{}:{}]", self.side, self.tags.len()).unwrap();
    }

    /// Return the tag's content passed through `strip_html_for_tts`, with
    /// every `[...]` replaced by the tag's `cloze_blank` value or, if the tag
    /// has none, by the translated default.
    fn transform_tts_content(&self, tag: &TtsTag) -> String {
        let spoken = strip_html_for_tts(tag.content);
        match tag.blank {
            Some(blank) => spoken.replace("[...]", blank),
            // Only look up the translation when it is actually needed.
            None => spoken.replace("[...]", &self.tr.card_templates_blank()),
        }
    }
}
impl Write for AvExtractor<'_> {
    /// Write a play reference in place of the sound tag and record the tag
    /// with its resource passed through `decode_entities`.
    fn write_sound(&mut self, buf: &mut String, ressource: &str) {
        // The reference uses the current tag count as index, so it has to be
        // written before the tag is pushed.
        self.write_play_tag(buf);

        let file: String = decode_entities(ressource).into();
        let value = pb::av_tag::Value::SoundOrVideo(file);
        self.tags.push(pb::AvTag { value: Some(value) });
    }

    /// Write a play reference in place of the tts tag and record the tag.
    /// Options without a dedicated field are passed on as `key=val` strings.
    fn write_tts_tag(&mut self, buf: &mut String, tag: &TtsTag) {
        // See `write_sound` regarding the order of writing and pushing.
        self.write_play_tag(buf);

        let other_args: Vec<_> = tag
            .options
            .iter()
            .map(|(name, value)| format!("{}={}", name, value))
            .collect();
        let tts = pb::TtsTag {
            field_text: self.transform_tts_content(tag),
            lang: tag.lang.to_owned(),
            voices: tag.voices.iter().map(|voice| voice.to_string()).collect(),
            speed: tag.speed,
            other_args,
        };
        self.tags.push(pb::AvTag {
            value: Some(pb::av_tag::Value::Tts(tts)),
        });
    }
}
#[cfg(test)]
mod test {
use super::*;
/// Writer using only the default methods of `Write`, i.e. every node is
/// written back out.
struct Writer;
impl Write for Writer {}
impl Writer {
fn new() -> Self {
Self {}
}
}
/// Parse input, write it out, and assert equality with input or separately
/// passed output.
macro_rules! roundtrip {
($input:expr) => {
assert_eq!($input, Writer::new().write(&CardNodes::parse($input)));
};
($input:expr, $output:expr) => {
assert_eq!(Writer::new().write(&CardNodes::parse($input)), $output);
};
}
#[test]
fn writing() {
// these are written back unchanged
roundtrip!("foo");
roundtrip!("[sound:foo]");
roundtrip!("[anki:foo bar=baz]spam[/anki:foo]");
// normalizing (not currently exposed)
roundtrip!(
"[anki:foo\nbar=baz ][/anki:foo]",
"[anki:foo bar=baz][/anki:foo]"
);
// a tts tag always gets its lang, voices and speed options written
roundtrip!(
"[anki:tts][/anki:tts]",
"[anki:tts lang= voices= speed=1][/anki:tts]"
);
}
}

View file

@ -8,6 +8,7 @@ pub mod backend;
mod backend_proto; mod backend_proto;
pub mod browser_table; pub mod browser_table;
pub mod card; pub mod card;
pub mod card_rendering;
pub mod cloze; pub mod cloze;
pub mod collection; pub mod collection;
pub mod config; pub mod config;