mirror of
https://github.com/ankitects/anki.git
synced 2025-09-24 16:56:36 -04:00
Add new card_rendering
mod
Parses a text with av/tts tags and strips or extracts tags.
This commit is contained in:
parent
0d51b4db1f
commit
939bddd5d6
4 changed files with 584 additions and 0 deletions
112
rslib/src/card_rendering/mod.rs
Normal file
112
rslib/src/card_rendering/mod.rs
Normal file
|
@ -0,0 +1,112 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::backend_proto as pb;
|
||||
use crate::prelude::*;
|
||||
|
||||
mod parser;
|
||||
mod writer;
|
||||
|
||||
pub fn strip_av_tags(txt: &str) -> String {
|
||||
CardNodes::parse(txt).write_without_av_tags()
|
||||
}
|
||||
|
||||
pub fn extract_av_tags(txt: &str, question_side: bool, tr: &I18n) -> (String, Vec<pb::AvTag>) {
|
||||
CardNodes::parse(txt).write_and_extract_av_tags(question_side, tr)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
struct CardNodes<'a>(Vec<Node<'a>>);
|
||||
|
||||
impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> {
|
||||
type Item = &'iter Node<'nodes>;
|
||||
type IntoIter = std::slice::Iter<'iter, Node<'nodes>>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.0.iter()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum Node<'a> {
|
||||
Text(&'a str),
|
||||
SoundOrVideo(&'a str),
|
||||
Tag(Tag<'a>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum Tag<'a> {
|
||||
Tts(TtsTag<'a>),
|
||||
Other(OtherTag<'a>),
|
||||
}
|
||||
|
||||
/// The parts of an `[anki:tts ...]...[/anki:tts]` tag.
#[derive(PartialEq, Debug)]
struct TtsTag<'a> {
    /// Text between the opening and the closing tag.
    content: &'a str,
    /// Value of the `lang` option.
    lang: &'a str,
    /// Entries of the comma-separated `voices` option.
    voices: Vec<&'a str>,
    /// Value of the `speed` option.
    speed: f32,
    /// Value of the `cloze_blank` option, if it was given.
    blank: Option<&'a str>,
    /// All options that are not covered by the fields above.
    options: HashMap<&'a str, &'a str>,
}
|
||||
|
||||
/// The parts of an `[anki:name ...]...[/anki:name]` tag that is not a tts tag.
#[derive(PartialEq, Debug)]
struct OtherTag<'a> {
    /// Name following `anki:` in the opening and closing tag.
    name: &'a str,
    /// Text between the opening and the closing tag.
    content: &'a str,
    /// The `key=val` options of the opening tag.
    options: HashMap<&'a str, &'a str>,
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Strip av tags from `$input` and assert that the result equals
    /// `$output`, or `$input` itself if no output is passed.
    macro_rules! assert_av_stripped {
        ($input:expr) => {
            assert_eq!($input, strip_av_tags($input));
        };
        ($input:expr, $output:expr) => {
            assert_eq!(strip_av_tags($input), $output);
        };
    }

    #[test]
    fn av_stripping() {
        // Only the tag itself is removed, so the space on either side of it
        // is kept and the result contains two consecutive spaces.
        assert_av_stripped!("foo [sound:bar] baz", "foo  baz");
        assert_av_stripped!("[anki:tts bar=baz]spam[/anki:tts]", "");
        // Tags other than sound and tts are left in place.
        assert_av_stripped!("[anki:foo bar=baz]spam[/anki:foo]");
    }

    #[test]
    fn av_extracting() {
        let tr = I18n::template_only();
        let (txt, tags) = extract_av_tags(
            "foo [sound:bar.mp3] baz [anki:tts][...][/anki:tts]",
            true,
            &tr,
        );

        // Each av tag is replaced by a play reference carrying its index.
        assert_eq!(txt, "foo [anki:play:q:0] baz [anki:play:q:1]");

        let sound = pb::AvTag {
            value: Some(pb::av_tag::Value::SoundOrVideo("bar.mp3".to_string())),
        };
        // The cloze placeholder in the tts content is replaced by the
        // translated default, as no `cloze_blank` option was given.
        let tts = pb::AvTag {
            value: Some(pb::av_tag::Value::Tts(pb::TtsTag {
                field_text: tr.card_templates_blank().to_string(),
                lang: "".to_string(),
                voices: vec![],
                speed: 1.0,
                other_args: vec![],
            })),
        };
        assert_eq!(tags, vec![sound, tts]);
    }
}
|
261
rslib/src/card_rendering/parser.rs
Normal file
261
rslib/src/card_rendering/parser.rs
Normal file
|
@ -0,0 +1,261 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::is_not,
|
||||
bytes::complete::tag,
|
||||
character::complete::{anychar, multispace0},
|
||||
combinator::{map, not, recognize, success, value},
|
||||
multi::{many0, many1},
|
||||
sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
|
||||
};
|
||||
|
||||
use super::{CardNodes, Node, OtherTag, Tag, TtsTag};
|
||||
|
||||
type IResult<'a, O> = nom::IResult<&'a str, O>;
|
||||
|
||||
impl<'a> CardNodes<'a> {
|
||||
pub(super) fn parse(mut txt: &'a str) -> Self {
|
||||
let mut nodes = Vec::new();
|
||||
while let Ok((remaining, node)) = node(txt) {
|
||||
txt = remaining;
|
||||
nodes.push(node);
|
||||
}
|
||||
|
||||
Self(nodes)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Tag<'a> {
|
||||
fn new(name: &'a str, options: Vec<(&'a str, &'a str)>, content: &'a str) -> Self {
|
||||
match name {
|
||||
"tts" => {
|
||||
let mut lang = "";
|
||||
let mut voices = vec![];
|
||||
let mut speed = 1.0;
|
||||
let mut blank = None;
|
||||
let mut other_options = HashMap::new();
|
||||
|
||||
for option in options {
|
||||
match option.0 {
|
||||
"lang" => lang = option.1,
|
||||
"voices" => voices = option.1.split(',').collect(),
|
||||
"speed" => speed = option.1.parse().unwrap_or(1.0),
|
||||
"cloze_blank" => blank = Some(option.1),
|
||||
_ => {
|
||||
other_options.insert(option.0, option.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Self::Tts(TtsTag {
|
||||
content,
|
||||
lang,
|
||||
voices,
|
||||
speed,
|
||||
blank,
|
||||
options: other_options,
|
||||
})
|
||||
}
|
||||
_ => Self::Other(OtherTag {
|
||||
name,
|
||||
content,
|
||||
options: options.into_iter().collect(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume 0 or more of anything in " \t\r\n" after `parser`.
|
||||
fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<O>
|
||||
where
|
||||
P: FnMut(&'s str) -> IResult<O> + 'parser,
|
||||
{
|
||||
terminated(parser, multispace0)
|
||||
}
|
||||
|
||||
/// Parse until char in `arr` is found. Always succeeds.
|
||||
fn is_not0<'parser, 'arr: 'parser, 's: 'parser>(
|
||||
arr: &'arr str,
|
||||
) -> impl FnMut(&'s str) -> IResult<&'s str> + 'parser {
|
||||
alt((is_not(arr), success("")))
|
||||
}
|
||||
|
||||
fn node(s: &str) -> IResult<Node> {
|
||||
alt((text_node, sound_node, tag_node))(s)
|
||||
}
|
||||
|
||||
/// A sound tag `[sound:ressource]`, where `ressource` is pointing to a sound or video file.
|
||||
fn sound_node(s: &str) -> IResult<Node> {
|
||||
map(
|
||||
delimited(tag("[sound:"), is_not("]"), tag("]")),
|
||||
Node::SoundOrVideo,
|
||||
)(s)
|
||||
}
|
||||
|
||||
/// An Anki tag `[anki:tag...]...[/anki:tag]`.
|
||||
fn tag_node(s: &str) -> IResult<Node> {
|
||||
/// Match the start of an opening tag and return its name.
|
||||
fn name(s: &str) -> IResult<&str> {
|
||||
preceded(tag("[anki:"), is_not("] \t\r\n"))(s)
|
||||
}
|
||||
|
||||
/// Return a parser to match an opening `name` tag and return its options.
|
||||
fn opening_parser<'name, 's: 'name>(
|
||||
name: &'name str,
|
||||
) -> impl FnMut(&'s str) -> IResult<Vec<(&str, &str)>> + 'name {
|
||||
/// List of whitespace-separated `key=val` tuples, where `val` may be empty.
|
||||
fn options(s: &str) -> IResult<Vec<(&str, &str)>> {
|
||||
fn key(s: &str) -> IResult<&str> {
|
||||
is_not("] \t\r\n=")(s)
|
||||
}
|
||||
|
||||
fn val(s: &str) -> IResult<&str> {
|
||||
alt((
|
||||
delimited(tag("\""), is_not0("\""), tag("\"")),
|
||||
is_not0("] \t\r\n\""),
|
||||
))(s)
|
||||
}
|
||||
|
||||
many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s)
|
||||
}
|
||||
|
||||
delimited(
|
||||
pair(tag("[anki:"), trailing_whitespace0(tag(name))),
|
||||
options,
|
||||
tag("]"),
|
||||
)
|
||||
}
|
||||
|
||||
/// Return a parser to match a closing `name` tag.
|
||||
fn closing_parser<'parser, 'name: 'parser, 's: 'parser>(
|
||||
name: &'name str,
|
||||
) -> impl FnMut(&'s str) -> IResult<()> + 'parser {
|
||||
value((), tuple((tag("[/anki:"), tag(name), tag("]"))))
|
||||
}
|
||||
|
||||
/// Return a parser to match and return anything until a closing `name` tag is found.
|
||||
fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
|
||||
name: &'name str,
|
||||
) -> impl FnMut(&'s str) -> IResult<&str> + 'parser {
|
||||
recognize(many0(pair(not(closing_parser(name)), anychar)))
|
||||
}
|
||||
|
||||
let (_, tag_name) = name(s)?;
|
||||
map(
|
||||
terminated(
|
||||
pair(opening_parser(tag_name), content_parser(tag_name)),
|
||||
closing_parser(tag_name),
|
||||
),
|
||||
|(options, content)| Node::Tag(Tag::new(tag_name, options, content)),
|
||||
)(s)
|
||||
}
|
||||
|
||||
fn text_node(s: &str) -> IResult<Node> {
|
||||
map(
|
||||
recognize(many1(pair(not(alt((sound_node, tag_node))), anychar))),
|
||||
Node::Text,
|
||||
)(s)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Assert that parsing `$txt` yields exactly the listed nodes.
    macro_rules! assert_parsed_nodes {
        ($txt:expr $(, $node:expr)*) => {
            assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*]));
        }
    }

    /// Build the node for a non-tts tag from its parts.
    fn other_tag<'a>(
        name: &'a str,
        content: &'a str,
        options: &[(&'a str, &'a str)],
    ) -> Node<'a> {
        Node::Tag(Tag::Other(OtherTag {
            name,
            content,
            options: options.iter().copied().collect(),
        }))
    }

    #[test]
    fn parsing() {
        // empty
        assert_parsed_nodes!("");

        // text
        assert_parsed_nodes!("foo", Node::Text("foo"));
        // broken sound/tags are just text as well
        assert_parsed_nodes!("[sound:]", Node::Text("[sound:]"));
        assert_parsed_nodes!("[anki:][/anki:]", Node::Text("[anki:][/anki:]"));
        assert_parsed_nodes!(
            "[anki:foo][/anki:bar]",
            Node::Text("[anki:foo][/anki:bar]")
        );

        // sound
        assert_parsed_nodes!("[sound:foo]", Node::SoundOrVideo("foo"));
        assert_parsed_nodes!(
            "foo [sound:bar] baz",
            Node::Text("foo "),
            Node::SoundOrVideo("bar"),
            Node::Text(" baz")
        );
        assert_parsed_nodes!(
            "[sound:foo][sound:bar]",
            Node::SoundOrVideo("foo"),
            Node::SoundOrVideo("bar")
        );

        // tags
        assert_parsed_nodes!("[anki:foo]bar[/anki:foo]", other_tag("foo", "bar", &[]));
        assert_parsed_nodes!(
            "[anki:foo bar=baz][/anki:foo]",
            other_tag("foo", "", &[("bar", "baz")])
        );
        // unquoted white space separates options, "]" terminates
        assert_parsed_nodes!(
            "[anki:foo\na=b\tc=d e=f][/anki:foo]",
            other_tag("foo", "", &[("a", "b"), ("c", "d"), ("e", "f")])
        );
        // quoted values may contain white space and "]"
        assert_parsed_nodes!(
            "[anki:foo a=\"b \t\n c ]\"][/anki:foo]",
            other_tag("foo", "", &[("a", "b \t\n c ]")])
        );

        // tts tags
        let mut extra_options = HashMap::new();
        extra_options.insert("bar", "baz");
        assert_parsed_nodes!(
            "[anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]",
            Node::Tag(Tag::Tts(TtsTag {
                content: "",
                lang: "jp_JP",
                voices: vec!["Alice", "Bob"],
                speed: 0.5,
                blank: Some(""),
                options: extra_options,
            }))
        );
        // an invalid speed falls back to the default
        assert_parsed_nodes!(
            "[anki:tts speed=foo][/anki:tts]",
            Node::Tag(Tag::Tts(TtsTag {
                content: "",
                lang: "",
                voices: vec![],
                speed: 1.0,
                blank: None,
                options: HashMap::new(),
            }))
        );
    }
}
|
210
rslib/src/card_rendering/writer.rs
Normal file
210
rslib/src/card_rendering/writer.rs
Normal file
|
@ -0,0 +1,210 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::fmt::Write as _;
|
||||
|
||||
use super::{CardNodes, Node, OtherTag, Tag, TtsTag};
|
||||
use crate::prelude::*;
|
||||
use crate::{
|
||||
backend_proto as pb,
|
||||
text::{decode_entities, strip_html_for_tts},
|
||||
};
|
||||
|
||||
impl<'a> CardNodes<'a> {
|
||||
pub(super) fn write_without_av_tags(&self) -> String {
|
||||
AvStripper::new().write(self)
|
||||
}
|
||||
|
||||
pub(super) fn write_and_extract_av_tags(
|
||||
&self,
|
||||
question_side: bool,
|
||||
tr: &I18n,
|
||||
) -> (String, Vec<pb::AvTag>) {
|
||||
let mut extractor = AvExtractor::new(question_side, tr);
|
||||
(extractor.write(self), extractor.tags)
|
||||
}
|
||||
}
|
||||
|
||||
trait Write {
|
||||
fn write<'iter, 'nodes: 'iter, T>(&mut self, nodes: T) -> String
|
||||
where
|
||||
T: IntoIterator<Item = &'iter Node<'nodes>>,
|
||||
{
|
||||
let mut buf = String::new();
|
||||
for node in nodes {
|
||||
match &node {
|
||||
Node::Text(s) => self.write_text(&mut buf, s),
|
||||
Node::SoundOrVideo(r) => self.write_sound(&mut buf, r),
|
||||
Node::Tag(tag) => self.write_tag(&mut buf, tag),
|
||||
};
|
||||
}
|
||||
buf
|
||||
}
|
||||
|
||||
fn write_text(&mut self, buf: &mut String, txt: &str) {
|
||||
buf.push_str(txt);
|
||||
}
|
||||
|
||||
fn write_sound(&mut self, buf: &mut String, ressource: &str) {
|
||||
write!(buf, "[sound:{}]", ressource).unwrap();
|
||||
}
|
||||
|
||||
fn write_tag(&mut self, buf: &mut String, tag: &Tag) {
|
||||
match tag {
|
||||
Tag::Tts(tag) => self.write_tts_tag(buf, tag),
|
||||
Tag::Other(tag) => self.write_other_tag(buf, tag),
|
||||
};
|
||||
}
|
||||
|
||||
fn write_tts_tag(&mut self, buf: &mut String, tag: &TtsTag) {
|
||||
write!(buf, "[anki:tts").unwrap();
|
||||
|
||||
for (key, val) in [
|
||||
("lang", tag.lang),
|
||||
("voices", &tag.voices.join(",")),
|
||||
("speed", &tag.speed.to_string()),
|
||||
] {
|
||||
self.write_tag_option(buf, key, val);
|
||||
}
|
||||
if let Some(blank) = tag.blank {
|
||||
self.write_tag_option(buf, "cloze_blank", blank);
|
||||
}
|
||||
for (key, val) in &tag.options {
|
||||
self.write_tag_option(buf, key, val);
|
||||
}
|
||||
|
||||
write!(buf, "]{}[/anki:tts]", tag.content).unwrap();
|
||||
}
|
||||
|
||||
fn write_other_tag(&mut self, buf: &mut String, tag: &OtherTag) {
|
||||
write!(buf, "[anki:{}", tag.name).unwrap();
|
||||
for (key, val) in &tag.options {
|
||||
self.write_tag_option(buf, key, val);
|
||||
}
|
||||
buf.push(']');
|
||||
self.write_tag_content(buf, tag.content);
|
||||
write!(buf, "[/anki:{}]", tag.name).unwrap();
|
||||
}
|
||||
|
||||
fn write_tag_option(&mut self, buf: &mut String, key: &str, val: &str) {
|
||||
if val.contains::<&[char]>(&[']', ' ', '\t', '\r', '\n']) {
|
||||
write!(buf, " {}=\"{}\"", key, val).unwrap();
|
||||
} else {
|
||||
write!(buf, " {}={}", key, val).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn write_tag_content(&mut self, buf: &mut String, content: &str) {
|
||||
buf.push_str(content);
|
||||
}
|
||||
}
|
||||
|
||||
/// Writer that leaves out sound and tts tags.
struct AvStripper;

impl AvStripper {
    /// Create a stripper. It carries no state.
    fn new() -> Self {
        AvStripper
    }
}
|
||||
|
||||
impl Write for AvStripper {
|
||||
fn write_sound(&mut self, _buf: &mut String, _ressource: &str) {}
|
||||
|
||||
fn write_tts_tag(&mut self, _buf: &mut String, _tag: &TtsTag) {}
|
||||
}
|
||||
|
||||
struct AvExtractor<'a> {
|
||||
side: char,
|
||||
tags: Vec<pb::AvTag>,
|
||||
tr: &'a I18n,
|
||||
}
|
||||
|
||||
impl<'a> AvExtractor<'a> {
|
||||
fn new(question_side: bool, tr: &'a I18n) -> Self {
|
||||
Self {
|
||||
side: if question_side { 'q' } else { 'a' },
|
||||
tags: vec![],
|
||||
tr,
|
||||
}
|
||||
}
|
||||
|
||||
fn write_play_tag(&self, buf: &mut String) {
|
||||
write!(buf, "[anki:play:{}:{}]", self.side, self.tags.len()).unwrap();
|
||||
}
|
||||
|
||||
fn transform_tts_content(&self, tag: &TtsTag) -> String {
|
||||
strip_html_for_tts(tag.content).replace(
|
||||
"[...]",
|
||||
tag.blank.unwrap_or(&self.tr.card_templates_blank()),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Write for AvExtractor<'_> {
|
||||
fn write_sound(&mut self, buf: &mut String, ressource: &str) {
|
||||
self.write_play_tag(buf);
|
||||
self.tags.push(pb::AvTag {
|
||||
value: Some(pb::av_tag::Value::SoundOrVideo(
|
||||
decode_entities(ressource).into(),
|
||||
)),
|
||||
});
|
||||
}
|
||||
|
||||
fn write_tts_tag(&mut self, buf: &mut String, tag: &TtsTag) {
|
||||
self.write_play_tag(buf);
|
||||
self.tags.push(pb::AvTag {
|
||||
value: Some(pb::av_tag::Value::Tts(pb::TtsTag {
|
||||
field_text: self.transform_tts_content(tag),
|
||||
lang: tag.lang.into(),
|
||||
voices: tag.voices.iter().map(ToString::to_string).collect(),
|
||||
speed: tag.speed,
|
||||
other_args: tag
|
||||
.options
|
||||
.iter()
|
||||
.map(|(key, val)| format!("{}={}", key, val))
|
||||
.collect(),
|
||||
})),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Writer that relies on the provided methods of `Write` only.
    struct Writer;

    impl Write for Writer {}

    /// Parse `input` and write it back out with the default writer.
    fn rewritten(input: &str) -> String {
        Writer.write(&CardNodes::parse(input))
    }

    #[test]
    fn writing() {
        // already normalized input is reproduced unchanged
        for input in ["foo", "[sound:foo]", "[anki:foo bar=baz]spam[/anki:foo]"] {
            assert_eq!(input, rewritten(input));
        }

        // normalizing (not currently exposed)
        assert_eq!(
            rewritten("[anki:foo\nbar=baz ][/anki:foo]"),
            "[anki:foo bar=baz][/anki:foo]"
        );
        assert_eq!(
            rewritten("[anki:tts][/anki:tts]"),
            "[anki:tts lang= voices= speed=1][/anki:tts]"
        );
    }
}
|
|
@ -8,6 +8,7 @@ pub mod backend;
|
|||
mod backend_proto;
|
||||
pub mod browser_table;
|
||||
pub mod card;
|
||||
pub mod card_rendering;
|
||||
pub mod cloze;
|
||||
pub mod collection;
|
||||
pub mod config;
|
||||
|
|
Loading…
Reference in a new issue