// Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use std::collections::HashMap; use nom::{ branch::alt, bytes::complete::is_not, bytes::complete::tag, character::complete::{anychar, multispace0}, combinator::{map, not, recognize, success, value}, multi::{many0, many1}, sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, }; use super::{CardNodes, Node, OtherTag, Tag, TtsTag}; type IResult<'a, O> = nom::IResult<&'a str, O>; impl<'a> CardNodes<'a> { pub(super) fn parse(mut txt: &'a str) -> Self { let mut nodes = Vec::new(); while let Ok((remaining, node)) = node(txt) { txt = remaining; nodes.push(node); } Self(nodes) } } impl<'a> Tag<'a> { fn new(name: &'a str, options: Vec<(&'a str, &'a str)>, content: &'a str) -> Self { match name { "tts" => { let mut lang = ""; let mut voices = vec![]; let mut speed = 1.0; let mut blank = None; let mut other_options = HashMap::new(); for option in options { match option.0 { "lang" => lang = option.1, "voices" => voices = option.1.split(',').collect(), "speed" => speed = option.1.parse().unwrap_or(1.0), "cloze_blank" => blank = Some(option.1), _ => { other_options.insert(option.0, option.1); } } } Self::Tts(TtsTag { content, lang, voices, speed, blank, options: other_options, }) } _ => Self::Other(OtherTag { name, content, options: options.into_iter().collect(), }), } } } /// Consume 0 or more of anything in " \t\r\n" after `parser`. fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult where P: FnMut(&'s str) -> IResult + 'parser, { terminated(parser, multispace0) } /// Parse until char in `arr` is found. Always succeeds. fn is_not0<'parser, 'arr: 'parser, 's: 'parser>( arr: &'arr str, ) -> impl FnMut(&'s str) -> IResult<&'s str> + 'parser { alt((is_not(arr), success(""))) } fn node(s: &str) -> IResult { alt((text_node, sound_node, tag_node))(s) } /// A sound tag `[sound:ressource]`, where `ressource` is pointing to a sound or video file. fn sound_node(s: &str) -> IResult { map( delimited(tag("[sound:"), is_not("]"), tag("]")), Node::SoundOrVideo, )(s) } /// An Anki tag `[anki:tag...]...[/anki:tag]`. fn tag_node(s: &str) -> IResult { /// Match the start of an opening tag and return its name. fn name(s: &str) -> IResult<&str> { preceded(tag("[anki:"), is_not("] \t\r\n"))(s) } /// Return a parser to match an opening `name` tag and return its options. fn opening_parser<'name, 's: 'name>( name: &'name str, ) -> impl FnMut(&'s str) -> IResult> + 'name { /// List of whitespace-separated `key=val` tuples, where `val` may be empty. fn options(s: &str) -> IResult> { fn key(s: &str) -> IResult<&str> { is_not("] \t\r\n=")(s) } fn val(s: &str) -> IResult<&str> { alt(( delimited(tag("\""), is_not0("\""), tag("\"")), is_not0("] \t\r\n\""), ))(s) } many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s) } delimited( pair(tag("[anki:"), trailing_whitespace0(tag(name))), options, tag("]"), ) } /// Return a parser to match a closing `name` tag. fn closing_parser<'parser, 'name: 'parser, 's: 'parser>( name: &'name str, ) -> impl FnMut(&'s str) -> IResult<()> + 'parser { value((), tuple((tag("[/anki:"), tag(name), tag("]")))) } /// Return a parser to match and return anything until a closing `name` tag is found. fn content_parser<'parser, 'name: 'parser, 's: 'parser>( name: &'name str, ) -> impl FnMut(&'s str) -> IResult<&str> + 'parser { recognize(many0(pair(not(closing_parser(name)), anychar))) } let (_, tag_name) = name(s)?; map( terminated( pair(opening_parser(tag_name), content_parser(tag_name)), closing_parser(tag_name), ), |(options, content)| Node::Tag(Tag::new(tag_name, options, content)), )(s) } fn text_node(s: &str) -> IResult { map( recognize(many1(pair(not(alt((sound_node, tag_node))), anychar))), Node::Text, )(s) } #[cfg(test)] mod test { use super::*; macro_rules! assert_parsed_nodes { ($txt:expr $(, $node:expr)*) => { assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*])); } } #[test] fn parsing() { use Node::*; // empty assert_parsed_nodes!(""); // text assert_parsed_nodes!("foo", Text("foo")); // broken sound/tags are just text as well assert_parsed_nodes!("[sound:]", Text("[sound:]")); assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:][/anki:]")); assert_parsed_nodes!("[anki:foo][/anki:bar]", Text("[anki:foo][/anki:bar]")); // sound assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo")); assert_parsed_nodes!( "foo [sound:bar] baz", Text("foo "), SoundOrVideo("bar"), Text(" baz") ); assert_parsed_nodes!( "[sound:foo][sound:bar]", SoundOrVideo("foo"), SoundOrVideo("bar") ); // tags assert_parsed_nodes!( "[anki:foo]bar[/anki:foo]", Tag(super::Tag::Other(OtherTag { name: "foo", content: "bar", options: HashMap::new() })) ); assert_parsed_nodes!( "[anki:foo bar=baz][/anki:foo]", Tag(super::Tag::Other(OtherTag { name: "foo", content: "", options: [("bar", "baz")].into_iter().collect(), })) ); // unquoted white space separates options, "]" terminates assert_parsed_nodes!( "[anki:foo\na=b\tc=d e=f][/anki:foo]", Tag(super::Tag::Other(OtherTag { name: "foo", content: "", options: [("a", "b"), ("c", "d"), ("e", "f")].into_iter().collect(), })) ); assert_parsed_nodes!( "[anki:foo a=\"b \t\n c ]\"][/anki:foo]", Tag(super::Tag::Other(OtherTag { name: "foo", content: "", options: [("a", "b \t\n c ]")].into_iter().collect(), })) ); // tts tags assert_parsed_nodes!( "[anki:tts lang=jp_JP voices=Alice,Bob speed=0.5 cloze_blank= bar=baz][/anki:tts]", Tag(super::Tag::Tts(TtsTag { content: "", lang: "jp_JP", voices: vec!["Alice", "Bob"], speed: 0.5, blank: Some(""), options: [("bar", "baz")].into_iter().collect(), })) ); assert_parsed_nodes!( "[anki:tts speed=foo][/anki:tts]", Tag(super::Tag::Tts(TtsTag { content: "", lang: "", voices: vec![], speed: 1.0, blank: None, options: HashMap::new(), })) ); } }