mirror of
https://github.com/ankitects/anki.git
synced 2025-09-21 07:22:23 -04:00
Improve performance of card rendering parser (#3886)
* refactor parser
* update test
* add tests
* refactor CardNodes
* Increase nested cloze limit to underlying protobuf limit (dae)
This commit is contained in:
parent
52781aaab8
commit
aa5684638b
3 changed files with 55 additions and 19 deletions
|
@ -33,24 +33,24 @@ pub fn prettify_av_tags<S: Into<String> + AsRef<str>>(txt: S) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse `txt` into [CardNodes] and return the result,
|
/// Parse `txt` into [CardNodes] and return the result,
|
||||||
/// or [None] if it is only a text node.
|
/// or [None] if it only contains text nodes.
|
||||||
fn nodes_or_text_only(txt: &str) -> Option<CardNodes> {
|
fn nodes_or_text_only(txt: &str) -> Option<CardNodes> {
|
||||||
let nodes = CardNodes::parse(txt);
|
let nodes = CardNodes::parse(txt);
|
||||||
match nodes.0[..] {
|
(!nodes.text_only).then_some(nodes)
|
||||||
[Node::Text(_)] => None,
|
|
||||||
_ => Some(nodes),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
struct CardNodes<'a>(Vec<Node<'a>>);
|
struct CardNodes<'a> {
|
||||||
|
nodes: Vec<Node<'a>>,
|
||||||
|
text_only: bool,
|
||||||
|
}
|
||||||
|
|
||||||
impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> {
|
impl<'iter, 'nodes> IntoIterator for &'iter CardNodes<'nodes> {
|
||||||
type Item = &'iter Node<'nodes>;
|
type Item = &'iter Node<'nodes>;
|
||||||
type IntoIter = std::slice::Iter<'iter, Node<'nodes>>;
|
type IntoIter = std::slice::Iter<'iter, Node<'nodes>>;
|
||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
self.0.iter()
|
self.nodes.iter()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,10 +11,11 @@ use nom::character::complete::multispace0;
|
||||||
use nom::combinator::map;
|
use nom::combinator::map;
|
||||||
use nom::combinator::not;
|
use nom::combinator::not;
|
||||||
use nom::combinator::recognize;
|
use nom::combinator::recognize;
|
||||||
|
use nom::combinator::rest;
|
||||||
use nom::combinator::success;
|
use nom::combinator::success;
|
||||||
use nom::combinator::value;
|
use nom::combinator::value;
|
||||||
|
use nom::multi::fold_many0;
|
||||||
use nom::multi::many0;
|
use nom::multi::many0;
|
||||||
use nom::multi::many1;
|
|
||||||
use nom::sequence::delimited;
|
use nom::sequence::delimited;
|
||||||
use nom::sequence::pair;
|
use nom::sequence::pair;
|
||||||
use nom::sequence::preceded;
|
use nom::sequence::preceded;
|
||||||
|
@ -33,12 +34,14 @@ type IResult<'a, O> = nom::IResult<&'a str, O>;
|
||||||
impl<'a> CardNodes<'a> {
|
impl<'a> CardNodes<'a> {
|
||||||
pub(super) fn parse(mut txt: &'a str) -> Self {
|
pub(super) fn parse(mut txt: &'a str) -> Self {
|
||||||
let mut nodes = Vec::new();
|
let mut nodes = Vec::new();
|
||||||
|
let mut text_only = true;
|
||||||
while let Ok((remaining, node)) = node(txt) {
|
while let Ok((remaining, node)) = node(txt) {
|
||||||
|
text_only &= matches!(node, Node::Text(_));
|
||||||
txt = remaining;
|
txt = remaining;
|
||||||
nodes.push(node);
|
nodes.push(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
Self(nodes)
|
Self { nodes, text_only }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,7 +101,7 @@ fn is_not0<'parser, 'arr: 'parser, 's: 'parser>(
|
||||||
}
|
}
|
||||||
|
|
||||||
fn node(s: &str) -> IResult<Node> {
|
fn node(s: &str) -> IResult<Node> {
|
||||||
alt((text_node, sound_node, tag_node))(s)
|
alt((sound_node, tag_node, text_node))(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or
|
/// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or
|
||||||
|
@ -110,6 +113,16 @@ fn sound_node(s: &str) -> IResult<Node> {
|
||||||
)(s)
|
)(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn take_till_potential_tag_start(s: &str) -> IResult<&str> {
|
||||||
|
use nom::InputTake;
|
||||||
|
// first char could be '[', but wasn't part of a node, so skip (eof ends parse)
|
||||||
|
let (after, offset) = anychar(s).map(|(s, c)| (s, c.len_utf8()))?;
|
||||||
|
Ok(match after.find('[') {
|
||||||
|
Some(pos) => s.take_split(offset + pos),
|
||||||
|
_ => rest(s)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// An Anki tag `[anki:tag...]...[/anki:tag]`.
|
/// An Anki tag `[anki:tag...]...[/anki:tag]`.
|
||||||
fn tag_node(s: &str) -> IResult<Node> {
|
fn tag_node(s: &str) -> IResult<Node> {
|
||||||
/// Match the start of an opening tag and return its name.
|
/// Match the start of an opening tag and return its name.
|
||||||
|
@ -157,7 +170,12 @@ fn tag_node(s: &str) -> IResult<Node> {
|
||||||
fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
|
fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
|
||||||
name: &'name str,
|
name: &'name str,
|
||||||
) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser {
|
) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser {
|
||||||
recognize(many0(pair(not(closing_parser(name)), anychar)))
|
recognize(fold_many0(
|
||||||
|
pair(not(closing_parser(name)), take_till_potential_tag_start),
|
||||||
|
// we don't need to accumulate anything
|
||||||
|
|| (),
|
||||||
|
|_, _| (),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
let (_, tag_name) = name(s)?;
|
let (_, tag_name) = name(s)?;
|
||||||
|
@ -171,10 +189,7 @@ fn tag_node(s: &str) -> IResult<Node> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn text_node(s: &str) -> IResult<Node> {
|
fn text_node(s: &str) -> IResult<Node> {
|
||||||
map(
|
map(take_till_potential_tag_start, Node::Text)(s)
|
||||||
recognize(many1(pair(not(alt((sound_node, tag_node))), anychar))),
|
|
||||||
Node::Text,
|
|
||||||
)(s)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -183,7 +198,7 @@ mod test {
|
||||||
|
|
||||||
macro_rules! assert_parsed_nodes {
|
macro_rules! assert_parsed_nodes {
|
||||||
($txt:expr $(, $node:expr)*) => {
|
($txt:expr $(, $node:expr)*) => {
|
||||||
assert_eq!(CardNodes::parse($txt), CardNodes(vec![$($node),*]));
|
assert_eq!(CardNodes::parse($txt).nodes, vec![$($node),*]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -198,8 +213,21 @@ mod test {
|
||||||
assert_parsed_nodes!("foo", Text("foo"));
|
assert_parsed_nodes!("foo", Text("foo"));
|
||||||
// broken sound/tags are just text as well
|
// broken sound/tags are just text as well
|
||||||
assert_parsed_nodes!("[sound:]", Text("[sound:]"));
|
assert_parsed_nodes!("[sound:]", Text("[sound:]"));
|
||||||
assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:][/anki:]"));
|
assert_parsed_nodes!("[anki:][/anki:]", Text("[anki:]"), Text("[/anki:]"));
|
||||||
assert_parsed_nodes!("[anki:foo][/anki:bar]", Text("[anki:foo][/anki:bar]"));
|
assert_parsed_nodes!(
|
||||||
|
"[anki:foo][/anki:bar]",
|
||||||
|
Text("[anki:foo]"),
|
||||||
|
Text("[/anki:bar]")
|
||||||
|
);
|
||||||
|
assert_parsed_nodes!(
|
||||||
|
"abc[anki:foo]def[/anki:bar]ghi][[anki:bar][",
|
||||||
|
Text("abc"),
|
||||||
|
Text("[anki:foo]def"),
|
||||||
|
Text("[/anki:bar]ghi]"),
|
||||||
|
Text("["),
|
||||||
|
Text("[anki:bar]"),
|
||||||
|
Text("[")
|
||||||
|
);
|
||||||
|
|
||||||
// sound
|
// sound
|
||||||
assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo"));
|
assert_parsed_nodes!("[sound:foo]", SoundOrVideo("foo"));
|
||||||
|
@ -224,6 +252,14 @@ mod test {
|
||||||
options: HashMap::new()
|
options: HashMap::new()
|
||||||
}))
|
}))
|
||||||
);
|
);
|
||||||
|
assert_parsed_nodes!(
|
||||||
|
"[anki:foo]]bar[[/anki:foo]",
|
||||||
|
Directive(super::Directive::Other(OtherDirective {
|
||||||
|
name: "foo",
|
||||||
|
content: "]bar[",
|
||||||
|
options: HashMap::new()
|
||||||
|
}))
|
||||||
|
);
|
||||||
assert_parsed_nodes!(
|
assert_parsed_nodes!(
|
||||||
"[anki:foo bar=baz][/anki:foo]",
|
"[anki:foo bar=baz][/anki:foo]",
|
||||||
Directive(super::Directive::Other(OtherDirective {
|
Directive(super::Directive::Other(OtherDirective {
|
||||||
|
|
|
@ -159,7 +159,7 @@ fn parse_text_with_clozes(text: &str) -> Vec<TextOrCloze<'_>> {
|
||||||
for token in tokenize(text) {
|
for token in tokenize(text) {
|
||||||
match token {
|
match token {
|
||||||
Token::OpenCloze(ordinal) => {
|
Token::OpenCloze(ordinal) => {
|
||||||
if open_clozes.len() < 8 {
|
if open_clozes.len() < 10 {
|
||||||
open_clozes.push(ExtractedCloze {
|
open_clozes.push(ExtractedCloze {
|
||||||
ordinal,
|
ordinal,
|
||||||
nodes: Vec::with_capacity(1), // common case
|
nodes: Vec::with_capacity(1), // common case
|
||||||
|
|
Loading…
Reference in a new issue