Upgrade to nom 8.0.0 (#4105)

* bump nom to 8.0.0
* update cloze.rs
* update template.rs
* update imageocclusion.rs
* update search/parser.rs
* update card_rendering/parser.rs
* replace use of fold_many0 with many0: in nom 8, `many0` doesn't accumulate when used within `recognize`
2025-12-21 10:52:57 -05:00 · 2025-06-21 20:15:19 +08:00 · 2025-06-21 20:15:19 +08:00 · cc395f7c44
commit cc395f7c44
parent a4c95f5fbd
8 changed files with 90 additions and 50 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -117,7 +117,7 @@ dependencies = [
 "id_tree",
 "inflections",
 "itertools 0.14.0",
- "nom",
+ "nom 8.0.0",
 "num_cpus",
 "num_enum",
 "once_cell",
@ -4117,6 +4117,15 @@ dependencies = [
 "minimal-lexical",
 ]

+[[package]]
+name = "nom"
+version = "8.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "normpath"
 version = "1.3.0"
@ -6258,7 +6267,7 @@ dependencies = [
 "bytesize",
 "lazy_static",
 "libc",
- "nom",
+ "nom 7.1.3",
 "time",
 "winapi",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -93,7 +93,7 @@ junction = "1.2.0"
 libc = "0.2"
 libc-stdhandle = "0.1"
 maplit = "1.0.2"
-nom = "7.1.3"
+nom = "8.0.0"
 num-format = "0.4.4"
 num_cpus = "1.17.0"
 num_enum = "0.7.3"
--- a/cargo/licenses.json
+++ b/cargo/licenses.json
@ -2645,6 +2645,15 @@
    "license_file": null,
    "description": "A byte-oriented, zero-copy, parser combinators library"
  },
+  {
+    "name": "nom",
+    "version": "8.0.0",
+    "authors": "contact@geoffroycouprie.com",
+    "repository": "https://github.com/rust-bakery/nom",
+    "license": "MIT",
+    "license_file": null,
+    "description": "A byte-oriented, zero-copy, parser combinators library"
+  },
  {
    "name": "ntapi",
    "version": "0.4.1",
--- a/rslib/src/card_rendering/parser.rs
+++ b/rslib/src/card_rendering/parser.rs
@ -14,14 +14,14 @@ use nom::combinator::recognize;
 use nom::combinator::rest;
 use nom::combinator::success;
 use nom::combinator::value;
-use nom::multi::fold_many0;
 use nom::multi::many0;
 use nom::sequence::delimited;
 use nom::sequence::pair;
 use nom::sequence::preceded;
 use nom::sequence::separated_pair;
 use nom::sequence::terminated;
-use nom::sequence::tuple;
+use nom::Input;
+use nom::Parser;

 use super::CardNodes;
 use super::Directive;
@ -86,9 +86,12 @@ impl<'a> Directive<'a> {
 }

 /// Consume 0 or more of anything in " \t\r\n" after `parser`.
-fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<'s, O>
+fn trailing_whitespace0<I, O, E, P>(parser: P) -> impl Parser<I, Output = O, Error = E>
 where
-    P: FnMut(&'s str) -> IResult<'s, O> + 'parser,
+    I: Input,
+    <I as Input>::Item: nom::AsChar,
+    E: nom::error::ParseError<I>,
+    P: Parser<I, Output = O, Error = E>,
 {
    terminated(parser, multispace0)
 }
@ -97,11 +100,11 @@ where
 fn is_not0<'parser, 'arr: 'parser, 's: 'parser>(
    arr: &'arr str,
 ) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser {
-    alt((is_not(arr), success("")))
+    move |s| alt((is_not(arr), success(""))).parse(s)
 }

 fn node(s: &str) -> IResult<Node> {
-    alt((sound_node, tag_node, text_node))(s)
+    alt((sound_node, tag_node, text_node)).parse(s)
 }

 /// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or
@ -110,11 +113,11 @@ fn sound_node(s: &str) -> IResult<Node> {
    map(
        delimited(tag("[sound:"), is_not("]"), tag("]")),
        Node::SoundOrVideo,
-    )(s)
+    )
+    .parse(s)
 }

 fn take_till_potential_tag_start(s: &str) -> IResult<&str> {
-    use nom::InputTake;
    // first char could be '[', but wasn't part of a node, so skip (eof ends parse)
    let (after, offset) = anychar(s).map(|(s, c)| (s, c.len_utf8()))?;
    Ok(match after.find('[') {
@ -127,7 +130,7 @@ fn take_till_potential_tag_start(s: &str) -> IResult<&str> {
 fn tag_node(s: &str) -> IResult<Node> {
    /// Match the start of an opening tag and return its name.
    fn name(s: &str) -> IResult<&str> {
-        preceded(tag("[anki:"), is_not("] \t\r\n"))(s)
+        preceded(tag("[anki:"), is_not("] \t\r\n")).parse(s)
    }

    /// Return a parser to match an opening `name` tag and return its options.
@ -138,31 +141,35 @@ fn tag_node(s: &str) -> IResult<Node> {
        /// empty.
        fn options(s: &str) -> IResult<Vec<(&str, &str)>> {
            fn key(s: &str) -> IResult<&str> {
-                is_not("] \t\r\n=")(s)
+                is_not("] \t\r\n=").parse(s)
            }

            fn val(s: &str) -> IResult<&str> {
                alt((
                    delimited(tag("\""), is_not0("\""), tag("\"")),
                    is_not0("] \t\r\n\""),
-                ))(s)
+                ))
+                .parse(s)
            }

-            many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s)
+            many0(trailing_whitespace0(separated_pair(key, tag("="), val))).parse(s)
        }

-        delimited(
-            pair(tag("[anki:"), trailing_whitespace0(tag(name))),
-            options,
-            tag("]"),
-        )
+        move |s| {
+            delimited(
+                pair(tag("[anki:"), trailing_whitespace0(tag(name))),
+                options,
+                tag("]"),
+            )
+            .parse(s)
+        }
    }

    /// Return a parser to match a closing `name` tag.
    fn closing_parser<'parser, 'name: 'parser, 's: 'parser>(
        name: &'name str,
    ) -> impl FnMut(&'s str) -> IResult<'s, ()> + 'parser {
-        value((), tuple((tag("[/anki:"), tag(name), tag("]"))))
+        move |s| value((), (tag("[/anki:"), tag(name), tag("]"))).parse(s)
    }

    /// Return a parser to match and return anything until a closing `name` tag
@ -170,12 +177,13 @@ fn tag_node(s: &str) -> IResult<Node> {
    fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
        name: &'name str,
    ) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser {
-        recognize(fold_many0(
-            pair(not(closing_parser(name)), take_till_potential_tag_start),
-            // we don't need to accumulate anything
-            || (),
-            |_, _| (),
-        ))
+        move |s| {
+            recognize(many0(pair(
+                not(closing_parser(name)),
+                take_till_potential_tag_start,
+            )))
+            .parse(s)
+        }
    }

    let (_, tag_name) = name(s)?;
@ -185,11 +193,12 @@ fn tag_node(s: &str) -> IResult<Node> {
            closing_parser(tag_name),
        ),
        |(options, content)| Node::Directive(Directive::new(tag_name, options, content)),
-    )(s)
+    )
+    .parse(s)
 }

 fn text_node(s: &str) -> IResult<Node> {
-    map(take_till_potential_tag_start, Node::Text)(s)
+    map(take_till_potential_tag_start, Node::Text).parse(s)
 }

 #[cfg(test)]
--- a/rslib/src/cloze.rs
+++ b/rslib/src/cloze.rs
@ -15,6 +15,7 @@ use nom::bytes::complete::tag;
 use nom::bytes::complete::take_while;
 use nom::combinator::map;
 use nom::IResult;
+use nom::Parser;
 use regex::Captures;
 use regex::Regex;

@ -72,7 +73,7 @@ fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
    }

    fn close_cloze(text: &str) -> IResult<&str, Token> {
-        map(tag("}}"), |_| Token::CloseCloze)(text)
+        map(tag("}}"), |_| Token::CloseCloze).parse(text)
    }

    /// Match a run of text until an open/close marker is encountered.
@ -87,7 +88,7 @@ fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
        // start with the no-match case
        let mut index = text.len();
        for (idx, _) in text.char_indices() {
-            if other_token(&text[idx..]).is_ok() {
+            if other_token.parse(&text[idx..]).is_ok() {
                index = idx;
                break;
            }
@ -99,8 +100,9 @@ fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
        if text.is_empty() {
            None
        } else {
-            let (remaining_text, token) =
-                alt((open_cloze, close_cloze, normal_text))(text).unwrap();
+            let (remaining_text, token) = alt((open_cloze, close_cloze, normal_text))
+                .parse(text)
+                .unwrap();
            text = remaining_text;
            Some(token)
        }
--- a/rslib/src/image_occlusion/imageocclusion.rs
+++ b/rslib/src/image_occlusion/imageocclusion.rs
@ -13,6 +13,7 @@ use nom::character::complete::char;
 use nom::error::ErrorKind;
 use nom::sequence::preceded;
 use nom::sequence::separated_pair;
+use nom::Parser;

 fn unescape(text: &str) -> String {
    text.replace("\\:", ":")
@ -22,11 +23,12 @@ pub fn parse_image_cloze(text: &str) -> Option<ImageOcclusionShape> {
    if let Some((shape, _)) = text.split_once(':') {
        let mut properties = vec![];
        let mut remaining = &text[shape.len()..];
-        while let Ok((rem, (name, value))) = separated_pair::<_, _, _, _, (_, ErrorKind), _, _, _>(
+        while let Ok((rem, (name, value))) = separated_pair::<_, _, _, (_, ErrorKind), _, _, _>(
            preceded(tag(":"), is_not("=")),
            tag("="),
            escaped(is_not("\\:"), '\\', char(':')),
-        )(remaining)
+        )
+        .parse(remaining)
        {
            remaining = rem;
            let value = unescape(value);
--- a/rslib/src/search/parser.rs
+++ b/rslib/src/search/parser.rs
@ -19,6 +19,7 @@ use nom::error::ErrorKind as NomErrorKind;
 use nom::multi::many0;
 use nom::sequence::preceded;
 use nom::sequence::separated_pair;
+use nom::Parser;
 use regex::Captures;
 use regex::Regex;

@ -202,18 +203,19 @@ fn group_inner(input: &str) -> IResult<Vec<Node>> {
 }

 fn whitespace0(s: &str) -> IResult<Vec<char>> {
-    many0(one_of(" \u{3000}"))(s)
+    many0(one_of(" \u{3000}")).parse(s)
 }

 /// Optional leading space, then a (negated) group or text
 fn node(s: &str) -> IResult<Node> {
-    preceded(whitespace0, alt((negated_node, group, text)))(s)
+    preceded(whitespace0, alt((negated_node, group, text))).parse(s)
 }

 fn negated_node(s: &str) -> IResult<Node> {
    map(preceded(char('-'), alt((group, text))), |node| {
        Node::Not(Box::new(node))
-    })(s)
+    })
+    .parse(s)
 }

 /// One or more nodes surrounded by brackets, eg (one OR two)
@ -233,7 +235,7 @@ fn group(s: &str) -> IResult<Node> {

 /// Either quoted or unquoted text
 fn text(s: &str) -> IResult<Node> {
-    alt((quoted_term, partially_quoted_term, unquoted_term))(s)
+    alt((quoted_term, partially_quoted_term, unquoted_term)).parse(s)
 }

 /// Quoted text, including the outer double quotes.
@ -248,7 +250,8 @@ fn partially_quoted_term(s: &str) -> IResult<Node> {
        escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(" \u{3000}")),
        char(':'),
        quoted_term_str,
-    )(s)?;
+    )
+    .parse(s)?;
    Ok((
        remaining,
        Node::Search(search_node_for_text_with_argument(key, val)?),
@ -296,7 +299,7 @@ fn unquoted_term(s: &str) -> IResult<Node> {
 fn quoted_term_str(s: &str) -> IResult<&str> {
    let (opened, _) = char('"')(s)?;
    if let Ok((tail, inner)) =
-        escaped::<_, ParseError, _, _, _, _>(is_not(r#""\"#), '\\', anychar)(opened)
+        escaped::<_, ParseError, _, _>(is_not(r#""\"#), '\\', anychar).parse(opened)
    {
        if let Ok((remaining, _)) = char::<_, ParseError>('"')(tail) {
            Ok((remaining, inner))
@ -321,7 +324,8 @@ fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
    // leading : is only possible error for well-formed input
    let (tail, head) = verify(escaped(is_not(r":\"), '\\', anychar), |t: &str| {
        !t.is_empty()
-    })(s)
+    })
+    .parse(s)
    .map_err(|_: nom::Err<ParseError>| parse_failure(s, FailKind::MissingKey))?;
    if tail.is_empty() {
        Ok(SearchNode::UnqualifiedText(unescape(head)?))
@ -407,7 +411,7 @@ fn parse_resched(s: &str) -> ParseResult<SearchNode> {

 /// eg prop:ivl>3, prop:ease!=2.5
 fn parse_prop(prop_clause: &str) -> ParseResult<SearchNode> {
-    let (tail, prop) = alt::<_, _, ParseError, _>((
+    let (tail, prop) = alt((
        tag("ivl"),
        tag("due"),
        tag("reps"),
@ -421,8 +425,9 @@ fn parse_prop(prop_clause: &str) -> ParseResult<SearchNode> {
        tag("r"),
        recognize(preceded(tag("cdn:"), alphanumeric1)),
        recognize(preceded(tag("cds:"), alphanumeric1)),
-    ))(prop_clause)
-    .map_err(|_| {
+    ))
+    .parse(prop_clause)
+    .map_err(|_: nom::Err<ParseError>| {
        parse_failure(
            prop_clause,
            FailKind::InvalidPropProperty {
@ -431,15 +436,16 @@ fn parse_prop(prop_clause: &str) -> ParseResult<SearchNode> {
        )
    })?;

-    let (num, operator) = alt::<_, _, ParseError, _>((
+    let (num, operator) = alt((
        tag("<="),
        tag(">="),
        tag("!="),
        tag("="),
        tag("<"),
        tag(">"),
-    ))(tail)
-    .map_err(|_| {
+    ))
+    .parse(tail)
+    .map_err(|_: nom::Err<ParseError>| {
        parse_failure(
            prop_clause,
            FailKind::InvalidPropOperator {
--- a/rslib/src/template.rs
+++ b/rslib/src/template.rs
@ -13,6 +13,7 @@ use nom::bytes::complete::tag;
 use nom::bytes::complete::take_until;
 use nom::combinator::map;
 use nom::sequence::delimited;
+use nom::Parser;
 use regex::Regex;

 use crate::cloze::cloze_number_in_fields;
@ -67,7 +68,8 @@ impl TemplateMode {
                tag(self.end_tag()),
            ),
            |out| classify_handle(out),
-        )(s)
+        )
+        .parse(s)
    }

    /// Return the next handlebar, comment or text token.
@ -127,7 +129,8 @@ fn comment_token(s: &str) -> nom::IResult<&str, Token> {
            tag(COMMENT_END),
        ),
        Token::Comment,
-    )(s)
+    )
+    .parse(s)
 }

 fn tokens(mut template: &str) -> impl Iterator<Item = TemplateResult<Token<'_>>> {