From cc395f7c448ee0ba268077a1e21fb8e0b80e99f4 Mon Sep 17 00:00:00 2001 From: llama Date: Sat, 21 Jun 2025 20:15:19 +0800 Subject: [PATCH] Upgrade to nom 8.0.0 (#4105) * bump nom to 8.0.0 * update cloze.rs * update template.rs * update imageocclusion.rs * update search/parser.rs * update card_rendering/parser.rs * replace use of fold_many0 with many0 in nom 8, `many0` doesn't accumulate when used within `recognize` --- Cargo.lock | 13 ++++- Cargo.toml | 2 +- cargo/licenses.json | 9 +++ rslib/src/card_rendering/parser.rs | 61 ++++++++++++--------- rslib/src/cloze.rs | 10 ++-- rslib/src/image_occlusion/imageocclusion.rs | 6 +- rslib/src/search/parser.rs | 32 ++++++----- rslib/src/template.rs | 7 ++- 8 files changed, 90 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 66173027b..03f9e63c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,7 +117,7 @@ dependencies = [ "id_tree", "inflections", "itertools 0.14.0", - "nom", + "nom 8.0.0", "num_cpus", "num_enum", "once_cell", @@ -4117,6 +4117,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "normpath" version = "1.3.0" @@ -6258,7 +6267,7 @@ dependencies = [ "bytesize", "lazy_static", "libc", - "nom", + "nom 7.1.3", "time", "winapi", ] diff --git a/Cargo.toml b/Cargo.toml index 980956b05..61cca8649 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,7 +93,7 @@ junction = "1.2.0" libc = "0.2" libc-stdhandle = "0.1" maplit = "1.0.2" -nom = "7.1.3" +nom = "8.0.0" num-format = "0.4.4" num_cpus = "1.17.0" num_enum = "0.7.3" diff --git a/cargo/licenses.json b/cargo/licenses.json index c16b20aa7..f2695ac76 100644 --- a/cargo/licenses.json +++ b/cargo/licenses.json @@ -2645,6 +2645,15 @@ "license_file": null, "description": "A byte-oriented, zero-copy, parser combinators library" }, + { + "name": "nom", + "version": "8.0.0", + "authors": "contact@geoffroycouprie.com", + "repository": "https://github.com/rust-bakery/nom", + "license": "MIT", + "license_file": null, + "description": "A byte-oriented, zero-copy, parser combinators library" + }, { "name": "ntapi", "version": "0.4.1", diff --git a/rslib/src/card_rendering/parser.rs b/rslib/src/card_rendering/parser.rs index 6f1cc662e..b124c069d 100644 --- a/rslib/src/card_rendering/parser.rs +++ b/rslib/src/card_rendering/parser.rs @@ -14,14 +14,14 @@ use nom::combinator::recognize; use nom::combinator::rest; use nom::combinator::success; use nom::combinator::value; -use nom::multi::fold_many0; use nom::multi::many0; use nom::sequence::delimited; use nom::sequence::pair; use nom::sequence::preceded; use nom::sequence::separated_pair; use nom::sequence::terminated; -use nom::sequence::tuple; +use nom::Input; +use nom::Parser; use super::CardNodes; use super::Directive; @@ -86,9 +86,12 @@ impl<'a> Directive<'a> { } /// Consume 0 or more of anything in " \t\r\n" after `parser`. -fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<'s, O> +fn trailing_whitespace0(parser: P) -> impl Parser where - P: FnMut(&'s str) -> IResult<'s, O> + 'parser, + I: Input, + ::Item: nom::AsChar, + E: nom::error::ParseError, + P: Parser, { terminated(parser, multispace0) } @@ -97,11 +100,11 @@ where fn is_not0<'parser, 'arr: 'parser, 's: 'parser>( arr: &'arr str, ) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser { - alt((is_not(arr), success(""))) + move |s| alt((is_not(arr), success(""))).parse(s) } fn node(s: &str) -> IResult { - alt((sound_node, tag_node, text_node))(s) + alt((sound_node, tag_node, text_node)).parse(s) } /// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or @@ -110,11 +113,11 @@ fn sound_node(s: &str) -> IResult { map( delimited(tag("[sound:"), is_not("]"), tag("]")), Node::SoundOrVideo, - )(s) + ) + .parse(s) } fn take_till_potential_tag_start(s: &str) -> IResult<&str> { - use nom::InputTake; // first char could be '[', but wasn't part of a node, so skip (eof ends parse) let (after, offset) = anychar(s).map(|(s, c)| (s, c.len_utf8()))?; Ok(match after.find('[') { @@ -127,7 +130,7 @@ fn take_till_potential_tag_start(s: &str) -> IResult<&str> { fn tag_node(s: &str) -> IResult { /// Match the start of an opening tag and return its name. fn name(s: &str) -> IResult<&str> { - preceded(tag("[anki:"), is_not("] \t\r\n"))(s) + preceded(tag("[anki:"), is_not("] \t\r\n")).parse(s) } /// Return a parser to match an opening `name` tag and return its options. @@ -138,31 +141,35 @@ fn tag_node(s: &str) -> IResult { /// empty. fn options(s: &str) -> IResult> { fn key(s: &str) -> IResult<&str> { - is_not("] \t\r\n=")(s) + is_not("] \t\r\n=").parse(s) } fn val(s: &str) -> IResult<&str> { alt(( delimited(tag("\""), is_not0("\""), tag("\"")), is_not0("] \t\r\n\""), - ))(s) + )) + .parse(s) } - many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s) + many0(trailing_whitespace0(separated_pair(key, tag("="), val))).parse(s) } - delimited( - pair(tag("[anki:"), trailing_whitespace0(tag(name))), - options, - tag("]"), - ) + move |s| { + delimited( + pair(tag("[anki:"), trailing_whitespace0(tag(name))), + options, + tag("]"), + ) + .parse(s) + } } /// Return a parser to match a closing `name` tag. fn closing_parser<'parser, 'name: 'parser, 's: 'parser>( name: &'name str, ) -> impl FnMut(&'s str) -> IResult<'s, ()> + 'parser { - value((), tuple((tag("[/anki:"), tag(name), tag("]")))) + move |s| value((), (tag("[/anki:"), tag(name), tag("]"))).parse(s) } /// Return a parser to match and return anything until a closing `name` tag @@ -170,12 +177,13 @@ fn tag_node(s: &str) -> IResult { fn content_parser<'parser, 'name: 'parser, 's: 'parser>( name: &'name str, ) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser { - recognize(fold_many0( - pair(not(closing_parser(name)), take_till_potential_tag_start), - // we don't need to accumulate anything - || (), - |_, _| (), - )) + move |s| { + recognize(many0(pair( + not(closing_parser(name)), + take_till_potential_tag_start, + ))) + .parse(s) + } } let (_, tag_name) = name(s)?; @@ -185,11 +193,12 @@ fn tag_node(s: &str) -> IResult { closing_parser(tag_name), ), |(options, content)| Node::Directive(Directive::new(tag_name, options, content)), - )(s) + ) + .parse(s) } fn text_node(s: &str) -> IResult { - map(take_till_potential_tag_start, Node::Text)(s) + map(take_till_potential_tag_start, Node::Text).parse(s) } #[cfg(test)] diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs index f57d07ab0..208a2f4ed 100644 --- a/rslib/src/cloze.rs +++ b/rslib/src/cloze.rs @@ -15,6 +15,7 @@ use nom::bytes::complete::tag; use nom::bytes::complete::take_while; use nom::combinator::map; use nom::IResult; +use nom::Parser; use regex::Captures; use regex::Regex; @@ -72,7 +73,7 @@ fn tokenize(mut text: &str) -> impl Iterator { } fn close_cloze(text: &str) -> IResult<&str, Token> { - map(tag("}}"), |_| Token::CloseCloze)(text) + map(tag("}}"), |_| Token::CloseCloze).parse(text) } /// Match a run of text until an open/close marker is encountered. @@ -87,7 +88,7 @@ fn tokenize(mut text: &str) -> impl Iterator { // start with the no-match case let mut index = text.len(); for (idx, _) in text.char_indices() { - if other_token(&text[idx..]).is_ok() { + if other_token.parse(&text[idx..]).is_ok() { index = idx; break; } @@ -99,8 +100,9 @@ fn tokenize(mut text: &str) -> impl Iterator { if text.is_empty() { None } else { - let (remaining_text, token) = - alt((open_cloze, close_cloze, normal_text))(text).unwrap(); + let (remaining_text, token) = alt((open_cloze, close_cloze, normal_text)) + .parse(text) + .unwrap(); text = remaining_text; Some(token) } diff --git a/rslib/src/image_occlusion/imageocclusion.rs b/rslib/src/image_occlusion/imageocclusion.rs index 2ba83374f..e2eea9a39 100644 --- a/rslib/src/image_occlusion/imageocclusion.rs +++ b/rslib/src/image_occlusion/imageocclusion.rs @@ -13,6 +13,7 @@ use nom::character::complete::char; use nom::error::ErrorKind; use nom::sequence::preceded; use nom::sequence::separated_pair; +use nom::Parser; fn unescape(text: &str) -> String { text.replace("\\:", ":") @@ -22,11 +23,12 @@ pub fn parse_image_cloze(text: &str) -> Option { if let Some((shape, _)) = text.split_once(':') { let mut properties = vec![]; let mut remaining = &text[shape.len()..]; - while let Ok((rem, (name, value))) = separated_pair::<_, _, _, _, (_, ErrorKind), _, _, _>( + while let Ok((rem, (name, value))) = separated_pair::<_, _, _, (_, ErrorKind), _, _, _>( preceded(tag(":"), is_not("=")), tag("="), escaped(is_not("\\:"), '\\', char(':')), - )(remaining) + ) + .parse(remaining) { remaining = rem; let value = unescape(value); diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs index 93df4ea08..041ec4948 100644 --- a/rslib/src/search/parser.rs +++ b/rslib/src/search/parser.rs @@ -19,6 +19,7 @@ use nom::error::ErrorKind as NomErrorKind; use nom::multi::many0; use nom::sequence::preceded; use nom::sequence::separated_pair; +use nom::Parser; use regex::Captures; use regex::Regex; @@ -202,18 +203,19 @@ fn group_inner(input: &str) -> IResult> { } fn whitespace0(s: &str) -> IResult> { - many0(one_of(" \u{3000}"))(s) + many0(one_of(" \u{3000}")).parse(s) } /// Optional leading space, then a (negated) group or text fn node(s: &str) -> IResult { - preceded(whitespace0, alt((negated_node, group, text)))(s) + preceded(whitespace0, alt((negated_node, group, text))).parse(s) } fn negated_node(s: &str) -> IResult { map(preceded(char('-'), alt((group, text))), |node| { Node::Not(Box::new(node)) - })(s) + }) + .parse(s) } /// One or more nodes surrounded by brackets, eg (one OR two) @@ -233,7 +235,7 @@ fn group(s: &str) -> IResult { /// Either quoted or unquoted text fn text(s: &str) -> IResult { - alt((quoted_term, partially_quoted_term, unquoted_term))(s) + alt((quoted_term, partially_quoted_term, unquoted_term)).parse(s) } /// Quoted text, including the outer double quotes. @@ -248,7 +250,8 @@ fn partially_quoted_term(s: &str) -> IResult { escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(" \u{3000}")), char(':'), quoted_term_str, - )(s)?; + ) + .parse(s)?; Ok(( remaining, Node::Search(search_node_for_text_with_argument(key, val)?), @@ -296,7 +299,7 @@ fn unquoted_term(s: &str) -> IResult { fn quoted_term_str(s: &str) -> IResult<&str> { let (opened, _) = char('"')(s)?; if let Ok((tail, inner)) = - escaped::<_, ParseError, _, _, _, _>(is_not(r#""\"#), '\\', anychar)(opened) + escaped::<_, ParseError, _, _>(is_not(r#""\"#), '\\', anychar).parse(opened) { if let Ok((remaining, _)) = char::<_, ParseError>('"')(tail) { Ok((remaining, inner)) @@ -321,7 +324,8 @@ fn search_node_for_text(s: &str) -> ParseResult { // leading : is only possible error for well-formed input let (tail, head) = verify(escaped(is_not(r":\"), '\\', anychar), |t: &str| { !t.is_empty() - })(s) + }) + .parse(s) .map_err(|_: nom::Err| parse_failure(s, FailKind::MissingKey))?; if tail.is_empty() { Ok(SearchNode::UnqualifiedText(unescape(head)?)) @@ -407,7 +411,7 @@ fn parse_resched(s: &str) -> ParseResult { /// eg prop:ivl>3, prop:ease!=2.5 fn parse_prop(prop_clause: &str) -> ParseResult { - let (tail, prop) = alt::<_, _, ParseError, _>(( + let (tail, prop) = alt(( tag("ivl"), tag("due"), tag("reps"), @@ -421,8 +425,9 @@ fn parse_prop(prop_clause: &str) -> ParseResult { tag("r"), recognize(preceded(tag("cdn:"), alphanumeric1)), recognize(preceded(tag("cds:"), alphanumeric1)), - ))(prop_clause) - .map_err(|_| { + )) + .parse(prop_clause) + .map_err(|_: nom::Err| { parse_failure( prop_clause, FailKind::InvalidPropProperty { @@ -431,15 +436,16 @@ fn parse_prop(prop_clause: &str) -> ParseResult { ) })?; - let (num, operator) = alt::<_, _, ParseError, _>(( + let (num, operator) = alt(( tag("<="), tag(">="), tag("!="), tag("="), tag("<"), tag(">"), - ))(tail) - .map_err(|_| { + )) + .parse(tail) + .map_err(|_: nom::Err| { parse_failure( prop_clause, FailKind::InvalidPropOperator { diff --git a/rslib/src/template.rs b/rslib/src/template.rs index d09ade580..e3a900a2b 100644 --- a/rslib/src/template.rs +++ b/rslib/src/template.rs @@ -13,6 +13,7 @@ use nom::bytes::complete::tag; use nom::bytes::complete::take_until; use nom::combinator::map; use nom::sequence::delimited; +use nom::Parser; use regex::Regex; use crate::cloze::cloze_number_in_fields; @@ -67,7 +68,8 @@ impl TemplateMode { tag(self.end_tag()), ), |out| classify_handle(out), - )(s) + ) + .parse(s) } /// Return the next handlebar, comment or text token. @@ -127,7 +129,8 @@ fn comment_token(s: &str) -> nom::IResult<&str, Token> { tag(COMMENT_END), ), Token::Comment, - )(s) + ) + .parse(s) } fn tokens(mut template: &str) -> impl Iterator>> {