Upgrade to nom 8.0.0 (#4105)

* bump nom to 8.0.0

* update cloze.rs

* update template.rs

* update imageocclusion.rs

* update search/parser.rs

* update card_rendering/parser.rs

* replace use of fold_many0 with many0

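In nom 8, `many0` doesn't accumulate its results when used within `recognize`, so the no-op `fold_many0` that existed only to avoid accumulating is no longer needed.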
This commit is contained in:
llama 2025-06-21 20:15:19 +08:00 committed by GitHub
parent a4c95f5fbd
commit cc395f7c44
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 90 additions and 50 deletions

13
Cargo.lock generated
View file

@ -117,7 +117,7 @@ dependencies = [
"id_tree",
"inflections",
"itertools 0.14.0",
"nom",
"nom 8.0.0",
"num_cpus",
"num_enum",
"once_cell",
@ -4117,6 +4117,15 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]
[[package]]
name = "normpath"
version = "1.3.0"
@ -6258,7 +6267,7 @@ dependencies = [
"bytesize",
"lazy_static",
"libc",
"nom",
"nom 7.1.3",
"time",
"winapi",
]

View file

@ -93,7 +93,7 @@ junction = "1.2.0"
libc = "0.2"
libc-stdhandle = "0.1"
maplit = "1.0.2"
nom = "7.1.3"
nom = "8.0.0"
num-format = "0.4.4"
num_cpus = "1.17.0"
num_enum = "0.7.3"

View file

@ -2645,6 +2645,15 @@
"license_file": null,
"description": "A byte-oriented, zero-copy, parser combinators library"
},
{
"name": "nom",
"version": "8.0.0",
"authors": "contact@geoffroycouprie.com",
"repository": "https://github.com/rust-bakery/nom",
"license": "MIT",
"license_file": null,
"description": "A byte-oriented, zero-copy, parser combinators library"
},
{
"name": "ntapi",
"version": "0.4.1",

View file

@ -14,14 +14,14 @@ use nom::combinator::recognize;
use nom::combinator::rest;
use nom::combinator::success;
use nom::combinator::value;
use nom::multi::fold_many0;
use nom::multi::many0;
use nom::sequence::delimited;
use nom::sequence::pair;
use nom::sequence::preceded;
use nom::sequence::separated_pair;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::Input;
use nom::Parser;
use super::CardNodes;
use super::Directive;
@ -86,9 +86,12 @@ impl<'a> Directive<'a> {
}
/// Consume 0 or more of anything in " \t\r\n" after `parser`.
fn trailing_whitespace0<'parser, 's, P, O>(parser: P) -> impl FnMut(&'s str) -> IResult<'s, O>
fn trailing_whitespace0<I, O, E, P>(parser: P) -> impl Parser<I, Output = O, Error = E>
where
P: FnMut(&'s str) -> IResult<'s, O> + 'parser,
I: Input,
<I as Input>::Item: nom::AsChar,
E: nom::error::ParseError<I>,
P: Parser<I, Output = O, Error = E>,
{
terminated(parser, multispace0)
}
@ -97,11 +100,11 @@ where
fn is_not0<'parser, 'arr: 'parser, 's: 'parser>(
arr: &'arr str,
) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser {
alt((is_not(arr), success("")))
move |s| alt((is_not(arr), success(""))).parse(s)
}
fn node(s: &str) -> IResult<Node> {
alt((sound_node, tag_node, text_node))(s)
alt((sound_node, tag_node, text_node)).parse(s)
}
/// A sound tag `[sound:resource]`, where `resource` is pointing to a sound or
@ -110,11 +113,11 @@ fn sound_node(s: &str) -> IResult<Node> {
map(
delimited(tag("[sound:"), is_not("]"), tag("]")),
Node::SoundOrVideo,
)(s)
)
.parse(s)
}
fn take_till_potential_tag_start(s: &str) -> IResult<&str> {
use nom::InputTake;
// first char could be '[', but wasn't part of a node, so skip (eof ends parse)
let (after, offset) = anychar(s).map(|(s, c)| (s, c.len_utf8()))?;
Ok(match after.find('[') {
@ -127,7 +130,7 @@ fn take_till_potential_tag_start(s: &str) -> IResult<&str> {
fn tag_node(s: &str) -> IResult<Node> {
/// Match the start of an opening tag and return its name.
fn name(s: &str) -> IResult<&str> {
preceded(tag("[anki:"), is_not("] \t\r\n"))(s)
preceded(tag("[anki:"), is_not("] \t\r\n")).parse(s)
}
/// Return a parser to match an opening `name` tag and return its options.
@ -138,31 +141,35 @@ fn tag_node(s: &str) -> IResult<Node> {
/// empty.
fn options(s: &str) -> IResult<Vec<(&str, &str)>> {
fn key(s: &str) -> IResult<&str> {
is_not("] \t\r\n=")(s)
is_not("] \t\r\n=").parse(s)
}
fn val(s: &str) -> IResult<&str> {
alt((
delimited(tag("\""), is_not0("\""), tag("\"")),
is_not0("] \t\r\n\""),
))(s)
))
.parse(s)
}
many0(trailing_whitespace0(separated_pair(key, tag("="), val)))(s)
many0(trailing_whitespace0(separated_pair(key, tag("="), val))).parse(s)
}
delimited(
pair(tag("[anki:"), trailing_whitespace0(tag(name))),
options,
tag("]"),
)
move |s| {
delimited(
pair(tag("[anki:"), trailing_whitespace0(tag(name))),
options,
tag("]"),
)
.parse(s)
}
}
/// Return a parser to match a closing `name` tag.
fn closing_parser<'parser, 'name: 'parser, 's: 'parser>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<'s, ()> + 'parser {
value((), tuple((tag("[/anki:"), tag(name), tag("]"))))
move |s| value((), (tag("[/anki:"), tag(name), tag("]"))).parse(s)
}
/// Return a parser to match and return anything until a closing `name` tag
@ -170,12 +177,13 @@ fn tag_node(s: &str) -> IResult<Node> {
fn content_parser<'parser, 'name: 'parser, 's: 'parser>(
name: &'name str,
) -> impl FnMut(&'s str) -> IResult<'s, &'s str> + 'parser {
recognize(fold_many0(
pair(not(closing_parser(name)), take_till_potential_tag_start),
// we don't need to accumulate anything
|| (),
|_, _| (),
))
move |s| {
recognize(many0(pair(
not(closing_parser(name)),
take_till_potential_tag_start,
)))
.parse(s)
}
}
let (_, tag_name) = name(s)?;
@ -185,11 +193,12 @@ fn tag_node(s: &str) -> IResult<Node> {
closing_parser(tag_name),
),
|(options, content)| Node::Directive(Directive::new(tag_name, options, content)),
)(s)
)
.parse(s)
}
fn text_node(s: &str) -> IResult<Node> {
map(take_till_potential_tag_start, Node::Text)(s)
map(take_till_potential_tag_start, Node::Text).parse(s)
}
#[cfg(test)]

View file

@ -15,6 +15,7 @@ use nom::bytes::complete::tag;
use nom::bytes::complete::take_while;
use nom::combinator::map;
use nom::IResult;
use nom::Parser;
use regex::Captures;
use regex::Regex;
@ -72,7 +73,7 @@ fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
}
fn close_cloze(text: &str) -> IResult<&str, Token> {
map(tag("}}"), |_| Token::CloseCloze)(text)
map(tag("}}"), |_| Token::CloseCloze).parse(text)
}
/// Match a run of text until an open/close marker is encountered.
@ -87,7 +88,7 @@ fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
// start with the no-match case
let mut index = text.len();
for (idx, _) in text.char_indices() {
if other_token(&text[idx..]).is_ok() {
if other_token.parse(&text[idx..]).is_ok() {
index = idx;
break;
}
@ -99,8 +100,9 @@ fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
if text.is_empty() {
None
} else {
let (remaining_text, token) =
alt((open_cloze, close_cloze, normal_text))(text).unwrap();
let (remaining_text, token) = alt((open_cloze, close_cloze, normal_text))
.parse(text)
.unwrap();
text = remaining_text;
Some(token)
}

View file

@ -13,6 +13,7 @@ use nom::character::complete::char;
use nom::error::ErrorKind;
use nom::sequence::preceded;
use nom::sequence::separated_pair;
use nom::Parser;
fn unescape(text: &str) -> String {
text.replace("\\:", ":")
@ -22,11 +23,12 @@ pub fn parse_image_cloze(text: &str) -> Option<ImageOcclusionShape> {
if let Some((shape, _)) = text.split_once(':') {
let mut properties = vec![];
let mut remaining = &text[shape.len()..];
while let Ok((rem, (name, value))) = separated_pair::<_, _, _, _, (_, ErrorKind), _, _, _>(
while let Ok((rem, (name, value))) = separated_pair::<_, _, _, (_, ErrorKind), _, _, _>(
preceded(tag(":"), is_not("=")),
tag("="),
escaped(is_not("\\:"), '\\', char(':')),
)(remaining)
)
.parse(remaining)
{
remaining = rem;
let value = unescape(value);

View file

@ -19,6 +19,7 @@ use nom::error::ErrorKind as NomErrorKind;
use nom::multi::many0;
use nom::sequence::preceded;
use nom::sequence::separated_pair;
use nom::Parser;
use regex::Captures;
use regex::Regex;
@ -202,18 +203,19 @@ fn group_inner(input: &str) -> IResult<Vec<Node>> {
}
fn whitespace0(s: &str) -> IResult<Vec<char>> {
many0(one_of(" \u{3000}"))(s)
many0(one_of(" \u{3000}")).parse(s)
}
/// Optional leading space, then a (negated) group or text
fn node(s: &str) -> IResult<Node> {
preceded(whitespace0, alt((negated_node, group, text)))(s)
preceded(whitespace0, alt((negated_node, group, text))).parse(s)
}
fn negated_node(s: &str) -> IResult<Node> {
map(preceded(char('-'), alt((group, text))), |node| {
Node::Not(Box::new(node))
})(s)
})
.parse(s)
}
/// One or more nodes surrounded by brackets, eg (one OR two)
@ -233,7 +235,7 @@ fn group(s: &str) -> IResult<Node> {
/// Either quoted or unquoted text
fn text(s: &str) -> IResult<Node> {
alt((quoted_term, partially_quoted_term, unquoted_term))(s)
alt((quoted_term, partially_quoted_term, unquoted_term)).parse(s)
}
/// Quoted text, including the outer double quotes.
@ -248,7 +250,8 @@ fn partially_quoted_term(s: &str) -> IResult<Node> {
escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(" \u{3000}")),
char(':'),
quoted_term_str,
)(s)?;
)
.parse(s)?;
Ok((
remaining,
Node::Search(search_node_for_text_with_argument(key, val)?),
@ -296,7 +299,7 @@ fn unquoted_term(s: &str) -> IResult<Node> {
fn quoted_term_str(s: &str) -> IResult<&str> {
let (opened, _) = char('"')(s)?;
if let Ok((tail, inner)) =
escaped::<_, ParseError, _, _, _, _>(is_not(r#""\"#), '\\', anychar)(opened)
escaped::<_, ParseError, _, _>(is_not(r#""\"#), '\\', anychar).parse(opened)
{
if let Ok((remaining, _)) = char::<_, ParseError>('"')(tail) {
Ok((remaining, inner))
@ -321,7 +324,8 @@ fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
// leading : is only possible error for well-formed input
let (tail, head) = verify(escaped(is_not(r":\"), '\\', anychar), |t: &str| {
!t.is_empty()
})(s)
})
.parse(s)
.map_err(|_: nom::Err<ParseError>| parse_failure(s, FailKind::MissingKey))?;
if tail.is_empty() {
Ok(SearchNode::UnqualifiedText(unescape(head)?))
@ -407,7 +411,7 @@ fn parse_resched(s: &str) -> ParseResult<SearchNode> {
/// eg prop:ivl>3, prop:ease!=2.5
fn parse_prop(prop_clause: &str) -> ParseResult<SearchNode> {
let (tail, prop) = alt::<_, _, ParseError, _>((
let (tail, prop) = alt((
tag("ivl"),
tag("due"),
tag("reps"),
@ -421,8 +425,9 @@ fn parse_prop(prop_clause: &str) -> ParseResult<SearchNode> {
tag("r"),
recognize(preceded(tag("cdn:"), alphanumeric1)),
recognize(preceded(tag("cds:"), alphanumeric1)),
))(prop_clause)
.map_err(|_| {
))
.parse(prop_clause)
.map_err(|_: nom::Err<ParseError>| {
parse_failure(
prop_clause,
FailKind::InvalidPropProperty {
@ -431,15 +436,16 @@ fn parse_prop(prop_clause: &str) -> ParseResult<SearchNode> {
)
})?;
let (num, operator) = alt::<_, _, ParseError, _>((
let (num, operator) = alt((
tag("<="),
tag(">="),
tag("!="),
tag("="),
tag("<"),
tag(">"),
))(tail)
.map_err(|_| {
))
.parse(tail)
.map_err(|_: nom::Err<ParseError>| {
parse_failure(
prop_clause,
FailKind::InvalidPropOperator {

View file

@ -13,6 +13,7 @@ use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::combinator::map;
use nom::sequence::delimited;
use nom::Parser;
use regex::Regex;
use crate::cloze::cloze_number_in_fields;
@ -67,7 +68,8 @@ impl TemplateMode {
tag(self.end_tag()),
),
|out| classify_handle(out),
)(s)
)
.parse(s)
}
/// Return the next handlebar, comment or text token.
@ -127,7 +129,8 @@ fn comment_token(s: &str) -> nom::IResult<&str, Token> {
tag(COMMENT_END),
),
Token::Comment,
)(s)
)
.parse(s)
}
fn tokens(mut template: &str) -> impl Iterator<Item = TemplateResult<Token<'_>>> {