From f623f19b3d4bfffed9da76850eb644a79e103cc7 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Sun, 15 Mar 2020 19:11:45 +1000 Subject: [PATCH] basic search parsing --- rslib/src/lib.rs | 1 + rslib/src/search/mod.rs | 1 + rslib/src/search/parser.rs | 132 +++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 rslib/src/search/mod.rs create mode 100644 rslib/src/search/parser.rs diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index 8f815dce8..f66dd1bd6 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -19,6 +19,7 @@ pub mod log; pub mod media; pub mod notes; pub mod sched; +pub mod search; pub mod storage; pub mod template; pub mod template_filters; diff --git a/rslib/src/search/mod.rs b/rslib/src/search/mod.rs new file mode 100644 index 000000000..b93e263bb --- /dev/null +++ b/rslib/src/search/mod.rs @@ -0,0 +1 @@ +mod parser; diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs new file mode 100644 index 000000000..3517fa817 --- /dev/null +++ b/rslib/src/search/parser.rs @@ -0,0 +1,132 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use nom::branch::alt; +use nom::bytes::complete::{escaped, is_not, take_while1}; +use nom::character::complete::{char, one_of}; +use nom::combinator::{all_consuming, map}; +use nom::sequence::{delimited, preceded}; +use nom::{multi::many0, IResult}; + +#[derive(Debug, PartialEq)] +pub(super) enum Node<'a> { + And, + Or, + Not(Box<Node<'a>>), + Group(Vec<Node<'a>>), + Text(&'a str), +} + +/// Parse the input string into a list of nodes. 
+#[allow(dead_code)] +pub(super) fn parse(input: &str) -> std::result::Result, String> { + let (_, nodes) = all_consuming(group_inner)(input).map_err(|e| format!("{:?}", e))?; + Ok(nodes) +} + +/// One or more nodes surrounded by brackets, eg (one OR two) +fn group(s: &str) -> IResult<&str, Node> { + map(delimited(char('('), group_inner, char(')')), |nodes| { + Node::Group(nodes) + })(s) +} + +/// One or more nodes inside brackets, er 'one OR two -three' +fn group_inner(input: &str) -> IResult<&str, Vec> { + let mut remaining = input; + let mut nodes = vec![]; + + loop { + match node(remaining) { + Ok((rem, node)) => { + remaining = rem; + + if nodes.len() % 2 == 0 { + // before adding the node, if the length is even then the node + // must not be a boolean + if matches!(node, Node::And | Node::Or) { + return Err(nom::Err::Failure(("", nom::error::ErrorKind::NoneOf))); + } + } else { + // if the length is odd, the next item must be a boolean. if it's + // not, add an implicit and + if !matches!(node, Node::And | Node::Or) { + nodes.push(Node::And); + } + } + nodes.push(node); + } + Err(e) => match e { + nom::Err::Error(_) => break, + _ => return Err(e), + }, + }; + } + + Ok((remaining, nodes)) +} + +/// Optional leading space, then a (negated) group or text +fn node(s: &str) -> IResult<&str, Node> { + let whitespace0 = many0(one_of(" \u{3000}")); + preceded(whitespace0, alt((negated_node, group, text)))(s) +} + +fn negated_node(s: &str) -> IResult<&str, Node> { + map(preceded(char('-'), alt((group, text))), |node| { + Node::Not(Box::new(node)) + })(s) +} + +/// Either quoted or unquoted text +fn text(s: &str) -> IResult<&str, Node> { + alt((quoted_term, unquoted_term))(s) +} + +/// Unquoted text, terminated by a space or ) +fn unquoted_term(s: &str) -> IResult<&str, Node> { + map(take_while1(|c| c != ' ' && c != ')'), |text: &str| { + if text.len() == 2 && text.to_ascii_lowercase() == "or" { + Node::Or + } else if text.len() == 3 && text.to_ascii_lowercase() == 
"and" { + Node::And + } else { + Node::Text(text) + } + })(s) +} + +// Quoted text, including the outer double quotes. +fn quoted_term(s: &str) -> IResult<&str, Node> { + delimited(char('"'), quoted_term_inner, char('"'))(s) +} + +/// Quoted text, terminated by a non-escaped double quote +/// Can escape :, " and \ +fn quoted_term_inner(s: &str) -> IResult<&str, Node> { + map(escaped(is_not(r#""\"#), '\\', one_of(r#"":\"#)), |o| { + Node::Text(o) + })(s) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn parsing() -> Result<(), String> { + use Node::*; + assert_eq!( + parse(r#"hello -(world and "foo bar") OR test"#)?, + vec![ + Text("hello"), + And, + Not(Box::new(Group(vec![Text("world"), And, Text("foo bar")]))), + Or, + Text("test") + ] + ); + + Ok(()) + } +}