From 08d205d37770fa0cacf409841cf14916be5b3ccc Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Mon, 16 Mar 2020 09:48:22 +1000 Subject: [PATCH] decode search terms in parser --- rslib/src/search/parser.rs | 301 +++++++++++++++++++++++++++++++++---- 1 file changed, 269 insertions(+), 32 deletions(-) diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs index fc2e4ea98..b3768aaae 100644 --- a/rslib/src/search/parser.rs +++ b/rslib/src/search/parser.rs @@ -1,13 +1,39 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use crate::types::ObjID; use nom::branch::alt; -use nom::bytes::complete::{escaped, is_not, take_while1}; +use nom::bytes::complete::{escaped, is_not, tag, take_while1}; use nom::character::complete::{char, one_of}; -use nom::combinator::{all_consuming, map}; +use nom::character::is_digit; +use nom::combinator::{all_consuming, map, map_res}; use nom::sequence::{delimited, preceded}; use nom::{multi::many0, IResult}; -use std::borrow::Cow; +use std::{borrow::Cow, num}; + +// fixme: need to preserve \ when used twice in string + +struct ParseError {} + +impl From for ParseError { + fn from(_: num::ParseIntError) -> Self { + ParseError {} + } +} + +impl From for ParseError { + fn from(_: num::ParseFloatError) -> Self { + ParseError {} + } +} + +impl From> for ParseError { + fn from(_: nom::Err<(I, nom::error::ErrorKind)>) -> Self { + ParseError {} + } +} + +type ParseResult = Result; #[derive(Debug, PartialEq)] pub(super) enum Node<'a> { @@ -15,11 +41,59 @@ pub(super) enum Node<'a> { Or, Not(Box>), Group(Vec>), + Search(SearchNode<'a>), +} + +#[derive(Debug, PartialEq)] +pub(super) enum SearchNode<'a> { + // text without a colon UnqualifiedText(Cow<'a, str>), - QualifiedText { - key: Cow<'a, str>, - val: Cow<'a, str>, + // foo:bar, where foo doesn't match a term below + SingleField { + field: Cow<'a, str>, + text: Cow<'a, str>, }, + AddedInDays(u32), + CardTemplate(Cow<'a, str>), + Deck(Cow<'a, str>), + NoteTypeID(ObjID), + NoteType(Cow<'a, str>), + Rated { + days: u32, + ease: Option, + }, + Tag(Cow<'a, str>), + Duplicates { + note_type_id: ObjID, + text: String, + }, + State(StateKind), + Flag(u8), + NoteIDs(Cow<'a, str>), + CardIDs(Cow<'a, str>), + Property { + operator: String, + kind: PropertyKind, + }, +} + +#[derive(Debug, PartialEq)] +pub(super) enum PropertyKind { + Due(i32), + Interval(u32), + Reps(u32), + Lapses(u32), + Ease(f32), +} + +#[derive(Debug, PartialEq)] +pub(super) enum StateKind { + New, + Review, + Learning, + Due, + Buried, + Suspended, } /// Parse the input string into a list of nodes. @@ -89,7 +163,7 @@ fn text(s: &str) -> IResult<&str, Node> { } /// Determine if text is a qualified search, and handle escaped chars. -fn node_for_text(s: &str) -> Node { +fn search_node_for_text(s: &str) -> ParseResult { let mut it = s.splitn(2, ':'); let (head, tail) = ( without_escapes(it.next().unwrap()), @@ -97,15 +171,13 @@ fn node_for_text(s: &str) -> Node { ); if let Some(tail) = tail { - Node::QualifiedText { - key: head, - val: tail, - } + search_node_for_text_with_argument(head, tail) } else { - Node::UnqualifiedText(head) + Ok(SearchNode::UnqualifiedText(head)) } } +/// Strip the \ escaping character fn without_escapes(s: &str) -> Cow { if s.find('\\').is_some() { s.replace('\\', "").into() @@ -116,18 +188,21 @@ fn without_escapes(s: &str) -> Cow { /// Unquoted text, terminated by a space or ) fn unquoted_term(s: &str) -> IResult<&str, Node> { - map(take_while1(|c| c != ' ' && c != ')'), |text: &str| { - if text.len() == 2 && text.to_ascii_lowercase() == "or" { - Node::Or - } else if text.len() == 3 && text.to_ascii_lowercase() == "and" { - Node::And - } else { - node_for_text(text) - } - })(s) + map_res( + take_while1(|c| c != ' ' && c != ')'), + |text: &str| -> ParseResult { + Ok(if text.len() == 2 && text.to_ascii_lowercase() == "or" { + Node::Or + } else if text.len() == 3 && text.to_ascii_lowercase() == "and" { + Node::And + } else { + Node::Search(search_node_for_text(text)?) + }) + }, + )(s) } -// Quoted text, including the outer double quotes. +/// Quoted text, including the outer double quotes. fn quoted_term(s: &str) -> IResult<&str, Node> { delimited(char('"'), quoted_term_inner, char('"'))(s) } @@ -135,9 +210,136 @@ fn quoted_term(s: &str) -> IResult<&str, Node> { /// Quoted text, terminated by a non-escaped double quote /// Can escape " and \ fn quoted_term_inner(s: &str) -> IResult<&str, Node> { - map(escaped(is_not(r#""\"#), '\\', one_of(r#""\"#)), |o| { - node_for_text(o) - })(s) + map_res( + escaped(is_not(r#""\"#), '\\', one_of(r#""\"#)), + |o| -> ParseResult { Ok(Node::Search(search_node_for_text(o)?)) }, + )(s) +} + +/// Convert a colon-separated key/val pair into the relevant search type. +fn search_node_for_text_with_argument<'a>( + key: Cow<'a, str>, + val: Cow<'a, str>, +) -> ParseResult> { + Ok(match key.to_ascii_lowercase().as_str() { + "added" => SearchNode::AddedInDays(val.parse()?), + "card" => SearchNode::CardTemplate(val), + "deck" => SearchNode::Deck(val), + "note" => SearchNode::NoteType(val), + "tag" => SearchNode::Tag(val), + "mid" => SearchNode::NoteTypeID(val.parse()?), + "nid" => SearchNode::NoteIDs(check_id_list(val)?), + "cid" => SearchNode::CardIDs(check_id_list(val)?), + "is" => parse_state(val.as_ref())?, + "flag" => parse_flag(val.as_ref())?, + "rated" => parse_rated(val.as_ref())?, + "dupes" => parse_dupes(val.as_ref())?, + "prop" => parse_prop(val.as_ref())?, + + // anything else is a field search + _ => SearchNode::SingleField { + field: key, + text: val, + }, + }) +} + +/// ensure a list of ids contains only numbers and commas, returning unchanged if true +/// used by nid: and cid: +fn check_id_list(s: Cow) -> ParseResult> { + if s.as_bytes().iter().any(|&c| !is_digit(c) && c != b',') { + Err(ParseError {}) + } else { + Ok(s) + } +} + +/// eg is:due +fn parse_state(s: &str) -> ParseResult> { + use StateKind::*; + Ok(SearchNode::State(match s { + "new" => New, + "review" => Review, + "learn" => Learning, + "due" => Due, + "buried" => Buried, + "suspended" => Suspended, + _ => return Err(ParseError {}), + })) +} + +/// flag:0-4 +fn parse_flag(s: &str) -> ParseResult> { + let n: u8 = s.parse()?; + if n > 4 { + Err(ParseError {}) + } else { + Ok(SearchNode::Flag(n)) + } +} + +/// eg rated:3 or rated:10:2 +fn parse_rated(val: &str) -> ParseResult> { + let mut it = val.splitn(2, ':'); + let days = it.next().unwrap().parse()?; + let ease = match it.next() { + Some(v) => Some(v.parse()?), + None => None, + }; + + Ok(SearchNode::Rated { days, ease }) +} + +/// eg dupes:1231,hello +fn parse_dupes(val: &str) -> ParseResult> { + let mut it = val.splitn(2, ","); + let mid: ObjID = it.next().unwrap().parse()?; + let text = it.next().ok_or(ParseError {})?; + Ok(SearchNode::Duplicates { + note_type_id: mid, + text: text.into(), + }) +} + +/// eg prop:ivl>3, prop:ease!=2.5 +fn parse_prop(val: &str) -> ParseResult> { + let (val, key) = alt(( + tag("ivl"), + tag("due"), + tag("reps"), + tag("lapses"), + tag("ease"), + ))(val)?; + + let (val, operator) = alt(( + tag("<="), + tag(">="), + tag("!="), + tag("="), + tag("<"), + tag(">"), + ))(val)?; + + let kind = if key == "ease" { + let num: f32 = val.parse()?; + PropertyKind::Ease(num) + } else if key == "due" { + let num: i32 = val.parse()?; + PropertyKind::Due(num) + } else { + let num: u32 = val.parse()?; + match key { + "ivl" => PropertyKind::Interval(num), + "reps" => PropertyKind::Reps(num), + "lapses" => PropertyKind::Lapses(num), + _ => unreachable!(), + } + }; + + Ok(SearchNode::Property { + operator: operator.to_string(), + kind, + }) } #[cfg(test)] @@ -147,25 +349,60 @@ mod test { #[test] fn parsing() -> Result<(), String> { use Node::*; + use SearchNode::*; assert_eq!( parse(r#"hello -(world and "foo:bar baz") OR test"#)?, vec![ - UnqualifiedText("hello".into()), + Search(UnqualifiedText("hello".into())), And, Not(Box::new(Group(vec![ - UnqualifiedText("world".into()), + Search(UnqualifiedText("world".into())), And, - QualifiedText { - key: "foo".into(), - val: "bar baz".into() - } + Search(SingleField { + field: "foo".into(), + text: "bar baz".into() + }) ]))), Or, - UnqualifiedText("test".into()) + Search(UnqualifiedText("test".into())) ] ); + assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]); + assert_eq!( + parse("card:front")?, + vec![Search(CardTemplate("front".into()))] + ); + assert_eq!(parse("deck:default")?, vec![Search(Deck("default".into()))]); + assert_eq!(parse("note:basic")?, vec![Search(NoteType("basic".into()))]); + assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".into()))]); + assert_eq!( + parse("nid:1237123712,2,3")?, + vec![Search(NoteIDs("1237123712,2,3".into()))] + ); + assert!(parse("nid:1237123712_2,3").is_err()); + assert_eq!(parse("is:due")?, vec![Search(State(StateKind::Due))]); + assert_eq!(parse("flag:3")?, vec![Search(Flag(3))]); + assert!(parse("flag:-1").is_err()); + assert!(parse("flag:5").is_err()); + + assert_eq!( + parse("prop:ivl>3")?, + vec![Search(Property { + operator: ">".into(), + kind: PropertyKind::Interval(3) + })] + ); + assert!(parse("prop:ivl>3.3").is_err()); + assert_eq!( + parse("prop:ease<=3.3")?, + vec![Search(Property { + operator: "<=".into(), + kind: PropertyKind::Ease(3.3) + })] + ); + Ok(()) } }