mirror of
https://github.com/ankitects/anki.git
synced 2025-12-10 21:36:55 -05:00
Fix bugs and inconsistencies in the search parser
This commit is contained in:
parent
92e516c6b0
commit
0cff65e5a8
3 changed files with 240 additions and 177 deletions
|
|
@ -9,16 +9,15 @@ use crate::{
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{escaped, is_not, tag, take_while1},
|
bytes::complete::{escaped, is_not, tag},
|
||||||
character::complete::{anychar, char, one_of},
|
character::complete::{anychar, char, none_of, one_of},
|
||||||
combinator::{all_consuming, map, map_res},
|
combinator::{all_consuming, map, map_res},
|
||||||
sequence::{delimited, preceded, tuple},
|
sequence::{delimited, preceded, separated_pair},
|
||||||
{multi::many0, IResult},
|
{multi::many0, IResult},
|
||||||
};
|
};
|
||||||
use regex::Regex;
|
use regex::{Captures, Regex};
|
||||||
use std::{borrow::Cow, num};
|
use std::{borrow::Cow, num};
|
||||||
|
|
||||||
// fixme: need to preserve \ when used twice in string
|
|
||||||
|
|
||||||
struct ParseError {}
|
struct ParseError {}
|
||||||
|
|
||||||
|
|
@ -42,6 +41,12 @@ impl<I> From<nom::Err<(I, nom::error::ErrorKind)>> for ParseError {
|
||||||
|
|
||||||
type ParseResult<T> = std::result::Result<T, ParseError>;
|
type ParseResult<T> = std::result::Result<T, ParseError>;
|
||||||
|
|
||||||
|
/// A search argument that is either plain text or a regular expression.
///
/// The parser produces `Re` only when the user's input contained an unescaped
/// wildcard (`*` or `_`); otherwise the input is kept as `Text` with its
/// escape sequences removed.
#[derive(Debug, PartialEq)]
pub(super) enum OptionalRe<'a> {
    /// Literal text; compared for equality by the SQL writer.
    Text(Cow<'a, str>),
    /// Regular expression source; matched with `regexp` by the SQL writer.
    Re(Cow<'a, str>),
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(super) enum Node<'a> {
|
pub(super) enum Node<'a> {
|
||||||
And,
|
And,
|
||||||
|
|
@ -57,30 +62,30 @@ pub(super) enum SearchNode<'a> {
|
||||||
UnqualifiedText(Cow<'a, str>),
|
UnqualifiedText(Cow<'a, str>),
|
||||||
// foo:bar, where foo doesn't match a term below
|
// foo:bar, where foo doesn't match a term below
|
||||||
SingleField {
|
SingleField {
|
||||||
field: Cow<'a, str>,
|
field: OptionalRe<'a>,
|
||||||
text: Cow<'a, str>,
|
text: Cow<'a, str>,
|
||||||
is_re: bool,
|
is_re: bool,
|
||||||
},
|
},
|
||||||
AddedInDays(u32),
|
AddedInDays(u32),
|
||||||
EditedInDays(u32),
|
EditedInDays(u32),
|
||||||
CardTemplate(TemplateKind),
|
CardTemplate(TemplateKind<'a>),
|
||||||
Deck(Cow<'a, str>),
|
Deck(String),
|
||||||
DeckID(DeckID),
|
DeckID(DeckID),
|
||||||
NoteTypeID(NoteTypeID),
|
NoteTypeID(NoteTypeID),
|
||||||
NoteType(Cow<'a, str>),
|
NoteType(OptionalRe<'a>),
|
||||||
Rated {
|
Rated {
|
||||||
days: u32,
|
days: u32,
|
||||||
ease: Option<u8>,
|
ease: Option<u8>,
|
||||||
},
|
},
|
||||||
Tag(Cow<'a, str>),
|
Tag(OptionalRe<'a>),
|
||||||
Duplicates {
|
Duplicates {
|
||||||
note_type_id: NoteTypeID,
|
note_type_id: NoteTypeID,
|
||||||
text: String,
|
text: Cow<'a, str>,
|
||||||
},
|
},
|
||||||
State(StateKind),
|
State(StateKind),
|
||||||
Flag(u8),
|
Flag(u8),
|
||||||
NoteIDs(Cow<'a, str>),
|
NoteIDs(&'a str),
|
||||||
CardIDs(Cow<'a, str>),
|
CardIDs(&'a str),
|
||||||
Property {
|
Property {
|
||||||
operator: String,
|
operator: String,
|
||||||
kind: PropertyKind,
|
kind: PropertyKind,
|
||||||
|
|
@ -88,7 +93,7 @@ pub(super) enum SearchNode<'a> {
|
||||||
WholeCollection,
|
WholeCollection,
|
||||||
Regex(Cow<'a, str>),
|
Regex(Cow<'a, str>),
|
||||||
NoCombining(Cow<'a, str>),
|
NoCombining(Cow<'a, str>),
|
||||||
WordBoundary(Cow<'a, str>),
|
WordBoundary(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
|
@ -113,9 +118,9 @@ pub(super) enum StateKind {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The argument of a `card:` search.
#[derive(Debug, PartialEq)]
pub(super) enum TemplateKind<'a> {
    /// Template ordinal; `parse_template` stores the user's number minus one
    /// (clamped at zero), and it is compared against `c.ord`.
    Ordinal(u16),
    /// Template name, either literal text or a regex built from wildcards.
    Name(OptionalRe<'a>),
}
|
||||||
|
|
||||||
/// Parse the input string into a list of nodes.
|
/// Parse the input string into a list of nodes.
|
||||||
|
|
@ -127,7 +132,6 @@ pub(super) fn parse(input: &str) -> Result<Vec<Node>> {
|
||||||
|
|
||||||
let (_, nodes) =
|
let (_, nodes) =
|
||||||
all_consuming(group_inner)(input).map_err(|_e| AnkiError::SearchError(None))?;
|
all_consuming(group_inner)(input).map_err(|_e| AnkiError::SearchError(None))?;
|
||||||
|
|
||||||
Ok(nodes)
|
Ok(nodes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -184,7 +188,7 @@ fn group_inner(input: &str) -> IResult<&str, Vec<Node>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn whitespace0(s: &str) -> IResult<&str, Vec<char>> {
|
fn whitespace0(s: &str) -> IResult<&str, Vec<char>> {
|
||||||
many0(one_of(" \u{3000}"))(s)
|
many0(one_of(" \u{3000}\t\n"))(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Optional leading space, then a (negated) group or text
|
/// Optional leading space, then a (negated) group or text
|
||||||
|
|
@ -205,32 +209,18 @@ fn text(s: &str) -> IResult<&str, Node> {
|
||||||
|
|
||||||
/// Determine if text is a qualified search, and handle escaped chars.
|
/// Determine if text is a qualified search, and handle escaped chars.
|
||||||
fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
|
fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
|
||||||
let mut it = s.splitn(2, ':');
|
let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
|
||||||
let (head, tail) = (
|
if tail.is_empty() {
|
||||||
unescape_quotes(it.next().unwrap()),
|
Ok(SearchNode::UnqualifiedText(unescape_to_glob(head)?))
|
||||||
it.next().map(unescape_quotes),
|
|
||||||
);
|
|
||||||
|
|
||||||
if let Some(tail) = tail {
|
|
||||||
search_node_for_text_with_argument(head, tail)
|
|
||||||
} else {
|
} else {
|
||||||
Ok(SearchNode::UnqualifiedText(head))
|
search_node_for_text_with_argument(head, &tail[1..])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \" -> "
|
/// Unquoted text, terminated by whitespace or unescaped ", ( or )
|
||||||
fn unescape_quotes(s: &str) -> Cow<str> {
|
|
||||||
if s.find(r#"\""#).is_some() {
|
|
||||||
s.replace(r#"\""#, "\"").into()
|
|
||||||
} else {
|
|
||||||
s.into()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Unquoted text, terminated by a space or )
|
|
||||||
fn unquoted_term(s: &str) -> IResult<&str, Node> {
|
fn unquoted_term(s: &str) -> IResult<&str, Node> {
|
||||||
map_res(
|
map_res(
|
||||||
take_while1(|c| c != ' ' && c != ')' && c != '"'),
|
escaped(is_not("\"() \u{3000}\\"), '\\', none_of(" \u{3000}")),
|
||||||
|text: &str| -> ParseResult<Node> {
|
|text: &str| -> ParseResult<Node> {
|
||||||
Ok(if text.eq_ignore_ascii_case("or") {
|
Ok(if text.eq_ignore_ascii_case("or") {
|
||||||
Node::Or
|
Node::Or
|
||||||
|
|
@ -261,52 +251,64 @@ fn quoted_term_inner(s: &str) -> IResult<&str, &str> {
|
||||||
|
|
||||||
/// eg deck:"foo bar" - quotes must come after the :
|
/// eg deck:"foo bar" - quotes must come after the :
|
||||||
fn partially_quoted_term(s: &str) -> IResult<&str, Node> {
|
fn partially_quoted_term(s: &str) -> IResult<&str, Node> {
|
||||||
let term = take_while1(|c| c != ' ' && c != ')' && c != ':');
|
map_res(
|
||||||
let (s, (term, _, quoted_val)) = tuple((term, char(':'), quoted_term_str))(s)?;
|
separated_pair(
|
||||||
let quoted_val = unescape_quotes(quoted_val);
|
escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(": \u{3000}")),
|
||||||
|
char(':'),
|
||||||
match search_node_for_text_with_argument(term.into(), quoted_val) {
|
quoted_term_str,
|
||||||
Ok(search) => Ok((s, Node::Search(search))),
|
),
|
||||||
Err(_) => Err(nom::Err::Failure((s, nom::error::ErrorKind::NoneOf))),
|
|p| match search_node_for_text_with_argument(p.0, p.1) {
|
||||||
}
|
Ok(search) => Ok(Node::Search(search)),
|
||||||
|
Err(e) => Err(e),
|
||||||
|
},
|
||||||
|
)(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert a colon-separated key/val pair into the relevant search type.
|
/// Convert a colon-separated key/val pair into the relevant search type.
|
||||||
fn search_node_for_text_with_argument<'a>(
|
fn search_node_for_text_with_argument<'a>(
|
||||||
key: Cow<'a, str>,
|
key: &'a str,
|
||||||
val: Cow<'a, str>,
|
val: &'a str,
|
||||||
) -> ParseResult<SearchNode<'a>> {
|
) -> ParseResult<SearchNode<'a>> {
|
||||||
Ok(match key.to_ascii_lowercase().as_str() {
|
Ok(match key.to_ascii_lowercase().as_str() {
|
||||||
"added" => SearchNode::AddedInDays(val.parse()?),
|
"added" => SearchNode::AddedInDays(val.parse()?),
|
||||||
"edited" => SearchNode::EditedInDays(val.parse()?),
|
"edited" => SearchNode::EditedInDays(val.parse()?),
|
||||||
"deck" => SearchNode::Deck(val),
|
"deck" => SearchNode::Deck(unescape_to_enforced_re(val)?),
|
||||||
"note" => SearchNode::NoteType(val),
|
"note" => SearchNode::NoteType(unescape_to_re(val)?),
|
||||||
"tag" => SearchNode::Tag(val),
|
"tag" => SearchNode::Tag(parse_tag(val)?),
|
||||||
"mid" => SearchNode::NoteTypeID(val.parse()?),
|
"mid" => SearchNode::NoteTypeID(val.parse()?),
|
||||||
"nid" => SearchNode::NoteIDs(check_id_list(val)?),
|
"nid" => SearchNode::NoteIDs(check_id_list(val)?),
|
||||||
"cid" => SearchNode::CardIDs(check_id_list(val)?),
|
"cid" => SearchNode::CardIDs(check_id_list(val)?),
|
||||||
"did" => SearchNode::DeckID(val.parse()?),
|
"did" => SearchNode::DeckID(val.parse()?),
|
||||||
"card" => parse_template(val.as_ref()),
|
"card" => parse_template(val)?,
|
||||||
"is" => parse_state(val.as_ref())?,
|
"is" => parse_state(val)?,
|
||||||
"flag" => parse_flag(val.as_ref())?,
|
"flag" => parse_flag(val)?,
|
||||||
"rated" => parse_rated(val.as_ref())?,
|
"rated" => parse_rated(val)?,
|
||||||
"dupe" => parse_dupes(val.as_ref())?,
|
"dupe" => parse_dupes(val)?,
|
||||||
"prop" => parse_prop(val.as_ref())?,
|
"prop" => parse_prop(val)?,
|
||||||
"re" => SearchNode::Regex(val),
|
"re" => SearchNode::Regex(unescape_quotes(val)),
|
||||||
"nc" => SearchNode::NoCombining(val),
|
"nc" => SearchNode::NoCombining(unescape_to_glob(val)?),
|
||||||
"w" => SearchNode::WordBoundary(val),
|
"w" => SearchNode::WordBoundary(unescape_to_enforced_re(val)?),
|
||||||
// anything else is a field search
|
// anything else is a field search
|
||||||
_ => parse_single_field(key.as_ref(), val.as_ref()),
|
_ => parse_single_field(key, val)?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ensure the string doesn't contain whitespace and unescape.
|
||||||
|
fn parse_tag(s: &str) -> ParseResult<OptionalRe> {
|
||||||
|
if s.as_bytes().iter().any(u8::is_ascii_whitespace) {
|
||||||
|
Err(ParseError {})
|
||||||
|
} else {
|
||||||
|
unescape_to_custom_re(s, r"\S")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// ensure a list of ids contains only numbers and commas, returning unchanged if true
|
/// ensure a list of ids contains only numbers and commas, returning unchanged if true
|
||||||
/// used by nid: and cid:
|
/// used by nid: and cid:
|
||||||
fn check_id_list(s: Cow<str>) -> ParseResult<Cow<str>> {
|
fn check_id_list(s: &str) -> ParseResult<&str> {
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref RE: Regex = Regex::new(r"^(\d+,)*\d+$").unwrap();
|
static ref RE: Regex = Regex::new(r"^(\d+,)*\d+$").unwrap();
|
||||||
}
|
}
|
||||||
if RE.is_match(s.as_ref()) {
|
if RE.is_match(s) {
|
||||||
Ok(s)
|
Ok(s)
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError {})
|
Err(ParseError {})
|
||||||
|
|
@ -360,13 +362,13 @@ fn parse_rated(val: &str) -> ParseResult<SearchNode<'static>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// eg dupes:1231,hello
|
/// eg dupes:1231,hello
|
||||||
fn parse_dupes(val: &str) -> ParseResult<SearchNode<'static>> {
|
fn parse_dupes(val: &str) -> ParseResult<SearchNode> {
|
||||||
let mut it = val.splitn(2, ',');
|
let mut it = val.splitn(2, ',');
|
||||||
let mid: NoteTypeID = it.next().unwrap().parse()?;
|
let mid: NoteTypeID = it.next().unwrap().parse()?;
|
||||||
let text = it.next().ok_or(ParseError {})?;
|
let text = it.next().ok_or(ParseError {})?;
|
||||||
Ok(SearchNode::Duplicates {
|
Ok(SearchNode::Duplicates {
|
||||||
note_type_id: mid,
|
note_type_id: mid,
|
||||||
text: text.into(),
|
text: unescape_quotes(text),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -411,27 +413,122 @@ fn parse_prop(val: &str) -> ParseResult<SearchNode<'static>> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_template(val: &str) -> SearchNode<'static> {
|
fn parse_template(val: &str) -> ParseResult<SearchNode> {
|
||||||
SearchNode::CardTemplate(match val.parse::<u16>() {
|
Ok(SearchNode::CardTemplate(match val.parse::<u16>() {
|
||||||
Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
|
Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
|
||||||
Err(_) => TemplateKind::Name(val.into()),
|
Err(_) => TemplateKind::Name(unescape_to_re(val)?),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> {
|
||||||
|
Ok(if val.starts_with("re:") {
|
||||||
|
SearchNode::SingleField {
|
||||||
|
field: unescape_to_re(key)?,
|
||||||
|
text: unescape_quotes(&val[3..]),
|
||||||
|
is_re: true,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
SearchNode::SingleField {
|
||||||
|
field: unescape_to_re(key)?,
|
||||||
|
text: unescape_to_glob(val)?,
|
||||||
|
is_re: false,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_single_field(key: &str, mut val: &str) -> SearchNode<'static> {
|
/// Replace every `\"` with `"`.
///
/// Intended for strings that contain no unescaped quote. Input without any
/// quote character is returned borrowed and unchanged.
fn unescape_quotes(s: &str) -> Cow<str> {
    if !s.contains('"') {
        return Cow::Borrowed(s);
    }
    Cow::Owned(s.replace(r#"\""#, "\""))
}
|
||||||
|
|
||||||
|
/// Check string for invalid escape sequences.
///
/// A backslash must be followed by one of the escapable characters
/// `\`, `"`, `:`, `*`, `_`, `(` or `)`. Returns `true` if any backslash is
/// followed by something else, or is the last character of the string.
///
/// An escaped backslash (`\\`) is a valid pair; the previous pattern's
/// negated class did not exclude `\`, so it rejected every string that
/// contained one.
fn is_invalid_escape(txt: &str) -> bool {
    let mut chars = txt.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            continue;
        }
        // Consume the escaped character together with its backslash, so that
        // the second half of `\\` is never mistaken for a new escape.
        match chars.next() {
            Some('\\') | Some('"') | Some(':') | Some('*') | Some('_') | Some('(')
            | Some(')') => {}
            _ => return true,
        }
    }
    false
}
|
||||||
|
|
||||||
|
/// Handle escaped characters and convert Anki wildcards to SQL wildcards.
|
||||||
|
/// Return error if there is an undefined escape sequence.
|
||||||
|
fn unescape_to_glob(txt: &str) -> ParseResult<Cow<str>> {
|
||||||
|
if is_invalid_escape(txt) {
|
||||||
|
Err(ParseError {})
|
||||||
|
} else {
|
||||||
|
// escape sequences and unescaped special characters which need conversion
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(r"\\.|[*%]").unwrap();
|
||||||
|
}
|
||||||
|
Ok(RE.replace_all(&txt, |caps: &Captures| {
|
||||||
|
match &caps[0] {
|
||||||
|
r"\\" => r"\\",
|
||||||
|
"\\\"" => "\"",
|
||||||
|
r"\:" => ":",
|
||||||
|
r"\*" => "*",
|
||||||
|
r"\_" => r"\_",
|
||||||
|
r"\(" => "(",
|
||||||
|
r"\)" => ")",
|
||||||
|
"*" => "%",
|
||||||
|
"%" => r"\%",
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle escaped characters and convert to regex if there are wildcards.
/// Wildcards are translated using `.` as the "any character" pattern.
/// Return error if there is an undefined escape sequence.
fn unescape_to_re(txt: &str) -> ParseResult<OptionalRe> {
    unescape_to_custom_re(txt, ".")
}
|
||||||
|
|
||||||
|
/// Handle escaped characters and if there are wildcards, convert to a regex using the given wildcard.
|
||||||
|
/// Return error if there is an undefined escape sequence.
|
||||||
|
fn unescape_to_custom_re<'a>(txt: &'a str, wildcard: &str) -> ParseResult<OptionalRe<'a>> {
|
||||||
|
if is_invalid_escape(txt) {
|
||||||
|
Err(ParseError {})
|
||||||
|
} else {
|
||||||
|
lazy_static! {
|
||||||
|
static ref WILDCARD: Regex = Regex::new(r"(^|[^\\])(\\\\)*[*_]").unwrap();
|
||||||
|
static ref MAYBE_ESCAPED: Regex = Regex::new(r"\\?.").unwrap();
|
||||||
|
static ref ESCAPED: Regex = Regex::new(r"\\(.)").unwrap();
|
||||||
|
}
|
||||||
|
if WILDCARD.is_match(txt) {
|
||||||
|
Ok(OptionalRe::Re(MAYBE_ESCAPED.replace_all(
|
||||||
|
&txt,
|
||||||
|
|caps: &Captures| {
|
||||||
|
let s = &caps[0];
|
||||||
|
match s {
|
||||||
|
r"\\" | r"\*" | r"\(" | r"\)" => s.to_string(),
|
||||||
|
"\\\"" => "\"".to_string(),
|
||||||
|
r"\:" => ":".to_string(),
|
||||||
|
r"*" => format!("{}*", wildcard),
|
||||||
|
"_" => wildcard.to_string(),
|
||||||
|
r"\_" => r"_".to_string(),
|
||||||
|
s => regex::escape(s),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
Ok(OptionalRe::Text(ESCAPED.replace_all(&txt, "$1")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle escaped characters and convert to regex.
|
||||||
|
/// Return error if there is an undefined escape sequence.
|
||||||
|
fn unescape_to_enforced_re(txt: &str) -> ParseResult<String> {
|
||||||
|
Ok(match unescape_to_re(txt)? {
|
||||||
|
OptionalRe::Text(s) => regex::escape(s.as_ref()),
|
||||||
|
OptionalRe::Re(s) => s.to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
@ -440,6 +537,7 @@ mod test {
|
||||||
fn parsing() -> Result<()> {
|
fn parsing() -> Result<()> {
|
||||||
use Node::*;
|
use Node::*;
|
||||||
use SearchNode::*;
|
use SearchNode::*;
|
||||||
|
use OptionalRe::*;
|
||||||
|
|
||||||
assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]);
|
assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]);
|
||||||
assert_eq!(parse(" ")?, vec![Search(SearchNode::WholeCollection)]);
|
assert_eq!(parse(" ")?, vec![Search(SearchNode::WholeCollection)]);
|
||||||
|
|
@ -478,7 +576,7 @@ mod test {
|
||||||
Search(UnqualifiedText("world".into())),
|
Search(UnqualifiedText("world".into())),
|
||||||
And,
|
And,
|
||||||
Search(SingleField {
|
Search(SingleField {
|
||||||
field: "foo".into(),
|
field: Text("foo".into()),
|
||||||
text: "bar baz".into(),
|
text: "bar baz".into(),
|
||||||
is_re: false,
|
is_re: false,
|
||||||
})
|
})
|
||||||
|
|
@ -491,7 +589,7 @@ mod test {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("foo:re:bar")?,
|
parse("foo:re:bar")?,
|
||||||
vec![Search(SingleField {
|
vec![Search(SingleField {
|
||||||
field: "foo".into(),
|
field: Text("foo".into()),
|
||||||
text: "bar".into(),
|
text: "bar".into(),
|
||||||
is_re: true
|
is_re: true
|
||||||
})]
|
})]
|
||||||
|
|
@ -501,7 +599,7 @@ mod test {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse(r#""field:va\"lue""#)?,
|
parse(r#""field:va\"lue""#)?,
|
||||||
vec![Search(SingleField {
|
vec![Search(SingleField {
|
||||||
field: "field".into(),
|
field: Text("foo".into()),
|
||||||
text: "va\"lue".into(),
|
text: "va\"lue".into(),
|
||||||
is_re: false
|
is_re: false
|
||||||
})]
|
})]
|
||||||
|
|
@ -517,7 +615,7 @@ mod test {
|
||||||
assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
|
assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("card:front")?,
|
parse("card:front")?,
|
||||||
vec![Search(CardTemplate(TemplateKind::Name("front".into())))]
|
vec![Search(CardTemplate(TemplateKind::Name(Text("front".into()))))]
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("card:3")?,
|
parse("card:3")?,
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
use super::parser::{Node, PropertyKind, SearchNode, StateKind, TemplateKind};
|
use super::parser::{Node, PropertyKind, SearchNode, StateKind, OptionalRe, TemplateKind};
|
||||||
use crate::{
|
use crate::{
|
||||||
card::{CardQueue, CardType},
|
card::{CardQueue, CardType},
|
||||||
collection::Collection,
|
collection::Collection,
|
||||||
|
|
@ -9,13 +9,13 @@ use crate::{
|
||||||
err::Result,
|
err::Result,
|
||||||
notes::field_checksum,
|
notes::field_checksum,
|
||||||
notetype::NoteTypeID,
|
notetype::NoteTypeID,
|
||||||
text::{matches_wildcard, text_to_re},
|
text::text_to_re,
|
||||||
text::{normalize_to_nfc, strip_html_preserving_image_filenames, without_combining},
|
text::{normalize_to_nfc, strip_html_preserving_image_filenames, without_combining},
|
||||||
timestamp::TimestampSecs,
|
timestamp::TimestampSecs,
|
||||||
};
|
};
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::{borrow::Cow, fmt::Write};
|
use std::{borrow::Cow, fmt::Write};
|
||||||
|
use unicase::eq as uni_eq;
|
||||||
|
|
||||||
pub(crate) struct SqlWriter<'a> {
|
pub(crate) struct SqlWriter<'a> {
|
||||||
col: &'a mut Collection,
|
col: &'a mut Collection,
|
||||||
|
|
@ -119,7 +119,7 @@ impl SqlWriter<'_> {
|
||||||
// note fields related
|
// note fields related
|
||||||
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
|
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
|
||||||
SearchNode::SingleField { field, text, is_re } => {
|
SearchNode::SingleField { field, text, is_re } => {
|
||||||
self.write_single_field(field.as_ref(), &self.norm_note(text), *is_re)?
|
self.write_single_field(field, &self.norm_note(text), *is_re)?
|
||||||
}
|
}
|
||||||
SearchNode::Duplicates { note_type_id, text } => {
|
SearchNode::Duplicates { note_type_id, text } => {
|
||||||
self.write_dupes(*note_type_id, &self.norm_note(text))
|
self.write_dupes(*note_type_id, &self.norm_note(text))
|
||||||
|
|
@ -131,14 +131,8 @@ impl SqlWriter<'_> {
|
||||||
// other
|
// other
|
||||||
SearchNode::AddedInDays(days) => self.write_added(*days)?,
|
SearchNode::AddedInDays(days) => self.write_added(*days)?,
|
||||||
SearchNode::EditedInDays(days) => self.write_edited(*days)?,
|
SearchNode::EditedInDays(days) => self.write_edited(*days)?,
|
||||||
SearchNode::CardTemplate(template) => match template {
|
// fixme: normalise in name case?
|
||||||
TemplateKind::Ordinal(_) => {
|
SearchNode::CardTemplate(template) => self.write_template(template)?,
|
||||||
self.write_template(template)?;
|
|
||||||
}
|
|
||||||
TemplateKind::Name(name) => {
|
|
||||||
self.write_template(&TemplateKind::Name(norm(name).into()))?;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
|
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
|
||||||
SearchNode::NoteTypeID(ntid) => {
|
SearchNode::NoteTypeID(ntid) => {
|
||||||
write!(self.sql, "n.mid = {}", ntid).unwrap();
|
write!(self.sql, "n.mid = {}", ntid).unwrap();
|
||||||
|
|
@ -146,9 +140,12 @@ impl SqlWriter<'_> {
|
||||||
SearchNode::DeckID(did) => {
|
SearchNode::DeckID(did) => {
|
||||||
write!(self.sql, "c.did = {}", did).unwrap();
|
write!(self.sql, "c.did = {}", did).unwrap();
|
||||||
}
|
}
|
||||||
SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?,
|
// fixme: normalise?
|
||||||
|
SearchNode::NoteType(notetype) => self.write_note_type(notetype)?,
|
||||||
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
|
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
|
||||||
SearchNode::Tag(tag) => self.write_tag(&norm(tag))?,
|
|
||||||
|
// fixme: normalise?
|
||||||
|
SearchNode::Tag(tag) => self.write_tag(tag)?,
|
||||||
SearchNode::State(state) => self.write_state(state)?,
|
SearchNode::State(state) => self.write_state(state)?,
|
||||||
SearchNode::Flag(flag) => {
|
SearchNode::Flag(flag) => {
|
||||||
write!(self.sql, "(c.flags & 7) == {}", flag).unwrap();
|
write!(self.sql, "(c.flags & 7) == {}", flag).unwrap();
|
||||||
|
|
@ -167,7 +164,7 @@ impl SqlWriter<'_> {
|
||||||
|
|
||||||
fn write_unqualified(&mut self, text: &str) {
|
fn write_unqualified(&mut self, text: &str) {
|
||||||
// implicitly wrap in %
|
// implicitly wrap in %
|
||||||
let text = format!("%{}%", convert_glob_char(text));
|
let text = format!("%{}%", text);
|
||||||
self.args.push(text);
|
self.args.push(text);
|
||||||
write!(
|
write!(
|
||||||
self.sql,
|
self.sql,
|
||||||
|
|
@ -191,27 +188,27 @@ impl SqlWriter<'_> {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_tag(&mut self, text: &str) -> Result<()> {
|
fn write_tag(&mut self, text: &OptionalRe) -> Result<()> {
|
||||||
match text {
|
match text {
|
||||||
"none" => {
|
OptionalRe::Text(s) => {
|
||||||
write!(self.sql, "n.tags = ''").unwrap();
|
if s == "none" {
|
||||||
}
|
write!(self.sql, "n.tags = ''").unwrap();
|
||||||
"*" | "%" => {
|
} else if let Some(tag) = self.col.storage.preferred_tag_case(s)? {
|
||||||
write!(self.sql, "true").unwrap();
|
|
||||||
}
|
|
||||||
text => {
|
|
||||||
if let Some(re_glob) = glob_to_re(text) {
|
|
||||||
// text contains a wildcard
|
|
||||||
let re_glob = format!("(?i).* {} .*", re_glob);
|
|
||||||
write!(self.sql, "n.tags regexp ?").unwrap();
|
|
||||||
self.args.push(re_glob);
|
|
||||||
} else if let Some(tag) = self.col.storage.preferred_tag_case(&text)? {
|
|
||||||
write!(self.sql, "n.tags like ?").unwrap();
|
write!(self.sql, "n.tags like ?").unwrap();
|
||||||
self.args.push(format!("% {} %", tag));
|
self.args.push(format!("% {} %", tag));
|
||||||
} else {
|
} else {
|
||||||
write!(self.sql, "false").unwrap();
|
write!(self.sql, "false").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
OptionalRe::Re(s) => {
|
||||||
|
if s == "*" {
|
||||||
|
write!(self.sql, "true").unwrap();
|
||||||
|
} else {
|
||||||
|
let re = format!("(?i).* {} .*", s);
|
||||||
|
write!(self.sql, "n.tags regexp ?").unwrap();
|
||||||
|
self.args.push(re);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -340,45 +337,54 @@ impl SqlWriter<'_> {
|
||||||
TemplateKind::Ordinal(n) => {
|
TemplateKind::Ordinal(n) => {
|
||||||
write!(self.sql, "c.ord = {}", n).unwrap();
|
write!(self.sql, "c.ord = {}", n).unwrap();
|
||||||
}
|
}
|
||||||
TemplateKind::Name(name) => {
|
TemplateKind::Name(name) => match name {
|
||||||
if let Some(re) = glob_to_re(name) {
|
OptionalRe::Re(s) => {
|
||||||
let re = format!("(?i){}", re);
|
let re = format!("(?i){}", s);
|
||||||
self.sql.push_str(
|
self.sql.push_str(
|
||||||
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
|
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
|
||||||
);
|
);
|
||||||
self.args.push(re);
|
self.args.push(re);
|
||||||
} else {
|
}
|
||||||
|
OptionalRe::Text(s) => {
|
||||||
self.sql.push_str(
|
self.sql.push_str(
|
||||||
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
|
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
|
||||||
);
|
);
|
||||||
self.args.push(name.to_string());
|
self.args.push(s.to_string());
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
|
fn write_note_type(&mut self, nt_name: &OptionalRe) -> Result<()> {
|
||||||
if let Some(re) = glob_to_re(nt_name) {
|
match nt_name {
|
||||||
let re = format!("(?i){}", re);
|
OptionalRe::Re(s) => {
|
||||||
self.sql
|
let re = format!("(?i){}", s);
|
||||||
.push_str("n.mid in (select id from notetypes where name regexp ?)");
|
self.sql
|
||||||
self.args.push(re);
|
.push_str("n.mid in (select id from notetypes where name regexp ?)");
|
||||||
} else {
|
self.args.push(re);
|
||||||
self.sql
|
}
|
||||||
.push_str("n.mid in (select id from notetypes where name = ?)");
|
OptionalRe::Text(s) => {
|
||||||
self.args.push(nt_name.to_string());
|
self.sql
|
||||||
|
.push_str("n.mid in (select id from notetypes where name = ?)");
|
||||||
|
self.args.push(s.to_string());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_single_field(&mut self, field_name: &str, val: &str, is_re: bool) -> Result<()> {
|
fn write_single_field(
|
||||||
|
&mut self,
|
||||||
|
field_name: &OptionalRe,
|
||||||
|
val: &str,
|
||||||
|
is_re: bool,
|
||||||
|
) -> Result<()> {
|
||||||
let note_types = self.col.get_all_notetypes()?;
|
let note_types = self.col.get_all_notetypes()?;
|
||||||
|
|
||||||
let mut field_map = vec![];
|
let mut field_map = vec![];
|
||||||
for nt in note_types.values() {
|
for nt in note_types.values() {
|
||||||
for field in &nt.fields {
|
for field in &nt.fields {
|
||||||
if matches_wildcard(&field.name, field_name) {
|
if matches_string_variant(&field.name, field_name) {
|
||||||
field_map.push((nt.id, field.ord));
|
field_map.push((nt.id, field.ord));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -401,7 +407,7 @@ impl SqlWriter<'_> {
|
||||||
} else {
|
} else {
|
||||||
cmp = "like";
|
cmp = "like";
|
||||||
cmp_trailer = "escape '\\'";
|
cmp_trailer = "escape '\\'";
|
||||||
self.args.push(convert_glob_char(val).into())
|
self.args.push(val.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
let arg_idx = self.args.len();
|
let arg_idx = self.args.len();
|
||||||
|
|
@ -455,27 +461,16 @@ impl SqlWriter<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_word_boundary(&mut self, word: &str) {
|
fn write_word_boundary(&mut self, word: &str) {
|
||||||
// fixme: need to escape in the no-glob case as well
|
self.write_regex(&format!(r"\b{}\b", word))
|
||||||
let re = text_to_re(word);
|
|
||||||
self.write_regex(&format!(r"\b{}\b", re))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replace * with %, leaving \* alone.
|
/// True if the content of search is equal to text, folding case.
|
||||||
fn convert_glob_char(val: &str) -> Cow<str> {
|
fn matches_string_variant(text: &str, search: &OptionalRe) -> bool {
|
||||||
lazy_static! {
|
match search {
|
||||||
static ref RE: Regex = Regex::new(r"(^|[^\\])\*").unwrap();
|
OptionalRe::Re(s) => Regex::new(&format!("^(?i){}$", s)).unwrap().is_match(text),
|
||||||
|
OptionalRe::Text(s) => uni_eq(text, s),
|
||||||
}
|
}
|
||||||
RE.replace_all(val, "${1}%")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert a string with _, % or * characters into a regex.
|
|
||||||
/// If string contains no globbing characters, return None.
|
|
||||||
fn glob_to_re(glob: &str) -> Option<String> {
|
|
||||||
if !glob.contains(|c| c == '_' || c == '*' || c == '%') {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some(text_to_re(glob))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||||
|
|
@ -803,12 +798,4 @@ mod test {
|
||||||
RequiredTable::Notes
|
RequiredTable::Notes
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn convert_glob() {
|
|
||||||
assert_eq!(&convert_glob_char("foo*bar"), "foo%bar");
|
|
||||||
assert_eq!(&convert_glob_char("*bar"), "%bar");
|
|
||||||
assert_eq!(&convert_glob_char("\n*bar"), "\n%bar");
|
|
||||||
assert_eq!(&convert_glob_char(r"\*bar"), r"\*bar");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@ use lazy_static::lazy_static;
|
||||||
use regex::{Captures, Regex};
|
use regex::{Captures, Regex};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
use unicase::eq as uni_eq;
|
|
||||||
use unicode_normalization::{
|
use unicode_normalization::{
|
||||||
char::is_combining_mark, is_nfc, is_nfkd_quick, IsNormalized, UnicodeNormalization,
|
char::is_combining_mark, is_nfc, is_nfkd_quick, IsNormalized, UnicodeNormalization,
|
||||||
};
|
};
|
||||||
|
|
@ -240,17 +239,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// True if search is equal to text, folding case.
|
|
||||||
/// Supports '*' to match 0 or more characters.
|
|
||||||
pub(crate) fn matches_wildcard(text: &str, search: &str) -> bool {
|
|
||||||
if search.contains('*') {
|
|
||||||
let search = format!("^(?i){}$", regex::escape(search).replace(r"\*", ".*"));
|
|
||||||
Regex::new(&search).unwrap().is_match(text)
|
|
||||||
} else {
|
|
||||||
uni_eq(text, search)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert provided string to NFKD form and strip combining characters.
|
/// Convert provided string to NFKD form and strip combining characters.
|
||||||
pub(crate) fn without_combining(s: &str) -> Cow<str> {
|
pub(crate) fn without_combining(s: &str) -> Cow<str> {
|
||||||
// if the string is already normalized
|
// if the string is already normalized
|
||||||
|
|
@ -303,7 +291,6 @@ pub(crate) fn text_to_re(glob: &str) -> String {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::matches_wildcard;
|
|
||||||
use crate::text::without_combining;
|
use crate::text::without_combining;
|
||||||
use crate::text::{
|
use crate::text::{
|
||||||
extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_image_filenames, AVTag,
|
extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_image_filenames, AVTag,
|
||||||
|
|
@ -351,15 +338,6 @@ mod test {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn wildcard() {
|
|
||||||
assert_eq!(matches_wildcard("foo", "bar"), false);
|
|
||||||
assert_eq!(matches_wildcard("foo", "Foo"), true);
|
|
||||||
assert_eq!(matches_wildcard("foo", "F*"), true);
|
|
||||||
assert_eq!(matches_wildcard("foo", "F*oo"), true);
|
|
||||||
assert_eq!(matches_wildcard("foo", "b*"), false);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn combining() {
|
fn combining() {
|
||||||
assert!(matches!(without_combining("test"), Cow::Borrowed(_)));
|
assert!(matches!(without_combining("test"), Cow::Borrowed(_)));
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue