mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
Split unescaping between parser and writer
* Unescape wildcards in writer instead of parser.
* Move text conversion functions to text.rs.
* Implicitly norm when converting text.
* Revert to using collection when comparing tags but add escape support.
This commit is contained in:
parent
7c5cf6d18b
commit
8c02c6e205
3 changed files with 241 additions and 171 deletions
|
@ -40,12 +40,6 @@ impl<I> From<nom::Err<(I, nom::error::ErrorKind)>> for ParseError {
|
||||||
|
|
||||||
type ParseResult<T> = std::result::Result<T, ParseError>;
|
type ParseResult<T> = std::result::Result<T, ParseError>;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub(super) enum OptionalRe<'a> {
|
|
||||||
Text(Cow<'a, str>),
|
|
||||||
Re(Cow<'a, str>),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(super) enum Node<'a> {
|
pub(super) enum Node<'a> {
|
||||||
And,
|
And,
|
||||||
|
@ -61,22 +55,22 @@ pub(super) enum SearchNode<'a> {
|
||||||
UnqualifiedText(Cow<'a, str>),
|
UnqualifiedText(Cow<'a, str>),
|
||||||
// foo:bar, where foo doesn't match a term below
|
// foo:bar, where foo doesn't match a term below
|
||||||
SingleField {
|
SingleField {
|
||||||
field: OptionalRe<'a>,
|
field: Cow<'a, str>,
|
||||||
text: Cow<'a, str>,
|
text: Cow<'a, str>,
|
||||||
is_re: bool,
|
is_re: bool,
|
||||||
},
|
},
|
||||||
AddedInDays(u32),
|
AddedInDays(u32),
|
||||||
EditedInDays(u32),
|
EditedInDays(u32),
|
||||||
CardTemplate(TemplateKind<'a>),
|
CardTemplate(TemplateKind<'a>),
|
||||||
Deck(String),
|
Deck(Cow<'a, str>),
|
||||||
DeckID(DeckID),
|
DeckID(DeckID),
|
||||||
NoteTypeID(NoteTypeID),
|
NoteTypeID(NoteTypeID),
|
||||||
NoteType(OptionalRe<'a>),
|
NoteType(Cow<'a, str>),
|
||||||
Rated {
|
Rated {
|
||||||
days: u32,
|
days: u32,
|
||||||
ease: Option<u8>,
|
ease: Option<u8>,
|
||||||
},
|
},
|
||||||
Tag(String),
|
Tag(Cow<'a, str>),
|
||||||
Duplicates {
|
Duplicates {
|
||||||
note_type_id: NoteTypeID,
|
note_type_id: NoteTypeID,
|
||||||
text: Cow<'a, str>,
|
text: Cow<'a, str>,
|
||||||
|
@ -92,7 +86,7 @@ pub(super) enum SearchNode<'a> {
|
||||||
WholeCollection,
|
WholeCollection,
|
||||||
Regex(Cow<'a, str>),
|
Regex(Cow<'a, str>),
|
||||||
NoCombining(Cow<'a, str>),
|
NoCombining(Cow<'a, str>),
|
||||||
WordBoundary(String),
|
WordBoundary(Cow<'a, str>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
@ -119,7 +113,7 @@ pub(super) enum StateKind {
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(super) enum TemplateKind<'a> {
|
pub(super) enum TemplateKind<'a> {
|
||||||
Ordinal(u16),
|
Ordinal(u16),
|
||||||
Name(OptionalRe<'a>),
|
Name(Cow<'a, str>),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the input string into a list of nodes.
|
/// Parse the input string into a list of nodes.
|
||||||
|
@ -210,7 +204,7 @@ fn text(s: &str) -> IResult<&str, Node> {
|
||||||
fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
|
fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
|
||||||
let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
|
let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
|
||||||
if tail.is_empty() {
|
if tail.is_empty() {
|
||||||
Ok(SearchNode::UnqualifiedText(unescape_to_glob(head)?))
|
Ok(SearchNode::UnqualifiedText(unescape(head)?))
|
||||||
} else {
|
} else {
|
||||||
search_node_for_text_with_argument(head, &tail[1..])
|
search_node_for_text_with_argument(head, &tail[1..])
|
||||||
}
|
}
|
||||||
|
@ -279,9 +273,9 @@ fn search_node_for_text_with_argument<'a>(
|
||||||
Ok(match key.to_ascii_lowercase().as_str() {
|
Ok(match key.to_ascii_lowercase().as_str() {
|
||||||
"added" => SearchNode::AddedInDays(val.parse()?),
|
"added" => SearchNode::AddedInDays(val.parse()?),
|
||||||
"edited" => SearchNode::EditedInDays(val.parse()?),
|
"edited" => SearchNode::EditedInDays(val.parse()?),
|
||||||
"deck" => SearchNode::Deck(unescape_to_enforced_re(val, ".")?),
|
"deck" => SearchNode::Deck(unescape(val)?),
|
||||||
"note" => SearchNode::NoteType(unescape_to_re(val)?),
|
"note" => SearchNode::NoteType(unescape(val)?),
|
||||||
"tag" => SearchNode::Tag(unescape_to_enforced_re(val, r"\S")?),
|
"tag" => SearchNode::Tag(unescape(val)?),
|
||||||
"mid" => SearchNode::NoteTypeID(val.parse()?),
|
"mid" => SearchNode::NoteTypeID(val.parse()?),
|
||||||
"nid" => SearchNode::NoteIDs(check_id_list(val)?),
|
"nid" => SearchNode::NoteIDs(check_id_list(val)?),
|
||||||
"cid" => SearchNode::CardIDs(check_id_list(val)?),
|
"cid" => SearchNode::CardIDs(check_id_list(val)?),
|
||||||
|
@ -293,8 +287,8 @@ fn search_node_for_text_with_argument<'a>(
|
||||||
"dupe" => parse_dupes(val)?,
|
"dupe" => parse_dupes(val)?,
|
||||||
"prop" => parse_prop(val)?,
|
"prop" => parse_prop(val)?,
|
||||||
"re" => SearchNode::Regex(unescape_quotes(val)),
|
"re" => SearchNode::Regex(unescape_quotes(val)),
|
||||||
"nc" => SearchNode::NoCombining(unescape_to_glob(val)?),
|
"nc" => SearchNode::NoCombining(unescape(val)?),
|
||||||
"w" => SearchNode::WordBoundary(unescape_to_enforced_re(val, ".")?),
|
"w" => SearchNode::WordBoundary(unescape(val)?),
|
||||||
// anything else is a field search
|
// anything else is a field search
|
||||||
_ => parse_single_field(key, val)?,
|
_ => parse_single_field(key, val)?,
|
||||||
})
|
})
|
||||||
|
@ -414,21 +408,21 @@ fn parse_prop(val: &str) -> ParseResult<SearchNode<'static>> {
|
||||||
fn parse_template(val: &str) -> ParseResult<SearchNode> {
|
fn parse_template(val: &str) -> ParseResult<SearchNode> {
|
||||||
Ok(SearchNode::CardTemplate(match val.parse::<u16>() {
|
Ok(SearchNode::CardTemplate(match val.parse::<u16>() {
|
||||||
Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
|
Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
|
||||||
Err(_) => TemplateKind::Name(unescape_to_re(val)?),
|
Err(_) => TemplateKind::Name(unescape(val)?),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> {
|
fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> {
|
||||||
Ok(if val.starts_with("re:") {
|
Ok(if val.starts_with("re:") {
|
||||||
SearchNode::SingleField {
|
SearchNode::SingleField {
|
||||||
field: unescape_to_re(key)?,
|
field: unescape(key)?,
|
||||||
text: unescape_quotes(&val[3..]),
|
text: unescape_quotes(&val[3..]),
|
||||||
is_re: true,
|
is_re: true,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
SearchNode::SingleField {
|
SearchNode::SingleField {
|
||||||
field: unescape_to_re(key)?,
|
field: unescape(key)?,
|
||||||
text: unescape_to_glob(val)?,
|
text: unescape(val)?,
|
||||||
is_re: false,
|
is_re: false,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -443,6 +437,26 @@ fn unescape_quotes(s: &str) -> Cow<str> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn unescape(txt: &str) -> ParseResult<Cow<str>> {
|
||||||
|
if is_invalid_escape(txt) {
|
||||||
|
Err(ParseError {})
|
||||||
|
} else if is_parser_escape(txt) {
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(r#"\\[\\":()]"#).unwrap();
|
||||||
|
}
|
||||||
|
Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] {
|
||||||
|
r"\\" => r"\\",
|
||||||
|
"\\\"" => "\"",
|
||||||
|
r"\:" => ":",
|
||||||
|
r"\(" => "(",
|
||||||
|
r"\)" => ")",
|
||||||
|
_ => unreachable!(),
|
||||||
|
}))
|
||||||
|
} else {
|
||||||
|
Ok(txt.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Check string for invalid escape sequences.
|
/// Check string for invalid escape sequences.
|
||||||
fn is_invalid_escape(txt: &str) -> bool {
|
fn is_invalid_escape(txt: &str) -> bool {
|
||||||
// odd number of \s not followed by an escapable character
|
// odd number of \s not followed by an escapable character
|
||||||
|
@ -461,77 +475,22 @@ fn is_invalid_escape(txt: &str) -> bool {
|
||||||
RE.is_match(txt)
|
RE.is_match(txt)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle escaped characters and convert Anki wildcards to SQL wildcards.
|
/// Check string for escape sequences handled by the parser: ":()
|
||||||
/// Return error if there is an undefined escape sequence.
|
fn is_parser_escape(txt: &str) -> bool {
|
||||||
fn unescape_to_glob(txt: &str) -> ParseResult<Cow<str>> {
|
// odd number of \s followed by a char with special meaning to the parser
|
||||||
if is_invalid_escape(txt) {
|
|
||||||
Err(ParseError {})
|
|
||||||
} else {
|
|
||||||
// escape sequences and unescaped special characters which need conversion
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref RE: Regex = Regex::new(r"\\.|[*%]").unwrap();
|
static ref RE: Regex = Regex::new(
|
||||||
}
|
r#"(?x)
|
||||||
Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] {
|
(?:^|[^\\]) # not a backslash
|
||||||
r"\\" => r"\\",
|
(?:\\\\)* # even number of backslashes
|
||||||
"\\\"" => "\"",
|
\\ # single backslash
|
||||||
r"\:" => ":",
|
[":()] # parser escape
|
||||||
r"\*" => "*",
|
"#
|
||||||
r"\_" => r"\_",
|
)
|
||||||
r"\(" => "(",
|
.unwrap();
|
||||||
r"\)" => ")",
|
|
||||||
"*" => "%",
|
|
||||||
"%" => r"\%",
|
|
||||||
_ => unreachable!(),
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle escaped characters and convert to regex if there are wildcards.
|
RE.is_match(txt)
|
||||||
/// Return error if there is an undefined escape sequence.
|
|
||||||
fn unescape_to_re(txt: &str) -> ParseResult<OptionalRe> {
|
|
||||||
unescape_to_custom_re(txt, ".")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Handle escaped characters and if there are wildcards, convert to a regex using the given wildcard.
|
|
||||||
/// Return error if there is an undefined escape sequence.
|
|
||||||
fn unescape_to_custom_re<'a>(txt: &'a str, wildcard: &str) -> ParseResult<OptionalRe<'a>> {
|
|
||||||
if is_invalid_escape(txt) {
|
|
||||||
Err(ParseError {})
|
|
||||||
} else {
|
|
||||||
lazy_static! {
|
|
||||||
static ref WILDCARD: Regex = Regex::new(r"(^|[^\\])(\\\\)*[*_]").unwrap();
|
|
||||||
static ref MAYBE_ESCAPED: Regex = Regex::new(r"\\?.").unwrap();
|
|
||||||
static ref ESCAPED: Regex = Regex::new(r"\\(.)").unwrap();
|
|
||||||
}
|
|
||||||
if WILDCARD.is_match(txt) {
|
|
||||||
Ok(OptionalRe::Re(MAYBE_ESCAPED.replace_all(
|
|
||||||
&txt,
|
|
||||||
|caps: &Captures| {
|
|
||||||
let s = &caps[0];
|
|
||||||
match s {
|
|
||||||
"\\" | r"\*" | r"\(" | r"\)" => s.to_string(),
|
|
||||||
"\\\"" => "\"".to_string(),
|
|
||||||
r"\:" => ":".to_string(),
|
|
||||||
"*" => format!("{}*", wildcard),
|
|
||||||
"_" => wildcard.to_string(),
|
|
||||||
r"\_" => "_".to_string(),
|
|
||||||
s => regex::escape(s),
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)))
|
|
||||||
} else {
|
|
||||||
Ok(OptionalRe::Text(ESCAPED.replace_all(&txt, "$1")))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Handle escaped characters and convert to regex.
|
|
||||||
/// Return error if there is an undefined escape sequence.
|
|
||||||
fn unescape_to_enforced_re(txt: &str, wildcard: &str) -> ParseResult<String> {
|
|
||||||
Ok(match unescape_to_custom_re(txt, wildcard)? {
|
|
||||||
OptionalRe::Text(s) => regex::escape(s.as_ref()),
|
|
||||||
OptionalRe::Re(s) => s.to_string(),
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -541,7 +500,6 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn parsing() -> Result<()> {
|
fn parsing() -> Result<()> {
|
||||||
use Node::*;
|
use Node::*;
|
||||||
use OptionalRe::*;
|
|
||||||
use SearchNode::*;
|
use SearchNode::*;
|
||||||
|
|
||||||
assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]);
|
assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]);
|
||||||
|
@ -581,7 +539,7 @@ mod test {
|
||||||
Search(UnqualifiedText("world".into())),
|
Search(UnqualifiedText("world".into())),
|
||||||
And,
|
And,
|
||||||
Search(SingleField {
|
Search(SingleField {
|
||||||
field: Text("foo".into()),
|
field: "foo".into(),
|
||||||
text: "bar baz".into(),
|
text: "bar baz".into(),
|
||||||
is_re: false,
|
is_re: false,
|
||||||
})
|
})
|
||||||
|
@ -594,7 +552,7 @@ mod test {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("foo:re:bar")?,
|
parse("foo:re:bar")?,
|
||||||
vec![Search(SingleField {
|
vec![Search(SingleField {
|
||||||
field: Text("foo".into()),
|
field: "foo".into(),
|
||||||
text: "bar".into(),
|
text: "bar".into(),
|
||||||
is_re: true
|
is_re: true
|
||||||
})]
|
})]
|
||||||
|
@ -604,7 +562,7 @@ mod test {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse(r#""field:va\"lue""#)?,
|
parse(r#""field:va\"lue""#)?,
|
||||||
vec![Search(SingleField {
|
vec![Search(SingleField {
|
||||||
field: Text("field".into()),
|
field: "field".into(),
|
||||||
text: "va\"lue".into(),
|
text: "va\"lue".into(),
|
||||||
is_re: false
|
is_re: false
|
||||||
})]
|
})]
|
||||||
|
@ -616,9 +574,17 @@ mod test {
|
||||||
assert!(parse(r"\").is_err());
|
assert!(parse(r"\").is_err());
|
||||||
assert!(parse(r"\a").is_err());
|
assert!(parse(r"\a").is_err());
|
||||||
assert!(parse(r"\%").is_err());
|
assert!(parse(r"\%").is_err());
|
||||||
|
|
||||||
|
// parser unescapes ":()
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse(r#"\\\"\:\(\)\*\_"#)?,
|
parse(r#"\"\:\(\)"#)?,
|
||||||
vec![Search(UnqualifiedText(r#"\\":()*\_"#.into())),]
|
vec![Search(UnqualifiedText(r#"":()"#.into())),]
|
||||||
|
);
|
||||||
|
|
||||||
|
// parser doesn't unescape \*_
|
||||||
|
assert_eq!(
|
||||||
|
parse(r#"\\\*\_"#)?,
|
||||||
|
vec![Search(UnqualifiedText(r#"\\\*\_"#.into())),]
|
||||||
);
|
);
|
||||||
|
|
||||||
// escaping parentheses is optional (only) inside quotes
|
// escaping parentheses is optional (only) inside quotes
|
||||||
|
@ -651,9 +617,7 @@ mod test {
|
||||||
assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
|
assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("card:front")?,
|
parse("card:front")?,
|
||||||
vec![Search(CardTemplate(TemplateKind::Name(Text(
|
vec![Search(CardTemplate(TemplateKind::Name("front".into())))]
|
||||||
"front".into()
|
|
||||||
))))]
|
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("card:3")?,
|
parse("card:3")?,
|
||||||
|
@ -670,15 +634,8 @@ mod test {
|
||||||
vec![Search(Deck("default one".into()))]
|
vec![Search(Deck("default one".into()))]
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(parse("note:basic")?, vec![Search(NoteType("basic".into()))]);
|
||||||
parse("note:basic")?,
|
assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".into()))]);
|
||||||
vec![Search(NoteType(Text("basic".into())))]
|
|
||||||
);
|
|
||||||
assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".to_string()))]);
|
|
||||||
// wildcards in tags don't match whitespace
|
|
||||||
assert_eq!(parse("tag:ha_d")?, vec![Search(Tag(r"ha\Sd".to_string()))]);
|
|
||||||
assert_eq!(parse("tag:h*d")?, vec![Search(Tag(r"h\S*d".to_string()))]);
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse("nid:1237123712,2,3")?,
|
parse("nid:1237123712,2,3")?,
|
||||||
vec![Search(NoteIDs("1237123712,2,3".into()))]
|
vec![Search(NoteIDs("1237123712,2,3".into()))]
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
use super::parser::{Node, OptionalRe, PropertyKind, SearchNode, StateKind, TemplateKind};
|
use super::parser::{Node, PropertyKind, SearchNode, StateKind, TemplateKind};
|
||||||
use crate::{
|
use crate::{
|
||||||
card::{CardQueue, CardType},
|
card::{CardQueue, CardType},
|
||||||
collection::Collection,
|
collection::Collection,
|
||||||
|
@ -9,12 +9,24 @@ use crate::{
|
||||||
err::Result,
|
err::Result,
|
||||||
notes::field_checksum,
|
notes::field_checksum,
|
||||||
notetype::NoteTypeID,
|
notetype::NoteTypeID,
|
||||||
text::{normalize_to_nfc, strip_html_preserving_image_filenames, without_combining},
|
text::{
|
||||||
|
escape_sql, is_glob, normalize_to_nfc, strip_html_preserving_image_filenames, to_custom_re,
|
||||||
|
to_re, to_sql, to_text, without_combining,
|
||||||
|
},
|
||||||
timestamp::TimestampSecs,
|
timestamp::TimestampSecs,
|
||||||
};
|
};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::{borrow::Cow, fmt::Write};
|
use std::{borrow::Cow, fmt::Write};
|
||||||
use unicase::eq as uni_eq;
|
use unicase::eq as uni_eq;
|
||||||
|
use ConversionMode as CM;
|
||||||
|
|
||||||
|
enum ConversionMode<'a> {
|
||||||
|
OnlyNorm,
|
||||||
|
Regex,
|
||||||
|
CustomRe(&'a str),
|
||||||
|
Sql,
|
||||||
|
Text,
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) struct SqlWriter<'a> {
|
pub(crate) struct SqlWriter<'a> {
|
||||||
col: &'a mut Collection,
|
col: &'a mut Collection,
|
||||||
|
@ -116,22 +128,20 @@ impl SqlWriter<'_> {
|
||||||
use normalize_to_nfc as norm;
|
use normalize_to_nfc as norm;
|
||||||
match node {
|
match node {
|
||||||
// note fields related
|
// note fields related
|
||||||
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
|
SearchNode::UnqualifiedText(text) => self.write_unqualified(text),
|
||||||
SearchNode::SingleField { field, text, is_re } => {
|
SearchNode::SingleField { field, text, is_re } => {
|
||||||
self.write_single_field(field, &self.norm_note(text), *is_re)?
|
self.write_single_field(field, text, *is_re)?
|
||||||
}
|
}
|
||||||
SearchNode::Duplicates { note_type_id, text } => {
|
SearchNode::Duplicates { note_type_id, text } => self.write_dupes(*note_type_id, text),
|
||||||
self.write_dupes(*note_type_id, &self.norm_note(text))
|
SearchNode::Regex(re) => self.write_regex(re),
|
||||||
}
|
SearchNode::NoCombining(text) => self.write_no_combining(text),
|
||||||
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
|
SearchNode::WordBoundary(text) => self.write_word_boundary(text),
|
||||||
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
|
|
||||||
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
|
|
||||||
|
|
||||||
// other
|
// other
|
||||||
SearchNode::AddedInDays(days) => self.write_added(*days)?,
|
SearchNode::AddedInDays(days) => self.write_added(*days)?,
|
||||||
SearchNode::EditedInDays(days) => self.write_edited(*days)?,
|
SearchNode::EditedInDays(days) => self.write_edited(*days)?,
|
||||||
// fixme: normalise in name case?
|
|
||||||
SearchNode::CardTemplate(template) => self.write_template(template)?,
|
SearchNode::CardTemplate(template) => self.write_template(template)?,
|
||||||
|
// fixme: always norm?
|
||||||
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
|
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
|
||||||
SearchNode::NoteTypeID(ntid) => {
|
SearchNode::NoteTypeID(ntid) => {
|
||||||
write!(self.sql, "n.mid = {}", ntid).unwrap();
|
write!(self.sql, "n.mid = {}", ntid).unwrap();
|
||||||
|
@ -139,11 +149,9 @@ impl SqlWriter<'_> {
|
||||||
SearchNode::DeckID(did) => {
|
SearchNode::DeckID(did) => {
|
||||||
write!(self.sql, "c.did = {}", did).unwrap();
|
write!(self.sql, "c.did = {}", did).unwrap();
|
||||||
}
|
}
|
||||||
// fixme: normalise?
|
|
||||||
SearchNode::NoteType(notetype) => self.write_note_type(notetype)?,
|
SearchNode::NoteType(notetype) => self.write_note_type(notetype)?,
|
||||||
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
|
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
|
||||||
|
|
||||||
// fixme: normalise?
|
|
||||||
SearchNode::Tag(tag) => self.write_tag(tag)?,
|
SearchNode::Tag(tag) => self.write_tag(tag)?,
|
||||||
SearchNode::State(state) => self.write_state(state)?,
|
SearchNode::State(state) => self.write_state(state)?,
|
||||||
SearchNode::Flag(flag) => {
|
SearchNode::Flag(flag) => {
|
||||||
|
@ -163,7 +171,7 @@ impl SqlWriter<'_> {
|
||||||
|
|
||||||
fn write_unqualified(&mut self, text: &str) {
|
fn write_unqualified(&mut self, text: &str) {
|
||||||
// implicitly wrap in %
|
// implicitly wrap in %
|
||||||
let text = format!("%{}%", text);
|
let text = format!("%{}%", &self.convert(CM::Sql, text));
|
||||||
self.args.push(text);
|
self.args.push(text);
|
||||||
write!(
|
write!(
|
||||||
self.sql,
|
self.sql,
|
||||||
|
@ -174,7 +182,7 @@ impl SqlWriter<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_no_combining(&mut self, text: &str) {
|
fn write_no_combining(&mut self, text: &str) {
|
||||||
let text = format!("%{}%", without_combining(text));
|
let text = format!("%{}%", without_combining(&self.convert(CM::Sql, text)));
|
||||||
self.args.push(text);
|
self.args.push(text);
|
||||||
write!(
|
write!(
|
||||||
self.sql,
|
self.sql,
|
||||||
|
@ -187,16 +195,28 @@ impl SqlWriter<'_> {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_tag(&mut self, s: &String) -> Result<()> {
|
fn write_tag(&mut self, text: &str) -> Result<()> {
|
||||||
if s.contains(" ") {
|
if text.contains(" ") {
|
||||||
write!(self.sql, "false").unwrap();
|
write!(self.sql, "false").unwrap();
|
||||||
} else {
|
} else {
|
||||||
match s.as_str() {
|
match text {
|
||||||
"none" => write!(self.sql, "n.tags = ''").unwrap(),
|
"none" => write!(self.sql, "n.tags = ''").unwrap(),
|
||||||
r"\S*" => write!(self.sql, "true").unwrap(),
|
"*" => write!(self.sql, "true").unwrap(),
|
||||||
_ => {
|
s => {
|
||||||
|
if is_glob(s) {
|
||||||
write!(self.sql, "n.tags regexp ?").unwrap();
|
write!(self.sql, "n.tags regexp ?").unwrap();
|
||||||
self.args.push(format!("(?i).* {} .*", s));
|
let re = &self.convert(CM::CustomRe(r"\S"), s);
|
||||||
|
self.args.push(format!("(?i).* {} .*", re));
|
||||||
|
} else if let Some(tag) = self
|
||||||
|
.col
|
||||||
|
.storage
|
||||||
|
.preferred_tag_case(&self.convert(CM::Text, s))?
|
||||||
|
{
|
||||||
|
write!(self.sql, "n.tags like ? escape '\\'").unwrap();
|
||||||
|
self.args.push(format!("% {} %", escape_sql(&tag)));
|
||||||
|
} else {
|
||||||
|
write!(self.sql, "false").unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -294,7 +314,7 @@ impl SqlWriter<'_> {
|
||||||
|
|
||||||
fn write_deck(&mut self, deck: &str) -> Result<()> {
|
fn write_deck(&mut self, deck: &str) -> Result<()> {
|
||||||
match deck {
|
match deck {
|
||||||
".*" => write!(self.sql, "true").unwrap(),
|
"*" => write!(self.sql, "true").unwrap(),
|
||||||
"filtered" => write!(self.sql, "c.odid != 0").unwrap(),
|
"filtered" => write!(self.sql, "c.odid != 0").unwrap(),
|
||||||
deck => {
|
deck => {
|
||||||
// rewrite "current" to the current deck name
|
// rewrite "current" to the current deck name
|
||||||
|
@ -309,7 +329,7 @@ impl SqlWriter<'_> {
|
||||||
.as_str(),
|
.as_str(),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
human_deck_name_to_native(deck)
|
human_deck_name_to_native(&self.convert(CM::Regex, deck))
|
||||||
};
|
};
|
||||||
|
|
||||||
// convert to a regex that includes child decks
|
// convert to a regex that includes child decks
|
||||||
|
@ -330,54 +350,45 @@ impl SqlWriter<'_> {
|
||||||
TemplateKind::Ordinal(n) => {
|
TemplateKind::Ordinal(n) => {
|
||||||
write!(self.sql, "c.ord = {}", n).unwrap();
|
write!(self.sql, "c.ord = {}", n).unwrap();
|
||||||
}
|
}
|
||||||
TemplateKind::Name(name) => match name {
|
TemplateKind::Name(name) => {
|
||||||
OptionalRe::Re(s) => {
|
if is_glob(name) {
|
||||||
let re = format!("(?i){}", s);
|
let re = format!("(?i){}", self.convert(CM::Regex, name));
|
||||||
self.sql.push_str(
|
self.sql.push_str(
|
||||||
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
|
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
|
||||||
);
|
);
|
||||||
self.args.push(re);
|
self.args.push(re);
|
||||||
}
|
} else {
|
||||||
OptionalRe::Text(s) => {
|
|
||||||
self.sql.push_str(
|
self.sql.push_str(
|
||||||
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
|
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
|
||||||
);
|
);
|
||||||
self.args.push(s.to_string());
|
self.args.push(self.convert(CM::Text, name).into());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
|
||||||
};
|
};
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_note_type(&mut self, nt_name: &OptionalRe) -> Result<()> {
|
fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
|
||||||
match nt_name {
|
if is_glob(nt_name) {
|
||||||
OptionalRe::Re(s) => {
|
let re = format!("(?i){}", self.convert(CM::Regex, nt_name));
|
||||||
let re = format!("(?i){}", s);
|
|
||||||
self.sql
|
self.sql
|
||||||
.push_str("n.mid in (select id from notetypes where name regexp ?)");
|
.push_str("n.mid in (select id from notetypes where name regexp ?)");
|
||||||
self.args.push(re);
|
self.args.push(re);
|
||||||
}
|
} else {
|
||||||
OptionalRe::Text(s) => {
|
|
||||||
self.sql
|
self.sql
|
||||||
.push_str("n.mid in (select id from notetypes where name = ?)");
|
.push_str("n.mid in (select id from notetypes where name = ?)");
|
||||||
self.args.push(s.to_string());
|
self.args.push(self.convert(CM::Text, nt_name).into());
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_single_field(
|
fn write_single_field(&mut self, field_name: &str, val: &str, is_re: bool) -> Result<()> {
|
||||||
&mut self,
|
|
||||||
field_name: &OptionalRe,
|
|
||||||
val: &str,
|
|
||||||
is_re: bool,
|
|
||||||
) -> Result<()> {
|
|
||||||
let note_types = self.col.get_all_notetypes()?;
|
let note_types = self.col.get_all_notetypes()?;
|
||||||
|
|
||||||
let mut field_map = vec![];
|
let mut field_map = vec![];
|
||||||
for nt in note_types.values() {
|
for nt in note_types.values() {
|
||||||
for field in &nt.fields {
|
for field in &nt.fields {
|
||||||
if matches_string_variant(&field.name, field_name) {
|
if self.matches_glob(&field.name, field_name) {
|
||||||
field_map.push((nt.id, field.ord));
|
field_map.push((nt.id, field.ord));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -396,11 +407,12 @@ impl SqlWriter<'_> {
|
||||||
if is_re {
|
if is_re {
|
||||||
cmp = "regexp";
|
cmp = "regexp";
|
||||||
cmp_trailer = "";
|
cmp_trailer = "";
|
||||||
self.args.push(format!("(?i){}", val));
|
self.args
|
||||||
|
.push(format!("(?i){}", self.convert(CM::OnlyNorm, val)));
|
||||||
} else {
|
} else {
|
||||||
cmp = "like";
|
cmp = "like";
|
||||||
cmp_trailer = "escape '\\'";
|
cmp_trailer = "escape '\\'";
|
||||||
self.args.push(val.into())
|
self.args.push(self.convert(CM::Sql, val).into())
|
||||||
}
|
}
|
||||||
|
|
||||||
let arg_idx = self.args.len();
|
let arg_idx = self.args.len();
|
||||||
|
@ -423,6 +435,7 @@ impl SqlWriter<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) {
|
fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) {
|
||||||
|
let text = &self.convert(CM::OnlyNorm, text);
|
||||||
let text_nohtml = strip_html_preserving_image_filenames(text);
|
let text_nohtml = strip_html_preserving_image_filenames(text);
|
||||||
let csum = field_checksum(text_nohtml.as_ref());
|
let csum = field_checksum(text_nohtml.as_ref());
|
||||||
write!(
|
write!(
|
||||||
|
@ -450,19 +463,39 @@ impl SqlWriter<'_> {
|
||||||
|
|
||||||
fn write_regex(&mut self, word: &str) {
|
fn write_regex(&mut self, word: &str) {
|
||||||
self.sql.push_str("n.flds regexp ?");
|
self.sql.push_str("n.flds regexp ?");
|
||||||
self.args.push(format!(r"(?i){}", word));
|
self.args
|
||||||
|
.push(format!(r"(?i){}", self.convert(CM::OnlyNorm, word)));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_word_boundary(&mut self, word: &str) {
|
fn write_word_boundary(&mut self, word: &str) {
|
||||||
self.write_regex(&format!(r"\b{}\b", word))
|
self.sql.push_str("n.flds regexp ?");
|
||||||
|
self.args
|
||||||
|
.push(format!(r"(?i)\b{}\b", self.convert(CM::Regex, word)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Norm text and call the corresponding conversion function.
|
||||||
|
fn convert<'a>(&self, mode: ConversionMode, txt: &'a str) -> Cow<'a, str> {
|
||||||
|
let txt = match mode {
|
||||||
|
CM::OnlyNorm => txt.into(),
|
||||||
|
CM::Regex => to_re(txt),
|
||||||
|
CM::CustomRe(wildcard) => to_custom_re(txt, wildcard),
|
||||||
|
CM::Sql => to_sql(txt),
|
||||||
|
CM::Text => to_text(txt),
|
||||||
|
};
|
||||||
|
match txt {
|
||||||
|
Cow::Borrowed(s) => self.norm_note(s),
|
||||||
|
Cow::Owned(s) => self.norm_note(&s).to_string().into(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// True if the content of search is equal to text, folding case.
|
/// Compare text with a possible glob, folding case.
|
||||||
fn matches_string_variant(text: &str, search: &OptionalRe) -> bool {
|
fn matches_glob(&self, text: &str, search: &str) -> bool {
|
||||||
match search {
|
if is_glob(search) {
|
||||||
OptionalRe::Re(s) => Regex::new(&format!("^(?i){}$", s)).unwrap().is_match(text),
|
let search = format!("^(?i){}$", self.convert(CM::Regex, search));
|
||||||
OptionalRe::Text(s) => uni_eq(text, s),
|
Regex::new(&search).unwrap().is_match(text)
|
||||||
|
} else {
|
||||||
|
uni_eq(text, &self.convert(CM::Text, search))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -665,9 +698,15 @@ mod test {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
s(ctx, r"tag:one"),
|
s(ctx, r"tag:one"),
|
||||||
("(n.tags regexp ?)".into(), vec![r"(?i).* one .*".into()])
|
(
|
||||||
|
"(n.tags like ? escape '\\')".into(),
|
||||||
|
vec![r"% One %".into()]
|
||||||
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// unregistered tags without wildcards won't match
|
||||||
|
assert_eq!(s(ctx, "tag:unknown"), ("(false)".into(), vec![]));
|
||||||
|
|
||||||
// wildcards force a regexp search
|
// wildcards force a regexp search
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
s(ctx, r"tag:o*n\*et%w%oth_re\_e"),
|
s(ctx, r"tag:o*n\*et%w%oth_re\_e"),
|
||||||
|
|
|
@ -289,6 +289,80 @@ pub(crate) fn text_to_re(glob: &str) -> String {
|
||||||
text2.into()
|
text2.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if string contains an unescaped wildcard.
|
||||||
|
pub(crate) fn is_glob(txt: &str) -> bool {
|
||||||
|
// even number of \s followed by a wildcard
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(
|
||||||
|
r#"(?x)
|
||||||
|
(?:^|[^\\]) # not a backslash
|
||||||
|
(?:\\\\)* # even number of backslashes
|
||||||
|
[*_] # wildcard
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
RE.is_match(txt)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert to a RegEx respecting Anki wildcards.
|
||||||
|
pub(crate) fn to_re(txt: &str) -> Cow<str> {
|
||||||
|
to_custom_re(txt, ".")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert Anki style to RegEx using the provided wildcard.
|
||||||
|
pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> {
|
||||||
|
// escape sequences and unescaped special characters which need conversion
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(r"\\.|[*_]").unwrap();
|
||||||
|
}
|
||||||
|
RE.replace_all(&txt, |caps: &Captures| {
|
||||||
|
let s = &caps[0];
|
||||||
|
match s {
|
||||||
|
r"\\" | r"\*" => s.to_string(),
|
||||||
|
r"\_" => "_".to_string(),
|
||||||
|
"*" => format!("{}*", wildcard),
|
||||||
|
"_" => wildcard.to_string(),
|
||||||
|
s => regex::escape(s),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert to SQL respecting Anki wildcards.
|
||||||
|
pub(crate) fn to_sql<'a>(txt: &'a str) -> Cow<'a, str> {
|
||||||
|
// escape sequences and unescaped special characters which need conversion
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap();
|
||||||
|
}
|
||||||
|
RE.replace_all(&txt, |caps: &Captures| {
|
||||||
|
let s = &caps[0];
|
||||||
|
match s {
|
||||||
|
r"\\" => r"\\",
|
||||||
|
r"\*" => "*",
|
||||||
|
"*" => "%",
|
||||||
|
"%" => r"\%",
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unescape everything.
|
||||||
|
pub(crate) fn to_text(txt: &str) -> Cow<str> {
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(r"\\(.)").unwrap();
|
||||||
|
}
|
||||||
|
RE.replace_all(&txt, "$1")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escape characters special to SQL: \%_
|
||||||
|
pub(crate) fn escape_sql(txt: &str) -> Cow<str> {
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new(r"[\\%_]").unwrap();
|
||||||
|
}
|
||||||
|
RE.replace_all(&txt, r"\$0")
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use crate::text::without_combining;
|
use crate::text::without_combining;
|
||||||
|
|
Loading…
Reference in a new issue