Split unescaping between parser and writer

* Unescape wildcards in writer instead of parser.
* Move text conversion functions to text.rs.
* Implicitly norm when converting text.
* Revert to using collection when comparing tags but add escape support.
This commit is contained in:
RumovZ 2020-11-17 12:49:37 +01:00
parent 7c5cf6d18b
commit 8c02c6e205
3 changed files with 241 additions and 171 deletions

View file

@ -40,12 +40,6 @@ impl<I> From<nom::Err<(I, nom::error::ErrorKind)>> for ParseError {
type ParseResult<T> = std::result::Result<T, ParseError>; type ParseResult<T> = std::result::Result<T, ParseError>;
#[derive(Debug, PartialEq)]
pub(super) enum OptionalRe<'a> {
Text(Cow<'a, str>),
Re(Cow<'a, str>),
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(super) enum Node<'a> { pub(super) enum Node<'a> {
And, And,
@ -61,22 +55,22 @@ pub(super) enum SearchNode<'a> {
UnqualifiedText(Cow<'a, str>), UnqualifiedText(Cow<'a, str>),
// foo:bar, where foo doesn't match a term below // foo:bar, where foo doesn't match a term below
SingleField { SingleField {
field: OptionalRe<'a>, field: Cow<'a, str>,
text: Cow<'a, str>, text: Cow<'a, str>,
is_re: bool, is_re: bool,
}, },
AddedInDays(u32), AddedInDays(u32),
EditedInDays(u32), EditedInDays(u32),
CardTemplate(TemplateKind<'a>), CardTemplate(TemplateKind<'a>),
Deck(String), Deck(Cow<'a, str>),
DeckID(DeckID), DeckID(DeckID),
NoteTypeID(NoteTypeID), NoteTypeID(NoteTypeID),
NoteType(OptionalRe<'a>), NoteType(Cow<'a, str>),
Rated { Rated {
days: u32, days: u32,
ease: Option<u8>, ease: Option<u8>,
}, },
Tag(String), Tag(Cow<'a, str>),
Duplicates { Duplicates {
note_type_id: NoteTypeID, note_type_id: NoteTypeID,
text: Cow<'a, str>, text: Cow<'a, str>,
@ -92,7 +86,7 @@ pub(super) enum SearchNode<'a> {
WholeCollection, WholeCollection,
Regex(Cow<'a, str>), Regex(Cow<'a, str>),
NoCombining(Cow<'a, str>), NoCombining(Cow<'a, str>),
WordBoundary(String), WordBoundary(Cow<'a, str>),
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -119,7 +113,7 @@ pub(super) enum StateKind {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(super) enum TemplateKind<'a> { pub(super) enum TemplateKind<'a> {
Ordinal(u16), Ordinal(u16),
Name(OptionalRe<'a>), Name(Cow<'a, str>),
} }
/// Parse the input string into a list of nodes. /// Parse the input string into a list of nodes.
@ -210,7 +204,7 @@ fn text(s: &str) -> IResult<&str, Node> {
fn search_node_for_text(s: &str) -> ParseResult<SearchNode> { fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?; let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
if tail.is_empty() { if tail.is_empty() {
Ok(SearchNode::UnqualifiedText(unescape_to_glob(head)?)) Ok(SearchNode::UnqualifiedText(unescape(head)?))
} else { } else {
search_node_for_text_with_argument(head, &tail[1..]) search_node_for_text_with_argument(head, &tail[1..])
} }
@ -279,9 +273,9 @@ fn search_node_for_text_with_argument<'a>(
Ok(match key.to_ascii_lowercase().as_str() { Ok(match key.to_ascii_lowercase().as_str() {
"added" => SearchNode::AddedInDays(val.parse()?), "added" => SearchNode::AddedInDays(val.parse()?),
"edited" => SearchNode::EditedInDays(val.parse()?), "edited" => SearchNode::EditedInDays(val.parse()?),
"deck" => SearchNode::Deck(unescape_to_enforced_re(val, ".")?), "deck" => SearchNode::Deck(unescape(val)?),
"note" => SearchNode::NoteType(unescape_to_re(val)?), "note" => SearchNode::NoteType(unescape(val)?),
"tag" => SearchNode::Tag(unescape_to_enforced_re(val, r"\S")?), "tag" => SearchNode::Tag(unescape(val)?),
"mid" => SearchNode::NoteTypeID(val.parse()?), "mid" => SearchNode::NoteTypeID(val.parse()?),
"nid" => SearchNode::NoteIDs(check_id_list(val)?), "nid" => SearchNode::NoteIDs(check_id_list(val)?),
"cid" => SearchNode::CardIDs(check_id_list(val)?), "cid" => SearchNode::CardIDs(check_id_list(val)?),
@ -293,8 +287,8 @@ fn search_node_for_text_with_argument<'a>(
"dupe" => parse_dupes(val)?, "dupe" => parse_dupes(val)?,
"prop" => parse_prop(val)?, "prop" => parse_prop(val)?,
"re" => SearchNode::Regex(unescape_quotes(val)), "re" => SearchNode::Regex(unescape_quotes(val)),
"nc" => SearchNode::NoCombining(unescape_to_glob(val)?), "nc" => SearchNode::NoCombining(unescape(val)?),
"w" => SearchNode::WordBoundary(unescape_to_enforced_re(val, ".")?), "w" => SearchNode::WordBoundary(unescape(val)?),
// anything else is a field search // anything else is a field search
_ => parse_single_field(key, val)?, _ => parse_single_field(key, val)?,
}) })
@ -414,21 +408,21 @@ fn parse_prop(val: &str) -> ParseResult<SearchNode<'static>> {
fn parse_template(val: &str) -> ParseResult<SearchNode> { fn parse_template(val: &str) -> ParseResult<SearchNode> {
Ok(SearchNode::CardTemplate(match val.parse::<u16>() { Ok(SearchNode::CardTemplate(match val.parse::<u16>() {
Ok(n) => TemplateKind::Ordinal(n.max(1) - 1), Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
Err(_) => TemplateKind::Name(unescape_to_re(val)?), Err(_) => TemplateKind::Name(unescape(val)?),
})) }))
} }
fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> { fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> {
Ok(if val.starts_with("re:") { Ok(if val.starts_with("re:") {
SearchNode::SingleField { SearchNode::SingleField {
field: unescape_to_re(key)?, field: unescape(key)?,
text: unescape_quotes(&val[3..]), text: unescape_quotes(&val[3..]),
is_re: true, is_re: true,
} }
} else { } else {
SearchNode::SingleField { SearchNode::SingleField {
field: unescape_to_re(key)?, field: unescape(key)?,
text: unescape_to_glob(val)?, text: unescape(val)?,
is_re: false, is_re: false,
} }
}) })
@ -443,6 +437,26 @@ fn unescape_quotes(s: &str) -> Cow<str> {
} }
} }
/// Unescape the characters the parser is responsible for: `"`, `:`, `(`, `)`.
///
/// Escaped backslashes (`\\`) and wildcard escapes (`\*`, `\_`) are left
/// untouched; the writer unescapes those later. Returns an error if `txt`
/// contains an invalid escape sequence.
fn unescape(txt: &str) -> ParseResult<Cow<str>> {
    if is_invalid_escape(txt) {
        return Err(ParseError {});
    }
    if !is_parser_escape(txt) {
        // nothing to do — borrow the input as-is
        return Ok(txt.into());
    }
    lazy_static! {
        // a backslash followed by one of \ " : ( )
        static ref RE: Regex = Regex::new(r#"\\[\\":()]"#).unwrap();
    }
    Ok(RE.replace_all(txt, |caps: &Captures| match &caps[0] {
        // keep \\ escaped for the writer
        r"\\" => r"\\",
        "\\\"" => "\"",
        r"\:" => ":",
        r"\(" => "(",
        r"\)" => ")",
        _ => unreachable!(),
    }))
}
/// Check string for invalid escape sequences. /// Check string for invalid escape sequences.
fn is_invalid_escape(txt: &str) -> bool { fn is_invalid_escape(txt: &str) -> bool {
// odd number of \s not followed by an escapable character // odd number of \s not followed by an escapable character
@ -461,77 +475,22 @@ fn is_invalid_escape(txt: &str) -> bool {
RE.is_match(txt) RE.is_match(txt)
} }
/// Handle escaped characters and convert Anki wildcards to SQL wildcards. /// Check string for escape sequences handled by the parser: ":()
/// Return error if there is an undefined escape sequence. fn is_parser_escape(txt: &str) -> bool {
fn unescape_to_glob(txt: &str) -> ParseResult<Cow<str>> { // odd number of \s followed by a char with special meaning to the parser
if is_invalid_escape(txt) { lazy_static! {
Err(ParseError {}) static ref RE: Regex = Regex::new(
} else { r#"(?x)
// escape sequences and unescaped special characters which need conversion (?:^|[^\\]) # not a backslash
lazy_static! { (?:\\\\)* # even number of backslashes
static ref RE: Regex = Regex::new(r"\\.|[*%]").unwrap(); \\ # single backslash
} [":()] # parser escape
Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] { "#
r"\\" => r"\\", )
"\\\"" => "\"", .unwrap();
r"\:" => ":",
r"\*" => "*",
r"\_" => r"\_",
r"\(" => "(",
r"\)" => ")",
"*" => "%",
"%" => r"\%",
_ => unreachable!(),
}))
} }
}
/// Handle escaped characters and convert to regex if there are wildcards. RE.is_match(txt)
/// Return error if there is an undefined escape sequence.
fn unescape_to_re(txt: &str) -> ParseResult<OptionalRe> {
unescape_to_custom_re(txt, ".")
}
/// Handle escaped characters and if there are wildcards, convert to a regex using the given wildcard.
/// Return error if there is an undefined escape sequence.
fn unescape_to_custom_re<'a>(txt: &'a str, wildcard: &str) -> ParseResult<OptionalRe<'a>> {
if is_invalid_escape(txt) {
Err(ParseError {})
} else {
lazy_static! {
static ref WILDCARD: Regex = Regex::new(r"(^|[^\\])(\\\\)*[*_]").unwrap();
static ref MAYBE_ESCAPED: Regex = Regex::new(r"\\?.").unwrap();
static ref ESCAPED: Regex = Regex::new(r"\\(.)").unwrap();
}
if WILDCARD.is_match(txt) {
Ok(OptionalRe::Re(MAYBE_ESCAPED.replace_all(
&txt,
|caps: &Captures| {
let s = &caps[0];
match s {
"\\" | r"\*" | r"\(" | r"\)" => s.to_string(),
"\\\"" => "\"".to_string(),
r"\:" => ":".to_string(),
"*" => format!("{}*", wildcard),
"_" => wildcard.to_string(),
r"\_" => "_".to_string(),
s => regex::escape(s),
}
},
)))
} else {
Ok(OptionalRe::Text(ESCAPED.replace_all(&txt, "$1")))
}
}
}
/// Handle escaped characters and convert to regex.
/// Return error if there is an undefined escape sequence.
fn unescape_to_enforced_re(txt: &str, wildcard: &str) -> ParseResult<String> {
Ok(match unescape_to_custom_re(txt, wildcard)? {
OptionalRe::Text(s) => regex::escape(s.as_ref()),
OptionalRe::Re(s) => s.to_string(),
})
} }
#[cfg(test)] #[cfg(test)]
@ -541,7 +500,6 @@ mod test {
#[test] #[test]
fn parsing() -> Result<()> { fn parsing() -> Result<()> {
use Node::*; use Node::*;
use OptionalRe::*;
use SearchNode::*; use SearchNode::*;
assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]); assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]);
@ -581,7 +539,7 @@ mod test {
Search(UnqualifiedText("world".into())), Search(UnqualifiedText("world".into())),
And, And,
Search(SingleField { Search(SingleField {
field: Text("foo".into()), field: "foo".into(),
text: "bar baz".into(), text: "bar baz".into(),
is_re: false, is_re: false,
}) })
@ -594,7 +552,7 @@ mod test {
assert_eq!( assert_eq!(
parse("foo:re:bar")?, parse("foo:re:bar")?,
vec![Search(SingleField { vec![Search(SingleField {
field: Text("foo".into()), field: "foo".into(),
text: "bar".into(), text: "bar".into(),
is_re: true is_re: true
})] })]
@ -604,7 +562,7 @@ mod test {
assert_eq!( assert_eq!(
parse(r#""field:va\"lue""#)?, parse(r#""field:va\"lue""#)?,
vec![Search(SingleField { vec![Search(SingleField {
field: Text("field".into()), field: "field".into(),
text: "va\"lue".into(), text: "va\"lue".into(),
is_re: false is_re: false
})] })]
@ -616,9 +574,17 @@ mod test {
assert!(parse(r"\").is_err()); assert!(parse(r"\").is_err());
assert!(parse(r"\a").is_err()); assert!(parse(r"\a").is_err());
assert!(parse(r"\%").is_err()); assert!(parse(r"\%").is_err());
// parser unescapes ":()
assert_eq!( assert_eq!(
parse(r#"\\\"\:\(\)\*\_"#)?, parse(r#"\"\:\(\)"#)?,
vec![Search(UnqualifiedText(r#"\\":()*\_"#.into())),] vec![Search(UnqualifiedText(r#"":()"#.into())),]
);
// parser doesn't unescape \*_
assert_eq!(
parse(r#"\\\*\_"#)?,
vec![Search(UnqualifiedText(r#"\\\*\_"#.into())),]
); );
// escaping parentheses is optional (only) inside quotes // escaping parentheses is optional (only) inside quotes
@ -651,9 +617,7 @@ mod test {
assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]); assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
assert_eq!( assert_eq!(
parse("card:front")?, parse("card:front")?,
vec![Search(CardTemplate(TemplateKind::Name(Text( vec![Search(CardTemplate(TemplateKind::Name("front".into())))]
"front".into()
))))]
); );
assert_eq!( assert_eq!(
parse("card:3")?, parse("card:3")?,
@ -670,15 +634,8 @@ mod test {
vec![Search(Deck("default one".into()))] vec![Search(Deck("default one".into()))]
); );
assert_eq!( assert_eq!(parse("note:basic")?, vec![Search(NoteType("basic".into()))]);
parse("note:basic")?, assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".into()))]);
vec![Search(NoteType(Text("basic".into())))]
);
assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".to_string()))]);
// wildcards in tags don't match whitespace
assert_eq!(parse("tag:ha_d")?, vec![Search(Tag(r"ha\Sd".to_string()))]);
assert_eq!(parse("tag:h*d")?, vec![Search(Tag(r"h\S*d".to_string()))]);
assert_eq!( assert_eq!(
parse("nid:1237123712,2,3")?, parse("nid:1237123712,2,3")?,
vec![Search(NoteIDs("1237123712,2,3".into()))] vec![Search(NoteIDs("1237123712,2,3".into()))]

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use super::parser::{Node, OptionalRe, PropertyKind, SearchNode, StateKind, TemplateKind}; use super::parser::{Node, PropertyKind, SearchNode, StateKind, TemplateKind};
use crate::{ use crate::{
card::{CardQueue, CardType}, card::{CardQueue, CardType},
collection::Collection, collection::Collection,
@ -9,12 +9,24 @@ use crate::{
err::Result, err::Result,
notes::field_checksum, notes::field_checksum,
notetype::NoteTypeID, notetype::NoteTypeID,
text::{normalize_to_nfc, strip_html_preserving_image_filenames, without_combining}, text::{
escape_sql, is_glob, normalize_to_nfc, strip_html_preserving_image_filenames, to_custom_re,
to_re, to_sql, to_text, without_combining,
},
timestamp::TimestampSecs, timestamp::TimestampSecs,
}; };
use regex::Regex; use regex::Regex;
use std::{borrow::Cow, fmt::Write}; use std::{borrow::Cow, fmt::Write};
use unicase::eq as uni_eq; use unicase::eq as uni_eq;
use ConversionMode as CM;

/// How a search term is transformed before being written into SQL
/// (dispatched in `convert`, which also norms the text).
enum ConversionMode<'a> {
    /// Only normalize to NFC; leave escapes and wildcards untouched.
    OnlyNorm,
    /// Convert Anki wildcards to a regex (via `to_re`, wildcard = ".").
    Regex,
    /// Convert to a regex with the given wildcard pattern
    /// (via `to_custom_re`, e.g. r"\S" for tags).
    CustomRe(&'a str),
    /// Convert Anki wildcards to SQL LIKE wildcards (via `to_sql`).
    Sql,
    /// Strip all escaping, yielding plain text (via `to_text`).
    Text,
}
pub(crate) struct SqlWriter<'a> { pub(crate) struct SqlWriter<'a> {
col: &'a mut Collection, col: &'a mut Collection,
@ -116,22 +128,20 @@ impl SqlWriter<'_> {
use normalize_to_nfc as norm; use normalize_to_nfc as norm;
match node { match node {
// note fields related // note fields related
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)), SearchNode::UnqualifiedText(text) => self.write_unqualified(text),
SearchNode::SingleField { field, text, is_re } => { SearchNode::SingleField { field, text, is_re } => {
self.write_single_field(field, &self.norm_note(text), *is_re)? self.write_single_field(field, text, *is_re)?
} }
SearchNode::Duplicates { note_type_id, text } => { SearchNode::Duplicates { note_type_id, text } => self.write_dupes(*note_type_id, text),
self.write_dupes(*note_type_id, &self.norm_note(text)) SearchNode::Regex(re) => self.write_regex(re),
} SearchNode::NoCombining(text) => self.write_no_combining(text),
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)), SearchNode::WordBoundary(text) => self.write_word_boundary(text),
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
// other // other
SearchNode::AddedInDays(days) => self.write_added(*days)?, SearchNode::AddedInDays(days) => self.write_added(*days)?,
SearchNode::EditedInDays(days) => self.write_edited(*days)?, SearchNode::EditedInDays(days) => self.write_edited(*days)?,
// fixme: normalise in name case?
SearchNode::CardTemplate(template) => self.write_template(template)?, SearchNode::CardTemplate(template) => self.write_template(template)?,
// fixme: always norm?
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?, SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
SearchNode::NoteTypeID(ntid) => { SearchNode::NoteTypeID(ntid) => {
write!(self.sql, "n.mid = {}", ntid).unwrap(); write!(self.sql, "n.mid = {}", ntid).unwrap();
@ -139,11 +149,9 @@ impl SqlWriter<'_> {
SearchNode::DeckID(did) => { SearchNode::DeckID(did) => {
write!(self.sql, "c.did = {}", did).unwrap(); write!(self.sql, "c.did = {}", did).unwrap();
} }
// fixme: normalise?
SearchNode::NoteType(notetype) => self.write_note_type(notetype)?, SearchNode::NoteType(notetype) => self.write_note_type(notetype)?,
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?, SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
// fixme: normalise?
SearchNode::Tag(tag) => self.write_tag(tag)?, SearchNode::Tag(tag) => self.write_tag(tag)?,
SearchNode::State(state) => self.write_state(state)?, SearchNode::State(state) => self.write_state(state)?,
SearchNode::Flag(flag) => { SearchNode::Flag(flag) => {
@ -163,7 +171,7 @@ impl SqlWriter<'_> {
fn write_unqualified(&mut self, text: &str) { fn write_unqualified(&mut self, text: &str) {
// implicitly wrap in % // implicitly wrap in %
let text = format!("%{}%", text); let text = format!("%{}%", &self.convert(CM::Sql, text));
self.args.push(text); self.args.push(text);
write!( write!(
self.sql, self.sql,
@ -174,7 +182,7 @@ impl SqlWriter<'_> {
} }
fn write_no_combining(&mut self, text: &str) { fn write_no_combining(&mut self, text: &str) {
let text = format!("%{}%", without_combining(text)); let text = format!("%{}%", without_combining(&self.convert(CM::Sql, text)));
self.args.push(text); self.args.push(text);
write!( write!(
self.sql, self.sql,
@ -187,16 +195,28 @@ impl SqlWriter<'_> {
.unwrap(); .unwrap();
} }
fn write_tag(&mut self, s: &String) -> Result<()> { fn write_tag(&mut self, text: &str) -> Result<()> {
if s.contains(" ") { if text.contains(" ") {
write!(self.sql, "false").unwrap(); write!(self.sql, "false").unwrap();
} else { } else {
match s.as_str() { match text {
"none" => write!(self.sql, "n.tags = ''").unwrap(), "none" => write!(self.sql, "n.tags = ''").unwrap(),
r"\S*" => write!(self.sql, "true").unwrap(), "*" => write!(self.sql, "true").unwrap(),
_ => { s => {
write!(self.sql, "n.tags regexp ?").unwrap(); if is_glob(s) {
self.args.push(format!("(?i).* {} .*", s)); write!(self.sql, "n.tags regexp ?").unwrap();
let re = &self.convert(CM::CustomRe(r"\S"), s);
self.args.push(format!("(?i).* {} .*", re));
} else if let Some(tag) = self
.col
.storage
.preferred_tag_case(&self.convert(CM::Text, s))?
{
write!(self.sql, "n.tags like ? escape '\\'").unwrap();
self.args.push(format!("% {} %", escape_sql(&tag)));
} else {
write!(self.sql, "false").unwrap();
}
} }
} }
} }
@ -294,7 +314,7 @@ impl SqlWriter<'_> {
fn write_deck(&mut self, deck: &str) -> Result<()> { fn write_deck(&mut self, deck: &str) -> Result<()> {
match deck { match deck {
".*" => write!(self.sql, "true").unwrap(), "*" => write!(self.sql, "true").unwrap(),
"filtered" => write!(self.sql, "c.odid != 0").unwrap(), "filtered" => write!(self.sql, "c.odid != 0").unwrap(),
deck => { deck => {
// rewrite "current" to the current deck name // rewrite "current" to the current deck name
@ -309,7 +329,7 @@ impl SqlWriter<'_> {
.as_str(), .as_str(),
) )
} else { } else {
human_deck_name_to_native(deck) human_deck_name_to_native(&self.convert(CM::Regex, deck))
}; };
// convert to a regex that includes child decks // convert to a regex that includes child decks
@ -330,54 +350,45 @@ impl SqlWriter<'_> {
TemplateKind::Ordinal(n) => { TemplateKind::Ordinal(n) => {
write!(self.sql, "c.ord = {}", n).unwrap(); write!(self.sql, "c.ord = {}", n).unwrap();
} }
TemplateKind::Name(name) => match name { TemplateKind::Name(name) => {
OptionalRe::Re(s) => { if is_glob(name) {
let re = format!("(?i){}", s); let re = format!("(?i){}", self.convert(CM::Regex, name));
self.sql.push_str( self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)", "(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
); );
self.args.push(re); self.args.push(re);
} } else {
OptionalRe::Text(s) => {
self.sql.push_str( self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)", "(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
); );
self.args.push(s.to_string()); self.args.push(self.convert(CM::Text, name).into());
} }
}, }
}; };
Ok(()) Ok(())
} }
fn write_note_type(&mut self, nt_name: &OptionalRe) -> Result<()> { fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
match nt_name { if is_glob(nt_name) {
OptionalRe::Re(s) => { let re = format!("(?i){}", self.convert(CM::Regex, nt_name));
let re = format!("(?i){}", s); self.sql
self.sql .push_str("n.mid in (select id from notetypes where name regexp ?)");
.push_str("n.mid in (select id from notetypes where name regexp ?)"); self.args.push(re);
self.args.push(re); } else {
} self.sql
OptionalRe::Text(s) => { .push_str("n.mid in (select id from notetypes where name = ?)");
self.sql self.args.push(self.convert(CM::Text, nt_name).into());
.push_str("n.mid in (select id from notetypes where name = ?)");
self.args.push(s.to_string());
}
} }
Ok(()) Ok(())
} }
fn write_single_field( fn write_single_field(&mut self, field_name: &str, val: &str, is_re: bool) -> Result<()> {
&mut self,
field_name: &OptionalRe,
val: &str,
is_re: bool,
) -> Result<()> {
let note_types = self.col.get_all_notetypes()?; let note_types = self.col.get_all_notetypes()?;
let mut field_map = vec![]; let mut field_map = vec![];
for nt in note_types.values() { for nt in note_types.values() {
for field in &nt.fields { for field in &nt.fields {
if matches_string_variant(&field.name, field_name) { if self.matches_glob(&field.name, field_name) {
field_map.push((nt.id, field.ord)); field_map.push((nt.id, field.ord));
} }
} }
@ -396,11 +407,12 @@ impl SqlWriter<'_> {
if is_re { if is_re {
cmp = "regexp"; cmp = "regexp";
cmp_trailer = ""; cmp_trailer = "";
self.args.push(format!("(?i){}", val)); self.args
.push(format!("(?i){}", self.convert(CM::OnlyNorm, val)));
} else { } else {
cmp = "like"; cmp = "like";
cmp_trailer = "escape '\\'"; cmp_trailer = "escape '\\'";
self.args.push(val.into()) self.args.push(self.convert(CM::Sql, val).into())
} }
let arg_idx = self.args.len(); let arg_idx = self.args.len();
@ -423,6 +435,7 @@ impl SqlWriter<'_> {
} }
fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) { fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) {
let text = &self.convert(CM::OnlyNorm, text);
let text_nohtml = strip_html_preserving_image_filenames(text); let text_nohtml = strip_html_preserving_image_filenames(text);
let csum = field_checksum(text_nohtml.as_ref()); let csum = field_checksum(text_nohtml.as_ref());
write!( write!(
@ -450,19 +463,39 @@ impl SqlWriter<'_> {
fn write_regex(&mut self, word: &str) { fn write_regex(&mut self, word: &str) {
self.sql.push_str("n.flds regexp ?"); self.sql.push_str("n.flds regexp ?");
self.args.push(format!(r"(?i){}", word)); self.args
.push(format!(r"(?i){}", self.convert(CM::OnlyNorm, word)));
} }
fn write_word_boundary(&mut self, word: &str) { fn write_word_boundary(&mut self, word: &str) {
self.write_regex(&format!(r"\b{}\b", word)) self.sql.push_str("n.flds regexp ?");
self.args
.push(format!(r"(?i)\b{}\b", self.convert(CM::Regex, word)));
} }
}
/// True if the content of search is equal to text, folding case. /// Norm text and call the according conversion function.
fn matches_string_variant(text: &str, search: &OptionalRe) -> bool { fn convert<'a>(&self, mode: ConversionMode, txt: &'a str) -> Cow<'a, str> {
match search { let txt = match mode {
OptionalRe::Re(s) => Regex::new(&format!("^(?i){}$", s)).unwrap().is_match(text), CM::OnlyNorm => txt.into(),
OptionalRe::Text(s) => uni_eq(text, s), CM::Regex => to_re(txt),
CM::CustomRe(wildcard) => to_custom_re(txt, wildcard),
CM::Sql => to_sql(txt),
CM::Text => to_text(txt),
};
match txt {
Cow::Borrowed(s) => self.norm_note(s),
Cow::Owned(s) => self.norm_note(&s).to_string().into(),
}
}
/// Compare text with a possible glob, folding case.
fn matches_glob(&self, text: &str, search: &str) -> bool {
if is_glob(search) {
let search = format!("^(?i){}$", self.convert(CM::Regex, search));
Regex::new(&search).unwrap().is_match(text)
} else {
uni_eq(text, &self.convert(CM::Text, search))
}
} }
} }
@ -665,9 +698,15 @@ mod test {
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
s(ctx, r"tag:one"), s(ctx, r"tag:one"),
("(n.tags regexp ?)".into(), vec![r"(?i).* one .*".into()]) (
"(n.tags like ? escape '\\')".into(),
vec![r"% One %".into()]
)
); );
// unregistered tags without wildcards won't match
assert_eq!(s(ctx, "tag:unknown"), ("(false)".into(), vec![]));
// wildcards force a regexp search // wildcards force a regexp search
assert_eq!( assert_eq!(
s(ctx, r"tag:o*n\*et%w%oth_re\_e"), s(ctx, r"tag:o*n\*et%w%oth_re\_e"),

View file

@ -289,6 +289,80 @@ pub(crate) fn text_to_re(glob: &str) -> String {
text2.into() text2.into()
} }
/// Check if string contains an unescaped wildcard.
///
/// E.g. `f*o`, `f_o` and `a\\*` are globs, while `f\*o` and `a\_` are not:
/// a wildcard only counts when preceded by an even number of backslashes.
pub(crate) fn is_glob(txt: &str) -> bool {
    // even number of \s followed by a wildcard
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r#"(?x)
            (?:^|[^\\])     # not a backslash
            (?:\\\\)*       # even number of backslashes
            [*_]            # wildcard
            "#
        )
        .unwrap();
    }
    RE.is_match(txt)
}
/// Convert to a RegEx respecting Anki wildcards.
///
/// Shorthand for `to_custom_re` with "." as the wildcard, so `*` expands
/// to `.*` and `_` to `.`.
pub(crate) fn to_re(txt: &str) -> Cow<str> {
    to_custom_re(txt, ".")
}
/// Convert Anki style to RegEx using the provided wildcard.
///
/// Unescaped `*` expands to `wildcard*` and `_` to `wildcard`; the escape
/// pairs `\\` and `\*` pass through unchanged and `\_` becomes a literal
/// underscore. Every other character is passed through `regex::escape()`,
/// so regex metacharacters in the search term (".", "+", "(", …) match
/// literally instead of leaking into the generated pattern.
pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> {
    lazy_static! {
        // Match an escape pair, or any single character. Matching *every*
        // character (not just escapes and wildcards) is what lets the
        // fall-through arm regex-escape unescaped metacharacters.
        static ref RE: Regex = Regex::new(r"\\?.").unwrap();
    }
    RE.replace_all(txt, |caps: &Captures| {
        let s = &caps[0];
        match s {
            r"\\" | r"\*" => s.to_string(),
            r"\_" => "_".to_string(),
            "*" => format!("{}*", wildcard),
            "_" => wildcard.to_string(),
            s => regex::escape(s),
        }
    })
}
/// Convert to SQL respecting Anki wildcards.
///
/// Anki `*` (any sequence) maps to LIKE `%`, and an escaped `\*` becomes a
/// literal `*`. A literal `%` must be escaped as `\%` for LIKE, and `\\`
/// stays escaped because the pattern is used with `escape '\'`.
/// `_` and `\_` pass through untouched: `_` is also the single-character
/// wildcard in LIKE, and `\_` is already the correct LIKE escape for a
/// literal underscore.
pub(crate) fn to_sql<'a>(txt: &'a str) -> Cow<'a, str> {
    // escape sequences and unescaped special characters which need conversion
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap();
    }
    RE.replace_all(&txt, |caps: &Captures| {
        let s = &caps[0];
        match s {
            r"\\" => r"\\", // keep escaped for LIKE ... escape '\'
            r"\*" => "*",   // escaped asterisk -> literal
            "*" => "%",     // Anki wildcard -> SQL wildcard
            "%" => r"\%",   // literal percent must be escaped for LIKE
            _ => unreachable!(),
        }
    })
}
}
/// Unescape everything.
///
/// Strips one level of backslash escaping: `\x` becomes `x` for any
/// character x. Used when the term is compared as plain text rather than
/// as a regex or LIKE pattern.
pub(crate) fn to_text(txt: &str) -> Cow<str> {
    lazy_static! {
        // a backslash and the single character it escapes
        static ref RE: Regex = Regex::new(r"\\(.)").unwrap();
    }
    RE.replace_all(&txt, "$1")
}
/// Escape characters special to SQL: \%_
///
/// Used when a plain (non-glob) string is matched with LIKE ... escape '\':
/// backslash, percent and underscore are prefixed with a backslash so they
/// match literally.
pub(crate) fn escape_sql(txt: &str) -> Cow<str> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"[\\%_]").unwrap();
    }
    // $0 is the whole match; prefix it with a backslash
    RE.replace_all(&txt, r"\$0")
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::text::without_combining; use crate::text::without_combining;