From 5280cb2f1c2ee8f9b97d28e33387cf153acb3e48 Mon Sep 17 00:00:00 2001 From: maxr777 <31160014+maxr777@users.noreply.github.com> Date: Thu, 4 Sep 2025 03:52:08 +0200 Subject: [PATCH] Enable nc: to only search in a specific field (#4276) (#4312) * Enable nc: to only search in a specific field * Add FieldSearchMode enum to replace boolean fields * Avoid magic numbers in enum * Use standard naming so Prost can remove redundant text --------- Co-authored-by: Damien Elmes --- CONTRIBUTORS | 1 + proto/anki/search.proto | 7 +- rslib/src/search/builder.rs | 3 +- rslib/src/search/mod.rs | 1 + rslib/src/search/parser.rs | 56 ++++++++++++---- rslib/src/search/service/search_node.rs | 5 +- rslib/src/search/sqlwriter.rs | 85 ++++++++++++++++++++++--- rslib/src/search/writer.rs | 24 +++++-- 8 files changed, 152 insertions(+), 30 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 70032a23c..b03108e16 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -240,6 +240,7 @@ Thomas Rixen Siyuan Mattuwu Yan Lee Doughty <32392044+leedoughty@users.noreply.github.com> memchr +Max Romanowski Aldlss ******************** diff --git a/proto/anki/search.proto b/proto/anki/search.proto index bb417294c..e87a063c9 100644 --- a/proto/anki/search.proto +++ b/proto/anki/search.proto @@ -74,10 +74,15 @@ message SearchNode { repeated SearchNode nodes = 1; Joiner joiner = 2; } + enum FieldSearchMode { + FIELD_SEARCH_MODE_NORMAL = 0; + FIELD_SEARCH_MODE_REGEX = 1; + FIELD_SEARCH_MODE_NOCOMBINING = 2; + } message Field { string field_name = 1; string text = 2; - bool is_re = 3; + FieldSearchMode mode = 3; } oneof filter { diff --git a/rslib/src/search/builder.rs b/rslib/src/search/builder.rs index a76af0560..0c22ff1eb 100644 --- a/rslib/src/search/builder.rs +++ b/rslib/src/search/builder.rs @@ -6,6 +6,7 @@ use std::mem; use itertools::Itertools; use super::writer::write_nodes; +use super::FieldSearchMode; use super::Node; use super::SearchNode; use super::StateKind; @@ -174,7 +175,7 @@ impl SearchNode { pub fn from_tag_name(name: &str) -> Self { Self::Tag { tag: escape_anki_wildcards_for_search_node(name), - is_re: false, + mode: FieldSearchMode::Normal, } } diff --git a/rslib/src/search/mod.rs b/rslib/src/search/mod.rs index 0960fabf9..0dd52dbc3 100644 --- a/rslib/src/search/mod.rs +++ b/rslib/src/search/mod.rs @@ -13,6 +13,7 @@ pub use builder::JoinSearches; pub use builder::Negated; pub use builder::SearchBuilder; pub use parser::parse as parse_search; +pub use parser::FieldSearchMode; pub use parser::Node; pub use parser::PropertyKind; pub use parser::RatingKind; diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs index 33c1a4622..cbdba3d9f 100644 --- a/rslib/src/search/parser.rs +++ b/rslib/src/search/parser.rs @@ -3,6 +3,7 @@ use std::sync::LazyLock; +use anki_proto::search::search_node::FieldSearchMode as FieldSearchModeProto; use nom::branch::alt; use nom::bytes::complete::escaped; use nom::bytes::complete::is_not; @@ -27,7 +28,6 @@ use crate::error::ParseError; use crate::error::Result; use crate::error::SearchErrorKind as FailKind; use crate::prelude::*; - type IResult<'a, O> = std::result::Result<(&'a str, O), nom::Err>>; type ParseResult<'a, O> = std::result::Result>>; @@ -48,6 +48,23 @@ pub enum Node { Search(SearchNode), } +#[derive(Copy, Debug, PartialEq, Eq, Clone)] +pub enum FieldSearchMode { + Normal, + Regex, + NoCombining, +} + +impl From for FieldSearchMode { + fn from(mode: FieldSearchModeProto) -> Self { + match mode { + FieldSearchModeProto::Normal => Self::Normal, + FieldSearchModeProto::Regex => Self::Regex, + FieldSearchModeProto::Nocombining => Self::NoCombining, + } + } +} + #[derive(Debug, PartialEq, Clone)] pub enum SearchNode { // text without a colon @@ -56,7 +73,7 @@ pub enum SearchNode { SingleField { field: String, text: String, - is_re: bool, + mode: FieldSearchMode, }, AddedInDays(u32), EditedInDays(u32), @@ -77,7 +94,7 @@ pub enum SearchNode { }, Tag { tag: String, - is_re: bool, + mode: FieldSearchMode, }, Duplicates { notetype_id: NotetypeId, @@ -373,12 +390,12 @@ fn parse_tag(s: &str) -> ParseResult<'_, SearchNode> { Ok(if let Some(re) = s.strip_prefix("re:") { SearchNode::Tag { tag: unescape_quotes(re), - is_re: true, + mode: FieldSearchMode::Regex, } } else { SearchNode::Tag { tag: unescape(s)?, - is_re: false, + mode: FieldSearchMode::Normal, } }) } @@ -670,13 +687,19 @@ fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<'a, SearchN SearchNode::SingleField { field: unescape(key)?, text: unescape_quotes(stripped), - is_re: true, + mode: FieldSearchMode::Regex, + } + } else if let Some(stripped) = val.strip_prefix("nc:") { + SearchNode::SingleField { + field: unescape(key)?, + text: unescape_quotes(stripped), + mode: FieldSearchMode::NoCombining, } } else { SearchNode::SingleField { field: unescape(key)?, text: unescape(val)?, - is_re: false, + mode: FieldSearchMode::Normal, } }) } @@ -806,7 +829,7 @@ mod test { Search(SingleField { field: "foo".into(), text: "bar baz".into(), - is_re: false, + mode: FieldSearchMode::Normal, }) ]))), Or, @@ -819,7 +842,16 @@ mod test { vec![Search(SingleField { field: "foo".into(), text: "bar".into(), - is_re: true + mode: FieldSearchMode::Regex, + })] + ); + + assert_eq!( + parse("foo:nc:bar")?, + vec![Search(SingleField { + field: "foo".into(), + text: "bar".into(), + mode: FieldSearchMode::NoCombining, })] ); @@ -829,7 +861,7 @@ mod test { vec![Search(SingleField { field: "field".into(), text: "va\"lue".into(), - is_re: false + mode: FieldSearchMode::Normal, })] ); assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:"va\"lue""#)?,); @@ -906,14 +938,14 @@ mod test { parse("tag:hard")?, vec![Search(Tag { tag: "hard".into(), - is_re: false + mode: FieldSearchMode::Normal })] ); assert_eq!( parse(r"tag:re:\\")?, vec![Search(Tag { tag: r"\\".into(), - is_re: true + mode: FieldSearchMode::Regex })] ); assert_eq!( diff --git a/rslib/src/search/service/search_node.rs b/rslib/src/search/service/search_node.rs index 1851a28f7..6986eef2a 100644 --- a/rslib/src/search/service/search_node.rs +++ b/rslib/src/search/service/search_node.rs @@ -6,6 +6,7 @@ use itertools::Itertools; use crate::prelude::*; use crate::search::parse_search; +use crate::search::FieldSearchMode; use crate::search::Negated; use crate::search::Node; use crate::search::PropertyKind; @@ -40,7 +41,7 @@ impl TryFrom for Node { Filter::FieldName(s) => Node::Search(SearchNode::SingleField { field: escape_anki_wildcards_for_search_node(&s), text: "_*".to_string(), - is_re: false, + mode: FieldSearchMode::Normal, }), Filter::Rated(rated) => Node::Search(SearchNode::Rated { days: rated.days, @@ -107,7 +108,7 @@ impl TryFrom for Node { Filter::Field(field) => Node::Search(SearchNode::SingleField { field: escape_anki_wildcards(&field.field_name), text: escape_anki_wildcards(&field.text), - is_re: field.is_re, + mode: field.mode().into(), }), Filter::LiteralText(text) => { let text = escape_anki_wildcards(&text); diff --git a/rslib/src/search/sqlwriter.rs b/rslib/src/search/sqlwriter.rs index 542dba4fc..95249276c 100644 --- a/rslib/src/search/sqlwriter.rs +++ b/rslib/src/search/sqlwriter.rs @@ -7,6 +7,7 @@ use std::ops::Range; use itertools::Itertools; +use super::parser::FieldSearchMode; use super::parser::Node; use super::parser::PropertyKind; use super::parser::RatingKind; @@ -138,8 +139,8 @@ impl SqlWriter<'_> { false, )? } - SearchNode::SingleField { field, text, is_re } => { - self.write_field(&norm(field), &self.norm_note(text), *is_re)? + SearchNode::SingleField { field, text, mode } => { + self.write_field(&norm(field), &self.norm_note(text), *mode)? } SearchNode::Duplicates { notetype_id, text } => { self.write_dupe(*notetype_id, &self.norm_note(text))? @@ -180,7 +181,7 @@ impl SqlWriter<'_> { SearchNode::Notetype(notetype) => self.write_notetype(&norm(notetype)), SearchNode::Rated { days, ease } => self.write_rated(">", -i64::from(*days), ease)?, - SearchNode::Tag { tag, is_re } => self.write_tag(&norm(tag), *is_re), + SearchNode::Tag { tag, mode } => self.write_tag(&norm(tag), *mode), SearchNode::State(state) => self.write_state(state)?, SearchNode::Flag(flag) => { write!(self.sql, "(c.flags & 7) == {flag}").unwrap(); @@ -296,8 +297,8 @@ impl SqlWriter<'_> { Ok(()) } - fn write_tag(&mut self, tag: &str, is_re: bool) { - if is_re { + fn write_tag(&mut self, tag: &str, mode: FieldSearchMode) { + if mode == FieldSearchMode::Regex { self.args.push(format!("(?i){tag}")); write!(self.sql, "regexp_tags(?{}, n.tags)", self.args.len()).unwrap(); } else { @@ -567,16 +568,18 @@ impl SqlWriter<'_> { } } - fn write_field(&mut self, field_name: &str, val: &str, is_re: bool) -> Result<()> { + fn write_field(&mut self, field_name: &str, val: &str, mode: FieldSearchMode) -> Result<()> { if matches!(field_name, "*" | "_*" | "*_") { - if is_re { + if mode == FieldSearchMode::Regex { self.write_all_fields_regexp(val); } else { self.write_all_fields(val); } Ok(()) - } else if is_re { + } else if mode == FieldSearchMode::Regex { self.write_single_field_regexp(field_name, val) + } else if mode == FieldSearchMode::NoCombining { + self.write_single_field_nc(field_name, val) } else { self.write_single_field(field_name, val) } @@ -592,6 +595,58 @@ impl SqlWriter<'_> { write!(self.sql, "regexp_fields(?{}, n.flds)", self.args.len()).unwrap(); } + fn write_single_field_nc(&mut self, field_name: &str, val: &str) -> Result<()> { + let field_indicies_by_notetype = self.num_fields_and_fields_indices_by_notetype( + field_name, + matches!(val, "*" | "_*" | "*_"), + )?; + if field_indicies_by_notetype.is_empty() { + write!(self.sql, "false").unwrap(); + return Ok(()); + } + + let val = to_sql(val); + let val = without_combining(&val); + self.args.push(val.into()); + let arg_idx = self.args.len(); + let field_idx_str = format!("' || ?{arg_idx} || '"); + let other_idx_str = "%".to_string(); + + let notetype_clause = |ctx: &FieldQualifiedSearchContext| -> String { + let field_index_clause = |range: &Range| { + let f = (0..ctx.total_fields_in_note) + .filter_map(|i| { + if i as u32 == range.start { + Some(&field_idx_str) + } else if range.contains(&(i as u32)) { + None + } else { + Some(&other_idx_str) + } + }) + .join("\x1f"); + format!( + "coalesce(process_text(n.flds, {}), n.flds) like '{f}' escape '\\'", + ProcessTextFlags::NoCombining.bits() + ) + }; + + let all_field_clauses = ctx + .field_ranges_to_search + .iter() + .map(field_index_clause) + .join(" or "); + format!("(n.mid = {mid} and ({all_field_clauses}))", mid = ctx.ntid) + }; + let all_notetype_clauses = field_indicies_by_notetype + .iter() + .map(notetype_clause) + .join(" or "); + write!(self.sql, "({all_notetype_clauses})").unwrap(); + + Ok(()) + } + fn write_single_field_regexp(&mut self, field_name: &str, val: &str) -> Result<()> { let field_indicies_by_notetype = self.fields_indices_by_notetype(field_name)?; if field_indicies_by_notetype.is_empty() { @@ -1116,6 +1171,20 @@ mod test { vec!["(?i)te.*st".into()] ) ); + // field search with no-combine + assert_eq!( + s(ctx, "front:nc:frânçais"), + ( + concat!( + "(((n.mid = 1581236385344 and (coalesce(process_text(n.flds, 1), n.flds) like '' || ?1 || '\u{1f}%' escape '\\')) or ", + "(n.mid = 1581236385345 and (coalesce(process_text(n.flds, 1), n.flds) like '' || ?1 || '\u{1f}%\u{1f}%' escape '\\')) or ", + "(n.mid = 1581236385346 and (coalesce(process_text(n.flds, 1), n.flds) like '' || ?1 || '\u{1f}%' escape '\\')) or ", + "(n.mid = 1581236385347 and (coalesce(process_text(n.flds, 1), n.flds) like '' || ?1 || '\u{1f}%' escape '\\'))))" + ) + .into(), + vec!["francais".into()] + ) + ); // all field search assert_eq!( s(ctx, "*:te*st"), diff --git a/rslib/src/search/writer.rs b/rslib/src/search/writer.rs index 3bbe6fd0a..68d05c66d 100644 --- a/rslib/src/search/writer.rs +++ b/rslib/src/search/writer.rs @@ -9,6 +9,7 @@ use regex::Regex; use crate::notetype::NotetypeId as NotetypeIdType; use crate::prelude::*; use crate::search::parser::parse; +use crate::search::parser::FieldSearchMode; use crate::search::parser::Node; use crate::search::parser::PropertyKind; use crate::search::parser::RatingKind; @@ -69,7 +70,7 @@ fn write_search_node(node: &SearchNode) -> String { use SearchNode::*; match node { UnqualifiedText(s) => maybe_quote(&s.replace(':', "\\:")), - SingleField { field, text, is_re } => write_single_field(field, text, *is_re), + SingleField { field, text, mode } => write_single_field(field, text, *mode), AddedInDays(u) => format!("added:{u}"), EditedInDays(u) => format!("edited:{u}"), IntroducedInDays(u) => format!("introduced:{u}"), @@ -81,7 +82,7 @@ fn write_search_node(node: &SearchNode) -> String { NotetypeId(NotetypeIdType(i)) => format!("mid:{i}"), Notetype(s) => maybe_quote(&format!("note:{s}")), Rated { days, ease } => write_rated(days, ease), - Tag { tag, is_re } => write_single_field("tag", tag, *is_re), + Tag { tag, mode } => write_single_field("tag", tag, *mode), Duplicates { notetype_id, text } => write_dupe(notetype_id, text), State(k) => write_state(k), Flag(u) => format!("flag:{u}"), @@ -116,14 +117,25 @@ fn needs_quotation(txt: &str) -> bool { } /// Also used by tag search, which has the same syntax. -fn write_single_field(field: &str, text: &str, is_re: bool) -> String { - let re = if is_re { "re:" } else { "" }; - let text = if !is_re && text.starts_with("re:") { +fn write_single_field(field: &str, text: &str, mode: FieldSearchMode) -> String { + let prefix = match mode { + FieldSearchMode::Normal => "", + FieldSearchMode::Regex => "re:", + FieldSearchMode::NoCombining => "nc:", + }; + let text = if mode == FieldSearchMode::Normal + && (text.starts_with("re:") || text.starts_with("nc:")) + { text.replacen(':', "\\:", 1) } else { text.to_string() }; - maybe_quote(&format!("{}:{}{}", field.replace(':', "\\:"), re, &text)) + maybe_quote(&format!( + "{}:{}{}", + field.replace(':', "\\:"), + prefix, + &text + )) } fn write_template(template: &TemplateKind) -> String {