Add option to exclude fields from search (#2394)

* Add option to exclude fields from unqualified searches

* Use temp tables instead

This is slightly faster according to my (very rough) tests.

* Make query a bit more readable

* exclude_from_search -> excludeFromSearch

* Remove superfluous notetypes table from query

* Rework to use field search logic

Thanks to Rumo for the suggestion: https://github.com/ankitects/anki/pull/2394#issuecomment-1446702402

* Exclude fields from field searches too

* Fix error on notetypes with no included fields

* Add back the exclude_fields function

This approach seems to perform better on average than the previously
benchmarked ones.

* Use pure-SQL approach to excluding fields

* Change single field search to use new approach

* Fix flawed any_excluded/sortf_excluded logic

* Support field exclusion in the nc operator

Also fix search text being wrapped in % in the any_excluded=true case.

* Support field exclusion in the re and w operators

* Label field exclusion as being slower

* Unqualified search should be wrapped in % in all cases

I was under the impression that it shouldn't be wrapped with the new
field exclusion logic.

* Remove unnecessary .collect()

* Refactor some complex return types into structs

* Do not exclude fields in field searches

* Add a test and docstring for CollectRanges

* Avoid destructuring in closures

* Remove the exclude_fields function

Minor wording tweaks by dae:
* num_fields -> total_fields_in_note
* fields -> field_ranges_to_search
* fields -> fields_to_search
* SingleField -> FieldQualified
* mid -> ntid
This commit is contained in:
Abdo 2023-03-20 00:46:03 +03:00 committed by GitHub
parent fdfa33fea4
commit 946eb46813
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 346 additions and 67 deletions

View file

@ -17,3 +17,4 @@ fields-sort-by-this-field-in-the = Sort by this field in the browser
fields-that-field-name-is-already-used = That field name is already used.
fields-name-first-letter-not-valid = The field name should not start with #, ^ or /.
fields-name-invalid-letter = The field name should not contain :, ", { "{" } or { "}" }.
fields-exclude-from-search = Exclude from unqualified searches (slower)

View file

@ -74,6 +74,7 @@ message Notetype {
string description = 5;
bool plain_text = 6;
bool collapsed = 7;
bool exclude_from_search = 8;
bytes other = 255;
}

View file

@ -246,6 +246,7 @@ class FieldDialog(QDialog):
f.rtl.setChecked(fld["rtl"])
f.plainTextByDefault.setChecked(fld["plainText"])
f.collapseByDefault.setChecked(fld["collapsed"])
f.excludeFromSearch.setChecked(fld["excludeFromSearch"])
f.fieldDescription.setText(fld.get("description", ""))
def saveField(self) -> None:
@ -274,6 +275,11 @@ class FieldDialog(QDialog):
collapsed = f.collapseByDefault.isChecked()
if fld["collapsed"] != collapsed:
fld["collapsed"] = collapsed
self.change_tracker.mark_basic()
exclude_from_search = f.excludeFromSearch.isChecked()
if fld["excludeFromSearch"] != exclude_from_search:
fld["excludeFromSearch"] = exclude_from_search
self.change_tracker.mark_basic()
desc = f.fieldDescription.text()
if fld.get("description", "") != desc:
fld["description"] = desc

View file

@ -172,6 +172,16 @@
</property>
</widget>
</item>
<item row="6" column="1" alignment="Qt::AlignLeft">
<widget class="QCheckBox" name="excludeFromSearch">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text">
<string>fields_exclude_from_search</string>
</property>
</widget>
</item>
</layout>
</item>
<item>

View file

@ -46,6 +46,7 @@ impl NoteField {
font_size: 20,
description: "".into(),
collapsed: false,
exclude_from_search: false,
other: vec![],
},
}

View file

@ -167,7 +167,7 @@ impl From<Notetype> for NotetypeSchema11 {
/// See [crate::deckconfig::schema11::clear_other_duplicates()].
fn clear_other_field_duplicates(other: &mut HashMap<String, Value>) {
for key in &["description", "plainText", "collapsed"] {
for key in &["description", "plainText", "collapsed", "excludeFromSearch"] {
other.remove(*key);
}
}
@ -224,6 +224,9 @@ pub struct NoteFieldSchema11 {
#[serde(default, deserialize_with = "default_on_invalid")]
pub(crate) collapsed: bool,
#[serde(default, deserialize_with = "default_on_invalid")]
pub(crate) exclude_from_search: bool,
#[serde(flatten)]
pub(crate) other: HashMap<String, Value>,
}
@ -240,6 +243,7 @@ impl Default for NoteFieldSchema11 {
size: 20,
description: String::new(),
collapsed: false,
exclude_from_search: false,
other: Default::default(),
}
}
@ -258,6 +262,7 @@ impl From<NoteFieldSchema11> for NoteField {
font_size: f.size as u32,
description: f.description,
collapsed: f.collapsed,
exclude_from_search: f.exclude_from_search,
other: other_to_bytes(&f.other),
},
}
@ -281,6 +286,7 @@ impl From<NoteField> for NoteFieldSchema11 {
size: conf.font_size as u16,
description: conf.description,
collapsed: conf.collapsed,
exclude_from_search: conf.exclude_from_search,
other,
}
}

View file

@ -3,6 +3,7 @@
use std::borrow::Cow;
use std::fmt::Write;
use std::ops::Range;
use itertools::Itertools;
@ -130,11 +131,10 @@ impl SqlWriter<'_> {
// note fields related
SearchNode::UnqualifiedText(text) => {
let text = &self.norm_note(text);
if self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch) {
self.write_no_combining(text)
} else {
self.write_unqualified(text)
}
self.write_unqualified(
text,
self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch),
)?
}
SearchNode::SingleField { field, text, is_re } => {
self.write_field(&norm(field), &self.norm_note(text), *is_re)?
@ -142,9 +142,9 @@ impl SqlWriter<'_> {
SearchNode::Duplicates { notetype_id, text } => {
self.write_dupe(*notetype_id, &self.norm_note(text))?
}
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re), false)?,
SearchNode::NoCombining(text) => self.write_unqualified(&self.norm_note(text), true)?,
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text))?,
// other
SearchNode::AddedInDays(days) => self.write_added(*days)?,
@ -184,30 +184,76 @@ impl SqlWriter<'_> {
Ok(())
}
fn write_unqualified(&mut self, text: &str) {
fn write_unqualified(&mut self, text: &str, no_combining: bool) -> Result<()> {
let text = to_sql(text);
let text = if no_combining {
without_combining(&text)
} else {
text
};
// implicitly wrap in %
let text = format!("%{}%", &to_sql(text));
let text = format!("%{}%", text);
self.args.push(text);
write!(
self.sql,
"(n.sfld like ?{n} escape '\\' or n.flds like ?{n} escape '\\')",
n = self.args.len(),
)
.unwrap();
}
let arg_idx = self.args.len();
fn write_no_combining(&mut self, text: &str) {
let text = format!("%{}%", without_combining(&to_sql(text)));
self.args.push(text);
write!(
self.sql,
concat!(
"(coalesce(without_combining(cast(n.sfld as text)), n.sfld) like ?{n} escape '\\' ",
"or coalesce(without_combining(n.flds), n.flds) like ?{n} escape '\\')"
),
n = self.args.len(),
)
.unwrap();
let sfld_expr = if no_combining {
"coalesce(without_combining(cast(n.sfld as text)), n.sfld)"
} else {
"n.sfld"
};
let flds_expr = if no_combining {
"coalesce(without_combining(n.flds), n.flds)"
} else {
"n.flds"
};
if let Some(field_indicies_by_notetype) = self.included_fields_by_notetype()? {
let field_idx_str = format!("' || ?{arg_idx} || '");
let other_idx_str = "%".to_string();
let notetype_clause = |ctx: &UnqualifiedSearchContext| -> String {
let field_index_clause = |range: &Range<u32>| {
let f = (0..ctx.total_fields_in_note)
.filter_map(|i| {
if i as u32 == range.start {
Some(&field_idx_str)
} else if range.contains(&(i as u32)) {
None
} else {
Some(&other_idx_str)
}
})
.join("\x1f");
format!("{flds_expr} like '{f}' escape '\\'")
};
let mut all_field_clauses: Vec<String> = ctx
.field_ranges_to_search
.iter()
.map(field_index_clause)
.collect();
if !ctx.sortf_excluded {
all_field_clauses.push(format!("{sfld_expr} like ?{arg_idx} escape '\\'"));
}
format!(
"(n.mid = {mid} and ({all_field_clauses}))",
mid = ctx.ntid,
all_field_clauses = all_field_clauses.join(" or ")
)
};
let all_notetype_clauses = field_indicies_by_notetype
.iter()
.map(notetype_clause)
.join(" or ");
write!(self.sql, "({all_notetype_clauses})").unwrap();
} else {
write!(
self.sql,
"({sfld_expr} like ?{arg_idx} escape '\\' or {flds_expr} like ?{arg_idx} escape '\\')"
)
.unwrap();
}
Ok(())
}
fn write_tag(&mut self, tag: &str, is_re: bool) {
@ -490,7 +536,8 @@ impl SqlWriter<'_> {
}
fn write_single_field(&mut self, field_name: &str, val: &str) -> Result<()> {
let field_indicies_by_notetype = self.fields_indices_by_notetype(field_name)?;
let field_indicies_by_notetype =
self.num_fields_and_fields_indices_by_notetype(field_name)?;
if field_indicies_by_notetype.is_empty() {
write!(self.sql, "false").unwrap();
return Ok(());
@ -498,12 +545,31 @@ impl SqlWriter<'_> {
self.args.push(to_sql(val).into());
let arg_idx = self.args.len();
let field_idx_str = format!("' || ?{arg_idx} || '");
let other_idx_str = "%".to_string();
let notetype_clause = |(mid, fields): &(NotetypeId, Vec<u32>)| -> String {
let field_index_clause =
|ord| format!("field_at_index(n.flds, {ord}) like ?{arg_idx} escape '\\'",);
let all_field_clauses = fields.iter().map(field_index_clause).join(" or ");
format!("(n.mid = {mid} and ({all_field_clauses}))",)
let notetype_clause = |ctx: &FieldQualifiedSearchContext| -> String {
let field_index_clause = |range: &Range<u32>| {
let f = (0..ctx.total_fields_in_note)
.filter_map(|i| {
if i as u32 == range.start {
Some(&field_idx_str)
} else if range.contains(&(i as u32)) {
None
} else {
Some(&other_idx_str)
}
})
.join("\x1f");
format!("n.flds like '{f}' escape '\\'")
};
let all_field_clauses = ctx
.field_ranges_to_search
.iter()
.map(field_index_clause)
.join(" or ");
format!("(n.mid = {mid} and ({all_field_clauses}))", mid = ctx.ntid)
};
let all_notetype_clauses = field_indicies_by_notetype
.iter()
@ -514,6 +580,37 @@ impl SqlWriter<'_> {
Ok(())
}
/// For the given field name (which may contain wildcards, eg `f*`),
/// build one search context per notetype that has at least one
/// matching field.
///
/// Matching field ordinals are collapsed into half-open ranges so the
/// caller can build a compact LIKE pattern per notetype. The result is
/// sorted by notetype id so generated SQL is deterministic.
fn num_fields_and_fields_indices_by_notetype(
    &mut self,
    field_name: &str,
) -> Result<Vec<FieldQualifiedSearchContext>> {
    let notetypes = self.col.get_all_notetypes()?;
    let matches_glob = glob_matcher(field_name);
    let mut field_map = vec![];
    for nt in notetypes.values() {
        let matched_fields = nt
            .fields
            .iter()
            .filter_map(|field| {
                // `ord` should be populated on a stored notetype;
                // fall back to 0 if it is missing.
                // then_some (not `then`) — the argument is cheap and pure.
                matches_glob(&field.name).then_some(field.ord.unwrap_or_default())
            })
            .collect_ranges();
        // Notetypes without a matching field are omitted entirely, so
        // their notes can never match this field-qualified search.
        if !matched_fields.is_empty() {
            field_map.push(FieldQualifiedSearchContext {
                ntid: nt.id,
                total_fields_in_note: nt.fields.len(),
                field_ranges_to_search: matched_fields,
            });
        }
    }
    // for now, sort the map for the benefit of unit tests
    field_map.sort_by_key(|v| v.ntid);
    Ok(field_map)
}
fn fields_indices_by_notetype(
&mut self,
field_name: &str,
@ -523,12 +620,13 @@ impl SqlWriter<'_> {
let mut field_map = vec![];
for nt in notetypes.values() {
let mut matched_fields = vec![];
for field in &nt.fields {
if matches_glob(&field.name) {
matched_fields.push(field.ord.unwrap_or_default());
}
}
let matched_fields: Vec<u32> = nt
.fields
.iter()
.filter_map(|field| {
matches_glob(&field.name).then(|| field.ord.unwrap_or_default())
})
.collect();
if !matched_fields.is_empty() {
field_map.push((nt.id, matched_fields));
}
@ -540,6 +638,68 @@ impl SqlWriter<'_> {
Ok(field_map)
}
/// For unqualified searches: if any field in the collection is marked
/// as excluded from search, return one context per notetype that still
/// has at least one searchable field. Returns None when nothing is
/// excluded, so the caller can fall back to the simple sfld/flds query.
fn included_fields_by_notetype(&mut self) -> Result<Option<Vec<UnqualifiedSearchContext>>> {
    let notetypes = self.col.get_all_notetypes()?;
    let mut any_excluded = false;
    let mut contexts = vec![];
    for nt in notetypes.values() {
        let mut sortf_excluded = false;
        let mut included_ords = vec![];
        for field in &nt.fields {
            let ord = field.ord.unwrap_or_default();
            if field.config.exclude_from_search {
                any_excluded = true;
                // If the sort field is excluded, n.sfld must not be
                // searched for this notetype.
                sortf_excluded |= ord == nt.config.sort_field_idx;
            } else {
                included_ords.push(ord);
            }
        }
        let field_ranges = included_ords.collect_ranges();
        // Skip notetypes whose fields are all excluded; their notes
        // then never match an unqualified search.
        if !field_ranges.is_empty() {
            contexts.push(UnqualifiedSearchContext {
                ntid: nt.id,
                total_fields_in_note: nt.fields.len(),
                sortf_excluded,
                field_ranges_to_search: field_ranges,
            });
        }
    }
    if any_excluded {
        Ok(Some(contexts))
    } else {
        Ok(None)
    }
}
/// Like included_fields_by_notetype(), but for unqualified regex-based
/// searches (`re:`, and `w:` via write_word_boundary): returns one
/// context per notetype with individual field indices rather than
/// ranges, or None if no field is excluded from search.
fn included_fields_for_unqualified_regex(
    &mut self,
) -> Result<Option<Vec<UnqualifiedRegexSearchContext>>> {
    let notetypes = self.col.get_all_notetypes()?;
    let mut any_excluded = false;
    let mut field_map = vec![];
    for nt in notetypes.values() {
        let matched_fields: Vec<u32> = nt
            .fields
            .iter()
            .filter_map(|field| {
                any_excluded |= field.config.exclude_from_search;
                (!field.config.exclude_from_search).then_some(field.ord.unwrap_or_default())
            })
            .collect();
        // Skip notetypes with no searchable fields, mirroring
        // included_fields_by_notetype(); including them would make
        // write_regex() emit `regexp_fields(?N, n.flds, )` with an
        // empty index list, which is not valid SQL. Notes of such
        // notetypes simply never match an unqualified search.
        if !matched_fields.is_empty() {
            field_map.push(UnqualifiedRegexSearchContext {
                ntid: nt.id,
                total_fields_in_note: nt.fields.len(),
                fields_to_search: matched_fields,
            });
        }
    }
    if any_excluded {
        Ok(Some(field_map))
    } else {
        Ok(None)
    }
}
fn write_dupe(&mut self, ntid: NotetypeId, text: &str) -> Result<()> {
let text_nohtml = strip_html_preserving_media_filenames(text);
let csum = field_checksum(text_nohtml.as_ref());
@ -598,25 +758,48 @@ impl SqlWriter<'_> {
Ok(())
}
fn write_regex(&mut self, word: &str) {
self.sql.push_str("n.flds regexp ?");
self.args.push(format!(r"(?i){}", word));
}
fn write_regex_nc(&mut self, word: &str) {
let word = &without_combining(word);
self.sql
.push_str("coalesce(without_combining(n.flds), n.flds) regexp ?");
self.args.push(format!(r"(?i){}", word));
}
fn write_word_boundary(&mut self, word: &str) {
let re = format!(r"\b{}\b", to_re(word));
if self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch) {
self.write_regex_nc(&re);
fn write_regex(&mut self, word: &str, no_combining: bool) -> Result<()> {
let flds_expr = if no_combining {
"coalesce(without_combining(n.flds), n.flds)"
} else {
self.write_regex(&re);
"n.flds"
};
let word = if no_combining {
without_combining(word)
} else {
std::borrow::Cow::Borrowed(word)
};
self.args.push(format!(r"(?i){}", word));
let arg_idx = self.args.len();
if let Some(field_indices_by_notetype) = self.included_fields_for_unqualified_regex()? {
let notetype_clause = |ctx: &UnqualifiedRegexSearchContext| -> String {
let clause = if ctx.fields_to_search.len() == ctx.total_fields_in_note {
format!("{flds_expr} regexp ?{arg_idx}")
} else {
let indices = ctx.fields_to_search.iter().join(",");
format!("regexp_fields(?{arg_idx}, {flds_expr}, {indices})")
};
format!("(n.mid = {mid} and {clause})", mid = ctx.ntid)
};
let all_notetype_clauses = field_indices_by_notetype
.iter()
.map(notetype_clause)
.join(" or ");
write!(self.sql, "({all_notetype_clauses})").unwrap();
} else {
write!(self.sql, "{flds_expr} regexp ?{arg_idx}").unwrap();
}
Ok(())
}
/// Match `word` only at word boundaries, delegating to the regex
/// writer, which also handles the ignore-accents preference.
fn write_word_boundary(&mut self, word: &str) -> Result<()> {
    let bounded = format!(r"\b{}\b", to_re(word));
    let no_combining = self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch);
    self.write_regex(&bounded, no_combining)
}
}
@ -646,6 +829,68 @@ impl RequiredTable {
}
}
/// Given a list of numbers, create one or more half-open ranges,
/// collapsing contiguous numbers (e.g. `[1, 2, 3, 7]` -> `[1..4, 7..8]`).
///
/// NOTE: assumes the input is sorted in ascending order without
/// duplicates; out-of-order input yields one range per non-contiguous
/// run rather than a minimal set of ranges.
trait CollectRanges {
    type Item;
    fn collect_ranges(self) -> Vec<Range<Self::Item>>;
}

impl<
        Idx: Copy + PartialOrd + std::ops::Add<Idx, Output = Idx> + From<u8>,
        I: IntoIterator<Item = Idx>,
    > CollectRanges for I
{
    type Item = Idx;

    fn collect_ranges(self) -> Vec<Range<Self::Item>> {
        let mut result = Vec::new();
        let mut iter = self.into_iter();
        // Seed the first run with the first element; an empty input
        // produces no ranges.
        let (mut start, mut end) = match iter.next() {
            Some(first) => (first, first),
            None => return result,
        };
        let one = Idx::from(1u8);
        for i in iter {
            if i == end + one {
                // Still contiguous; extend the current run.
                end = i;
            } else {
                // Gap found: close the current run (`end` is inclusive,
                // ranges are half-open, hence the +1) and start a new one.
                result.push(start..end + one);
                start = i;
                end = i;
            }
        }
        result.push(start..end + one);
        result
    }
}
/// Per-notetype data for building the SQL of a field-qualified search
/// (eg `front:foo`).
struct FieldQualifiedSearchContext {
    ntid: NotetypeId,
    /// Total number of fields in the notetype; used to pad the LIKE
    /// pattern with a `%` placeholder for each unsearched field.
    total_fields_in_note: usize,
    /// This may include more than one field in the case the user
    /// has searched with a wildcard, eg f*:foo.
    field_ranges_to_search: Vec<Range<u32>>,
}
/// Per-notetype data for building the SQL of an unqualified search when
/// at least one field in the collection is excluded from search.
struct UnqualifiedSearchContext {
    ntid: NotetypeId,
    /// Total number of fields in the notetype.
    total_fields_in_note: usize,
    /// True if the notetype's sort field is excluded from search, in
    /// which case n.sfld is not matched against for this notetype.
    sortf_excluded: bool,
    /// Half-open ranges of field ordinals that remain searchable.
    field_ranges_to_search: Vec<Range<u32>>,
}
/// Per-notetype data for unqualified regex-based searches (`re:`, and
/// `w:` via write_word_boundary) when fields are excluded from search.
struct UnqualifiedRegexSearchContext {
    ntid: NotetypeId,
    /// Total field count; when it equals fields_to_search.len(), a plain
    /// `regexp` over n.flds is emitted instead of regexp_fields().
    total_fields_in_note: usize,
    /// Unlike the other contexts, this contains each individual index
    /// instead of a list of ranges.
    fields_to_search: Vec<u32>,
}
impl Node {
fn required_table(&self) -> RequiredTable {
match self {
@ -740,10 +985,10 @@ mod test {
s(ctx, "front:te*st"),
(
concat!(
"(((n.mid = 1581236385344 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
"(n.mid = 1581236385345 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
"(n.mid = 1581236385346 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
"(n.mid = 1581236385347 and (field_at_index(n.flds, 0) like ?1 escape '\\'))))"
"(((n.mid = 1581236385344 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\')) or ",
"(n.mid = 1581236385345 and (n.flds like '' || ?1 || '\u{1f}%\u{1f}%' escape '\\')) or ",
"(n.mid = 1581236385346 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\')) or ",
"(n.mid = 1581236385347 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\'))))"
)
.into(),
vec!["te%st".into()]
@ -941,22 +1186,25 @@ mod test {
// regex
assert_eq!(
s(ctx, r"re:\bone"),
("(n.flds regexp ?)".into(), vec![r"(?i)\bone".into()])
("(n.flds regexp ?1)".into(), vec![r"(?i)\bone".into()])
);
// word boundary
assert_eq!(
s(ctx, r"w:foo"),
("(n.flds regexp ?)".into(), vec![r"(?i)\bfoo\b".into()])
("(n.flds regexp ?1)".into(), vec![r"(?i)\bfoo\b".into()])
);
assert_eq!(
s(ctx, r"w:*foo"),
("(n.flds regexp ?)".into(), vec![r"(?i)\b.*foo\b".into()])
("(n.flds regexp ?1)".into(), vec![r"(?i)\b.*foo\b".into()])
);
assert_eq!(
s(ctx, r"w:*fo_o*"),
("(n.flds regexp ?)".into(), vec![r"(?i)\b.*fo.o.*\b".into()])
(
"(n.flds regexp ?1)".into(),
vec![r"(?i)\b.*fo.o.*\b".into()]
)
);
}
@ -991,4 +1239,10 @@ mod test {
RequiredTable::Notes
);
}
#[test]
fn ranges() {
    // a contiguous run collapses into a single half-open range
    assert_eq!([1, 2, 3].collect_ranges(), [1..4]);
    // a gap starts a new range
    assert_eq!([1, 3, 4].collect_ranges(), [1..2, 3..5]);
    // edge cases: single element, empty input
    assert_eq!([7].collect_ranges(), [7..8]);
    assert_eq!(Vec::<u32>::new().collect_ranges(), vec![]);
}
}