Add option to exclude fields from search (#2394)

* Add option to exclude fields from unqualified searches * Use temp tables instead This is slightly faster according to my (very rough) tests. * Make query a bit more readable * exclude_from_search -> excludeFromSearch * Remove superfluous notetypes table from query * Rework to use field search logic Thanks to Rumo for the suggestion: https://github.com/ankitects/anki/pull/2394#issuecomment-1446702402 * Exclude fields from field searches too * Fix error on notetypes with no included fields * Add back the exclude_fields function This approach seems to perform better on average than the previously benchmarked ones. * Use pure-SQL approach to excluding fields * Change single field search to use new approach * Fix flawed any_excluded/sortf_excluded logic * Support field exclusion in the nc operator Also fix search text being wrapped in % in the any_excluded=true case. * Support field exclusion in the re and w operators * Label field exclusion as being slower * Unqualified search should be wrapped in % in all cases I was under the impression that it shouldn't be wrapped with the new field exclusion logic. * Remove unnecessary .collect() * Refactor some complex return types into structs * Do not exclude fields in field searches * Add a test and docstring for CollectRanges * Avoid destructuring in closures * Remove the exclude_fields function Minor wording tweaks by dae: * num_fields -> total_fields_in_note * fields -> field_ranges_to_search * fields -> fields_to_search * SingleField -> FieldQualified * mid -> ntid
2025-12-15 07:40:58 -05:00 · 2023-03-20 00:46:03 +03:00 · 2023-03-20 00:46:03 +03:00 · 946eb46813
commit 946eb46813
parent fdfa33fea4
7 changed files with 346 additions and 67 deletions
--- a/ftl/core/fields.ftl
+++ b/ftl/core/fields.ftl
@ -17,3 +17,4 @@ fields-sort-by-this-field-in-the = Sort by this field in the browser
 fields-that-field-name-is-already-used = That field name is already used.
 fields-name-first-letter-not-valid = The field name should not start with #, ^ or /.
 fields-name-invalid-letter = The field name should not contain :, ", { "{" } or { "}" }.
 fields-exclude-from-search = Exclude from unqualified searches (slower)
--- a/proto/anki/notetypes.proto
+++ b/proto/anki/notetypes.proto
@ -74,6 +74,7 @@ message Notetype {
      string description = 5;
      bool plain_text = 6;
      bool collapsed = 7;
      bool exclude_from_search = 8;
      bytes other = 255;
    }
--- a/qt/aqt/fields.py
+++ b/qt/aqt/fields.py
@ -246,6 +246,7 @@ class FieldDialog(QDialog):
        f.rtl.setChecked(fld["rtl"])
        f.plainTextByDefault.setChecked(fld["plainText"])
        f.collapseByDefault.setChecked(fld["collapsed"])
        f.excludeFromSearch.setChecked(fld["excludeFromSearch"])
        f.fieldDescription.setText(fld.get("description", ""))
    def saveField(self) -> None:
@ -274,6 +275,11 @@ class FieldDialog(QDialog):
        collapsed = f.collapseByDefault.isChecked()
        if fld["collapsed"] != collapsed:
            fld["collapsed"] = collapsed
            self.change_tracker.mark_basic()
        exclude_from_search = f.excludeFromSearch.isChecked()
        if fld["excludeFromSearch"] != exclude_from_search:
            fld["excludeFromSearch"] = exclude_from_search
            self.change_tracker.mark_basic()
        desc = f.fieldDescription.text()
        if fld.get("description", "") != desc:
            fld["description"] = desc
--- a/qt/aqt/forms/fields.ui
+++ b/qt/aqt/forms/fields.ui
@ -172,6 +172,16 @@
       </property>
      </widget>
     </item>
     <item row="6" column="1" alignment="Qt::AlignLeft">
      <widget class="QCheckBox" name="excludeFromSearch">
       <property name="enabled">
        <bool>true</bool>
       </property>
       <property name="text">
        <string>fields_exclude_from_search</string>
       </property>
      </widget>
     </item>
    </layout>
   </item>
   <item>
--- a/rslib/src/notetype/fields.rs
+++ b/rslib/src/notetype/fields.rs
@ -46,6 +46,7 @@ impl NoteField {
                font_size: 20,
                description: "".into(),
                collapsed: false,
                exclude_from_search: false,
                other: vec![],
            },
        }
--- a/rslib/src/notetype/schema11.rs
+++ b/rslib/src/notetype/schema11.rs
@ -167,7 +167,7 @@ impl From<Notetype> for NotetypeSchema11 {
 /// See [crate::deckconfig::schema11::clear_other_duplicates()].
 fn clear_other_field_duplicates(other: &mut HashMap<String, Value>) {
-    for key in &["description", "plainText", "collapsed"] {
+    for key in &["description", "plainText", "collapsed", "excludeFromSearch"] {
        other.remove(*key);
    }
 }
@ -224,6 +224,9 @@ pub struct NoteFieldSchema11 {
    #[serde(default, deserialize_with = "default_on_invalid")]
    pub(crate) collapsed: bool,
    #[serde(default, deserialize_with = "default_on_invalid")]
    pub(crate) exclude_from_search: bool,
    #[serde(flatten)]
    pub(crate) other: HashMap<String, Value>,
 }
@ -240,6 +243,7 @@ impl Default for NoteFieldSchema11 {
            size: 20,
            description: String::new(),
            collapsed: false,
            exclude_from_search: false,
            other: Default::default(),
        }
    }
@ -258,6 +262,7 @@ impl From<NoteFieldSchema11> for NoteField {
                font_size: f.size as u32,
                description: f.description,
                collapsed: f.collapsed,
                exclude_from_search: f.exclude_from_search,
                other: other_to_bytes(&f.other),
            },
        }
@ -281,6 +286,7 @@ impl From<NoteField> for NoteFieldSchema11 {
            size: conf.font_size as u16,
            description: conf.description,
            collapsed: conf.collapsed,
            exclude_from_search: conf.exclude_from_search,
            other,
        }
    }
--- a/rslib/src/search/sqlwriter.rs
+++ b/rslib/src/search/sqlwriter.rs
@ -3,6 +3,7 @@
 use std::borrow::Cow;
 use std::fmt::Write;
 use std::ops::Range;
 use itertools::Itertools;
@ -130,11 +131,10 @@ impl SqlWriter<'_> {
            // note fields related
            SearchNode::UnqualifiedText(text) => {
                let text = &self.norm_note(text);
-                if self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch) {
+                self.write_unqualified(
-                    self.write_no_combining(text)
+                    text,
-                } else {
+                    self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch),
-                    self.write_unqualified(text)
+                )?
                }
            }
            SearchNode::SingleField { field, text, is_re } => {
                self.write_field(&norm(field), &self.norm_note(text), *is_re)?
@ -142,9 +142,9 @@ impl SqlWriter<'_> {
            SearchNode::Duplicates { notetype_id, text } => {
                self.write_dupe(*notetype_id, &self.norm_note(text))?
            }
-            SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
+            SearchNode::Regex(re) => self.write_regex(&self.norm_note(re), false)?,
-            SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
+            SearchNode::NoCombining(text) => self.write_unqualified(&self.norm_note(text), true)?,
-            SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
+            SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text))?,
            // other
            SearchNode::AddedInDays(days) => self.write_added(*days)?,
@ -184,30 +184,76 @@ impl SqlWriter<'_> {
        Ok(())
    }
-    fn write_unqualified(&mut self, text: &str) {
+    fn write_unqualified(&mut self, text: &str, no_combining: bool) -> Result<()> {
        let text = to_sql(text);
        let text = if no_combining {
            without_combining(&text)
        } else {
            text
        };
        // implicitly wrap in %
-        let text = format!("%{}%", &to_sql(text));
+        let text = format!("%{}%", text);
        self.args.push(text);
-        write!(
+        let arg_idx = self.args.len();
            self.sql,
            "(n.sfld like ?{n} escape '\\' or n.flds like ?{n} escape '\\')",
            n = self.args.len(),
        )
        .unwrap();
    }
-    fn write_no_combining(&mut self, text: &str) {
+        let sfld_expr = if no_combining {
-        let text = format!("%{}%", without_combining(&to_sql(text)));
+            "coalesce(without_combining(cast(n.sfld as text)), n.sfld)"
-        self.args.push(text);
+        } else {
-        write!(
+            "n.sfld"
-            self.sql,
+        };
-            concat!(
+        let flds_expr = if no_combining {
-                "(coalesce(without_combining(cast(n.sfld as text)), n.sfld) like ?{n} escape '\\' ",
+            "coalesce(without_combining(n.flds), n.flds)"
-                "or coalesce(without_combining(n.flds), n.flds) like ?{n} escape '\\')"
+        } else {
-            ),
+            "n.flds"
-            n = self.args.len(),
+        };
-        )
+
-        .unwrap();
+        if let Some(field_indicies_by_notetype) = self.included_fields_by_notetype()? {
            let field_idx_str = format!("' || ?{arg_idx} || '");
            let other_idx_str = "%".to_string();
            let notetype_clause = |ctx: &UnqualifiedSearchContext| -> String {
                let field_index_clause = |range: &Range<u32>| {
                    let f = (0..ctx.total_fields_in_note)
                        .filter_map(|i| {
                            if i as u32 == range.start {
                                Some(&field_idx_str)
                            } else if range.contains(&(i as u32)) {
                                None
                            } else {
                                Some(&other_idx_str)
                            }
                        })
                        .join("\x1f");
                    format!("{flds_expr} like '{f}' escape '\\'")
                };
                let mut all_field_clauses: Vec<String> = ctx
                    .field_ranges_to_search
                    .iter()
                    .map(field_index_clause)
                    .collect();
                if !ctx.sortf_excluded {
                    all_field_clauses.push(format!("{sfld_expr} like ?{arg_idx} escape '\\'"));
                }
                format!(
                    "(n.mid = {mid} and ({all_field_clauses}))",
                    mid = ctx.ntid,
                    all_field_clauses = all_field_clauses.join(" or ")
                )
            };
            let all_notetype_clauses = field_indicies_by_notetype
                .iter()
                .map(notetype_clause)
                .join(" or ");
            write!(self.sql, "({all_notetype_clauses})").unwrap();
        } else {
            write!(
                self.sql,
                "({sfld_expr} like ?{arg_idx} escape '\\' or {flds_expr} like ?{arg_idx} escape '\\')"
            )
            .unwrap();
        }
        Ok(())
    }
    fn write_tag(&mut self, tag: &str, is_re: bool) {
@ -490,7 +536,8 @@ impl SqlWriter<'_> {
    }
    fn write_single_field(&mut self, field_name: &str, val: &str) -> Result<()> {
-        let field_indicies_by_notetype = self.fields_indices_by_notetype(field_name)?;
+        let field_indicies_by_notetype =
            self.num_fields_and_fields_indices_by_notetype(field_name)?;
        if field_indicies_by_notetype.is_empty() {
            write!(self.sql, "false").unwrap();
            return Ok(());
@ -498,12 +545,31 @@ impl SqlWriter<'_> {
        self.args.push(to_sql(val).into());
        let arg_idx = self.args.len();
        let field_idx_str = format!("' || ?{arg_idx} || '");
        let other_idx_str = "%".to_string();
-        let notetype_clause = |(mid, fields): &(NotetypeId, Vec<u32>)| -> String {
+        let notetype_clause = |ctx: &FieldQualifiedSearchContext| -> String {
-            let field_index_clause =
+            let field_index_clause = |range: &Range<u32>| {
-                |ord| format!("field_at_index(n.flds, {ord}) like ?{arg_idx} escape '\\'",);
+                let f = (0..ctx.total_fields_in_note)
-            let all_field_clauses = fields.iter().map(field_index_clause).join(" or ");
+                    .filter_map(|i| {
-            format!("(n.mid = {mid} and ({all_field_clauses}))",)
+                        if i as u32 == range.start {
                            Some(&field_idx_str)
                        } else if range.contains(&(i as u32)) {
                            None
                        } else {
                            Some(&other_idx_str)
                        }
                    })
                    .join("\x1f");
                format!("n.flds like '{f}' escape '\\'")
            };
            let all_field_clauses = ctx
                .field_ranges_to_search
                .iter()
                .map(field_index_clause)
                .join(" or ");
            format!("(n.mid = {mid} and ({all_field_clauses}))", mid = ctx.ntid)
        };
        let all_notetype_clauses = field_indicies_by_notetype
            .iter()
@ -514,6 +580,37 @@ impl SqlWriter<'_> {
        Ok(())
    }
    fn num_fields_and_fields_indices_by_notetype(
        &mut self,
        field_name: &str,
    ) -> Result<Vec<FieldQualifiedSearchContext>> {
        let notetypes = self.col.get_all_notetypes()?;
        let matches_glob = glob_matcher(field_name);
        let mut field_map = vec![];
        for nt in notetypes.values() {
            let matched_fields = nt
                .fields
                .iter()
                .filter_map(|field| {
                    matches_glob(&field.name).then(|| field.ord.unwrap_or_default())
                })
                .collect_ranges();
            if !matched_fields.is_empty() {
                field_map.push(FieldQualifiedSearchContext {
                    ntid: nt.id,
                    total_fields_in_note: nt.fields.len(),
                    field_ranges_to_search: matched_fields,
                });
            }
        }
        // for now, sort the map for the benefit of unit tests
        field_map.sort_by_key(|v| v.ntid);
        Ok(field_map)
    }
    fn fields_indices_by_notetype(
        &mut self,
        field_name: &str,
@ -523,12 +620,13 @@ impl SqlWriter<'_> {
        let mut field_map = vec![];
        for nt in notetypes.values() {
-            let mut matched_fields = vec![];
+            let matched_fields: Vec<u32> = nt
-            for field in &nt.fields {
+                .fields
-                if matches_glob(&field.name) {
+                .iter()
-                    matched_fields.push(field.ord.unwrap_or_default());
+                .filter_map(|field| {
-                }
+                    matches_glob(&field.name).then(|| field.ord.unwrap_or_default())
-            }
+                })
                .collect();
            if !matched_fields.is_empty() {
                field_map.push((nt.id, matched_fields));
            }
@ -540,6 +638,68 @@ impl SqlWriter<'_> {
        Ok(field_map)
    }
    fn included_fields_by_notetype(&mut self) -> Result<Option<Vec<UnqualifiedSearchContext>>> {
        let notetypes = self.col.get_all_notetypes()?;
        let mut any_excluded = false;
        let mut field_map = vec![];
        for nt in notetypes.values() {
            let mut sortf_excluded = false;
            let matched_fields = nt
                .fields
                .iter()
                .filter_map(|field| {
                    let ord = field.ord.unwrap_or_default();
                    if field.config.exclude_from_search {
                        any_excluded = true;
                        sortf_excluded |= ord == nt.config.sort_field_idx;
                    }
                    (!field.config.exclude_from_search).then_some(ord)
                })
                .collect_ranges();
            if !matched_fields.is_empty() {
                field_map.push(UnqualifiedSearchContext {
                    ntid: nt.id,
                    total_fields_in_note: nt.fields.len(),
                    sortf_excluded,
                    field_ranges_to_search: matched_fields,
                });
            }
        }
        if any_excluded {
            Ok(Some(field_map))
        } else {
            Ok(None)
        }
    }
    fn included_fields_for_unqualified_regex(
        &mut self,
    ) -> Result<Option<Vec<UnqualifiedRegexSearchContext>>> {
        let notetypes = self.col.get_all_notetypes()?;
        let mut any_excluded = false;
        let mut field_map = vec![];
        for nt in notetypes.values() {
            let matched_fields: Vec<u32> = nt
                .fields
                .iter()
                .filter_map(|field| {
                    any_excluded |= field.config.exclude_from_search;
                    (!field.config.exclude_from_search).then_some(field.ord.unwrap_or_default())
                })
                .collect();
            field_map.push(UnqualifiedRegexSearchContext {
                ntid: nt.id,
                total_fields_in_note: nt.fields.len(),
                fields_to_search: matched_fields,
            });
        }
        if any_excluded {
            Ok(Some(field_map))
        } else {
            Ok(None)
        }
    }
    fn write_dupe(&mut self, ntid: NotetypeId, text: &str) -> Result<()> {
        let text_nohtml = strip_html_preserving_media_filenames(text);
        let csum = field_checksum(text_nohtml.as_ref());
@ -598,25 +758,48 @@ impl SqlWriter<'_> {
        Ok(())
    }
-    fn write_regex(&mut self, word: &str) {
+    fn write_regex(&mut self, word: &str, no_combining: bool) -> Result<()> {
-        self.sql.push_str("n.flds regexp ?");
+        let flds_expr = if no_combining {
-        self.args.push(format!(r"(?i){}", word));
+            "coalesce(without_combining(n.flds), n.flds)"
    }
    fn write_regex_nc(&mut self, word: &str) {
        let word = &without_combining(word);
        self.sql
            .push_str("coalesce(without_combining(n.flds), n.flds) regexp ?");
        self.args.push(format!(r"(?i){}", word));
    }
    fn write_word_boundary(&mut self, word: &str) {
        let re = format!(r"\b{}\b", to_re(word));
        if self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch) {
            self.write_regex_nc(&re);
        } else {
-            self.write_regex(&re);
+            "n.flds"
        };
        let word = if no_combining {
            without_combining(word)
        } else {
            std::borrow::Cow::Borrowed(word)
        };
        self.args.push(format!(r"(?i){}", word));
        let arg_idx = self.args.len();
        if let Some(field_indices_by_notetype) = self.included_fields_for_unqualified_regex()? {
            let notetype_clause = |ctx: &UnqualifiedRegexSearchContext| -> String {
                let clause = if ctx.fields_to_search.len() == ctx.total_fields_in_note {
                    format!("{flds_expr} regexp ?{arg_idx}")
                } else {
                    let indices = ctx.fields_to_search.iter().join(",");
                    format!("regexp_fields(?{arg_idx}, {flds_expr}, {indices})")
                };
                format!("(n.mid = {mid} and {clause})", mid = ctx.ntid)
            };
            let all_notetype_clauses = field_indices_by_notetype
                .iter()
                .map(notetype_clause)
                .join(" or ");
            write!(self.sql, "({all_notetype_clauses})").unwrap();
        } else {
            write!(self.sql, "{flds_expr} regexp ?{arg_idx}").unwrap();
        }
        Ok(())
    }
    fn write_word_boundary(&mut self, word: &str) -> Result<()> {
        let re = format!(r"\b{}\b", to_re(word));
        self.write_regex(
            &re,
            self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch),
        )
    }
 }
@ -646,6 +829,68 @@ impl RequiredTable {
    }
 }
 /// Given a list of numbers, create one or more ranges, collapsing
 /// contiguous numbers.
 trait CollectRanges {
    type Item;
    fn collect_ranges(self) -> Vec<Range<Self::Item>>;
 }
 impl<
        Idx: Copy + PartialOrd + std::ops::Add<Idx, Output = Idx> + From<u8>,
        I: IntoIterator<Item = Idx>,
    > CollectRanges for I
 {
    type Item = Idx;
    fn collect_ranges(self) -> Vec<Range<Self::Item>> {
        let mut result = Vec::new();
        let mut iter = self.into_iter();
        let next = iter.next();
        if next.is_none() {
            return result;
        }
        let mut start = next.unwrap();
        let mut end = next.unwrap();
        for i in iter {
            if i == end + 1.into() {
                end = end + 1.into();
            } else {
                result.push(start..end + 1.into());
                start = i;
                end = i;
            }
        }
        result.push(start..end + 1.into());
        result
    }
 }
 struct FieldQualifiedSearchContext {
    ntid: NotetypeId,
    total_fields_in_note: usize,
    /// This may include more than one field in the case the user
    /// has searched with a wildcard, eg f*:foo.
    field_ranges_to_search: Vec<Range<u32>>,
 }
 struct UnqualifiedSearchContext {
    ntid: NotetypeId,
    total_fields_in_note: usize,
    sortf_excluded: bool,
    field_ranges_to_search: Vec<Range<u32>>,
 }
 struct UnqualifiedRegexSearchContext {
    ntid: NotetypeId,
    total_fields_in_note: usize,
    /// Unlike the other contexts, this contains each individual index
    /// instead of a list of ranges.
    fields_to_search: Vec<u32>,
 }
 impl Node {
    fn required_table(&self) -> RequiredTable {
        match self {
@ -740,10 +985,10 @@ mod test {
            s(ctx, "front:te*st"),
            (
                concat!(
-                    "(((n.mid = 1581236385344 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
+                    "(((n.mid = 1581236385344 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\')) or ",
-                    "(n.mid = 1581236385345 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
+                    "(n.mid = 1581236385345 and (n.flds like '' || ?1 || '\u{1f}%\u{1f}%' escape '\\')) or ",
-                    "(n.mid = 1581236385346 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
+                    "(n.mid = 1581236385346 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\')) or ",
-                    "(n.mid = 1581236385347 and (field_at_index(n.flds, 0) like ?1 escape '\\'))))"
+                    "(n.mid = 1581236385347 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\'))))"
                )
                .into(),
                vec!["te%st".into()]
@ -941,22 +1186,25 @@ mod test {
        // regex
        assert_eq!(
            s(ctx, r"re:\bone"),
-            ("(n.flds regexp ?)".into(), vec![r"(?i)\bone".into()])
+            ("(n.flds regexp ?1)".into(), vec![r"(?i)\bone".into()])
        );
        // word boundary
        assert_eq!(
            s(ctx, r"w:foo"),
-            ("(n.flds regexp ?)".into(), vec![r"(?i)\bfoo\b".into()])
+            ("(n.flds regexp ?1)".into(), vec![r"(?i)\bfoo\b".into()])
        );
        assert_eq!(
            s(ctx, r"w:*foo"),
-            ("(n.flds regexp ?)".into(), vec![r"(?i)\b.*foo\b".into()])
+            ("(n.flds regexp ?1)".into(), vec![r"(?i)\b.*foo\b".into()])
        );
        assert_eq!(
            s(ctx, r"w:*fo_o*"),
-            ("(n.flds regexp ?)".into(), vec![r"(?i)\b.*fo.o.*\b".into()])
+            (
                "(n.flds regexp ?1)".into(),
                vec![r"(?i)\b.*fo.o.*\b".into()]
            )
        );
    }
@ -991,4 +1239,10 @@ mod test {
            RequiredTable::Notes
        );
    }
    #[test]
    fn ranges() {
        assert_eq!([1, 2, 3].collect_ranges(), [1..4]);
        assert_eq!([1, 3, 4].collect_ranges(), [1..2, 3..5]);
    }
 }