mirror of
https://github.com/ankitects/anki.git
synced 2025-09-24 16:56:36 -04:00
Add option to exclude fields from search (#2394)
* Add option to exclude fields from unqualified searches * Use temp tables instead This is slightly faster according to my (very rough) tests. * Make query a bit more readable * exclude_from_search -> excludeFromSearch * Remove superfluous notetypes table from query * Rework to use field search logic Thanks to Rumo for the suggestion: https://github.com/ankitects/anki/pull/2394#issuecomment-1446702402 * Exclude fields from field searches too * Fix error on notetypes with no included fields * Add back the exclude_fields function This approach seems to perform better on average than the previously benchmarked ones. * Use pure-SQL approach to excluding fields * Change single field search to use new approach * Fix flawed any_excluded/sortf_excluded logic * Support field exclusion in the nc operator Also fix search text being wrapped in % in the any_excluded=true case. * Support field exclusion in the re and w operators * Label field exclusion as being slower * Unqualified search should be wrapped in % in all cases I was under the impression that it shouldn't be wrapped with the new field exclusion logic. * Remove unnecessary .collect() * Refactor some complex return types into structs * Do not exclude fields in field searches * Add a test and docstring for CollectRanges * Avoid destructuring in closures * Remove the exclude_fields function Minor wording tweaks by dae: * num_fields -> total_fields_in_note * fields -> field_ranges_to_search * fields -> fields_to_search * SingleField -> FieldQualified * mid -> ntid
This commit is contained in:
parent
fdfa33fea4
commit
946eb46813
7 changed files with 346 additions and 67 deletions
|
@ -17,3 +17,4 @@ fields-sort-by-this-field-in-the = Sort by this field in the browser
|
||||||
fields-that-field-name-is-already-used = That field name is already used.
|
fields-that-field-name-is-already-used = That field name is already used.
|
||||||
fields-name-first-letter-not-valid = The field name should not start with #, ^ or /.
|
fields-name-first-letter-not-valid = The field name should not start with #, ^ or /.
|
||||||
fields-name-invalid-letter = The field name should not contain :, ", { "{" } or { "}" }.
|
fields-name-invalid-letter = The field name should not contain :, ", { "{" } or { "}" }.
|
||||||
|
fields-exclude-from-search = Exclude from unqualified searches (slower)
|
||||||
|
|
|
@ -74,6 +74,7 @@ message Notetype {
|
||||||
string description = 5;
|
string description = 5;
|
||||||
bool plain_text = 6;
|
bool plain_text = 6;
|
||||||
bool collapsed = 7;
|
bool collapsed = 7;
|
||||||
|
bool exclude_from_search = 8;
|
||||||
|
|
||||||
bytes other = 255;
|
bytes other = 255;
|
||||||
}
|
}
|
||||||
|
|
|
@ -246,6 +246,7 @@ class FieldDialog(QDialog):
|
||||||
f.rtl.setChecked(fld["rtl"])
|
f.rtl.setChecked(fld["rtl"])
|
||||||
f.plainTextByDefault.setChecked(fld["plainText"])
|
f.plainTextByDefault.setChecked(fld["plainText"])
|
||||||
f.collapseByDefault.setChecked(fld["collapsed"])
|
f.collapseByDefault.setChecked(fld["collapsed"])
|
||||||
|
f.excludeFromSearch.setChecked(fld["excludeFromSearch"])
|
||||||
f.fieldDescription.setText(fld.get("description", ""))
|
f.fieldDescription.setText(fld.get("description", ""))
|
||||||
|
|
||||||
def saveField(self) -> None:
|
def saveField(self) -> None:
|
||||||
|
@ -274,6 +275,11 @@ class FieldDialog(QDialog):
|
||||||
collapsed = f.collapseByDefault.isChecked()
|
collapsed = f.collapseByDefault.isChecked()
|
||||||
if fld["collapsed"] != collapsed:
|
if fld["collapsed"] != collapsed:
|
||||||
fld["collapsed"] = collapsed
|
fld["collapsed"] = collapsed
|
||||||
|
self.change_tracker.mark_basic()
|
||||||
|
exclude_from_search = f.excludeFromSearch.isChecked()
|
||||||
|
if fld["excludeFromSearch"] != exclude_from_search:
|
||||||
|
fld["excludeFromSearch"] = exclude_from_search
|
||||||
|
self.change_tracker.mark_basic()
|
||||||
desc = f.fieldDescription.text()
|
desc = f.fieldDescription.text()
|
||||||
if fld.get("description", "") != desc:
|
if fld.get("description", "") != desc:
|
||||||
fld["description"] = desc
|
fld["description"] = desc
|
||||||
|
|
|
@ -172,6 +172,16 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="6" column="1" alignment="Qt::AlignLeft">
|
||||||
|
<widget class="QCheckBox" name="excludeFromSearch">
|
||||||
|
<property name="enabled">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>fields_exclude_from_search</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
|
|
|
@ -46,6 +46,7 @@ impl NoteField {
|
||||||
font_size: 20,
|
font_size: 20,
|
||||||
description: "".into(),
|
description: "".into(),
|
||||||
collapsed: false,
|
collapsed: false,
|
||||||
|
exclude_from_search: false,
|
||||||
other: vec![],
|
other: vec![],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,7 +167,7 @@ impl From<Notetype> for NotetypeSchema11 {
|
||||||
|
|
||||||
/// See [crate::deckconfig::schema11::clear_other_duplicates()].
|
/// See [crate::deckconfig::schema11::clear_other_duplicates()].
|
||||||
fn clear_other_field_duplicates(other: &mut HashMap<String, Value>) {
|
fn clear_other_field_duplicates(other: &mut HashMap<String, Value>) {
|
||||||
for key in &["description", "plainText", "collapsed"] {
|
for key in &["description", "plainText", "collapsed", "excludeFromSearch"] {
|
||||||
other.remove(*key);
|
other.remove(*key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -224,6 +224,9 @@ pub struct NoteFieldSchema11 {
|
||||||
#[serde(default, deserialize_with = "default_on_invalid")]
|
#[serde(default, deserialize_with = "default_on_invalid")]
|
||||||
pub(crate) collapsed: bool,
|
pub(crate) collapsed: bool,
|
||||||
|
|
||||||
|
#[serde(default, deserialize_with = "default_on_invalid")]
|
||||||
|
pub(crate) exclude_from_search: bool,
|
||||||
|
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub(crate) other: HashMap<String, Value>,
|
pub(crate) other: HashMap<String, Value>,
|
||||||
}
|
}
|
||||||
|
@ -240,6 +243,7 @@ impl Default for NoteFieldSchema11 {
|
||||||
size: 20,
|
size: 20,
|
||||||
description: String::new(),
|
description: String::new(),
|
||||||
collapsed: false,
|
collapsed: false,
|
||||||
|
exclude_from_search: false,
|
||||||
other: Default::default(),
|
other: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -258,6 +262,7 @@ impl From<NoteFieldSchema11> for NoteField {
|
||||||
font_size: f.size as u32,
|
font_size: f.size as u32,
|
||||||
description: f.description,
|
description: f.description,
|
||||||
collapsed: f.collapsed,
|
collapsed: f.collapsed,
|
||||||
|
exclude_from_search: f.exclude_from_search,
|
||||||
other: other_to_bytes(&f.other),
|
other: other_to_bytes(&f.other),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -281,6 +286,7 @@ impl From<NoteField> for NoteFieldSchema11 {
|
||||||
size: conf.font_size as u16,
|
size: conf.font_size as u16,
|
||||||
description: conf.description,
|
description: conf.description,
|
||||||
collapsed: conf.collapsed,
|
collapsed: conf.collapsed,
|
||||||
|
exclude_from_search: conf.exclude_from_search,
|
||||||
other,
|
other,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
@ -130,11 +131,10 @@ impl SqlWriter<'_> {
|
||||||
// note fields related
|
// note fields related
|
||||||
SearchNode::UnqualifiedText(text) => {
|
SearchNode::UnqualifiedText(text) => {
|
||||||
let text = &self.norm_note(text);
|
let text = &self.norm_note(text);
|
||||||
if self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch) {
|
self.write_unqualified(
|
||||||
self.write_no_combining(text)
|
text,
|
||||||
} else {
|
self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch),
|
||||||
self.write_unqualified(text)
|
)?
|
||||||
}
|
|
||||||
}
|
}
|
||||||
SearchNode::SingleField { field, text, is_re } => {
|
SearchNode::SingleField { field, text, is_re } => {
|
||||||
self.write_field(&norm(field), &self.norm_note(text), *is_re)?
|
self.write_field(&norm(field), &self.norm_note(text), *is_re)?
|
||||||
|
@ -142,9 +142,9 @@ impl SqlWriter<'_> {
|
||||||
SearchNode::Duplicates { notetype_id, text } => {
|
SearchNode::Duplicates { notetype_id, text } => {
|
||||||
self.write_dupe(*notetype_id, &self.norm_note(text))?
|
self.write_dupe(*notetype_id, &self.norm_note(text))?
|
||||||
}
|
}
|
||||||
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
|
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re), false)?,
|
||||||
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
|
SearchNode::NoCombining(text) => self.write_unqualified(&self.norm_note(text), true)?,
|
||||||
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
|
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text))?,
|
||||||
|
|
||||||
// other
|
// other
|
||||||
SearchNode::AddedInDays(days) => self.write_added(*days)?,
|
SearchNode::AddedInDays(days) => self.write_added(*days)?,
|
||||||
|
@ -184,30 +184,76 @@ impl SqlWriter<'_> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_unqualified(&mut self, text: &str) {
|
fn write_unqualified(&mut self, text: &str, no_combining: bool) -> Result<()> {
|
||||||
|
let text = to_sql(text);
|
||||||
|
let text = if no_combining {
|
||||||
|
without_combining(&text)
|
||||||
|
} else {
|
||||||
|
text
|
||||||
|
};
|
||||||
// implicitly wrap in %
|
// implicitly wrap in %
|
||||||
let text = format!("%{}%", &to_sql(text));
|
let text = format!("%{}%", text);
|
||||||
self.args.push(text);
|
self.args.push(text);
|
||||||
write!(
|
let arg_idx = self.args.len();
|
||||||
self.sql,
|
|
||||||
"(n.sfld like ?{n} escape '\\' or n.flds like ?{n} escape '\\')",
|
|
||||||
n = self.args.len(),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_no_combining(&mut self, text: &str) {
|
let sfld_expr = if no_combining {
|
||||||
let text = format!("%{}%", without_combining(&to_sql(text)));
|
"coalesce(without_combining(cast(n.sfld as text)), n.sfld)"
|
||||||
self.args.push(text);
|
} else {
|
||||||
write!(
|
"n.sfld"
|
||||||
self.sql,
|
};
|
||||||
concat!(
|
let flds_expr = if no_combining {
|
||||||
"(coalesce(without_combining(cast(n.sfld as text)), n.sfld) like ?{n} escape '\\' ",
|
"coalesce(without_combining(n.flds), n.flds)"
|
||||||
"or coalesce(without_combining(n.flds), n.flds) like ?{n} escape '\\')"
|
} else {
|
||||||
),
|
"n.flds"
|
||||||
n = self.args.len(),
|
};
|
||||||
)
|
|
||||||
.unwrap();
|
if let Some(field_indicies_by_notetype) = self.included_fields_by_notetype()? {
|
||||||
|
let field_idx_str = format!("' || ?{arg_idx} || '");
|
||||||
|
let other_idx_str = "%".to_string();
|
||||||
|
|
||||||
|
let notetype_clause = |ctx: &UnqualifiedSearchContext| -> String {
|
||||||
|
let field_index_clause = |range: &Range<u32>| {
|
||||||
|
let f = (0..ctx.total_fields_in_note)
|
||||||
|
.filter_map(|i| {
|
||||||
|
if i as u32 == range.start {
|
||||||
|
Some(&field_idx_str)
|
||||||
|
} else if range.contains(&(i as u32)) {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(&other_idx_str)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.join("\x1f");
|
||||||
|
format!("{flds_expr} like '{f}' escape '\\'")
|
||||||
|
};
|
||||||
|
let mut all_field_clauses: Vec<String> = ctx
|
||||||
|
.field_ranges_to_search
|
||||||
|
.iter()
|
||||||
|
.map(field_index_clause)
|
||||||
|
.collect();
|
||||||
|
if !ctx.sortf_excluded {
|
||||||
|
all_field_clauses.push(format!("{sfld_expr} like ?{arg_idx} escape '\\'"));
|
||||||
|
}
|
||||||
|
format!(
|
||||||
|
"(n.mid = {mid} and ({all_field_clauses}))",
|
||||||
|
mid = ctx.ntid,
|
||||||
|
all_field_clauses = all_field_clauses.join(" or ")
|
||||||
|
)
|
||||||
|
};
|
||||||
|
let all_notetype_clauses = field_indicies_by_notetype
|
||||||
|
.iter()
|
||||||
|
.map(notetype_clause)
|
||||||
|
.join(" or ");
|
||||||
|
write!(self.sql, "({all_notetype_clauses})").unwrap();
|
||||||
|
} else {
|
||||||
|
write!(
|
||||||
|
self.sql,
|
||||||
|
"({sfld_expr} like ?{arg_idx} escape '\\' or {flds_expr} like ?{arg_idx} escape '\\')"
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_tag(&mut self, tag: &str, is_re: bool) {
|
fn write_tag(&mut self, tag: &str, is_re: bool) {
|
||||||
|
@ -490,7 +536,8 @@ impl SqlWriter<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_single_field(&mut self, field_name: &str, val: &str) -> Result<()> {
|
fn write_single_field(&mut self, field_name: &str, val: &str) -> Result<()> {
|
||||||
let field_indicies_by_notetype = self.fields_indices_by_notetype(field_name)?;
|
let field_indicies_by_notetype =
|
||||||
|
self.num_fields_and_fields_indices_by_notetype(field_name)?;
|
||||||
if field_indicies_by_notetype.is_empty() {
|
if field_indicies_by_notetype.is_empty() {
|
||||||
write!(self.sql, "false").unwrap();
|
write!(self.sql, "false").unwrap();
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -498,12 +545,31 @@ impl SqlWriter<'_> {
|
||||||
|
|
||||||
self.args.push(to_sql(val).into());
|
self.args.push(to_sql(val).into());
|
||||||
let arg_idx = self.args.len();
|
let arg_idx = self.args.len();
|
||||||
|
let field_idx_str = format!("' || ?{arg_idx} || '");
|
||||||
|
let other_idx_str = "%".to_string();
|
||||||
|
|
||||||
let notetype_clause = |(mid, fields): &(NotetypeId, Vec<u32>)| -> String {
|
let notetype_clause = |ctx: &FieldQualifiedSearchContext| -> String {
|
||||||
let field_index_clause =
|
let field_index_clause = |range: &Range<u32>| {
|
||||||
|ord| format!("field_at_index(n.flds, {ord}) like ?{arg_idx} escape '\\'",);
|
let f = (0..ctx.total_fields_in_note)
|
||||||
let all_field_clauses = fields.iter().map(field_index_clause).join(" or ");
|
.filter_map(|i| {
|
||||||
format!("(n.mid = {mid} and ({all_field_clauses}))",)
|
if i as u32 == range.start {
|
||||||
|
Some(&field_idx_str)
|
||||||
|
} else if range.contains(&(i as u32)) {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(&other_idx_str)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.join("\x1f");
|
||||||
|
format!("n.flds like '{f}' escape '\\'")
|
||||||
|
};
|
||||||
|
|
||||||
|
let all_field_clauses = ctx
|
||||||
|
.field_ranges_to_search
|
||||||
|
.iter()
|
||||||
|
.map(field_index_clause)
|
||||||
|
.join(" or ");
|
||||||
|
format!("(n.mid = {mid} and ({all_field_clauses}))", mid = ctx.ntid)
|
||||||
};
|
};
|
||||||
let all_notetype_clauses = field_indicies_by_notetype
|
let all_notetype_clauses = field_indicies_by_notetype
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -514,6 +580,37 @@ impl SqlWriter<'_> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn num_fields_and_fields_indices_by_notetype(
|
||||||
|
&mut self,
|
||||||
|
field_name: &str,
|
||||||
|
) -> Result<Vec<FieldQualifiedSearchContext>> {
|
||||||
|
let notetypes = self.col.get_all_notetypes()?;
|
||||||
|
let matches_glob = glob_matcher(field_name);
|
||||||
|
|
||||||
|
let mut field_map = vec![];
|
||||||
|
for nt in notetypes.values() {
|
||||||
|
let matched_fields = nt
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.filter_map(|field| {
|
||||||
|
matches_glob(&field.name).then(|| field.ord.unwrap_or_default())
|
||||||
|
})
|
||||||
|
.collect_ranges();
|
||||||
|
if !matched_fields.is_empty() {
|
||||||
|
field_map.push(FieldQualifiedSearchContext {
|
||||||
|
ntid: nt.id,
|
||||||
|
total_fields_in_note: nt.fields.len(),
|
||||||
|
field_ranges_to_search: matched_fields,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for now, sort the map for the benefit of unit tests
|
||||||
|
field_map.sort_by_key(|v| v.ntid);
|
||||||
|
|
||||||
|
Ok(field_map)
|
||||||
|
}
|
||||||
|
|
||||||
fn fields_indices_by_notetype(
|
fn fields_indices_by_notetype(
|
||||||
&mut self,
|
&mut self,
|
||||||
field_name: &str,
|
field_name: &str,
|
||||||
|
@ -523,12 +620,13 @@ impl SqlWriter<'_> {
|
||||||
|
|
||||||
let mut field_map = vec![];
|
let mut field_map = vec![];
|
||||||
for nt in notetypes.values() {
|
for nt in notetypes.values() {
|
||||||
let mut matched_fields = vec![];
|
let matched_fields: Vec<u32> = nt
|
||||||
for field in &nt.fields {
|
.fields
|
||||||
if matches_glob(&field.name) {
|
.iter()
|
||||||
matched_fields.push(field.ord.unwrap_or_default());
|
.filter_map(|field| {
|
||||||
}
|
matches_glob(&field.name).then(|| field.ord.unwrap_or_default())
|
||||||
}
|
})
|
||||||
|
.collect();
|
||||||
if !matched_fields.is_empty() {
|
if !matched_fields.is_empty() {
|
||||||
field_map.push((nt.id, matched_fields));
|
field_map.push((nt.id, matched_fields));
|
||||||
}
|
}
|
||||||
|
@ -540,6 +638,68 @@ impl SqlWriter<'_> {
|
||||||
Ok(field_map)
|
Ok(field_map)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn included_fields_by_notetype(&mut self) -> Result<Option<Vec<UnqualifiedSearchContext>>> {
|
||||||
|
let notetypes = self.col.get_all_notetypes()?;
|
||||||
|
let mut any_excluded = false;
|
||||||
|
let mut field_map = vec![];
|
||||||
|
for nt in notetypes.values() {
|
||||||
|
let mut sortf_excluded = false;
|
||||||
|
let matched_fields = nt
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.filter_map(|field| {
|
||||||
|
let ord = field.ord.unwrap_or_default();
|
||||||
|
if field.config.exclude_from_search {
|
||||||
|
any_excluded = true;
|
||||||
|
sortf_excluded |= ord == nt.config.sort_field_idx;
|
||||||
|
}
|
||||||
|
(!field.config.exclude_from_search).then_some(ord)
|
||||||
|
})
|
||||||
|
.collect_ranges();
|
||||||
|
if !matched_fields.is_empty() {
|
||||||
|
field_map.push(UnqualifiedSearchContext {
|
||||||
|
ntid: nt.id,
|
||||||
|
total_fields_in_note: nt.fields.len(),
|
||||||
|
sortf_excluded,
|
||||||
|
field_ranges_to_search: matched_fields,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if any_excluded {
|
||||||
|
Ok(Some(field_map))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn included_fields_for_unqualified_regex(
|
||||||
|
&mut self,
|
||||||
|
) -> Result<Option<Vec<UnqualifiedRegexSearchContext>>> {
|
||||||
|
let notetypes = self.col.get_all_notetypes()?;
|
||||||
|
let mut any_excluded = false;
|
||||||
|
let mut field_map = vec![];
|
||||||
|
for nt in notetypes.values() {
|
||||||
|
let matched_fields: Vec<u32> = nt
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.filter_map(|field| {
|
||||||
|
any_excluded |= field.config.exclude_from_search;
|
||||||
|
(!field.config.exclude_from_search).then_some(field.ord.unwrap_or_default())
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
field_map.push(UnqualifiedRegexSearchContext {
|
||||||
|
ntid: nt.id,
|
||||||
|
total_fields_in_note: nt.fields.len(),
|
||||||
|
fields_to_search: matched_fields,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if any_excluded {
|
||||||
|
Ok(Some(field_map))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn write_dupe(&mut self, ntid: NotetypeId, text: &str) -> Result<()> {
|
fn write_dupe(&mut self, ntid: NotetypeId, text: &str) -> Result<()> {
|
||||||
let text_nohtml = strip_html_preserving_media_filenames(text);
|
let text_nohtml = strip_html_preserving_media_filenames(text);
|
||||||
let csum = field_checksum(text_nohtml.as_ref());
|
let csum = field_checksum(text_nohtml.as_ref());
|
||||||
|
@ -598,25 +758,48 @@ impl SqlWriter<'_> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_regex(&mut self, word: &str) {
|
fn write_regex(&mut self, word: &str, no_combining: bool) -> Result<()> {
|
||||||
self.sql.push_str("n.flds regexp ?");
|
let flds_expr = if no_combining {
|
||||||
self.args.push(format!(r"(?i){}", word));
|
"coalesce(without_combining(n.flds), n.flds)"
|
||||||
}
|
|
||||||
|
|
||||||
fn write_regex_nc(&mut self, word: &str) {
|
|
||||||
let word = &without_combining(word);
|
|
||||||
self.sql
|
|
||||||
.push_str("coalesce(without_combining(n.flds), n.flds) regexp ?");
|
|
||||||
self.args.push(format!(r"(?i){}", word));
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_word_boundary(&mut self, word: &str) {
|
|
||||||
let re = format!(r"\b{}\b", to_re(word));
|
|
||||||
if self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch) {
|
|
||||||
self.write_regex_nc(&re);
|
|
||||||
} else {
|
} else {
|
||||||
self.write_regex(&re);
|
"n.flds"
|
||||||
|
};
|
||||||
|
let word = if no_combining {
|
||||||
|
without_combining(word)
|
||||||
|
} else {
|
||||||
|
std::borrow::Cow::Borrowed(word)
|
||||||
|
};
|
||||||
|
self.args.push(format!(r"(?i){}", word));
|
||||||
|
let arg_idx = self.args.len();
|
||||||
|
if let Some(field_indices_by_notetype) = self.included_fields_for_unqualified_regex()? {
|
||||||
|
let notetype_clause = |ctx: &UnqualifiedRegexSearchContext| -> String {
|
||||||
|
let clause = if ctx.fields_to_search.len() == ctx.total_fields_in_note {
|
||||||
|
format!("{flds_expr} regexp ?{arg_idx}")
|
||||||
|
} else {
|
||||||
|
let indices = ctx.fields_to_search.iter().join(",");
|
||||||
|
format!("regexp_fields(?{arg_idx}, {flds_expr}, {indices})")
|
||||||
|
};
|
||||||
|
|
||||||
|
format!("(n.mid = {mid} and {clause})", mid = ctx.ntid)
|
||||||
|
};
|
||||||
|
let all_notetype_clauses = field_indices_by_notetype
|
||||||
|
.iter()
|
||||||
|
.map(notetype_clause)
|
||||||
|
.join(" or ");
|
||||||
|
write!(self.sql, "({all_notetype_clauses})").unwrap();
|
||||||
|
} else {
|
||||||
|
write!(self.sql, "{flds_expr} regexp ?{arg_idx}").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_word_boundary(&mut self, word: &str) -> Result<()> {
|
||||||
|
let re = format!(r"\b{}\b", to_re(word));
|
||||||
|
self.write_regex(
|
||||||
|
&re,
|
||||||
|
self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -646,6 +829,68 @@ impl RequiredTable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a list of numbers, create one or more ranges, collapsing
|
||||||
|
/// contiguous numbers.
|
||||||
|
trait CollectRanges {
|
||||||
|
type Item;
|
||||||
|
fn collect_ranges(self) -> Vec<Range<Self::Item>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<
|
||||||
|
Idx: Copy + PartialOrd + std::ops::Add<Idx, Output = Idx> + From<u8>,
|
||||||
|
I: IntoIterator<Item = Idx>,
|
||||||
|
> CollectRanges for I
|
||||||
|
{
|
||||||
|
type Item = Idx;
|
||||||
|
|
||||||
|
fn collect_ranges(self) -> Vec<Range<Self::Item>> {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut iter = self.into_iter();
|
||||||
|
let next = iter.next();
|
||||||
|
if next.is_none() {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
let mut start = next.unwrap();
|
||||||
|
let mut end = next.unwrap();
|
||||||
|
|
||||||
|
for i in iter {
|
||||||
|
if i == end + 1.into() {
|
||||||
|
end = end + 1.into();
|
||||||
|
} else {
|
||||||
|
result.push(start..end + 1.into());
|
||||||
|
start = i;
|
||||||
|
end = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.push(start..end + 1.into());
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct FieldQualifiedSearchContext {
|
||||||
|
ntid: NotetypeId,
|
||||||
|
total_fields_in_note: usize,
|
||||||
|
/// This may include more than one field in the case the user
|
||||||
|
/// has searched with a wildcard, eg f*:foo.
|
||||||
|
field_ranges_to_search: Vec<Range<u32>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct UnqualifiedSearchContext {
|
||||||
|
ntid: NotetypeId,
|
||||||
|
total_fields_in_note: usize,
|
||||||
|
sortf_excluded: bool,
|
||||||
|
field_ranges_to_search: Vec<Range<u32>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct UnqualifiedRegexSearchContext {
|
||||||
|
ntid: NotetypeId,
|
||||||
|
total_fields_in_note: usize,
|
||||||
|
/// Unlike the other contexts, this contains each individual index
|
||||||
|
/// instead of a list of ranges.
|
||||||
|
fields_to_search: Vec<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
impl Node {
|
impl Node {
|
||||||
fn required_table(&self) -> RequiredTable {
|
fn required_table(&self) -> RequiredTable {
|
||||||
match self {
|
match self {
|
||||||
|
@ -740,10 +985,10 @@ mod test {
|
||||||
s(ctx, "front:te*st"),
|
s(ctx, "front:te*st"),
|
||||||
(
|
(
|
||||||
concat!(
|
concat!(
|
||||||
"(((n.mid = 1581236385344 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
|
"(((n.mid = 1581236385344 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\')) or ",
|
||||||
"(n.mid = 1581236385345 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
|
"(n.mid = 1581236385345 and (n.flds like '' || ?1 || '\u{1f}%\u{1f}%' escape '\\')) or ",
|
||||||
"(n.mid = 1581236385346 and (field_at_index(n.flds, 0) like ?1 escape '\\')) or ",
|
"(n.mid = 1581236385346 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\')) or ",
|
||||||
"(n.mid = 1581236385347 and (field_at_index(n.flds, 0) like ?1 escape '\\'))))"
|
"(n.mid = 1581236385347 and (n.flds like '' || ?1 || '\u{1f}%' escape '\\'))))"
|
||||||
)
|
)
|
||||||
.into(),
|
.into(),
|
||||||
vec!["te%st".into()]
|
vec!["te%st".into()]
|
||||||
|
@ -941,22 +1186,25 @@ mod test {
|
||||||
// regex
|
// regex
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
s(ctx, r"re:\bone"),
|
s(ctx, r"re:\bone"),
|
||||||
("(n.flds regexp ?)".into(), vec![r"(?i)\bone".into()])
|
("(n.flds regexp ?1)".into(), vec![r"(?i)\bone".into()])
|
||||||
);
|
);
|
||||||
|
|
||||||
// word boundary
|
// word boundary
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
s(ctx, r"w:foo"),
|
s(ctx, r"w:foo"),
|
||||||
("(n.flds regexp ?)".into(), vec![r"(?i)\bfoo\b".into()])
|
("(n.flds regexp ?1)".into(), vec![r"(?i)\bfoo\b".into()])
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
s(ctx, r"w:*foo"),
|
s(ctx, r"w:*foo"),
|
||||||
("(n.flds regexp ?)".into(), vec![r"(?i)\b.*foo\b".into()])
|
("(n.flds regexp ?1)".into(), vec![r"(?i)\b.*foo\b".into()])
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
s(ctx, r"w:*fo_o*"),
|
s(ctx, r"w:*fo_o*"),
|
||||||
("(n.flds regexp ?)".into(), vec![r"(?i)\b.*fo.o.*\b".into()])
|
(
|
||||||
|
"(n.flds regexp ?1)".into(),
|
||||||
|
vec![r"(?i)\b.*fo.o.*\b".into()]
|
||||||
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -991,4 +1239,10 @@ mod test {
|
||||||
RequiredTable::Notes
|
RequiredTable::Notes
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ranges() {
|
||||||
|
assert_eq!([1, 2, 3].collect_ranges(), [1..4]);
|
||||||
|
assert_eq!([1, 3, 4].collect_ranges(), [1..2, 3..5]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue