mirror of
https://github.com/ankitects/anki.git
synced 2025-09-23 00:12:25 -04:00
add search that ignores combining chars
On a test of a ~40k card collection, the 'ignore accents' add-on takes about 1150ms, and this code takes about 70ms.
This commit is contained in:
parent
f0ed34d79b
commit
51a379de23
4 changed files with 60 additions and 1 deletions
|
@ -79,6 +79,7 @@ pub(super) enum SearchNode<'a> {
|
|||
},
|
||||
WholeCollection,
|
||||
Regex(Cow<'a, str>),
|
||||
NoCombining(Cow<'a, str>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
@ -272,6 +273,7 @@ fn search_node_for_text_with_argument<'a>(
|
|||
"dupes" => parse_dupes(val.as_ref())?,
|
||||
"prop" => parse_prop(val.as_ref())?,
|
||||
"re" => SearchNode::Regex(val),
|
||||
"nc" => SearchNode::NoCombining(val),
|
||||
// anything else is a field search
|
||||
_ => parse_single_field(key.as_ref(), val.as_ref()),
|
||||
})
|
||||
|
|
|
@ -8,6 +8,7 @@ use crate::decks::get_deck;
|
|||
use crate::err::{AnkiError, Result};
|
||||
use crate::notes::field_checksum;
|
||||
use crate::text::matches_wildcard;
|
||||
use crate::text::without_combining;
|
||||
use crate::{
|
||||
collection::RequestContext, text::strip_html_preserving_image_filenames, types::ObjID,
|
||||
};
|
||||
|
@ -81,6 +82,7 @@ impl SqlWriter<'_, '_> {
|
|||
SearchNode::Property { operator, kind } => self.write_prop(operator, kind)?,
|
||||
SearchNode::WholeCollection => write!(self.sql, "true").unwrap(),
|
||||
SearchNode::Regex(re) => self.write_regex(re.as_ref()),
|
||||
SearchNode::NoCombining(text) => self.write_no_combining(text.as_ref()),
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
@ -97,6 +99,20 @@ impl SqlWriter<'_, '_> {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
fn write_no_combining(&mut self, text: &str) {
|
||||
let text = format!("%{}%", without_combining(text));
|
||||
self.args.push(text);
|
||||
write!(
|
||||
self.sql,
|
||||
concat!(
|
||||
"(coalesce(without_combining(cast(n.sfld as text)), n.sfld) like ?{n} escape '\\' ",
|
||||
"or coalesce(without_combining(n.flds), n.flds) like ?{n} escape '\\')"
|
||||
),
|
||||
n = self.args.len(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn write_tag(&mut self, text: &str) {
|
||||
match text {
|
||||
"none" => {
|
||||
|
|
|
@ -10,12 +10,14 @@ use crate::{
|
|||
decks::Deck,
|
||||
notetypes::NoteType,
|
||||
sched::cutoff::{sched_timing_today, SchedTimingToday},
|
||||
text::without_combining,
|
||||
types::{ObjID, Usn},
|
||||
};
|
||||
use regex::Regex;
|
||||
use rusqlite::{params, Connection, NO_PARAMS};
|
||||
use std::cmp::Ordering;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
@ -58,6 +60,7 @@ fn open_or_create_collection_db(path: &Path) -> Result<Connection> {
|
|||
|
||||
add_field_index_function(&db)?;
|
||||
add_regexp_function(&db)?;
|
||||
add_without_combining_function(&db)?;
|
||||
|
||||
db.create_collation("unicase", unicase_compare)?;
|
||||
|
||||
|
@ -75,6 +78,16 @@ fn add_field_index_function(db: &Connection) -> rusqlite::Result<()> {
|
|||
})
|
||||
}
|
||||
|
||||
fn add_without_combining_function(db: &Connection) -> rusqlite::Result<()> {
|
||||
db.create_scalar_function("without_combining", 1, true, |ctx| {
|
||||
let text = ctx.get_raw(0).as_str()?;
|
||||
Ok(match without_combining(text) {
|
||||
Cow::Borrowed(_) => None,
|
||||
Cow::Owned(o) => Some(o),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Adds sql function regexp(regex, string) -> is_match
|
||||
/// Taken from the rusqlite docs
|
||||
fn add_regexp_function(db: &Connection) -> rusqlite::Result<()> {
|
||||
|
|
|
@ -6,7 +6,9 @@ use regex::{Captures, Regex};
|
|||
use std::borrow::Cow;
|
||||
use std::ptr;
|
||||
use unicase::eq as uni_eq;
|
||||
use unicode_normalization::{is_nfc, UnicodeNormalization};
|
||||
use unicode_normalization::{
|
||||
char::is_combining_mark, is_nfc, is_nfkd_quick, IsNormalized, UnicodeNormalization,
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum AVTag {
|
||||
|
@ -231,12 +233,32 @@ pub(crate) fn matches_wildcard(text: &str, search: &str) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert provided string to NFKD form and strip combining characters.
|
||||
pub(crate) fn without_combining(s: &str) -> Cow<str> {
|
||||
// if the string is already normalized
|
||||
if matches!(is_nfkd_quick(s.chars()), IsNormalized::Yes) {
|
||||
// and no combining characters found, return unchanged
|
||||
if !s.chars().any(is_combining_mark) {
|
||||
return s.into();
|
||||
}
|
||||
}
|
||||
|
||||
// we need to create a new string without the combining marks
|
||||
s.chars()
|
||||
.nfkd()
|
||||
.filter(|c| !is_combining_mark(*c))
|
||||
.collect::<String>()
|
||||
.into()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::matches_wildcard;
|
||||
use crate::text::without_combining;
|
||||
use crate::text::{
|
||||
extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_image_filenames, AVTag,
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[test]
|
||||
fn stripping() {
|
||||
|
@ -287,4 +309,10 @@ mod test {
|
|||
assert_eq!(matches_wildcard("foo", "F*oo"), true);
|
||||
assert_eq!(matches_wildcard("foo", "b*"), false);
|
||||
}
|
||||
|
||||
#[test]
fn combining() {
    // Plain ASCII has nothing to strip, so the input is returned borrowed
    // and unchanged.
    let plain = without_combining("test");
    assert!(matches!(plain, Cow::Borrowed(_)));
    assert_eq!(plain, "test");

    // An accented letter must be decomposed and its mark dropped, which
    // requires a new allocation. Check the content, not just the variant.
    let stripped = without_combining("Über");
    assert!(matches!(stripped, Cow::Owned(_)));
    assert_eq!(stripped, "Uber");

    // Input that is already decomposed (base letter + U+0308) passes the
    // normalization quick check but still contains a combining mark, so it
    // must also be rewritten.
    let decomposed = without_combining("U\u{308}ber");
    assert!(matches!(decomposed, Cow::Owned(_)));
    assert_eq!(decomposed, "Uber");
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue