diff --git a/Cargo.lock b/Cargo.lock index 654437d33..26006790b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,7 @@ dependencies = [ "axum", "axum-client-ip", "axum-extra", + "bitflags 2.9.1", "blake3", "bytes", "chrono", diff --git a/Cargo.toml b/Cargo.toml index a22badd97..db5753893 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ async-trait = "0.1.88" axum = { version = "0.8.4", features = ["multipart", "macros"] } axum-client-ip = "1.1.3" axum-extra = { version = "0.10.1", features = ["typed-header"] } +bitflags = "2.9.1" blake3 = "1.8.2" bytes = "1.10.1" camino = "1.1.10" diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index d3c7e215f..a1d24cc87 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -48,6 +48,7 @@ async-trait.workspace = true axum.workspace = true axum-client-ip.workspace = true axum-extra.workspace = true +bitflags.workspace = true blake3.workspace = true bytes.workspace = true chrono.workspace = true diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs index 208a2f4ed..02919dc12 100644 --- a/rslib/src/cloze.rs +++ b/rslib/src/cloze.rs @@ -25,6 +25,9 @@ use crate::latex::contains_latex; use crate::template::RenderContext; use crate::text::strip_html_preserving_entities; +static CLOZE: LazyLock = + LazyLock::new(|| Regex::new(r"(?s)\{\{c\d+::(.*?)(::.*?)?\}\}").unwrap()); + static MATHJAX: LazyLock = LazyLock::new(|| { Regex::new( r"(?xsi) @@ -453,6 +456,10 @@ pub fn cloze_number_in_fields(fields: impl IntoIterator>) -> Ha set } +pub(crate) fn strip_clozes(text: &str) -> Cow<'_, str> { + CLOZE.replace_all(text, "$1") +} + fn strip_html_inside_mathjax(text: &str) -> Cow { MATHJAX.replace_all(text, |caps: &Captures| -> String { format!( @@ -610,6 +617,16 @@ mod test { ); } + #[test] + fn strip_clozes_regex() { + assert_eq!( + strip_clozes( + r#"The {{c1::moon::🌛}} {{c2::orbits::this hint has "::" in it}} the {{c3::🌏}}."# + ), + "The moon orbits the 🌏." + ); + } + #[test] fn mathjax_html() { // escaped angle brackets should be preserved diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs index 409862fce..ae166ef54 100644 --- a/rslib/src/search/parser.rs +++ b/rslib/src/search/parser.rs @@ -94,6 +94,7 @@ pub enum SearchNode { WholeCollection, Regex(String), NoCombining(String), + StripClozes(String), WordBoundary(String), CustomData(String), Preset(String), @@ -358,6 +359,7 @@ fn search_node_for_text_with_argument<'a>( "cid" => SearchNode::CardIds(check_id_list(val, key)?.into()), "re" => SearchNode::Regex(unescape_quotes(val)), "nc" => SearchNode::NoCombining(unescape(val)?), + "sc" => SearchNode::StripClozes(unescape(val)?), "w" => SearchNode::WordBoundary(unescape(val)?), "dupe" => parse_dupe(val)?, "has-cd" => SearchNode::CustomData(unescape(val)?), diff --git a/rslib/src/search/sqlwriter.rs b/rslib/src/search/sqlwriter.rs index 3aa216a4f..8528376cb 100644 --- a/rslib/src/search/sqlwriter.rs +++ b/rslib/src/search/sqlwriter.rs @@ -22,6 +22,7 @@ use crate::notes::field_checksum; use crate::notetype::NotetypeId; use crate::prelude::*; use crate::storage::ids_to_string; +use crate::storage::ProcessTextFlags; use crate::text::glob_matcher; use crate::text::is_glob; use crate::text::normalize_to_nfc; @@ -134,6 +135,7 @@ impl SqlWriter<'_> { self.write_unqualified( text, self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch), + false, )? } SearchNode::SingleField { field, text, is_re } => { @@ -143,7 +145,14 @@ impl SqlWriter<'_> { self.write_dupe(*notetype_id, &self.norm_note(text))? } SearchNode::Regex(re) => self.write_regex(&self.norm_note(re), false)?, - SearchNode::NoCombining(text) => self.write_unqualified(&self.norm_note(text), true)?, + SearchNode::NoCombining(text) => { + self.write_unqualified(&self.norm_note(text), true, false)? + } + SearchNode::StripClozes(text) => self.write_unqualified( + &self.norm_note(text), + self.col.get_config_bool(BoolKey::IgnoreAccentsInSearch), + true, + )?, SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text))?, // other @@ -190,7 +199,12 @@ impl SqlWriter<'_> { Ok(()) } - fn write_unqualified(&mut self, text: &str, no_combining: bool) -> Result<()> { + fn write_unqualified( + &mut self, + text: &str, + no_combining: bool, + strip_clozes: bool, + ) -> Result<()> { let text = to_sql(text); let text = if no_combining { without_combining(&text) @@ -202,17 +216,37 @@ impl SqlWriter<'_> { self.args.push(text); let arg_idx = self.args.len(); - let sfld_expr = if no_combining { - "coalesce(without_combining(cast(n.sfld as text)), n.sfld)" + let mut process_text_flags = ProcessTextFlags::empty(); + if no_combining { + process_text_flags.insert(ProcessTextFlags::NoCombining); + } + if strip_clozes { + process_text_flags.insert(ProcessTextFlags::StripClozes); + } + + let (sfld_expr, flds_expr) = if !process_text_flags.is_empty() { + let bits = process_text_flags.bits(); + ( + Cow::from(format!( + "coalesce(process_text(cast(n.sfld as text), {bits}), n.sfld)" + )), + Cow::from(format!("coalesce(process_text(n.flds, {bits}), n.flds)")), + ) } else { - "n.sfld" - }; - let flds_expr = if no_combining { - "coalesce(without_combining(n.flds), n.flds)" - } else { - "n.flds" + (Cow::from("n.sfld"), Cow::from("n.flds")) }; + if strip_clozes { + let cloze_notetypes_only_clause = self + .col + .get_all_notetypes()? + .iter() + .filter(|nt| nt.is_cloze()) + .map(|nt| format!("n.mid = {}", nt.id)) + .join(" or "); + write!(self.sql, "({cloze_notetypes_only_clause}) and ").unwrap(); + } + if let Some(field_indicies_by_notetype) = self.included_fields_by_notetype()? { let field_idx_str = format!("' || ?{arg_idx} || '"); let other_idx_str = "%".to_string(); @@ -803,9 +837,12 @@ impl SqlWriter<'_> { fn write_regex(&mut self, word: &str, no_combining: bool) -> Result<()> { let flds_expr = if no_combining { - "coalesce(without_combining(n.flds), n.flds)" + Cow::from(format!( + "coalesce(process_text(n.flds, {}), n.flds)", + ProcessTextFlags::NoCombining.bits() + )) } else { - "n.flds" + Cow::from("n.flds") }; let word = if no_combining { without_combining(word) @@ -995,6 +1032,7 @@ impl SearchNode { SearchNode::Duplicates { .. } => RequiredTable::Notes, SearchNode::Regex(_) => RequiredTable::Notes, SearchNode::NoCombining(_) => RequiredTable::Notes, + SearchNode::StripClozes(_) => RequiredTable::Notes, SearchNode::WordBoundary(_) => RequiredTable::Notes, SearchNode::NotetypeId(_) => RequiredTable::Notes, SearchNode::Notetype(_) => RequiredTable::Notes, @@ -1299,6 +1337,9 @@ c.odue != 0 then c.odue else c.due end) != {days}) or (c.queue in (1,4) and "((c.did in (1) or c.odid in (1)))" ); assert_eq!(&s(ctx, "preset:typo").0, "(false)"); + + // strip clozes + assert_eq!(&s(ctx, "sc:abcdef").0, "((n.mid = 1581236385343) and (coalesce(process_text(cast(n.sfld as text), 2), n.sfld) like ?1 escape '\\' or coalesce(process_text(n.flds, 2), n.flds) like ?1 escape '\\'))"); } #[test] diff --git a/rslib/src/search/writer.rs b/rslib/src/search/writer.rs index 2158bffba..3bbe6fd0a 100644 --- a/rslib/src/search/writer.rs +++ b/rslib/src/search/writer.rs @@ -91,6 +91,7 @@ fn write_search_node(node: &SearchNode) -> String { WholeCollection => "deck:*".to_string(), Regex(s) => maybe_quote(&format!("re:{s}")), NoCombining(s) => maybe_quote(&format!("nc:{s}")), + StripClozes(s) => maybe_quote(&format!("sc:{s}")), WordBoundary(s) => maybe_quote(&format!("w:{s}")), CustomData(k) => maybe_quote(&format!("has-cd:{k}")), Preset(s) => maybe_quote(&format!("preset:{s}")), diff --git a/rslib/src/storage/mod.rs b/rslib/src/storage/mod.rs index 948bc30e4..015f4fdc7 100644 --- a/rslib/src/storage/mod.rs +++ b/rslib/src/storage/mod.rs @@ -19,6 +19,7 @@ mod upgrades; use std::fmt::Write; +pub(crate) use sqlite::ProcessTextFlags; pub(crate) use sqlite::SqliteStorage; #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/rslib/src/storage/sqlite.rs b/rslib/src/storage/sqlite.rs index e9ae55a3b..34e2a85d1 100644 --- a/rslib/src/storage/sqlite.rs +++ b/rslib/src/storage/sqlite.rs @@ -9,6 +9,7 @@ use std::hash::Hasher; use std::path::Path; use std::sync::Arc; +use bitflags::bitflags; use fnv::FnvHasher; use fsrs::FSRS; use fsrs::FSRS5_DEFAULT_DECAY; @@ -24,6 +25,7 @@ use super::upgrades::SCHEMA_MAX_VERSION; use super::upgrades::SCHEMA_MIN_VERSION; use super::upgrades::SCHEMA_STARTING_VERSION; use super::SchemaVersion; +use crate::cloze::strip_clozes; use crate::config::schema11::schema11_config_as_string; use crate::error::DbErrorKind; use crate::prelude::*; @@ -31,6 +33,7 @@ use crate::scheduler::timing::local_minutes_west_for_stamp; use crate::scheduler::timing::v1_creation_date; use crate::storage::card::data::CardData; use crate::text::without_combining; +use crate::text::CowMapping; fn unicase_compare(s1: &str, s2: &str) -> Ordering { UniCase::new(s1).cmp(&UniCase::new(s2)) @@ -74,7 +77,7 @@ fn open_or_create_collection_db(path: &Path) -> Result { add_regexp_function(&db)?; add_regexp_fields_function(&db)?; add_regexp_tags_function(&db)?; - add_without_combining_function(&db)?; + add_process_text_function(&db)?; add_fnvhash_function(&db)?; add_extract_original_position_function(&db)?; add_extract_custom_data_function(&db)?; @@ -111,17 +114,28 @@ fn add_field_index_function(db: &Connection) -> rusqlite::Result<()> { ) } -fn add_without_combining_function(db: &Connection) -> rusqlite::Result<()> { +bitflags! { + pub(crate) struct ProcessTextFlags: u8 { + const NoCombining = 1; + const StripClozes = 1 << 1; + } +} + +fn add_process_text_function(db: &Connection) -> rusqlite::Result<()> { db.create_scalar_function( - "without_combining", - 1, + "process_text", + 2, FunctionFlags::SQLITE_DETERMINISTIC, |ctx| { - let text = ctx.get_raw(0).as_str()?; - Ok(match without_combining(text) { - Cow::Borrowed(_) => None, - Cow::Owned(o) => Some(o), - }) + let mut text = Cow::from(ctx.get_raw(0).as_str()?); + let opt = ProcessTextFlags::from_bits_truncate(ctx.get_raw(1).as_i64()? as u8); + if opt.contains(ProcessTextFlags::StripClozes) { + text = text.map_cow(strip_clozes); + } + if opt.contains(ProcessTextFlags::NoCombining) { + text = text.map_cow(without_combining); + } + Ok(text.get_owned()) }, ) }