support disabling unicode normalization in notes

This commit is contained in:
Damien Elmes 2020-05-06 20:06:42 +10:00
parent a7a485d550
commit 7bab99d873
16 changed files with 200 additions and 77 deletions

View file

@ -454,7 +454,9 @@ select id from notes where id in %s and id not in (select nid from cards)"""
# Card generation & field checksums/sort fields # Card generation & field checksums/sort fields
########################################################################## ##########################################################################
def after_note_updates(self, nids: List[int], mark_modified: bool, generate_cards: bool = True) -> None: def after_note_updates(
self, nids: List[int], mark_modified: bool, generate_cards: bool = True
) -> None:
self.backend.after_note_updates( self.backend.after_note_updates(
nids=nids, generate_cards=generate_cards, mark_notes_modified=mark_modified nids=nids, generate_cards=generate_cards, mark_notes_modified=mark_modified
) )
@ -818,7 +820,9 @@ select id from cards where odid > 0 and did in %s"""
self.tags.registerNotes() self.tags.registerNotes()
# field cache # field cache
for m in self.models.all(): for m in self.models.all():
self.after_note_updates(self.models.nids(m), mark_modified=False, generate_cards=False) self.after_note_updates(
self.models.nids(m), mark_modified=False, generate_cards=False
)
# new cards can't have a due position > 32 bits, so wrap items over # new cards can't have a due position > 32 bits, so wrap items over
# 2 million back to 1 million # 2 million back to 1 million
self.db.execute( self.db.execute(

View file

@ -2,7 +2,6 @@
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import html import html
import unicodedata
from typing import Any, Dict, List, Optional, Tuple, Union from typing import Any, Dict, List, Optional, Tuple, Union
from anki.collection import _Collection from anki.collection import _Collection
@ -147,8 +146,6 @@ class NoteImporter(Importer):
n.fields[c] = n.fields[c].strip() n.fields[c] = n.fields[c].strip()
if not self.allowHTML: if not self.allowHTML:
n.fields[c] = n.fields[c].replace("\n", "<br>") n.fields[c] = n.fields[c].replace("\n", "<br>")
n.fields[c] = unicodedata.normalize("NFC", n.fields[c])
n.tags = [unicodedata.normalize("NFC", t) for t in n.tags]
fld0 = n.fields[fld0idx] fld0 = n.fields[fld0idx]
csum = fieldChecksum(fld0) csum = fieldChecksum(fld0)
# first field must exist # first field must exist

View file

@ -789,7 +789,7 @@ class RustBackend:
mark_notes_modified=mark_notes_modified, mark_notes_modified=mark_notes_modified,
) )
), ),
release_gil=True release_gil=True,
) )

View file

@ -5,9 +5,7 @@
from __future__ import annotations from __future__ import annotations
import html import html
import sre_constants
import time import time
import unicodedata
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from operator import itemgetter from operator import itemgetter
@ -775,7 +773,6 @@ class Browser(QMainWindow):
# grab search text and normalize # grab search text and normalize
txt = self.form.searchEdit.lineEdit().text() txt = self.form.searchEdit.lineEdit().text()
txt = unicodedata.normalize("NFC", txt)
# update history # update history
sh = self.mw.pm.profile["searchHistory"] sh = self.mw.pm.profile["searchHistory"]

View file

@ -377,7 +377,6 @@ class Editor:
if nid != self.note.id: if nid != self.note.id:
print("ignored late blur") print("ignored late blur")
return return
txt = unicodedata.normalize("NFC", txt)
txt = self.mungeHTML(txt) txt = self.mungeHTML(txt)
# misbehaving apps may include a null byte in the text # misbehaving apps may include a null byte in the text
txt = txt.replace("\x00", "") txt = txt.replace("\x00", "")

View file

@ -13,7 +13,6 @@ use crate::{
deckconf::{DeckConf, DeckConfID}, deckconf::{DeckConf, DeckConfID},
decks::{Deck, DeckID, DeckSchema11}, decks::{Deck, DeckID, DeckSchema11},
err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind}, err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind},
findreplace::FindReplaceContext,
i18n::{tr_args, I18n, TR}, i18n::{tr_args, I18n, TR},
latex::{extract_latex, extract_latex_expanding_clozes, ExtractedLatex}, latex::{extract_latex, extract_latex_expanding_clozes, ExtractedLatex},
log, log,
@ -1093,9 +1092,7 @@ impl Backend {
Some(input.field_name) Some(input.field_name)
}; };
let repl = input.replacement; let repl = input.replacement;
self.with_col(|col| { self.with_col(|col| col.find_and_replace(nids, &search, &repl, field_name))
col.find_and_replace(FindReplaceContext::new(nids, &search, &repl, field_name)?)
})
} }
fn after_note_updates(&self, input: pb::AfterNoteUpdatesIn) -> Result<pb::Empty> { fn after_note_updates(&self, input: pb::AfterNoteUpdatesIn) -> Result<pb::Empty> {

View file

@ -43,6 +43,7 @@ pub(crate) enum ConfigKey {
NextNewCardPosition, NextNewCardPosition,
SchedulerVersion, SchedulerVersion,
LearnAheadSecs, LearnAheadSecs,
NormalizeNoteText,
} }
#[derive(PartialEq, Serialize_repr, Deserialize_repr, Clone, Copy)] #[derive(PartialEq, Serialize_repr, Deserialize_repr, Clone, Copy)]
#[repr(u8)] #[repr(u8)]
@ -64,6 +65,7 @@ impl From<ConfigKey> for &'static str {
ConfigKey::NextNewCardPosition => "nextPos", ConfigKey::NextNewCardPosition => "nextPos",
ConfigKey::SchedulerVersion => "schedVer", ConfigKey::SchedulerVersion => "schedVer",
ConfigKey::LearnAheadSecs => "collapseTime", ConfigKey::LearnAheadSecs => "collapseTime",
ConfigKey::NormalizeNoteText => "normalize_note_text",
} }
} }
} }
@ -163,6 +165,12 @@ impl Collection {
self.get_config_optional(ConfigKey::LearnAheadSecs) self.get_config_optional(ConfigKey::LearnAheadSecs)
.unwrap_or(1200) .unwrap_or(1200)
} }
/// This is a stop-gap solution until we can decouple searching from canonical storage.
pub(crate) fn normalize_note_text(&self) -> bool {
self.get_config_optional(ConfigKey::NormalizeNoteText)
.unwrap_or(true)
}
} }
#[derive(Deserialize, PartialEq, Debug, Clone, Copy)] #[derive(Deserialize, PartialEq, Debug, Clone, Copy)]

View file

@ -6,6 +6,7 @@ use crate::{
err::{AnkiError, Result}, err::{AnkiError, Result},
notes::NoteID, notes::NoteID,
notetype::CardGenContext, notetype::CardGenContext,
text::normalize_to_nfc,
types::Usn, types::Usn,
}; };
use itertools::Itertools; use itertools::Itertools;
@ -40,11 +41,31 @@ impl FindReplaceContext {
} }
impl Collection { impl Collection {
pub fn find_and_replace(&mut self, ctx: FindReplaceContext) -> Result<u32> { pub fn find_and_replace(
self.transact(None, |col| col.find_and_replace_inner(ctx, col.usn()?)) &mut self,
nids: Vec<NoteID>,
search_re: &str,
repl: &str,
field_name: Option<String>,
) -> Result<u32> {
self.transact(None, |col| {
let norm = col.normalize_note_text();
let search = if norm {
normalize_to_nfc(search_re)
} else {
search_re.into()
};
let ctx = FindReplaceContext::new(nids, &search, repl, field_name)?;
col.find_and_replace_inner(ctx, col.usn()?, norm)
})
} }
fn find_and_replace_inner(&mut self, ctx: FindReplaceContext, usn: Usn) -> Result<u32> { fn find_and_replace_inner(
&mut self,
ctx: FindReplaceContext,
usn: Usn,
normalize_text: bool,
) -> Result<u32> {
let mut total_changed = 0; let mut total_changed = 0;
let nids_by_notetype = self.storage.note_ids_by_notetype(&ctx.nids)?; let nids_by_notetype = self.storage.note_ids_by_notetype(&ctx.nids)?;
for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) { for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) {
@ -77,7 +98,12 @@ impl Collection {
} }
} }
if changed { if changed {
self.update_note_inner_generating_cards(&genctx, &mut note, true)?; self.update_note_inner_generating_cards(
&genctx,
&mut note,
true,
normalize_text,
)?;
total_changed += 1; total_changed += 1;
} }
} }
@ -108,12 +134,7 @@ mod test {
col.add_note(&mut note2, DeckID(1))?; col.add_note(&mut note2, DeckID(1))?;
let nids = col.search_notes_only("")?; let nids = col.search_notes_only("")?;
let cnt = col.find_and_replace(FindReplaceContext::new( let cnt = col.find_and_replace(nids.clone(), "(?i)AAA", "BBB", None)?;
nids.clone(),
"(?i)AAA",
"BBB",
None,
)?)?;
assert_eq!(cnt, 2); assert_eq!(cnt, 2);
let note = col.storage.get_note(note.id)?.unwrap(); let note = col.storage.get_note(note.id)?.unwrap();
@ -127,12 +148,7 @@ mod test {
col.storage.field_names_for_notes(&nids)?, col.storage.field_names_for_notes(&nids)?,
vec!["Back".to_string(), "Front".into(), "Text".into()] vec!["Back".to_string(), "Front".into(), "Text".into()]
); );
let cnt = col.find_and_replace(FindReplaceContext::new( let cnt = col.find_and_replace(nids.clone(), "BBB", "ccc", Some("Front".into()))?;
nids.clone(),
"BBB",
"ccc",
Some("Front".into()),
)?)?;
// still 2, as the caller is expected to provide only note ids that have // still 2, as the caller is expected to provide only note ids that have
// that field, and if we can't find the field we fall back on all fields // that field, and if we can't find the field we fall back on all fields
assert_eq!(cnt, 2); assert_eq!(cnt, 2);

View file

@ -404,7 +404,7 @@ where
&self.mgr.media_folder, &self.mgr.media_folder,
)? { )? {
// note was modified, needs saving // note was modified, needs saving
note.prepare_for_update(nt)?; note.prepare_for_update(nt, false)?;
note.set_modified(usn); note.set_modified(usn);
self.ctx.storage.update_note(&note)?; self.ctx.storage.update_note(&note)?;
collection_modified = true; collection_modified = true;

View file

@ -8,7 +8,7 @@ use crate::{
define_newtype, define_newtype,
err::{AnkiError, Result}, err::{AnkiError, Result},
notetype::{CardGenContext, NoteField, NoteType, NoteTypeID}, notetype::{CardGenContext, NoteField, NoteType, NoteTypeID},
text::strip_html_preserving_image_filenames, text::{ensure_string_in_nfc, strip_html_preserving_image_filenames},
timestamp::TimestampSecs, timestamp::TimestampSecs,
types::Usn, types::Usn,
}; };
@ -65,7 +65,7 @@ impl Note {
} }
/// Prepare note for saving to the database. Does not mark it as modified. /// Prepare note for saving to the database. Does not mark it as modified.
pub fn prepare_for_update(&mut self, nt: &NoteType) -> Result<()> { pub fn prepare_for_update(&mut self, nt: &NoteType, normalize_text: bool) -> Result<()> {
assert!(nt.id == self.ntid); assert!(nt.id == self.ntid);
if nt.fields.len() != self.fields.len() { if nt.fields.len() != self.fields.len() {
return Err(AnkiError::invalid_input(format!( return Err(AnkiError::invalid_input(format!(
@ -75,6 +75,12 @@ impl Note {
))); )));
} }
if normalize_text {
for field in &mut self.fields {
ensure_string_in_nfc(field);
}
}
let field1_nohtml = strip_html_preserving_image_filenames(&self.fields()[0]); let field1_nohtml = strip_html_preserving_image_filenames(&self.fields()[0]);
let checksum = field_checksum(field1_nohtml.as_ref()); let checksum = field_checksum(field1_nohtml.as_ref());
let sort_field = if nt.config.sort_field_idx == 0 { let sort_field = if nt.config.sort_field_idx == 0 {
@ -184,7 +190,8 @@ impl Collection {
.get_notetype(note.ntid)? .get_notetype(note.ntid)?
.ok_or_else(|| AnkiError::invalid_input("missing note type"))?; .ok_or_else(|| AnkiError::invalid_input("missing note type"))?;
let ctx = CardGenContext::new(&nt, col.usn()?); let ctx = CardGenContext::new(&nt, col.usn()?);
col.add_note_inner(&ctx, note, did) let norm = col.normalize_note_text();
col.add_note_inner(&ctx, note, did, norm)
}) })
} }
@ -193,9 +200,10 @@ impl Collection {
ctx: &CardGenContext, ctx: &CardGenContext,
note: &mut Note, note: &mut Note,
did: DeckID, did: DeckID,
normalize_text: bool,
) -> Result<()> { ) -> Result<()> {
self.canonify_note_tags(note, ctx.usn)?; self.canonify_note_tags(note, ctx.usn)?;
note.prepare_for_update(&ctx.notetype)?; note.prepare_for_update(&ctx.notetype, normalize_text)?;
note.set_modified(ctx.usn); note.set_modified(ctx.usn);
self.storage.add_note(note)?; self.storage.add_note(note)?;
self.generate_cards_for_new_note(ctx, note, did) self.generate_cards_for_new_note(ctx, note, did)
@ -207,7 +215,8 @@ impl Collection {
.get_notetype(note.ntid)? .get_notetype(note.ntid)?
.ok_or_else(|| AnkiError::invalid_input("missing note type"))?; .ok_or_else(|| AnkiError::invalid_input("missing note type"))?;
let ctx = CardGenContext::new(&nt, col.usn()?); let ctx = CardGenContext::new(&nt, col.usn()?);
col.update_note_inner_generating_cards(&ctx, note, true) let norm = col.normalize_note_text();
col.update_note_inner_generating_cards(&ctx, note, true, norm)
}) })
} }
@ -216,8 +225,15 @@ impl Collection {
ctx: &CardGenContext, ctx: &CardGenContext,
note: &mut Note, note: &mut Note,
mark_note_modified: bool, mark_note_modified: bool,
normalize_text: bool,
) -> Result<()> { ) -> Result<()> {
self.update_note_inner_without_cards(note, ctx.notetype, ctx.usn, mark_note_modified)?; self.update_note_inner_without_cards(
note,
ctx.notetype,
ctx.usn,
mark_note_modified,
normalize_text,
)?;
self.generate_cards_for_existing_note(ctx, note) self.generate_cards_for_existing_note(ctx, note)
} }
@ -227,9 +243,10 @@ impl Collection {
nt: &NoteType, nt: &NoteType,
usn: Usn, usn: Usn,
mark_note_modified: bool, mark_note_modified: bool,
normalize_text: bool,
) -> Result<()> { ) -> Result<()> {
self.canonify_note_tags(note, usn)?; self.canonify_note_tags(note, usn)?;
note.prepare_for_update(nt)?; note.prepare_for_update(nt, normalize_text)?;
if mark_note_modified { if mark_note_modified {
note.set_modified(usn); note.set_modified(usn);
} }
@ -256,6 +273,7 @@ impl Collection {
mark_notes_modified: bool, mark_notes_modified: bool,
) -> Result<()> { ) -> Result<()> {
let nids_by_notetype = self.storage.note_ids_by_notetype(nids)?; let nids_by_notetype = self.storage.note_ids_by_notetype(nids)?;
let norm = self.normalize_note_text();
for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) { for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) {
let nt = self let nt = self
.get_notetype(ntid)? .get_notetype(ntid)?
@ -268,6 +286,7 @@ impl Collection {
&genctx, &genctx,
&mut note, &mut note,
mark_notes_modified, mark_notes_modified,
norm,
)?; )?;
} else { } else {
self.update_note_inner_without_cards( self.update_note_inner_without_cards(
@ -275,6 +294,7 @@ impl Collection {
&genctx.notetype, &genctx.notetype,
usn, usn,
mark_notes_modified, mark_notes_modified,
norm,
)?; )?;
} }
} }
@ -286,7 +306,7 @@ impl Collection {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::{anki_base91, field_checksum}; use super::{anki_base91, field_checksum};
use crate::{collection::open_test_collection, decks::DeckID, err::Result}; use crate::{collection::open_test_collection, config::ConfigKey, decks::DeckID, err::Result};
#[test] #[test]
fn test_base91() { fn test_base91() {
@ -350,4 +370,50 @@ mod test {
Ok(()) Ok(())
} }
#[test]
fn normalization() -> Result<()> {
let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
note.fields[0] = "\u{fa47}".into();
col.add_note(&mut note, DeckID(1))?;
assert_eq!(note.fields[0], "\u{6f22}");
// non-normalized searches should be converted
assert_eq!(
col.search_cards("\u{fa47}", crate::search::SortMode::NoOrder)?
.len(),
1
);
assert_eq!(
col.search_cards("front:\u{fa47}", crate::search::SortMode::NoOrder)?
.len(),
1
);
col.remove_note_only(note.id, col.usn()?)?;
// if normalization turned off, note text is entered as-is
let mut note = nt.new_note();
note.fields[0] = "\u{fa47}".into();
col.set_config(ConfigKey::NormalizeNoteText, &false)
.unwrap();
col.add_note(&mut note, DeckID(1))?;
assert_eq!(note.fields[0], "\u{fa47}");
// normalized searches won't match
assert_eq!(
col.search_cards("\u{6f22}", crate::search::SortMode::NoOrder)?
.len(),
0
);
// but original characters will
assert_eq!(
col.search_cards("\u{fa47}", crate::search::SortMode::NoOrder)?
.len(),
1
);
Ok(())
}
} }

View file

@ -337,6 +337,7 @@ impl Collection {
/// or fields have been added/removed/reordered. /// or fields have been added/removed/reordered.
pub fn update_notetype(&mut self, nt: &mut NoteType, preserve_usn: bool) -> Result<()> { pub fn update_notetype(&mut self, nt: &mut NoteType, preserve_usn: bool) -> Result<()> {
let existing = self.get_notetype(nt.id)?; let existing = self.get_notetype(nt.id)?;
let norm = self.normalize_note_text();
nt.prepare_for_update(existing.as_ref().map(AsRef::as_ref))?; nt.prepare_for_update(existing.as_ref().map(AsRef::as_ref))?;
self.transact(None, |col| { self.transact(None, |col| {
if let Some(existing_notetype) = existing { if let Some(existing_notetype) = existing {
@ -347,6 +348,7 @@ impl Collection {
nt, nt,
existing_notetype.fields.len(), existing_notetype.fields.len(),
existing_notetype.config.sort_field_idx, existing_notetype.config.sort_field_idx,
norm,
)?; )?;
col.update_cards_for_changed_templates(nt, existing_notetype.templates.len())?; col.update_cards_for_changed_templates(nt, existing_notetype.templates.len())?;
} }

View file

@ -55,6 +55,7 @@ impl Collection {
nt: &NoteType, nt: &NoteType,
previous_field_count: usize, previous_field_count: usize,
previous_sort_idx: u32, previous_sort_idx: u32,
normalize_text: bool,
) -> Result<()> { ) -> Result<()> {
let ords: Vec<_> = nt.fields.iter().map(|f| f.ord).collect(); let ords: Vec<_> = nt.fields.iter().map(|f| f.ord).collect();
if !ords_changed(&ords, previous_field_count) { if !ords_changed(&ords, previous_field_count) {
@ -63,7 +64,7 @@ impl Collection {
let nids = self.search_notes_only(&format!("mid:{}", nt.id))?; let nids = self.search_notes_only(&format!("mid:{}", nt.id))?;
for nid in nids { for nid in nids {
let mut note = self.storage.get_note(nid)?.unwrap(); let mut note = self.storage.get_note(nid)?.unwrap();
note.prepare_for_update(nt)?; note.prepare_for_update(nt, normalize_text)?;
self.storage.update_note(&note)?; self.storage.update_note(&note)?;
} }
} else { } else {
@ -92,7 +93,7 @@ impl Collection {
}) })
.map(Into::into) .map(Into::into)
.collect(); .collect();
note.prepare_for_update(nt)?; note.prepare_for_update(nt, normalize_text)?;
note.set_modified(usn); note.set_modified(usn);
self.storage.update_note(&note)?; self.storage.update_note(&note)?;
} }

View file

@ -19,7 +19,7 @@ pub enum SortMode {
impl Collection { impl Collection {
pub fn search_cards(&mut self, search: &str, order: SortMode) -> Result<Vec<CardID>> { pub fn search_cards(&mut self, search: &str, order: SortMode) -> Result<Vec<CardID>> {
let top_node = Node::Group(parse(search)?); let top_node = Node::Group(parse(search)?);
let (sql, args) = node_to_sql(self, &top_node)?; let (sql, args) = node_to_sql(self, &top_node, self.normalize_note_text())?;
let mut sql = format!( let mut sql = format!(
"select c.id from cards c, notes n where c.nid=n.id and {}", "select c.id from cards c, notes n where c.nid=n.id and {}",

View file

@ -30,7 +30,7 @@ impl Collection {
F: FnOnce(String) -> String, F: FnOnce(String) -> String,
{ {
let top_node = Node::Group(parse(search)?); let top_node = Node::Group(parse(search)?);
let (sql, args) = node_to_sql(self, &top_node)?; let (sql, args) = node_to_sql(self, &top_node, self.normalize_note_text())?;
let sql = build_sql(sql); let sql = build_sql(sql);

View file

@ -1,15 +1,19 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::err::{AnkiError, Result}; use crate::{
use crate::notetype::NoteTypeID; err::{AnkiError, Result},
use nom::branch::alt; notetype::NoteTypeID,
use nom::bytes::complete::{escaped, is_not, tag, take_while1}; };
use nom::character::complete::{anychar, char, one_of}; use nom::{
use nom::character::is_digit; branch::alt,
use nom::combinator::{all_consuming, map, map_res}; bytes::complete::{escaped, is_not, tag, take_while1},
use nom::sequence::{delimited, preceded, tuple}; character::complete::{anychar, char, one_of},
use nom::{multi::many0, IResult}; character::is_digit,
combinator::{all_consuming, map, map_res},
sequence::{delimited, preceded, tuple},
{multi::many0, IResult},
};
use std::{borrow::Cow, num}; use std::{borrow::Cow, num};
// fixme: need to preserve \ when used twice in string // fixme: need to preserve \ when used twice in string
@ -109,7 +113,6 @@ pub(super) enum TemplateKind {
} }
/// Parse the input string into a list of nodes. /// Parse the input string into a list of nodes.
#[allow(dead_code)]
pub(super) fn parse(input: &str) -> Result<Vec<Node>> { pub(super) fn parse(input: &str) -> Result<Vec<Node>> {
let input = input.trim(); let input = input.trim();
if input.is_empty() { if input.is_empty() {
@ -118,6 +121,7 @@ pub(super) fn parse(input: &str) -> Result<Vec<Node>> {
let (_, nodes) = all_consuming(group_inner)(input) let (_, nodes) = all_consuming(group_inner)(input)
.map_err(|_e| AnkiError::invalid_input("unable to parse search"))?; .map_err(|_e| AnkiError::invalid_input("unable to parse search"))?;
Ok(nodes) Ok(nodes)
} }

View file

@ -2,37 +2,47 @@
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use super::parser::{Node, PropertyKind, SearchNode, StateKind, TemplateKind}; use super::parser::{Node, PropertyKind, SearchNode, StateKind, TemplateKind};
use crate::card::CardQueue;
use crate::err::Result;
use crate::notes::field_checksum;
use crate::notetype::NoteTypeID;
use crate::text::matches_wildcard;
use crate::text::without_combining;
use crate::{ use crate::{
collection::Collection, decks::human_deck_name_to_native, card::CardQueue,
text::strip_html_preserving_image_filenames, collection::Collection,
decks::human_deck_name_to_native,
err::Result,
notes::field_checksum,
notetype::NoteTypeID,
text::matches_wildcard,
text::{normalize_to_nfc, strip_html_preserving_image_filenames, without_combining},
}; };
use lazy_static::lazy_static; use lazy_static::lazy_static;
use regex::{Captures, Regex}; use regex::{Captures, Regex};
use std::fmt::Write; use std::{borrow::Cow, fmt::Write};
struct SqlWriter<'a> { struct SqlWriter<'a> {
col: &'a mut Collection, col: &'a mut Collection,
sql: String, sql: String,
args: Vec<String>, args: Vec<String>,
normalize_note_text: bool,
} }
pub(super) fn node_to_sql(req: &mut Collection, node: &Node) -> Result<(String, Vec<String>)> { pub(super) fn node_to_sql(
let mut sctx = SqlWriter::new(req); req: &mut Collection,
node: &Node,
normalize_note_text: bool,
) -> Result<(String, Vec<String>)> {
let mut sctx = SqlWriter::new(req, normalize_note_text);
sctx.write_node_to_sql(&node)?; sctx.write_node_to_sql(&node)?;
Ok((sctx.sql, sctx.args)) Ok((sctx.sql, sctx.args))
} }
impl SqlWriter<'_> { impl SqlWriter<'_> {
fn new(col: &mut Collection) -> SqlWriter<'_> { fn new(col: &mut Collection, normalize_note_text: bool) -> SqlWriter<'_> {
let sql = String::new(); let sql = String::new();
let args = vec![]; let args = vec![];
SqlWriter { col, sql, args } SqlWriter {
col,
sql,
args,
normalize_note_text,
}
} }
fn write_node_to_sql(&mut self, node: &Node) -> Result<()> { fn write_node_to_sql(&mut self, node: &Node) -> Result<()> {
@ -55,22 +65,47 @@ impl SqlWriter<'_> {
Ok(()) Ok(())
} }
fn write_search_node_to_sql(&mut self, node: &SearchNode) -> Result<()> { /// Convert search text to NFC if note normalization is enabled.
match node { fn norm_note<'a>(&self, text: &'a str) -> Cow<'a, str> {
SearchNode::UnqualifiedText(text) => self.write_unqualified(text), if self.normalize_note_text {
SearchNode::SingleField { field, text, is_re } => { normalize_to_nfc(text)
self.write_single_field(field.as_ref(), text.as_ref(), *is_re)? } else {
text.into()
} }
}
fn write_search_node_to_sql(&mut self, node: &SearchNode) -> Result<()> {
use normalize_to_nfc as norm;
match node {
// note fields related
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
SearchNode::SingleField { field, text, is_re } => {
self.write_single_field(field.as_ref(), &self.norm_note(text), *is_re)?
}
SearchNode::Duplicates { note_type_id, text } => {
self.write_dupes(*note_type_id, &self.norm_note(text))
}
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
// other
SearchNode::AddedInDays(days) => self.write_added(*days)?, SearchNode::AddedInDays(days) => self.write_added(*days)?,
SearchNode::CardTemplate(template) => self.write_template(template)?, SearchNode::CardTemplate(template) => match template {
SearchNode::Deck(deck) => self.write_deck(deck.as_ref())?, TemplateKind::Ordinal(_) => {
self.write_template(template)?;
}
TemplateKind::Name(name) => {
self.write_template(&TemplateKind::Name(norm(name).into()))?;
}
},
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
SearchNode::NoteTypeID(ntid) => { SearchNode::NoteTypeID(ntid) => {
write!(self.sql, "n.mid = {}", ntid).unwrap(); write!(self.sql, "n.mid = {}", ntid).unwrap();
} }
SearchNode::NoteType(notetype) => self.write_note_type(notetype.as_ref())?, SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?,
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?, SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
SearchNode::Tag(tag) => self.write_tag(tag)?, SearchNode::Tag(tag) => self.write_tag(&norm(tag))?,
SearchNode::Duplicates { note_type_id, text } => self.write_dupes(*note_type_id, text),
SearchNode::State(state) => self.write_state(state)?, SearchNode::State(state) => self.write_state(state)?,
SearchNode::Flag(flag) => { SearchNode::Flag(flag) => {
write!(self.sql, "(c.flags & 7) == {}", flag).unwrap(); write!(self.sql, "(c.flags & 7) == {}", flag).unwrap();
@ -83,9 +118,6 @@ impl SqlWriter<'_> {
} }
SearchNode::Property { operator, kind } => self.write_prop(operator, kind)?, SearchNode::Property { operator, kind } => self.write_prop(operator, kind)?,
SearchNode::WholeCollection => write!(self.sql, "true").unwrap(), SearchNode::WholeCollection => write!(self.sql, "true").unwrap(),
SearchNode::Regex(re) => self.write_regex(re.as_ref()),
SearchNode::NoCombining(text) => self.write_no_combining(text.as_ref()),
SearchNode::WordBoundary(text) => self.write_word_boundary(text.as_ref()),
}; };
Ok(()) Ok(())
} }
@ -424,7 +456,7 @@ mod test {
// shortcut // shortcut
fn s(req: &mut Collection, search: &str) -> (String, Vec<String>) { fn s(req: &mut Collection, search: &str) -> (String, Vec<String>) {
let node = Node::Group(parse(search).unwrap()); let node = Node::Group(parse(search).unwrap());
node_to_sql(req, &node).unwrap() node_to_sql(req, &node, true).unwrap()
} }
#[test] #[test]