diff --git a/proto/anki/card_rendering.proto b/proto/anki/card_rendering.proto index 145e4b0db..4035ae68b 100644 --- a/proto/anki/card_rendering.proto +++ b/proto/anki/card_rendering.proto @@ -165,6 +165,7 @@ message HtmlToTextLineRequest { message CompareAnswerRequest { string expected = 1; string provided = 2; + bool combining = 3; } message ExtractClozeForTypingRequest { diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 6ae37befe..66b2fb618 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -1152,8 +1152,12 @@ class Collection(DeprecatedNamesMixin): "Not intended for public consumption at this time." return self._backend.render_markdown(markdown=text, sanitize=sanitize) - def compare_answer(self, expected: str, provided: str) -> str: - return self._backend.compare_answer(expected=expected, provided=provided) + def compare_answer( + self, expected: str, provided: str, combining: bool = True + ) -> str: + return self._backend.compare_answer( + expected=expected, provided=provided, combining=combining + ) def extract_cloze_for_typing(self, text: str, ordinal: int) -> str: return self._backend.extract_cloze_for_typing(text=text, ordinal=ordinal) diff --git a/qt/aqt/reviewer.py b/qt/aqt/reviewer.py index 4a16f7b47..b5a6e4d6f 100644 --- a/qt/aqt/reviewer.py +++ b/qt/aqt/reviewer.py @@ -152,6 +152,7 @@ class Reviewer: self.previous_card: Card | None = None self._answeredIds: list[CardId] = [] self._recordedAudio: str | None = None + self.combining: bool = True self.typeCorrect: str | None = None # web init happens before this is set self.state: Literal["question", "answer", "transition"] | None = None self._refresh_needed: RefreshNeeded | None = None @@ -699,6 +700,7 @@ class Reviewer: return self.typeAnsAnswerFilter(buf) def typeAnsQuestionFilter(self, buf: str) -> str: + self.combining = True self.typeCorrect = None clozeIdx = None m = re.search(self.typeAnsPat, buf) @@ -711,6 +713,9 @@ class Reviewer: clozeIdx = self.card.ord + 1 fld = fld.split(":")[1] # loop through fields for a match + if fld.startswith("nc:"): + self.combining = False + fld = fld.split(":")[1] for f in self.card.note_type()["flds"]: if f["name"] == fld: self.typeCorrect = self.card.note()[f["name"]] @@ -750,7 +755,7 @@ class Reviewer: hadHR = len(buf) != origSize expected = self.typeCorrect provided = self.typedAnswer - output = self.mw.col.compare_answer(expected, provided) + output = self.mw.col.compare_answer(expected, provided, self.combining) # and update the type answer area def repl(match: Match) -> str: diff --git a/rslib/src/card_rendering/service.rs b/rslib/src/card_rendering/service.rs index 7e0f9ba67..8d1585725 100644 --- a/rslib/src/card_rendering/service.rs +++ b/rslib/src/card_rendering/service.rs @@ -167,7 +167,7 @@ impl crate::services::CardRenderingService for Collection { &mut self, input: anki_proto::card_rendering::CompareAnswerRequest, ) -> Result { - Ok(compare_answer(&input.expected, &input.provided).into()) + Ok(compare_answer(&input.expected, &input.provided, input.combining).into()) } fn extract_cloze_for_typing( diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs index b6408d965..f55d45862 100644 --- a/rslib/src/template_filters.rs +++ b/rslib/src/template_filters.rs @@ -33,6 +33,8 @@ pub(crate) fn apply_filters<'a>( // type:cloze is handled specially let filters = if filters == ["cloze", "type"] { &["type-cloze"] + } else if filters == ["nc", "type"] { + &["type-nc"] } else { filters }; @@ -80,6 +82,7 @@ fn apply_filter( "kana" => kana_filter(text), "type" => type_filter(field_name), "type-cloze" => type_cloze_filter(field_name), + "type-nc" => type_nc_filter(field_name), "hint" => hint_filter(text, field_name), "cloze" => cloze_filter(text, context), "cloze-only" => cloze_only_filter(text, context), @@ -171,6 +174,10 @@ fn type_cloze_filter<'a>(field_name: &str) -> Cow<'a, str> { format!("[[type:cloze:{}]]", field_name).into() } +fn type_nc_filter<'a>(field_name: &str) -> Cow<'a, str> { + format!("[[type:nc:{}]]", field_name).into() +} + fn hint_filter<'a>(text: &'a str, field_name: &str) -> Cow<'a, str> { if text.trim().is_empty() { return text.into(); @@ -238,6 +245,7 @@ field fn typing() { assert_eq!(type_filter("Front"), "[[type:Front]]"); assert_eq!(type_cloze_filter("Front"), "[[type:cloze:Front]]"); + assert_eq!(type_nc_filter("Front"), "[[type:nc:Front]]"); let ctx = RenderContext { fields: &Default::default(), nonempty_fields: &Default::default(), @@ -249,6 +257,10 @@ field apply_filters("ignored", &["cloze", "type"], "Text", &ctx), ("[[type:cloze:Text]]".into(), vec![]) ); + assert_eq!( + apply_filters("ignored", &["nc", "type"], "Text", &ctx), + ("[[type:nc:Text]]".into(), vec![]) + ); } #[test] diff --git a/rslib/src/text.rs b/rslib/src/text.rs index b32ef45c1..7f741540c 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -13,6 +13,7 @@ use regex::Regex; use unicase::eq as uni_eq; use unicode_normalization::char::is_combining_mark; use unicode_normalization::is_nfc; +use unicode_normalization::is_nfkd; use unicode_normalization::is_nfkd_quick; use unicode_normalization::IsNormalized; use unicode_normalization::UnicodeNormalization; @@ -367,10 +368,9 @@ pub(crate) fn sanitize_html_no_images(html: &str) -> String { } pub(crate) fn normalize_to_nfc(s: &str) -> Cow { - if !is_nfc(s) { - s.chars().nfc().collect::().into() - } else { - s.into() + match is_nfc(s) { + false => s.chars().nfc().collect::().into(), + true => s.into(), } } @@ -380,6 +380,13 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) { } } +pub(crate) fn normalize_to_nfkd(s: &str) -> Cow { + match is_nfkd(s) { + false => s.chars().nfkd().collect::().into(), + true => s.into(), + } +} + static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map = phf::phf_map! { '€' => "E", 'Æ' => "AE", diff --git a/rslib/src/typeanswer.rs b/rslib/src/typeanswer.rs index 68d6fd796..d0ff00146 100644 --- a/rslib/src/typeanswer.rs +++ b/rslib/src/typeanswer.rs @@ -10,6 +10,7 @@ use unic_ucd_category::GeneralCategory; use crate::card_rendering::strip_av_tags; use crate::text::normalize_to_nfc; +use crate::text::normalize_to_nfkd; use crate::text::strip_html; static LINEBREAKS: Lazy = Lazy::new(|| { @@ -33,40 +34,37 @@ macro_rules! format_typeans { } // Public API -pub fn compare_answer(expected: &str, typed: &str) -> String { +pub fn compare_answer(expected: &str, typed: &str, combining: bool) -> String { if typed.is_empty() { format_typeans!(htmlescape::encode_minimal(&prepare_expected(expected))) - } else { + } else if combining { Diff::new(expected, typed).to_html() + } else { + DiffNonCombining::new(expected, typed).to_html() } } -struct Diff { - typed: Vec, - expected: Vec, -} +// Core Logic +trait DiffTrait { + fn get_typed(&self) -> &[char]; + fn get_expected(&self) -> &[char]; + fn get_expected_original(&self) -> Cow; -impl Diff { - fn new(expected: &str, typed: &str) -> Self { - Self { - typed: normalize_to_nfc(typed).chars().collect(), - expected: normalize_to_nfc(&prepare_expected(expected)) - .chars() - .collect(), - } - } + fn new(expected: &str, typed: &str) -> Self; + fn normalize_expected(expected: &str) -> Vec; + fn normalize_typed(typed: &str) -> Vec; // Entry Point fn to_html(&self) -> String { - if self.typed == self.expected { + if self.get_typed() == self.get_expected() { format_typeans!(format!( "{}", - &self.expected.iter().collect::() + self.get_expected_original() )) } else { let output = self.to_tokens(); let typed_html = render_tokens(&output.typed_tokens); - let expected_html = render_tokens(&output.expected_tokens); + let expected_html = self.render_expected_tokens(&output.expected_tokens); format_typeans!(format!( "{typed_html}

{expected_html}" @@ -75,13 +73,13 @@ impl Diff { } fn to_tokens(&self) -> DiffTokens { - let mut matcher = SequenceMatcher::new(&self.typed, &self.expected); + let mut matcher = SequenceMatcher::new(self.get_typed(), self.get_expected()); let mut typed_tokens = Vec::new(); let mut expected_tokens = Vec::new(); for opcode in matcher.get_opcodes() { - let typed_slice = slice(&self.typed, opcode.first_start, opcode.first_end); - let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end); + let typed_slice = slice(self.get_typed(), opcode.first_start, opcode.first_end); + let expected_slice = slice(self.get_expected(), opcode.second_start, opcode.second_end); match opcode.tag.as_str() { "equal" => { @@ -107,6 +105,8 @@ impl Diff { expected_tokens, } } + + fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String; } // Utility Functions @@ -139,12 +139,122 @@ fn isolate_leading_mark(text: &str) -> Cow { .next() .map_or(false, |c| GeneralCategory::of(c).is_mark()) { - format!("\u{a0}{text}").into() + Cow::Owned(format!("\u{a0}{text}")) } else { - text.into() + Cow::Borrowed(text) } } +// Default Comparison +struct Diff { + typed: Vec, + expected: Vec, +} + +impl DiffTrait for Diff { + fn get_typed(&self) -> &[char] { + &self.typed + } + fn get_expected(&self) -> &[char] { + &self.expected + } + fn get_expected_original(&self) -> Cow { + Cow::Owned(self.get_expected().iter().collect::()) + } + + fn new(expected: &str, typed: &str) -> Self { + Self { + typed: Self::normalize_typed(typed), + expected: Self::normalize_expected(expected), + } + } + fn normalize_expected(expected: &str) -> Vec { + normalize_to_nfc(&prepare_expected(expected)) + .chars() + .collect() + } + fn normalize_typed(typed: &str) -> Vec { + normalize_to_nfc(typed).chars().collect() + } + + fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String { + render_tokens(tokens) + } +} + +// Non-Combining Comparison +struct DiffNonCombining { + base: Diff, + expected_split: Vec, + expected_original: String, +} + +impl DiffTrait for DiffNonCombining { + fn get_typed(&self) -> &[char] { + &self.base.typed + } + fn get_expected(&self) -> &[char] { + &self.base.expected + } + fn get_expected_original(&self) -> Cow { + Cow::Borrowed(&self.expected_original) + } + + fn new(expected: &str, typed: &str) -> Self { + // filter out combining elements + let mut expected_stripped = String::new(); + // tokenized into "char+combining" for final rendering + let mut expected_split: Vec = Vec::new(); + for c in Self::normalize_expected(expected) { + if unicode_normalization::char::is_combining_mark(c) { + if let Some(last) = expected_split.last_mut() { + last.push(c); + } + } else { + expected_stripped.push(c); + expected_split.push(c.to_string()); + } + } + + Self { + base: Diff { + typed: Self::normalize_typed(typed), + expected: expected_stripped.chars().collect(), + }, + expected_split, + expected_original: prepare_expected(expected), + } + } + fn normalize_expected(expected: &str) -> Vec { + normalize_to_nfkd(&prepare_expected(expected)) + .chars() + .collect() + } + fn normalize_typed(typed: &str) -> Vec { + normalize_to_nfkd(typed) + .chars() + .filter(|c| !unicode_normalization::char::is_combining_mark(*c)) + .collect() + } + + // Since the combining characters are still required learning content, use + // expected_split to show them directly in the "expected" line, rather than + // having to otherwise e.g. include their field twice in the note template. + fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String { + let mut idx = 0; + tokens.iter().fold(String::new(), |mut acc, token| { + let end = idx + token.text.chars().count(); + let txt = self.expected_split[idx..end].concat(); + idx = end; + let encoded_text = htmlescape::encode_minimal(&txt); + let class = token.to_class(); + acc.push_str(&format!("{encoded_text}")); + acc + }) + } +} + +// Utility Items #[derive(Debug, PartialEq, Eq)] struct DiffTokens { typed_tokens: Vec, @@ -168,19 +278,15 @@ impl DiffToken { fn new(kind: DiffTokenKind, text: String) -> Self { Self { kind, text } } - fn good(text: String) -> Self { Self::new(DiffTokenKind::Good, text) } - fn bad(text: String) -> Self { Self::new(DiffTokenKind::Bad, text) } - fn missing(text: String) -> Self { Self::new(DiffTokenKind::Missing, text) } - fn to_class(&self) -> &'static str { match self.kind { DiffTokenKind::Good => "typeGood", @@ -293,7 +399,7 @@ mod test { #[test] fn empty_input_shows_as_code() { - let ctx = compare_answer("
123
", ""); + let ctx = compare_answer("
123
", "", true); assert_eq!(ctx, "123"); }