[type:nc] – ignores combining characters

Adds a comparison variant to [type] that does not count it as an error when combining characters of the expected field are missing from the provided input. These characters are still shown in the 'expected' line for reference. This is useful for languages in which, e.g., diacritics are required for reference (such as in dictionaries) but are rarely actually learned or used in everyday writing; examples include Arabic, Hebrew, Persian, and Urdu. The behavior is controlled by the bool 'combining', added as a new final parameter to both relevant compare_answer functions; on the Python side, it defaults to true. Usage in note templates: [type:nc:field] (only the front template needs to include :nc). This also removes the need to keep both variants of words/sentences as separate fields, to show them redundantly, etc.
2025-12-23 11:52:57 -05:00 · 2024-09-30 00:59:18 +02:00 · 2024-09-30 00:59:18 +02:00 · b9e7195526
commit b9e7195526
parent 5f04afe891
7 changed files with 171 additions and 36 deletions
--- a/proto/anki/card_rendering.proto
+++ b/proto/anki/card_rendering.proto
@ -165,6 +165,7 @@ message HtmlToTextLineRequest {
 message CompareAnswerRequest {
  string expected = 1;
  string provided = 2;
+  bool combining = 3;
 }

 message ExtractClozeForTypingRequest {
--- a/pylib/anki/collection.py
+++ b/pylib/anki/collection.py
@ -1152,8 +1152,12 @@ class Collection(DeprecatedNamesMixin):
        "Not intended for public consumption at this time."
        return self._backend.render_markdown(markdown=text, sanitize=sanitize)

-    def compare_answer(self, expected: str, provided: str) -> str:
-        return self._backend.compare_answer(expected=expected, provided=provided)
+    def compare_answer(
+        self, expected: str, provided: str, combining: bool = True
+    ) -> str:
+        return self._backend.compare_answer(
+            expected=expected, provided=provided, combining=combining
+        )

    def extract_cloze_for_typing(self, text: str, ordinal: int) -> str:
        return self._backend.extract_cloze_for_typing(text=text, ordinal=ordinal)
--- a/qt/aqt/reviewer.py
+++ b/qt/aqt/reviewer.py
@ -152,6 +152,7 @@ class Reviewer:
        self.previous_card: Card | None = None
        self._answeredIds: list[CardId] = []
        self._recordedAudio: str | None = None
+        self.combining: bool = True
        self.typeCorrect: str | None = None  # web init happens before this is set
        self.state: Literal["question", "answer", "transition"] | None = None
        self._refresh_needed: RefreshNeeded | None = None
@ -699,6 +700,7 @@ class Reviewer:
            return self.typeAnsAnswerFilter(buf)

    def typeAnsQuestionFilter(self, buf: str) -> str:
+        self.combining = True
        self.typeCorrect = None
        clozeIdx = None
        m = re.search(self.typeAnsPat, buf)
@ -711,6 +713,9 @@ class Reviewer:
            clozeIdx = self.card.ord + 1
            fld = fld.split(":")[1]
        # loop through fields for a match
+        if fld.startswith("nc:"):
+            self.combining = False
+            fld = fld.split(":")[1]
        for f in self.card.note_type()["flds"]:
            if f["name"] == fld:
                self.typeCorrect = self.card.note()[f["name"]]
@ -750,7 +755,7 @@ class Reviewer:
        hadHR = len(buf) != origSize
        expected = self.typeCorrect
        provided = self.typedAnswer
-        output = self.mw.col.compare_answer(expected, provided)
+        output = self.mw.col.compare_answer(expected, provided, self.combining)

        # and update the type answer area
        def repl(match: Match) -> str:
--- a/rslib/src/card_rendering/service.rs
+++ b/rslib/src/card_rendering/service.rs
@ -167,7 +167,7 @@ impl crate::services::CardRenderingService for Collection {
        &mut self,
        input: anki_proto::card_rendering::CompareAnswerRequest,
    ) -> Result<generic::String> {
-        Ok(compare_answer(&input.expected, &input.provided).into())
+        Ok(compare_answer(&input.expected, &input.provided, input.combining).into())
    }

    fn extract_cloze_for_typing(
--- a/rslib/src/template_filters.rs
+++ b/rslib/src/template_filters.rs
@ -33,6 +33,8 @@ pub(crate) fn apply_filters<'a>(
    // type:cloze is handled specially
    let filters = if filters == ["cloze", "type"] {
        &["type-cloze"]
+    } else if filters == ["nc", "type"] {
+        &["type-nc"]
    } else {
        filters
    };
@ -80,6 +82,7 @@ fn apply_filter(
        "kana" => kana_filter(text),
        "type" => type_filter(field_name),
        "type-cloze" => type_cloze_filter(field_name),
+        "type-nc" => type_nc_filter(field_name),
        "hint" => hint_filter(text, field_name),
        "cloze" => cloze_filter(text, context),
        "cloze-only" => cloze_only_filter(text, context),
@ -171,6 +174,10 @@ fn type_cloze_filter<'a>(field_name: &str) -> Cow<'a, str> {
    format!("[[type:cloze:{}]]", field_name).into()
 }

+fn type_nc_filter<'a>(field_name: &str) -> Cow<'a, str> {
+    format!("[[type:nc:{}]]", field_name).into()
+}
+
 fn hint_filter<'a>(text: &'a str, field_name: &str) -> Cow<'a, str> {
    if text.trim().is_empty() {
        return text.into();
@ -238,6 +245,7 @@ field</a>
    fn typing() {
        assert_eq!(type_filter("Front"), "[[type:Front]]");
        assert_eq!(type_cloze_filter("Front"), "[[type:cloze:Front]]");
+        assert_eq!(type_nc_filter("Front"), "[[type:nc:Front]]");
        let ctx = RenderContext {
            fields: &Default::default(),
            nonempty_fields: &Default::default(),
@ -249,6 +257,10 @@ field</a>
            apply_filters("ignored", &["cloze", "type"], "Text", &ctx),
            ("[[type:cloze:Text]]".into(), vec![])
        );
+        assert_eq!(
+            apply_filters("ignored", &["nc", "type"], "Text", &ctx),
+            ("[[type:nc:Text]]".into(), vec![])
+        );
    }

    #[test]
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@ -13,6 +13,7 @@ use regex::Regex;
 use unicase::eq as uni_eq;
 use unicode_normalization::char::is_combining_mark;
 use unicode_normalization::is_nfc;
+use unicode_normalization::is_nfkd;
 use unicode_normalization::is_nfkd_quick;
 use unicode_normalization::IsNormalized;
 use unicode_normalization::UnicodeNormalization;
@ -367,10 +368,9 @@ pub(crate) fn sanitize_html_no_images(html: &str) -> String {
 }

 pub(crate) fn normalize_to_nfc(s: &str) -> Cow<str> {
-    if !is_nfc(s) {
-        s.chars().nfc().collect::<String>().into()
-    } else {
-        s.into()
+    match is_nfc(s) {
+        false => s.chars().nfc().collect::<String>().into(),
+        true => s.into(),
    }
 }

@ -380,6 +380,13 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
    }
 }

+pub(crate) fn normalize_to_nfkd(s: &str) -> Cow<str> {
+    match is_nfkd(s) {
+        false => s.chars().nfkd().collect::<String>().into(),
+        true => s.into(),
+    }
+}
+
 static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
 '€'  =>  "E",
 'Æ'  =>  "AE",
--- a/rslib/src/typeanswer.rs
+++ b/rslib/src/typeanswer.rs
@ -10,6 +10,7 @@ use unic_ucd_category::GeneralCategory;

 use crate::card_rendering::strip_av_tags;
 use crate::text::normalize_to_nfc;
+use crate::text::normalize_to_nfkd;
 use crate::text::strip_html;

 static LINEBREAKS: Lazy<Regex> = Lazy::new(|| {
@ -33,40 +34,37 @@ macro_rules! format_typeans {
 }

 // Public API
-pub fn compare_answer(expected: &str, typed: &str) -> String {
+pub fn compare_answer(expected: &str, typed: &str, combining: bool) -> String {
    if typed.is_empty() {
        format_typeans!(htmlescape::encode_minimal(&prepare_expected(expected)))
-    } else {
+    } else if combining {
        Diff::new(expected, typed).to_html()
+    } else {
+        DiffNonCombining::new(expected, typed).to_html()
    }
 }

-struct Diff {
-    typed: Vec<char>,
-    expected: Vec<char>,
-}
+// Core Logic
+trait DiffTrait {
+    fn get_typed(&self) -> &[char];
+    fn get_expected(&self) -> &[char];
+    fn get_expected_original(&self) -> Cow<str>;

-impl Diff {
-    fn new(expected: &str, typed: &str) -> Self {
-        Self {
-            typed: normalize_to_nfc(typed).chars().collect(),
-            expected: normalize_to_nfc(&prepare_expected(expected))
-                .chars()
-                .collect(),
-        }
-    }
+    fn new(expected: &str, typed: &str) -> Self;
+    fn normalize_expected(expected: &str) -> Vec<char>;
+    fn normalize_typed(typed: &str) -> Vec<char>;

    // Entry Point
    fn to_html(&self) -> String {
-        if self.typed == self.expected {
+        if self.get_typed() == self.get_expected() {
            format_typeans!(format!(
                "<span class=typeGood>{}</span>",
-                &self.expected.iter().collect::<String>()
+                self.get_expected_original()
            ))
        } else {
            let output = self.to_tokens();
            let typed_html = render_tokens(&output.typed_tokens);
-            let expected_html = render_tokens(&output.expected_tokens);
+            let expected_html = self.render_expected_tokens(&output.expected_tokens);

            format_typeans!(format!(
                "{typed_html}<br><span id=typearrow>&darr;</span><br>{expected_html}"
@ -75,13 +73,13 @@ impl Diff {
    }

    fn to_tokens(&self) -> DiffTokens {
-        let mut matcher = SequenceMatcher::new(&self.typed, &self.expected);
+        let mut matcher = SequenceMatcher::new(self.get_typed(), self.get_expected());
        let mut typed_tokens = Vec::new();
        let mut expected_tokens = Vec::new();

        for opcode in matcher.get_opcodes() {
-            let typed_slice = slice(&self.typed, opcode.first_start, opcode.first_end);
-            let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end);
+            let typed_slice = slice(self.get_typed(), opcode.first_start, opcode.first_end);
+            let expected_slice = slice(self.get_expected(), opcode.second_start, opcode.second_end);

            match opcode.tag.as_str() {
                "equal" => {
@ -107,6 +105,8 @@ impl Diff {
            expected_tokens,
        }
    }
+
+    fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String;
 }

 // Utility Functions
@ -139,12 +139,122 @@ fn isolate_leading_mark(text: &str) -> Cow<str> {
        .next()
        .map_or(false, |c| GeneralCategory::of(c).is_mark())
    {
-        format!("\u{a0}{text}").into()
+        Cow::Owned(format!("\u{a0}{text}"))
    } else {
-        text.into()
+        Cow::Borrowed(text)
    }
 }

+// Default Comparison
+struct Diff {
+    typed: Vec<char>,
+    expected: Vec<char>,
+}
+
+impl DiffTrait for Diff {
+    fn get_typed(&self) -> &[char] {
+        &self.typed
+    }
+    fn get_expected(&self) -> &[char] {
+        &self.expected
+    }
+    fn get_expected_original(&self) -> Cow<str> {
+        Cow::Owned(self.get_expected().iter().collect::<String>())
+    }
+
+    fn new(expected: &str, typed: &str) -> Self {
+        Self {
+            typed: Self::normalize_typed(typed),
+            expected: Self::normalize_expected(expected),
+        }
+    }
+    fn normalize_expected(expected: &str) -> Vec<char> {
+        normalize_to_nfc(&prepare_expected(expected))
+            .chars()
+            .collect()
+    }
+    fn normalize_typed(typed: &str) -> Vec<char> {
+        normalize_to_nfc(typed).chars().collect()
+    }
+
+    fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
+        render_tokens(tokens)
+    }
+}
+
+// Non-Combining Comparison
+struct DiffNonCombining {
+    base: Diff,
+    expected_split: Vec<String>,
+    expected_original: String,
+}
+
+impl DiffTrait for DiffNonCombining {
+    fn get_typed(&self) -> &[char] {
+        &self.base.typed
+    }
+    fn get_expected(&self) -> &[char] {
+        &self.base.expected
+    }
+    fn get_expected_original(&self) -> Cow<str> {
+        Cow::Borrowed(&self.expected_original)
+    }
+
+    fn new(expected: &str, typed: &str) -> Self {
+        // filter out combining elements
+        let mut expected_stripped = String::new();
+        // tokenized into "char+combining" for final rendering
+        let mut expected_split: Vec<String> = Vec::new();
+        for c in Self::normalize_expected(expected) {
+            if unicode_normalization::char::is_combining_mark(c) {
+                if let Some(last) = expected_split.last_mut() {
+                    last.push(c);
+                }
+            } else {
+                expected_stripped.push(c);
+                expected_split.push(c.to_string());
+            }
+        }
+
+        Self {
+            base: Diff {
+                typed: Self::normalize_typed(typed),
+                expected: expected_stripped.chars().collect(),
+            },
+            expected_split,
+            expected_original: prepare_expected(expected),
+        }
+    }
+    fn normalize_expected(expected: &str) -> Vec<char> {
+        normalize_to_nfkd(&prepare_expected(expected))
+            .chars()
+            .collect()
+    }
+    fn normalize_typed(typed: &str) -> Vec<char> {
+        normalize_to_nfkd(typed)
+            .chars()
+            .filter(|c| !unicode_normalization::char::is_combining_mark(*c))
+            .collect()
+    }
+
+    // Since the combining characters are still required learning content, use
+    // expected_split to show them directly in the "expected" line, rather than
+    // having to otherwise e.g. include their field twice in the note template.
+    fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
+        let mut idx = 0;
+        tokens.iter().fold(String::new(), |mut acc, token| {
+            let end = idx + token.text.chars().count();
+            let txt = self.expected_split[idx..end].concat();
+            idx = end;
+            let encoded_text = htmlescape::encode_minimal(&txt);
+            let class = token.to_class();
+            acc.push_str(&format!("<span class={class}>{encoded_text}</span>"));
+            acc
+        })
+    }
+}
+
+// Utility Items
 #[derive(Debug, PartialEq, Eq)]
 struct DiffTokens {
    typed_tokens: Vec<DiffToken>,
@ -168,19 +278,15 @@ impl DiffToken {
    fn new(kind: DiffTokenKind, text: String) -> Self {
        Self { kind, text }
    }
-
    fn good(text: String) -> Self {
        Self::new(DiffTokenKind::Good, text)
    }
-
    fn bad(text: String) -> Self {
        Self::new(DiffTokenKind::Bad, text)
    }
-
    fn missing(text: String) -> Self {
        Self::new(DiffTokenKind::Missing, text)
    }
-
    fn to_class(&self) -> &'static str {
        match self.kind {
            DiffTokenKind::Good => "typeGood",
@ -293,7 +399,7 @@ mod test {

    #[test]
    fn empty_input_shows_as_code() {
-        let ctx = compare_answer("<div>123</div>", "");
+        let ctx = compare_answer("<div>123</div>", "", true);
        assert_eq!(ctx, "<code id=typeans>123</code>");
    }