typeanswer: NFC fix & cleanup (#3482)

* typeanswer: cleanup

* DiffNonCombining's new() used String where a plain Vec is appropriate
* get rid of normalize_typed for DiffTrait again by pulling the code into DiffNonCombining's new()
* two DiffNonCombining test cases

* typeanswer: return to NFC & fix typos
Author: a.r · 2024-10-11 12:33:08 +02:00 · committed by GitHub
parent 18889239d2
commit 8af63f81eb
4 changed files with 34 additions and 37 deletions
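
For context on the "return to NFC" change below: NFKD splits precomposed characters such as Hangul syllables and accented Latin letters into several code points, so the character-level diff ended up comparing fragments the user never typed. A minimal standalone sketch (not part of this commit) using the unicode-normalization crate the code already depends on:

use unicode_normalization::UnicodeNormalization;

fn main() {
    // NFC keeps "쓰다듬다" as four syllables; NFKD expands it to nine jamo,
    // so a per-character diff no longer lines up with what was typed.
    assert_eq!("쓰다듬다".nfc().count(), 4);
    assert_eq!("쓰다듬다".nfkd().count(), 9);

    // Same for accents: NFC keeps 'é' as one char, NFKD yields 'e' + U+0301.
    assert_eq!("qué".nfc().count(), 3);
    assert_eq!("qué".nfkd().count(), 4);
}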


@@ -712,10 +712,10 @@ class Reviewer:
             # get field and cloze position
             clozeIdx = self.card.ord + 1
             fld = fld.split(":")[1]
-        # loop through fields for a match
         if fld.startswith("nc:"):
             self._combining = False
             fld = fld.split(":")[1]
+        # loop through fields for a match
         for f in self.card.note_type()["flds"]:
             if f["name"] == fld:
                 self.typeCorrect = self.card.note()[f["name"]]


@@ -30,7 +30,7 @@ pub(crate) fn apply_filters<'a>(
 ) -> (Cow<'a, str>, Vec<String>) {
     let mut text: Cow<str> = text.into();
 
-    // type:cloze is handled specially
+    // type:cloze & type:nc are handled specially
     let filters = if filters == ["cloze", "type"] {
         &["type-cloze"]
     } else if filters == ["nc", "type"] {
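
A {{type:nc:FieldName}} reference reaches this point as the filter list ["nc", "type"], which the branch above collapses into the single internal "type-nc" filter before regular filter processing, mirroring the existing "type-cloze" special case. A standalone sketch of that remapping (the helper name is hypothetical):

// Hypothetical free function mirroring the special-casing in apply_filters.
fn remap_type_filters<'a>(filters: &'a [&'a str]) -> &'a [&'a str] {
    if filters == ["cloze", "type"] {
        &["type-cloze"]
    } else if filters == ["nc", "type"] {
        &["type-nc"]
    } else {
        filters
    }
}

fn main() {
    assert_eq!(remap_type_filters(&["nc", "type"]), &["type-nc"]);
    assert_eq!(remap_type_filters(&["text"]), &["text"]);
}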


@@ -13,7 +13,6 @@ use regex::Regex;
 use unicase::eq as uni_eq;
 use unicode_normalization::char::is_combining_mark;
 use unicode_normalization::is_nfc;
-use unicode_normalization::is_nfkd;
 use unicode_normalization::is_nfkd_quick;
 use unicode_normalization::IsNormalized;
 use unicode_normalization::UnicodeNormalization;
@@ -399,13 +398,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
     }
 }
 
-pub(crate) fn normalize_to_nfkd(s: &str) -> Cow<str> {
-    match is_nfkd(s) {
-        false => s.chars().nfkd().collect::<String>().into(),
-        true => s.into(),
-    }
-}
-
 static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
     '€' => "E",
     'Æ' => "AE",


@@ -9,7 +9,7 @@ use regex::Regex;
 use unic_ucd_category::GeneralCategory;
 
 use crate::card_rendering::strip_av_tags;
-use crate::text::normalize_to_nfkd;
+use crate::text::normalize_to_nfc;
 use crate::text::strip_html;
 
 static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
@@ -50,7 +50,6 @@ trait DiffTrait {
     fn get_expected_original(&self) -> Cow<str>;
 
     fn new(expected: &str, typed: &str) -> Self;
-    fn normalize_typed(typed: &str) -> Vec<char>;
 
     // Entry Point
     fn to_html(&self) -> String {
@@ -109,7 +108,7 @@ trait DiffTrait {
 
 // Utility Functions
 fn normalize(string: &str) -> Vec<char> {
-    normalize_to_nfkd(string).chars().collect()
+    normalize_to_nfc(string).chars().collect()
 }
 
 fn slice(chars: &[char], start: usize, end: usize) -> String {
@@ -166,13 +165,10 @@ impl DiffTrait for Diff {
 
     fn new(expected: &str, typed: &str) -> Self {
         Self {
-            typed: Self::normalize_typed(typed),
+            typed: normalize(typed),
             expected: normalize(expected),
         }
     }
-    fn normalize_typed(typed: &str) -> Vec<char> {
-        normalize(typed)
-    }
 
     fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
         render_tokens(tokens)
@@ -199,9 +195,17 @@ impl DiffTrait for DiffNonCombining {
 
     fn new(expected: &str, typed: &str) -> Self {
         // filter out combining elements
-        let mut expected_stripped = String::new();
-        // tokenized into "char+combining" for final rendering
+        let mut typed_stripped: Vec<char> = Vec::new();
+        let mut expected_stripped: Vec<char> = Vec::new();
+        // also tokenize into "char+combining" for final rendering
         let mut expected_split: Vec<String> = Vec::new();
+
+        for c in normalize(typed) {
+            if !unicode_normalization::char::is_combining_mark(c) {
+                typed_stripped.push(c);
+            }
+        }
+
         for c in normalize(expected) {
             if unicode_normalization::char::is_combining_mark(c) {
                 if let Some(last) = expected_split.last_mut() {
@@ -215,24 +219,17 @@ impl DiffTrait for DiffNonCombining {
 
         Self {
             base: Diff {
-                typed: Self::normalize_typed(typed),
-                expected: expected_stripped.chars().collect(),
+                typed: typed_stripped,
+                expected: expected_stripped,
             },
             expected_split,
             expected_original: expected.to_string(),
         }
     }
 
-    fn normalize_typed(typed: &str) -> Vec<char> {
-        normalize_to_nfkd(typed)
-            .chars()
-            .filter(|c| !unicode_normalization::char::is_combining_mark(*c))
-            .collect()
-    }
-
-    // Since the combining characters are still required learning content, use
+    // Combining characters are still required learning content, so use
     // expected_split to show them directly in the "expected" line, rather than
-    // having to otherwise e.g. include their field twice in the note template.
+    // having to otherwise e.g. include their field twice on the note template.
     fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
         let mut idx = 0;
         tokens.iter().fold(String::new(), |mut acc, token| {
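
The reworked new() above strips combining marks from both inputs before diffing, while expected_split keeps the marks so they can still be rendered in the "expected" line. A standalone sketch of the stripping step (the helper name is hypothetical), matching the new tests further down:

use unicode_normalization::char::is_combining_mark;
use unicode_normalization::UnicodeNormalization;

// Hypothetical helper mirroring the filtering loops in DiffNonCombining::new().
fn strip_combining(s: &str) -> Vec<char> {
    s.nfc().filter(|c| !is_combining_mark(*c)).collect()
}

fn main() {
    // Hebrew niqqud are combining marks, so the pointed and unpointed
    // spellings compare equal once stripped.
    assert_eq!(strip_combining("שִׁנּוּן"), strip_combining("שנון"));
    // Base letters still have to match: final ף vs. regular פ stays a difference.
    assert_ne!(strip_combining("חוֹף"), strip_combining("חופ"));
}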
@@ -313,9 +310,7 @@ mod test {
             vec![
                 bad("y"),
                 good(" ahora q"),
-                missing("-"),
-                good("e"),
-                missing("-"),
+                bad("e"),
                 good(" vamos"),
                 missing("-"),
                 good("a hacer"),
@@ -327,9 +322,7 @@ mod test {
             vec![
                 missing("¿Y"),
                 good(" ahora q"),
-                missing("u"),
-                good("e"),
-                missing("́"),
+                missing("ué"),
                 good(" vamos"),
                 missing(" "),
                 good("a hacer"),
@@ -369,7 +362,7 @@ mod test {
         let ctx = Diff::new("쓰다듬다", "스다뜸다");
         assert_eq!(
             ctx.to_tokens().typed_tokens,
-            &[bad("ᄉ"), good("ᅳ다"), bad("ᄄ"), good("ᅳᆷ다"),]
+            &[bad("스"), good("다"), bad("뜸"), good("다"),]
         );
     }
@@ -419,4 +412,16 @@ mod test {
             "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"
         );
     }
+
+    #[test]
+    fn noncombining_comparison() {
+        assert_eq!(
+            compare_answer("שִׁנּוּן", "שנון", false),
+            "<code id=typeans><span class=typeGood>שִׁנּוּן</span></code>"
+        );
+        assert_eq!(
+            compare_answer("חוֹף", "חופ", false),
+            "<code id=typeans><span class=typeGood>חו</span><span class=typeBad>פ</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>חוֹ</span><span class=typeMissed>ף</span></code>"
+        );
+    }
 }