diff --git a/proto/anki/card_rendering.proto b/proto/anki/card_rendering.proto index 145e4b0db..4035ae68b 100644 --- a/proto/anki/card_rendering.proto +++ b/proto/anki/card_rendering.proto @@ -165,6 +165,7 @@ message HtmlToTextLineRequest { message CompareAnswerRequest { string expected = 1; string provided = 2; + bool combining = 3; } message ExtractClozeForTypingRequest { diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 6ae37befe..66b2fb618 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -1152,8 +1152,12 @@ class Collection(DeprecatedNamesMixin): "Not intended for public consumption at this time." return self._backend.render_markdown(markdown=text, sanitize=sanitize) - def compare_answer(self, expected: str, provided: str) -> str: - return self._backend.compare_answer(expected=expected, provided=provided) + def compare_answer( + self, expected: str, provided: str, combining: bool = True + ) -> str: + return self._backend.compare_answer( + expected=expected, provided=provided, combining=combining + ) def extract_cloze_for_typing(self, text: str, ordinal: int) -> str: return self._backend.extract_cloze_for_typing(text=text, ordinal=ordinal) diff --git a/qt/aqt/reviewer.py b/qt/aqt/reviewer.py index 4a16f7b47..f64f82208 100644 --- a/qt/aqt/reviewer.py +++ b/qt/aqt/reviewer.py @@ -152,6 +152,7 @@ class Reviewer: self.previous_card: Card | None = None self._answeredIds: list[CardId] = [] self._recordedAudio: str | None = None + self._combining: bool = True self.typeCorrect: str | None = None # web init happens before this is set self.state: Literal["question", "answer", "transition"] | None = None self._refresh_needed: RefreshNeeded | None = None @@ -699,6 +700,7 @@ class Reviewer: return self.typeAnsAnswerFilter(buf) def typeAnsQuestionFilter(self, buf: str) -> str: + self._combining = True self.typeCorrect = None clozeIdx = None m = re.search(self.typeAnsPat, buf) @@ -711,6 +713,9 @@ class Reviewer: clozeIdx = self.card.ord + 1 fld = fld.split(":")[1] # loop through fields for a match + if fld.startswith("nc:"): + self._combining = False + fld = fld.split(":")[1] for f in self.card.note_type()["flds"]: if f["name"] == fld: self.typeCorrect = self.card.note()[f["name"]] @@ -750,7 +755,7 @@ class Reviewer: hadHR = len(buf) != origSize expected = self.typeCorrect provided = self.typedAnswer - output = self.mw.col.compare_answer(expected, provided) + output = self.mw.col.compare_answer(expected, provided, self._combining) # and update the type answer area def repl(match: Match) -> str: diff --git a/rslib/src/card_rendering/service.rs b/rslib/src/card_rendering/service.rs index 7e0f9ba67..8d1585725 100644 --- a/rslib/src/card_rendering/service.rs +++ b/rslib/src/card_rendering/service.rs @@ -167,7 +167,7 @@ impl crate::services::CardRenderingService for Collection { &mut self, input: anki_proto::card_rendering::CompareAnswerRequest, ) -> Result { - Ok(compare_answer(&input.expected, &input.provided).into()) + Ok(compare_answer(&input.expected, &input.provided, input.combining).into()) } fn extract_cloze_for_typing( diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs index b6408d965..f55d45862 100644 --- a/rslib/src/template_filters.rs +++ b/rslib/src/template_filters.rs @@ -33,6 +33,8 @@ pub(crate) fn apply_filters<'a>( // type:cloze is handled specially let filters = if filters == ["cloze", "type"] { &["type-cloze"] + } else if filters == ["nc", "type"] { + &["type-nc"] } else { filters }; @@ -80,6 +82,7 @@ fn apply_filter( "kana" => kana_filter(text), "type" => type_filter(field_name), "type-cloze" => type_cloze_filter(field_name), + "type-nc" => type_nc_filter(field_name), "hint" => hint_filter(text, field_name), "cloze" => cloze_filter(text, context), "cloze-only" => cloze_only_filter(text, context), @@ -171,6 +174,10 @@ fn type_cloze_filter<'a>(field_name: &str) -> Cow<'a, str> { format!("[[type:cloze:{}]]", field_name).into() } +fn type_nc_filter<'a>(field_name: &str) -> Cow<'a, str> { + format!("[[type:nc:{}]]", field_name).into() +} + fn hint_filter<'a>(text: &'a str, field_name: &str) -> Cow<'a, str> { if text.trim().is_empty() { return text.into(); @@ -238,6 +245,7 @@ field fn typing() { assert_eq!(type_filter("Front"), "[[type:Front]]"); assert_eq!(type_cloze_filter("Front"), "[[type:cloze:Front]]"); + assert_eq!(type_nc_filter("Front"), "[[type:nc:Front]]"); let ctx = RenderContext { fields: &Default::default(), nonempty_fields: &Default::default(), @@ -249,6 +257,10 @@ field apply_filters("ignored", &["cloze", "type"], "Text", &ctx), ("[[type:cloze:Text]]".into(), vec![]) ); + assert_eq!( + apply_filters("ignored", &["nc", "type"], "Text", &ctx), + ("[[type:nc:Text]]".into(), vec![]) + ); } #[test] diff --git a/rslib/src/text.rs b/rslib/src/text.rs index b32ef45c1..7f741540c 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -13,6 +13,7 @@ use regex::Regex; use unicase::eq as uni_eq; use unicode_normalization::char::is_combining_mark; use unicode_normalization::is_nfc; +use unicode_normalization::is_nfkd; use unicode_normalization::is_nfkd_quick; use unicode_normalization::IsNormalized; use unicode_normalization::UnicodeNormalization; @@ -367,10 +368,9 @@ pub(crate) fn sanitize_html_no_images(html: &str) -> String { } pub(crate) fn normalize_to_nfc(s: &str) -> Cow { - if !is_nfc(s) { - s.chars().nfc().collect::().into() - } else { - s.into() + match is_nfc(s) { + false => s.chars().nfc().collect::().into(), + true => s.into(), } } @@ -380,6 +380,13 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) { } } +pub(crate) fn normalize_to_nfkd(s: &str) -> Cow { + match is_nfkd(s) { + false => s.chars().nfkd().collect::().into(), + true => s.into(), + } +} + static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map = phf::phf_map! { '€' => "E", 'Æ' => "AE", diff --git a/rslib/src/typeanswer.rs b/rslib/src/typeanswer.rs index b8d8d4b9a..c1d6547fb 100644 --- a/rslib/src/typeanswer.rs +++ b/rslib/src/typeanswer.rs @@ -9,7 +9,7 @@ use regex::Regex; use unic_ucd_category::GeneralCategory; use crate::card_rendering::strip_av_tags; -use crate::text::normalize_to_nfc; +use crate::text::normalize_to_nfkd; use crate::text::strip_html; static LINEBREAKS: Lazy = Lazy::new(|| { @@ -33,85 +33,85 @@ macro_rules! format_typeans { } // Public API -pub fn compare_answer(expected: &str, provided: &str) -> String { - if provided.is_empty() { - format_typeans!(htmlescape::encode_minimal(expected)) +pub fn compare_answer(expected: &str, typed: &str, combining: bool) -> String { + if typed.is_empty() { + format_typeans!(htmlescape::encode_minimal(&prepare_expected(expected))) + } else if combining { + Diff::new(expected, typed).to_html() } else { - Diff::new(expected, provided).to_html() + DiffNonCombining::new(expected, typed).to_html() } } -struct Diff { - provided: Vec, - expected: Vec, - expected_original: String, -} +// Core Logic +trait DiffTrait { + fn get_typed(&self) -> &[char]; + fn get_expected(&self) -> &[char]; + fn get_expected_original(&self) -> Cow; -impl Diff { - fn new(expected: &str, provided: &str) -> Self { - Self { - provided: normalize_to_nfc(provided).chars().collect(), - expected: normalize_to_nfc(&prepare_expected(expected)) - .chars() - .collect(), - expected_original: expected.to_string(), - } - } + fn new(expected: &str, typed: &str) -> Self; + fn normalize_typed(typed: &str) -> Vec; // Entry Point fn to_html(&self) -> String { - if self.provided == self.expected { + if self.get_typed() == self.get_expected() { format_typeans!(format!( "{}", - self.expected_original + self.get_expected_original() )) } else { let output = self.to_tokens(); - let provided_html = render_tokens(&output.provided_tokens); - let expected_html = render_tokens(&output.expected_tokens); + let typed_html = render_tokens(&output.typed_tokens); + let expected_html = self.render_expected_tokens(&output.expected_tokens); format_typeans!(format!( - "{provided_html}

{expected_html}" + "{typed_html}

{expected_html}" )) } } fn to_tokens(&self) -> DiffTokens { - let mut matcher = SequenceMatcher::new(&self.provided, &self.expected); - let mut provided_tokens = Vec::new(); + let mut matcher = SequenceMatcher::new(self.get_typed(), self.get_expected()); + let mut typed_tokens = Vec::new(); let mut expected_tokens = Vec::new(); for opcode in matcher.get_opcodes() { - let provided_slice = slice(&self.provided, opcode.first_start, opcode.first_end); - let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end); + let typed_slice = slice(self.get_typed(), opcode.first_start, opcode.first_end); + let expected_slice = slice(self.get_expected(), opcode.second_start, opcode.second_end); match opcode.tag.as_str() { "equal" => { - provided_tokens.push(DiffToken::good(provided_slice)); + typed_tokens.push(DiffToken::good(typed_slice)); expected_tokens.push(DiffToken::good(expected_slice)); } - "delete" => provided_tokens.push(DiffToken::bad(provided_slice)), + "delete" => typed_tokens.push(DiffToken::bad(typed_slice)), "insert" => { - provided_tokens.push(DiffToken::missing( + typed_tokens.push(DiffToken::missing( "-".repeat(expected_slice.chars().count()), )); expected_tokens.push(DiffToken::missing(expected_slice)); } "replace" => { - provided_tokens.push(DiffToken::bad(provided_slice)); + typed_tokens.push(DiffToken::bad(typed_slice)); expected_tokens.push(DiffToken::missing(expected_slice)); } _ => unreachable!(), } } DiffTokens { - provided_tokens, + typed_tokens, expected_tokens, } } + + fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String; } // Utility Functions +fn normalize(string: &str) -> Vec { + normalize_to_nfkd(string).chars().collect() +} + fn slice(chars: &[char], start: usize, end: usize) -> String { chars[start..end].iter().collect() } @@ -139,17 +139,118 @@ fn isolate_leading_mark(text: &str) -> Cow { if text .chars() .next() - .map_or(false, |ch| GeneralCategory::of(ch).is_mark()) + .map_or(false, |c| GeneralCategory::of(c).is_mark()) { - format!("\u{a0}{text}").into() + Cow::Owned(format!("\u{a0}{text}")) } else { - text.into() + Cow::Borrowed(text) } } +// Default Comparison +struct Diff { + typed: Vec, + expected: Vec, +} + +impl DiffTrait for Diff { + fn get_typed(&self) -> &[char] { + &self.typed + } + fn get_expected(&self) -> &[char] { + &self.expected + } + fn get_expected_original(&self) -> Cow { + Cow::Owned(self.get_expected().iter().collect::()) + } + + fn new(expected: &str, typed: &str) -> Self { + Self { + typed: Self::normalize_typed(typed), + expected: normalize(&prepare_expected(expected)), + } + } + fn normalize_typed(typed: &str) -> Vec { + normalize(typed) + } + + fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String { + render_tokens(tokens) + } +} + +// Non-Combining Comparison +struct DiffNonCombining { + base: Diff, + expected_split: Vec, + expected_original: String, +} + +impl DiffTrait for DiffNonCombining { + fn get_typed(&self) -> &[char] { + &self.base.typed + } + fn get_expected(&self) -> &[char] { + &self.base.expected + } + fn get_expected_original(&self) -> Cow { + Cow::Borrowed(&self.expected_original) + } + + fn new(expected: &str, typed: &str) -> Self { + // filter out combining elements + let mut expected_stripped = String::new(); + // tokenized into "char+combining" for final rendering + let mut expected_split: Vec = Vec::new(); + for c in normalize(&prepare_expected(expected)) { + if unicode_normalization::char::is_combining_mark(c) { + if let Some(last) = expected_split.last_mut() { + last.push(c); + } + } else { + expected_stripped.push(c); + expected_split.push(c.to_string()); + } + } + + Self { + base: Diff { + typed: Self::normalize_typed(typed), + expected: expected_stripped.chars().collect(), + }, + expected_split, + expected_original: prepare_expected(expected), + } + } + + fn normalize_typed(typed: &str) -> Vec { + normalize_to_nfkd(typed) + .chars() + .filter(|c| !unicode_normalization::char::is_combining_mark(*c)) + .collect() + } + + // Since the combining characters are still required learning content, use + // expected_split to show them directly in the "expected" line, rather than + // having to otherwise e.g. include their field twice in the note template. + fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String { + let mut idx = 0; + tokens.iter().fold(String::new(), |mut acc, token| { + let end = idx + token.text.chars().count(); + let txt = self.expected_split[idx..end].concat(); + idx = end; + let encoded_text = htmlescape::encode_minimal(&txt); + let class = token.to_class(); + acc.push_str(&format!("{encoded_text}")); + acc + }) + } +} + +// Utility Items #[derive(Debug, PartialEq, Eq)] struct DiffTokens { - provided_tokens: Vec, + typed_tokens: Vec, expected_tokens: Vec, } @@ -170,19 +271,15 @@ impl DiffToken { fn new(kind: DiffTokenKind, text: String) -> Self { Self { kind, text } } - fn good(text: String) -> Self { Self::new(DiffTokenKind::Good, text) } - fn bad(text: String) -> Self { Self::new(DiffTokenKind::Bad, text) } - fn missing(text: String) -> Self { Self::new(DiffTokenKind::Missing, text) } - fn to_class(&self) -> &'static str { match self.kind { DiffTokenKind::Good => "typeGood", @@ -212,11 +309,13 @@ mod test { let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer"); let output = ctx.to_tokens(); assert_eq!( - output.provided_tokens, + output.typed_tokens, vec![ bad("y"), good(" ahora q"), - bad("e"), + missing("-"), + good("e"), + missing("-"), good(" vamos"), missing("-"), good("a hacer"), @@ -228,7 +327,9 @@ mod test { vec![ missing("¿Y"), good(" ahora q"), - missing("ué"), + missing("u"), + good("e"), + missing("́"), good(" vamos"), missing(" "), good("a hacer"), @@ -245,18 +346,18 @@ mod test { } #[test] - fn missed_chars_only_shown_in_provided_when_after_good() { + fn missed_chars_only_shown_in_typed_when_after_good() { let ctx = Diff::new("1", "23"); - assert_eq!(ctx.to_tokens().provided_tokens, &[bad("23")]); + assert_eq!(ctx.to_tokens().typed_tokens, &[bad("23")]); let ctx = Diff::new("12", "1"); - assert_eq!(ctx.to_tokens().provided_tokens, &[good("1"), missing("-"),]); + assert_eq!(ctx.to_tokens().typed_tokens, &[good("1"), missing("-"),]); } #[test] fn missed_chars_counted_correctly() { let ctx = Diff::new("нос", "нс"); assert_eq!( - ctx.to_tokens().provided_tokens, + ctx.to_tokens().typed_tokens, &[good("н"), missing("-"), good("с")] ); } @@ -266,8 +367,8 @@ mod test { // this was not parsed as expected with dissimilar 1.0.4 let ctx = Diff::new("쓰다듬다", "스다뜸다"); assert_eq!( - ctx.to_tokens().provided_tokens, - &[bad("스"), good("다"), bad("뜸"), good("다"),] + ctx.to_tokens().typed_tokens, + &[bad("ᄉ"), good("ᅳ다"), bad("ᄄ"), good("ᅳᆷ다"),] ); } @@ -285,13 +386,17 @@ mod test { } #[test] - fn whitespace_is_trimmed() { - assert_eq!(prepare_expected("
foo
"), "foo"); + fn tags_removed() { + assert_eq!(prepare_expected("
123
"), "123"); + assert_eq!( + Diff::new("
123
", "123").to_html(), + "123" + ); } #[test] fn empty_input_shows_as_code() { - let ctx = compare_answer("123", ""); + let ctx = compare_answer("
123
", "", true); assert_eq!(ctx, "123"); }