typeanswer: NFC fix & cleanup (#3482)

* typeanswer: cleanup

* DiffNonCombining's new() used a String where a plain Vec is appropriate
* drop normalize_typed from DiffTrait again by pulling its code into DiffNonCombining's new()
* add two DiffNonCombining test cases

* typeanswer: return to NFC & fix typos
a.r authored 2024-10-11 12:33:08 +02:00; committed by GitHub
parent 18889239d2
commit 8af63f81eb
4 changed files with 34 additions and 37 deletions
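
The substance of the change: typed answers are now normalized to NFC rather than NFKD before diffing, so precomposed characters and Hangul syllables stay intact instead of being decomposed into base characters plus combining marks. A minimal standalone sketch of the difference (not part of the commit; it only uses the unicode_normalization crate that rslib already imports, and the example strings come from the tests below):

use unicode_normalization::UnicodeNormalization;

fn main() {
    // NFC keeps "é" as one precomposed char; NFKD splits it into 'e' + U+0301.
    assert_eq!("qué".chars().nfc().count(), 3); // q, u, é
    assert_eq!("qué".chars().nfkd().count(), 4); // q, u, e, combining acute

    // Hangul syllables decompose into jamo under NFKD, which is why the
    // Korean test tokens change shape in this commit.
    assert_eq!("쓰".chars().nfc().count(), 1);
    assert_eq!("쓰".chars().nfkd().count(), 2); // ᄊ + ᅳ
}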


@@ -712,10 +712,10 @@ class Reviewer:
             # get field and cloze position
             clozeIdx = self.card.ord + 1
             fld = fld.split(":")[1]
-        # loop through fields for a match
         if fld.startswith("nc:"):
             self._combining = False
             fld = fld.split(":")[1]
+        # loop through fields for a match
         for f in self.card.note_type()["flds"]:
             if f["name"] == fld:
                 self.typeCorrect = self.card.note()[f["name"]]


@@ -30,7 +30,7 @@ pub(crate) fn apply_filters<'a>(
 ) -> (Cow<'a, str>, Vec<String>) {
     let mut text: Cow<str> = text.into();

-    // type:cloze is handled specially
+    // type:cloze & type:nc are handled specially
     let filters = if filters == ["cloze", "type"] {
         &["type-cloze"]
     } else if filters == ["nc", "type"] {


@@ -13,7 +13,6 @@ use regex::Regex;
 use unicase::eq as uni_eq;
 use unicode_normalization::char::is_combining_mark;
 use unicode_normalization::is_nfc;
-use unicode_normalization::is_nfkd;
 use unicode_normalization::is_nfkd_quick;
 use unicode_normalization::IsNormalized;
 use unicode_normalization::UnicodeNormalization;
@@ -399,13 +398,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
     }
 }

-pub(crate) fn normalize_to_nfkd(s: &str) -> Cow<str> {
-    match is_nfkd(s) {
-        false => s.chars().nfkd().collect::<String>().into(),
-        true => s.into(),
-    }
-}
-
 static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
     '€' => "E",
     'Æ' => "AE",


@@ -9,7 +9,7 @@ use regex::Regex;
 use unic_ucd_category::GeneralCategory;

 use crate::card_rendering::strip_av_tags;
-use crate::text::normalize_to_nfkd;
+use crate::text::normalize_to_nfc;
 use crate::text::strip_html;

 static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
@@ -50,7 +50,6 @@ trait DiffTrait {
     fn get_expected_original(&self) -> Cow<str>;

     fn new(expected: &str, typed: &str) -> Self;
-    fn normalize_typed(typed: &str) -> Vec<char>;

     // Entry Point
     fn to_html(&self) -> String {
@@ -109,7 +108,7 @@ trait DiffTrait {

     // Utility Functions
     fn normalize(string: &str) -> Vec<char> {
-        normalize_to_nfkd(string).chars().collect()
+        normalize_to_nfc(string).chars().collect()
     }

     fn slice(chars: &[char], start: usize, end: usize) -> String {
@@ -166,13 +165,10 @@ impl DiffTrait for Diff {
     fn new(expected: &str, typed: &str) -> Self {
         Self {
-            typed: Self::normalize_typed(typed),
+            typed: normalize(typed),
             expected: normalize(expected),
         }
     }

-    fn normalize_typed(typed: &str) -> Vec<char> {
-        normalize(typed)
-    }
-
     fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
         render_tokens(tokens)
@@ -199,9 +195,17 @@ impl DiffTrait for DiffNonCombining {

     fn new(expected: &str, typed: &str) -> Self {
         // filter out combining elements
-        let mut expected_stripped = String::new();
-        // tokenized into "char+combining" for final rendering
+        let mut typed_stripped: Vec<char> = Vec::new();
+        let mut expected_stripped: Vec<char> = Vec::new();
+        // also tokenize into "char+combining" for final rendering
         let mut expected_split: Vec<String> = Vec::new();
+
+        for c in normalize(typed) {
+            if !unicode_normalization::char::is_combining_mark(c) {
+                typed_stripped.push(c);
+            }
+        }
+
         for c in normalize(expected) {
             if unicode_normalization::char::is_combining_mark(c) {
                 if let Some(last) = expected_split.last_mut() {
@@ -215,24 +219,17 @@ impl DiffTrait for DiffNonCombining {

         Self {
             base: Diff {
-                typed: Self::normalize_typed(typed),
-                expected: expected_stripped.chars().collect(),
+                typed: typed_stripped,
+                expected: expected_stripped,
             },
             expected_split,
             expected_original: expected.to_string(),
         }
     }

-    fn normalize_typed(typed: &str) -> Vec<char> {
-        normalize_to_nfkd(typed)
-            .chars()
-            .filter(|c| !unicode_normalization::char::is_combining_mark(*c))
-            .collect()
-    }
-
-    // Since the combining characters are still required learning content, use
+    // Combining characters are still required learning content, so use
     // expected_split to show them directly in the "expected" line, rather than
-    // having to otherwise e.g. include their field twice in the note template.
+    // having to otherwise e.g. include their field twice on the note template.
     fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
         let mut idx = 0;
         tokens.iter().fold(String::new(), |mut acc, token| {
@@ -313,9 +310,7 @@ mod test {
             vec![
                 bad("y"),
                 good(" ahora q"),
-                missing("-"),
-                good("e"),
-                missing("-"),
+                bad("e"),
                 good(" vamos"),
                 missing("-"),
                 good("a hacer"),
@@ -327,9 +322,7 @@ mod test {
             vec![
                 missing("¿Y"),
                 good(" ahora q"),
-                missing("u"),
-                good("e"),
-                missing("́"),
+                missing("ué"),
                 good(" vamos"),
                 missing(" "),
                 good("a hacer"),
@@ -369,7 +362,7 @@ mod test {

         let ctx = Diff::new("쓰다듬다", "스다뜸다");
         assert_eq!(
             ctx.to_tokens().typed_tokens,
-            &[bad("ᄉ"), good("ᅳ다"), bad("ᄄ"), good("ᅳᆷ다"),]
+            &[bad("스"), good("다"), bad("뜸"), good("다"),]
         );
     }
@@ -419,4 +412,16 @@ mod test {
             "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"
         );
     }
+
+    #[test]
+    fn noncombining_comparison() {
+        assert_eq!(
+            compare_answer("שִׁנּוּן", "שנון", false),
+            "<code id=typeans><span class=typeGood>שִׁנּוּן</span></code>"
+        );
+        assert_eq!(
+            compare_answer("חוֹף", "חופ", false),
+            "<code id=typeans><span class=typeGood>חו</span><span class=typeBad>פ</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>חוֹ</span><span class=typeMissed>ף</span></code>"
+        );
+    }
 }
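
For context on the new noncombining_comparison test: DiffNonCombining NFC-normalizes both strings and drops combining marks from the typed input, so Hebrew typed without niqqud still counts as correct, while substituting an ordinary letter for a final form (פ for ף) is still flagged. A simplified standalone sketch of that filtering, with a hypothetical strip_combining helper standing in for the inline loops in new():

use unicode_normalization::char::is_combining_mark;
use unicode_normalization::UnicodeNormalization;

// Hypothetical helper mirroring the combining-mark filtering done inline above.
fn strip_combining(s: &str) -> String {
    s.chars().nfc().filter(|c| !is_combining_mark(*c)).collect()
}

fn main() {
    // Niqqud are combining marks, so the pointed and bare spellings compare equal.
    assert_eq!(strip_combining("שִׁנּוּן"), strip_combining("שנון"));
    // Final-form letters are ordinary characters, so פ vs. ף still differs.
    assert_ne!(strip_combining("חוֹף"), strip_combining("חופ"));
}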