typeanswer: NFC fix & cleanup (#3482)

* typeanswer: cleanup

* DiffNonCombining's new() used String where a plain Vec is appropriate
* get rid of normalize_typed for DiffTrait again by pulling the code into DiffNonCombining's new()
* two DiffNonCombining test cases

* typeanswer: return to NFC & fix typos
Author: a.r · 2024-10-11 12:33:08 +02:00 · committed by GitHub
parent 18889239d2
commit 8af63f81eb
4 changed files with 34 additions and 37 deletions
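
For context on the "return to NFC" change below: NFKD splits precomposed characters such as Hangul syllables and accented Latin letters into several code points, so the character-level diff ended up comparing fragments the user never typed. A minimal standalone sketch (not part of this commit) using the unicode-normalization crate the code already depends on:

use unicode_normalization::UnicodeNormalization;

fn main() {
    // NFC keeps "쓰다듬다" as four syllables; NFKD expands it to nine jamo,
    // so a per-character diff no longer lines up with what was typed.
    assert_eq!("쓰다듬다".nfc().count(), 4);
    assert_eq!("쓰다듬다".nfkd().count(), 9);

    // Same for accents: NFC keeps 'é' as one char, NFKD yields 'e' + U+0301.
    assert_eq!("qué".nfc().count(), 3);
    assert_eq!("qué".nfkd().count(), 4);
}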


@@ -712,10 +712,10 @@ class Reviewer:
             # get field and cloze position
             clozeIdx = self.card.ord + 1
             fld = fld.split(":")[1]
-        # loop through fields for a match
         if fld.startswith("nc:"):
             self._combining = False
             fld = fld.split(":")[1]
+        # loop through fields for a match
         for f in self.card.note_type()["flds"]:
             if f["name"] == fld:
                 self.typeCorrect = self.card.note()[f["name"]]


@@ -30,7 +30,7 @@ pub(crate) fn apply_filters<'a>(
 ) -> (Cow<'a, str>, Vec<String>) {
     let mut text: Cow<str> = text.into();
 
-    // type:cloze is handled specially
+    // type:cloze & type:nc are handled specially
     let filters = if filters == ["cloze", "type"] {
         &["type-cloze"]
     } else if filters == ["nc", "type"] {
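
A {{type:nc:FieldName}} reference reaches this point as the filter list ["nc", "type"], which the branch above collapses into the single internal "type-nc" filter before regular filter processing, mirroring the existing "type-cloze" special case. A standalone sketch of that remapping (the helper name is hypothetical):

// Hypothetical free function mirroring the special-casing in apply_filters.
fn remap_type_filters<'a>(filters: &'a [&'a str]) -> &'a [&'a str] {
    if filters == ["cloze", "type"] {
        &["type-cloze"]
    } else if filters == ["nc", "type"] {
        &["type-nc"]
    } else {
        filters
    }
}

fn main() {
    assert_eq!(remap_type_filters(&["nc", "type"]), &["type-nc"]);
    assert_eq!(remap_type_filters(&["text"]), &["text"]);
}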


@@ -13,7 +13,6 @@ use regex::Regex;
 use unicase::eq as uni_eq;
 use unicode_normalization::char::is_combining_mark;
 use unicode_normalization::is_nfc;
-use unicode_normalization::is_nfkd;
 use unicode_normalization::is_nfkd_quick;
 use unicode_normalization::IsNormalized;
 use unicode_normalization::UnicodeNormalization;
@@ -399,13 +398,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
     }
 }
 
-pub(crate) fn normalize_to_nfkd(s: &str) -> Cow<str> {
-    match is_nfkd(s) {
-        false => s.chars().nfkd().collect::<String>().into(),
-        true => s.into(),
-    }
-}
-
 static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
     '€' => "E",
     'Æ' => "AE",


@@ -9,7 +9,7 @@ use regex::Regex;
 use unic_ucd_category::GeneralCategory;
 
 use crate::card_rendering::strip_av_tags;
-use crate::text::normalize_to_nfkd;
+use crate::text::normalize_to_nfc;
 use crate::text::strip_html;
 
 static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
@@ -50,7 +50,6 @@ trait DiffTrait {
     fn get_expected_original(&self) -> Cow<str>;
 
     fn new(expected: &str, typed: &str) -> Self;
-    fn normalize_typed(typed: &str) -> Vec<char>;
 
     // Entry Point
     fn to_html(&self) -> String {
@@ -109,7 +108,7 @@ trait DiffTrait {
 
 // Utility Functions
 fn normalize(string: &str) -> Vec<char> {
-    normalize_to_nfkd(string).chars().collect()
+    normalize_to_nfc(string).chars().collect()
 }
 
 fn slice(chars: &[char], start: usize, end: usize) -> String {
@@ -166,13 +165,10 @@ impl DiffTrait for Diff {
 
     fn new(expected: &str, typed: &str) -> Self {
         Self {
-            typed: Self::normalize_typed(typed),
+            typed: normalize(typed),
             expected: normalize(expected),
         }
     }
-    fn normalize_typed(typed: &str) -> Vec<char> {
-        normalize(typed)
-    }
 
     fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
         render_tokens(tokens)
@@ -199,9 +195,17 @@ impl DiffTrait for DiffNonCombining {
 
     fn new(expected: &str, typed: &str) -> Self {
         // filter out combining elements
-        let mut expected_stripped = String::new();
-        // tokenized into "char+combining" for final rendering
+        let mut typed_stripped: Vec<char> = Vec::new();
+        let mut expected_stripped: Vec<char> = Vec::new();
+        // also tokenize into "char+combining" for final rendering
         let mut expected_split: Vec<String> = Vec::new();
+
+        for c in normalize(typed) {
+            if !unicode_normalization::char::is_combining_mark(c) {
+                typed_stripped.push(c);
+            }
+        }
+
         for c in normalize(expected) {
             if unicode_normalization::char::is_combining_mark(c) {
                 if let Some(last) = expected_split.last_mut() {
@@ -215,24 +219,17 @@ impl DiffTrait for DiffNonCombining {
 
         Self {
             base: Diff {
-                typed: Self::normalize_typed(typed),
-                expected: expected_stripped.chars().collect(),
+                typed: typed_stripped,
+                expected: expected_stripped,
             },
             expected_split,
             expected_original: expected.to_string(),
         }
     }
 
-    fn normalize_typed(typed: &str) -> Vec<char> {
-        normalize_to_nfkd(typed)
-            .chars()
-            .filter(|c| !unicode_normalization::char::is_combining_mark(*c))
-            .collect()
-    }
-
-    // Since the combining characters are still required learning content, use
+    // Combining characters are still required learning content, so use
     // expected_split to show them directly in the "expected" line, rather than
-    // having to otherwise e.g. include their field twice in the note template.
+    // having to otherwise e.g. include their field twice on the note template.
     fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
         let mut idx = 0;
         tokens.iter().fold(String::new(), |mut acc, token| {
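
The reworked new() above strips combining marks from both inputs before diffing, while expected_split keeps the marks so they can still be rendered in the "expected" line. A standalone sketch of the stripping step (the helper name is hypothetical), matching the new tests further down:

use unicode_normalization::char::is_combining_mark;
use unicode_normalization::UnicodeNormalization;

// Hypothetical helper mirroring the filtering loops in DiffNonCombining::new().
fn strip_combining(s: &str) -> Vec<char> {
    s.nfc().filter(|c| !is_combining_mark(*c)).collect()
}

fn main() {
    // Hebrew niqqud are combining marks, so the pointed and unpointed
    // spellings compare equal once stripped.
    assert_eq!(strip_combining("שִׁנּוּן"), strip_combining("שנון"));
    // Base letters still have to match: final ף vs. regular פ stays a difference.
    assert_ne!(strip_combining("חוֹף"), strip_combining("חופ"));
}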
@@ -313,9 +310,7 @@ mod test {
             vec![
                 bad("y"),
                 good(" ahora q"),
-                missing("-"),
-                good("e"),
-                missing("-"),
+                bad("e"),
                 good(" vamos"),
                 missing("-"),
                 good("a hacer"),
@@ -327,9 +322,7 @@ mod test {
             vec![
                 missing("¿Y"),
                 good(" ahora q"),
-                missing("u"),
-                good("e"),
-                missing("́"),
+                missing("ué"),
                 good(" vamos"),
                 missing(" "),
                 good("a hacer"),
@@ -369,7 +362,7 @@ mod test {
         let ctx = Diff::new("쓰다듬다", "스다뜸다");
         assert_eq!(
             ctx.to_tokens().typed_tokens,
-            &[bad("ᄉ"), good("ᅳ다"), bad("ᄄ"), good("ᅳᆷ다"),]
+            &[bad("스"), good("다"), bad("뜸"), good("다"),]
         );
     }
@@ -419,4 +412,16 @@ mod test {
             "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"
         );
     }
+
+    #[test]
+    fn noncombining_comparison() {
+        assert_eq!(
+            compare_answer("שִׁנּוּן", "שנון", false),
+            "<code id=typeans><span class=typeGood>שִׁנּוּן</span></code>"
+        );
+        assert_eq!(
+            compare_answer("חוֹף", "חופ", false),
+            "<code id=typeans><span class=typeGood>חו</span><span class=typeBad>פ</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>חוֹ</span><span class=typeMissed>ף</span></code>"
+        );
+    }
 }