typeanswer: simplify by using nfkd throughout

Requires adjusting two testcases, but both render exactly the same in Anki itself.

On NFC vs. NFKD: https://stackoverflow.com/a/77432079
This commit is contained in:
Andreas Reis 2024-09-30 01:19:58 +02:00
parent b9e7195526
commit 2106c4306f

View file

@ -9,7 +9,6 @@ use regex::Regex;
use unic_ucd_category::GeneralCategory; use unic_ucd_category::GeneralCategory;
use crate::card_rendering::strip_av_tags; use crate::card_rendering::strip_av_tags;
use crate::text::normalize_to_nfc;
use crate::text::normalize_to_nfkd; use crate::text::normalize_to_nfkd;
use crate::text::strip_html; use crate::text::strip_html;
@ -51,7 +50,6 @@ trait DiffTrait {
fn get_expected_original(&self) -> Cow<str>; fn get_expected_original(&self) -> Cow<str>;
fn new(expected: &str, typed: &str) -> Self; fn new(expected: &str, typed: &str) -> Self;
fn normalize_expected(expected: &str) -> Vec<char>;
fn normalize_typed(typed: &str) -> Vec<char>; fn normalize_typed(typed: &str) -> Vec<char>;
// Entry Point // Entry Point
@ -110,6 +108,10 @@ trait DiffTrait {
} }
// Utility Functions // Utility Functions
/// Normalizes `string` with `normalize_to_nfkd` and returns the result as a
/// vector of `char`s, so callers can index and slice it per character.
fn normalize(string: &str) -> Vec<char> {
normalize_to_nfkd(string).chars().collect()
}
fn slice(chars: &[char], start: usize, end: usize) -> String { fn slice(chars: &[char], start: usize, end: usize) -> String {
chars[start..end].iter().collect() chars[start..end].iter().collect()
} }
@ -165,16 +167,11 @@ impl DiffTrait for Diff {
fn new(expected: &str, typed: &str) -> Self { fn new(expected: &str, typed: &str) -> Self {
Self { Self {
typed: Self::normalize_typed(typed), typed: Self::normalize_typed(typed),
expected: Self::normalize_expected(expected), expected: normalize(&prepare_expected(expected)),
} }
} }
fn normalize_expected(expected: &str) -> Vec<char> {
normalize_to_nfc(&prepare_expected(expected))
.chars()
.collect()
}
fn normalize_typed(typed: &str) -> Vec<char> { fn normalize_typed(typed: &str) -> Vec<char> {
normalize_to_nfc(typed).chars().collect() normalize(typed)
} }
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String { fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
@ -205,7 +202,7 @@ impl DiffTrait for DiffNonCombining {
let mut expected_stripped = String::new(); let mut expected_stripped = String::new();
// tokenized into "char+combining" for final rendering // tokenized into "char+combining" for final rendering
let mut expected_split: Vec<String> = Vec::new(); let mut expected_split: Vec<String> = Vec::new();
for c in Self::normalize_expected(expected) { for c in normalize(&prepare_expected(expected)) {
if unicode_normalization::char::is_combining_mark(c) { if unicode_normalization::char::is_combining_mark(c) {
if let Some(last) = expected_split.last_mut() { if let Some(last) = expected_split.last_mut() {
last.push(c); last.push(c);
@ -225,11 +222,7 @@ impl DiffTrait for DiffNonCombining {
expected_original: prepare_expected(expected), expected_original: prepare_expected(expected),
} }
} }
fn normalize_expected(expected: &str) -> Vec<char> {
normalize_to_nfkd(&prepare_expected(expected))
.chars()
.collect()
}
fn normalize_typed(typed: &str) -> Vec<char> { fn normalize_typed(typed: &str) -> Vec<char> {
normalize_to_nfkd(typed) normalize_to_nfkd(typed)
.chars() .chars()
@ -320,7 +313,9 @@ mod test {
vec![ vec![
bad("y"), bad("y"),
good(" ahora q"), good(" ahora q"),
bad("e"), missing("-"),
good("e"),
missing("-"),
good(" vamos"), good(" vamos"),
missing("-"), missing("-"),
good("a hacer"), good("a hacer"),
@ -332,7 +327,9 @@ mod test {
vec![ vec![
missing("¿Y"), missing("¿Y"),
good(" ahora q"), good(" ahora q"),
missing(""), missing("u"),
good("e"),
missing("́"),
good(" vamos"), good(" vamos"),
missing(" "), missing(" "),
good("a hacer"), good("a hacer"),
@ -371,7 +368,7 @@ mod test {
let ctx = Diff::new("쓰다듬다", "스다뜸다"); let ctx = Diff::new("쓰다듬다", "스다뜸다");
assert_eq!( assert_eq!(
ctx.to_tokens().typed_tokens, ctx.to_tokens().typed_tokens,
&[bad(""), good(""), bad(""), good(""),] &[bad(""), good("ᅳ다"), bad(""), good("ᅳᆷ다"),]
); );
} }