mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 14:02:21 -04:00
typeanswer: NFC fix & cleanup (#3482)
* typeanswer: cleanup * DiffNonCombining's new() used String where plain Vec is appropriate * get rid of normalize_typed for DiffTrait again by pulling code into DiffNonCombining's new() * two DiffNonCombining testcases * typeanswer: return to NFC & typos
This commit is contained in:
parent
18889239d2
commit
8af63f81eb
4 changed files with 34 additions and 37 deletions
|
@ -712,10 +712,10 @@ class Reviewer:
|
|||
# get field and cloze position
|
||||
clozeIdx = self.card.ord + 1
|
||||
fld = fld.split(":")[1]
|
||||
# loop through fields for a match
|
||||
if fld.startswith("nc:"):
|
||||
self._combining = False
|
||||
fld = fld.split(":")[1]
|
||||
# loop through fields for a match
|
||||
for f in self.card.note_type()["flds"]:
|
||||
if f["name"] == fld:
|
||||
self.typeCorrect = self.card.note()[f["name"]]
|
||||
|
|
|
@ -30,7 +30,7 @@ pub(crate) fn apply_filters<'a>(
|
|||
) -> (Cow<'a, str>, Vec<String>) {
|
||||
let mut text: Cow<str> = text.into();
|
||||
|
||||
// type:cloze is handled specially
|
||||
// type:cloze & type:nc are handled specially
|
||||
let filters = if filters == ["cloze", "type"] {
|
||||
&["type-cloze"]
|
||||
} else if filters == ["nc", "type"] {
|
||||
|
|
|
@ -13,7 +13,6 @@ use regex::Regex;
|
|||
use unicase::eq as uni_eq;
|
||||
use unicode_normalization::char::is_combining_mark;
|
||||
use unicode_normalization::is_nfc;
|
||||
use unicode_normalization::is_nfkd;
|
||||
use unicode_normalization::is_nfkd_quick;
|
||||
use unicode_normalization::IsNormalized;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
@ -399,13 +398,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn normalize_to_nfkd(s: &str) -> Cow<str> {
|
||||
match is_nfkd(s) {
|
||||
false => s.chars().nfkd().collect::<String>().into(),
|
||||
true => s.into(),
|
||||
}
|
||||
}
|
||||
|
||||
static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
|
||||
'€' => "E",
|
||||
'Æ' => "AE",
|
||||
|
|
|
@ -9,7 +9,7 @@ use regex::Regex;
|
|||
use unic_ucd_category::GeneralCategory;
|
||||
|
||||
use crate::card_rendering::strip_av_tags;
|
||||
use crate::text::normalize_to_nfkd;
|
||||
use crate::text::normalize_to_nfc;
|
||||
use crate::text::strip_html;
|
||||
|
||||
static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
|
||||
|
@ -50,7 +50,6 @@ trait DiffTrait {
|
|||
fn get_expected_original(&self) -> Cow<str>;
|
||||
|
||||
fn new(expected: &str, typed: &str) -> Self;
|
||||
fn normalize_typed(typed: &str) -> Vec<char>;
|
||||
|
||||
// Entry Point
|
||||
fn to_html(&self) -> String {
|
||||
|
@ -109,7 +108,7 @@ trait DiffTrait {
|
|||
|
||||
// Utility Functions
|
||||
fn normalize(string: &str) -> Vec<char> {
|
||||
normalize_to_nfkd(string).chars().collect()
|
||||
normalize_to_nfc(string).chars().collect()
|
||||
}
|
||||
|
||||
fn slice(chars: &[char], start: usize, end: usize) -> String {
|
||||
|
@ -166,13 +165,10 @@ impl DiffTrait for Diff {
|
|||
|
||||
fn new(expected: &str, typed: &str) -> Self {
|
||||
Self {
|
||||
typed: Self::normalize_typed(typed),
|
||||
typed: normalize(typed),
|
||||
expected: normalize(expected),
|
||||
}
|
||||
}
|
||||
fn normalize_typed(typed: &str) -> Vec<char> {
|
||||
normalize(typed)
|
||||
}
|
||||
|
||||
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
|
||||
render_tokens(tokens)
|
||||
|
@ -199,9 +195,17 @@ impl DiffTrait for DiffNonCombining {
|
|||
|
||||
fn new(expected: &str, typed: &str) -> Self {
|
||||
// filter out combining elements
|
||||
let mut expected_stripped = String::new();
|
||||
// tokenized into "char+combining" for final rendering
|
||||
let mut typed_stripped: Vec<char> = Vec::new();
|
||||
let mut expected_stripped: Vec<char> = Vec::new();
|
||||
// also tokenize into "char+combining" for final rendering
|
||||
let mut expected_split: Vec<String> = Vec::new();
|
||||
|
||||
for c in normalize(typed) {
|
||||
if !unicode_normalization::char::is_combining_mark(c) {
|
||||
typed_stripped.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
for c in normalize(expected) {
|
||||
if unicode_normalization::char::is_combining_mark(c) {
|
||||
if let Some(last) = expected_split.last_mut() {
|
||||
|
@ -215,24 +219,17 @@ impl DiffTrait for DiffNonCombining {
|
|||
|
||||
Self {
|
||||
base: Diff {
|
||||
typed: Self::normalize_typed(typed),
|
||||
expected: expected_stripped.chars().collect(),
|
||||
typed: typed_stripped,
|
||||
expected: expected_stripped,
|
||||
},
|
||||
expected_split,
|
||||
expected_original: expected.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_typed(typed: &str) -> Vec<char> {
|
||||
normalize_to_nfkd(typed)
|
||||
.chars()
|
||||
.filter(|c| !unicode_normalization::char::is_combining_mark(*c))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Since the combining characters are still required learning content, use
|
||||
// Combining characters are still required learning content, so use
|
||||
// expected_split to show them directly in the "expected" line, rather than
|
||||
// having to otherwise e.g. include their field twice in the note template.
|
||||
// having to otherwise e.g. include their field twice on the note template.
|
||||
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
|
||||
let mut idx = 0;
|
||||
tokens.iter().fold(String::new(), |mut acc, token| {
|
||||
|
@ -313,9 +310,7 @@ mod test {
|
|||
vec![
|
||||
bad("y"),
|
||||
good(" ahora q"),
|
||||
missing("-"),
|
||||
good("e"),
|
||||
missing("-"),
|
||||
bad("e"),
|
||||
good(" vamos"),
|
||||
missing("-"),
|
||||
good("a hacer"),
|
||||
|
@ -327,9 +322,7 @@ mod test {
|
|||
vec![
|
||||
missing("¿Y"),
|
||||
good(" ahora q"),
|
||||
missing("u"),
|
||||
good("e"),
|
||||
missing("́"),
|
||||
missing("ué"),
|
||||
good(" vamos"),
|
||||
missing(" "),
|
||||
good("a hacer"),
|
||||
|
@ -369,7 +362,7 @@ mod test {
|
|||
let ctx = Diff::new("쓰다듬다", "스다뜸다");
|
||||
assert_eq!(
|
||||
ctx.to_tokens().typed_tokens,
|
||||
&[bad("ᄉ"), good("ᅳ다"), bad("ᄄ"), good("ᅳᆷ다"),]
|
||||
&[bad("스"), good("다"), bad("뜸"), good("다"),]
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -419,4 +412,16 @@ mod test {
|
|||
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn noncombining_comparison() {
|
||||
assert_eq!(
|
||||
compare_answer("שִׁנּוּן", "שנון", false),
|
||||
"<code id=typeans><span class=typeGood>שִׁנּוּן</span></code>"
|
||||
);
|
||||
assert_eq!(
|
||||
compare_answer("חוֹף", "חופ", false),
|
||||
"<code id=typeans><span class=typeGood>חו</span><span class=typeBad>פ</span><br><span id=typearrow>↓</span><br><span class=typeGood>חוֹ</span><span class=typeMissed>ף</span></code>"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue