mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 14:02:21 -04:00
typeanswer: NFC fix & cleanup (#3482)
* typeanswer: cleanup
* DiffNonCombining's new() used String where plain Vec is appropriate
* get rid of normalize_typed for DiffTrait again by pulling code into DiffNonCombining's new()
* two DiffNonCombining testcases
* typeanswer: return to NFC & typos
This commit is contained in:
parent
18889239d2
commit
8af63f81eb
4 changed files with 34 additions and 37 deletions
|
@ -712,10 +712,10 @@ class Reviewer:
|
||||||
# get field and cloze position
|
# get field and cloze position
|
||||||
clozeIdx = self.card.ord + 1
|
clozeIdx = self.card.ord + 1
|
||||||
fld = fld.split(":")[1]
|
fld = fld.split(":")[1]
|
||||||
# loop through fields for a match
|
|
||||||
if fld.startswith("nc:"):
|
if fld.startswith("nc:"):
|
||||||
self._combining = False
|
self._combining = False
|
||||||
fld = fld.split(":")[1]
|
fld = fld.split(":")[1]
|
||||||
|
# loop through fields for a match
|
||||||
for f in self.card.note_type()["flds"]:
|
for f in self.card.note_type()["flds"]:
|
||||||
if f["name"] == fld:
|
if f["name"] == fld:
|
||||||
self.typeCorrect = self.card.note()[f["name"]]
|
self.typeCorrect = self.card.note()[f["name"]]
|
||||||
|
|
|
@ -30,7 +30,7 @@ pub(crate) fn apply_filters<'a>(
|
||||||
) -> (Cow<'a, str>, Vec<String>) {
|
) -> (Cow<'a, str>, Vec<String>) {
|
||||||
let mut text: Cow<str> = text.into();
|
let mut text: Cow<str> = text.into();
|
||||||
|
|
||||||
// type:cloze is handled specially
|
// type:cloze & type:nc are handled specially
|
||||||
let filters = if filters == ["cloze", "type"] {
|
let filters = if filters == ["cloze", "type"] {
|
||||||
&["type-cloze"]
|
&["type-cloze"]
|
||||||
} else if filters == ["nc", "type"] {
|
} else if filters == ["nc", "type"] {
|
||||||
|
|
|
@ -13,7 +13,6 @@ use regex::Regex;
|
||||||
use unicase::eq as uni_eq;
|
use unicase::eq as uni_eq;
|
||||||
use unicode_normalization::char::is_combining_mark;
|
use unicode_normalization::char::is_combining_mark;
|
||||||
use unicode_normalization::is_nfc;
|
use unicode_normalization::is_nfc;
|
||||||
use unicode_normalization::is_nfkd;
|
|
||||||
use unicode_normalization::is_nfkd_quick;
|
use unicode_normalization::is_nfkd_quick;
|
||||||
use unicode_normalization::IsNormalized;
|
use unicode_normalization::IsNormalized;
|
||||||
use unicode_normalization::UnicodeNormalization;
|
use unicode_normalization::UnicodeNormalization;
|
||||||
|
@ -399,13 +398,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn normalize_to_nfkd(s: &str) -> Cow<str> {
|
|
||||||
match is_nfkd(s) {
|
|
||||||
false => s.chars().nfkd().collect::<String>().into(),
|
|
||||||
true => s.into(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
|
static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
|
||||||
'€' => "E",
|
'€' => "E",
|
||||||
'Æ' => "AE",
|
'Æ' => "AE",
|
||||||
|
|
|
@ -9,7 +9,7 @@ use regex::Regex;
|
||||||
use unic_ucd_category::GeneralCategory;
|
use unic_ucd_category::GeneralCategory;
|
||||||
|
|
||||||
use crate::card_rendering::strip_av_tags;
|
use crate::card_rendering::strip_av_tags;
|
||||||
use crate::text::normalize_to_nfkd;
|
use crate::text::normalize_to_nfc;
|
||||||
use crate::text::strip_html;
|
use crate::text::strip_html;
|
||||||
|
|
||||||
static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
|
static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
|
||||||
|
@ -50,7 +50,6 @@ trait DiffTrait {
|
||||||
fn get_expected_original(&self) -> Cow<str>;
|
fn get_expected_original(&self) -> Cow<str>;
|
||||||
|
|
||||||
fn new(expected: &str, typed: &str) -> Self;
|
fn new(expected: &str, typed: &str) -> Self;
|
||||||
fn normalize_typed(typed: &str) -> Vec<char>;
|
|
||||||
|
|
||||||
// Entry Point
|
// Entry Point
|
||||||
fn to_html(&self) -> String {
|
fn to_html(&self) -> String {
|
||||||
|
@ -109,7 +108,7 @@ trait DiffTrait {
|
||||||
|
|
||||||
// Utility Functions
|
// Utility Functions
|
||||||
fn normalize(string: &str) -> Vec<char> {
|
fn normalize(string: &str) -> Vec<char> {
|
||||||
normalize_to_nfkd(string).chars().collect()
|
normalize_to_nfc(string).chars().collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn slice(chars: &[char], start: usize, end: usize) -> String {
|
fn slice(chars: &[char], start: usize, end: usize) -> String {
|
||||||
|
@ -166,13 +165,10 @@ impl DiffTrait for Diff {
|
||||||
|
|
||||||
fn new(expected: &str, typed: &str) -> Self {
|
fn new(expected: &str, typed: &str) -> Self {
|
||||||
Self {
|
Self {
|
||||||
typed: Self::normalize_typed(typed),
|
typed: normalize(typed),
|
||||||
expected: normalize(expected),
|
expected: normalize(expected),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn normalize_typed(typed: &str) -> Vec<char> {
|
|
||||||
normalize(typed)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
|
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
|
||||||
render_tokens(tokens)
|
render_tokens(tokens)
|
||||||
|
@ -199,9 +195,17 @@ impl DiffTrait for DiffNonCombining {
|
||||||
|
|
||||||
fn new(expected: &str, typed: &str) -> Self {
|
fn new(expected: &str, typed: &str) -> Self {
|
||||||
// filter out combining elements
|
// filter out combining elements
|
||||||
let mut expected_stripped = String::new();
|
let mut typed_stripped: Vec<char> = Vec::new();
|
||||||
// tokenized into "char+combining" for final rendering
|
let mut expected_stripped: Vec<char> = Vec::new();
|
||||||
|
// also tokenize into "char+combining" for final rendering
|
||||||
let mut expected_split: Vec<String> = Vec::new();
|
let mut expected_split: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
for c in normalize(typed) {
|
||||||
|
if !unicode_normalization::char::is_combining_mark(c) {
|
||||||
|
typed_stripped.push(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for c in normalize(expected) {
|
for c in normalize(expected) {
|
||||||
if unicode_normalization::char::is_combining_mark(c) {
|
if unicode_normalization::char::is_combining_mark(c) {
|
||||||
if let Some(last) = expected_split.last_mut() {
|
if let Some(last) = expected_split.last_mut() {
|
||||||
|
@ -215,24 +219,17 @@ impl DiffTrait for DiffNonCombining {
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
base: Diff {
|
base: Diff {
|
||||||
typed: Self::normalize_typed(typed),
|
typed: typed_stripped,
|
||||||
expected: expected_stripped.chars().collect(),
|
expected: expected_stripped,
|
||||||
},
|
},
|
||||||
expected_split,
|
expected_split,
|
||||||
expected_original: expected.to_string(),
|
expected_original: expected.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn normalize_typed(typed: &str) -> Vec<char> {
|
// Combining characters are still required learning content, so use
|
||||||
normalize_to_nfkd(typed)
|
|
||||||
.chars()
|
|
||||||
.filter(|c| !unicode_normalization::char::is_combining_mark(*c))
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Since the combining characters are still required learning content, use
|
|
||||||
// expected_split to show them directly in the "expected" line, rather than
|
// expected_split to show them directly in the "expected" line, rather than
|
||||||
// having to otherwise e.g. include their field twice in the note template.
|
// having to otherwise e.g. include their field twice on the note template.
|
||||||
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
|
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
|
||||||
let mut idx = 0;
|
let mut idx = 0;
|
||||||
tokens.iter().fold(String::new(), |mut acc, token| {
|
tokens.iter().fold(String::new(), |mut acc, token| {
|
||||||
|
@ -313,9 +310,7 @@ mod test {
|
||||||
vec![
|
vec![
|
||||||
bad("y"),
|
bad("y"),
|
||||||
good(" ahora q"),
|
good(" ahora q"),
|
||||||
missing("-"),
|
bad("e"),
|
||||||
good("e"),
|
|
||||||
missing("-"),
|
|
||||||
good(" vamos"),
|
good(" vamos"),
|
||||||
missing("-"),
|
missing("-"),
|
||||||
good("a hacer"),
|
good("a hacer"),
|
||||||
|
@ -327,9 +322,7 @@ mod test {
|
||||||
vec![
|
vec![
|
||||||
missing("¿Y"),
|
missing("¿Y"),
|
||||||
good(" ahora q"),
|
good(" ahora q"),
|
||||||
missing("u"),
|
missing("ué"),
|
||||||
good("e"),
|
|
||||||
missing("́"),
|
|
||||||
good(" vamos"),
|
good(" vamos"),
|
||||||
missing(" "),
|
missing(" "),
|
||||||
good("a hacer"),
|
good("a hacer"),
|
||||||
|
@ -369,7 +362,7 @@ mod test {
|
||||||
let ctx = Diff::new("쓰다듬다", "스다뜸다");
|
let ctx = Diff::new("쓰다듬다", "스다뜸다");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
ctx.to_tokens().typed_tokens,
|
ctx.to_tokens().typed_tokens,
|
||||||
&[bad("ᄉ"), good("ᅳ다"), bad("ᄄ"), good("ᅳᆷ다"),]
|
&[bad("스"), good("다"), bad("뜸"), good("다"),]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -419,4 +412,16 @@ mod test {
|
||||||
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
|
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn noncombining_comparison() {
|
||||||
|
assert_eq!(
|
||||||
|
compare_answer("שִׁנּוּן", "שנון", false),
|
||||||
|
"<code id=typeans><span class=typeGood>שִׁנּוּן</span></code>"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
compare_answer("חוֹף", "חופ", false),
|
||||||
|
"<code id=typeans><span class=typeGood>חו</span><span class=typeBad>פ</span><br><span id=typearrow>↓</span><br><span class=typeGood>חוֹ</span><span class=typeMissed>ף</span></code>"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue