Anki/rslib/src/typeanswer.rs
a.r d9969a9f4f
lazy_static → once_cell → stabilized versions (#3447)
* Anki: Replace lazy_static with once_cell

Unify to once_cell, lazy_static's replacement. The latter is unmaintained.

* Anki: Replace once_cell with stabilized LazyCell / LazyLock as far as possible

Since 1.80: https://github.com/rust-lang/rust/issues/109736 and https://github.com/rust-lang/rust/pull/98165

Non-Thread-Safe Lazy → std::cell::LazyCell https://doc.rust-lang.org/nightly/std/cell/struct.LazyCell.html

Thread-safe SyncLazy → std::sync::LazyLock https://doc.rust-lang.org/nightly/std/sync/struct.LazyLock.html

The compiler accepted LazyCell only in minilints.

The final use in rslib/src/log.rs couldn't be replaced since get_or_try_init has not yet been standardized: https://github.com/rust-lang/rust/issues/109737

* Declare correct MSRV (dae)

Some of our deps require newer Rust versions, so this was misleading.

Updating the MSRV also allows us to use .inspect() on Option now
2024-09-30 23:35:06 +10:00

420 lines
12 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::borrow::Cow;
use std::sync::LazyLock;
use difflib::sequencematcher::SequenceMatcher;
use regex::Regex;
use unic_ucd_category::GeneralCategory;
use crate::card_rendering::strip_av_tags;
use crate::text::normalize_to_nfkd;
use crate::text::strip_html;
/// Matches a run of one or more consecutive "line break" constructs: a literal
/// newline, a `<br>` tag (optionally with one whitespace char and/or a slash
/// before the `>`), or a bare opening/closing `<div>` tag.
///
/// The pattern sets the `s`, `i` and `x` flags; `i` makes the tag names
/// case-insensitive and `x` allows the pattern to be laid out over several
/// lines. The regex is compiled on first use and then shared.
/// `prepare_expected` replaces every such run with a single space.
static LINEBREAKS: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?six)
(
\n
|
<br\s?/?>
|
</?div>
)+",
)
.unwrap()
});
/// Wraps the given displayable value in the `<code id=typeans>` container
/// that every rendered answer comparison is placed in.
macro_rules! format_typeans {
    ($inner:expr) => {
        format!("<code id=typeans>{inner}</code>", inner = $inner)
    };
}
// Public API
pub fn compare_answer(expected: &str, typed: &str, combining: bool) -> String {
if typed.is_empty() {
format_typeans!(htmlescape::encode_minimal(&prepare_expected(expected)))
} else if combining {
Diff::new(expected, typed).to_html()
} else {
DiffNonCombining::new(expected, typed).to_html()
}
}
// Core Logic
trait DiffTrait {
fn get_typed(&self) -> &[char];
fn get_expected(&self) -> &[char];
fn get_expected_original(&self) -> Cow<str>;
fn new(expected: &str, typed: &str) -> Self;
fn normalize_typed(typed: &str) -> Vec<char>;
// Entry Point
fn to_html(&self) -> String {
if self.get_typed() == self.get_expected() {
format_typeans!(format!(
"<span class=typeGood>{}</span>",
self.get_expected_original()
))
} else {
let output = self.to_tokens();
let typed_html = render_tokens(&output.typed_tokens);
let expected_html = self.render_expected_tokens(&output.expected_tokens);
format_typeans!(format!(
"{typed_html}<br><span id=typearrow>&darr;</span><br>{expected_html}"
))
}
}
fn to_tokens(&self) -> DiffTokens {
let mut matcher = SequenceMatcher::new(self.get_typed(), self.get_expected());
let mut typed_tokens = Vec::new();
let mut expected_tokens = Vec::new();
for opcode in matcher.get_opcodes() {
let typed_slice = slice(self.get_typed(), opcode.first_start, opcode.first_end);
let expected_slice = slice(self.get_expected(), opcode.second_start, opcode.second_end);
match opcode.tag.as_str() {
"equal" => {
typed_tokens.push(DiffToken::good(typed_slice));
expected_tokens.push(DiffToken::good(expected_slice));
}
"delete" => typed_tokens.push(DiffToken::bad(typed_slice)),
"insert" => {
typed_tokens.push(DiffToken::missing(
"-".repeat(expected_slice.chars().count()),
));
expected_tokens.push(DiffToken::missing(expected_slice));
}
"replace" => {
typed_tokens.push(DiffToken::bad(typed_slice));
expected_tokens.push(DiffToken::missing(expected_slice));
}
_ => unreachable!(),
}
}
DiffTokens {
typed_tokens,
expected_tokens,
}
}
fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String;
}
// Utility Functions
/// Runs `string` through `normalize_to_nfkd` and returns the result as
/// individual chars.
fn normalize(string: &str) -> Vec<char> {
    let decomposed = normalize_to_nfkd(string);
    decomposed.chars().collect()
}
/// Collects `chars[start..end]` into a `String`.
///
/// Panics if the range is out of bounds or `start > end`.
fn slice(chars: &[char], start: usize, end: usize) -> String {
    let window = &chars[start..end];
    window.iter().copied().collect()
}
/// Reduces the raw expected answer to the text used for comparison: the input
/// is passed through `strip_av_tags`, every run matched by `LINEBREAKS` is
/// replaced with a single space, the result is passed through `strip_html`,
/// and surrounding whitespace is trimmed.
fn prepare_expected(expected: &str) -> String {
    let without_av = strip_av_tags(expected);
    let single_line = LINEBREAKS.replace_all(&without_av, " ");
    let plain = strip_html(&single_line);
    plain.trim().to_owned()
}
// Render Functions
/// Renders each token as a `<span>` whose class reflects the token kind and
/// concatenates the spans. Token text is HTML-escaped, and a token starting
/// with a mark char is first passed through `isolate_leading_mark`.
fn render_tokens(tokens: &[DiffToken]) -> String {
    let mut html = String::new();
    for token in tokens {
        let text = isolate_leading_mark(&token.text);
        let escaped = htmlescape::encode_minimal(&text);
        let class = token.to_class();
        html.push_str(&format!("<span class={class}>{escaped}</span>"));
    }
    html
}
/// If `text` starts with a char of the Unicode "mark" category, returns a
/// copy prefixed with a non-breaking space (U+00A0) so that the mark does not
/// attach to whatever precedes the token. Any other text - including the
/// empty string - is returned borrowed and unchanged.
fn isolate_leading_mark(text: &str) -> Cow<str> {
    match text.chars().next() {
        Some(first) if GeneralCategory::of(first).is_mark() => {
            Cow::Owned(format!("\u{a0}{text}"))
        }
        _ => Cow::Borrowed(text),
    }
}
// Default Comparison
/// Default comparison: both sides are normalized and compared char by char,
/// combining marks included.
struct Diff {
    /// Normalized typed answer.
    typed: Vec<char>,
    /// Normalized, cleaned-up expected answer.
    expected: Vec<char>,
}

impl DiffTrait for Diff {
    fn get_typed(&self) -> &[char] {
        self.typed.as_slice()
    }

    fn get_expected(&self) -> &[char] {
        self.expected.as_slice()
    }

    /// In this mode the displayed text is simply the normalized expected
    /// chars joined back into a string.
    fn get_expected_original(&self) -> Cow<str> {
        let text: String = self.expected.iter().collect();
        Cow::Owned(text)
    }

    fn new(expected: &str, typed: &str) -> Self {
        let typed = Self::normalize_typed(typed);
        let expected = normalize(&prepare_expected(expected));
        Self { typed, expected }
    }

    fn normalize_typed(typed: &str) -> Vec<char> {
        normalize(typed)
    }

    /// The expected line is rendered exactly like the typed line.
    fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
        render_tokens(tokens)
    }
}
// Non-Combining Comparison
/// Comparison that leaves combining marks out of the matching: they are
/// removed from both the typed and the expected text before diffing, but are
/// still shown on the "expected" line of the output.
struct DiffNonCombining {
    /// Typed and expected chars with combining marks removed.
    base: Diff,
    /// One entry per char of `base.expected`: that char followed by the
    /// combining marks that came after it in the expected answer.
    expected_split: Vec<String>,
    /// The cleaned-up expected answer, marks included, for display when the
    /// answer was correct.
    expected_original: String,
}

impl DiffTrait for DiffNonCombining {
    fn get_typed(&self) -> &[char] {
        &self.base.typed
    }

    fn get_expected(&self) -> &[char] {
        &self.base.expected
    }

    fn get_expected_original(&self) -> Cow<str> {
        Cow::Borrowed(self.expected_original.as_str())
    }

    fn new(expected: &str, typed: &str) -> Self {
        // Clean up the expected answer once; both the comparison data and the
        // displayed text are derived from this single result.
        let expected_original = prepare_expected(expected);
        // expected chars with combining elements filtered out
        let mut expected_stripped: Vec<char> = Vec::new();
        // tokenized into "char+combining" for final rendering
        let mut expected_split: Vec<String> = Vec::new();

        for c in normalize(&expected_original) {
            if unicode_normalization::char::is_combining_mark(c) {
                // A mark with no preceding base char has nothing to attach
                // to and is left out of the split representation.
                if let Some(last) = expected_split.last_mut() {
                    last.push(c);
                }
            } else {
                expected_stripped.push(c);
                expected_split.push(c.to_string());
            }
        }

        Self {
            base: Diff {
                typed: Self::normalize_typed(typed),
                expected: expected_stripped,
            },
            expected_split,
            expected_original,
        }
    }

    /// Normalizes the typed text and drops every combining mark from it.
    fn normalize_typed(typed: &str) -> Vec<char> {
        normalize_to_nfkd(typed)
            .chars()
            .filter(|c| !unicode_normalization::char::is_combining_mark(*c))
            .collect()
    }

    // Since the combining characters are still required learning content, use
    // expected_split to show them directly in the "expected" line, rather than
    // having to otherwise e.g. include their field twice in the note template.
    fn render_expected_tokens(&self, tokens: &[DiffToken]) -> String {
        let mut html = String::new();
        // Position in `expected_split`; each char of a token's text accounts
        // for exactly one entry.
        let mut idx = 0;
        for token in tokens {
            let end = idx + token.text.chars().count();
            let txt = self.expected_split[idx..end].concat();
            idx = end;
            let encoded_text = htmlescape::encode_minimal(&txt);
            let class = token.to_class();
            html.push_str(&format!("<span class={class}>{encoded_text}</span>"));
        }
        html
    }
}
// Utility Items
/// The result of `DiffTrait::to_tokens`: one token list for each of the two
/// lines shown when the typed answer differs from the expected one.
#[derive(Debug, PartialEq, Eq)]
struct DiffTokens {
// Tokens for the line showing what was typed.
typed_tokens: Vec<DiffToken>,
// Tokens for the line showing what was expected.
expected_tokens: Vec<DiffToken>,
}
/// Classification of a run of characters in a comparison.
#[derive(Debug, PartialEq, Eq)]
enum DiffTokenKind {
    /// Present on both sides.
    Good,
    /// Typed, but not matching the expected answer.
    Bad,
    /// Expected, but not typed.
    Missing,
}

/// A run of characters together with its classification.
#[derive(Debug, PartialEq, Eq)]
struct DiffToken {
    kind: DiffTokenKind,
    text: String,
}

impl DiffToken {
    /// Creates a token of the given kind.
    fn new(kind: DiffTokenKind, text: String) -> Self {
        DiffToken { kind, text }
    }

    /// Shorthand for a `Good` token.
    fn good(text: String) -> Self {
        DiffToken::new(DiffTokenKind::Good, text)
    }

    /// Shorthand for a `Bad` token.
    fn bad(text: String) -> Self {
        DiffToken::new(DiffTokenKind::Bad, text)
    }

    /// Shorthand for a `Missing` token.
    fn missing(text: String) -> Self {
        DiffToken::new(DiffTokenKind::Missing, text)
    }

    /// The CSS class name used for this token's `<span>`.
    fn to_class(&self) -> &'static str {
        match &self.kind {
            DiffTokenKind::Missing => "typeMissed",
            DiffTokenKind::Bad => "typeBad",
            DiffTokenKind::Good => "typeGood",
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    // Generates a `name(text: &str) -> DiffToken` shorthand that forwards to
    // the `DiffToken` constructor of the same name.
    macro_rules! token_factory {
        ($name:ident) => {
            fn $name(text: &str) -> DiffToken {
                DiffToken::$name(String::from(text))
            }
        };
    }

    token_factory!(bad);
    token_factory!(good);
    token_factory!(missing);

    #[test]
    fn tokens() {
        let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
        let output = ctx.to_tokens();
        assert_eq!(
            output.typed_tokens,
            vec![
                bad("y"),
                good(" ahora q"),
                missing("-"),
                good("e"),
                missing("-"),
                good(" vamos"),
                missing("-"),
                good("a hacer"),
                missing("-"),
            ]
        );
        assert_eq!(
            output.expected_tokens,
            vec![
                missing("¿Y"),
                good(" ahora q"),
                missing("u"),
                good("e"),
                // The comparison runs on decomposed text, so the accent is a
                // token of its own: U+0301 COMBINING ACUTE ACCENT.
                missing("\u{301}"),
                good(" vamos"),
                missing(" "),
                good("a hacer"),
                missing("?"),
            ]
        );
    }

    #[test]
    fn html_and_media() {
        // Two spaces are expected between the digits: the literal space plus
        // the one the `&nbsp;` entity ends up as.
        let ctx = Diff::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1  2");
        // the spacing is handled by wrapping html output in white-space: pre-wrap
        assert_eq!(ctx.to_tokens().expected_tokens, &[good("1  2")]);
    }

    #[test]
    fn missed_chars_only_shown_in_typed_when_after_good() {
        let ctx = Diff::new("1", "23");
        assert_eq!(ctx.to_tokens().typed_tokens, &[bad("23")]);
        let ctx = Diff::new("12", "1");
        assert_eq!(ctx.to_tokens().typed_tokens, &[good("1"), missing("-"),]);
    }

    #[test]
    fn missed_chars_counted_correctly() {
        let ctx = Diff::new("нос", "нс");
        assert_eq!(
            ctx.to_tokens().typed_tokens,
            &[good("н"), missing("-"), good("с")]
        );
    }

    #[test]
    fn handles_certain_unicode_as_expected() {
        // this was not parsed as expected with dissimilar 1.0.4
        let ctx = Diff::new("쓰다듬다", "스다뜸다");
        // The syllables are compared in decomposed form, i.e. as individual
        // conjoining jamo. They are written as escapes here because lone jamo
        // are easily lost or confused when the file is displayed or copied.
        assert_eq!(
            ctx.to_tokens().typed_tokens,
            &[
                // U+1109 HANGUL CHOSEONG SIOS (typed instead of SSANGSIOS)
                bad("\u{1109}"),
                // EU, then TIKEUT + A
                good("\u{1173}\u{1103}\u{1161}"),
                // U+1104 HANGUL CHOSEONG SSANGTIKEUT (typed instead of TIKEUT)
                bad("\u{1104}"),
                // EU + final MIEUM, then TIKEUT + A
                good("\u{1173}\u{11b7}\u{1103}\u{1161}"),
            ]
        );
    }

    #[test]
    fn does_not_panic_with_certain_unicode() {
        // this was causing a panic with dissimilar 1.0.4
        let ctx = Diff::new(
            "Сущность должна быть ответственна только за одно дело",
            concat!(
                "Single responsibility Сущность выполняет только одну задачу.",
                "Повод для изменения сущности только один."
            ),
        );
        ctx.to_tokens();
    }

    #[test]
    fn tags_removed() {
        assert_eq!(prepare_expected("<div>123</div>"), "123");
        assert_eq!(
            Diff::new("<div>123</div>", "123").to_html(),
            "<code id=typeans><span class=typeGood>123</span></code>"
        );
    }

    #[test]
    fn empty_input_shows_as_code() {
        let ctx = compare_answer("<div>123</div>", "", true);
        assert_eq!(ctx, "<code id=typeans>123</code>");
    }

    #[test]
    fn correct_input_is_collapsed() {
        let ctx = Diff::new("123", "123");
        assert_eq!(
            ctx.to_html(),
            "<code id=typeans><span class=typeGood>123</span></code>"
        );
    }

    #[test]
    fn incorrect_input_is_not_collapsed() {
        let ctx = Diff::new("123", "1123");
        assert_eq!(
            ctx.to_html(),
            "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"
        );
    }
}