mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
typeanswer: cleanups (#3415)
* typeanswer: cleanups
  no functional change
* typeanswer: disambiguate
  no functional change
* typeanswer: reorder
* typeanswer: skip DiffContext if nothing typed
  No use to run all that code without input.
* typeanswer: skip tokenization if input is correct
  No use in this case.
* typeanswer: make repo check happy (.map → .fold)
  Either a new check or the call was too complex previously for it to trigger?
* Add to contributors
* typeanswer: remove slice_* functions
  They're used only once in to_tokens. Easier to read this way IMHO, anyway.
This commit is contained in:
parent
cf17ca2f84
commit
dc5fa60c8b
2 changed files with 137 additions and 132 deletions
|
@ -193,6 +193,7 @@ Luke Bartholomew <lukesbart@icloud.com>
|
|||
Gregory Abrasaldo <degeemon@gmail.com>
|
||||
Taylor Obyen <https://github.com/taylorobyen>
|
||||
Kris Cherven <krischerven@gmail.com>
|
||||
twwn <github.com/twwn>
|
||||
|
||||
********************
|
||||
|
||||
|
|
|
@ -3,10 +3,8 @@
|
|||
|
||||
use std::borrow::Cow;
|
||||
|
||||
use difflib::sequencematcher::Opcode;
|
||||
use difflib::sequencematcher::SequenceMatcher;
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use unic_ucd_category::GeneralCategory;
|
||||
|
||||
|
@ -14,8 +12,8 @@ use crate::card_rendering::strip_av_tags;
|
|||
use crate::text::normalize_to_nfc;
|
||||
use crate::text::strip_html;
|
||||
|
||||
lazy_static! {
|
||||
static ref LINEBREAKS: Regex = Regex::new(
|
||||
static LINEBREAKS: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(
|
||||
r"(?six)
|
||||
(
|
||||
\n
|
||||
|
@ -23,95 +21,136 @@ lazy_static! {
|
|||
<br\s?/?>
|
||||
|
|
||||
</?div>
|
||||
)+
|
||||
"
|
||||
)+",
|
||||
)
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
/// Wraps the given expression in the `<code id=typeans>…</code>` element.
///
/// The argument is interpolated with `format!` as-is; this macro performs no
/// HTML escaping, so callers must pass already-safe markup.
macro_rules! format_typeans {
|
||||
($typeans:expr) => {
|
||||
format!("<code id=typeans>{}</code>", $typeans)
|
||||
};
|
||||
}
|
||||
|
||||
struct DiffContext {
|
||||
expected: Vec<char>,
|
||||
// Public API
|
||||
/// Builds the HTML string that compares the expected answer with the
/// provided one.
///
/// When `provided` is empty no diff is built: `expected` is HTML-escaped with
/// `htmlescape::encode_minimal` and wrapped by `format_typeans!`. Otherwise
/// the result of `Diff::new(expected, provided).to_html()` is returned.
pub fn compare_answer(expected: &str, provided: &str) -> String {
|
||||
if provided.is_empty() {
|
||||
// NOTE(review): `expected` is escaped exactly as passed in; it does not go
// through `prepare_expected` on this path — confirm that is intended.
format_typeans!(htmlescape::encode_minimal(expected))
|
||||
} else {
|
||||
Diff::new(expected, provided).to_html()
|
||||
}
|
||||
}
|
||||
|
||||
/// Character-level comparison state for one provided/expected answer pair.
struct Diff {
|
||||
/// The provided answer as a sequence of `char`s.
provided: Vec<char>,
|
||||
/// The expected answer as a sequence of `char`s.
expected: Vec<char>,
|
||||
/// The expected answer as an owned string; `to_html` emits it when
/// `provided` equals `expected`.
expected_original: String,
|
||||
}
|
||||
|
||||
impl DiffContext {
|
||||
impl Diff {
|
||||
fn new(expected: &str, provided: &str) -> Self {
|
||||
DiffContext {
|
||||
provided: prepare_provided(provided).chars().collect_vec(),
|
||||
expected: prepare_expected(expected).chars().collect_vec(),
|
||||
Self {
|
||||
provided: normalize_to_nfc(provided).chars().collect(),
|
||||
expected: normalize_to_nfc(&prepare_expected(expected))
|
||||
.chars()
|
||||
.collect(),
|
||||
expected_original: expected.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn slice_expected(&self, opcode: &Opcode) -> String {
|
||||
self.expected[opcode.second_start..opcode.second_end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect()
|
||||
// Entry Point
|
||||
/// Renders the comparison as HTML wrapped by `format_typeans!`.
///
/// If `provided` equals `expected`, the output is a single `typeGood` span
/// containing `expected_original` and no tokenization is performed.
/// Otherwise the tokens from `to_tokens` are rendered with `render_tokens`:
/// provided tokens first, then a `typearrow` separator, then expected tokens.
fn to_html(&self) -> String {
|
||||
if self.provided == self.expected {
|
||||
// NOTE(review): `expected_original` is interpolated without HTML escaping,
// unlike the token path below — confirm this is intended.
format_typeans!(format!(
|
||||
"<span class=typeGood>{}</span>",
|
||||
self.expected_original
|
||||
))
|
||||
} else {
|
||||
let output = self.to_tokens();
|
||||
let provided_html = render_tokens(&output.provided_tokens);
|
||||
let expected_html = render_tokens(&output.expected_tokens);
|
||||
|
||||
format_typeans!(format!(
|
||||
"{provided_html}<br><span id=typearrow>↓</span><br>{expected_html}"
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn slice_provided(&self, opcode: &Opcode) -> String {
|
||||
self.provided[opcode.first_start..opcode.first_end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn to_tokens(&self) -> DiffOutput {
|
||||
fn to_tokens(&self) -> DiffTokens {
|
||||
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
|
||||
let opcodes = matcher.get_opcodes();
|
||||
let mut provided = vec![];
|
||||
let mut expected = vec![];
|
||||
for opcode in opcodes {
|
||||
let mut provided_tokens = Vec::new();
|
||||
let mut expected_tokens = Vec::new();
|
||||
|
||||
for opcode in matcher.get_opcodes() {
|
||||
let provided_slice = slice(&self.provided, opcode.first_start, opcode.first_end);
|
||||
let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end);
|
||||
|
||||
match opcode.tag.as_str() {
|
||||
"equal" => {
|
||||
provided.push(DiffToken::good(self.slice_provided(&opcode)));
|
||||
expected.push(DiffToken::good(self.slice_expected(&opcode)));
|
||||
}
|
||||
"delete" => {
|
||||
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
||||
provided_tokens.push(DiffToken::good(provided_slice));
|
||||
expected_tokens.push(DiffToken::good(expected_slice));
|
||||
}
|
||||
"delete" => provided_tokens.push(DiffToken::bad(provided_slice)),
|
||||
"insert" => {
|
||||
let expected_str = self.slice_expected(&opcode);
|
||||
provided.push(DiffToken::missing("-".repeat(expected_str.chars().count())));
|
||||
expected.push(DiffToken::missing(expected_str));
|
||||
provided_tokens.push(DiffToken::missing(
|
||||
"-".repeat(expected_slice.chars().count()),
|
||||
));
|
||||
expected_tokens.push(DiffToken::missing(expected_slice));
|
||||
}
|
||||
"replace" => {
|
||||
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
||||
expected.push(DiffToken::missing(self.slice_expected(&opcode)));
|
||||
provided_tokens.push(DiffToken::bad(provided_slice));
|
||||
expected_tokens.push(DiffToken::missing(expected_slice));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
DiffOutput { provided, expected }
|
||||
DiffTokens {
|
||||
provided_tokens,
|
||||
expected_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_html(&self) -> String {
|
||||
let output = self.to_tokens();
|
||||
let provided = render_tokens(&output.provided);
|
||||
let expected = render_tokens(&output.expected);
|
||||
format!(
|
||||
"<code id=typeans>{}</code>",
|
||||
if self.provided.is_empty() {
|
||||
htmlescape::encode_minimal(&self.expected.iter().collect::<String>())
|
||||
} else if self.provided == self.expected {
|
||||
provided
|
||||
} else {
|
||||
format!("{provided}<br><span id=typearrow>↓</span><br>{expected}")
|
||||
}
|
||||
)
|
||||
}
|
||||
// Utility Functions
|
||||
/// Collects `chars[start..end]` into a new `String`.
///
/// Uses slice indexing, so it panics if `start > end` or `end > chars.len()`.
fn slice(chars: &[char], start: usize, end: usize) -> String {
|
||||
chars[start..end].iter().collect()
|
||||
}
|
||||
|
||||
fn prepare_expected(expected: &str) -> String {
|
||||
let without_av = strip_av_tags(expected);
|
||||
let without_newlines = LINEBREAKS.replace_all(&without_av, " ");
|
||||
let without_html = strip_html(&without_newlines);
|
||||
let without_outer_whitespace = without_html.trim();
|
||||
normalize_to_nfc(without_outer_whitespace).into()
|
||||
let no_av_tags = strip_av_tags(expected);
|
||||
let no_linebreaks = LINEBREAKS.replace_all(&no_av_tags, " ");
|
||||
strip_html(&no_linebreaks).trim().to_string()
|
||||
}
|
||||
|
||||
fn prepare_provided(provided: &str) -> String {
|
||||
normalize_to_nfc(provided).into()
|
||||
// Render Functions
|
||||
/// Concatenates `tokens` into HTML, emitting one `<span class=…>` per token.
///
/// Each token's text is passed through `isolate_leading_mark`, then escaped
/// with `htmlescape::encode_minimal`; the span class comes from
/// `DiffToken::to_class`. An empty slice yields an empty string.
fn render_tokens(tokens: &[DiffToken]) -> String {
|
||||
tokens.iter().fold(String::new(), |mut acc, token| {
|
||||
let isolated_text = isolate_leading_mark(&token.text);
|
||||
let encoded_text = htmlescape::encode_minimal(&isolated_text);
|
||||
let class = token.to_class();
|
||||
acc.push_str(&format!("<span class={class}>{encoded_text}</span>"));
|
||||
acc
|
||||
})
|
||||
}
|
||||
|
||||
/// Prefixes a leading mark character with a non-breaking space to prevent
|
||||
/// it from joining the previous token.
///
/// Only the first `char` is inspected, using `GeneralCategory::is_mark`.
/// When it is a mark, a new string starting with U+00A0 is allocated;
/// otherwise (including for empty input) `text` is returned borrowed.
|
||||
fn isolate_leading_mark(text: &str) -> Cow<str> {
|
||||
if text
|
||||
.chars()
|
||||
.next()
|
||||
.map_or(false, |ch| GeneralCategory::of(ch).is_mark())
|
||||
{
|
||||
format!("\u{a0}{text}").into()
|
||||
} else {
|
||||
text.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// The pair of token sequences returned by `Diff::to_tokens`.
#[derive(Debug, PartialEq, Eq)]
|
||||
struct DiffTokens {
|
||||
/// Tokens for the provided-answer row.
provided_tokens: Vec<DiffToken>,
|
||||
/// Tokens for the expected-answer row.
expected_tokens: Vec<DiffToken>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -128,64 +167,29 @@ struct DiffToken {
|
|||
}
|
||||
|
||||
impl DiffToken {
|
||||
fn bad(text: String) -> Self {
|
||||
Self {
|
||||
kind: DiffTokenKind::Bad,
|
||||
text,
|
||||
}
|
||||
fn new(kind: DiffTokenKind, text: String) -> Self {
|
||||
Self { kind, text }
|
||||
}
|
||||
|
||||
fn good(text: String) -> Self {
|
||||
Self {
|
||||
kind: DiffTokenKind::Good,
|
||||
text,
|
||||
Self::new(DiffTokenKind::Good, text)
|
||||
}
|
||||
|
||||
fn bad(text: String) -> Self {
|
||||
Self::new(DiffTokenKind::Bad, text)
|
||||
}
|
||||
|
||||
fn missing(text: String) -> Self {
|
||||
Self {
|
||||
kind: DiffTokenKind::Missing,
|
||||
text,
|
||||
Self::new(DiffTokenKind::Missing, text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct DiffOutput {
|
||||
provided: Vec<DiffToken>,
|
||||
expected: Vec<DiffToken>,
|
||||
}
|
||||
|
||||
pub fn compare_answer(expected: &str, provided: &str) -> String {
|
||||
DiffContext::new(expected, provided).to_html()
|
||||
}
|
||||
|
||||
fn render_tokens(tokens: &[DiffToken]) -> String {
|
||||
let text_tokens: Vec<_> = tokens
|
||||
.iter()
|
||||
.map(|token| {
|
||||
let text = with_isolated_leading_mark(&token.text);
|
||||
let encoded = htmlescape::encode_minimal(&text);
|
||||
let class = match token.kind {
|
||||
fn to_class(&self) -> &'static str {
|
||||
match self.kind {
|
||||
DiffTokenKind::Good => "typeGood",
|
||||
DiffTokenKind::Bad => "typeBad",
|
||||
DiffTokenKind::Missing => "typeMissed",
|
||||
};
|
||||
format!("<span class={class}>{encoded}</span>")
|
||||
})
|
||||
.collect();
|
||||
text_tokens.join("")
|
||||
}
|
||||
|
||||
/// If text begins with a mark character, prefix it with a non-breaking
|
||||
/// space to prevent the mark from joining to the previous token.
|
||||
fn with_isolated_leading_mark(text: &str) -> Cow<str> {
|
||||
if let Some(ch) = text.chars().next() {
|
||||
if GeneralCategory::of(ch).is_mark() {
|
||||
return format!("\u{a0}{text}").into();
|
||||
}
|
||||
}
|
||||
text.into()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -205,10 +209,10 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokens() {
|
||||
let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
|
||||
let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
|
||||
let output = ctx.to_tokens();
|
||||
assert_eq!(
|
||||
output.provided,
|
||||
output.provided_tokens,
|
||||
vec![
|
||||
bad("y"),
|
||||
good(" ahora q"),
|
||||
|
@ -220,7 +224,7 @@ mod test {
|
|||
]
|
||||
);
|
||||
assert_eq!(
|
||||
output.expected,
|
||||
output.expected_tokens,
|
||||
vec![
|
||||
missing("¿Y"),
|
||||
good(" ahora q"),
|
||||
|
@ -235,24 +239,24 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn html_and_media() {
|
||||
let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> 2", "1 2");
|
||||
let ctx = Diff::new("[sound:foo.mp3]<b>1</b> 2", "1 2");
|
||||
// the spacing is handled by wrapping html output in white-space: pre-wrap
|
||||
assert_eq!(ctx.to_tokens().expected, &[good("1 2")]);
|
||||
assert_eq!(ctx.to_tokens().expected_tokens, &[good("1 2")]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missed_chars_only_shown_in_provided_when_after_good() {
|
||||
let ctx = DiffContext::new("1", "23");
|
||||
assert_eq!(ctx.to_tokens().provided, &[bad("23")]);
|
||||
let ctx = DiffContext::new("12", "1");
|
||||
assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("-"),]);
|
||||
let ctx = Diff::new("1", "23");
|
||||
assert_eq!(ctx.to_tokens().provided_tokens, &[bad("23")]);
|
||||
let ctx = Diff::new("12", "1");
|
||||
assert_eq!(ctx.to_tokens().provided_tokens, &[good("1"), missing("-"),]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missed_chars_counted_correctly() {
|
||||
let ctx = DiffContext::new("нос", "нс");
|
||||
let ctx = Diff::new("нос", "нс");
|
||||
assert_eq!(
|
||||
ctx.to_tokens().provided,
|
||||
ctx.to_tokens().provided_tokens,
|
||||
&[good("н"), missing("-"), good("с")]
|
||||
);
|
||||
}
|
||||
|
@ -260,9 +264,9 @@ mod test {
|
|||
#[test]
|
||||
fn handles_certain_unicode_as_expected() {
|
||||
// this was not parsed as expected with dissimilar 1.0.4
|
||||
let ctx = DiffContext::new("쓰다듬다", "스다뜸다");
|
||||
let ctx = Diff::new("쓰다듬다", "스다뜸다");
|
||||
assert_eq!(
|
||||
ctx.to_tokens().provided,
|
||||
ctx.to_tokens().provided_tokens,
|
||||
&[bad("스"), good("다"), bad("뜸"), good("다"),]
|
||||
);
|
||||
}
|
||||
|
@ -270,7 +274,7 @@ mod test {
|
|||
#[test]
|
||||
fn does_not_panic_with_certain_unicode() {
|
||||
// this was causing a panic with dissimilar 1.0.4
|
||||
let ctx = DiffContext::new(
|
||||
let ctx = Diff::new(
|
||||
"Сущность должна быть ответственна только за одно дело",
|
||||
concat!(
|
||||
"Single responsibility Сущность выполняет только одну задачу.",
|
||||
|
@ -287,13 +291,13 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn empty_input_shows_as_code() {
|
||||
let ctx = DiffContext::new("123", "");
|
||||
assert_eq!(ctx.to_html(), "<code id=typeans>123</code>");
|
||||
let ctx = compare_answer("123", "");
|
||||
assert_eq!(ctx, "<code id=typeans>123</code>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_input_is_collapsed() {
|
||||
let ctx = DiffContext::new("123", "123");
|
||||
let ctx = Diff::new("123", "123");
|
||||
assert_eq!(
|
||||
ctx.to_html(),
|
||||
"<code id=typeans><span class=typeGood>123</span></code>"
|
||||
|
@ -302,7 +306,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn incorrect_input_is_not_collapsed() {
|
||||
let ctx = DiffContext::new("123", "1123");
|
||||
let ctx = Diff::new("123", "1123");
|
||||
assert_eq!(
|
||||
ctx.to_html(),
|
||||
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
|
||||
|
|
Loading…
Reference in a new issue