mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
typeanswer: cleanups (#3415)
* typeanswer: cleanups no functional change * typeanswer: disambiguate no functional change * typeanswer: reorder * typeanswer: skip DiffContext if nothing typed No use to run all that code without input. * typeanswer: skip tokenization if input is correct No use in this case. * typeanswer: make repo check happy (.map → .fold) Either a new check or the call was too complex previously for it to trigger? * Add to contributors * typeanswer: remove slice_* functions They're used only once in to_tokens. Easier to read this way IMHO, anyway.
This commit is contained in:
parent
cf17ca2f84
commit
dc5fa60c8b
2 changed files with 137 additions and 132 deletions
|
@ -193,6 +193,7 @@ Luke Bartholomew <lukesbart@icloud.com>
|
||||||
Gregory Abrasaldo <degeemon@gmail.com>
|
Gregory Abrasaldo <degeemon@gmail.com>
|
||||||
Taylor Obyen <https://github.com/taylorobyen>
|
Taylor Obyen <https://github.com/taylorobyen>
|
||||||
Kris Cherven <krischerven@gmail.com>
|
Kris Cherven <krischerven@gmail.com>
|
||||||
|
twwn <github.com/twwn>
|
||||||
|
|
||||||
********************
|
********************
|
||||||
|
|
||||||
|
|
|
@ -3,10 +3,8 @@
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use difflib::sequencematcher::Opcode;
|
|
||||||
use difflib::sequencematcher::SequenceMatcher;
|
use difflib::sequencematcher::SequenceMatcher;
|
||||||
use itertools::Itertools;
|
use once_cell::sync::Lazy;
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use unic_ucd_category::GeneralCategory;
|
use unic_ucd_category::GeneralCategory;
|
||||||
|
|
||||||
|
@ -14,8 +12,8 @@ use crate::card_rendering::strip_av_tags;
|
||||||
use crate::text::normalize_to_nfc;
|
use crate::text::normalize_to_nfc;
|
||||||
use crate::text::strip_html;
|
use crate::text::strip_html;
|
||||||
|
|
||||||
lazy_static! {
|
static LINEBREAKS: Lazy<Regex> = Lazy::new(|| {
|
||||||
static ref LINEBREAKS: Regex = Regex::new(
|
Regex::new(
|
||||||
r"(?six)
|
r"(?six)
|
||||||
(
|
(
|
||||||
\n
|
\n
|
||||||
|
@ -23,95 +21,136 @@ lazy_static! {
|
||||||
<br\s?/?>
|
<br\s?/?>
|
||||||
|
|
|
|
||||||
</?div>
|
</?div>
|
||||||
)+
|
)+",
|
||||||
"
|
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
macro_rules! format_typeans {
|
||||||
|
($typeans:expr) => {
|
||||||
|
format!("<code id=typeans>{}</code>", $typeans)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
struct DiffContext {
|
// Public API
|
||||||
expected: Vec<char>,
|
pub fn compare_answer(expected: &str, provided: &str) -> String {
|
||||||
|
if provided.is_empty() {
|
||||||
|
format_typeans!(htmlescape::encode_minimal(expected))
|
||||||
|
} else {
|
||||||
|
Diff::new(expected, provided).to_html()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Diff {
|
||||||
provided: Vec<char>,
|
provided: Vec<char>,
|
||||||
|
expected: Vec<char>,
|
||||||
|
expected_original: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DiffContext {
|
impl Diff {
|
||||||
fn new(expected: &str, provided: &str) -> Self {
|
fn new(expected: &str, provided: &str) -> Self {
|
||||||
DiffContext {
|
Self {
|
||||||
provided: prepare_provided(provided).chars().collect_vec(),
|
provided: normalize_to_nfc(provided).chars().collect(),
|
||||||
expected: prepare_expected(expected).chars().collect_vec(),
|
expected: normalize_to_nfc(&prepare_expected(expected))
|
||||||
|
.chars()
|
||||||
|
.collect(),
|
||||||
|
expected_original: expected.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn slice_expected(&self, opcode: &Opcode) -> String {
|
// Entry Point
|
||||||
self.expected[opcode.second_start..opcode.second_end]
|
fn to_html(&self) -> String {
|
||||||
.iter()
|
if self.provided == self.expected {
|
||||||
.cloned()
|
format_typeans!(format!(
|
||||||
.collect()
|
"<span class=typeGood>{}</span>",
|
||||||
|
self.expected_original
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
let output = self.to_tokens();
|
||||||
|
let provided_html = render_tokens(&output.provided_tokens);
|
||||||
|
let expected_html = render_tokens(&output.expected_tokens);
|
||||||
|
|
||||||
|
format_typeans!(format!(
|
||||||
|
"{provided_html}<br><span id=typearrow>↓</span><br>{expected_html}"
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn slice_provided(&self, opcode: &Opcode) -> String {
|
fn to_tokens(&self) -> DiffTokens {
|
||||||
self.provided[opcode.first_start..opcode.first_end]
|
|
||||||
.iter()
|
|
||||||
.cloned()
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn to_tokens(&self) -> DiffOutput {
|
|
||||||
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
|
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
|
||||||
let opcodes = matcher.get_opcodes();
|
let mut provided_tokens = Vec::new();
|
||||||
let mut provided = vec![];
|
let mut expected_tokens = Vec::new();
|
||||||
let mut expected = vec![];
|
|
||||||
for opcode in opcodes {
|
for opcode in matcher.get_opcodes() {
|
||||||
|
let provided_slice = slice(&self.provided, opcode.first_start, opcode.first_end);
|
||||||
|
let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end);
|
||||||
|
|
||||||
match opcode.tag.as_str() {
|
match opcode.tag.as_str() {
|
||||||
"equal" => {
|
"equal" => {
|
||||||
provided.push(DiffToken::good(self.slice_provided(&opcode)));
|
provided_tokens.push(DiffToken::good(provided_slice));
|
||||||
expected.push(DiffToken::good(self.slice_expected(&opcode)));
|
expected_tokens.push(DiffToken::good(expected_slice));
|
||||||
}
|
|
||||||
"delete" => {
|
|
||||||
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
|
||||||
}
|
}
|
||||||
|
"delete" => provided_tokens.push(DiffToken::bad(provided_slice)),
|
||||||
"insert" => {
|
"insert" => {
|
||||||
let expected_str = self.slice_expected(&opcode);
|
provided_tokens.push(DiffToken::missing(
|
||||||
provided.push(DiffToken::missing("-".repeat(expected_str.chars().count())));
|
"-".repeat(expected_slice.chars().count()),
|
||||||
expected.push(DiffToken::missing(expected_str));
|
));
|
||||||
|
expected_tokens.push(DiffToken::missing(expected_slice));
|
||||||
}
|
}
|
||||||
"replace" => {
|
"replace" => {
|
||||||
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
provided_tokens.push(DiffToken::bad(provided_slice));
|
||||||
expected.push(DiffToken::missing(self.slice_expected(&opcode)));
|
expected_tokens.push(DiffToken::missing(expected_slice));
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DiffOutput { provided, expected }
|
DiffTokens {
|
||||||
|
provided_tokens,
|
||||||
|
expected_tokens,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn to_html(&self) -> String {
|
// Utility Functions
|
||||||
let output = self.to_tokens();
|
fn slice(chars: &[char], start: usize, end: usize) -> String {
|
||||||
let provided = render_tokens(&output.provided);
|
chars[start..end].iter().collect()
|
||||||
let expected = render_tokens(&output.expected);
|
|
||||||
format!(
|
|
||||||
"<code id=typeans>{}</code>",
|
|
||||||
if self.provided.is_empty() {
|
|
||||||
htmlescape::encode_minimal(&self.expected.iter().collect::<String>())
|
|
||||||
} else if self.provided == self.expected {
|
|
||||||
provided
|
|
||||||
} else {
|
|
||||||
format!("{provided}<br><span id=typearrow>↓</span><br>{expected}")
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_expected(expected: &str) -> String {
|
fn prepare_expected(expected: &str) -> String {
|
||||||
let without_av = strip_av_tags(expected);
|
let no_av_tags = strip_av_tags(expected);
|
||||||
let without_newlines = LINEBREAKS.replace_all(&without_av, " ");
|
let no_linebreaks = LINEBREAKS.replace_all(&no_av_tags, " ");
|
||||||
let without_html = strip_html(&without_newlines);
|
strip_html(&no_linebreaks).trim().to_string()
|
||||||
let without_outer_whitespace = without_html.trim();
|
|
||||||
normalize_to_nfc(without_outer_whitespace).into()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_provided(provided: &str) -> String {
|
// Render Functions
|
||||||
normalize_to_nfc(provided).into()
|
fn render_tokens(tokens: &[DiffToken]) -> String {
|
||||||
|
tokens.iter().fold(String::new(), |mut acc, token| {
|
||||||
|
let isolated_text = isolate_leading_mark(&token.text);
|
||||||
|
let encoded_text = htmlescape::encode_minimal(&isolated_text);
|
||||||
|
let class = token.to_class();
|
||||||
|
acc.push_str(&format!("<span class={class}>{encoded_text}</span>"));
|
||||||
|
acc
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prefixes a leading mark character with a non-breaking space to prevent
|
||||||
|
/// it from joining the previous token.
|
||||||
|
fn isolate_leading_mark(text: &str) -> Cow<str> {
|
||||||
|
if text
|
||||||
|
.chars()
|
||||||
|
.next()
|
||||||
|
.map_or(false, |ch| GeneralCategory::of(ch).is_mark())
|
||||||
|
{
|
||||||
|
format!("\u{a0}{text}").into()
|
||||||
|
} else {
|
||||||
|
text.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
struct DiffTokens {
|
||||||
|
provided_tokens: Vec<DiffToken>,
|
||||||
|
expected_tokens: Vec<DiffToken>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
@ -128,66 +167,31 @@ struct DiffToken {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DiffToken {
|
impl DiffToken {
|
||||||
fn bad(text: String) -> Self {
|
fn new(kind: DiffTokenKind, text: String) -> Self {
|
||||||
Self {
|
Self { kind, text }
|
||||||
kind: DiffTokenKind::Bad,
|
|
||||||
text,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn good(text: String) -> Self {
|
fn good(text: String) -> Self {
|
||||||
Self {
|
Self::new(DiffTokenKind::Good, text)
|
||||||
kind: DiffTokenKind::Good,
|
}
|
||||||
text,
|
|
||||||
}
|
fn bad(text: String) -> Self {
|
||||||
|
Self::new(DiffTokenKind::Bad, text)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn missing(text: String) -> Self {
|
fn missing(text: String) -> Self {
|
||||||
Self {
|
Self::new(DiffTokenKind::Missing, text)
|
||||||
kind: DiffTokenKind::Missing,
|
}
|
||||||
text,
|
|
||||||
|
fn to_class(&self) -> &'static str {
|
||||||
|
match self.kind {
|
||||||
|
DiffTokenKind::Good => "typeGood",
|
||||||
|
DiffTokenKind::Bad => "typeBad",
|
||||||
|
DiffTokenKind::Missing => "typeMissed",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
struct DiffOutput {
|
|
||||||
provided: Vec<DiffToken>,
|
|
||||||
expected: Vec<DiffToken>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn compare_answer(expected: &str, provided: &str) -> String {
|
|
||||||
DiffContext::new(expected, provided).to_html()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn render_tokens(tokens: &[DiffToken]) -> String {
|
|
||||||
let text_tokens: Vec<_> = tokens
|
|
||||||
.iter()
|
|
||||||
.map(|token| {
|
|
||||||
let text = with_isolated_leading_mark(&token.text);
|
|
||||||
let encoded = htmlescape::encode_minimal(&text);
|
|
||||||
let class = match token.kind {
|
|
||||||
DiffTokenKind::Good => "typeGood",
|
|
||||||
DiffTokenKind::Bad => "typeBad",
|
|
||||||
DiffTokenKind::Missing => "typeMissed",
|
|
||||||
};
|
|
||||||
format!("<span class={class}>{encoded}</span>")
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
text_tokens.join("")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// If text begins with a mark character, prefix it with a non-breaking
|
|
||||||
/// space to prevent the mark from joining to the previous token.
|
|
||||||
fn with_isolated_leading_mark(text: &str) -> Cow<str> {
|
|
||||||
if let Some(ch) = text.chars().next() {
|
|
||||||
if GeneralCategory::of(ch).is_mark() {
|
|
||||||
return format!("\u{a0}{text}").into();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
text.into()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -205,10 +209,10 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokens() {
|
fn tokens() {
|
||||||
let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
|
let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
|
||||||
let output = ctx.to_tokens();
|
let output = ctx.to_tokens();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
output.provided,
|
output.provided_tokens,
|
||||||
vec![
|
vec![
|
||||||
bad("y"),
|
bad("y"),
|
||||||
good(" ahora q"),
|
good(" ahora q"),
|
||||||
|
@ -220,7 +224,7 @@ mod test {
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
output.expected,
|
output.expected_tokens,
|
||||||
vec![
|
vec![
|
||||||
missing("¿Y"),
|
missing("¿Y"),
|
||||||
good(" ahora q"),
|
good(" ahora q"),
|
||||||
|
@ -235,24 +239,24 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn html_and_media() {
|
fn html_and_media() {
|
||||||
let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> 2", "1 2");
|
let ctx = Diff::new("[sound:foo.mp3]<b>1</b> 2", "1 2");
|
||||||
// the spacing is handled by wrapping html output in white-space: pre-wrap
|
// the spacing is handled by wrapping html output in white-space: pre-wrap
|
||||||
assert_eq!(ctx.to_tokens().expected, &[good("1 2")]);
|
assert_eq!(ctx.to_tokens().expected_tokens, &[good("1 2")]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn missed_chars_only_shown_in_provided_when_after_good() {
|
fn missed_chars_only_shown_in_provided_when_after_good() {
|
||||||
let ctx = DiffContext::new("1", "23");
|
let ctx = Diff::new("1", "23");
|
||||||
assert_eq!(ctx.to_tokens().provided, &[bad("23")]);
|
assert_eq!(ctx.to_tokens().provided_tokens, &[bad("23")]);
|
||||||
let ctx = DiffContext::new("12", "1");
|
let ctx = Diff::new("12", "1");
|
||||||
assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("-"),]);
|
assert_eq!(ctx.to_tokens().provided_tokens, &[good("1"), missing("-"),]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn missed_chars_counted_correctly() {
|
fn missed_chars_counted_correctly() {
|
||||||
let ctx = DiffContext::new("нос", "нс");
|
let ctx = Diff::new("нос", "нс");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
ctx.to_tokens().provided,
|
ctx.to_tokens().provided_tokens,
|
||||||
&[good("н"), missing("-"), good("с")]
|
&[good("н"), missing("-"), good("с")]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -260,9 +264,9 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn handles_certain_unicode_as_expected() {
|
fn handles_certain_unicode_as_expected() {
|
||||||
// this was not parsed as expected with dissimilar 1.0.4
|
// this was not parsed as expected with dissimilar 1.0.4
|
||||||
let ctx = DiffContext::new("쓰다듬다", "스다뜸다");
|
let ctx = Diff::new("쓰다듬다", "스다뜸다");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
ctx.to_tokens().provided,
|
ctx.to_tokens().provided_tokens,
|
||||||
&[bad("스"), good("다"), bad("뜸"), good("다"),]
|
&[bad("스"), good("다"), bad("뜸"), good("다"),]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -270,7 +274,7 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn does_not_panic_with_certain_unicode() {
|
fn does_not_panic_with_certain_unicode() {
|
||||||
// this was causing a panic with dissimilar 1.0.4
|
// this was causing a panic with dissimilar 1.0.4
|
||||||
let ctx = DiffContext::new(
|
let ctx = Diff::new(
|
||||||
"Сущность должна быть ответственна только за одно дело",
|
"Сущность должна быть ответственна только за одно дело",
|
||||||
concat!(
|
concat!(
|
||||||
"Single responsibility Сущность выполняет только одну задачу.",
|
"Single responsibility Сущность выполняет только одну задачу.",
|
||||||
|
@ -287,13 +291,13 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty_input_shows_as_code() {
|
fn empty_input_shows_as_code() {
|
||||||
let ctx = DiffContext::new("123", "");
|
let ctx = compare_answer("123", "");
|
||||||
assert_eq!(ctx.to_html(), "<code id=typeans>123</code>");
|
assert_eq!(ctx, "<code id=typeans>123</code>");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn correct_input_is_collapsed() {
|
fn correct_input_is_collapsed() {
|
||||||
let ctx = DiffContext::new("123", "123");
|
let ctx = Diff::new("123", "123");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
ctx.to_html(),
|
ctx.to_html(),
|
||||||
"<code id=typeans><span class=typeGood>123</span></code>"
|
"<code id=typeans><span class=typeGood>123</span></code>"
|
||||||
|
@ -302,7 +306,7 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn incorrect_input_is_not_collapsed() {
|
fn incorrect_input_is_not_collapsed() {
|
||||||
let ctx = DiffContext::new("123", "1123");
|
let ctx = Diff::new("123", "1123");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
ctx.to_html(),
|
ctx.to_html(),
|
||||||
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
|
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
|
||||||
|
|
Loading…
Reference in a new issue