typeanswer: cleanups (#3415)

* typeanswer: cleanups

no functional change

* typeanswer: disambiguate

no functional change

* typeanswer: reorder

* typeanswer: skip DiffContext if nothing typed

No use to run all that code without input.

* typeanswer: skip tokenization if input is correct

No use in this case.

* typeanswer: make repo check happy (.map → .fold)

Either a new check or the call was too complex previously for it to trigger?

* Add to contributors

* typeanswer: remove slice_* functions

They're used only once in to_tokens. Easier to read this way IMHO, anyway.
This commit is contained in:
a.r 2024-09-25 12:15:16 +02:00 committed by GitHub
parent cf17ca2f84
commit dc5fa60c8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 137 additions and 132 deletions

View file

@ -193,6 +193,7 @@ Luke Bartholomew <lukesbart@icloud.com>
Gregory Abrasaldo <degeemon@gmail.com> Gregory Abrasaldo <degeemon@gmail.com>
Taylor Obyen <https://github.com/taylorobyen> Taylor Obyen <https://github.com/taylorobyen>
Kris Cherven <krischerven@gmail.com> Kris Cherven <krischerven@gmail.com>
twwn <github.com/twwn>
******************** ********************

View file

@ -3,10 +3,8 @@
use std::borrow::Cow; use std::borrow::Cow;
use difflib::sequencematcher::Opcode;
use difflib::sequencematcher::SequenceMatcher; use difflib::sequencematcher::SequenceMatcher;
use itertools::Itertools; use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use regex::Regex; use regex::Regex;
use unic_ucd_category::GeneralCategory; use unic_ucd_category::GeneralCategory;
@ -14,8 +12,8 @@ use crate::card_rendering::strip_av_tags;
use crate::text::normalize_to_nfc; use crate::text::normalize_to_nfc;
use crate::text::strip_html; use crate::text::strip_html;
lazy_static! { static LINEBREAKS: Lazy<Regex> = Lazy::new(|| {
static ref LINEBREAKS: Regex = Regex::new( Regex::new(
r"(?six) r"(?six)
( (
\n \n
@ -23,95 +21,136 @@ lazy_static! {
<br\s?/?> <br\s?/?>
| |
</?div> </?div>
)+ )+",
"
) )
.unwrap(); .unwrap()
});
macro_rules! format_typeans {
($typeans:expr) => {
format!("<code id=typeans>{}</code>", $typeans)
};
} }
struct DiffContext { // Public API
expected: Vec<char>, pub fn compare_answer(expected: &str, provided: &str) -> String {
if provided.is_empty() {
format_typeans!(htmlescape::encode_minimal(expected))
} else {
Diff::new(expected, provided).to_html()
}
}
struct Diff {
provided: Vec<char>, provided: Vec<char>,
expected: Vec<char>,
expected_original: String,
} }
impl DiffContext { impl Diff {
fn new(expected: &str, provided: &str) -> Self { fn new(expected: &str, provided: &str) -> Self {
DiffContext { Self {
provided: prepare_provided(provided).chars().collect_vec(), provided: normalize_to_nfc(provided).chars().collect(),
expected: prepare_expected(expected).chars().collect_vec(), expected: normalize_to_nfc(&prepare_expected(expected))
.chars()
.collect(),
expected_original: expected.to_string(),
} }
} }
fn slice_expected(&self, opcode: &Opcode) -> String { // Entry Point
self.expected[opcode.second_start..opcode.second_end] fn to_html(&self) -> String {
.iter() if self.provided == self.expected {
.cloned() format_typeans!(format!(
.collect() "<span class=typeGood>{}</span>",
self.expected_original
))
} else {
let output = self.to_tokens();
let provided_html = render_tokens(&output.provided_tokens);
let expected_html = render_tokens(&output.expected_tokens);
format_typeans!(format!(
"{provided_html}<br><span id=typearrow>&darr;</span><br>{expected_html}"
))
}
} }
fn slice_provided(&self, opcode: &Opcode) -> String { fn to_tokens(&self) -> DiffTokens {
self.provided[opcode.first_start..opcode.first_end]
.iter()
.cloned()
.collect()
}
fn to_tokens(&self) -> DiffOutput {
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected); let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
let opcodes = matcher.get_opcodes(); let mut provided_tokens = Vec::new();
let mut provided = vec![]; let mut expected_tokens = Vec::new();
let mut expected = vec![];
for opcode in opcodes { for opcode in matcher.get_opcodes() {
let provided_slice = slice(&self.provided, opcode.first_start, opcode.first_end);
let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end);
match opcode.tag.as_str() { match opcode.tag.as_str() {
"equal" => { "equal" => {
provided.push(DiffToken::good(self.slice_provided(&opcode))); provided_tokens.push(DiffToken::good(provided_slice));
expected.push(DiffToken::good(self.slice_expected(&opcode))); expected_tokens.push(DiffToken::good(expected_slice));
}
"delete" => {
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
} }
"delete" => provided_tokens.push(DiffToken::bad(provided_slice)),
"insert" => { "insert" => {
let expected_str = self.slice_expected(&opcode); provided_tokens.push(DiffToken::missing(
provided.push(DiffToken::missing("-".repeat(expected_str.chars().count()))); "-".repeat(expected_slice.chars().count()),
expected.push(DiffToken::missing(expected_str)); ));
expected_tokens.push(DiffToken::missing(expected_slice));
} }
"replace" => { "replace" => {
provided.push(DiffToken::bad(self.slice_provided(&opcode))); provided_tokens.push(DiffToken::bad(provided_slice));
expected.push(DiffToken::missing(self.slice_expected(&opcode))); expected_tokens.push(DiffToken::missing(expected_slice));
} }
_ => unreachable!(), _ => unreachable!(),
} }
} }
DiffOutput { provided, expected } DiffTokens {
provided_tokens,
expected_tokens,
}
} }
}
fn to_html(&self) -> String { // Utility Functions
let output = self.to_tokens(); fn slice(chars: &[char], start: usize, end: usize) -> String {
let provided = render_tokens(&output.provided); chars[start..end].iter().collect()
let expected = render_tokens(&output.expected);
format!(
"<code id=typeans>{}</code>",
if self.provided.is_empty() {
htmlescape::encode_minimal(&self.expected.iter().collect::<String>())
} else if self.provided == self.expected {
provided
} else {
format!("{provided}<br><span id=typearrow>&darr;</span><br>{expected}")
}
)
}
} }
fn prepare_expected(expected: &str) -> String { fn prepare_expected(expected: &str) -> String {
let without_av = strip_av_tags(expected); let no_av_tags = strip_av_tags(expected);
let without_newlines = LINEBREAKS.replace_all(&without_av, " "); let no_linebreaks = LINEBREAKS.replace_all(&no_av_tags, " ");
let without_html = strip_html(&without_newlines); strip_html(&no_linebreaks).trim().to_string()
let without_outer_whitespace = without_html.trim();
normalize_to_nfc(without_outer_whitespace).into()
} }
fn prepare_provided(provided: &str) -> String { // Render Functions
normalize_to_nfc(provided).into() fn render_tokens(tokens: &[DiffToken]) -> String {
tokens.iter().fold(String::new(), |mut acc, token| {
let isolated_text = isolate_leading_mark(&token.text);
let encoded_text = htmlescape::encode_minimal(&isolated_text);
let class = token.to_class();
acc.push_str(&format!("<span class={class}>{encoded_text}</span>"));
acc
})
}
/// Prefixes a leading mark character with a non-breaking space to prevent
/// it from joining the previous token.
fn isolate_leading_mark(text: &str) -> Cow<str> {
if text
.chars()
.next()
.map_or(false, |ch| GeneralCategory::of(ch).is_mark())
{
format!("\u{a0}{text}").into()
} else {
text.into()
}
}
#[derive(Debug, PartialEq, Eq)]
struct DiffTokens {
provided_tokens: Vec<DiffToken>,
expected_tokens: Vec<DiffToken>,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -128,66 +167,31 @@ struct DiffToken {
} }
impl DiffToken { impl DiffToken {
fn bad(text: String) -> Self { fn new(kind: DiffTokenKind, text: String) -> Self {
Self { Self { kind, text }
kind: DiffTokenKind::Bad,
text,
}
} }
fn good(text: String) -> Self { fn good(text: String) -> Self {
Self { Self::new(DiffTokenKind::Good, text)
kind: DiffTokenKind::Good, }
text,
} fn bad(text: String) -> Self {
Self::new(DiffTokenKind::Bad, text)
} }
fn missing(text: String) -> Self { fn missing(text: String) -> Self {
Self { Self::new(DiffTokenKind::Missing, text)
kind: DiffTokenKind::Missing, }
text,
fn to_class(&self) -> &'static str {
match self.kind {
DiffTokenKind::Good => "typeGood",
DiffTokenKind::Bad => "typeBad",
DiffTokenKind::Missing => "typeMissed",
} }
} }
} }
#[derive(Debug, PartialEq, Eq)]
struct DiffOutput {
provided: Vec<DiffToken>,
expected: Vec<DiffToken>,
}
pub fn compare_answer(expected: &str, provided: &str) -> String {
DiffContext::new(expected, provided).to_html()
}
fn render_tokens(tokens: &[DiffToken]) -> String {
let text_tokens: Vec<_> = tokens
.iter()
.map(|token| {
let text = with_isolated_leading_mark(&token.text);
let encoded = htmlescape::encode_minimal(&text);
let class = match token.kind {
DiffTokenKind::Good => "typeGood",
DiffTokenKind::Bad => "typeBad",
DiffTokenKind::Missing => "typeMissed",
};
format!("<span class={class}>{encoded}</span>")
})
.collect();
text_tokens.join("")
}
/// If text begins with a mark character, prefix it with a non-breaking
/// space to prevent the mark from joining to the previous token.
fn with_isolated_leading_mark(text: &str) -> Cow<str> {
if let Some(ch) = text.chars().next() {
if GeneralCategory::of(ch).is_mark() {
return format!("\u{a0}{text}").into();
}
}
text.into()
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -205,10 +209,10 @@ mod test {
#[test] #[test]
fn tokens() { fn tokens() {
let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer"); let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
let output = ctx.to_tokens(); let output = ctx.to_tokens();
assert_eq!( assert_eq!(
output.provided, output.provided_tokens,
vec![ vec![
bad("y"), bad("y"),
good(" ahora q"), good(" ahora q"),
@ -220,7 +224,7 @@ mod test {
] ]
); );
assert_eq!( assert_eq!(
output.expected, output.expected_tokens,
vec![ vec![
missing("¿Y"), missing("¿Y"),
good(" ahora q"), good(" ahora q"),
@ -235,24 +239,24 @@ mod test {
#[test] #[test]
fn html_and_media() { fn html_and_media() {
let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1 2"); let ctx = Diff::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1 2");
// the spacing is handled by wrapping html output in white-space: pre-wrap // the spacing is handled by wrapping html output in white-space: pre-wrap
assert_eq!(ctx.to_tokens().expected, &[good("1 2")]); assert_eq!(ctx.to_tokens().expected_tokens, &[good("1 2")]);
} }
#[test] #[test]
fn missed_chars_only_shown_in_provided_when_after_good() { fn missed_chars_only_shown_in_provided_when_after_good() {
let ctx = DiffContext::new("1", "23"); let ctx = Diff::new("1", "23");
assert_eq!(ctx.to_tokens().provided, &[bad("23")]); assert_eq!(ctx.to_tokens().provided_tokens, &[bad("23")]);
let ctx = DiffContext::new("12", "1"); let ctx = Diff::new("12", "1");
assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("-"),]); assert_eq!(ctx.to_tokens().provided_tokens, &[good("1"), missing("-"),]);
} }
#[test] #[test]
fn missed_chars_counted_correctly() { fn missed_chars_counted_correctly() {
let ctx = DiffContext::new("нос", "нс"); let ctx = Diff::new("нос", "нс");
assert_eq!( assert_eq!(
ctx.to_tokens().provided, ctx.to_tokens().provided_tokens,
&[good("н"), missing("-"), good("с")] &[good("н"), missing("-"), good("с")]
); );
} }
@ -260,9 +264,9 @@ mod test {
#[test] #[test]
fn handles_certain_unicode_as_expected() { fn handles_certain_unicode_as_expected() {
// this was not parsed as expected with dissimilar 1.0.4 // this was not parsed as expected with dissimilar 1.0.4
let ctx = DiffContext::new("쓰다듬다", "스다뜸다"); let ctx = Diff::new("쓰다듬다", "스다뜸다");
assert_eq!( assert_eq!(
ctx.to_tokens().provided, ctx.to_tokens().provided_tokens,
&[bad(""), good(""), bad(""), good(""),] &[bad(""), good(""), bad(""), good(""),]
); );
} }
@ -270,7 +274,7 @@ mod test {
#[test] #[test]
fn does_not_panic_with_certain_unicode() { fn does_not_panic_with_certain_unicode() {
// this was causing a panic with dissimilar 1.0.4 // this was causing a panic with dissimilar 1.0.4
let ctx = DiffContext::new( let ctx = Diff::new(
"Сущность должна быть ответственна только за одно дело", "Сущность должна быть ответственна только за одно дело",
concat!( concat!(
"Single responsibility Сущность выполняет только одну задачу.", "Single responsibility Сущность выполняет только одну задачу.",
@ -287,13 +291,13 @@ mod test {
#[test] #[test]
fn empty_input_shows_as_code() { fn empty_input_shows_as_code() {
let ctx = DiffContext::new("123", ""); let ctx = compare_answer("123", "");
assert_eq!(ctx.to_html(), "<code id=typeans>123</code>"); assert_eq!(ctx, "<code id=typeans>123</code>");
} }
#[test] #[test]
fn correct_input_is_collapsed() { fn correct_input_is_collapsed() {
let ctx = DiffContext::new("123", "123"); let ctx = Diff::new("123", "123");
assert_eq!( assert_eq!(
ctx.to_html(), ctx.to_html(),
"<code id=typeans><span class=typeGood>123</span></code>" "<code id=typeans><span class=typeGood>123</span></code>"
@ -302,7 +306,7 @@ mod test {
#[test] #[test]
fn incorrect_input_is_not_collapsed() { fn incorrect_input_is_not_collapsed() {
let ctx = DiffContext::new("123", "1123"); let ctx = Diff::new("123", "1123");
assert_eq!( assert_eq!(
ctx.to_html(), ctx.to_html(),
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>" "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"