typeanswer: cleanups (#3415)

* typeanswer: cleanups

no functional change

* typeanswer: disambiguate

no functional change

* typeanswer: reorder

* typeanswer: skip DiffContext if nothing typed

No use to run all that code without input.

* typeanswer: skip tokenization if input is correct

No use in this case.

* typeanswer: make repo check happy (.map → .fold)

Either a new check or the call was too complex previously for it to trigger?

* Add to contributors

* typeanswer: remove slice_* functions

They're used only once in to_tokens. Easier to read this way IMHO, anyway.
This commit is contained in:
a.r 2024-09-25 12:15:16 +02:00 committed by GitHub
parent cf17ca2f84
commit dc5fa60c8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 137 additions and 132 deletions

View file

@ -193,6 +193,7 @@ Luke Bartholomew <lukesbart@icloud.com>
Gregory Abrasaldo <degeemon@gmail.com>
Taylor Obyen <https://github.com/taylorobyen>
Kris Cherven <krischerven@gmail.com>
twwn <github.com/twwn>
********************

View file

@ -3,10 +3,8 @@
use std::borrow::Cow;
use difflib::sequencematcher::Opcode;
use difflib::sequencematcher::SequenceMatcher;
use itertools::Itertools;
use lazy_static::lazy_static;
use once_cell::sync::Lazy;
use regex::Regex;
use unic_ucd_category::GeneralCategory;
@ -14,8 +12,8 @@ use crate::card_rendering::strip_av_tags;
use crate::text::normalize_to_nfc;
use crate::text::strip_html;
lazy_static! {
static ref LINEBREAKS: Regex = Regex::new(
static LINEBREAKS: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"(?six)
(
\n
@ -23,95 +21,136 @@ lazy_static! {
<br\s?/?>
|
</?div>
)+
"
)+",
)
.unwrap();
.unwrap()
});
macro_rules! format_typeans {
($typeans:expr) => {
format!("<code id=typeans>{}</code>", $typeans)
};
}
struct DiffContext {
expected: Vec<char>,
// Public API
pub fn compare_answer(expected: &str, provided: &str) -> String {
if provided.is_empty() {
format_typeans!(htmlescape::encode_minimal(expected))
} else {
Diff::new(expected, provided).to_html()
}
}
struct Diff {
provided: Vec<char>,
expected: Vec<char>,
expected_original: String,
}
impl DiffContext {
impl Diff {
fn new(expected: &str, provided: &str) -> Self {
DiffContext {
provided: prepare_provided(provided).chars().collect_vec(),
expected: prepare_expected(expected).chars().collect_vec(),
Self {
provided: normalize_to_nfc(provided).chars().collect(),
expected: normalize_to_nfc(&prepare_expected(expected))
.chars()
.collect(),
expected_original: expected.to_string(),
}
}
fn slice_expected(&self, opcode: &Opcode) -> String {
self.expected[opcode.second_start..opcode.second_end]
.iter()
.cloned()
.collect()
// Entry Point
fn to_html(&self) -> String {
if self.provided == self.expected {
format_typeans!(format!(
"<span class=typeGood>{}</span>",
self.expected_original
))
} else {
let output = self.to_tokens();
let provided_html = render_tokens(&output.provided_tokens);
let expected_html = render_tokens(&output.expected_tokens);
format_typeans!(format!(
"{provided_html}<br><span id=typearrow>&darr;</span><br>{expected_html}"
))
}
}
fn slice_provided(&self, opcode: &Opcode) -> String {
self.provided[opcode.first_start..opcode.first_end]
.iter()
.cloned()
.collect()
}
fn to_tokens(&self) -> DiffOutput {
fn to_tokens(&self) -> DiffTokens {
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
let opcodes = matcher.get_opcodes();
let mut provided = vec![];
let mut expected = vec![];
for opcode in opcodes {
let mut provided_tokens = Vec::new();
let mut expected_tokens = Vec::new();
for opcode in matcher.get_opcodes() {
let provided_slice = slice(&self.provided, opcode.first_start, opcode.first_end);
let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end);
match opcode.tag.as_str() {
"equal" => {
provided.push(DiffToken::good(self.slice_provided(&opcode)));
expected.push(DiffToken::good(self.slice_expected(&opcode)));
}
"delete" => {
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
provided_tokens.push(DiffToken::good(provided_slice));
expected_tokens.push(DiffToken::good(expected_slice));
}
"delete" => provided_tokens.push(DiffToken::bad(provided_slice)),
"insert" => {
let expected_str = self.slice_expected(&opcode);
provided.push(DiffToken::missing("-".repeat(expected_str.chars().count())));
expected.push(DiffToken::missing(expected_str));
provided_tokens.push(DiffToken::missing(
"-".repeat(expected_slice.chars().count()),
));
expected_tokens.push(DiffToken::missing(expected_slice));
}
"replace" => {
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
expected.push(DiffToken::missing(self.slice_expected(&opcode)));
provided_tokens.push(DiffToken::bad(provided_slice));
expected_tokens.push(DiffToken::missing(expected_slice));
}
_ => unreachable!(),
}
}
DiffOutput { provided, expected }
DiffTokens {
provided_tokens,
expected_tokens,
}
}
}
fn to_html(&self) -> String {
let output = self.to_tokens();
let provided = render_tokens(&output.provided);
let expected = render_tokens(&output.expected);
format!(
"<code id=typeans>{}</code>",
if self.provided.is_empty() {
htmlescape::encode_minimal(&self.expected.iter().collect::<String>())
} else if self.provided == self.expected {
provided
} else {
format!("{provided}<br><span id=typearrow>&darr;</span><br>{expected}")
}
)
}
// Utility Functions
fn slice(chars: &[char], start: usize, end: usize) -> String {
chars[start..end].iter().collect()
}
fn prepare_expected(expected: &str) -> String {
let without_av = strip_av_tags(expected);
let without_newlines = LINEBREAKS.replace_all(&without_av, " ");
let without_html = strip_html(&without_newlines);
let without_outer_whitespace = without_html.trim();
normalize_to_nfc(without_outer_whitespace).into()
let no_av_tags = strip_av_tags(expected);
let no_linebreaks = LINEBREAKS.replace_all(&no_av_tags, " ");
strip_html(&no_linebreaks).trim().to_string()
}
fn prepare_provided(provided: &str) -> String {
normalize_to_nfc(provided).into()
// Render Functions
fn render_tokens(tokens: &[DiffToken]) -> String {
tokens.iter().fold(String::new(), |mut acc, token| {
let isolated_text = isolate_leading_mark(&token.text);
let encoded_text = htmlescape::encode_minimal(&isolated_text);
let class = token.to_class();
acc.push_str(&format!("<span class={class}>{encoded_text}</span>"));
acc
})
}
/// Prefixes a leading mark character with a non-breaking space to prevent
/// it from joining the previous token.
fn isolate_leading_mark(text: &str) -> Cow<str> {
if text
.chars()
.next()
.map_or(false, |ch| GeneralCategory::of(ch).is_mark())
{
format!("\u{a0}{text}").into()
} else {
text.into()
}
}
#[derive(Debug, PartialEq, Eq)]
struct DiffTokens {
provided_tokens: Vec<DiffToken>,
expected_tokens: Vec<DiffToken>,
}
#[derive(Debug, PartialEq, Eq)]
@ -128,64 +167,29 @@ struct DiffToken {
}
impl DiffToken {
fn bad(text: String) -> Self {
Self {
kind: DiffTokenKind::Bad,
text,
}
fn new(kind: DiffTokenKind, text: String) -> Self {
Self { kind, text }
}
fn good(text: String) -> Self {
Self {
kind: DiffTokenKind::Good,
text,
Self::new(DiffTokenKind::Good, text)
}
fn bad(text: String) -> Self {
Self::new(DiffTokenKind::Bad, text)
}
fn missing(text: String) -> Self {
Self {
kind: DiffTokenKind::Missing,
text,
}
}
Self::new(DiffTokenKind::Missing, text)
}
#[derive(Debug, PartialEq, Eq)]
struct DiffOutput {
provided: Vec<DiffToken>,
expected: Vec<DiffToken>,
}
pub fn compare_answer(expected: &str, provided: &str) -> String {
DiffContext::new(expected, provided).to_html()
}
fn render_tokens(tokens: &[DiffToken]) -> String {
let text_tokens: Vec<_> = tokens
.iter()
.map(|token| {
let text = with_isolated_leading_mark(&token.text);
let encoded = htmlescape::encode_minimal(&text);
let class = match token.kind {
fn to_class(&self) -> &'static str {
match self.kind {
DiffTokenKind::Good => "typeGood",
DiffTokenKind::Bad => "typeBad",
DiffTokenKind::Missing => "typeMissed",
};
format!("<span class={class}>{encoded}</span>")
})
.collect();
text_tokens.join("")
}
/// If text begins with a mark character, prefix it with a non-breaking
/// space to prevent the mark from joining to the previous token.
fn with_isolated_leading_mark(text: &str) -> Cow<str> {
if let Some(ch) = text.chars().next() {
if GeneralCategory::of(ch).is_mark() {
return format!("\u{a0}{text}").into();
}
}
text.into()
}
#[cfg(test)]
@ -205,10 +209,10 @@ mod test {
#[test]
fn tokens() {
let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
let output = ctx.to_tokens();
assert_eq!(
output.provided,
output.provided_tokens,
vec![
bad("y"),
good(" ahora q"),
@ -220,7 +224,7 @@ mod test {
]
);
assert_eq!(
output.expected,
output.expected_tokens,
vec![
missing("¿Y"),
good(" ahora q"),
@ -235,24 +239,24 @@ mod test {
#[test]
fn html_and_media() {
let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1 2");
let ctx = Diff::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1 2");
// the spacing is handled by wrapping html output in white-space: pre-wrap
assert_eq!(ctx.to_tokens().expected, &[good("1 2")]);
assert_eq!(ctx.to_tokens().expected_tokens, &[good("1 2")]);
}
#[test]
fn missed_chars_only_shown_in_provided_when_after_good() {
let ctx = DiffContext::new("1", "23");
assert_eq!(ctx.to_tokens().provided, &[bad("23")]);
let ctx = DiffContext::new("12", "1");
assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("-"),]);
let ctx = Diff::new("1", "23");
assert_eq!(ctx.to_tokens().provided_tokens, &[bad("23")]);
let ctx = Diff::new("12", "1");
assert_eq!(ctx.to_tokens().provided_tokens, &[good("1"), missing("-"),]);
}
#[test]
fn missed_chars_counted_correctly() {
let ctx = DiffContext::new("нос", "нс");
let ctx = Diff::new("нос", "нс");
assert_eq!(
ctx.to_tokens().provided,
ctx.to_tokens().provided_tokens,
&[good("н"), missing("-"), good("с")]
);
}
@ -260,9 +264,9 @@ mod test {
#[test]
fn handles_certain_unicode_as_expected() {
// this was not parsed as expected with dissimilar 1.0.4
let ctx = DiffContext::new("쓰다듬다", "스다뜸다");
let ctx = Diff::new("쓰다듬다", "스다뜸다");
assert_eq!(
ctx.to_tokens().provided,
ctx.to_tokens().provided_tokens,
&[bad(""), good(""), bad(""), good(""),]
);
}
@ -270,7 +274,7 @@ mod test {
#[test]
fn does_not_panic_with_certain_unicode() {
// this was causing a panic with dissimilar 1.0.4
let ctx = DiffContext::new(
let ctx = Diff::new(
"Сущность должна быть ответственна только за одно дело",
concat!(
"Single responsibility Сущность выполняет только одну задачу.",
@ -287,13 +291,13 @@ mod test {
#[test]
fn empty_input_shows_as_code() {
let ctx = DiffContext::new("123", "");
assert_eq!(ctx.to_html(), "<code id=typeans>123</code>");
let ctx = compare_answer("123", "");
assert_eq!(ctx, "<code id=typeans>123</code>");
}
#[test]
fn correct_input_is_collapsed() {
let ctx = DiffContext::new("123", "123");
let ctx = Diff::new("123", "123");
assert_eq!(
ctx.to_html(),
"<code id=typeans><span class=typeGood>123</span></code>"
@ -302,7 +306,7 @@ mod test {
#[test]
fn incorrect_input_is_not_collapsed() {
let ctx = DiffContext::new("123", "1123");
let ctx = Diff::new("123", "1123");
assert_eq!(
ctx.to_html(),
"<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"