typeanswer: cleanups (#3415)

* typeanswer: cleanups

  no functional change

* typeanswer: disambiguate

  no functional change

* typeanswer: reorder

* typeanswer: skip DiffContext if nothing typed

  No use to run all that code without input.

* typeanswer: skip tokenization if input is correct

  No use in this case.

* typeanswer: make repo check happy (.map → .fold)

  Either a new check or the call was too complex previously for it to trigger?

* Add to contributors

* typeanswer: remove slice_* functions

  They're used only once in to_tokens. Easier to read this way IMHO, anyway.
2025-12-24 20:32:58 -05:00 · 2024-09-25 12:15:16 +02:00 · 2024-09-25 12:15:16 +02:00 · dc5fa60c8b
commit dc5fa60c8b
parent cf17ca2f84
2 changed files with 137 additions and 132 deletions
--- a/1
+++ b/1
@@ -193,6 +193,7 @@ Luke Bartholomew <lukesbart@icloud.com>
 Gregory Abrasaldo <degeemon@gmail.com>
 Taylor Obyen <https://github.com/taylorobyen>
 Kris Cherven <krischerven@gmail.com>
+twwn <github.com/twwn>

 ********************

--- a/rslib/src/typeanswer.rs
+++ b/rslib/src/typeanswer.rs
@@ -3,10 +3,8 @@

 use std::borrow::Cow;

-use difflib::sequencematcher::Opcode;
 use difflib::sequencematcher::SequenceMatcher;
-use itertools::Itertools;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use regex::Regex;
 use unic_ucd_category::GeneralCategory;

@@ -14,8 +12,8 @@ use crate::card_rendering::strip_av_tags;
 use crate::text::normalize_to_nfc;
 use crate::text::strip_html;

-lazy_static! {
-    static ref LINEBREAKS: Regex = Regex::new(
+static LINEBREAKS: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(
        r"(?six)
        (
            \n
@@ -23,95 +21,136 @@ lazy_static! {
            <br\s?/?>
            |
            </?div>
-        )+
-    "
+        )+",
    )
-    .unwrap();
+    .unwrap()
+});
+
+macro_rules! format_typeans {
+    ($typeans:expr) => {
+        format!("<code id=typeans>{}</code>", $typeans)
+    };
 }

-struct DiffContext {
-    expected: Vec<char>,
+// Public API
+pub fn compare_answer(expected: &str, provided: &str) -> String {
+    if provided.is_empty() {
+        format_typeans!(htmlescape::encode_minimal(expected))
+    } else {
+        Diff::new(expected, provided).to_html()
+    }
+}
+
+struct Diff {
    provided: Vec<char>,
+    expected: Vec<char>,
+    expected_original: String,
 }

-impl DiffContext {
+impl Diff {
    fn new(expected: &str, provided: &str) -> Self {
-        DiffContext {
-            provided: prepare_provided(provided).chars().collect_vec(),
-            expected: prepare_expected(expected).chars().collect_vec(),
+        Self {
+            provided: normalize_to_nfc(provided).chars().collect(),
+            expected: normalize_to_nfc(&prepare_expected(expected))
+                .chars()
+                .collect(),
+            expected_original: expected.to_string(),
        }
    }

-    fn slice_expected(&self, opcode: &Opcode) -> String {
-        self.expected[opcode.second_start..opcode.second_end]
-            .iter()
-            .cloned()
-            .collect()
+    // Entry Point
+    fn to_html(&self) -> String {
+        if self.provided == self.expected {
+            format_typeans!(format!(
+                "<span class=typeGood>{}</span>",
+                self.expected_original
+            ))
+        } else {
+            let output = self.to_tokens();
+            let provided_html = render_tokens(&output.provided_tokens);
+            let expected_html = render_tokens(&output.expected_tokens);
+
+            format_typeans!(format!(
+                "{provided_html}<br><span id=typearrow>&darr;</span><br>{expected_html}"
+            ))
+        }
    }

-    fn slice_provided(&self, opcode: &Opcode) -> String {
-        self.provided[opcode.first_start..opcode.first_end]
-            .iter()
-            .cloned()
-            .collect()
-    }
-
-    fn to_tokens(&self) -> DiffOutput {
+    fn to_tokens(&self) -> DiffTokens {
        let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
-        let opcodes = matcher.get_opcodes();
-        let mut provided = vec![];
-        let mut expected = vec![];
-        for opcode in opcodes {
+        let mut provided_tokens = Vec::new();
+        let mut expected_tokens = Vec::new();
+
+        for opcode in matcher.get_opcodes() {
+            let provided_slice = slice(&self.provided, opcode.first_start, opcode.first_end);
+            let expected_slice = slice(&self.expected, opcode.second_start, opcode.second_end);
+
            match opcode.tag.as_str() {
                "equal" => {
-                    provided.push(DiffToken::good(self.slice_provided(&opcode)));
-                    expected.push(DiffToken::good(self.slice_expected(&opcode)));
-                }
-                "delete" => {
-                    provided.push(DiffToken::bad(self.slice_provided(&opcode)));
+                    provided_tokens.push(DiffToken::good(provided_slice));
+                    expected_tokens.push(DiffToken::good(expected_slice));
                }
+                "delete" => provided_tokens.push(DiffToken::bad(provided_slice)),
                "insert" => {
-                    let expected_str = self.slice_expected(&opcode);
-                    provided.push(DiffToken::missing("-".repeat(expected_str.chars().count())));
-                    expected.push(DiffToken::missing(expected_str));
+                    provided_tokens.push(DiffToken::missing(
+                        "-".repeat(expected_slice.chars().count()),
+                    ));
+                    expected_tokens.push(DiffToken::missing(expected_slice));
                }
                "replace" => {
-                    provided.push(DiffToken::bad(self.slice_provided(&opcode)));
-                    expected.push(DiffToken::missing(self.slice_expected(&opcode)));
+                    provided_tokens.push(DiffToken::bad(provided_slice));
+                    expected_tokens.push(DiffToken::missing(expected_slice));
                }
                _ => unreachable!(),
            }
        }
-        DiffOutput { provided, expected }
+        DiffTokens {
+            provided_tokens,
+            expected_tokens,
        }
+    }
+}

-    fn to_html(&self) -> String {
-        let output = self.to_tokens();
-        let provided = render_tokens(&output.provided);
-        let expected = render_tokens(&output.expected);
-        format!(
-            "<code id=typeans>{}</code>",
-            if self.provided.is_empty() {
-                htmlescape::encode_minimal(&self.expected.iter().collect::<String>())
-            } else if self.provided == self.expected {
-                provided
-            } else {
-                format!("{provided}<br><span id=typearrow>&darr;</span><br>{expected}")
-            }
-        )
-    }
+// Utility Functions
+fn slice(chars: &[char], start: usize, end: usize) -> String {
+    chars[start..end].iter().collect()
 }

 fn prepare_expected(expected: &str) -> String {
-    let without_av = strip_av_tags(expected);
-    let without_newlines = LINEBREAKS.replace_all(&without_av, " ");
-    let without_html = strip_html(&without_newlines);
-    let without_outer_whitespace = without_html.trim();
-    normalize_to_nfc(without_outer_whitespace).into()
+    let no_av_tags = strip_av_tags(expected);
+    let no_linebreaks = LINEBREAKS.replace_all(&no_av_tags, " ");
+    strip_html(&no_linebreaks).trim().to_string()
 }

-fn prepare_provided(provided: &str) -> String {
-    normalize_to_nfc(provided).into()
+// Render Functions
+fn render_tokens(tokens: &[DiffToken]) -> String {
+    tokens.iter().fold(String::new(), |mut acc, token| {
+        let isolated_text = isolate_leading_mark(&token.text);
+        let encoded_text = htmlescape::encode_minimal(&isolated_text);
+        let class = token.to_class();
+        acc.push_str(&format!("<span class={class}>{encoded_text}</span>"));
+        acc
+    })
+}
+
+/// Prefixes a leading mark character with a non-breaking space to prevent
+/// it from joining the previous token.
+fn isolate_leading_mark(text: &str) -> Cow<str> {
+    if text
+        .chars()
+        .next()
+        .map_or(false, |ch| GeneralCategory::of(ch).is_mark())
+    {
+        format!("\u{a0}{text}").into()
+    } else {
+        text.into()
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+struct DiffTokens {
+    provided_tokens: Vec<DiffToken>,
+    expected_tokens: Vec<DiffToken>,
 }

 #[derive(Debug, PartialEq, Eq)]
@@ -128,64 +167,29 @@ struct DiffToken {
 }

 impl DiffToken {
-    fn bad(text: String) -> Self {
-        Self {
-            kind: DiffTokenKind::Bad,
-            text,
-        }
+    fn new(kind: DiffTokenKind, text: String) -> Self {
+        Self { kind, text }
    }

    fn good(text: String) -> Self {
-        Self {
-            kind: DiffTokenKind::Good,
-            text,
+        Self::new(DiffTokenKind::Good, text)
    }
+
+    fn bad(text: String) -> Self {
+        Self::new(DiffTokenKind::Bad, text)
    }

    fn missing(text: String) -> Self {
-        Self {
-            kind: DiffTokenKind::Missing,
-            text,
+        Self::new(DiffTokenKind::Missing, text)
    }
-    }
-}

-#[derive(Debug, PartialEq, Eq)]
-struct DiffOutput {
-    provided: Vec<DiffToken>,
-    expected: Vec<DiffToken>,
-}
-
-pub fn compare_answer(expected: &str, provided: &str) -> String {
-    DiffContext::new(expected, provided).to_html()
-}
-
-fn render_tokens(tokens: &[DiffToken]) -> String {
-    let text_tokens: Vec<_> = tokens
-        .iter()
-        .map(|token| {
-            let text = with_isolated_leading_mark(&token.text);
-            let encoded = htmlescape::encode_minimal(&text);
-            let class = match token.kind {
+    fn to_class(&self) -> &'static str {
+        match self.kind {
            DiffTokenKind::Good => "typeGood",
            DiffTokenKind::Bad => "typeBad",
            DiffTokenKind::Missing => "typeMissed",
-            };
-            format!("<span class={class}>{encoded}</span>")
-        })
-        .collect();
-    text_tokens.join("")
-}
-
-/// If text begins with a mark character, prefix it with a non-breaking
-/// space to prevent the mark from joining to the previous token.
-fn with_isolated_leading_mark(text: &str) -> Cow<str> {
-    if let Some(ch) = text.chars().next() {
-        if GeneralCategory::of(ch).is_mark() {
-            return format!("\u{a0}{text}").into();
        }
    }
-    text.into()
 }

 #[cfg(test)]
@@ -205,10 +209,10 @@ mod test {

    #[test]
    fn tokens() {
-        let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
+        let ctx = Diff::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
        let output = ctx.to_tokens();
        assert_eq!(
-            output.provided,
+            output.provided_tokens,
            vec![
                bad("y"),
                good(" ahora q"),
@@ -220,7 +224,7 @@ mod test {
            ]
        );
        assert_eq!(
-            output.expected,
+            output.expected_tokens,
            vec![
                missing("¿Y"),
                good(" ahora q"),
@@ -235,24 +239,24 @@ mod test {

    #[test]
    fn html_and_media() {
-        let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1  2");
+        let ctx = Diff::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1  2");
        // the spacing is handled by wrapping html output in white-space: pre-wrap
-        assert_eq!(ctx.to_tokens().expected, &[good("1  2")]);
+        assert_eq!(ctx.to_tokens().expected_tokens, &[good("1  2")]);
    }

    #[test]
    fn missed_chars_only_shown_in_provided_when_after_good() {
-        let ctx = DiffContext::new("1", "23");
-        assert_eq!(ctx.to_tokens().provided, &[bad("23")]);
-        let ctx = DiffContext::new("12", "1");
-        assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("-"),]);
+        let ctx = Diff::new("1", "23");
+        assert_eq!(ctx.to_tokens().provided_tokens, &[bad("23")]);
+        let ctx = Diff::new("12", "1");
+        assert_eq!(ctx.to_tokens().provided_tokens, &[good("1"), missing("-"),]);
    }

    #[test]
    fn missed_chars_counted_correctly() {
-        let ctx = DiffContext::new("нос", "нс");
+        let ctx = Diff::new("нос", "нс");
        assert_eq!(
-            ctx.to_tokens().provided,
+            ctx.to_tokens().provided_tokens,
            &[good("н"), missing("-"), good("с")]
        );
    }
@@ -260,9 +264,9 @@ mod test {
    #[test]
    fn handles_certain_unicode_as_expected() {
        // this was not parsed as expected with dissimilar 1.0.4
-        let ctx = DiffContext::new("쓰다듬다", "스다뜸다");
+        let ctx = Diff::new("쓰다듬다", "스다뜸다");
        assert_eq!(
-            ctx.to_tokens().provided,
+            ctx.to_tokens().provided_tokens,
            &[bad("스"), good("다"), bad("뜸"), good("다"),]
        );
    }
@@ -270,7 +274,7 @@ mod test {
    #[test]
    fn does_not_panic_with_certain_unicode() {
        // this was causing a panic with dissimilar 1.0.4
-        let ctx = DiffContext::new(
+        let ctx = Diff::new(
            "Сущность должна быть ответственна только за одно дело",
            concat!(
                "Single responsibility Сущность выполняет только одну задачу.",
@@ -287,13 +291,13 @@ mod test {

    #[test]
    fn empty_input_shows_as_code() {
-        let ctx = DiffContext::new("123", "");
-        assert_eq!(ctx.to_html(), "<code id=typeans>123</code>");
+        let ctx = compare_answer("123", "");
+        assert_eq!(ctx, "<code id=typeans>123</code>");
    }

    #[test]
    fn correct_input_is_collapsed() {
-        let ctx = DiffContext::new("123", "123");
+        let ctx = Diff::new("123", "123");
        assert_eq!(
            ctx.to_html(),
            "<code id=typeans><span class=typeGood>123</span></code>"
@@ -302,7 +306,7 @@ mod test {

    #[test]
    fn incorrect_input_is_not_collapsed() {
-        let ctx = DiffContext::new("123", "1123");
+        let ctx = Diff::new("123", "1123");
        assert_eq!(
            ctx.to_html(),
            "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>&darr;</span><br><span class=typeGood>123</span></code>"