From fa97e954d71d277ef0c2577fa491b3c853f4ca6f Mon Sep 17 00:00:00 2001
From: TRIAEIOU <94647023+TRIAEIOU@users.noreply.github.com>
Date: Sun, 23 Oct 2022 23:32:49 +0200
Subject: [PATCH] Nested clozes and increased cloze meta data
---
pylib/tests/test_models.py | 57 ++---
rslib/src/cloze.rs | 390 ++++++++++++++++++++++++++--------
rslib/src/template_filters.rs | 2 +-
3 files changed, 330 insertions(+), 119 deletions(-)
diff --git a/pylib/tests/test_models.py b/pylib/tests/test_models.py
index 5e45e35bd..b738cd77d 100644
--- a/pylib/tests/test_models.py
+++ b/pylib/tests/test_models.py
@@ -13,8 +13,15 @@ from tests.shared import getEmptyCol
def encode_attribute(s):
+ cmap = {
+ "\"": """,
+ "&": "&",
+ "\n": "
",
+ "<": "<",
+ ">": ">"
+ }
return "".join(
- c if c.isalnum() else "{:X};".format(ord(c)) for c in html.escape(s)
+ c if not c in cmap.keys() else cmap[c] for c in s
)
@@ -185,40 +192,40 @@ def test_cloze():
note["Text"] = "hello {{c1::world}}"
assert col.addNote(note) == 1
assert (
- f'hello [...]'
+ f'hello [...]'
in note.cards()[0].question()
)
- assert 'hello world' in note.cards()[0].answer()
+ assert 'hello world' in note.cards()[0].answer()
# and with a comment
note = col.new_note(m)
note["Text"] = "hello {{c1::world::typical}}"
assert col.addNote(note) == 1
assert (
- f'[typical]'
+ f'[typical]'
in note.cards()[0].question()
)
- assert 'world' in note.cards()[0].answer()
+ assert 'world' in note.cards()[0].answer()
# and with 2 clozes
note = col.new_note(m)
note["Text"] = "hello {{c1::world}} {{c2::bar}}"
assert col.addNote(note) == 2
(c1, c2) = note.cards()
assert (
- f'[...] bar'
+ f'[...] bar'
in c1.question()
)
- assert 'world bar' in c1.answer()
+ assert 'world bar' in c1.answer()
assert (
- f'world [...]'
+ f'world [...]'
in c2.question()
)
- assert 'world bar' in c2.answer()
+ assert 'world bar' in c2.answer()
# if there are multiple answers for a single cloze, they are given in a
# list
note = col.new_note(m)
note["Text"] = "a {{c1::b}} {{c1::c}}"
assert col.addNote(note) == 1
- assert 'b c' in (
+ assert 'b c' in (
note.cards()[0].answer()
)
# if we add another cloze, a card should be generated
@@ -253,23 +260,23 @@ def test_cloze_mathjax():
assert col.addNote(note)
assert len(note.cards()) == 5
assert (
- f'class="cloze" data-cloze="{encode_attribute(q1)}"'
+ f'class="cloze active" data-cloze="{encode_attribute(q1)}"'
in note.cards()[0].question()
)
assert (
- f'class="cloze" data-cloze="{encode_attribute(q2)}"'
+ f'class="cloze active" data-cloze="{encode_attribute(q2)}"'
in note.cards()[1].question()
)
assert (
- f'class="cloze" data-cloze="{encode_attribute(q3)}"'
+ f'class="cloze active" data-cloze="{encode_attribute(q3)}"'
not in note.cards()[2].question()
)
assert (
- f'class="cloze" data-cloze="{encode_attribute(q4)}"'
+ f'class="cloze active" data-cloze="{encode_attribute(q4)}"'
in note.cards()[3].question()
)
assert (
- f'class="cloze" data-cloze="{encode_attribute(q5)}"'
+ f'class="cloze active" data-cloze="{encode_attribute(q5)}"'
in note.cards()[4].question()
)
@@ -280,7 +287,7 @@ def test_cloze_mathjax():
assert (
note.cards()[0]
.question()
- .endswith(r'\(a\) [...] \[ [...] \]')
+ .endswith(r'\(a\) [...] \[ [...] \]')
)
@@ -310,26 +317,26 @@ def test_chained_mods():
col.models.update(m)
note = col.newNote()
- q1 = 'phrase'
- a1 = "sentence"
- q2 = 'en chaine'
- a2 = "chained"
+ a1 = 'phrase'
+ h1 = "sentence"
+ a2 = 'en chaine'
+ h2 = "chained"
note[
"Text"
] = "This {{{{c1::{}::{}}}}} demonstrates {{{{c1::{}::{}}}}} clozes.".format(
- q1,
a1,
- q2,
+ h1,
a2,
+ h2,
)
assert col.addNote(note) == 1
assert (
- f'This [sentence]'
- f' demonstrates [chained] clozes.'
+ f'This [{h1}]'
+ f' demonstrates [{h2}] clozes.'
in note.cards()[0].question()
)
assert (
- f'This phrase demonstrates en chaine clozes.'
+ f'This {a1} demonstrates {a2} clozes.'
in note.cards()[0].answer()
)
diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs
index 0612f7795..347ca03cc 100644
--- a/rslib/src/cloze.rs
+++ b/rslib/src/cloze.rs
@@ -9,18 +9,6 @@ use regex::{Captures, Regex};
use crate::{latex::contains_latex, template::RenderContext, text::strip_html_preserving_entities};
lazy_static! {
- static ref CLOZE: Regex = Regex::new(
- r#"(?xsi)
- \{\{
- c(\d+):: # 1 = cloze number
- (.*?) # 2 = clozed text
- (?:
- ::(.*?) # 3 = optional hint
- )?
- \}\}
- "#
- )
- .unwrap();
static ref MATHJAX: Regex = Regex::new(
r#"(?xsi)
(\\[(\[]) # 1 = mathjax opening tag
@@ -31,14 +19,6 @@ lazy_static! {
.unwrap();
}
-mod cloze_caps {
- // cloze ordinal
- pub const ORD: usize = 1;
- // the occluded text
- pub const TEXT: usize = 2;
- // optional hint
- pub const HINT: usize = 3;
-}
mod mathjax_caps {
pub const OPENING_TAG: usize = 1;
@@ -46,84 +26,213 @@ mod mathjax_caps {
pub const CLOSING_TAG: usize = 3;
}
-pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow {
- let mut cloze_ord_was_in_text = false;
+/// States for cloze parsing state machine
+///
+/// The parser consumes one character at a time; `process_char` maps
+/// (current state, character) to the next state.
+#[derive(PartialEq, Copy, Clone)]
+enum State {
+ /// Fallback state: reached for any (state, char) pair without an explicit transition
+ Root,
+ /// Inside the text part of a cloze
+ Text,
+ /// A first `{` was seen (from `Root` or `Text`)
+ Open,
+ /// A second `{` was seen; further `{` keep the parser in this state
+ Open2,
+ /// `c` was seen right after `{{`
+ COpen,
+ /// Reading the digits of the cloze ordinal
+ Ord,
+ /// First `:` after the ordinal
+ TextOpen1,
+ /// Second `:` after the ordinal; the cloze text starts with the next character
+ TextOpen2,
+ /// One `}` was seen while in the text part
+ TextClose1,
+ /// One `:` was seen while in the text part
+ HintOpen1,
+ /// `::` was seen in the text part; the hint starts with the next character
+ HintOpen2,
+ /// Inside the hint part of a cloze
+ Hint,
+ /// One `}` was seen while in the hint part
+ HintClose1,
+ /// `}}` was seen: the cloze is complete
+ Close,
+ /// An unexpected character followed a partial opener: not a cloze after all
+ Abandon
+}
+/// Struct for storing data for one cloze
+struct Cloze {
+ // Characters consumed for this entry, kept so an abandoned or unclosed
+ // candidate can be written back to its parent's text
+ buf: String,
+ // Digits of the cloze ordinal, collected while in `State::Ord`
+ ord_str: String,
+ // Cloze text, collected while in `State::Text` (rendered nested clozes are appended here too)
+ text: String,
+ // Hint text, collected while in `State::Hint`; empty when no hint was given
+ hint: String
+}
- let output = CLOZE.replace_all(text, |caps: &Captures| {
- let captured_ord = caps
- .get(cloze_caps::ORD)
- .unwrap()
- .as_str()
- .parse()
- .unwrap_or(0);
-
- let text = caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned();
- if captured_ord != cloze_ord {
- // other cloze deletions are unchanged
- return text;
- } else {
- cloze_ord_was_in_text = true;
+impl Cloze {
+ /// Create empty Cloze struct
+ fn new() -> Self {
+ Self {
+ buf: String::new(),
+ ord_str: String::new(),
+ text: String::new(),
+ hint: String::new()
}
-
- let text_attr;
- let replacement;
- if question {
- text_attr = format!(r#" data-cloze="{}""#, htmlescape::encode_attribute(&text));
- // hint provided?
- if let Some(hint) = caps.get(cloze_caps::HINT) {
- replacement = format!("[{}]", hint.as_str());
- } else {
- replacement = "[...]".to_string();
- }
- } else {
- text_attr = "".to_string();
- replacement = text;
- }
-
- format!(r#"{}"#, text_attr, replacement)
- });
-
- if !cloze_ord_was_in_text {
- return "".into();
}
-
- // if no cloze deletions are found, Anki returns an empty string
- match output {
- Cow::Borrowed(_) => "".into(),
- other => other,
+    /// Build a `Cloze` whose raw buffer already holds `c`; ordinal, text and hint start empty
+    fn new_from(c: char) -> Self {
+        let mut buf = String::new();
+        buf.push(c);
+        Self {
+            buf,
+            ord_str: String::new(),
+            text: String::new(),
+            hint: String::new(),
+        }
}
}
-pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow {
- CLOZE
- .captures_iter(text)
- .filter(|caps| {
- let captured_ord = caps
- .get(cloze_caps::ORD)
- .unwrap()
- .as_str()
- .parse()
- .unwrap_or(0);
+/// Advance the cloze-parsing state machine by one character.
+///
+/// Returns the state reached from `state` on input `c`. `Close` and `Abandon`
+/// have no outgoing transitions of their own: the callers in this file replace
+/// them with `Text` or `Root` before the next character. Any pair not listed
+/// below falls back to `Root`.
+fn process_char(state: State, c: char) -> State {
+    use self::State::*;
+    match (state, c) {
+        (Root, '{') => Open,
+        (Open, '{') => Open2,
+        (Open, _) => Abandon,
+        (Open2, '{') => Open2,
+        (Open2, 'c') => COpen,
+        (Open2, _) => Abandon,
+        (COpen, '0'..='9') => Ord,
+        // `{{c` not followed by a digit is not a cloze. Abandon it like every
+        // other malformed opener so the caller pops the candidate it pushed on
+        // `Open`; falling through to `Root` would leave that candidate behind.
+        (COpen, _) => Abandon,
+        (Ord, '0'..='9') => Ord,
+        (Ord, ':') => TextOpen1,
+        (Ord, _) => Abandon,
+        (TextOpen1, ':') => TextOpen2,
+        (TextOpen1, _) => Abandon,
+        (TextOpen2, ':') => HintOpen1,
+        (TextOpen2, '}') => TextClose1,
+        (TextOpen2, _) => Text,
+        (Text, '{') => Open,
+        (Text, ':') => HintOpen1,
+        (Text, '}') => TextClose1,
+        (Text, _) => Text,
+        (HintOpen1, ':') => HintOpen2,
+        (HintOpen1, '}') => TextClose1,
+        (HintOpen1, _) => Text,
+        (HintOpen2, '}') => HintClose1,
+        (HintOpen2, _) => Hint,
+        (Hint, '}') => HintClose1,
+        (Hint, _) => Hint,
+        (TextClose1 | HintClose1, '}') => Close,
+        // A single `}` is ordinary content of the part being read
+        (TextClose1, _) => Text,
+        (HintClose1, _) => Hint,
+        _ => Root,
+    }
+}
- captured_ord == cloze_ord
- })
- .map(|caps| {
- let cloze = if question {
- // hint provided?
- if let Some(hint) = caps.get(cloze_caps::HINT) {
- hint.as_str()
- } else {
- "..."
+/// Minimal encoding of string for storage in attribute (", &, \n, <, >)
+pub fn encode_attribute(text: &str) -> Cow {
+ let mut out = String::new();
+ for c in text.chars() {
+ match c {
+ '"' => out.push_str("""),
+ '&' => out.push_str("&"),
+ '\n' => out.push_str("
"),
+ '<' => out.push_str("<"),
+ '>' => out.push_str(">"),
+ _ => out.push(c)
+ }
+ }
+
+ Cow::from(out)
+}
+
+/// Parse string for clozes and return:
+/// cloze_only == false: resulting HTML string
+/// cloze_only == true: resulting HTML for the cloze texts only
+/// no cloze found: empty string
+///
+/// `stack[0]` is the root entry collecting the output; every `{` that may open
+/// a cloze pushes a candidate, which is popped again when it is closed (`}}`)
+/// or abandoned (malformed opener).
+fn reveal_clozes(text: &str, cloze_ord: u16, question: bool, cloze_only: bool) -> Cow<str> {
+    let mut state = State::Root;
+    let mut stack: Vec<Cloze> = vec![Cloze::new()];
+    let mut current_found = false;
+
+    for c in text.chars() {
+        let old_state = state;
+        state = process_char(state, c);
+
+        match state {
+            State::Open => stack.push(Cloze::new_from(c)),
+            State::Close => current_found |= close(&mut state, &mut stack, cloze_only, cloze_ord, question),
+            State::Abandon => abandon(&mut state, &mut stack, c),
+            _ => {
+                let last = stack.last_mut().unwrap();
+                last.buf.push(c);
+                match state {
+                    State::Root => if !cloze_only {last.text.push(c)},
+                    State::Open2 => if old_state == State::Open2 {shift_open(&mut stack)},
+                    State::Ord => last.ord_str.push(c),
+                    State::Text => last.text.push(c),
+                    State::Hint => last.hint.push(c),
+                    _ => {}
+                }
- } else {
- caps.get(cloze_caps::TEXT).unwrap().as_str()
- };
+            }
+        }
+    }
+
+    if !current_found { return Cow::Borrowed("") }
+    if cloze_only {
+        // Entries were appended as ", <text>"; drop the leading separator.
+        // Read the root entry (an unclosed candidate may still sit on top of
+        // it) and use `get` so a short string cannot cause a slicing panic.
+        return stack[0].text.get(2..).unwrap_or("").to_owned().into();
+    }
- cloze
- })
- .collect::>()
- .join(", ")
- .into()
+    // Candidates still open at end of input were never closed: write their raw
+    // characters back into the parent, innermost first, until only the root is left.
+    while stack.len() > 1 {
+        let cloze = stack.pop().unwrap();
+        stack.last_mut().unwrap().text.push_str(&cloze.buf);
+    }
+    return Cow::from(stack.last().unwrap().text.to_owned());
+
+    // 3 consecutive {{{, shift the first onto the "parent" text
+    // NOTE(review): the shifted `{` lands in the parent's `buf` only, not in its
+    // `text` - confirm that dropping it from the rendered output is intended.
+    fn shift_open(stack: &mut Vec<Cloze>) {
+        let i = stack.len() - 1;
+        let fc = stack[i].buf.remove(0);
+        stack[i - 1].buf.push(fc);
+    }
+
+    // Close cloze and set state; returns true when the closed cloze has ordinal `cloze_ord`
+    fn close(state: &mut State, stack: &mut Vec<Cloze>,
+        cloze_only: bool, cloze_ord: u16, question: bool) -> bool {
+        let cloze = stack.pop().unwrap();
+        *state = if stack.len() > 1 { State::Text } else { State::Root };
+
+        // `ord_str` holds digits only, so parsing can fail on overflow; fall back
+        // to 0 (as the regex-based code did) instead of panicking on field content.
+        let ordinal = cloze.ord_str.parse::<u16>().unwrap_or(0);
+        let last = stack.last_mut().unwrap();
+
+        // NOTE(review): the markup inside the format strings below appears to have
+        // been stripped from this copy of the patch (placeholders do not match the
+        // arguments) - restore from the upstream commit before applying.
+        // NOTE(review): in cloze-only mode the text of clozes with a different
+        // ordinal is emitted as well, whereas the removed code filtered on
+        // `captured_ord == cloze_ord` - confirm this is intended.
+        match (cloze_only, question, ordinal == cloze_ord, cloze.hint.is_empty()) {
+            // Cloze text only
+            (true, true, true, true) => last.text.push_str(", ..."),
+            (true, true, true, false) => last.text.push_str(&format!(", {}", &cloze.hint)),
+            (true, _, _, _) => last.text.push_str(&format!(", {}", &cloze.text)),
+
+            // Full cloze
+            // Question - active cloze, no hint
+            (false, true, true, true) => last.text.push_str(
+                &format!(r#"[...]"#,
+                    encode_attribute(cloze.text.as_str()), ordinal)),
+            // Question - active cloze, hint
+            (false, true, true, false) => last.text.push_str(
+                &format!(r#"[{}]"#,
+                    encode_attribute(cloze.text.as_str()), ordinal, &cloze.hint)),
+            // Question - inactive cloze
+            (false, true, false, _) => last.text.push_str(
+                &format!(r#"{}"#,
+                    ordinal, cloze.text.as_str())),
+            // Answer - active cloze
+            (false, false, true, _) => last.text.push_str(
+                &format!(r#"{}"#,
+                    ordinal, cloze.text.as_str())),
+            // Answer - inactive cloze
+            (false, false, false, _) => last.text.push_str(
+                &format!(r#"{}"#,
+                    ordinal, cloze.text.as_str()))
+        }
+
+        ordinal == cloze_ord
+    }
+
+    // Abandon cloze and set state: the candidate's raw characters plus `c` become parent text
+    fn abandon(state: &mut State, stack: &mut Vec<Cloze>, c: char) {
+        let cloze = stack.pop().unwrap();
+        *state = if stack.len() > 1 { State::Text } else { State::Root };
+        let last = stack.last_mut().unwrap();
+        last.text.push_str(&cloze.buf);
+        last.text.push(c);
+    }
+}
+
+/// Render `text` for cloze ordinal `cloze_ord` as the full HTML string
+/// (`reveal_clozes` with `cloze_only` set to false).
+pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
+    reveal_clozes(text, cloze_ord, question, false)
+}
+
+/// Render only the cloze texts of `text` for cloze ordinal `cloze_ord`
+/// (`reveal_clozes` with `cloze_only` set to true).
+pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
+    reveal_clozes(text, cloze_ord, question, true)
}
/// If text contains any LaTeX tags, render the front and back
@@ -144,7 +253,22 @@ pub fn expand_clozes_to_reveal_latex(text: &str) -> String {
}
pub(crate) fn contains_cloze(text: &str) -> bool {
- CLOZE.is_match(text)
+ let mut state = State::Root;
+ let mut stack: Vec = vec![Cloze::new()];
+ let mut i = 0;
+
+ for c in text.chars() {
+ state = process_char(state, c);
+ match state {
+ State::Root => stack[i].text.push(c),
+ State::Open => i += 1,
+ State::Close => return true,
+ State::Abandon => state = if i > 0 { State::Text } else { State::Root },
+ _ => {}
+ }
+ }
+
+ false
}
pub fn cloze_numbers_in_string(html: &str) -> HashSet {
@@ -155,11 +279,31 @@ pub fn cloze_numbers_in_string(html: &str) -> HashSet {
#[allow(clippy::implicit_hasher)]
+/// Insert the ordinal of every complete cloze found in `field` (nested ones included) into `set`.
+/// Ordinals that do not fit the integer type are skipped.
pub fn add_cloze_numbers_in_string(field: &str, set: &mut HashSet) {
- for cap in CLOZE.captures_iter(field) {
- if let Ok(n) = cap[1].parse() {
- set.insert(n);
+    let mut state = State::Root;
+    // One buffer of ordinal digits per currently open cloze candidate
+    let mut stack: Vec<String> = vec![];
+
+    for c in field.chars() {
+        state = process_char(state, c);
+        match state {
+            State::Open => stack.push(String::new()),
+            State::Ord => {
+                if let Some(digits) = stack.last_mut() {
+                    digits.push(c);
+                }
+            }
+            State::Close => {
+                if let Some(n) = close(&mut state, &mut stack) {
+                    set.insert(n);
+                }
+            }
+            State::Abandon => abandon(&mut state, &mut stack),
+            // The state machine fell back to `Root` while a candidate was still
+            // pending; drop that candidate so it cannot be closed later.
+            State::Root if !stack.is_empty() => abandon(&mut state, &mut stack),
+            _ => {}
}
}
+
+    // Close cloze and set state, return ordinal (None if it cannot be parsed)
+    fn close(state: &mut State, stack: &mut Vec<String>) -> Option<u16> {
+        let ord_str = stack.pop();
+        *state = if stack.is_empty() { State::Root } else { State::Text };
+        ord_str?.parse().ok()
+    }
+    // Abandon cloze and set state
+    fn abandon(state: &mut State, stack: &mut Vec<String>) {
+        stack.pop();
+        *state = if stack.is_empty() { State::Root } else { State::Text };
+    }
}
fn strip_html_inside_mathjax(text: &str) -> Cow {
@@ -232,6 +376,66 @@ mod test {
);
}
+ #[test]
+ fn nested_cloze_plain_text() {
+ // Nested clozes, compared after stripping the HTML from the output.
+ // A c2 cloze nested in c1 is hidden together with c1 on c1's question
+ // and shown as plain text on the answers.
+ assert_eq!(
+ strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, true).as_ref()),
+ "foo [...]"
+ );
+ assert_eq!(
+ strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, false).as_ref()),
+ "foo bar baz"
+ );
+ // Same nesting, with a hint (`qux`) on the outer cloze
+ assert_eq!(
+ strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, true).as_ref()),
+ "foo bar [...]"
+ );
+ assert_eq!(
+ strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, false).as_ref()),
+ "foo bar baz"
+ );
+ assert_eq!(
+ strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, true).as_ref()),
+ "foo [qux]"
+ );
+ assert_eq!(
+ strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, false).as_ref()),
+ "foo bar baz"
+ );
+ }
+
+ #[test]
+ fn nested_cloze_html() {
+ // Same inputs as the plain-text test, but compared against the raw output
+ // of `reveal_cloze_text` without stripping HTML.
+ // NOTE(review): the expected literals below match the stripped plain-text
+ // expectations, so their markup appears to have been lost from this copy
+ // of the patch - restore from the upstream commit before applying.
+ // Ordinals of both the outer and the nested cloze are collected; the
+ // trailing `{{` is ignored.
+ assert_eq!(
+ cloze_numbers_in_string("{{c2::te{{c1::s}}}}t{{"),
+ vec![1, 2].into_iter().collect::>()
+ );
+ assert_eq!(
+ reveal_cloze_text("foo {{c1::bar <{{c2::baz}}}}", 1, true),
+ r#"foo [...]"#
+ );
+ assert_eq!(
+ reveal_cloze_text("foo {{c1::bar <{{c2::baz}}}}", 1, false),
+ r#"foo bar <baz"#
+ );
+ assert_eq!(
+ reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, true),
+ r#"foo bar [...]"#
+ );
+ assert_eq!(
+ reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, false),
+ r#"foo bar baz"#
+ );
+ assert_eq!(
+ reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, true),
+ r#"foo [qux]"#
+ );
+ assert_eq!(
+ reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, false),
+ r#"foo bar baz"#
+ );
+ }
+
#[test]
fn mathjax_html() {
// escaped angle brackets should be preserved
diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs
index 21b5b99ce..43466a8a6 100644
--- a/rslib/src/template_filters.rs
+++ b/rslib/src/template_filters.rs
@@ -256,7 +256,7 @@ field
assert_eq!(strip_html(&cloze_filter(text, &ctx)).as_ref(), "[...] two");
assert_eq!(
cloze_filter(text, &ctx),
- r#"[...] two"#
+ r#"[...] two"#
);
ctx.card_ord = 1;