Nested clozes and increased cloze metadata

This commit is contained in:
TRIAEIOU 2022-10-23 23:32:49 +02:00
parent 22f54c2c01
commit fa97e954d7
3 changed files with 330 additions and 119 deletions

View file

@ -13,8 +13,15 @@ from tests.shared import getEmptyCol
def encode_attribute(s):
    """Minimally escape ``s`` for use inside a double-quoted HTML attribute.

    Mirrors the escaping done on the Rust side: only ``"``, ``&``, newline,
    ``<`` and ``>`` are replaced by a character reference; every other
    character is passed through unchanged.
    """
    cmap = {
        "\"": "&quot;",
        "&": "&amp;",
        "\n": "&NewLine;",
        "<": "&lt;",
        ">": "&gt;",
    }
    return "".join(cmap.get(c, c) for c in s)
@ -185,40 +192,40 @@ def test_cloze():
note["Text"] = "hello {{c1::world}}"
assert col.addNote(note) == 1
assert (
f'hello <span class="cloze" data-cloze="{encode_attribute("world")}">[...]</span>'
f'hello <span class="cloze active" data-cloze="{encode_attribute("world")}" data-ordinal="1">[...]</span>'
in note.cards()[0].question()
)
assert 'hello <span class="cloze">world</span>' in note.cards()[0].answer()
assert 'hello <span class="cloze active" data-ordinal="1">world</span>' in note.cards()[0].answer()
# and with a comment
note = col.new_note(m)
note["Text"] = "hello {{c1::world::typical}}"
assert col.addNote(note) == 1
assert (
f'<span class="cloze" data-cloze="{encode_attribute("world")}">[typical]</span>'
f'<span class="cloze active" data-cloze="{encode_attribute("world")}" data-ordinal="1">[typical]</span>'
in note.cards()[0].question()
)
assert '<span class="cloze">world</span>' in note.cards()[0].answer()
assert '<span class="cloze active" data-ordinal="1">world</span>' in note.cards()[0].answer()
# and with 2 clozes
note = col.new_note(m)
note["Text"] = "hello {{c1::world}} {{c2::bar}}"
assert col.addNote(note) == 2
(c1, c2) = note.cards()
assert (
f'<span class="cloze" data-cloze="{encode_attribute("world")}">[...]</span> bar'
f'<span class="cloze active" data-cloze="{encode_attribute("world")}" data-ordinal="1">[...]</span> <span class="cloze active" data-ordinal="2">bar</span>'
in c1.question()
)
assert '<span class="cloze">world</span> bar' in c1.answer()
assert '<span class="cloze active" data-ordinal="1">world</span> <span class="cloze active" data-ordinal="2">bar</span>' in c1.answer()
assert (
f'world <span class="cloze" data-cloze="{encode_attribute("bar")}">[...]</span>'
f'<span class="cloze" data-ordinal="1">world</span> <span class="cloze active" data-cloze="{encode_attribute("bar")}" data-ordinal="2">[...]</span>'
in c2.question()
)
assert 'world <span class="cloze">bar</span>' in c2.answer()
assert '<span class="cloze" data-ordinal="1">world</span> <span class="cloze active" data-ordinal="2">bar</span>' in c2.answer()
# if there are multiple answers for a single cloze, they are given in a
# list
note = col.new_note(m)
note["Text"] = "a {{c1::b}} {{c1::c}}"
assert col.addNote(note) == 1
assert '<span class="cloze">b</span> <span class="cloze">c</span>' in (
assert '<span class="cloze active" data-ordinal="1">b</span> <span class="cloze active" data-ordinal="1">c</span>' in (
note.cards()[0].answer()
)
# if we add another cloze, a card should be generated
@ -253,23 +260,23 @@ def test_cloze_mathjax():
assert col.addNote(note)
assert len(note.cards()) == 5
assert (
f'class="cloze" data-cloze="{encode_attribute(q1)}"'
f'class="cloze active" data-cloze="{encode_attribute(q1)}"'
in note.cards()[0].question()
)
assert (
f'class="cloze" data-cloze="{encode_attribute(q2)}"'
f'class="cloze active" data-cloze="{encode_attribute(q2)}"'
in note.cards()[1].question()
)
assert (
f'class="cloze" data-cloze="{encode_attribute(q3)}"'
f'class="cloze active" data-cloze="{encode_attribute(q3)}"'
not in note.cards()[2].question()
)
assert (
f'class="cloze" data-cloze="{encode_attribute(q4)}"'
f'class="cloze active" data-cloze="{encode_attribute(q4)}"'
in note.cards()[3].question()
)
assert (
f'class="cloze" data-cloze="{encode_attribute(q5)}"'
f'class="cloze active" data-cloze="{encode_attribute(q5)}"'
in note.cards()[4].question()
)
@ -280,7 +287,7 @@ def test_cloze_mathjax():
assert (
note.cards()[0]
.question()
.endswith(r'\(a\) <span class="cloze" data-cloze="b">[...]</span> \[ [...] \]')
.endswith(r'\(a\) <span class="cloze active" data-cloze="b" data-ordinal="1">[...]</span> \[ [...] \]')
)
@ -310,26 +317,26 @@ def test_chained_mods():
col.models.update(m)
note = col.newNote()
q1 = '<span style="color:red">phrase</span>'
a1 = "<b>sentence</b>"
q2 = '<span style="color:red">en chaine</span>'
a2 = "<i>chained</i>"
a1 = '<span style="color:red">phrase</span>'
h1 = "<b>sentence</b>"
a2 = '<span style="color:red">en chaine</span>'
h2 = "<i>chained</i>"
note[
"Text"
] = "This {{{{c1::{}::{}}}}} demonstrates {{{{c1::{}::{}}}}} clozes.".format(
q1,
a1,
q2,
h1,
a2,
h2,
)
assert col.addNote(note) == 1
assert (
f'This <span class="cloze" data-cloze="{encode_attribute("phrase")}">[sentence]</span>'
f' demonstrates <span class="cloze" data-cloze="{encode_attribute("en chaine")}">[chained]</span> clozes.'
f'This <span class="cloze active" data-cloze="{encode_attribute(a1)}" data-ordinal="1">[{h1}]</span>'
f' demonstrates <span class="cloze active" data-cloze="{encode_attribute(a2)}" data-ordinal="1">[{h2}]</span> clozes.'
in note.cards()[0].question()
)
assert (
f'This <span class="cloze">phrase</span> demonstrates <span class="cloze">en chaine</span> clozes.'
f'This <span class="cloze active" data-ordinal="1">{a1}</span> demonstrates <span class="cloze active" data-ordinal="1">{a2}</span> clozes.'
in note.cards()[0].answer()
)

View file

@ -9,18 +9,6 @@ use regex::{Captures, Regex};
use crate::{latex::contains_latex, template::RenderContext, text::strip_html_preserving_entities};
lazy_static! {
static ref CLOZE: Regex = Regex::new(
r#"(?xsi)
\{\{
c(\d+):: # 1 = cloze number
(.*?) # 2 = clozed text
(?:
::(.*?) # 3 = optional hint
)?
\}\}
"#
)
.unwrap();
static ref MATHJAX: Regex = Regex::new(
r#"(?xsi)
(\\[(\[]) # 1 = mathjax opening tag
@ -31,14 +19,6 @@ lazy_static! {
.unwrap();
}
// Capture-group indices for the `CLOZE` regex, named so that call sites can
// write e.g. `caps.get(cloze_caps::ORD)` instead of a bare number.
mod cloze_caps {
// cloze ordinal (the digits following `c`)
pub const ORD: usize = 1;
// the occluded text
pub const TEXT: usize = 2;
// optional hint (the group is absent when no `::hint` part was written)
pub const HINT: usize = 3;
}
mod mathjax_caps {
pub const OPENING_TAG: usize = 1;
@ -46,84 +26,213 @@ mod mathjax_caps {
pub const CLOSING_TAG: usize = 3;
}
pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
let mut cloze_ord_was_in_text = false;
/// States of the character-driven state machine that recognises cloze
/// deletions written as `{{c<ordinal>::<text>}}` or
/// `{{c<ordinal>::<text>::<hint>}}`.
///
/// `Close` and `Abandon` are signals for the caller: after seeing one of them
/// the caller is expected to replace the state with the one to resume in.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
enum State {
    /// Not inside any cloze candidate (the initial state).
    Root,
    /// Inside the text part of a cloze.
    Text,
    /// A `{` was seen that may start a cloze.
    Open,
    /// `{{` was seen.
    Open2,
    /// `{{c` was seen.
    COpen,
    /// Reading the digits of the ordinal.
    Ord,
    /// The first `:` after the ordinal was seen.
    TextOpen1,
    /// `::` after the ordinal was seen; the next character starts the text.
    TextOpen2,
    /// A single `}` was seen in the text; a second one closes the cloze.
    TextClose1,
    /// A single `:` was seen in the text; a second one starts the hint.
    HintOpen1,
    /// `::` was seen in the text; the next character starts the hint.
    HintOpen2,
    /// Inside the hint part of a cloze.
    Hint,
    /// A single `}` was seen in the hint; a second one closes the cloze.
    HintClose1,
    /// `}}` was seen: the innermost open cloze is complete.
    Close,
    /// The characters since the last opening `{` do not form a cloze.
    Abandon,
}
/// Data accumulated for one cloze candidate while it sits on the parse stack.
#[derive(Debug, Default)]
struct Cloze {
    /// Characters consumed while this entry was on top of the stack; written
    /// back into the parent's `text` if the candidate turns out not to be a
    /// cloze.
    buf: String,
    /// The ordinal digits, still unparsed.
    ord_str: String,
    /// The cloze text, including the rendered output of nested clozes.
    text: String,
    /// The hint; empty when none was given.
    hint: String,
}
let output = CLOZE.replace_all(text, |caps: &Captures| {
let captured_ord = caps
.get(cloze_caps::ORD)
.unwrap()
.as_str()
.parse()
.unwrap_or(0);
let text = caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned();
if captured_ord != cloze_ord {
// other cloze deletions are unchanged
return text;
} else {
cloze_ord_was_in_text = true;
impl Cloze {
/// Create empty Cloze struct
fn new() -> Self {
Self {
buf: String::new(),
ord_str: String::new(),
text: String::new(),
hint: String::new()
}
let text_attr;
let replacement;
if question {
text_attr = format!(r#" data-cloze="{}""#, htmlescape::encode_attribute(&text));
// hint provided?
if let Some(hint) = caps.get(cloze_caps::HINT) {
replacement = format!("[{}]", hint.as_str());
} else {
replacement = "[...]".to_string();
}
} else {
text_attr = "".to_string();
replacement = text;
}
format!(r#"<span class="cloze"{}>{}</span>"#, text_attr, replacement)
});
if !cloze_ord_was_in_text {
return "".into();
}
// if no cloze deletions are found, Anki returns an empty string
match output {
Cow::Borrowed(_) => "".into(),
other => other,
/// Create Cloze struct with buf set to c
fn new_from(c: char) -> Self {
Self {
buf: String::from(c),
ord_str: String::new(),
text: String::new(),
hint: String::new()
}
}
}
pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
CLOZE
.captures_iter(text)
.filter(|caps| {
let captured_ord = caps
.get(cloze_caps::ORD)
.unwrap()
.as_str()
.parse()
.unwrap_or(0);
/// Advance the cloze state machine by one character.
///
/// Takes the current `state` and the next input character `c` and returns the
/// state to continue in. The function is pure; callers react to the returned
/// `Open`, `Close` and `Abandon` states (pushing, finishing or discarding a
/// cloze candidate) and then substitute the state to resume in.
///
/// Every state has an explicit fallback so that a candidate which stops
/// looking like a cloze is always reported as `Abandon` rather than silently
/// dropping back to `Root`.
fn process_char(state: State, c: char) -> State {
    use self::State::*;
    match (state, c) {
        // Opening sequence: `{{c<digits>::`
        (Root, '{') => Open,
        (Open, '{') => Open2,
        // `{{{`: stay here, the caller hands the surplus brace to the parent
        (Open2, '{') => Open2,
        (Open2, 'c') => COpen,
        (COpen | Ord, '0'..='9') => Ord,
        (Ord, ':') => TextOpen1,
        (TextOpen1, ':') => TextOpen2,
        // Anything else in the opening sequence means this is not a cloze
        (Open | Open2 | COpen | Ord | TextOpen1, _) => Abandon,

        // Cloze text. A `{` may start a nested cloze anywhere in the text,
        // including directly after `::`, after a lone `:` or after a lone `}`.
        (TextOpen2 | Text | HintOpen1 | TextClose1, '{') => Open,
        (TextOpen2 | Text, ':') => HintOpen1,
        (HintOpen1, ':') => HintOpen2,
        (TextOpen2 | Text | HintOpen1, '}') => TextClose1,
        (TextClose1, '}') => Close,
        (TextOpen2 | Text | HintOpen1 | TextClose1, _) => Text,

        // Hint
        (HintOpen2 | Hint, '}') => HintClose1,
        (HintClose1, '}') => Close,
        (HintOpen2 | Hint | HintClose1, _) => Hint,

        // Plain text outside any cloze; `Close`/`Abandon` are normally
        // replaced by the caller before the next character is processed.
        (Root | Close | Abandon, _) => Root,
    }
}
captured_ord == cloze_ord
})
.map(|caps| {
let cloze = if question {
// hint provided?
if let Some(hint) = caps.get(cloze_caps::HINT) {
hint.as_str()
} else {
"..."
/// Minimally escape `text` for storage inside a double-quoted HTML attribute.
///
/// Only `"`, `&`, newline, `<` and `>` are replaced (by `&quot;`, `&amp;`,
/// `&NewLine;`, `&lt;` and `&gt;`); every other character is copied verbatim.
/// Input that contains none of these characters is returned borrowed, so the
/// common case does not allocate.
pub fn encode_attribute(text: &str) -> Cow<str> {
    fn needs_escape(c: char) -> bool {
        matches!(c, '"' | '&' | '\n' | '<' | '>')
    }

    if !text.contains(needs_escape) {
        return Cow::Borrowed(text);
    }

    // Each replacement is longer than the character it replaces, so reserve
    // a little headroom on top of the input length.
    let mut out = String::with_capacity(text.len() + 16);
    for c in text.chars() {
        match c {
            '"' => out.push_str("&quot;"),
            '&' => out.push_str("&amp;"),
            '\n' => out.push_str("&NewLine;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(c),
        }
    }
    Cow::Owned(out)
}
/// Parse string for clozes and return:
/// cloze_only == false: resulting HTML string
/// cloze_only == true: resulting HTML for the cloze texts only
/// no cloze found: empty string
fn reveal_clozes(text: &str, cloze_ord: u16, question: bool, cloze_only: bool) -> Cow<str> {
let mut state = State::Root;
let mut stack: Vec<Cloze> = vec![Cloze::new()];
let mut current_found = false;
for c in text.chars() {
let old_state = state;
state = process_char(state, c);
match state {
State::Open => stack.push(Cloze::new_from(c)),
State::Close => current_found |= close(&mut state, &mut stack, cloze_only, cloze_ord, question),
State::Abandon => abandon(&mut state, &mut stack, c),
_ => {
let last = stack.last_mut().unwrap();
last.buf.push(c);
match state {
State::Root => if !cloze_only {last.text.push(c)},
State::Open2 => if old_state == State::Open2 {shift_open(&mut stack)},
State::Ord => last.ord_str.push(c),
State::Text => last.text.push(c),
State::Hint => last.hint.push(c),
_ => {}
}
} else {
caps.get(cloze_caps::TEXT).unwrap().as_str()
};
}
}
}
if !current_found { return Cow::Borrowed("".into()) }
if cloze_only { return stack.last().unwrap().text[2..].to_owned().into() }
cloze
})
.collect::<Vec<_>>()
.join(", ")
.into()
if stack.len() > 1 {
let cloze = stack.pop().unwrap();
stack.last_mut().unwrap().text.push_str(&cloze.buf);
}
return Cow::from(stack.last().unwrap().text.to_owned());
// 3 consecutive {{{, shift the first onto the "parent" text
fn shift_open(stack: &mut Vec<Cloze>) {
let i = stack.len() - 1;
let fc = stack[i].buf.remove(0);
stack[i - 1].buf.push(fc);
}
// Close cloze and set state
fn close(state: &mut State, stack: &mut Vec<Cloze>,
cloze_only: bool, cloze_ord: u16, question: bool) -> bool {
let cloze = stack.pop().unwrap();
*state = if stack.len() > 1 { State::Text } else { State::Root };
let ordinal = cloze.ord_str.parse::<u16>().unwrap();
let last = stack.last_mut().unwrap();
match (cloze_only, question, ordinal == cloze_ord, cloze.hint.is_empty()) {
// Cloze text only
(true, true, true, true) => last.text.push_str(", ..."),
(true, true, true, false) => last.text.push_str(&format!(", {}", &cloze.hint)),
(true, _, _, _) => last.text.push_str(&format!(", {}", &cloze.text)),
// Full cloze
// Question - active cloze, no hint
(false, true, true, true) => last.text.push_str(
&format!(r#"<span class="cloze active" data-text="{}" data-ordinal="{}">[...]</span>"#,
encode_attribute(cloze.text.as_str()), ordinal)),
// Question - active cloze, hint
(false, true, true, false) => last.text.push_str(
&format!(r#"<span class="cloze active" data-text="{}" data-ordinal="{}">[{}]</span>"#,
encode_attribute(cloze.text.as_str()), ordinal, &cloze.hint)),
// Question - inactive cloze
(false, true, false, _) => last.text.push_str(
&format!(r#"<span class="cloze" data-ordinal="{}">{}</span>"#,
ordinal, cloze.text.as_str())),
// Answer - active cloze
(false, false, true, _) => last.text.push_str(
&format!(r#"<span class="cloze active" data-ordinal="{}">{}</span>"#,
ordinal, cloze.text.as_str())),
// Answer - inactive cloze
(false, false, false, _) => last.text.push_str(
&format!(r#"<span class="cloze" data-ordinal="{}">{}</span>"#,
ordinal, cloze.text.as_str()))
}
ordinal == cloze_ord
}
// Abandon cloze and set state
fn abandon(state: &mut State, stack: &mut Vec<Cloze>, c: char) {
let cloze = stack.pop().unwrap();
*state = if stack.len() > 1 { State::Text } else { State::Root };
let last = stack.last_mut().unwrap();
last.text.push_str(&cloze.buf);
last.text.push(c);
}
}
/// Render `text` for the card with ordinal `cloze_ord`, producing the full
/// HTML. `question` selects the question side rather than the answer side.
///
/// Thin wrapper around `reveal_clozes` with `cloze_only` set to `false`.
pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
    let cloze_only = false;
    reveal_clozes(text, cloze_ord, question, cloze_only)
}
/// Render only the cloze contents of `text` for the card with ordinal
/// `cloze_ord`. `question` selects the question side rather than the answer
/// side.
///
/// Thin wrapper around `reveal_clozes` with `cloze_only` set to `true`.
pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
    let cloze_only = true;
    reveal_clozes(text, cloze_ord, question, cloze_only)
}
/// If text contains any LaTeX tags, render the front and back
@ -144,7 +253,22 @@ pub fn expand_clozes_to_reveal_latex(text: &str) -> String {
}
pub(crate) fn contains_cloze(text: &str) -> bool {
CLOZE.is_match(text)
let mut state = State::Root;
let mut stack: Vec<Cloze> = vec![Cloze::new()];
let mut i = 0;
for c in text.chars() {
state = process_char(state, c);
match state {
State::Root => stack[i].text.push(c),
State::Open => i += 1,
State::Close => return true,
State::Abandon => state = if i > 0 { State::Text } else { State::Root },
_ => {}
}
}
false
}
pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
@ -155,11 +279,31 @@ pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
#[allow(clippy::implicit_hasher)]
pub fn add_cloze_numbers_in_string(field: &str, set: &mut HashSet<u16>) {
for cap in CLOZE.captures_iter(field) {
if let Ok(n) = cap[1].parse() {
set.insert(n);
let mut state = State::Root;
let mut stack: Vec<String> = vec![];
for c in field.chars() {
state = process_char(state, c);
match state {
State::Open => stack.push(String::new()),
State::Ord => stack.last_mut().unwrap().push(c),
State::Close => drop(set.insert(close(&mut state, &mut stack))),
State::Abandon => abandon(&mut state, &mut stack),
_ => {}
}
}
// Close cloze and set state, return ordinal
fn close(state: &mut State, stack: &mut Vec<String>) -> u16 {
let ord_str = stack.pop().unwrap();
*state = if stack.len() > 0 { State::Text } else { State::Root };
ord_str.parse::<u16>().unwrap()
}
// Abandon cloze and set state
fn abandon(state: &mut State, stack: &mut Vec<String>) {
stack.pop();
*state = if stack.len() > 0 { State::Text } else { State::Root };
}
}
fn strip_html_inside_mathjax(text: &str) -> Cow<str> {
@ -232,6 +376,66 @@ mod test {
);
}
#[test]
fn nested_cloze_plain_text() {
    // (input, card ordinal, question side?, expected text once HTML is stripped)
    let cases: [(&str, u16, bool, &str); 6] = [
        ("foo {{c1::bar {{c2::baz}}}}", 1, true, "foo [...]"),
        ("foo {{c1::bar {{c2::baz}}}}", 1, false, "foo bar baz"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 2, true, "foo bar [...]"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 2, false, "foo bar baz"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 1, true, "foo [qux]"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 1, false, "foo bar baz"),
    ];

    for &(input, ord, question, expected) in cases.iter() {
        let rendered = reveal_cloze_text(input, ord, question);
        assert_eq!(strip_html(rendered.as_ref()), expected);
    }
}
#[test]
fn nested_cloze_html() {
    // Ordinals of nested clozes are all reported; the dangling `{{` is ignored.
    let expected_ordinals: HashSet<u16> = vec![1, 2].into_iter().collect();
    assert_eq!(
        cloze_numbers_in_string("{{c2::te{{c1::s}}}}t{{"),
        expected_ordinals
    );

    // (input, card ordinal, question side?, expected HTML)
    let cases: [(&str, u16, bool, &str); 6] = [
        (
            "foo {{c1::bar &lt;{{c2::baz}}}}",
            1,
            true,
            r#"foo <span class="cloze active" data-text="bar &amp;lt;&lt;span class=&quot;cloze&quot; data-ordinal=&quot;2&quot;&gt;baz&lt;/span&gt;" data-ordinal="1">[...]</span>"#,
        ),
        (
            "foo {{c1::bar &lt;{{c2::baz}}}}",
            1,
            false,
            r#"foo <span class="cloze active" data-ordinal="1">bar &lt;<span class="cloze" data-ordinal="2">baz</span></span>"#,
        ),
        (
            "foo {{c1::bar {{c2::baz}}::qux}}",
            2,
            true,
            r#"foo <span class="cloze" data-ordinal="1">bar <span class="cloze active" data-text="baz" data-ordinal="2">[...]</span></span>"#,
        ),
        (
            "foo {{c1::bar {{c2::baz}}::qux}}",
            2,
            false,
            r#"foo <span class="cloze" data-ordinal="1">bar <span class="cloze active" data-ordinal="2">baz</span></span>"#,
        ),
        (
            "foo {{c1::bar {{c2::baz}}::qux}}",
            1,
            true,
            r#"foo <span class="cloze active" data-text="bar &lt;span class=&quot;cloze&quot; data-ordinal=&quot;2&quot;&gt;baz&lt;/span&gt;" data-ordinal="1">[qux]</span>"#,
        ),
        (
            "foo {{c1::bar {{c2::baz}}::qux}}",
            1,
            false,
            r#"foo <span class="cloze active" data-ordinal="1">bar <span class="cloze" data-ordinal="2">baz</span></span>"#,
        ),
    ];

    for &(input, ord, question, expected) in cases.iter() {
        assert_eq!(reveal_cloze_text(input, ord, question), expected);
    }
}
#[test]
fn mathjax_html() {
// escaped angle brackets should be preserved

View file

@ -256,7 +256,7 @@ field</a>
assert_eq!(strip_html(&cloze_filter(text, &ctx)).as_ref(), "[...] two");
assert_eq!(
cloze_filter(text, &ctx),
r#"<span class="cloze" data-cloze="one">[...]</span> two"#
r#"<span class="cloze active" data-text="one" data-ordinal="1">[...]</span> <span class="cloze" data-ordinal="2">two</span>"#
);
ctx.card_ord = 1;