diff --git a/pylib/tests/test_models.py b/pylib/tests/test_models.py index 5e45e35bd..b738cd77d 100644 --- a/pylib/tests/test_models.py +++ b/pylib/tests/test_models.py @@ -13,8 +13,15 @@ from tests.shared import getEmptyCol def encode_attribute(s): + cmap = { + "\"": """, + "&": "&", + "\n": " ", + "<": "<", + ">": ">" + } return "".join( - c if c.isalnum() else "&#x{:X};".format(ord(c)) for c in html.escape(s) + c if not c in cmap.keys() else cmap[c] for c in s ) @@ -185,40 +192,40 @@ def test_cloze(): note["Text"] = "hello {{c1::world}}" assert col.addNote(note) == 1 assert ( - f'hello [...]' + f'hello [...]' in note.cards()[0].question() ) - assert 'hello world' in note.cards()[0].answer() + assert 'hello world' in note.cards()[0].answer() # and with a comment note = col.new_note(m) note["Text"] = "hello {{c1::world::typical}}" assert col.addNote(note) == 1 assert ( - f'[typical]' + f'[typical]' in note.cards()[0].question() ) - assert 'world' in note.cards()[0].answer() + assert 'world' in note.cards()[0].answer() # and with 2 clozes note = col.new_note(m) note["Text"] = "hello {{c1::world}} {{c2::bar}}" assert col.addNote(note) == 2 (c1, c2) = note.cards() assert ( - f'[...] bar' + f'[...] bar' in c1.question() ) - assert 'world bar' in c1.answer() + assert 'world bar' in c1.answer() assert ( - f'world [...]' + f'world [...]' in c2.question() ) - assert 'world bar' in c2.answer() + assert 'world bar' in c2.answer() # if there are multiple answers for a single cloze, they are given in a # list note = col.new_note(m) note["Text"] = "a {{c1::b}} {{c1::c}}" assert col.addNote(note) == 1 - assert 'b c' in ( + assert 'b c' in ( note.cards()[0].answer() ) # if we add another cloze, a card should be generated @@ -253,23 +260,23 @@ def test_cloze_mathjax(): assert col.addNote(note) assert len(note.cards()) == 5 assert ( - f'class="cloze" data-cloze="{encode_attribute(q1)}"' + f'class="cloze active" data-cloze="{encode_attribute(q1)}"' in note.cards()[0].question() ) assert ( - f'class="cloze" data-cloze="{encode_attribute(q2)}"' + f'class="cloze active" data-cloze="{encode_attribute(q2)}"' in note.cards()[1].question() ) assert ( - f'class="cloze" data-cloze="{encode_attribute(q3)}"' + f'class="cloze active" data-cloze="{encode_attribute(q3)}"' not in note.cards()[2].question() ) assert ( - f'class="cloze" data-cloze="{encode_attribute(q4)}"' + f'class="cloze active" data-cloze="{encode_attribute(q4)}"' in note.cards()[3].question() ) assert ( - f'class="cloze" data-cloze="{encode_attribute(q5)}"' + f'class="cloze active" data-cloze="{encode_attribute(q5)}"' in note.cards()[4].question() ) @@ -280,7 +287,7 @@ def test_cloze_mathjax(): assert ( note.cards()[0] .question() - .endswith(r'\(a\) [...] \[ [...] \]') + .endswith(r'\(a\) [...] \[ [...] \]') ) @@ -310,26 +317,26 @@ def test_chained_mods(): col.models.update(m) note = col.newNote() - q1 = 'phrase' - a1 = "sentence" - q2 = 'en chaine' - a2 = "chained" + a1 = 'phrase' + h1 = "sentence" + a2 = 'en chaine' + h2 = "chained" note[ "Text" ] = "This {{{{c1::{}::{}}}}} demonstrates {{{{c1::{}::{}}}}} clozes.".format( - q1, a1, - q2, + h1, a2, + h2, ) assert col.addNote(note) == 1 assert ( - f'This [sentence]' - f' demonstrates [chained] clozes.' + f'This [{h1}]' + f' demonstrates [{h2}] clozes.' in note.cards()[0].question() ) assert ( - f'This phrase demonstrates en chaine clozes.' + f'This {a1} demonstrates {a2} clozes.' in note.cards()[0].answer() ) diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs index 0612f7795..347ca03cc 100644 --- a/rslib/src/cloze.rs +++ b/rslib/src/cloze.rs @@ -9,18 +9,6 @@ use regex::{Captures, Regex}; use crate::{latex::contains_latex, template::RenderContext, text::strip_html_preserving_entities}; lazy_static! { - static ref CLOZE: Regex = Regex::new( - r#"(?xsi) - \{\{ - c(\d+):: # 1 = cloze number - (.*?) # 2 = clozed text - (?: - ::(.*?) # 3 = optional hint - )? - \}\} - "# - ) - .unwrap(); static ref MATHJAX: Regex = Regex::new( r#"(?xsi) (\\[(\[]) # 1 = mathjax opening tag @@ -31,14 +19,6 @@ lazy_static! { .unwrap(); } -mod cloze_caps { - // cloze ordinal - pub const ORD: usize = 1; - // the occluded text - pub const TEXT: usize = 2; - // optional hint - pub const HINT: usize = 3; -} mod mathjax_caps { pub const OPENING_TAG: usize = 1; @@ -46,84 +26,213 @@ mod mathjax_caps { pub const CLOSING_TAG: usize = 3; } -pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow { - let mut cloze_ord_was_in_text = false; +/// States for cloze parsing state machine +#[derive(PartialEq, Copy, Clone)] +enum State { + Root, + Text, + Open, + Open2, + COpen, + Ord, + TextOpen1, + TextOpen2, + TextClose1, + HintOpen1, + HintOpen2, + Hint, + HintClose1, + Close, + Abandon +} +/// Struct for storing data for one cloze +struct Cloze { + buf: String, + ord_str: String, + text: String, + hint: String +} - let output = CLOZE.replace_all(text, |caps: &Captures| { - let captured_ord = caps - .get(cloze_caps::ORD) - .unwrap() - .as_str() - .parse() - .unwrap_or(0); - - let text = caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned(); - if captured_ord != cloze_ord { - // other cloze deletions are unchanged - return text; - } else { - cloze_ord_was_in_text = true; +impl Cloze { + /// Create empty Cloze struct + fn new() -> Self { + Self { + buf: String::new(), + ord_str: String::new(), + text: String::new(), + hint: String::new() } - - let text_attr; - let replacement; - if question { - text_attr = format!(r#" data-cloze="{}""#, htmlescape::encode_attribute(&text)); - // hint provided? - if let Some(hint) = caps.get(cloze_caps::HINT) { - replacement = format!("[{}]", hint.as_str()); - } else { - replacement = "[...]".to_string(); - } - } else { - text_attr = "".to_string(); - replacement = text; - } - - format!(r#"{}"#, text_attr, replacement) - }); - - if !cloze_ord_was_in_text { - return "".into(); } - - // if no cloze deletions are found, Anki returns an empty string - match output { - Cow::Borrowed(_) => "".into(), - other => other, + /// Create Cloze struct with buf set to c + fn new_from(c: char) -> Self { + Self { + buf: String::from(c), + ord_str: String::new(), + text: String::new(), + hint: String::new() + } } } -pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow { - CLOZE - .captures_iter(text) - .filter(|caps| { - let captured_ord = caps - .get(cloze_caps::ORD) - .unwrap() - .as_str() - .parse() - .unwrap_or(0); +/// Process char for next state +fn process_char(state: State, c: char) -> State { + use self::State::*; + match (state, c) { + (Root, '{') => Open, + (Open, '{') => Open2, + (Open, _) => Abandon, + (Open2, '{') => Open2, + (Open2, 'c') => COpen, + (Open2, _) => Abandon, + (COpen, '0'..='9') => Ord, + (Ord, '0'..='9') => Ord, + (Ord, ':') => TextOpen1, + (Ord, _) => Abandon, + (TextOpen1, ':') => TextOpen2, + (TextOpen1, _) => Abandon, + (TextOpen2, ':') => HintOpen1, + (TextOpen2, '}') => TextClose1, + (TextOpen2, _) => Text, + (Text, '{') => Open, + (Text, ':') => HintOpen1, + (Text, '}') => TextClose1, + (Text, _) => Text, + (HintOpen1, ':') => HintOpen2, + (HintOpen1, '}') => TextClose1, + (HintOpen1, _) => Text, + (HintOpen2, '}') => HintClose1, + (HintOpen2, _) => Hint, + (Hint, '}') => HintClose1, + (Hint, _) => Hint, + (TextClose1 | HintClose1, '}') => Close, + (TextClose1, _) => Text, + (HintClose1, _) => Hint, + _ => Root + } +} - captured_ord == cloze_ord - }) - .map(|caps| { - let cloze = if question { - // hint provided? - if let Some(hint) = caps.get(cloze_caps::HINT) { - hint.as_str() - } else { - "..." +/// Minimal encoding of string for storage in attribute (", &, \n, <, >) +pub fn encode_attribute(text: &str) -> Cow { + let mut out = String::new(); + for c in text.chars() { + match c { + '"' => out.push_str("""), + '&' => out.push_str("&"), + '\n' => out.push_str(" "), + '<' => out.push_str("<"), + '>' => out.push_str(">"), + _ => out.push(c) + } + } + + Cow::from(out) +} + +/// Parse string for clozes and return: +/// cloze_only == false: resulting HTML string +/// cloze_only == true: resulting HTML for the cloze texts only +/// no cloze found: empty string +fn reveal_clozes(text: &str, cloze_ord: u16, question: bool, cloze_only: bool) -> Cow { + let mut state = State::Root; + let mut stack: Vec = vec![Cloze::new()]; + let mut current_found = false; + + for c in text.chars() { + let old_state = state; + state = process_char(state, c); + + match state { + State::Open => stack.push(Cloze::new_from(c)), + State::Close => current_found |= close(&mut state, &mut stack, cloze_only, cloze_ord, question), + State::Abandon => abandon(&mut state, &mut stack, c), + _ => { + let last = stack.last_mut().unwrap(); + last.buf.push(c); + match state { + State::Root => if !cloze_only {last.text.push(c)}, + State::Open2 => if old_state == State::Open2 {shift_open(&mut stack)}, + State::Ord => last.ord_str.push(c), + State::Text => last.text.push(c), + State::Hint => last.hint.push(c), + _ => {} } - } else { - caps.get(cloze_caps::TEXT).unwrap().as_str() - }; + } + } + } + + if !current_found { return Cow::Borrowed("".into()) } + if cloze_only { return stack.last().unwrap().text[2..].to_owned().into() } - cloze - }) - .collect::>() - .join(", ") - .into() + if stack.len() > 1 { + let cloze = stack.pop().unwrap(); + stack.last_mut().unwrap().text.push_str(&cloze.buf); + } + return Cow::from(stack.last().unwrap().text.to_owned()); + + // 3 consecutive {{{, shift the first onto the "parent" text + fn shift_open(stack: &mut Vec) { + let i = stack.len() - 1; + let fc = stack[i].buf.remove(0); + stack[i - 1].buf.push(fc); + } + + // Close cloze and set state + fn close(state: &mut State, stack: &mut Vec, + cloze_only: bool, cloze_ord: u16, question: bool) -> bool { + let cloze = stack.pop().unwrap(); + *state = if stack.len() > 1 { State::Text } else { State::Root }; + + let ordinal = cloze.ord_str.parse::().unwrap(); + let last = stack.last_mut().unwrap(); + + match (cloze_only, question, ordinal == cloze_ord, cloze.hint.is_empty()) { + // Cloze text only + (true, true, true, true) => last.text.push_str(", ..."), + (true, true, true, false) => last.text.push_str(&format!(", {}", &cloze.hint)), + (true, _, _, _) => last.text.push_str(&format!(", {}", &cloze.text)), + + // Full cloze + // Question - active cloze, no hint + (false, true, true, true) => last.text.push_str( + &format!(r#"[...]"#, + encode_attribute(cloze.text.as_str()), ordinal)), + // Question - active cloze, hint + (false, true, true, false) => last.text.push_str( + &format!(r#"[{}]"#, + encode_attribute(cloze.text.as_str()), ordinal, &cloze.hint)), + // Question - inactive cloze + (false, true, false, _) => last.text.push_str( + &format!(r#"{}"#, + ordinal, cloze.text.as_str())), + // Answer - active cloze + (false, false, true, _) => last.text.push_str( + &format!(r#"{}"#, + ordinal, cloze.text.as_str())), + // Answer - inactive cloze + (false, false, false, _) => last.text.push_str( + &format!(r#"{}"#, + ordinal, cloze.text.as_str())) + } + + ordinal == cloze_ord + } + + // Abandon cloze and set state + fn abandon(state: &mut State, stack: &mut Vec, c: char) { + let cloze = stack.pop().unwrap(); + *state = if stack.len() > 1 { State::Text } else { State::Root }; + let last = stack.last_mut().unwrap(); + last.text.push_str(&cloze.buf); + last.text.push(c); + } +} + +pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow { + return reveal_clozes(text, cloze_ord, question, false); +} + +pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow { + return reveal_clozes(text, cloze_ord, question, true); } /// If text contains any LaTeX tags, render the front and back @@ -144,7 +253,22 @@ pub fn expand_clozes_to_reveal_latex(text: &str) -> String { } pub(crate) fn contains_cloze(text: &str) -> bool { - CLOZE.is_match(text) + let mut state = State::Root; + let mut stack: Vec = vec![Cloze::new()]; + let mut i = 0; + + for c in text.chars() { + state = process_char(state, c); + match state { + State::Root => stack[i].text.push(c), + State::Open => i += 1, + State::Close => return true, + State::Abandon => state = if i > 0 { State::Text } else { State::Root }, + _ => {} + } + } + + false } pub fn cloze_numbers_in_string(html: &str) -> HashSet { @@ -155,11 +279,31 @@ pub fn cloze_numbers_in_string(html: &str) -> HashSet { #[allow(clippy::implicit_hasher)] pub fn add_cloze_numbers_in_string(field: &str, set: &mut HashSet) { - for cap in CLOZE.captures_iter(field) { - if let Ok(n) = cap[1].parse() { - set.insert(n); + let mut state = State::Root; + let mut stack: Vec = vec![]; + + for c in field.chars() { + state = process_char(state, c); + match state { + State::Open => stack.push(String::new()), + State::Ord => stack.last_mut().unwrap().push(c), + State::Close => drop(set.insert(close(&mut state, &mut stack))), + State::Abandon => abandon(&mut state, &mut stack), + _ => {} } } + + // Close cloze and set state, return ordinal + fn close(state: &mut State, stack: &mut Vec) -> u16 { + let ord_str = stack.pop().unwrap(); + *state = if stack.len() > 0 { State::Text } else { State::Root }; + ord_str.parse::().unwrap() + } + // Abandon cloze and set state + fn abandon(state: &mut State, stack: &mut Vec) { + stack.pop(); + *state = if stack.len() > 0 { State::Text } else { State::Root }; + } } fn strip_html_inside_mathjax(text: &str) -> Cow { @@ -232,6 +376,66 @@ mod test { ); } + #[test] + fn nested_cloze_plain_text() { + assert_eq!( + strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, true).as_ref()), + "foo [...]" + ); + assert_eq!( + strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, false).as_ref()), + "foo bar baz" + ); + assert_eq!( + strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, true).as_ref()), + "foo bar [...]" + ); + assert_eq!( + strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, false).as_ref()), + "foo bar baz" + ); + assert_eq!( + strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, true).as_ref()), + "foo [qux]" + ); + assert_eq!( + strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, false).as_ref()), + "foo bar baz" + ); + } + + #[test] + fn nested_cloze_html() { + assert_eq!( + cloze_numbers_in_string("{{c2::te{{c1::s}}}}t{{"), + vec![1, 2].into_iter().collect::>() + ); + assert_eq!( + reveal_cloze_text("foo {{c1::bar <{{c2::baz}}}}", 1, true), + r#"foo [...]"# + ); + assert_eq!( + reveal_cloze_text("foo {{c1::bar <{{c2::baz}}}}", 1, false), + r#"foo bar <baz"# + ); + assert_eq!( + reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, true), + r#"foo bar [...]"# + ); + assert_eq!( + reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, false), + r#"foo bar baz"# + ); + assert_eq!( + reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, true), + r#"foo [qux]"# + ); + assert_eq!( + reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, false), + r#"foo bar baz"# + ); + } + #[test] fn mathjax_html() { // escaped angle brackets should be preserved diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs index 21b5b99ce..43466a8a6 100644 --- a/rslib/src/template_filters.rs +++ b/rslib/src/template_filters.rs @@ -256,7 +256,7 @@ field assert_eq!(strip_html(&cloze_filter(text, &ctx)).as_ref(), "[...] two"); assert_eq!( cloze_filter(text, &ctx), - r#"[...] two"# + r#"[...] two"# ); ctx.card_ord = 1;