Nested clozes and increased cloze meta data (#2141)

* Nested clozes and increased cloze meta data

* Update contributors

* This reverts commit 3423df73f8.

* Update CONTRIBUTORS

* Formatting

* Formatting

* Formatting

* Formatting

* Formatting

* Formatting

* Formatting

* Formatting

* Code refactor

* Formatting

* Formatting

* Formatting

* Formatting and dead code

* Correct test case

* Remove Hint and Close storage of token string

* Update

* Formatting

* Formatting

* Formatting

* Use write! instead of .push_str(&format).

* Formatting
This commit is contained in:
TRIAEIOU 2022-12-19 03:03:15 +01:00 committed by GitHub
parent f41a7a8125
commit 9901ae428a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 384 additions and 121 deletions

View file

@ -106,6 +106,7 @@ Bart Louwers <bart.git@emeel.net>
Sam Penny <github.com/sam1penny>
Yutsuten <mateus.etto@gmail.com>
Zoom <zoomrmc+git@gmail.com>
TRIAEIOU <github.com/TRIAEIOU>
Stefan Kangas <stefankangas@gmail.com>
********************

View file

@ -185,41 +185,53 @@ def test_cloze():
note["Text"] = "hello {{c1::world}}"
assert col.addNote(note) == 1
assert (
f'hello <span class="cloze" data-cloze="{encode_attribute("world")}">[...]</span>'
f'hello <span class="cloze" data-cloze="{encode_attribute("world")}" data-ordinal="1">[...]</span>'
in note.cards()[0].question()
)
assert 'hello <span class="cloze">world</span>' in note.cards()[0].answer()
assert (
'hello <span class="cloze" data-ordinal="1">world</span>'
in note.cards()[0].answer()
)
# and with a comment
note = col.new_note(m)
note["Text"] = "hello {{c1::world::typical}}"
assert col.addNote(note) == 1
assert (
f'<span class="cloze" data-cloze="{encode_attribute("world")}">[typical]</span>'
f'<span class="cloze" data-cloze="{encode_attribute("world")}" data-ordinal="1">[typical]</span>'
in note.cards()[0].question()
)
assert '<span class="cloze">world</span>' in note.cards()[0].answer()
assert (
'<span class="cloze" data-ordinal="1">world</span>' in note.cards()[0].answer()
)
# and with 2 clozes
note = col.new_note(m)
note["Text"] = "hello {{c1::world}} {{c2::bar}}"
assert col.addNote(note) == 2
(c1, c2) = note.cards()
assert (
f'<span class="cloze" data-cloze="{encode_attribute("world")}">[...]</span> bar'
f'<span class="cloze" data-cloze="{encode_attribute("world")}" data-ordinal="1">[...]</span> <span class="cloze-inactive" data-ordinal="2">bar</span>'
in c1.question()
)
assert '<span class="cloze">world</span> bar' in c1.answer()
assert (
f'world <span class="cloze" data-cloze="{encode_attribute("bar")}">[...]</span>'
'<span class="cloze" data-ordinal="1">world</span> <span class="cloze-inactive" data-ordinal="2">bar</span>'
in c1.answer()
)
assert (
f'<span class="cloze-inactive" data-ordinal="1">world</span> <span class="cloze" data-cloze="{encode_attribute("bar")}" data-ordinal="2">[...]</span>'
in c2.question()
)
assert 'world <span class="cloze">bar</span>' in c2.answer()
assert (
'<span class="cloze-inactive" data-ordinal="1">world</span> <span class="cloze" data-ordinal="2">bar</span>'
in c2.answer()
)
# if there are multiple answers for a single cloze, they are given in a
# list
note = col.new_note(m)
note["Text"] = "a {{c1::b}} {{c1::c}}"
assert col.addNote(note) == 1
assert '<span class="cloze">b</span> <span class="cloze">c</span>' in (
note.cards()[0].answer()
assert (
'<span class="cloze" data-ordinal="1">b</span> <span class="cloze" data-ordinal="1">c</span>'
in (note.cards()[0].answer())
)
# if we add another cloze, a card should be generated
cnt = col.card_count()
@ -280,7 +292,9 @@ def test_cloze_mathjax():
assert (
note.cards()[0]
.question()
.endswith(r'\(a\) <span class="cloze" data-cloze="b">[...]</span> \[ [...] \]')
.endswith(
r'\(a\) <span class="cloze" data-cloze="b" data-ordinal="1">[...]</span> \[ [...] \]'
)
)
@ -310,26 +324,26 @@ def test_chained_mods():
col.models.update(m)
note = col.newNote()
q1 = '<span style="color:red">phrase</span>'
a1 = "<b>sentence</b>"
q2 = '<span style="color:red">en chaine</span>'
a2 = "<i>chained</i>"
a1 = '<span style="color:red">phrase</span>'
h1 = "<b>sentence</b>"
a2 = '<span style="color:red">en chaine</span>'
h2 = "<i>chained</i>"
note[
"Text"
] = "This {{{{c1::{}::{}}}}} demonstrates {{{{c1::{}::{}}}}} clozes.".format(
q1,
a1,
q2,
h1,
a2,
h2,
)
assert col.addNote(note) == 1
assert (
f'This <span class="cloze" data-cloze="{encode_attribute("phrase")}">[sentence]</span>'
f' demonstrates <span class="cloze" data-cloze="{encode_attribute("en chaine")}">[chained]</span> clozes.'
'This <span class="cloze" data-cloze="phrase" data-ordinal="1">[sentence]</span>'
f' demonstrates <span class="cloze" data-cloze="{encode_attribute("en chaine")}" data-ordinal="1">[chained]</span> clozes.'
in note.cards()[0].question()
)
assert (
f'This <span class="cloze">phrase</span> demonstrates <span class="cloze">en chaine</span> clozes.'
'This <span class="cloze" data-ordinal="1">phrase</span> demonstrates <span class="cloze" data-ordinal="1">en chaine</span> clozes.'
in note.cards()[0].answer()
)

View file

@ -1,26 +1,20 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, collections::HashSet};
use htmlescape::encode_attribute;
use lazy_static::lazy_static;
use nom::{
branch::alt,
bytes::complete::{tag, take_while},
combinator::map,
IResult,
};
use regex::{Captures, Regex};
use std::{borrow::Cow, collections::HashSet, fmt::Write};
use crate::{latex::contains_latex, template::RenderContext, text::strip_html_preserving_entities};
lazy_static! {
static ref CLOZE: Regex = Regex::new(
r#"(?xsi)
\{\{
c(\d+):: # 1 = cloze number
(.*?) # 2 = clozed text
(?:
::(.*?) # 3 = optional hint
)?
\}\}
"#
)
.unwrap();
static ref MATHJAX: Regex = Regex::new(
r#"(?xsi)
(\\[(\[]) # 1 = mathjax opening tag
@ -31,99 +25,276 @@ lazy_static! {
.unwrap();
}
mod cloze_caps {
// cloze ordinal
pub const ORD: usize = 1;
// the occluded text
pub const TEXT: usize = 2;
// optional hint
pub const HINT: usize = 3;
}
mod mathjax_caps {
pub const OPENING_TAG: usize = 1;
pub const INNER_TEXT: usize = 2;
pub const CLOSING_TAG: usize = 3;
}
pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
let mut cloze_ord_was_in_text = false;
/// A lexical unit produced by `tokenize`.
#[derive(Debug)]
enum Token<'a> {
/// An opening marker (`{{c`, digits, `::`), carrying the parsed ordinal.
OpenCloze(u16),
/// A run of text between markers, borrowed from the input string.
Text(&'a str),
/// A closing marker (`}}`).
CloseCloze,
}
let output = CLOZE.replace_all(text, |caps: &Captures| {
let captured_ord = caps
.get(cloze_caps::ORD)
.unwrap()
.as_str()
.parse()
.unwrap_or(0);
let text = caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned();
if captured_ord != cloze_ord {
// other cloze deletions are unchanged
return text;
} else {
cloze_ord_was_in_text = true;
/// Tokenize string
fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
/// Recognize an opening cloze marker: `{{c`, a run of ASCII digits, then `::`.
///
/// On success yields `Token::OpenCloze` with the digits parsed as a `u16`.
/// If the digits do not parse as a `u16` (none present, or out of range),
/// the marker is not recognized and an `ErrorKind::Digit` error is returned.
fn open_cloze(text: &str) -> IResult<&str, Token> {
    // literal prefix: two opening braces and the letter 'c'
    let (rest, _prefix) = tag("{{c")(text)?;
    // the ordinal, written as ASCII digits
    let (rest, number) = take_while(|ch: char| ch.is_ascii_digit())(rest)?;
    let ordinal = match number.parse::<u16>() {
        Ok(ordinal) => ordinal,
        Err(_) => {
            // not a usable number, so this is not an opening marker
            return Err(nom::Err::Error(nom::error::make_error(
                rest,
                nom::error::ErrorKind::Digit,
            )));
        }
    };
    // separator between the ordinal and the clozed text
    let (rest, _separator) = tag("::")(rest)?;
    Ok((rest, Token::OpenCloze(ordinal)))
}
let text_attr;
let replacement;
/// Recognize the closing marker `}}` and yield `Token::CloseCloze`.
fn close_cloze(text: &str) -> IResult<&str, Token> {
map(tag("}}"), |_| Token::CloseCloze)(text)
}
/// Match a run of text until an open/close marker is encountered.
///
/// Fails with `ErrorKind::Eof` when `text` is empty. Otherwise returns
/// `Token::Text` holding everything before the first position at which
/// `open_cloze` or `close_cloze` matches (or the whole input if neither ever
/// matches), together with the unconsumed remainder.
fn normal_text(text: &str) -> IResult<&str, Token> {
    if text.is_empty() {
        return Err(nom::Err::Error(nom::error::make_error(
            text,
            nom::error::ErrorKind::Eof,
        )));
    }
    let mut other_token = alt((open_cloze, close_cloze));
    // Probe only at character boundaries. Slicing a `str` at an offset that
    // falls inside a multi-byte UTF-8 sequence panics, so stepping one byte
    // at a time would crash on any non-ASCII text.
    let end = text
        .char_indices()
        .map(|(offset, _)| offset)
        .find(|&offset| other_token(&text[offset..]).is_ok())
        .unwrap_or(text.len());
    Ok((&text[end..], Token::Text(&text[..end])))
}
std::iter::from_fn(move || {
if text.is_empty() {
None
} else {
let (remaining_text, token) =
alt((open_cloze, close_cloze, normal_text))(text).unwrap();
text = remaining_text;
Some(token)
}
})
}
/// A node of the parsed field: either plain text or a cloze deletion.
#[derive(Debug)]
enum TextOrCloze<'a> {
    /// Text borrowed from the input string.
    Text(&'a str),
    /// A cloze deletion, which may itself contain further nodes.
    Cloze(ExtractedCloze<'a>),
}

/// One cloze deletion together with everything nested inside it.
#[derive(Debug)]
struct ExtractedCloze<'a> {
    /// The number written after `c` in the opening marker.
    ordinal: u16,
    /// The cloze's content, in order: text runs and nested clozes.
    nodes: Vec<TextOrCloze<'a>>,
    /// The hint, when one was given.
    hint: Option<&'a str>,
}

impl ExtractedCloze<'_> {
    /// The hint to display for this cloze; `"..."` when it has none.
    fn hint(&self) -> &str {
        match self.hint {
            Some(hint) => hint,
            None => "...",
        }
    }

    /// The text of this cloze with the text of all nested clozes flattened in.
    fn clozed_text(&self) -> Cow<str> {
        // A single text node can be returned as a borrow, with no allocation.
        if let [TextOrCloze::Text(only)] = self.nodes.as_slice() {
            return Cow::Borrowed(*only);
        }

        let mut joined = String::new();
        for child in &self.nodes {
            match child {
                TextOrCloze::Text(fragment) => joined.push_str(fragment),
                TextOrCloze::Cloze(inner) => joined.push_str(&inner.clozed_text()),
            }
        }
        Cow::Owned(joined)
    }
}
/// Build a tree of text and (possibly nested) cloze nodes from `text`.
///
/// Inside a cloze, the first `::` found in a text run splits it: the part
/// before stays as content, the part after becomes the cloze's hint. A
/// closing marker with no open cloze is kept as literal `}}` text. Clozes
/// that are still open when the input ends are not part of the returned tree.
fn parse_text_with_clozes(text: &str) -> Vec<TextOrCloze<'_>> {
    // clozes opened but not yet closed; the innermost one is last
    let mut stack: Vec<ExtractedCloze> = Vec::new();
    let mut top_level = Vec::new();

    for token in tokenize(text) {
        match token {
            Token::OpenCloze(ordinal) => {
                stack.push(ExtractedCloze {
                    ordinal,
                    // most clozes hold exactly one text node
                    nodes: Vec::with_capacity(1),
                    hint: None,
                });
            }
            Token::Text(fragment) => match stack.last_mut() {
                Some(current) => {
                    let content = match fragment.split_once("::") {
                        Some((before, after)) => {
                            current.hint = Some(after);
                            before
                        }
                        None => fragment,
                    };
                    current.nodes.push(TextOrCloze::Text(content));
                }
                None => top_level.push(TextOrCloze::Text(fragment)),
            },
            Token::CloseCloze => match stack.pop() {
                Some(finished) => {
                    // attach to the enclosing cloze, or to the top level
                    // when no other cloze is open
                    let destination = match stack.last_mut() {
                        Some(parent) => &mut parent.nodes,
                        None => &mut top_level,
                    };
                    destination.push(TextOrCloze::Cloze(finished));
                }
                // closing marker outside of any cloze
                None => top_level.push(TextOrCloze::Text("}}")),
            },
        }
    }

    top_level
}
fn reveal_cloze_text_in_nodes(
node: &TextOrCloze,
cloze_ord: u16,
question: bool,
output: &mut Vec<String>,
) {
if let TextOrCloze::Cloze(cloze) = node {
if cloze.ordinal == cloze_ord {
if question {
text_attr = format!(r#" data-cloze="{}""#, htmlescape::encode_attribute(&text));
// hint provided?
if let Some(hint) = caps.get(cloze_caps::HINT) {
replacement = format!("[{}]", hint.as_str());
output.push(cloze.hint().into())
} else {
replacement = "[...]".to_string();
output.push(cloze.clozed_text().into())
}
}
for node in &cloze.nodes {
reveal_cloze_text_in_nodes(node, cloze_ord, question, output);
}
}
}
/// Render one cloze (and, recursively, its children) as HTML appended to `buf`.
///
/// * `cloze_ord` - ordinal of the cloze the card is testing.
/// * `question` - `true` for the question side, `false` for the answer side.
/// * `active_cloze_found_in_text` - set to `true` when this cloze or any
///   nested cloze has ordinal `cloze_ord`; it is never reset to `false`.
///
/// An active cloze on the question side is replaced by its bracketed hint,
/// with the rendered content stored attribute-escaped in `data-cloze`. In all
/// other cases the content is shown inside a `cloze` or `cloze-inactive` span.
fn reveal_cloze(
    cloze: &ExtractedCloze,
    cloze_ord: u16,
    question: bool,
    active_cloze_found_in_text: &mut bool,
    buf: &mut String,
) {
    /// Append the rendering of every child node of `parent` to `out`.
    fn render_children(
        parent: &ExtractedCloze,
        cloze_ord: u16,
        question: bool,
        found: &mut bool,
        out: &mut String,
    ) {
        for child in &parent.nodes {
            match child {
                TextOrCloze::Text(plain) => out.push_str(plain),
                TextOrCloze::Cloze(nested) => {
                    reveal_cloze(nested, cloze_ord, question, found, out)
                }
            }
        }
    }

    let is_active = cloze.ordinal == cloze_ord;
    if is_active {
        *active_cloze_found_in_text = true;
    }

    if question && is_active {
        // question side with active cloze; all inner content is elided
        let mut hidden = String::new();
        render_children(
            cloze,
            cloze_ord,
            question,
            active_cloze_found_in_text,
            &mut hidden,
        );
        write!(
            buf,
            r#"<span class="cloze" data-cloze="{}" data-ordinal="{}">[{}]</span>"#,
            encode_attribute(&hidden),
            cloze.ordinal,
            cloze.hint()
        )
        .unwrap();
        return;
    }

    if is_active {
        // answer side with active cloze
        write!(
            buf,
            r#"<span class="cloze" data-ordinal="{}">"#,
            cloze.ordinal
        )
        .unwrap();
    } else {
        // inactive cloze on either side; text shown, children may be active
        write!(
            buf,
            r#"<span class="cloze-inactive" data-ordinal="{}">"#,
            cloze.ordinal
        )
        .unwrap();
    }
    render_children(cloze, cloze_ord, question, active_cloze_found_in_text, buf);
    buf.push_str("</span>");
}
pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
let mut buf = String::new();
let mut active_cloze_found_in_text = false;
for node in &parse_text_with_clozes(text) {
match node {
// top-level text is indiscriminately added
TextOrCloze::Text(text) => buf.push_str(text),
TextOrCloze::Cloze(cloze) => reveal_cloze(
cloze,
cloze_ord,
question,
&mut active_cloze_found_in_text,
&mut buf,
),
}
}
if active_cloze_found_in_text {
buf.into()
} else {
text_attr = "".to_string();
replacement = text;
}
format!(r#"<span class="cloze"{}>{}</span>"#, text_attr, replacement)
});
if !cloze_ord_was_in_text {
return "".into();
}
// if no cloze deletions are found, Anki returns an empty string
match output {
Cow::Borrowed(_) => "".into(),
other => other,
Cow::from("")
}
}
pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
CLOZE
.captures_iter(text)
.filter(|caps| {
let captured_ord = caps
.get(cloze_caps::ORD)
.unwrap()
.as_str()
.parse()
.unwrap_or(0);
captured_ord == cloze_ord
})
.map(|caps| {
let cloze = if question {
// hint provided?
if let Some(hint) = caps.get(cloze_caps::HINT) {
hint.as_str()
} else {
"..."
let mut output = Vec::new();
for node in &parse_text_with_clozes(text) {
reveal_cloze_text_in_nodes(node, cloze_ord, question, &mut output);
}
} else {
caps.get(cloze_caps::TEXT).unwrap().as_str()
};
cloze
})
.collect::<Vec<_>>()
.join(", ")
.into()
output.join(", ").into()
}
/// If text contains any LaTeX tags, render the front and back
@ -144,7 +315,9 @@ pub fn expand_clozes_to_reveal_latex(text: &str) -> String {
}
pub(crate) fn contains_cloze(text: &str) -> bool {
CLOZE.is_match(text)
parse_text_with_clozes(text)
.iter()
.any(|node| matches!(node, TextOrCloze::Cloze(_)))
}
pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
@ -153,13 +326,18 @@ pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
set
}
/// Insert the ordinal of every cloze in `nodes`, including nested ones, into `set`.
fn add_cloze_numbers_in_text_with_clozes(nodes: &[TextOrCloze], set: &mut HashSet<u16>) {
    nodes.iter().for_each(|node| match node {
        TextOrCloze::Cloze(cloze) => {
            set.insert(cloze.ordinal);
            // descend into the clozes nested inside this one
            add_cloze_numbers_in_text_with_clozes(&cloze.nodes, set);
        }
        TextOrCloze::Text(_) => {}
    });
}
#[allow(clippy::implicit_hasher)]
pub fn add_cloze_numbers_in_string(field: &str, set: &mut HashSet<u16>) {
for cap in CLOZE.captures_iter(field) {
if let Ok(n) = cap[1].parse() {
set.insert(n);
}
}
add_cloze_numbers_in_text_with_clozes(&parse_text_with_clozes(field), set)
}
fn strip_html_inside_mathjax(text: &str) -> Cow<str> {
@ -232,6 +410,76 @@ mod test {
);
}
/// Nested clozes, compared after stripping HTML from the rendered output.
#[test]
fn nested_cloze_plain_text() {
    // (field text, active ordinal, question side?, expected plain text)
    let cases: [(&str, u16, bool, &str); 6] = [
        ("foo {{c1::bar {{c2::baz}}}}", 1, true, "foo [...]"),
        ("foo {{c1::bar {{c2::baz}}}}", 1, false, "foo bar baz"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 2, true, "foo bar [...]"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 2, false, "foo bar baz"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 1, true, "foo [qux]"),
        ("foo {{c1::bar {{c2::baz}}::qux}}", 1, false, "foo bar baz"),
    ];
    for (input, ord, question, expected) in cases.iter().copied() {
        assert_eq!(
            strip_html(reveal_cloze_text(input, ord, question).as_ref()),
            expected,
            "input={:?} ord={} question={}",
            input,
            ord,
            question
        );
    }
}
/// Nested clozes, compared against the exact HTML produced.
#[test]
fn nested_cloze_html() {
    let without_hint = "foo {{c1::bar {{c2::baz}}}}";
    let with_hint = "foo {{c1::bar {{c2::baz}}::qux}}";
    // content of the outer cloze as it is stored in `data-cloze` while hidden
    let hidden_outer = htmlescape::encode_attribute(
        r#"bar <span class="cloze-inactive" data-ordinal="2">baz</span>"#,
    );

    // ordinals are collected from nested clozes; the trailing `{{` is ignored
    assert_eq!(
        cloze_numbers_in_string("{{c2::te{{c1::s}}}}t{{"),
        vec![1, 2].into_iter().collect::<HashSet<u16>>()
    );

    // outer cloze active, no hint
    assert_eq!(
        reveal_cloze_text(without_hint, 1, true),
        format!(
            r#"foo <span class="cloze" data-cloze="{}" data-ordinal="1">[...]</span>"#,
            hidden_outer
        )
    );
    assert_eq!(
        reveal_cloze_text(without_hint, 1, false),
        r#"foo <span class="cloze" data-ordinal="1">bar <span class="cloze-inactive" data-ordinal="2">baz</span></span>"#
    );

    // inner cloze active
    assert_eq!(
        reveal_cloze_text(with_hint, 2, true),
        r#"foo <span class="cloze-inactive" data-ordinal="1">bar <span class="cloze" data-cloze="baz" data-ordinal="2">[...]</span></span>"#
    );
    assert_eq!(
        reveal_cloze_text(with_hint, 2, false),
        r#"foo <span class="cloze-inactive" data-ordinal="1">bar <span class="cloze" data-ordinal="2">baz</span></span>"#
    );

    // outer cloze active, with hint
    assert_eq!(
        reveal_cloze_text(with_hint, 1, true),
        format!(
            r#"foo <span class="cloze" data-cloze="{}" data-ordinal="1">[qux]</span>"#,
            hidden_outer
        )
    );
    assert_eq!(
        reveal_cloze_text(with_hint, 1, false),
        r#"foo <span class="cloze" data-ordinal="1">bar <span class="cloze-inactive" data-ordinal="2">baz</span></span>"#
    );
}
#[test]
fn mathjax_html() {
// escaped angle brackets should be preserved

View file

@ -256,7 +256,7 @@ field</a>
assert_eq!(strip_html(&cloze_filter(text, &ctx)).as_ref(), "[...] two");
assert_eq!(
cloze_filter(text, &ctx),
r#"<span class="cloze" data-cloze="one">[...]</span> two"#
r#"<span class="cloze" data-cloze="one" data-ordinal="1">[...]</span> <span class="cloze-inactive" data-ordinal="2">two</span>"#
);
ctx.card_ord = 1;