reuse reveal_cloze_text() for LaTeX cloze expansion

This commit is contained in:
Damien Elmes 2020-01-28 07:40:44 +10:00
parent 9ad80f4d2c
commit c075191697
7 changed files with 72 additions and 59 deletions

View file

@ -18,6 +18,7 @@ message BackendInput {
int64 local_minutes_west = 22;
string strip_av_tags = 23;
ExtractAVTagsIn extract_av_tags = 24;
string expand_clozes_to_reveal_latex = 25;
}
}
@ -32,6 +33,7 @@ message BackendOutput {
sint32 local_minutes_west = 22;
string strip_av_tags = 23;
ExtractAVTagsOut extract_av_tags = 24;
string expand_clozes_to_reveal_latex = 25;
BackendError error = 2047;
}

View file

@ -1,5 +1,8 @@
# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
from __future__ import annotations
import io
import json
import os
@ -14,11 +17,11 @@ import urllib.request
import zipfile
from typing import Any, Callable, List, Optional, Tuple, Union
import anki
from anki.consts import *
from anki.db import DB, DBError
from anki.lang import _
from anki.latex import render_latex
from anki.template import expand_clozes
from anki.utils import checksum, isMac, isWin
@ -34,7 +37,7 @@ class MediaManager:
regexps = soundRegexps + imgRegexps
db: Optional[DB]
def __init__(self, col, server: bool) -> None:
def __init__(self, col: anki.storage._Collection, server: bool) -> None:
self.col = col
if server:
self._dir = None
@ -213,23 +216,21 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
) -> List[str]:
l = []
model = self.col.models.get(mid)
strings: List[str] = []
if model["type"] == MODEL_CLOZE and "{{c" in string:
# if the field has clozes in it, we'll need to expand the
# possibilities so we can render latex
strings = expand_clozes(string)
strings = self.col.backend.expand_clozes_to_reveal_latex(string)
else:
strings = [string]
for string in strings:
# handle latex
string = render_latex(string, model, self.col)
# extract filenames
for reg in self.regexps:
for match in re.finditer(reg, string):
fname = match.group("fname")
isLocal = not re.match("(https?|ftp)://", fname.lower())
if isLocal or includeRemote:
l.append(fname)
strings = string
# handle latex
string = render_latex(string, model, self.col)
# extract filenames
for reg in self.regexps:
for match in re.finditer(reg, string):
fname = match.group("fname")
isLocal = not re.match("(https?|ftp)://", fname.lower())
if isLocal or includeRemote:
l.append(fname)
return l
def transformNames(self, txt: str, func: Callable) -> Any:

View file

@ -1,6 +1,7 @@
# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
# pylint: skip-file
from dataclasses import dataclass
from typing import Dict, List, Tuple, Union
@ -175,3 +176,8 @@ class RustBackend:
native_tags = list(map(av_tag_to_native, out.av_tags))
return out.text, native_tags
# Send `text` to the backend in a BackendInput whose
# `expand_clozes_to_reveal_latex` field is set, and return the field of the
# same name from the reply produced by `_run_command`.
def expand_clozes_to_reveal_latex(self, text: str) -> str:
return self._run_command(
pb.BackendInput(expand_clozes_to_reveal_latex=text)
).expand_clozes_to_reveal_latex

View file

@ -28,7 +28,6 @@ template_legacy.py file, using the legacy addHook() system.
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
@ -221,44 +220,3 @@ def apply_custom_filters(
res += field_text
return res
# Cloze handling
##########################################################################
# Matches a {{c123::clozed-out text::hint}} Cloze deletion, case-insensitively.
# The regex should be interpolated with a regex number and creates the following
# named groups:
# - tag: The lowercase or uppercase 'c' letter opening the Cloze.
# The c/C difference is only relevant to the legacy code.
# - content: Clozed-out content.
# - hint: Cloze hint, if provided.
clozeReg = r"(?si)\{\{(?P<tag>c)%s::(?P<content>.*?)(::(?P<hint>.*?))?\}\}"

# Constants referring to group names within clozeReg.
CLOZE_REGEX_MATCH_GROUP_TAG = "tag"
CLOZE_REGEX_MATCH_GROUP_CONTENT = "content"
CLOZE_REGEX_MATCH_GROUP_HINT = "hint"


# used by the media check functionality
def expand_clozes(string: str) -> List[str]:
    """Render every cloze in string, once per cloze number plus fully revealed.

    For each cloze number found, one entry is produced in which that cloze is
    replaced by "[hint]" (or "[...]" when it has no hint) and every other
    cloze is replaced by its content. A final entry has all clozes replaced
    by their content. Entries are ordered by ascending cloze number, so the
    result is the same on every run.
    """
    # Discover cloze numbers with the same flags clozeReg uses ((?si)), so a
    # cloze that spans several lines is not overlooked here and then silently
    # left without a question-side rendering.
    found = set(re.findall(r"(?si)\{\{c(\d+)::.+?\}\}", string))
    # Sets of strings iterate in a hash-dependent order; sort numerically
    # (with the raw text as tie-breaker) to keep the output stable.
    cloze_numbers = sorted(found, key=lambda num: (int(num), num))

    def question_repl(m):
        hint = m.group(CLOZE_REGEX_MATCH_GROUP_HINT)
        if hint:
            return "[%s]" % hint
        return "[...]"

    def answer_repl(m):
        return m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT)

    any_cloze = clozeReg % ".+?"
    strings = []
    for cloze_number in cloze_numbers:
        # hide the cloze being asked about, then reveal all remaining ones
        hidden = re.sub(clozeReg % cloze_number, question_repl, string)
        strings.append(re.sub(any_cloze, answer_repl, hidden))
    # finally, the text with every cloze revealed
    strings.append(re.sub(any_cloze, answer_repl, string))
    return strings

View file

@ -4,6 +4,7 @@
use crate::backend_proto as pt;
use crate::backend_proto::backend_input::Value;
use crate::backend_proto::RenderedTemplateReplacement;
use crate::cloze::expand_clozes_to_reveal_latex;
use crate::err::{AnkiError, Result};
use crate::sched::{local_minutes_west_for_stamp, sched_timing_today};
use crate::template::{
@ -101,6 +102,9 @@ impl Backend {
}
Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()),
Value::ExtractAvTags(input) => OValue::ExtractAvTags(self.extract_av_tags(input)),
Value::ExpandClozesToRevealLatex(input) => {
OValue::ExpandClozesToRevealLatex(expand_clozes_to_reveal_latex(&input))
}
})
}

View file

@ -2,7 +2,7 @@
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::template::RenderContext;
use crate::text::strip_html;
use crate::text::{contains_latex, strip_html};
use lazy_static::lazy_static;
use regex::Captures;
use regex::Regex;
@ -91,6 +91,23 @@ pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str>
}
}
/// Expand the cloze deletions in `text` so that LaTeX inside them can be
/// generated.
///
/// When `text` contains no LaTeX tags, an empty string is returned.
/// Otherwise, for every cloze number found in `text`, the question-side
/// rendering followed by the answer-side rendering of that cloze is appended
/// to a single buffer, which is returned.
pub fn expand_clozes_to_reveal_latex(text: &str) -> String {
    let mut expanded = String::new();
    if contains_latex(text) {
        for ord in cloze_numbers_in_string(text) {
            // question side first, then the answer side
            for &question in &[true, false] {
                expanded.push_str(&reveal_cloze_text(text, ord, question));
            }
        }
    }
    expanded
}
pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
let mut hash = HashSet::with_capacity(4);
for cap in CLOZE.captures_iter(html) {
@ -122,7 +139,8 @@ pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a
#[cfg(test)]
mod test {
use crate::cloze::cloze_numbers_in_string;
use crate::cloze::{cloze_numbers_in_string, expand_clozes_to_reveal_latex};
use crate::text::strip_html;
use std::collections::HashSet;
#[test]
@ -135,5 +153,16 @@ mod test {
cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"),
vec![1, 2].into_iter().collect::<HashSet<u16>>()
);
assert_eq!(
expand_clozes_to_reveal_latex("{{c1::foo}} {{c2::bar::baz}}"),
"".to_string()
);
let expanded = expand_clozes_to_reveal_latex("[latex]{{c1::foo}} {{c2::bar::baz}}[/latex]");
let expanded = strip_html(expanded.as_ref());
assert!(expanded.contains("foo [baz]"));
assert!(expanded.contains("[...] bar"));
assert!(expanded.contains("foo bar"));
}
}

View file

@ -44,6 +44,15 @@ lazy_static! {
(.*?) # 3 - field text
\[/anki:tts\]
"#).unwrap();
static ref LATEX: Regex = Regex::new(
r#"(?xsi)
\[latex\](.+?)\[/latex\] # 1 - standard latex
|
\[\$\](.+?)\[/\$\] # 2 - inline math
|
\[\$\$\](.+?)\[/\$\$\] # 3 - math environment
"#).unwrap();
}
pub fn strip_html(html: &str) -> Cow<str> {
@ -143,6 +152,10 @@ pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
without_html.into_owned().into()
}
/// Returns true when the `LATEX` regex matches anywhere in `text`, i.e. the
/// text holds a non-empty `[latex]…[/latex]`, `[$]…[/$]` or `[$$]…[/$$]`
/// section (matched case-insensitively, possibly spanning lines).
pub(crate) fn contains_latex(text: &str) -> bool {
LATEX.is_match(text)
}
#[cfg(test)]
mod test {
use crate::text::{