diff --git a/proto/backend.proto b/proto/backend.proto index 33010736c..71a68154c 100644 --- a/proto/backend.proto +++ b/proto/backend.proto @@ -18,6 +18,7 @@ message BackendInput { int64 local_minutes_west = 22; string strip_av_tags = 23; ExtractAVTagsIn extract_av_tags = 24; + string expand_clozes_to_reveal_latex = 25; } } @@ -32,6 +33,7 @@ message BackendOutput { sint32 local_minutes_west = 22; string strip_av_tags = 23; ExtractAVTagsOut extract_av_tags = 24; + string expand_clozes_to_reveal_latex = 25; BackendError error = 2047; } diff --git a/pylib/anki/media.py b/pylib/anki/media.py index 68c51a621..3cc54a87e 100644 --- a/pylib/anki/media.py +++ b/pylib/anki/media.py @@ -1,5 +1,8 @@ # Copyright: Ankitects Pty Ltd and contributors # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +from __future__ import annotations + import io import json import os @@ -14,11 +17,11 @@ import urllib.request import zipfile from typing import Any, Callable, List, Optional, Tuple, Union +import anki from anki.consts import * from anki.db import DB, DBError from anki.lang import _ from anki.latex import render_latex -from anki.template import expand_clozes from anki.utils import checksum, isMac, isWin @@ -34,7 +37,7 @@ class MediaManager: regexps = soundRegexps + imgRegexps db: Optional[DB] - def __init__(self, col, server: bool) -> None: + def __init__(self, col: anki.storage._Collection, server: bool) -> None: self.col = col if server: self._dir = None @@ -213,23 +216,21 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); ) -> List[str]: l = [] model = self.col.models.get(mid) - strings: List[str] = [] if model["type"] == MODEL_CLOZE and "{{c" in string: # if the field has clozes in it, we'll need to expand the # possibilities so we can render latex - strings = expand_clozes(string) + strings = self.col.backend.expand_clozes_to_reveal_latex(string) else: - strings = [string] - for string in strings: - # handle latex - string = render_latex(string, model, self.col) - # extract filenames - for reg in self.regexps: - for match in re.finditer(reg, string): - fname = match.group("fname") - isLocal = not re.match("(https?|ftp)://", fname.lower()) - if isLocal or includeRemote: - l.append(fname) + strings = string + # handle latex + string = render_latex(string, model, self.col) + # extract filenames + for reg in self.regexps: + for match in re.finditer(reg, string): + fname = match.group("fname") + isLocal = not re.match("(https?|ftp)://", fname.lower()) + if isLocal or includeRemote: + l.append(fname) return l def transformNames(self, txt: str, func: Callable) -> Any: diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py index 65fd89494..253c31231 100644 --- a/pylib/anki/rsbackend.py +++ b/pylib/anki/rsbackend.py @@ -1,6 +1,7 @@ # Copyright: Ankitects Pty Ltd and contributors # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # pylint: skip-file + from dataclasses import dataclass from typing import Dict, List, Tuple, Union @@ -175,3 +176,8 @@ class RustBackend: native_tags = list(map(av_tag_to_native, out.av_tags)) return out.text, native_tags + + def expand_clozes_to_reveal_latex(self, text: str) -> str: + return self._run_command( + pb.BackendInput(expand_clozes_to_reveal_latex=text) + ).expand_clozes_to_reveal_latex diff --git a/pylib/anki/template.py b/pylib/anki/template.py index 1c667b861..cee9b2d94 100644 --- a/pylib/anki/template.py +++ b/pylib/anki/template.py @@ -28,7 +28,6 @@ template_legacy.py file, using the legacy addHook() system. from __future__ import annotations -import re from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple @@ -221,44 +220,3 @@ def apply_custom_filters( res += field_text return res - - -# Cloze handling -########################################################################## - -# Matches a {{c123::clozed-out text::hint}} Cloze deletion, case-insensitively. -# The regex should be interpolated with a regex number and creates the following -# named groups: -# - tag: The lowercase or uppercase 'c' letter opening the Cloze. -# The c/C difference is only relevant to the legacy code. -# - content: Clozed-out content. -# - hint: Cloze hint, if provided. -clozeReg = r"(?si)\{\{(?Pc)%s::(?P.*?)(::(?P.*?))?\}\}" - -# Constants referring to group names within clozeReg. -CLOZE_REGEX_MATCH_GROUP_TAG = "tag" -CLOZE_REGEX_MATCH_GROUP_CONTENT = "content" -CLOZE_REGEX_MATCH_GROUP_HINT = "hint" - -# used by the media check functionality -def expand_clozes(string: str) -> List[str]: - "Render all clozes in string." - ords = set(re.findall(r"{{c(\d+)::.+?}}", string)) - strings = [] - - def qrepl(m): - if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): - return "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) - else: - return "[...]" - - def arepl(m): - return m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) - - for ord in ords: - s = re.sub(clozeReg % ord, qrepl, string) - s = re.sub(clozeReg % ".+?", arepl, s) - strings.append(s) - strings.append(re.sub(clozeReg % ".+?", arepl, string)) - - return strings diff --git a/rslib/src/backend.rs b/rslib/src/backend.rs index a14ef4d14..04505055a 100644 --- a/rslib/src/backend.rs +++ b/rslib/src/backend.rs @@ -4,6 +4,7 @@ use crate::backend_proto as pt; use crate::backend_proto::backend_input::Value; use crate::backend_proto::RenderedTemplateReplacement; +use crate::cloze::expand_clozes_to_reveal_latex; use crate::err::{AnkiError, Result}; use crate::sched::{local_minutes_west_for_stamp, sched_timing_today}; use crate::template::{ @@ -101,6 +102,9 @@ impl Backend { } Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()), Value::ExtractAvTags(input) => OValue::ExtractAvTags(self.extract_av_tags(input)), + Value::ExpandClozesToRevealLatex(input) => { + OValue::ExpandClozesToRevealLatex(expand_clozes_to_reveal_latex(&input)) + } }) } diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs index c14624c34..d5c7f2f01 100644 --- a/rslib/src/cloze.rs +++ b/rslib/src/cloze.rs @@ -2,7 +2,7 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use crate::template::RenderContext; -use crate::text::strip_html; +use crate::text::{contains_latex, strip_html}; use lazy_static::lazy_static; use regex::Captures; use regex::Regex; @@ -91,6 +91,23 @@ pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow } } +/// If text contains any LaTeX tags, render the front and back +/// of each cloze deletion so that LaTeX can be generated. If +/// no LaTeX is found, returns an empty string. +pub fn expand_clozes_to_reveal_latex(text: &str) -> String { + if !contains_latex(text) { + return "".into(); + } + let ords = cloze_numbers_in_string(text); + let mut buf = String::new(); + for ord in ords { + buf += reveal_cloze_text(text, ord, true).as_ref(); + buf += reveal_cloze_text(text, ord, false).as_ref(); + } + + buf +} + pub fn cloze_numbers_in_string(html: &str) -> HashSet { let mut hash = HashSet::with_capacity(4); for cap in CLOZE.captures_iter(html) { @@ -122,7 +139,8 @@ pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a #[cfg(test)] mod test { - use crate::cloze::cloze_numbers_in_string; + use crate::cloze::{cloze_numbers_in_string, expand_clozes_to_reveal_latex}; + use crate::text::strip_html; use std::collections::HashSet; #[test] @@ -135,5 +153,16 @@ mod test { cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"), vec![1, 2].into_iter().collect::>() ); + + assert_eq!( + expand_clozes_to_reveal_latex("{{c1::foo}} {{c2::bar::baz}}"), + "".to_string() + ); + + let expanded = expand_clozes_to_reveal_latex("[latex]{{c1::foo}} {{c2::bar::baz}}[/latex]"); + let expanded = strip_html(expanded.as_ref()); + assert!(expanded.contains("foo [baz]")); + assert!(expanded.contains("[...] bar")); + assert!(expanded.contains("foo bar")); } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index 1b133b49a..f6be8a106 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -44,6 +44,15 @@ lazy_static! { (.*?) # 3 - field text \[/anki:tts\] "#).unwrap(); + + static ref LATEX: Regex = Regex::new( + r#"(?xsi) + \[latex\](.+?)\[/latex\] # 1 - standard latex + | + \[\$\](.+?)\[/\$\] # 2 - inline math + | + \[\$\$\](.+?)\[/\$\$\] # 3 - math environment + "#).unwrap(); } pub fn strip_html(html: &str) -> Cow { @@ -143,6 +152,10 @@ pub fn strip_html_preserving_image_filenames(html: &str) -> Cow { without_html.into_owned().into() } +pub(crate) fn contains_latex(text: &str) -> bool { + LATEX.is_match(text) +} + #[cfg(test)] mod test { use crate::text::{