From 1f2e00690f6d0a7fc0722d179f55232affd2a50f Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Wed, 8 Jan 2020 17:15:46 +1000 Subject: [PATCH] move the rest of Anki's code out of pystache In the process of factoring out the field filtering, the "extra" and "fullname" args are just passed in as a blank string now. Extra was functionality that allowed a field modifier to be defined as "filtername(arg1,arg2):field", and fullname was the name of the field including any provided field modifiers. From grepping through the add-ons on AnkiWeb, neither appears to have been used. --- pylib/anki/collection.py | 4 +- pylib/anki/media.py | 28 +---- pylib/anki/template/template.py | 141 +------------------------ pylib/anki/template2.py | 182 +++++++++++++++++++++++++++++++- pylib/tests/test_template.py | 11 +- 5 files changed, 191 insertions(+), 175 deletions(-) diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 8634e9230..6ff2992d4 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -30,7 +30,7 @@ from anki.rsbackend import RustBackend from anki.sched import Scheduler as V1Scheduler from anki.schedv2 import Scheduler as V2Scheduler from anki.tags import TagManager -from anki.template2 import renderFromFieldMap +from anki.template2 import render_from_field_map from anki.types import NoteType, QAData, Template from anki.utils import ( devMode, @@ -666,7 +666,7 @@ where c.nid = n.id and c.id in %s group by nid""" fields = runFilter("mungeFields", fields, model, data, self) # render fields - qatext = renderFromFieldMap(qfmt, afmt, fields, card_ord) + qatext = render_from_field_map(qfmt, afmt, fields, card_ord) ret: Dict[str, Any] = dict(q=qatext[0], a=qatext[1], id=card_id) # allow add-ons to modify the generated result diff --git a/pylib/anki/media.py b/pylib/anki/media.py index 684e711f5..df99e1880 100644 --- a/pylib/anki/media.py +++ b/pylib/anki/media.py @@ -18,6 +18,7 @@ from anki.consts import * from anki.db import DB, DBError from 
anki.lang import _ from anki.latex import mungeQA +from anki.template2 import expand_clozes from anki.utils import checksum, isMac, isWin @@ -216,7 +217,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); if model["type"] == MODEL_CLOZE and "{{c" in string: # if the field has clozes in it, we'll need to expand the # possibilities so we can render latex - strings = self._expandClozes(string) + strings = expand_clozes(string) else: strings = [string] for string in strings: @@ -231,31 +232,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); l.append(fname) return l - def _expandClozes(self, string: str) -> List[str]: - ords = set(re.findall(r"{{c(\d+)::.+?}}", string)) - strings = [] - from anki.template.template import ( - clozeReg, - CLOZE_REGEX_MATCH_GROUP_HINT, - CLOZE_REGEX_MATCH_GROUP_CONTENT, - ) - - def qrepl(m): - if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): - return "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) - else: - return "[...]" - - def arepl(m): - return m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) - - for ord in ords: - s = re.sub(clozeReg % ord, qrepl, string) - s = re.sub(clozeReg % ".+?", arepl, s) - strings.append(s) - strings.append(re.sub(clozeReg % ".+?", arepl, string)) - return strings - def transformNames(self, txt: str, func: Callable) -> Any: for reg in self.regexps: txt = re.sub(reg, func, txt) diff --git a/pylib/anki/template/template.py b/pylib/anki/template/template.py index 2c978cd8a..127c2092e 100644 --- a/pylib/anki/template/template.py +++ b/pylib/anki/template/template.py @@ -1,21 +1,7 @@ import re from typing import Any, Callable, Dict, Pattern -from anki.hooks import runFilter -from anki.utils import stripHTML, stripHTMLMedia - -# Matches a {{c123::clozed-out text::hint}} Cloze deletion, case-insensitively. -# The regex should be interpolated with a regex number and creates the following -# named groups: -# - tag: The lowercase or uppercase 'c' letter opening the Cloze. 
-# - content: Clozed-out content. -# - hint: Cloze hint, if provided. -clozeReg = r"(?si)\{\{(?P<tag>c)%s::(?P<content>.*?)(::(?P<hint>.*?))?\}\}" - -# Constants referring to group names within clozeReg. -CLOZE_REGEX_MATCH_GROUP_TAG = "tag" -CLOZE_REGEX_MATCH_GROUP_CONTENT = "content" -CLOZE_REGEX_MATCH_GROUP_HINT = "hint" +from anki.template2 import apply_field_filters, field_is_not_empty modifiers: Dict[str, Callable] = {} @@ -102,9 +88,8 @@ class Template: replacer = "" inverted = section[2] == "^" - if val: - val = stripHTMLMedia(val).strip() - if (val and not inverted) or (not val and inverted): + nonempty = field_is_not_empty(val or "") + if (nonempty and not inverted) or (not nonempty and inverted): replacer = inner template = template.replace(section, replacer) @@ -156,125 +141,7 @@ class Template: if txt is None: return "{unknown field %s}" % tag_name - # Since 'text:' and other mods can affect html on which Anki relies to - # process clozes, we need to make sure clozes are always - # treated after all the other mods, regardless of how they're specified - # in the template, so that {{cloze:text: == {{text:cloze: - # For type:, we return directly since no other mod than cloze (or other - # pre-defined mods) can be present and those are treated separately - mods.reverse() - mods.sort(key=lambda s: not s == "type") - - for mod in mods: - # built-in modifiers - if mod == "text": - # strip html - txt = stripHTML(txt) if txt else "" - elif mod == "type": - # type answer field; convert it to [[type:...]] for the gui code - # to process - return "[[%s]]" % tag_name - elif mod.startswith("cq-") or mod.startswith("ca-"): - # cloze deletion - mod, extra = mod.split("-") - txt = self.clozeText(txt, extra, mod[1]) if txt and extra else "" - else: - # hook-based field modifier - m = re.search(r"^(.*?)(?:\((.*)\))?$", mod) - if not m: - return "invalid field modifier " + mod - mod, extra = m.groups() - txt = runFilter( - "fmod_" + mod, txt or "", extra or "", context, tag, tag_name - ) - if 
txt is None: - return "{unknown field %s}" % tag_name - return txt - - @classmethod - def clozeText(cls, txt: str, ord: str, type: str) -> str: - """Processe the given Cloze deletion within the given template.""" - reg = clozeReg - currentRegex = clozeReg % ord - if not re.search(currentRegex, txt): - # No Cloze deletion was found in txt. - return "" - txt = cls._removeFormattingFromMathjax(txt, ord) - - def repl(m): - # replace chosen cloze with type - if type == "q": - if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): - buf = "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) - else: - buf = "[...]" - else: - buf = m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) - # uppercase = no formatting - if m.group(CLOZE_REGEX_MATCH_GROUP_TAG) == "c": - buf = "<span class=cloze>%s</span>" % buf - return buf - - txt = re.sub(currentRegex, repl, txt) - # and display other clozes normally - return re.sub(reg % r"\d+", "\\2", txt) - - @classmethod - def _removeFormattingFromMathjax(cls, txt, ord) -> str: - """Marks all clozes within MathJax to prevent formatting them. - - Active Cloze deletions within MathJax should not be wrapped inside - a Cloze <span>, as that would interfere with MathJax. - - This method finds all Cloze deletions number `ord` in `txt` which are - inside MathJax inline or display formulas, and replaces their opening - '{{c123' with a '{{C123'. The clozeText method interprets the upper-case - C as "don't wrap this Cloze in a <span>". - """ - creg = clozeReg.replace("(?si)", "") - - # Scan the string left to right. - # After a MathJax opening - \( or \[ - flip in_mathjax to True. - # After a MathJax closing - \) or \] - flip in_mathjax to False. - # When a Cloze pattern number `ord` is found and we are in MathJax, - # replace its '{{c' with '{{C'. - # - # TODO: Report mismatching opens/closes - e.g. '\(\]' - # TODO: Report errors in this method better than printing to stdout. 
- # flags in middle of expression deprecated - in_mathjax = False - - def replace(match): - nonlocal in_mathjax - if match.group("mathjax_open"): - if in_mathjax: - print("MathJax opening found while already in MathJax") - in_mathjax = True - elif match.group("mathjax_close"): - if not in_mathjax: - print("MathJax close found while not in MathJax") - in_mathjax = False - elif match.group("cloze"): - if in_mathjax: - return match.group(0).replace( - "{{c{}::".format(ord), "{{C{}::".format(ord) - ) - else: - print("Unexpected: no expected capture group is present") - return match.group(0) - - # The following regex matches one of: - # - MathJax opening - # - MathJax close - # - Cloze deletion number `ord` - return re.sub( - r"(?si)" - r"(?P<mathjax_open>\\[([])|" - r"(?P<mathjax_close>\\[\])])|" - r"(?P<cloze>" + (creg % ord) + ")", - replace, - txt, - ) + return apply_field_filters(tag, txt, context, mods) @modifier("=") def render_delimiter(self, tag_name=None, context=None) -> str: diff --git a/pylib/anki/template2.py b/pylib/anki/template2.py index f8a4a5b3c..abb0fc67e 100644 --- a/pylib/anki/template2.py +++ b/pylib/anki/template2.py @@ -9,15 +9,16 @@ connected to pystache. It may be renamed in the future. from __future__ import annotations import re -from typing import Any, Callable, Dict, Tuple +from typing import Any, Callable, Dict, List, Tuple import anki -from anki.hooks import addHook +from anki.hooks import addHook, runFilter from anki.lang import _ from anki.sound import stripSounds +from anki.utils import stripHTML, stripHTMLMedia -def renderFromFieldMap( +def render_from_field_map( qfmt: str, afmt: str, fields: Dict[str, str], card_ord: int ) -> Tuple[str, str]: "Renders the provided templates, returning rendered q & a text." @@ -35,10 +36,185 @@ def renderFromFieldMap( return qtext, atext +def field_is_not_empty(field_text: str) -> bool: + # fixme: this is an overkill way of preventing a field with only + # a
<br> or <div>
from appearing non-empty + field_text = stripHTMLMedia(field_text) + + return field_text.strip() != "" + + # Filters ########################################################################## +def apply_field_filters( + field_name: str, field_text: str, fields: Dict[str, str], filters: List[str] +) -> str: + """Apply filters to field text, returning modified text.""" + _sort_filters(filters) + + for filter in filters: + # built-in modifiers + if filter == "text": + # strip html + field_text = stripHTML(field_text) if field_text else "" + elif filter == "type": + # type answer field; convert it to [[type:...]] for the gui code + # to process + field_text = "[[type:%s]]" % field_name + elif filter.startswith("cq-") or filter.startswith("ca-"): + # cloze deletion + filter, extra = filter.split("-") + field_text = ( + _clozeText(field_text, extra, filter[1]) if field_text and extra else "" + ) + else: + # the second and fifth arguments are no longer used + field_text = runFilter( + "fmod_" + filter, field_text, "", fields, field_name, "" + ) + if not isinstance(field_text, str): + return "{field modifier '%s' on template invalid}" % filter + return field_text + + +def _sort_filters(filters: List[str]): + "Mutate the list of filters into the correct order." + + # the filter closest to the field name is applied first + filters.reverse() + # Since 'text:' and other mods can affect html on which Anki relies to + # process clozes, we need to make sure clozes are always + # treated after all the other mods, regardless of how they're specified + # in the template, so that {{cloze:text: == {{text:cloze: + # For type:, we return directly since no other mod than cloze (or other + # pre-defined mods) can be present and those are treated separately + filters.sort(key=lambda s: not s == "type") + + +# Matches a {{c123::clozed-out text::hint}} Cloze deletion, case-insensitively. 
+# The regex should be interpolated with a regex number and creates the following +# named groups: +# - tag: The lowercase or uppercase 'c' letter opening the Cloze. +# - content: Clozed-out content. +# - hint: Cloze hint, if provided. +clozeReg = r"(?si)\{\{(?P<tag>c)%s::(?P<content>.*?)(::(?P<hint>.*?))?\}\}" + +# Constants referring to group names within clozeReg. +CLOZE_REGEX_MATCH_GROUP_TAG = "tag" +CLOZE_REGEX_MATCH_GROUP_CONTENT = "content" +CLOZE_REGEX_MATCH_GROUP_HINT = "hint" + + +def _clozeText(txt: str, ord: str, type: str) -> str: + """Process the given Cloze deletion within the given template.""" + reg = clozeReg + currentRegex = clozeReg % ord + if not re.search(currentRegex, txt): + # No Cloze deletion was found in txt. + return "" + txt = _removeFormattingFromMathjax(txt, ord) + + def repl(m): + # replace chosen cloze with type + if type == "q": + if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): + buf = "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) + else: + buf = "[...]" + else: + buf = m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) + # uppercase = no formatting + if m.group(CLOZE_REGEX_MATCH_GROUP_TAG) == "c": + buf = "<span class=cloze>%s</span>" % buf + return buf + + txt = re.sub(currentRegex, repl, txt) + # and display other clozes normally + return re.sub(reg % r"\d+", "\\2", txt) + + +def _removeFormattingFromMathjax(txt, ord) -> str: + """Marks all clozes within MathJax to prevent formatting them. + + Active Cloze deletions within MathJax should not be wrapped inside + a Cloze <span>, as that would interfere with MathJax. + + This method finds all Cloze deletions number `ord` in `txt` which are + inside MathJax inline or display formulas, and replaces their opening + '{{c123' with a '{{C123'. The clozeText method interprets the upper-case + C as "don't wrap this Cloze in a <span>". + """ + creg = clozeReg.replace("(?si)", "") + + # Scan the string left to right. + # After a MathJax opening - \( or \[ - flip in_mathjax to True. + # After a MathJax closing - \) or \] - flip in_mathjax to False. 
+ # When a Cloze pattern number `ord` is found and we are in MathJax, + # replace its '{{c' with '{{C'. + # + # TODO: Report mismatching opens/closes - e.g. '\(\]' + # TODO: Report errors in this method better than printing to stdout. + # flags in middle of expression deprecated + in_mathjax = False + + def replace(match): + nonlocal in_mathjax + if match.group("mathjax_open"): + if in_mathjax: + print("MathJax opening found while already in MathJax") + in_mathjax = True + elif match.group("mathjax_close"): + if not in_mathjax: + print("MathJax close found while not in MathJax") + in_mathjax = False + elif match.group("cloze"): + if in_mathjax: + return match.group(0).replace( + "{{c{}::".format(ord), "{{C{}::".format(ord) + ) + else: + print("Unexpected: no expected capture group is present") + return match.group(0) + + # The following regex matches one of: + # - MathJax opening + # - MathJax close + # - Cloze deletion number `ord` + return re.sub( + r"(?si)" + r"(?P<mathjax_open>\\[([])|" + r"(?P<mathjax_close>\\[\])])|" + r"(?P<cloze>" + (creg % ord) + ")", + replace, + txt, + ) + + +def expand_clozes(string: str) -> List[str]: + "Render all clozes in string." 
+ ords = set(re.findall(r"{{c(\d+)::.+?}}", string)) + strings = [] + + def qrepl(m): + if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): + return "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) + else: + return "[...]" + + def arepl(m): + return m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) + + for ord in ords: + s = re.sub(clozeReg % ord, qrepl, string) + s = re.sub(clozeReg % ".+?", arepl, s) + strings.append(s) + strings.append(re.sub(clozeReg % ".+?", arepl, string)) + + return strings + + def hint(txt, extra, context, tag, fullname) -> str: if not txt.strip(): return "" diff --git a/pylib/tests/test_template.py b/pylib/tests/test_template.py index 5a61e11f8..71f53ef3b 100644 --- a/pylib/tests/test_template.py +++ b/pylib/tests/test_template.py @@ -1,9 +1,8 @@ -from anki.template import Template +from anki.template2 import _removeFormattingFromMathjax def test_remove_formatting_from_mathjax(): - t = Template("") - assert t._removeFormattingFromMathjax(r"\(2^{{c3::2}}\)", 3) == r"\(2^{{C3::2}}\)" + assert _removeFormattingFromMathjax(r"\(2^{{c3::2}}\)", 3) == r"\(2^{{C3::2}}\)" txt = ( r"{{c1::ok}} \(2^2\) {{c2::not ok}} \(2^{{c3::2}}\) \(x^3\) " @@ -11,9 +10,7 @@ def test_remove_formatting_from_mathjax(): ) # Cloze 2 is not in MathJax, so it should not get protected against # formatting. - assert t._removeFormattingFromMathjax(txt, 2) == txt + assert _removeFormattingFromMathjax(txt, 2) == txt txt = r"\(a\) {{c1::b}} \[ {{c1::c}} \]" - assert t._removeFormattingFromMathjax(txt, 1) == ( - r"\(a\) {{c1::b}} \[ {{C1::c}} \]" - ) + assert _removeFormattingFromMathjax(txt, 1) == (r"\(a\) {{c1::b}} \[ {{C1::c}} \]")