Document newly found bug in _removeFormattingFromMathjax

Also adds some comments I wrote to help me understand what's going on in the code. I hope to fix this bug myself, but I think it might be beyond what you can do with Python regexes and might require writing a proper parser. So, as step 1, I'm adding a couple of comments explaining that the bug exists and how to reproduce it.
2025-12-21 19:02:57 -05:00 · 2019-12-22 04:28:29 +01:00 · 2019-12-22 04:28:29 +01:00 · 331781cf45
commit 331781cf45
parent 59ce08bc4e
3 changed files with 53 additions and 1 deletions
--- a/anki/template/template.py
+++ b/anki/template/template.py
@@ -4,6 +4,9 @@ from typing import Any, Callable, Dict, Pattern
 from anki.hooks import runFilter
 from anki.utils import stripHTML, stripHTMLMedia

+# The (?si) flags make the regex match case-insensitively and make . match any
+# character including newlines.
+# See: https://docs.python.org/3/howto/regex.html#compilation-flags
 clozeReg = r"(?si)\{\{(c)%s::(.*?)(::(.*?))?\}\}"

 modifiers: Dict[str, Callable] = {}
@@ -34,6 +37,7 @@ def get_or_attr(obj, name, default=None) -> Any:
            return default


+
 class Template:
    # The regular expression used to find a #section
    section_re: Pattern = None
@@ -197,6 +201,7 @@ class Template:
    def clozeText(self, txt, ord, type) -> str:
        reg = clozeReg
        if not re.search(reg%ord, txt):
+            # No Cloze deletion was found in txt.
            return ""
        txt = self._removeFormattingFromMathjax(txt, ord)
        def repl(m):
@@ -216,13 +221,31 @@ class Template:
        # and display other clozes normally
        return re.sub(reg%r"\d+", "\\2", txt)

-    # look for clozes wrapped in mathjax, and change {{cx to {{Cx
    def _removeFormattingFromMathjax(self, txt, ord) -> str:
+        """Marks all clozes within MathJax to prevent formatting them.
+
+        Active Cloze deletions within MathJax should not be wrapped inside
+        a Cloze <span>, as that would interfere with MathJax.
+
+        This method finds all Cloze deletions number `ord` in `txt` which are
+        inside MathJax inline or display formulas, and replaces their opening
+        '{{c123' with a '{{C123'. The clozeText method interprets the upper-case
+        C as "don't wrap this Cloze in a <span>".
+        """
+        # TODO: There is a bug in this method.
+        # Say txt = r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord = 1.
+        #
+        # This method should return r'\(a\) {{c1::b}} \[ {{C1::c}} \]',
+        # since the {{c1::c}} occurs within a MathJax display formula.
+        # However, it returns r'\(a\) {{c1::b}} \[ {{c1::c}} \]' unchanged.
+        # This causes the Cloze within the MathJax display formula
+        # to be erroneously formatted with a <span>.
        opening = ["\\(", "\\["]
        closing = ["\\)", "\\]"]
        # flags in middle of expression deprecated
        creg = clozeReg.replace("(?si)", "")
        regex = r"(?si)(\\[([])(.*?)"+(creg%ord)+r"(.*?)(\\[\])])"
+
        def repl(m):
            enclosed = True
            for s in closing:
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -199,6 +199,19 @@ def test_cloze_mathjax():
    assert "class=cloze" in f.cards()[3].q()
    assert "class=cloze" in f.cards()[4].q()

+def test_cloze_mathjax_bug():
+    d = getEmptyCol()
+    d.models.setCurrent(d.models.byName("Cloze"))
+
+    f = d.newNote()
+    f['Text'] = r'\(a\) {{c1::b}} \[ {{c1::c}} \]'
+    assert d.addNote(f)
+    assert len(f.cards()) == 1
+
+    # TODO: The following assertion should work, but currently fails due
+    # to a bug in _removeFormattingFromMathjax.
+    #   assert f.cards()[0].q() == '\(a\) <span class=cloze>[...]</span> \[ [...] \]'
+
 def test_chained_mods():
    d = getEmptyCol()
    d.models.setCurrent(d.models.byName("Cloze"))
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -0,0 +1,16 @@
+from anki.template import Template
+
+
+def test_remove_formatting_from_mathjax():
+    t = Template('')
+    assert t._removeFormattingFromMathjax(r'\(2^{{c3::2}}\)', 3) == r'\(2^{{C3::2}}\)'
+
+    txt = (r'{{c1::ok}} \(2^2\) {{c2::not ok}} \(2^{{c3::2}}\) \(x^3\) '
+           r'{{c4::blah}} {{c5::text with \(x^2\) jax}}')
+    # Cloze 2 is not in MathJax, so it should not get protected against
+    # formatting.
+    assert t._removeFormattingFromMathjax(txt, 2) == txt
+
+    # TODO: r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord=1 should return
+    # r'\(a\) {{c1::b}} \[ {{C1::c}} \]', but actually fails to mark the cloze
+    # as not-to-be-formatted.