Document newly found bug in _removeFormattingFromMathjax

Also adds some comments I wrote to help me understand what's going on in the code. I hope to fix this bug myself, but I think a fix might be beyond what Python regexes can do and might require writing a proper parser. So, as step 1, I'm adding a couple of comments explaining that the bug exists and how to reproduce it.
2025-11-07 05:07:10 -05:00 · 2019-12-22 04:28:29 +01:00 · 2019-12-22 04:28:29 +01:00 · 331781cf45
commit 331781cf45
parent 59ce08bc4e
3 changed files with 53 additions and 1 deletions
--- a/anki/template/template.py
+++ b/anki/template/template.py
@ -4,6 +4,9 @@ from typing import Any, Callable, Dict, Pattern
 from anki.hooks import runFilter
 from anki.utils import stripHTML, stripHTMLMedia
 # The (?si) flags make the regex match case-insensitively and make . match any
 # character including newlines.
 # See: https://docs.python.org/3/howto/regex.html#compilation-flags
 clozeReg = r"(?si)\{\{(c)%s::(.*?)(::(.*?))?\}\}"
 modifiers: Dict[str, Callable] = {}
@ -34,6 +37,7 @@ def get_or_attr(obj, name, default=None) -> Any:
            return default
 class Template:
    # The regular expression used to find a #section
    section_re: Pattern = None
@ -197,6 +201,7 @@ class Template:
    def clozeText(self, txt, ord, type) -> str:
        reg = clozeReg
        if not re.search(reg%ord, txt):
            # No Cloze deletion was found in txt.
            return ""
        txt = self._removeFormattingFromMathjax(txt, ord)
        def repl(m):
@ -216,13 +221,31 @@ class Template:
        # and display other clozes normally
        return re.sub(reg%r"\d+", "\\2", txt)
    # look for clozes wrapped in mathjax, and change {{cx to {{Cx
    def _removeFormattingFromMathjax(self, txt, ord) -> str:
        """Marks all clozes within MathJax to prevent formatting them.
        Active Cloze deletions within MathJax should not be wrapped inside
        a Cloze <span>, as that would interfere with MathJax.
        This method finds all Cloze deletions number `ord` in `txt` which are
        inside MathJax inline or display formulas, and replaces their opening
        '{{c123' with a '{{C123'. The clozeText method interprets the upper-case
        C as "don't wrap this Cloze in a <span>".
        """
        # TODO: There is a bug in this method.
        # Say txt = r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord = 1.
        #
        # This method should return '\(a\) {{c1::b}} \[ {{C1::c}} \]',
        # since the {{c1::c}} occurs within a MathJax display formula.
        # However, it returns '\(a\) {{c1::b}} \[ {{c1::c}} \]'.
        # This causes the Cloze within the MathJax display formula
        # to be erroneously formatted with a <span>.
        opening = ["\\(", "\\["]
        closing = ["\\)", "\\]"]
        # flags in middle of expression deprecated
        creg = clozeReg.replace("(?si)", "")
        regex = r"(?si)(\\[([])(.*?)"+(creg%ord)+r"(.*?)(\\[\])])"
        def repl(m):
            enclosed = True
            for s in closing:
--- a/tests/test_models.py
+++ b/tests/test_models.py
@ -199,6 +199,19 @@ def test_cloze_mathjax():
    assert "class=cloze" in f.cards()[3].q()
    assert "class=cloze" in f.cards()[4].q()
 def test_cloze_mathjax_bug():
    """Scaffold for the known cloze-in-MathJax-display-formula bug.

    Adds a Cloze note whose text has cloze 1 once outside MathJax and once
    inside a display formula, and checks that exactly one card is generated.
    The assertion on the rendered question is still disabled (see TODO).
    """
    col = getEmptyCol()
    cloze_model = col.models.byName("Cloze")
    col.models.setCurrent(cloze_model)
    note = col.newNote()
    note['Text'] = r'\(a\) {{c1::b}} \[ {{c1::c}} \]'
    assert col.addNote(note)
    # Both deletions are numbered 1, so they belong to a single card.
    assert len(note.cards()) == 1
    # TODO: Enable the assertion below once the bug in
    # _removeFormattingFromMathjax is fixed; it currently fails.
    #   assert note.cards()[0].q() == r'\(a\) <span class=cloze>[...]</span> \[ [...] \]'
 def test_chained_mods():
    d = getEmptyCol()
    d.models.setCurrent(d.models.byName("Cloze"))
--- a/tests/test_template.py
+++ b/tests/test_template.py
@ -0,0 +1,16 @@
 from anki.template import Template
 def test_remove_formatting_from_mathjax():
    """Check which clozes _removeFormattingFromMathjax marks as protected."""
    template = Template('')

    # A cloze inside an inline formula gets its opening '{{c3' changed
    # to '{{C3'.
    inline_source = r'\(2^{{c3::2}}\)'
    inline_expected = r'\(2^{{C3::2}}\)'
    assert template._removeFormattingFromMathjax(inline_source, 3) == inline_expected

    # Cloze 2 sits between two formulas rather than inside one, so the
    # text must come back unchanged.
    mixed_source = (
        r'{{c1::ok}} \(2^2\) {{c2::not ok}} \(2^{{c3::2}}\) \(x^3\) '
        r'{{c4::blah}} {{c5::text with \(x^2\) jax}}'
    )
    assert template._removeFormattingFromMathjax(mixed_source, 2) == mixed_source

    # TODO: For txt=r'\(a\) {{c1::b}} \[ {{c1::c}} \]' and ord=1 the method
    # should return r'\(a\) {{c1::b}} \[ {{C1::c}} \]', but it currently
    # fails to mark the cloze inside the display formula as
    # not-to-be-formatted.