more media regexp fixes

- split the quoted and unquoted image cases into separate regexps, since we can't include a group reference inside a character set
- disallow spaces in the non-quoted case
- this should fix matching on images with other attributes again
2025-09-18 22:12:21 -04:00 · 2013-05-22 09:45:58 +09:00 · 2013-05-22 09:45:58 +09:00 · 918694a096
commit 918694a096
parent 35764757eb
2 changed files with 16 additions and 6 deletions
--- a/anki/media.py
+++ b/anki/media.py
@ -12,10 +12,14 @@ from anki.latex import mungeQA

 class MediaManager(object):

-    # other code depends on this order, so don't reorder
-    regexps = ("(?i)(\[sound:(?P<fname>[^]]+)\])",
-               "(?i)(<img[^>]+src=(?P<str>[\"']?)"+
-                "(?P<fname>[^>]+)(?P=str)[^>]*>)")
+    soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
+    imgRegexps = [
+        # src element quoted case
+        "(?i)(<img[^>]+src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
+        # unquoted case
+        "(?i)(<img[^>]+src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
+    ]
+    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        self.col = col
@ -175,7 +179,9 @@ If the same name exists, compare checksums."""
                return tag
            return tag.replace(
                fname, urllib.quote(fname.encode("utf-8")))
-        return re.sub(self.regexps[1], repl, string)
+        for reg in self.imgRegexps:
+            string = re.sub(reg, repl, string)
+        return string

    # Rebuilding DB
    ##########################################################################
--- a/tests/test_media.py
+++ b/tests/test_media.py
@ -23,7 +23,11 @@ def test_strings():
    mid = d.models.models.keys()[0]
    assert mf(mid, "aoeu") == []
    assert mf(mid, "aoeu<img src='foo.jpg'>ao") == ["foo.jpg"]
-    assert mf(mid, "aoeu<img src=foo bar.jpg>ao") == ["foo bar.jpg"]
+    assert mf(mid, "aoeu<img src='foo.jpg' style='test'>ao") == ["foo.jpg"]
+    assert mf(mid, "aoeu<img src='foo.jpg'><img src=\"bar.jpg\">ao") == [
+            "foo.jpg", "bar.jpg"]
+    assert mf(mid, "aoeu<img src=foo.jpg style=bar>ao") == ["foo.jpg"]
+    assert mf(mid, "<img src=one><img src=two>") == ["one", "two"]
    assert mf(mid, "aoeu<img src=\"foo.jpg\">ao") == ["foo.jpg"]
    assert mf(mid, "aoeu<img src=\"foo.jpg\"><img class=yo src=fo>ao") == [
            "foo.jpg", "fo"]