more media regexp fixes

- split the quoted and unquoted image cases into separate regexps, since we
  can't include a group backreference such as (?P=str) inside a character set
- disallow spaces in the non-quoted case
- this should fix matching on images with other attributes again
This commit is contained in:
Damien Elmes 2013-05-22 09:45:58 +09:00
parent 35764757eb
commit 918694a096
2 changed files with 16 additions and 6 deletions

View file

@ -12,10 +12,14 @@ from anki.latex import mungeQA
class MediaManager(object): class MediaManager(object):
# other code depends on this order, so don't reorder soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
regexps = ("(?i)(\[sound:(?P<fname>[^]]+)\])", imgRegexps = [
"(?i)(<img[^>]+src=(?P<str>[\"']?)"+ # src element quoted case
"(?P<fname>[^>]+)(?P=str)[^>]*>)") "(?i)(<img[^>]+src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case
"(?i)(<img[^>]+src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
]
regexps = soundRegexps + imgRegexps
def __init__(self, col, server): def __init__(self, col, server):
self.col = col self.col = col
@ -175,7 +179,9 @@ If the same name exists, compare checksums."""
return tag return tag
return tag.replace( return tag.replace(
fname, urllib.quote(fname.encode("utf-8"))) fname, urllib.quote(fname.encode("utf-8")))
return re.sub(self.regexps[1], repl, string) for reg in self.imgRegexps:
string = re.sub(reg, repl, string)
return string
# Rebuilding DB # Rebuilding DB
########################################################################## ##########################################################################

View file

@ -23,7 +23,11 @@ def test_strings():
mid = d.models.models.keys()[0] mid = d.models.models.keys()[0]
assert mf(mid, "aoeu") == [] assert mf(mid, "aoeu") == []
assert mf(mid, "aoeu<img src='foo.jpg'>ao") == ["foo.jpg"] assert mf(mid, "aoeu<img src='foo.jpg'>ao") == ["foo.jpg"]
assert mf(mid, "aoeu<img src=foo bar.jpg>ao") == ["foo bar.jpg"] assert mf(mid, "aoeu<img src='foo.jpg' style='test'>ao") == ["foo.jpg"]
assert mf(mid, "aoeu<img src='foo.jpg'><img src=\"bar.jpg\">ao") == [
"foo.jpg", "bar.jpg"]
assert mf(mid, "aoeu<img src=foo.jpg style=bar>ao") == ["foo.jpg"]
assert mf(mid, "<img src=one><img src=two>") == ["one", "two"]
assert mf(mid, "aoeu<img src=\"foo.jpg\">ao") == ["foo.jpg"] assert mf(mid, "aoeu<img src=\"foo.jpg\">ao") == ["foo.jpg"]
assert mf(mid, "aoeu<img src=\"foo.jpg\"><img class=yo src=fo>ao") == [ assert mf(mid, "aoeu<img src=\"foo.jpg\"><img class=yo src=fo>ao") == [
"foo.jpg", "fo"] "foo.jpg", "fo"]