diff --git a/anki/utils.py b/anki/utils.py
index 3315225fd..34b943e95 100644
--- a/anki/utils.py
+++ b/anki/utils.py
@@ -122,17 +122,22 @@ def fmtFloat(float_value, point=1):
 
 # HTML
 ##############################################################################
+reStyle = re.compile("(?s)<style.*?>.*?</style>")
+reScript = re.compile("(?s)<script.*?>.*?</script>")
+reTag = re.compile("<.*?>")
+reEnts = re.compile("&#?\w+;")
+reMedia = re.compile("<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>")
 
 def stripHTML(s):
-    s = re.sub("(?s)<style.*?>.*?</style>", "", s)
-    s = re.sub("(?s)<script.*?>.*?</script>", "", s)
-    s = re.sub("<.*?>", "", s)
+    s = reStyle.sub("", s)
+    s = reScript.sub("", s)
+    s = reTag.sub("", s)
     s = entsToTxt(s)
     return s
 
 def stripHTMLMedia(s):
     "Strip HTML but keep media filenames"
-    s = re.sub("<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>", " \\1 ", s)
+    s = reMedia.sub(" \\1 ", s)
     return stripHTML(s)
 
 def minimizeHTML(s):
@@ -164,7 +169,7 @@ def entsToTxt(html):
             except KeyError:
                 pass
         return text # leave as is
-    return re.sub("&#?\w+;", fixup, html)
+    return reEnts.sub(fixup, html)
 
 # IDs
 ##############################################################################