remove HTML comments when stripping HTML

Gets rid of unwanted metadata from MS Word pastes.
This commit is contained in:
Damien Elmes 2018-06-09 16:27:46 +10:00
parent 89fd5cf6e5
commit 8b6ef5579f

View file

@@ -125,6 +125,7 @@ def fmtFloat(float_value, point=1):
# HTML # HTML
############################################################################## ##############################################################################
reComment = re.compile("(?s)<!--.*?-->")
reStyle = re.compile("(?si)<style.*?>.*?</style>") reStyle = re.compile("(?si)<style.*?>.*?</style>")
reScript = re.compile("(?si)<script.*?>.*?</script>") reScript = re.compile("(?si)<script.*?>.*?</script>")
reTag = re.compile("(?s)<.*?>") reTag = re.compile("(?s)<.*?>")
@@ -132,6 +133,7 @@ reEnts = re.compile("&#?\w+;")
reMedia = re.compile("(?i)<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>") reMedia = re.compile("(?i)<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>")
def stripHTML(s): def stripHTML(s):
s = reComment.sub("", s)
s = reStyle.sub("", s) s = reStyle.sub("", s)
s = reScript.sub("", s) s = reScript.sub("", s)
s = reTag.sub("", s) s = reTag.sub("", s)