diff --git a/aqt/editor.py b/aqt/editor.py
index 97efc5ede..a675ee3db 100644
--- a/aqt/editor.py
+++ b/aqt/editor.py
@@ -13,6 +13,7 @@ from aqt.utils import shortcut, showInfo, showWarning, getBase, getFile, \
openHelp
import aqt
import anki.js
+from BeautifulSoup import BeautifulSoup
# fixme: when tab order returns to the webview, the previously focused field
# is focused, which is not good when the user is tabbing through the dialog
@@ -414,9 +415,6 @@ class Editor(object):
def mungeHTML(self, txt):
if txt == "
":
txt = ""
- fa = re.findall('((.*))', txt)
- if fa:
- txt = txt.replace(fa[0][0], fa[0][1])
return txt
# Setting/unsetting the current note
@@ -885,8 +883,7 @@ class EditorWebView(AnkiWebView):
if evt.source():
assert oldmime.hasHtml()
mime = QMimeData()
- # fix img src links
- mime.setHtml(self._relativeFiles(oldmime.html()))
+ mime.setHtml(self._filteredHTML(oldmime.html()))
else:
mime = self._processMime(oldmime)
# create a new event with the new mime data
@@ -902,7 +899,7 @@ class EditorWebView(AnkiWebView):
if keep:
new = QMimeData()
if mime.hasHtml():
- new.setHtml(mime.html())
+ new.setHtml(self._filteredHTML(mime.html()))
else:
new.setText(mime.text())
mime = new
@@ -950,7 +947,14 @@ class EditorWebView(AnkiWebView):
def _processUrls(self, mime):
url = mime.urls()[0].toString()
- link = self._retrieveURL(url)
+ link = None
+ for suffix in pics+audio:
+ if url.lower().endswith(suffix):
+ link = self._retrieveURL(url)
+ break
+ if not link:
+ # not a supported media type; include link verbatim
+ link = url
mime = QMimeData()
mime.setHtml(link)
return mime
@@ -974,6 +978,8 @@ class EditorWebView(AnkiWebView):
html = mime.html()
if self.strip:
html = stripHTML(html)
+ else:
+ html = self._filteredHTML(html)
mime = QMimeData()
mime.setHtml(html)
return mime
@@ -1009,9 +1015,18 @@ class EditorWebView(AnkiWebView):
file.close()
return self.editor._addMedia(path)
- def _relativeFiles(self, html):
- # when an image is dragged, the relative img src= links end up as a
- # full local path. we want to strip that back to a relative path
- def repl(match):
- return '
' % os.path.basename(match.group(2))
- return re.sub(self.editor.mw.col.media.regexps[1], repl, html)
+ def _filteredHTML(self, html):
+ doc = BeautifulSoup(html)
+ # filter out implicit formatting from webkit
+ for tag in doc("span", "Apple-style-span"):
+ tag.replaceWithChildren()
+ # turn file:/// links into relative ones
+ for tag in doc("img"):
+ if tag['src'].lower().startswith("file://"):
+ tag['src'] = os.path.basename(tag['src'])
+ # strip superfluous elements
+ for elem in "html", "head", "body", "meta":
+ for tag in doc(elem):
+ tag.replaceWithChildren()
+ html = unicode(doc)
+ return html