mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
refactor media downloading on paste/drop
- use html as the first choice for incoming drops/pastes
- when filtering incoming html, automatically localize any remote image references
- add a special case for pasting/dropping from google images when html stripping is on
- move filtering code into editor
This commit is contained in:
parent
a538e29480
commit
e2d2b759a4
1 changed files with 157 additions and 150 deletions
307
aqt/editor.py
307
aqt/editor.py
|
@ -1,11 +1,12 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright: Damien Elmes <anki@ichi2.net>
|
# Copyright: Damien Elmes <anki@ichi2.net>
|
||||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
from anki.lang import _
|
||||||
|
|
||||||
from aqt.qt import *
|
from aqt.qt import *
|
||||||
import re, os, urllib2, ctypes
|
import re, os, urllib2, ctypes
|
||||||
from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia
|
from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia
|
||||||
from anki.sound import play
|
import anki.sound
|
||||||
from anki.hooks import runHook, runFilter
|
from anki.hooks import runHook, runFilter
|
||||||
from aqt.sound import getAudio
|
from aqt.sound import getAudio
|
||||||
from aqt.webview import AnkiWebView
|
from aqt.webview import AnkiWebView
|
||||||
|
@ -16,12 +17,6 @@ import anki.js
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
# fixme: when tab order returns to the webview, the previously focused field
|
|
||||||
# is focused, which is not good when the user is tabbing through the dialog
|
|
||||||
# fixme: set rtl in div css
|
|
||||||
|
|
||||||
# fixme: commit from tag area causes error
|
|
||||||
|
|
||||||
pics = ("jpg", "jpeg", "png", "tif", "tiff", "gif", "svg")
|
pics = ("jpg", "jpeg", "png", "tif", "tiff", "gif", "svg")
|
||||||
audio = ("wav", "mp3", "ogg", "flac", "mp4", "swf", "mov", "mpeg", "mkv")
|
audio = ("wav", "mp3", "ogg", "flac", "mp4", "swf", "mov", "mpeg", "mkv")
|
||||||
|
|
||||||
|
@ -264,72 +259,6 @@ document.onclick = function (evt) {
|
||||||
</body></html>
|
</body></html>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _filterHTML(html):
|
|
||||||
doc = BeautifulSoup(html)
|
|
||||||
# remove implicit regular font style from outermost element
|
|
||||||
if doc.span:
|
|
||||||
try:
|
|
||||||
attrs = doc.span['style'].split(";")
|
|
||||||
except (KeyError, TypeError):
|
|
||||||
attrs = []
|
|
||||||
if attrs:
|
|
||||||
new = []
|
|
||||||
for attr in attrs:
|
|
||||||
sattr = attr.strip()
|
|
||||||
if sattr and sattr not in ("font-style: normal", "font-weight: normal"):
|
|
||||||
new.append(sattr)
|
|
||||||
doc.span['style'] = ";".join(new)
|
|
||||||
# filter out implicit formatting from webkit
|
|
||||||
for tag in doc("span", "Apple-style-span"):
|
|
||||||
preserve = ""
|
|
||||||
for item in tag['style'].split(";"):
|
|
||||||
try:
|
|
||||||
k, v = item.split(":")
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
if k.strip() == "color" and not v.strip() == "rgb(0, 0, 0)":
|
|
||||||
preserve += "color:%s;" % v
|
|
||||||
if k.strip() in ("font-weight", "font-style"):
|
|
||||||
preserve += item + ";"
|
|
||||||
if preserve:
|
|
||||||
# preserve colour attribute, delete implicit class
|
|
||||||
tag['style'] = preserve
|
|
||||||
del tag['class']
|
|
||||||
else:
|
|
||||||
# strip completely
|
|
||||||
tag.replaceWithChildren()
|
|
||||||
for tag in doc("font", "Apple-style-span"):
|
|
||||||
# strip all but colour attr from implicit font tags
|
|
||||||
if 'color' in dict(tag.attrs):
|
|
||||||
for attr in tag.attrs:
|
|
||||||
if attr != "color":
|
|
||||||
del tag[attr]
|
|
||||||
# and apple class
|
|
||||||
del tag['class']
|
|
||||||
else:
|
|
||||||
# remove completely
|
|
||||||
tag.replaceWithChildren()
|
|
||||||
# now images
|
|
||||||
for tag in doc("img"):
|
|
||||||
# turn file:/// links into relative ones
|
|
||||||
try:
|
|
||||||
if tag['src'].lower().startswith("file://"):
|
|
||||||
tag['src'] = os.path.basename(tag['src'])
|
|
||||||
except KeyError:
|
|
||||||
# for some bizarre reason, mnemosyne removes src elements
|
|
||||||
# from missing media
|
|
||||||
pass
|
|
||||||
# strip all other attributes, including implicit max-width
|
|
||||||
for attr, val in tag.attrs:
|
|
||||||
if attr != "src":
|
|
||||||
del tag[attr]
|
|
||||||
# strip superfluous elements
|
|
||||||
for elem in "html", "head", "body", "meta":
|
|
||||||
for tag in doc(elem):
|
|
||||||
tag.replaceWithChildren()
|
|
||||||
html = unicode(doc)
|
|
||||||
return html
|
|
||||||
|
|
||||||
# caller is responsible for resetting note on reset
|
# caller is responsible for resetting note on reset
|
||||||
class Editor(object):
|
class Editor(object):
|
||||||
def __init__(self, mw, widget, parentWindow, addMode=False):
|
def __init__(self, mw, widget, parentWindow, addMode=False):
|
||||||
|
@ -551,7 +480,7 @@ class Editor(object):
|
||||||
def mungeHTML(self, txt):
    """Return *txt* cleaned by _filterHTML(), without localizing media.

    A value that is exactly "<br>" is treated as an empty string before
    filtering (presumably this is how an empty field is reported; confirm
    against the webview code).
    """
    cleaned = "" if txt == "<br>" else txt
    return self._filterHTML(cleaned, localize=False)
|
||||||
|
|
||||||
# Setting/unsetting the current note
|
# Setting/unsetting the current note
|
||||||
######################################################################
|
######################################################################
|
||||||
|
@ -824,24 +753,18 @@ to a cloze type first, via Edit>Change Note Type."""))
|
||||||
self.web.eval("setFormat('inserthtml', %s);" % json.dumps(html))
|
self.web.eval("setFormat('inserthtml', %s);" % json.dumps(html))
|
||||||
|
|
||||||
def _addMedia(self, path, canDelete=False):
|
def _addMedia(self, path, canDelete=False):
|
||||||
"Add to media folder and return basename."
|
"Add to media folder and return local img or sound tag."
|
||||||
# copy to media folder
|
# copy to media folder
|
||||||
name = self.mw.col.media.addFile(path)
|
fname = self.mw.col.media.addFile(path)
|
||||||
# remove original?
|
# remove original?
|
||||||
if canDelete and self.mw.pm.profile['deleteMedia']:
|
if canDelete and self.mw.pm.profile['deleteMedia']:
|
||||||
if os.path.abspath(name) != os.path.abspath(path):
|
if os.path.abspath(fname) != os.path.abspath(path):
|
||||||
try:
|
try:
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
# return a local html link
|
# return a local html link
|
||||||
ext = name.split(".")[-1].lower()
|
return self.fnameToLink(fname)
|
||||||
if ext in pics:
|
|
||||||
name = urllib.quote(name.encode("utf8"))
|
|
||||||
return '<img src="%s">' % name
|
|
||||||
else:
|
|
||||||
anki.sound.play(name)
|
|
||||||
return '[sound:%s]' % name
|
|
||||||
|
|
||||||
def onRecSound(self):
|
def onRecSound(self):
|
||||||
try:
|
try:
|
||||||
|
@ -853,6 +776,127 @@ to a cloze type first, via Edit>Change Note Type."""))
|
||||||
return
|
return
|
||||||
self.addMedia(file)
|
self.addMedia(file)
|
||||||
|
|
||||||
|
# Media downloads
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
def urlToLink(self, url):
    """Fetch *url* via urlToFile() and return markup for the local copy.

    Returns the result of fnameToLink() for the stored file name, or an
    empty string when urlToFile() yields nothing.
    """
    localName = self.urlToFile(url)
    return self.fnameToLink(localName) if localName else ""
|
||||||
|
|
||||||
|
def fnameToLink(self, fname):
    """Return the field markup that refers to media file *fname*.

    Files whose extension is listed in `pics` become an <img> tag with the
    name URL-quoted. Everything else is played through anki.sound and
    returned as a [sound:...] reference.
    """
    extension = fname.split(".")[-1].lower()
    if extension not in pics:
        # anything that is not a picture is handled as audio
        anki.sound.play(fname)
        return '[sound:%s]' % fname
    quoted = urllib.quote(fname.encode("utf8"))
    return '<img src="%s">' % quoted
|
||||||
|
|
||||||
|
def urlToFile(self, url):
    """Download *url* into the media folder if it names supported media.

    A URL is supported when it ends, case-insensitively, in a dot followed
    by one of the extensions listed in `pics` or `audio`.

    Returns whatever _retrieveURL() returns for a supported URL, or None
    when the URL is not a supported type.
    """
    lowered = url.lower()
    # include the dot so that e.g. ".../mypng" is not mistaken for a png
    suffixes = tuple("." + ext for ext in pics + audio)
    if lowered.endswith(suffixes):
        return self._retrieveURL(url)
    # not a supported type; nothing was downloaded
    return None
|
||||||
|
|
||||||
|
def _retrieveURL(self, url):
    """Download file into media folder and return local filename or None.

    Shows a progress dialog while the download runs. On urllib2.URLError a
    warning is shown and None is returned. Otherwise the downloaded bytes
    are passed to the collection's media.writeData() and its result is
    returned.
    """
    # urllib is picky with local file links
    if url.lower().startswith("file://"):
        url = url.replace("%", "%25")
        url = url.replace("#", "%23")
    # read the whole file into memory while showing progress
    self.mw.progress.start(
        immediate=True, parent=self.parentWindow)
    try:
        req = urllib2.Request(url, None, {
            'User-Agent': 'Mozilla/5.0 (compatible; Anki)'})
        response = urllib2.urlopen(req)
        try:
            filecontents = response.read()
        finally:
            # release the connection/file handle even if read() fails
            response.close()
    except urllib2.URLError as e:
        showWarning(_("An error occurred while opening %s") % e)
        return None
    finally:
        self.mw.progress.finish()
    # NOTE(review): the full unquoted URL is passed on; presumably
    # writeData() derives the file name from it -- confirm.
    path = unicode(urllib2.unquote(url.encode("utf8")), "utf8")
    return self.mw.col.media.writeData(path, filecontents)
|
||||||
|
|
||||||
|
# HTML filtering
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
def _filterHTML(self, html, localize=False):
    """Clean up HTML before it is stored in, or pasted into, a field.

    Drops "normal" font styles from the first span, strips the implicit
    Apple-style-span spans and font tags WebKit adds (keeping colour and,
    for spans, font weight/style), reduces img tags to a bare src
    attribute, and unwraps html/head/body/meta elements.

    html -- markup to clean.
    localize -- when true, image sources that are http://, https:// or
        file:// URLs are fetched with urlToFile() and rewritten to the
        returned local name.

    Returns the cleaned markup as unicode.
    """
    doc = BeautifulSoup(html)
    # remove implicit regular font style from outermost element
    if doc.span:
        try:
            attrs = doc.span['style'].split(";")
        except (KeyError, TypeError):
            attrs = []
        if attrs:
            new = []
            for attr in attrs:
                sattr = attr.strip()
                if sattr and sattr not in ("font-style: normal", "font-weight: normal"):
                    new.append(sattr)
            doc.span['style'] = ";".join(new)
    # filter out implicit formatting from webkit
    for tag in doc("span", "Apple-style-span"):
        preserve = ""
        # a span can carry the class without a style attribute; treat that
        # as an empty style instead of raising KeyError
        for item in tag.get('style', "").split(";"):
            try:
                k, v = item.split(":")
            except ValueError:
                continue
            if k.strip() == "color" and not v.strip() == "rgb(0, 0, 0)":
                preserve += "color:%s;" % v
            if k.strip() in ("font-weight", "font-style"):
                preserve += item + ";"
        if preserve:
            # preserve colour attribute, delete implicit class
            tag['style'] = preserve
            del tag['class']
        else:
            # strip completely
            tag.replaceWithChildren()
    for tag in doc("font", "Apple-style-span"):
        # strip all but colour attr from implicit font tags
        if 'color' in dict(tag.attrs):
            # tag.attrs holds (name, value) pairs; iterate over a copy
            # because deleting an attribute mutates the list. This also
            # removes the apple class.
            for name, val in list(tag.attrs):
                if name != "color":
                    del tag[name]
        else:
            # remove completely
            tag.replaceWithChildren()
    # now images
    for tag in doc("img"):
        try:
            src = tag['src']
        except KeyError:
            # for some bizarre reason, mnemosyne removes src elements
            # from missing media
            src = None
        if src is not None:
            lowered = src.lower()
            fname = None
            # only real URLs can be fetched; a relative src handed to
            # urllib2 raises ValueError
            if localize and lowered.startswith(
                    ("http://", "https://", "file://")):
                # convert remote image links to local ones
                fname = self.urlToFile(src)
            if fname:
                tag['src'] = fname
            elif lowered.startswith("file://"):
                # turn file:/// links into relative ones
                tag['src'] = os.path.basename(src)
        # strip all other attributes, including implicit max-width;
        # iterate over a copy since deletion mutates tag.attrs
        for name, val in list(tag.attrs):
            if name != "src":
                del tag[name]
    # strip superfluous elements
    for elem in "html", "head", "body", "meta":
        for tag in doc(elem):
            tag.replaceWithChildren()
    return unicode(doc)
|
||||||
|
|
||||||
# Advanced menu
|
# Advanced menu
|
||||||
######################################################################
|
######################################################################
|
||||||
|
|
||||||
|
@ -922,7 +966,6 @@ class EditorWebView(AnkiWebView):
|
||||||
def __init__(self, parent, editor):
|
def __init__(self, parent, editor):
|
||||||
AnkiWebView.__init__(self)
|
AnkiWebView.__init__(self)
|
||||||
self.editor = editor
|
self.editor = editor
|
||||||
self.errtxt = _("An error occured while opening %s")
|
|
||||||
self.strip = self.editor.mw.pm.profile['stripHTML']
|
self.strip = self.editor.mw.pm.profile['stripHTML']
|
||||||
|
|
||||||
def keyPressEvent(self, evt):
|
def keyPressEvent(self, evt):
|
||||||
|
@ -981,7 +1024,7 @@ class EditorWebView(AnkiWebView):
|
||||||
if evt.source():
|
if evt.source():
|
||||||
if oldmime.hasHtml():
|
if oldmime.hasHtml():
|
||||||
mime = QMimeData()
|
mime = QMimeData()
|
||||||
mime.setHtml(_filterHTML(oldmime.html()))
|
mime.setHtml(self.editor._filterHTML(oldmime.html()))
|
||||||
else:
|
else:
|
||||||
# old qt on linux won't give us html when dragging an image;
|
# old qt on linux won't give us html when dragging an image;
|
||||||
# in that case just do the default action (which is to ignore
|
# in that case just do the default action (which is to ignore
|
||||||
|
@ -1001,12 +1044,6 @@ class EditorWebView(AnkiWebView):
|
||||||
def prepareClip(self, mode=QClipboard.Clipboard):
|
def prepareClip(self, mode=QClipboard.Clipboard):
|
||||||
clip = self.editor.mw.app.clipboard()
|
clip = self.editor.mw.app.clipboard()
|
||||||
mime = clip.mimeData(mode=mode)
|
mime = clip.mimeData(mode=mode)
|
||||||
if mime.hasHtml() and mime.html().startswith("<!--anki-->"):
|
|
||||||
# pasting from another field, filter extraneous webkit formatting
|
|
||||||
html = mime.html()[11:]
|
|
||||||
html = _filterHTML(html)
|
|
||||||
mime.setHtml(html)
|
|
||||||
return
|
|
||||||
self.saveClip(mode=mode)
|
self.saveClip(mode=mode)
|
||||||
mime = self._processMime(mime)
|
mime = self._processMime(mime)
|
||||||
clip.setMimeData(mime, mode=mode)
|
clip.setMimeData(mime, mode=mode)
|
||||||
|
@ -1037,17 +1074,14 @@ class EditorWebView(AnkiWebView):
|
||||||
# print "html", mime.html()
|
# print "html", mime.html()
|
||||||
# print "urls", mime.urls()
|
# print "urls", mime.urls()
|
||||||
# print "text", mime.text()
|
# print "text", mime.text()
|
||||||
if mime.hasUrls():
|
if mime.hasHtml():
|
||||||
return self._processUrls(mime)
|
return self._processHtml(mime)
|
||||||
elif mime.hasText() and (self.strip or not mime.hasHtml()):
|
elif mime.hasText():
|
||||||
return self._processText(mime)
|
return self._processText(mime)
|
||||||
# we currently aren't able to extract images from html, so we prioritize
|
elif mime.hasUrls():
|
||||||
# images over html in cases where we have both. this is a hack until
|
return self._processUrls(mime)
|
||||||
# issue 92 is implemented
|
|
||||||
elif mime.hasImage():
|
elif mime.hasImage():
|
||||||
return self._processImage(mime)
|
return self._processImage(mime)
|
||||||
elif mime.hasHtml():
|
|
||||||
return self._processHtml(mime)
|
|
||||||
else:
|
else:
|
||||||
# nothing
|
# nothing
|
||||||
return QMimeData()
|
return QMimeData()
|
||||||
|
@ -1056,20 +1090,12 @@ class EditorWebView(AnkiWebView):
|
||||||
url = mime.urls()[0].toString()
|
url = mime.urls()[0].toString()
|
||||||
# chrome likes to give us the URL twice with a \n
|
# chrome likes to give us the URL twice with a \n
|
||||||
url = url.splitlines()[0]
|
url = url.splitlines()[0]
|
||||||
link = self._localizedMediaLink(url)
|
|
||||||
mime = QMimeData()
|
mime = QMimeData()
|
||||||
|
link = self.editor.urlToLink(url)
|
||||||
if link:
|
if link:
|
||||||
mime.setHtml(link)
|
mime.setHtml(link)
|
||||||
return mime
|
return mime
|
||||||
|
|
||||||
def _localizedMediaLink(self, url):
|
|
||||||
l = url.lower()
|
|
||||||
for suffix in pics+audio:
|
|
||||||
if l.endswith(suffix):
|
|
||||||
return self._retrieveURL(url)
|
|
||||||
# not a supported type; return link verbatim
|
|
||||||
return url
|
|
||||||
|
|
||||||
def _processText(self, mime):
|
def _processText(self, mime):
|
||||||
txt = unicode(mime.text())
|
txt = unicode(mime.text())
|
||||||
l = txt.lower()
|
l = txt.lower()
|
||||||
|
@ -1077,12 +1103,7 @@ class EditorWebView(AnkiWebView):
|
||||||
# if the user is pasting an image or sound link, convert it to local
|
# if the user is pasting an image or sound link, convert it to local
|
||||||
if l.startswith("http://") or l.startswith("https://") or l.startswith("file://"):
|
if l.startswith("http://") or l.startswith("https://") or l.startswith("file://"):
|
||||||
txt = txt.split("\r\n")[0]
|
txt = txt.split("\r\n")[0]
|
||||||
html = self._localizedMediaLink(txt)
|
html = self.editor.urlToLink(txt)
|
||||||
if not html:
|
|
||||||
return QMimeData()
|
|
||||||
if html == txt:
|
|
||||||
# wasn't of a supported media type; don't change
|
|
||||||
html = None
|
|
||||||
new = QMimeData()
|
new = QMimeData()
|
||||||
if html:
|
if html:
|
||||||
new.setHtml(html)
|
new.setHtml(html)
|
||||||
|
@ -1092,13 +1113,28 @@ class EditorWebView(AnkiWebView):
|
||||||
|
|
||||||
def _processHtml(self, mime):
    """Build the mime data to paste/drop for an incoming HTML payload.

    When HTML stripping is enabled and the content was not copied from
    Anki itself (no leading "<!--anki-->" marker):
      - if nothing but markup remains after stripping, every media
        reference found in the HTML is converted with the editor's
        urlToLink() and pasted as HTML (special case for google images);
      - otherwise the plain-text form is used, preferring mime.text()
        via _processText() so newlines are preserved.
    Otherwise the HTML is cleaned with the editor's _filterHTML() with
    remote images localized.

    Returns a QMimeData instance.
    """
    html = mime.html()
    newMime = QMimeData()
    if self.strip and not html.startswith("<!--anki-->"):
        # special case for google images: if after stripping there's no text
        # and there are image links, we'll paste those as html instead
        if not stripHTML(html).strip():
            mid = self.editor.note.mid
            urls = self.editor.mw.col.media.filesInStr(
                mid, html, includeRemote=True)
            newMime.setHtml(
                "".join(self.editor.urlToLink(url) for url in urls))
        else:
            # use .text() if available so newlines are preserved; otherwise strip
            if mime.hasText():
                return self._processText(mime)
            # there is no text part here, so strip the html itself
            newMime.setText(stripHTML(html))
    else:
        # no stripping
        html = self.editor._filterHTML(html, localize=True)
        newMime.setHtml(html)
    return newMime
|
||||||
|
|
||||||
def _processImage(self, mime):
|
def _processImage(self, mime):
|
||||||
im = QImage(mime.imageData())
|
im = QImage(mime.imageData())
|
||||||
|
@ -1116,35 +1152,6 @@ class EditorWebView(AnkiWebView):
|
||||||
mime.setHtml(self.editor._addMedia(uname+ext))
|
mime.setHtml(self.editor._addMedia(uname+ext))
|
||||||
return mime
|
return mime
|
||||||
|
|
||||||
def _retrieveURL(self, url):
|
|
||||||
# is it media?
|
|
||||||
ext = url.split(".")[-1].lower()
|
|
||||||
if ext not in pics and ext not in audio:
|
|
||||||
return
|
|
||||||
if url.lower().startswith("file://"):
|
|
||||||
url = url.replace("%", "%25")
|
|
||||||
url = url.replace("#", "%23")
|
|
||||||
# fetch it into a temporary folder
|
|
||||||
self.editor.mw.progress.start(
|
|
||||||
immediate=True, parent=self.editor.parentWindow)
|
|
||||||
try:
|
|
||||||
req = urllib2.Request(url, None, {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (compatible; Anki)'})
|
|
||||||
filecontents = urllib2.urlopen(req).read()
|
|
||||||
except urllib2.URLError, e:
|
|
||||||
showWarning(self.errtxt % e)
|
|
||||||
return
|
|
||||||
finally:
|
|
||||||
self.editor.mw.progress.finish()
|
|
||||||
path = unicode(urllib2.unquote(url.encode("utf8")), "utf8")
|
|
||||||
for badChar in "#%\"":
|
|
||||||
path = path.replace(badChar, "")
|
|
||||||
path = namedtmp(os.path.basename(path))
|
|
||||||
file = open(path, "wb")
|
|
||||||
file.write(filecontents)
|
|
||||||
file.close()
|
|
||||||
return self.editor._addMedia(path)
|
|
||||||
|
|
||||||
def _flagAnkiText(self):
|
def _flagAnkiText(self):
|
||||||
# add a comment in the clipboard html so we can tell text is copied
|
# add a comment in the clipboard html so we can tell text is copied
|
||||||
# from us and doesn't need to be stripped
|
# from us and doesn't need to be stripped
|
||||||
|
|
Loading…
Reference in a new issue