mirror of
https://github.com/ankitects/anki.git
synced 2025-09-24 16:56:36 -04:00
refactor media downloading on paste/drop
- use html as the first choice for incoming drops/pastes
- when filtering incoming html, automatically localize any remote image references
- add a special case for pasting/dropping from google images when html stripping is on
- move filtering code into editor
This commit is contained in:
parent
a538e29480
commit
e2d2b759a4
1 changed files with 157 additions and 150 deletions
307
aqt/editor.py
307
aqt/editor.py
|
@ -1,11 +1,12 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright: Damien Elmes <anki@ichi2.net>
|
||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
from anki.lang import _
|
||||
|
||||
from aqt.qt import *
|
||||
import re, os, urllib2, ctypes
|
||||
from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia
|
||||
from anki.sound import play
|
||||
import anki.sound
|
||||
from anki.hooks import runHook, runFilter
|
||||
from aqt.sound import getAudio
|
||||
from aqt.webview import AnkiWebView
|
||||
|
@ -16,12 +17,6 @@ import anki.js
|
|||
from BeautifulSoup import BeautifulSoup
|
||||
import urllib
|
||||
|
||||
# fixme: when tab order returns to the webview, the previously focused field
|
||||
# is focused, which is not good when the user is tabbing through the dialog
|
||||
# fixme: set rtl in div css
|
||||
|
||||
# fixme: commit from tag area causes error
|
||||
|
||||
pics = ("jpg", "jpeg", "png", "tif", "tiff", "gif", "svg")
|
||||
audio = ("wav", "mp3", "ogg", "flac", "mp4", "swf", "mov", "mpeg", "mkv")
|
||||
|
||||
|
@ -264,72 +259,6 @@ document.onclick = function (evt) {
|
|||
</body></html>
|
||||
"""
|
||||
|
||||
def _filterHTML(html):
    """Strip implicit WebKit formatting from html and return the cleaned markup.

    The markup is parsed with BeautifulSoup, Apple-style-span spans and font
    tags are reduced to the formatting worth keeping (or unwrapped), img tags
    are reduced to their src attribute, and html/head/body/meta wrappers are
    unwrapped. The result is returned as a unicode string.
    """
    doc = BeautifulSoup(html)
    # remove implicit regular font style from outermost element
    if doc.span:
        try:
            attrs = doc.span['style'].split(";")
        except (KeyError, TypeError):
            attrs = []
        if attrs:
            new = []
            for attr in attrs:
                sattr = attr.strip()
                if sattr and sattr not in ("font-style: normal", "font-weight: normal"):
                    new.append(sattr)
            doc.span['style'] = ";".join(new)
    # filter out implicit formatting from webkit
    for tag in doc("span", "Apple-style-span"):
        preserve = ""
        # the class can be present without an inline style; treat that as
        # "nothing to preserve" instead of raising KeyError
        for item in (tag.get('style') or "").split(";"):
            try:
                k, v = item.split(":")
            except ValueError:
                continue
            if k.strip() == "color" and not v.strip() == "rgb(0, 0, 0)":
                preserve += "color:%s;" % v
            if k.strip() in ("font-weight", "font-style"):
                preserve += item + ";"
        if preserve:
            # preserve colour attribute, delete implicit class
            tag['style'] = preserve
            del tag['class']
        else:
            # strip completely
            tag.replaceWithChildren()
    for tag in doc("font", "Apple-style-span"):
        # strip all but colour attr from implicit font tags
        if 'color' in dict(tag.attrs):
            # tag.attrs holds (name, value) pairs; walk a copy because
            # deleting an attribute mutates the list. this also removes
            # the apple class
            for attr, val in list(tag.attrs):
                if attr != "color":
                    del tag[attr]
        else:
            # remove completely
            tag.replaceWithChildren()
    # now images
    for tag in doc("img"):
        # turn file:/// links into relative ones
        try:
            if tag['src'].lower().startswith("file://"):
                tag['src'] = os.path.basename(tag['src'])
        except KeyError:
            # for some bizarre reason, mnemosyne removes src elements
            # from missing media
            pass
        # strip all other attributes, including implicit max-width;
        # iterate over a copy so that no attribute is skipped
        for attr, val in list(tag.attrs):
            if attr != "src":
                del tag[attr]
    # strip superfluous elements
    for elem in "html", "head", "body", "meta":
        for tag in doc(elem):
            tag.replaceWithChildren()
    html = unicode(doc)
    return html
|
||||
|
||||
# caller is responsible for resetting note on reset
|
||||
class Editor(object):
|
||||
def __init__(self, mw, widget, parentWindow, addMode=False):
|
||||
|
@ -551,7 +480,7 @@ class Editor(object):
|
|||
def mungeHTML(self, txt):
|
||||
if txt == "<br>":
|
||||
txt = ""
|
||||
return _filterHTML(txt)
|
||||
return self._filterHTML(txt, localize=False)
|
||||
|
||||
# Setting/unsetting the current note
|
||||
######################################################################
|
||||
|
@ -824,24 +753,18 @@ to a cloze type first, via Edit>Change Note Type."""))
|
|||
self.web.eval("setFormat('inserthtml', %s);" % json.dumps(html))
|
||||
|
||||
def _addMedia(self, path, canDelete=False):
|
||||
"Add to media folder and return basename."
|
||||
"Add to media folder and return local img or sound tag."
|
||||
# copy to media folder
|
||||
name = self.mw.col.media.addFile(path)
|
||||
fname = self.mw.col.media.addFile(path)
|
||||
# remove original?
|
||||
if canDelete and self.mw.pm.profile['deleteMedia']:
|
||||
if os.path.abspath(name) != os.path.abspath(path):
|
||||
if os.path.abspath(fname) != os.path.abspath(path):
|
||||
try:
|
||||
os.unlink(path)
|
||||
except:
|
||||
pass
|
||||
# return a local html link
|
||||
ext = name.split(".")[-1].lower()
|
||||
if ext in pics:
|
||||
name = urllib.quote(name.encode("utf8"))
|
||||
return '<img src="%s">' % name
|
||||
else:
|
||||
anki.sound.play(name)
|
||||
return '[sound:%s]' % name
|
||||
return self.fnameToLink(fname)
|
||||
|
||||
def onRecSound(self):
|
||||
try:
|
||||
|
@ -853,6 +776,127 @@ to a cloze type first, via Edit>Change Note Type."""))
|
|||
return
|
||||
self.addMedia(file)
|
||||
|
||||
# Media downloads
|
||||
######################################################################
|
||||
|
||||
def urlToLink(self, url):
    "Return the local link markup for url, or '' when urlToFile yields nothing."
    localName = self.urlToFile(url)
    return self.fnameToLink(localName) if localName else ""
|
||||
|
||||
def fnameToLink(self, fname):
    "Return an img tag for picture files; otherwise play fname and return a sound tag."
    extension = fname.rsplit(".", 1)[-1].lower()
    if extension not in pics:
        # anything that is not a picture is handled as sound
        anki.sound.play(fname)
        return '[sound:%s]' % fname
    quoted = urllib.quote(fname.encode("utf8"))
    return '<img src="%s">' % quoted
|
||||
|
||||
def urlToFile(self, url):
    """Fetch url via _retrieveURL if its extension is a supported media type.

    Returns the result of _retrieveURL(url) when the lowercased URL ends in
    a dot followed by one of the extensions in pics or audio; returns None
    for everything else.
    """
    # require the dot so that e.g. ".../notajpg" is not mistaken for media
    suffixes = tuple("." + ext for ext in pics + audio)
    if url.lower().endswith(suffixes):
        return self._retrieveURL(url)
    # not a supported type; tell the caller there is no local file
    return None
|
||||
|
||||
def _retrieveURL(self, url):
    "Download file into media folder and return local filename or None."
    # urllib is picky with local file links
    if url.lower().startswith("file://"):
        url = url.replace("%", "%25")
        url = url.replace("#", "%23")
    # fetch the data while a progress dialog is shown
    self.mw.progress.start(
        immediate=True, parent=self.parentWindow)
    try:
        req = urllib2.Request(url, None, {
            'User-Agent': 'Mozilla/5.0 (compatible; Anki)'})
        resp = urllib2.urlopen(req)
        try:
            filecontents = resp.read()
        finally:
            # release the connection/file handle even if read() fails
            resp.close()
    except urllib2.URLError as e:
        showWarning(_("An error occurred while opening %s") % e)
        return
    finally:
        # always dismiss the progress dialog, on success and on failure
        self.mw.progress.finish()
    # hand the unquoted url and the data to the media manager
    # NOTE(review): writeData presumably derives the stored filename from
    # this path - confirm against anki.media
    path = unicode(urllib2.unquote(url.encode("utf8")), "utf8")
    return self.mw.col.media.writeData(path, filecontents)
|
||||
|
||||
# HTML filtering
|
||||
######################################################################
|
||||
|
||||
def _filterHTML(self, html, localize=False):
    """Strip implicit WebKit formatting from html and return the cleaned markup.

    The markup is parsed with BeautifulSoup, Apple-style-span spans and font
    tags are reduced to the formatting worth keeping (or unwrapped), img tags
    are reduced to their src attribute, and html/head/body/meta wrappers are
    unwrapped. When localize is true, img sources that are http, https or
    ftp URLs are passed to urlToFile and replaced by the filename it
    returns. The result is returned as a unicode string.
    """
    doc = BeautifulSoup(html)
    # remove implicit regular font style from outermost element
    if doc.span:
        try:
            attrs = doc.span['style'].split(";")
        except (KeyError, TypeError):
            attrs = []
        if attrs:
            new = []
            for attr in attrs:
                sattr = attr.strip()
                if sattr and sattr not in ("font-style: normal", "font-weight: normal"):
                    new.append(sattr)
            doc.span['style'] = ";".join(new)
    # filter out implicit formatting from webkit
    for tag in doc("span", "Apple-style-span"):
        preserve = ""
        # the class can be present without an inline style; treat that as
        # "nothing to preserve" instead of raising KeyError
        for item in (tag.get('style') or "").split(";"):
            try:
                k, v = item.split(":")
            except ValueError:
                continue
            if k.strip() == "color" and not v.strip() == "rgb(0, 0, 0)":
                preserve += "color:%s;" % v
            if k.strip() in ("font-weight", "font-style"):
                preserve += item + ";"
        if preserve:
            # preserve colour attribute, delete implicit class
            tag['style'] = preserve
            del tag['class']
        else:
            # strip completely
            tag.replaceWithChildren()
    for tag in doc("font", "Apple-style-span"):
        # strip all but colour attr from implicit font tags
        if 'color' in dict(tag.attrs):
            # tag.attrs holds (name, value) pairs; walk a copy because
            # deleting an attribute mutates the list. this also removes
            # the apple class
            for attr, val in list(tag.attrs):
                if attr != "color":
                    del tag[attr]
        else:
            # remove completely
            tag.replaceWithChildren()
    # now images
    remote = ("http://", "https://", "ftp://")
    for tag in doc("img"):
        try:
            src = tag['src']
            if src.lower().startswith("file://"):
                # turn file:/// links into relative ones
                tag['src'] = os.path.basename(src)
            elif localize and src.lower().startswith(remote):
                # convert remote image links to local ones; sources that
                # are already local names must not be sent to the
                # downloader, which cannot open them
                fname = self.urlToFile(src)
                if fname:
                    tag['src'] = fname
        except KeyError:
            # for some bizarre reason, mnemosyne removes src elements
            # from missing media
            pass
        # strip all other attributes, including implicit max-width;
        # iterate over a copy so that no attribute is skipped
        for attr, val in list(tag.attrs):
            if attr != "src":
                del tag[attr]
    # strip superfluous elements
    for elem in "html", "head", "body", "meta":
        for tag in doc(elem):
            tag.replaceWithChildren()
    html = unicode(doc)
    return html
|
||||
|
||||
# Advanced menu
|
||||
######################################################################
|
||||
|
||||
|
@ -922,7 +966,6 @@ class EditorWebView(AnkiWebView):
|
|||
def __init__(self, parent, editor):
|
||||
AnkiWebView.__init__(self)
|
||||
self.editor = editor
|
||||
self.errtxt = _("An error occured while opening %s")
|
||||
self.strip = self.editor.mw.pm.profile['stripHTML']
|
||||
|
||||
def keyPressEvent(self, evt):
|
||||
|
@ -981,7 +1024,7 @@ class EditorWebView(AnkiWebView):
|
|||
if evt.source():
|
||||
if oldmime.hasHtml():
|
||||
mime = QMimeData()
|
||||
mime.setHtml(_filterHTML(oldmime.html()))
|
||||
mime.setHtml(self.editor._filterHTML(oldmime.html()))
|
||||
else:
|
||||
# old qt on linux won't give us html when dragging an image;
|
||||
# in that case just do the default action (which is to ignore
|
||||
|
@ -1001,12 +1044,6 @@ class EditorWebView(AnkiWebView):
|
|||
def prepareClip(self, mode=QClipboard.Clipboard):
|
||||
clip = self.editor.mw.app.clipboard()
|
||||
mime = clip.mimeData(mode=mode)
|
||||
if mime.hasHtml() and mime.html().startswith("<!--anki-->"):
|
||||
# pasting from another field, filter extraneous webkit formatting
|
||||
html = mime.html()[11:]
|
||||
html = _filterHTML(html)
|
||||
mime.setHtml(html)
|
||||
return
|
||||
self.saveClip(mode=mode)
|
||||
mime = self._processMime(mime)
|
||||
clip.setMimeData(mime, mode=mode)
|
||||
|
@ -1037,17 +1074,14 @@ class EditorWebView(AnkiWebView):
|
|||
# print "html", mime.html()
|
||||
# print "urls", mime.urls()
|
||||
# print "text", mime.text()
|
||||
if mime.hasUrls():
|
||||
return self._processUrls(mime)
|
||||
elif mime.hasText() and (self.strip or not mime.hasHtml()):
|
||||
if mime.hasHtml():
|
||||
return self._processHtml(mime)
|
||||
elif mime.hasText():
|
||||
return self._processText(mime)
|
||||
# we currently aren't able to extract images from html, so we prioritize
|
||||
# images over html in cases where we have both. this is a hack until
|
||||
# issue 92 is implemented
|
||||
elif mime.hasUrls():
|
||||
return self._processUrls(mime)
|
||||
elif mime.hasImage():
|
||||
return self._processImage(mime)
|
||||
elif mime.hasHtml():
|
||||
return self._processHtml(mime)
|
||||
else:
|
||||
# nothing
|
||||
return QMimeData()
|
||||
|
@ -1056,20 +1090,12 @@ class EditorWebView(AnkiWebView):
|
|||
url = mime.urls()[0].toString()
|
||||
# chrome likes to give us the URL twice with a \n
|
||||
url = url.splitlines()[0]
|
||||
link = self._localizedMediaLink(url)
|
||||
mime = QMimeData()
|
||||
link = self.editor.urlToLink(url)
|
||||
if link:
|
||||
mime.setHtml(link)
|
||||
return mime
|
||||
|
||||
def _localizedMediaLink(self, url):
    "Return the result of _retrieveURL for media urls; return other urls unchanged."
    lowered = url.lower()
    if any(lowered.endswith(suffix) for suffix in pics + audio):
        return self._retrieveURL(url)
    # not a supported type; return link verbatim
    return url
|
||||
|
||||
def _processText(self, mime):
|
||||
txt = unicode(mime.text())
|
||||
l = txt.lower()
|
||||
|
@ -1077,12 +1103,7 @@ class EditorWebView(AnkiWebView):
|
|||
# if the user is pasting an image or sound link, convert it to local
|
||||
if l.startswith("http://") or l.startswith("https://") or l.startswith("file://"):
|
||||
txt = txt.split("\r\n")[0]
|
||||
html = self._localizedMediaLink(txt)
|
||||
if not html:
|
||||
return QMimeData()
|
||||
if html == txt:
|
||||
# wasn't of a supported media type; don't change
|
||||
html = None
|
||||
html = self.editor.urlToLink(txt)
|
||||
new = QMimeData()
|
||||
if html:
|
||||
new.setHtml(html)
|
||||
|
@ -1092,13 +1113,28 @@ class EditorWebView(AnkiWebView):
|
|||
|
||||
def _processHtml(self, mime):
|
||||
html = mime.html()
|
||||
if self.strip:
|
||||
html = stripHTML(html)
|
||||
newMime = QMimeData()
|
||||
if self.strip and not html.startswith("<!--anki-->"):
|
||||
# special case for google images: if after stripping there's no text
|
||||
# and there are image links, we'll paste those as html instead
|
||||
if not stripHTML(html).strip():
|
||||
newHtml = ""
|
||||
mid = self.editor.note.mid
|
||||
for url in self.editor.mw.col.media.filesInStr(
|
||||
mid, html, includeRemote=True):
|
||||
newHtml += self.editor.urlToLink(url)
|
||||
newMime.setHtml(newHtml)
|
||||
else:
|
||||
# use .text() if available so newlines are preserved; otherwise strip
|
||||
if mime.hasText():
|
||||
return self._processText(mime)
|
||||
else:
|
||||
newMime.setText(stripHTML(mime.text()))
|
||||
else:
|
||||
html = _filterHTML(html)
|
||||
mime = QMimeData()
|
||||
mime.setHtml(html)
|
||||
return mime
|
||||
# no stripping
|
||||
html = self.editor._filterHTML(html, localize=True)
|
||||
newMime.setHtml(html)
|
||||
return newMime
|
||||
|
||||
def _processImage(self, mime):
|
||||
im = QImage(mime.imageData())
|
||||
|
@ -1116,35 +1152,6 @@ class EditorWebView(AnkiWebView):
|
|||
mime.setHtml(self.editor._addMedia(uname+ext))
|
||||
return mime
|
||||
|
||||
def _retrieveURL(self, url):
    """Download a media url and add it to the collection via the editor.

    Returns None when the extension is not listed in pics or audio, or when
    opening the url raises urllib2.URLError (a warning is shown in that
    case). Otherwise returns the result of self.editor._addMedia for the
    downloaded file.
    """
    # is it media?
    ext = url.split(".")[-1].lower()
    if ext not in pics and ext not in audio:
        return
    # urllib is picky with local file links
    if url.lower().startswith("file://"):
        url = url.replace("%", "%25")
        url = url.replace("#", "%23")
    # download the data while a progress dialog is shown
    self.editor.mw.progress.start(
        immediate=True, parent=self.editor.parentWindow)
    try:
        req = urllib2.Request(url, None, {
            'User-Agent': 'Mozilla/5.0 (compatible; Anki)'})
        filecontents = urllib2.urlopen(req).read()
    except urllib2.URLError as e:
        showWarning(self.errtxt % e)
        return
    finally:
        # always dismiss the progress dialog, on success and on failure
        self.editor.mw.progress.finish()
    path = unicode(urllib2.unquote(url.encode("utf8")), "utf8")
    # drop characters that are not wanted in the local filename
    for badChar in "#%\"":
        path = path.replace(badChar, "")
    # NOTE(review): namedtmp presumably returns a path inside a temporary
    # folder - confirm against anki.utils
    path = namedtmp(os.path.basename(path))
    # the with block closes the handle even if the write fails
    with open(path, "wb") as out:
        out.write(filecontents)
    return self.editor._addMedia(path)
|
||||
|
||||
def _flagAnkiText(self):
|
||||
# add a comment in the clipboard html so we can tell text is copied
|
||||
# from us and doesn't need to be stripped
|
||||
|
|
Loading…
Reference in a new issue