drag&drop support, and paste refactor

- insert resulting html directly into fields instead of relying on fragile clipboard rewriting - catch paste events in js and send them back to python so we cover all possible ways of pasting - use checksum in pasted image filenames instead of .cacheKey(), which was not unique across runs - try each _processX() in turn instead of having special cases inside them - rewrite the HTML filtering: - img rewriting and stripping of certain tags handled in bs4; other processing handled in js for easier sharing with other clients - use a whitelist of HTML tags and properties instead of focusing mainly on webkit cruft - don't run filter when focus lost, as it no longer seems to be necessary - the "strip html" option is currently ignored
2026-01-09 12:03:56 -05:00 · 2016-12-15 18:14:47 +10:00 · 2016-12-15 18:14:47 +10:00 · 9b6a21126d
commit 9b6a21126d
parent 3859d8706b
1 changed files with 181 additions and 202 deletions
--- a/aqt/editor.py
+++ b/aqt/editor.py
@ -7,10 +7,12 @@ import urllib.request, urllib.error, urllib.parse
 import ctypes
 import urllib.request, urllib.parse, urllib.error
 import warnings
+import html

 from anki.lang import _
 from aqt.qt import *
-from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia
+from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia, \
+    checksum
 import anki.sound
 from anki.hooks import runHook, runFilter
 from aqt.sound import getAudio
@ -171,6 +173,11 @@ function onDragOver(elem) {
    dropTarget = elem;
 }

+function onPaste(elem) {
+    pycmd("paste");
+    window.event.preventDefault();
+}
+
 function caretToEnd() {
    var r = document.createRange()
    r.selectNodeContents(currentField);
@ -249,7 +256,7 @@ function setFields(fields, focusTo) {
        txt += "<tr><td class=fname>{0}</td></tr><tr><td width=100%%>".format(n);
        txt += "<div id=f{0} onkeydown='onKey();' oninput='checkForEmptyField()' onmouseup='onKey();'".format(i);
        txt += " onfocus='onFocus(this);' onblur='onBlur();' class=field ";
-        txt += "ondragover='onDragOver(this);' ";
+        txt += "ondragover='onDragOver(this);' onpaste='onPaste(this);' ";
        txt += "contentEditable=true class=field>{0}</div>".format(f);
        txt += "</td></tr>";
    }
@ -285,6 +292,80 @@ function hideDupes() {
    $("#dupes").hide();
 }

+var pasteHTML = function(html, internal) {
+    if (!internal) {
+        html = filterHTML(html);
+    }
+    setFormat("inserthtml", html);
+};
+
+var filterHTML = function(html) {
+    // wrap it in <top> as we aren't allowed to change top level elements
+    var top = $.parseHTML("<ankitop>" + html + "</ankitop>")[0];
+    filterNode(top);
+    var outHtml = top.innerHTML;
+    // get rid of nbsp
+    outHtml = outHtml.replace(/&nbsp;/ig, " ");
+    //console.log(`input html: ${html}`);
+    //console.log(`outpt html: ${outHtml}`);
+    return outHtml;
+};
+
+var allowedTags = {};
+
+for (let tag of [
+    "H1", "H2", "H3", "P", "DIV", "BR", "LI", "UL", "OL",
+    "B", "I", "U", "BLOCKQUOTE", "CODE", "EM", "STRONG",
+    "PRE", "SUB", "SUP", "TABLE"]) {
+    allowedTags[tag] = {"attrs": []};
+}
+
+allowedTags["A"] = {"attrs": ["HREF"]};
+allowedTags["TR"] = {"attrs": ["ROWSPAN"]};
+allowedTags["TD"] = {"attrs": ["COLSPAN", "ROWSPAN"]};
+allowedTags["TH"] = {"attrs": ["COLSPAN", "ROWSPAN"]};
+allowedTags["IMG"] = {"attrs": ["SRC"]};
+
+var filterNode = function(node) {
+    // if it's a text node, nothing to do
+    if (node.nodeType == 3) {
+        return;
+    }
+
+    // descend first, and take a copy of the child nodes as the loop will skip
+    // elements due to node modifications otherwise
+
+    var nodes = [];
+    for (let i=0; i<node.childNodes.length; i++) {
+        nodes.push(node.childNodes[i]);
+    }
+    for (let i=0; i<nodes.length; i++) {
+        filterNode(nodes[i]);
+    }
+
+    if (node.tagName == "ANKITOP") {
+        return;
+    }
+
+    const tag = allowedTags[node.tagName];
+    if (!tag) {
+        node.outerHTML = node.innerHTML;
+    } else {
+        // allowed, filter out attributes
+        var toRemove = [];
+        for (let i = 0; i < node.attributes.length; i++) {
+            let attr = node.attributes[i];
+            let attrName = attr.name.toUpperCase();
+            if (tag.attrs.indexOf(attrName) == -1) {
+                toRemove.push(attr)
+            }
+        }
+        for (let i = 0; i < toRemove.length; i++) {
+            node.removeAttributeNode(toRemove[i]);
+        }
+    }
+};
+
 var mouseDown = 0;

 $(function () {
@ -525,7 +606,7 @@ class Editor(object):
    def mungeHTML(self, txt):
        if txt == "<br>":
            txt = ""
-        return self._filterHTML(txt, localize=False)
+        return txt

    # Setting/unsetting the current note
    ######################################################################
@ -870,63 +951,27 @@ to a cloze type first, via Edit>Change Note Type."""))
        path = urllib.parse.unquote(url)
        return self.mw.col.media.writeData(path, filecontents)

-    # HTML filtering
+    # Paste/drag&drop
    ######################################################################

-    def _filterHTML(self, html, localize=False):
+    removeTags = ["script", "iframe", "object", "style"]
+
+    def _pastePreFilter(self, html):
        with warnings.catch_warnings() as w:
            warnings.simplefilter('ignore', UserWarning)
            doc = BeautifulSoup(html, "html.parser")
-        # remove implicit regular font style from outermost element
-        if doc.span:
-            try:
-                attrs = doc.span['style'].split(";")
-            except (KeyError, TypeError):
-                attrs = []
-            if attrs:
-                new = []
-                for attr in attrs:
-                    sattr = attr.strip()
-                    if sattr and sattr not in ("font-style: normal", "font-weight: normal"):
-                        new.append(sattr)
-                doc.span['style'] = ";".join(new)
-            # filter out implicit formatting from webkit
-        for tag in doc("span", "Apple-style-span"):
-            preserve = ""
-            for item in tag['style'].split(";"):
-                try:
-                    k, v = item.split(":")
-                except ValueError:
-                    continue
-                if k.strip() == "color" and not v.strip() == "rgb(0, 0, 0)":
-                    preserve += "color:%s;" % v
-                if k.strip() in ("font-weight", "font-style"):
-                    preserve += item + ";"
-            if preserve:
-                # preserve colour attribute, delete implicit class
-                tag['style'] = preserve
-                del tag['class']
-            else:
-                # strip completely
-                tag.replaceWithChildren()
-        for tag in doc("font", "Apple-style-span"):
-            # strip all but colour attr from implicit font tags
-            if 'color' in dict(tag.attrs):
-                for attr in tag.attrs:
-                    if attr != "color":
-                        del tag[attr]
-                    # and apple class
-                del tag['class']
-            else:
-                # remove completely
-                tag.replaceWithChildren()
-            # now images
+
+        for tag in self.removeTags:
+            for node in doc(tag):
+                node.decompose()
+
+        # convert p tags to divs
+        for node in doc("p"):
+            node.name = "div"
+
        for tag in doc("img"):
-            # turn file:/// links into relative ones
            try:
-                if tag['src'].lower().startswith("file://"):
-                    tag['src'] = os.path.basename(tag['src'])
-                if localize and self.isURL(tag['src']):
+                if self.isURL(tag['src']):
                    # convert remote image links to local ones
                    fname = self.urlToFile(tag['src'])
                    if fname:
@ -935,17 +980,23 @@ to a cloze type first, via Edit>Change Note Type."""))
                # for some bizarre reason, mnemosyne removes src elements
                # from missing media
                pass
-                # strip all other attributes, including implicit max-width
-            for attr, val in tag.attrs.items():
-                if attr != "src":
-                    del tag[attr]
-            # strip superfluous elements
-        for elem in "html", "head", "body", "meta":
-            for tag in doc(elem):
-                tag.replaceWithChildren()
+
        html = str(doc)
        return html

+    def doPaste(self, html, internal):
+        if not internal:
+            html = self._pastePreFilter(html)
+        self.web.eval("pasteHTML(%s);" % json.dumps(html))
+
+    def doDrop(self, html, internal):
+        self.web.evalWithCallback("dropTarget.focus();",
+                                  lambda _: self.doPaste(html, internal))
+        self.web.setFocus()
+
+    def onPaste(self):
+        self.web.onPaste()
+
    # Advanced menu
    ######################################################################

@ -992,14 +1043,12 @@ to a cloze type first, via Edit>Change Note Type."""))
        record=onRecSound,
        more=onAdvanced,
        dupes=showDupes,
+        paste=onPaste,
    )

 # Pasting, drag & drop, and keyboard layouts
 ######################################################################

-# fixme: drag & drop
-# fixme: middle click to paste
-
 class EditorWebView(AnkiWebView):

    def __init__(self, parent, editor):
@ -1017,170 +1066,100 @@ class EditorWebView(AnkiWebView):
        self._flagAnkiText()

    def onPaste(self):
-        mime = self.mungeClip()
-        self.triggerPageAction(QWebEnginePage.Paste)
-        self.restoreClip()
+        mime = self.editor.mw.app.clipboard().mimeData(mode=QClipboard.Clipboard)
+        html, internal = self._processMime(mime)
+        if not html:
+            return
+        self.editor.doPaste(html, internal)

-    # def mouseReleaseEvent(self, evt):
-    #     if not isMac and not isWin and evt.button() == Qt.MidButton:
-    #         # middle click on x11; munge the clipboard before standard
-    #         # handling
-    #         mime = self.mungeClip(mode=QClipboard.Selection)
-    #         AnkiWebView.mouseReleaseEvent(self, evt)
-    #         self.restoreClip(mode=QClipboard.Selection)
-    #     else:
-    #         AnkiWebView.mouseReleaseEvent(self, evt)
-    #
-    # def dropEvent(self, evt):
-    #     oldmime = evt.mimeData()
-    #     # coming from this program?
-    #     if evt.source():
-    #         if oldmime.hasHtml():
-    #             mime = QMimeData()
-    #             mime.setHtml(self.editor._filterHTML(oldmime.html()))
-    #         else:
-    #             # old qt on linux won't give us html when dragging an image;
-    #             # in that case just do the default action (which is to ignore
-    #             # the drag)
-    #             return AnkiWebView.dropEvent(self, evt)
-    #     else:
-    #         mime = self._processMime(oldmime)
-    #     # create a new event with the new mime data and run it
-    #     new = QDropEvent(evt.pos(), evt.possibleActions(), mime,
-    #                      evt.mouseButtons(), evt.keyboardModifiers())
-    #     evt.accept()
-    #     AnkiWebView.dropEvent(self, new)
-    #     # tell the drop target to take focus so the drop contents are saved
-    #     self.eval("dropTarget.focus();")
-    #     self.setFocus()
+    def dropEvent(self, evt):
+        mime = evt.mimeData()

-    def mungeClip(self, mode=QClipboard.Clipboard):
-        clip = self.editor.mw.app.clipboard()
-        mime = clip.mimeData(mode=mode)
-        self.saveClip(mode=mode)
-        mime = self._processMime(mime)
-        clip.setMimeData(mime, mode=mode)
-        return mime
-
-    def restoreClip(self, mode=QClipboard.Clipboard):
-        clip = self.editor.mw.app.clipboard()
-        clip.setMimeData(self.savedClip, mode=mode)
-
-    def saveClip(self, mode):
-        # we don't own the clipboard object, so we need to copy it or we'll crash
-        mime = self.editor.mw.app.clipboard().mimeData(mode=mode)
-        n = QMimeData()
-        if mime.hasText():
-            n.setText(mime.text())
-        if mime.hasHtml():
-            n.setHtml(mime.html())
-        if mime.hasUrls():
-            n.setUrls(mime.urls())
-        if mime.hasImage():
-            n.setImageData(mime.imageData())
-        self.savedClip = n
-
-    def _processMime(self, mime):
-        # print "html=%s image=%s urls=%s txt=%s" % (
-        #     mime.hasHtml(), mime.hasImage(), mime.hasUrls(), mime.hasText())
-        # print "html", mime.html()
-        # print "urls", mime.urls()
-        # print "text", mime.text()
-        if mime.hasHtml():
-            return self._processHtml(mime)
-        elif mime.hasUrls():
-            return self._processUrls(mime)
-        elif mime.hasText():
-            return self._processText(mime)
-        elif mime.hasImage():
-            return self._processImage(mime)
+        if evt.source() and mime.hasHtml():
+            # don't filter html from other fields
+            html, internal = mime.html(), True
        else:
-            # nothing
-            return QMimeData()
+            html, internal = self._processMime(mime)
+
+        if not html:
+            return
+
+        self.editor.doDrop(html, internal)
+
+    # returns (html, isInternal)
+    def _processMime(self, mime):
+        print("html=%s image=%s urls=%s txt=%s" % (
+            mime.hasHtml(), mime.hasImage(), mime.hasUrls(), mime.hasText()))
+        print("html", mime.html())
+        print("urls", mime.urls())
+        print("text", mime.text())
+
+        # try various content types in turn
+        html, internal = self._processHtml(mime)
+        if html:
+            return html, internal
+        for fn in (self._processUrls, self._processImage, self._processText):
+            html = fn(mime)
+            if html:
+                return html, False
+        return "", False

-    # when user is dragging a file from a file manager on any platform, the
-    # url type should be set, and it is not URL-encoded. on a mac no text type
-    # is returned, and on windows the text type is not returned in cases like
-    # "foo's bar.jpg"
    def _processUrls(self, mime):
+        if not mime.hasUrls():
+            return
+
        url = mime.urls()[0].toString()
        # chrome likes to give us the URL twice with a \n
        url = url.splitlines()[0]
-        newmime = QMimeData()
-        link = self.editor.urlToLink(url)
-        if link:
-            newmime.setHtml(link)
-        elif mime.hasImage():
-            # if we couldn't convert the url to a link and there's an
-            # image on the clipboard (such as copy&paste from
-            # google images in safari), use that instead
-            return self._processImage(mime)
-        else:
-            newmime.setText(url)
-        return newmime
+        return self.editor.urlToLink(url)

-    # if the user has used 'copy link location' in the browser, the clipboard
-    # will contain the URL as text, and no URLs or HTML. the URL will already
-    # be URL-encoded, and shouldn't be a file:// url unless they're browsing
-    # locally, which we don't support
    def _processText(self, mime):
-        txt = str(mime.text())
-        html = None
+        if not mime.hasText():
+            return
+
+        txt = mime.text()
+
        # if the user is pasting an image or sound link, convert it to local
        if self.editor.isURL(txt):
            txt = txt.split("\r\n")[0]
-            html = self.editor.urlToLink(txt)
-        new = QMimeData()
-        if html:
-            new.setHtml(html)
-        else:
-            new.setText(txt)
-        return new
+            return self.editor.urlToLink(txt)
+
+        # normal text; convert it to HTML
+        return html.escape(txt)

    def _processHtml(self, mime):
+        if not mime.hasHtml():
+            return None, False
        html = mime.html()
-        newMime = QMimeData()
-        if self.strip and not html.startswith("<!--anki-->"):
-            # special case for google images: if after stripping there's no text
-            # and there are image links, we'll paste those as html instead
-            if not stripHTML(html).strip():
-                newHtml = ""
-                mid = self.editor.note.mid
-                for url in self.editor.mw.col.media.filesInStr(
-                    mid, html, includeRemote=True):
-                    newHtml += self.editor.urlToLink(url)
-                if not newHtml and mime.hasImage():
-                    return self._processImage(mime)
-                newMime.setHtml(newHtml)
-            else:
-                # use .text() if available so newlines are preserved; otherwise strip
-                if mime.hasText():
-                    return self._processText(mime)
-                else:
-                    newMime.setText(stripHTML(mime.text()))
-        else:
-            if html.startswith("<!--anki-->"):
-                html = html[11:]
-            # no html stripping
-            html = self.editor._filterHTML(html, localize=True)
-            newMime.setHtml(html)
-        return newMime
+
+        # no filtering required for internal pastes
+        if html.startswith("<!--anki-->"):
+            return html[11:], True
+
+        return html, False

    def _processImage(self, mime):
        im = QImage(mime.imageData())
-        uname = namedtmp("paste-%d" % im.cacheKey())
+        uname = namedtmp("paste")
        if self.editor.mw.pm.profile.get("pastePNG", False):
            ext = ".png"
            im.save(uname+ext, None, 50)
        else:
            ext = ".jpg"
            im.save(uname+ext, None, 80)
+
        # invalid image?
-        if not os.path.exists(uname+ext):
-            return QMimeData()
-        mime = QMimeData()
-        mime.setHtml(self.editor._addMedia(uname+ext))
-        return mime
+        path = uname+ext
+        if not os.path.exists(path):
+            return
+
+        # hash and rename
+        csum = checksum(open(path, "rb").read())
+        newpath = "{}-{}{}".format(uname, csum, ext)
+        os.rename(path, newpath)
+
+        # add to media and return resulting html link
+        return self.editor._addMedia(newpath)

    def _flagAnkiText(self):
        # add a comment in the clipboard html so we can tell text is copied