From a68af5f1999c9772dc390c86559b57bf95d07542 Mon Sep 17 00:00:00 2001 From: Roman Franchuk Date: Sun, 17 Apr 2016 15:49:28 +0300 Subject: [PATCH] Deal with Internationalized Domain Name in URI This fixes https://anki.tenderapp.com/discussions/ankidesktop/17132-anki-reports-an-error-when-pasting-an-image-from-non-ascii-url The problem is that urllib2 can't handle IDNs. However, it's perfectly valid to have such URIs in HTML, and Firefox, when copying an image, generates exactly this kind of HTML. --- aqt/editor.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/aqt/editor.py b/aqt/editor.py index 8ad50f7ce..d758c7903 100644 --- a/aqt/editor.py +++ b/aqt/editor.py @@ -7,6 +7,8 @@ import urllib2 import ctypes import urllib +from urlparse import urlparse, urlunparse + from anki.lang import _ from aqt.qt import * from anki.utils import stripHTML, isWin, isMac, namedtmp, json, stripHTMLMedia @@ -837,6 +839,12 @@ to a cloze type first, via Edit>Change Note Type.""")) self.mw.progress.start( immediate=True, parent=self.parentWindow) try: + # urllib2 doesn't work properly with IRI + # The following code translates IRI to standard URI + scheme, netloc, path, params, query, fragment = urlparse(url) + idna_netloc = urllib2.unquote(netloc.encode("ascii")).decode("utf-8").encode("idna") + url = urlunparse([scheme, idna_netloc, path, params, query, fragment]) + req = urllib2.Request(url, None, { 'User-Agent': 'Mozilla/5.0 (compatible; Anki)'}) filecontents = urllib2.urlopen(req).read()