From e97c381a6ff3adf5df173fcd57d0d51b6c62ab5a Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Fri, 16 Jul 2021 10:37:59 +1000 Subject: [PATCH] Revert "stop (un)escaping media filenames" This was flawed - while non-Latin text is now acceptable in an IRI, we still need to be concerned with reserved characters such as spaces, and Anki unfortunately has been storing the filenames in unencoded form in the DB, meaning we must encode them at display time. We won't be able to move away from this until existing notes are rewritten, and it will probably require breaking compatibility with older clients. https://en.wikipedia.org/wiki/Internationalized_Resource_Identifier This reverts commit 14110add551dbb4b8b6857336513a16bb129cb06. --- pylib/anki/media.py | 27 ++++++++++++++++++++++----- pylib/tests/test_media.py | 4 ++++ qt/aqt/editor.py | 17 +++++++++++++++-- qt/aqt/main.py | 1 + qt/aqt/utils.py | 4 ++-- 5 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pylib/anki/media.py b/pylib/anki/media.py index df9097a59..29f67f047 100644 --- a/pylib/anki/media.py +++ b/pylib/anki/media.py @@ -8,10 +8,12 @@ import pprint import re import sys import time -from typing import Any, Callable, List, Optional, Tuple +import urllib.error +import urllib.parse +import urllib.request +from typing import Any, Callable, List, Match, Optional, Tuple from anki import media_pb2 -from anki._legacy import deprecated from anki.consts import * from anki.latex import render_latex, render_latex_returning_errors from anki.models import NotetypeId @@ -183,12 +185,27 @@ class MediaManager: txt = re.sub(reg, "", txt) return txt - @deprecated(info="no longer required") def escapeImages(self, string: str, unescape: bool = False) -> str: - return string + "escape_media_filenames alias for compatibility with add-ons." 
+ return self.escape_media_filenames(string, unescape) - @deprecated(info="no longer required") def escape_media_filenames(self, string: str, unescape: bool = False) -> str: + "Apply or remove percent encoding to filenames in html tags (audio, image, object)." + fn: Callable + if unescape: + fn = urllib.parse.unquote + else: + fn = urllib.parse.quote + + def repl(match: Match) -> str: + tag = match.group(0) + fname = match.group("fname") + if re.match("(https?|ftp)://", fname): + return tag + return tag.replace(fname, fn(fname)) + + for reg in self.html_media_regexps: + string = re.sub(reg, repl, string) return string # Checking media diff --git a/pylib/tests/test_media.py b/pylib/tests/test_media.py index 73dd9bb41..30e2b8056 100644 --- a/pylib/tests/test_media.py +++ b/pylib/tests/test_media.py @@ -49,6 +49,10 @@ def test_strings(): assert sp("aoeu") == "aoeu" assert sp("aoeu[sound:foo.mp3]aoeu") == "aoeuaoeu" assert sp("a<img src=yo>oeu") == "aoeu" + es = col.media.escape_media_filenames + assert es("aoeu") == "aoeu" + assert es("<img src='http://foo.com'>") == "<img src='http://foo.com'>" + assert es('<img src="foo bar.jpg">') == '<img src="foo%20bar.jpg">' def test_deckIntegration(): diff --git a/qt/aqt/editor.py b/qt/aqt/editor.py index 21764547d..052833748 100644 --- a/qt/aqt/editor.py +++ b/qt/aqt/editor.py @@ -434,7 +434,10 @@ $editorToolbar.then(({{ toolbar }}) => toolbar.appendGroup({{ if not self.note: return - data = self.note.items() + data = [ + (fld, self.mw.col.media.escape_media_filenames(val)) + for fld, val in self.note.items() + ] self.widget.show() self.updateTags() @@ -584,9 +587,13 @@ $editorToolbar.then(({{ toolbar }}) => toolbar.appendGroup({{ if html.find(">") > -1: # filter html through beautifulsoup so we can strip out things like a # leading </div> + html_escaped = self.mw.col.media.escape_media_filenames(html) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - html = str(BeautifulSoup(html, "html.parser")) + html_escaped = str(BeautifulSoup(html_escaped, "html.parser")) + html = self.mw.col.media.escape_media_filenames( +
html_escaped, unescape=True + ) self.note.fields[field] = html if not self.addMode: self._save_current_note() @@ -1260,9 +1267,15 @@ def remove_null_bytes(txt: str, editor: Editor) -> str: return txt.replace("\x00", "") +def reverse_url_quoting(txt: str, editor: Editor) -> str: + # reverse the url quoting we added to get images to display + return editor.mw.col.media.escape_media_filenames(txt, unescape=True) + + gui_hooks.editor_will_use_font_for_field.append(fontMungeHack) gui_hooks.editor_will_munge_html.append(munge_html) gui_hooks.editor_will_munge_html.append(remove_null_bytes) +gui_hooks.editor_will_munge_html.append(reverse_url_quoting) def set_cloze_button(editor: Editor) -> None: diff --git a/qt/aqt/main.py b/qt/aqt/main.py index c224a18cb..cdf78b842 100644 --- a/qt/aqt/main.py +++ b/qt/aqt/main.py @@ -484,6 +484,7 @@ class AnkiQt(QMainWindow): return aqt.sound.av_refs_to_play_icons(text) def prepare_card_text_for_display(self, text: str) -> str: + text = self.col.media.escape_media_filenames(text) text = self._add_play_buttons(text) return text diff --git a/qt/aqt/utils.py b/qt/aqt/utils.py index e44958c25..c6a983408 100644 --- a/qt/aqt/utils.py +++ b/qt/aqt/utils.py @@ -34,7 +34,6 @@ from PyQt5.QtWidgets import ( ) import aqt -from anki._legacy import deprecated from anki.collection import Collection from anki.lang import TR, tr_legacyglobal # pylint: disable=unused-import from anki.utils import invalidFilename, isMac, isWin, noBundledLibs, versionWithBuild @@ -680,8 +679,9 @@ def restore_combo_history(comboBox: QComboBox, name: str) -> List[str]: return history -@deprecated(info="use mw.prepare_card_text_for_display()") def mungeQA(col: Collection, txt: str) -> str: + print("mungeQA() deprecated; use mw.prepare_card_text_for_display()") + txt = col.media.escape_media_filenames(txt) return txt