Revert "stop (un)escaping media filenames"

This was flawed - while non-Latin text is now acceptable
in an IRI, we still need to be concerned with reserved characters
such as spaces, and Anki unfortunately has been storing the filenames
in unencoded form in the DB, meaning we must encode them at display
time. We won't be able to move away from this until existing notes
are rewritten, and it will probably require breaking compatibility with
older clients.

https://en.wikipedia.org/wiki/Internationalized_Resource_Identifier

This reverts commit 14110add55.
This commit is contained in:
Damien Elmes 2021-07-16 10:37:59 +10:00
parent 33afe8aa32
commit e97c381a6f
5 changed files with 44 additions and 9 deletions

View file

@ -8,10 +8,12 @@ import pprint
import re
import sys
import time
from typing import Any, Callable, List, Optional, Tuple
import urllib.error
import urllib.parse
import urllib.request
from typing import Any, Callable, List, Match, Optional, Tuple
from anki import media_pb2
from anki._legacy import deprecated
from anki.consts import *
from anki.latex import render_latex, render_latex_returning_errors
from anki.models import NotetypeId
@ -183,12 +185,27 @@ class MediaManager:
txt = re.sub(reg, "", txt)
return txt
@deprecated(info="no longer required")
def escapeImages(self, string: str, unescape: bool = False) -> str:
    """escape_media_filenames alias for compatibility with add-ons.

    Forwards both arguments unchanged to ``self.escape_media_filenames`` and
    returns its result. The previous early ``return string`` made this alias
    a no-op, so callers using the legacy name never got their filenames
    (un)escaped.
    """
    return self.escape_media_filenames(string, unescape)
@deprecated(info="no longer required")
def escape_media_filenames(self, string: str, unescape: bool = False) -> str:
    """Apply or remove percent encoding to filenames in html tags (audio, image, object).

    Every pattern in ``self.html_media_regexps`` is applied to ``string``;
    each pattern must expose the media reference as a named group ``fname``.

    Args:
        string: HTML text to process.
        unescape: When true, percent-decode the filenames instead of
            encoding them.

    Returns:
        ``string`` with the filename of every matched tag (un)escaped.
        References starting with an http, https or ftp scheme are left
        untouched.
    """
    fn: Callable[[str], str] = (
        urllib.parse.unquote if unescape else urllib.parse.quote
    )

    def repl(match: Match) -> str:
        tag = match.group(0)
        fname = match.group("fname")
        # URI schemes are case-insensitive, so "HTTP://" is remote as well.
        if re.match("(https?|ftp)://", fname, re.IGNORECASE):
            return tag
        # Splice only the captured span. A plain str.replace() would also
        # rewrite any other attribute (alt, title, ...) that happens to
        # contain the same text as the filename.
        offset = match.start(0)
        begin = match.start("fname") - offset
        end = match.end("fname") - offset
        return tag[:begin] + fn(fname) + tag[end:]

    for reg in self.html_media_regexps:
        string = re.sub(reg, repl, string)
    return string
# Checking media

View file

@ -49,6 +49,10 @@ def test_strings():
assert sp("aoeu") == "aoeu"
assert sp("aoeu[sound:foo.mp3]aoeu") == "aoeuaoeu"
assert sp("a<img src=yo>oeu") == "aoeu"
es = col.media.escape_media_filenames
assert es("aoeu") == "aoeu"
assert es("<img src='http://foo.com'>") == "<img src='http://foo.com'>"
assert es('<img src="foo bar.jpg">') == '<img src="foo%20bar.jpg">'
def test_deckIntegration():

View file

@ -434,7 +434,10 @@ $editorToolbar.then(({{ toolbar }}) => toolbar.appendGroup({{
if not self.note:
return
data = self.note.items()
data = [
(fld, self.mw.col.media.escape_media_filenames(val))
for fld, val in self.note.items()
]
self.widget.show()
self.updateTags()
@ -584,9 +587,13 @@ $editorToolbar.then(({{ toolbar }}) => toolbar.appendGroup({{
if html.find(">") > -1:
# filter html through beautifulsoup so we can strip out things like a
# leading </div>
html_escaped = self.mw.col.media.escape_media_filenames(html)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
html = str(BeautifulSoup(html, "html.parser"))
html_escaped = str(BeautifulSoup(html_escaped, "html.parser"))
html = self.mw.col.media.escape_media_filenames(
html_escaped, unescape=True
)
self.note.fields[field] = html
if not self.addMode:
self._save_current_note()
@ -1260,9 +1267,15 @@ def remove_null_bytes(txt: str, editor: Editor) -> str:
return txt.replace("\x00", "")
def reverse_url_quoting(txt: str, editor: Editor) -> str:
    """Strip the URL quoting that was added so images would display.

    Delegates to the collection's media manager, reached through
    ``editor.mw.col.media``, with ``unescape=True``.
    """
    media = editor.mw.col.media
    return media.escape_media_filenames(txt, unescape=True)
# Register this module's default editor hooks.
# Three filters are appended to editor_will_munge_html, in this order:
# munge_html, remove_null_bytes, reverse_url_quoting.
# NOTE(review): presumably the filters run in append order, so the URL
# unquoting would happen last - confirm against the hook implementation.
gui_hooks.editor_will_use_font_for_field.append(fontMungeHack)
gui_hooks.editor_will_munge_html.append(munge_html)
gui_hooks.editor_will_munge_html.append(remove_null_bytes)
gui_hooks.editor_will_munge_html.append(reverse_url_quoting)
def set_cloze_button(editor: Editor) -> None:

View file

@ -484,6 +484,7 @@ class AnkiQt(QMainWindow):
return aqt.sound.av_refs_to_play_icons(text)
def prepare_card_text_for_display(self, text: str) -> str:
    """Return ``text`` readied for rendering.

    Media filenames are first escaped through the collection's media
    manager; the escaped text is then passed through
    ``self._add_play_buttons``.
    """
    escaped = self.col.media.escape_media_filenames(text)
    return self._add_play_buttons(escaped)

View file

@ -34,7 +34,6 @@ from PyQt5.QtWidgets import (
)
import aqt
from anki._legacy import deprecated
from anki.collection import Collection
from anki.lang import TR, tr_legacyglobal # pylint: disable=unused-import
from anki.utils import invalidFilename, isMac, isWin, noBundledLibs, versionWithBuild
@ -680,8 +679,9 @@ def restore_combo_history(comboBox: QComboBox, name: str) -> List[str]:
return history
@deprecated(info="use mw.prepare_card_text_for_display()")
def mungeQA(col: Collection, txt: str) -> str:
print("mungeQA() deprecated; use mw.prepare_card_text_for_display()")
txt = col.media.escape_media_filenames(txt)
return txt