replace nbsp with standard space when stripping html

fixes https://anki.tenderapp.com/discussions/ankidesktop/1952-duplicate-not-detected-on-paste
This commit is contained in:
Damien Elmes 2013-07-11 21:23:13 +09:00
parent ffff271362
commit 2ba8c41fec
2 changed files with 3 additions and 3 deletions

View file

@@ -60,9 +60,6 @@ def _imgLink(col, latex, model):
def _latexFromHtml(col, latex): def _latexFromHtml(col, latex):
"Convert entities and fix newlines." "Convert entities and fix newlines."
# entitydefs defines nbsp as \xa0 instead of a standard space, so we
# replace it first
latex = latex.replace("&nbsp;", " ")
latex = re.sub("<br( /)?>|<div>", "\n", latex) latex = re.sub("<br( /)?>|<div>", "\n", latex)
latex = re.sub("<.+?>", "", latex) latex = re.sub("<.+?>", "", latex)
latex = stripHTML(latex) latex = stripHTML(latex)

View file

@@ -146,6 +146,9 @@ def minimizeHTML(s):
return s return s
def entsToTxt(html): def entsToTxt(html):
# entitydefs defines nbsp as \xa0 instead of a standard space, so we
# replace it first
html = html.replace("&nbsp;", " ")
def fixup(m): def fixup(m):
text = m.group(0) text = m.group(0)
if text[:2] == "&#": if text[:2] == "&#":