mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 14:02:21 -04:00
ensure fields normalized before checksumming
https://forums.ankiweb.net/t/python-checksum-rust-checksum/8195
This commit is contained in:
parent
bd959731d7
commit
1ab085dfab
1 changed file with 5 additions and 1 deletion
|
@ -15,6 +15,7 @@ import sys
|
|||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import unicodedata
|
||||
from contextlib import contextmanager
|
||||
from hashlib import sha1
|
||||
from html.entities import name2codepoint
|
||||
|
@ -201,8 +202,11 @@ def checksum(data: Union[bytes, str]) -> str:
|
|||
|
||||
|
||||
def fieldChecksum(data: str) -> int:
    """Return an integer checksum for a field's text.

    The text is passed through ``stripHTMLMedia``, normalized to Unicode
    NFC, and encoded as UTF-8 before hashing, so canonically equivalent
    strings (e.g. precomposed vs. decomposed accents) yield the same
    checksum.

    Args:
        data: The raw field contents.

    Returns:
        The first 8 hex digits of ``checksum(...)`` parsed as an integer,
        i.e. a value that fits in 32 unsigned bits.
    """
    without_html = stripHTMLMedia(data)
    # Normalize before hashing; otherwise visually identical text in
    # different Unicode forms would hash differently.
    normalized = unicodedata.normalize("NFC", without_html)
    utf8_text = normalized.encode("utf-8")
    # 32 bit unsigned number from first 8 digits of sha1 hash
    return int(checksum(utf8_text)[:8], 16)
|
||||
|
||||
|
||||
# Temp files
|
||||
|
|
Loading…
Reference in a new issue