Ensure fields are normalized before checksumming

https://forums.ankiweb.net/t/python-checksum-rust-checksum/8195
This commit is contained in:
Damien Elmes 2021-03-13 10:23:32 +10:00
parent bd959731d7
commit 1ab085dfab

View file

@ -15,6 +15,7 @@ import sys
import tempfile
import time
import traceback
import unicodedata
from contextlib import contextmanager
from hashlib import sha1
from html.entities import name2codepoint
@ -201,8 +202,11 @@ def checksum(data: Union[bytes, str]) -> str:
def fieldChecksum(data: str) -> int:
    """Return a 32-bit unsigned checksum of a field's text.

    The field is passed through ``stripHTMLMedia`` (defined elsewhere in
    this module; presumably removes HTML and media markup), normalized to
    Unicode NFC, and encoded as UTF-8 before being hashed. Normalizing
    first means canonically equivalent spellings of the same text (e.g.
    precomposed vs. combining accents) produce the same checksum.

    Args:
        data: Raw field content.

    Returns:
        The first 8 hex digits of ``checksum()`` of the cleaned text,
        parsed as a base-16 integer (0 .. 2**32 - 1).
    """
    without_html = stripHTMLMedia(data)
    # Normalize before hashing: the same text in a non-NFC form would
    # otherwise encode to different bytes and yield a different checksum.
    normalized = unicodedata.normalize("NFC", without_html)
    utf8_text = normalized.encode("utf-8")
    # 32 bit unsigned number from first 8 digits of sha1 hash
    return int(checksum(utf8_text)[:8], 16)
# Temp files