mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 06:22:22 -04:00
ensure fields normalized before checksumming
https://forums.ankiweb.net/t/python-checksum-rust-checksum/8195
This commit is contained in:
parent
bd959731d7
commit
1ab085dfab
1 changed file with 5 additions and 1 deletion
|
@@ -15,6 +15,7 @@ import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
import unicodedata
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from html.entities import name2codepoint
|
from html.entities import name2codepoint
|
||||||
|
@@ -201,8 +202,11 @@ def checksum(data: Union[bytes, str]) -> str:
|
||||||
|
|
||||||
|
|
||||||
def fieldChecksum(data: str) -> int:
|
def fieldChecksum(data: str) -> int:
|
||||||
|
without_html = stripHTMLMedia(data)
|
||||||
|
normalized = unicodedata.normalize("NFC", without_html)
|
||||||
|
utf8_text = normalized.encode("utf-8")
|
||||||
# 32 bit unsigned number from first 8 digits of sha1 hash
|
# 32 bit unsigned number from first 8 digits of sha1 hash
|
||||||
return int(checksum(stripHTMLMedia(data).encode("utf-8"))[:8], 16)
|
return int(checksum(utf8_text)[:8], 16)
|
||||||
|
|
||||||
|
|
||||||
# Temp files
|
# Temp files
|
||||||
|
|
Loading…
Reference in a new issue