Revert "ensure fields normalized before checksumming"

This reverts commit f4bd867b3b54b172125d2f2021c8c6a6e69c4c4d.
This commit is contained in:
Damien Elmes 2021-03-17 22:21:13 +10:00
parent f8b5210df9
commit 7472181aeb

View file

@@ -15,7 +15,6 @@ import sys
 import tempfile
 import time
 import traceback
-import unicodedata
 from contextlib import contextmanager
 from hashlib import sha1
 from html.entities import name2codepoint
@@ -202,11 +201,8 @@ def checksum(data: Union[bytes, str]) -> str:
 
 
 def fieldChecksum(data: str) -> int:
-    without_html = stripHTMLMedia(data)
-    normalized = unicodedata.normalize("NFC", without_html)
-    utf8_text = normalized.encode("utf-8")
     # 32 bit unsigned number from first 8 digits of sha1 hash
-    return int(checksum(utf8_text)[:8], 16)
+    return int(checksum(stripHTMLMedia(data).encode("utf-8"))[:8], 16)
 
 
 # Temp files