Replace unic-ucd-category with icu_properties

This commit is contained in:
Abdo 2025-12-31 08:48:27 +03:00
parent b3dfba5e2d
commit faefbdcbf9
5 changed files with 44 additions and 82 deletions

109
Cargo.lock generated
View file

@ -115,6 +115,8 @@ dependencies = [
"hex",
"htmlescape",
"hyper 1.7.0",
"icu_properties",
"icu_properties_data",
"id_tree",
"inflections",
"itertools 0.14.0",
@ -153,7 +155,6 @@ dependencies = [
"tracing",
"tracing-appender",
"tracing-subscriber",
"unic-ucd-category",
"unicase",
"unicode-normalization",
"windows 0.61.3",
@ -3191,9 +3192,9 @@ dependencies = [
[[package]]
name = "icu_collections"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
dependencies = [
"displaydoc",
"potential_utf",
@ -3204,9 +3205,9 @@ dependencies = [
[[package]]
name = "icu_locale_core"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
dependencies = [
"displaydoc",
"litemap",
@ -3217,11 +3218,10 @@ dependencies = [
[[package]]
name = "icu_normalizer"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
@ -3232,42 +3232,38 @@ dependencies = [
[[package]]
name = "icu_normalizer_data"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
[[package]]
name = "icu_properties"
version = "2.0.1"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locale_core",
"icu_properties_data",
"icu_provider",
"potential_utf",
"zerotrie",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "2.0.1"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
[[package]]
name = "icu_provider"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
dependencies = [
"displaydoc",
"icu_locale_core",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke 0.8.0",
"zerofrom",
@ -3856,12 +3852,6 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matches"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
[[package]]
name = "matchit"
version = "0.8.4"
@ -5798,10 +5788,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
version = "1.0.219"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
@ -5837,10 +5828,19 @@ dependencies = [
]
[[package]]
name = "serde_derive"
version = "1.0.219"
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
@ -6835,27 +6835,6 @@ dependencies = [
"yoke 0.7.5",
]
[[package]]
name = "unic-char-property"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
dependencies = [
"unic-char-range",
]
[[package]]
name = "unic-char-range"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
[[package]]
name = "unic-common"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
[[package]]
name = "unic-langid"
version = "0.9.6"
@ -6899,27 +6878,6 @@ dependencies = [
"unic-langid-impl",
]
[[package]]
name = "unic-ucd-category"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8d4591f5fcfe1bd4453baaf803c40e1b1e69ff8455c47620440b46efef91c0"
dependencies = [
"matches",
"unic-char-property",
"unic-char-range",
"unic-ucd-version",
]
[[package]]
name = "unic-ucd-version"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
dependencies = [
"unic-common",
]
[[package]]
name = "unicase"
version = "2.6.0"
@ -7962,9 +7920,9 @@ dependencies = [
[[package]]
name = "writeable"
version = "0.6.1"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
[[package]]
name = "xattr"
@ -8020,7 +7978,6 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive 0.8.0",
"zerofrom",

View file

@ -136,7 +136,8 @@ tracing = { version = "0.1.41", features = ["max_level_trace", "release_max_leve
tracing-appender = "0.2.3"
tracing-subscriber = { version = "0.3.20", features = ["fmt", "env-filter"] }
unic-langid = { version = "0.9.6", features = ["macros"] }
unic-ucd-category = "0.9.0"
icu_properties_data = "2.1.2"
icu_properties = "2.1.2"
unicode-normalization = "0.1.24"
walkdir = "2.5.0"
which = "8.0.0"

View file

@ -102,7 +102,8 @@ tower-http.workspace = true
tracing.workspace = true
tracing-appender.workspace = true
tracing-subscriber.workspace = true
unic-ucd-category.workspace = true
icu_properties_data.workspace = true
icu_properties.workspace = true
unicase.workspace = true
unicode-normalization.workspace = true
zip.workspace = true

View file

@ -18,11 +18,12 @@ use anki_io::write_file;
use anki_io::FileIoError;
use anki_io::FileIoSnafu;
use anki_io::FileOp;
use icu_properties::props::EnumeratedProperty;
use icu_properties::props::GeneralCategory;
use regex::Regex;
use sha1::Digest;
use sha1::Sha1;
use tracing::debug;
use unic_ucd_category::GeneralCategory;
use unicode_normalization::is_nfc;
use unicode_normalization::UnicodeNormalization;
@ -76,7 +77,7 @@ fn disallowed_char(char: char) -> bool {
'[' | ']' | '<' | '>' | ':' | '"' | '/' | '?' | '*' | '^' | '\\' | '|' => true,
c if c.is_ascii_control() => true,
// Macs do not allow invalid Unicode characters like 05F8 to be in a filename.
c if GeneralCategory::of(c) == GeneralCategory::Unassigned => true,
c if GeneralCategory::for_char(c) == GeneralCategory::Unassigned => true,
_ => false,
}
}

View file

@ -5,8 +5,10 @@ use std::borrow::Cow;
use std::sync::LazyLock;
use difflib::sequencematcher::SequenceMatcher;
use icu_properties::props::EnumeratedProperty;
use icu_properties::props::GeneralCategory;
use icu_properties::props::GeneralCategoryGroup;
use regex::Regex;
use unic_ucd_category::GeneralCategory;
use unicode_normalization::char::is_combining_mark;
use unicode_normalization::UnicodeNormalization;
@ -140,7 +142,7 @@ fn isolate_leading_mark(text: &str) -> Cow<'_, str> {
if text
.chars()
.next()
.is_some_and(|c| GeneralCategory::of(c).is_mark())
.is_some_and(|c| GeneralCategoryGroup::Mark.contains(GeneralCategory::for_char(c)))
{
Cow::Owned(format!("\u{a0}{text}"))
} else {