Anki/pylib/anki/lang.py
Abdo f94d05bcbe
Switch to Ruff (#4119)
* Add check:ruff build action

* Add fix:ruff action

* Add Ruff config

Mostly generated by Cursor

* Handle rest of lints

* Fix formatting

* Replace black and isort with ruff-format

* Run ruff-format

* Fix lint errors

* Remove pylint disables

* Remove .pylintrc

* Update docs

* Fix check:format not just checking

* Fix isort rule being ignored

* Sort imports

* Ensure ./ninja format also handles import sorting

* Remove unused isort cfg

* Enable unsafe fixes in fix:ruff, and enable unused var warning

* Re-run on config change; enable unnecessary ARG ignores

* Use all pycodestyle errors, and add some more commented-out ones

Latter logged on https://github.com/ankitects/anki/issues/4135
2025-06-29 14:38:35 +07:00

253 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
from __future__ import annotations
import locale
import re
import warnings
import weakref
from typing import TYPE_CHECKING, Any
import anki
import anki._backend
import anki.i18n_pb2 as _pb
from anki._legacy import DeprecatedNamesMixinForModule
# public exports
TR = anki._fluent.LegacyTranslationEnum
FormatTimeSpan = _pb.FormatTimespanRequest
langs = sorted(
[
("Afrikaans", "af_ZA"),
("Bahasa Melayu", "ms_MY"),
("Català", "ca_ES"),
("Dansk", "da_DK"),
("Deutsch", "de_DE"),
("Eesti", "et_EE"),
("English (United States)", "en_US"),
("English (United Kingdom)", "en_GB"),
("Español", "es_ES"),
("Esperanto", "eo_UY"),
("Euskara", "eu_ES"),
("Français", "fr_FR"),
("Galego", "gl_ES"),
("Hrvatski", "hr_HR"),
("Italiano", "it_IT"),
("lo jbobau", "jbo_EN"),
("Lenga d'òc", "oc_FR"),
("Magyar", "hu_HU"),
("Nederlands", "nl_NL"),
("Norsk", "nb_NO"),
("Polski", "pl_PL"),
("Português Brasileiro", "pt_BR"),
("Português", "pt_PT"),
("Română", "ro_RO"),
("Slovenčina", "sk_SK"),
("Slovenščina", "sl_SI"),
("Suomi", "fi_FI"),
("Svenska", "sv_SE"),
("Tiếng Việt", "vi_VN"),
("Türkçe", "tr_TR"),
("简体中文", "zh_CN"),
("日本語", "ja_JP"),
("繁體中文", "zh_TW"),
("한국어", "ko_KR"),
("Čeština", "cs_CZ"),
("Ελληνικά", "el_GR"),
("Български", "bg_BG"),
("Монгол хэл", "mn_MN"),
("Pусский язык", "ru_RU"),
("Српски", "sr_SP"),
("Українська мова", "uk_UA"),
("Հայերեն", "hy_AM"),
("עִבְרִית", "he_IL"),
("العربية", "ar_SA"),
("فارسی", "fa_IR"),
("ภาษาไทย", "th_TH"),
("Latin", "la_LA"),
("Gaeilge", "ga_IE"),
("Беларуская мова", "be_BY"),
("ଓଡ଼ିଆ", "or_OR"),
("Filipino", "tl"),
("ئۇيغۇر", "ug"),
]
)
# compatibility with old versions
compatMap = {
"af": "af_ZA",
"ar": "ar_SA",
"be": "be_BY",
"bg": "bg_BG",
"ca": "ca_ES",
"cs": "cs_CZ",
"da": "da_DK",
"de": "de_DE",
"el": "el_GR",
"en": "en_US",
"eo": "eo_UY",
"es": "es_ES",
"et": "et_EE",
"eu": "eu_ES",
"fa": "fa_IR",
"fi": "fi_FI",
"fr": "fr_FR",
"gl": "gl_ES",
"he": "he_IL",
"hr": "hr_HR",
"hu": "hu_HU",
"hy": "hy_AM",
"it": "it_IT",
"ja": "ja_JP",
"jbo": "jbo_EN",
"ko": "ko_KR",
"la": "la_LA",
"mn": "mn_MN",
"ms": "ms_MY",
"nl": "nl_NL",
"nb": "nb_NL",
"no": "nb_NL",
"oc": "oc_FR",
"or": "or_OR",
"pl": "pl_PL",
"pt": "pt_PT",
"ro": "ro_RO",
"ru": "ru_RU",
"sk": "sk_SK",
"sl": "sl_SI",
"sr": "sr_SP",
"sv": "sv_SE",
"th": "th_TH",
"tr": "tr_TR",
"uk": "uk_UA",
"vi": "vi_VN",
}
def lang_to_disk_lang(lang: str) -> str:
"""Normalize lang, then convert it to name used on disk."""
# convert it into our canonical representation first
lang = lang.replace("-", "_")
if lang in compatMap:
lang = compatMap[lang]
# these language/region combinations are fully qualified, but with a hyphen
if lang in (
"en_GB",
"ga_IE",
"hy_AM",
"nb_NO",
"nn_NO",
"pt_BR",
"pt_PT",
"sv_SE",
"zh_CN",
"zh_TW",
):
return lang.replace("_", "-")
# other languages have the region portion stripped
match = re.match("(.*)_", lang)
if match:
return match.group(1)
else:
return lang
# the currently set interface language
current_lang = "en"
# the current Fluent translation instance. Code in pylib/ should
# not reference this, and should use col.tr instead. The global
# instance exists for legacy reasons, and as a convenience for the
# Qt code.
current_i18n: anki._backend.RustBackend | None = None
tr_legacyglobal = anki._backend.Translations(None)
def _(str: str) -> str:
print(f"gettext _() is deprecated: {str}")
return str
def ngettext(single: str, plural: str, num: int) -> str:
print(f"ngettext() is deprecated: {plural}")
return plural
def set_lang(lang: str) -> None:
global current_lang, current_i18n
current_lang = lang
current_i18n = anki._backend.RustBackend(langs=[lang])
tr_legacyglobal.backend = weakref.ref(current_i18n)
def get_def_lang(user_lang: str | None = None) -> tuple[int, str]:
"""Return user_lang converted to name used on disk and its index, defaulting to system language
or English if not available."""
def get_index_of_language(wanted_locale: str) -> int | None:
for i, (_, locale_) in enumerate(langs):
if locale_ == wanted_locale:
return i
return None
try:
# getdefaultlocale() is deprecated since Python 3.11, but we need to keep using it as getlocale() behaves differently: https://bugs.python.org/issue38805
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
(sys_lang, enc) = locale.getdefaultlocale()
except AttributeError:
# this will return a different format on Windows (e.g. Italian_Italy), resulting in us falling back to en_US
# further below
(sys_lang, enc) = locale.getlocale()
except Exception:
# fails on osx
sys_lang = "en_US"
if user_lang in compatMap:
user_lang = compatMap[user_lang]
idx = None
lang = None
for preferred_lang in (user_lang, sys_lang):
idx = get_index_of_language(preferred_lang)
is_language_supported = idx is not None
if is_language_supported:
assert preferred_lang is not None
lang = preferred_lang
break
# if the specified language and the system language aren't available, revert to english
is_preferred_language_supported = idx is not None
if not is_preferred_language_supported:
lang = "en_US"
idx = get_index_of_language(lang)
is_english_supported = idx is not None
if not is_english_supported:
raise AssertionError("English is supposed to be a supported language.")
assert idx is not None and lang is not None
return (idx, lang)
def is_rtl(lang: str) -> bool:
return lang in ("he", "ar", "fa", "ug")
# strip off unicode isolation markers from a translated string
# for testing purposes
def without_unicode_isolation(string: str) -> str:
return string.replace("\u2068", "").replace("\u2069", "")
def with_collapsed_whitespace(string: str) -> str:
return re.sub(r"\s+", " ", string)
_deprecated_names = DeprecatedNamesMixinForModule(globals())
if not TYPE_CHECKING:
def __getattr__(name: str) -> Any:
return _deprecated_names.__getattr__(name)