From dc9362d4edc41886fd90d9f17a0caaf8a4ad75c4 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Fri, 14 Feb 2020 14:51:07 +1000 Subject: [PATCH] add i18n support to Rust code using Fluent --- proto/backend.proto | 2 + rslib/Cargo.toml | 2 + rslib/src/backend.rs | 10 +- rslib/src/i18n/mod.rs | 269 ++++++++++++++++++++++++++++++++ rslib/src/lib.rs | 1 + rslib/src/media/check.rs | 15 +- rslib/tests/support/ja/test.ftl | 2 + rslib/tests/support/test.ftl | 7 + 8 files changed, 304 insertions(+), 4 deletions(-) create mode 100644 rslib/src/i18n/mod.rs create mode 100644 rslib/tests/support/ja/test.ftl create mode 100644 rslib/tests/support/test.ftl diff --git a/proto/backend.proto b/proto/backend.proto index b7d7aa582..2405a5aa5 100644 --- a/proto/backend.proto +++ b/proto/backend.proto @@ -8,6 +8,8 @@ message BackendInit { string collection_path = 1; string media_folder_path = 2; string media_db_path = 3; + repeated string preferred_langs = 4; + string locale_folder_path = 5; } // 1-15 reserved for future use; 2047 for errors diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index 9f5b18ceb..1ba92d746 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -30,6 +30,8 @@ serde_tuple = "0.4.0" coarsetime = "0.1.12" utime = "0.2.1" serde-aux = "0.6.1" +unic-langid = { version = "0.7.0", features = ["macros"] } +fluent = "0.9.1" [target.'cfg(target_vendor="apple")'.dependencies] rusqlite = { version = "0.21.0", features = ["trace"] } diff --git a/rslib/src/backend.rs b/rslib/src/backend.rs index cc2b4bcea..7df2cd6d7 100644 --- a/rslib/src/backend.rs +++ b/rslib/src/backend.rs @@ -5,6 +5,7 @@ use crate::backend_proto as pb; use crate::backend_proto::backend_input::Value; use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn}; use crate::err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind}; +use crate::i18n::I18n; use crate::latex::{extract_latex, ExtractedLatex}; use crate::media::check::MediaChecker; use crate::media::sync::MediaSyncProgress; @@ -28,6 +29,7 @@ pub struct Backend { media_folder: PathBuf, media_db: String, progress_callback: Option, + i18n: I18n, } enum Progress<'a> { @@ -102,10 +104,13 @@ pub fn init_backend(init_msg: &[u8]) -> std::result::Result { Err(_) => return Err("couldn't decode init request".into()), }; + let i18n = I18n::new(&input.preferred_langs, input.locale_folder_path); + match Backend::new( &input.collection_path, &input.media_folder_path, &input.media_db_path, + i18n, ) { Ok(backend) => Ok(backend), Err(e) => Err(format!("{:?}", e)), @@ -113,12 +118,13 @@ pub fn init_backend(init_msg: &[u8]) -> std::result::Result { } impl Backend { - pub fn new(col_path: &str, media_folder: &str, media_db: &str) -> Result { + pub fn new(col_path: &str, media_folder: &str, media_db: &str, i18n: I18n) -> Result { Ok(Backend { col_path: col_path.into(), media_folder: media_folder.into(), media_db: media_db.into(), progress_callback: None, + i18n, }) } @@ -354,7 +360,7 @@ impl Backend { |progress: usize| self.fire_progress_callback(Progress::MediaCheck(progress as u32)); let mgr = MediaManager::new(&self.media_folder, &self.media_db)?; - let mut checker = MediaChecker::new(&mgr, &self.col_path, callback); + let mut checker = MediaChecker::new(&mgr, &self.col_path, callback, &self.i18n); let output = checker.check()?; Ok(pb::MediaCheckOut { diff --git a/rslib/src/i18n/mod.rs b/rslib/src/i18n/mod.rs new file mode 100644 index 000000000..10e639bba --- /dev/null +++ b/rslib/src/i18n/mod.rs @@ -0,0 +1,269 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use fluent::{FluentArgs, FluentBundle, FluentResource}; +use log::error; +use std::borrow::Cow; +use std::fs; +use std::path::{Path, PathBuf}; +use unic_langid::LanguageIdentifier; + +pub use fluent::fluent_args as tr_args; + +/// All languages we (currently) support, excluding the fallback +/// English. +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum LanguageDialect { + Japanese, + ChineseMainland, + ChineseTaiwan, +} + +fn lang_dialect(lang: LanguageIdentifier) -> Option { + use LanguageDialect as L; + Some(match lang.get_language() { + "ja" => L::Japanese, + "zh" => match lang.get_region() { + Some("TW") => L::ChineseTaiwan, + _ => L::ChineseMainland, + }, + _ => return None, + }) +} + +fn dialect_file_locale(dialect: LanguageDialect) -> &'static str { + match dialect { + LanguageDialect::Japanese => "ja", + LanguageDialect::ChineseMainland => "zh", + LanguageDialect::ChineseTaiwan => todo!(), + } +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum TranslationFile { + Test, + MediaCheck, +} + +fn data_for_fallback(file: TranslationFile) -> String { + match file { + TranslationFile::MediaCheck => include_str!("media-check.ftl"), + TranslationFile::Test => include_str!("../../tests/support/test.ftl"), + } + .to_string() +} + +fn data_for_lang_and_file( + dialect: LanguageDialect, + file: TranslationFile, + locales: &Path, +) -> Option { + let path = locales.join(dialect_file_locale(dialect)).join(match file { + TranslationFile::MediaCheck => "media-check.ftl", + TranslationFile::Test => "test.ftl", + }); + fs::read_to_string(&path) + .map_err(|e| { + error!("Unable to read translation file: {:?}: {}", path, e); + }) + .ok() +} + +fn get_bundle( + text: String, + locales: &[LanguageIdentifier], +) -> Option> { + let res = FluentResource::try_new(text) + .map_err(|e| { + error!("Unable to parse translations file: {:?}", e); + }) + .ok()?; + + let mut bundle: FluentBundle = FluentBundle::new(locales); + bundle + .add_resource(res) + .map_err(|e| { + error!("Duplicate key detected in translation file: {:?}", e); + }) + .ok()?; + + Some(bundle) +} + +pub struct I18n { + // language identifiers, used for date/time rendering + langs: Vec, + // languages supported by us + supported: Vec, + + locale_folder: PathBuf, +} + +impl I18n { + pub fn new, P: Into>(locale_codes: &[S], locale_folder: P) -> Self { + let mut langs = vec![]; + let mut supported = vec![]; + for code in locale_codes { + if let Ok(ident) = code.as_ref().parse::() { + langs.push(ident.clone()); + if let Some(dialect) = lang_dialect(ident) { + supported.push(dialect) + } + } + } + // add fallback date/time + langs.push("en_US".parse().unwrap()); + + Self { + langs, + supported, + locale_folder: locale_folder.into(), + } + } + + pub fn get(&self, file: TranslationFile) -> I18nCategory { + I18nCategory::new(&*self.langs, &*self.supported, file, &self.locale_folder) + } +} + +pub struct I18nCategory { + // bundles in preferred language order, with fallback English as the + // last element + bundles: Vec>, +} + +impl I18nCategory { + pub fn new( + langs: &[LanguageIdentifier], + preferred: &[LanguageDialect], + file: TranslationFile, + locale_folder: &Path, + ) -> Self { + let mut bundles = Vec::with_capacity(preferred.len() + 1); + for dialect in preferred { + if let Some(text) = data_for_lang_and_file(*dialect, file, locale_folder) { + if let Some(mut bundle) = get_bundle(text, langs) { + if cfg!(test) { + bundle.set_use_isolating(false); + } + bundles.push(bundle); + } else { + error!("Failed to create bundle for {:?} {:?}", dialect, file); + } + } + } + + let mut fallback_bundle = get_bundle(data_for_fallback(file), langs).unwrap(); + if cfg!(test) { + fallback_bundle.set_use_isolating(false); + } + + bundles.push(fallback_bundle); + + Self { bundles } + } + + /// Get translation with zero arguments. + pub fn tr(&self, key: &str) -> Cow { + self.tr_(key, None) + } + + /// Get translation with one or more arguments. + pub fn trn(&self, key: &str, args: FluentArgs) -> Cow { + self.tr_(key, Some(args)) + } + + fn tr_<'a>(&'a self, key: &str, args: Option) -> Cow<'a, str> { + for bundle in &self.bundles { + let msg = match bundle.get_message(key) { + Some(msg) => msg, + // not translated in this bundle + None => continue, + }; + + let pat = match msg.value { + Some(val) => val, + // empty value + None => continue, + }; + + let mut errs = vec![]; + let out = bundle.format_pattern(pat, args.as_ref(), &mut errs); + if !errs.is_empty() { + error!("Error(s) in translation '{}': {:?}", key, errs); + } + // clone so we can discard args + return out.to_string().into(); + } + + format!("Missing translation key: {}", key).into() + } +} + +#[cfg(test)] +mod test { + use crate::i18n::{dialect_file_locale, lang_dialect, TranslationFile}; + use crate::i18n::{tr_args, I18n, LanguageDialect}; + use std::path::PathBuf; + use unic_langid::LanguageIdentifier; + + #[test] + fn dialect() { + use LanguageDialect as L; + let mut ident: LanguageIdentifier = "en-US".parse().unwrap(); + assert_eq!(lang_dialect(ident), None); + ident = "ja_JP".parse().unwrap(); + assert_eq!(lang_dialect(ident), Some(L::Japanese)); + ident = "zh".parse().unwrap(); + assert_eq!(lang_dialect(ident), Some(L::ChineseMainland)); + ident = "zh-TW".parse().unwrap(); + assert_eq!(lang_dialect(ident), Some(L::ChineseTaiwan)); + + assert_eq!(dialect_file_locale(L::Japanese), "ja"); + assert_eq!(dialect_file_locale(L::ChineseMainland), "zh"); + // assert_eq!(dialect_file_locale(L::Other), "templates"); + } + + #[test] + fn i18n() { + // English fallback + let i18n = I18n::new(&["zz"], "../../tests/support"); + let cat = i18n.get(TranslationFile::Test); + assert_eq!(cat.tr("valid-key"), "a valid key"); + assert_eq!( + cat.tr("invalid-key"), + "Missing translation key: invalid-key" + ); + + assert_eq!( + cat.trn("two-args-key", tr_args!["one"=>1, "two"=>"2"]), + "two args: 1 and 2" + ); + + // commented out to avoid scary warning during unit tests + // assert_eq!( + // cat.trn("two-args-key", tr_args!["one"=>"testing error reporting"]), + // "two args: testing error reporting and {$two}" + // ); + + assert_eq!(cat.trn("plural", tr_args!["hats"=>1]), "You have 1 hat."); + assert_eq!(cat.trn("plural", tr_args!["hats"=>3]), "You have 3 hats."); + + // Other language + let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + d.push("tests/support"); + let i18n = I18n::new(&["ja_JP"], d); + let cat = i18n.get(TranslationFile::Test); + assert_eq!(cat.tr("valid-key"), "キー"); + assert_eq!(cat.tr("only-in-english"), "not translated"); + assert_eq!( + cat.tr("invalid-key"), + "Missing translation key: invalid-key" + ); + + assert_eq!( + cat.trn("two-args-key", tr_args!["one"=>1, "two"=>"2"]), + "1と2" + ); + } +} diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index 62acc5dc2..936394e62 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -12,6 +12,7 @@ pub fn version() -> &'static str { pub mod backend; pub mod cloze; pub mod err; +pub mod i18n; pub mod latex; pub mod media; pub mod sched; diff --git a/rslib/src/media/check.rs b/rslib/src/media/check.rs index 064355d04..eeb651de6 100644 --- a/rslib/src/media/check.rs +++ b/rslib/src/media/check.rs @@ -3,6 +3,7 @@ use crate::cloze::expand_clozes_to_reveal_latex; use crate::err::{AnkiError, Result}; +use crate::i18n::I18n; use crate::latex::extract_latex; use crate::media::col::{ for_every_note, get_note_types, mark_collection_modified, open_or_create_collection_db, @@ -46,6 +47,7 @@ where progress_cb: P, checked: usize, progress_updated: Instant, + i18n: &'a I18n, } impl

MediaChecker<'_, P> @@ -56,6 +58,7 @@ where mgr: &'a MediaManager, col_path: &'a Path, progress_cb: P, + i18n: &'a I18n, ) -> MediaChecker<'a, P> { MediaChecker { mgr, @@ -63,6 +66,7 @@ where progress_cb, checked: 0, progress_updated: Instant::now(), + i18n, } } @@ -75,6 +79,8 @@ where let referenced_files = self.check_media_references(&folder_check.renamed)?; let (unused, missing) = find_unused_and_missing(folder_check.files, referenced_files); + let _ = self.i18n; + Ok(MediaCheckOutput { unused, missing, @@ -338,6 +344,7 @@ fn extract_latex_refs(note: &Note, seen_files: &mut HashSet, svg: bool) #[cfg(test)] mod test { use crate::err::Result; + use crate::i18n::I18n; use crate::media::check::{MediaCheckOutput, MediaChecker}; use crate::media::MediaManager; use std::fs; @@ -371,8 +378,10 @@ mod test { fs::write(&mgr.media_folder.join("foo[.jpg"), "foo")?; fs::write(&mgr.media_folder.join("_under.jpg"), "foo")?; + let i18n = I18n::new(&["zz"], "dummy"); + let progress = |_n| true; - let mut checker = MediaChecker::new(&mgr, &col_path, progress); + let mut checker = MediaChecker::new(&mgr, &col_path, progress, &i18n); let output = checker.check()?; assert_eq!( @@ -398,10 +407,12 @@ mod test { fn unicode_normalization() -> Result<()> { let (_dir, mgr, col_path) = common_setup()?; + let i18n = I18n::new(&["zz"], "dummy"); + fs::write(&mgr.media_folder.join("ぱぱ.jpg"), "nfd encoding")?; let progress = |_n| true; - let mut checker = MediaChecker::new(&mgr, &col_path, progress); + let mut checker = MediaChecker::new(&mgr, &col_path, progress, &i18n); let mut output = checker.check()?; output.missing.sort(); diff --git a/rslib/tests/support/ja/test.ftl b/rslib/tests/support/ja/test.ftl new file mode 100644 index 000000000..1d8a84ff1 --- /dev/null +++ b/rslib/tests/support/ja/test.ftl @@ -0,0 +1,2 @@ +valid-key = キー +two-args-key = {$one}と{$two} diff --git a/rslib/tests/support/test.ftl b/rslib/tests/support/test.ftl new file mode 100644 index 000000000..eb1867ab1 --- /dev/null +++ b/rslib/tests/support/test.ftl @@ -0,0 +1,7 @@ +valid-key = a valid key +only-in-english = not translated +two-args-key = two args: {$one} and {$two} +plural = You have {$hats -> + [one] 1 hat + *[other] {$hats} hats + }. \ No newline at end of file