mirror of
https://github.com/ankitects/anki.git
synced 2025-11-10 14:47:12 -05:00
dump po data into json to speed up invocations
This commit is contained in:
parent
43f5d13ed6
commit
57f9ca787b
3 changed files with 122 additions and 85 deletions
1
qt/po/.gitignore
vendored
1
qt/po/.gitignore
vendored
|
|
@ -1,3 +1,4 @@
|
||||||
.build
|
.build
|
||||||
repo
|
repo
|
||||||
ftl
|
ftl
|
||||||
|
strings.json
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,9 @@ import re
|
||||||
import sys
|
import sys
|
||||||
import polib
|
import polib
|
||||||
from fluent.syntax import parse, serialize
|
from fluent.syntax import parse, serialize
|
||||||
from fluent.syntax.ast import Message, TextElement, Identifier, Pattern
|
from fluent.syntax.ast import Message, TextElement, Identifier, Pattern, Junk
|
||||||
|
|
||||||
# extract a translated string from the gettext catalogs and insert it into ftl
|
# extract a translated string from strings.json and insert it into ftl
|
||||||
# eg:
|
# eg:
|
||||||
# $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "Delete Unused Media" ""
|
# $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "Delete Unused Media" ""
|
||||||
# $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "%(a)s %(b)s" "%(a)s=$val1,%(b)s=$val2"
|
# $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "%(a)s %(b)s" "%(a)s=$val1,%(b)s=$val2"
|
||||||
|
|
@ -16,7 +16,7 @@ from fluent.syntax.ast import Message, TextElement, Identifier, Pattern
|
||||||
# NOTE: the English text is written into the templates folder of the repo, so must be copied
|
# NOTE: the English text is written into the templates folder of the repo, so must be copied
|
||||||
# into Anki's source tree
|
# into Anki's source tree
|
||||||
|
|
||||||
ftl_filename, key, msgid, repls = sys.argv[1:]
|
ftl_filename, key, msgid_substring, repls = sys.argv[1:]
|
||||||
|
|
||||||
# split up replacements
|
# split up replacements
|
||||||
replacements = []
|
replacements = []
|
||||||
|
|
@ -29,82 +29,49 @@ for repl in repls.split(","):
|
||||||
prefix = os.path.splitext(os.path.basename(ftl_filename))[0]
|
prefix = os.path.splitext(os.path.basename(ftl_filename))[0]
|
||||||
key = f"{prefix}-{key}"
|
key = f"{prefix}-{key}"
|
||||||
|
|
||||||
# returns a string, an array of plurals, or None if there's no translation
|
strings = json.load(open("strings.json", "r"))
|
||||||
def get_msgstr(entry):
|
|
||||||
# non-empty single string?
|
|
||||||
if entry.msgstr:
|
|
||||||
msg = entry.msgstr
|
|
||||||
if replacements:
|
|
||||||
for (old, new) in replacements:
|
|
||||||
msg = msg.replace(old, f"{{{new}}}")
|
|
||||||
return msg.strip()
|
|
||||||
# plural string and non-empty?
|
|
||||||
elif entry.msgstr_plural and entry.msgstr_plural[0]:
|
|
||||||
# convert the dict into a list in the correct order
|
|
||||||
plurals = list(entry.msgstr_plural.items())
|
|
||||||
plurals.sort()
|
|
||||||
# update variables and discard keys
|
|
||||||
adjusted = []
|
|
||||||
for _k, msg in plurals:
|
|
||||||
if replacements:
|
|
||||||
for (old, new) in replacements:
|
|
||||||
msg = msg.replace(old, f"{{{new}}}")
|
|
||||||
adjusted.append(msg.strip())
|
|
||||||
if len(adjusted) > 1 and adjusted[0]:
|
|
||||||
return adjusted
|
|
||||||
else:
|
|
||||||
if adjusted[0]:
|
|
||||||
return adjusted[0]
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
msgids = []
|
||||||
|
if msgid_substring in strings["en"]:
|
||||||
|
# is the ID an exact match?
|
||||||
|
msgids.append(msgid_substring)
|
||||||
|
else:
|
||||||
|
for id in strings["en"].keys():
|
||||||
|
if msgid_substring in id:
|
||||||
|
msgids.append(id)
|
||||||
|
|
||||||
# start by checking the .pot file for the message
|
msgid = None
|
||||||
base = "repo/desktop"
|
if len(msgids) == 0:
|
||||||
pot = os.path.join(base, "anki.pot")
|
|
||||||
pot_cat = polib.pofile(pot)
|
|
||||||
catalogs = []
|
|
||||||
|
|
||||||
# is the ID an exact match?
|
|
||||||
resolved_entry = None
|
|
||||||
for entry in pot_cat:
|
|
||||||
if entry.msgid == msgid:
|
|
||||||
resolved_entry = entry
|
|
||||||
|
|
||||||
# try a substring match, but make sure it doesn't match
|
|
||||||
# multiple items
|
|
||||||
if resolved_entry is None:
|
|
||||||
for entry in pot_cat:
|
|
||||||
if msgid in entry.msgid:
|
|
||||||
if resolved_entry is not None:
|
|
||||||
print("aborting, matched both", resolved_entry.msgid)
|
|
||||||
print("and", entry.msgid)
|
|
||||||
sys.exit(1)
|
|
||||||
resolved_entry = entry
|
|
||||||
|
|
||||||
if resolved_entry is None:
|
|
||||||
print("no IDs matched")
|
print("no IDs matched")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
elif len(msgids) == 1:
|
||||||
|
msgid = msgids[0]
|
||||||
|
else:
|
||||||
|
for c, id in enumerate(msgids):
|
||||||
|
print(f"* {c}: {id}")
|
||||||
|
msgid = msgids[int(input("number to use? "))]
|
||||||
|
|
||||||
msgid = resolved_entry.msgid
|
def transform_entry(entry):
|
||||||
print("matched id", msgid)
|
if isinstance(entry, str):
|
||||||
|
return(transform_string(entry))
|
||||||
|
else:
|
||||||
|
return [transform_string(e) for e in entry]
|
||||||
|
|
||||||
print("loading translations...")
|
def transform_string(msg):
|
||||||
# load the translations from each language
|
for (old, new) in replacements:
|
||||||
langs = [d for d in os.listdir(base) if d != "anki.pot"]
|
msg = msg.replace(old, f"{{{new}}}")
|
||||||
for lang in langs:
|
# strip leading/trailing whitespace
|
||||||
po_path = os.path.join(base, lang, "anki.po")
|
return msg.strip()
|
||||||
cat = polib.pofile(po_path)
|
|
||||||
catalogs.append((lang, cat))
|
|
||||||
|
|
||||||
to_insert = []
|
to_insert = []
|
||||||
for (lang, cat) in catalogs:
|
for lang in strings.keys():
|
||||||
for entry in cat:
|
entry = strings[lang].get(msgid)
|
||||||
if entry.msgid == msgid:
|
if not entry:
|
||||||
translation = get_msgstr(entry)
|
continue
|
||||||
if translation:
|
entry = transform_entry(entry)
|
||||||
print(f"{lang} had translation {translation}")
|
if entry:
|
||||||
to_insert.append((lang, translation))
|
print(f"{lang} had translation {entry}")
|
||||||
break
|
to_insert.append((lang, entry))
|
||||||
|
|
||||||
plurals = json.load(open("plurals.json"))
|
plurals = json.load(open("plurals.json"))
|
||||||
|
|
||||||
|
|
@ -140,9 +107,22 @@ def add_simple_message(fname, key, message):
|
||||||
orig = open(fname).read()
|
orig = open(fname).read()
|
||||||
|
|
||||||
obj = parse(orig)
|
obj = parse(orig)
|
||||||
|
for ent in obj.body:
|
||||||
|
if isinstance(ent, Junk):
|
||||||
|
raise Exception(f"file had junk! {fname} {ent}")
|
||||||
obj.body.append(Message(Identifier(key), Pattern([TextElement(message)])))
|
obj.body.append(Message(Identifier(key), Pattern([TextElement(message)])))
|
||||||
|
|
||||||
modified = serialize(obj)
|
modified = serialize(obj, with_junk=True)
|
||||||
|
# escape leading dots
|
||||||
|
modified = re.sub(r"(?ms)^( +)\.", "\\1{\".\"}", modified)
|
||||||
|
|
||||||
|
# ensure the resulting serialized file is valid by parsing again
|
||||||
|
obj = parse(modified)
|
||||||
|
for ent in obj.body:
|
||||||
|
if isinstance(ent, Junk):
|
||||||
|
raise Exception(f"introduced junk! {fname} {ent}")
|
||||||
|
|
||||||
|
# it's ok, write it out
|
||||||
open(fname, "w").write(modified)
|
open(fname, "w").write(modified)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -158,16 +138,13 @@ def add_message(fname, key, translation):
|
||||||
print()
|
print()
|
||||||
input("proceed? ctrl+c to abort")
|
input("proceed? ctrl+c to abort")
|
||||||
|
|
||||||
# add template first
|
# for each language's translation
|
||||||
if resolved_entry.msgid_plural:
|
|
||||||
original = [resolved_entry.msgid, resolved_entry.msgid_plural]
|
|
||||||
else:
|
|
||||||
original = resolved_entry.msgid
|
|
||||||
|
|
||||||
add_message(ftl_filename, key, original)
|
|
||||||
|
|
||||||
# the each language's translation
|
|
||||||
for lang, translation in to_insert:
|
for lang, translation in to_insert:
|
||||||
|
if lang == "en":
|
||||||
|
# template
|
||||||
|
ftl_path = ftl_filename
|
||||||
|
else:
|
||||||
|
# translation
|
||||||
ftl_path = ftl_filename.replace("templates", lang)
|
ftl_path = ftl_filename.replace("templates", lang)
|
||||||
ftl_dir = os.path.dirname(ftl_path)
|
ftl_dir = os.path.dirname(ftl_path)
|
||||||
|
|
||||||
|
|
|
||||||
59
qt/po/scripts/extract-po-strings.py
Normal file
59
qt/po/scripts/extract-po-strings.py
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import polib
|
||||||
|
|
||||||
|
# Read strings from all .po and .pot files and store them in a JSON file
|
||||||
|
# for quick access.
|
||||||
|
|
||||||
|
# returns a string, an array of plurals, or None if there's no translation
|
||||||
|
def get_msgstr(entry):
    """Return the translated text stored on a gettext catalog entry.

    Args:
        entry: an object exposing ``msgid``, ``msgstr`` and ``msgstr_plural``
            the way the polib entries iterated below do; ``msgstr_plural``
            is read as a mapping from plural index to text.

    Returns:
        The singular translation as a string; a list of plural forms ordered
        by plural index when more than one form is present; the lone form as
        a string when only one is present; or None if there is no
        translation.

    Raises:
        ValueError: if a plural translation contains an empty form.
    """
    # non-empty single string?
    if entry.msgstr:
        return entry.msgstr

    # plural string and non-empty? .get() is used so that a mapping without
    # index 0 counts as untranslated instead of raising KeyError
    plural_map = entry.msgstr_plural
    if not plural_map or not plural_map.get(0):
        return None

    # put the forms in plural-index order and discard the keys
    forms = [text for _index, text in sorted(plural_map.items())]
    if not all(forms):
        # explicit raise rather than assert, so the check survives `python -O`
        raise ValueError(f"partially translated plural for {entry.msgid!r}")

    return forms if len(forms) > 1 else forms[0]
|
||||||
|
|
||||||
|
# Maps language code -> {msgid: translation}, where a translation is either a
# string or a list of plural forms. The English source text goes under "en".
langs = {}

# .pot first
base = "repo/desktop"
pot = os.path.join(base, "anki.pot")
pot_cat = polib.pofile(pot)

english = langs.setdefault("en", {})
for entry in pot_cat:
    if entry.msgid_plural:
        # plural source strings are stored as [singular, plural]
        english[entry.msgid] = [entry.msgid, entry.msgid_plural]
    else:
        english[entry.msgid] = entry.msgid

# then .po files; sorted so the output is the same from run to run
folders = sorted(d for d in os.listdir(base) if d != "anki.pot")
for lang in folders:
    po_path = os.path.join(base, lang, "anki.po")
    if not os.path.isfile(po_path):
        # stray file or folder without a catalog; nothing to read
        continue
    cat = polib.pofile(po_path)
    for entry in cat:
        msgstr = get_msgstr(entry)
        if not msgstr:
            # untranslated entries are left out of the dump
            continue
        langs.setdefault(lang, {})[entry.msgid] = msgstr

# context manager so the file is flushed and closed before we report success
with open("strings.json", "w", encoding="utf-8") as out_file:
    json.dump(langs, out_file)
print("wrote to strings.json")
|
||||||
Loading…
Reference in a new issue