dump po data into json to speed up invocations

This commit is contained in:
Damien Elmes 2020-02-27 20:37:24 +10:00
parent 43f5d13ed6
commit 57f9ca787b
3 changed files with 122 additions and 85 deletions

1
qt/po/.gitignore vendored
View file

@ -1,3 +1,4 @@
.build .build
repo repo
ftl ftl
strings.json

View file

@ -6,9 +6,9 @@ import re
import sys import sys
import polib import polib
from fluent.syntax import parse, serialize from fluent.syntax import parse, serialize
from fluent.syntax.ast import Message, TextElement, Identifier, Pattern from fluent.syntax.ast import Message, TextElement, Identifier, Pattern, Junk
# extract a translated string from the gettext catalogs and insert it into ftl # extract a translated string from strings.json and insert it into ftl
# eg: # eg:
# $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "Delete Unused Media" "" # $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "Delete Unused Media" ""
# $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "%(a)s %(b)s" "%(a)s=$val1,%(b)s=$val2" # $ python extract-po-string.py /path/to/templates/media-check.ftl delete-unused "%(a)s %(b)s" "%(a)s=$val1,%(b)s=$val2"
@ -16,7 +16,7 @@ from fluent.syntax.ast import Message, TextElement, Identifier, Pattern
# NOTE: the English text is written into the templates folder of the repo, so must be copied # NOTE: the English text is written into the templates folder of the repo, so must be copied
# into Anki's source tree # into Anki's source tree
ftl_filename, key, msgid, repls = sys.argv[1:] ftl_filename, key, msgid_substring, repls = sys.argv[1:]
# split up replacements # split up replacements
replacements = [] replacements = []
@ -29,82 +29,49 @@ for repl in repls.split(","):
prefix = os.path.splitext(os.path.basename(ftl_filename))[0] prefix = os.path.splitext(os.path.basename(ftl_filename))[0]
key = f"{prefix}-{key}" key = f"{prefix}-{key}"
# returns a string, an array of plurals, or None if there's no translation strings = json.load(open("strings.json", "r"))
def get_msgstr(entry):
# non-empty single string?
if entry.msgstr:
msg = entry.msgstr
if replacements:
for (old, new) in replacements:
msg = msg.replace(old, f"{{{new}}}")
return msg.strip()
# plural string and non-empty?
elif entry.msgstr_plural and entry.msgstr_plural[0]:
# convert the dict into a list in the correct order
plurals = list(entry.msgstr_plural.items())
plurals.sort()
# update variables and discard keys
adjusted = []
for _k, msg in plurals:
if replacements:
for (old, new) in replacements:
msg = msg.replace(old, f"{{{new}}}")
adjusted.append(msg.strip())
if len(adjusted) > 1 and adjusted[0]:
return adjusted
else:
if adjusted[0]:
return adjusted[0]
return None
msgids = []
if msgid_substring in strings["en"]:
# is the ID an exact match?
msgids.append(msgid_substring)
else:
for id in strings["en"].keys():
if msgid_substring in id:
msgids.append(id)
# start by checking the .pot file for the message msgid = None
base = "repo/desktop" if len(msgids) == 0:
pot = os.path.join(base, "anki.pot")
pot_cat = polib.pofile(pot)
catalogs = []
# is the ID an exact match?
resolved_entry = None
for entry in pot_cat:
if entry.msgid == msgid:
resolved_entry = entry
# try a substring match, but make sure it doesn't match
# multiple items
if resolved_entry is None:
for entry in pot_cat:
if msgid in entry.msgid:
if resolved_entry is not None:
print("aborting, matched both", resolved_entry.msgid)
print("and", entry.msgid)
sys.exit(1)
resolved_entry = entry
if resolved_entry is None:
print("no IDs matched") print("no IDs matched")
sys.exit(1) sys.exit(1)
elif len(msgids) == 1:
msgid = msgids[0]
else:
for c, id in enumerate(msgids):
print(f"* {c}: {id}")
msgid = msgids[int(input("number to use? "))]
msgid = resolved_entry.msgid def transform_entry(entry):
print("matched id", msgid) if isinstance(entry, str):
return(transform_string(entry))
else:
return [transform_string(e) for e in entry]
print("loading translations...") def transform_string(msg):
# load the translations from each language for (old, new) in replacements:
langs = [d for d in os.listdir(base) if d != "anki.pot"] msg = msg.replace(old, f"{{{new}}}")
for lang in langs: # strip leading/trailing whitespace
po_path = os.path.join(base, lang, "anki.po") return msg.strip()
cat = polib.pofile(po_path)
catalogs.append((lang, cat))
to_insert = [] to_insert = []
for (lang, cat) in catalogs: for lang in strings.keys():
for entry in cat: entry = strings[lang].get(msgid)
if entry.msgid == msgid: if not entry:
translation = get_msgstr(entry) continue
if translation: entry = transform_entry(entry)
print(f"{lang} had translation {translation}") if entry:
to_insert.append((lang, translation)) print(f"{lang} had translation {entry}")
break to_insert.append((lang, entry))
plurals = json.load(open("plurals.json")) plurals = json.load(open("plurals.json"))
@ -140,9 +107,22 @@ def add_simple_message(fname, key, message):
orig = open(fname).read() orig = open(fname).read()
obj = parse(orig) obj = parse(orig)
for ent in obj.body:
if isinstance(ent, Junk):
raise Exception(f"file had junk! {fname} {ent}")
obj.body.append(Message(Identifier(key), Pattern([TextElement(message)]))) obj.body.append(Message(Identifier(key), Pattern([TextElement(message)])))
modified = serialize(obj) modified = serialize(obj, with_junk=True)
# escape leading dots
modified = re.sub(r"(?ms)^( +)\.", "\\1{\".\"}", modified)
# ensure the resulting serialized file is valid by parsing again
obj = parse(modified)
for ent in obj.body:
if isinstance(ent, Junk):
raise Exception(f"introduced junk! {fname} {ent}")
# it's ok, write it out
open(fname, "w").write(modified) open(fname, "w").write(modified)
@ -158,16 +138,13 @@ def add_message(fname, key, translation):
print() print()
input("proceed? ctrl+c to abort") input("proceed? ctrl+c to abort")
# add template first # for each language's translation
if resolved_entry.msgid_plural:
original = [resolved_entry.msgid, resolved_entry.msgid_plural]
else:
original = resolved_entry.msgid
add_message(ftl_filename, key, original)
# the each language's translation
for lang, translation in to_insert: for lang, translation in to_insert:
if lang == "en":
# template
ftl_path = ftl_filename
else:
# translation
ftl_path = ftl_filename.replace("templates", lang) ftl_path = ftl_filename.replace("templates", lang)
ftl_dir = os.path.dirname(ftl_path) ftl_dir = os.path.dirname(ftl_path)

View file

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import os
import json
import polib
# Read strings from all .po and .pot files and store them in a JSON file
# for quick access.
# returns a string, an array of plurals, or None if there's no translation
def get_msgstr(entry):
    """Return the translation held by a polib entry.

    The result is one of:

    * the entry's ``msgstr`` when it is non-empty;
    * a list of the plural forms ordered by plural index, when plural
      form 0 is non-empty and there is more than one form;
    * the single plural form as a plain string, when there is only one;
    * ``None`` when the entry has no translation.

    Raises:
        ValueError: if plural form 0 is translated but another form is
            empty, i.e. the plural translation is incomplete.
    """
    # non-empty single string?
    if entry.msgstr:
        return entry.msgstr

    # plural string and non-empty? .get() avoids a KeyError should the
    # dict be non-empty but lack form 0.
    plural_forms = entry.msgstr_plural
    if not plural_forms or not plural_forms.get(0):
        return None

    # order the forms by plural index and discard the keys
    forms = [msg for _index, msg in sorted(plural_forms.items())]

    # an explicit raise (rather than assert) survives `python -O` and
    # reports which entry is at fault
    if not all(forms):
        raise ValueError(
            f"incomplete plural translation for {entry.msgid!r}: {forms!r}"
        )

    return forms if len(forms) > 1 else forms[0]
# Maps a language code to {msgid: translation}; a translation is either a
# string or a list of plural forms.
langs = {}

# .pot first: the template's own msgids serve as the English text and are
# stored under "en"
base = "repo/desktop"
pot = os.path.join(base, "anki.pot")
pot_cat = polib.pofile(pot)

for entry in pot_cat:
    if entry.msgid_plural:
        # plural entries are stored as [singular, plural]
        msgstr = [entry.msgid, entry.msgid_plural]
    else:
        msgstr = entry.msgid
    langs.setdefault("en", {})[entry.msgid] = msgstr

# then .po files: every other name in the base folder is treated as a
# language folder containing an anki.po
folders = (d for d in os.listdir(base) if d != "anki.pot")
for lang in folders:
    po_path = os.path.join(base, lang, "anki.po")
    cat = polib.pofile(po_path)
    for entry in cat:
        msgstr = get_msgstr(entry)
        # skip untranslated entries so the JSON only holds real translations
        if not msgstr:
            continue
        langs.setdefault(lang, {})[entry.msgid] = msgstr

# use a context manager so the file is flushed and closed deterministically
with open("strings.json", "w") as json_file:
    json.dump(langs, json_file)
print("wrote to strings.json")