dupeguru/hscommon/loc.py

241 lines
8.1 KiB
Python
Raw Normal View History

2019-09-09 19:54:28 -05:00
import os
import os.path as op
import shutil
import re
import tempfile
import polib
from . import pygettext
from .util import modified_after, dedupe, ensure_folder
from .build import print_and_do, ensure_empty_folder
2019-09-09 19:54:28 -05:00
# Name of the per-language sub-folder in which the .po/.mo files are looked up.
LC_MESSAGES = "LC_MESSAGES"

# There isn't a 1-on-1 exact fit between .po language codes and cocoa ones.
# Languages absent from this table use the same code on both sides.
PO2COCOA = {
    "pl_PL": "pl",
    "pt_BR": "pt-BR",
    "zh_CN": "zh-Hans",
}

# Reverse lookup: cocoa language code -> .po language code.
COCOA2PO = dict((cocoa_code, po_code) for po_code, cocoa_code in PO2COCOA.items())
2019-09-09 19:54:28 -05:00
def get_langs(folder):
    """Return the names of the immediate sub-folders of ``folder``.

    Callers treat each returned name as a language code. Plain files are
    skipped. The order is whatever ``os.listdir`` yields.
    """
    langs = []
    for entry in os.listdir(folder):
        if op.isdir(op.join(folder, entry)):
            langs.append(entry)
    return langs
2019-09-09 19:54:28 -05:00
def files_with_ext(folder, ext):
    """Return the paths of the entries of ``folder`` whose name ends with ``ext``.

    Only the direct content of ``folder`` is looked at (no recursion), and each
    returned path is ``folder`` joined with the entry name.
    """
    matches = []
    for name in os.listdir(folder):
        if name.endswith(ext):
            matches.append(op.join(folder, name))
    return matches
2019-09-09 19:54:28 -05:00
def generate_pot(folders, outpath, keywords, merge=False):
    """Generate a .pot file at ``outpath`` from the .py files found in ``folders``.

    ``folders`` is an iterable of folders that are walked recursively; every
    file whose name ends with ".py" is handed to ``pygettext.main`` together
    with ``keywords``.

    When ``merge`` is true and ``outpath`` already exists, the extraction goes
    to a temporary file whose new entries are then added to ``outpath`` with
    ``merge_po_and_preserve`` (existing entries are kept). Otherwise
    ``outpath`` is written directly.
    """
    # Merging only makes sense if there is something to merge into.
    merge = bool(merge) and op.exists(outpath)

    pyfiles = []
    for folder in folders:
        for root, _dirs, filenames in os.walk(folder):
            pyfiles += [op.join(root, fn) for fn in filenames if fn.endswith(".py")]

    if not merge:
        pygettext.main(pyfiles, outpath=outpath, keywords=keywords)
        return

    # mkstemp() hands back an already-open OS-level descriptor. We only need
    # the path, so close the descriptor right away instead of leaking it.
    fd, genpath = tempfile.mkstemp()
    os.close(fd)
    try:
        pygettext.main(pyfiles, outpath=genpath, keywords=keywords)
        merge_po_and_preserve(genpath, outpath)
    finally:
        # Don't leave the temporary file behind, even when extraction or
        # merging fails.
        os.remove(genpath)
2019-09-09 19:54:28 -05:00
def compile_all_po(base_folder):
    """Compile each .po file under ``base_folder`` into a .mo file next to it.

    The files are looked up in ``<base_folder>/<lang>/LC_MESSAGES`` for every
    language folder returned by ``get_langs``.
    """
    for lang in get_langs(base_folder):
        lang_folder = op.join(base_folder, lang, LC_MESSAGES)
        for po_path in files_with_ext(lang_folder, ".po"):
            # Same path with the ".po" suffix swapped for ".mo".
            mo_path = po_path[: -len(".po")] + ".mo"
            polib.pofile(po_path).save_as_mofile(mo_path)
2019-09-09 19:54:28 -05:00
def merge_locale_dir(target, mergeinto):
    """Copy the .mo files of ``target`` into the matching folders of ``mergeinto``.

    Only languages that already have a folder in ``mergeinto`` are handled;
    the others are silently skipped.
    """
    for lang in get_langs(target):
        if op.exists(op.join(mergeinto, lang)):
            source_folder = op.join(target, lang, LC_MESSAGES)
            for mofile in files_with_ext(source_folder, ".mo"):
                shutil.copy(mofile, op.join(mergeinto, lang, LC_MESSAGES))
2019-09-09 19:54:28 -05:00
def merge_pots_into_pos(folder):
    """Merge every .pot file of ``folder`` into the same-named .po of each language.

    For ``foo.pot``, the file updated for a language is
    ``<folder>/<lang>/LC_MESSAGES/foo.po``. It is merged with the template and
    saved in place.
    """
    for pot_path in files_with_ext(folder, ".pot"):
        template = polib.pofile(pot_path)
        base_name, _ext = op.splitext(op.basename(pot_path))
        po_name = base_name + ".po"
        for lang in get_langs(folder):
            lang_po = polib.pofile(op.join(folder, lang, LC_MESSAGES, po_name))
            lang_po.merge(template)
            lang_po.save()
2019-09-09 19:54:28 -05:00
def merge_po_and_preserve(source, dest):
    """Add the entries of ``source`` that ``dest`` lacks, then save ``dest``.

    Entries whose msgid is already present in ``dest`` are left untouched, so
    nothing that exists in ``dest`` is modified or removed.
    """
    incoming = polib.pofile(source)
    existing = polib.pofile(dest)
    for entry in incoming:
        # Only brand new msgids are added; known ones keep their current state.
        if existing.find(entry.msgid) is None:
            existing.append(entry)
    existing.save()
2019-09-09 19:54:28 -05:00
def normalize_all_pos(base_folder):
    """Rewrite every .po file under ``base_folder`` in polib's own format.

    POs pulled from external sources, such as Transifex, wrap lines differently
    than polib does. That yields spurious diffs which keep overwriting each
    other from one pull to the next.

    Our PO files keep polib's format: call this on freshly pulled POs, before
    committing them, so that they are in the right format.
    """
    for lang in get_langs(base_folder):
        lang_folder = op.join(base_folder, lang, LC_MESSAGES)
        for po_path in files_with_ext(lang_folder, ".po"):
            # A plain load/save round-trip is what re-formats the file.
            polib.pofile(po_path).save()
# --- Cocoa
2019-09-09 19:54:28 -05:00
def all_lproj_paths(folder):
    """Return the paths of all entries of ``folder`` whose name ends with ".lproj"."""
    lproj_paths = files_with_ext(folder, ".lproj")
    return lproj_paths
2019-09-09 19:54:28 -05:00
def escape_cocoa_strings(s):
    """Escape ``s`` so it can sit between the double quotes of a .strings entry.

    Backslashes, double quotes and newlines are replaced by their
    backslash-escaped form.
    """
    # Backslashes go first, otherwise the ones added by the next two
    # replacements would get doubled as well.
    replacements = (
        ("\\", "\\\\"),
        ('"', '\\"'),
        ("\n", "\\n"),
    )
    for raw, escaped in replacements:
        s = s.replace(raw, escaped)
    return s
2019-09-09 19:54:28 -05:00
def unescape_cocoa_strings(s):
    """Reverse ``escape_cocoa_strings``: decode ``\\\\``, ``\\"`` and ``\\n``.

    The string is decoded in a single left-to-right pass. Chaining
    ``str.replace`` calls is not a correct inverse of the escaping: an escaped
    backslash followed by the letter "n" would first collapse to ``\\n`` and
    then be mistaken for an escaped newline.

    Any other backslash sequence is left untouched.
    """
    decoded = {"\\\\": "\\", '\\"': '"', "\\n": "\n"}
    return re.sub(r'\\[\\"n]', lambda match: decoded[match.group(0)], s)
2019-09-09 19:54:28 -05:00
def strings2pot(target, dest):
    """Add the strings of the .strings file ``target`` to the .pot file ``dest``.

    ``dest`` is loaded if it exists and created otherwise. Every distinct
    righthand part found in ``target`` becomes a msgid (unless already there),
    and ``target`` is recorded as one of its occurrences.
    """
    with open(target, "rt", encoding="utf-8") as fp:
        contents = fp.read()
    # We're reading an en.lproj file. We only care about the righthand part of the translation.
    righthand_parts = re.findall(r'".*" = "(.*)";', contents)
    po = polib.pofile(dest) if op.exists(dest) else polib.POFile()
    for escaped in dedupe(righthand_parts):
        msgid = unescape_cocoa_strings(escaped)
        entry = po.find(msgid)
        if entry is None:
            entry = polib.POEntry(msgid=msgid)
            po.append(entry)
        # we don't know or care about a line number so we put 0
        entry.occurrences.append((target, "0"))
        entry.occurrences = dedupe(entry.occurrences)
    po.save(dest)
2019-09-09 19:54:28 -05:00
def allstrings2pot(lprojpath, dest, excludes=None):
    """Run ``strings2pot`` on every .strings file of ``lprojpath``.

    ``excludes``, when given, is a collection of file names *without
    extension*; the matching .strings files are skipped.
    """
    for strings_path in files_with_ext(lprojpath, ".strings"):
        if excludes:
            table_name, _ext = op.splitext(op.basename(strings_path))
            if table_name in excludes:
                continue
        strings2pot(strings_path, dest)
2019-09-09 19:54:28 -05:00
def po2strings(pofile, en_strings, dest):
    """Write to ``dest`` a copy of ``en_strings`` translated with ``pofile``.

    The righthand part of each ``"foo" = "bar";`` entry of ``en_strings`` is
    replaced by its translation from ``pofile``. An entry that is unknown to
    the .po file (a warning is printed) or that has an empty translation is
    kept as is. Nothing is written when ``modified_after(pofile, dest)`` is
    false.
    """
    po = polib.pofile(pofile)
    if not modified_after(pofile, dest):
        return
    ensure_folder(op.dirname(dest))
    print("Creating {} from {}".format(dest, pofile))
    with open(en_strings, "rt", encoding="utf-8") as fp:
        contents = fp.read()

    def translate(match):
        # `original` is still in its escaped, on-disk form.
        original = match.group(0)
        entry = po.find(unescape_cocoa_strings(original))
        if entry is None:
            print("WARNING: Could not find entry '{}' in .po file".format(original))
            return original
        if entry.msgstr:
            return escape_cocoa_strings(entry.msgstr)
        # Untranslated entry: fall back on the original string.
        return original

    # Matches only what sits between `= "` and `";` at the end of a line.
    translated = re.sub(r'(?<= = ").*(?=";\n)', translate, contents)
    with open(dest, "wt", encoding="utf-8") as fp:
        fp.write(translated)
2019-09-09 19:54:28 -05:00
def generate_cocoa_strings_from_code(code_folder, dest_folder):
    """Generate cleaned-up .strings files from the .m files of ``code_folder``.

    Uses the "genstrings" command; the resulting files (their name depends on
    the localization table used in the source) are placed in ``dest_folder``,
    which is emptied first.

    genstrings produces utf-16 files with comments. Once generated, each file
    is rewritten as utf-8 with the comments removed.
    """
    ensure_empty_folder(dest_folder)
    command = 'genstrings -o "{}" `find "{}" -name *.m | xargs`'.format(
        dest_folder, code_folder
    )
    print_and_do(command)
    for filename in os.listdir(dest_folder):
        path = op.join(dest_folder, filename)
        with open(path, "rt", encoding="utf-16") as fp:
            text = fp.read()
        # Drop the /* ... */ comments, then squeeze the blank lines they leave.
        text = re.sub(r"/\*.*?\*/", "", text)
        text = re.sub(r"\n{2,}", "\n", text)
        # I have no idea why, but genstrings seems to have problems with "%" character in strings
        # and inserts (number)$ after it. Find these bogus inserts and remove them.
        text = re.sub(r"%\d\$", "%", text)
        with open(path, "wt", encoding="utf-8") as fp:
            fp.write(text)
2019-09-09 19:54:28 -05:00
def generate_cocoa_strings_from_xib(xib_folder):
    """Generate a utf-8 .strings file next to every .xib file of ``xib_folder``.

    Each file is produced with the "ibtool" command and then converted from
    utf-16 to utf-8 in place.
    """
    for fn in os.listdir(xib_folder):
        if not fn.endswith(".xib"):
            continue
        xib = op.join(xib_folder, fn)
        # Swap the extension only; str.replace() would also rewrite a ".xib"
        # appearing anywhere else in the path.
        dest = op.splitext(xib)[0] + ".strings"
        print_and_do("ibtool {} --generate-strings-file {}".format(xib, dest))
        # Convert in Python rather than with `iconv f | tee f`: in that
        # pipeline tee truncates the file while iconv may still be reading it,
        # which can leave an empty or cut-short result.
        # NOTE(review): this raises if ibtool did not produce `dest`; whether
        # the shell pipeline failed loudly depends on print_and_do - confirm.
        with open(dest, "rt", encoding="utf-16") as fp:
            content = fp.read()
        with open(dest, "wt", encoding="utf-8") as fp:
            fp.write(content)
2019-09-09 19:54:28 -05:00
def localize_stringsfile(stringsfile, dest_root_folder):
    """Create a translated copy of ``stringsfile`` for each language in "locale".

    For every language folder of the "locale" folder (relative to the current
    working directory), the translations of its "ui.po" file are applied and
    the result goes to ``<dest_root_folder>/<cocoa lang>.lproj/``, under the
    same file name as ``stringsfile``.
    """
    filename = op.basename(stringsfile)
    for lang in get_langs("locale"):
        # Cocoa doesn't always use the same language code as the .po side.
        lproj_name = PO2COCOA.get(lang, lang) + ".lproj"
        lproj_folder = op.join(dest_root_folder, lproj_name)
        ensure_folder(lproj_folder)
        po_path = op.join("locale", lang, "LC_MESSAGES", "ui.po")
        po2strings(po_path, stringsfile, op.join(lproj_folder, filename))
2019-09-09 19:54:28 -05:00
def localize_all_stringsfiles(src_folder, dest_root_folder):
    """Run ``localize_stringsfile`` on every .strings file of ``src_folder``."""
    for fn in os.listdir(src_folder):
        if not fn.endswith(".strings"):
            continue
        localize_stringsfile(op.join(src_folder, fn), dest_root_folder)