mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
209 lines
8.0 KiB
Python
209 lines
8.0 KiB
Python
import os
|
|
import os.path as op
|
|
import shutil
|
|
import re
|
|
import tempfile
|
|
|
|
import polib
|
|
|
|
from . import pygettext
|
|
from .util import modified_after, dedupe, ensure_folder, ensure_file
|
|
from .build import print_and_do, ensure_empty_folder, copy
|
|
|
|
# Name of the sub-folder, inside each language folder, where the .po/.mo files are looked up.
LC_MESSAGES = 'LC_MESSAGES'

# .po language codes and Cocoa ones don't map one-to-one. Only the codes that differ are listed
# here; callers fall back to the .po code itself for anything that is missing.
PO2COCOA = dict(
    pl_PL='pl',
    pt_BR='pt-BR',
    zh_CN='zh-Hans',
)

# Reverse lookup of PO2COCOA: Cocoa language code -> .po language code.
COCOA2PO = dict(zip(PO2COCOA.values(), PO2COCOA.keys()))
|
|
|
|
def get_langs(folder):
    """Return the names of the direct sub-directories of ``folder``.

    Each sub-directory name is treated as a language code by the callers in this module. Plain
    files are ignored. Names come back in ``os.listdir()`` order.
    """
    langs = []
    for entry in os.listdir(folder):
        if op.isdir(op.join(folder, entry)):
            langs.append(entry)
    return langs
|
|
|
|
def files_with_ext(folder, ext):
    """Return the paths of the entries directly inside ``folder`` whose name ends with ``ext``.

    The returned paths are ``folder`` joined with the entry name. The match is a plain
    ``str.endswith()`` test, so ``ext`` should include the leading dot. Sub-folders are not
    searched.
    """
    matches = []
    for name in os.listdir(folder):
        if name.endswith(ext):
            matches.append(op.join(folder, name))
    return matches
|
|
|
|
def generate_pot(folders, outpath, keywords, merge=False):
    """Extract translatable strings from the ``.py`` files under ``folders`` into ``outpath``.

    Every folder in ``folders`` is walked recursively; all ``.py`` files found are handed to
    ``pygettext.main()`` together with ``keywords``.

    When ``merge`` is true and ``outpath`` already exists, extraction goes to a temporary file
    which is then merged into ``outpath`` with ``merge_po_and_preserve()``, so that entries
    already present in ``outpath`` are kept. Otherwise ``outpath`` itself is passed to pygettext
    as the output path.
    """
    if merge and not op.exists(outpath):
        # Nothing to merge into: generate straight to outpath.
        merge = False
    if merge:
        # mkstemp() hands back an *open* OS-level file descriptor along with the path. We only
        # need the path, so close the descriptor right away instead of leaking it.
        fd, genpath = tempfile.mkstemp()
        os.close(fd)
    else:
        genpath = outpath
    try:
        pyfiles = []
        for folder in folders:
            for root, _dirs, filenames in os.walk(folder):
                pyfiles += [op.join(root, fn) for fn in filenames if fn.endswith('.py')]
        pygettext.main(pyfiles, outpath=genpath, keywords=keywords)
        if merge:
            merge_po_and_preserve(genpath, outpath)
    finally:
        if merge:
            # Always clean up the temporary file, even if extraction or merging failed.
            os.remove(genpath)
|
|
|
|
def compile_all_po(base_folder):
    """Compile every .po file under ``base_folder`` into a .mo file placed next to it.

    For each language folder, the .po files are looked up in
    ``<base_folder>/<lang>/LC_MESSAGES``; ``foo.po`` produces ``foo.mo`` in the same folder.
    """
    po_ext = '.po'
    for lang in get_langs(base_folder):
        catalog_dir = op.join(base_folder, lang, LC_MESSAGES)
        for po_path in files_with_ext(catalog_dir, po_ext):
            # Swap the trailing ".po" for ".mo".
            mo_path = po_path[:-len(po_ext)] + '.mo'
            polib.pofile(po_path).save_as_mofile(mo_path)
|
|
|
|
def merge_locale_dir(target, mergeinto):
    """Copy the .mo files of ``target`` into the matching language folders of ``mergeinto``.

    Both arguments are locale folders laid out as ``<lang>/LC_MESSAGES/*.mo``. Languages of
    ``target`` that have no folder in ``mergeinto`` are skipped.
    """
    for lang in get_langs(target):
        dest_lang_dir = op.join(mergeinto, lang)
        if op.exists(dest_lang_dir):
            dest_dir = op.join(dest_lang_dir, LC_MESSAGES)
            src_dir = op.join(target, lang, LC_MESSAGES)
            for mo_path in files_with_ext(src_dir, '.mo'):
                shutil.copy(mo_path, dest_dir)
|
|
|
|
def merge_pots_into_pos(folder):
    """Merge each .pot file of ``folder`` into the same-named .po file of every language.

    For ``foo.pot``, the file that gets merged and saved for a language is
    ``<folder>/<lang>/LC_MESSAGES/foo.po``.
    """
    for pot_path in files_with_ext(folder, '.pot'):
        template = polib.pofile(pot_path)
        po_name = op.splitext(op.basename(pot_path))[0] + '.po'
        for lang in get_langs(folder):
            catalog = polib.pofile(op.join(folder, lang, LC_MESSAGES, po_name))
            catalog.merge(template)
            catalog.save()
|
|
|
|
def merge_po_and_preserve(source, dest):
    """Add to ``dest`` the entries of ``source`` that it lacks, leaving existing ones intact.

    ``source`` and ``dest`` are paths to po files. An entry counts as already present when
    ``dest`` has an entry with the same msgid. ``dest`` is saved in place afterwards.
    """
    incoming = polib.pofile(source)
    merged = polib.pofile(dest)
    for entry in incoming:
        # Only msgids that dest doesn't know about yet are added.
        if merged.find(entry.msgid) is None:
            merged.append(entry)
    merged.save()
|
|
|
|
def normalize_all_pos(base_folder):
    """Rewrite every .po file under ``base_folder`` in polib's own format.

    POs pulled from external sources, such as Transifex, wrap lines differently than polib does.
    That produces spurious diffs which keep overwriting each other from one pull to the next.

    Our PO files stick to polib's format: call this function on freshly pulled POs before
    committing them. Each file in ``<base_folder>/<lang>/LC_MESSAGES`` is simply loaded and saved
    back in place.
    """
    for lang in get_langs(base_folder):
        catalog_dir = op.join(base_folder, lang, LC_MESSAGES)
        for po_path in files_with_ext(catalog_dir, '.po'):
            # A load/save round trip is all it takes to re-wrap the file polib's way.
            polib.pofile(po_path).save()
|
|
|
|
#--- Cocoa
|
|
def all_lproj_paths(folder):
    """Return the paths of all entries directly inside ``folder`` whose name ends with .lproj."""
    lproj_ext = '.lproj'
    return files_with_ext(folder, lproj_ext)
|
|
|
|
def escape_cocoa_strings(s):
    """Return ``s`` with backslashes, double quotes and newlines backslash-escaped.

    A backslash becomes two backslashes, ``"`` becomes backslash + ``"`` and a newline becomes
    backslash + ``n``. Everything else is left as is.
    """
    escapes = str.maketrans({
        '\\': '\\\\',
        '"': '\\"',
        '\n': '\\n',
    })
    return s.translate(escapes)
|
|
|
|
def unescape_cocoa_strings(s):
    """Reverse ``escape_cocoa_strings()``: decode the backslash escapes found in ``s``.

    Backslash + backslash becomes a single backslash, backslash + ``"`` becomes ``"`` and
    backslash + ``n`` becomes a newline. Any other backslash sequence is left untouched.

    The decoding is done in one left-to-right pass. Chaining ``str.replace()`` calls would decode
    some sequences twice: an escaped backslash followed by ``n`` would first turn into
    backslash + ``n`` and then, wrongly, into a newline.
    """
    decoded = {'\\': '\\', '"': '"', 'n': '\n'}
    return re.sub(r'\\([\\"n])', lambda match: decoded[match.group(1)], s)
|
|
|
|
def strings2pot(target, dest):
    """Add the strings of the .strings file ``target`` as msgids to the po file ``dest``.

    ``dest`` is loaded first if it already exists, otherwise a new po file is started. Each
    distinct string gets an entry (created if missing) with ``target`` recorded among its
    occurrences. The result is saved to ``dest``.
    """
    with open(target, 'rt', encoding='utf-8') as fp:
        contents = fp.read()
    # We're reading an en.lproj file: only the right-hand side of `"key" = "value";` matters.
    values = re.findall(r'".*" = "(.*)";', contents)
    po = polib.pofile(dest) if op.exists(dest) else polib.POFile()
    for raw in dedupe(values):
        msgid = unescape_cocoa_strings(raw)
        entry = po.find(msgid)
        if entry is None:
            entry = polib.POEntry(msgid=msgid)
            po.append(entry)
        # We don't know or care about a line number, so '0' is recorded.
        entry.occurrences.append((target, '0'))
        entry.occurrences = dedupe(entry.occurrences)
    po.save(dest)
|
|
|
|
def allstrings2pot(lprojpath, dest, excludes=None):
    """Run ``strings2pot()`` into ``dest`` for every .strings file found in ``lprojpath``.

    ``excludes``, when given and non-empty, is a collection of file base names (no folder, no
    extension) to leave out.
    """
    for strings_path in files_with_ext(lprojpath, '.strings'):
        if excludes:
            base_name = op.splitext(op.basename(strings_path))[0]
            if base_name in excludes:
                continue
        strings2pot(strings_path, dest)
|
|
|
|
def po2strings(pofile, en_strings, dest):
    """Write to ``dest`` a copy of ``en_strings`` translated with the entries of ``pofile``.

    The right-hand part of every ``"foo" = "bar";`` line of ``en_strings`` is looked up, once
    unescaped, as a msgid in ``pofile`` and replaced by the escaped translation. Values with no
    entry (a warning is printed) or with an empty translation are kept as they are.

    Nothing is done when ``modified_after(pofile, dest)`` is false (presumably: ``dest`` is
    already up to date with respect to ``pofile`` -- confirm against ``util.modified_after``).
    """
    # Check staleness first so that we don't parse the .po file for nothing.
    if not modified_after(pofile, dest):
        return
    po = polib.pofile(pofile)
    ensure_folder(op.dirname(dest))
    print("Creating {} from {}".format(dest, pofile))
    with open(en_strings, 'rt', encoding='utf-8') as fp:
        contents = fp.read()
    # Matches only the value between ` = "` and `";` + newline, so that the substitution leaves
    # the rest of the line untouched.
    re_trans = re.compile(r'(?<= = ").*(?=";\n)')

    def repl(match):
        s = match.group(0)
        unescaped = unescape_cocoa_strings(s)
        entry = po.find(unescaped)
        if entry is None:
            print("WARNING: Could not find entry '{}' in .po file".format(s))
            return s
        trans = entry.msgstr
        # An empty msgstr means "not translated": keep the original text.
        return escape_cocoa_strings(trans) if trans else s

    contents = re_trans.sub(repl, contents)
    with open(dest, 'wt', encoding='utf-8') as fp:
        fp.write(contents)
|
|
|
|
def generate_cocoa_strings_from_code(code_folder, dest_folder):
    """Generate .strings files in ``dest_folder`` from the .m files found under ``code_folder``.

    Uses the "genstrings" command. The names of the resulting files depend on the localization
    tables used in the source. genstrings produces utf-16 files with comments, so once they are
    generated, every file in ``dest_folder`` is converted to utf-8 and stripped of its comments.

    ``dest_folder`` is first passed to ``ensure_empty_folder()``.
    """
    ensure_empty_folder(dest_folder)
    # The "*.m" pattern is quoted so that it reaches `find` as is, rather than being expanded by
    # the shell against the current directory.
    print_and_do('genstrings -o "{}" `find "{}" -name "*.m" | xargs`'.format(dest_folder, code_folder))
    for stringsfile in os.listdir(dest_folder):
        stringspath = op.join(dest_folder, stringsfile)
        with open(stringspath, 'rt', encoding='utf-16') as fp:
            content = fp.read()
        # Raw strings: `\*`, `\d` and `\$` are regex escapes, not Python string escapes.
        content = re.sub(r'/\*.*?\*/', '', content)
        # Removing the comments leaves runs of blank lines behind; collapse them.
        content = re.sub(r'\n{2,}', '\n', content)
        # I have no idea why, but genstrings seems to have problems with "%" character in strings
        # and inserts (number)$ after it. Find these bogus inserts and remove them.
        content = re.sub(r'%\d\$', '%', content)
        with open(stringspath, 'wt', encoding='utf-8') as fp:
            fp.write(content)
|
|
|
|
def generate_cocoa_strings_from_xib(xib_folder):
    """Generate a utf-8 .strings file next to every .xib file found in ``xib_folder``.

    ``Foo.xib`` yields ``Foo.strings`` in the same folder. The file is produced by the "ibtool"
    command and then converted from utf-16 to utf-8 in place.
    """
    xib_ext = '.xib'
    xibs = [op.join(xib_folder, fn) for fn in os.listdir(xib_folder) if fn.endswith(xib_ext)]
    for xib in xibs:
        # Only swap the trailing extension: str.replace() would also rewrite any other ".xib"
        # occurring earlier in the path.
        dest = xib[:-len(xib_ext)] + '.strings'
        # Paths are quoted so that spaces in them don't split the shell arguments.
        print_and_do('ibtool "{}" --generate-strings-file "{}"'.format(xib, dest))
        if not op.exists(dest):
            # ibtool didn't produce anything; there's nothing to convert.
            continue
        # Convert to utf-8 here rather than with `iconv f | tee f`: in that pipeline, tee may
        # truncate the file before iconv has read it. newline='' keeps line endings untouched.
        with open(dest, 'rt', encoding='utf-16', newline='') as fp:
            content = fp.read()
        with open(dest, 'wt', encoding='utf-8', newline='') as fp:
            fp.write(content)
|
|
|
|
def localize_stringsfile(stringsfile, dest_root_folder):
    """Create a translated copy of ``stringsfile`` for each language of the ``locale`` folder.

    The ``locale`` folder is looked up relative to the current working directory. For each
    language, translations are read from ``locale/<lang>/LC_MESSAGES/ui.po`` and the result goes
    to ``<dest_root_folder>/<cocoa lang>.lproj/<name of stringsfile>``, where the Cocoa language
    code comes from ``PO2COCOA`` (falling back to the .po code itself).
    """
    name = op.basename(stringsfile)
    for lang in get_langs('locale'):
        lproj = op.join(dest_root_folder, PO2COCOA.get(lang, lang) + '.lproj')
        ensure_folder(lproj)
        po_path = op.join('locale', lang, 'LC_MESSAGES', 'ui.po')
        po2strings(po_path, stringsfile, op.join(lproj, name))
|
|
|
|
def localize_all_stringsfiles(src_folder, dest_root_folder):
    """Run ``localize_stringsfile()`` on every .strings file directly inside ``src_folder``.

    ``dest_root_folder`` is passed through unchanged as the destination root.
    """
    for fn in os.listdir(src_folder):
        if fn.endswith('.strings'):
            localize_stringsfile(op.join(src_folder, fn), dest_root_folder)
|