mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-10-31 22:05:58 +00:00
Andrew Senetar
7ba8aa3514
- Format all files with black - Update tox.ini flake8 arguments to be compatible - Add black to requirements-extra.txt - Reduce ignored flake8 rules and fix a few violations
241 lines
8.1 KiB
Python
241 lines
8.1 KiB
Python
import os
|
|
import os.path as op
|
|
import shutil
|
|
import re
|
|
import tempfile
|
|
|
|
import polib
|
|
|
|
from . import pygettext
|
|
from .util import modified_after, dedupe, ensure_folder
|
|
from .build import print_and_do, ensure_empty_folder
|
|
|
|
# Name of the sub-folder, inside each language folder, that holds the .po/.mo catalogs.
LC_MESSAGES = "LC_MESSAGES"

# .po language codes and Cocoa ones don't match one-to-one; these are the .po codes whose Cocoa
# spelling differs.
PO2COCOA = {
    "pl_PL": "pl",
    "pt_BR": "pt-BR",
    "zh_CN": "zh-Hans",
}

# Reverse lookup of PO2COCOA: Cocoa code -> .po code.
COCOA2PO = {cocoa_code: po_code for po_code, cocoa_code in PO2COCOA.items()}
|
|
|
|
|
|
def get_langs(folder):
    """Return the names of the directories located directly inside ``folder``.

    Callers in this module treat each returned name as one language code.
    """
    langs = []
    for entry in os.listdir(folder):
        if op.isdir(op.join(folder, entry)):
            langs.append(entry)
    return langs
|
|
|
|
|
|
def files_with_ext(folder, ext):
    """Return the path of every entry directly inside ``folder`` whose name ends with ``ext``.

    Each path is ``folder`` joined with the entry name. Only the name is tested, so matching
    directories are returned as well; sub-folders are not descended into.
    """
    matches = []
    for fn in os.listdir(folder):
        if fn.endswith(ext):
            matches.append(op.join(folder, fn))
    return matches
|
|
|
|
|
|
def generate_pot(folders, outpath, keywords, merge=False):
    """Run pygettext over every ``.py`` file under ``folders`` to produce ``outpath``.

    :param folders: iterable of folders that are walked recursively for ``.py`` files.
    :param outpath: path of the .pot file to produce.
    :param keywords: forwarded as-is to ``pygettext.main``.
    :param merge: when true and ``outpath`` already exists, extraction goes to a temporary file
        whose entries are then merged into ``outpath`` with ``merge_po_and_preserve``.
        Otherwise pygettext writes directly to ``outpath``.
    """
    if merge and not op.exists(outpath):
        merge = False
    if merge:
        # mkstemp() hands back an already-open descriptor. Only the path is needed, so close
        # the descriptor right away instead of leaking it.
        fd, genpath = tempfile.mkstemp()
        os.close(fd)
    else:
        genpath = outpath
    try:
        pyfiles = []
        for folder in folders:
            for root, _dirs, filenames in os.walk(folder):
                pyfiles += [op.join(root, fn) for fn in filenames if fn.endswith(".py")]
        pygettext.main(pyfiles, outpath=genpath, keywords=keywords)
        if merge:
            merge_po_and_preserve(genpath, outpath)
    finally:
        # Remove the temporary file even if extraction or merging raised.
        if merge:
            os.remove(genpath)
|
|
|
|
|
|
def compile_all_po(base_folder):
    """Compile every .po file in ``base_folder/<lang>/LC_MESSAGES`` into a .mo file.

    Each .mo file is written next to its .po source, under the same base name.
    """
    po_ext = ".po"
    for lang in get_langs(base_folder):
        catalog_dir = op.join(base_folder, lang, LC_MESSAGES)
        for po_path in files_with_ext(catalog_dir, po_ext):
            catalog = polib.pofile(po_path)
            # Swap the trailing ".po" for ".mo".
            catalog.save_as_mofile(po_path[: -len(po_ext)] + ".mo")
|
|
|
|
|
|
def merge_locale_dir(target, mergeinto):
    """Copy the .mo files of ``target`` into the matching language folders of ``mergeinto``.

    Languages found in ``target`` that have no folder in ``mergeinto`` are skipped.
    """
    for lang in get_langs(target):
        if op.exists(op.join(mergeinto, lang)):
            source_dir = op.join(target, lang, LC_MESSAGES)
            for mo_path in files_with_ext(source_dir, ".mo"):
                shutil.copy(mo_path, op.join(mergeinto, lang, LC_MESSAGES))
|
|
|
|
|
|
def merge_pots_into_pos(folder):
    """Merge each .pot file in ``folder`` into the same-named .po file of every language.

    For ``foo.pot``, the file updated for a language is ``folder/<lang>/LC_MESSAGES/foo.po``.
    """
    for pot_path in files_with_ext(folder, ".pot"):
        template = polib.pofile(pot_path)
        po_filename = op.splitext(op.basename(pot_path))[0] + ".po"
        for lang in get_langs(folder):
            catalog = polib.pofile(op.join(folder, lang, LC_MESSAGES, po_filename))
            catalog.merge(template)
            catalog.save()
|
|
|
|
|
|
def merge_po_and_preserve(source, dest):
    """Append to ``dest`` the entries of ``source`` whose msgid it lacks, then save ``dest``.

    Entries already present in ``dest`` are left as they are.
    """
    incoming = polib.pofile(source)
    existing = polib.pofile(dest)
    for entry in incoming:
        # Only entries that are not in dest yet get added.
        if existing.find(entry.msgid) is None:
            existing.append(entry)
    existing.save()
|
|
|
|
|
|
def normalize_all_pos(base_folder):
    """Rewrite every .po file under ``base_folder`` in polib's own output format.

    POs pulled from external sources such as Transifex wrap their lines differently, which
    produces spurious diffs that keep overwriting each other. Loading each file with polib and
    saving it straight back keeps our PO files in polib's format. Call this on freshly pulled
    POs before committing them.
    """
    for lang in get_langs(base_folder):
        po_dir = op.join(base_folder, lang, LC_MESSAGES)
        for po_path in files_with_ext(po_dir, ".po"):
            polib.pofile(po_path).save()
|
|
|
|
|
|
# --- Cocoa
|
|
def all_lproj_paths(folder):
    """Return the path of every ``.lproj`` entry located directly inside ``folder``."""
    lproj_paths = files_with_ext(folder, ".lproj")
    return lproj_paths
|
|
|
|
|
|
def escape_cocoa_strings(s):
    r"""Return ``s`` with backslashes, double quotes and newlines backslash-escaped.

    ``\`` becomes ``\\``, ``"`` becomes ``\"`` and a newline becomes ``\n`` (backslash + n).
    """
    # Each character is looked at exactly once, so the backslashes introduced by the quote and
    # newline escapes are never escaped a second time.
    table = str.maketrans({"\\": "\\\\", '"': '\\"', "\n": "\\n"})
    return s.translate(table)
|
|
|
|
|
|
# Escape sequences written by escape_cocoa_strings, mapped back to the character they stand for.
_COCOA_UNESCAPES = {"\\\\": "\\", '\\"': '"', "\\n": "\n"}
_RE_COCOA_ESCAPE = re.compile(r'\\[\\"n]')


def unescape_cocoa_strings(s):
    r"""Reverse ``escape_cocoa_strings``: decode ``\\``, ``\"`` and ``\n`` escape sequences.

    The string is decoded in a single left-to-right pass. Chaining ``str.replace`` calls is not
    equivalent: an escaped backslash followed by ``n`` would first collapse to backslash + n and
    then be mistaken for an escaped newline, so escaping then unescaping would not round-trip.
    Backslash sequences other than the three above are left untouched.
    """
    return _RE_COCOA_ESCAPE.sub(lambda match: _COCOA_UNESCAPES[match.group(0)], s)
|
|
|
|
|
|
def strings2pot(target, dest):
    """Record the right-hand strings of the .strings file ``target`` as msgids in ``dest``.

    ``dest`` is loaded first when it already exists, otherwise a new PO file is started. Each
    string gets ``(target, "0")`` added to its occurrences, and the result is saved to ``dest``.
    """
    with open(target, "rt", encoding="utf-8") as fp:
        contents = fp.read()
    # We're reading an en.lproj file, so only the right-hand side of each pair matters.
    values = re.findall(r'".*" = "(.*)";', contents)
    po = polib.pofile(dest) if op.exists(dest) else polib.POFile()
    for raw_value in dedupe(values):
        msgid = unescape_cocoa_strings(raw_value)
        entry = po.find(msgid)
        if entry is None:
            entry = polib.POEntry(msgid=msgid)
            po.append(entry)
        # The line number is neither known nor needed, hence the "0".
        entry.occurrences.append((target, "0"))
        entry.occurrences = dedupe(entry.occurrences)
    po.save(dest)
|
|
|
|
|
|
def allstrings2pot(lprojpath, dest, excludes=None):
    """Feed every ``.strings`` file directly inside ``lprojpath`` to ``strings2pot``.

    :param excludes: optional collection of base names (file name without extension) to skip.
    """
    for strings_path in files_with_ext(lprojpath, ".strings"):
        if excludes and op.splitext(op.basename(strings_path))[0] in excludes:
            continue
        strings2pot(strings_path, dest)
|
|
|
|
|
|
def po2strings(pofile, en_strings, dest):
    """Write to ``dest`` a copy of ``en_strings`` translated with the entries of ``pofile``.

    Every right-hand part of the ``"foo" = "bar";`` lines of ``en_strings`` is looked up in
    ``pofile`` and replaced by its translation. Strings with no entry (a warning is printed) or
    with an empty translation are kept as they are.

    Nothing is done when ``modified_after(pofile, dest)`` is false.
    NOTE(review): presumably that means ``dest`` is already up to date; confirm in ``.util``.
    """
    if not modified_after(pofile, dest):
        return
    # Parse the .po file only once we know the output must be regenerated.
    po = polib.pofile(pofile)
    ensure_folder(op.dirname(dest))
    print("Creating {} from {}".format(dest, pofile))
    with open(en_strings, "rt", encoding="utf-8") as fp:
        contents = fp.read()
    # Matches only the value between ` = "` and `";` at the end of a line, so that substitution
    # leaves the key and the surrounding syntax untouched.
    re_trans = re.compile(r'(?<= = ").*(?=";\n)')

    def repl(match):
        s = match.group(0)
        # msgids are stored unescaped in the .po file.
        unescaped = unescape_cocoa_strings(s)
        entry = po.find(unescaped)
        if entry is None:
            print("WARNING: Could not find entry '{}' in .po file".format(s))
            return s
        trans = entry.msgstr
        return escape_cocoa_strings(trans) if trans else s

    contents = re_trans.sub(repl, contents)
    with open(dest, "wt", encoding="utf-8") as fp:
        fp.write(contents)
|
|
|
|
|
|
def generate_cocoa_strings_from_code(code_folder, dest_folder):
    """Generate utf-8, comment-free .strings files from the .m files under ``code_folder``.

    The "genstrings" command is run on all .m files found in ``code_folder``. The strings files
    (their names depend on the localization table used in the source) are placed in
    ``dest_folder``, which is emptied first. genstrings produces utf-16 files with comments, so
    each generated file is then rewritten as utf-8 with the comments removed.
    """
    ensure_empty_folder(dest_folder)
    # "*.m" is quoted so the shell passes the pattern to `find` verbatim instead of expanding it
    # against whatever .m files happen to sit in the current directory.
    print_and_do(
        'genstrings -o "{}" `find "{}" -name "*.m" | xargs`'.format(dest_folder, code_folder)
    )
    for stringsfile in os.listdir(dest_folder):
        stringspath = op.join(dest_folder, stringsfile)
        with open(stringspath, "rt", encoding="utf-16") as fp:
            content = fp.read()
        # Strip /* ... */ comments, then collapse the blank lines they leave behind.
        content = re.sub(r"/\*.*?\*/", "", content)
        content = re.sub(r"\n{2,}", "\n", content)
        # genstrings seems to have problems with the "%" character in strings and inserts
        # (number)$ after it. Find these bogus inserts, whatever their number of digits, and
        # remove them.
        content = re.sub(r"%\d+\$", "%", content)
        with open(stringspath, "wt", encoding="utf-8") as fp:
            fp.write(content)
|
|
|
|
|
|
def generate_cocoa_strings_from_xib(xib_folder):
    """Generate a utf-8 ``.strings`` file next to every ``.xib`` file in ``xib_folder``.

    "ibtool" writes the strings file, which is then converted from utf-16 to utf-8 with "iconv".
    """
    xib_ext = ".xib"
    for xib in files_with_ext(xib_folder, xib_ext):
        # Swap only the trailing extension; ".xib" occurring earlier in the path must stay.
        dest = xib[: -len(xib_ext)] + ".strings"
        # Paths are quoted so that spaces in them survive the shell.
        print_and_do('ibtool "{}" --generate-strings-file "{}"'.format(xib, dest))
        # Convert through a temporary file: piping iconv's output back into the file it is
        # still reading (e.g. with tee) can truncate the input before it has been read.
        print_and_do(
            'iconv -f utf-16 -t utf-8 "{0}" > "{0}.tmp" && mv "{0}.tmp" "{0}"'.format(dest)
        )
|
|
|
|
|
|
def localize_stringsfile(stringsfile, dest_root_folder):
    """Produce one localized copy of ``stringsfile`` per language found in the "locale" folder.

    For each language, the ``ui.po`` catalog under ``locale/<lang>/LC_MESSAGES`` is applied with
    ``po2strings`` and the result goes to ``dest_root_folder/<cocoa lang>.lproj/`` under the
    same file name as ``stringsfile``.
    """
    filename = op.basename(stringsfile)
    for lang in get_langs("locale"):
        # Languages missing from PO2COCOA use the same code on both sides.
        lproj_dir = op.join(dest_root_folder, PO2COCOA.get(lang, lang) + ".lproj")
        ensure_folder(lproj_dir)
        po2strings(
            op.join("locale", lang, "LC_MESSAGES", "ui.po"),
            stringsfile,
            op.join(lproj_dir, filename),
        )
|
|
|
|
|
|
def localize_all_stringsfiles(src_folder, dest_root_folder):
    """Run ``localize_stringsfile`` on every ``.strings`` file directly inside ``src_folder``."""
    for strings_path in files_with_ext(src_folder, ".strings"):
        localize_stringsfile(strings_path, dest_root_folder)
|