1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2024-11-16 20:29:02 +00:00
dupeguru/hscommon/loc.py
Andrew Senetar 7ba8aa3514
Format files with black
- Format all files with black
- Update tox.ini flake8 arguments to be compatible
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
2019-12-31 20:16:27 -06:00

241 lines
8.1 KiB
Python

import os
import os.path as op
import shutil
import re
import tempfile
import polib
from . import pygettext
from .util import modified_after, dedupe, ensure_folder
from .build import print_and_do, ensure_empty_folder
LC_MESSAGES = "LC_MESSAGES"
# There isn't a 1-on-1 exact fit between .po language codes and cocoa ones
PO2COCOA = {
"pl_PL": "pl",
"pt_BR": "pt-BR",
"zh_CN": "zh-Hans",
}
COCOA2PO = {v: k for k, v in PO2COCOA.items()}
def get_langs(folder):
return [name for name in os.listdir(folder) if op.isdir(op.join(folder, name))]
def files_with_ext(folder, ext):
return [op.join(folder, fn) for fn in os.listdir(folder) if fn.endswith(ext)]
def generate_pot(folders, outpath, keywords, merge=False):
if merge and not op.exists(outpath):
merge = False
if merge:
_, genpath = tempfile.mkstemp()
else:
genpath = outpath
pyfiles = []
for folder in folders:
for root, dirs, filenames in os.walk(folder):
keep = [fn for fn in filenames if fn.endswith(".py")]
pyfiles += [op.join(root, fn) for fn in keep]
pygettext.main(pyfiles, outpath=genpath, keywords=keywords)
if merge:
merge_po_and_preserve(genpath, outpath)
os.remove(genpath)
def compile_all_po(base_folder):
langs = get_langs(base_folder)
for lang in langs:
pofolder = op.join(base_folder, lang, LC_MESSAGES)
pofiles = files_with_ext(pofolder, ".po")
for pofile in pofiles:
p = polib.pofile(pofile)
p.save_as_mofile(pofile[:-3] + ".mo")
def merge_locale_dir(target, mergeinto):
langs = get_langs(target)
for lang in langs:
if not op.exists(op.join(mergeinto, lang)):
continue
mofolder = op.join(target, lang, LC_MESSAGES)
mofiles = files_with_ext(mofolder, ".mo")
for mofile in mofiles:
shutil.copy(mofile, op.join(mergeinto, lang, LC_MESSAGES))
def merge_pots_into_pos(folder):
# We're going to take all pot files in `folder` and for each lang, merge it with the po file
# with the same name.
potfiles = files_with_ext(folder, ".pot")
for potfile in potfiles:
refpot = polib.pofile(potfile)
refname = op.splitext(op.basename(potfile))[0]
for lang in get_langs(folder):
po = polib.pofile(op.join(folder, lang, LC_MESSAGES, refname + ".po"))
po.merge(refpot)
po.save()
def merge_po_and_preserve(source, dest):
# Merges source entries into dest, but keep old entries intact
sourcepo = polib.pofile(source)
destpo = polib.pofile(dest)
for entry in sourcepo:
if destpo.find(entry.msgid) is not None:
# The entry is already there
continue
destpo.append(entry)
destpo.save()
def normalize_all_pos(base_folder):
"""Normalize the format of .po files in base_folder.
When getting POs from external sources, such as Transifex, we end up with spurious diffs because
of a difference in the way line wrapping is handled. It wouldn't be a big deal if it happened
once, but these spurious diffs keep overwriting each other, and it's annoying.
Our PO files will keep polib's format. Call this function to ensure that freshly pulled POs
are of the right format before committing them.
"""
langs = get_langs(base_folder)
for lang in langs:
pofolder = op.join(base_folder, lang, LC_MESSAGES)
pofiles = files_with_ext(pofolder, ".po")
for pofile in pofiles:
p = polib.pofile(pofile)
p.save()
# --- Cocoa
def all_lproj_paths(folder):
return files_with_ext(folder, ".lproj")
def escape_cocoa_strings(s):
return s.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
def unescape_cocoa_strings(s):
return s.replace("\\\\", "\\").replace('\\"', '"').replace("\\n", "\n")
def strings2pot(target, dest):
with open(target, "rt", encoding="utf-8") as fp:
contents = fp.read()
# We're reading an en.lproj file. We only care about the righthand part of the translation.
re_trans = re.compile(r'".*" = "(.*)";')
strings = re_trans.findall(contents)
if op.exists(dest):
po = polib.pofile(dest)
else:
po = polib.POFile()
for s in dedupe(strings):
s = unescape_cocoa_strings(s)
entry = po.find(s)
if entry is None:
entry = polib.POEntry(msgid=s)
po.append(entry)
# we don't know or care about a line number so we put 0
entry.occurrences.append((target, "0"))
entry.occurrences = dedupe(entry.occurrences)
po.save(dest)
def allstrings2pot(lprojpath, dest, excludes=None):
allstrings = files_with_ext(lprojpath, ".strings")
if excludes:
allstrings = [
p for p in allstrings if op.splitext(op.basename(p))[0] not in excludes
]
for strings_path in allstrings:
strings2pot(strings_path, dest)
def po2strings(pofile, en_strings, dest):
# Takes en_strings and replace all righthand parts of "foo" = "bar"; entries with translations
# in pofile, then puts the result in dest.
po = polib.pofile(pofile)
if not modified_after(pofile, dest):
return
ensure_folder(op.dirname(dest))
print("Creating {} from {}".format(dest, pofile))
with open(en_strings, "rt", encoding="utf-8") as fp:
contents = fp.read()
re_trans = re.compile(r'(?<= = ").*(?=";\n)')
def repl(match):
s = match.group(0)
unescaped = unescape_cocoa_strings(s)
entry = po.find(unescaped)
if entry is None:
print("WARNING: Could not find entry '{}' in .po file".format(s))
return s
trans = entry.msgstr
return escape_cocoa_strings(trans) if trans else s
contents = re_trans.sub(repl, contents)
with open(dest, "wt", encoding="utf-8") as fp:
fp.write(contents)
def generate_cocoa_strings_from_code(code_folder, dest_folder):
# Uses the "genstrings" command to generate strings file from all .m files in "code_folder".
# The strings file (their name depends on the localization table used in the source) will be
# placed in "dest_folder".
# genstrings produces utf-16 files with comments. After having generated the files, we convert
# them to utf-8 and remove the comments.
ensure_empty_folder(dest_folder)
print_and_do(
'genstrings -o "{}" `find "{}" -name *.m | xargs`'.format(
dest_folder, code_folder
)
)
for stringsfile in os.listdir(dest_folder):
stringspath = op.join(dest_folder, stringsfile)
with open(stringspath, "rt", encoding="utf-16") as fp:
content = fp.read()
content = re.sub(r"/\*.*?\*/", "", content)
content = re.sub(r"\n{2,}", "\n", content)
# I have no idea why, but genstrings seems to have problems with "%" character in strings
# and inserts (number)$ after it. Find these bogus inserts and remove them.
content = re.sub(r"%\d\$", "%", content)
with open(stringspath, "wt", encoding="utf-8") as fp:
fp.write(content)
def generate_cocoa_strings_from_xib(xib_folder):
xibs = [
op.join(xib_folder, fn) for fn in os.listdir(xib_folder) if fn.endswith(".xib")
]
for xib in xibs:
dest = xib.replace(".xib", ".strings")
print_and_do("ibtool {} --generate-strings-file {}".format(xib, dest))
print_and_do("iconv -f utf-16 -t utf-8 {0} | tee {0}".format(dest))
def localize_stringsfile(stringsfile, dest_root_folder):
stringsfile_name = op.basename(stringsfile)
for lang in get_langs("locale"):
pofile = op.join("locale", lang, "LC_MESSAGES", "ui.po")
cocoa_lang = PO2COCOA.get(lang, lang)
dest_lproj = op.join(dest_root_folder, cocoa_lang + ".lproj")
ensure_folder(dest_lproj)
po2strings(pofile, stringsfile, op.join(dest_lproj, stringsfile_name))
def localize_all_stringsfiles(src_folder, dest_root_folder):
stringsfiles = [
op.join(src_folder, fn)
for fn in os.listdir(src_folder)
if fn.endswith(".strings")
]
for path in stringsfiles:
localize_stringsfile(path, dest_root_folder)