# This module was taken from CPython's Tools/i18n and dirtily hacked to
# bypass the need for cmdline invocation.
#
# Originally written by Barry Warsaw
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk
#
# 2002-11-22 Jürgen Hermann
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#

import os
# NOTE: imp is deprecated since Python 3.4 and was removed in 3.12; the
# module/package resolution helpers below still rely on it.
import imp
import sys
import glob
import time
import token
import tokenize

__version__ = '1.5'

default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

EMPTYSTRING = ''


# The normal pot-file header.  msgmerge and Emacs's po-mode work better if
# it's there.
pot_header = """
msgid ""
msgstr ""
"Content-Type: text/plain; charset=utf-8\\n"
"Content-Transfer-Encoding: utf-8\\n"
"""


def usage(code, msg=''):
    # The original module docstring became the comment block at the top of
    # this file, so __doc__ is None here; guard against that.
    if __doc__:
        print(__doc__ % globals(), file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


escapes = []


def make_escapes(pass_iso8859):
    global escapes
    escapes = []  # rebuild from scratch so repeated calls don't grow the table
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
        # escape any character outside the 32..126 range.
        mod = 128
    else:
        mod = 256
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            escapes.append(chr(i))
        else:
            escapes.append("\\%03o" % i)
    escapes[ord('\\')] = '\\\\'
    escapes[ord('\t')] = '\\t'
    escapes[ord('\r')] = '\\r'
    escapes[ord('\n')] = '\\n'
    escapes[ord('\"')] = '\\"'


def escape(s):
    global escapes
    # Characters outside Latin-1 have no slot in the 256-entry table; pass
    # them through unescaped (the pot header declares a utf-8 charset).
    return EMPTYSTRING.join(
        escapes[ord(c)] if ord(c) < 256 else c for c in s)


def safe_eval(s):
    # unwrap quotes, safely
    return eval(s, {'__builtins__': {}}, {})


def normalize(s):
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
    lines = s.split('\n')
    if len(lines) == 1:
        s = '"' + escape(s) + '"'
    else:
        if not lines[-1]:
            del lines[-1]
            lines[-1] = lines[-1] + '\n'
        for i in range(len(lines)):
            lines[i] = escape(lines[i])
        lineterm = '\\n"\n"'
        s = '""\n"' + lineterm.join(lines) + '"'
    return s


def containsAny(s, chars):
    """Check whether 's' contains ANY of the characters in 'chars'."""
    return any(c in s for c in chars)


def _visit_pyfiles(list, dirname, names):
    """Helper for getFilesForName()."""
    # get extension for python source files
    if '_py_ext' not in globals():
        global _py_ext
        _py_ext = [triple[0] for triple in imp.get_suffixes()
                   if triple[2] == imp.PY_SOURCE][0]

    # don't recurse into CVS directories
    if 'CVS' in names:
        names.remove('CVS')

    # add all *.py files to list
    list.extend(
        [os.path.join(dirname, file) for file in names
         if os.path.splitext(file)[1] == _py_ext]
    )


def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    Return the file system path to a file for a module, and to a directory
    for a package.  Return None if the name is not found, or is a builtin or
    extension module.
""" # split off top-most name parts = dotted_name.split('.', 1) if len(parts) > 1: # we have a dotted path, import top-level package try: file, pathname, description = imp.find_module(parts[0], pathlist) if file: file.close() except ImportError: return None # check if it's indeed a package if description[2] == imp.PKG_DIRECTORY: # recursively handle the remaining name parts pathname = _get_modpkg_path(parts[1], [pathname]) else: pathname = None else: # plain name try: file, pathname, description = imp.find_module( dotted_name, pathlist) if file: file.close() if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: pathname = None except ImportError: pathname = None return pathname def getFilesForName(name): """Get a list of module files for a filename, a module or package name, or a directory. """ if not os.path.exists(name): # check for glob chars if containsAny(name, "*?[]"): files = glob.glob(name) list = [] for file in files: list.extend(getFilesForName(file)) return list # try to find module or package name = _get_modpkg_path(name) if not name: return [] if os.path.isdir(name): # find all python files in directory list = [] os.walk(name, _visit_pyfiles, list) return list elif os.path.exists(name): # a single file return [name] return [] class TokenEater: def __init__(self, options): self.__options = options self.__messages = {} self.__state = self.__waiting self.__data = [] self.__lineno = -1 self.__freshmodule = 1 self.__curfile = None def __call__(self, ttype, tstring, stup, etup, line): # dispatch ## import token ## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ ## 'tstring:', tstring self.__state(ttype, tstring, stup[0]) def __waiting(self, ttype, tstring, lineno): opts = self.__options # Do docstring extractions, if enabled if opts.docstrings and not opts.nodocstrings.get(self.__curfile): # module docstring? if self.__freshmodule: if ttype == tokenize.STRING: self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__freshmodule = 0 elif ttype not in (tokenize.COMMENT, tokenize.NL): self.__freshmodule = 0 return # class docstring? if ttype == tokenize.NAME and tstring in ('class', 'def'): self.__state = self.__suiteseen return if ttype == tokenize.NAME and tstring in opts.keywords: self.__state = self.__keywordseen def __suiteseen(self, ttype, tstring, lineno): # ignore anything until we see the colon if ttype == tokenize.OP and tstring == ':': self.__state = self.__suitedocstring def __suitedocstring(self, ttype, tstring, lineno): # ignore any intervening noise if ttype == tokenize.STRING: self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__state = self.__waiting elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT): # there was no class docstring self.__state = self.__waiting def __keywordseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == '(': self.__data = [] self.__lineno = lineno self.__state = self.__openseen else: self.__state = self.__waiting def __openseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == ')': # We've seen the last of the translatable strings. Record the # line number of the first line of the strings and update the list # of messages seen. Reset state for the next batch. If there # were no strings inside _(), then just ignore this entry. 
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything other than STRING or whitespace
            print('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                  % {
                      'token': tstring,
                      'file': self.__curfile,
                      'lineno': self.__lineno
                  }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        options = self.__options
        # The time stamp doesn't have the same format as that generated by
        # xgettext; it is also unused here, since this hacked pot_header has
        # no %(time)s placeholder.
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        print(pot_header, file=fp)

        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may
                # wish to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string; v is a dictionary mapping each
                # (filename, lineno) tuple to an isdocstring flag.  We want
                # to sort the entries in v first by file name and then by
                # line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print('# File: %(filename)s, line: %(lineno)d' % d,
                              file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line as possible, as long
                    # as the resulting line length doesn't exceed
                    # 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = ' %(filename)s:%(lineno)d' % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k), file=fp)
                print('msgstr ""\n', file=fp)


def main(source_files, outpath, keywords=None):
    global default_keywords

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0  # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    # leftover from the original command-line option parsing; unused here
    locations = {'gnu': options.GNU,
                 'solaris': options.SOLARIS,
                 }
    options.outfile = outpath
    if keywords:
        # copy, so extending with the defaults below can't mutate the
        # caller's list
        options.keywords = list(keywords)

    # calculate escapes
    make_escapes(options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename, encoding='utf-8') as fp:
                options.toexclude = fp.readlines()
        except IOError:
            print("Can't read --exclude-file: %s" % options.excludefilename,
                  file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # slurp through all the files
    eater = TokenEater(options)
    for filename in source_files:
        if options.verbose:
            print('Working on %s' % filename)
        with open(filename, encoding='utf-8') as fp:
            eater.set_filename(filename)
            try:
                tokens = tokenize.generate_tokens(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)

    # write the collected messages out as a .pot catalog
    with open(options.outfile, 'w', encoding='utf-8') as fp:
        eater.write(fp)
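

# Example usage (a minimal sketch, not part of the original module): the
# 'myapp' package name, the output path, and the extra 'N_' keyword are
# illustrative assumptions, not anything this module prescribes.
if __name__ == '__main__':
    # Resolve a package/directory name to its *.py files, then extract every
    # string literal wrapped in _() or N_() into an xgettext-style catalog.
    files = getFilesForName('myapp')
    if files:
        main(files, 'myapp/messages.pot', keywords=['_', 'N_'])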