This is code to aid in localization of Genshi templates, without altering the underlying templates. It was originally written by Matt Good, then updated and fixed up by David Fraser. Firstly here is a module that can be used to extract text from Genshi template streams. {{{ #!python import fnmatch import os import re import logging import copy import genshi.core import genshi.input import genshi.eval import genshi.template ignore_tags = ['script', 'style'] include_attribs = ['title', 'alt', 'longdesc'] exclude_dirs = ('.AppleDouble', '.svn', 'CVS', '_darcs') gettext_re = re.compile(r"_\(((?:'[^']*')|(?:\"[^\"]*\"))\)") # calculate escapes escapes = [] def make_escapes(pass_iso8859): global escapes if pass_iso8859: # Allow iso-8859 characters to pass through so that e.g. 'msgid # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we # escape any character outside the 32..126 range. mod = 128 else: mod = 256 for i in range(256): if 32 <= (i % mod) <= 126: escapes.append(chr(i)) else: escapes.append("\\%03o" % i) escapes[ord('\\')] = '\\\\' escapes[ord('\t')] = '\\t' escapes[ord('\r')] = '\\r' escapes[ord('\n')] = '\\n' escapes[ord('\"')] = '\\"' make_escapes(False) def escape(s): global escapes s = list(s) for i in range(len(s)): s[i] = escapes[ord(s[i])] return EMPTYSTRING.join(s) def normalize(s): """This converts the various Python string types into a format that is appropriate for .po files, namely much closer to C style.""" lines = s.split('\n') if len(lines) == 1: s = '"' + escape(s) + '"' else: if not lines[-1]: del lines[-1] lines[-1] = lines[-1] + '\n' for i in range(len(lines)): lines[i] = escape(lines[i]) lineterm = '\\n"\n"' s = '""\n"' + lineterm.join(lines) + '"' return s def lang_extract(potfile, source_files, template_class=None): """extracts text strings from the given source files and outputs them at the end of the given pot file""" fd = open(potfile, 'at+') try: keys_found = {} key_order = [] for fname, linenum, key in extract_keys(source_files, ['.'], template_class): if key in keys_found: keys_found[key].append((fname, linenum)) else: keys_found[key] = [(fname, linenum)] key_order.append(key) for key in key_order: for fname, linenum in keys_found[key]: fd.write('#: %s:%s\n' % (fname, linenum)) fd.write('msgid %s\n' % normalize(key)) fd.write('msgstr ""\n\n') finally: fd.close() def _matching_files(dirname, fileglob): """searches for matching filenames in a directory""" for root, dirs, files in os.walk(dirname): for exclude in exclude_dirs: try: dirs.remove(exclude) except ValueError: pass for fname in fnmatch.filter(files, fileglob): yield os.path.join(root, fname) def extract_keys(files, search_path=None, template_class=None): """finds all the text keys in the given files""" loader = genshi.template.TemplateLoader(search_path) for fname in files: logging.info('Scanning l10n keys from: %s' % fname) try: if template_class is None: template = loader.load(fname) else: template = loader.load(fname, cls=template_class) except genshi.input.ParseError, e: logging.warning('Skipping extracting l10n keys from %s: %s' % (fname, e)) continue for linenum, key in extract_from_template(template): yield fname, linenum, key def extract_from_template(template, search_text=True): """helper to extract linenumber and key pairs from a given template""" return extract_from_stream(template.stream, search_text) def extract_from_stream(stream, search_text=True): """takes a MatchTemplate.stream (not a normal XML Stream) and searches for localizable text, yielding linenumber, text tuples""" # search_text is set to false when extracting from substreams (that are attribute values for an attribute which is not text) # in this case, only Python strings in expressions are extracted stream = iter(stream) tagname = None skip_level = 0 for kind, data, pos in stream: linenum = pos[1] print kind, linenum if skip_level: if kind is genshi.core.START: tag, attrs = data if tag.localname in ignore_tags: skip_level += 1 if kind is genshi.core.END: tag = data if tag.localname in ignore_tags: skip_level -= 1 continue if kind is genshi.core.START: tag, attrs = data tagname = tag.localname if tagname in ignore_tags: # skip the substream skip_level += 1 continue for name, value in attrs: if isinstance(value, basestring): if search_text and name in include_attribs: yield linenum, value else: for dummy, key in extract_from_stream(value, name in include_attribs): yield linenum, key elif kind is genshi.template.EXPR: if data.source != "?": # TODO: check if these expressions should be localized for key in gettext_re.findall(data.source): key = key[1:-1] if key: yield linenum, key elif kind is genshi.core.TEXT and search_text: key = data.strip() if key: yield linenum, key elif kind is genshi.template.SUB: sub_kind, sub_stream = data for linenum, key in extract_from_stream(sub_stream, search_text): yield linenum, key }}} The following function can then be used to localize the template stream (see below for details on use): {{{ #!python def localize_template(template_source_stream, ugettext, search_text=True): """localizes the given template source stream (i.e. genshi.XML(template_source), not the parsed template's stream need to pass in the ugettext function you want to use""" # NOTE: this MUST NOT modify the underlying objects or template reuse will break # in addition, if it calls itself recursively it must convert the result to a list or it will break on repetition # search_text is set to false when extracting from substreams (that are attribute values for an attribute which is not text) # in this case, only Python strings in expressions are extracted stream = iter(template_source_stream) skip_level = 0 for kind, data, pos in stream: # handle skipping whole chunks we don't want to localize (just yielding everything in them) if skip_level: if kind is genshi.core.START: tag, attrs = data tag = tag.localname if tag in ignore_tags: skip_level += 1 if kind is genshi.core.END: tag = data.localname if tag in ignore_tags: skip_level -= 1 yield kind, data, pos continue # handle different kinds of things we want to localize if kind is genshi.core.START: tag, attrs = data tagname = tag.localname if tagname in ignore_tags: skip_level += 1 yield kind, data, pos continue new_attrs = genshi.core.Attrs(attrs[:]) changed = False for name, value in attrs: if isinstance(value, basestring): if search_text and name in include_attribs: new_value = ugettext(search_text) new_attrs.set(name, new_value) changed = True else: # this seems to be handling substreams, so we should get back a localized substream # note: passing search_text=False implies far fewer matches, this may be wasteful and the subcall could be skipped in some cases new_value = list(localize_template(value, ugettext, search_text=(name in include_attribs))) new_attrs.set(name, new_value) changed = True if changed: # ensure we don't change the original string attrs = new_attrs yield kind, (tag, attrs), pos elif kind is genshi.template.EXPR: if data.source != "?": # TODO: check if these expressions should be localized for key in gettext_re.findall(data.source): key = key[1:-1] if key: new_key = ugettext(key) # TODO: if we do this, it needs to be fixed :-) new_data = genshi.eval.Expression(data.source.replace(key, new_key)) # we lose the following data, but can't assign as its readonly # new_data.code.co_filename = data.code.co_filename # new_data.code.co_firstlineno = data.code.co_firstlineno yield kind, data, pos elif kind is genshi.core.TEXT and search_text: # we can adjust this as strings are immutable, so this won't change the original string key = data.strip() if key: new_key = ugettext(key) data = data.replace(key, new_key) yield kind, data, pos elif kind is genshi.template.SUB: sub_kind, sub_stream = data new_sub_stream = list(localize_template(sub_stream, ugettext, search_text=search_text)) yield kind, (sub_kind, new_sub_stream), pos else: yield kind, data, pos }}}