Edgewall Software

Changes between Initial Version and Version 1 of GenshiRecipes/Localization


Ignore:
Timestamp:
Nov 9, 2006, 2:28:23 PM (17 years ago)
Author:
David Fraser <davidf@…>
Comment:

Initial posting of module

Legend:

Unmodified
Added
Removed
Modified
  • GenshiRecipes/Localization

    v1 v1  
     1This is code to aid in localization of Genshi templates, without altering the underlying templates.
     2It was originally written by Matt Good, then updated and fixed up by David Fraser.
     3
     4First, here is a module that can be used to extract text from Genshi template streams.
     5{{{
     6#!python
     7
     8import fnmatch
     9import os
     10import re
     11import logging
     12import copy
     13
     14import genshi.core
     15import genshi.input
     16import genshi.eval
     17import genshi.template
     18
     19ignore_tags = ['script', 'style']
     20include_attribs = ['title', 'alt', 'longdesc']
     21exclude_dirs = ('.AppleDouble', '.svn', 'CVS', '_darcs')
     22gettext_re = re.compile(r"_\(((?:'[^']*')|(?:\"[^\"]*\"))\)")
     23
     24# calculate escapes
     25escapes = []
     26
     27def make_escapes(pass_iso8859):
     28    global escapes
     29    if pass_iso8859:
     30        # Allow iso-8859 characters to pass through so that e.g. 'msgid
     31        # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
     32        # escape any character outside the 32..126 range.
     33        mod = 128
     34    else:
     35        mod = 256
     36    for i in range(256):
     37        if 32 <= (i % mod) <= 126:
     38            escapes.append(chr(i))
     39        else:
     40            escapes.append("\\%03o" % i)
     41    escapes[ord('\\')] = '\\\\'
     42    escapes[ord('\t')] = '\\t'
     43    escapes[ord('\r')] = '\\r'
     44    escapes[ord('\n')] = '\\n'
     45    escapes[ord('\"')] = '\\"'
     46
     47make_escapes(False)
     48
     49def escape(s):
     50    global escapes
     51    s = list(s)
     52    for i in range(len(s)):
     53        s[i] = escapes[ord(s[i])]
     54    return EMPTYSTRING.join(s)
     55
     56def normalize(s):
     57    """This converts the various Python string types into a format that is
     58    appropriate for .po files, namely much closer to C style."""
     59    lines = s.split('\n')
     60    if len(lines) == 1:
     61        s = '"' + escape(s) + '"'
     62    else:
     63        if not lines[-1]:
     64            del lines[-1]
     65            lines[-1] = lines[-1] + '\n'
     66        for i in range(len(lines)):
     67            lines[i] = escape(lines[i])
     68        lineterm = '\\n"\n"'
     69        s = '""\n"' + lineterm.join(lines) + '"'
     70    return s
     71
     72def lang_extract(potfile, source_files, template_class=None):
     73    """extracts text strings from the given source files and outputs them at the end of the given pot file"""
     74    fd = open(potfile, 'at+')
     75    try:
     76        keys_found = {}
     77        key_order = []
     78        for fname, linenum, key in extract_keys(source_files, ['.'], template_class):
     79            if key in keys_found:
     80                keys_found[key].append((fname, linenum))
     81            else:
     82                keys_found[key] = [(fname, linenum)]
     83                key_order.append(key)
     84        for key in key_order:
     85            for fname, linenum in keys_found[key]:
     86                fd.write('#: %s:%s\n' % (fname, linenum))
     87            fd.write('msgid %s\n' % normalize(key))
     88            fd.write('msgstr ""\n\n')
     89    finally:
     90        fd.close()
     91
     92def _matching_files(dirname, fileglob):
     93    """searches for matching filenames in a directory"""
     94    for root, dirs, files in os.walk(dirname):
     95        for exclude in exclude_dirs:
     96            try:
     97                dirs.remove(exclude)
     98            except ValueError:
     99                pass
     100        for fname in fnmatch.filter(files, fileglob):
     101            yield os.path.join(root, fname)
     102
     103def extract_keys(files, search_path=None, template_class=None):
     104    """finds all the text keys in the given files"""
     105    loader = genshi.template.TemplateLoader(search_path)
     106    for fname in files:
     107        logging.info('Scanning l10n keys from: %s' % fname)
     108        try:
     109            if template_class is None:
     110                template = loader.load(fname)
     111            else:
     112                template = loader.load(fname, cls=template_class)
     113        except genshi.input.ParseError, e:
     114            logging.warning('Skipping extracting l10n keys from %s: %s' % (fname, e))
     115            continue
     116        for linenum, key in extract_from_template(template):
     117            yield fname, linenum, key
     118
     119def extract_from_template(template, search_text=True):
     120    """helper to extract linenumber and key pairs from a given template"""
     121    return extract_from_stream(template.stream, search_text)
     122
     123def extract_from_stream(stream, search_text=True):
     124    """takes a MatchTemplate.stream (not a normal XML Stream) and searches for localizable text, yielding linenumber, text tuples"""
     125    # search_text is set to false when extracting from substreams (that are attribute values for an attribute which is not text)
     126    # in this case, only Python strings in expressions are extracted
     127    stream = iter(stream)
     128    tagname = None
     129    skip_level = 0
     130    for kind, data, pos in stream:
     131        linenum = pos[1]
     132        print kind, linenum
     133        if skip_level:
     134            if kind is genshi.core.START:
     135                tag, attrs = data
     136                if tag.localname in ignore_tags:
     137                    skip_level += 1
     138            if kind is genshi.core.END:
     139                tag = data
     140                if tag.localname in ignore_tags:
     141                    skip_level -= 1
     142            continue
     143        if kind is genshi.core.START:
     144            tag, attrs = data
     145            tagname = tag.localname
     146            if tagname in ignore_tags:
     147                # skip the substream
     148                skip_level += 1
     149                continue
     150            for name, value in attrs:
     151                if isinstance(value, basestring):
     152                   if search_text and name in include_attribs:
     153                       yield linenum, value
     154                else:
     155                    for dummy, key in extract_from_stream(value,
     156                                                      name in include_attribs):
     157                        yield linenum, key
     158        elif kind is genshi.template.EXPR:
     159            if data.source != "?":
     160                # TODO: check if these expressions should be localized
     161                for key in gettext_re.findall(data.source):
     162                    key = key[1:-1]
     163                    if key:
     164                        yield linenum, key
     165        elif kind is genshi.core.TEXT and search_text:
     166            key = data.strip()
     167            if key:
     168                yield linenum, key
     169        elif kind is genshi.template.SUB:
     170            sub_kind, sub_stream = data
     171            for linenum, key in extract_from_stream(sub_stream, search_text):
     172                yield linenum, key
     173}}}
     174
     175The following function can then be used to localize the template stream (see below for details on use):
     176{{{
     177#!python
     178def localize_template(template_source_stream, ugettext, search_text=True):
     179    """localizes the given template source stream (i.e. genshi.XML(template_source), not the parsed template's stream
     180    need to pass in the ugettext function you want to use"""
     181    # NOTE: this MUST NOT modify the underlying objects or template reuse will break
     182    # in addition, if it calls itself recursively it must convert the result to a list or it will break on repetition
     183    # search_text is set to false when extracting from substreams (that are attribute values for an attribute which is not text)
     184    # in this case, only Python strings in expressions are extracted
     185    stream = iter(template_source_stream)
     186    skip_level = 0
     187    for kind, data, pos in stream:
     188        # handle skipping whole chunks we don't want to localize (just yielding everything in them)
     189        if skip_level:
     190            if kind is genshi.core.START:
     191                tag, attrs = data
     192                tag = tag.localname
     193                if tag in ignore_tags:
     194                    skip_level += 1
     195            if kind is genshi.core.END:
     196                tag = data.localname
     197                if tag in ignore_tags:
     198                    skip_level -= 1
     199            yield kind, data, pos
     200            continue
     201        # handle different kinds of things we want to localize
     202        if kind is genshi.core.START:
     203            tag, attrs = data
     204            tagname = tag.localname
     205            if tagname in ignore_tags:
     206                skip_level += 1
     207                yield kind, data, pos
     208                continue
     209            new_attrs = genshi.core.Attrs(attrs[:])
     210            changed = False
     211            for name, value in attrs:
     212                if isinstance(value, basestring):
     213                   if search_text and name in include_attribs:
     214                       new_value = ugettext(search_text)
     215                       new_attrs.set(name, new_value)
     216                       changed = True
     217                else:
     218                    # this seems to be handling substreams, so we should get back a localized substream
     219                    # note: passing search_text=False implies far fewer matches, this may be wasteful and the subcall could be skipped in some cases
     220                    new_value = list(localize_template(value, ugettext, search_text=(name in include_attribs)))
     221                    new_attrs.set(name, new_value)
     222                    changed = True
     223            if changed:
     224                # ensure we don't change the original string
     225                attrs = new_attrs
     226            yield kind, (tag, attrs), pos
     227        elif kind is genshi.template.EXPR:
     228            if data.source != "?":
     229                # TODO: check if these expressions should be localized
     230                for key in gettext_re.findall(data.source):
     231                    key = key[1:-1]
     232                    if key:
     233                        new_key = ugettext(key)
     234                        # TODO: if we do this, it needs to be fixed :-)
     235                        new_data = genshi.eval.Expression(data.source.replace(key, new_key))
     236                        # we lose the following data, but can't assign as its readonly
     237                        # new_data.code.co_filename = data.code.co_filename
     238                        # new_data.code.co_firstlineno = data.code.co_firstlineno
     239            yield kind, data, pos
     240        elif kind is genshi.core.TEXT and search_text:
     241            # we can adjust this as strings are immutable, so this won't change the original string
     242            key = data.strip()
     243            if key:
     244                new_key = ugettext(key)
     245                data = data.replace(key, new_key)
     246            yield kind, data, pos
     247        elif kind is genshi.template.SUB:
     248            sub_kind, sub_stream = data
     249            new_sub_stream = list(localize_template(sub_stream, ugettext, search_text=search_text))
     250            yield kind, (sub_kind, new_sub_stream), pos
     251        else:
     252            yield kind, data, pos
     253
     254}}}