| 1 | This is code to aid in localization of Genshi templates, without altering the underlying templates. |
| 2 | It was originally written by Matt Good, then updated and fixed up by David Fraser. |
| 3 | |
| 4 | Firstly here is a module that can be used to extract text from Genshi template streams. |
| 5 | {{{ |
| 6 | #!python |
| 7 | |
| 8 | import fnmatch |
| 9 | import os |
| 10 | import re |
| 11 | import logging |
| 12 | import copy |
| 13 | |
| 14 | import genshi.core |
| 15 | import genshi.input |
| 16 | import genshi.eval |
| 17 | import genshi.template |
| 18 | |
# Elements whose whole content is skipped by the extractor and the localizer.
ignore_tags = [
    'script',
    'style',
]

# Attributes whose plain-string values are treated as localizable text.
include_attribs = [
    'title',
    'alt',
    'longdesc',
]

# Directory names that _matching_files() prunes while walking a tree.
exclude_dirs = (
    '.AppleDouble',
    '.svn',
    'CVS',
    '_darcs',
)

# Finds _('...') or _("...") calls in expression source; the single capture
# group holds the string literal including its surrounding quotes.
gettext_re = re.compile(r"_\(((?:'[^']*')|(?:\"[^\"]*\"))\)")
| 23 | |
# Lookup table indexed by character code (0..255); each entry is the text
# written to the .po file for that character.  Filled by make_escapes().
escapes = []


def make_escapes(pass_iso8859):
    """Rebuild the module-level ``escapes`` table.

    pass_iso8859 -- when true, codes whose low seven bits fall in 32..126
    are kept as literal characters, which lets most ISO-8859 high
    characters through; when false, every code outside 32..126 is written
    as a three-digit octal escape.

    The table is replaced in place, so the function may be called more than
    once and any existing reference to ``escapes`` sees the new contents.
    """
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. a msgid
        # containing o-umlaut is written literally rather than as
        # 'msgid "H\366he"'.  Otherwise we escape any character outside
        # the 32..126 range.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    # Characters with dedicated C-style escapes override the generic rule.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Swap the contents rather than appending: appending on a repeated call
    # would leave the stale first 256 entries in effect.
    escapes[:] = table


make_escapes(False)
| 48 | |
def escape(s):
    """Return *s* with every character replaced by its ``escapes`` entry.

    Characters whose code point lies beyond the end of the table are copied
    through unchanged instead of raising IndexError.
    NOTE(review): this assumes the .po file is written in an encoding that
    can represent such characters -- confirm against the caller.
    """
    table = escapes
    size = len(table)
    pieces = []
    for ch in s:
        code = ord(ch)
        if code < size:
            pieces.append(table[code])
        else:
            pieces.append(ch)
    # The original joined with an undefined EMPTYSTRING name.
    return ''.join(pieces)
| 55 | |
def normalize(s):
    """Format *s* as a double-quoted, C-style string for a .po file.

    A string without newlines becomes one quoted, escaped literal.  A string
    with newlines becomes an empty literal followed by one quoted literal
    per source line, each but the last ending in an escaped newline.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # The input ended with a newline: drop the empty tail and put the
        # newline back on the real last line so it is escaped with it.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
| 71 | |
def lang_extract(potfile, source_files, template_class=None):
    """Append the text keys found in *source_files* to the file *potfile*.

    Each distinct key is written once, in order of first appearance: one
    ``#: file:line`` comment per occurrence, then the ``msgid`` line and an
    empty ``msgstr``.  *template_class* is forwarded to extract_keys().
    """
    out = open(potfile, 'at+')
    try:
        locations = {}
        ordered_keys = []
        for fname, linenum, key in extract_keys(source_files, ['.'], template_class):
            if key not in locations:
                locations[key] = []
                ordered_keys.append(key)
            locations[key].append((fname, linenum))
        for key in ordered_keys:
            for where in locations[key]:
                out.write('#: %s:%s\n' % where)
            out.write('msgid %s\n' % normalize(key))
            out.write('msgstr ""\n\n')
    finally:
        out.close()
| 92 | def _matching_files(dirname, fileglob): |
| 93 | """searches for matching filenames in a directory""" |
| 94 | for root, dirs, files in os.walk(dirname): |
| 95 | for exclude in exclude_dirs: |
| 96 | try: |
| 97 | dirs.remove(exclude) |
| 98 | except ValueError: |
| 99 | pass |
| 100 | for fname in fnmatch.filter(files, fileglob): |
| 101 | yield os.path.join(root, fname) |
| 102 | |
def extract_keys(files, search_path=None, template_class=None):
    """Yield ``(filename, linenum, key)`` for each text key in *files*.

    files -- template names, each loaded through a Genshi TemplateLoader
    search_path -- passed straight to the TemplateLoader constructor
    template_class -- optional template class, passed to the loader as
    ``cls``; when None the loader is called without it

    A file that raises genshi.input.ParseError while loading is logged as a
    warning and skipped; other exceptions propagate.
    """
    loader = genshi.template.TemplateLoader(search_path)
    # Only forward ``cls`` when the caller asked for a specific class.
    load_kwargs = {}
    if template_class is not None:
        load_kwargs['cls'] = template_class
    for fname in files:
        logging.info('Scanning l10n keys from: %s', fname)
        try:
            template = loader.load(fname, **load_kwargs)
        except genshi.input.ParseError as e:
            logging.warning('Skipping extracting l10n keys from %s: %s', fname, e)
            continue
        for linenum, key in extract_from_template(template):
            yield fname, linenum, key
| 118 | |
def extract_from_template(template, search_text=True):
    """Return the ``(linenum, key)`` pairs found in *template*'s stream.

    Convenience wrapper that hands ``template.stream`` and *search_text* to
    extract_from_stream().
    """
    template_stream = template.stream
    return extract_from_stream(template_stream, search_text)
| 122 | |
def extract_from_stream(stream, search_text=True):
    """Yield ``(linenum, text)`` for each localizable string in *stream*.

    stream -- a template stream (MatchTemplate.stream, not a normal XML
    stream) of ``(kind, data, pos)`` events
    search_text -- when false, plain text and literal attribute values are
    ignored and only strings inside ``_()`` calls in expressions are
    returned.  It is set to false for substreams that are the value of an
    attribute not listed in ``include_attribs``.

    Everything inside an element named in ``ignore_tags`` is skipped.
    """
    skip_level = 0
    for kind, data, pos in iter(stream):
        linenum = pos[1]
        if skip_level:
            # Inside an ignored element: only track nesting of ignored tags
            # so we know when the outermost one has closed.
            if kind is genshi.core.START:
                tag, attrs = data
                if tag.localname in ignore_tags:
                    skip_level += 1
            if kind is genshi.core.END:
                tag = data
                if tag.localname in ignore_tags:
                    skip_level -= 1
            continue
        if kind is genshi.core.START:
            tag, attrs = data
            if tag.localname in ignore_tags:
                # skip the substream
                skip_level += 1
                continue
            for name, value in attrs:
                if isinstance(value, basestring):
                    if search_text and name in include_attribs:
                        yield linenum, value
                else:
                    # A non-string value is itself a stream; its keys are
                    # reported at the line of the enclosing start tag.
                    for dummy, key in extract_from_stream(value,
                                                          name in include_attribs):
                        yield linenum, key
        elif kind is genshi.template.EXPR:
            if data.source != "?":
                # TODO: check if these expressions should be localized
                for key in gettext_re.findall(data.source):
                    # Drop the surrounding quotes captured by the regex.
                    key = key[1:-1]
                    if key:
                        yield linenum, key
        elif kind is genshi.core.TEXT and search_text:
            key = data.strip()
            if key:
                yield linenum, key
        elif kind is genshi.template.SUB:
            sub_kind, sub_stream = data
            for sub_linenum, key in extract_from_stream(sub_stream, search_text):
                yield sub_linenum, key
| 173 | }}} |
| 174 | |
| 175 | The following function can then be used to localize the template stream (see below for details on use): |
| 176 | {{{ |
| 177 | #!python |
def localize_template(template_source_stream, ugettext, search_text=True):
    """Yield a localized copy of the events in a template source stream.

    template_source_stream -- the template *source* stream (i.e.
    genshi.XML(template_source)), not the parsed template's stream
    ugettext -- the translation function to apply to each text key
    search_text -- when false, plain text and literal attribute values are
    left untranslated.  It is set to false for substreams that are the
    value of an attribute not listed in ``include_attribs``.

    Text events are translated with surrounding whitespace preserved,
    literal values of ``include_attribs`` attributes are translated, and
    nested streams are localized recursively.  Everything inside an element
    named in ``ignore_tags`` is yielded unchanged.
    """
    # NOTE: this MUST NOT modify the underlying objects or template reuse will break
    # in addition, if it calls itself recursively it must convert the result to a
    # list or it will break on repetition
    skip_level = 0
    for kind, data, pos in iter(template_source_stream):
        # handle skipping whole chunks we don't want to localize (just yielding everything in them)
        if skip_level:
            if kind is genshi.core.START:
                tag, attrs = data
                if tag.localname in ignore_tags:
                    skip_level += 1
            if kind is genshi.core.END:
                if data.localname in ignore_tags:
                    skip_level -= 1
            yield kind, data, pos
            continue
        # handle different kinds of things we want to localize
        if kind is genshi.core.START:
            tag, attrs = data
            if tag.localname in ignore_tags:
                skip_level += 1
                yield kind, data, pos
                continue
            # Work on a copy so the attributes held by the template stay intact.
            new_attrs = genshi.core.Attrs(attrs[:])
            changed = False
            for name, value in attrs:
                if isinstance(value, basestring):
                    if search_text and name in include_attribs:
                        # Translate the attribute's value (the original
                        # passed the search_text flag here by mistake).
                        new_attrs.set(name, ugettext(value))
                        changed = True
                else:
                    # A non-string value is a substream; localize it and
                    # materialize the result so it can be replayed.
                    # note: passing search_text=False implies far fewer matches, this may
                    # be wasteful and the subcall could be skipped in some cases
                    new_value = list(localize_template(value, ugettext,
                                                       search_text=(name in include_attribs)))
                    new_attrs.set(name, new_value)
                    changed = True
            if changed:
                attrs = new_attrs
            yield kind, (tag, attrs), pos
        elif kind is genshi.template.EXPR:
            # Expressions are passed through untouched.  The original built a
            # rewritten Expression from the translated strings and then
            # discarded it; that work could raise when a translation did not
            # form valid expression source, so it has been dropped.
            # TODO: check if these expressions should be localized
            yield kind, data, pos
        elif kind is genshi.core.TEXT and search_text:
            # strings are immutable, so this won't change the original string
            key = data.strip()
            if key:
                # Replace only the stripped core, keeping surrounding whitespace.
                data = data.replace(key, ugettext(key))
            yield kind, data, pos
        elif kind is genshi.template.SUB:
            sub_kind, sub_stream = data
            new_sub_stream = list(localize_template(sub_stream, ugettext, search_text=search_text))
            yield kind, (sub_kind, new_sub_stream), pos
        else:
            yield kind, data, pos
| 253 | |
| 254 | }}} |