| | 1 | This is code to aid in localization of Genshi templates, without altering the underlying templates. |
| | 2 | It was originally written by Matt Good, then updated and fixed up by David Fraser. |
| | 3 | |
| | 4 | First, here is a module that can be used to extract text from Genshi template streams. |
| | 5 | {{{ |
| | 6 | #!python |
| | 7 | |
| | 8 | import fnmatch |
| | 9 | import os |
| | 10 | import re |
| | 11 | import logging |
| | 12 | import copy |
| | 13 | |
| | 14 | import genshi.core |
| | 15 | import genshi.input |
| | 16 | import genshi.eval |
| | 17 | import genshi.template |
| | 18 | |
# Elements whose events are skipped entirely by the extraction and
# localization generators in this module.
ignore_tags = [
    'script',
    'style',
]

# Attribute names whose values are treated as localizable text.
include_attribs = [
    'title',
    'alt',
    'longdesc',
]

# Directory names that _matching_files removes from os.walk's directory list.
exclude_dirs = (
    '.AppleDouble',
    '.svn',
    'CVS',
    '_darcs',
)

# Matches _('...') or _("...") and captures the quoted literal, quotes
# included; the literal may not contain its own quote character.
gettext_re = re.compile(r"_\(((?:'[^']*')|(?:\"[^\"]*\"))\)")
| | 23 | |
# Escape table: escapes[n] is the text written to the .po file for the
# character with code n.  Filled in by make_escapes().
escapes = []


def make_escapes(pass_iso8859):
    """Rebuild the module-level ``escapes`` table (256 entries).

    Codes 32..126 map to themselves and every other code maps to a
    three-digit octal escape such as ``\\366``.  Backslash, tab, carriage
    return, newline and the double quote get C-style escapes.

    pass_iso8859 -- when true, a code above 127 is also passed through
        unescaped if its low seven bits fall in 32..126, so that e.g.
        'msgid "Höhe"' does not become 'msgid "H\\366he"'.  When false,
        every code outside 32..126 is escaped.

    The table is replaced in place, so the function may be called again to
    switch modes; existing references to ``escapes`` stay valid and the
    list never grows past 256 entries.
    """
    if pass_iso8859:
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Slice assignment swaps the contents without rebinding the name, so a
    # repeated call replaces the old table instead of appending to it.
    escapes[:] = table


make_escapes(False)
| | 48 | |
def escape(s):
    """Return ``s`` with each character replaced by its ``escapes`` entry.

    Characters whose code point lies beyond the end of the table (for
    example non-Latin-1 characters in a unicode string) are passed through
    unchanged instead of raising IndexError; encoding them is left to
    whoever writes the result out.
    """
    table_size = len(escapes)
    pieces = []
    for char in s:
        code = ord(char)
        if code < table_size:
            pieces.append(escapes[code])
        else:
            pieces.append(char)
    return ''.join(pieces)
| | 55 | |
def normalize(s):
    """Render ``s`` as a quoted, C-style string literal for a .po file.

    A string without newlines becomes one quoted, escaped line.  A string
    with newlines becomes an empty ``""`` line followed by one quoted line
    per source line, each but the last ending in an escaped ``\\n``.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # The text ended with a newline: drop the empty tail and put the
        # newline back on the real last line so that it gets escaped.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
| | 71 | |
def lang_extract(potfile, source_files, template_class=None):
    """Append the strings found in ``source_files`` to the pot file.

    Keys are collected with extract_keys() using ``['.']`` as the search
    path.  Each distinct key is written once, in order of first
    appearance, preceded by one ``#: file:line`` comment per occurrence
    and followed by an empty ``msgstr``.
    """
    pot = open(potfile, 'at+')
    try:
        locations = {}
        ordered_keys = []
        found = extract_keys(source_files, ['.'], template_class)
        for fname, linenum, key in found:
            if key not in locations:
                locations[key] = []
                ordered_keys.append(key)
            locations[key].append((fname, linenum))
        for key in ordered_keys:
            for fname, linenum in locations[key]:
                pot.write('#: %s:%s\n' % (fname, linenum))
            pot.write('msgid %s\n' % normalize(key))
            pot.write('msgstr ""\n\n')
    finally:
        pot.close()
| | 91 | |
def _matching_files(dirname, fileglob):
    """Yield the path of every file under ``dirname`` matching ``fileglob``.

    Directories named in ``exclude_dirs`` are not descended into.
    """
    for root, dirs, files in os.walk(dirname):
        # Prune in place: os.walk only honours changes made to this list.
        dirs[:] = [name for name in dirs if name not in exclude_dirs]
        for fname in fnmatch.filter(files, fileglob):
            yield os.path.join(root, fname)
| | 102 | |
def extract_keys(files, search_path=None, template_class=None):
    """Yield ``(filename, linenum, key)`` for every text key in ``files``.

    files -- template file names, loaded through a Genshi TemplateLoader
        created with ``search_path``.
    template_class -- optional template class passed to the loader as
        ``cls``; when None the loader is called without it.

    A file that raises genshi.input.ParseError while loading is logged as
    a warning and skipped; other exceptions propagate.
    """
    loader = genshi.template.TemplateLoader(search_path)
    for fname in files:
        logging.info('Scanning l10n keys from: %s', fname)
        try:
            if template_class is None:
                template = loader.load(fname)
            else:
                template = loader.load(fname, cls=template_class)
        except genshi.input.ParseError as e:
            logging.warning('Skipping extracting l10n keys from %s: %s',
                            fname, e)
            continue
        for linenum, key in extract_from_template(template):
            yield fname, linenum, key
| | 118 | |
def extract_from_template(template, search_text=True):
    """Return the ``(linenum, key)`` pairs found in ``template.stream``.

    Thin wrapper that hands the template's stream and ``search_text`` to
    extract_from_stream().
    """
    template_stream = template.stream
    return extract_from_stream(template_stream, search_text)
| | 122 | |
def extract_from_stream(stream, search_text=True):
    """Yield ``(linenum, text)`` for each localizable string in ``stream``.

    stream -- a MatchTemplate.stream (not a normal XML Stream): an iterable
        of ``(kind, data, pos)`` events, where ``pos[1]`` is the line number.
    search_text -- set to False when extracting from substreams that are
        the value of a non-text attribute; in that case only Python strings
        inside ``_()`` calls in expressions are extracted.

    Everything between the start and end of a tag listed in ``ignore_tags``
    is skipped.  Blank text and blank attribute values are never yielded,
    because an empty msgid would collide with the .po header entry.
    """
    skip_level = 0
    for kind, data, pos in iter(stream):
        linenum = pos[1]
        if skip_level:
            # Inside an ignored element: only track nesting depth so we
            # know when the outermost ignored element has been closed.
            if kind is genshi.core.START:
                tag, attrs = data
                if tag.localname in ignore_tags:
                    skip_level += 1
            if kind is genshi.core.END:
                tag = data
                if tag.localname in ignore_tags:
                    skip_level -= 1
            continue
        if kind is genshi.core.START:
            tag, attrs = data
            if tag.localname in ignore_tags:
                # skip the substream
                skip_level += 1
                continue
            for name, value in attrs:
                if isinstance(value, basestring):
                    if (search_text and name in include_attribs
                            and value.strip()):
                        yield linenum, value
                else:
                    # Non-string attribute values are treated as substreams.
                    # Their keys are reported at the line of the tag itself.
                    for dummy, key in extract_from_stream(
                            value, name in include_attribs):
                        yield linenum, key
        elif kind is genshi.template.EXPR:
            if data.source != "?":
                # TODO: check if these expressions should be localized
                for key in gettext_re.findall(data.source):
                    key = key[1:-1]  # drop the surrounding quotes
                    if key:
                        yield linenum, key
        elif kind is genshi.core.TEXT and search_text:
            key = data.strip()
            if key:
                yield linenum, key
        elif kind is genshi.template.SUB:
            sub_kind, sub_stream = data
            for sub_linenum, key in extract_from_stream(sub_stream,
                                                        search_text):
                yield sub_linenum, key
| | 173 | }}} |
| | 174 | |
| | 175 | The following function can then be used to localize the template stream (see below for details on use): |
| | 176 | {{{ |
| | 177 | #!python |
def localize_template(template_source_stream, ugettext, search_text=True):
    """Yield the events of a template source stream with its text localized.

    template_source_stream -- the template source stream, i.e.
        genshi.XML(template_source), not the parsed template's stream.
    ugettext -- the translation function to use; called with each text key
        and expected to return the translated text.
    search_text -- set to False when localizing substreams that are the
        value of a non-text attribute; plain text and attribute strings
        are then left untranslated.

    Text events and the string values of attributes named in
    ``include_attribs`` are translated.  Everything between the start and
    end of a tag listed in ``ignore_tags`` is passed through untouched, as
    are expression events.
    """
    # NOTE: this MUST NOT modify the underlying objects or template reuse
    # will break.  In addition, when it calls itself recursively it must
    # convert the result to a list or it will break on repetition.
    skip_level = 0
    for kind, data, pos in iter(template_source_stream):
        # handle skipping whole chunks we don't want to localize (just
        # yielding everything in them)
        if skip_level:
            if kind is genshi.core.START:
                if data[0].localname in ignore_tags:
                    skip_level += 1
            if kind is genshi.core.END:
                if data.localname in ignore_tags:
                    skip_level -= 1
            yield kind, data, pos
            continue
        # handle different kinds of things we want to localize
        if kind is genshi.core.START:
            tag, attrs = data
            if tag.localname in ignore_tags:
                skip_level += 1
                yield kind, data, pos
                continue
            # Work on a copy so the template's own attributes stay intact.
            new_attrs = genshi.core.Attrs(attrs[:])
            changed = False
            for name, value in attrs:
                if isinstance(value, basestring):
                    # Blank values are left alone: translating the empty
                    # string with gettext returns the catalog's header
                    # metadata rather than an empty string.
                    if (search_text and name in include_attribs
                            and value.strip()):
                        new_attrs.set(name, ugettext(value))
                        changed = True
                else:
                    # this seems to be handling substreams, so we should get
                    # back a localized substream
                    # note: passing search_text=False implies far fewer
                    # matches, this may be wasteful and the subcall could be
                    # skipped in some cases
                    new_value = list(localize_template(
                        value, ugettext,
                        search_text=(name in include_attribs)))
                    new_attrs.set(name, new_value)
                    changed = True
            if changed:
                attrs = new_attrs
            yield kind, (tag, attrs), pos
        elif kind is genshi.template.EXPR:
            # Expressions are passed through unchanged.
            # TODO: decide whether _('...') literals inside expressions
            # should be rewritten here; presumably they are translated when
            # the expression is evaluated -- confirm.
            yield kind, data, pos
        elif kind is genshi.core.TEXT and search_text:
            # strings are immutable, so building a new one cannot change
            # the original event's data
            key = data.strip()
            if key:
                # Translate only the stripped text; the surrounding
                # whitespace of the original is kept.
                data = data.replace(key, ugettext(key))
            yield kind, data, pos
        elif kind is genshi.template.SUB:
            sub_kind, sub_stream = data
            new_sub_stream = list(localize_template(
                sub_stream, ugettext, search_text=search_text))
            yield kind, (sub_kind, new_sub_stream), pos
        else:
            yield kind, data, pos
| | 253 | |
| | 254 | }}} |