File indexing completed on 2024-04-21 05:41:58
0001 #! /usr/bin/env python 0002 # -*- coding: utf-8 -*- 0003 0004 ## 0005 # Copyright 2010,2011 Stephen Kelly <steveire@gmail.com> 0006 # 0007 # Redistribution and use in source and binary forms, with or without 0008 # modification, are permitted provided that the following conditions 0009 # are met: 0010 # 0011 # 1. Redistributions of source code must retain the above copyright 0012 # notice, this list of conditions and the following disclaimer. 0013 # 2. Redistributions in binary form must reproduce the above copyright 0014 # notice, this list of conditions and the following disclaimer in the 0015 # documentation and/or other materials provided with the distribution. 0016 # 0017 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 0018 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 0019 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 0020 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 0021 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 0022 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 0023 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 0024 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 0025 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 0026 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 0027 ## 0028 0029 ## Parts of this file are reproduced from the Django framework. The Django license appears below. 0030 0031 ## 0032 # Copyright (c) Django Software Foundation and individual contributors. 0033 # All rights reserved. 0034 # 0035 # Redistribution and use in source and binary forms, with or without modification, 0036 # are permitted provided that the following conditions are met: 0037 # 0038 # 1. 
Redistributions of source code must retain the above copyright notice, 0039 # this list of conditions and the following disclaimer. 0040 # 0041 # 2. Redistributions in binary form must reproduce the above copyright 0042 # notice, this list of conditions and the following disclaimer in the 0043 # documentation and/or other materials provided with the distribution. 0044 # 0045 # 3. Neither the name of Django nor the names of its contributors may be used 0046 # to endorse or promote products derived from this software without 0047 # specific prior written permission. 0048 # 0049 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 0050 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 0051 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 0052 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 0053 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 0054 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 0055 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 0056 # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 0057 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 0058 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 0059 ## 0060 0061 import os, sys, glob, operator 0062 import re 0063 import os.path 0064 0065 # == Introduction to the template syntax == 0066 # 0067 # The template syntax looks like this: 0068 # (For more see here: http://grantlee.org/apidox/for_themers.html ) 0069 # 0070 # This is plain text 0071 # This is text with a {{ value }} substitution 0072 # This is {% if condition_is_met %}a conditional{% endif %} 0073 # {# This is a comment #} 0074 # This is a {% comment %} multi-line 0075 # comment 0076 # {% endcomment %} 0077 # 0078 # That is, we have plain text. 
0079 # We have value substitution with {{ }} 0080 # We have comments with {# #} 0081 # We have control tags with {% %} 0082 # 0083 # The first token inside {% %} syntax is called a tag name. Above, we have 0084 # an if tag and a comment tag. 0085 # 0086 # The 'value' in {{ value }} is called a filter expression. In the above case 0087 # the filter expression is a simple value which was inserted into the context. 0088 # In other cases it can be {{ value|upper }}, that is the value can be passed 0089 # through a filter called 'upper' with the '|', or filter expression can 0090 # be {{ value|join:"-" }}, that is it can be passed through the join filter 0091 # which takes an argument. In this case, the 'value' would actually be a list, 0092 # and the join filter would concatenate them with a dash. A filter can have 0093 # either no arguments, like upper, or it can take one argument, delimited by 0094 # a colon (':'). A filter expression can consist of a value followed by a 0095 # chain of filters, such as {{ value|join:"-"|upper }}. A filter expression 0096 # can appear one time inside {{ }} but may appear multiple times inside {% %} 0097 # For example {% cycle foo|upper bar|join:"-" bat %} contains 3 filter 0098 # expressions, 'foo|upper', 'bar|join:"-"' and 'bat'. 0099 # 0100 # Comments are ignored in the templates. 0101 # 0102 # == i18n in templates == 0103 # 0104 # The purpose of this script is to extract translatable strings from templates. 0105 # The aim is to allow template authors to write templates like this: 0106 # 0107 # This is a {{ _("translatable string") }} in the template. 
0108 # This is a {% i18n "translatable string about %1" something %} 0109 # This is a {% i18nc "Some context information" "string about %1" something %} 0110 # This is a {% i18np "%1 string about %2" numthings something %} 0111 # This is a {% i18ncp "some context" "%1 string about %2" numthings something %} 0112 # 0113 # That is, simple translation with _(), and i18n* tags to allow for variable 0114 # substitution, context messages and plurals. Translatable strings may appear 0115 # in a filter expression, either as the value being filtered, or as the argument, 0116 # or both: 0117 # 0118 # {{ _("hello")|upper }} 0119 # {{ list|join:_("and") }} 0120 # 0121 # == How the strings are extracted == 0122 # 0123 # The strings are extracted by parsing the template with regular expressions. 0124 # The tag_re regular expression breaks the template into a stream of tokens 0125 # containing plain text, {{ values }} and {% tags %}. 0126 # That work is done by the tokenize method with the create_token method. 0127 # Each token is then processed to extract the translatable strings from 0128 # the filter expressions. 
# The original context of much of this script is in the django template system:
# https://github.com/django/django/blob/master/django/template/base.py


TOKEN_TEXT = 0
TOKEN_VAR = 1
TOKEN_BLOCK = 2
TOKEN_COMMENT = 3

# template syntax constants
FILTER_SEPARATOR = '|'
FILTER_ARGUMENT_SEPARATOR = ':'
BLOCK_TAG_START = '{%'
BLOCK_TAG_END = '%}'
VARIABLE_TAG_START = '{{'
VARIABLE_TAG_END = '}}'
COMMENT_TAG_START = '{#'
COMMENT_TAG_END = '#}'

# Match a block, variable or comment tag and capture the entire tag, including
# the start/end delimiters. Comment tags have to be matched here as well:
# otherwise a {{ }} or {% %} written inside {# ... #} is tokenized as a real
# tag and its strings are extracted even though the comment is not rendered.
tag_re = re.compile('(%s.*?%s|%s.*?%s|%s.*?%s)' % (
    re.escape(BLOCK_TAG_START), re.escape(BLOCK_TAG_END),
    re.escape(VARIABLE_TAG_START), re.escape(VARIABLE_TAG_END),
    re.escape(COMMENT_TAG_START), re.escape(COMMENT_TAG_END)))


# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
smart_split_re = re.compile(r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""", re.VERBOSE)


def smart_split(text):
    r"""
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings keep their initial and trailing
    quote marks and escaped quotes remain escaped.

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    for bit in smart_split_re.finditer(text):
        yield bit.group(0)


# This only matches constant *strings* (things in quotes or marked for
# translation).

constant_string = r"(?:%(strdq)s|%(strsq)s)" % {
    'strdq': r'"[^"\\]*(?:\\.[^"\\]*)*"',  # double-quoted string
    'strsq': r"'[^'\\]*(?:\\.[^'\\]*)*'",  # single-quoted string
}

# Two alternatives, tried in this order at every position of a filter
# expression:
#   1. _("...") / _('...')  -> captured in the 'l10nable' group
#   2. a plain quoted string -> consumed without a capture, so that text that
#      merely looks like _("...") *inside* another string literal is skipped.
# The pattern is deliberately not anchored with '^': a translatable string may
# be the filtered value ({{ _("hello")|upper }}) or a filter argument
# ({{ list|join:_("and") }}), and both have to be found.
filter_raw_string = r"""
    %(i18n_open)s(?P<l10nable>%(constant_string)s)%(i18n_close)s
    |
    %(constant_string)s
""" % {
    'constant_string': constant_string,
    'i18n_open': re.escape("_("),
    'i18n_close': re.escape(")"),
}

filter_re = re.compile(filter_raw_string, re.UNICODE | re.VERBOSE)

# Matches a smart_split() bit that consists of exactly one quoted literal.
_quoted_string_re = re.compile(r"^%s\Z" % constant_string, re.UNICODE)

# Matches either a backslash escape sequence or a bare double quote.
_escape_or_dquote_re = re.compile(r'\\.|"', re.DOTALL)


def _escape_bare_dquote(match):
    """Return the matched text, with a bare double quote backslash-escaped."""
    text = match.group(0)
    if text == '"':
        return '\\"'
    return text


def _literal_contents(literal):
    """
    Return the text between the quotes of the quoted string `literal`.

    createOutput() always writes the text between double quotes, so bare
    double quotes found in a single-quoted literal are backslash-escaped.
    Existing escape sequences are left untouched.
    """
    contents = literal[1:-1]
    if literal[0] == "'":
        contents = _escape_or_dquote_re.sub(_escape_bare_dquote, contents)
    return contents


class TemplateSyntaxError(Exception):
    pass


class TranslatableString(object):
    """
    One extracted message.

    _string is the (singular) message, context and plural are optional and
    empty when absent, line_number is the 1-based template line it came from.
    """
    _string = ''
    context = ''
    plural = ''
    line_number = -1

    def __repr__(self):
        return "String('%s', '%s', '%s')" % (self._string, self.context, self.plural)


class Token(object):
    def __init__(self, token_type, contents):
        # token_type must be TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK or TOKEN_COMMENT.
        self.token_type, self.contents = token_type, contents

    def __str__(self):
        return '<%s token: "%s...">' % \
            ({TOKEN_TEXT: 'Text', TOKEN_VAR: 'Var', TOKEN_BLOCK: 'Block', TOKEN_COMMENT: 'Comment'}[self.token_type],
             self.contents[:20].replace('\n', ''))


def create_token(token_string, in_tag):
    """
    Convert the given token string into a new Token object and return it.
    If in_tag is True, we are processing something that matched a tag,
    otherwise it should be treated as a literal string.

    Var and block tokens carry the stripped text between the delimiters;
    comment tokens carry an empty string.
    """
    if in_tag:
        if token_string.startswith(VARIABLE_TAG_START):
            return Token(TOKEN_VAR, token_string[len(VARIABLE_TAG_START):-len(VARIABLE_TAG_END)].strip())
        if token_string.startswith(BLOCK_TAG_START):
            return Token(TOKEN_BLOCK, token_string[len(BLOCK_TAG_START):-len(BLOCK_TAG_END)].strip())
        if token_string.startswith(COMMENT_TAG_START):
            return Token(TOKEN_COMMENT, '')
    return Token(TOKEN_TEXT, token_string)


def tokenize(template_string):
    """
    Split template_string into a list of Token objects.

    tag_re has a single capturing group, so re.split() alternates between
    plain text (even indexes) and matched tags (odd indexes). Empty pieces
    are dropped.
    """
    result = []
    for index, bit in enumerate(tag_re.split(template_string)):
        if bit:
            result.append(create_token(bit, index % 2 == 1))
    return result


# For every i18n tag: the TranslatableString attributes filled, in order, by
# the quoted strings that follow the tag name.
#   {% i18n "A one %1, a two %2" var1 var2 %}
#   {% i18nc "An email send operation failed." "%1 Failed!" var1 %}
#   {% i18np "%1 email failed." "%1 emails failed." count %}
#   {% i18ncp "Sending mail" "%1 email failed." "%1 emails failed." count %}
# The *_var variants take the same arguments followed by "as result".
_I18N_TAG_FIELDS = {
    "i18n": ("_string",),
    "i18n_var": ("_string",),
    "i18nc": ("context", "_string"),
    "i18nc_var": ("context", "_string"),
    "i18np": ("_string", "plural"),
    "i18np_var": ("_string", "plural"),
    "i18ncp": ("context", "_string", "plural"),
    "i18ncp_var": ("context", "_string", "plural"),
}


class TranslationOutputter(object):
    """
    Collects the translatable strings of a template and writes them out as
    gettext-style calls.
    """
    line_number = 0

    def __init__(self):
        # Per-instance list: a class-level list would be shared by every
        # instance until translate() happened to rebind it.
        self.translatable_strings = []
        self.line_number = 0

    def get_translatable_filter_args(self, token):
        """
        Record every _("...") found in the filter expression string `token`.

        Each string is appended to self.translatable_strings with the current
        self.line_number.
        """
        for match in filter_re.finditer(token):
            l10nable = match.group("l10nable")
            # Plain string literals match without the group; skip them.
            if l10nable:
                ts = TranslatableString()
                ts._string = _literal_contents(l10nable)
                ts.line_number = self.line_number
                self.translatable_strings.append(ts)

    def get_contextual_strings(self, token):
        """
        Record the message of an i18n, i18nc, i18np or i18ncp block tag (or
        the matching *_var tag), then any _("...") in its other arguments up
        to an "as" keyword.

        Returns True when `token` was a well-formed i18n tag and has been
        fully handled. Returns False, recording nothing, when it is another
        tag or when a required string argument is missing or is not a quoted
        literal.
        """
        bits = smart_split(token.contents)
        # next() with a default: an empty tag such as {% %} yields no bits.
        fields = _I18N_TAG_FIELDS.get(next(bits, None))
        if fields is None:
            return False

        translatable_string = TranslatableString()
        for field in fields:
            bit = next(bits, None)
            if bit is None or not _quoted_string_re.match(bit):
                return False
            setattr(translatable_string, field, _literal_contents(bit))
        translatable_string.line_number = self.line_number
        self.translatable_strings.append(translatable_string)

        for bit in bits:
            if bit == "as":
                # What follows is the name of the result variable.
                break
            self.get_translatable_filter_args(bit)
        return True

    def get_plain_strings(self, token):
        """Record the _("...") strings of every filter expression in `token`."""
        for bit in smart_split(token.contents):
            self.get_translatable_filter_args(bit)

    def translate(self, template_file, outputfile):
        """
        Extract the strings of template_file and write them to outputfile.

        template_file must provide readlines(); its `name` attribute, when
        present, is used in the location comments. The template is processed
        one line at a time, so a tag has to start and end on the same line.
        """
        self.translatable_strings = []
        self.line_number = 0
        for template_string_line in template_file.readlines():
            self.line_number += 1
            for token in tokenize(template_string_line):
                if token.token_type == TOKEN_VAR:
                    self.get_plain_strings(token)
                elif token.token_type == TOKEN_BLOCK:
                    # A handled i18n tag already scanned its own arguments;
                    # scanning them again here would emit them twice.
                    if not self.get_contextual_strings(token):
                        self.get_plain_strings(token)
        template_filename = getattr(template_file, "name", "<unknown>")
        self.createOutput(template_filename, self.translatable_strings, outputfile)

    def createOutput(self, template_filename, translatable_strings, outputfile):
        """
        Write one "// i18n: file: <name>:<line>" comment and one gettext,
        ngettext, pgettext or npgettext call per translatable string.
        """
        for translatable_string in translatable_strings:
            outputfile.write("// i18n: file: %s:%s\n" % (template_filename, translatable_string.line_number))
            arguments = [translatable_string._string]
            function = "gettext"
            if translatable_string.context:
                arguments.insert(0, translatable_string.context)
                function = "p" + function
            if translatable_string.plural:
                arguments.append(translatable_string.plural)
                function = "n" + function
            outputfile.write('%s("%s");\n' % (function, '", "'.join(arguments)))


def main(argv=None):
    """
    Extract the strings of every template named in argv (default:
    sys.argv[1:]) to standard output, with a blank line after each file.
    """
    if argv is None:
        argv = sys.argv[1:]

    ex = TranslationOutputter()
    outputfile = sys.stdout

    for filename in argv:
        # 'with' closes each template once it has been processed.
        with open(filename, "r") as f:
            ex.translate(f, outputfile)
        outputfile.write("\n")


if __name__ == "__main__":
    main()