# File indexing completed on 2024-05-12 05:47:02
# -*- coding: utf-8 -*-

"""
Make some comparisons between the translation and the original text.

Every public ``test_*`` function in this module takes ``(msg, cat)`` and
returns either an empty list (nothing to report) or a one-element list of
the form ``[("msgstr", 0, [(0, 0, message)])]``.

@author: Javier Viñal <fjvinal@gmail.com>
@license: GPLv3
"""

import re
import string

import enchant
from pology import _, n_, split


# ---------------------------------------------------------------------------
# Shared private helpers.

# Messages carrying translator credits are skipped by most checks.
_CREDIT_CONTEXTS = (
    "EMAIL OF TRANSLATORS",
    "NAME OF TRANSLATORS",
    "ROLES OF TRANSLATORS",
)
_CREDIT_MSGIDS = (
    "Your emails",
    "Your names",
    "CREDIT_FOR_TRANSLATORS",
    "ROLES_OF_TRANSLATORS",
)


def _is_translator_credits(msg):
    """Return True if msg is one of the translator-credit messages."""
    return msg.msgctxt in _CREDIT_CONTEXTS or msg.msgid in _CREDIT_MSGIDS


def _text_pairs(msg):
    """
    Yield (original, translation) for every entry of msg.msgstr.

    The first translation is paired with msg.msgid, every later one with
    msg.msgid_plural.
    """
    # Index-based access on purpose: only len() and indexing are known to be
    # supported by msg.msgstr from this file.
    for i in range(len(msg.msgstr)):
        if i > 0:
            yield msg.msgid_plural, msg.msgstr[i]
        else:
            yield msg.msgid, msg.msgstr[i]


def _report(text):
    """Build the single-problem return value shared by all the checks."""
    # NOTE(review): the report always names msgstr item 0, even when the
    # problem was found in a later plural form -- confirm whether the item
    # index should follow the form.
    return [("msgstr", 0, [(0, 0, text)])]


def _strip_chars(text, chars):
    """Return text with every character listed in chars removed."""
    for ch in chars:
        text = text.replace(ch, "")
    return text


# ---------------------------------------------------------------------------

def test_if_empty_translation(msg, cat):
    """
    Compare the translation with the original text, testing if the translation
    is empty.

    [type V4A hook].
    @return: parts
    """
    for msgid, msgstr in _text_pairs(msg):
        if msgid and not msgstr:
            return _report('La traducción parece estar vacía')

    return []


_purepunc = re.compile(r"^\W+$", re.U)


def test_if_purepunc(msg, cat):
    """
    Compare the translation with the original text, testing if the translation
    is different when the original text has not alphanumeric text.

    Quotes and spaces (and, in the translation, «» quotes too) are ignored
    in the comparison.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        if not _purepunc.match(msgid):
            continue
        if _strip_chars(msgid, "\"' ") != _strip_chars(msgstr, "\"'«» "):
            return _report('Se ha traducido un texto no alfanumérico')

    return []


# Non-ASCII characters accepted in a translation even when they are missing
# from the original text.
_EXTRA_PRINTABLE = "áéíóúüñçÁÉÍÓÚÜÑÇ¿¡|«»©ºª€/"
# Punctuation accepted in a translation even when it is missing from the
# original text.
_EXTRA_PUNCTUATION = "¿¡|«»©ºª€/.,;:()_-"


def test_if_non_printable_characters(msg, cat):
    """
    Compare the translation with the original text, testing if the translation
    contains characters outside string.printable, or punctuation signs, that
    are neither present in the original text nor in the accepted lists.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        for c in msgstr:
            # Anything already present in the original is always accepted.
            if c in msgid:
                continue
            if c not in string.printable and c not in _EXTRA_PRINTABLE:
                return _report(
                    'La traducción contiene caracteres no imprimibles')
            if c in string.punctuation and c not in _EXTRA_PUNCTUATION:
                return _report(
                    'La traducción contiene signos de puntuación '
                    'no incluidos en el original')

    return []


def test_if_very_long_translation(msg, cat):
    """
    Compare the translation with the original text, testing if the translated
    text is much longer than the original: more than 1.6 times its number of
    words plus 5 (the constant is a correction for small texts).

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        n_orig = len(msgid.split())
        if n_orig > 0 and len(msgstr.split()) > (1.6 * n_orig + 5):
            return _report('La traducción parece demasiado larga')

    return []


def test_if_very_short_translation(msg, cat):
    """
    Compare the translation with the original text, testing if the translated
    text is much shorter than the original: the original has more than 1.6
    times the words of the translation plus 5. Empty translations are skipped.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        if len(msgstr) == 0:
            continue
        if len(msgid.split()) > (1.6 * len(msgstr.split()) + 5):
            return _report('La traducción parece demasiado corta')

    return []


_valid_word = re.compile(r"^\w+$", re.U)
_capital_word = re.compile(r"^[A-Z0-9ÑÇÁÉÍÓÚÂÊÎÔÛÄËÏÖÜĀ]+$", re.U)
_proper_name = re.compile(
    r"^\W*?[A-Z0-9ÑÇÁÉÍÓÚÂÊÎÔÛÄËÏÖÜĀ]\w+"
    r"(\W+?[A-Z0-9ÑÇÁÉÍÓÚÂÊÎÔÛÄËÏÖÜĀ]\w+)+\W*$",
    re.U)

# Spelling dictionaries, created on first use and then reused: building one
# per checked message is needlessly expensive.
_spell_dicts = {}


def _spell_dict(lang):
    """Return the (cached) enchant dictionary for the language code lang."""
    try:
        return _spell_dicts[lang]
    except KeyError:
        checker = _spell_dicts[lang] = enchant.Dict(lang)
        return checker


def test_if_not_translated(msg, cat):
    """
    Compare the translation with the original text, testing if the paragraph is
    not translated.

    A translation identical to a non-empty original is reported when at least
    one of its words (all-capital words excluded) is accepted by the "en"
    dictionary and rejected by the "es" one. Translations that look like a
    sequence of capitalized words, and originals without alphanumeric text,
    are skipped.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        if _proper_name.match(msgstr) or _purepunc.match(msgid):
            continue
        if len(msgid) == 0 or msgid != msgstr:
            continue

        for word in split.proper_words(msgid, markup=True, accels=['&']):
            if not _valid_word.match(word) or _capital_word.match(word):
                continue
            english = _spell_dict("en")
            spanish = _spell_dict("es")
            # The word is handed over as text, not as UTF-8 encoded bytes.
            if english.check(word) and not spanish.check(word):
                return _report('El párrafo parece no estar traducido')

    return []


_ent_accel = re.compile(r"&[A-Za-z0-9ÑñÇç](?!\w+;)", re.U)


def test_paired_accelerators(msg, cat):
    """
    Compare number of accelerators (&) between original and translated text.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        n_orig = len(_ent_accel.findall(msgid))
        n_tran = len(_ent_accel.findall(msgstr))

        if n_orig < n_tran:
            return _report("Sobran aceleradores «&» en la traducción")
        if n_orig > n_tran:
            return _report("Faltan aceleradores «&» en la traducción")

    return []


# (character, plural name used in the report)
_PAIRED_STRINGS = (
    ("\t", "tabuladores"),
    ("\r", "retornos de carro"),
    ("\n", "saltos de línea"),
)


def test_paired_strings(msg, cat):
    """
    Compare number of some strings (tabs, carriage returns, line feeds)
    between original and translated text.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        for char, name in _PAIRED_STRINGS:
            n_orig = msgid.count(char)
            n_tran = msgstr.count(char)

            if n_orig < n_tran:
                return _report("Sobran " + name + " en la traducción")
            if n_orig > n_tran:
                return _report("Faltan " + name + " en la traducción")

    return []


# (opening sign, closing sign, plural name used in the report)
_PAIRED_BRACKETS = (
    ("(", ")", "paréntesis"),
    ("{", "}", "llaves"),
    ("[", "]", "corchetes"),
    ("«", "»", "comillas españolas"),
)


def test_paired_brackets(msg, cat):
    """
    Test that opening and closing brackets are balanced in the translated
    text. A kind of bracket is only checked when it is balanced in the
    original text.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        for opening, closing, name in _PAIRED_BRACKETS:
            # Unbalanced in the original: nothing can be demanded here.
            if msgid.count(opening) != msgid.count(closing):
                continue
            n_open = msgstr.count(opening)
            n_close = msgstr.count(closing)

            if n_open < n_close:
                return _report("Sobran " + name + " en la traducción")
            if n_open > n_close:
                return _report("Faltan " + name + " en la traducción")

    return []


_ent_function = re.compile(r"(?:\w+::)*\w+\(\)", re.U)
# "(?<!\w)" instead of "(?<=\W)": an option placed at the very beginning of
# the text must be found too, otherwise moving it inside the sentence in the
# translation is reported as a difference.
_ent_parameter = re.compile(r"(?<!\w)--\w+(?:-\w+)*", re.U)

# (pattern, name used in the report)
_PAIRED_EXPRESSIONS = (
    (_ent_function, "Nombres de función"),
    (_ent_parameter, "Parámetros de orden"),
)


def test_paired_expressions(msg, cat):
    """
    Compare expressions (functions, parameters) between original and translated text.
    Should be the same.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        for pattern, name in _PAIRED_EXPRESSIONS:
            if sorted(pattern.findall(msgid)) != sorted(pattern.findall(msgstr)):
                return _report(name + " distintos en la traducción")

    return []


# Raw string, so that "\b" is a word boundary and not a backspace character;
# non-capturing group, so that findall() returns the whole number.
_ent_number = re.compile(r"\b\d+(?:[\s.,:/-]\d+)*\b", re.U)
_not_digit = re.compile(r"\D", re.U)


def _digit_groups(text):
    """
    Return the sorted digit groups of the numbers found in text.

    One-character numbers are ignored; longer ones are split at every
    non-digit, so the separators themselves do not take part in comparisons.
    """
    groups = []
    for number in _ent_number.findall(text):
        if len(number) > 1:
            groups += _not_digit.split(number)
    return sorted(groups)


def test_paired_numbers(msg, cat):
    """
    Compare numbers and dates between original and translated text.
    Should be the same (except for commas/colons and one digit numbers)

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        if _digit_groups(msgid) != _digit_groups(msgstr):
            return _report("Valores de números distintos en la traducción")

    return []


# Raw string, so that "\1" is a backreference to the opening tag name and not
# the character with code 1.
_ent_context_tags = re.compile(
    r"<(application|bcode|command|email|envar|filename|icode|link|returnvalue)>"
    r"(.+?)</\1>",
    re.U)


def test_paired_context_tags(msg, cat):
    """
    Compare context tags between original and translated text.
    Some of them should not be changed in the translation: the content of
    each such tag in the original must appear verbatim in the translation.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        for _tag_name, content in _ent_context_tags.findall(msgid):
            if content not in msgstr:
                return _report("Valor de etiqueta de contexto " + content
                               + " traducido indebidamente")

    return []


# No capturing group: findall() returns the complete closing tag ("</link>"),
# so that the tag itself is looked for and not just its name, which may well
# be an ordinary word of the text ("para", "item", "note"...).
_ent_xml_entities = re.compile(
    r"</(?:application|bcode|command|email|emphasis|envar|filename|icode"
    r"|interface|link|message|nl|numid|placeholder|resource|shortcut|note"
    r"|warning|para|title|subtitle|list|item)>",
    re.U)


def test_paired_xml_entities(msg, cat):
    """
    Compare xml entities between original and translated text.
    Every known closing tag present in one of the texts must be present in
    the other one too.

    [type V4A hook].
    @return: parts
    """
    if _is_translator_credits(msg):
        return []

    for msgid, msgstr in _text_pairs(msg):
        for tag in _ent_xml_entities.findall(msgid):
            if tag not in msgstr:
                return _report("Etiqueta XML " + tag
                               + " no encontrada en la traducción")

        for tag in _ent_xml_entities.findall(msgstr):
            if tag not in msgid:
                return _report("Etiqueta XML " + tag
                               + " no encontrada en el texto original")

    return []