File indexing completed on 2024-05-12 05:47:02

0001 # -*- coding: utf-8 -*-
0002 
0003 """
0004 Make some comparisons between the translation and the original text.
0005 
0006 @author: Javier Viñal <fjvinal@gmail.com>
0007 @license: GPLv3
0008 """
0009 
0010 import re
0011 import string
0012 import enchant
0013 from pology import _, n_, split
0014 
0015 
0016 def test_if_empty_translation (msg, cat):
0017     """
0018     Compare the translation with the original text, testing if the translation
0019     is empty.
0020 
0021     [type V4A hook].
0022     @return: parts
0023     """
0024     for i in range(len(msg.msgstr)):
0025         if i > 0:
0026             lm = len(msg.msgid_plural)
0027         else:
0028             lm = len(msg.msgid)
0029         if lm > 0 and len(msg.msgstr[i]) == 0:
0030             return [("msgstr", 0, [(0, 0, 'La traducción parece estar vacía')])]
0031 
0032     return []
0033 
0034 
0035 _purepunc = re.compile("^\W+$", re.U)
0036 
0037 def test_if_purepunc (msg, cat):
0038     """
0039     Compare the translation with the original text, testing if the translation
0040     is different when the original text has not alphanumeric text.
0041 
0042     [type V4A hook].
0043     @return: parts
0044     """
0045     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0046         return []
0047 
0048     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0049         return []
0050 
0051     for i in range(len(msg.msgstr)):
0052         msgstr = msg.msgstr[i]
0053         if i > 0:
0054             msgid = msg.msgid_plural
0055         else:
0056             msgid = msg.msgid
0057 
0058         if _purepunc.match(msgid):
0059             msgid = msgid.replace('"', '')
0060             msgid = msgid.replace("'", "")
0061             msgid = msgid.replace(" ", "")
0062             msgstr = msgstr.replace('"', '')
0063             msgstr = msgstr.replace("'", "")
0064             msgstr = msgstr.replace("«", "")
0065             msgstr = msgstr.replace("»", "")
0066             msgstr = msgstr.replace(" ", "")
0067             msgstr = msgstr.replace("\"", "")
0068             if msgid != msgstr:
0069                 return [("msgstr", 0, [(0, 0, 'Se ha traducido un texto no alfanumérico')])]
0070 
0071     return []
0072 
0073 def test_if_non_printable_characters (msg, cat):
0074     """
0075     Compare the translation with the original text, testing if the translation
0076     is different when the original text has not alphanumeric text.
0077 
0078     [type V4A hook].
0079     @return: parts
0080     """
0081     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0082         return []
0083 
0084     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0085         return []
0086 
0087     for i in range(len(msg.msgstr)):
0088         msgstr = msg.msgstr[i]
0089     if i > 0:
0090         msgid = msg.msgid_plural
0091     else:
0092         msgid = msg.msgid
0093     for c in msgstr:
0094         if (
0095             (c not in string.printable)
0096             and (c not in msgid)
0097             and (c not in "áéíóúüñçÁÉÍÓÚÜÑÇ¿¡|«»©ºª€/")
0098         ):
0099             return [
0100                 (
0101                     "msgstr",
0102                     0,
0103                     [
0104                         (
0105                             0,
0106                             0,
0107                             (
0108                                 'La traducción contiene caracteres no ' 'imprimibles'
0109                             )
0110                         )
0111                     ]
0112                 )
0113             ]
0114         elif (
0115             (c in string.punctuation)
0116             and (c not in msgid)
0117             and (c not in "¿¡|«»©ºª€/.,;:()_-")
0118         ):
0119             return [
0120                 (
0121                     "msgstr",
0122                     0,
0123                     [
0124                         (
0125                             0,
0126                             0,
0127                             (
0128                                 'La traducción contiene signos de puntuación '
0129                                 'no incluidos en el original'
0130                             )
0131                         )
0132                     ]
0133                 )
0134             ]
0135     return []
0136 
0137 def test_if_very_long_translation (msg, cat):
0138     """
0139     Compare the translation with the original text, testing if the transaled text
0140     is much longer than the original (As much twice with a correction for small text).
0141 
0142     [type V4A hook].
0143     @return: parts
0144     """
0145     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0146         return []
0147 
0148     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0149         return []
0150 
0151     for i in range(len(msg.msgstr)):
0152         if i > 0:
0153             lm = len(msg.msgid_plural.split())
0154         else:
0155             lm = len(msg.msgid.split())
0156         if lm > 0 and len(msg.msgstr[i].split()) > (1.6 * lm + 5):
0157             return [("msgstr", 0, [(0, 0, 'La traducción parece demasiado larga')])]
0158 
0159     return []
0160 
0161 
0162 def test_if_very_short_translation (msg, cat):
0163     """
0164     Compare the translation with the original text, testing if the transaled text
0165     is much shorter than the original.
0166 
0167     [type V4A hook].
0168     @return: parts
0169     """
0170 
0171     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0172         return []
0173 
0174     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0175         return []
0176 
0177     for i in range(len(msg.msgstr)):
0178         if len(msg.msgstr[i]) > 0:
0179             if i > 0:
0180                 lm = len(msg.msgid_plural.split())
0181             else:
0182                 lm = len(msg.msgid.split())
0183             if lm > (1.6 * len(msg.msgstr[i].split()) +  5):
0184                 return [("msgstr", 0, [(0, 0, 'La traducción parece demasiado corta')])]
0185 
0186     return []
0187 
0188 
0189 _valid_word = re.compile("^\w+$", re.U)
0190 _capital_word = re.compile("^[A-Z0-9ÑÇÁÉÍÓÚÁÉÍÓÚÂÊÎÔÛÄËÏÖÜĀ]+$", re.U)
0191 _proper_name = re.compile("^\W*?[A-Z0-9ÑÇÁÉÍÓÚÁÉÍÓÚÂÊÎÔÛÄËÏÖÜĀ]\w+(\W+?[A-Z0-9ÑÇÁÉÍÓÚÁÉÍÓÚÂÊÎÔÛÄËÏÖÜĀ]\w+)+\W*$", re.U)
0192 
0193 def test_if_not_translated (msg, cat):
0194     """
0195     Compare the translation with the original text, testing if the paragraph is
0196     not translated.
0197 
0198     [type V4A hook].
0199     @return: parts
0200     """
0201 
0202     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0203         return []
0204 
0205     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0206         return []
0207 
0208     for i in range(len(msg.msgstr)):
0209         if i > 0:
0210             msgid = msg.msgid_plural
0211         else:
0212             msgid = msg.msgid
0213 
0214         if _proper_name.match(msg.msgstr[i]) or _purepunc.match(msgid):
0215             continue
0216 
0217         e = None
0218         l = None
0219         if len(msgid) > 0 and msgid == msg.msgstr[i]:
0220             for word in split.proper_words(msgid, markup=True, accels=['&']):
0221                 if _valid_word.match(word) and not _capital_word.match(word):
0222                     word = word.encode("utf-8")
0223                     if e is None:
0224                         e = enchant.Dict("en")
0225                     if l is None:
0226                         l = enchant.Dict("es")
0227                     if e.check(word) and not l.check(word):
0228                         return [("msgstr", 0, [(0, 0, 'El párrafo parece no estar traducido')])]
0229 
0230     return []
0231 
0232 _ent_accel = re.compile("&[A-Za-z0-9ÑñÇç](?!\w+;)", re.U)
0233 
0234 def test_paired_accelerators (msg, cat):
0235     """
0236     Compare number of accelerators (&) between original and translated text.
0237 
0238     [type V4A hook].
0239     @return: parts
0240     """
0241 
0242     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0243         return []
0244 
0245     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0246         return []
0247 
0248     for i in range(len(msg.msgstr)):
0249         if i > 0:
0250             msgid = msg.msgid_plural
0251         else:
0252             msgid = msg.msgid
0253 
0254         cont_orig = len(_ent_accel.findall(msgid))
0255         cont_tran = len(_ent_accel.findall(msg.msgstr[i]))
0256 
0257         if cont_orig < cont_tran:
0258             return [("msgstr", 0, [(0, 0, "Sobran aceleradores «&» en la traducción")])]
0259         elif cont_orig > cont_tran:
0260             return [("msgstr", 0, [(0, 0, "Faltan aceleradores «&» en la traducción")])]
0261     return []
0262 
0263 
0264 def test_paired_strings (msg, cat):
0265     """
0266     Compare number of some strings between original and translated text.
0267 
0268     [type V4A hook].
0269     @return: parts
0270     """
0271 
0272     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0273         return []
0274 
0275     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0276         return []
0277 
0278     for i in range(len(msg.msgstr)):
0279         if i > 0:
0280             msgid = msg.msgid_plural
0281         else:
0282             msgid = msg.msgid
0283 
0284         for s in (["\t", "tabuladores"],
0285                     ["\r", "retornos de carro"],
0286                     ["\n", "saltos de línea"]
0287                   ):
0288             cont_orig = msgid.count(s[0])
0289             cont_tran = msg.msgstr[i].count(s[0])
0290 
0291             if cont_orig < cont_tran:
0292                 return [("msgstr", 0, [(0, 0, "Sobran " + s[1] + " en la traducción")])]
0293             elif cont_orig > cont_tran:
0294                 return [("msgstr", 0, [(0, 0, "Faltan " + s[1] + " en la traducción")])]
0295     return []
0296 
0297 
0298 def test_paired_brackets (msg, cat):
0299     """
0300     Compare number of some brackets between original and translated text.
0301 
0302     [type V4A hook].
0303     @return: parts
0304     """
0305 
0306     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0307         return []
0308 
0309     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0310         return []
0311 
0312     for i in range(len(msg.msgstr)):
0313         if i > 0:
0314             msgid = msg.msgid_plural
0315         else:
0316             msgid = msg.msgid
0317 
0318         for s in (["(", ")", "paréntesis"],
0319                   ["{", "}", "llaves"],
0320                   ["[", "]", "corchetes"],
0321                   ["«", "»", "comillas españolas"]
0322                   ):
0323             cont_orig_open = msgid.count(s[0])
0324             cont_orig_close = msgid.count(s[1])
0325             if cont_orig_open != cont_orig_close:
0326                 continue
0327             cont_tran_open = msg.msgstr[i].count(s[0])
0328             cont_tran_close = msg.msgstr[i].count(s[1])
0329 
0330             if cont_tran_open < cont_tran_close:
0331                 return [("msgstr", 0, [(0, 0, "Sobran " + s[2] + " en la traducción")])]
0332             elif cont_tran_open > cont_tran_close:
0333                 return [("msgstr", 0, [(0, 0, "Faltan " + s[2] + " en la traducción")])]
0334     return []
0335 
0336 _ent_function = re.compile("(?:\w+\:\:)*\w+\(\)", re.U)
0337 _ent_parameter = re.compile("(?<=\W)\-\-\w+(?:\-\w+)*", re.U)
0338 
0339 def test_paired_expressions (msg, cat):
0340     """
0341     Compare expressions (functions, parameters) between original and translated text.
0342     Should be the same.
0343 
0344     [type V4A hook].
0345     @return: parts
0346     """
0347     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0348         return []
0349 
0350     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0351         return []
0352 
0353     for i in range(len(msg.msgstr)):
0354         if i > 0:
0355             msgid = msg.msgid_plural
0356         else:
0357             msgid = msg.msgid
0358 
0359         for expr in ([_ent_function, "Nombres de función"],
0360                      [_ent_parameter, "Parámetros de orden"]
0361                      ):
0362             expr_orig = sorted(expr[0].findall(msgid))
0363             expr_trans = sorted(expr[0].findall(msg.msgstr[i]))
0364 
0365             if expr_orig != expr_trans:
0366                 return [("msgstr", 0, [(0, 0, expr[1] + " distintos en la traducción")])]
0367 
0368     return []
0369 
0370 
0371 _ent_number = re.compile("\b\d+([\s.,:/-]\d+)*\b", re.U)
0372 _not_digit = re.compile("\D", re.U)
0373 
0374 def test_paired_numbers (msg, cat):
0375     """
0376     Compare numbers and dates between original and translated text.
0377     Should be the same (except for commas/colons and one digit numbers)
0378 
0379     [type V4A hook].
0380     @return: parts
0381     """
0382     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0383         return []
0384 
0385     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0386         return []
0387 
0388     for i in range(len(msg.msgstr)):
0389         if i > 0:
0390             msgid = msg.msgid_plural
0391         else:
0392             msgid = msg.msgid
0393 
0394         number_orig = []
0395         for number in _ent_number.findall(msgid):
0396             if len(number) > 1:
0397                 number_orig += _not_digit.split(number)
0398 
0399         number_trans = []
0400         for number in _ent_number.findall(msg.msgstr[i]):
0401             if len(number) > 1:
0402                 number_trans += _not_digit.split(number)
0403 
0404         if sorted(number_orig) != sorted(number_trans):
0405             return [("msgstr", 0, [(0, 0, "Valores de números distintos en la traducción")])]
0406 
0407     return []
0408 
0409 _ent_context_tags = re.compile("\<(application|bcode|command|email|envar|filename|icode|link|returnvalue)\>(.+?)\<\/\1\>", re.U)
0410 
0411 def test_paired_context_tags (msg, cat):
0412     """
0413     Compare context tags between original and translated text.
0414     Some of them should not be changed in the translation.
0415 
0416     [type V4A hook].
0417     @return: parts
0418     """
0419     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0420         return []
0421 
0422     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0423         return []
0424 
0425     for i in range(len(msg.msgstr)):
0426         if i > 0:
0427             msgid = msg.msgid_plural
0428         else:
0429             msgid = msg.msgid
0430 
0431         for tag in _ent_context_tags.findall(msgid):
0432             if not (tag[1] in msg.msgstr[i]):
0433                 return [("msgstr", 0, [(0, 0, "Valor de etiqueta de contexto" + tag[1] + "traducido indebidamente")])]
0434 
0435     return []
0436 
0437 _ent_xml_entities = re.compile("\<\/(application|bcode|command|email|emphasis|envar|filename|icode|interface|link|message|nl|numid|placeholder|resource|shortcut|note|warning|para|title|subtitle|list|item|)\>", re.U)
0438 
0439 def test_paired_xml_entities (msg, cat):
0440     """
0441     Compare xml entities between original and translated text.
0442     Some of them should not be changed in the translation.
0443 
0444     [type V4A hook].
0445     @return: parts
0446     """
0447     if msg.msgctxt in ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS", "ROLES OF TRANSLATORS"):
0448         return []
0449 
0450     if msg.msgid in ("Your emails", "Your names", "CREDIT_FOR_TRANSLATORS", "ROLES_OF_TRANSLATORS"):
0451         return []
0452 
0453     for i in range(len(msg.msgstr)):
0454         if i > 0:
0455             msgid = msg.msgid_plural
0456         else:
0457             msgid = msg.msgid
0458 
0459         for tag in _ent_xml_entities.findall(msgid):
0460             if not (tag in msg.msgstr[i]):
0461                 return [("msgstr", 0, [(0, 0, "Etiqueta XML" + tag + "no encontrada en la traducción")])]
0462 
0463         for tag in _ent_xml_entities.findall(msg.msgstr[i]):
0464             if not (tag in msgid):
0465                 return [("msgstr", 0, [(0, 0, "Etiqueta XML" + tag + "no encontrada en el texto original")])]
0466 
0467     return []