# File indexing completed on 2024-05-12 17:18:06
# -*- coding: utf-8 -*-

"""
Remove special substrings from parts of the message.

@author: Javier Viñal <fjvinal@gmail.com>
@license: GPLv3
"""

import re
#from pology import PologyError, datadir, _, n_
#from pology.report import report, warning, format_item_list


# NOTE: every pattern below is a raw string on purpose. In a plain string
# literal "\b" is a backspace character (not a word boundary) and sequences
# such as "\s" or "\w" are invalid escapes.

# Words containing a capital letter in positions where the translated text may
# legitimately have one: right after one of . : ? ! > » " followed by a single
# whitespace character, or at the very start of the text.
_valid_capital_word_middle = re.compile(
    r'(?<=[.:?!>»"]\s)\w*?[A-ZÁÉÍÓÚÜÑÇ]\w*', re.U)
_valid_capital_word_initial = re.compile(r"^\w*?[A-ZÁÉÍÓÚÜÑÇ]\w*", re.U)

# All words containing a capital letter in the original English text.
_ent_capital_word = re.compile(r"\w*?[A-Z]\w*", re.U)
# All plural full-capital words (acronyms) without the final 's' or "'s".
_ent_capital_word_plural = re.compile(r"[A-Z0-9]+(?='?s\b)", re.U)


def _source_capital_words(text):
    """Return the set of capital words and plural-acronym stems in C{text}."""
    words = set(_ent_capital_word.findall(text))
    words.update(_ent_capital_word_plural.findall(text))
    return words


def _whole_word_pattern(word):
    """Return a compiled pattern matching C{word} only as a whole word."""
    return re.compile(r"\b" + re.escape(word) + r"\b", re.U)


def remove_paired_capital_words(msg, cat):
    """
    Replace capital words with C{~} in both the original and the translation.

    For each translation in C{msg.msgstr}, the words replaced are those
    containing a capital letter in the matching original text (C{msg.msgid}
    for the first translation, C{msg.msgid_plural} for the others), the
    stems of plural acronyms found there, and the capital words of the
    translation that stand in a position where a capital is acceptable
    (start of text or after punctuation plus whitespace).
    The message is modified in place [type F4A hook].

    @param msg: message exposing C{msgid}, C{msgid_plural} and C{msgstr}
    @param cat: unused here

    @return: number of errors (always 0)
    """

    for i in range(len(msg.msgstr)):
        # Capital words in valid contexts of the translated text.
        ents = set(_valid_capital_word_middle.findall(msg.msgstr[i]))
        ents.update(_valid_capital_word_initial.findall(msg.msgstr[i]))

        # Capital words of the matching original text.
        is_plural = i > 0
        source = msg.msgid_plural if is_plural else msg.msgid
        if source:
            ents.update(_source_capital_words(source))

        # Remove the joined set of words from translation and original.
        for ent in ents:
            # report(_("@info", "Palabra en mayusculas: %(info)s \n", info=ent))
            pattern = _whole_word_pattern(ent)
            msg.msgstr[i] = pattern.sub("~", msg.msgstr[i])
            if not is_plural:
                msg.msgid = pattern.sub("~", msg.msgid)
            elif msg.msgid_plural:
                # Guarded: there may be no plural original to rewrite.
                msg.msgid_plural = pattern.sub("~", msg.msgid_plural)

    # The remaining words could have wrong capitalization in the translated
    # message.
    # TODO: Look the remaining words up in a Spanish dictionary.

    return 0


def remove_original_capital_words(msg, cat):
    """
    Replace with C{~}, in the translation, the capital words of the original.

    The words are collected from C{msg.msgid} for the first translation and
    from C{msg.msgid_plural} for the others; plural-acronym stems are
    included. Only C{msg.msgstr} is modified, in place [type F4A hook].

    @param msg: message exposing C{msgid}, C{msgid_plural} and C{msgstr}
    @param cat: unused here

    @return: number of errors (always 0)
    """

    for i in range(len(msg.msgstr)):
        source = msg.msgid if i == 0 else msg.msgid_plural
        if not source:
            continue
        # Remove English capital words from the translated text.
        for ent in _source_capital_words(source):
            msg.msgstr[i] = _whole_word_pattern(ent).sub("~", msg.msgstr[i])

    return 0


# Format directives: %1 / %1%, ${name}, $name, %d / %s / %1$s, %|...|
_ent_parameter = re.compile(
    r"(%\d%?|\$\{.+?\}|\$\w+|%(?:\d\$)?[ds]|%\|.+?\|)", re.U)


def _longest_first(strings):
    """Return C{strings} sorted longest first, ties broken alphabetically."""
    # Replacing "%1" before "%1%" would leave a stray "%" behind, and plain
    # set iteration order is not stable between runs.
    return sorted(strings, key=lambda s: (-len(s), s))


def remove_paired_parameters(msg, cat):
    """
    Replace with C{~} the format directives shared by original and translation.

    A directive is replaced, in both texts, only when it occurs in the
    translation and in the matching original (C{msg.msgid} for the first
    translation, C{msg.msgid_plural} for the others).
    The message is modified in place [type F4A hook].

    @param msg: message exposing C{msgid}, C{msgid_plural} and C{msgstr}
    @param cat: unused here

    @return: number of errors (always 0)
    """

    # Collected up front, before any replacement touches the originals.
    pars_orig = set(_ent_parameter.findall(msg.msgid))
    pars_orig_plural = set()
    if msg.msgid_plural:
        pars_orig_plural.update(_ent_parameter.findall(msg.msgid_plural))

    for i in range(len(msg.msgstr)):
        pars_trans = set(_ent_parameter.findall(msg.msgstr[i]))
        if i == 0:
            for par in _longest_first(pars_trans & pars_orig):
                msg.msgid = msg.msgid.replace(par, "~")
                msg.msgstr[i] = msg.msgstr[i].replace(par, "~")
        else:
            for par in _longest_first(pars_trans & pars_orig_plural):
                msg.msgid_plural = msg.msgid_plural.replace(par, "~")
                msg.msgstr[i] = msg.msgstr[i].replace(par, "~")

    return 0


# Opening or closing tag without attributes, e.g. <b> or </b>.
_ent_xml_entity = re.compile(r"\<\/?\w+\>")

# Tags that mark a message as not needing translation checks.
_auto_comment_tag = ("trans_comment", "literallayout", "option", "programlisting", "othercredit",
    "author", "email", "holder",
    "surname", "personname", "affiliation", "address", "sect1", "chapter", "chapterinfo", "date", "command", "option",
    "refentrytitle", "refentryinfo", "refname", "synopsis", "literal", "varname", "term", "glossterm",
    "filename", "entry", "envar", "userinput", "cmdsynopsis", "releaseinfo", "language", "Name",
    "City", "Region", "Region/state", "unit", "Query", "Kgm")

# Contexts of the translator-credit messages.
_translator_credit_contexts = ("EMAIL OF TRANSLATORS", "NAME OF TRANSLATORS",
                               "ROLES OF TRANSLATORS")


def _blank_message(msg, force_plural=False):
    """
    Empty the original and every translation of C{msg}, in place.

    C{msg.msgid_plural} is emptied only when it is non-empty, unless
    C{force_plural} is true, in which case it is always set to C{""}.
    """
    msg.msgid = ""
    if force_plural or msg.msgid_plural:
        msg.msgid_plural = ""
    for i in range(len(msg.msgstr)):
        msg.msgstr[i] = ""


def remove_tags_without_translation(msg, cat):
    """
    Empty the messages that belong to contexts that need no translation.

    A message is emptied when its context is one of the translator-credit
    contexts, or when any whitespace-separated word of its automatic
    comments or of its context is one of the known tags.
    The message is modified in place [type F4A hook].

    @param msg: message exposing C{msgctxt}, C{auto_comment}, C{msgid},
        C{msgid_plural} and C{msgstr}
    @param cat: unused here

    @return: number of errors (always 0)
    """

    if msg.msgctxt in _translator_credit_contexts:
        # This branch has always reset msgid_plural unconditionally.
        _blank_message(msg, force_plural=True)
        return 0

    # Avoid specially tagged messages.
    for tagline in msg.auto_comment:
        for tag in tagline.split():
            if tag in _auto_comment_tag:
                _blank_message(msg)
                return 0

    if msg.msgctxt:
        for tag in msg.msgctxt.split():
            if tag in _auto_comment_tag:
                _blank_message(msg)
                return 0

    return 0