File indexing completed on 2024-11-03 08:24:24
0001 # -*- coding: UTF-8 -*- 0002 0003 """ 0004 Collection of PO entries. 0005 0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net> 0007 @license: GPLv3 0008 """ 0009 0010 import copy 0011 import difflib 0012 import os 0013 import re 0014 import tempfile 0015 import time 0016 import types 0017 0018 from pology import PologyError, _, n_ 0019 from pology.header import Header, format_datetime 0020 from pology.message import Message as MessageMonitored 0021 from pology.message import MessageUnsafe as MessageUnsafe 0022 from pology.escape import escape_c as escape 0023 from pology.escape import unescape_c as unescape 0024 from pology.fsops import mkdirpath 0025 from pology.monitored import Monitored 0026 from pology.resolve import expand_vars 0027 from pology.wrap import select_field_wrapper 0028 0029 0030 class CatalogSyntaxError (PologyError): 0031 """ 0032 Exception for errors in catalog syntax. 0033 0034 This exception is normally raised when parsing a catalog, 0035 e.g. on invalid syntax or non-decodable characters. 0036 """ 0037 0038 pass 0039 0040 0041 def _parse_quoted (s): 0042 0043 sp = s[s.index("\"") + 1:s.rindex("\"")] 0044 sp = unescape(sp); 0045 return sp 0046 0047 0048 class _MessageDict: 0049 0050 def __init__ (self, lcache=True): 0051 0052 self.manual_comment = [] 0053 self.auto_comment = [] 0054 self.source = [] 0055 self.flag = [] 0056 self.obsolete = False 0057 self.msgctxt_previous = [] 0058 self.msgid_previous = [] 0059 self.msgid_plural_previous = [] 0060 self.msgctxt = [] 0061 self.msgid = [] 0062 self.msgid_plural = [] 0063 self.msgstr = [] 0064 self.refline = -1 0065 self.refentry = -1 0066 0067 if lcache: 0068 self._lines_all = [] 0069 self._lines_manual_comment = [] 0070 self._lines_auto_comment = [] 0071 self._lines_source = [] 0072 self._lines_flag = [] 0073 self._lines_msgctxt_previous = [] 0074 self._lines_msgid_previous = [] 0075 self._lines_msgid_plural_previous = [] 0076 self._lines_msgctxt = [] 0077 self._lines_msgid = [] 0078 self._lines_msgid_plural = [] 0079 self._lines_msgstr = [] 0080 0081 0082 def _read_lines_and_encoding (file, filename): 0083 0084 fstr = file.read() 0085 # Determine line ending. 0086 maxlno = 0 0087 for clend in (b"\r\n", b"\n", b"\r"): # "\r\n" should be checked first 0088 lno = len(fstr.split(clend)) 0089 if maxlno < lno: 0090 maxlno = lno 0091 lend = clend 0092 lines = [x + b"\n" for x in fstr.split(lend)] 0093 if lines[-1] == b"\n": 0094 lines.pop() 0095 0096 enc = None 0097 enc_rx = re.compile(rb"Content-Type:.*charset=(.+?)\\n", re.I) 0098 for line in lines: 0099 if line.strip().startswith(b"#:"): 0100 break 0101 m = enc_rx.search(line) 0102 if m: 0103 enc = m.group(1).strip() 0104 if not enc or enc == b"CHARSET": # no encoding given 0105 enc = None 0106 break 0107 if enc is None: 0108 enc = b"UTF-8" # fall back to UTF-8 if encoding not found 0109 enc = enc.decode() 0110 0111 enclines = [] 0112 lno = 0 0113 for line in lines: 0114 lno += 1 0115 try: 0116 encline = line.decode(enc) 0117 except UnicodeDecodeError as e: 0118 raise CatalogSyntaxError( 0119 _("@info", 0120 "Text decoding failure at %(file)s:%(line)d:%(col)d " 0121 "under assumed encoding '%(enc)s'.", 0122 file=filename, line=lno, col=e.start, enc=enc)) 0123 enclines.append(encline) 0124 0125 return enclines, enc 0126 0127 0128 def _parse_po_file (file, MessageType=MessageMonitored, 0129 headonly=False, lcache=True): 0130 0131 if isinstance(file, str): 0132 filename = file 0133 file = open(filename, "rb") 0134 close_later = True 0135 else: 0136 if hasattr(file, "name"): 0137 filename = file.name 0138 else: 0139 filename = _("@item generic name for the source or destination " 0140 "of data being read or written", 0141 "<stream>").resolve("none") 0142 close_later = False 0143 lines, fenc = _read_lines_and_encoding(file, filename) 0144 if close_later: 0145 file.close() 0146 0147 ctx_modern, ctx_obsolete, \ 0148 ctx_previous, ctx_current, \ 0149 ctx_none, ctx_msgctxt, ctx_msgid, ctx_msgid_plural, ctx_msgstr = list(range(9)) 0150 0151 messages1 = list() 0152 lno = 0 0153 eno = 0 0154 0155 class Namespace: pass 0156 loc = Namespace() 0157 loc.lno = 0 0158 loc.tail = None 0159 loc.msg = _MessageDict(lcache) 0160 loc.life_context = ctx_modern 0161 loc.field_context = ctx_none 0162 loc.age_context = ctx_current 0163 0164 # The message has been completed by the previous line if the context just 0165 # switched away from ctx_msgstr; 0166 # call whenever context switch happens, *before* assigning new context. 0167 nlines = len(lines) 0168 def try_finish (): 0169 if loc.field_context == ctx_msgstr: 0170 messages1.append(loc.msg) 0171 loc.msg = _MessageDict(lcache) 0172 loc.field_context = ctx_none 0173 # In header-only mode, the first message read is the header. 0174 # Compose the tail of this and rest of the lines, and 0175 # set lno to nlines for exit. 0176 if headonly: 0177 # If not at end of file, current line is part of 0178 # first message and should be retained in the tail. 0179 offset = loc.lno < nlines and 1 or 0 0180 loc.tail = "".join(lines[loc.lno - offset:]) 0181 loc.lno = nlines 0182 0183 while loc.lno < nlines: # sentry for last entry 0184 line_raw = lines[lno] 0185 loc.lno += 1 0186 lno = loc.lno # shortcut 0187 line = line_raw.strip() 0188 if not line: 0189 continue 0190 0191 string_follows = True 0192 loc.life_context = ctx_modern 0193 loc.age_context = ctx_current 0194 0195 if line.startswith("#"): 0196 0197 if 0: pass 0198 0199 elif line.startswith("#~|"): 0200 line = line[3:].lstrip() 0201 loc.age_context = ctx_previous 0202 0203 elif line.startswith("#~"): 0204 line = line[2:].lstrip() 0205 loc.life_context = ctx_obsolete 0206 0207 elif line.startswith("#|"): 0208 line = line[2:].lstrip() 0209 loc.age_context = ctx_previous 0210 0211 elif line.startswith("#:"): 0212 try_finish() 0213 string_follows = False 0214 for srcref in line[2:].split(" "): 0215 srcref = srcref.strip() 0216 if srcref: 0217 lst = srcref.split(":", 1) 0218 if len(lst) == 2: 0219 file = lst[0] 0220 try: 0221 line = int(lst[1]) 0222 assert line > 0 0223 except: 0224 file = srcref 0225 line = -1 0226 loc.msg.source.append((file, line)) 0227 else: 0228 loc.msg.source.append((srcref, -1)) 0229 0230 elif line.startswith("#,"): 0231 try_finish() 0232 string_follows = False 0233 for flag in line[2:].split(","): 0234 flag = flag.strip() 0235 if flag: 0236 loc.msg.flag.append(flag) 0237 0238 elif line.startswith("#."): 0239 try_finish() 0240 string_follows = False 0241 loc.msg.auto_comment.append(line[2:].lstrip()) 0242 0243 elif line.startswith("#"): 0244 try_finish() 0245 string_follows = False 0246 loc.msg.manual_comment.append(line[2:].lstrip()) 0247 0248 else: 0249 # Cannot reach, all unknown comments treated as manual above. 0250 raise CatalogSyntaxError( 0251 _("@info", 0252 "Unknown comment type at %(file)s:%(line)d.", 0253 file=filename, line=lno)) 0254 0255 if line and string_follows: # for starting fields 0256 if 0: pass 0257 0258 elif line.startswith("msgctxt"): 0259 # TODO: Assert context. 0260 try_finish() 0261 loc.field_context = ctx_msgctxt 0262 line = line[7:].lstrip() 0263 0264 elif line.startswith("msgid_plural"): 0265 # TODO: Assert context. 0266 # No need for try_finish(), msgid_plural cannot start message. 0267 loc.field_context = ctx_msgid_plural 0268 line = line[12:].lstrip() 0269 0270 elif line.startswith("msgid"): 0271 # TODO: Assert context. 0272 try_finish() 0273 if loc.life_context == ctx_obsolete: 0274 loc.msg.obsolete = True 0275 loc.field_context = ctx_msgid 0276 if loc.age_context == ctx_current: 0277 loc.msg.refline = lno 0278 loc.msg.refentry = eno 0279 eno += 1 0280 line = line[5:].lstrip() 0281 0282 elif line.startswith("msgstr"): 0283 # TODO: Assert context. 0284 loc.field_context = ctx_msgstr 0285 line = line[6:].lstrip() 0286 msgstr_i = 0 0287 if line.startswith("["): 0288 line = line[1:].lstrip() 0289 llen = len(line) 0290 p = 0 0291 while p < llen and line[p].isdigit(): 0292 p += 1 0293 if p == 0: 0294 raise CatalogSyntaxError( 0295 _("@info", 0296 "Malformed '%(field)s' ordinal " 0297 "at %(file)s:%(line)d.", 0298 file=filename, line=lno, field="msgstr")) 0299 msgstr_i = int(line[:p]) 0300 line = line[p:].lstrip() 0301 if line.startswith("]"): 0302 line = line[1:].lstrip() 0303 else: 0304 raise CatalogSyntaxError( 0305 _("@info", 0306 "Malformed '%(field)s' ordinal " 0307 "at %(file)s:%(line)d.", 0308 file=filename, line=lno, field="msgstr")) 0309 # Add missing msgstr entries. 0310 for i in range(len(loc.msg.msgstr), msgstr_i + 1): 0311 loc.msg.msgstr.append([]) 0312 0313 elif not line.startswith("\""): 0314 raise CatalogSyntaxError( 0315 _("@info", 0316 "Unknown field name at %(file)s:%(line)d.", 0317 file=filename, line=lno)) 0318 0319 if line and string_follows: # for continuing fields 0320 if line.startswith("\""): 0321 s = _parse_quoted(line) 0322 if loc.age_context == ctx_previous: 0323 if loc.field_context == ctx_msgctxt: 0324 loc.msg.msgctxt_previous.append(s) 0325 elif loc.field_context == ctx_msgid: 0326 loc.msg.msgid_previous.append(s) 0327 elif loc.field_context == ctx_msgid_plural: 0328 loc.msg.msgid_plural_previous.append(s) 0329 else: 0330 if loc.field_context == ctx_msgctxt: 0331 loc.msg.msgctxt.append(s) 0332 elif loc.field_context == ctx_msgid: 0333 loc.msg.msgid.append(s) 0334 elif loc.field_context == ctx_msgid_plural: 0335 loc.msg.msgid_plural.append(s) 0336 elif loc.field_context == ctx_msgstr: 0337 loc.msg.msgstr[msgstr_i].append(s) 0338 else: 0339 raise CatalogSyntaxError( 0340 _("@info", 0341 "Expected string continuation at %(file)s:%(line)d.", 0342 file=filename, line=lno)) 0343 0344 # Update line caches. 0345 if lcache: 0346 loc.msg._lines_all.append(line_raw) 0347 if 0: pass 0348 elif line_raw.startswith("#:"): 0349 loc.msg._lines_source.append(line_raw) 0350 elif line_raw.startswith("#,"): 0351 loc.msg._lines_flag.append(line_raw) 0352 elif line_raw.startswith("#."): 0353 loc.msg._lines_auto_comment.append(line_raw) 0354 elif line_raw.startswith("#") and line_raw[1:2] not in ("~", "|"): 0355 loc.msg._lines_manual_comment.append(line_raw) 0356 elif loc.age_context == ctx_previous: 0357 if loc.field_context == ctx_msgctxt: 0358 loc.msg._lines_msgctxt_previous.append(line_raw) 0359 elif loc.field_context == ctx_msgid: 0360 loc.msg._lines_msgid_previous.append(line_raw) 0361 elif loc.field_context == ctx_msgid_plural: 0362 loc.msg._lines_msgid_plural_previous.append(line_raw) 0363 else: 0364 raise PologyError( 0365 _("@info", 0366 "Internal problem (%(id)d) at %(file)s:%(line)d.", 0367 id=11, file=filename, line=lno)) 0368 elif loc.age_context == ctx_current: 0369 if loc.field_context == ctx_msgctxt: 0370 loc.msg._lines_msgctxt.append(line_raw) 0371 elif loc.field_context == ctx_msgid: 0372 loc.msg._lines_msgid.append(line_raw) 0373 elif loc.field_context == ctx_msgid_plural: 0374 loc.msg._lines_msgid_plural.append(line_raw) 0375 elif loc.field_context == ctx_msgstr: 0376 loc.msg._lines_msgstr.append(line_raw) 0377 else: 0378 raise PologyError( 0379 _("@info", 0380 "Internal problem (%(id)d) at %(file)s:%(line)d.", 0381 id=12, file=filename, line=lno)) 0382 else: 0383 raise PologyError( 0384 _("@info", 0385 "Internal problem (%(id)d) at %(file)s:%(line)d.", 0386 id=10, file=filename, line=lno)) 0387 0388 try_finish() # the last message 0389 0390 if len(messages1) == 0: 0391 raise CatalogSyntaxError( 0392 _("@info", 0393 "No header at %(file)s:%(line)d.", 0394 file=filename, line=lno)) 0395 0396 # Join fields. 0397 join_or_none = lambda x: "".join(x) if x else None 0398 for i, msg in enumerate(messages1): 0399 msg.msgctxt_previous = join_or_none(msg.msgctxt_previous) 0400 msg.msgid_previous = join_or_none(msg.msgid_previous) 0401 msg.msgid_plural_previous = join_or_none(msg.msgid_plural_previous) 0402 msg.msgctxt = join_or_none(msg.msgctxt) 0403 msg.msgid = join_or_none(msg.msgid) 0404 msg.msgid_plural = join_or_none(msg.msgid_plural) 0405 msg.msgstr = [join_or_none(x) for x in msg.msgstr] 0406 if i > 0 and msg.msgid == "" and msg.msgctxt is None: 0407 raise CatalogSyntaxError( 0408 _("@info", 0409 "Empty message at %(file)s:%(line)d.", 0410 file=filename, line=msg.refline)) 0411 0412 # Repack raw dictionaries as message objects. 0413 messages2 = [] 0414 for msg1 in messages1: 0415 messages2.append(MessageType(msg1.__dict__)) 0416 0417 return (messages2, fenc, loc.tail) 0418 0419 0420 def _srcref_repack (srcrefs): 0421 srcdict = {} 0422 for file, line in srcrefs: 0423 if not file in srcdict: 0424 srcdict[file] = [line] 0425 else: 0426 srcdict[file].append(line) 0427 srcdict[file].sort() 0428 return srcdict 0429 0430 0431 _Catalog_spec = { 0432 # Data. 0433 "header" : {"type" : Header}, 0434 "filename" : {"type" : (str,)}, 0435 "name" : {"type" : (str,), "derived" : True}, 0436 "*" : {}, # messages sequence: the type is assigned at construction 0437 } 0438 0439 0440 class Catalog (Monitored): 0441 """ 0442 Class for access and operations on PO catalogs. 0443 0444 Catalog behaves as an ordered sequence of messages. The typical way of 0445 iterating over the messages from a PO file on disk would be:: 0446 0447 cat = Catalog("relative/path/foo.po") 0448 for msg in cat: 0449 ... 0450 (do something with msg) 0451 ... 0452 cat.sync() 0453 0454 where L{sync()<sync>} method is used to write any modifications back to 0455 the disk. 0456 0457 The header entry of the catalog is not part of the message sequence, 0458 but is provided by the L{header} attribute, an object of 0459 type different from an ordinary message entry. 0460 0461 The catalog is a I{monitored} class. 0462 Catalog message entries themeselves may also be monitored (default), 0463 but need not, depending on the mode of creation. 0464 0465 @ivar header: the header entry 0466 @type header: L{Header} 0467 0468 @ivar filename: the file name which the catalog was created with 0469 @type filename: string 0470 0471 @ivar name: (read-only) 0472 the name of the catalog 0473 0474 Determined as base of the filename, without extension. 0475 @type name: string 0476 0477 @see: L{Monitored} 0478 @see: L{Message}, L{MessageUnsafe} 0479 @see: L{Header} 0480 """ 0481 0482 def __init__ (self, filename, 0483 create=False, truncate=False, 0484 wrapping=None, monitored=True, 0485 headonly=False, readfh=None, single_entry=0): 0486 """ 0487 Build a message catalog by reading from a PO file or creating anew. 0488 0489 The message entries in the catalog may be monitored themselves or not. 0490 That is, when monitoring is requested, entries are represented by 0491 the L{Message} class, otherwise with L{MessageUnsafe}. 0492 0493 Monitored messages are usually appropriate when the application is 0494 expected to modify them. Non-monitored messages should provide better 0495 performance, so use them whenever the catalog is opened for read-only 0496 purposes (such as checks). 0497 0498 Catalog can also be opened in header-only mode, for better 0499 performance when only the header data is needed. This mode provides 0500 L{header} attribute as usual, but the rest of entries are 0501 unavailable. If any of the operations dealing with message entries 0502 are invoked, an error is signaled. 0503 0504 Instead of opening and reading from catalog's filename, 0505 catalog can be read from a file-like object provided by 0506 C{readfh} parameter. 0507 Same as when reading from file on disk, text will be decoded 0508 using catalog's encoding after reading it from C{readfh}. 0509 0510 If a problem which prevents construction of a valid catalog is 0511 detected while parsing a PO file, L{CatalogSyntaxError} is raised. 0512 0513 @param filename: name of the PO catalog on disk, or new catalog 0514 @type filename: string 0515 0516 @param create: 0517 whether a blank catalog can be created when the PO file does 0518 not already exist, or signal an error 0519 @type create: bool 0520 0521 @param truncate: 0522 whether catalog should be empty (and with uninitialized header) 0523 regardless of whether it is opened or created 0524 @type truncate: bool 0525 0526 @param wrapping: 0527 sequence of keywords specifying wrapping policy for 0528 message text fields (C{msgid}, C{msgstr}, etc.). 0529 See L{select_field_wrapper<wrap.select_field_wrapper>} 0530 function for possible keywords and their effects on wrapping. 0531 If given as C{None}, it will be deduced from the catalog 0532 (see L{wrapping} method). 0533 @type wrapping: sequence of strings 0534 0535 @param monitored: whether the message entries are monitored 0536 @type monitored: bool 0537 0538 @param headonly: whether to open in header-only mode 0539 @type headonly: bool 0540 0541 @param readfh: file to read the catalog from 0542 @type readfh: file-like object 0543 """ 0544 0545 self._monitored = monitored 0546 0547 # Select type of message object to use. 0548 if monitored: 0549 message_type = MessageMonitored 0550 else: 0551 message_type = MessageUnsafe 0552 0553 # Signal if catalog should exist on disk but does not. 0554 if not create and not (os.path.exists(filename) or readfh): 0555 raise PologyError( 0556 _("@info", 0557 "File '%(file)s' does not exist.", 0558 file=filename)) 0559 0560 # Read messages or create empty catalog. 0561 if not truncate and (os.path.exists(filename) or readfh): 0562 file = readfh or filename 0563 m, e, t = _parse_po_file(file, message_type, headonly, monitored) 0564 self._encoding = e 0565 self._created_from_scratch = False 0566 if not m[0].msgctxt and not m[0].msgid: 0567 # Proper PO, containing the header. 0568 self._header = Header(m[0]) 0569 self._header._committed = True # status for sync 0570 if (single_entry > 0): 0571 self.__dict__["*"] = [m[single_entry]] 0572 else: 0573 self.__dict__["*"] = m[1:] 0574 else: 0575 # Improper PO, missing the header. 0576 self._header = Header() 0577 self._header._committed = False # status for sync 0578 if (single_entry > 0): 0579 self.__dict__["*"] = [m[single_entry-1]] 0580 else: 0581 self.__dict__["*"] = m 0582 self._tail = t 0583 else: 0584 self._encoding = "UTF-8" 0585 self._created_from_scratch = True 0586 self._header = Header() 0587 self._header._committed = False # status for sync 0588 self.__dict__["*"] = [] 0589 self._tail = None 0590 0591 self._filename = filename 0592 0593 self._messages = self.__dict__["*"] # nicer name for the sequence 0594 0595 # Fill in the message key-position links. 0596 # Set committed and remove-on-sync status. 0597 self._msgpos = {} 0598 for i in range(len(self._messages)): 0599 self._msgpos[self._messages[i].key] = i 0600 self._messages[i]._committed = True 0601 self._messages[i]._remove_on_sync = False 0602 0603 # Initialize monitoring. 0604 final_spec = copy.deepcopy(_Catalog_spec) 0605 final_spec["*"]["type"] = message_type 0606 self.assert_spec_init(final_spec) 0607 0608 # Inverse map (by msgstr) will be computed on first use. 0609 self._invmap = None 0610 0611 # Cached plural definition from the header. 0612 self._plustr = "" 0613 0614 # Cached language of the translation. 0615 # None means the language has not been determined. 0616 self._lang = None 0617 self._lang_determined = False 0618 0619 # Cached environments. 0620 self._envs = None 0621 self._envs_determined = False 0622 0623 # Cached accelerator markers. 0624 self._accels = None 0625 self._accels_determined = False 0626 0627 # Cached markup types. 0628 self._mtypes = None 0629 self._mtypes_determined = False 0630 0631 # Cached wrapping policy. 0632 if wrapping is None: 0633 self._wrap_determined = False 0634 self._wrapf = None 0635 self._wrapkw = None 0636 else: 0637 self._wrap_determined = True 0638 self._wrapf = select_field_wrapper(wrapping) 0639 self._wrapkw = tuple(wrapping) 0640 0641 0642 def _assert_headonly (self): 0643 0644 if self._tail: 0645 raise PologyError( 0646 _("@info", 0647 "Trying to access catalog messages in header-only mode.")) 0648 0649 0650 def __getattr__ (self, att): 0651 """ 0652 Attribute getter. 0653 0654 Processes read-only attributes, and sends others to the base class. 0655 0656 @param att: name of the attribute to get 0657 @returns: attribute value 0658 """ 0659 if 0: pass 0660 0661 elif att == "name": 0662 basename = os.path.basename(self._filename) 0663 p = basename.rfind(".") 0664 if p >= 0: 0665 return basename[:p] 0666 else: 0667 return basename 0668 0669 else: 0670 return Monitored.__getattr__(self, att) 0671 0672 0673 def __len__ (self): 0674 """ 0675 The number of messages in the catalog. 0676 0677 The number includes obsolete entries, and excludes header entry. 0678 0679 @returns: the number of messages 0680 @rtype: int 0681 """ 0682 0683 self._assert_headonly() 0684 return len(self._messages) 0685 0686 0687 def __getitem__ (self, ident): 0688 """ 0689 Get message by position or another message. 0690 0691 If the position is out of range, or the lookup message does not have 0692 a counterpart in this catalog with the same key, an error is signaled. 0693 0694 Runtime complexity O(1), regardless of the C{ident} type. 0695 0696 @param ident: position index or another message 0697 @type ident: int or L{Message_base} 0698 0699 @returns: reference to the message in catalog 0700 @rtype: L{Message_base} 0701 """ 0702 0703 self._assert_headonly() 0704 self.assert_spec_getitem() 0705 if not isinstance(ident, int): 0706 ident = self._msgpos[ident.key] 0707 return self._messages[ident] 0708 0709 0710 def __setitem__ (self, ident, msg): 0711 """ 0712 Set message by position or another message. 0713 0714 If the position is out of range, or the lookup message does not have 0715 a counterpart in this catalog with the same key, an error is signaled. 0716 0717 Runtime complexity O(1), regardless of the C{ident} type. 0718 0719 @param ident: position index or another message 0720 @type ident: int or L{Message_base} 0721 0722 @returns: reference to the message in catalog 0723 @rtype: L{Message_base} 0724 """ 0725 0726 self._assert_headonly() 0727 self.assert_spec_setitem(msg) 0728 if not isinstance(ident, int): 0729 ident = self._msgpos[ident.key] 0730 self._messages[ident] = msg 0731 if self._messages[ident] is not msg: 0732 self.__dict__["#"]["*"] += 1 0733 return self._messages[ident] 0734 0735 0736 def __contains__ (self, msg): 0737 """ 0738 Whether the message with the same key exists in the catalog. 0739 0740 Runtime complexity O(1). 0741 0742 @param msg: message to look for 0743 @type msg: L{Message_base} 0744 0745 @returns: C{True} if the message exists 0746 @rtype: bool 0747 """ 0748 0749 self._assert_headonly() 0750 return msg.key in self._msgpos 0751 0752 0753 def __eq__ (self, ocat): 0754 """ 0755 Whether two catalogs are equal in all apparent parts. 0756 0757 Catalogs are considered equal if they are of the same length, 0758 their headers are equal, and each two messages with the 0759 same position are equal. 0760 0761 Runtime complexity O(n). 0762 0763 @returns: C{True} if catalogs are equal 0764 @rtype: bool 0765 """ 0766 0767 if len(self) != len(ocat): 0768 return False 0769 if self.header != ocat.header: 0770 return False 0771 for i in range(len(ocat)): 0772 if self[i] != ocat[i]: 0773 return False 0774 return True 0775 0776 0777 def __ne__ (self, ocat): 0778 """ 0779 Whether two catalogs are equal in all apparent parts. 0780 0781 Equivalent to C{not (self == ocat)}. 0782 0783 @returns: C{False} if catalogs are equal 0784 @rtype: bool 0785 """ 0786 0787 return not self.__eq__(ocat) 0788 0789 0790 def find (self, msg, wobs=True): 0791 """ 0792 Position of the message in the catalog. 0793 0794 Runtime complexity O(1). 0795 0796 @param msg: message to look for 0797 @type msg: L{Message_base} 0798 @param wobs: obsolete messages considered non-existant if C{False} 0799 @type wobs: bool 0800 0801 @returns: position index if the message exists, -1 otherwise 0802 @rtype: int 0803 """ 0804 0805 self._assert_headonly() 0806 if msg.key in self._msgpos: 0807 if wobs or not msg.obsolete: 0808 return self._msgpos[msg.key] 0809 return -1 0810 0811 0812 def get (self, msg, defmsg=None): 0813 """ 0814 Get message by key of another message, with default fallback. 0815 0816 If the lookup message C{msg} does not have a counterpart 0817 in this catalog with the same key, C{defmsg} is returned. 0818 C{msg} can also be C{None}, when C{defmsg} is returned. 0819 0820 Runtime complexity O(1). 0821 0822 @param msg: message for the lookup by key 0823 @type msg: L{Message_base} or None 0824 @param defmsg: fallback in case lookup failed 0825 @type defmsg: any 0826 0827 @returns: reference to the message in catalog, or default 0828 @rtype: L{Message_base} or type(defmsg) 0829 """ 0830 0831 if msg is None: 0832 return defmsg 0833 pos = self.find(msg) 0834 if pos >= 0: 0835 return self._messages[pos] 0836 else: 0837 return defmsg 0838 0839 0840 def add (self, msg, pos=None, srefsyn={}): 0841 """ 0842 Add a message to the catalog. 0843 0844 If the message with the same key already exists in the catalog, 0845 it will be replaced with the new message, ignoring position. 0846 The return value will be C{None}. 0847 0848 If the message does not exist in the catalog, when the position is 0849 C{None}, the insertion will be attempted such as that the messages be 0850 near according to the source references; if the position is not 0851 C{None}, the message is inserted at the given position. 0852 The return value will be the true insertion position. 0853 0854 Negative position can be given as well. It counts backward from 0855 the first non-obsolete message if the message to be added 0856 is not obsolete, or from last message otherwise. 0857 0858 When the message is inserted according to source references, 0859 a dictionary of file paths to consider synonymous can be given 0860 by the C{srefsyn}. The key is the file path for which the synonyms 0861 are being given, and the value the list of synonymous file paths. 0862 The mapping is not symmetric; if B is in the list of synonyms to A, 0863 A will not be automatically considered to be among synonyms of B, 0864 unless explicitly given in the list of synonyms to B. 0865 0866 Runtime complexity O(1) if the message is present in the catalog; 0867 O(n - pos) if the position is given and the message is not present; 0868 O(n) if the position is not given and the message is not present. 0869 0870 @param msg: message to insert 0871 @type msg: L{Message_base} 0872 0873 @param pos: position index to insert at 0874 @type pos: int or None 0875 0876 @param srefsyn: synonymous names to some of the source files 0877 @type srefsyn: {string: [string*]*} 0878 0879 @returns: if inserted, the position where inserted 0880 @rtype: int or None 0881 """ 0882 0883 return self.add_more([(msg, pos)], srefsyn=srefsyn)[0] 0884 0885 0886 def add_more (self, msgpos, cumulative=False, srefsyn={}): 0887 """ 0888 Add more than one message to the catalog. 0889 0890 Like L{add}, except that several messages are added in one call. 0891 This significantly speeds up insertion when insertion positions of 0892 all messages are known beforehand. 0893 0894 Insertion positions can be given relative to state before the call, 0895 or cumulative to earlier insertions in the list. 0896 For example, if insertions are given as C{[(msg1, 2), (msg2, 5)]} and 0897 not cumulative, then the resulting position for C{msg1} will be 2, 0898 and for C{msg2} 6 (assuming that both messages actually got inserted). 0899 This behavior can be toggled by the C{cumulative} parameter. 0900 0901 @param msgpos: messages with target insertion positions 0902 @type msgpos: [(L{Message_base}, int), ...] 0903 @param cumulative: whether input positions are cumulative 0904 @type cumulative: bool 0905 @param srefsyn: synonymous names to some of the source files 0906 @type srefsyn: {string: [string*]*} 0907 0908 @returns: positions where inserted, or None where replaced 0909 @rtype: [int or None, ...] 0910 """ 0911 0912 self._assert_headonly() 0913 for msg, pos in msgpos: 0914 self.assert_spec_setitem(msg) 0915 if not msg.msgid and msg.msgctxt is None: 0916 raise PologyError( 0917 _("@info", 0918 "Trying to insert message with empty key into catalog.")) 0919 0920 # Resolve backward positions, set aside automatic positions, 0921 # set aside replacements. 0922 msgpos_ins = [] 0923 msgs_auto = [] 0924 msgs_repl = [] 0925 for msg, pos in msgpos: 0926 if msg.key not in self._msgpos: 0927 if pos is not None: 0928 if pos < 0: 0929 pos = len(self._messages) + pos 0930 if pos < 0 or pos > len(self._messages): 0931 raise PologyError( 0932 _("@info", 0933 "Trying to insert message into catalog by " 0934 "position out of range.")) 0935 msgpos_ins.append((msg, pos)) 0936 else: 0937 msgs_auto.append(msg) 0938 else: 0939 msgs_repl.append(msg) 0940 0941 # Sort messages to be inserted by resolved positions. 0942 msgpos_ins = sorted(msgpos_ins, key=lambda x: x[1]) 0943 0944 # Resolve messages to be inserted by automatic positions. 0945 for msg in msgs_auto: 0946 pos, d1 = self._pick_insertion_point(msg, srefsyn) 0947 i = 0 0948 while i < len(msgpos_ins): 0949 omsg, opos = msgpos_ins[i] 0950 if pos < opos: 0951 break 0952 elif cumulative: 0953 pos += 1 0954 msgpos_ins.insert(i, (msg, pos)) 0955 0956 # Accumulate insertion positions if not cumulative. 0957 if not cumulative and len(msgpos_ins) > 1: 0958 off = 0 0959 msgpos_tmp = [] 0960 for msg, pos in msgpos_ins: 0961 msgpos_tmp.append((msg, pos + off)) 0962 off += 1 0963 msgpos_ins = msgpos_tmp 0964 0965 # Update key-position links for the index to be added. 0966 off = 0 0967 for i in range(len(msgpos_ins)): 0968 pos1 = msgpos_ins[i][1] - off 0969 if i + 1 < len(msgpos_ins): 0970 pos2 = msgpos_ins[i + 1][1] - (off + 1) 0971 else: 0972 pos2 = len(self._messages) 0973 for j in range(pos1, pos2): 0974 ckey = self._messages[j].key 0975 self._msgpos[ckey] = j + (off + 1) 0976 off += 1 0977 0978 # Insert messages at computed positions. 0979 for msg, pos in msgpos_ins: 0980 self._messages.insert(pos, msg) 0981 self._messages[pos]._remove_on_sync = False # no pending removal 0982 self._messages[pos]._committed = False # write it on sync 0983 self._msgpos[msg.key] = pos # store new key-position link 0984 self.__dict__["#"]["*"] += 1 # indicate sequence change 0985 0986 # Replace existing messages. 0987 for msg in msgs_repl: 0988 pos = self._msgpos[msg.key] 0989 self._messages[pos] = msg 0990 0991 # Recover insertion/replacement positions. 0992 pos_res = [] 0993 msgpos_ins_d = dict(msgpos_ins) 0994 for msg, pos in msgpos: 0995 ipos = msgpos_ins_d.get(msg) 0996 if ipos is not None: 0997 pos_res.append(ipos) 0998 else: 0999 pos_res.append(None) 1000 1001 return pos_res 1002 1003 1004 def obspos (self): 1005 """ 1006 Get canonical position of the first obsolete message. 1007 1008 I{Canonical} position of the first obsolete message is the position 1009 of first of the contiguous obsolete messages at the end of the catalog. 1010 Normally this should be the same as the position of the very first 1011 obsolete message, as all obsolete messages should be contiguously 1012 grouped at the end. But there is no enforcement of such grouping, 1013 therefore the more stricter definition. 1014 1015 If there are no messages in the catalog, or the last message 1016 is not obsolete, the position is reported as number of messages 1017 (i.e. one position after the last message). 1018 1019 Runtime complexity O(number of contiguous trailing obsolete messages). 1020 1021 @return: canonical position of first obsolete message 1022 @rtype: int 1023 """ 1024 1025 op = len(self._messages) 1026 while op > 0 and self._messages[op - 1].obsolete: 1027 op -= 1 1028 1029 return op 1030 1031 1032 def add_last (self, msg): 1033 """ 1034 Add a message to the selected end of catalog, if not already in it. 1035 1036 Synonym to C{cat.add(msg, cat.obspos())} if the message is 1037 not obsolete (i.e. tries to add the message after all non-obsolete), 1038 or to C{cat.add(msg, len(cat))} (tries to add at the very end). 1039 If the message already exits in the catalog (by key), 1040 same behavior as for L{add} applies. 1041 1042 @see: L{add} 1043 """ 1044 1045 if not msg.obsolete: 1046 return self.add(msg, self.obspos()) 1047 else: 1048 return self.add(msg, len(self._messages)) 1049 1050 1051 def remove (self, ident): 1052 """ 1053 Remove a message from the catalog, by position or another message. 1054 1055 If the position is out of range, or the lookup message does not have 1056 a counterpart in this catalog with the same key, an error is signaled. 1057 1058 Runtime complexity O(n), regardless of C{ident} type. 1059 Use L{remove_on_sync()<remove_on_sync>} method for O(1) complexity, 1060 when the logic allows the removal to be delayed to syncing time. 1061 1062 @param ident: position index or another message 1063 @type ident: int or L{Message_base} 1064 1065 @returns: C{None} 1066 """ 1067 1068 self._assert_headonly() 1069 1070 # Determine position and key by given ident. 1071 if isinstance(ident, int): 1072 ip = ident 1073 key = self._messages[ip].key 1074 else: 1075 key = ident.key 1076 ip = self._msgpos[key] 1077 1078 # Update key-position links for the removed index. 1079 for i in range(ip + 1, len(self._messages)): 1080 ckey = self._messages[i].key 1081 self._msgpos[ckey] = i - 1 1082 1083 # Remove from messages and key-position links. 1084 self._messages.pop(ip) 1085 self._msgpos.pop(key) 1086 self.__dict__["#"]["*"] += 1 # indicate sequence change 1087 1088 1089 def remove_on_sync (self, ident): 1090 """ 1091 Remove a message from the catalog, by position or another message, 1092 on the next sync. 1093 1094 If the position is out of range, or the lookup message does not have 1095 a counterpart in this catalog with the same key, an error is signaled. 1096 1097 Suited for for-in iterations over a catalog with a sync afterwards, 1098 so that the indices are not confused by removal, and good performance. 1099 1100 Runtime complexity O(1). 1101 1102 @param ident: position index or another message 1103 @type ident: int or L{Message_base} 1104 1105 @returns: C{None} 1106 """ 1107 1108 self._assert_headonly() 1109 1110 # Determine position and key by given ident. 1111 if isinstance(ident, int): 1112 ip = ident 1113 else: 1114 ip = self._msgpos[ident.key] 1115 1116 # Indicate removal on sync for this message. 1117 self._messages[ip]._remove_on_sync = True 1118 self.__dict__["#"]["*"] += 1 # indicate sequence change (pending) 1119 1120 1121 def sync (self, force=False, noobsend=False, writefh=None, fitplural=False): 1122 """ 1123 Write catalog file to disk if any message has been modified. 1124 1125 All activities scheduled for sync-time are performed, such as 1126 delayed message removal. 1127 1128 If catalog is monitored, unmodified messages (and message parts) 1129 are not reformatted unless forced. 1130 1131 Instead of opening and writing into catalog's filename, 1132 catalog can be written to a file-like object provided by 1133 C{writefh} parameter. 1134 Same as when writing to file on disk, text will be encoded 1135 using catalog's encoding before writing it to C{writefh}. 1136 1137 If in a plural message the number of C{msgstr} fields is not equal 1138 to the number specified in the plural header, the C{fitplural} 1139 parameter can be set to C{True} to correct this on syncing. 1140 However, this fitting will be performed only on clean plural messages, 1141 i.e. those in which all existing C{msgstr} fields are empty, 1142 as otherwise it is unclear how to adapt them to plural header. 1143 1144 @param force: whether to reformat unmodified messages 1145 @type force: bool 1146 @param noobsend: do not reorder messages to group all obsolete at end 1147 @type noobsend: bool 1148 @param writefh: file to write the catalog to 1149 @type writefh: file-like object open in binary mode 1150 @param fitplural: whether to fit the number of msgstr fields in 1151 clean plural messages to plural header specification 1152 @type fitplural: bool 1153 1154 @returns: C{True} if the file was modified, C{False} otherwise 1155 @rtype: bool 1156 """ 1157 1158 # Cannot sync catalogs which have been given no path 1159 # (usually temporary catalogs). 1160 if not self._filename.strip(): 1161 raise PologyError( 1162 _("@info", 1163 "Trying to sync unnamed catalog.")) 1164 1165 # Fit the number of msgstr entries in plural messages if requested. 1166 # Must be done before the modification test below. 1167 if fitplural: 1168 n = self.nplurals() 1169 for msg in self._messages: 1170 if ( msg.msgid_plural is not None 1171 and len(msg.msgstr) != n 1172 and all(len(s) == 0 for s in msg.msgstr) 1173 ): 1174 msg.msgstr[:] = [""] * n 1175 1176 # If catalog is not monitored, force syncing. 1177 if not self._monitored: 1178 force = True 1179 1180 # If no modifications throughout and sync not forced, return. 1181 if not force and not self.modcount: 1182 return False 1183 1184 # No need to indicate sequence changes here, as after sync the 1185 # catalog is set to unmodified throughout. 1186 1187 # Temporarily insert header, for homogeneous iteration. 1188 self._messages.insert(0, self._header) 1189 self._messages[0]._remove_on_sync = False # never remove header 1190 nmsgs = len(self._messages) 1191 1192 # Starting position for reinserting obsolete messages. 1193 obstop = len(self._messages) 1194 while obstop > 0 and self._messages[obstop - 1].obsolete: 1195 obstop -= 1 1196 obsins = obstop 1197 1198 # NOTE: Key-position links may be invalidated from this point onwards, 1199 # by reorderings/removals. To make sure it is not used before the 1200 # rebuild at the end, delete now. 1201 del self._msgpos 1202 1203 if not self._wrap_determined: 1204 self.wrapping() 1205 1206 flines = [] 1207 i = 0 1208 while i < nmsgs: 1209 msg = self._messages[i] 1210 if msg.get("_remove_on_sync", False): 1211 # Removal on sync requested, just skip. 1212 i += 1 1213 elif not noobsend and msg.obsolete and i < obstop: 1214 # Obsolete message out of order, reinsert and repeat the index. 1215 # Reinsertion is such that the relative ordering of obsolete 1216 # messages is preserved. 1217 msg = self._messages.pop(i) 1218 self._messages.insert(obsins - 1, msg) # -1 due to popping 1219 obstop -= 1 1220 else: 1221 # Normal message, append formatted lines to rest. 1222 committed = msg.get("_committed", False) 1223 flines.extend(msg.to_lines(self._wrapf, 1224 force or not committed)) 1225 # Message should finish with one empty line. 1226 if flines[-1] != "\n": 1227 flines.append("\n") 1228 i += 1 1229 if not self._tail: 1230 # Remove trailing empty lines. 1231 while flines and flines[-1] == "\n": 1232 flines.pop(-1) 1233 else: 1234 # Tail has to be converted to separate lines, 1235 # so that possibly new encoding is applied to it too 1236 # while being able to report line/column on error. 1237 flines.extend(x + "\n" for x in self._tail.split("\n")) 1238 if self._tail.endswith("\n"): 1239 flines.pop(-1) 1240 1241 # Remove temporarily inserted header. 1242 self._messages.pop(0) 1243 1244 # Update message map. 1245 self.sync_map() 1246 1247 # Reset modification state throughout. 1248 self.modcount = 0 1249 1250 # Encode lines and write file. 1251 enclines = [] 1252 for i, line in enumerate(flines): 1253 try: 1254 encline = line.encode(self._encoding) 1255 except UnicodeEncodeError as e: 1256 raise CatalogSyntaxError( 1257 _("@info", 1258 "Text encoding failure at %(file)s:%(line)d:%(col)d " 1259 "under assumed encoding '%(enc)s'.", 1260 file=self._filename, line=(i + 1), col=e[2], 1261 enc=self._encoding)) 1262 enclines.append(encline) 1263 if not writefh: 1264 # Create the parent directory if it does not exist. 1265 pdirpath = os.path.dirname(self._filename) 1266 mkdirpath(pdirpath) 1267 # Write to file atomically: directly write to temporary file, 1268 # then rename it to destination file. 1269 #ofl = tempfile.NamedTemporaryFile(delete=False, dir=pdirpath) 1270 #tmpfname = ofl.name 1271 # ...needs Python 2.6 1272 tmpfname = os.path.join(pdirpath, 1273 os.path.basename(self._filename) + "~tmpw") 1274 ofl = open(tmpfname, "wb") 1275 else: 1276 ofl = writefh 1277 ofl.writelines(enclines) 1278 if not writefh: 1279 ofl.close() 1280 if os.name == "nt" and os.path.exists(self._filename): 1281 # NT does not allow to overwrite on rename. 1282 tmpfname2 = self._filename + "~tmpo" 1283 os.rename(self._filename, tmpfname2) 1284 os.rename(tmpfname, self._filename) 1285 os.remove(tmpfname2) 1286 else: 1287 os.rename(tmpfname, self._filename) 1288 1289 # Indicate the catalog is no longer created from scratch, if it was. 1290 self._created_from_scratch = False 1291 1292 # Indicate header has been committed. 1293 self._header._committed = True 1294 1295 # Indicate for each message that it has been committed. 1296 for msg in self._messages: 1297 msg._committed = True 1298 1299 return True 1300 1301 1302 def sync_map (self): 1303 """ 1304 Update message map. 1305 1306 In case there were any modifications to message keys, 1307 or any pending removals issued, this function will update 1308 the sequence of messages such that membership operations 1309 work properly again. 1310 Obsolete messages will be moved to end of catalog. 1311 Referent line and entry numbers will remain invalid, 1312 as catalog will not be written out. 1313 1314 This is a less expensive alternative to syncing the catalog, 1315 when it is only necessary to continue using it in synced state, 1316 rather than actually writing it out. 1317 """ 1318 1319 # Execute pending removals. 1320 # Separate messages into current and obsolete. 1321 newlst = [] 1322 newlst_obs = [] 1323 for msg in self._messages: 1324 if not msg.get("_remove_on_sync", False): 1325 if not msg.obsolete: 1326 newlst.append(msg) 1327 else: 1328 newlst_obs.append(msg) 1329 newlst.extend(newlst_obs) 1330 self.__dict__["*"] = newlst 1331 self._messages = self.__dict__["*"] 1332 1333 # Rebuild key-position links. 1334 self._msgpos = {} 1335 for i in range(len(self._messages)): 1336 self._msgpos[self._messages[i].key] = i 1337 1338 # Set inverse map to non-computed. 1339 self._invmap = None 1340 1341 1342 def _make_invmap (self): 1343 1344 # Map for inverse lookup (by translation) has as key the msgstr[0], 1345 # and the value the list of messages having the same msgstr[0]. 1346 1347 self._invmap = {} 1348 for msg in self._messages: 1349 ikey = msg.msgstr[0] 1350 msgs = self._invmap.get(ikey) 1351 if msgs is None: 1352 msgs = [] 1353 self._invmap[ikey] = msgs 1354 msgs.append(msg) 1355 1356 1357 def insertion_inquiry (self, msg, srefsyn={}): 1358 """ 1359 Compute the tentative insertion of the message into the catalog. 1360 1361 The tentative insertion is a tuple of position of a message when it 1362 would be inserted into the catalog, and the I{weight} indicating 1363 the quality of positioning. The weight is computed by analyzing 1364 the source references. 1365 1366 Runtime complexity O(n). 1367 1368 @param msg: message to compute the tentative insertion for 1369 @type msg: L{Message_base} 1370 @param srefsyn: synonymous names to some of the source files 1371 @type srefsyn: {string: [string*]*} 1372 1373 @returns: the insertion position and its weight 1374 @rtype: int, float 1375 """ 1376 1377 self._assert_headonly() 1378 return self._pick_insertion_point(msg, srefsyn) 1379 1380 1381 def created (self): 1382 """ 1383 Whether the catalog has been newly created (no existing PO file). 1384 1385 A catalog is no longer considered newly created after the first sync. 1386 1387 @returns: C{True} if newly created, C{False} otherwise 1388 @rtype: bool 1389 """ 1390 1391 return self._created_from_scratch 1392 1393 1394 def _pick_insertion_point (self, msg, srefsyn={}): 1395 1396 # Return the best insertion position with associated weight. 1397 # Assume the existing messages in the catalog are properly ordered. 1398 1399 if not msg.obsolete: 1400 last = self.obspos() 1401 else: 1402 last = len(self._messages) 1403 1404 # Insert at the last position if the candidate message has 1405 # no source references. 1406 if not msg.source: 1407 return last, 0.0 1408 1409 ins_pos = -1 1410 # Try to find insertion position by comparing the source references 1411 # of the candidate the source references of the existing messages. 1412 # The order of matching must be very specific for logical insertion. 1413 # If the matching source files are found, insert according to 1414 # the line number. 1415 for src, lno in msg.source: 1416 src_pos = 0 1417 src_match = False 1418 curr_prim_esrc = "" 1419 for i in range(last): 1420 emsg = self._messages[i] 1421 if not emsg.source: 1422 continue 1423 same_prim_esrc = False 1424 for esrc, elno in emsg.source: 1425 if curr_prim_esrc in [esrc] + srefsyn.get(esrc, []): 1426 same_prim_esrc = True 1427 break 1428 if not same_prim_esrc: 1429 curr_prim_esrc, elno = emsg.source[0] 1430 1431 if src in [curr_prim_esrc] + srefsyn.get(curr_prim_esrc, []): 1432 # The source file names match. 1433 # Insert at this position if the candidate's line 1434 # number preceeds that of the current message. 1435 src_match = True 1436 if lno < elno: 1437 ins_pos = i 1438 break 1439 elif src_match: 1440 # The sources no longer match, but were matched 1441 # before. This means the candidate line number is 1442 # after all existing, so insert at this position. 1443 ins_pos = i 1444 break 1445 1446 if ins_pos >= 0: 1447 break 1448 1449 if ins_pos >= 0: 1450 break 1451 1452 if ins_pos >= 0: 1453 return ins_pos, 1.0 1454 else: 1455 return last, 0.0 1456 1457 1458 def nplurals (self): 1459 """ 1460 Number of msgstr fields expected for plural messages. 1461 1462 Determined by the Plural-Forms header field; if this field 1463 is absent from the header, defaults to 1. 1464 1465 @returns: number of plurals 1466 @rtype: int 1467 """ 1468 1469 # Get nplurals string from the header. 1470 plforms = self._header.get_field_value("Plural-Forms") 1471 if not plforms: # no plural definition 1472 return 1 1473 nplustr = plforms.split(";")[0] 1474 1475 # Get the number of forms from the string. 1476 m = re.search(r"\d+", nplustr) 1477 if not m: # malformed nplurals 1478 return 1 1479 1480 return int(m.group(0)) 1481 1482 1483 def plural_index (self, number): 1484 """ 1485 Msgstr field index in plural messages for given number. 1486 1487 Determined by the Plural-Forms header field; if this field 1488 is absent from the header, defaults to 0. 1489 1490 @param number: the number to determine the plural form for 1491 @type number: int 1492 1493 @returns: index of msgstr field 1494 @rtype: int 1495 """ 1496 1497 # Get plural definition from the header. 1498 plforms = self._header.get_field_value("Plural-Forms") 1499 if not plforms: # no plural definition, assume 0 1500 return 0 1501 plustr = plforms.split(";")[1] 1502 1503 # Rebuild evaluation string only if changed to last invocation. 1504 if plustr != self._plustr: 1505 # Record raw plural definition for check on next call. 1506 self._plustr = plustr 1507 1508 # Prepare Python-evaluable string out of the raw definition. 1509 plustr = plustr[plustr.find("=") + 1:] # remove plural= part 1510 p = -1 1511 evalstr = "" 1512 while 1: 1513 p = plustr.find("?") 1514 if p < 0: 1515 evalstr += " " + plustr 1516 break 1517 cond = plustr[:p] 1518 plustr = plustr[p + 1:] 1519 cond = cond.replace("&&", " and ") 1520 cond = cond.replace("||", " or ") 1521 evalstr += "(" + cond + ") and " 1522 p = plustr.find(":") 1523 body = plustr[:p] 1524 plustr = plustr[p + 1:] 1525 evalstr += "\"" + body + "\" or " 1526 if not evalstr.strip(): 1527 evalstr = "0" 1528 1529 # Record the current evaluable definition. 1530 self._plustr_eval = evalstr 1531 1532 # Evaluate the definition. 1533 n = number # set eval context (plural definition uses n as variable) 1534 form = int(eval(self._plustr_eval)) 1535 1536 return form 1537 1538 1539 def plural_indices_single (self): 1540 """ 1541 Indices of the msgstr fields which are used for single number only. 1542 1543 @returns: msgstr indices used for single numbers 1544 @rtype: [int*] 1545 """ 1546 1547 # Get plural definition from the header. 1548 plforms = self._header.get_field_value("Plural-Forms") 1549 if not plforms: # no plural definition, assume 0 1550 return [0] 1551 plustr = plforms.split(";")[1] 1552 1553 lst = re.findall(r"\bn\s*==\s*\d+\s*\)?\s*\?\s*(\d+)", plustr) 1554 if not lst and re.search(r"\bn\s*(!=|>|<)\s*\d+\s*([^?]|$)", plustr): 1555 lst = ["0"] 1556 1557 return [int(x) for x in lst] 1558 1559 1560 def select_by_key (self, msgctxt, msgid, wobs=False): 1561 """ 1562 Select message from the catalog by the fields that define its key. 1563 1564 If matched, the message is returned as a single-element list, or 1565 an empty list when there is no match. This is so that the result 1566 of this method is in line with other C{select_*} methods. 1567 1568 Runtime complexity as that of L{find}. 1569 1570 @param msgctxt: the text of C{msgctxt} field 1571 @type msgctxt: string or C{None} 1572 @param msgid: the text of C{msgid} field 1573 @type msgid: string 1574 @param wobs: whether to include obsolete messages in selection 1575 @type wobs: bool 1576 1577 @returns: selected messages 1578 @rtype: [L{Message_base}*] 1579 """ 1580 1581 m = MessageUnsafe({"msgctxt" : msgctxt, "msgid" : msgid}) 1582 p = self.find(m, wobs) 1583 if p >= 0: 1584 return [self._messages[p]] 1585 else: 1586 return [] 1587 1588 1589 def select_by_key_match (self, msgctxt, msgid, exctxt=False, exid=True, 1590 case=True, wobs=False): 1591 """ 1592 Select messages from the catalog by matching key-defining fields. 1593 1594 Parameters C{msgctxt} and C{msgid} are either exact values, 1595 to be matched by equality against message fields, 1596 or regular expression strings. Parameters C{exctxt} and C{exid} 1597 control which kind of match it is, respectively. 1598 1599 Runtime complexity O(n), unless all matches are exact, 1600 when as that of L{find}. 1601 1602 @param msgctxt: the text or regex string of C{msgctxt} field 1603 @type msgctxt: string or C{None} 1604 @param msgid: the text or regex string of C{msgid} field 1605 @type msgid: string 1606 @param exctxt: C{msgctxt} is exact value if C{True}, regex if C{False} 1607 @type exctxt: bool 1608 @param exid: C{msgid} is exact value if C{True}, regex if C{False} 1609 @type exid: bool 1610 @param case: whether regex matching is case-sensitive 1611 @type case: bool 1612 @param wobs: whether to include obsolete messages in selection 1613 @type wobs: bool 1614 1615 @returns: selected messages 1616 @rtype: [L{Message_base}*] 1617 """ 1618 1619 if exctxt and exid: 1620 return self.select_by_key(msgctxt, msgid, wobs=wobs) 1621 1622 rxflags = re.U 1623 if not case: 1624 rxflags |= re.I 1625 if not exctxt: 1626 if msgctxt is not None: 1627 msgctxt_rx = re.compile(msgctxt, rxflags) 1628 else: 1629 # Force exact match if actually no context required. 1630 exctxt = True 1631 if not exid: 1632 msgid_rx = re.compile(msgid, rxflags) 1633 1634 selected_msgs = [] 1635 for msg in self._messages: 1636 if ( (wobs or not msg.obsolete) 1637 and ( (exid and msg.msgid == msgid) 1638 or (not exid and msgid_rx.search(msg.msgid))) 1639 and ( (exctxt and msg.msgctxt == msgctxt) 1640 or (not exctxt and msgctxt_rx.search(msg.msgctxt or ""))) 1641 ): 1642 selected_msgs.append(msg) 1643 1644 return selected_msgs 1645 1646 1647 def select_by_msgid (self, msgid, wobs=False): 1648 """ 1649 Select messages from the catalog by matching C{msgid} field. 1650 1651 Several messages may have the same C{msgid} field, due to different 1652 C{msgctxt} fields. Empty list is returned when there is no match. 1653 1654 Runtime complexity O(n). 1655 1656 @param msgid: the text of C{msgid} field 1657 @type msgid: string 1658 @param wobs: whether to include obsolete messages in selection 1659 @type wobs: bool 1660 1661 @returns: selected messages 1662 @rtype: [L{Message_base}*] 1663 """ 1664 1665 selected_msgs = [] 1666 for msg in self._messages: 1667 if (wobs or not msg.obsolete) and msg.msgid == msgid: 1668 selected_msgs.append(msg) 1669 1670 return selected_msgs 1671 1672 1673 def select_by_msgid_fuzzy (self, msgid, cutoff=0.6, wobs=False): 1674 """ 1675 Select messages from the catalog by near-matching C{msgid} field. 1676 1677 The C{cutoff} parameter determines the minimal admissible similarity 1678 (1.0 fo exact match). 1679 1680 The messages are returned ordered by decreasing similarity. 1681 1682 Runtime complexity O(n) * O(length(msgid)*avg(length(msgids))) 1683 (probably). 1684 1685 @param msgid: the text of C{msgid} field 1686 @type msgid: string 1687 @param cutoff: minimal similarity 1688 @type cutoff: float 1689 @param wobs: whether to include obsolete messages in selection 1690 @type wobs: bool 1691 1692 @returns: selected messages 1693 @rtype: [L{Message_base}*] 1694 """ 1695 1696 # Build dictionary of message keys by msgid; 1697 # there can be several keys per msgid, pack in a list. 1698 msgkeys = {} 1699 for msg in self._messages: 1700 if msg.obsolete and not wobs: 1701 # Skip obsolete messages if not explicitly included. 1702 continue 1703 if msg.msgid not in msgkeys: 1704 msgkeys[msg.msgid] = [] 1705 msgkeys[msg.msgid].append(msg.key) 1706 1707 # Get near-match msgids. 1708 near_msgids = difflib.get_close_matches(msgid, msgkeys, cutoff=cutoff) 1709 1710 # Collect messages per selected msgids. 1711 selected_msgs = [] 1712 for near_msgid in near_msgids: 1713 for msgkey in msgkeys[near_msgid]: 1714 selected_msgs.append(self._messages[self._msgpos[msgkey]]) 1715 1716 return selected_msgs 1717 1718 1719 def select_by_msgstr (self, msgstr0, wobs=False, lazy=False): 1720 """ 1721 Select messages from the catalog inversely, by their msgstr[0]. 1722 1723 Several messages may have the same C{msgstr[0]} field, 1724 so the return value is always a list of messages. 1725 Empty list is returned when there is no match. 1726 1727 Runtime complexity is O(n) if C{lazy} is C{False}. 1728 If C{lazy} is C{True}, complexity is O(n) for the first search, 1729 and then O(1) until next syncing of the catalog; 1730 if msgstr fields of some messages change in between, 1731 or messages are added or removed from the catalog, 1732 this is not seen until next syncing. 1733 1734 @param msgstr0: the text of C{msgstr[0]} field 1735 @type msgstr0: string 1736 @param wobs: whether to include obsolete messages in selection 1737 @type wobs: bool 1738 @param lazy: whether to assume msgstr are not modified between syncings 1739 @type lazy: bool 1740 1741 @returns: selected messages 1742 @rtype: [L{Message_base}*] 1743 """ 1744 1745 if not lazy: 1746 selected_msgs = {} 1747 for msg in self._messages: 1748 if (wobs or not msg.obsolete) and msg.msgstr[0] == msgstr0: 1749 selected_msgs.append(msg) 1750 else: 1751 if self._invmap is None: 1752 self._make_invmap() 1753 selected_msgs = self._invmap.get(msgstr0, []) 1754 if not wobs: 1755 selected_msgs = [x for x in selected_msgs if not x.obsolete] 1756 1757 return selected_msgs 1758 1759 1760 def encoding (self): 1761 """ 1762 Report encoding used when syncing the catalog. 1763 1764 Encoding is determined from C{Content-Type} header field. 1765 1766 It is not defined when the header will be examined, 1767 or if it will be reexamined when it changes. 1768 If you want to set encoding after the catalog has been 1769 opened, use L{set_encoding}. 1770 1771 @returns: the encoding name 1772 @rtype: string 1773 """ 1774 1775 return self._encoding 1776 1777 1778 def set_encoding (self, encoding): 1779 """ 1780 Set encoding used when syncing the catalog. 1781 1782 Encoding set by this method will later be readable by 1783 the L{encoding} method. 1784 This will also modify the catalog header C{Content-Type} field. 1785 1786 @param encoding: the encoding name 1787 @type encoding: string 1788 """ 1789 1790 self._encoding = encoding 1791 1792 ctval = "text/plain; charset=%s" % encoding 1793 self.header.set_field("Content-Type", ctval) 1794 1795 1796 def accelerator (self): 1797 """ 1798 Report characters used as accelerator markers in GUI messages. 1799 1800 Accelerator characters are determined by looking for certain 1801 header fields, in this order: C{Accelerator-Marker}, 1802 C{X-Accelerator-Marker}. 1803 In each field, several accelerator markers can be stated as 1804 comma-separated list, or there may be several fields; 1805 the union of all parsed markers is reported. 1806 1807 If empty set is returned, it was determined that there are 1808 no accelerator markers in the catalog; 1809 if C{None}, that there is no determination about markers. 1810 1811 It is not defined when the header will be examined, 1812 or if it will be reexamined when it changes. 1813 If you want to set accelerator markers after the catalog has been 1814 opened, use L{set_accelerator}. 1815 1816 @returns: accelerator markers 1817 @rtype: set(string*) or C{None} 1818 """ 1819 1820 if self._accels_determined: 1821 return self._accels 1822 1823 accels = None 1824 self._accels_determined = True 1825 1826 for fname in ( 1827 "Accelerator-Marker", 1828 "X-Accelerator-Marker", 1829 ): 1830 fields = self._header.select_fields(fname) 1831 for fname, fval in fields: 1832 if accels is None: 1833 accels = set() 1834 accels.update([x.strip() for x in fval.split(",")]) 1835 if accels: 1836 accels.discard("") 1837 1838 self._accels = accels 1839 return accels 1840 1841 1842 def set_accelerator (self, accels): 1843 """ 1844 Set accelerator markers that can be expected in messages. 1845 1846 Accelerator markers set by this method will later be readable by 1847 the L{accelerator} method. This will not modify the catalog header 1848 in any way; if that is desired, it must be done manually by 1849 manipulating the header fields. 1850 1851 If C{accels} is given as C{None}, it means the accelerator markers 1852 are undetermined; if empty, that there are no markers in messages. 1853 1854 @param accels: accelerator markers 1855 @type accels: sequence of strings or C{None} 1856 """ 1857 1858 if accels is not None: 1859 self._accels = set(accels) 1860 self._accels.discard("") 1861 else: 1862 self._accels = None 1863 self._accels_determined = True 1864 1865 1866 def markup (self): 1867 """ 1868 Report what types of markup can be expected in messages. 1869 1870 Markup types are determined by looking for some header fields, 1871 which state markup types as short symbolic names, 1872 e.g. "html", "docbook", "mediawiki", etc. 1873 The header fields are tried in this order: C{Text-Markup}, 1874 C{X-Text-Markup}. 1875 In each field, several markup types can be stated as 1876 comma-separated list. 1877 If there are several fields, it is undefined from which one 1878 markup names are collected. 1879 Markup names are always reported in lower-case, regardless 1880 of the original casing used in the header. 1881 See L{set_markup} for list of markup types currently observed 1882 by various Pology modules to influence processing behavior. 1883 1884 If empty set is returned, it was determined that there is 1885 no markup in the catalog; 1886 if C{None}, that there is no determination about markup. 1887 1888 It is not defined when the header will be examined, 1889 or if it will be reexamined when it changes. 1890 If you want to set markup types after the catalog has been 1891 opened, use L{set_markup} method. 1892 1893 @returns: markup names 1894 @rtype: set(string*) or C{None} 1895 """ 1896 1897 if self._mtypes_determined: 1898 return self._mtypes 1899 1900 mtypes = None 1901 self._mtypes_determined = True 1902 1903 for fname in ( 1904 "Text-Markup", 1905 "X-Text-Markup", 1906 ): 1907 fval = self._header.get_field_value(fname) 1908 if fval is not None: 1909 mtypes = set([x.strip().lower() for x in fval.split(",")]) 1910 mtypes.discard("") 1911 1912 self._mtypes = mtypes 1913 return mtypes 1914 1915 1916 def set_markup (self, mtypes): 1917 """ 1918 Set markup types that can be expected in messages. 1919 1920 Markup types set by this method will later be readable by 1921 the L{markup} method. This will not modify the catalog header 1922 in any way; if that is desired, it must be done manually by 1923 manipulating the header fields. 1924 1925 If C{mtypes} is given as C{None}, it means the markup types 1926 are undetermined; if empty, that there is no markup in messages. 1927 1928 The following markup types are currently used by various parts 1929 of Pology to influence behavior on processing: 1930 - C{html}: HTML 4.01 1931 - C{qtrich}: Qt rich-text, (almost) a subset of HTML 1932 - C{kuit}: UI semantic markup in KDE4 1933 - C{kde4}: markup in KDE4 UI POs, a mix of Qt rich-text and KUIT 1934 - C{docbook4}: Docbook 4.x markup, in documentation POs 1935 - C{xmlents}: only XML-like entities, no other formal markup 1936 1937 @param mtypes: markup types 1938 @type mtypes: sequence of strings or C{None} 1939 """ 1940 1941 if mtypes is not None: 1942 self._mtypes = set([x.lower() for x in mtypes]) 1943 else: 1944 self._mtypes = None 1945 self._mtypes_determined = True 1946 1947 1948 def language (self): 1949 """ 1950 Report language of the translation. 1951 1952 Language is determined by looking for the C{Language} header field. 1953 If this field is present, it should contain the language code 1954 in line with GNU C library locales, e.g. C{pt} for Portuguese, 1955 or C{pt_BR} for Brazilian Portuguese. 1956 If the field is not present, language is considered undetermined, 1957 and C{None} is returned. 1958 1959 It is not defined when the header will be examined, 1960 or if it will be reexamined when it changes (most probably not). 1961 If you want to set language after the catalog has been 1962 opened, use L{set_language} method. 1963 1964 @returns: language code 1965 @rtype: string or C{None} 1966 """ 1967 1968 if self._lang_determined: 1969 return self._lang 1970 1971 lang = None 1972 self._lang_determined = True 1973 1974 fval = self._header.get_field_value("Language") 1975 if fval: 1976 lang = fval.strip() 1977 1978 self._lang = lang 1979 return lang 1980 1981 1982 def set_language (self, lang): 1983 """ 1984 Set language of the translation. 1985 1986 Language set by this method will later be readable by 1987 the L{language} method. This will not modify the catalog header 1988 in any way; if that is desired, it must be done manually by 1989 manipulating the header fields. 1990 1991 If C{lang} is given as C{None}, it means the language is undetermined. 1992 If it is given as empty string, it means the language is deliberately 1993 considered unknown. 1994 1995 @param lang: language code 1996 @type lang: string or C{None} 1997 """ 1998 1999 if lang is not None: 2000 self._lang = str(lang) 2001 else: 2002 self._lang = None 2003 self._lang_determined = True 2004 2005 2006 def environment (self): 2007 """ 2008 Report environments which the catalog is part of. 2009 2010 Sometimes the language alone is not enough to determine all 2011 the non-technical aspects of translation. 2012 For example, in a given language but different translation domains, 2013 one translator may decide to use one of the two synonyms naming a 2014 concept, and the other translator the other synonym. 2015 I{Environments} are a way to specify such sets of choices, 2016 so that automatic tools (e.g. terminology checker) can 2017 detect how to process a given catalog. 2018 2019 An environment can represent anything. 2020 It may be a single translator, who applies own set of choices 2021 to all the catalogs under own maintenance; 2022 it may be a translation project, with many cooperating translators; 2023 and so on. 2024 Each environment is named by an alphanumeric keyword 2025 (such as normalized project name, translator's name, etc.), 2026 and should be unique within a given language. 2027 2028 Environments are read from one of the following header fieldsE{:} 2029 C{Environment}, C{X-Environment}. 2030 The value the field should be comma-separated list of 2031 environment keywords. 2032 If there are several environment fields, it is undefined 2033 from which the environments are read. 2034 2035 If more than one environment is stated, then wherever the conventions 2036 of two environments conflict, the environment mentioned later 2037 in the list should take precedence. 2038 For example, environment list such as C{"footp, jdoe"} 2039 would mean to apply conventions of FOO translation project, 2040 ammended by that of translator Johnas Doemann. 2041 2042 It there is no environment header field, C{None} is reported. 2043 Empty list is reported if such field exists, but its value is empty. 2044 2045 It is not defined when the header will be examined, 2046 or if it will be reexamined when it changes (most probably not). 2047 if you want to set environments after the catalog has been 2048 opened, use L{set_environment} method. 2049 2050 @returns: environment keywords 2051 @rtype: [string*] or C{None} 2052 """ 2053 2054 if self._envs_determined: 2055 return self._envs 2056 2057 envs = None 2058 self._envs_determined = True 2059 2060 for fname in ( 2061 "Environment", 2062 "X-Environment", 2063 ): 2064 fval = self._header.get_field_value(fname) 2065 if fval is not None: 2066 envs = [x.strip().lower() for x in fval.split(",")] 2067 while "" in envs: 2068 envs.remove("") 2069 break 2070 2071 self._envs = envs 2072 return envs 2073 2074 2075 def set_environment (self, envs): 2076 """ 2077 Set environments which the catalog is part of. 2078 2079 Environments set by this method will later be readable by 2080 the L{environment} method. This will not modify the catalog header 2081 in any way; if that is desired, it must be done manually by 2082 manipulating the header fields. 2083 2084 If C{envs} is given as C{None}, it means that the environments 2085 are undetermined; if empty, the catalog belongs to no environment. 2086 2087 @param envs: environment keywords 2088 @type envs: sequence of strings or C{None} 2089 """ 2090 2091 if envs is not None: 2092 self._envs = set([x.lower() for x in envs]) 2093 else: 2094 self._envs = None 2095 self._envs_determined = True 2096 2097 2098 def wrapping (self): 2099 """ 2100 Report wrapping policy for message fields. 2101 2102 Long text fields in messages (C{msgid}, C{msgstr}, etc.) may 2103 be wrapped in different ways, as wrapping does not influence 2104 their semantics. 2105 (This is unlike translator and extracted comments, which are 2106 never wrapped, because division into lines may be significant.) 2107 PO processing tools will typically offer wrapping options, 2108 but it may be more convenient to have wrapping policy 2109 bound to the catalog, which tools respect unless overridden. 2110 2111 The following header fields are checked for wrapping policy, 2112 in given order: C{Wrapping}, C{X-Wrapping}. 2113 Wrapping policy (i.e. value of these header fields) is 2114 an unordered comma-separated list of wrapping keywords. 2115 See L{select_field_wrapper<wrap.select_field_wrapper>} 2116 for possible keywords. 2117 If no wrapping policy field is found in the header, 2118 C{None} is returned. 2119 If several wrapping policy fields are present, 2120 it is undefined which one is taken into account. 2121 2122 It is not defined when the header will be examined, 2123 or if it will be reexamined when it changes (most probably not). 2124 If you want to set wrapping after the catalog has been 2125 opened, use L{set_wrapping} method. 2126 2127 @returns: wrapping keywords 2128 @rtype: (string...) or C{None} 2129 """ 2130 2131 if self._wrap_determined: 2132 return self._wrapkw 2133 2134 wrapkw = None 2135 self._wrap_determined = True 2136 2137 for fname in ( 2138 "Wrapping", 2139 "X-Wrapping", 2140 ): 2141 fval = self._header.get_field_value(fname) 2142 if fval is not None: 2143 wrapkw = [x.strip().lower() for x in fval.split(",")] 2144 wrapkw = tuple(sorted(wrapkw)) 2145 break 2146 2147 self._wrapkw = wrapkw 2148 self._wrapf = select_field_wrapper(wrapkw) 2149 2150 return self._wrapkw 2151 2152 2153 def set_wrapping (self, wrapkw): 2154 """ 2155 Set wrapping policy for message fields. 2156 2157 Wrapping policy set by this method will later be readable by 2158 the L{wrapping} method. This will not modify the catalog header 2159 in any way; if that is desired, it must be done manually by 2160 manipulating the header fields. 2161 2162 Wrapping policy is a sequence of keywords. 2163 See L{select_field_wrapper<wrap.select_field_wrapper>} 2164 for possible keywords. 2165 If C{None} is given instead, it is passed directly to 2166 L{select_field_wrapper<wrap.select_field_wrapper>}, 2167 which will construct default wrapper. 2168 2169 @param wrapkw: wrapping policy 2170 @type wrapkw: [string...] or C{None} 2171 """ 2172 2173 self._wrapkw = tuple(sorted(wrapkw)) if wrapkw is not None else None 2174 self._wrapf = select_field_wrapper(wrapkw) 2175 self._wrap_determined = True 2176 2177 2178 def wrapf (self): 2179 """ 2180 Get wrapping function used for message fields. 2181 2182 Wrapping function is determined based on wrapping policy 2183 (see L{wrapping}, L{set_wrapping}). 2184 Wrapping function returned by this method is suitable as 2185 C{wrapf} parameter in methods of C{Message} objects. 2186 2187 @returns: wrapping function 2188 @rtype: (string, string, string?)->[string] 2189 2190 @see: L{wrap_field<wrap.wrap_field>} 2191 """ 2192 2193 self.wrapping() 2194 return self._wrapf 2195 2196 2197 def messages_by_source (self): 2198 """ 2199 Get messages grouped as lists by source. 2200 2201 All messages sharing the same primary source file 2202 (their first source reference) are grouped 2203 and filed under that source file path. 2204 Grouping is represented by list of tuples of 2205 (source, list of messages), with both sources and 2206 messages within partial lists ordered by appearance. 2207 2208 @return: messages grouped by sources 2209 @rtype: [(string, [L{Message_base}])] 2210 """ 2211 2212 msgs_by_src = {} 2213 sources = [] 2214 for msg in self._messages: 2215 src = msg.source and msg.source[0][0] or "" 2216 if src not in msgs_by_src: 2217 msgs_by_src[src] = [] 2218 sources.append(src) 2219 msgs_by_src[src].append(msg) 2220 2221 return [(x, msgs_by_src[x]) for x in sources] 2222 2223 2224 def sort_by_source (self): 2225 """ 2226 Sort messages in catalog by source references. 2227 2228 Source references within each message are sorted too, 2229 before messages are sorted by source references. 2230 2231 If any message changed its position due to sorting, 2232 L{sync_map} is called at the end. 2233 """ 2234 2235 # Sort source references within messages. 2236 for msg in self._messages: 2237 sorted_source = sorted(msg.source, 2238 key=lambda s: (s[0].lower(), s[1])) 2239 if self._monitored: 2240 msg.source = Monlist(list(map(Monpair, sorted_source))) 2241 else: 2242 msg.source = sorted_source 2243 2244 sorted_messages = sorted(self._messages, 2245 key=lambda m: [(s[0].lower(), s[1]) 2246 for s in m.source[:1]]) 2247 2248 any_moved = False 2249 for i in range(len(self._messages)): 2250 if sorted_messages[i] is not self._messages[i]: 2251 any_moved = True 2252 break 2253 if any_moved: 2254 self._messages = sorted_messages 2255 self.sync_map() 2256 2257 2258 def update_header (self, project=None, title=None, 2259 copyright=None, license=None, 2260 name=None, email=None, teamemail=None, 2261 langname=None, langcode=None, 2262 encoding=None, ctenc=None, 2263 plforms=None, poeditor=None): 2264 """ 2265 Update catalog header. 2266 2267 If a piece of information is not given (i.e. C{None}), 2268 the corresponding header field is left unmodified. 2269 If it is given as empty string, the corresponding header field 2270 is removed. 2271 PO revision date is updated always, to current date. 2272 2273 Some fields (as noted in parameter descriptions) are expanded 2274 on variables by applying the 2275 L{expand_vars<pology.resolve.expand_vars>} function. 2276 For example:: 2277 2278 title="Translation of %project into %langname." 2279 2280 The following variables are available: 2281 - C{%basename}: PO file base name 2282 - C{%poname}: PO file base name without .po extension 2283 - C{%project}: value of C{project} parameter (if not C{None}/empty) 2284 - C{%langname}: value of C{langname} parameter (if not C{None}/empty) 2285 - C{%langcode}: value of C{langcode} parameter (if not C{None}/empty) 2286 2287 @param project: project name 2288 @type project: string 2289 @param title: translation title (expanded on variables) 2290 @type title: string 2291 @param copyright: copyright notice (expanded on variables) 2292 @type copyright: string 2293 @param license: license notice (expanded on variables) 2294 @type license: string 2295 @param name: translator's name 2296 @type name: string 2297 @param email: translator's email address 2298 @type email: string 2299 @param teamemail: language team's email address 2300 @type teamemail: string 2301 @param langname: full language name 2302 @type langname: string 2303 @param langcode: language code 2304 @type langcode: string 2305 @param encoding: text encoding 2306 @type encoding: string 2307 @param ctenc: content transfer encoding 2308 @type ctenc: string 2309 @param plforms: plural forms expression 2310 @type plforms: string 2311 @param poeditor: translator's PO editor 2312 @type poeditor: string 2313 2314 @returns: reference to header 2315 """ 2316 2317 varmap = {} 2318 varmap["basename"] = os.path.basename(self.filename) 2319 varmap["poname"] = self.name 2320 if project: 2321 varmap["project"] = project 2322 if langname: 2323 varmap["langname"] = langname 2324 if langcode: 2325 varmap["langcode"] = langcode 2326 varhead="%" 2327 2328 hdr = self.header 2329 2330 if title: 2331 title = expand_vars(title, varmap, varhead) 2332 hdr.title[:] = [str(title)] 2333 elif title == "": 2334 hdr.title[:] = [] 2335 2336 if copyright: 2337 copyright = expand_vars(copyright, varmap, varhead) 2338 hdr.copyright = str(copyright) 2339 elif copyright == "": 2340 hdr.copyright = None 2341 2342 if license: 2343 license = expand_vars(license, varmap, varhead) 2344 hdr.license = str(license) 2345 elif license == "": 2346 hdr.license = None 2347 2348 if project: 2349 hdr.set_field("Project-Id-Version", str(project)) 2350 elif project == "": 2351 hdr.remove_field("Project-Id-Version") 2352 2353 hdr.set_field("PO-Revision-Date", format_datetime()) 2354 2355 if name or email: 2356 if name and email: 2357 tr_ident = "%s <%s>" % (name, email) 2358 elif name: 2359 tr_ident = "%s" % name 2360 else: 2361 tr_ident = "<%s>" % email 2362 2363 # Remove author placeholder. 2364 for i in range(len(hdr.author)): 2365 if "FIRST AUTHOR" in hdr.author[i]: 2366 hdr.author.pop(i) 2367 break 2368 2369 # Look for current author in the comments, 2370 # to update only years if present. 2371 cyear = time.strftime("%Y") 2372 acfmt = "%s, %s." 2373 new_author = True 2374 for i in range(len(hdr.author)): 2375 if tr_ident in hdr.author[i]: 2376 # Parse the current list of years. 2377 years = re.findall(r"\b(\d{2,4})\s*[,.]", hdr.author[i]) 2378 if cyear not in years: 2379 years.append(cyear) 2380 years.sort() 2381 hdr.author[i] = acfmt % (tr_ident, ", ".join(years)) 2382 new_author = False 2383 break 2384 if new_author: 2385 hdr.author.append(acfmt % (tr_ident, cyear)) 2386 2387 hdr.set_field("Last-Translator", str(tr_ident)) 2388 2389 elif name == "" or email == "": 2390 hdr.remove_field("Last-Translator") 2391 2392 if langname: 2393 tm_ident = None 2394 if langname and teamemail: 2395 tm_ident = "%s <%s>" % (langname, teamemail) 2396 elif langname: 2397 tm_ident = langname 2398 hdr.set_field("Language-Team", str(tm_ident)) 2399 elif langname == "": 2400 hdr.remove_field("Language-Team") 2401 2402 if langcode: 2403 hdr.set_field("Language", str(langcode), after="Language-Team") 2404 elif langcode == "": 2405 hdr.remove_field("Language") 2406 2407 if encoding: 2408 ctval = "text/plain; charset=%s" % encoding 2409 hdr.set_field("Content-Type", ctval) 2410 elif encoding == "": 2411 hdr.remove_field("Content-Type") 2412 2413 if ctenc: 2414 hdr.set_field("Content-Transfer-Encoding", str(ctenc)) 2415 elif ctenc == "": 2416 hdr.remove_field("Content-Transfer-Encoding") 2417 2418 if plforms: 2419 hdr.set_field("Plural-Forms", str(plforms)) 2420 elif plforms == "": 2421 hdr.remove_field("Plural-Forms") 2422 2423 if poeditor: 2424 hdr.set_field("X-Generator", str(poeditor)) 2425 elif poeditor == "": 2426 hdr.remove_field("X-Generator") 2427 2428 return hdr 2429 2430 2431 def detect_renamed_sources (self, cat, minshare=0.7): 2432 """ 2433 Heuristically determine possible renamings of source files 2434 from this catalog based on source files in the other catalog. 2435 2436 To determine the possibility that the source file A from this catalog 2437 has been renamed into source file B in the other catalog C{cat}, 2438 primarily the share of common messages to A and B is considered. 2439 The minimum needed commonality can be given by C{minshare} parameter. 2440 2441 When a source file from this catalog is directly mentioned in 2442 the other catalog, it is immediatelly considered to have 2443 no possible renamings. 2444 2445 The return value is a dictionary in which the key is 2446 the source file and the value is the list of its possible 2447 renamed counterparts. 2448 The renaming list is never empty, i.e. if no renamings 2449 were detected for a given source file, that source file 2450 will not be present in the dictionary. 2451 The dictionary is fully symmetric: if source file B is in 2452 the renaming list of file A, then there will be 2453 an entry for file B with A in its renaming list 2454 (even when B is comming from the other catalog). 2455 2456 Instead of a single other catalog to test against, 2457 a sequence of several other catalogs can be given. 2458 2459 @param cat: catalog against which to test for renamings 2460 @type cat: Catalog or [Catalog*] 2461 @param minshare: the minimum commonality between two source files 2462 to consider them as possible renaming pair (0.0-1.0) 2463 @type minshare: float 2464 2465 @returns: the renaming dictionary 2466 @rtype: {string: [string*]*} 2467 """ 2468 2469 renamings = {} 2470 2471 # Collect all own sources, to avoid matching for them. 2472 ownfs = set() 2473 for msg in self._messages: 2474 for src, lno in msg.source: 2475 ownfs.add(src) 2476 2477 if isinstance(cat, Catalog): 2478 cats = [cat] 2479 else: 2480 cats = cat 2481 2482 for ocat in cats: 2483 if self is ocat: 2484 continue 2485 2486 fcnts = {} 2487 ccnts = {} 2488 for msg in self._messages: 2489 omsg = ocat.get(msg) 2490 if omsg is None: 2491 continue 2492 for src, lno in msg.source: 2493 if src not in fcnts: 2494 fcnts[src] = 0.0 2495 ccnts[src] = {} 2496 # Weigh each message disproportionally to the number of 2497 # files it appears in (i.e. the sum of counts == 1). 2498 fcnts[src] += 1.0 / len(msg.source) 2499 counted = {} 2500 for osrc, olno in omsg.source: 2501 if osrc not in ownfs and osrc not in counted: 2502 if osrc not in ccnts[src]: 2503 ccnts[src][osrc] = 0.0 2504 ccnts[src][osrc] += 1.0 / len(omsg.source) 2505 counted[osrc] = True 2506 2507 # Select match groups. 2508 fuzzies = {} 2509 for src, fcnt in sorted(fcnts.items()): 2510 shares = [] 2511 for osrc, ccnt in sorted(ccnts[src].items()): 2512 share = ccnt / (fcnt + 1.0) # tip a bit to avoid fcnt of 0.x 2513 if share >= minshare: 2514 shares.append((osrc, share)) 2515 if shares: 2516 shares.sort(key=lambda x: x[1]) # not necessary atm 2517 fuzzies[src] = [f for f, s in shares] 2518 2519 # Update the dictionary of renamings. 2520 for src, fuzzsrcs in sorted(fuzzies.items()): 2521 group = [src] + fuzzsrcs 2522 for src in group: 2523 if src not in renamings: 2524 renamings[src] = [] 2525 for osrc in group: 2526 if src != osrc and osrc not in renamings[src]: 2527 renamings[src].append(osrc) 2528 if not renamings[src]: 2529 renamings.pop(src) 2530 2531 return renamings 2532