pology/pology/catalog.py

0001 # -*- coding: UTF-8 -*-
0002
0003 """
0004 Collection of PO entries.
0005
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009
0010 import copy
0011 import difflib
0012 import os
0013 import re
0014 import tempfile
0015 import time
0016 import types
0017
0018 from pology import PologyError, _, n_
0019 from pology.header import Header, format_datetime
0020 from pology.message import Message as MessageMonitored
0021 from pology.message import MessageUnsafe as MessageUnsafe
0022 from pology.escape import escape_c as escape
0023 from pology.escape import unescape_c as unescape
0024 from pology.fsops import mkdirpath
0025 from pology.monitored import Monitored
0026 from pology.resolve import expand_vars
0027 from pology.wrap import select_field_wrapper
0028
0029
class CatalogSyntaxError (PologyError):
    """
    Error in the syntax of a catalog.

    Normally raised while a catalog is being parsed, for example
    when invalid syntax or non-decodable characters are encountered.
    """
0039
0040
def _parse_quoted (s):
    """
    Extract and unescape the text between the outermost double quotes.

    @param s: string containing a double-quoted segment
    @returns: the unescaped text found between the first and
        the last double quote in C{s}
    """

    # str.index()/str.rindex() raise ValueError if there is no quote at all.
    start = s.index('"') + 1
    end = s.rindex('"')
    return unescape(s[start:end])
0046
0047
class _MessageDict:
    """
    Plain container of raw entry fields collected while parsing a PO file.

    Text fields are kept as lists of string pieces until the parser
    joins them; the instance dictionary is later handed to the message
    type constructor.
    """

    def __init__ (self, lcache=True):
        """
        Create the container with all fields empty.

        @param lcache: whether to also create the C{_lines_*} lists
            in which the parser stores the original lines of the entry
        @type lcache: bool
        """

        comment_fields = ("manual_comment", "auto_comment", "source", "flag")
        text_fields = ("msgctxt_previous", "msgid_previous",
                       "msgid_plural_previous",
                       "msgctxt", "msgid", "msgid_plural", "msgstr")

        # Attributes are created in a fixed order, each list field
        # getting its own fresh list.
        for name in comment_fields:
            setattr(self, name, [])
        self.obsolete = False
        for name in text_fields:
            setattr(self, name, [])
        self.refline = -1
        self.refentry = -1

        if lcache:
            self._lines_all = []
            for name in comment_fields + text_fields:
                setattr(self, "_lines_" + name, [])
0080
0081
def _read_lines_and_encoding (file, filename):
    """
    Read the whole file, split it into lines and decode them.

    The encoding is taken from the C{charset=} value of the first
    C{Content-Type:} line found before the first C{#:} line;
    if there is none, or it is empty or the C{CHARSET} placeholder,
    UTF-8 is used.

    @param file: object whose C{read()} returns the content as bytes
    @param filename: name used in error messages

    @returns: decoded lines (each ending in a newline character)
        and the name of the encoding used
    @rtype: ([string*], string)
    """

    content = file.read()

    # Determine line ending: the candidate yielding most pieces wins,
    # earlier candidates winning ties ("\r\n" should be checked first).
    most_pieces = 0
    for candidate in (b"\r\n", b"\n", b"\r"):
        npieces = len(content.split(candidate))
        if npieces > most_pieces:
            most_pieces = npieces
            lend = candidate
    lines = [piece + b"\n" for piece in content.split(lend)]
    if lines[-1] == b"\n":
        # Artifact of the content ending with a line ending.
        lines.pop()

    # Look for the encoding declaration in the header part.
    charset = b"UTF-8" # fall back to UTF-8 if encoding not found
    enc_rx = re.compile(rb"Content-Type:.*charset=(.+?)\\n", re.I)
    for rawline in lines:
        if rawline.strip().startswith(b"#:"):
            break
        found = enc_rx.search(rawline)
        if found is None:
            continue
        declared = found.group(1).strip()
        if declared and declared != b"CHARSET": # else no encoding given
            charset = declared
        break
    enc = charset.decode()

    enclines = []
    for lno, rawline in enumerate(lines, 1):
        try:
            decoded = rawline.decode(enc)
        except UnicodeDecodeError as e:
            raise CatalogSyntaxError(
                _("@info",
                  "Text decoding failure at %(file)s:%(line)d:%(col)d "
                  "under assumed encoding '%(enc)s'.",
                  file=filename, line=lno, col=e.start, enc=enc))
        enclines.append(decoded)

    return enclines, enc
0126
0127
def _parse_po_file (file, MessageType=MessageMonitored,
                    headonly=False, lcache=True):
    """
    Parse a PO file into a list of message objects.

    @param file: path of the PO file, or an object to read it from
        (its content is obtained through L{_read_lines_and_encoding})
    @type file: string or file-like object

    @param MessageType: callable applied to the dictionary of raw
        fields of each parsed entry to construct the message object

    @param headonly: whether to stop parsing after the first entry;
        the unparsed rest of the file is then returned as the tail
    @type headonly: bool

    @param lcache: whether to record original lines of each entry
        in its C{_lines_*} fields
    @type lcache: bool

    @returns: parsed messages, encoding of the file, and
        the unparsed tail (C{None} unless in header-only mode)
    @rtype: ([MessageType*], string, string or None)

    @raises CatalogSyntaxError: on malformed input,
        or if no entry at all could be parsed
    """

    if isinstance(file, str):
        filename = file
        file = open(filename, "rb")
        close_later = True
    else:
        if hasattr(file, "name"):
            filename = file.name
        else:
            filename = _("@item generic name for the source or destination "
                         "of data being read or written",
                         "&lt;stream&gt;").resolve("none")
        close_later = False
    # Close the file opened here even if reading or decoding fails;
    # a stream supplied by the caller is left open.
    try:
        lines, fenc = _read_lines_and_encoding(file, filename)
    finally:
        if close_later:
            file.close()

    ctx_modern, ctx_obsolete, \
    ctx_previous, ctx_current, \
    ctx_none, ctx_msgctxt, ctx_msgid, ctx_msgid_plural, ctx_msgstr = list(range(9))

    messages1 = list()
    lno = 0
    eno = 0

    # Parser state shared with the nested try_finish().
    class Namespace: pass
    loc = Namespace()
    loc.lno = 0
    loc.tail = None
    loc.msg = _MessageDict(lcache)
    loc.life_context = ctx_modern
    loc.field_context = ctx_none
    loc.age_context = ctx_current

    # The message has been completed by the previous line if the context just
    # switched away from ctx_msgstr;
    # call whenever context switch happens, *before* assigning new context.
    nlines = len(lines)
    def try_finish ():
        if loc.field_context == ctx_msgstr:
            messages1.append(loc.msg)
            loc.msg = _MessageDict(lcache)
            loc.field_context = ctx_none
            # In header-only mode, the first message read is the header.
            # Compose the tail of this and rest of the lines, and
            # set lno to nlines for exit.
            if headonly:
                # If not at end of file, current line is part of
                # first message and should be retained in the tail.
                offset = 1 if loc.lno < nlines else 0
                loc.tail = "".join(lines[loc.lno - offset:])
                loc.lno = nlines

    while loc.lno < nlines: # sentry for last entry
        line_raw = lines[lno]
        loc.lno += 1
        lno = loc.lno # shortcut
        line = line_raw.strip()
        if not line:
            continue

        string_follows = True
        loc.life_context = ctx_modern
        loc.age_context = ctx_current

        if line.startswith("#"):

            if 0: pass

            elif line.startswith("#~|"):
                line = line[3:].lstrip()
                loc.age_context = ctx_previous

            elif line.startswith("#~"):
                line = line[2:].lstrip()
                loc.life_context = ctx_obsolete

            elif line.startswith("#|"):
                line = line[2:].lstrip()
                loc.age_context = ctx_previous

            elif line.startswith("#:"):
                try_finish()
                string_follows = False
                for srcref in line[2:].split(" "):
                    srcref = srcref.strip()
                    if srcref:
                        # A reference is stored as (path, lineno) only if
                        # it ends in ":<positive integer>"; otherwise
                        # the whole text is the path and lineno is -1.
                        src_file, src_line = srcref, -1
                        lst = srcref.split(":", 1)
                        if len(lst) == 2:
                            try:
                                num = int(lst[1])
                            except ValueError:
                                num = 0
                            if num > 0:
                                src_file, src_line = lst[0], num
                        loc.msg.source.append((src_file, src_line))

            elif line.startswith("#,"):
                try_finish()
                string_follows = False
                for flag in line[2:].split(","):
                    flag = flag.strip()
                    if flag:
                        loc.msg.flag.append(flag)

            elif line.startswith("#."):
                try_finish()
                string_follows = False
                loc.msg.auto_comment.append(line[2:].lstrip())

            elif line.startswith("#"):
                try_finish()
                string_follows = False
                loc.msg.manual_comment.append(line[2:].lstrip())

            else:
                # Cannot reach, all unknown comments treated as manual above.
                raise CatalogSyntaxError(
                    _("@info",
                      "Unknown comment type at %(file)s:%(line)d.",
                      file=filename, line=lno))

        if line and string_follows: # for starting fields
            if 0: pass

            elif line.startswith("msgctxt"):
                # TODO: Assert context.
                try_finish()
                loc.field_context = ctx_msgctxt
                line = line[7:].lstrip()

            elif line.startswith("msgid_plural"):
                # TODO: Assert context.
                # No need for try_finish(), msgid_plural cannot start message.
                loc.field_context = ctx_msgid_plural
                line = line[12:].lstrip()

            elif line.startswith("msgid"):
                # TODO: Assert context.
                try_finish()
                if loc.life_context == ctx_obsolete:
                    loc.msg.obsolete = True
                loc.field_context = ctx_msgid
                if loc.age_context == ctx_current:
                    loc.msg.refline = lno
                    loc.msg.refentry = eno
                    eno += 1
                line = line[5:].lstrip()

            elif line.startswith("msgstr"):
                # TODO: Assert context.
                loc.field_context = ctx_msgstr
                line = line[6:].lstrip()
                msgstr_i = 0
                if line.startswith("["):
                    line = line[1:].lstrip()
                    llen = len(line)
                    p = 0
                    while p < llen and line[p].isdigit():
                        p += 1
                    if p == 0:
                        raise CatalogSyntaxError(
                            _("@info",
                              "Malformed '%(field)s' ordinal "
                              "at %(file)s:%(line)d.",
                              file=filename, line=lno, field="msgstr"))
                    msgstr_i = int(line[:p])
                    line = line[p:].lstrip()
                    if line.startswith("]"):
                        line = line[1:].lstrip()
                    else:
                        raise CatalogSyntaxError(
                            _("@info",
                              "Malformed '%(field)s' ordinal "
                              "at %(file)s:%(line)d.",
                              file=filename, line=lno, field="msgstr"))
                # Add missing msgstr entries.
                for i in range(len(loc.msg.msgstr), msgstr_i + 1):
                    loc.msg.msgstr.append([])

            elif not line.startswith("\""):
                raise CatalogSyntaxError(
                    _("@info",
                      "Unknown field name at %(file)s:%(line)d.",
                      file=filename, line=lno))

        if line and string_follows: # for continuing fields
            if line.startswith("\""):
                s = _parse_quoted(line)
                if loc.age_context == ctx_previous:
                    if loc.field_context == ctx_msgctxt:
                        loc.msg.msgctxt_previous.append(s)
                    elif loc.field_context == ctx_msgid:
                        loc.msg.msgid_previous.append(s)
                    elif loc.field_context == ctx_msgid_plural:
                        loc.msg.msgid_plural_previous.append(s)
                else:
                    if loc.field_context == ctx_msgctxt:
                        loc.msg.msgctxt.append(s)
                    elif loc.field_context == ctx_msgid:
                        loc.msg.msgid.append(s)
                    elif loc.field_context == ctx_msgid_plural:
                        loc.msg.msgid_plural.append(s)
                    elif loc.field_context == ctx_msgstr:
                        loc.msg.msgstr[msgstr_i].append(s)
            else:
                raise CatalogSyntaxError(
                    _("@info",
                      "Expected string continuation at %(file)s:%(line)d.",
                      file=filename, line=lno))

        # Update line caches.
        if lcache:
            loc.msg._lines_all.append(line_raw)
            if 0: pass
            elif line_raw.startswith("#:"):
                loc.msg._lines_source.append(line_raw)
            elif line_raw.startswith("#,"):
                loc.msg._lines_flag.append(line_raw)
            elif line_raw.startswith("#."):
                loc.msg._lines_auto_comment.append(line_raw)
            elif line_raw.startswith("#") and line_raw[1:2] not in ("~", "|"):
                loc.msg._lines_manual_comment.append(line_raw)
            elif loc.age_context == ctx_previous:
                if loc.field_context == ctx_msgctxt:
                    loc.msg._lines_msgctxt_previous.append(line_raw)
                elif loc.field_context == ctx_msgid:
                    loc.msg._lines_msgid_previous.append(line_raw)
                elif loc.field_context == ctx_msgid_plural:
                    loc.msg._lines_msgid_plural_previous.append(line_raw)
                else:
                    raise PologyError(
                        _("@info",
                          "Internal problem (%(id)d) at %(file)s:%(line)d.",
                          id=11, file=filename, line=lno))
            elif loc.age_context == ctx_current:
                if loc.field_context == ctx_msgctxt:
                    loc.msg._lines_msgctxt.append(line_raw)
                elif loc.field_context == ctx_msgid:
                    loc.msg._lines_msgid.append(line_raw)
                elif loc.field_context == ctx_msgid_plural:
                    loc.msg._lines_msgid_plural.append(line_raw)
                elif loc.field_context == ctx_msgstr:
                    loc.msg._lines_msgstr.append(line_raw)
                else:
                    raise PologyError(
                        _("@info",
                          "Internal problem (%(id)d) at %(file)s:%(line)d.",
                          id=12, file=filename, line=lno))
            else:
                raise PologyError(
                    _("@info",
                      "Internal problem (%(id)d) at %(file)s:%(line)d.",
                      id=10, file=filename, line=lno))

    try_finish() # the last message

    if len(messages1) == 0:
        raise CatalogSyntaxError(
            _("@info",
              "No header at %(file)s:%(line)d.",
              file=filename, line=lno))

    # Join fields.
    join_or_none = lambda x: "".join(x) if x else None
    for i, msg in enumerate(messages1):
        msg.msgctxt_previous = join_or_none(msg.msgctxt_previous)
        msg.msgid_previous = join_or_none(msg.msgid_previous)
        msg.msgid_plural_previous = join_or_none(msg.msgid_plural_previous)
        msg.msgctxt = join_or_none(msg.msgctxt)
        msg.msgid = join_or_none(msg.msgid)
        msg.msgid_plural = join_or_none(msg.msgid_plural)
        msg.msgstr = [join_or_none(x) for x in msg.msgstr]
        # Only the first entry (the header) may have empty msgid
        # without a context.
        if i > 0 and msg.msgid == "" and msg.msgctxt is None:
            raise CatalogSyntaxError(
                _("@info",
                  "Empty message at %(file)s:%(line)d.",
                  file=filename, line=msg.refline))

    # Repack raw dictionaries as message objects.
    messages2 = []
    for msg1 in messages1:
        messages2.append(MessageType(msg1.__dict__))

    return (messages2, fenc, loc.tail)
0418
0419
def _srcref_repack (srcrefs):
    """
    Group source references by file.

    @param srcrefs: source references as (file, line) pairs
    @type srcrefs: iterable of (string, int)

    @returns: line numbers per file, sorted in ascending order
        (empty dictionary for no references)
    @rtype: {string: [int*]}
    """

    srcdict = {}
    for file, line in srcrefs:
        srcdict.setdefault(file, []).append(line)
    # Sort the lines of every file, once all references are collected;
    # sorting only the list of the last seen file would leave the others
    # unordered and fail on empty input.
    for lines in srcdict.values():
        lines.sort()
    return srcdict
0429
0430
# Attribute specification of the Catalog class.
# Catalog.__init__ makes a deep copy of it, sets the "type" of the "*"
# element to the message type in use, and hands the result
# to assert_spec_init().
_Catalog_spec = {
    # Data.
    "header" : {"type" : Header},
    "filename" : {"type" : (str,)},
    "name" : {"type" : (str,), "derived" : True},
    "*" : {}, # messages sequence: the type is assigned at construction
}
0438
0439
0440 class Catalog (Monitored):
0441     """
0442     Class for access and operations on PO catalogs.
0443
0444     Catalog behaves as an ordered sequence of messages. The typical way of
0445     iterating over the messages from a PO file on disk would be::
0446
0447         cat = Catalog("relative/path/foo.po")
0448         for msg in cat:
0449             ...
0450             (do something with msg)
0451             ...
0452         cat.sync()
0453
0454     where L{sync()<sync>} method is used to write any modifications back to
0455     the disk.
0456
0457     The header entry of the catalog is not part of the message sequence,
0458     but is provided by the L{header} attribute, an object of
0459     type different from an ordinary message entry.
0460
0461     The catalog is a I{monitored} class.
0462     Catalog message entries themselves may also be monitored (default),
0463     but need not, depending on the mode of creation.
0464
0465     @ivar header: the header entry
0466     @type header: L{Header}
0467
0468     @ivar filename: the file name which the catalog was created with
0469     @type filename: string
0470
0471     @ivar name: (read-only)
0472         the name of the catalog
0473
0474         Determined as base of the filename, without extension.
0475     @type name: string
0476
0477     @see: L{Monitored}
0478     @see: L{Message}, L{MessageUnsafe}
0479     @see: L{Header}
0480     """
0481
0482     def __init__ (self, filename,
0483                   create=False, truncate=False,
0484                   wrapping=None, monitored=True,
0485                   headonly=False, readfh=None, single_entry=0):
0486         """
0487         Build a message catalog by reading from a PO file or creating anew.
0488
0489         The message entries in the catalog may be monitored themselves or not.
0490         That is, when monitoring is requested, entries are represented by
0491         the L{Message} class, otherwise with L{MessageUnsafe}.
0492
0493         Monitored messages are usually appropriate when the application is
0494         expected to modify them. Non-monitored messages should provide better
0495         performance, so use them whenever the catalog is opened for read-only
0496         purposes (such as checks).
0497
0498         Catalog can also be opened in header-only mode, for better
0499         performance when only the header data is needed. This mode provides
0500         L{header} attribute as usual, but the rest of entries are
0501         unavailable. If any of the operations dealing with message entries
0502         are invoked, an error is signaled.
0503
0504         Instead of opening and reading from catalog's filename,
0505         catalog can be read from a file-like object provided by
0506         C{readfh} parameter.
0507         Same as when reading from file on disk, text will be decoded
0508         using catalog's encoding after reading it from C{readfh}.
0509
0510         If a problem which prevents construction of a valid catalog is
0511         detected while parsing a PO file, L{CatalogSyntaxError} is raised.
0512
0513         @param filename: name of the PO catalog on disk, or new catalog
0514         @type filename: string
0515
0516         @param create:
0517             whether a blank catalog can be created when the PO file does
0518             not already exist, or signal an error
0519         @type create: bool
0520
0521         @param truncate:
0522             whether catalog should be empty (and with uninitialized header)
0523             regardless of whether it is opened or created
0524         @type truncate: bool
0525
0526         @param wrapping:
0527             sequence of keywords specifying wrapping policy for
0528             message text fields (C{msgid}, C{msgstr}, etc.).
0529             See L{select_field_wrapper<wrap.select_field_wrapper>}
0530             function for possible keywords and their effects on wrapping.
0531             If given as C{None}, it will be deduced from the catalog
0532             (see L{wrapping} method).
0533         @type wrapping: sequence of strings
0534
0535         @param monitored: whether the message entries are monitored
0536         @type monitored: bool
0537
0538         @param headonly: whether to open in header-only mode
0539         @type headonly: bool
0540
0541         @param readfh: file to read the catalog from
0542         @type readfh: file-like object
0543         """
0544
0545         self._monitored = monitored
0546
0547         # Select type of message object to use.
0548         if monitored:
0549             message_type = MessageMonitored
0550         else:
0551             message_type = MessageUnsafe
0552
0553         # Signal if catalog should exist on disk but does not.
0554         if not create and not (os.path.exists(filename) or readfh):
0555             raise PologyError(
0556                 _("@info",
0557                   "File '%(file)s' does not exist.",
0558                   file=filename))
0559
0560         # Read messages or create empty catalog.
0561         if not truncate and (os.path.exists(filename) or readfh):
0562             file = readfh or filename
0563             m, e, t = _parse_po_file(file, message_type, headonly, monitored)
0564             self._encoding = e
0565             self._created_from_scratch = False
0566             if not m[0].msgctxt and not m[0].msgid:
0567                 # Proper PO, containing the header.
0568                 self._header = Header(m[0])
0569                 self._header._committed = True # status for sync
0570                 if (single_entry > 0):
0571                     self.__dict__["*"] = [m[single_entry]]
0572                 else:
0573                     self.__dict__["*"] = m[1:]
0574             else:
0575                 # Improper PO, missing the header.
0576                 self._header = Header()
0577                 self._header._committed = False # status for sync
0578                 if (single_entry > 0):
0579                     self.__dict__["*"] = [m[single_entry-1]]
0580                 else:
0581                     self.__dict__["*"] = m
0582             self._tail = t
0583         else:
0584             self._encoding = "UTF-8"
0585             self._created_from_scratch = True
0586             self._header = Header()
0587             self._header._committed = False # status for sync
0588             self.__dict__["*"] = []
0589             self._tail = None
0590
0591         self._filename = filename
0592
0593         self._messages = self.__dict__["*"] # nicer name for the sequence
0594
0595         # Fill in the message key-position links.
0596         # Set committed and remove-on-sync status.
0597         self._msgpos = {}
0598         for i in range(len(self._messages)):
0599             self._msgpos[self._messages[i].key] = i
0600             self._messages[i]._committed = True
0601             self._messages[i]._remove_on_sync = False
0602
0603         # Initialize monitoring.
0604         final_spec = copy.deepcopy(_Catalog_spec)
0605         final_spec["*"]["type"] = message_type
0606         self.assert_spec_init(final_spec)
0607
0608         # Inverse map (by msgstr) will be computed on first use.
0609         self._invmap = None
0610
0611         # Cached plural definition from the header.
0612         self._plustr = ""
0613
0614         # Cached language of the translation.
0615         # None means the language has not been determined.
0616         self._lang = None
0617         self._lang_determined = False
0618
0619         # Cached environments.
0620         self._envs = None
0621         self._envs_determined = False
0622
0623         # Cached accelerator markers.
0624         self._accels = None
0625         self._accels_determined = False
0626
0627         # Cached markup types.
0628         self._mtypes = None
0629         self._mtypes_determined = False
0630
0631         # Cached wrapping policy.
0632         if wrapping is None:
0633             self._wrap_determined = False
0634             self._wrapf = None
0635             self._wrapkw = None
0636         else:
0637             self._wrap_determined = True
0638             self._wrapf = select_field_wrapper(wrapping)
0639             self._wrapkw = tuple(wrapping)
0640
0641
0642     def _assert_headonly (self):
0643
0644         if self._tail:
0645             raise PologyError(
0646                 _("@info",
0647                   "Trying to access catalog messages in header-only mode."))
0648
0649
0650     def __getattr__ (self, att):
0651         """
0652         Attribute getter.
0653
0654         Processes read-only attributes, and sends others to the base class.
0655
0656         @param att: name of the attribute to get
0657         @returns: attribute value
0658         """
0659         if 0: pass
0660
0661         elif att == "name":
0662             basename = os.path.basename(self._filename)
0663             p = basename.rfind(".")
0664             if p >= 0:
0665                 return basename[:p]
0666             else:
0667                 return basename
0668
0669         else:
0670             return Monitored.__getattr__(self, att)
0671
0672
0673     def __len__ (self):
0674         """
0675         The number of messages in the catalog.
0676
0677         The number includes obsolete entries, and excludes header entry.
0678
0679         @returns: the number of messages
0680         @rtype: int
0681         """
0682
0683         self._assert_headonly()
0684         return len(self._messages)
0685
0686
0687     def __getitem__ (self, ident):
0688         """
0689         Get message by position or another message.
0690
0691         If the position is out of range, or the lookup message does not have
0692         a counterpart in this catalog with the same key, an error is signaled.
0693
0694         Runtime complexity O(1), regardless of the C{ident} type.
0695
0696         @param ident: position index or another message
0697         @type ident: int or L{Message_base}
0698
0699         @returns: reference to the message in catalog
0700         @rtype: L{Message_base}
0701         """
0702
0703         self._assert_headonly()
0704         self.assert_spec_getitem()
0705         if not isinstance(ident, int):
0706             ident = self._msgpos[ident.key]
0707         return self._messages[ident]
0708
0709
0710     def __setitem__ (self, ident, msg):
0711         """
0712         Set message by position or another message.
0713
0714         If the position is out of range, or the lookup message does not have
0715         a counterpart in this catalog with the same key, an error is signaled.
0716
0717         Runtime complexity O(1), regardless of the C{ident} type.
0718
0719         @param ident: position index or another message
0720         @type ident: int or L{Message_base}
0721
0722         @returns: reference to the message in catalog
0723         @rtype: L{Message_base}
0724         """
0725
0726         self._assert_headonly()
0727         self.assert_spec_setitem(msg)
0728         if not isinstance(ident, int):
0729             ident = self._msgpos[ident.key]
0730         self._messages[ident] = msg
0731         if self._messages[ident] is not msg:
0732             self.__dict__["#"]["*"] += 1
0733         return self._messages[ident]
0734
0735
0736     def __contains__ (self, msg):
0737         """
0738         Whether the message with the same key exists in the catalog.
0739
0740         Runtime complexity O(1).
0741
0742         @param msg: message to look for
0743         @type msg: L{Message_base}
0744
0745         @returns: C{True} if the message exists
0746         @rtype: bool
0747         """
0748
0749         self._assert_headonly()
0750         return msg.key in self._msgpos
0751
0752
0753     def __eq__ (self, ocat):
0754         """
0755         Whether two catalogs are equal in all apparent parts.
0756
0757         Catalogs are considered equal if they are of the same length,
0758         their headers are equal, and each two messages with the
0759         same position are equal.
0760
0761         Runtime complexity O(n).
0762
0763         @returns: C{True} if catalogs are equal
0764         @rtype: bool
0765         """
0766
0767         if len(self) != len(ocat):
0768             return False
0769         if self.header != ocat.header:
0770             return False
0771         for i in range(len(ocat)):
0772             if self[i] != ocat[i]:
0773                 return False
0774         return True
0775
0776
0777     def __ne__ (self, ocat):
0778         """
0779         Whether two catalogs are equal in all apparent parts.
0780
0781         Equivalent to C{not (self == ocat)}.
0782
0783         @returns: C{False} if catalogs are equal
0784         @rtype: bool
0785         """
0786
0787         return not self.__eq__(ocat)
0788
0789
0790     def find (self, msg, wobs=True):
0791         """
0792         Position of the message in the catalog.
0793
0794         Runtime complexity O(1).
0795
0796         @param msg: message to look for
0797         @type msg: L{Message_base}
0798         @param wobs: obsolete messages considered non-existant if C{False}
0799         @type wobs: bool
0800
0801         @returns: position index if the message exists, -1 otherwise
0802         @rtype: int
0803         """
0804
0805         self._assert_headonly()
0806         if msg.key in self._msgpos:
0807             if wobs or not msg.obsolete:
0808                 return self._msgpos[msg.key]
0809         return -1
0810
0811
0812     def get (self, msg, defmsg=None):
0813         """
0814         Get message by key of another message, with default fallback.
0815
0816         If the lookup message C{msg} does not have a counterpart
0817         in this catalog with the same key, C{defmsg} is returned.
0818         C{msg} can also be C{None}, when C{defmsg} is returned.
0819
0820         Runtime complexity O(1).
0821
0822         @param msg: message for the lookup by key
0823         @type msg: L{Message_base} or None
0824         @param defmsg: fallback in case lookup failed
0825         @type defmsg: any
0826
0827         @returns: reference to the message in catalog, or default
0828         @rtype: L{Message_base} or type(defmsg)
0829         """
0830
0831         if msg is None:
0832             return defmsg
0833         pos = self.find(msg)
0834         if pos >= 0:
0835             return self._messages[pos]
0836         else:
0837             return defmsg
0838
0839
0840     def add (self, msg, pos=None, srefsyn={}):
0841         """
0842         Add a message to the catalog.
0843
0844         If the message with the same key already exists in the catalog,
0845         it will be replaced with the new message, ignoring position.
0846         The return value will be C{None}.
0847
0848         If the message does not exist in the catalog, when the position is
0849         C{None}, the insertion will be attempted such as that the messages be
0850         near according to the source references; if the position is not
0851         C{None}, the message is inserted at the given position.
0852         The return value will be the true insertion position.
0853
0854         Negative position can be given as well. It counts backward from
0855         the first non-obsolete message if the message to be added
0856         is not obsolete, or from last message otherwise.
0857
0858         When the message is inserted according to source references,
0859         a dictionary of file paths to consider synonymous can be given
0860         by the C{srefsyn}. The key is the file path for which the synonyms
0861         are being given, and the value the list of synonymous file paths.
0862         The mapping is not symmetric; if B is in the list of synonyms to A,
0863         A will not be automatically considered to be among synonyms of B,
0864         unless explicitly given in the list of synonyms to B.
0865
0866         Runtime complexity O(1) if the message is present in the catalog;
0867         O(n - pos) if the position is given and the message is not present;
0868         O(n) if the position is not given and the message is not present.
0869
0870         @param msg: message to insert
0871         @type msg: L{Message_base}
0872
0873         @param pos: position index to insert at
0874         @type pos: int or None
0875
0876         @param srefsyn: synonymous names to some of the source files
0877         @type srefsyn: {string: [string*]*}
0878
0879         @returns: if inserted, the position where inserted
0880         @rtype: int or None
0881         """
0882
0883         return self.add_more([(msg, pos)], srefsyn=srefsyn)[0]
0884
0885
0886     def add_more (self, msgpos, cumulative=False, srefsyn={}):
0887         """
0888         Add more than one message to the catalog.
0889
0890         Like L{add}, except that several messages are added in one call.
0891         This significantly speeds up insertion when insertion positions of
0892         all messages are known beforehand.
0893
0894         Insertion positions can be given relative to state before the call,
0895         or cumulative to earlier insertions in the list.
0896         For example, if insertions are given as C{[(msg1, 2), (msg2, 5)]} and
0897         not cumulative, then the resulting position for C{msg1} will be 2,
0898         and for C{msg2} 6 (assuming that both messages actually got inserted).
0899         This behavior can be toggled by the C{cumulative} parameter.
0900
0901         @param msgpos: messages with target insertion positions
0902         @type msgpos: [(L{Message_base}, int), ...]
0903         @param cumulative: whether input positions are cumulative
0904         @type cumulative: bool
0905         @param srefsyn: synonymous names to some of the source files
0906         @type srefsyn: {string: [string*]*}
0907
0908         @returns: positions where inserted, or None where replaced
0909         @rtype: [int or None, ...]
0910         """
0911
0912         self._assert_headonly()
0913         for msg, pos in msgpos:
0914             self.assert_spec_setitem(msg)
0915             if not msg.msgid and msg.msgctxt is None:
0916                 raise PologyError(
0917                     _("@info",
0918                       "Trying to insert message with empty key into catalog."))
0919
0920         # Resolve backward positions, set aside automatic positions,
0921         # set aside replacements.
0922         msgpos_ins = []
0923         msgs_auto = []
0924         msgs_repl = []
0925         for msg, pos in msgpos:
0926             if msg.key not in self._msgpos:
0927                 if pos is not None:
0928                     if pos < 0:
0929                         pos = len(self._messages) + pos
0930                     if pos < 0 or pos > len(self._messages):
0931                         raise PologyError(
0932                             _("@info",
0933                               "Trying to insert message into catalog by "
0934                               "position out of range."))
0935                     msgpos_ins.append((msg, pos))
0936                 else:
0937                     msgs_auto.append(msg)
0938             else:
0939                 msgs_repl.append(msg)
0940
0941         # Sort messages to be inserted by resolved positions.
0942         msgpos_ins = sorted(msgpos_ins, key=lambda x: x[1])
0943
0944         # Resolve messages to be inserted by automatic positions.
0945         for msg in msgs_auto:
0946             pos, d1 = self._pick_insertion_point(msg, srefsyn)
0947             i = 0
0948             while i < len(msgpos_ins):
0949                 omsg, opos = msgpos_ins[i]
0950                 if pos < opos:
0951                     break
0952                 elif cumulative:
0953                     pos += 1
0954             msgpos_ins.insert(i, (msg, pos))
0955
0956         # Accumulate insertion positions if not cumulative.
0957         if not cumulative and len(msgpos_ins) > 1:
0958             off = 0
0959             msgpos_tmp = []
0960             for msg, pos in msgpos_ins:
0961                 msgpos_tmp.append((msg, pos + off))
0962                 off += 1
0963             msgpos_ins = msgpos_tmp
0964
0965         # Update key-position links for the index to be added.
0966         off = 0
0967         for i in range(len(msgpos_ins)):
0968             pos1 = msgpos_ins[i][1] - off
0969             if i + 1 < len(msgpos_ins):
0970                 pos2 = msgpos_ins[i + 1][1] - (off + 1)
0971             else:
0972                 pos2 = len(self._messages)
0973             for j in range(pos1, pos2):
0974                 ckey = self._messages[j].key
0975                 self._msgpos[ckey] = j + (off + 1)
0976             off += 1
0977
0978         # Insert messages at computed positions.
0979         for msg, pos in msgpos_ins:
0980             self._messages.insert(pos, msg)
0981             self._messages[pos]._remove_on_sync = False # no pending removal
0982             self._messages[pos]._committed = False # write it on sync
0983             self._msgpos[msg.key] = pos # store new key-position link
0984             self.__dict__["#"]["*"] += 1 # indicate sequence change
0985
0986         # Replace existing messages.
0987         for msg in msgs_repl:
0988             pos = self._msgpos[msg.key]
0989             self._messages[pos] = msg
0990
0991         # Recover insertion/replacement positions.
0992         pos_res = []
0993         msgpos_ins_d = dict(msgpos_ins)
0994         for msg, pos in msgpos:
0995             ipos = msgpos_ins_d.get(msg)
0996             if ipos is not None:
0997                 pos_res.append(ipos)
0998             else:
0999                 pos_res.append(None)
1000
1001         return pos_res
1002
1003
1004     def obspos (self):
1005         """
1006         Get canonical position of the first obsolete message.
1007
1008         I{Canonical} position of the first obsolete message is the position
1009         of first of the contiguous obsolete messages at the end of the catalog.
1010         Normally this should be the same as the position of the very first
1011         obsolete message, as all obsolete messages should be contiguously
1012         grouped at the end. But there is no enforcement of such grouping,
1013         therefore the more stricter definition.
1014
1015         If there are no messages in the catalog, or the last message
1016         is not obsolete, the position is reported as number of messages
1017         (i.e. one position after the last message).
1018
1019         Runtime complexity O(number of contiguous trailing obsolete messages).
1020
1021         @return: canonical position of first obsolete message
1022         @rtype: int
1023         """
1024
1025         op = len(self._messages)
1026         while op > 0 and self._messages[op - 1].obsolete:
1027             op -= 1
1028
1029         return op
1030
1031
1032     def add_last (self, msg):
1033         """
1034         Add a message to the selected end of catalog, if not already in it.
1035
1036         Synonym to C{cat.add(msg, cat.obspos())} if the message is
1037         not obsolete (i.e. tries to add the message after all non-obsolete),
1038         or to C{cat.add(msg, len(cat))} (tries to add at the very end).
1039         If the message already exits in the catalog (by key),
1040         same behavior as for L{add} applies.
1041
1042         @see: L{add}
1043         """
1044
1045         if not msg.obsolete:
1046             return self.add(msg, self.obspos())
1047         else:
1048             return self.add(msg, len(self._messages))
1049
1050
1051     def remove (self, ident):
1052         """
1053         Remove a message from the catalog, by position or another message.
1054
1055         If the position is out of range, or the lookup message does not have
1056         a counterpart in this catalog with the same key, an error is signaled.
1057
1058         Runtime complexity O(n), regardless of C{ident} type.
1059         Use L{remove_on_sync()<remove_on_sync>} method for O(1) complexity,
1060         when the logic allows the removal to be delayed to syncing time.
1061
1062         @param ident: position index or another message
1063         @type ident: int or L{Message_base}
1064
1065         @returns: C{None}
1066         """
1067
1068         self._assert_headonly()
1069
1070         # Determine position and key by given ident.
1071         if isinstance(ident, int):
1072             ip = ident
1073             key = self._messages[ip].key
1074         else:
1075             key = ident.key
1076             ip = self._msgpos[key]
1077
1078         # Update key-position links for the removed index.
1079         for i in range(ip + 1, len(self._messages)):
1080             ckey = self._messages[i].key
1081             self._msgpos[ckey] = i - 1
1082
1083         # Remove from messages and key-position links.
1084         self._messages.pop(ip)
1085         self._msgpos.pop(key)
1086         self.__dict__["#"]["*"] += 1 # indicate sequence change
1087
1088
1089     def remove_on_sync (self, ident):
1090         """
1091         Remove a message from the catalog, by position or another message,
1092         on the next sync.
1093
1094         If the position is out of range, or the lookup message does not have
1095         a counterpart in this catalog with the same key, an error is signaled.
1096
1097         Suited for for-in iterations over a catalog with a sync afterwards,
1098         so that the indices are not confused by removal, and good performance.
1099
1100         Runtime complexity O(1).
1101
1102         @param ident: position index or another message
1103         @type ident: int or L{Message_base}
1104
1105         @returns: C{None}
1106         """
1107
1108         self._assert_headonly()
1109
1110         # Determine position and key by given ident.
1111         if isinstance(ident, int):
1112             ip = ident
1113         else:
1114             ip = self._msgpos[ident.key]
1115
1116         # Indicate removal on sync for this message.
1117         self._messages[ip]._remove_on_sync = True
1118         self.__dict__["#"]["*"] += 1 # indicate sequence change (pending)
1119
1120
1121     def sync (self, force=False, noobsend=False, writefh=None, fitplural=False):
1122         """
1123         Write catalog file to disk if any message has been modified.
1124
1125         All activities scheduled for sync-time are performed, such as
1126         delayed message removal.
1127
1128         If catalog is monitored, unmodified messages (and message parts)
1129         are not reformatted unless forced.
1130
1131         Instead of opening and writing into catalog's filename,
1132         catalog can be written to a file-like object provided by
1133         C{writefh} parameter.
1134         Same as when writing to file on disk, text will be encoded
1135         using catalog's encoding before writing it to C{writefh}.
1136
1137         If in a plural message the number of C{msgstr} fields is not equal
1138         to the number specified in the plural header, the C{fitplural}
1139         parameter can be set to C{True} to correct this on syncing.
1140         However, this fitting will be performed only on clean plural messages,
1141         i.e. those in which all existing C{msgstr} fields are empty,
1142         as otherwise it is unclear how to adapt them to plural header.
1143
1144         @param force: whether to reformat unmodified messages
1145         @type force: bool
1146         @param noobsend: do not reorder messages to group all obsolete at end
1147         @type noobsend: bool
1148         @param writefh: file to write the catalog to
1149         @type writefh: file-like object open in binary mode
1150         @param fitplural: whether to fit the number of msgstr fields in
1151             clean plural messages to plural header specification
1152         @type fitplural: bool
1153
1154         @returns: C{True} if the file was modified, C{False} otherwise
1155         @rtype: bool
1156         """
1157
1158         # Cannot sync catalogs which have been given no path
1159         # (usually temporary catalogs).
1160         if not self._filename.strip():
1161             raise PologyError(
1162                 _("@info",
1163                   "Trying to sync unnamed catalog."))
1164
1165         # Fit the number of msgstr entries in plural messages if requested.
1166         # Must be done before the modification test below.
1167         if fitplural:
1168             n = self.nplurals()
1169             for msg in self._messages:
1170                 if (    msg.msgid_plural is not None
1171                     and len(msg.msgstr) != n
1172                     and all(len(s) == 0 for s in msg.msgstr)
1173                 ):
1174                     msg.msgstr[:] = [""] * n
1175
1176         # If catalog is not monitored, force syncing.
1177         if not self._monitored:
1178             force = True
1179
1180         # If no modifications throughout and sync not forced, return.
1181         if not force and not self.modcount:
1182             return False
1183
1184         # No need to indicate sequence changes here, as after sync the
1185         # catalog is set to unmodified throughout.
1186
1187         # Temporarily insert header, for homogeneous iteration.
1188         self._messages.insert(0, self._header)
1189         self._messages[0]._remove_on_sync = False # never remove header
1190         nmsgs = len(self._messages)
1191
1192         # Starting position for reinserting obsolete messages.
1193         obstop = len(self._messages)
1194         while obstop > 0 and self._messages[obstop - 1].obsolete:
1195             obstop -= 1
1196         obsins = obstop
1197
1198         # NOTE: Key-position links may be invalidated from this point onwards,
1199         # by reorderings/removals. To make sure it is not used before the
1200         # rebuild at the end, delete now.
1201         del self._msgpos
1202
1203         if not self._wrap_determined:
1204             self.wrapping()
1205
1206         flines = []
1207         i = 0
1208         while i < nmsgs:
1209             msg = self._messages[i]
1210             if msg.get("_remove_on_sync", False):
1211                 # Removal on sync requested, just skip.
1212                 i += 1
1213             elif not noobsend and msg.obsolete and i < obstop:
1214                 # Obsolete message out of order, reinsert and repeat the index.
1215                 # Reinsertion is such that the relative ordering of obsolete
1216                 # messages is preserved.
1217                 msg = self._messages.pop(i)
1218                 self._messages.insert(obsins - 1, msg) # -1 due to popping
1219                 obstop -= 1
1220             else:
1221                 # Normal message, append formatted lines to rest.
1222                 committed = msg.get("_committed", False)
1223                 flines.extend(msg.to_lines(self._wrapf,
1224                                            force or not committed))
1225                 # Message should finish with one empty line.
1226                 if flines[-1] != "\n":
1227                     flines.append("\n")
1228                 i += 1
1229         if not self._tail:
1230             # Remove trailing empty lines.
1231             while flines and flines[-1] == "\n":
1232                 flines.pop(-1)
1233         else:
1234             # Tail has to be converted to separate lines,
1235             # so that possibly new encoding is applied to it too
1236             # while being able to report line/column on error.
1237             flines.extend(x + "\n" for x in self._tail.split("\n"))
1238             if self._tail.endswith("\n"):
1239                 flines.pop(-1)
1240
1241         # Remove temporarily inserted header.
1242         self._messages.pop(0)
1243
1244         # Update message map.
1245         self.sync_map()
1246
1247         # Reset modification state throughout.
1248         self.modcount = 0
1249
1250         # Encode lines and write file.
1251         enclines = []
1252         for i, line in enumerate(flines):
1253             try:
1254                 encline = line.encode(self._encoding)
1255             except UnicodeEncodeError as e:
1256                 raise CatalogSyntaxError(
1257                     _("@info",
1258                       "Text encoding failure at %(file)s:%(line)d:%(col)d "
1259                       "under assumed encoding '%(enc)s'.",
1260                       file=self._filename, line=(i + 1), col=e[2],
1261                       enc=self._encoding))
1262             enclines.append(encline)
1263         if not writefh:
1264             # Create the parent directory if it does not exist.
1265             pdirpath = os.path.dirname(self._filename)
1266             mkdirpath(pdirpath)
1267             # Write to file atomically: directly write to temporary file,
1268             # then rename it to destination file.
1269             #ofl = tempfile.NamedTemporaryFile(delete=False, dir=pdirpath)
1270             #tmpfname = ofl.name
1271             # ...needs Python 2.6
1272             tmpfname = os.path.join(pdirpath,
1273                                     os.path.basename(self._filename) + "~tmpw")
1274             ofl = open(tmpfname, "wb")
1275         else:
1276             ofl = writefh
1277         ofl.writelines(enclines)
1278         if not writefh:
1279             ofl.close()
1280             if os.name == "nt" and os.path.exists(self._filename):
1281                 # NT does not allow to overwrite on rename.
1282                 tmpfname2 = self._filename + "~tmpo"
1283                 os.rename(self._filename, tmpfname2)
1284                 os.rename(tmpfname, self._filename)
1285                 os.remove(tmpfname2)
1286             else:
1287                 os.rename(tmpfname, self._filename)
1288
1289         # Indicate the catalog is no longer created from scratch, if it was.
1290         self._created_from_scratch = False
1291
1292         # Indicate header has been committed.
1293         self._header._committed = True
1294
1295         # Indicate for each message that it has been committed.
1296         for msg in self._messages:
1297             msg._committed = True
1298
1299         return True
1300
1301
1302     def sync_map (self):
1303         """
1304         Update message map.
1305
1306         In case there were any modifications to message keys,
1307         or any pending removals issued, this function will update
1308         the sequence of messages such that membership operations
1309         work properly again.
1310         Obsolete messages will be moved to end of catalog.
1311         Referent line and entry numbers will remain invalid,
1312         as catalog will not be written out.
1313
1314         This is a less expensive alternative to syncing the catalog,
1315         when it is only necessary to continue using it in synced state,
1316         rather than actually writing it out.
1317         """
1318
1319         # Execute pending removals.
1320         # Separate messages into current and obsolete.
1321         newlst = []
1322         newlst_obs = []
1323         for msg in self._messages:
1324             if not msg.get("_remove_on_sync", False):
1325                 if not msg.obsolete:
1326                     newlst.append(msg)
1327                 else:
1328                     newlst_obs.append(msg)
1329         newlst.extend(newlst_obs)
1330         self.__dict__["*"] = newlst
1331         self._messages = self.__dict__["*"]
1332
1333         # Rebuild key-position links.
1334         self._msgpos = {}
1335         for i in range(len(self._messages)):
1336             self._msgpos[self._messages[i].key] = i
1337
1338         # Set inverse map to non-computed.
1339         self._invmap = None
1340
1341
1342     def _make_invmap (self):
1343
1344         # Map for inverse lookup (by translation) has as key the msgstr[0],
1345         # and the value the list of messages having the same msgstr[0].
1346
1347         self._invmap = {}
1348         for msg in self._messages:
1349             ikey = msg.msgstr[0]
1350             msgs = self._invmap.get(ikey)
1351             if msgs is None:
1352                 msgs = []
1353                 self._invmap[ikey] = msgs
1354             msgs.append(msg)
1355
1356
1357     def insertion_inquiry (self, msg, srefsyn={}):
1358         """
1359         Compute the tentative insertion of the message into the catalog.
1360
1361         The tentative insertion is a tuple of position of a message when it
1362         would be inserted into the catalog, and the I{weight} indicating
1363         the quality of positioning. The weight is computed by analyzing
1364         the source references.
1365
1366         Runtime complexity O(n).
1367
1368         @param msg: message to compute the tentative insertion for
1369         @type msg: L{Message_base}
1370         @param srefsyn: synonymous names to some of the source files
1371         @type srefsyn: {string: [string*]*}
1372
1373         @returns: the insertion position and its weight
1374         @rtype: int, float
1375         """
1376
1377         self._assert_headonly()
1378         return self._pick_insertion_point(msg, srefsyn)
1379
1380
1381     def created (self):
1382         """
1383         Whether the catalog has been newly created (no existing PO file).
1384
1385         A catalog is no longer considered newly created after the first sync.
1386
1387         @returns: C{True} if newly created, C{False} otherwise
1388         @rtype: bool
1389         """
1390
1391         return self._created_from_scratch
1392
1393
    def _pick_insertion_point (self, msg, srefsyn={}):

        # Return the best insertion position with associated weight.
        # Assume the existing messages in the catalog are properly ordered.
        #
        # The result is a (position, weight) tuple. The weight is 1.0
        # when the position was found by matching source references,
        # and 0.0 when falling back to the last position.
        # srefsyn maps a source file path to the list of paths
        # to be considered synonymous with it; it is only read here.

        # The last position is right after all non-obsolete messages for
        # a non-obsolete candidate, and at the very end for an obsolete one.
        # Only existing messages before it are examined below.
        if not msg.obsolete:
            last = self.obspos()
        else:
            last = len(self._messages)

        # Insert at the last position if the candidate message has
        # no source references.
        if not msg.source:
            return last, 0.0

        ins_pos = -1
        # Try to find insertion position by comparing the source references
        # of the candidate to the source references of the existing messages.
        # The order of matching must be very specific for logical insertion.
        # If the matching source files are found, insert according to
        # the line number.
        # Source references of the candidate are tried in turn,
        # and the first one producing a position wins.
        for src, lno in msg.source:
            src_pos = 0 # NOTE: not read anywhere in this method
            src_match = False
            # The "primary" source file of the messages currently being
            # passed over; kept for as long as the following messages
            # have some reference to it (or to what counts as its synonym).
            curr_prim_esrc = ""
            for i in range(last):
                emsg = self._messages[i]
                if not emsg.source:
                    continue
                # Check whether this message still belongs to the current
                # primary file. On match, elno stays bound to the line of
                # the matching reference and is used for comparison below.
                same_prim_esrc = False
                for esrc, elno in emsg.source:
                    if curr_prim_esrc in [esrc] + srefsyn.get(esrc, []):
                        same_prim_esrc = True
                        break
                if not same_prim_esrc:
                    # Switch to the first source reference of this message
                    # as the new primary file, with its line number.
                    curr_prim_esrc, elno = emsg.source[0]

                if src in [curr_prim_esrc] + srefsyn.get(curr_prim_esrc, []):
                    # The source file names match.
                    # Insert at this position if the candidate's line
                    # number precedes that of the current message.
                    src_match = True
                    if lno < elno:
                        ins_pos = i
                        break
                elif src_match:
                    # The sources no longer match, but were matched
                    # before. This means the candidate line number is
                    # after all existing, so insert at this position.
                    ins_pos = i
                    break

                # NOTE: both assignments to ins_pos above are followed
                # by a break, so this check does not trigger.
                if ins_pos >= 0:
                    break

            if ins_pos >= 0:
                break

        if ins_pos >= 0:
            return ins_pos, 1.0
        else:
            return last, 0.0
1456
1457
1458     def nplurals (self):
1459         """
1460         Number of msgstr fields expected for plural messages.
1461
1462         Determined by the Plural-Forms header field; if this field
1463         is absent from the header, defaults to 1.
1464
1465         @returns: number of plurals
1466         @rtype: int
1467         """
1468
1469         # Get nplurals string from the header.
1470         plforms = self._header.get_field_value("Plural-Forms")
1471         if not plforms: # no plural definition
1472             return 1
1473         nplustr = plforms.split(";")[0]
1474
1475         # Get the number of forms from the string.
1476         m = re.search(r"\d+", nplustr)
1477         if not m: # malformed nplurals
1478             return 1
1479
1480         return int(m.group(0))
1481
1482
1483     def plural_index (self, number):
1484         """
1485         Msgstr field index in plural messages for given number.
1486
1487         Determined by the Plural-Forms header field; if this field
1488         is absent from the header, defaults to 0.
1489
1490         @param number: the number to determine the plural form for
1491         @type number: int
1492
1493         @returns: index of msgstr field
1494         @rtype: int
1495         """
1496
1497         # Get plural definition from the header.
1498         plforms = self._header.get_field_value("Plural-Forms")
1499         if not plforms: # no plural definition, assume 0
1500             return 0
1501         plustr = plforms.split(";")[1]
1502
1503         # Rebuild evaluation string only if changed to last invocation.
1504         if plustr != self._plustr:
1505             # Record raw plural definition for check on next call.
1506             self._plustr = plustr
1507
1508             # Prepare Python-evaluable string out of the raw definition.
1509             plustr = plustr[plustr.find("=") + 1:] # remove plural= part
1510             p = -1
1511             evalstr = ""
1512             while 1:
1513                 p = plustr.find("?")
1514                 if p < 0:
1515                     evalstr += " " + plustr
1516                     break
1517                 cond = plustr[:p]
1518                 plustr = plustr[p + 1:]
1519                 cond = cond.replace("&&", " and ")
1520                 cond = cond.replace("||", " or ")
1521                 evalstr += "(" + cond + ") and "
1522                 p = plustr.find(":")
1523                 body = plustr[:p]
1524                 plustr = plustr[p + 1:]
1525                 evalstr += "\"" + body + "\" or "
1526             if not evalstr.strip():
1527                 evalstr = "0"
1528
1529             # Record the current evaluable definition.
1530             self._plustr_eval = evalstr
1531
1532         # Evaluate the definition.
1533         n = number # set eval context (plural definition uses n as variable)
1534         form = int(eval(self._plustr_eval))
1535
1536         return form
1537
1538
1539     def plural_indices_single (self):
1540         """
1541         Indices of the msgstr fields which are used for single number only.
1542
1543         @returns: msgstr indices used for single numbers
1544         @rtype: [int*]
1545         """
1546
1547         # Get plural definition from the header.
1548         plforms = self._header.get_field_value("Plural-Forms")
1549         if not plforms: # no plural definition, assume 0
1550             return [0]
1551         plustr = plforms.split(";")[1]
1552
1553         lst = re.findall(r"\bn\s*==\s*\d+\s*\)?\s*\?\s*(\d+)", plustr)
1554         if not lst and re.search(r"\bn\s*(!=|>|<)\s*\d+\s*([^?]|$)", plustr):
1555             lst = ["0"]
1556
1557         return [int(x) for x in lst]
1558
1559
1560     def select_by_key (self, msgctxt, msgid, wobs=False):
1561         """
1562         Select message from the catalog by the fields that define its key.
1563
1564         If matched, the message is returned as a single-element list, or
1565         an empty list when there is no match. This is so that the result
1566         of this method is in line with other C{select_*} methods.
1567
1568         Runtime complexity as that of L{find}.
1569
1570         @param msgctxt: the text of C{msgctxt} field
1571         @type msgctxt: string or C{None}
1572         @param msgid: the text of C{msgid} field
1573         @type msgid: string
1574         @param wobs: whether to include obsolete messages in selection
1575         @type wobs: bool
1576
1577         @returns: selected messages
1578         @rtype: [L{Message_base}*]
1579         """
1580
1581         m = MessageUnsafe({"msgctxt" : msgctxt, "msgid" : msgid})
1582         p = self.find(m, wobs)
1583         if p >= 0:
1584             return [self._messages[p]]
1585         else:
1586             return []
1587
1588
1589     def select_by_key_match (self, msgctxt, msgid, exctxt=False, exid=True,
1590                              case=True, wobs=False):
1591         """
1592         Select messages from the catalog by matching key-defining fields.
1593
1594         Parameters C{msgctxt} and C{msgid} are either exact values,
1595         to be matched by equality against message fields,
1596         or regular expression strings. Parameters C{exctxt} and C{exid}
1597         control which kind of match it is, respectively.
1598
1599         Runtime complexity O(n), unless all matches are exact,
1600         when as that of L{find}.
1601
1602         @param msgctxt: the text or regex string of C{msgctxt} field
1603         @type msgctxt: string or C{None}
1604         @param msgid: the text or regex string of C{msgid} field
1605         @type msgid: string
1606         @param exctxt: C{msgctxt} is exact value if C{True}, regex if C{False}
1607         @type exctxt: bool
1608         @param exid: C{msgid} is exact value if C{True}, regex if C{False}
1609         @type exid: bool
1610         @param case: whether regex matching is case-sensitive
1611         @type case: bool
1612         @param wobs: whether to include obsolete messages in selection
1613         @type wobs: bool
1614
1615         @returns: selected messages
1616         @rtype: [L{Message_base}*]
1617         """
1618
1619         if exctxt and exid:
1620             return self.select_by_key(msgctxt, msgid, wobs=wobs)
1621
1622         rxflags = re.U
1623         if not case:
1624             rxflags |= re.I
1625         if not exctxt:
1626             if msgctxt is not None:
1627                 msgctxt_rx = re.compile(msgctxt, rxflags)
1628             else:
1629                 # Force exact match if actually no context required.
1630                 exctxt = True
1631         if not exid:
1632             msgid_rx = re.compile(msgid, rxflags)
1633
1634         selected_msgs = []
1635         for msg in self._messages:
1636             if (    (wobs or not msg.obsolete)
1637                 and (   (exid and msg.msgid == msgid)
1638                      or (not exid and msgid_rx.search(msg.msgid)))
1639                 and (   (exctxt and msg.msgctxt == msgctxt)
1640                      or (not exctxt and msgctxt_rx.search(msg.msgctxt or "")))
1641             ):
1642                 selected_msgs.append(msg)
1643
1644         return selected_msgs
1645
1646
1647     def select_by_msgid (self, msgid, wobs=False):
1648         """
1649         Select messages from the catalog by matching C{msgid} field.
1650
1651         Several messages may have the same C{msgid} field, due to different
1652         C{msgctxt} fields. Empty list is returned when there is no match.
1653
1654         Runtime complexity O(n).
1655
1656         @param msgid: the text of C{msgid} field
1657         @type msgid: string
1658         @param wobs: whether to include obsolete messages in selection
1659         @type wobs: bool
1660
1661         @returns: selected messages
1662         @rtype: [L{Message_base}*]
1663         """
1664
1665         selected_msgs = []
1666         for msg in self._messages:
1667             if (wobs or not msg.obsolete) and msg.msgid == msgid:
1668                 selected_msgs.append(msg)
1669
1670         return selected_msgs
1671
1672
1673     def select_by_msgid_fuzzy (self, msgid, cutoff=0.6, wobs=False):
1674         """
1675         Select messages from the catalog by near-matching C{msgid} field.
1676
1677         The C{cutoff} parameter determines the minimal admissible similarity
1678         (1.0 fo exact match).
1679
1680         The messages are returned ordered by decreasing similarity.
1681
1682         Runtime complexity O(n) * O(length(msgid)*avg(length(msgids)))
1683         (probably).
1684
1685         @param msgid: the text of C{msgid} field
1686         @type msgid: string
1687         @param cutoff: minimal similarity
1688         @type cutoff: float
1689         @param wobs: whether to include obsolete messages in selection
1690         @type wobs: bool
1691
1692         @returns: selected messages
1693         @rtype: [L{Message_base}*]
1694         """
1695
1696         # Build dictionary of message keys by msgid;
1697         # there can be several keys per msgid, pack in a list.
1698         msgkeys = {}
1699         for msg in self._messages:
1700             if msg.obsolete and not wobs:
1701                 # Skip obsolete messages if not explicitly included.
1702                 continue
1703             if msg.msgid not in msgkeys:
1704                 msgkeys[msg.msgid] = []
1705             msgkeys[msg.msgid].append(msg.key)
1706
1707         # Get near-match msgids.
1708         near_msgids = difflib.get_close_matches(msgid, msgkeys, cutoff=cutoff)
1709
1710         # Collect messages per selected msgids.
1711         selected_msgs = []
1712         for near_msgid in near_msgids:
1713             for msgkey in msgkeys[near_msgid]:
1714                 selected_msgs.append(self._messages[self._msgpos[msgkey]])
1715
1716         return selected_msgs
1717
1718
1719     def select_by_msgstr (self, msgstr0, wobs=False, lazy=False):
1720         """
1721         Select messages from the catalog inversely, by their msgstr[0].
1722
1723         Several messages may have the same C{msgstr[0]} field,
1724         so the return value is always a list of messages.
1725         Empty list is returned when there is no match.
1726
1727         Runtime complexity is O(n) if C{lazy} is C{False}.
1728         If C{lazy} is C{True}, complexity is O(n) for the first search,
1729         and then O(1) until next syncing of the catalog;
1730         if msgstr fields of some messages change in between,
1731         or messages are added or removed from the catalog,
1732         this is not seen until next syncing.
1733
1734         @param msgstr0: the text of C{msgstr[0]} field
1735         @type msgstr0: string
1736         @param wobs: whether to include obsolete messages in selection
1737         @type wobs: bool
1738         @param lazy: whether to assume msgstr are not modified between syncings
1739         @type lazy: bool
1740
1741         @returns: selected messages
1742         @rtype: [L{Message_base}*]
1743         """
1744
1745         if not lazy:
1746             selected_msgs = {}
1747             for msg in self._messages:
1748                 if (wobs or not msg.obsolete) and msg.msgstr[0] == msgstr0:
1749                     selected_msgs.append(msg)
1750         else:
1751             if self._invmap is None:
1752                 self._make_invmap()
1753             selected_msgs = self._invmap.get(msgstr0, [])
1754             if not wobs:
1755                 selected_msgs = [x for x in selected_msgs if not x.obsolete]
1756
1757         return selected_msgs
1758
1759
1760     def encoding (self):
1761         """
1762         Report encoding used when syncing the catalog.
1763
1764         Encoding is determined from C{Content-Type} header field.
1765
1766         It is not defined when the header will be examined,
1767         or if it will be reexamined when it changes.
1768         If you want to set encoding after the catalog has been
1769         opened, use L{set_encoding}.
1770
1771         @returns: the encoding name
1772         @rtype: string
1773         """
1774
1775         return self._encoding
1776
1777
1778     def set_encoding (self, encoding):
1779         """
1780         Set encoding used when syncing the catalog.
1781
1782         Encoding set by this method will later be readable by
1783         the L{encoding} method.
1784         This will also modify the catalog header C{Content-Type} field.
1785
1786         @param encoding: the encoding name
1787         @type encoding: string
1788         """
1789
1790         self._encoding = encoding
1791
1792         ctval = "text/plain; charset=%s" % encoding
1793         self.header.set_field("Content-Type", ctval)
1794
1795
1796     def accelerator (self):
1797         """
1798         Report characters used as accelerator markers in GUI messages.
1799
1800         Accelerator characters are determined by looking for certain
1801         header fields, in this order: C{Accelerator-Marker},
1802         C{X-Accelerator-Marker}.
1803         In each field, several accelerator markers can be stated as
1804         comma-separated list, or there may be several fields;
1805         the union of all parsed markers is reported.
1806
1807         If empty set is returned, it was determined that there are
1808         no accelerator markers in the catalog;
1809         if C{None}, that there is no determination about markers.
1810
1811         It is not defined when the header will be examined,
1812         or if it will be reexamined when it changes.
1813         If you want to set accelerator markers after the catalog has been
1814         opened, use L{set_accelerator}.
1815
1816         @returns: accelerator markers
1817         @rtype: set(string*) or C{None}
1818         """
1819
1820         if self._accels_determined:
1821             return self._accels
1822
1823         accels = None
1824         self._accels_determined = True
1825
1826         for fname in (
1827             "Accelerator-Marker",
1828             "X-Accelerator-Marker",
1829         ):
1830             fields = self._header.select_fields(fname)
1831             for fname, fval in fields:
1832                 if accels is None:
1833                     accels = set()
1834                 accels.update([x.strip() for x in fval.split(",")])
1835         if accels:
1836             accels.discard("")
1837
1838         self._accels = accels
1839         return accels
1840
1841
1842     def set_accelerator (self, accels):
1843         """
1844         Set accelerator markers that can be expected in messages.
1845
1846         Accelerator markers set by this method will later be readable by
1847         the L{accelerator} method. This will not modify the catalog header
1848         in any way; if that is desired, it must be done manually by
1849         manipulating the header fields.
1850
1851         If C{accels} is given as C{None}, it means the accelerator markers
1852         are undetermined; if empty, that there are no markers in messages.
1853
1854         @param accels: accelerator markers
1855         @type accels: sequence of strings or C{None}
1856         """
1857
1858         if accels is not None:
1859             self._accels = set(accels)
1860             self._accels.discard("")
1861         else:
1862             self._accels = None
1863         self._accels_determined = True
1864
1865
1866     def markup (self):
1867         """
1868         Report what types of markup can be expected in messages.
1869
1870         Markup types are determined by looking for some header fields,
1871         which state markup types as short symbolic names,
1872         e.g. "html", "docbook", "mediawiki", etc.
1873         The header fields are tried in this order: C{Text-Markup},
1874         C{X-Text-Markup}.
1875         In each field, several markup types can be stated as
1876         comma-separated list.
1877         If there are several fields, it is undefined from which one
1878         markup names are collected.
1879         Markup names are always reported in lower-case, regardless
1880         of the original casing used in the header.
1881         See L{set_markup} for list of markup types currently observed
1882         by various Pology modules to influence processing behavior.
1883
1884         If empty set is returned, it was determined that there is
1885         no markup in the catalog;
1886         if C{None}, that there is no determination about markup.
1887
1888         It is not defined when the header will be examined,
1889         or if it will be reexamined when it changes.
1890         If you want to set markup types after the catalog has been
1891         opened, use L{set_markup} method.
1892
1893         @returns: markup names
1894         @rtype: set(string*) or C{None}
1895         """
1896
1897         if self._mtypes_determined:
1898             return self._mtypes
1899
1900         mtypes = None
1901         self._mtypes_determined = True
1902
1903         for fname in (
1904             "Text-Markup",
1905             "X-Text-Markup",
1906         ):
1907             fval = self._header.get_field_value(fname)
1908             if fval is not None:
1909                 mtypes = set([x.strip().lower() for x in fval.split(",")])
1910                 mtypes.discard("")
1911
1912         self._mtypes = mtypes
1913         return mtypes
1914
1915
1916     def set_markup (self, mtypes):
1917         """
1918         Set markup types that can be expected in messages.
1919
1920         Markup types set by this method will later be readable by
1921         the L{markup} method. This will not modify the catalog header
1922         in any way; if that is desired, it must be done manually by
1923         manipulating the header fields.
1924
1925         If C{mtypes} is given as C{None}, it means the markup types
1926         are undetermined; if empty, that there is no markup in messages.
1927
1928         The following markup types are currently used by various parts
1929         of Pology to influence behavior on processing:
1930           - C{html}: HTML 4.01
1931           - C{qtrich}: Qt rich-text, (almost) a subset of HTML
1932           - C{kuit}: UI semantic markup in KDE4
1933           - C{kde4}: markup in KDE4 UI POs, a mix of Qt rich-text and KUIT
1934           - C{docbook4}: Docbook 4.x markup, in documentation POs
1935           - C{xmlents}: only XML-like entities, no other formal markup
1936
1937         @param mtypes: markup types
1938         @type mtypes: sequence of strings or C{None}
1939         """
1940
1941         if mtypes is not None:
1942             self._mtypes = set([x.lower() for x in mtypes])
1943         else:
1944             self._mtypes = None
1945         self._mtypes_determined = True
1946
1947
1948     def language (self):
1949         """
1950         Report language of the translation.
1951
1952         Language is determined by looking for the C{Language} header field.
1953         If this field is present, it should contain the language code
1954         in line with GNU C library locales, e.g. C{pt} for Portuguese,
1955         or C{pt_BR} for Brazilian Portuguese.
1956         If the field is not present, language is considered undetermined,
1957         and C{None} is returned.
1958
1959         It is not defined when the header will be examined,
1960         or if it will be reexamined when it changes (most probably not).
1961         If you want to set language after the catalog has been
1962         opened, use L{set_language} method.
1963
1964         @returns: language code
1965         @rtype: string or C{None}
1966         """
1967
1968         if self._lang_determined:
1969             return self._lang
1970
1971         lang = None
1972         self._lang_determined = True
1973
1974         fval = self._header.get_field_value("Language")
1975         if fval:
1976             lang = fval.strip()
1977
1978         self._lang = lang
1979         return lang
1980
1981
1982     def set_language (self, lang):
1983         """
1984         Set language of the translation.
1985
1986         Language set by this method will later be readable by
1987         the L{language} method. This will not modify the catalog header
1988         in any way; if that is desired, it must be done manually by
1989         manipulating the header fields.
1990
1991         If C{lang} is given as C{None}, it means the language is undetermined.
1992         If it is given as empty string, it means the language is deliberately
1993         considered unknown.
1994
1995         @param lang: language code
1996         @type lang: string or C{None}
1997         """
1998
1999         if lang is not None:
2000             self._lang = str(lang)
2001         else:
2002             self._lang = None
2003         self._lang_determined = True
2004
2005
2006     def environment (self):
2007         """
2008         Report environments which the catalog is part of.
2009
2010         Sometimes the language alone is not enough to determine all
2011         the non-technical aspects of translation.
2012         For example, in a given language but different translation domains,
2013         one translator may decide to use one of the two synonyms naming a
2014         concept, and the other translator the other synonym.
2015         I{Environments} are a way to specify such sets of choices,
2016         so that automatic tools (e.g. terminology checker) can
2017         detect how to process a given catalog.
2018
2019         An environment can represent anything.
2020         It may be a single translator, who applies own set of choices
2021         to all the catalogs under own maintenance;
2022         it may be a translation project, with many cooperating translators;
2023         and so on.
2024         Each environment is named by an alphanumeric keyword
2025         (such as normalized project name, translator's name, etc.),
2026         and should be unique within a given language.
2027
2028         Environments are read from one of the following header fieldsE{:}
2029         C{Environment}, C{X-Environment}.
2030         The value the field should be comma-separated list of
2031         environment keywords.
2032         If there are several environment fields, it is undefined
2033         from which the environments are read.
2034
2035         If more than one environment is stated, then wherever the conventions
2036         of two environments conflict, the environment mentioned later
2037         in the list should take precedence.
2038         For example, environment list such as C{"footp, jdoe"}
2039         would mean to apply conventions of FOO translation project,
2040         ammended by that of translator Johnas Doemann.
2041
2042         It there is no environment header field, C{None} is reported.
2043         Empty list is reported if such field exists, but its value is empty.
2044
2045         It is not defined when the header will be examined,
2046         or if it will be reexamined when it changes (most probably not).
2047         if you want to set environments after the catalog has been
2048         opened, use L{set_environment} method.
2049
2050         @returns: environment keywords
2051         @rtype: [string*] or C{None}
2052         """
2053
2054         if self._envs_determined:
2055             return self._envs
2056
2057         envs = None
2058         self._envs_determined = True
2059
2060         for fname in (
2061             "Environment",
2062             "X-Environment",
2063         ):
2064             fval = self._header.get_field_value(fname)
2065             if fval is not None:
2066                 envs = [x.strip().lower() for x in fval.split(",")]
2067                 while "" in envs:
2068                     envs.remove("")
2069                 break
2070
2071         self._envs = envs
2072         return envs
2073
2074
2075     def set_environment (self, envs):
2076         """
2077         Set environments which the catalog is part of.
2078
2079         Environments set by this method will later be readable by
2080         the L{environment} method. This will not modify the catalog header
2081         in any way; if that is desired, it must be done manually by
2082         manipulating the header fields.
2083
2084         If C{envs} is given as C{None}, it means that the environments
2085         are undetermined; if empty, the catalog belongs to no environment.
2086
2087         @param envs: environment keywords
2088         @type envs: sequence of strings or C{None}
2089         """
2090
2091         if envs is not None:
2092             self._envs = set([x.lower() for x in envs])
2093         else:
2094             self._envs = None
2095         self._envs_determined = True
2096
2097
2098     def wrapping (self):
2099         """
2100         Report wrapping policy for message fields.
2101
2102         Long text fields in messages (C{msgid}, C{msgstr}, etc.) may
2103         be wrapped in different ways, as wrapping does not influence
2104         their semantics.
2105         (This is unlike translator and extracted comments, which are
2106         never wrapped, because division into lines may be significant.)
2107         PO processing tools will typically offer wrapping options,
2108         but it may be more convenient to have wrapping policy
2109         bound to the catalog, which tools respect unless overridden.
2110
2111         The following header fields are checked for wrapping policy,
2112         in given order: C{Wrapping}, C{X-Wrapping}.
2113         Wrapping policy (i.e. value of these header fields) is
2114         an unordered comma-separated list of wrapping keywords.
2115         See L{select_field_wrapper<wrap.select_field_wrapper>}
2116         for possible keywords.
2117         If no wrapping policy field is found in the header,
2118         C{None} is returned.
2119         If several wrapping policy fields are present,
2120         it is undefined which one is taken into account.
2121
2122         It is not defined when the header will be examined,
2123         or if it will be reexamined when it changes (most probably not).
2124         If you want to set wrapping after the catalog has been
2125         opened, use L{set_wrapping} method.
2126
2127         @returns: wrapping keywords
2128         @rtype: (string...) or C{None}
2129         """
2130
2131         if self._wrap_determined:
2132             return self._wrapkw
2133
2134         wrapkw = None
2135         self._wrap_determined = True
2136
2137         for fname in (
2138             "Wrapping",
2139             "X-Wrapping",
2140         ):
2141             fval = self._header.get_field_value(fname)
2142             if fval is not None:
2143                 wrapkw = [x.strip().lower() for x in fval.split(",")]
2144                 wrapkw = tuple(sorted(wrapkw))
2145                 break
2146
2147         self._wrapkw = wrapkw
2148         self._wrapf = select_field_wrapper(wrapkw)
2149
2150         return self._wrapkw
2151
2152
2153     def set_wrapping (self, wrapkw):
2154         """
2155         Set wrapping policy for message fields.
2156
2157         Wrapping policy set by this method will later be readable by
2158         the L{wrapping} method. This will not modify the catalog header
2159         in any way; if that is desired, it must be done manually by
2160         manipulating the header fields.
2161
2162         Wrapping policy is a sequence of keywords.
2163         See L{select_field_wrapper<wrap.select_field_wrapper>}
2164         for possible keywords.
2165         If C{None} is given instead, it is passed directly to
2166         L{select_field_wrapper<wrap.select_field_wrapper>},
2167         which will construct default wrapper.
2168
2169         @param wrapkw: wrapping policy
2170         @type wrapkw: [string...] or C{None}
2171         """
2172
2173         self._wrapkw = tuple(sorted(wrapkw)) if wrapkw is not None else None
2174         self._wrapf = select_field_wrapper(wrapkw)
2175         self._wrap_determined = True
2176
2177
2178     def wrapf (self):
2179         """
2180         Get wrapping function used for message fields.
2181
2182         Wrapping function is determined based on wrapping policy
2183         (see L{wrapping}, L{set_wrapping}).
2184         Wrapping function returned by this method is suitable as
2185         C{wrapf} parameter in methods of C{Message} objects.
2186
2187         @returns: wrapping function
2188         @rtype: (string, string, string?)->[string]
2189
2190         @see: L{wrap_field<wrap.wrap_field>}
2191         """
2192
2193         self.wrapping()
2194         return self._wrapf
2195
2196
2197     def messages_by_source (self):
2198         """
2199         Get messages grouped as lists by source.
2200
2201         All messages sharing the same primary source file
2202         (their first source reference) are grouped
2203         and filed under that source file path.
2204         Grouping is represented by list of tuples of
2205         (source, list of messages), with both sources and
2206         messages within partial lists ordered by appearance.
2207
2208         @return: messages grouped by sources
2209         @rtype: [(string, [L{Message_base}])]
2210         """
2211
2212         msgs_by_src = {}
2213         sources = []
2214         for msg in self._messages:
2215             src = msg.source and msg.source[0][0] or ""
2216             if src not in msgs_by_src:
2217                 msgs_by_src[src] = []
2218                 sources.append(src)
2219             msgs_by_src[src].append(msg)
2220
2221         return [(x, msgs_by_src[x]) for x in sources]
2222
2223
2224     def sort_by_source (self):
2225         """
2226         Sort messages in catalog by source references.
2227
2228         Source references within each message are sorted too,
2229         before messages are sorted by source references.
2230
2231         If any message changed its position due to sorting,
2232         L{sync_map} is called at the end.
2233         """
2234
2235         # Sort source references within messages.
2236         for msg in self._messages:
2237             sorted_source = sorted(msg.source,
2238                                    key=lambda s: (s[0].lower(), s[1]))
2239             if self._monitored:
2240                 msg.source = Monlist(list(map(Monpair, sorted_source)))
2241             else:
2242                 msg.source = sorted_source
2243
2244         sorted_messages = sorted(self._messages,
2245                                  key=lambda m: [(s[0].lower(), s[1])
2246                                                 for s in m.source[:1]])
2247
2248         any_moved = False
2249         for i in range(len(self._messages)):
2250             if sorted_messages[i] is not self._messages[i]:
2251                 any_moved = True
2252                 break
2253         if any_moved:
2254             self._messages = sorted_messages
2255             self.sync_map()
2256
2257
2258     def update_header (self, project=None, title=None,
2259                        copyright=None, license=None,
2260                        name=None, email=None, teamemail=None,
2261                        langname=None, langcode=None,
2262                        encoding=None, ctenc=None,
2263                        plforms=None, poeditor=None):
2264         """
2265         Update catalog header.
2266
2267         If a piece of information is not given (i.e. C{None}),
2268         the corresponding header field is left unmodified.
2269         If it is given as empty string, the corresponding header field
2270         is removed.
2271         PO revision date is updated always, to current date.
2272
2273         Some fields (as noted in parameter descriptions) are expanded
2274         on variables by applying the
2275         L{expand_vars<pology.resolve.expand_vars>} function.
2276         For example::
2277
2278             title="Translation of %project into %langname."
2279
2280         The following variables are available:
2281           - C{%basename}: PO file base name
2282           - C{%poname}: PO file base name without .po extension
2283           - C{%project}: value of C{project} parameter (if not C{None}/empty)
2284           - C{%langname}: value of C{langname} parameter (if not C{None}/empty)
2285           - C{%langcode}: value of C{langcode} parameter (if not C{None}/empty)
2286
2287         @param project: project name
2288         @type project: string
2289         @param title: translation title (expanded on variables)
2290         @type title: string
2291         @param copyright: copyright notice (expanded on variables)
2292         @type copyright: string
2293         @param license: license notice (expanded on variables)
2294         @type license: string
2295         @param name: translator's name
2296         @type name: string
2297         @param email: translator's email address
2298         @type email: string
2299         @param teamemail: language team's email address
2300         @type teamemail: string
2301         @param langname: full language name
2302         @type langname: string
2303         @param langcode: language code
2304         @type langcode: string
2305         @param encoding: text encoding
2306         @type encoding: string
2307         @param ctenc: content transfer encoding
2308         @type ctenc: string
2309         @param plforms: plural forms expression
2310         @type plforms: string
2311         @param poeditor: translator's PO editor
2312         @type poeditor: string
2313
2314         @returns: reference to header
2315         """
2316
2317         varmap = {}
2318         varmap["basename"] = os.path.basename(self.filename)
2319         varmap["poname"] = self.name
2320         if project:
2321             varmap["project"] = project
2322         if langname:
2323             varmap["langname"] = langname
2324         if langcode:
2325             varmap["langcode"] = langcode
2326         varhead="%"
2327
2328         hdr = self.header
2329
2330         if title:
2331             title = expand_vars(title, varmap, varhead)
2332             hdr.title[:] = [str(title)]
2333         elif title == "":
2334             hdr.title[:] = []
2335
2336         if copyright:
2337             copyright = expand_vars(copyright, varmap, varhead)
2338             hdr.copyright = str(copyright)
2339         elif copyright == "":
2340             hdr.copyright = None
2341
2342         if license:
2343             license = expand_vars(license, varmap, varhead)
2344             hdr.license = str(license)
2345         elif license == "":
2346             hdr.license = None
2347
2348         if project:
2349             hdr.set_field("Project-Id-Version", str(project))
2350         elif project == "":
2351             hdr.remove_field("Project-Id-Version")
2352
2353         hdr.set_field("PO-Revision-Date", format_datetime())
2354
2355         if name or email:
2356             if name and email:
2357                 tr_ident = "%s <%s>" % (name, email)
2358             elif name:
2359                 tr_ident = "%s" % name
2360             else:
2361                 tr_ident = "<%s>" % email
2362
2363             # Remove author placeholder.
2364             for i in range(len(hdr.author)):
2365                 if "FIRST AUTHOR" in hdr.author[i]:
2366                     hdr.author.pop(i)
2367                     break
2368
2369             # Look for current author in the comments,
2370             # to update only years if present.
2371             cyear = time.strftime("%Y")
2372             acfmt = "%s, %s."
2373             new_author = True
2374             for i in range(len(hdr.author)):
2375                 if tr_ident in hdr.author[i]:
2376                     # Parse the current list of years.
2377                     years = re.findall(r"\b(\d{2,4})\s*[,.]", hdr.author[i])
2378                     if cyear not in years:
2379                         years.append(cyear)
2380                     years.sort()
2381                     hdr.author[i] = acfmt % (tr_ident, ", ".join(years))
2382                     new_author = False
2383                     break
2384             if new_author:
2385                 hdr.author.append(acfmt % (tr_ident, cyear))
2386
2387             hdr.set_field("Last-Translator", str(tr_ident))
2388
2389         elif name == "" or email == "":
2390             hdr.remove_field("Last-Translator")
2391
2392         if langname:
2393             tm_ident = None
2394             if langname and teamemail:
2395                 tm_ident = "%s <%s>" % (langname, teamemail)
2396             elif langname:
2397                 tm_ident = langname
2398             hdr.set_field("Language-Team", str(tm_ident))
2399         elif langname == "":
2400             hdr.remove_field("Language-Team")
2401
2402         if langcode:
2403             hdr.set_field("Language", str(langcode), after="Language-Team")
2404         elif langcode == "":
2405             hdr.remove_field("Language")
2406
2407         if encoding:
2408             ctval = "text/plain; charset=%s" % encoding
2409             hdr.set_field("Content-Type", ctval)
2410         elif encoding == "":
2411             hdr.remove_field("Content-Type")
2412
2413         if ctenc:
2414             hdr.set_field("Content-Transfer-Encoding", str(ctenc))
2415         elif ctenc == "":
2416             hdr.remove_field("Content-Transfer-Encoding")
2417
2418         if plforms:
2419             hdr.set_field("Plural-Forms", str(plforms))
2420         elif plforms == "":
2421             hdr.remove_field("Plural-Forms")
2422
2423         if poeditor:
2424             hdr.set_field("X-Generator", str(poeditor))
2425         elif poeditor == "":
2426             hdr.remove_field("X-Generator")
2427
2428         return hdr
2429
2430
2431     def detect_renamed_sources (self, cat, minshare=0.7):
2432         """
2433         Heuristically determine possible renamings of source files
2434         from this catalog based on source files in the other catalog.
2435
2436         To determine the possibility that the source file A from this catalog
2437         has been renamed into source file B in the other catalog C{cat},
2438         primarily the share of common messages to A and B is considered.
2439         The minimum needed commonality can be given by C{minshare} parameter.
2440
2441         When a source file from this catalog is directly mentioned in
2442         the other catalog, it is immediatelly considered to have
2443         no possible renamings.
2444
2445         The return value is a dictionary in which the key is
2446         the source file and the value is the list of its possible
2447         renamed counterparts.
2448         The renaming list is never empty, i.e. if no renamings
2449         were detected for a given source file, that source file
2450         will not be present in the dictionary.
2451         The dictionary is fully symmetric: if source file B is in
2452         the renaming list of file A, then there will be
2453         an entry for file B with A in its renaming list
2454         (even when B is comming from the other catalog).
2455
2456         Instead of a single other catalog to test against,
2457         a sequence of several other catalogs can be given.
2458
2459         @param cat: catalog against which to test for renamings
2460         @type cat: Catalog or [Catalog*]
2461         @param minshare: the minimum commonality between two source files
2462             to consider them as possible renaming pair (0.0-1.0)
2463         @type minshare: float
2464
2465         @returns: the renaming dictionary
2466         @rtype: {string: [string*]*}
2467         """
2468
2469         renamings = {}
2470
2471         # Collect all own sources, to avoid matching for them.
2472         ownfs = set()
2473         for msg in self._messages:
2474             for src, lno in msg.source:
2475                 ownfs.add(src)
2476
2477         if isinstance(cat, Catalog):
2478             cats = [cat]
2479         else:
2480             cats = cat
2481
2482         for ocat in cats:
2483             if self is ocat:
2484                 continue
2485
2486             fcnts = {}
2487             ccnts = {}
2488             for msg in self._messages:
2489                 omsg = ocat.get(msg)
2490                 if omsg is None:
2491                     continue
2492                 for src, lno in msg.source:
2493                     if src not in fcnts:
2494                         fcnts[src] = 0.0
2495                         ccnts[src] = {}
2496                     # Weigh each message disproportionally to the number of
2497                     # files it appears in (i.e. the sum of counts == 1).
2498                     fcnts[src] += 1.0 / len(msg.source)
2499                     counted = {}
2500                     for osrc, olno in omsg.source:
2501                         if osrc not in ownfs and osrc not in counted:
2502                             if osrc not in ccnts[src]:
2503                                 ccnts[src][osrc] = 0.0
2504                             ccnts[src][osrc] += 1.0 / len(omsg.source)
2505                             counted[osrc] = True
2506
2507             # Select match groups.
2508             fuzzies = {}
2509             for src, fcnt in sorted(fcnts.items()):
2510                 shares = []
2511                 for osrc, ccnt in sorted(ccnts[src].items()):
2512                     share = ccnt / (fcnt + 1.0) # tip a bit to avoid fcnt of 0.x
2513                     if share >= minshare:
2514                         shares.append((osrc, share))
2515                 if shares:
2516                     shares.sort(key=lambda x: x[1]) # not necessary atm
2517                     fuzzies[src] = [f for f, s in shares]
2518
2519             # Update the dictionary of renamings.
2520             for src, fuzzsrcs in sorted(fuzzies.items()):
2521                 group = [src] + fuzzsrcs
2522                 for src in group:
2523                     if src not in renamings:
2524                         renamings[src] = []
2525                     for osrc in group:
2526                         if src != osrc and osrc not in renamings[src]:
2527                             renamings[src].append(osrc)
2528                     if not renamings[src]:
2529                         renamings.pop(src)
2530
2531         return renamings
2532