pology/pology/header.py

0001 # -*- coding: UTF-8 -*-
0002
0003 """
0004 Header entry in PO catalogs.
0005
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009
0010 from pology import PologyError
0011 from pology.wrap import wrap_field
0012 from pology.monitored import Monitored, Monlist, Monpair
0013 from .message import Message
0014
0015 import datetime
0016 import time
0017 import re
0018
def _strlist_spec ():
    # Specification of a monitored list whose every element is a string.
    # A fresh dictionary is built on each call, so that the entries of
    # the header specification do not share mutable objects.
    return {"type" : Monlist, "spec" : {"*" : {"type" : str}}}


# Attribute specification of the header, handed to assert_spec_init()
# in the Header constructor. Keys are attribute names, and each value
# gives the admissible type(s) of that attribute, the nested specification
# of its elements where applicable, and whether it is flagged as derived.
_Header_spec = {}

# Comment-based parts of the header.
_Header_spec["title"] = _strlist_spec()
_Header_spec["copyright"] = {"type" : (str, type(None))}
_Header_spec["license"] = {"type" : (str, type(None))}
_Header_spec["author"] = _strlist_spec()
_Header_spec["comment"] = _strlist_spec()

# Header fields proper: a monitored list of (name, value) string pairs.
_Header_spec["field"] = {
    "type" : Monlist,
    "spec" : {
        "*" : {
            "type" : Monpair,
            "spec" : {
                "first" : {"type" : str},
                "second" : {"type" : str},
            },
        },
    },
}

_Header_spec["initialized"] = {"type" : bool, "derived" : True}

# Dummies for summary iteration in catalog:
_Header_spec["obsolete"] = {"type" : bool, "derived" : True}
_Header_spec["key"] = {"type" : bool, "derived" : True}
0037
class Header (Monitored):
    """
    Header entry in PO catalogs.

    The PO header is syntactically just another entry in the catalog,
    but with different semantics. Therefore, instead of operating on it
    using L{Message}, this class provides a different set of interface
    attributes and methods.

    Like L{Message}, this class implements monitoring; the starred-types
    (e.g. C{list*}) are according to the same convention as for messages,
    and also the strings are assumed unicode unless otherwise noted.

    There is no lightweight alternative to the monitored header, like that of
    L{MessageUnsafe} for messages, because no performance demand is expected
    for the headers only.

    @ivar title: comment lines giving the title
    @type title: list* of strings

    @ivar copyright: comment line with the copyright statement
    @type copyright: string or C{None}

    @ivar license: comment line with the license statement
    @type license: string or C{None}

    @ivar author: comment lines stating translators who worked on this catalog
    @type author: list* of strings

    @ivar comment: the free comment lines, being none of the specific ones
    @type comment: list* of strings

    @ivar field: parsed header fields as key-value string pairs
    @type field: list* of pairs*

    @ivar initialized: (read-only) whether the header is fully initialized
    @type initialized: bool

    @see: L{Message}
    """

    def __init__ (self, init=None):
        """
        Initializes the header by the given message or header.

        If C{init} is a header, its comments and fields are copied.
        Otherwise, if C{init} evaluates true, it is taken to be the message
        holding the header: its manual comments are sorted into title,
        copyright, license, author and free comment lines, and the first
        element of its C{msgstr} is split into fields.
        Otherwise, a template header with placeholder values is created.

        @param init: the PO entry containing the header, or another header
        @type init: subclass of L{Message_base}, or L{Header}
        """

        if isinstance(init, Header): # copy header fields
            hdr = init
            self._title = Monlist(hdr._title)
            self._copyright = hdr._copyright
            self._license = hdr._license
            self._author = Monlist(hdr._author)
            self._comment = Monlist(hdr._comment)
            self._field = Monlist(list(map(Monpair, hdr._field)))

            # Copy the message. to_msg() returns the internal message
            # object of the other header, so it must not be stored as is;
            # otherwise the two headers would rebuild the same object.
            self._message = Message(hdr.to_msg())

        elif init: # parse header message
            msg = init
            # Comments.
            self._title = Monlist()
            self._copyright = ""
            self._license = ""
            self._author = Monlist()
            self._comment = Monlist()
            # Lines are collected into the title until the first line
            # of one of the specific kinds is seen.
            intitle = True
            for c in msg.manual_comment:
                if (    not self._copyright
                    and re.search(r"copyright|\(C\)|©", c, re.I|re.U)
                ):
                    # Only the first such line is the copyright statement.
                    self._copyright = c
                    intitle = False
                elif (    not self._license
                      and (    re.search("license", c, re.I)
                           and not re.search("^translation *of.* to", c, re.I))
                ):
                    # Only the first such line is the license statement.
                    self._license = c
                    intitle = False
                elif re.search("<.*@.*>", c):
                    # Anything containing a bracketed address is an author.
                    self._author.append(c)
                    intitle = False
                elif intitle:
                    self._title.append(c)
                else:
                    self._comment.append(c)

            # Header fields.
            self._field = Monlist()
            for field in msg.msgstr[0].split("\n"):
                # Name is everything up to the first colon;
                # lines without a colon are dropped.
                m = re.match(r"(.*?): ?(.*)", field)
                if m:
                    self._field.append(Monpair(m.groups()))

            # Copy the message.
            self._message = Message(msg)

        else: # create default fields
            self._title = Monlist(["SOME DESCRIPTIVE TITLE."])
            self._copyright = "Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER"
            self._license = "This file is distributed under the same license as the PACKAGE package."
            self._author = Monlist(["FIRST AUTHOR <EMAIL@ADDRESS>, YEAR."])
            self._comment = Monlist([""])

            self._field = Monlist([
                Monpair(("Project-Id-Version", "PACKAGE VERSION")),
                Monpair(("Report-Msgid-Bugs-To", "")),
                Monpair(("POT-Creation-Date", format_datetime())),
                Monpair(("PO-Revision-Date", "YEAR-MO-DA HO:MI+ZONE")),
                Monpair(("Last-Translator", "FULL NAME <EMAIL@ADDRESS>")),
                Monpair(("Language-Team", "LANGUAGE <LL@li.org>")),
                Monpair(("Language", "")),
                Monpair(("MIME-Version", "1.0")),
                Monpair(("Content-Type", "text/plain; charset=CHARSET")),
                Monpair(("Content-Transfer-Encoding", "8bit")),
                Monpair(("Plural-Forms", "nplurals=INTEGER; plural=EXPRESSION;")),
            ])

            # Create the message.
            self._message = Message()
            self._remake_msg(force=True)

        self.assert_spec_init(_Header_spec)

        # Unmodify all monitored members.
        self.modcount = 0


    def __getattr__ (self, att):
        """
        Attribute getter.

        Processes read-only attributes, and sends others to the base class.

        @param att: name of the attribute to get
        @returns: attribute value
        """

        if att == "obsolete":
            return False
        elif att == "key":
            return Message().key # key of an empty-msgid message
        elif att == "initialized":
            # Check if all necessary fields have been initialized,
            # i.e. that none of them still contains a template placeholder.
            gfv = self.get_field_value
            return not (False
               or "PACKAGE VERSION" in gfv("Project-Id-Version", "")
               or "YEAR-MO-DA" in gfv("PO-Revision-Date", "")
               or "FULL NAME" in gfv("Last-Translator", "")
               or "LANGUAGE" in gfv("Language-Team", "")
               or "CHARSET" in gfv("Content-Type", "")
               or "ENCODING" in gfv("Content-Transfer-Encoding", "")
               or "INTEGER" in gfv("Plural-Forms", "")
               or "EXPRESSION" in gfv("Plural-Forms", "")
            )
        else:
            return Monitored.__getattr__(self, att)


    def get (self, att, default=None):
        """
        Get attribute value.

        Allows accessing the header like a dictionary.

        @param att: name of the attribute to get
        @type att: string
        @param default: value to return if the attribute does not exist

        @returns: value of the attribute or the default value
        """

        return getattr(self, att, default)


    def _remake_msg (self, force=False):
        """
        Bring the internal message up to date with the header attributes.

        Manual comments, C{msgstr} and the fuzzy state of the message are
        each rebuilt only if the related modification counters are non-zero,
        or unconditionally if C{force} is set.

        @param force: whether to rebuild all parts of the message
        @type force: bool
        """

        m = self._message

        if (force
            or self.title_modcount or self.title.modcount
            or self.copyright_modcount
            or self.license_modcount
            or self.author_modcount or self.author.modcount
            or self.comment_modcount or self.comment.modcount
        ):
            # Same order of comment kinds as a template header has.
            m.manual_comment = Monlist()
            for t in self.title:
                m.manual_comment.append(t)
            if self.copyright:
                m.manual_comment.append(self.copyright)
            if self.license:
                m.manual_comment.append(self.license)
            for a in self.author:
                m.manual_comment.append(a)
            for c in self.comment:
                m.manual_comment.append(c)

        if force or self.field_modcount or self.field.modcount:
            # One "name: value" line per field.
            m.msgstr = Monlist([""])
            for field in self.field:
                m.msgstr[0] += "%s: %s\n" % tuple(field)

        if force or self.modcount:
            # A header with leftover placeholders is written out as fuzzy.
            m.fuzzy = not self.initialized


    def __eq__ (self, ohdr):
        """
        Reports whether headers are equal in all apparent parts.

        "Apparent" parts include all those which are visible in the PO file.
        I.e. the check will ignore internal states, like line caches, etc.

        If the other object is not a header, C{NotImplemented} is returned,
        so that the comparison evaluates as unequal instead of raising.

        @returns: C{True} if headers are equal in apparent parts
        @rtype: bool
        """

        if not isinstance(ohdr, Header):
            return NotImplemented
        return self.to_msg() == ohdr.to_msg()


    def __ne__ (self, ohdr):
        """
        Reports whether headers are not equal in some apparent parts.

        Equivalent to C{not (self == ohdr)}.

        @returns: C{False} if headers are equal in all apparent parts
        @rtype: bool
        """

        equal = self.__eq__(ohdr)
        if equal is NotImplemented:
            # Must not be negated, it is not a truth value.
            return NotImplemented
        return not equal


    def to_msg (self, force=False):
        """
        Convert the header into ordinary message object.

        The message returned is the internal message object of the header,
        brought up to date with the current state of header attributes;
        repeated calls return the same object. If the message object
        has monitoring features, the force parameter will tell whether to
        modify all message elements, or to try to keep the changes minimal.

        @param force: whether to recreate all message elements
        @type force: bool

        @returns: header as message
        @rtype: L{Message}
        """

        self._remake_msg(force)
        return self._message


    def to_lines (self, wrapf=wrap_field, force=False, colorize=0):
        """
        The line-representation of the header.

        Equivalent to the same-named method of message classes.

        @see: L{Message_base}
        """

        return self.to_msg(force).to_lines(wrapf, force, colorize)


    def to_string (self, wrapf=wrap_field, force=False, colorize=0):
        """
        The string-representation of the header.

        Equivalent to the same-named method of message classes.

        @see: L{Message_base}
        """

        return self.to_msg(force).to_string(wrapf, force, colorize)


    def select_fields (self, name):
        """
        Find header fields with the given name.

        Header fields need not be unique.

        @param name: look for the fields with this name
        @type name: string

        @returns: references to name-value pairs matching the field name
        @rtype: list of pairs*
        """

        return [pair for pair in self.field if pair.first == name]


    def get_field_value (self, name, default=None):
        """
        Get the value of the given header field.

        If there are several fields with the same name, it is undefined which
        of them will supply the value; this method should be used only
        for fields which are expected to be unique.
        If there are no fields named as requested, C{default} is returned.

        @param name: field name
        @type name: string
        @param default: value returned if there is no such field
        @type default: as given

        @returns: field value
        @rtype: string or C{default}
        """

        for pair in self.field:
            if pair.first == name:
                return pair.second
        return default


    def replace_field_value (self, name, new_value, nth=0):
        """
        Replace the value of the n-th occurrence of the named header field.

        Header fields need not be unique, hence the n-th qualification.
        Occurrences are counted from zero.

        @param name: name of the header field
        @type name: string

        @param new_value: new value for the field
        @type new_value: string

        @param nth: replace the value of this field among same-named fields
        @type nth: int

        @returns: True if the requested field was found, False otherwise
        @rtype: bool
        """

        nfound = 0
        for i in range(len(self._field)):
            if self.field[i][0] != name:
                continue
            if nfound == nth:
                self.field[i] = Monpair((str(name), new_value))
                return True
            nfound += 1

        # Reported only from here, so that a negative nth
        # cannot be mistaken for a match when nothing was found.
        return False


    def set_field (self, name, value, after=None, before=None, reorder=False):
        """
        Set a header field to a value.

        If the field already exists, its value is replaced with the given one.
        If there are several same-named fields, it is undefined which one
        and how many of them are going to have their values replaced;
        this method should be used only for fields expected to be unique.
        If there is no such field yet, it is inserted into the header;
        after the field C{after} or before the field C{before} if given
        and existing, or appended to the end otherwise.
        If the field already exists, but not in the position according to
        C{after} or C{before}, reordering can be requested too.

        @param name: name of the header field
        @type name: unicode

        @param value: new value for the field
        @type value: unicode

        @param after: the field to insert after
        @type after: string

        @param before: the field to insert before
        @type before: string

        @param reorder: whether to move an existing field into better position
        @type reorder: bool

        @returns: position where the field was modified or inserted
        @rtype: int
        """

        nfields = len(self._field)
        ins_pos = -1
        rpl_pos = -1
        for i in range(nfields):
            if self.field[i][0] == name:
                rpl_pos = i
                if not reorder:
                    break
            if (   (after and i > 0 and self.field[i - 1][0] == after)
                or (before and self.field[i][0] == before)
            ):
                ins_pos = i
                # Do not break, must try all fields for value replacement.

        # The loop looks at the predecessor of each field, so it cannot see
        # that the very last field is the one to insert after.
        if (    ins_pos < 0 and after and nfields > 0
            and self.field[nfields - 1][0] == after
        ):
            ins_pos = nfields

        if reorder and ins_pos >= 0 and rpl_pos >= 0 and ins_pos != rpl_pos:
            # Take the field out, to be inserted anew below.
            self._field.pop(rpl_pos)
            if ins_pos > rpl_pos:
                # Fields behind the removed one have shifted down.
                ins_pos -= 1
            rpl_pos = -1

        pair = Monpair((name, value))
        if rpl_pos >= 0:
            self._field[rpl_pos] = pair
            pos = rpl_pos
        elif ins_pos >= 0:
            self._field.insert(ins_pos, pair)
            pos = ins_pos
        else:
            self._field.append(pair)
            # Index of the element just appended.
            pos = len(self._field) - 1

        return pos


    def remove_field (self, name):
        """
        Remove header fields with the given name, if it exists.

        @param name: remove fields with this name
        @type name: string

        @return: number of removed fields
        @rtype: int
        """

        i = 0
        nrem = 0
        while i < len(self.field):
            if self.field[i][0] == name:
                # Following fields shift down, so the index stays.
                self.field.pop(i)
                nrem += 1
            else:
                i += 1

        return nrem
0483
0484
# strftime formats of datetimes in PO header fields, with and without seconds.
_dt_fmt = "%Y-%m-%d %H:%M:%S%z"
_dt_fmt_nosec = "%Y-%m-%d %H:%M%z"

def format_datetime (dt=None, wsec=False):
    """
    Format datetime as found in PO header fields.

    If a particular datetime object C{dt} is not given,
    current local time is used instead.

    If C{wsec} is C{False}, the formatted string will not contain
    the seconds component, which is usual for PO header datetimes.
    If seconds accuracy is desired, C{wsec} can be set to C{True}.

    If C{dt} supplies no UTC offset, the offset C{+0000} is written out.

    @param dt: datetime
    @type dt: datetime.datetime
    @param wsec: whether to add seconds component
    @type wsec: bool

    @return: formatted datetime
    @rtype: string
    """

    fmt = _dt_fmt if wsec else _dt_fmt_nosec

    if dt is None:
        return time.strftime(fmt)

    dtstr = dt.strftime(fmt)
    # If the offset is not known, assume UTC. This is the case not only
    # when there is no time zone object, but also when the time zone object
    # reports no offset; %z produces nothing in both cases.
    if dt.utcoffset() is None:
        dtstr += "+0000"
    return dtstr
0523
0524
# Patterns of datetime strings accepted by parse_datetime(),
# ordered from the most complete one to the bare year.
_parse_date_patterns = (
    # Date, time with seconds, zone offset.
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+):(\d+) *([+-]\d+) *$",
    # Date, time without seconds, zone offset. The empty group stands in
    # for the seconds, so that the offset is the seventh group here too;
    # otherwise it could not be told apart from the next case.
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+)() *([+-]\d+) *$",
    # Date, time with seconds.
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+):(\d+) *$",
    # Date, time without seconds.
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+) *$",
    # Date only.
    r"^ *(\d+)-(\d+)-(\d+) *$",
    # Year and month.
    r"^ *(\d+)-(\d+) *$",
    # Year only.
    r"^ *(\d+) *$",
)

_parse_date_rxs = list(map(re.compile, _parse_date_patterns))
0535
def parse_datetime (dstr):
    """
    Parse formatted datetime from a PO header field into a datetime object.

    The formatted datetime may also have a seconds component,
    which is typically not present in PO headers.
    It may also lack a contiguous number of components from the back,
    e.g. having no time zone offset, or no time at all.
    A missing month or day is taken as 1, and any other missing
    component, including the time zone offset, as 0.

    @param dstr: formatted datetime
    @type dstr: string

    @return: datetime object
    @rtype: datetime.datetime

    @raise PologyError: if the string matches none of the known formats
    """

    for parse_date_rx in _parse_date_rxs:
        m = parse_date_rx.search(dstr)
        if m:
            break
    else:
        # The translation call is not among the module-level imports
        # of this file, so it is brought into scope here.
        # NOTE(review): assumes the pology package exports _ -- confirm.
        from pology import _
        raise PologyError(_("@info",
                            "Cannot parse datetime string '%(str)s'.",
                            str=dstr))

    # The empty group of the format without seconds converts to zero.
    pgroups = [int(x or 0) for x in m.groups()]
    pgroups.extend([1] * (3 - len(pgroups))) # missing month and day
    pgroups.extend([0] * (7 - len(pgroups))) # missing time and offset
    year, month, day, hour, minute, second, off = pgroups

    # The offset reads as [+-]HHMM. Split the magnitude and apply the sign
    # to both parts afterwards; floor division and modulo on the negative
    # number itself would turn e.g. -0330 into -4 hours and +70 minutes.
    offhr, offmin = divmod(abs(off), 100)
    if off < 0:
        offhr, offmin = -offhr, -offmin

    dt = datetime.datetime(year=year, month=month, day=day,
                           hour=hour, minute=minute, second=second,
                           tzinfo=TZInfo(hours=offhr, minutes=offmin))
    return dt
0570
0571
class TZInfo (datetime.tzinfo):
    """
    A minimal fixed-offset time zone for datetime objects.

    The offset is fixed at construction, and the daylight saving
    adjustment is always reported as zero.
    """

    def __init__ (self, hours=None, minutes=None):
        """
        Create a time zone with given offset in hours and minutes.

        The two parts are summed up into the offset, such that
        e.g. C{hours=2} and C{minutes=30} gives two and a half hours.
        When only one of them is given, the other one counts as zero.
        When none is given, the offset of the system time zone
        at the moment of construction is used.

        @param hours: the time zone offset in hours
        @type hours: int
        @param minutes: additional offset in minutes
        @type minutes: int
        """

        # Daylight saving flag of the current local time;
        # also needed later for selecting the time zone name.
        self._isdst = time.localtime().tm_isdst

        explicit = not (hours is None and minutes is None)
        if explicit:
            off_hours = hours or 0
            off_minutes = minutes or 0
        else:
            # The time module gives seconds west of UTC, hence the negation.
            if self._isdst:
                off_seconds = -time.altzone
            else:
                off_seconds = -time.timezone
            off_hours, rest_seconds = divmod(off_seconds, 3600)
            off_minutes = rest_seconds // 60

        self._utcoffset = datetime.timedelta(hours=off_hours,
                                             minutes=off_minutes)
        self._dst = datetime.timedelta(0)


    def utcoffset (self, dt):
        """
        Offset from UTC, same for any datetime.
        """

        return self._utcoffset


    def dst (self, dt):
        """
        Daylight saving adjustment, always zero.
        """

        return self._dst


    def tzname (self, dt):
        """
        Name of the system time zone, chosen by the daylight saving flag
        recorded at construction; it does not depend on the offset.
        """

        return time.tzname[self._isdst]
0619