File indexing completed on 2024-11-03 08:24:25
# -*- coding: UTF-8 -*-

"""
Header entry in PO catalogs.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

from pology import PologyError, _
from pology.wrap import wrap_field
from pology.monitored import Monitored, Monlist, Monpair
from .message import Message

import datetime
import time
import re

# Attribute specification handed to Monitored.assert_spec_init();
# entries marked "derived" are computed in Header.__getattr__.
_Header_spec = {
    "title" : {"type" : Monlist,
               "spec" : {"*" : {"type" : str}}},
    "copyright" : {"type" : (str, type(None))},
    "license" : {"type" : (str, type(None))},
    "author" : {"type" : Monlist,
                "spec" : {"*" : {"type" : str}}},
    "comment" : {"type" : Monlist,
                 "spec" : {"*" : {"type" : str}}},
    "field" : {"type" : Monlist,
               "spec" : {"*" : {"type" : Monpair,
                                "spec" : {"first" : {"type" : str},
                                          "second" : {"type" : str}}}}},
    "initialized" : {"type" : bool, "derived" : True},
    # Dummies for summary iteration in catalog:
    "obsolete" : {"type" : bool, "derived" : True},
    "key" : {"type" : bool, "derived" : True},
}


class Header (Monitored):
    """
    Header entry in PO catalogs.

    The PO header is syntactically just another entry in the catalog,
    but with different semantics. Therefore, instead of operating on it using
    L{Message}, this class provides a different set of interface attributes
    and methods.

    Like L{Message}, this class implements monitoring; the starred-types
    (e.g. C{list*}) are according to the same convention as for messages,
    and also the strings are assumed unicode unless otherwise noted.

    There is no lightweight alternative to the monitored header, like that of
    L{MessageUnsafe} for messages, because no performance demand is expected
    for the headers only.

    @ivar title: comment lines giving the title
    @type title: list* of strings

    @ivar copyright: comment line with the copyright statement
    @type copyright: string

    @ivar license: comment line with the license statement
    @type license: string

    @ivar author: comment lines stating translators who worked on this catalog
    @type author: list* of strings

    @ivar comment: the free comment lines, being none of the specific ones
    @type comment: list* of strings

    @ivar field: parsed header fields as key-value string pairs
    @type field: list* of pairs*

    @ivar initialized: (read-only) whether the header is fully initialized
    @type initialized: bool

    @see: L{Message}
    """

    def __init__ (self, init=None):
        """
        Initializes the header by the given message or header.

        If C{init} is a header, its attributes are copied.
        If it is any other true value, it is treated as a message:
        its manual comments are sorted into title, copyright, license,
        author and free comment lines, and its first C{msgstr} is split
        into header fields.
        Otherwise a header with default placeholder values is created.

        @param init: the PO entry containing the header, or another header
        @type init: subclass of L{Message_base}, or L{Header}
        """

        if isinstance(init, Header): # copy header fields
            hdr = init
            self._title = Monlist(hdr._title)
            self._copyright = hdr._copyright
            self._license = hdr._license
            self._author = Monlist(hdr._author)
            self._comment = Monlist(hdr._comment)
            self._field = Monlist(list(map(Monpair, hdr._field)))

            # Create the message.
            self._message = hdr.to_msg()

        elif init: # parse header message
            msg = init
            # Comments.
            self._title = Monlist()
            self._copyright = ""
            self._license = ""
            self._author = Monlist()
            self._comment = Monlist()
            # Comment lines count as title until the first copyright,
            # license or author line has been seen.
            intitle = True
            for c in msg.manual_comment:
                if (    not self._copyright
                    and re.search(r"copyright|\(C\)|©", c, re.I|re.U)
                ):
                    self._copyright = c
                    intitle = False
                elif (    not self._license
                      and (    re.search("license", c, re.I)
                           and not re.search("^translation *of.* to", c, re.I))
                ):
                    self._license = c
                    intitle = False
                elif re.search("<.*@.*>", c):
                    self._author.append(c)
                    intitle = False
                elif intitle:
                    self._title.append(c)
                else:
                    self._comment.append(c)

            # Header fields.
            self._field = Monlist()
            for field in msg.msgstr[0].split("\n"):
                m = re.match(r"(.*?): ?(.*)", field)
                if m:
                    self._field.append(Monpair(m.groups()))

            # Copy the message.
            self._message = Message(msg)

        else: # create default fields
            self._title = Monlist(["SOME DESCRIPTIVE TITLE."])
            self._copyright = "Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER"
            self._license = "This file is distributed under the same license as the PACKAGE package."
            self._author = Monlist(["FIRST AUTHOR <EMAIL@ADDRESS>, YEAR."])
            self._comment = Monlist([""])

            self._field = Monlist([
                Monpair(("Project-Id-Version", "PACKAGE VERSION")),
                Monpair(("Report-Msgid-Bugs-To", "")),
                Monpair(("POT-Creation-Date", format_datetime())),
                Monpair(("PO-Revision-Date", "YEAR-MO-DA HO:MI+ZONE")),
                Monpair(("Last-Translator", "FULL NAME <EMAIL@ADDRESS>")),
                Monpair(("Language-Team", "LANGUAGE <LL@li.org>")),
                Monpair(("Language", "")),
                Monpair(("MIME-Version", "1.0")),
                Monpair(("Content-Type", "text/plain; charset=CHARSET")),
                Monpair(("Content-Transfer-Encoding", "8bit")),
                Monpair(("Plural-Forms", "nplurals=INTEGER; plural=EXPRESSION;")),
            ])

            # Create the message.
            self._message = Message()
            self._remake_msg(force=True)

        self.assert_spec_init(_Header_spec)

        # Unmodify all monitored members.
        self.modcount = 0


    def __getattr__ (self, att):
        """
        Attribute getter.

        Processes read-only attributes, and sends others to the base class.

        @param att: name of the attribute to get
        @returns: attribute value
        """

        if att == "obsolete":
            return False
        elif att == "key":
            return Message().key # key of an empty-msgid message
        elif att == "initialized":
            # Check if all necessary fields have been initialized,
            # i.e. that none still contains its default placeholder text.
            gfv = self.get_field_value
            return not (False
               or "PACKAGE VERSION" in gfv("Project-Id-Version", "")
               or "YEAR-MO-DA" in gfv("PO-Revision-Date", "")
               or "FULL NAME" in gfv("Last-Translator", "")
               or "LANGUAGE" in gfv("Language-Team", "")
               or "CHARSET" in gfv("Content-Type", "")
               or "ENCODING" in gfv("Content-Transfer-Encoding", "")
               or "INTEGER" in gfv("Plural-Forms", "")
               or "EXPRESSION" in gfv("Plural-Forms", "")
            )
        else:
            return Monitored.__getattr__(self, att)


    def get (self, att, default=None):
        """
        Get attribute value.

        Allows accessing the header like a dictionary.

        @param att: name of the attribute to get
        @type att: string
        @param default: value to return if the attribute does not exist

        @returns: value of the attribute or the default value
        """

        return getattr(self, att, default)


    def _remake_msg (self, force=False):
        """
        Synchronize the internal message with the header attributes.

        The manual comments of the message are rebuilt if any of title,
        copyright, license, author or comment report a modification;
        C{msgstr} is rebuilt if the fields report a modification;
        the fuzzy flag is set to the negation of C{initialized}
        if the header reports any modification.

        @param force: whether to rebuild everything regardless
            of modification counters
        @type force: bool
        """

        m = self._message

        if (force
            or self.title_modcount or self.title.modcount
            or self.copyright_modcount
            or self.license_modcount
            or self.author_modcount or self.author.modcount
            or self.comment_modcount or self.comment.modcount
        ):
            m.manual_comment = Monlist()
            for t in self.title:
                m.manual_comment.append(t)
            if self.copyright:
                m.manual_comment.append(self.copyright)
            if self.license:
                m.manual_comment.append(self.license)
            for a in self.author:
                m.manual_comment.append(a)
            for c in self.comment:
                m.manual_comment.append(c)

        if force or self.field_modcount or self.field.modcount:
            m.msgstr = Monlist([""])
            for field in self.field:
                m.msgstr[0] += "%s: %s\n" % tuple(field)

        if force or self.modcount:
            m.fuzzy = not self.initialized


    def __eq__ (self, ohdr):
        """
        Reports whether headers are equal in all apparent parts.

        "Apparent" parts include all those which are visible in the PO file.
        I.e. the check will ignore internal states, like line caches, etc.

        @returns: C{True} if headers are equal in apparent parts
        @rtype: bool
        """

        return self.to_msg() == ohdr.to_msg()


    def __ne__ (self, ohdr):
        """
        Reports whether headers are not equal in some apparent parts.

        Equivalent to C{not (self == ohdr)}.

        @returns: C{False} if headers are equal in all apparent parts
        @rtype: bool
        """

        return not self.__eq__(ohdr)


    def to_msg (self, force=False):
        """
        Convert the header into ordinary message object.

        The message object returned may be the modification of the one
        passed to the constructor. In that case, and if the message object
        has monitoring features, the force parameter will tell whether to
        modify all message elements, or to try to keep the changes minimal.

        @param force: whether to recreate all message elements
        @type force: bool

        @returns: header as message
        @rtype: the type that initialized the object
        """

        self._remake_msg(force)
        return self._message


    def to_lines (self, wrapf=wrap_field, force=False, colorize=0):
        """
        The line-representation of the header.

        Equivalent to the same-named method of message classes.

        @see: L{Message_base}
        """

        return self.to_msg(force).to_lines(wrapf, force, colorize)


    def to_string (self, wrapf=wrap_field, force=False, colorize=0):
        """
        The string-representation of the header.

        Equivalent to the same-named method of message classes.

        @see: L{Message_base}
        """

        return self.to_msg(force).to_string(wrapf, force, colorize)


    def select_fields (self, name):
        """
        Find header fields with the given name.

        Header fields need not be unique.

        @param name: look for the fields with this name
        @type name: string

        @returns: references to name-value pairs matching the field name
        @rtype: list of pairs*
        """

        fields = []
        for pair in self.field:
            if pair.first == name:
                fields.append(pair)
        return fields


    def get_field_value (self, name, default=None):
        """
        Get the value of the given header field.

        If there are several fields with the same name, it is undefined which
        of them will supply the value; this method should be used only
        for fields which are expected to be unique.
        If there are no fields named as requested, C{default} is returned.

        @param name: field name
        @type name: string
        @param default: value returned if there is no such field
        @type default: as given

        @returns: field value
        @rtype: string or C{default}
        """

        for pair in self.field:
            if pair.first == name:
                return pair.second
        return default


    def replace_field_value (self, name, new_value, nth=0):
        """
        Replace the value of the n-th occurrence of the named header field.

        Header fields need not be unique, hence the n-th qualification.
        Occurrences are counted from zero; a negative C{nth} never matches.

        @param name: name of the header field
        @type name: string

        @param new_value: new value for the field
        @type new_value: string

        @param nth: replace the value of this field among same-named fields
        @type nth: int

        @returns: True if the requested field was found, False otherwise
        @rtype: bool
        """

        nfound = 0
        for i in range(len(self._field)):
            if self.field[i][0] == name:
                if nfound == nth:
                    self.field[i] = Monpair((str(name), new_value))
                    return True
                nfound += 1

        # Report failure explicitly; comparing counters after the loop
        # would wrongly report success for nth=-1 when nothing matched.
        return False


    def set_field (self, name, value, after=None, before=None, reorder=False):
        """
        Set a header field to a value.

        If the field already exists, its value is replaced with the given one.
        If there are several same-named fields, it is undefined which one
        and how many of them are going to have their values replaced;
        this method should be used only for fields expected to be unique.
        If there is no such field yet, it is inserted into the header;
        after the field C{after} or before the field C{before} if given
        and existing, or appended to the end otherwise.
        If the field already exists, but not in the position according to
        C{after} or C{before}, reordering can be requested too.

        @param name: name of the header field
        @type name: unicode

        @param value: new value for the field
        @type value: unicode

        @param after: the field to insert after
        @type after: string

        @param before: the field to insert before
        @type before: string

        @param reorder: whether to move an existing field into better position
        @type reorder: bool

        @returns: position where the field was modified or inserted
        @rtype: int
        """

        ins_pos = -1
        rpl_pos = -1
        for i in range(len(self._field)):
            if self.field[i][0] == name:
                rpl_pos = i
                if not reorder:
                    break
            if (   (after and i > 0 and self.field[i - 1][0] == after)
                or (before and self.field[i][0] == before)
            ):
                ins_pos = i
                # Do not break, must try all fields for value replacement.

        if reorder and ins_pos >= 0 and rpl_pos >= 0 and ins_pos != rpl_pos:
            self._field.pop(rpl_pos)
            # Removing an earlier element shifts the insertion point down.
            if ins_pos > rpl_pos:
                ins_pos -= 1
            rpl_pos = -1

        pair = Monpair((name, value))
        if rpl_pos >= 0:
            self._field[rpl_pos] = pair
            pos = rpl_pos
        elif ins_pos >= 0:
            self._field.insert(ins_pos, pair)
            pos = ins_pos
        else:
            self._field.append(pair)
            # Index of the element just appended, not the new length.
            pos = len(self._field) - 1

        return pos


    def remove_field (self, name):
        """
        Remove header fields with the given name, if it exists.

        @param name: remove fields with this name
        @type name: string

        @return: number of removed fields
        @rtype: int
        """

        i = 0
        nrem = 0
        while i < len(self.field):
            if self.field[i][0] == name:
                # Do not advance: the next field has moved into position i.
                self.field.pop(i)
                nrem += 1
            else:
                i += 1

        return nrem


_dt_fmt = "%Y-%m-%d %H:%M:%S%z"
_dt_fmt_nosec = "%Y-%m-%d %H:%M%z"

def format_datetime (dt=None, wsec=False):
    """
    Format datetime as found in PO header fields.

    If a particular datetime object C{dt} is not given,
    current datetime is used instead.

    If C{wsec} is C{False}, the formatted string will not contain
    the seconds component, which is usual for PO header datetimes.
    If seconds accuracy is desired, C{wsec} can be set to C{True}.

    @param dt: datetime
    @type dt: datetime.datetime
    @param wsec: whether to add seconds component
    @type wsec: bool

    @return: formatted datetime
    @rtype: string
    """

    if dt is not None:
        if wsec:
            dtstr = dt.strftime(_dt_fmt)
        else:
            dtstr = dt.strftime(_dt_fmt_nosec)
        # If timezone is not present, assume UTC.
        if dt.tzinfo is None:
            dtstr += "+0000"
    else:
        if wsec:
            dtstr = time.strftime(_dt_fmt)
        else:
            dtstr = time.strftime(_dt_fmt_nosec)

    return str(dtstr)


# Patterns are tried in order, from the most complete form to the least.
_parse_date_rxs = [re.compile(x) for x in (
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+):(\d+) *([+-]\d+) *$",
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+)() *([+-]\d+) *$",
    # ...needs empty group to differentiate from the next case.
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+):(\d+) *$",
    r"^ *(\d+)-(\d+)-(\d+) *(\d+):(\d+) *$",
    r"^ *(\d+)-(\d+)-(\d+) *$",
    r"^ *(\d+)-(\d+) *$",
    r"^ *(\d+) *$",
)]

def parse_datetime (dstr):
    """
    Parse formatted datetime from a PO header field into a datetime object.

    The formatted datetime may also have a seconds component,
    which is typically not present in PO headers.
    It may also lack a contiguous number of components from the back,
    e.g. having no time zone offset, or no time at all.
    Missing month and day default to 1, all other missing components to 0.

    @param dstr: formatted datetime
    @type dstr: string

    @return: datetime object
    @rtype: datetime.datetime

    @raise PologyError: if the string matches none of the known forms
    """

    for parse_date_rx in _parse_date_rxs:
        m = parse_date_rx.search(dstr)
        if m:
            break
    if not m:
        raise PologyError(_("@info",
                            "Cannot parse datetime string '%(str)s'.",
                            str=dstr))
    pgroups = [int(x or 0) for x in m.groups()]
    pgroups.extend([1] * (3 - len(pgroups)))
    pgroups.extend([0] * (7 - len(pgroups)))
    year, month, day, hour, minute, second, off = pgroups
    # The offset reads as [+-]HHMM. Split its magnitude and apply the sign
    # afterwards: floor division on a negative number rounds away from zero,
    # which would turn e.g. -0530 into -6 hours and +70 minutes.
    offsign = -1 if off < 0 else 1
    offhr, offmin = divmod(abs(off), 100)
    dt = datetime.datetime(year=year, month=month, day=day,
                           hour=hour, minute=minute, second=second,
                           tzinfo=TZInfo(hours=offsign * offhr,
                                         minutes=offsign * offmin))
    return dt


class TZInfo (datetime.tzinfo):
    """
    A simple derived time zone info for use in datetime objects.
    """

    def __init__ (self, hours=None, minutes=None):
        """
        Create a time zone with given offset in hours and minutes.

        The offset given by C{minutes} is added to that given by C{hours},
        e.g. C{hours=2} and C{minutes=30} means two and a half hours offset.
        If C{minutes} is given but C{hours} is not, C{hours} is considered zero.
        If neither C{hours} nor C{minutes} are given,
        the offset is read from system time zone.

        @param hours: the time zone offset in hours
        @type hours: int
        @param minutes: additional offset in minutes
        @type minutes: int
        """

        # DST flag of the local time at the moment of construction.
        self._isdst = time.localtime()[-1]
        if hours is None and minutes is None:
            # time.timezone/altzone are seconds west of UTC, hence negation.
            tzoff_sec = -(time.altzone if self._isdst else time.timezone)
            tzoff_hr = tzoff_sec // 3600
            tzoff_min = (tzoff_sec - tzoff_hr * 3600) // 60
        else:
            tzoff_hr = hours or 0
            tzoff_min = minutes or 0

        self._dst = datetime.timedelta(0)
        self._utcoffset = datetime.timedelta(hours=tzoff_hr, minutes=tzoff_min)


    def utcoffset (self, dt):
        """
        Return the fixed UTC offset of this time zone.

        @param dt: ignored
        @rtype: datetime.timedelta
        """

        return self._utcoffset


    def dst (self, dt):
        """
        Return the DST adjustment, which is always zero.

        @param dt: ignored
        @rtype: datetime.timedelta
        """

        return self._dst


    def tzname (self, dt):
        """
        Return the system time zone name for the DST state at construction.

        The name is taken from C{time.tzname} even when the offset
        was given explicitly.

        @param dt: ignored
        @rtype: string
        """

        return time.tzname[self._isdst]