Warning, /sdk/pology/bin/traplint is written in an unsupported language. File is not indexed.
0001 #!/usr/bin/env python3
0002 # -*- coding: UTF-8 -*-
0003
0004 try:
0005 import fallback_import_paths
0006 except:
0007 pass
0008
0009 import sys
0010 import os
0011 import re
0012 import locale
0013
0014 from pology import PologyError, version, _, n_
0015 from pology.lang.sr.wconv import ctol, hictoall
0016 from pology.lang.sr.trapnakron import rootdir
0017 from pology.lang.sr.trapnakron import trapnakron_ui
0018 from pology.lang.sr.trapnakron import norm_pkey, norm_rtkey
0019 from pology.lang.sr.trapnakron import _disamb_marker
0020 from pology.colors import ColorOptionParser
0021 from pology.fsops import str_to_unicode
0022 from pology.normalize import identify
0023 from pology.report import report, warning, format_item_list
0024 from pology.vcs import VcsSubversion
0025
0026
def validate (tp, onlysrcs=None, onlykeys=None, demoexp=False, expwkeys=False):
    """
    Check derivations of a trapnakron and report problems as warnings.

    For each selected derivation, the properties needed by the checks are
    fetched for the base environment and for each known alternative
    environment the derivation defines. The checks are:
      - the derivation defines only known environments;
      - normalized nominative forms do not coincide with those of
        another derivation having different properties;
      - derivations with the plain nominative property define
        a gender, and the gender is one of the known values.

    tp       -- trapnakron object to validate (must provide the methods
                called below: dkeys, source_name, source_pos, envs, get2,
                props, syns, altdkeys, empty_pcache)
    onlysrcs -- matchers for source names to limit validation to,
                or None for no limit (see _match_text for matcher types)
    onlykeys -- matchers for derivation keys to limit validation to,
                or None for no limit
    demoexp  -- whether to report a sample of expanded properties
                for each derivation that had no problems
    expwkeys -- when reporting the sample, whether to precede it with
                a comment line listing all keys of the derivation

    Returns the number of problems found.
    """

    # Nominative property key sets, in order of priority.
    nom_pkeys = (
        ["н"],
        ["нм", "нж", "нс", "ну"],
    )
    gender_pkey = "_род"

    needed_pkeys = set()
    needed_pkeys.update(sum(nom_pkeys, []))
    needed_pkeys.add(gender_pkey)

    known_genders = set(("м", "ж", "с", "у"))
    # Also accept the forms produced by ctol for each gender value.
    known_genders.update(list(map(ctol, known_genders)))

    # Pairs of (derivation key suffix, environment suffix).
    known_alts = [
        ("_s", "сист"),
        ("_a", "алт"),
        ("_a2", "алт2"),
        ("_a3", "алт3"),
    ]
    base_envs = ["", "л", "иј", "ијл"]
    all_envs = set(base_envs)
    for aenv in [x[1] for x in known_alts]:
        all_envs.update(x + aenv for x in base_envs)

    if demoexp:
        demoexp_pkeys = ["н", "г", "д", "а", "в", "и",
                         "нк", "гк", "дк", "ак", "вк",
                         "нм", "нмп"]
        needed_pkeys.update(demoexp_pkeys)

    dkeys_by_rtkey = {}

    # Sort keys such that derivations are checked by file and position.
    dkeys = tp.dkeys(single=onlykeys is None)
    def sortkey (x):
        path, lno, cno = tp.source_pos(x)
        return path.count(os.path.sep), path, lno, cno
    dkeys = sorted(dkeys, key=sortkey)

    nproblems = 0
    unmatched_srcs = set(onlysrcs) if onlysrcs is not None else None
    unmatched_keys = set(onlykeys) if onlykeys is not None else None
    reported_fmtexps = set()

    for dkey in dkeys:
        srcname = tp.source_name(dkey)
        path, lno, cno = tp.source_pos(dkey)
        cnproblems = 0

        if (   (    onlysrcs is not None
                and not _match_text(srcname, onlysrcs, unmatched_srcs))
            or (    onlykeys is not None
                and not _match_text(dkey, onlykeys, unmatched_keys))
        ):
            continue

        try:
            aprops = []
            seenesuffs = set()
            cenvs = tp.envs(dkey)
            for cenv in cenvs:
                if cenv != "":
                    envmatched = False
                    for ksuff, esuff in known_alts:
                        if cenv in all_envs and cenv.endswith(esuff):
                            envmatched = True
                            break
                else:
                    envmatched = True
                    ksuff, esuff = "", ""
                if envmatched and esuff not in seenesuffs:
                    # Properties are fetched once per environment suffix,
                    # through the derivation key with the matching suffix.
                    dkeym = dkey + ksuff
                    props = {x: tp.get2(dkeym, norm_pkey(x))
                             for x in needed_pkeys}
                    aprops.append((esuff, props))
                    seenesuffs.add(esuff)
                elif cenv not in all_envs:
                    warning(_("@info",
                              "Derivation at %(file)s:%(line)d:%(col)d "
                              "defines unknown environment '%(env)s'.",
                              file=path, line=lno, col=cno, env=cenv))
                    cnproblems += 1
        except Exception as e:
            warning(str_to_unicode(str(e)))
            cnproblems += 1
            # Account for the problems before skipping the remaining checks;
            # otherwise failed derivations would be missing from the total.
            nproblems += cnproblems
            continue

        for esuff, props in aprops:
            # Assure all nominative forms are unique.
            for pkeys in nom_pkeys: # select first nominative set by priority
                pvals = [props.get(x) for x in pkeys]
                noms = [x for x in pvals if x is not None]
                if noms:
                    break
            if noms:
                rtkeys = list(map(norm_rtkey, noms))
                for rtkey in rtkeys:
                    odkey = dkeys_by_rtkey.get(rtkey)
                    if odkey is not None and tp.props(dkey) != tp.props(odkey):
                        opath, olno, ocno = tp.source_pos(odkey)
                        warning(_("@info",
                                  "Derivation at %(file1)s:%(line1)d:%(col1)d "
                                  "has normalized nominative equal to "
                                  "derivation at %(file2)s:%(line2)d:%(col2)d; "
                                  "consider adding a disambiguation marker "
                                  "(%(dchar)s).",
                                  file1=path, line1=lno, col1=cno,
                                  file2=opath, line2=olno, col2=ocno,
                                  dchar=_disamb_marker))
                        cnproblems += 1
                for rtkey in rtkeys: # must be in new loop
                    dkeys_by_rtkey[rtkey] = dkey

            # Assure presence of gender on noun derivations.
            if props.get(nom_pkeys[0][0]) is not None:
                gender = props.get(gender_pkey)
                if gender is None:
                    warning(_("@info",
                              "Derivation at %(file)s:%(line)d:%(col)d "
                              "does not define gender.",
                              file=path, line=lno, col=cno))
                    cnproblems += 1
                else:
                    for gender in hictoall(gender):
                        if gender not in known_genders:
                            warning(_("@info",
                                      "Derivation at %(file)s:%(line)d:%(col)d "
                                      "defines unknown gender '%(gen)s'.",
                                      file=path, line=lno, col=cno, gen=gender))
                            cnproblems += 1

            # Show selection of expanded properties if requested.
            if demoexp and not cnproblems:
                demoprops = [(x, props.get(x)) for x in demoexp_pkeys]
                demoprops = [x for x in demoprops if x[1] is not None]
                fmtprops = ["%s=%s" % (x[0], _escape_pval(x[1]))
                            for x in demoprops]
                fmtsyns = ["%s" % _escape_syn(x) for x in tp.syns(dkey)]
                fmtexp = ", ".join(fmtsyns) + ": " + ", ".join(fmtprops)
                if expwkeys:
                    fmtdkeys = ", ".join(sorted(tp.altdkeys(dkey)))
                    fmtexp = "# " + fmtdkeys + "\n" + fmtexp
                if fmtexp not in reported_fmtexps:
                    if not esuff:
                        report(fmtexp)
                        reported_fmtexps.add(fmtexp)
                    else:
                        afmtexp = " @" + esuff + ": " + ", ".join(fmtprops)
                        report(afmtexp)

        nproblems += cnproblems
        tp.empty_pcache()

    # Matchers given by the caller which did not match anything.
    if unmatched_srcs:
        fmtsrcs = format_item_list(sorted(getattr(x, "pattern", x)
                                          for x in unmatched_srcs))
        warning(_("@info",
                  "Sources requested by name not found: %(srclist)s.",
                  srclist=fmtsrcs))
    if unmatched_keys:
        fmtkeys = format_item_list(sorted(getattr(x, "pattern", x)
                                          for x in unmatched_keys))
        warning(_("@info",
                  "Derivations requested by key not found: %(keylist)s.",
                  keylist=fmtkeys))

    return nproblems
0197
0198
0199 class _Wre (object):
0200
0201 def __init__ (self, pattern):
0202
0203 self.regex = re.compile(pattern, re.U)
0204 self.pattern = pattern
0205
0206
0207 def _match_text (text, tests, unmatched_tests=None):
0208
0209 match = False
0210 for test in tests:
0211 if isinstance(test, str):
0212 if test == text:
0213 match = True
0214 break
0215 elif isinstance(test, _Wre):
0216 if test.regex.search(text):
0217 match = True
0218 break
0219 elif callable(test):
0220 if test(text):
0221 match = True
0222 break
0223 else:
0224 raise PologyError(
0225 _("@info",
0226 "Unknown matcher type '%(type)s'.",
0227 type=type(test)))
0228
0229 if unmatched_tests is not None:
0230 if match and test in unmatched_tests:
0231 unmatched_tests.remove(test)
0232
0233 return match
0234
0235
0236 def _escape_pval (pval):
0237
0238 pval = pval.replace(",", "\,")
0239 return pval
0240
0241
0242 def _escape_syn (pval):
0243
0244 pval = pval.replace(",", "\,")
0245 pval = pval.replace(":", "\:")
0246 return pval
0247
0248
def _collect_mod_dkeys (tp, onlysrcs=None, onlykeys=None):
    """
    Collect keys of derivations touched by uncommitted modifications.

    The unified diff of the trapnakron root directory is fetched through
    VcsSubversion, difference blocks due to pure moving are eliminated,
    and key syntagmas are parsed from added lines in files with
    the '.sd' extension. An added environment line (one starting
    with '@') is attributed to the syntagmas of the preceding
    context line, if there was one.

    tp       -- trapnakron object (only its dkeys method is used here)
    onlysrcs -- matchers for source names to limit collection to, or None
    onlykeys -- matchers for derivation keys to limit collection to, or None

    Returns the pair (None, set of derivation keys), to be used
    as replacements for source and key selections.
    """

    # Collect the unified diff of trapnakron root.
    vcs = VcsSubversion()
    udiff = vcs.diff(rootdir())
    udiff = _elim_moved_blocks(udiff)

    # Collect key syntagmas related to added lines.
    asyns = set()
    skip_file = True
    prev_syns = None
    for tag, data in udiff:
        if tag == "@":
            continue

        if tag == ":":
            # File header: decide whether lines of this file are considered.
            fpath = data
            if not fpath.endswith(".sd"):
                skip_file = True
            elif onlysrcs is None:
                skip_file = False
            else:
                srcname = os.path.splitext(os.path.basename(fpath))[0]
                skip_file = not _match_text(srcname, onlysrcs)
            continue
        if skip_file:
            continue

        line = data.strip()
        if line.startswith(("#", ">")) or not line:
            continue
        if tag == " ":
            if not line.startswith("@"):
                prev_syns = _parse_syns(line)
        elif tag == "+":
            if not line.startswith("@"):
                syns = _parse_syns(line)
            elif prev_syns:
                syns = prev_syns
            else:
                # Environment line with no context line before it.
                # Anything parsed from earlier added lines is already
                # in the collection, so there is nothing to add.
                syns = []
            asyns.update(syns)
            prev_syns = []

    # Collect derivation keys from syntagmas.
    onlykeys_mod = set()
    dkeys_in_tp = set(tp.dkeys(single=True))
    for syn in asyns:
        dkey = identify(syn)
        if (    dkey and dkey in dkeys_in_tp
            and (onlykeys is None or _match_text(dkey, onlykeys))
        ):
            onlykeys_mod.add(dkey)

    return None, onlykeys_mod
0302
0303
0304 # Eliminate difference blocks due to pure moving between and within files.
0305 def _elim_moved_blocks (udiff):
0306
0307 segcnt_ad = {}
0308 segcnt_rm = {}
0309 ctag = ""
0310 cseg = []
0311 for tag, data in udiff + [("@", None)]: # sentry
0312 if tag == "@":
0313 if ctag in ("+", "-"):
0314 cskey = "".join(cseg)
0315 segcnt = segcnt_ad if ctag == "+" else segcnt_rm
0316 if cskey not in segcnt:
0317 segcnt[cskey] = 0
0318 segcnt[cskey] += 1
0319 ctag = ""
0320 cseg = []
0321 elif tag in ("+", "-"):
0322 if ctag and ctag != tag:
0323 ctag = "xxx"
0324 else:
0325 ctag = tag
0326 cseg.append(data)
0327
0328 udiff_mod = []
0329 subdiff = []
0330 ctag = ""
0331 cseg = []
0332 for tag, data in udiff + [("@", None)]:
0333 if tag in (":", "@"):
0334 if subdiff:
0335 cskey = "".join(cseg)
0336 if ( ctag not in ("+", "-")
0337 or segcnt_ad.get(cskey, 0) != 1
0338 or segcnt_rm.get(cskey, 0) != 1
0339 ):
0340 udiff_mod.extend(subdiff)
0341 subdiff = []
0342 cseg = []
0343 ctag = ""
0344 if tag == ":":
0345 udiff_mod.append((tag, data))
0346 else:
0347 subdiff = [(tag, data)]
0348 else:
0349 subdiff.append((tag, data))
0350 if tag in ("+", "-"):
0351 if ctag and ctag != tag:
0352 ctag = "xxx"
0353 else:
0354 ctag = tag
0355 cseg.append(data)
0356
0357 return udiff_mod
0358
0359
0360 def _parse_syns (line):
0361
0362 if line.strip().startswith(("#", ">")):
0363 return []
0364
0365 llen = len(line)
0366 pos = 0
0367 syns = []
0368 csyn = ""
0369 intag = False
0370 while pos < llen:
0371 c = line[pos]
0372 if c == "\\":
0373 pos += 1
0374 if pos < llen:
0375 csyn += line[pos]
0376 elif intag:
0377 if cltag:
0378 if c == cltag:
0379 intag = False
0380 else:
0381 cn = line[pos + 1:pos + 2]
0382 if cn in (",", ":") or cn.isspace():
0383 intag = False
0384 elif c == "~":
0385 intag = True
0386 cltag = "}" if line[pos + 1:pos + 2] == "{" else ""
0387 elif c in (",", ":"):
0388 csyn = csyn.strip()
0389 if csyn.startswith("|"):
0390 csyn = csyn[1:]
0391 syns.append(csyn)
0392 if c == ":":
0393 break
0394 else:
0395 csyn = ""
0396 spos = pos + 1
0397 else:
0398 csyn += line[pos]
0399 pos += 1
0400
0401 return syns
0402
0403
def _statistics (tp, onlysrcs, onlykeys):
    """
    Report statistics on derivations in the trapnakron.

    Only derivations passing the source name and derivation key
    selections are counted (a selection given as None is not applied).
    Reported are the total number of derivations and, when there is
    at least one file, the number of files, the average number of
    derivations per file, and the file with the most derivations.
    """

    selected = set()
    per_file = {} # file path -> [source name, number of derivations]
    for dkey in tp.dkeys(single=True):
        srcname = tp.source_name(dkey)
        fpath, lno, cno = tp.source_pos(dkey)

        if onlysrcs is not None and not _match_text(srcname, onlysrcs):
            continue
        if onlykeys is not None and not _match_text(dkey, onlykeys):
            continue

        selected.add(dkey)
        entry = per_file.setdefault(fpath, [srcname, 0])
        entry[1] += 1

    nderivs = len(selected)
    nfiles = len(per_file)

    report("-" * 40)
    if not (onlysrcs is None and onlykeys is None):
        report(_("@info statistics; side note stating that not all entries "
                 "have been taken into account, but only some selected",
                 "(Selection active.)"))
    report(_("@info statistics",
             "Total derivations: %(num)d",
             num=nderivs))
    if nfiles > 0:
        report(_("@info statistics",
                 "Total files: %(num)d",
                 num=nfiles))
        report(_("@info statistics",
                 "Average derivations per file: %(num).1f",
                 num=(float(nderivs) / nfiles)))
        # Greatest pair by count, then by source name.
        topcount, topname = max((count, name)
                                for name, count in per_file.values())
        report(_("@info statistics",
                 "Most derivations in a file: %(num)d (%(file)s)",
                 num=topcount, file=topname))
0441
0442
def _main ():
    """
    Run the command line interface.

    Free arguments are split into source name selections (paths of
    existing files, or arguments starting with ':') and derivation key
    selections (everything else). The trapnakron is then validated,
    and statistics are shown if requested.
    """

    locale.setlocale(locale.LC_ALL, "")

    parser = ColorOptionParser(
        usage=_("@info command usage",
                "%(cmd)s [OPTIONS] [DKEY|SRCPATH|:SRCNAME]...",
                cmd="%prog"),
        description=_("@info command description",
                      "Check validity and expand derivations from internal trapnakron."),
        version=_("@info command version",
                  "%(cmd)s (Pology) %(version)s\n"
                  "Copyright © 2009, 2010 "
                  "Chusslove Illich (Часлав Илић) <%(email)s>",
                  cmd="%prog", version=version(), email="caslav.ilic@gmx.net"))

    # All options are plain switches, off by default.
    switches = [
        ("-e", "--expansion-sample", "demoexp",
         _("@info command line option description",
           "Show a sample of expanded properties for "
           "each valid derivation.")),
        ("-k", "--show-keys", "expwkeys",
         _("@info command line option description",
           "When expanding, also show all derivation keys by derivation.")),
        ("-m", "--modified", "modified",
         _("@info command line option description",
           "Validate or expand only modified derivations.")),
        ("-r", "--regex", "regex",
         _("@info command line option description",
           "Source names and derivation keys given in command line "
           "are regular expressions.")),
        ("-s", "--statistics", "statistics",
         _("@info command line option description",
           "Show statistics.")),
    ]
    for shortopt, longopt, dest, helptext in switches:
        parser.add_option(shortopt, longopt,
                          action="store_true", dest=dest, default=False,
                          help=helptext)

    options, free_args = parser.parse_args(str_to_unicode(sys.argv[1:]))

    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    # Sort free arguments into source name and derivation key selections.
    srcsel = set()
    keysel = set()
    srcmark = ":"
    for arg in free_args:
        if os.path.isfile(arg):
            fname = arg.split(os.path.sep)[-1]
            srcsel.add(os.path.splitext(fname)[0])
        elif arg.startswith(srcmark):
            srcname = arg[len(srcmark):]
            srcsel.add(_Wre(srcname) if options.regex else srcname)
        else:
            keysel.add(_Wre(arg) if options.regex else identify(arg))

    # Empty selection means no selection.
    onlysrcs = srcsel if srcsel else None
    onlykeys = keysel if keysel else None

    # Create and validate the trapnakron.
    tp = trapnakron_ui()
    if options.modified:
        onlysrcs, onlykeys = _collect_mod_dkeys(tp, onlysrcs, onlykeys)
    validate(tp, onlysrcs, onlykeys, options.demoexp, options.expwkeys)

    if options.statistics:
        _statistics(tp, onlysrcs, onlykeys)


# Run only when executed as a script.
if __name__ == "__main__":
    _main()
0529