File indexing completed on 2024-03-24 17:21:40

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Operations with environment, file system and external commands.
0005 
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009 
0010 import codecs
0011 import locale
0012 import os
0013 import re
0014 import subprocess
0015 import sys
0016 
0017 from pology import PologyError, _, n_
0018 import pology.config
0019 from pology.escape import escape_sh
0020 from pology.report import report, error, warning
0021 
0022 
def collect_files (paths,
                   recurse=True, sort=True, unique=True, relcwd=True,
                   selectf=None):
    """
    Collect list of files from given directory and file paths.

    C{paths} can be any sequence of strings, or a single string.
    Directories can be searched for files recursively or non-recursively,
    as requested by the C{recurse} parameter.
    Parameters C{sort} and C{unique} determine if the resulting paths
    are sorted alphabetically increasing and if duplicate paths are removed.
    If C{relcwd} is set to C{True}, absolute file paths which point to files
    within the current working directory are made relative to it.
    Sorting and duplicate removal are performed on the final paths,
    i.e. after they have been made relative to the current working directory.

    Only selected files may be collected by supplying
    a selection function through C{selectf} parameter.
    It takes a file path as argument and returns a boolean,
    C{True} to select the file or C{False} to discard it.
    The selection function sees paths as collected, before any
    conversion requested by C{relcwd}.

    @param paths: paths to search for files
    @type paths: string or iter(string*)
    @param recurse: whether to search for files recursively
    @type recurse: bool
    @param sort: whether to sort collected paths
    @type sort: bool
    @param unique: whether to eliminate duplicate collected paths
    @type unique: bool
    @param relcwd: whether to make collected absolute paths within
        current working directory relative to it
    @type relcwd: bool
    @param selectf: test to select or discard a file path
    @type selectf: (string)->bool

    @returns: collected file paths
    @rtype: [string...]

    @raise PologyError: if a given path does not exist, or is neither
        a file nor a directory
    """

    if isinstance(paths, str):
        paths = [paths]

    filepaths = []
    for path in paths:
        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for fname in files:
                    filepath = os.path.normpath(os.path.join(root, fname))
                    if not selectf or selectf(filepath):
                        filepaths.append(filepath)
                if not recurse:
                    # Emptying the list in place keeps os.walk
                    # from descending into subdirectories.
                    del dirs[:]
        elif os.path.isfile(path):
            if not selectf or selectf(path):
                filepaths.append(path)
        elif not os.path.exists(path):
            raise PologyError(
                _("@info",
                  "Path '%(path)s' does not exist.",
                  path=path))
        else:
            raise PologyError(
                _("@info",
                  "Path '%(path)s' is neither a file nor a directory.",
                  path=path))

    # Relativize first: two different spellings of the same file
    # (e.g. absolute and relative) may collapse into one path here,
    # so uniqueness and ordering must be established afterwards.
    if relcwd:
        filepaths = [join_ncwd(x) for x in filepaths]

    if unique:
        # dict keeps insertion order, so the first occurrence wins
        # and the original order is preserved when not sorting.
        filepaths = list(dict.fromkeys(filepaths))

    if sort:
        filepaths.sort()

    return filepaths
0105 
0106 
def collect_files_by_ext (paths, extension,
                          recurse=True, sort=True, unique=True, relcwd=True,
                          selectf=None):
    """
    Collect list of files having given extension from given paths.

    The C{extension} parameter can be a single extension or
    a sequence of extensions, without the leading dot.
    Files with empty extension (i.e. dot at the end of path)
    are collected by supplying empty string for C{extension}
    (alone or as one element of the sequence),
    and files with no extension by supplying an empty sequence.

    Other parameters behave in the same way as in L{collect_files}.
    If C{selectf} is given, it is consulted only for files
    which have already passed the extension test.

    @param extension: extension of files to collect
    @type extension: string or sequence of strings

    @see: L{collect_files}
    """

    if isinstance(extension, str):
        extensions = [extension]
    else:
        # Materialize, as the sequence is tested repeatedly below.
        extensions = list(extension)

    def selectf_mod (fpath):

        ext = os.path.splitext(fpath)[1]
        if ext == "":
            # No extension at all: selected only by an empty sequence.
            hasext = not extensions
        elif ext == ".":
            # Trailing dot: an "empty extension", requested by "".
            hasext = "" in extensions
        else:
            hasext = ext[1:] in extensions
        if hasext and selectf:
            return selectf(fpath)
        return hasext

    return collect_files(paths, recurse, sort, unique, relcwd, selectf_mod)
0147 
0148 
def collect_catalogs (paths,
                      recurse=True, sort=True, unique=True, relcwd=True,
                      selectf=None):
    """
    Collect list of catalog file paths from given paths.

    This is L{collect_files_by_ext} with the extensions fixed to
    C{("po", "pot")}; all other parameters are passed through unchanged.

    @see: L{collect_files_by_ext}
    """

    return collect_files_by_ext(paths, ("po", "pot"),
                                recurse=recurse, sort=sort, unique=unique,
                                relcwd=relcwd, selectf=selectf)
0162 
0163 
def collect_catalogs_by_env (catpathenv,
                             recurse=True, sort=True, unique=True, relcwd=True,
                             selectf=None):
    """
    Collect list of catalog file paths from directories given
    by an environment variable.

    The value of the variable is a list of directory paths separated
    by the platform's path list separator (C{os.pathsep}).
    Empty elements of the list are ignored.
    If the variable is not set, or contains no directories,
    an empty list is returned.

    Other parameters behave in the same way as in L{collect_catalogs}.

    @param catpathenv: environment variable name
    @type catpathenv: string

    @returns: collected catalog file paths
    @rtype: [string...]
    """

    catpath = os.getenv(catpathenv)
    if catpath is None:
        return []

    # os.pathsep rather than a literal ":", which would
    # split apart drive letters in Windows paths.
    catdirs = [x for x in catpath.split(os.pathsep) if x]
    if not catdirs:
        return []

    return collect_catalogs(catdirs,
                            recurse, sort, unique, relcwd, selectf)
0185 
0186 
def mkdirpath (dirpath):
    """
    Make all the directories in the path which do not exist yet.

    Like shell's C{mkdir -p}.

    @param dirpath: the directory path to create
    @type dirpath: string

    @returns: the path of topmost created directory, if any
    @rtype: string or C{None}

    @raise OSError: if a directory cannot be created
        (e.g. a component of the path exists but is not a directory)
    """

    path = os.path.normpath(dirpath)

    # Walk up from the full path to find the topmost missing ancestor.
    # Working with os.path.dirname instead of splitting on the separator
    # keeps drive and root prefixes intact on all platforms.
    toppath = None
    probe = path
    while probe and not os.path.isdir(probe):
        toppath = probe
        parent = os.path.dirname(probe)
        if parent == probe:
            # Reached a root which cannot be shortened further.
            break
        probe = parent

    if toppath is not None:
        # exist_ok guards against a directory appearing
        # between the check above and the creation.
        os.makedirs(path, exist_ok=True)

    return toppath
0211 
0212 
def system_wd (cmdline, wdir):
    """
    Execute command line in a specific working directory.

    Like C{os.system}, only switching CWD during execution.
    The original working directory is restored after the command
    finishes, whether it completed normally or an exception was raised.

    @param cmdline: command line to execute
    @type cmdline: string
    @param wdir: working directory for the command
        (the command runs in CWD if C{None} is given)
    @type wdir: path

    @returns: exit code from the command, as reported by C{os.system}
    @rtype: int
    """

    if wdir is None:
        return os.system(cmdline)

    cwd = getucwd()
    try:
        os.chdir(wdir)
        return os.system(cmdline)
    finally:
        # Must run on success too, or the process
        # would stay in wdir after the call.
        os.chdir(cwd)
0237 
0238 
def assert_system (cmdline, echo=False, wdir=None):
    """
    Execute command line and assert success.

    If the command exits with non-zero state, the failure is reported
    through L{error} (which is expected to abort the program).

    C{cmdline} can be either a monolithic string, in which case it is
    executed through a shell, or a list of argument strings,
    when the process is started directly with these arguments.

    C{cmdline} is processed with L{unicode_to_str} to convert any
    unicode strings to raw byte strings in expected system encoding.

    If C{wdir} is given, the current working directory is switched to it
    for the duration of the command and restored afterwards,
    also when starting the command raises an exception.

    @param cmdline: command line to execute
    @type cmdline: string or [string*]
    @param echo: whether to echo the supplied command line
    @type echo: bool
    @param wdir: working directory for the command (CWD if none given)
    @type wdir: path
    """

    if isinstance(cmdline, str):
        shell = True
        args = unicode_to_str(cmdline)
    else:
        shell = False
        # Materialize, as the arguments may be traversed more than once.
        cmdline = list(cmdline)
        args = [unicode_to_str(x) for x in cmdline]

    def readable_cmdline ():
        # Human-readable form of the command, for echoing and messages.
        if isinstance(cmdline, str):
            return cmdline
        return " ".join(map(escape_sh, cmdline))

    if echo:
        report(readable_cmdline())

    if wdir is not None:
        cwd = getucwd()
        os.chdir(wdir)
    try:
        ret = subprocess.call(args, shell=shell)
    finally:
        if wdir is not None:
            os.chdir(cwd)

    if ret:
        if echo:
            error(_("@info",
                    "Non-zero exit from the previous command."))
        else:
            # Report the readable command line,
            # not its byte-converted form.
            error(_("@info",
                    "Non-zero exit from the command:\n%(cmdline)s",
                    cmdline=readable_cmdline()))
0286 
0287 
def collect_system (cmdline, echo=False, wdir=None, env=None, instr=None):
    """
    Execute command line and collect stdout, stderr, and exit code.

    C{cmdline} can be either a monolithic string, in which case it is
    executed through a shell, or a list of argument strings,
    when the process is started directly with these arguments.

    C{cmdline} is processed with L{unicode_to_str} to convert any
    unicode strings to raw byte strings in expected system encoding.
    Collected stdout and stderr are processed with L{str_to_unicode}.

    If C{wdir} is given, the current working directory is switched to it
    for the duration of the command and restored afterwards,
    also when starting the command raises an exception.

    @param cmdline: command line to execute
    @type cmdline: string or [string*]
    @param echo: whether to echo the command line, as well as stdout/stderr
    @type echo: bool
    @param wdir: working directory for the command (CWD if none given)
    @type wdir: path
    @param env: environment for the execution (variable name-value pairs)
    @type env: {string: string}
    @param instr: string to pass to the command stdin
    @type instr: string

    @returns: stdout, stderr, and exit code
    @rtype: (string, string, int)
    """

    if isinstance(cmdline, str):
        shell = True
        if echo:
            report(cmdline)
        args = unicode_to_str(cmdline)
    else:
        shell = False
        # Materialize, as the arguments may be traversed more than once.
        cmdline = list(cmdline)
        if echo:
            report(" ".join(map(escape_sh, cmdline)))
        args = [unicode_to_str(x) for x in cmdline]

    indata = None
    stdin = None
    if instr is not None:
        stdin = subprocess.PIPE
        if isinstance(instr, str):
            indata = instr.encode(locale.getpreferredencoding())
        else:
            indata = instr

    if wdir is not None:
        cwd = getucwd()
        os.chdir(wdir)
    try:
        p = subprocess.Popen(args, shell=shell, env=env,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             stdin=stdin)
        # Feed stdin through communicate(): writing to p.stdin directly
        # while the output pipes are not being drained can deadlock.
        outputs = p.communicate(indata)
    finally:
        if wdir is not None:
            os.chdir(cwd)

    strout, strerr = [str_to_unicode(x) for x in outputs]
    ret = p.returncode

    if echo:
        if strout:
            sys.stdout.write(
                _("@info ^^^ points to the earlier output in the terminal",
                  "===== stdout from the command above =====") + "\n")
            sys.stdout.write(strout)
        if strerr:
            sys.stderr.write(
                _("@info ^^^ points to the earlier output in the terminal",
                  "***** stderr from the command ^^^ *****") + "\n")
            sys.stderr.write(strerr)

    return (strout, strerr, ret)
0353 
0354 
def lines_from_file (filepath, encoding=None):
    """
    Read content of a text file into list of lines.

    Only CR, LF, and CR+LF are treated as line breaks.
    Every returned line ends with a single LF, regardless of the line
    break used in the file (and also if the last line had none).

    If the given file path is not readable, or text cannot be decoded using
    given encoding, a warning is issued and the exception is re-raised.
    If encoding is not given, the encoding specified by the environment
    is used.

    @param filepath: path of the file to read
    @type filepath: string
    @param encoding: text encoding for the file
    @type encoding: string

    @returns: lines
    @rtype: [string...]
    """

    if encoding is None:
        encoding = locale.getpreferredencoding()

    try:
        # newline="" turns off newline translation, so that line breaks
        # reach the splitting below exactly as they are in the file.
        ifl = open(filepath, "r", encoding=encoding, newline="")
    except Exception:
        warning(_("@info",
                  "Cannot open '%(file)s' for reading.",
                  file=filepath))
        raise
    # The with-block closes the file also when reading fails.
    with ifl:
        try:
            content = ifl.read()
        except Exception:
            warning(_("@info",
                      "Cannot read content of '%(file)s' using %(enc)s encoding.",
                      file=filepath, enc=encoding))
            raise

    lines = [x + "\n" for x in re.split(r"\r\n|\r|\n", content)]
    # ...no file.readlines(), it treats some other characters as line breaks.
    if lines[-1] == "\n":
        # If the file ended properly in a line break, the last line will be
        # phony, from the empty element split out by the last line break.
        lines.pop()

    return lines
0401 
0402 
def join_ncwd (*elements):
    """
    Join path and normalize it with respect to current working directory.

    Path elements are joined with C{os.path.join} and the joined path
    normalized by C{os.path.normpath}.
    The normalized path is then made relative to current working directory
    if it points to a location within current working directory.
    A path pointing to the current working directory itself
    is only normalized.

    @param elements: path elements
    @type elements: varlist

    @returns: normalized joined path
    @rtype: string
    """

    path = os.path.join(*elements)

    # Prefix which any path inside the current working directory must have.
    # When the working directory is a filesystem root it already ends with
    # the separator, and adding another one would never match.
    cwd = getucwd()
    if not cwd.endswith(os.path.sep):
        cwd += os.path.sep

    apath = os.path.abspath(path)
    # The length test prevents returning an empty path
    # when the path is the root working directory itself.
    if apath.startswith(cwd) and len(apath) > len(cwd):
        return apath[len(cwd):]

    return os.path.normpath(path)
0428 
0429 
def str_to_unicode (strarg):
    """
    Convert a raw string value or sequence of values into Unicode.

    Strings coming in from the environment are frequently raw byte
    sequences, and need to be converted into Unicode strings according
    to system locale (e.g. command-line arguments).
    This function takes either a single raw string or any sequence
    of raw strings, and converts it into a Unicode string or list thereof.
    Bytes which cannot be decoded in the locale encoding are replaced
    rather than causing an error.

    If the input value is not a single raw or unicode string,
    it is assumed to be a sequence of values.
    Values in the sequence which are not raw strings
    are carried over into the result as-is.

    @param strarg: input string or sequence
    @type strarg: string, unicode, or sequence of objects

    @returns: unicode string or sequence of objects
    @rtype: unicode string or list of objects
    """

    # Already Unicode, nothing to do.
    if isinstance(strarg, str):
        return strarg

    encoding = locale.getpreferredencoding()

    def decoded (value):
        if isinstance(value, bytes):
            return value.decode(encoding, "replace")
        return value

    if isinstance(strarg, bytes):
        return decoded(strarg)

    return [decoded(value) for value in strarg]
0466 
0467 
def unicode_to_str (strarg):
    """
    Convert a unicode string into raw byte sequence.

    Strings going to the environment should frequently be raw byte
    sequences, and need to be converted from Unicode strings according
    to system locale (e.g. command-line arguments).
    This function takes either a single Unicode string or any sequence
    of Unicode strings, and converts it into a raw string or list thereof.

    If the input value is not a single raw or unicode string,
    it is assumed to be a sequence of values.
    Values in the sequence which are not Unicode strings
    are carried over into the result as-is.

    @param strarg: input string or sequence
    @type strarg: string, unicode, or sequence of objects

    @returns: raw string or sequence of objects
    @rtype: raw string or list of objects
    """

    # Already raw bytes, nothing to do.
    if isinstance(strarg, bytes):
        return strarg

    encoding = locale.getpreferredencoding()

    def encoded (value):
        if isinstance(value, str):
            return value.encode(encoding)
        return value

    if isinstance(strarg, str):
        return encoded(strarg)

    return [encoded(value) for value in strarg]
0504 
0505 
def get_env_langs ():
    """
    Guess user's preferred languages from the environment.

    Environment variables C{LANGUAGE}, C{LC_ALL} and C{LANG} are examined,
    in that order, to collect the list of languages in which the user
    may be wanting to read or write in the environment.
    The list is ordered from most to least preferred language,
    and may be empty.
    Languages are given by their ISO-639 codes, possibly extended with
    country and modifier as found in locale strings
    (e.g. C{ll_CC@mod}, C{ll_CC}, C{ll@mod}, C{ll}).

    @returns: preferred languages
    @rtype: [string...]
    """

    def cut_last (value, sep):
        # Split off the part after the last separator, if there is any.
        head, found, tail = value.rpartition(sep)
        if found:
            return head, tail
        return value, None

    candidates = []

    # Variables which contain colon-separated language strings.
    for varname in ["LANGUAGE"]:
        candidates.extend(os.getenv(varname, "").split(":"))

    # Variables which contain locale string:
    # split into parts, and assemble possible language codes
    # from most to least specific.
    for varname in ["LC_ALL", "LANG"]:
        # Parts must be peeled off from the end in this order:
        # modifier, encoding, country.
        lng, mod = cut_last(os.getenv(varname, ""), "@")
        lng, enc = cut_last(lng, ".")
        lng, ctr = cut_last(lng, "_")

        if not lng:
            continue
        if ctr and mod:
            candidates.append("%s_%s@%s" % (lng, ctr, mod))
        if ctr:
            candidates.append("%s_%s" % (lng, ctr))
        if mod:
            candidates.append("%s@%s" % (lng, mod))
        candidates.append(lng)

    # Normalize codes, remove empty and any duplicates (but keep order).
    stripped = (x.strip() for x in candidates)
    return list(dict.fromkeys(x for x in stripped if x))
0562 
0563 
def term_width (stream=sys.stdout, default=None):
    """
    Get number of columns in the terminal of output stream.

    If the output stream is not linked to the terminal, 0 is returned.
    If the output stream is linked to the terminal, but the number of columns
    cannot be determined, the supplied default value is returned instead.

    @param stream: output stream for which the terminal is looked up
    @type stream: file
    @param default: value to return if width cannot be determined
    @type default: int

    @returns: width of the terminal in columns
    @rtype: int
    """

    if not stream.isatty():
        return 0

    try:
        # Imported here since the curses module
        # may be missing on some platforms.
        import curses
        curses.setupterm()
    except Exception:
        # Not a bare except, so that keyboard interrupts and
        # exit requests are not silently turned into the default.
        return default

    # A negative value means the capability is not available.
    ncols = curses.tigetnum("cols")

    return ncols if ncols >= 0 else default
0593 
0594 
def build_path_selector (incnames=None, incpaths=None,
                         excnames=None, excpaths=None,
                         ormatch=False):
    """
    Build a path selection function based on inclusion-exclusion condition.

    Frequently a collection of paths needs to be filtered,
    to pass only specific paths (inclusion),
    or to block only specific paths (exclusion), or both.
    Filtering conditions are normally posed on full paths,
    but frequently file base names without extensions are really tested.

    This function builds a selector function which takes a path and
    returns C{True} to select the path or C{False} to discard it,
    based on four sets of conditions: inclusions by base name without
    extension (C{incnames}), inclusion by full path (C{incpaths}),
    exclusions by base name without extension (C{excnames}), and
    exclusions by full path (C{excpaths}).
    Each condition in each of the sets can be a regular expression string,
    an object with C{search(string)} method returning true or false value
    (e.g. compiled regular expression), or a general function taking string
    and returning true or false value.
    Conditions on full paths are tested against the absolute path.

    If C{ormatch} is C{False}, the path is included if there are
    no inclusion conditions or all inclusion conditions match;
    the path is excluded if there is at least one exclusion condition
    and all exclusion conditions match.
    If C{ormatch} is C{True}, the path is included if there are
    no inclusion conditions or at least one of them matches;
    the path is excluded if at least one exclusion condition matches.

    If no conditions are given at all, the selector returns
    C{True} for any path.

    @param incnames: conditions for inclusion by base name without extension
    @type incnames: sequence (see description)
    @param incpaths: conditions for inclusion by full path
    @type incpaths: sequence (see description)
    @param excnames: conditions for exclusion by base name without extension
    @type excnames: sequence (see description)
    @param excpaths: conditions for exclusion by full path
    @type excpaths: sequence (see description)
    @param ormatch: whether conditions are linked with OR
    @type ormatch: bool

    @returns: path selection function
    @rtype: (string)->bool
    """

    # Shortcut to avoid complicated selector function.
    # It must return a real boolean: returning the path itself
    # would discard the empty path and leak strings to callers.
    if not (incnames or incpaths or excnames or excpaths):
        return lambda x: True

    incnames_tf = _build_path_selector_type(incnames)
    incpaths_tf = _build_path_selector_type(incpaths)
    excnames_tf = _build_path_selector_type(excnames)
    excpaths_tf = _build_path_selector_type(excpaths)
    sumf = any if ormatch else all

    def selector (path):
        path = os.path.abspath(path)
        name = None
        if incnames_tf or excnames_tf:
            name = os.path.basename(os.path.normpath(path))
            # Strip the last extension, but leave alone
            # names which only start with a dot.
            p = name.rfind(".")
            if p > 0:
                name = name[:p]
        incress = (  [test(name) for test in incnames_tf]
                   + [test(path) for test in incpaths_tf])
        excress = (  [test(name) for test in excnames_tf]
                   + [test(path) for test in excpaths_tf])
        return (    (not incress or sumf(incress))
                and (not excress or not sumf(excress)))

    return selector
0669 
0670 
def _build_path_selector_type (sels):
    """
    Convert selection conditions into functions testing a string.

    Each condition may be an object with a C{search} method
    (e.g. a compiled regular expression), a regular expression string,
    or a callable, which is used as it is.

    @param sels: selection conditions
    @type sels: sequence (see description) or C{None}

    @returns: test functions, one per condition, in the same order
    @rtype: [(string)->bool...]

    @raise PologyError: if a condition is of none of the supported kinds
    """

    def as_test (sel):
        # Objects with search() take precedence over plain callables.
        if hasattr(sel, "search"):
            return lambda x: bool(sel.search(x))
        if isinstance(sel, str):
            rx = re.compile(sel, re.U)
            return lambda x: bool(rx.search(x))
        if callable(sel):
            return sel
        raise PologyError(
            _("@info",
              "Cannot convert object '%(obj)s' into a string matcher.",
              obj=sel))

    if not sels:
        return []

    return [as_test(sel) for sel in sels]
0692 
0693 
# Markers of inclusion-exclusion directives in files listing paths
# (see collect_paths_from_file). A directive line starts with the head
# marker, followed by one of the type markers, followed by the regex.
_dhead = ":" # start of any directive line
_dincname = "+" # include by base name without extension
_dincpath = "/+" # include by full path
_dexcname = "-" # exclude by base name without extension
_dexcpath = "/-" # exclude by full path
0699 
def collect_paths_from_file (fpath, cmnts=True, incexc=True, respathf=None,
                             getsel=False, abort=False):
    """
    Collect list of paths from the file.

    In general, non-empty lines in the file are taken to be paths,
    and empty lines are skipped.
    If C{cmnts} is C{True}, then also the lines starting with C{'#'}
    are skipped as comments.

    The C{respathf} parameter provides a function to be applied to each path
    and return a list of paths, which then substitute the original path.
    This function can be used, for example, to recursively collect files
    from listed directories, or to exclude paths by an external condition.

    If C{incexc} is C{True}, then the lines starting with C{':'}
    define directives by which files and directories are included
    or excluded from the final list.
    Inclusion-exclusion directives are mostly useful when some of the paths
    are directories, and C{respathf} parameter is used to provide
    a function to collect subpaths from listed directories;
    the inclusion-exclusion directives are applied to those subpaths too.
    The directives are as follows:
      - C{:-REGEX}: excludes path if its base name without extension
            matches the regular expression
      - C{:/-REGEX}: excludes path if it matches the regular expression
      - C{:+REGEX}: includes path only if its base name without extension
            matches the regular expression
      - C{:/+REGEX}: includes path only if it matches the regular expression
    The path is included if there are no inclusion directives,
    or it matches at least one inclusion directive;
    the path is excluded if it matches at least one exclusion directive.
    Inclusion-exclusion directives are given to L{build_path_selector}
    to create the path selection function (with C{ormatch} set to C{True}),
    which is then used to filter collected paths
    (after application of C{respathf}, if given).

    If C{getsel} is set to C{True}, the selection function is returned
    instead of being applied to read paths immediately.
    This is useful in case the C{respathf} parameter is not sufficient
    to resolve paths, but more complex processing is required
    (e.g. if files are to be collected from directories externally,
    instead with C{respathf}).
    If there were no inclusion-exclusion directives in the file,
    the resulting selection function will return C{True} for any path.

    @param fpath: the path to file which contains paths
    @type fpath: string
    @param cmnts: whether the file can contain comments
    @type cmnts: bool
    @param incexc: whether the file can contain inclusion-exclusion directives
    @type incexc: boolean
    @param respathf: function to resolve collected paths
    @type respathf: (string)->[string...]
    @param getsel: whether to return constructed path selection function
        instead of applying it
    @type getsel: bool
    @param abort: whether to abort the execution on exceptions from
        path resolution or selection functions
    @type abort: bool

    @returns: collected paths, possibly with path selection function
    @rtype: [string...] or ([string...], (string)->bool)

    @raise PologyError: if the file contains an unknown directive,
        or a directive with invalid regular expression
    """

    paths = []
    incnames = []
    incpaths = []
    excnames = []
    excpaths = []
    directives = (
        (incnames, _dincname), (incpaths, _dincpath),
        (excnames, _dexcname), (excpaths, _dexcpath),
    )

    # Read everything at once, making sure the file gets closed.
    with open(fpath) as ifl:
        lines = ifl.read().split("\n")

    for lno, line in enumerate(lines, 1):
        if not line or (cmnts and line.startswith("#")):
            continue

        if not (incexc and line.startswith(_dhead)):
            paths.append(line)
            continue

        directive = line[len(_dhead):]
        for sels, shead in directives:
            if not directive.startswith(shead):
                continue
            try:
                rx = re.compile(directive[len(shead):], re.U)
            except (re.error, OverflowError):
                raise PologyError(
                    _("@info",
                      "Invalid regular expression in inclusion/"
                      "exclusion directive at %(file)s:%(line)d.",
                      file=fpath, line=lno))
            sels.append(rx)
            break
        else:
            # No known directive type matched.
            raise PologyError(
                _("@info",
                  "Unknown inclusion/exclusion directive "
                  "at %(file)s:%(line)d.",
                  file=fpath, line=lno))

    if respathf:
        try:
            # Flatten the lists of resolved paths in a single pass.
            paths = [rpath for path in paths for rpath in respathf(path)]
        except Exception as e:
            if not abort:
                raise
            error(str_to_unicode(str(e)))

    selectf = build_path_selector(incnames=incnames, incpaths=incpaths,
                                  excnames=excnames, excpaths=excpaths,
                                  ormatch=True)
    if getsel:
        return paths, selectf

    try:
        paths = list(filter(selectf, paths))
    except Exception as e:
        if not abort:
            raise
        error(str_to_unicode(str(e)))
    return paths
0828 
0829 
def collect_paths_cmdline (rawpaths=None,
                           incnames=None, incpaths=None,
                           excnames=None, excpaths=None,
                           ormatch=False,
                           filesfrom=None, cmnts=True, incexc=True,
                           elsecwd=False, respathf=None,
                           getsel=False,
                           abort=False):
    """
    Gather paths from the sources a command line script usually offers.

    A path-processing script typically receives paths directly
    (free command line arguments, standard input) and indirectly,
    through files that list paths (named by a command line option).
    It may also take options to include or exclude paths by matching.
    This function combines all of that into a single call.

    Paths are collected in this order:
      - every path in C{rawpaths}, expanded through C{respathf}
        if it is given;
      - for every file in C{filesfrom}, the paths produced by
        L{collect_paths_from_file}, which receives C{cmnts}, C{incexc},
        C{respathf}, C{getsel} and C{abort} unchanged;
      - if C{elsecwd} is C{True} and both C{rawpaths} and C{filesfrom}
        are missing or empty, the current working directory,
        expanded through C{respathf} if it is given.

    A path selection function is then built from C{incnames},
    C{incpaths}, C{excnames}, C{excpaths} and C{ormatch}
    by L{build_path_selector} (see there for the meaning of these
    parameters).

    If C{getsel} is C{False}, collected paths are filtered through
    that selection function and the remaining paths are returned.
    If C{getsel} is C{True}, the paths are returned unfiltered,
    together with a selection function. When L{collect_paths_from_file}
    delivered selection functions of its own, the returned function
    joins them with the top one: with C{ormatch} set, a path passes if
    the top function or any per-file function accepts it, otherwise
    only if the top function and all per-file functions accept it.

    Exceptions raised by C{respathf}, or by the selection function
    during filtering, are re-raised when C{abort} is C{False};
    when it is C{True} their message is handed to C{error()} instead.

    @param rawpaths: directly supplied paths
    @type rawpaths: [string...]
    @param incnames: inclusion conditions, see L{build_path_selector}
    @param incpaths: inclusion conditions, see L{build_path_selector}
    @param excnames: exclusion conditions, see L{build_path_selector}
    @param excpaths: exclusion conditions, see L{build_path_selector}
    @param ormatch: how selection conditions are linked
    @type ormatch: bool
    @param filesfrom: paths of files which contain lists of paths
    @type filesfrom: [string...]
    @param cmnts: file format option, see L{collect_paths_from_file}
    @type cmnts: bool
    @param incexc: file format option, see L{collect_paths_from_file}
    @type incexc: bool
    @param elsecwd: whether to fall back to current working directory
        when no other source of paths is given
    @type elsecwd: bool
    @param respathf: function to resolve collected paths
    @type respathf: (string)->[string...]
    @param getsel: whether to return constructed path selection function
        instead of applying it
    @type getsel: bool
    @param abort: whether to abort the execution on exceptions from
        path resolution or selection functions
    @type abort: bool

    @returns: collected paths, possibly with path selection function
    @rtype: [string...] or ([string...], (string)->bool)
    """

    def on_failure (exc):
        # Only ever called from within an except clause,
        # so the bare raise re-raises the exception being handled.
        if not abort:
            raise
        error(str_to_unicode(str(exc)))

    collected = []

    # Paths supplied directly come first.
    if rawpaths:
        direct = rawpaths
        if respathf:
            try:
                direct = sum([respathf(rp) for rp in rawpaths], [])
            except Exception as exc:
                on_failure(exc)
        collected.extend(direct)

    # Next, paths listed in files; remember per-file selectors if any.
    file_selectors = []
    for listpath in filesfrom or ():
        found = collect_paths_from_file(listpath, cmnts, incexc,
                                        respathf, getsel=getsel,
                                        abort=abort)
        if getsel:
            found, file_selector = found
            file_selectors.append(file_selector)
        collected.extend(found)

    # With no source of paths at all, optionally fall back
    # to the current working directory.
    if elsecwd and not (rawpaths or filesfrom):
        here = getucwd()
        if not respathf:
            collected.append(here)
        else:
            try:
                collected.extend(respathf(here))
            except Exception as exc:
                on_failure(exc)

    top_selector = build_path_selector(incnames=incnames, incpaths=incpaths,
                                       excnames=excnames, excpaths=excpaths,
                                       ormatch=ormatch)
    if not file_selectors:
        selector = top_selector
    elif ormatch:
        def selector (p):
            return top_selector(p) or any([fs(p) for fs in file_selectors])
    else:
        def selector (p):
            return top_selector(p) and all([fs(p) for fs in file_selectors])

    if getsel:
        return collected, selector

    try:
        collected = [cp for cp in collected if selector(cp)]
    except Exception as exc:
        on_failure(exc)
    return collected
0960 
0961 
def getucwd ():
    """
    Get path of current working directory as Unicode string.

    The value reported by C{os.getcwd()} is passed through
    L{str_to_unicode}, and the result of that call is returned.

    @returns: path of current working directory
    @rtype: string
    """

    return str_to_unicode(os.getcwd())
0977 
0978 
def exit_on_exception (func, cleanup=None):
    """
    Gracefully exit a Pology script when an exception is received.

    C{func} is called without arguments and its return value is discarded.
    If it completes normally, nothing else is done
    (in particular, C{cleanup} is not called).

    If C{func} raises an exception, an empty report line without
    trailing newline is issued first (to clear any progress line),
    and then C{cleanup} is called, if given.
    After that:
      - on keyboard interrupt, the script exits silently
        with exit code 100;
      - on any other C{Exception}, the exception message is passed
        to C{error()} with code 1.

    The backtrace can be shown instead (on non-keyboard interrupt exceptions)
    by setting C{[global]/show-backtrace} user configuration field to true;
    the exception is then re-raised after the cleanup.

    Exceptions which derive neither from C{Exception} nor from
    C{KeyboardInterrupt} (e.g. C{SystemExit}) are not intercepted.

    @param func: a zero-argument function
    @type func: () -> any
    @param cleanup: a zero-argument function to execute before exiting
    @type cleanup: () -> any

    @returns: nothing
    @rtype: None
    """

    try:
        func()
    except KeyboardInterrupt:
        report("", newline=False)
        if cleanup:
            cleanup()
        # sys.exit rather than the exit() builtin: the latter is injected
        # by the site module for interactive use and is not guaranteed
        # to exist (e.g. with python -S or in frozen applications).
        sys.exit(100)
    except Exception as e:
        report("", newline=False)
        if cleanup:
            cleanup()
        if pology.config.section("global").boolean("show-backtrace"):
            raise
        else:
            error(str_to_unicode(str(e)), code=1)
1013