File indexing completed on 2024-05-12 03:55:00

0001 /*
0002     This file is part of the KDE libraries
0003 
0004     SPDX-FileCopyrightText: 2007 Bernhard Loos <nhuh.put@web.de>
0005     SPDX-FileCopyrightText: 2007, 2008 Oswald Buddenhagen <ossi@kde.org>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 
0010 #include "kshell.h"
0011 #include "kshell_p.h"
0012 
0013 #include <QDir>
0014 #include <QRegularExpression>
0015 #include <QString>
0016 #include <QStringList>
0017 
0018 /*
0019  * A short introduction to cmd semantics:
0020  * - Variable expansion is done first, without regard to *any* escaping -
0021  *   if something looks like an existing variable, it is replaced.
0022  * - Then follows regular tokenization by the shell. &, &&, | and || are
0023  *   command delimiters. ( and ) are command grouping operators; they are
0024  *   recognized only at the start resp. end of a command; mismatched )s are
0025  *   an error if any (s are present. <, > are just like under UNIX - they can
0026  *   appear *anywhere* in a command, perform their function and are cut out.
0027  *   @ at the start of a command is eaten (local echo off - no function as
0028  *   far as cmd /c is concerned). : at the start of a command declares a label,
0029  *   which effectively means the remainder of the line is a comment - note that
0030  *   command separators are not recognized past that point.
0031  *   ^ is the escape char for everything including itself.
0032  *   cmd ignores *all* special chars between double quotes, so there is no
0033  *   way to escape the closing quote. Note that the quotes are *not* removed
0034  *   from the resulting command line.
0035  * - Then follows delayed variable expansion if it is enabled and at least
0036  *   one exclamation mark is present. This involves another layer of ^
0037  *   escaping, regardless of quotes. (Win2k+)
0038  * - Then follows argument splitting as described in
0039  *   http://msdn2.microsoft.com/en-us/library/ms880421.aspx .
0040  *   Note that this is done by the called application and therefore might
0041  *   be subject to completely different semantics, in fact.
0042  */
0043 
0044 inline static bool isMetaChar(ushort c)
0045 {
0046     static const uchar iqm[] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10}; // &()<>|
0047 
0048     return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
0049 }
0050 
0051 inline static bool isSpecialChar(ushort c)
0052 {
0053     // Chars that should be quoted (TM). This includes:
0054     // - control chars & space
0055     // - the shell meta chars &()<>^|
0056     // - the potential separators ,;=
0057     static const uchar iqm[] = {0xff, 0xff, 0xff, 0xff, 0x41, 0x13, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10};
0058 
0059     return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7)));
0060 }
0061 
0062 inline static bool isWhiteSpace(ushort c)
0063 {
0064     return c == ' ' || c == '\t';
0065 }
0066 
0067 QStringList KShell::splitArgs(const QString &_args, Options flags, Errors *err)
0068 {
0069     QString args(_args);
0070     QStringList ret;
0071 
0072     const QLatin1Char bs('\\'), dq('\"');
0073 
0074     if (flags & AbortOnMeta) {
0075         args.remove(PERCENT_ESCAPE);
0076         if (args.indexOf(QLatin1Char('%')) >= 0) {
0077             if (err) {
0078                 *err = FoundMeta;
0079             }
0080             return QStringList();
0081         }
0082 
0083         args = _args;
0084         args.replace(PERCENT_ESCAPE, QLatin1String("%"));
0085 
0086         if (!args.isEmpty() && args[0].unicode() == '@') {
0087             args.remove(0, 1);
0088         }
0089 
0090         for (int p = 0; p < args.length(); p++) {
0091             ushort c = args[p].unicode();
0092             if (c == '^') {
0093                 args.remove(p, 1);
0094             } else if (c == '"') {
0095                 while (++p < args.length() && args[p].unicode() != '"')
0096                     ;
0097             } else if (isMetaChar(c)) {
0098                 if (err) {
0099                     *err = FoundMeta;
0100                 }
0101                 return QStringList();
0102             }
0103         }
0104     }
0105 
0106     if (err) {
0107         *err = NoError;
0108     }
0109 
0110     int p = 0;
0111     const int length = args.length();
0112     for (;;) {
0113         while (p < length && isWhiteSpace(args[p].unicode())) {
0114             ++p;
0115         }
0116         if (p == length) {
0117             return ret;
0118         }
0119 
0120         QString arg;
0121         bool inquote = false;
0122         for (;;) {
0123             bool copy = true; // copy this char
0124             int bslashes = 0; // number of preceding backslashes to insert
0125             while (p < length && args[p] == bs) {
0126                 ++p;
0127                 ++bslashes;
0128             }
0129             if (p < length && args[p] == dq) {
0130                 if (bslashes % 2 == 0) {
0131                     // Even number of backslashes, so the quote is not escaped.
0132                     if (inquote) {
0133                         if (p + 1 < length && args[p + 1] == dq) {
0134                             // Two consecutive quotes make a literal quote.
0135                             // This is not documented on MSDN.
0136                             ++p;
0137                         } else {
0138                             // Closing quote
0139                             copy = false;
0140                             inquote = !inquote;
0141                         }
0142                     } else {
0143                         // Opening quote
0144                         copy = false;
0145                         inquote = !inquote;
0146                     }
0147                 }
0148                 bslashes /= 2;
0149             }
0150 
0151             while (--bslashes >= 0) {
0152                 arg.append(bs);
0153             }
0154 
0155             if (p == length || (!inquote && isWhiteSpace(args[p].unicode()))) {
0156                 ret.append(arg);
0157                 if (inquote) {
0158                     if (err) {
0159                         *err = BadQuoting;
0160                     }
0161                     return QStringList();
0162                 }
0163                 break;
0164             }
0165 
0166             if (copy) {
0167                 arg.append(args[p]);
0168             }
0169             ++p;
0170         }
0171     }
0172     // not reached
0173 }
0174 
0175 QString KShell::quoteArgInternal(const QString &arg, bool _inquote)
0176 {
0177     // Escape quotes, preceding backslashes are doubled. Surround with quotes.
0178     // Note that cmd does not understand quote escapes in quoted strings,
0179     // so the quoting needs to be "suspended".
0180     const QLatin1Char bs('\\'), dq('\"');
0181     QString ret;
0182     bool inquote = _inquote;
0183     int bslashes = 0;
0184     for (int p = 0; p < arg.length(); p++) {
0185         if (arg[p] == bs) {
0186             bslashes++;
0187         } else if (arg[p] == dq) {
0188             if (inquote) {
0189                 ret.append(dq);
0190                 inquote = false;
0191             }
0192             for (; bslashes; bslashes--) {
0193                 ret.append(QLatin1String("\\\\"));
0194             }
0195             ret.append(QLatin1String("\\^\""));
0196         } else {
0197             if (!inquote) {
0198                 ret.append(dq);
0199                 inquote = true;
0200             }
0201             for (; bslashes; bslashes--) {
0202                 ret.append(bs);
0203             }
0204             ret.append(arg[p]);
0205         }
0206     }
0207     ret.replace(QLatin1Char('%'), PERCENT_ESCAPE);
0208     if (bslashes) {
0209         // Ensure that we don't have directly trailing backslashes,
0210         // so concatenating with another string won't cause surprises.
0211         if (!inquote && !_inquote) {
0212             ret.append(dq);
0213         }
0214         for (; bslashes; bslashes--) {
0215             ret.append(QLatin1String("\\\\"));
0216         }
0217         ret.append(dq);
0218         if (inquote && _inquote) {
0219             ret.append(dq);
0220         }
0221     } else if (inquote != _inquote) {
0222         ret.append(dq);
0223     }
0224     return ret;
0225 }
0226 
0227 QString KShell::quoteArg(const QString &arg)
0228 {
0229     if (arg.isEmpty()) {
0230         return QStringLiteral("\"\"");
0231     }
0232 
0233     // Ensure that we don't have directly trailing backslashes,
0234     // so concatenating with another string won't cause surprises.
0235     if (arg.endsWith(QLatin1Char('\\'))) {
0236         return quoteArgInternal(arg, false);
0237     }
0238 
0239     for (int x = arg.length() - 1; x >= 0; --x)
0240         if (isSpecialChar(arg[x].unicode())) {
0241             return quoteArgInternal(arg, false);
0242         }
0243 
0244     // Escape quotes. Preceding backslashes are doubled.
0245     // Note that the remaining string is not quoted.
0246     QString ret(arg);
0247     ret.replace(QRegularExpression(QStringLiteral("(\\\\*)\"")), QStringLiteral("\\1\\1\\^\""));
0248     ret.replace(QLatin1Char('%'), PERCENT_ESCAPE);
0249     return ret;
0250 }