File indexing completed on 2024-05-12 03:55:00
0001 /* 0002 This file is part of the KDE libraries 0003 0004 SPDX-FileCopyrightText: 2007 Bernhard Loos <nhuh.put@web.de> 0005 SPDX-FileCopyrightText: 2007, 2008 Oswald Buddenhagen <ossi@kde.org> 0006 0007 SPDX-License-Identifier: LGPL-2.0-or-later 0008 */ 0009 0010 #include "kshell.h" 0011 #include "kshell_p.h" 0012 0013 #include <QDir> 0014 #include <QRegularExpression> 0015 #include <QString> 0016 #include <QStringList> 0017 0018 /* 0019 * A short introduction into cmd semantics: 0020 * - Variable expansion is done first, without regard to *any* escaping - 0021 * if something looks like an existing variable, it is replaced. 0022 * - Then follows regular tokenization by the shell. &, &&, | and || are 0023 * command delimiters. ( and ) are command grouping operators; they are 0024 * recognized only a the start resp. end of a command; mismatched )s are 0025 * an error if any (s are present. <, > are just like under UNIX - they can 0026 * appear *anywhere* in a command, perform their function and are cut out. 0027 * @ at the start of a command is eaten (local echo off - no function as 0028 * far as cmd /c is concerned). : at the start of a command declares a label, 0029 * which effectively means the remainder of the line is a comment - note that 0030 * command separators are not recognized past that point. 0031 * ^ is the escape char for everything including itself. 0032 * cmd ignores *all* special chars between double quotes, so there is no 0033 * way to escape the closing quote. Note that the quotes are *not* removed 0034 * from the resulting command line. 0035 * - Then follows delayed variable expansion if it is enabled and at least 0036 * one exclamation mark is present. This involves another layer of ^ 0037 * escaping, regardless of quotes. (Win2k+) 0038 * - Then follows argument splitting as described in 0039 * http://msdn2.microsoft.com/en-us/library/ms880421.aspx . 0040 * Note that this is done by the called application and therefore might 0041 * be subject to completely different semantics, in fact. 0042 */ 0043 0044 inline static bool isMetaChar(ushort c) 0045 { 0046 static const uchar iqm[] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10}; // &()<>| 0047 0048 return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); 0049 } 0050 0051 inline static bool isSpecialChar(ushort c) 0052 { 0053 // Chars that should be quoted (TM). This includes: 0054 // - control chars & space 0055 // - the shell meta chars &()<>^| 0056 // - the potential separators ,;= 0057 static const uchar iqm[] = {0xff, 0xff, 0xff, 0xff, 0x41, 0x13, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10}; 0058 0059 return (c < sizeof(iqm) * 8) && (iqm[c / 8] & (1 << (c & 7))); 0060 } 0061 0062 inline static bool isWhiteSpace(ushort c) 0063 { 0064 return c == ' ' || c == '\t'; 0065 } 0066 0067 QStringList KShell::splitArgs(const QString &_args, Options flags, Errors *err) 0068 { 0069 QString args(_args); 0070 QStringList ret; 0071 0072 const QLatin1Char bs('\\'), dq('\"'); 0073 0074 if (flags & AbortOnMeta) { 0075 args.remove(PERCENT_ESCAPE); 0076 if (args.indexOf(QLatin1Char('%')) >= 0) { 0077 if (err) { 0078 *err = FoundMeta; 0079 } 0080 return QStringList(); 0081 } 0082 0083 args = _args; 0084 args.replace(PERCENT_ESCAPE, QLatin1String("%")); 0085 0086 if (!args.isEmpty() && args[0].unicode() == '@') { 0087 args.remove(0, 1); 0088 } 0089 0090 for (int p = 0; p < args.length(); p++) { 0091 ushort c = args[p].unicode(); 0092 if (c == '^') { 0093 args.remove(p, 1); 0094 } else if (c == '"') { 0095 while (++p < args.length() && args[p].unicode() != '"') 0096 ; 0097 } else if (isMetaChar(c)) { 0098 if (err) { 0099 *err = FoundMeta; 0100 } 0101 return QStringList(); 0102 } 0103 } 0104 } 0105 0106 if (err) { 0107 *err = NoError; 0108 } 0109 0110 int p = 0; 0111 const int length = args.length(); 0112 for (;;) { 0113 while (p < length && isWhiteSpace(args[p].unicode())) { 0114 ++p; 0115 } 0116 if (p == length) { 0117 return ret; 0118 } 0119 0120 QString arg; 0121 bool inquote = false; 0122 for (;;) { 0123 bool copy = true; // copy this char 0124 int bslashes = 0; // number of preceding backslashes to insert 0125 while (p < length && args[p] == bs) { 0126 ++p; 0127 ++bslashes; 0128 } 0129 if (p < length && args[p] == dq) { 0130 if (bslashes % 2 == 0) { 0131 // Even number of backslashes, so the quote is not escaped. 0132 if (inquote) { 0133 if (p + 1 < length && args[p + 1] == dq) { 0134 // Two consecutive quotes make a literal quote. 0135 // This is not documented on MSDN. 0136 ++p; 0137 } else { 0138 // Closing quote 0139 copy = false; 0140 inquote = !inquote; 0141 } 0142 } else { 0143 // Opening quote 0144 copy = false; 0145 inquote = !inquote; 0146 } 0147 } 0148 bslashes /= 2; 0149 } 0150 0151 while (--bslashes >= 0) { 0152 arg.append(bs); 0153 } 0154 0155 if (p == length || (!inquote && isWhiteSpace(args[p].unicode()))) { 0156 ret.append(arg); 0157 if (inquote) { 0158 if (err) { 0159 *err = BadQuoting; 0160 } 0161 return QStringList(); 0162 } 0163 break; 0164 } 0165 0166 if (copy) { 0167 arg.append(args[p]); 0168 } 0169 ++p; 0170 } 0171 } 0172 // not reached 0173 } 0174 0175 QString KShell::quoteArgInternal(const QString &arg, bool _inquote) 0176 { 0177 // Escape quotes, preceding backslashes are doubled. Surround with quotes. 0178 // Note that cmd does not understand quote escapes in quoted strings, 0179 // so the quoting needs to be "suspended". 0180 const QLatin1Char bs('\\'), dq('\"'); 0181 QString ret; 0182 bool inquote = _inquote; 0183 int bslashes = 0; 0184 for (int p = 0; p < arg.length(); p++) { 0185 if (arg[p] == bs) { 0186 bslashes++; 0187 } else if (arg[p] == dq) { 0188 if (inquote) { 0189 ret.append(dq); 0190 inquote = false; 0191 } 0192 for (; bslashes; bslashes--) { 0193 ret.append(QLatin1String("\\\\")); 0194 } 0195 ret.append(QLatin1String("\\^\"")); 0196 } else { 0197 if (!inquote) { 0198 ret.append(dq); 0199 inquote = true; 0200 } 0201 for (; bslashes; bslashes--) { 0202 ret.append(bs); 0203 } 0204 ret.append(arg[p]); 0205 } 0206 } 0207 ret.replace(QLatin1Char('%'), PERCENT_ESCAPE); 0208 if (bslashes) { 0209 // Ensure that we don't have directly trailing backslashes, 0210 // so concatenating with another string won't cause surprises. 0211 if (!inquote && !_inquote) { 0212 ret.append(dq); 0213 } 0214 for (; bslashes; bslashes--) { 0215 ret.append(QLatin1String("\\\\")); 0216 } 0217 ret.append(dq); 0218 if (inquote && _inquote) { 0219 ret.append(dq); 0220 } 0221 } else if (inquote != _inquote) { 0222 ret.append(dq); 0223 } 0224 return ret; 0225 } 0226 0227 QString KShell::quoteArg(const QString &arg) 0228 { 0229 if (arg.isEmpty()) { 0230 return QStringLiteral("\"\""); 0231 } 0232 0233 // Ensure that we don't have directly trailing backslashes, 0234 // so concatenating with another string won't cause surprises. 0235 if (arg.endsWith(QLatin1Char('\\'))) { 0236 return quoteArgInternal(arg, false); 0237 } 0238 0239 for (int x = arg.length() - 1; x >= 0; --x) 0240 if (isSpecialChar(arg[x].unicode())) { 0241 return quoteArgInternal(arg, false); 0242 } 0243 0244 // Escape quotes. Preceding backslashes are doubled. 0245 // Note that the remaining string is not quoted. 0246 QString ret(arg); 0247 ret.replace(QRegularExpression(QStringLiteral("(\\\\*)\"")), QStringLiteral("\\1\\1\\^\"")); 0248 ret.replace(QLatin1Char('%'), PERCENT_ESCAPE); 0249 return ret; 0250 }