// File indexing completed on 2024-05-12 15:43:34
0001 // krazy:excludeall=doublequote_chars (UStrings aren't QStrings) 0002 /* 0003 * This file is part of the KDE libraries 0004 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 0005 * Copyright (C) 2003 Apple Computer, Inc. 0006 * 0007 * This library is free software; you can redistribute it and/or 0008 * modify it under the terms of the GNU Lesser General Public 0009 * License as published by the Free Software Foundation; either 0010 * version 2 of the License, or (at your option) any later version. 0011 * 0012 * This library is distributed in the hope that it will be useful, 0013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0015 * Lesser General Public License for more details. 0016 * 0017 * You should have received a copy of the GNU Lesser General Public 0018 * License along with this library; if not, write to the Free Software 0019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 0020 * 0021 */ 0022 0023 #include "regexp_object.h" 0024 0025 #include "regexp_object.lut.h" 0026 0027 #include <stdio.h> 0028 #include "value.h" 0029 #include "object.h" 0030 #include "types.h" 0031 #include "nodes.h" 0032 #include "interpreter.h" 0033 #include "operations.h" 0034 #include "error_object.h" 0035 #include "lookup.h" 0036 0037 using namespace KJS; 0038 0039 // ------------------------------ RegExpPrototype --------------------------- 0040 0041 // ECMA 15.10.5 0042 0043 const ClassInfo RegExpPrototype::info = {"RegExp", nullptr, nullptr, nullptr}; 0044 0045 RegExpPrototype::RegExpPrototype(ExecState *exec, 0046 ObjectPrototype *objProto, 0047 FunctionPrototype *funcProto) 0048 : JSObject(objProto) 0049 { 0050 static const Identifier *execPropertyName = new Identifier("exec"); 0051 static const Identifier *testPropertyName = new Identifier("test"); 0052 static const Identifier *compilePropertyName = new Identifier("compile"); 0053 0054 
putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Exec, 0, *execPropertyName), DontEnum); 0055 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Test, 0, *testPropertyName), DontEnum); 0056 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::ToString, 0, exec->propertyNames().toString), DontEnum); 0057 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Compile, 1, *compilePropertyName), DontEnum); 0058 } 0059 0060 // ------------------------------ RegExpProtoFunc --------------------------- 0061 0062 RegExpProtoFunc::RegExpProtoFunc(ExecState *exec, FunctionPrototype *funcProto, int i, int len, const Identifier &name) 0063 : InternalFunctionImp(funcProto, name), id(i) 0064 { 0065 putDirect(exec->propertyNames().length, len, DontDelete | ReadOnly | DontEnum); 0066 } 0067 0068 JSValue *RegExpProtoFunc::callAsFunction(ExecState *exec, JSObject *thisObj, const List &args) 0069 { 0070 if (!thisObj->inherits(&RegExpImp::info)) { 0071 if (thisObj->inherits(&RegExpPrototype::info)) { 0072 switch (id) { 0073 case ToString: return jsString("//"); 0074 } 0075 } 0076 0077 return throwError(exec, TypeError); 0078 } 0079 0080 switch (id) { 0081 case Test: // 15.10.6.2 0082 case Exec: { 0083 RegExp *regExp = static_cast<RegExpImp *>(thisObj)->regExp(); 0084 RegExpObjectImp *regExpObj = static_cast<RegExpObjectImp *>(exec->lexicalInterpreter()->builtinRegExp()); 0085 0086 UString input; 0087 if (args.isEmpty()) { 0088 input = JSValue::toString(regExpObj->get(exec, exec->propertyNames().input), exec); 0089 } else { 0090 input = JSValue::toString(args[0], exec); 0091 } 0092 0093 double lastIndex = JSValue::toInteger(thisObj->get(exec, exec->propertyNames().lastIndex), exec); 0094 0095 bool globalFlag = JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().global), exec); 0096 if (!globalFlag) { 0097 lastIndex = 0; 0098 } 0099 if (lastIndex < 0 || lastIndex > input.size()) { 0100 
thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum); 0101 return jsNull(); 0102 } 0103 0104 int foundIndex; 0105 0106 RegExpStringContext ctx(input); 0107 UString match = regExpObj->performMatch(regExp, exec, ctx, input, static_cast<int>(lastIndex), &foundIndex); 0108 0109 if (exec->hadException()) { 0110 return jsUndefined(); 0111 } 0112 0113 bool didMatch = !match.isNull(); 0114 0115 if (globalFlag) { 0116 if (didMatch) { 0117 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(foundIndex + match.size()), DontDelete | DontEnum); 0118 } else { 0119 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum); 0120 } 0121 } 0122 0123 // Test 0124 if (id == Test) { 0125 return jsBoolean(didMatch); 0126 } 0127 0128 // Exec 0129 if (didMatch) { 0130 return regExpObj->arrayOfMatches(exec, match); 0131 } else { 0132 return jsNull(); 0133 } 0134 } 0135 break; 0136 case ToString: { 0137 UString result = "/" + JSValue::toString(thisObj->get(exec, exec->propertyNames().source), exec) + "/"; 0138 if (JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().global), exec)) { 0139 result += "g"; 0140 } 0141 if (JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().ignoreCase), exec)) { 0142 result += "i"; 0143 } 0144 if (JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().multiline), exec)) { 0145 result += "m"; 0146 } 0147 return jsString(result); 0148 } 0149 case Compile: { // JS1.2 legacy, but still in use in the wild somewhat 0150 RegExpImp *instance = static_cast<RegExpImp *>(thisObj); 0151 RegExp *newEngine = RegExpObjectImp::makeEngine(exec, JSValue::toString(args[0], exec), args[1]); 0152 if (!newEngine) { 0153 return exec->exception(); 0154 } 0155 instance->setRegExp(exec, newEngine); 0156 return instance; 0157 } 0158 } 0159 0160 return jsUndefined(); 0161 } 0162 0163 // ------------------------------ RegExpImp ------------------------------------ 0164 0165 const ClassInfo 
RegExpImp::info = {"RegExp", nullptr, nullptr, nullptr}; 0166 0167 RegExpImp::RegExpImp(RegExpPrototype *regexpProto) 0168 : JSObject(regexpProto), reg(nullptr) 0169 { 0170 } 0171 0172 RegExpImp::~RegExpImp() 0173 { 0174 delete reg; 0175 } 0176 0177 void RegExpImp::setRegExp(ExecState *exec, RegExp *r) 0178 { 0179 delete reg; 0180 reg = r; 0181 0182 putDirect(exec->propertyNames().global, jsBoolean(r->flags() & RegExp::Global), DontDelete | ReadOnly | DontEnum); 0183 putDirect(exec->propertyNames().ignoreCase, jsBoolean(r->flags() & RegExp::IgnoreCase), DontDelete | ReadOnly | DontEnum); 0184 putDirect(exec->propertyNames().multiline, jsBoolean(r->flags() & RegExp::Multiline), DontDelete | ReadOnly | DontEnum); 0185 0186 putDirect(exec->propertyNames().source, jsString(r->pattern()), DontDelete | ReadOnly | DontEnum); 0187 putDirect(exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum); 0188 } 0189 0190 JSObject *RegExpImp::valueClone(Interpreter *targetCtx) const 0191 { 0192 RegExpImp *copy = new RegExpImp(static_cast<RegExpPrototype *>(targetCtx->builtinRegExpPrototype())); 0193 copy->setRegExp(targetCtx->globalExec(), new RegExp(reg->pattern(), reg->flags())); 0194 return copy; 0195 } 0196 0197 // ------------------------------ RegExpObjectImp ------------------------------ 0198 0199 const ClassInfo RegExpObjectImp::info = {"Function", &InternalFunctionImp::info, &RegExpTable, nullptr}; 0200 0201 /* Source for regexp_object.lut.h 0202 @begin RegExpTable 20 0203 input RegExpObjectImp::Input None 0204 $_ RegExpObjectImp::Input DontEnum 0205 multiline RegExpObjectImp::Multiline None 0206 $* RegExpObjectImp::Multiline DontEnum 0207 lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly 0208 $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum 0209 lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly 0210 $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum 0211 leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly 0212 $` 
RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum 0213 rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly 0214 $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum 0215 $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly 0216 $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly 0217 $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly 0218 $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly 0219 $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly 0220 $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly 0221 $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly 0222 $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly 0223 $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly 0224 @end 0225 */ 0226 0227 struct KJS::RegExpObjectImpPrivate { 0228 // Global search cache / settings 0229 RegExpObjectImpPrivate() : lastInput(""), lastNumSubPatterns(0), multiline(false) { } 0230 UString lastInput; 0231 OwnArrayPtr<int> lastOvector; 0232 unsigned lastNumSubPatterns : 31; 0233 bool multiline : 1; 0234 }; 0235 0236 RegExpObjectImp::RegExpObjectImp(ExecState *exec, 0237 FunctionPrototype *funcProto, 0238 RegExpPrototype *regProto) 0239 0240 : InternalFunctionImp(funcProto), 0241 d(new RegExpObjectImpPrivate) 0242 { 0243 // ECMA 15.10.5.1 RegExp.prototype 0244 putDirect(exec->propertyNames().prototype, regProto, DontEnum | DontDelete | ReadOnly); 0245 0246 // no. of arguments for constructor 0247 putDirect(exec->propertyNames().length, jsNumber(2), ReadOnly | DontDelete | DontEnum); 0248 } 0249 0250 void RegExpObjectImp::throwRegExpError(ExecState *exec) 0251 { 0252 throwError(exec, RangeError, "Resource exhaustion trying to perform regexp match."); 0253 } 0254 0255 /* 0256 To facilitate result caching, exec(), test(), match(), search(), and replace() dipatch regular 0257 expression matching through the performMatch function. We use cached results to calculate, 0258 e.g., RegExp.lastMatch and RegExp.leftParen. 
0259 */ 0260 UString RegExpObjectImp::performMatch(RegExp *r, ExecState *exec, const RegExpStringContext &c, 0261 const UString &s, 0262 int startOffset, int *endOffset, int **ovector) 0263 { 0264 int tmpOffset; 0265 int *tmpOvector; 0266 bool error = false; 0267 UString match = r->match(c, s, &error, startOffset, &tmpOffset, &tmpOvector); 0268 if (error) { 0269 if (endOffset) { 0270 *endOffset = -1; 0271 } 0272 throwRegExpError(exec); 0273 return match; 0274 } 0275 0276 if (endOffset) { 0277 *endOffset = tmpOffset; 0278 } 0279 if (ovector) { 0280 *ovector = tmpOvector; 0281 } 0282 0283 if (!match.isNull()) { 0284 ASSERT(tmpOvector); 0285 0286 d->lastInput = s; 0287 d->lastOvector.set(tmpOvector); 0288 d->lastNumSubPatterns = r->subPatterns(); 0289 } 0290 0291 return match; 0292 } 0293 0294 JSObject *RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const 0295 { 0296 List list; 0297 // The returned array contains 'result' as first item, followed by the list of matches 0298 list.append(jsString(result)); 0299 if (d->lastOvector) 0300 for (int i = 1; i < d->lastNumSubPatterns + 1; ++i) { 0301 int start = d->lastOvector[2 * i]; 0302 if (start == -1) { 0303 list.append(jsUndefined()); 0304 } else { 0305 UString substring = d->lastInput.substr(start, d->lastOvector[2 * i + 1] - start); 0306 list.append(jsString(substring)); 0307 } 0308 } 0309 JSObject *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list); 0310 arr->put(exec, exec->propertyNames().index, jsNumber(d->lastOvector[0])); 0311 arr->put(exec, exec->propertyNames().input, jsString(d->lastInput)); 0312 return arr; 0313 } 0314 0315 JSValue *RegExpObjectImp::getBackref(int i) const 0316 { 0317 if (d->lastOvector && i < int(d->lastNumSubPatterns + 1)) { 0318 UString substring = d->lastInput.substr(d->lastOvector[2 * i], d->lastOvector[2 * i + 1] - d->lastOvector[2 * i]); 0319 return jsString(substring); 0320 } 0321 0322 return jsString(""); 0323 } 0324 0325 JSValue 
*RegExpObjectImp::getLastMatch() const 0326 { 0327 if (d->lastOvector) { 0328 UString substring = d->lastInput.substr(d->lastOvector[0], d->lastOvector[1] - d->lastOvector[0]); 0329 return jsString(substring); 0330 } 0331 0332 return jsString(""); 0333 } 0334 0335 JSValue *RegExpObjectImp::getLastParen() const 0336 { 0337 int i = d->lastNumSubPatterns; 0338 if (i > 0) { 0339 ASSERT(d->lastOvector); 0340 UString substring = d->lastInput.substr(d->lastOvector[2 * i], d->lastOvector[2 * i + 1] - d->lastOvector[2 * i]); 0341 return jsString(substring); 0342 } 0343 0344 return jsString(""); 0345 } 0346 0347 JSValue *RegExpObjectImp::getLeftContext() const 0348 { 0349 if (d->lastOvector) { 0350 UString substring = d->lastInput.substr(0, d->lastOvector[0]); 0351 return jsString(substring); 0352 } 0353 0354 return jsString(""); 0355 } 0356 0357 JSValue *RegExpObjectImp::getRightContext() const 0358 { 0359 if (d->lastOvector) { 0360 UString s = d->lastInput; 0361 UString substring = s.substr(d->lastOvector[1], s.size() - d->lastOvector[1]); 0362 return jsString(substring); 0363 } 0364 0365 return jsString(""); 0366 } 0367 0368 bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier &propertyName, PropertySlot &slot) 0369 { 0370 return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot); 0371 } 0372 0373 JSValue *RegExpObjectImp::getValueProperty(ExecState *, int token) const 0374 { 0375 switch (token) { 0376 case Dollar1: 0377 return getBackref(1); 0378 case Dollar2: 0379 return getBackref(2); 0380 case Dollar3: 0381 return getBackref(3); 0382 case Dollar4: 0383 return getBackref(4); 0384 case Dollar5: 0385 return getBackref(5); 0386 case Dollar6: 0387 return getBackref(6); 0388 case Dollar7: 0389 return getBackref(7); 0390 case Dollar8: 0391 return getBackref(8); 0392 case Dollar9: 0393 return getBackref(9); 0394 case Input: 0395 return jsString(d->lastInput); 0396 case Multiline: 0397 return 
jsBoolean(d->multiline); 0398 case LastMatch: 0399 return getLastMatch(); 0400 case LastParen: 0401 return getLastParen(); 0402 case LeftContext: 0403 return getLeftContext(); 0404 case RightContext: 0405 return getRightContext(); 0406 default: 0407 ASSERT(0); 0408 } 0409 0410 return jsString(""); 0411 } 0412 0413 void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, JSValue *value, int attr) 0414 { 0415 lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this); 0416 } 0417 0418 void RegExpObjectImp::putValueProperty(ExecState *exec, int token, JSValue *value, int /*attr*/) 0419 { 0420 switch (token) { 0421 case Input: 0422 d->lastInput = JSValue::toString(value, exec); 0423 break; 0424 case Multiline: 0425 d->multiline = JSValue::toBoolean(value, exec); 0426 break; 0427 default: 0428 ASSERT(0); 0429 } 0430 } 0431 0432 bool RegExpObjectImp::implementsConstruct() const 0433 { 0434 return true; 0435 } 0436 0437 RegExp *RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, JSValue *flagsInput) 0438 { 0439 int reflags = RegExp::None; 0440 0441 if (!JSValue::isUndefined(flagsInput)) { 0442 const UString flags = JSValue::toString(flagsInput, exec); 0443 0444 // Check flags 0445 for (int pos = 0; pos < flags.size(); ++pos) { 0446 switch (flags[pos].unicode()) { 0447 case 'g': 0448 if (reflags & RegExp::Global) { 0449 throwError(exec, SyntaxError, 0450 "Regular expression flag 'g' given twice", 1, -1, "<regexp>"); 0451 return nullptr; 0452 } 0453 reflags |= RegExp::Global; 0454 break; 0455 case 'i': 0456 if (reflags & RegExp::IgnoreCase) { 0457 throwError(exec, SyntaxError, 0458 "Regular expression flag 'i' given twice", 1, -1, "<regexp>"); 0459 return nullptr; 0460 } 0461 reflags |= RegExp::IgnoreCase; 0462 break; 0463 case 'm': 0464 if (reflags & RegExp::Multiline) { 0465 throwError(exec, SyntaxError, 0466 "Regular expression flag 'm' given twice", 1, -1, "<regexp>"); 0467 return nullptr; 0468 } 
0469 reflags |= RegExp::Multiline; 0470 break; 0471 default: { 0472 throwError(exec, SyntaxError, 0473 "Invalid regular expression flags", 1, -1, "<regexp>"); 0474 return nullptr; 0475 } 0476 } 0477 } 0478 } 0479 0480 RegExp *re = new RegExp(p, reflags); 0481 if (!re->isValid()) { 0482 throwError(exec, SyntaxError, 0483 "Invalid regular expression", 1, -1, "<regexp>"); 0484 delete re; 0485 return nullptr; 0486 } 0487 return re; 0488 } 0489 0490 // ECMA 15.10.4 0491 JSObject *RegExpObjectImp::construct(ExecState *exec, const List &args) 0492 { 0493 JSObject *o = JSValue::getObject(args[0]); 0494 if (o && o->inherits(&RegExpImp::info)) { 0495 if (!JSValue::isUndefined(args[1])) { 0496 return throwError(exec, TypeError); 0497 } 0498 return o; 0499 } 0500 0501 UString p = JSValue::isUndefined(args[0]) ? UString("") : JSValue::toString(args[0], exec); 0502 0503 RegExp *re = makeEngine(exec, p, args[1]); 0504 if (!re) { 0505 return JSValue::toObject(exec->exception(), exec); 0506 } 0507 0508 RegExpPrototype *proto = static_cast<RegExpPrototype *>(exec->lexicalInterpreter()->builtinRegExpPrototype()); 0509 RegExpImp *dat = new RegExpImp(proto); 0510 0511 dat->setRegExp(exec, re); 0512 0513 return dat; 0514 } 0515 0516 // ECMA 15.10.3 0517 JSValue *RegExpObjectImp::callAsFunction(ExecState *exec, JSObject * /*thisObj*/, const List &args) 0518 { 0519 // The RegExp argument case is handled by construct() 0520 0521 return construct(exec, args); 0522 }