File indexing completed on 2024-05-12 15:43:34

0001 // krazy:excludeall=doublequote_chars (UStrings aren't QStrings)
0002 /*
0003  *  This file is part of the KDE libraries
0004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
0005  *  Copyright (C) 2003 Apple Computer, Inc.
0006  *
0007  *  This library is free software; you can redistribute it and/or
0008  *  modify it under the terms of the GNU Lesser General Public
0009  *  License as published by the Free Software Foundation; either
0010  *  version 2 of the License, or (at your option) any later version.
0011  *
0012  *  This library is distributed in the hope that it will be useful,
0013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015  *  Lesser General Public License for more details.
0016  *
0017  *  You should have received a copy of the GNU Lesser General Public
0018  *  License along with this library; if not, write to the Free Software
0019  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
0020  *
0021  */
0022 
0023 #include "regexp_object.h"
0024 
0025 #include "regexp_object.lut.h"
0026 
0027 #include <stdio.h>
0028 #include "value.h"
0029 #include "object.h"
0030 #include "types.h"
0031 #include "nodes.h"
0032 #include "interpreter.h"
0033 #include "operations.h"
0034 #include "error_object.h"
0035 #include "lookup.h"
0036 
0037 using namespace KJS;
0038 
0039 // ------------------------------ RegExpPrototype ---------------------------
0040 
0041 // ECMA 15.10.5
0042 
0043 const ClassInfo RegExpPrototype::info = {"RegExp", nullptr, nullptr, nullptr};
0044 
0045 RegExpPrototype::RegExpPrototype(ExecState *exec,
0046                                  ObjectPrototype *objProto,
0047                                  FunctionPrototype *funcProto)
0048     : JSObject(objProto)
0049 {
0050     static const Identifier *execPropertyName = new Identifier("exec");
0051     static const Identifier *testPropertyName = new Identifier("test");
0052     static const Identifier *compilePropertyName = new Identifier("compile");
0053 
0054     putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Exec, 0, *execPropertyName), DontEnum);
0055     putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Test, 0, *testPropertyName), DontEnum);
0056     putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::ToString, 0,  exec->propertyNames().toString), DontEnum);
0057     putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Compile,  1, *compilePropertyName), DontEnum);
0058 }
0059 
0060 // ------------------------------ RegExpProtoFunc ---------------------------
0061 
0062 RegExpProtoFunc::RegExpProtoFunc(ExecState *exec, FunctionPrototype *funcProto, int i, int len, const Identifier &name)
0063     : InternalFunctionImp(funcProto, name), id(i)
0064 {
0065     putDirect(exec->propertyNames().length, len, DontDelete | ReadOnly | DontEnum);
0066 }
0067 
0068 JSValue *RegExpProtoFunc::callAsFunction(ExecState *exec, JSObject *thisObj, const List &args)
0069 {
0070     if (!thisObj->inherits(&RegExpImp::info)) {
0071         if (thisObj->inherits(&RegExpPrototype::info)) {
0072             switch (id) {
0073             case ToString: return jsString("//");
0074             }
0075         }
0076 
0077         return throwError(exec, TypeError);
0078     }
0079 
0080     switch (id) {
0081     case Test:      // 15.10.6.2
0082     case Exec: {
0083         RegExp *regExp = static_cast<RegExpImp *>(thisObj)->regExp();
0084         RegExpObjectImp *regExpObj = static_cast<RegExpObjectImp *>(exec->lexicalInterpreter()->builtinRegExp());
0085 
0086         UString input;
0087         if (args.isEmpty()) {
0088             input = JSValue::toString(regExpObj->get(exec, exec->propertyNames().input), exec);
0089         } else {
0090             input = JSValue::toString(args[0], exec);
0091         }
0092 
0093         double lastIndex = JSValue::toInteger(thisObj->get(exec, exec->propertyNames().lastIndex), exec);
0094 
0095         bool globalFlag = JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().global), exec);
0096         if (!globalFlag) {
0097             lastIndex = 0;
0098         }
0099         if (lastIndex < 0 || lastIndex > input.size()) {
0100             thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
0101             return jsNull();
0102         }
0103 
0104         int foundIndex;
0105 
0106         RegExpStringContext ctx(input);
0107         UString match = regExpObj->performMatch(regExp, exec, ctx, input, static_cast<int>(lastIndex), &foundIndex);
0108 
0109         if (exec->hadException()) {
0110             return jsUndefined();
0111         }
0112 
0113         bool didMatch = !match.isNull();
0114 
0115         if (globalFlag) {
0116             if (didMatch) {
0117                 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(foundIndex + match.size()), DontDelete | DontEnum);
0118             } else {
0119                 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
0120             }
0121         }
0122 
0123         // Test
0124         if (id == Test) {
0125             return jsBoolean(didMatch);
0126         }
0127 
0128         // Exec
0129         if (didMatch) {
0130             return regExpObj->arrayOfMatches(exec, match);
0131         } else {
0132             return jsNull();
0133         }
0134     }
0135     break;
0136     case ToString: {
0137         UString result = "/" + JSValue::toString(thisObj->get(exec, exec->propertyNames().source), exec) + "/";
0138         if (JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().global), exec)) {
0139             result += "g";
0140         }
0141         if (JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().ignoreCase), exec)) {
0142             result += "i";
0143         }
0144         if (JSValue::toBoolean(thisObj->get(exec, exec->propertyNames().multiline), exec)) {
0145             result += "m";
0146         }
0147         return jsString(result);
0148     }
0149     case Compile: { // JS1.2 legacy, but still in use in the wild somewhat
0150         RegExpImp *instance = static_cast<RegExpImp *>(thisObj);
0151         RegExp *newEngine   = RegExpObjectImp::makeEngine(exec, JSValue::toString(args[0], exec), args[1]);
0152         if (!newEngine) {
0153             return exec->exception();
0154         }
0155         instance->setRegExp(exec, newEngine);
0156         return instance;
0157     }
0158     }
0159 
0160     return jsUndefined();
0161 }
0162 
0163 // ------------------------------ RegExpImp ------------------------------------
0164 
0165 const ClassInfo RegExpImp::info = {"RegExp", nullptr, nullptr, nullptr};
0166 
0167 RegExpImp::RegExpImp(RegExpPrototype *regexpProto)
0168     : JSObject(regexpProto), reg(nullptr)
0169 {
0170 }
0171 
0172 RegExpImp::~RegExpImp()
0173 {
0174     delete reg;
0175 }
0176 
0177 void RegExpImp::setRegExp(ExecState *exec, RegExp *r)
0178 {
0179     delete reg;
0180     reg = r;
0181 
0182     putDirect(exec->propertyNames().global, jsBoolean(r->flags() & RegExp::Global), DontDelete | ReadOnly | DontEnum);
0183     putDirect(exec->propertyNames().ignoreCase, jsBoolean(r->flags() & RegExp::IgnoreCase), DontDelete | ReadOnly | DontEnum);
0184     putDirect(exec->propertyNames().multiline,  jsBoolean(r->flags() & RegExp::Multiline), DontDelete | ReadOnly | DontEnum);
0185 
0186     putDirect(exec->propertyNames().source, jsString(r->pattern()), DontDelete | ReadOnly | DontEnum);
0187     putDirect(exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
0188 }
0189 
0190 JSObject *RegExpImp::valueClone(Interpreter *targetCtx) const
0191 {
0192     RegExpImp *copy = new RegExpImp(static_cast<RegExpPrototype *>(targetCtx->builtinRegExpPrototype()));
0193     copy->setRegExp(targetCtx->globalExec(), new RegExp(reg->pattern(), reg->flags()));
0194     return copy;
0195 }
0196 
0197 // ------------------------------ RegExpObjectImp ------------------------------
0198 
0199 const ClassInfo RegExpObjectImp::info = {"Function", &InternalFunctionImp::info, &RegExpTable, nullptr};
0200 
0201 /* Source for regexp_object.lut.h
0202 @begin RegExpTable 20
0203   input           RegExpObjectImp::Input          None
0204   $_              RegExpObjectImp::Input          DontEnum
0205   multiline       RegExpObjectImp::Multiline      None
0206   $*              RegExpObjectImp::Multiline      DontEnum
0207   lastMatch       RegExpObjectImp::LastMatch      DontDelete|ReadOnly
0208   $&              RegExpObjectImp::LastMatch      DontDelete|ReadOnly|DontEnum
0209   lastParen       RegExpObjectImp::LastParen      DontDelete|ReadOnly
0210   $+              RegExpObjectImp::LastParen      DontDelete|ReadOnly|DontEnum
0211   leftContext     RegExpObjectImp::LeftContext    DontDelete|ReadOnly
0212   $`              RegExpObjectImp::LeftContext    DontDelete|ReadOnly|DontEnum
0213   rightContext    RegExpObjectImp::RightContext   DontDelete|ReadOnly
0214   $'              RegExpObjectImp::RightContext   DontDelete|ReadOnly|DontEnum
0215   $1              RegExpObjectImp::Dollar1        DontDelete|ReadOnly
0216   $2              RegExpObjectImp::Dollar2        DontDelete|ReadOnly
0217   $3              RegExpObjectImp::Dollar3        DontDelete|ReadOnly
0218   $4              RegExpObjectImp::Dollar4        DontDelete|ReadOnly
0219   $5              RegExpObjectImp::Dollar5        DontDelete|ReadOnly
0220   $6              RegExpObjectImp::Dollar6        DontDelete|ReadOnly
0221   $7              RegExpObjectImp::Dollar7        DontDelete|ReadOnly
0222   $8              RegExpObjectImp::Dollar8        DontDelete|ReadOnly
0223   $9              RegExpObjectImp::Dollar9        DontDelete|ReadOnly
0224 @end
0225 */
0226 
0227 struct KJS::RegExpObjectImpPrivate {
0228     // Global search cache / settings
0229     RegExpObjectImpPrivate() : lastInput(""), lastNumSubPatterns(0), multiline(false) { }
0230     UString lastInput;
0231     OwnArrayPtr<int> lastOvector;
0232     unsigned lastNumSubPatterns : 31;
0233     bool multiline              : 1;
0234 };
0235 
0236 RegExpObjectImp::RegExpObjectImp(ExecState *exec,
0237                                  FunctionPrototype *funcProto,
0238                                  RegExpPrototype *regProto)
0239 
0240     : InternalFunctionImp(funcProto),
0241       d(new RegExpObjectImpPrivate)
0242 {
0243     // ECMA 15.10.5.1 RegExp.prototype
0244     putDirect(exec->propertyNames().prototype, regProto, DontEnum | DontDelete | ReadOnly);
0245 
0246     // no. of arguments for constructor
0247     putDirect(exec->propertyNames().length, jsNumber(2), ReadOnly | DontDelete | DontEnum);
0248 }
0249 
0250 void RegExpObjectImp::throwRegExpError(ExecState *exec)
0251 {
0252     throwError(exec, RangeError, "Resource exhaustion trying to perform regexp match.");
0253 }
0254 
0255 /*
0256   To facilitate result caching, exec(), test(), match(), search(), and replace() dipatch regular
0257   expression matching through the performMatch function. We use cached results to calculate,
0258   e.g., RegExp.lastMatch and RegExp.leftParen.
0259 */
0260 UString RegExpObjectImp::performMatch(RegExp *r, ExecState *exec, const RegExpStringContext &c,
0261                                       const UString &s,
0262                                       int startOffset, int *endOffset, int **ovector)
0263 {
0264     int tmpOffset;
0265     int *tmpOvector;
0266     bool error = false;
0267     UString match = r->match(c, s, &error, startOffset, &tmpOffset, &tmpOvector);
0268     if (error) {
0269         if (endOffset) {
0270             *endOffset = -1;
0271         }
0272         throwRegExpError(exec);
0273         return match;
0274     }
0275 
0276     if (endOffset) {
0277         *endOffset = tmpOffset;
0278     }
0279     if (ovector) {
0280         *ovector = tmpOvector;
0281     }
0282 
0283     if (!match.isNull()) {
0284         ASSERT(tmpOvector);
0285 
0286         d->lastInput = s;
0287         d->lastOvector.set(tmpOvector);
0288         d->lastNumSubPatterns = r->subPatterns();
0289     }
0290 
0291     return match;
0292 }
0293 
0294 JSObject *RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const
0295 {
0296     List list;
0297     // The returned array contains 'result' as first item, followed by the list of matches
0298     list.append(jsString(result));
0299     if (d->lastOvector)
0300         for (int i = 1; i < d->lastNumSubPatterns + 1; ++i) {
0301             int start = d->lastOvector[2 * i];
0302             if (start == -1) {
0303                 list.append(jsUndefined());
0304             } else {
0305                 UString substring = d->lastInput.substr(start, d->lastOvector[2 * i + 1] - start);
0306                 list.append(jsString(substring));
0307             }
0308         }
0309     JSObject *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list);
0310     arr->put(exec, exec->propertyNames().index, jsNumber(d->lastOvector[0]));
0311     arr->put(exec, exec->propertyNames().input, jsString(d->lastInput));
0312     return arr;
0313 }
0314 
0315 JSValue *RegExpObjectImp::getBackref(int i) const
0316 {
0317     if (d->lastOvector && i < int(d->lastNumSubPatterns + 1)) {
0318         UString substring = d->lastInput.substr(d->lastOvector[2 * i], d->lastOvector[2 * i + 1] - d->lastOvector[2 * i]);
0319         return jsString(substring);
0320     }
0321 
0322     return jsString("");
0323 }
0324 
0325 JSValue *RegExpObjectImp::getLastMatch() const
0326 {
0327     if (d->lastOvector) {
0328         UString substring = d->lastInput.substr(d->lastOvector[0], d->lastOvector[1] - d->lastOvector[0]);
0329         return jsString(substring);
0330     }
0331 
0332     return jsString("");
0333 }
0334 
0335 JSValue *RegExpObjectImp::getLastParen() const
0336 {
0337     int i = d->lastNumSubPatterns;
0338     if (i > 0) {
0339         ASSERT(d->lastOvector);
0340         UString substring = d->lastInput.substr(d->lastOvector[2 * i], d->lastOvector[2 * i + 1] - d->lastOvector[2 * i]);
0341         return jsString(substring);
0342     }
0343 
0344     return jsString("");
0345 }
0346 
0347 JSValue *RegExpObjectImp::getLeftContext() const
0348 {
0349     if (d->lastOvector) {
0350         UString substring = d->lastInput.substr(0, d->lastOvector[0]);
0351         return jsString(substring);
0352     }
0353 
0354     return jsString("");
0355 }
0356 
0357 JSValue *RegExpObjectImp::getRightContext() const
0358 {
0359     if (d->lastOvector) {
0360         UString s = d->lastInput;
0361         UString substring = s.substr(d->lastOvector[1], s.size() - d->lastOvector[1]);
0362         return jsString(substring);
0363     }
0364 
0365     return jsString("");
0366 }
0367 
0368 bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier &propertyName, PropertySlot &slot)
0369 {
0370     return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot);
0371 }
0372 
0373 JSValue *RegExpObjectImp::getValueProperty(ExecState *, int token) const
0374 {
0375     switch (token) {
0376     case Dollar1:
0377         return getBackref(1);
0378     case Dollar2:
0379         return getBackref(2);
0380     case Dollar3:
0381         return getBackref(3);
0382     case Dollar4:
0383         return getBackref(4);
0384     case Dollar5:
0385         return getBackref(5);
0386     case Dollar6:
0387         return getBackref(6);
0388     case Dollar7:
0389         return getBackref(7);
0390     case Dollar8:
0391         return getBackref(8);
0392     case Dollar9:
0393         return getBackref(9);
0394     case Input:
0395         return jsString(d->lastInput);
0396     case Multiline:
0397         return jsBoolean(d->multiline);
0398     case LastMatch:
0399         return getLastMatch();
0400     case LastParen:
0401         return getLastParen();
0402     case LeftContext:
0403         return getLeftContext();
0404     case RightContext:
0405         return getRightContext();
0406     default:
0407         ASSERT(0);
0408     }
0409 
0410     return jsString("");
0411 }
0412 
0413 void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, JSValue *value, int attr)
0414 {
0415     lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this);
0416 }
0417 
0418 void RegExpObjectImp::putValueProperty(ExecState *exec, int token, JSValue *value, int /*attr*/)
0419 {
0420     switch (token) {
0421     case Input:
0422         d->lastInput = JSValue::toString(value, exec);
0423         break;
0424     case Multiline:
0425         d->multiline = JSValue::toBoolean(value, exec);
0426         break;
0427     default:
0428         ASSERT(0);
0429     }
0430 }
0431 
0432 bool RegExpObjectImp::implementsConstruct() const
0433 {
0434     return true;
0435 }
0436 
0437 RegExp *RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, JSValue *flagsInput)
0438 {
0439     int reflags = RegExp::None;
0440 
0441     if (!JSValue::isUndefined(flagsInput)) {
0442         const UString flags = JSValue::toString(flagsInput, exec);
0443 
0444         // Check flags
0445         for (int pos = 0; pos < flags.size(); ++pos) {
0446             switch (flags[pos].unicode()) {
0447             case 'g':
0448                 if (reflags & RegExp::Global) {
0449                     throwError(exec, SyntaxError,
0450                                "Regular expression flag 'g' given twice", 1, -1, "<regexp>");
0451                     return nullptr;
0452                 }
0453                 reflags |= RegExp::Global;
0454                 break;
0455             case 'i':
0456                 if (reflags & RegExp::IgnoreCase) {
0457                     throwError(exec, SyntaxError,
0458                                "Regular expression flag 'i' given twice", 1, -1, "<regexp>");
0459                     return nullptr;
0460                 }
0461                 reflags |= RegExp::IgnoreCase;
0462                 break;
0463             case 'm':
0464                 if (reflags & RegExp::Multiline) {
0465                     throwError(exec, SyntaxError,
0466                                "Regular expression flag 'm' given twice", 1, -1, "<regexp>");
0467                     return nullptr;
0468                 }
0469                 reflags |= RegExp::Multiline;
0470                 break;
0471             default: {
0472                 throwError(exec, SyntaxError,
0473                            "Invalid regular expression flags", 1, -1, "<regexp>");
0474                 return nullptr;
0475             }
0476             }
0477         }
0478     }
0479 
0480     RegExp *re = new RegExp(p, reflags);
0481     if (!re->isValid()) {
0482         throwError(exec, SyntaxError,
0483                    "Invalid regular expression", 1, -1, "<regexp>");
0484         delete re;
0485         return nullptr;
0486     }
0487     return re;
0488 }
0489 
0490 // ECMA 15.10.4
0491 JSObject *RegExpObjectImp::construct(ExecState *exec, const List &args)
0492 {
0493     JSObject *o = JSValue::getObject(args[0]);
0494     if (o && o->inherits(&RegExpImp::info)) {
0495         if (!JSValue::isUndefined(args[1])) {
0496             return throwError(exec, TypeError);
0497         }
0498         return o;
0499     }
0500 
0501     UString p = JSValue::isUndefined(args[0]) ? UString("") : JSValue::toString(args[0], exec);
0502 
0503     RegExp *re = makeEngine(exec, p, args[1]);
0504     if (!re) {
0505         return JSValue::toObject(exec->exception(), exec);
0506     }
0507 
0508     RegExpPrototype *proto = static_cast<RegExpPrototype *>(exec->lexicalInterpreter()->builtinRegExpPrototype());
0509     RegExpImp *dat = new RegExpImp(proto);
0510 
0511     dat->setRegExp(exec, re);
0512 
0513     return dat;
0514 }
0515 
0516 // ECMA 15.10.3
0517 JSValue *RegExpObjectImp::callAsFunction(ExecState *exec, JSObject * /*thisObj*/, const List &args)
0518 {
0519     // The RegExp argument case is handled by construct()
0520 
0521     return construct(exec, args);
0522 }