calligra/sheets/Formula.cpp

0001 /* This file is part of the KDE project
0002    Copyright (C) 2008-2010 Marijn Kruisselbrink <mkruisselbrink@kde.org>
0003    Copyright (C) 2003,2004 Ariya Hidayat <ariya@kde.org>
0004    Copyright (C) 2005 Tomas Mecir <mecirt@gmail.com>
0005
0006    This library is free software; you can redistribute it and/or
0007    modify it under the terms of the GNU Library General Public
0008    License as published by the Free Software Foundation; only
0009    version 2 of the License.
0010
0011    This library is distributed in the hope that it will be useful,
0012    but WITHOUT ANY WARRANTY; without even the implied warranty of
0013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0014    Library General Public License for more details.
0015
0016    You should have received a copy of the GNU Library General Public License
0017    along with this library; see the file COPYING.LIB.  If not, write to
0018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0019    Boston, MA 02110-1301, USA.
0020 */
0021
0022 #include "Formula.h"
0023
0024 #include "CalculationSettings.h"
0025 #include "Cell.h"
0026 #include "CellStorage.h"
0027 #include "Function.h"
0028 #include "FunctionRepository.h"
0029 #include "Sheet.h"
0030 #include "Map.h"
0031 #include "NamedAreaManager.h"
0032 #include "Region.h"
0033 #include "Value.h"
0034 #include "Util.h"
0035
0036 #include "ValueCalc.h"
0037 #include "ValueConverter.h"
0038 #include "ValueParser.h"
0039
0040 #include <limits.h>
0041
0042 #include <QStack>
0043 #include <QString>
0044 #include <QTextStream>
0045
0046 #include <klocale.h>
0047
0048 #define CALLIGRA_SHEETS_UNICODE_OPERATORS
0049
0050 /*
0051   To understand how this formula engine works, please refer to the documentation
0052   in file DESIGN.html.
0053
0054   Useful references:
0055    - "Principles of Compiler Design", A.V.Aho, J.D.Ullman, Addison Wesley, 1978
0056    - "Writing Interactive Compilers and Interpreters", P.J. Brown,
0057      John Wiley and Sons, 1979.
0058    - "The Theory and Practice of Compiler Writing", J.Tremblay, P.G.Sorenson,
0059      McGraw-Hill, 1985.
0060    - "The Java(TM) Virtual Machine Specification", T.Lindholm, F.Yellin,
0061      Addison-Wesley, 1997.
0062    - "Java Virtual Machine", J.Meyer, T.Downing, O'Reilly, 1997.
0063
0064  */
0065
0066
0067 /*
0068 TODO - features:
0069 - handle Intersection
0070 - cell reference is made relative (absolute now)
0071 - shared formula (different owner, same data)
0072 - relative internal representation (independent of owner)
0073 - OASIS support
0074 TODO - optimizations:
0075 - handle initial formula marker = (and +)
0076 - reuse constant already in the pool
0077 - reuse references already in the pool
0078 - expression optimization (e.g. 1+2+A1 becomes 3+A1)
0079 */
0080
0081 namespace Calligra
0082 {
0083 namespace Sheets
0084 {
0085
0086 class Opcode
0087 {
0088 public:
0089
0090     enum { Nop = 0, Load, Ref, Cell, Range, Function, Add, Sub, Neg, Mul, Div,
0091            Pow, Concat, Intersect, Not, Equal, Less, Greater, Array, Union
0092          };
0093
0094     unsigned type;
0095     unsigned index;
0096
0097     Opcode(): type(Nop), index(0) {}
0098     Opcode(unsigned t): type(t), index(0) {}
0099     Opcode(unsigned t, unsigned i): type(t), index(i) {}
0100 };
0101
0102 // used when evaluation formulas
0103 struct stackEntry {
0104     void reset() {
0105         row1 = col1 = row2 = col2 = -1;
0106         reg = Calligra::Sheets::Region();
0107         regIsNamedOrLabeled = false;
0108     }
0109     Value val;
0110     Calligra::Sheets::Region reg;
0111     bool regIsNamedOrLabeled;
0112     int row1, col1, row2, col2;
0113 };
0114
0115 class Q_DECL_HIDDEN Formula::Private : public QSharedData
0116 {
0117 public:
0118     Cell cell;
0119     Sheet *sheet;
0120     mutable bool dirty;
0121     mutable bool valid;
0122     QString expression;
0123     mutable QVector<Opcode> codes;
0124     mutable QVector<Value> constants;
0125
0126     Value valueOrElement(FuncExtra &fe, const stackEntry& entry) const;
0127 };
0128
0129 class TokenStack : public QVector<Token>
0130 {
0131 public:
0132     TokenStack();
0133     unsigned itemCount() const;
0134     void push(const Token& token);
0135     Token pop();
0136     const Token& top();
0137     const Token& top(unsigned index);
0138 };
0139
0140 } // namespace Sheets
0141 } // namespace Calligra
0142
0143 using namespace Calligra::Sheets;
0144
0145 // for null token
0146 const Token Token::null;
0147
0148 // helper function: return operator of given token text
0149 // e.g. '*' yields Operator::Asterisk, and so on
0150 Token::Op Calligra::Sheets::matchOperator(const QString& text)
0151 {
0152     Token::Op result = Token::InvalidOp;
0153
0154     if (text.length() == 1) {
0155         QChar p = text[0];
0156         switch (p.unicode()) {
0157         case '+': result = Token::Plus; break;
0158         case '-': result = Token::Minus; break;
0159         case '*': result = Token::Asterisk; break;
0160         case '/': result = Token::Slash; break;
0161         case '^': result = Token::Caret; break;
0162         case ',': result = Token::Comma; break;
0163         case ';': result = Token::Semicolon; break;
0164         case ' ': result = Token::Intersect; break;
0165         case '(': result = Token::LeftPar; break;
0166         case ')': result = Token::RightPar; break;
0167         case '&': result = Token::Ampersand; break;
0168         case '=': result = Token::Equal; break;
0169         case '<': result = Token::Less; break;
0170         case '>': result = Token::Greater; break;
0171         case '%': result = Token::Percent; break;
0172         case '~': result = Token::Union; break;
0173 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
0174         case '{': result = Token::CurlyBra; break;
0175         case '}': result = Token::CurlyKet; break;
0176         case '|': result = Token::Pipe; break;
0177 #endif
0178 #ifdef CALLIGRA_SHEETS_UNICODE_OPERATORS
0179         case 0x2212: result = Token::Minus; break;
0180         case 0x00D7: result = Token::Asterisk; break;
0181         case 0x00F7: result = Token::Slash; break;
0182         case 0x2215: result = Token::Slash; break;
0183 #endif
0184         default : result = Token::InvalidOp; break;
0185         }
0186     }
0187
0188     if (text.length() == 2) {
0189         if (text == "<>") result = Token::NotEqual;
0190         if (text == "!=") result = Token::NotEqual;
0191         if (text == "<=") result = Token::LessEqual;
0192         if (text == ">=") result = Token::GreaterEqual;
0193         if (text == "==") result = Token::Equal;
0194     }
0195
0196     return result;
0197 }
0198
0199 bool Calligra::Sheets::parseOperator(const QChar *&data, QChar *&out)
0200 {
0201     bool retval = true;
0202     switch(data->unicode()) {
0203     case '+':
0204     case '-':
0205     case '*':
0206     case '/':
0207     case '^':
0208     case ',':
0209     case ';':
0210     case ' ':
0211     case '(':
0212     case ')':
0213     case '&':
0214     case '%':
0215     case '~':
0216 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
0217     case '{':
0218     case '}':
0219     case '|':
0220 #endif
0221 #ifdef CALLIGRA_SHEETS_UNICODE_OPERATORS
0222     case 0x2212:
0223     case 0x00D7:
0224     case 0x00F7:
0225     case 0x2215:
0226 #endif
0227         *out = *data;
0228         ++out;
0229         ++data;
0230         break;
0231     case '<':
0232         *out = *data;
0233         ++out;
0234         ++data;
0235         if (!data->isNull()) {
0236             if (*data == QChar('>', 0) || *data == QChar('=', 0)) {
0237                 *out = *data;
0238                 ++out;
0239                 ++data;
0240             }
0241         }
0242         break;
0243     case '>':
0244         *out = *data;
0245         ++out;
0246         ++data;
0247         if (!data->isNull() && *data == QChar('=', 0)) {
0248             *out = *data;
0249             ++out;
0250             ++data;
0251         }
0252         break;
0253     case '=':
0254         *out++ = *data++;
0255         if (!data->isNull() && *data == QChar('=', 0)) {
0256             *out++ = *data++;
0257         }
0258         break;
0259     case '!': {
0260         const QChar * next = data + 1;
0261         if (!next->isNull() && *next == QChar('=', 0)) {
0262             *out = *data;
0263             ++out;
0264             ++data;
0265             *out = *data;
0266             ++out;
0267             ++data;
0268         }
0269         else {
0270             retval = false;
0271         }
0272     }   break;
0273     default:
0274         retval = false;
0275         break;
0276     }
0277     return retval;
0278 }
0279
0280 // helper function: give operator precedence
0281 // e.g. '+' is 1 while '*' is 3
0282 static int opPrecedence(Token::Op op)
0283 {
0284     int prec = -1;
0285     switch (op) {
0286     case Token::Percent      : prec = 8; break;
0287     case Token::Caret        : prec = 7; break;
0288     case Token::Asterisk     : prec = 5; break;
0289     case Token::Slash        : prec = 6; break;
0290     case Token::Plus         : prec = 3; break;
0291     case Token::Minus        : prec = 3; break;
0292     case Token::Union        : prec = 2; break;
0293     case Token::Ampersand    : prec = 2; break;
0294     case Token::Intersect    : prec = 2; break;
0295     case Token::Equal        : prec = 1; break;
0296     case Token::NotEqual     : prec = 1; break;
0297     case Token::Less         : prec = 1; break;
0298     case Token::Greater      : prec = 1; break;
0299     case Token::LessEqual    : prec = 1; break;
0300     case Token::GreaterEqual : prec = 1; break;
0301 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
0302         // FIXME Stefan: I don't know whether zero is right for this case. :-(
0303     case Token::CurlyBra     : prec = 0; break;
0304     case Token::CurlyKet     : prec = 0; break;
0305     case Token::Pipe         : prec = 0; break;
0306 #endif
0307     case Token::Semicolon    : prec = 0; break;
0308     case Token::RightPar     : prec = 0; break;
0309     case Token::LeftPar      : prec = -1; break;
0310     default: prec = -1; break;
0311     }
0312     return prec;
0313 }
0314
0315 // helper function
0316 static Value tokenAsValue(const Token& token)
0317 {
0318     Value value;
0319     if (token.isBoolean()) value = Value(token.asBoolean());
0320     else if (token.isInteger()) value = Value(token.asInteger());
0321     else if (token.isFloat()) value = Value(token.asFloat());
0322     else if (token.isString()) value = Value(token.asString());
0323     else if (token.isError()) {
0324         const QString error = token.asError();
0325         if (error == Value::errorCIRCLE().errorMessage())
0326             value = Value::errorCIRCLE();
0327         else if (error == Value::errorDEPEND().errorMessage())
0328             value = Value::errorDEPEND();
0329         else if (error == Value::errorDIV0().errorMessage())
0330             value = Value::errorDIV0();
0331         else if (error == Value::errorNA().errorMessage())
0332             value = Value::errorNA();
0333         else if (error == Value::errorNAME().errorMessage())
0334             value = Value::errorNAME();
0335         else if (error == Value::errorNUM().errorMessage())
0336             value = Value::errorNUM();
0337         else if (error == Value::errorNULL().errorMessage())
0338             value = Value::errorNULL();
0339         else if (error == Value::errorPARSE().errorMessage())
0340             value = Value::errorPARSE();
0341         else if (error == Value::errorREF().errorMessage())
0342             value = Value::errorREF();
0343         else if (error == Value::errorVALUE().errorMessage())
0344             value = Value::errorVALUE();
0345         else {
0346             value = Value(Value::Error);
0347             value.setError(error);
0348         }
0349     }
0350     return value;
0351 }
0352
0353 /**********************
0354     Token
0355  **********************/
0356
0357 // creates a token
0358 Token::Token(Type type, const QString& text, int pos)
0359 : m_type(type)
0360 , m_text(text)
0361 , m_pos(pos)
0362 {
0363     // the detach is needed as we manipulate the string we use as input afterwards
0364     // by writing to QChar * data point which does nto detach automatically.
0365     m_text.detach();
0366 }
0367
0368 // copy constructor
0369 Token::Token(const Token& token)
0370 : m_type(token.m_type)
0371 , m_text(token.m_text)
0372 , m_pos(token.m_pos)
0373 {
0374 }
0375
0376 // assignment operator
0377 Token& Token::operator=(const Token & token)
0378 {
0379     m_type = token.m_type;
0380     m_text = token.m_text;
0381     m_pos = token.m_pos;
0382     return *this;
0383 }
0384
0385 bool Token::asBoolean() const
0386 {
0387     if (!isBoolean()) return false;
0388     return m_text.toLower() == "true";
0389     // FIXME check also for i18n version
0390 }
0391
0392 qint64 Token::asInteger() const
0393 {
0394     if (isInteger()) return m_text.toLongLong();
0395     else return 0;
0396 }
0397
0398 double Token::asFloat() const
0399 {
0400     if (isFloat()) return m_text.toDouble();
0401     else return 0.0;
0402 }
0403
0404 QString Token::asString() const
0405 {
0406     if (!isString()) return QString();
0407     QString res = m_text.mid(1, m_text.length() - 2);
0408     res = res.replace("\"\"", "\"");   // double quotes to single quotes
0409     return res;
0410 }
0411
0412 QString Token::asError() const
0413 {
0414     if (isError())
0415         return m_text;
0416     else
0417         return QString();
0418 }
0419
0420 Token::Op Token::asOperator() const
0421 {
0422     if (isOperator()) return matchOperator(m_text);
0423     else return InvalidOp;
0424 }
0425
0426 QString Token::sheetName() const
0427 {
0428     if (!isCell() && !isRange()) return QString();
0429     int i = m_text.indexOf('!');
0430     if (i < 0) return QString();
0431     QString sheet = m_text.left(i);
0432     return sheet;
0433 }
0434
0435 QString Token::description() const
0436 {
0437     QString desc;
0438
0439     switch (m_type) {
0440     case  Boolean:    desc = "Boolean"; break;
0441     case  Integer:    desc = "Integer"; break;
0442     case  Float:      desc = "Float"; break;
0443     case  String:     desc = "String"; break;
0444     case  Identifier: desc = "Identifier"; break;
0445     case  Cell:       desc = "Cell"; break;
0446     case  Range:      desc = "Range"; break;
0447     case  Operator:   desc = "Operator"; break;
0448     case  Error:      desc = "Error"; break;
0449     default:          desc = "Unknown"; break;
0450     }
0451
0452     while (desc.length() < 10) desc.prepend(' ');
0453     desc.prepend("  ");
0454     desc.prepend(QString::number(m_pos));
0455     desc.append(" : ").append(m_text);
0456
0457     return desc;
0458 }
0459
0460
0461 /**********************
0462     TokenStack
0463  **********************/
0464
0465 TokenStack::TokenStack(): QVector<Token>()
0466 {
0467 }
0468
0469 unsigned TokenStack::itemCount() const
0470 {
0471     return size();
0472 }
0473
0474 void TokenStack::push(const Token& token)
0475 {
0476     append(token);
0477 }
0478
0479 Token TokenStack::pop()
0480 {
0481     if (!isEmpty()) {
0482         Token token = last();
0483         pop_back();
0484         return token;
0485     }
0486     return Token();
0487 }
0488
0489 const Token& TokenStack::top()
0490 {
0491     return top(0);
0492 }
0493
0494 const Token& TokenStack::top(unsigned index)
0495 {
0496     unsigned top = size();
0497     if (top > index)
0498         return at(top - index - 1);
0499     return Token::null;
0500 }
0501
0502
0503 /**********************
0504     FormulaPrivate
0505  **********************/
0506
0507 // helper function: return true for valid identifier character
0508 bool Calligra::Sheets::isIdentifier(QChar ch)
0509 {
0510     switch(ch.unicode()) {
0511     case '_':
0512     case '$':
0513     case '.':
0514         return true;
0515     default:
0516         return ch.isLetter();
0517     }
0518 }
0519
0520
0521
0522
0523 /**********************
0524     Formula
0525  **********************/
0526
0527 // Constructor
0528
0529 Formula::Formula(Sheet *sheet, const Cell& cell)
0530         : d(new Private)
0531 {
0532     d->cell = cell;
0533     d->sheet = sheet;
0534     clear();
0535 }
0536
0537 Formula::Formula(Sheet *sheet)
0538         : d(new Private)
0539 {
0540     d->cell = Cell();
0541     d->sheet = sheet;
0542     clear();
0543 }
0544
0545 Formula::Formula()
0546         : d(new Private)
0547 {
0548     d->cell = Cell();
0549     d->sheet = 0;
0550     clear();
0551 }
0552
0553 Formula Formula::empty()
0554 {
0555     static Formula f;
0556     return f;
0557 }
0558
0559 Formula::Formula(const Formula& other)
0560         : d(other.d)
0561 {
0562 }
0563
0564 // Destructor
0565
0566 Formula::~Formula()
0567 {
0568 }
0569
0570 const Cell& Formula::cell() const
0571 {
0572     return d->cell;
0573 }
0574
0575 Sheet* Formula::sheet() const
0576 {
0577     return d->sheet;
0578 }
0579
0580 // Sets a new expression for this formula.
0581 // note that both the real lex and parse processes will happen later on
0582 // when needed (i.e. "lazy parse"), for example during formula evaluation.
0583
0584 void Formula::setExpression(const QString& expr)
0585 {
0586     d->expression = expr;
0587     d->dirty = true;
0588     d->valid = false;
0589 }
0590
0591 // Returns the expression associated with this formula.
0592
0593 QString Formula::expression() const
0594 {
0595     return d->expression;
0596 }
0597
0598 // Returns the validity of the formula.
0599 // note: empty formula is always invalid.
0600
0601 bool Formula::isValid() const
0602 {
0603     if (d->dirty) {
0604         KLocale* locale = !d->cell.isNull() ? d->cell.locale() : 0;
0605         if ((!locale) && d->sheet)
0606             locale = d->sheet->map()->calculationSettings()->locale();
0607         Tokens tokens = scan(d->expression, locale);
0608
0609         if (tokens.valid())
0610             compile(tokens);
0611         else
0612             d->valid = false;
0613     }
0614     return d->valid;
0615 }
0616
0617 // Clears everything, also mark the formula as invalid.
0618
0619 void Formula::clear()
0620 {
0621     d->expression.clear();
0622     d->dirty = true;
0623     d->valid = false;
0624     d->constants.clear();
0625     d->codes.clear();
0626 }
0627
0628 // Returns list of token for the expression.
0629 // this triggers again the lexical analysis step. it is however preferable
0630 // (even when there's small performance penalty) because otherwise we need to
0631 // store parsed tokens all the time which serves no good purpose.
0632
0633 Tokens Formula::tokens() const
0634 {
0635     KLocale* locale = !d->cell.isNull() ? d->cell.locale() : 0;
0636     if ((!locale) && d->sheet)
0637         locale = d->sheet->map()->calculationSettings()->locale();
0638     return scan(d->expression, locale);
0639 }
0640
0641 Tokens Formula::scan(const QString &expr, const KLocale* locale) const
0642 {
0643     // parsing state
0644     enum { Start, Finish, InNumber, InDecimal, InExpIndicator, InExponent,
0645            InString, InIdentifier, InCell, InRange, InSheetOrAreaName, InError
0646          } state;
0647
0648     // use locale settings if specified
0649     QString thousand = locale ? locale->thousandsSeparator() : "";
0650     QString decimal = locale ? locale->decimalSymbol() : ".";
0651
0652     const QChar *data = expr.constData();
0653
0654     Tokens tokens;
0655     if (data->isNull() || *data != QChar('=', 0)) {
0656         return tokens;
0657     }
0658     tokens.reserve(50);
0659
0660     ++data;
0661     const QChar * const start = data;
0662     const QChar * const end = start + expr.length();
0663     const QChar *tokenStart = data;
0664     const QChar *cellStart = data;
0665
0666     state = Start;
0667     bool parseError = false;
0668
0669     int length = expr.length() * 1.1; // TODO check if that is needed at all
0670     QString token(length, QChar());
0671     token.reserve(length); // needed to not realloc at the resize at the end
0672     QChar * out = token.data();
0673     QChar * const outStart = token.data();
0674
0675     while (state != Finish && data < end) {
0676         switch (state) {
0677         case Start:
0678             tokenStart = data;
0679             // Whitespaces can be used as intersect-operator for two arrays.
0680             if (data->isSpace()) {
0681                 ++data;
0682             }
0683             // check for number
0684             else if (data->isDigit()) {
0685                 state = InNumber;
0686                 *out++ = *data++;
0687             }
0688             // terminator character
0689             else if (data->isNull()) {
0690                 state = Finish;
0691             }
0692             else {
0693                 switch (data->unicode()) {
0694                 case '"': // a string ?
0695                     *out++ = *data++;
0696                     state = InString;
0697                     break;
0698                 case '\'': // aposthrophe (') marks sheet name for 3-d cell, e.g 'Sales Q3'!A4, or a named range
0699                     ++data;
0700                     state = InSheetOrAreaName;
0701                     break;
0702                 case '#': // error value?
0703                     *out++ = *data++;
0704                     state = InError;
0705                     break;
0706                 default:
0707                     // decimal dot ?
0708                     if (*data == decimal[0]) {
0709                         *out++ = *data++;
0710                         state = InDecimal;
0711                     }
0712                     // beginning with alphanumeric ?
0713                     // could be identifier, cell, range, or function...
0714                     else if (isIdentifier(*data)) {
0715                         *out++ = *data++;
0716                         state = InIdentifier;
0717                     }
0718                     else {
0719                         // look for operator match
0720                         if (parseOperator(data, out)) {
0721                             token.resize(out - outStart);
0722                             tokens.append(Token(Token::Operator, token, tokenStart - start));
0723                             token.resize(length);
0724                             out = outStart;
0725                         }
0726                         else {
0727                             // not matched an operator, add an Unknown token and remember we had a parse error
0728                             parseError = true;
0729                             *out++ = *data++;
0730                             token.resize(out - outStart);
0731                             tokens.append(Token(Token::Unknown, token, tokenStart - start));
0732                             token.resize(length);
0733                             out = outStart;
0734                         }
0735                     }
0736                     break;
0737                 }
0738             }
0739             break;
0740         case InIdentifier:
0741             // consume as long as alpha, dollar sign, underscore, or digit
0742             if (isIdentifier(*data) || data->isDigit()) {
0743                 *out = *data;
0744                 ++out;
0745                 ++data;
0746             }
0747             // a '!' ? then this must be sheet name, e.g "Sheet4!", unless the next character is '='
0748             else if (*data == QChar('!', 0) && !(data + 1)->isNull() && *(data + 1) != QChar('=', 0)) {
0749                 *out++ = *data++;
0750                 cellStart = out;
0751                 state = InCell;
0752             }
0753             // a '(' ? then this must be a function identifier
0754             else if (*data == QChar('(', 0)) {
0755                 token.resize(out - outStart);
0756                 tokens.append(Token(Token::Identifier, token, tokenStart - start));
0757                 token.resize(length);
0758                 out = outStart;
0759                 state = Start;
0760             }
0761             // we're done with identifier
0762             else {
0763                 *out = QChar();
0764                 // check for cell reference,  e.g A1, VV123, ...
0765                 if (Util::isCellReference(token)) {
0766                     // so up to now we've got something like A2 or Sheet2!F4
0767                     // check for range reference
0768                     if (*data == QChar(':', 0)) {
0769                         *out++ = *data++;
0770                         state = InRange;
0771                     }
0772                     // we're done with cell reference
0773                     else {
0774                         token.resize(out - outStart);
0775                         tokens.append(Token(Token::Cell, token, tokenStart - start));
0776                         token.resize(length);
0777                         out = outStart;
0778                         state = Start;
0779                     }
0780                 }
0781                 else {
0782                     token.resize(out - outStart);
0783                     if (isNamedArea(token)) {
0784                         tokens.append(Token(Token::Range, token, tokenStart - start));
0785                     }
0786                     else {
0787                         tokens.append(Token(Token::Identifier, token, tokenStart - start));
0788                     }
0789                     token.resize(length);
0790                     out = outStart;
0791                     state = Start;
0792                 }
0793             }
0794             break;
0795         case InCell:
0796             // consume as long as alpha, dollar sign, underscore, or digit
0797             if (isIdentifier(*data) || data->isDigit()) {
0798                 *out++ = *data++;
0799             }
0800             else {
0801                 *out = QChar();
0802                 // check if it's a cell ref like A32, not named area
0803                 if (!Util::isCellReference(token, cellStart - outStart)) {
0804                     // test failed, means we have something like "Sheet2!TotalSales"
0805                     // and not "Sheet2!A2"
0806                     // thus, assume so far that it's a named area
0807                     token.resize(out - outStart);
0808                     tokens.append(Token(Token::Range, token, tokenStart - start));
0809                     token.resize(length);
0810                     out = outStart;
0811                     state = Start;
0812                 }
0813                 else {
0814                     // so up to now we've got something like A2 or Sheet2!F4
0815                     // check for range reference
0816                     if (*data == QChar(':', 0)) {
0817                         *out++ = *data++;
0818                         state = InRange;
0819                     }
0820                     else {
0821                         // we're done with cell reference
0822                         token.resize(out - outStart);
0823                         tokens.append(Token(Token::Cell, token, tokenStart - start));
0824                         token.resize(length);
0825                         out = outStart;
0826                         state = Start;
0827                     }
0828                 }
0829             }
0830             break;
0831         case InRange:
0832             // consume as long as alpha, dollar sign, underscore, or digit or !
0833             if (isIdentifier(*data) || data->isDigit() || *data == QChar('!', 0)) {
0834                 *out++ = *data++;
0835             }
0836             // we're done with range reference
0837             else {
0838                 token.resize(out - outStart);
0839                 tokens.append(Token(Token::Range, token, tokenStart - start));
0840                 token.resize(length);
0841                 out = outStart;
0842                 state = Start;
0843             }
0844             break;
0845         case InSheetOrAreaName:
0846             // consume until '
0847             if (data->isNull()) {
0848                 parseError = true;
0849                 token.resize(out - outStart);
0850                 tokens.append(Token(Token::Unknown, '\'' + token + '\'', tokenStart - start));
0851                 state = Start;
0852             }
0853             else if (*data != QChar('\'', 0)) {
0854                 *out++ = *data++;
0855             }
0856             else {
0857                 // eat the aposthrophe itself
0858                 ++data;
0859                 // must be followed by '!' to be sheet name
0860                 if (!data->isNull() && *data == QChar('!', 0)) {
0861                     *out++ = *data++;
0862                     cellStart = out;
0863                     state = InCell;
0864                 }
0865                 else {
0866                     token.resize(out - outStart);
0867                     if (isNamedArea(token)) {
0868                         tokens.append(Token(Token::Range, token, tokenStart - start));
0869                     }
0870                     else {
0871                         // for compatibility with oocalc (and the openformula spec), don't parse single-quoted
0872                         // text as an identifier, instead add an Unknown token and remember we had an error
0873                         parseError = true;
0874                         tokens.append(Token(Token::Unknown, '\'' + token + '\'', tokenStart - start));
0875                     }
0876                     token.resize(length);
0877                     out = outStart;
0878                     state = Start;
0879                 }
0880             }
0881             break;
0882         case InNumber:
0883             // consume as long as it's digit
0884             if (data->isDigit()) {
0885                 *out++ = *data++;
0886             }
0887             // skip thousand separator
0888             else if (!thousand.isEmpty() && (*data == thousand[0])) {
0889                 ++data;
0890             }
0891             // convert decimal separator to '.', also support '.' directly
0892             // we always support '.' because of bug #98455
0893             else if ((!decimal.isEmpty() && (*data == decimal[0])) || *data == QChar('.', 0)) {
0894                 *out++ = QChar('.', 0);
0895                 ++data;
0896                 state = InDecimal;
0897             }
0898             // exponent ?
0899             else if (*data == QChar('E', 0) || *data == QChar('e', 0)) {
0900                 *out++ = QChar('E', 0);
0901                 ++data;
0902                 state = InExpIndicator;
0903             }
0904             // reference sheet delimiter?
0905             else if (*data == QChar('!', 0)) {
0906                 *out++ = *data++;
0907                 cellStart = out;
0908                 state = InCell;
0909             }
0910             // identifier?
0911             else if (isIdentifier(*data)) {
0912                 // has to be a sheet or area name then
0913                 *out++ = *data++;
0914                 state = InIdentifier;
0915             }
0916             // we're done with integer number
0917             else {
0918                 token.resize(out - outStart);
0919                 tokens.append(Token(Token::Integer, token, tokenStart - start));
0920                 token.resize(length);
0921                 out = outStart;
0922                 state = Start;
0923             }
0924             break;
0925         case InDecimal:
0926             // consume as long as it's digit
0927             if (data->isDigit()) {
0928                 *out++ = *data++;
0929             }
0930             // exponent ?
0931             else if (*data == QChar('E', 0) || *data == QChar('e', 0)) {
0932                 *out++ = QChar('E', 0);
0933                 ++data;
0934                 state = InExpIndicator;
0935             }
0936             // we're done with floating-point number
0937             else {
0938                 token.resize(out - outStart);
0939                 tokens.append(Token(Token::Float, token, tokenStart - start));
0940                 token.resize(length);
0941                 out = outStart;
0942                 state = Start;
0943             }
0944             break;
0945         case InExpIndicator:
0946             // possible + or - right after E, e.g 1.23E+12 or 4.67E-8
0947             if (*data == QChar('+', 0) || *data == QChar('-', 0)) {
0948                 *out++ = *data++;
0949             }
0950             // consume as long as it's digit
0951             else if (data->isDigit()) {
0952                 *out++ = *data++;
0953                 state = InExponent;
0954             }
0955             // invalid thing here
0956             else {
0957                 parseError = true;
0958                 token.resize(out - outStart);
0959                 tokens.append(Token(Token::Unknown, token, tokenStart - start));
0960                 token.resize(length);
0961                 out = outStart;
0962                 state = Start;
0963             }
0964             break;
0965         case InExponent:
0966             // consume as long as it's digit
0967             if (data->isDigit()) {
0968                 *out++ = *data++;
0969             }
0970             // we're done with floating-point number
0971             else {
0972                 token.resize(out - outStart);
0973                 tokens.append(Token(Token::Float, token, tokenStart - start));
0974                 token.resize(length);
0975                 out = outStart;
0976                 state = Start;
0977             }
0978             break;
0979         case InString:
0980             // consume until "
0981             if (*data != QChar('"', 0)) {
0982                 *out++ = *data++;
0983             }
0984             else {
0985                 *out++ = *data++;
0986                 // check for escaped ""
0987                 if ((!data->isNull()) && (*data == QChar('"', 0))) {
0988                     *out++ = *data++;
0989                 } else {
0990                     token.resize(out - outStart);
0991                     tokens.append(Token(Token::String, token, tokenStart - start));
0992                     token.resize(length);
0993                     out = outStart;
0994                     state = Start;
0995                 }
0996             }
0997             break;
0998         case InError: {
0999             ushort c = data->unicode();
1000             switch (c) {
1001             case '!':
1002             case '?':
1003                 // TODO check if there is at least one char that needs to be there
1004                 *out++ = *data++;
1005                 token.resize(out - outStart);
1006                 tokens.append(Token(Token::Error, token, tokenStart - start));
1007                 token.resize(length);
1008                 out = outStart;
1009                 state = Start;
1010                 break;
1011             case '/':
1012                 *out++ = *data++;
1013                 if (!data->isNull()) {
1014                     bool error = false;
1015                     if (*data >= 'A' && *data <= 'Z') {
1016                         *out++ = *data++;
1017                     }
1018                     else if (*data >= '0' && *data <= '9'){
1019                         *out++ = *data++;
1020                         if (!data->isNull() && (*data == QChar('!', 0) || *data == QChar('?', 0))) {
1021                             *out++ = *data++;
1022                         }
1023                     }
1024                     else {
1025                         error = true;
1026                     }
1027                     if (error) {
1028                         parseError = true;
1029                         token.resize(out - outStart);
1030                         tokens.append(Token(Token::Unknown, token, tokenStart - start));
1031                         token.resize(length);
1032                         out = outStart;
1033                         state = Start;
1034                     }
1035                     else {
1036                         token.resize(out - outStart);
1037                         tokens.append(Token(Token::Error, token, tokenStart - start));
1038                         token.resize(length);
1039                         out = outStart;
1040                         state = Start;
1041                     }
1042                 }
1043                 break;
1044             default:
1045                 if ((c >= 'A' && c <= 'Z') || (c >= '0' && c<= '9')) {
1046                     *out++ = *data++;
1047                 }
1048                 else {
1049                     parseError = true;
1050                     token.resize(out - outStart);
1051                     tokens.append(Token(Token::Unknown, token, tokenStart - start));
1052                     token.resize(length);
1053                     out = outStart;
1054                     state = Start;
1055                 }
1056                 break;
1057             }
1058         }   break;
1059         default:
1060             break;
1061         }
1062     }
1063
1064     // parse error if any text remains
1065     if (data+1 < end)  {
1066         tokens.append(Token(Token::Unknown, expr.mid(tokenStart - start), tokenStart - start));
1067         parseError = true;
1068     }
1069
1070     if (parseError)
1071         tokens.setValid(false);
1072     return tokens;
1073 }
1074
1075 // will affect: dirty, valid, codes, constants
1076 void Formula::compile(const Tokens& tokens) const
1077 {
1078     // initialize variables
1079     d->dirty = false;
1080     d->valid = false;
1081     d->codes.clear();
1082     d->constants.clear();
1083
1084     // sanity check
1085     if (tokens.count() == 0) return;
1086
1087     TokenStack syntaxStack;
1088     QStack<int> argStack;
1089     unsigned argCount = 1;
1090
1091     for (int i = 0; i <= tokens.count(); i++) {
1092         // helper token: InvalidOp is end-of-formula
1093         Token token = (i < tokens.count()) ? tokens[i] : Token(Token::Operator);
1094         Token::Type tokenType = token.type();
1095
1096         // unknown token is invalid
1097         if (tokenType == Token::Unknown) break;
1098
1099         // are we entering a function ?
1100         // if stack already has: id (
1101         if (syntaxStack.itemCount() >= 2) {
1102             Token par = syntaxStack.top();
1103             Token id = syntaxStack.top(1);
1104             if (par.asOperator() == Token::LeftPar)
1105                 if (id.isIdentifier()) {
1106                     argStack.push(argCount);
1107                     argCount = 1;
1108                 }
1109         }
1110
1111 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
1112         // are we entering an inline array ?
1113         // if stack already has: {
1114         if (syntaxStack.itemCount() >= 1) {
1115             Token bra = syntaxStack.top();
1116             if (bra.asOperator() == Token::CurlyBra) {
1117                 argStack.push(argCount);
1118                 argStack.push(1);   // row count
1119                 argCount = 1;
1120             }
1121         }
1122 #endif
1123
1124         // for constants, push immediately to stack
1125         // generate code to load from a constant
1126         if ((tokenType == Token::Integer) || (tokenType == Token::Float) ||
1127                 (tokenType == Token::String) || (tokenType == Token::Boolean) ||
1128                 (tokenType == Token::Error)) {
1129             syntaxStack.push(token);
1130             d->constants.append(tokenAsValue(token));
1131             d->codes.append(Opcode(Opcode::Load, d->constants.count() - 1));
1132         }
1133
1134         // for cell, range, or identifier, push immediately to stack
1135         // generate code to load from reference
1136         if ((tokenType == Token::Cell) || (tokenType == Token::Range) ||
1137                 (tokenType == Token::Identifier)) {
1138             syntaxStack.push(token);
1139             d->constants.append(Value(token.text()));
1140             if (tokenType == Token::Cell)
1141                 d->codes.append(Opcode(Opcode::Cell, d->constants.count() - 1));
1142             else if (tokenType == Token::Range)
1143                 d->codes.append(Opcode(Opcode::Range, d->constants.count() - 1));
1144             else
1145                 d->codes.append(Opcode(Opcode::Ref, d->constants.count() - 1));
1146         }
1147
1148         // special case for percentage
1149         if (tokenType == Token::Operator)
1150             if (token.asOperator() == Token::Percent)
1151                 if (syntaxStack.itemCount() >= 1)
1152                     if (!syntaxStack.top().isOperator()) {
1153                         d->constants.append(Value(0.01));
1154                         d->codes.append(Opcode(Opcode::Load, d->constants.count() - 1));
1155                         d->codes.append(Opcode(Opcode::Mul));
1156                     }
1157
1158         // for any other operator, try to apply all parsing rules
1159         if (tokenType == Token::Operator)
1160             if (token.asOperator() != Token::Percent) {
1161                 // repeat until no more rule applies
1162                 for (; ;) {
1163                     bool ruleFound = false;
1164
1165                     // rule for function arguments, if token is ; or )
1166                     // id ( arg1 ; arg2 -> id ( arg
1167                     if (!ruleFound)
1168                         if (syntaxStack.itemCount() >= 5)
1169                             if ((token.asOperator() == Token::RightPar) ||
1170                                     (token.asOperator() == Token::Semicolon)) {
1171                                 Token arg2 = syntaxStack.top();
1172                                 Token sep = syntaxStack.top(1);
1173                                 Token arg1 = syntaxStack.top(2);
1174                                 Token par = syntaxStack.top(3);
1175                                 Token id = syntaxStack.top(4);
1176                                 if (!arg2.isOperator())
1177                                     if (sep.asOperator() == Token::Semicolon)
1178                                         if (!arg1.isOperator())
1179                                             if (par.asOperator() == Token::LeftPar)
1180                                                 if (id.isIdentifier()) {
1181                                                     ruleFound = true;
1182                                                     syntaxStack.pop();
1183                                                     syntaxStack.pop();
1184                                                     argCount++;
1185                                                 }
1186                             }
1187
1188                     // rule for empty function arguments, if token is ; or )
1189                     // id ( arg ; -> id ( arg
1190                     if (!ruleFound)
1191                         if (syntaxStack.itemCount() >= 3)
1192                             if ((token.asOperator() == Token::RightPar) ||
1193                                     (token.asOperator() == Token::Semicolon)) {
1194                                 Token sep = syntaxStack.top();
1195                                 Token arg = syntaxStack.top(1);
1196                                 Token par = syntaxStack.top(2);
1197                                 Token id = syntaxStack.top(3);
1198                                 if (sep.asOperator() == Token::Semicolon)
1199                                     if (!arg.isOperator())
1200                                         if (par.asOperator() == Token::LeftPar)
1201                                             if (id.isIdentifier()) {
1202                                                 ruleFound = true;
1203                                                 syntaxStack.pop();
1204                                                 d->constants.append(Value::null());
1205                                                 d->codes.append(Opcode(Opcode::Load, d->constants.count() - 1));
1206                                                 argCount++;
1207                                             }
1208                             }
1209
1210                     // rule for function last argument:
1211                     //  id ( arg ) -> arg
1212                     if (!ruleFound)
1213                         if (syntaxStack.itemCount() >= 4) {
1214                             Token par2 = syntaxStack.top();
1215                             Token arg = syntaxStack.top(1);
1216                             Token par1 = syntaxStack.top(2);
1217                             Token id = syntaxStack.top(3);
1218                             if (par2.asOperator() == Token::RightPar)
1219                                 if (!arg.isOperator())
1220                                     if (par1.asOperator() == Token::LeftPar)
1221                                         if (id.isIdentifier()) {
1222                                             ruleFound = true;
1223                                             syntaxStack.pop();
1224                                             syntaxStack.pop();
1225                                             syntaxStack.pop();
1226                                             syntaxStack.pop();
1227                                             syntaxStack.push(arg);
1228                                             d->codes.append(Opcode(Opcode::Function, argCount));
1229                                             Q_ASSERT(!argStack.empty());
1230                                             argCount = argStack.empty() ? 0 : argStack.pop();
1231                                         }
1232                         }
1233
1234                     // rule for function call with parentheses, but without argument
1235                     // e.g. "2*PI()"
1236                     if (!ruleFound)
1237                         if (syntaxStack.itemCount() >= 3) {
1238                             Token par2 = syntaxStack.top();
1239                             Token par1 = syntaxStack.top(1);
1240                             Token id = syntaxStack.top(2);
1241                             if (par2.asOperator() == Token::RightPar)
1242                                 if (par1.asOperator() == Token::LeftPar)
1243                                     if (id.isIdentifier()) {
1244                                         ruleFound = true;
1245                                         syntaxStack.pop();
1246                                         syntaxStack.pop();
1247                                         syntaxStack.pop();
1248                                         syntaxStack.push(Token(Token::Integer));
1249                                         d->codes.append(Opcode(Opcode::Function, 0));
1250                                         Q_ASSERT(!argStack.empty());
1251                                         argCount = argStack.empty() ? 0 : argStack.pop();
1252                                     }
1253                         }
1254
1255 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
1256                     // rule for inline array elements, if token is ; or | or }
1257                     // { arg1 ; arg2 -> { arg
1258                     if (!ruleFound)
1259                         if (syntaxStack.itemCount() >= 4)
1260                             if ((token.asOperator() == Token::Semicolon) ||
1261                                     (token.asOperator() == Token::CurlyKet) ||
1262                                     (token.asOperator() == Token::Pipe)) {
1263                                 Token arg2 = syntaxStack.top();
1264                                 Token sep = syntaxStack.top(1);
1265                                 Token arg1 = syntaxStack.top(2);
1266                                 Token bra = syntaxStack.top(3);
1267                                 if (!arg2.isOperator())
1268                                     if (sep.asOperator() == Token::Semicolon)
1269                                         if (!arg1.isOperator())
1270                                             if (bra.asOperator() == Token::CurlyBra) {
1271                                                 ruleFound = true;
1272                                                 syntaxStack.pop();
1273                                                 syntaxStack.pop();
1274                                                 argCount++;
1275                                             }
1276                             }
1277
1278                     // rule for last array row element, if token is ; or | or }
1279                     //  { arg1 | arg2 -> { arg
1280                     if (!ruleFound)
1281                         if (syntaxStack.itemCount() >= 4)
1282                             if ((token.asOperator() == Token::Semicolon) ||
1283                                     (token.asOperator() == Token::CurlyKet) ||
1284                                     (token.asOperator() == Token::Pipe)) {
1285                                 Token arg2 = syntaxStack.top();
1286                                 Token sep = syntaxStack.top(1);
1287                                 Token arg1 = syntaxStack.top(2);
1288                                 Token bra = syntaxStack.top(3);
1289                                 if (!arg2.isOperator())
1290                                     if (sep.asOperator() == Token::Pipe)
1291                                         if (!arg1.isOperator())
1292                                             if (bra.asOperator() == Token::CurlyBra) {
1293                                                 ruleFound = true;
1294                                                 syntaxStack.pop();
1295                                                 syntaxStack.pop();
1296                                                 int rowCount = argStack.pop();
1297                                                 argStack.push(++rowCount);
1298                                                 argCount = 1;
1299                                             }
1300                             }
1301
1302                     // rule for last array element:
1303                     //  { arg } -> arg
1304                     if (!ruleFound)
1305                         if (syntaxStack.itemCount() >= 3) {
1306                             Token ket = syntaxStack.top();
1307                             Token arg = syntaxStack.top(1);
1308                             Token bra = syntaxStack.top(2);
1309                             if (ket.asOperator() == Token::CurlyKet)
1310                                 if (!arg.isOperator())
1311                                     if (bra.asOperator() == Token::CurlyBra) {
1312                                         ruleFound = true;
1313                                         syntaxStack.pop();
1314                                         syntaxStack.pop();
1315                                         syntaxStack.pop();
1316                                         syntaxStack.push(arg);
1317                                         const int rowCount = argStack.pop();
1318                                         d->constants.append(Value((int)argCount));     // cols
1319                                         d->constants.append(Value(rowCount));
1320                                         d->codes.append(Opcode(Opcode::Array, d->constants.count() - 2));
1321                                         Q_ASSERT(!argStack.empty());
1322                                         argCount = argStack.empty() ? 0 : argStack.pop();
1323                                     }
1324                         }
1325 #endif
1326                     // rule for parenthesis:  ( Y ) -> Y
1327                     if (!ruleFound)
1328                         if (syntaxStack.itemCount() >= 3) {
1329                             Token right = syntaxStack.top();
1330                             Token y = syntaxStack.top(1);
1331                             Token left = syntaxStack.top(2);
1332                             if (right.isOperator())
1333                                 if (!y.isOperator())
1334                                     if (left.isOperator())
1335                                         if (right.asOperator() == Token::RightPar)
1336                                             if (left.asOperator() == Token::LeftPar) {
1337                                                 ruleFound = true;
1338                                                 syntaxStack.pop();
1339                                                 syntaxStack.pop();
1340                                                 syntaxStack.pop();
1341                                                 syntaxStack.push(y);
1342                                             }
1343                         }
1344
1345                     // rule for binary operator:  A (op) B -> A
1346                     // conditions: precedence of op >= precedence of token
1347                     // action: push (op) to result
1348                     // e.g. "A * B" becomes 'A' if token is operator '+'
1349                     if (!ruleFound)
1350                         if (syntaxStack.itemCount() >= 3) {
1351                             Token b = syntaxStack.top();
1352                             Token op = syntaxStack.top(1);
1353                             Token a = syntaxStack.top(2);
1354                             if (!a.isOperator())
1355                                 if (!b.isOperator())
1356                                     if (op.isOperator())
1357                                         if (token.asOperator() != Token::LeftPar)
1358                                             if (opPrecedence(op.asOperator()) >= opPrecedence(token.asOperator())) {
1359                                                 ruleFound = true;
1360                                                 syntaxStack.pop();
1361                                                 syntaxStack.pop();
1362                                                 syntaxStack.pop();
1363                                                 syntaxStack.push(b);
1364                                                 switch (op.asOperator()) {
1365                                                     // simple binary operations
1366                                                 case Token::Plus:         d->codes.append(Opcode::Add); break;
1367                                                 case Token::Minus:        d->codes.append(Opcode::Sub); break;
1368                                                 case Token::Asterisk:     d->codes.append(Opcode::Mul); break;
1369                                                 case Token::Slash:        d->codes.append(Opcode::Div); break;
1370                                                 case Token::Caret:        d->codes.append(Opcode::Pow); break;
1371                                                 case Token::Ampersand:    d->codes.append(Opcode::Concat); break;
1372                                                 case Token::Intersect:    d->codes.append(Opcode::Intersect); break;
1373                                                 case Token::Union:        d->codes.append(Opcode::Union); break;
1374
1375                                                     // simple value comparisons
1376                                                 case Token::Equal:        d->codes.append(Opcode::Equal); break;
1377                                                 case Token::Less:         d->codes.append(Opcode::Less); break;
1378                                                 case Token::Greater:      d->codes.append(Opcode::Greater); break;
1379
1380                                                     // NotEqual is Equal, followed by Not
1381                                                 case Token::NotEqual:
1382                                                     d->codes.append(Opcode::Equal);
1383                                                     d->codes.append(Opcode::Not);
1384                                                     break;
1385
1386                                                     // LessOrEqual is Greater, followed by Not
1387                                                 case Token::LessEqual:
1388                                                     d->codes.append(Opcode::Greater);
1389                                                     d->codes.append(Opcode::Not);
1390                                                     break;
1391
1392                                                     // GreaterOrEqual is Less, followed by Not
1393                                                 case Token::GreaterEqual:
1394                                                     d->codes.append(Opcode::Less);
1395                                                     d->codes.append(Opcode::Not);
1396                                                     break;
1397                                                 default: break;
1398                                                 };
1399                                             }
1400                         }
1401
1402                     // rule for unary operator:  (op1) (op2) X -> (op1) X
1403                     // conditions: op2 is unary, token is not '('
1404                     // action: push (op2) to result
1405                     // e.g.  "* - 2" becomes '*'
1406                     if (!ruleFound)
1407                         if (token.asOperator() != Token::LeftPar)
1408                             if (syntaxStack.itemCount() >= 3) {
1409                                 Token x = syntaxStack.top();
1410                                 Token op2 = syntaxStack.top(1);
1411                                 Token op1 = syntaxStack.top(2);
1412                                 if (!x.isOperator())
1413                                     if (op1.isOperator())
1414                                         if (op2.isOperator())
1415                                             if ((op2.asOperator() == Token::Plus) ||
1416                                                     (op2.asOperator() == Token::Minus)) {
1417                                                 ruleFound = true;
1418                                                 syntaxStack.pop();
1419                                                 syntaxStack.pop();
1420                                                 syntaxStack.push(x);
1421                                                 if (op2.asOperator() == Token::Minus)
1422                                                     d->codes.append(Opcode(Opcode::Neg));
1423                                             }
1424                             }
1425
1426                     // auxiliary rule for unary operator:  (op) X -> X
1427                     // conditions: op is unary, op is first in syntax stack, token is not '('
1428                     // action: push (op) to result
1429                     if (!ruleFound)
1430                         if (token.asOperator() != Token::LeftPar)
1431                             if (syntaxStack.itemCount() == 2) {
1432                                 Token x = syntaxStack.top();
1433                                 Token op = syntaxStack.top(1);
1434                                 if (!x.isOperator())
1435                                     if (op.isOperator())
1436                                         if ((op.asOperator() == Token::Plus) ||
1437                                                 (op.asOperator() == Token::Minus)) {
1438                                             ruleFound = true;
1439                                             syntaxStack.pop();
1440                                             syntaxStack.pop();
1441                                             syntaxStack.push(x);
1442                                             if (op.asOperator() == Token::Minus)
1443                                                 d->codes.append(Opcode(Opcode::Neg));
1444                                         }
1445                             }
1446
1447                     if (!ruleFound) break;
1448                 }
1449
1450                 // can't apply rules anymore, push the token
1451                 if (token.asOperator() != Token::Percent)
1452                     syntaxStack.push(token);
1453             }
1454     }
1455
1456     // syntaxStack must left only one operand and end-of-formula (i.e. InvalidOp)
1457     d->valid = false;
1458     if (syntaxStack.itemCount() == 2)
1459         if (syntaxStack.top().isOperator())
1460             if (syntaxStack.top().asOperator() == Token::InvalidOp)
1461                 if (!syntaxStack.top(1).isOperator())
1462                     d->valid = true;
1463
1464     // bad parsing ? clean-up everything
1465     if (!d->valid) {
1466         d->constants.clear();
1467         d->codes.clear();
1468     }
1469 }
1470
1471 bool Formula::isNamedArea(const QString& expr) const
1472 {
1473     return d->sheet ? d->sheet->map()->namedAreaManager()->contains(expr) : false;
1474 }
1475
1476
1477 // Evaluates the formula, returns the result.
1478
1479 // evaluate the cellIndirections
1480 Value Formula::eval(CellIndirection cellIndirections) const
1481 {
1482     QHash<Cell, Value> values;
1483     return evalRecursive(cellIndirections, values);
1484 }
1485
1486 // We need to unroll arrays. Do use the same logic to unroll like OpenOffice.org and Excel are using.
1487 Value Formula::Private::valueOrElement(FuncExtra &fe, const stackEntry& entry) const
1488 {
1489     const Value& v = entry.val;
1490     const Region& region = entry.reg;
1491     if(v.isArray()) {
1492         if(v.count() == 1) // if there is only one item, use that one
1493             return v.element(0);
1494
1495         if(region.isValid() && entry.regIsNamedOrLabeled) {
1496             const QPoint position = region.firstRange().topLeft();
1497             const int idx = fe.myrow - position.y(); // do we need to do the same for columns?
1498             if(idx >= 0 && idx < int(v.count()))
1499                 return v.element(idx); // within the range returns the selected element
1500         }
1501     }
1502     return v;
1503 }
1504
1505 // On OO.org Calc and MS Excel operations done with +, -, * and / do fail if one of the values is
1506 // non-numeric. This differs from formulas like SUM which just ignores non numeric values.
1507 Value numericOrError(const ValueConverter* converter, const Value &v)
1508 {
1509     switch (v.type()) {
1510     case Value::Empty:
1511     case Value::Boolean:
1512     case Value::Integer:
1513     case Value::Float:
1514     case Value::Complex:
1515     case Value::Error:
1516         return v;
1517     case Value::String: {
1518         if (v.asString().isEmpty())
1519             return v;
1520         bool ok;
1521         converter->asNumeric(v, &ok);
1522         if (ok)
1523             return v;
1524     } break;
1525     case Value::Array:
1526     case Value::CellRange:
1527         return v;
1528     }
1529     return Value::errorVALUE();
1530 }
1531
1532 Value Formula::evalRecursive(CellIndirection cellIndirections, QHash<Cell, Value>& values) const
1533 {
1534     QStack<stackEntry> stack;
1535     stackEntry entry;
1536     int index;
1537     Value val1, val2;
1538     QString c;
1539     QVector<Value> args;
1540
1541     const Map* map = d->sheet ? d->sheet->map() : new Map(0 /*document*/);
1542     const ValueConverter* converter = map->converter();
1543     ValueCalc* calc = map->calc();
1544
1545     QSharedPointer<Function> function;
1546     FuncExtra fe;
1547     fe.mycol = fe.myrow = 0;
1548     if (!d->cell.isNull()) {
1549         fe.mycol = d->cell.column();
1550         fe.myrow = d->cell.row();
1551     }
1552
1553     if (d->dirty) {
1554         Tokens tokens = scan(d->expression);
1555         d->valid = tokens.valid();
1556         if (tokens.valid())
1557             compile(tokens);
1558     }
1559
1560     if (!d->valid)
1561         return Value::errorPARSE();
1562
1563     for (int pc = 0; pc < d->codes.count(); pc++) {
1564         Value ret;   // for the function caller
1565         Opcode& opcode = d->codes[pc];
1566         index = opcode.index;
1567         switch (opcode.type) {
1568             // no operation
1569         case Opcode::Nop:
1570             break;
1571
1572             // load a constant, push to stack
1573         case Opcode::Load:
1574             entry.reset();
1575             entry.val = d->constants[index];
1576             stack.push(entry);
1577             break;
1578
1579             // unary operation
1580         case Opcode::Neg:
1581             entry.reset();
1582             entry.val = d->valueOrElement(fe, stack.pop());
1583             if (!entry.val.isError()) // do nothing if we got an error
1584                 entry.val = calc->mul(entry.val, -1);
1585             stack.push(entry);
1586             break;
1587
1588             // binary operation: take two values from stack, do the operation,
1589             // push the result to stack
1590         case Opcode::Add:
1591             entry.reset();
1592             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1593             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1594             val2 = calc->add(val1, val2);
1595             entry.reset();
1596             entry.val = val2;
1597             stack.push(entry);
1598             break;
1599
1600         case Opcode::Sub:
1601             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1602             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1603             val2 = calc->sub(val1, val2);
1604             entry.reset();
1605             entry.val = val2;
1606             stack.push(entry);
1607             break;
1608
1609         case Opcode::Mul:
1610             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1611             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1612             val2 = calc->mul(val1, val2);
1613             entry.reset();
1614             entry.val = val2;
1615             stack.push(entry);
1616             break;
1617
1618         case Opcode::Div:
1619             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1620             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1621             val2 = calc->div(val1, val2);
1622             entry.reset();
1623             entry.val = val2;
1624             stack.push(entry);
1625             break;
1626
1627         case Opcode::Pow:
1628             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1629             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1630             val2 = calc->pow(val1, val2);
1631             entry.reset();
1632             entry.val = val2;
1633             stack.push(entry);
1634             break;
1635
1636             // string concatenation
1637         case Opcode::Concat:
1638             val1 = converter->asString(stack.pop().val);
1639             val2 = converter->asString(stack.pop().val);
1640             if (val1.isError() || val2.isError())
1641                 val1 = Value::errorVALUE();
1642             else
1643                 val1 = Value(val2.asString().append(val1.asString()));
1644             entry.reset();
1645             entry.val = val1;
1646             stack.push(entry);
1647             break;
1648
1649            // array intersection
1650         case Opcode::Intersect: {
1651             val1 = stack.pop().val;
1652             val2 = stack.pop().val;
1653             Region r1(d->constants[index].asString(), map, d->sheet);
1654             Region r2(d->constants[index+1].asString(), map, d->sheet);
1655             if(!r1.isValid() || !r2.isValid()) {
1656                 val1 = Value::errorNULL();
1657             } else {
1658                 Region r = r1.intersected(r2);
1659                 QRect rect = r.boundingRect();
1660                 Cell cell;
1661                 if(rect.top() == rect.bottom())
1662                     cell = Cell(r.firstSheet(), fe.mycol, rect.top());
1663                 else if(rect.left() == rect.right())
1664                     cell = Cell(r.firstSheet(), rect.left(), fe.mycol);
1665                 if(cell.isNull())
1666                     val1 = Value::errorNULL();
1667                 else if(cell.isEmpty())
1668                     val1 = Value::errorNULL();
1669                 else
1670                     val1 = cell.value();
1671             }
1672             entry.reset();
1673             entry.val = val1;
1674             stack.push(entry);
1675         } break;
1676
1677           // region union
1678         case Opcode::Union: {
1679             Region r = stack.pop().reg;
1680             Region r2 = stack.pop().reg;
1681             entry.reset();
1682             if (!r.isValid() || !r2.isValid()) {
1683                 val1 = Value::errorVALUE();
1684                 r = Region();
1685             } else {
1686                 r.add(r2);
1687                 r.firstSheet()->cellStorage()->valueRegion(r);
1688                 // store the reference, so we can use it within functions (not entirely correct)
1689                 entry.col1 = r.boundingRect().left();
1690                 entry.row1 = r.boundingRect().top();
1691                 entry.col2 = r.boundingRect().right();
1692                 entry.row2 = r.boundingRect().bottom();
1693             }
1694             entry.val = val1;
1695             entry.reg = r;
1696             stack.push(entry);
1697         } break;
1698
1699             // logical not
1700         case Opcode::Not:
1701             val1 = converter->asBoolean(d->valueOrElement(fe, stack.pop()));
1702             if (val1.isError())
1703                 val1 = Value::errorVALUE();
1704             else
1705                 val1 = Value(!val1.asBoolean());
1706             entry.reset();
1707             entry.val = val1;
1708             stack.push(entry);
1709             break;
1710
1711             // comparison
1712         case Opcode::Equal:
1713             val1 = d->valueOrElement(fe, stack.pop());
1714             val2 = d->valueOrElement(fe, stack.pop());
1715             if (val1.isError())
1716                 ;
1717             else if (val2.isError())
1718                 val1 = val2;
1719             else if (val2.compare(val1, calc->settings()->caseSensitiveComparisons()) == 0)
1720                 val1 = Value(true);
1721             else
1722                 val1 = Value(false);
1723             entry.reset();
1724             entry.val = val1;
1725             stack.push(entry);
1726             break;
1727
1728             // less than
1729         case Opcode::Less:
1730             val1 = d->valueOrElement(fe, stack.pop());
1731             val2 = d->valueOrElement(fe, stack.pop());
1732             if (val1.isError())
1733                 ;
1734             else if (val2.isError())
1735                 val1 = val2;
1736             else if (val2.compare(val1, calc->settings()->caseSensitiveComparisons()) < 0)
1737                 val1 = Value(true);
1738             else
1739                 val1 = Value(false);
1740             entry.reset();
1741             entry.val = val1;
1742             stack.push(entry);
1743             break;
1744
1745             // greater than
1746         case Opcode::Greater: {
1747             val1 = d->valueOrElement(fe, stack.pop());
1748             val2 = d->valueOrElement(fe, stack.pop());
1749             if (val1.isError())
1750                 ;
1751             else if (val2.isError())
1752                 val1 = val2;
1753             else if (val2.compare(val1, calc->settings()->caseSensitiveComparisons()) > 0)
1754                 val1 = Value(true);
1755             else
1756                 val1 = Value(false);
1757             entry.reset();
1758             entry.val = val1;
1759             stack.push(entry);
1760         }
1761         break;
1762
1763         // cell in a sheet
1764         case Opcode::Cell: {
1765             c = d->constants[index].asString();
1766             val1 = Value::empty();
1767             entry.reset();
1768
1769             const Region region(c, map, d->sheet);
1770             if (!region.isValid()) {
1771                 val1 = Value::errorREF();
1772             } else if (region.isSingular()) {
1773                 const QPoint position = region.firstRange().topLeft();
1774                 if (cellIndirections.isEmpty())
1775                     val1 = Cell(region.firstSheet(), position).value();
1776                 else {
1777                     Cell cell(region.firstSheet(), position);
1778                     cell = cellIndirections.value(cell, cell);
1779                     if (values.contains(cell))
1780                         val1 = values.value(cell);
1781                     else {
1782                         values[cell] = Value::errorCIRCLE();
1783                         if (cell.isFormula())
1784                             val1 = cell.formula().evalRecursive(cellIndirections, values);
1785                         else
1786                             val1 = cell.value();
1787                         values[cell] = val1;
1788                     }
1789                 }
1790                 // store the reference, so we can use it within functions
1791                 entry.col1 = entry.col2 = position.x();
1792                 entry.row1 = entry.row2 = position.y();
1793                 entry.reg = region;
1794                 entry.regIsNamedOrLabeled = map->namedAreaManager()->contains(c);
1795             } else {
1796                 warnSheets << "Unhandled non singular region in Opcode::Cell with rects=" << region.rects();
1797             }
1798             entry.val = val1;
1799             stack.push(entry);
1800         }
1801         break;
1802
1803         // selected range in a sheet
1804         case Opcode::Range: {
1805             c = d->constants[index].asString();
1806             val1 = Value::empty();
1807             entry.reset();
1808
1809             const Region region(c, map, d->sheet);
1810             if (region.isValid()) {
1811                 val1 = region.firstSheet()->cellStorage()->valueRegion(region);
1812                 // store the reference, so we can use it within functions
1813                 entry.col1 = region.firstRange().left();
1814                 entry.row1 = region.firstRange().top();
1815                 entry.col2 = region.firstRange().right();
1816                 entry.row2 = region.firstRange().bottom();
1817                 entry.reg = region;
1818                 entry.regIsNamedOrLabeled = map->namedAreaManager()->contains(c);
1819             }
1820
1821             entry.val = val1; // any array is valid here
1822             stack.push(entry);
1823         }
1824         break;
1825
1826         // reference
1827         case Opcode::Ref:
1828             val1 = d->constants[index];
1829             entry.reset();
1830             entry.val = val1;
1831             stack.push(entry);
1832             break;
1833
1834             // calling function
1835         case Opcode::Function:
1836             // sanity check, this should not happen unless opcode is wrong
1837             // (i.e. there's a bug in the compile() function)
1838             if (stack.count() < index)
1839                 return Value::errorVALUE(); // not enough arguments
1840
1841             args.clear();
1842             fe.ranges.clear();
1843             fe.ranges.resize(index);
1844             fe.regions.clear();
1845             fe.regions.resize(index);
1846             fe.sheet = d->sheet;
1847             for (; index; index--) {
1848                 stackEntry e = stack.pop();
1849                 args.insert(args.begin(), e.val);
1850                 // fill the FunctionExtra object
1851                 fe.ranges[index - 1].col1 = e.col1;
1852                 fe.ranges[index - 1].row1 = e.row1;
1853                 fe.ranges[index - 1].col2 = e.col2;
1854                 fe.ranges[index - 1].row2 = e.row2;
1855                 fe.regions[index - 1] = e.reg;
1856             }
1857
1858             // function name as string value
1859             val1 = converter->asString(stack.pop().val);
1860             if (val1.isError())
1861                 return val1;
1862             function = FunctionRepository::self()->function(val1.asString());
1863             if (!function)
1864                 return Value::errorNAME(); // no such function
1865
1866             ret = function->exec(args, calc, &fe);
1867             entry.reset();
1868             entry.val = ret;
1869             stack.push(entry);
1870
1871             break;
1872
1873 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
1874             // creating an array
1875         case Opcode::Array: {
1876             const int cols = d->constants[index].asInteger();
1877             const int rows = d->constants[index+1].asInteger();
1878             // check if enough array elements are available
1879             if (stack.count() < cols * rows)
1880                 return Value::errorVALUE();
1881             Value array(Value::Array);
1882             for (int row = rows - 1; row >= 0; --row) {
1883                 for (int col = cols - 1; col >= 0; --col) {
1884                     array.setElement(col, row, stack.pop().val);
1885                 }
1886             }
1887             entry.reset();
1888             entry.val = array;
1889             stack.push(entry);
1890             break;
1891         }
1892 #endif
1893         default:
1894             break;
1895         }
1896     }
1897
1898     if (!d->sheet)
1899         delete map;
1900
1901     // more than one value in stack ? unsuccessful execution...
1902     if (stack.count() != 1)
1903         return Value::errorVALUE();
1904
1905     return stack.pop().val;
1906 }
1907
1908 Formula& Formula::operator=(const Formula & other)
1909 {
1910     d = other.d;
1911     return *this;
1912 }
1913
1914 bool Formula::operator==(const Formula& other) const
1915 {
1916     return (d->expression == other.d->expression);
1917 }
1918
1919 // Debugging aid
1920
1921 QString Formula::dump() const
1922 {
1923     QString result;
1924
1925     if (d->dirty) {
1926         Tokens tokens = scan(d->expression);
1927         compile(tokens);
1928     }
1929
1930     result = QString("Expression: [%1]\n").arg(d->expression);
1931 #if 0
1932     Value value = eval();
1933     result.append(QString("Result: %1\n").arg(
1934                       converter->asString(value).asString()));
1935 #endif
1936
1937     result.append("  Constants:\n");
1938     for (int c = 0; c < d->constants.count(); c++) {
1939         QString vtext;
1940         Value val = d->constants[c];
1941         if (val.isString()) vtext = QString("[%1]").arg(val.asString());
1942         else if (val.isNumber()) vtext = QString("%1").arg((double) numToDouble(val.asFloat()));
1943         else if (val.isBoolean()) vtext = QString("%1").arg(val.asBoolean() ? "True" : "False");
1944         else if (val.isError()) vtext = "error";
1945         else vtext = "???";
1946         result += QString("    #%1 = %2\n").arg(c).arg(vtext);
1947     }
1948
1949     result.append("\n");
1950     result.append("  Code:\n");
1951     for (int i = 0; i < d->codes.count(); i++) {
1952         QString ctext;
1953         switch (d->codes[i].type) {
1954         case Opcode::Load:      ctext = QString("Load #%1").arg(d->codes[i].index); break;
1955         case Opcode::Ref:       ctext = QString("Ref #%1").arg(d->codes[i].index); break;
1956         case Opcode::Function:  ctext = QString("Function (%1)").arg(d->codes[i].index); break;
1957         case Opcode::Add:       ctext = "Add"; break;
1958         case Opcode::Sub:       ctext = "Sub"; break;
1959         case Opcode::Mul:       ctext = "Mul"; break;
1960         case Opcode::Div:       ctext = "Div"; break;
1961         case Opcode::Neg:       ctext = "Neg"; break;
1962         case Opcode::Concat:    ctext = "Concat"; break;
1963         case Opcode::Pow:       ctext = "Pow"; break;
1964         case Opcode::Intersect: ctext = "Intersect"; break;
1965         case Opcode::Equal:     ctext = "Equal"; break;
1966         case Opcode::Not:       ctext = "Not"; break;
1967         case Opcode::Less:      ctext = "Less"; break;
1968         case Opcode::Greater:   ctext = "Greater"; break;
1969         case Opcode::Array:     ctext = QString("Array (%1x%2)").arg(d->constants[d->codes[i].index].asInteger()).arg(d->constants[d->codes[i].index+1].asInteger()); break;
1970         case Opcode::Nop:       ctext = "Nop"; break;
1971         case Opcode::Cell:      ctext = "Cell"; break;
1972         case Opcode::Range:     ctext = "Range"; break;
1973         default: ctext = "Unknown"; break;
1974         }
1975         result.append("   ").append(ctext).append("\n");
1976     }
1977
1978     return result;
1979 }
1980
1981 QTextStream& operator<<(QTextStream& ts, Formula formula)
1982 {
1983     ts << formula.dump();
1984     return ts;
1985 }