Warning, file /office/calligra/libs/store/KoXmlWriter.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /* This file is part of the KDE project
0002    Copyright (C) 2004 David Faure <faure@kde.org>
0003    Copyright (C) 2007 Thomas Zander <zander@kde.org>
0004 
0005    This library is free software; you can redistribute it and/or
0006    modify it under the terms of the GNU Library General Public
0007    License as published by the Free Software Foundation; either
0008    version 2 of the License, or (at your option) any later version.
0009 
0010    This library is distributed in the hope that it will be useful,
0011    but WITHOUT ANY WARRANTY; without even the implied warranty of
0012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0013    Library General Public License for more details.
0014 
0015    You should have received a copy of the GNU Library General Public License
0016    along with this library; see the file COPYING.LIB.  If not, write to
0017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0018  * Boston, MA 02110-1301, USA.
0019 */
0020 
0021 #include "KoXmlWriter.h"
0022 
0023 #include <StoreDebug.h>
0024 #include <QByteArray>
0025 #include <QStack>
0026 #include <float.h>
0027 
// Size, in bytes, of the per-writer indentation buffer allocated in
// KoXmlWriter::init(): one leading '\n' followed by spaces.
static const int s_indentBufferLength = 100;
// Size, in bytes, of the per-writer scratch buffer that escapeForXML() tries
// to use before falling back to a separately allocated buffer.
static const int s_escapeBufferLen = 10000;
0030 
0031 class Q_DECL_HIDDEN KoXmlWriter::Private
0032 {
0033 public:
0034     Private(QIODevice* dev_, int indentLevel = 0) : dev(dev_), baseIndentLevel(indentLevel) {}
0035     ~Private() {
0036         delete[] indentBuffer;
0037         delete[] escapeBuffer;
0038         //TODO: look at if we must delete "dev". For me we must delete it otherwise we will leak it
0039     }
0040 
0041     QIODevice* dev;
0042     QStack<Tag> tags;
0043     int baseIndentLevel;
0044 
0045     char* indentBuffer; // maybe make it static, but then it needs a K_GLOBAL_STATIC
0046     // and would eat 1K all the time... Maybe refcount it :)
0047     char* escapeBuffer; // can't really be static if we want to be thread-safe
0048 };
0049 
// Creates a writer that sends its output to dev. indentLevel is stored as the
// base indentation level; the buffers are set up (and the device opened if
// necessary) by init().
KoXmlWriter::KoXmlWriter(QIODevice* dev, int indentLevel)
        : d(new Private(dev, indentLevel))
{
    init();
}
0055 
0056 void KoXmlWriter::init()
0057 {
0058     d->indentBuffer = new char[ s_indentBufferLength ];
0059     memset(d->indentBuffer, ' ', s_indentBufferLength);
0060     *d->indentBuffer = '\n'; // write newline before indentation, in one go
0061 
0062     d->escapeBuffer = new char[s_escapeBufferLen];
0063     if (!d->dev->isOpen())
0064         d->dev->open(QIODevice::WriteOnly);
0065 }
0066 
// Releases the private data (and with it the indentation and escape buffers).
KoXmlWriter::~KoXmlWriter()
{
    delete d;
}
0071 
0072 void KoXmlWriter::startDocument(const char* rootElemName, const char* publicId, const char* systemId)
0073 {
0074     Q_ASSERT(d->tags.isEmpty());
0075     writeCString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
0076     // There isn't much point in a doctype if there's no DTD to refer to
0077     // (I'm told that files that are validated by a RelaxNG schema cannot refer to the schema)
0078     if (publicId) {
0079         writeCString("<!DOCTYPE ");
0080         writeCString(rootElemName);
0081         writeCString(" PUBLIC \"");
0082         writeCString(publicId);
0083         writeCString("\" \"");
0084         writeCString(systemId);
0085         writeCString("\"");
0086         writeCString(">\n");
0087     }
0088 }
0089 
// Terminates the document with a final newline. All elements are expected to
// have been closed by now (checked with an assertion).
void KoXmlWriter::endDocument()
{
    // just to do exactly like QDom does (newline at end of file).
    writeChar('\n');
    Q_ASSERT(d->tags.isEmpty());
}
0096 
0097 // returns the value of indentInside of the parent
0098 bool KoXmlWriter::prepareForChild()
0099 {
0100     if (!d->tags.isEmpty()) {
0101         Tag& parent = d->tags.top();
0102         if (!parent.hasChildren) {
0103             closeStartElement(parent);
0104             parent.hasChildren = true;
0105             parent.lastChildIsText = false;
0106         }
0107         if (parent.indentInside) {
0108             writeIndent();
0109         }
0110         return parent.indentInside;
0111     }
0112     return true;
0113 }
0114 
0115 void KoXmlWriter::prepareForTextNode()
0116 {
0117     if (d->tags.isEmpty())
0118         return;
0119     Tag& parent = d->tags.top();
0120     if (!parent.hasChildren) {
0121         closeStartElement(parent);
0122         parent.hasChildren = true;
0123         parent.lastChildIsText = true;
0124     }
0125 }
0126 
0127 void KoXmlWriter::startElement(const char* tagName, bool indentInside)
0128 {
0129     Q_ASSERT(tagName != 0);
0130 
0131     // Tell parent that it has children
0132     bool parentIndent = prepareForChild();
0133 
0134     d->tags.push(Tag(tagName, parentIndent && indentInside));
0135     writeChar('<');
0136     writeCString(tagName);
0137     //kDebug(s_area) << tagName;
0138 }
0139 
// Inserts the already serialized XML in cstr as a child of the current
// element. The string is written as-is (no escaping is applied here).
void KoXmlWriter::addCompleteElement(const char* cstr)
{
    prepareForChild();
    writeCString(cstr);
}
0145 
0146 
0147 void KoXmlWriter::addCompleteElement(QIODevice* indev)
0148 {
0149     prepareForChild();
0150     const bool wasOpen = indev->isOpen();
0151     // Always (re)open the device in readonly mode, it might be
0152     // already open but for writing, and we need to rewind.
0153     const bool openOk = indev->open(QIODevice::ReadOnly);
0154     Q_ASSERT(openOk);
0155     if (!openOk) {
0156         warnStore << "Failed to re-open the device! wasOpen=" << wasOpen;
0157         return;
0158     }
0159 
0160     static const int MAX_CHUNK_SIZE = 8 * 1024; // 8 KB
0161     QByteArray buffer;
0162     buffer.resize(MAX_CHUNK_SIZE);
0163     while (!indev->atEnd()) {
0164         qint64 len = indev->read(buffer.data(), buffer.size());
0165         if (len <= 0)   // e.g. on error
0166             break;
0167         d->dev->write(buffer.data(), len);
0168     }
0169     if (!wasOpen) {
0170         // Restore initial state
0171         indev->close();
0172     }
0173 }
0174 
0175 void KoXmlWriter::endElement()
0176 {
0177     if (d->tags.isEmpty())
0178         warnStore << "EndElement() was called more times than startElement(). "
0179                      "The generated XML will be invalid! "
0180                      "Please report this bug (by saving the document to another format...)" << endl;
0181 
0182     Tag tag = d->tags.pop();
0183 
0184     if (!tag.hasChildren) {
0185         writeCString("/>");
0186     } else {
0187         if (tag.indentInside && !tag.lastChildIsText) {
0188             writeIndent();
0189         }
0190         writeCString("</");
0191         Q_ASSERT(tag.tagName != 0);
0192         writeCString(tag.tagName);
0193         writeChar('>');
0194     }
0195 }
0196 
0197 void KoXmlWriter::addTextNode(const QByteArray& cstr)
0198 {
0199     // Same as the const char* version below, but here we know the size
0200     prepareForTextNode();
0201     char* escaped = escapeForXML(cstr.constData(), cstr.size());
0202     writeCString(escaped);
0203     if (escaped != d->escapeBuffer)
0204         delete[] escaped;
0205 }
0206 
0207 void KoXmlWriter::addTextNode(const char* cstr)
0208 {
0209     prepareForTextNode();
0210     char* escaped = escapeForXML(cstr, -1);
0211     writeCString(escaped);
0212     if (escaped != d->escapeBuffer)
0213         delete[] escaped;
0214 }
0215 
// Writes "<?" + cstr + "?>" inside the current element.
// NOTE(review): the content goes through addTextNode() and is therefore
// XML-escaped like ordinary text (e.g. '"' becomes &quot;) — confirm that
// this is intended for processing instructions.
void KoXmlWriter::addProcessingInstruction(const char* cstr)
{
    prepareForTextNode();
    writeCString("<?");
    addTextNode(cstr);
    writeCString("?>");
}
0223 
0224 void KoXmlWriter::addAttribute(const char* attrName, const QByteArray& value)
0225 {
0226     // Same as the const char* one, but here we know the size
0227     writeChar(' ');
0228     writeCString(attrName);
0229     writeCString("=\"");
0230     char* escaped = escapeForXML(value.constData(), value.size());
0231     writeCString(escaped);
0232     if (escaped != d->escapeBuffer)
0233         delete[] escaped;
0234     writeChar('"');
0235 }
0236 
0237 void KoXmlWriter::addAttribute(const char* attrName, const char* value)
0238 {
0239     writeChar(' ');
0240     writeCString(attrName);
0241     writeCString("=\"");
0242     char* escaped = escapeForXML(value, -1);
0243     writeCString(escaped);
0244     if (escaped != d->escapeBuffer)
0245         delete[] escaped;
0246     writeChar('"');
0247 }
0248 
0249 void KoXmlWriter::addAttribute(const char* attrName, double value)
0250 {
0251     QByteArray str;
0252     str.setNum(value, 'f', 11);
0253     addAttribute(attrName, str.data());
0254 }
0255 
0256 void KoXmlWriter::addAttribute(const char* attrName, float value)
0257 {
0258     QByteArray str;
0259     str.setNum(value, 'f', FLT_DIG);
0260     addAttribute(attrName, str.data());
0261 }
0262 
0263 void KoXmlWriter::addAttributePt(const char* attrName, double value)
0264 {
0265     QByteArray str;
0266     str.setNum(value, 'f', 11);
0267     str += "pt";
0268     addAttribute(attrName, str.data());
0269 }
0270 
0271 void KoXmlWriter::addAttributePt(const char* attrName, float value)
0272 {
0273     QByteArray str;
0274     str.setNum(value, 'f', FLT_DIG);
0275     str += "pt";
0276     addAttribute(attrName, str.data());
0277 }
0278 
// Writes a newline followed by one space per indentation level, taken in a
// single write from the pre-filled indent buffer. The amount written is
// capped at the size of that buffer.
void KoXmlWriter::writeIndent()
{
    // +1 because of the leading '\n'
    d->dev->write(d->indentBuffer, qMin(indentLevel() + 1,
                                        s_indentBufferLength));
}
0285 
// Writes str to the output device encoded as UTF-8. No escaping is applied.
void KoXmlWriter::writeString(const QString& str)
{
    // cachegrind says .utf8() is where most of the time is spent
    const QByteArray cstr = str.toUtf8();
    d->dev->write(cstr);
}
0292 
// Returns a NUL-terminated copy of source in which '<', '>', '"' and '&' are
// replaced by the corresponding entities and control codes below 32, other
// than tab, line feed and carriage return, are dropped.
//
// Copying stops at the first NUL byte of source. length is only used to size
// the fallback buffer; when it is -1 the length is computed with qstrlen()
// if (and only if) the fallback buffer is needed.
//
// The result normally lives in d->escapeBuffer and is overwritten by the next
// call.
// In case of a reallocation (ret value != d->buffer), the caller owns the return value,
// it must delete it (with [])
char* KoXmlWriter::escapeForXML(const char* source, int length = -1) const
{
    // we're going to be pessimistic on char length; so lets make the outputLength less
    // the amount one char can take: 6
    char* destBoundary = d->escapeBuffer + s_escapeBufferLen - 6;
    char* destination = d->escapeBuffer;
    char* output = d->escapeBuffer;
    const char* src = source; // src moves, source remains
    for (;;) {
        if (destination >= destBoundary) {
            // When we come to realize that our escaped string is going to
            // be bigger than the escape buffer (this shouldn't happen very often...),
            // we drop the idea of using it, and we allocate a bigger buffer.
            // Note that this if() can only be hit once per call to the method.
            if (length == -1)
                length = qstrlen(source);   // expensive...
            uint newLength = length * 6 + 1; // worst case. 6 is due to &quot; and &apos;
            char* buffer = new char[ newLength ];
            destBoundary = buffer + newLength;
            // Carry over what has been escaped so far and continue in the new buffer.
            uint amountOfCharsAlreadyCopied = destination - d->escapeBuffer;
            memcpy(buffer, d->escapeBuffer, amountOfCharsAlreadyCopied);
            output = buffer;
            destination = buffer + amountOfCharsAlreadyCopied;
        }
        switch (*src) {
        case 60: // <
            memcpy(destination, "&lt;", 4);
            destination += 4;
            break;
        case 62: // >
            memcpy(destination, "&gt;", 4);
            destination += 4;
            break;
        case 34: // "
            memcpy(destination, "&quot;", 6);
            destination += 6;
            break;
#if 0 // needed?
        case 39: // '
            memcpy(destination, "&apos;", 6);
            destination += 6;
            break;
#endif
        case 38: // &
            memcpy(destination, "&amp;", 5);
            destination += 5;
            break;
        case 0:
            // End of input: terminate the output and hand it back.
            *destination = '\0';
            return output;
        // Control codes accepted in XML 1.0 documents.
        case 9:
        case 10:
        case 13:
            *destination++ = *src++;
            continue;
        default:
            // Don't add control codes not accepted in XML 1.0 documents.
            if (*src > 0 && *src < 32) {
                ++src;
            } else {
                *destination++ = *src++;
            }
            continue;
        }
        // Reached only by the entity cases above, which do not advance src themselves.
        ++src;
    }
    // NOTREACHED (see case 0)
    return output;
}
0365 
// Writes an empty <manifest:file-entry> element carrying the given media
// type and full path as attributes (media type first).
void KoXmlWriter::addManifestEntry(const QString& fullPath, const QString& mediaType)
{
    startElement("manifest:file-entry");
    addAttribute("manifest:media-type", mediaType);
    addAttribute("manifest:full-path", fullPath);
    endElement();
}
0373 
// Writes a <config:config-item> element of type "string" named configName,
// with value as its text content.
void KoXmlWriter::addConfigItem(const QString & configName, const QString& value)
{
    startElement("config:config-item");
    addAttribute("config:name", configName);
    addAttribute("config:type",  "string");
    addTextNode(value);
    endElement();
}
0382 
0383 void KoXmlWriter::addConfigItem(const QString & configName, bool value)
0384 {
0385     startElement("config:config-item");
0386     addAttribute("config:name", configName);
0387     addAttribute("config:type",  "boolean");
0388     addTextNode(value ? "true" : "false");
0389     endElement();
0390 }
0391 
0392 void KoXmlWriter::addConfigItem(const QString & configName, int value)
0393 {
0394     startElement("config:config-item");
0395     addAttribute("config:name", configName);
0396     addAttribute("config:type",  "int");
0397     addTextNode(QString::number(value));
0398     endElement();
0399 }
0400 
0401 void KoXmlWriter::addConfigItem(const QString & configName, double value)
0402 {
0403     startElement("config:config-item");
0404     addAttribute("config:name", configName);
0405     addAttribute("config:type", "double");
0406     addTextNode(QString::number(value));
0407     endElement();
0408 }
0409 
0410 void KoXmlWriter::addConfigItem(const QString & configName, float value)
0411 {
0412     startElement("config:config-item");
0413     addAttribute("config:name", configName);
0414     addAttribute("config:type", "double");
0415     addTextNode(QString::number(value));
0416     endElement();
0417 }
0418 
0419 void KoXmlWriter::addConfigItem(const QString & configName, long value)
0420 {
0421     startElement("config:config-item");
0422     addAttribute("config:name", configName);
0423     addAttribute("config:type", "long");
0424     addTextNode(QString::number(value));
0425     endElement();
0426 }
0427 
0428 void KoXmlWriter::addConfigItem(const QString & configName, short value)
0429 {
0430     startElement("config:config-item");
0431     addAttribute("config:name", configName);
0432     addAttribute("config:type", "short");
0433     addTextNode(QString::number(value));
0434     endElement();
0435 }
0436 
// Convenience overload: adds text as a span without any tab references
// (an empty tab cache is passed on).
void KoXmlWriter::addTextSpan(const QString& text)
{
    QMap<int, int> tabCache;
    addTextSpan(text, tabCache);
}
0442 
0443 void KoXmlWriter::addTextSpan(const QString& text, const QMap<int, int>& tabCache)
0444 {
0445     int len = text.length();
0446     int nrSpaces = 0; // number of consecutive spaces
0447     bool leadingSpace = false;
0448     QString str;
0449     str.reserve(len);
0450 
0451     // Accumulate chars either in str or in nrSpaces (for spaces).
0452     // Flush str when writing a subelement (for spaces or for another reason)
0453     // Flush nrSpaces when encountering two or more consecutive spaces
0454     for (int i = 0; i < len ; ++i) {
0455         QChar ch = text[i];
0456         ushort unicode = ch.unicode();
0457         if (unicode == ' ') {
0458             if (i == 0)
0459                 leadingSpace = true;
0460             ++nrSpaces;
0461         } else {
0462             if (nrSpaces > 0) {
0463                 // For the first space we use ' '.
0464                 // "it is good practice to use (text:s) for the second and all following SPACE
0465                 // characters in a sequence." (per the ODF spec)
0466                 // however, per the HTML spec, "authors should not rely on user agents to render
0467                 // white space immediately after a start tag or immediately before an end tag"
0468                 // (and both we and OO.o ignore leading spaces in <text:p> or <text:h> elements...)
0469                 if (!leadingSpace) {
0470                     str += ' ';
0471                     --nrSpaces;
0472                 }
0473                 if (nrSpaces > 0) {   // there are more spaces
0474                     if (!str.isEmpty())
0475                         addTextNode(str);
0476                     str.clear();
0477                     startElement("text:s");
0478                     if (nrSpaces > 1)   // it's 1 by default
0479                         addAttribute("text:c", nrSpaces);
0480                     endElement();
0481                 }
0482             }
0483             nrSpaces = 0;
0484             leadingSpace = false;
0485 
0486             switch (unicode) {
0487             case '\t':
0488                 if (!str.isEmpty())
0489                     addTextNode(str);
0490                 str.clear();
0491                 startElement("text:tab");
0492                 if (tabCache.contains(i))
0493                     addAttribute("text:tab-ref", tabCache[i] + 1);
0494                 endElement();
0495                 break;
0496             // gracefully handle \f form feed in text input.
0497             // otherwise the xml will not be valid.
0498             // \f can be added e.g. in ascii import filter.
0499             case '\f':
0500             case '\n':
0501             case QChar::LineSeparator:
0502                 if (!str.isEmpty())
0503                     addTextNode(str);
0504                 str.clear();
0505                 startElement("text:line-break");
0506                 endElement();
0507                 break;
0508             default:
0509                 // don't add stuff that is not allowed in xml. The stuff we need we have already handled above
0510                 if (ch.unicode() >= 0x20) {
0511                     str += text[i];
0512                 }
0513                 break;
0514             }
0515         }
0516     }
0517     // either we still have text in str or we have spaces in nrSpaces
0518     if (!str.isEmpty()) {
0519         addTextNode(str);
0520     }
0521     if (nrSpaces > 0) {   // there are more spaces
0522         startElement("text:s");
0523         if (nrSpaces > 1)   // it's 1 by default
0524             addAttribute("text:c", nrSpaces);
0525         endElement();
0526     }
0527 }
0528 
// Returns the device this writer sends its output to.
QIODevice *KoXmlWriter::device() const
{
    return d->dev;
}
0533 
// Returns the current indentation level: the number of open elements plus
// the base level given to the constructor.
int KoXmlWriter::indentLevel() const
{
    return d->tags.size() + d->baseIndentLevel;
}
0538 
0539 QList<const char*> KoXmlWriter::tagHierarchy() const
0540 {
0541     QList<const char*> answer;
0542     foreach(const Tag & tag, d->tags)
0543         answer.append(tag.tagName);
0544 
0545     return answer;
0546 }
0547 
0548 QString KoXmlWriter::toString() const
0549 {
0550     Q_ASSERT(!d->dev->isSequential());
0551     if (d->dev->isSequential())
0552         return QString();
0553     bool wasOpen = d->dev->isOpen();
0554     qint64 oldPos = -1;
0555     if (wasOpen) {
0556         oldPos = d->dev->pos();
0557         if (oldPos > 0)
0558             d->dev->seek(0);
0559     } else {
0560         const bool openOk = d->dev->open(QIODevice::ReadOnly);
0561         Q_ASSERT(openOk);
0562         if (!openOk)
0563             return QString();
0564     }
0565     QString s = QString::fromUtf8(d->dev->readAll());
0566     if (wasOpen)
0567         d->dev->seek(oldPos);
0568     else
0569         d->dev->close();
0570     return s;
0571 }