Warning, file /office/calligra/libs/store/KoXmlWriter.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* This file is part of the KDE project 0002 Copyright (C) 2004 David Faure <faure@kde.org> 0003 Copyright (C) 2007 Thomas Zander <zander@kde.org> 0004 0005 This library is free software; you can redistribute it and/or 0006 modify it under the terms of the GNU Library General Public 0007 License as published by the Free Software Foundation; either 0008 version 2 of the License, or (at your option) any later version. 0009 0010 This library is distributed in the hope that it will be useful, 0011 but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0013 Library General Public License for more details. 0014 0015 You should have received a copy of the GNU Library General Public License 0016 along with this library; see the file COPYING.LIB. If not, write to 0017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0018 * Boston, MA 02110-1301, USA. 0019 */ 0020 0021 #include "KoXmlWriter.h" 0022 0023 #include <StoreDebug.h> 0024 #include <QByteArray> 0025 #include <QStack> 0026 #include <float.h> 0027 0028 static const int s_indentBufferLength = 100; 0029 static const int s_escapeBufferLen = 10000; 0030 0031 class Q_DECL_HIDDEN KoXmlWriter::Private 0032 { 0033 public: 0034 Private(QIODevice* dev_, int indentLevel = 0) : dev(dev_), baseIndentLevel(indentLevel) {} 0035 ~Private() { 0036 delete[] indentBuffer; 0037 delete[] escapeBuffer; 0038 //TODO: look at if we must delete "dev". For me we must delete it otherwise we will leak it 0039 } 0040 0041 QIODevice* dev; 0042 QStack<Tag> tags; 0043 int baseIndentLevel; 0044 0045 char* indentBuffer; // maybe make it static, but then it needs a K_GLOBAL_STATIC 0046 // and would eat 1K all the time... Maybe refcount it :) 0047 char* escapeBuffer; // can't really be static if we want to be thread-safe 0048 }; 0049 0050 KoXmlWriter::KoXmlWriter(QIODevice* dev, int indentLevel) 0051 : d(new Private(dev, indentLevel)) 0052 { 0053 init(); 0054 } 0055 0056 void KoXmlWriter::init() 0057 { 0058 d->indentBuffer = new char[ s_indentBufferLength ]; 0059 memset(d->indentBuffer, ' ', s_indentBufferLength); 0060 *d->indentBuffer = '\n'; // write newline before indentation, in one go 0061 0062 d->escapeBuffer = new char[s_escapeBufferLen]; 0063 if (!d->dev->isOpen()) 0064 d->dev->open(QIODevice::WriteOnly); 0065 } 0066 0067 KoXmlWriter::~KoXmlWriter() 0068 { 0069 delete d; 0070 } 0071 0072 void KoXmlWriter::startDocument(const char* rootElemName, const char* publicId, const char* systemId) 0073 { 0074 Q_ASSERT(d->tags.isEmpty()); 0075 writeCString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); 0076 // There isn't much point in a doctype if there's no DTD to refer to 0077 // (I'm told that files that are validated by a RelaxNG schema cannot refer to the schema) 0078 if (publicId) { 0079 writeCString("<!DOCTYPE "); 0080 writeCString(rootElemName); 0081 writeCString(" PUBLIC \""); 0082 writeCString(publicId); 0083 writeCString("\" \""); 0084 writeCString(systemId); 0085 writeCString("\""); 0086 writeCString(">\n"); 0087 } 0088 } 0089 0090 void KoXmlWriter::endDocument() 0091 { 0092 // just to do exactly like QDom does (newline at end of file). 0093 writeChar('\n'); 0094 Q_ASSERT(d->tags.isEmpty()); 0095 } 0096 0097 // returns the value of indentInside of the parent 0098 bool KoXmlWriter::prepareForChild() 0099 { 0100 if (!d->tags.isEmpty()) { 0101 Tag& parent = d->tags.top(); 0102 if (!parent.hasChildren) { 0103 closeStartElement(parent); 0104 parent.hasChildren = true; 0105 parent.lastChildIsText = false; 0106 } 0107 if (parent.indentInside) { 0108 writeIndent(); 0109 } 0110 return parent.indentInside; 0111 } 0112 return true; 0113 } 0114 0115 void KoXmlWriter::prepareForTextNode() 0116 { 0117 if (d->tags.isEmpty()) 0118 return; 0119 Tag& parent = d->tags.top(); 0120 if (!parent.hasChildren) { 0121 closeStartElement(parent); 0122 parent.hasChildren = true; 0123 parent.lastChildIsText = true; 0124 } 0125 } 0126 0127 void KoXmlWriter::startElement(const char* tagName, bool indentInside) 0128 { 0129 Q_ASSERT(tagName != 0); 0130 0131 // Tell parent that it has children 0132 bool parentIndent = prepareForChild(); 0133 0134 d->tags.push(Tag(tagName, parentIndent && indentInside)); 0135 writeChar('<'); 0136 writeCString(tagName); 0137 //kDebug(s_area) << tagName; 0138 } 0139 0140 void KoXmlWriter::addCompleteElement(const char* cstr) 0141 { 0142 prepareForChild(); 0143 writeCString(cstr); 0144 } 0145 0146 0147 void KoXmlWriter::addCompleteElement(QIODevice* indev) 0148 { 0149 prepareForChild(); 0150 const bool wasOpen = indev->isOpen(); 0151 // Always (re)open the device in readonly mode, it might be 0152 // already open but for writing, and we need to rewind. 0153 const bool openOk = indev->open(QIODevice::ReadOnly); 0154 Q_ASSERT(openOk); 0155 if (!openOk) { 0156 warnStore << "Failed to re-open the device! wasOpen=" << wasOpen; 0157 return; 0158 } 0159 0160 static const int MAX_CHUNK_SIZE = 8 * 1024; // 8 KB 0161 QByteArray buffer; 0162 buffer.resize(MAX_CHUNK_SIZE); 0163 while (!indev->atEnd()) { 0164 qint64 len = indev->read(buffer.data(), buffer.size()); 0165 if (len <= 0) // e.g. on error 0166 break; 0167 d->dev->write(buffer.data(), len); 0168 } 0169 if (!wasOpen) { 0170 // Restore initial state 0171 indev->close(); 0172 } 0173 } 0174 0175 void KoXmlWriter::endElement() 0176 { 0177 if (d->tags.isEmpty()) 0178 warnStore << "EndElement() was called more times than startElement(). " 0179 "The generated XML will be invalid! " 0180 "Please report this bug (by saving the document to another format...)" << endl; 0181 0182 Tag tag = d->tags.pop(); 0183 0184 if (!tag.hasChildren) { 0185 writeCString("/>"); 0186 } else { 0187 if (tag.indentInside && !tag.lastChildIsText) { 0188 writeIndent(); 0189 } 0190 writeCString("</"); 0191 Q_ASSERT(tag.tagName != 0); 0192 writeCString(tag.tagName); 0193 writeChar('>'); 0194 } 0195 } 0196 0197 void KoXmlWriter::addTextNode(const QByteArray& cstr) 0198 { 0199 // Same as the const char* version below, but here we know the size 0200 prepareForTextNode(); 0201 char* escaped = escapeForXML(cstr.constData(), cstr.size()); 0202 writeCString(escaped); 0203 if (escaped != d->escapeBuffer) 0204 delete[] escaped; 0205 } 0206 0207 void KoXmlWriter::addTextNode(const char* cstr) 0208 { 0209 prepareForTextNode(); 0210 char* escaped = escapeForXML(cstr, -1); 0211 writeCString(escaped); 0212 if (escaped != d->escapeBuffer) 0213 delete[] escaped; 0214 } 0215 0216 void KoXmlWriter::addProcessingInstruction(const char* cstr) 0217 { 0218 prepareForTextNode(); 0219 writeCString("<?"); 0220 addTextNode(cstr); 0221 writeCString("?>"); 0222 } 0223 0224 void KoXmlWriter::addAttribute(const char* attrName, const QByteArray& value) 0225 { 0226 // Same as the const char* one, but here we know the size 0227 writeChar(' '); 0228 writeCString(attrName); 0229 writeCString("=\""); 0230 char* escaped = escapeForXML(value.constData(), value.size()); 0231 writeCString(escaped); 0232 if (escaped != d->escapeBuffer) 0233 delete[] escaped; 0234 writeChar('"'); 0235 } 0236 0237 void KoXmlWriter::addAttribute(const char* attrName, const char* value) 0238 { 0239 writeChar(' '); 0240 writeCString(attrName); 0241 writeCString("=\""); 0242 char* escaped = escapeForXML(value, -1); 0243 writeCString(escaped); 0244 if (escaped != d->escapeBuffer) 0245 delete[] escaped; 0246 writeChar('"'); 0247 } 0248 0249 void KoXmlWriter::addAttribute(const char* attrName, double value) 0250 { 0251 QByteArray str; 0252 str.setNum(value, 'f', 11); 0253 addAttribute(attrName, str.data()); 0254 } 0255 0256 void KoXmlWriter::addAttribute(const char* attrName, float value) 0257 { 0258 QByteArray str; 0259 str.setNum(value, 'f', FLT_DIG); 0260 addAttribute(attrName, str.data()); 0261 } 0262 0263 void KoXmlWriter::addAttributePt(const char* attrName, double value) 0264 { 0265 QByteArray str; 0266 str.setNum(value, 'f', 11); 0267 str += "pt"; 0268 addAttribute(attrName, str.data()); 0269 } 0270 0271 void KoXmlWriter::addAttributePt(const char* attrName, float value) 0272 { 0273 QByteArray str; 0274 str.setNum(value, 'f', FLT_DIG); 0275 str += "pt"; 0276 addAttribute(attrName, str.data()); 0277 } 0278 0279 void KoXmlWriter::writeIndent() 0280 { 0281 // +1 because of the leading '\n' 0282 d->dev->write(d->indentBuffer, qMin(indentLevel() + 1, 0283 s_indentBufferLength)); 0284 } 0285 0286 void KoXmlWriter::writeString(const QString& str) 0287 { 0288 // cachegrind says .utf8() is where most of the time is spent 0289 const QByteArray cstr = str.toUtf8(); 0290 d->dev->write(cstr); 0291 } 0292 0293 // In case of a reallocation (ret value != d->buffer), the caller owns the return value, 0294 // it must delete it (with []) 0295 char* KoXmlWriter::escapeForXML(const char* source, int length = -1) const 0296 { 0297 // we're going to be pessimistic on char length; so lets make the outputLength less 0298 // the amount one char can take: 6 0299 char* destBoundary = d->escapeBuffer + s_escapeBufferLen - 6; 0300 char* destination = d->escapeBuffer; 0301 char* output = d->escapeBuffer; 0302 const char* src = source; // src moves, source remains 0303 for (;;) { 0304 if (destination >= destBoundary) { 0305 // When we come to realize that our escaped string is going to 0306 // be bigger than the escape buffer (this shouldn't happen very often...), 0307 // we drop the idea of using it, and we allocate a bigger buffer. 0308 // Note that this if() can only be hit once per call to the method. 0309 if (length == -1) 0310 length = qstrlen(source); // expensive... 0311 uint newLength = length * 6 + 1; // worst case. 6 is due to " and ' 0312 char* buffer = new char[ newLength ]; 0313 destBoundary = buffer + newLength; 0314 uint amountOfCharsAlreadyCopied = destination - d->escapeBuffer; 0315 memcpy(buffer, d->escapeBuffer, amountOfCharsAlreadyCopied); 0316 output = buffer; 0317 destination = buffer + amountOfCharsAlreadyCopied; 0318 } 0319 switch (*src) { 0320 case 60: // < 0321 memcpy(destination, "<", 4); 0322 destination += 4; 0323 break; 0324 case 62: // > 0325 memcpy(destination, ">", 4); 0326 destination += 4; 0327 break; 0328 case 34: // " 0329 memcpy(destination, """, 6); 0330 destination += 6; 0331 break; 0332 #if 0 // needed? 0333 case 39: // ' 0334 memcpy(destination, "'", 6); 0335 destination += 6; 0336 break; 0337 #endif 0338 case 38: // & 0339 memcpy(destination, "&", 5); 0340 destination += 5; 0341 break; 0342 case 0: 0343 *destination = '\0'; 0344 return output; 0345 // Control codes accepted in XML 1.0 documents. 0346 case 9: 0347 case 10: 0348 case 13: 0349 *destination++ = *src++; 0350 continue; 0351 default: 0352 // Don't add control codes not accepted in XML 1.0 documents. 0353 if (*src > 0 && *src < 32) { 0354 ++src; 0355 } else { 0356 *destination++ = *src++; 0357 } 0358 continue; 0359 } 0360 ++src; 0361 } 0362 // NOTREACHED (see case 0) 0363 return output; 0364 } 0365 0366 void KoXmlWriter::addManifestEntry(const QString& fullPath, const QString& mediaType) 0367 { 0368 startElement("manifest:file-entry"); 0369 addAttribute("manifest:media-type", mediaType); 0370 addAttribute("manifest:full-path", fullPath); 0371 endElement(); 0372 } 0373 0374 void KoXmlWriter::addConfigItem(const QString & configName, const QString& value) 0375 { 0376 startElement("config:config-item"); 0377 addAttribute("config:name", configName); 0378 addAttribute("config:type", "string"); 0379 addTextNode(value); 0380 endElement(); 0381 } 0382 0383 void KoXmlWriter::addConfigItem(const QString & configName, bool value) 0384 { 0385 startElement("config:config-item"); 0386 addAttribute("config:name", configName); 0387 addAttribute("config:type", "boolean"); 0388 addTextNode(value ? "true" : "false"); 0389 endElement(); 0390 } 0391 0392 void KoXmlWriter::addConfigItem(const QString & configName, int value) 0393 { 0394 startElement("config:config-item"); 0395 addAttribute("config:name", configName); 0396 addAttribute("config:type", "int"); 0397 addTextNode(QString::number(value)); 0398 endElement(); 0399 } 0400 0401 void KoXmlWriter::addConfigItem(const QString & configName, double value) 0402 { 0403 startElement("config:config-item"); 0404 addAttribute("config:name", configName); 0405 addAttribute("config:type", "double"); 0406 addTextNode(QString::number(value)); 0407 endElement(); 0408 } 0409 0410 void KoXmlWriter::addConfigItem(const QString & configName, float value) 0411 { 0412 startElement("config:config-item"); 0413 addAttribute("config:name", configName); 0414 addAttribute("config:type", "double"); 0415 addTextNode(QString::number(value)); 0416 endElement(); 0417 } 0418 0419 void KoXmlWriter::addConfigItem(const QString & configName, long value) 0420 { 0421 startElement("config:config-item"); 0422 addAttribute("config:name", configName); 0423 addAttribute("config:type", "long"); 0424 addTextNode(QString::number(value)); 0425 endElement(); 0426 } 0427 0428 void KoXmlWriter::addConfigItem(const QString & configName, short value) 0429 { 0430 startElement("config:config-item"); 0431 addAttribute("config:name", configName); 0432 addAttribute("config:type", "short"); 0433 addTextNode(QString::number(value)); 0434 endElement(); 0435 } 0436 0437 void KoXmlWriter::addTextSpan(const QString& text) 0438 { 0439 QMap<int, int> tabCache; 0440 addTextSpan(text, tabCache); 0441 } 0442 0443 void KoXmlWriter::addTextSpan(const QString& text, const QMap<int, int>& tabCache) 0444 { 0445 int len = text.length(); 0446 int nrSpaces = 0; // number of consecutive spaces 0447 bool leadingSpace = false; 0448 QString str; 0449 str.reserve(len); 0450 0451 // Accumulate chars either in str or in nrSpaces (for spaces). 0452 // Flush str when writing a subelement (for spaces or for another reason) 0453 // Flush nrSpaces when encountering two or more consecutive spaces 0454 for (int i = 0; i < len ; ++i) { 0455 QChar ch = text[i]; 0456 ushort unicode = ch.unicode(); 0457 if (unicode == ' ') { 0458 if (i == 0) 0459 leadingSpace = true; 0460 ++nrSpaces; 0461 } else { 0462 if (nrSpaces > 0) { 0463 // For the first space we use ' '. 0464 // "it is good practice to use (text:s) for the second and all following SPACE 0465 // characters in a sequence." (per the ODF spec) 0466 // however, per the HTML spec, "authors should not rely on user agents to render 0467 // white space immediately after a start tag or immediately before an end tag" 0468 // (and both we and OO.o ignore leading spaces in <text:p> or <text:h> elements...) 0469 if (!leadingSpace) { 0470 str += ' '; 0471 --nrSpaces; 0472 } 0473 if (nrSpaces > 0) { // there are more spaces 0474 if (!str.isEmpty()) 0475 addTextNode(str); 0476 str.clear(); 0477 startElement("text:s"); 0478 if (nrSpaces > 1) // it's 1 by default 0479 addAttribute("text:c", nrSpaces); 0480 endElement(); 0481 } 0482 } 0483 nrSpaces = 0; 0484 leadingSpace = false; 0485 0486 switch (unicode) { 0487 case '\t': 0488 if (!str.isEmpty()) 0489 addTextNode(str); 0490 str.clear(); 0491 startElement("text:tab"); 0492 if (tabCache.contains(i)) 0493 addAttribute("text:tab-ref", tabCache[i] + 1); 0494 endElement(); 0495 break; 0496 // gracefully handle \f form feed in text input. 0497 // otherwise the xml will not be valid. 0498 // \f can be added e.g. in ascii import filter. 0499 case '\f': 0500 case '\n': 0501 case QChar::LineSeparator: 0502 if (!str.isEmpty()) 0503 addTextNode(str); 0504 str.clear(); 0505 startElement("text:line-break"); 0506 endElement(); 0507 break; 0508 default: 0509 // don't add stuff that is not allowed in xml. The stuff we need we have already handled above 0510 if (ch.unicode() >= 0x20) { 0511 str += text[i]; 0512 } 0513 break; 0514 } 0515 } 0516 } 0517 // either we still have text in str or we have spaces in nrSpaces 0518 if (!str.isEmpty()) { 0519 addTextNode(str); 0520 } 0521 if (nrSpaces > 0) { // there are more spaces 0522 startElement("text:s"); 0523 if (nrSpaces > 1) // it's 1 by default 0524 addAttribute("text:c", nrSpaces); 0525 endElement(); 0526 } 0527 } 0528 0529 QIODevice *KoXmlWriter::device() const 0530 { 0531 return d->dev; 0532 } 0533 0534 int KoXmlWriter::indentLevel() const 0535 { 0536 return d->tags.size() + d->baseIndentLevel; 0537 } 0538 0539 QList<const char*> KoXmlWriter::tagHierarchy() const 0540 { 0541 QList<const char*> answer; 0542 foreach(const Tag & tag, d->tags) 0543 answer.append(tag.tagName); 0544 0545 return answer; 0546 } 0547 0548 QString KoXmlWriter::toString() const 0549 { 0550 Q_ASSERT(!d->dev->isSequential()); 0551 if (d->dev->isSequential()) 0552 return QString(); 0553 bool wasOpen = d->dev->isOpen(); 0554 qint64 oldPos = -1; 0555 if (wasOpen) { 0556 oldPos = d->dev->pos(); 0557 if (oldPos > 0) 0558 d->dev->seek(0); 0559 } else { 0560 const bool openOk = d->dev->open(QIODevice::ReadOnly); 0561 Q_ASSERT(openOk); 0562 if (!openOk) 0563 return QString(); 0564 } 0565 QString s = QString::fromUtf8(d->dev->readAll()); 0566 if (wasOpen) 0567 d->dev->seek(oldPos); 0568 else 0569 d->dev->close(); 0570 return s; 0571 }