File indexing completed on 2024-12-01 13:11:46
0001 /* This file is part of the KDE project 0002 0003 Copyright (C) 2012-2014 Inge Wallin <inge@lysator.liu.se> 0004 0005 This library is free software; you can redistribute it and/or 0006 modify it under the terms of the GNU Library General Public 0007 License as published by the Free Software Foundation; either 0008 version 2 of the License, or (at your option) any later version. 0009 0010 This library is distributed in the hope that it will be useful, 0011 but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0013 Library General Public License for more details. 0014 0015 You should have received a copy of the GNU Library General Public License 0016 along with this library; see the file COPYING.LIB. If not, write to 0017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0018 Boston, MA 02110-1301, USA. 0019 */ 0020 0021 0022 // Own 0023 #include "OdfReader.h" 0024 0025 // Qt 0026 #include <QStringList> 0027 #include <QBuffer> 0028 0029 // KF5 0030 #include <klocalizedstring.h> 0031 0032 // Calligra 0033 #include <KoStore.h> 0034 #include <KoXmlStreamReader.h> 0035 #include <KoXmlNS.h> 0036 #include <KoXmlWriter.h> // For copyXmlElement 0037 #include <KoOdfReadStore.h> 0038 0039 // Reader library 0040 #include "OdfReaderBackend.h" 0041 #include "OdfReaderContext.h" 0042 #include "OdfTextReader.h" 0043 #include "OdfDrawReader.h" 0044 #include "OdfReaderDebug.h" 0045 0046 0047 static void prepareForOdfInternal(KoXmlStreamReader &reader); 0048 0049 0050 #if 0 0051 static int debugIndent = 0; 0052 #define DEBUGSTART() \ 0053 ++debugIndent; \ 0054 DEBUG_READING("entering") 0055 #define DEBUGEND() \ 0056 DEBUG_READING("exiting"); \ 0057 --debugIndent 0058 #define DEBUG_READING(param) \ 0059 debugOdfReader << QString("%1").arg(" ", debugIndent * 2) << param << ": " \ 0060 << (reader.isStartElement() ? "start": (reader.isEndElement() ? "end" : "other")) \ 0061 << reader.qualifiedName().toString() 0062 #else 0063 #define DEBUGSTART() \ 0064 // NOTHING 0065 #define DEBUGEND() \ 0066 // NOTHING 0067 #define DEBUG_READING(param) \ 0068 // NOTHING 0069 #endif 0070 0071 0072 OdfReader::OdfReader() 0073 : m_backend(0) 0074 , m_context(0) 0075 , m_textReader(0) 0076 , m_drawReader(0) 0077 { 0078 } 0079 0080 OdfReader::~OdfReader() 0081 { 0082 } 0083 0084 0085 OdfTextReader *OdfReader::textReader() const 0086 { 0087 return m_textReader; 0088 } 0089 0090 void OdfReader::setTextReader(OdfTextReader *textReader) 0091 { 0092 m_textReader = textReader; 0093 if (textReader) { 0094 textReader->setParent(this); 0095 } 0096 } 0097 0098 OdfDrawReader *OdfReader::drawReader() const 0099 { 0100 return m_drawReader; 0101 } 0102 0103 void OdfReader::setDrawReader(OdfDrawReader *drawReader) 0104 { 0105 m_drawReader = drawReader; 0106 if (drawReader) { 0107 drawReader->setParent(this); 0108 } 0109 } 0110 0111 bool OdfReader::analyzeContent(OdfReaderContext *context) 0112 { 0113 // Extract styles, manifest, settings, etc 0114 if (context->analyzeOdfFile() != KoFilter::OK) { 0115 return false; 0116 } 0117 debugOdfReader << "analyze ok"; 0118 return true; 0119 } 0120 0121 bool OdfReader::readContent(OdfReaderBackend *backend, OdfReaderContext *context) 0122 { 0123 debugOdfReader << "entering"; 0124 0125 m_backend = backend; 0126 m_context = context; 0127 0128 if (m_textReader) { 0129 m_textReader->setContext(context); 0130 } 0131 0132 // ---------------------------------------------------------------- 0133 // Read the body from content.xml 0134 0135 KoStore *odfStore = m_context->odfStore(); 0136 0137 if (!odfStore->open("content.xml")) { 0138 errorOdfReader << "Unable to open input file content.xml" << endl; 0139 return false; 0140 } 0141 debugOdfReader << "open content.xml ok"; 0142 0143 KoXmlStreamReader reader; 0144 prepareForOdfInternal(reader); 0145 0146 reader.setDevice(odfStore->device()); 0147 bool foundContent = false; 0148 while (!reader.atEnd()) { 0149 reader.readNext(); 0150 0151 if (reader.isStartElement() && reader.qualifiedName() == "office:document-content") { 0152 foundContent = true; 0153 break; 0154 } 0155 } 0156 if (!foundContent) { 0157 errorOdfReader << "Couldn't find the content in content.xml" << endl; 0158 } 0159 0160 m_backend->elementOfficeDocumentcontent(reader, m_context); 0161 0162 // <office:document-content> has the following children in ODF 1.2: 0163 // <office:automatic-styles> 3.15.3 0164 // [done] <office:body> 3.3 0165 // <office:font-face-decls> 3.14 0166 // <office:scripts> 3.12. 0167 while (reader.readNextStartElement()) { 0168 QString tagName = reader.qualifiedName().toString(); 0169 0170 if (tagName == "office:automatic-styles") { 0171 // We already have the styles in the context. No need to read them again. 0172 reader.skipCurrentElement(); 0173 } 0174 else if (tagName == "office:body") { 0175 // This is the big one. 0176 readElementOfficeBody(reader); 0177 } 0178 else if (tagName == "office:font-face-decls") { 0179 // FIXME: Not yet implemented 0180 reader.skipCurrentElement(); 0181 } 0182 else if (tagName == "office:scripts") { 0183 // FIXME: Not yet implemented 0184 reader.skipCurrentElement(); 0185 } 0186 else { 0187 reader.skipCurrentElement(); 0188 } 0189 } 0190 0191 m_backend->elementOfficeDocumentcontent(reader, m_context); 0192 odfStore->close(); 0193 0194 return true; 0195 } 0196 0197 0198 #if 0 0199 // This is a template function for the reader library. 0200 // Copy this one and change the name and fill in the code. 0201 void OdfReader::readElementNamespaceTagname(KoXmlStreamReader &reader) 0202 { 0203 DEBUGSTART(); 0204 0205 // <namespace:tagname> has the following children in ODF 1.2: 0206 // FILL IN THE CHILDREN LIKE THIS EXAMPLE (taken from office:document-content): 0207 // <office:automatic-styles> 3.15.3 0208 // <office:body> 3.3 0209 // <office:font-face-decls> 3.14 0210 // <office:scripts> 3.12. 0211 while (reader.readNextStartElement()) { 0212 QString tagName = reader.qualifiedName().toString(); 0213 0214 if (tagName == "office:automatic-styles") { 0215 // FIXME: NYI 0216 reader.skipCurrentElement(); 0217 } 0218 else if (tagName == "office:body") { 0219 readElementOfficeBody(reader); 0220 } 0221 ... MORE else if () HERE 0222 else { 0223 reader.skipCurrentElement(); 0224 } 0225 } 0226 0227 m_backend->elementNamespaceTagname(reader, m_context); 0228 DEBUGEND(); 0229 } 0230 #endif 0231 0232 0233 void OdfReader::readElementOfficeBody(KoXmlStreamReader &reader) 0234 { 0235 DEBUGSTART(); 0236 m_backend->elementOfficeBody(reader, m_context); 0237 0238 // <office:body> has the following children in ODF 1.2: 0239 // <office:chart> 3.8, 0240 // <office:database> 12.1 0241 // <office:drawing> 3.5 0242 // <office:image> 3.9 0243 // [done] <office:presentation> 3.6 0244 // [done] <office:spreadsheet> 3.7 0245 // [done] <office:text> 3.4 0246 // 0247 // Of those only <office:text> is present in a text document (odf). 0248 while (reader.readNextStartElement()) { 0249 QString tagName = reader.qualifiedName().toString(); 0250 0251 if (tagName == "office:text") { 0252 readElementOfficeText(reader); 0253 } 0254 else if (tagName == "office:spreadsheet") { 0255 readElementOfficeSpreadsheet(reader); 0256 } 0257 else if (tagName == "office:presentation") { 0258 readElementOfficePresentation(reader); 0259 } 0260 else { 0261 reader.skipCurrentElement(); 0262 } 0263 } 0264 0265 m_backend->elementOfficeBody(reader, m_context); 0266 DEBUGEND(); 0267 } 0268 0269 0270 // ---------------------------------------------------------------- 0271 // 0272 // The following functions are just NULL versions of the actual functions. All 0273 // of these are virtual and the real functionality should be implemented in a 0274 // subclass. 0275 // 0276 0277 void OdfReader::readElementOfficeText(KoXmlStreamReader &reader) 0278 { 0279 DEBUGSTART(); 0280 0281 errorOdfReader << "Unimplemented function"; 0282 reader.skipCurrentElement(); 0283 0284 DEBUGEND(); 0285 } 0286 0287 void OdfReader::readElementOfficeSpreadsheet(KoXmlStreamReader &reader) 0288 { 0289 DEBUGSTART(); 0290 0291 errorOdfReader << "Unimplemented function"; 0292 reader.skipCurrentElement(); 0293 0294 DEBUGEND(); 0295 } 0296 0297 void OdfReader::readElementOfficePresentation(KoXmlStreamReader &reader) 0298 { 0299 DEBUGSTART(); 0300 0301 errorOdfReader << "Unimplemented function"; 0302 reader.skipCurrentElement(); 0303 0304 DEBUGEND(); 0305 } 0306 0307 0308 // ---------------------------------------------------------------- 0309 // Other functions 0310 0311 0312 void OdfReader::readUnknownElement(KoXmlStreamReader &reader) 0313 { 0314 DEBUGSTART(); 0315 0316 #if 1 0317 // FIXME: We need to handle this. 0318 reader.skipCurrentElement(); 0319 #else 0320 if (m_context->isInsideParagraph()) { 0321 // readParagraphContents expect to have the reader point to the 0322 // contents of the paragraph so we have to read past the text:p 0323 // start tag here. 0324 reader.readNext(); 0325 readParagraphContents(reader); 0326 } 0327 else { 0328 while (reader.readNextStartElement()) { 0329 readTextLevelElement(reader); 0330 } 0331 } 0332 #endif 0333 0334 DEBUGEND(); 0335 } 0336 0337 0338 // FIXME: Remove this function when it is exported from libs/odf/KoXmlStreamReader.cpp 0339 // 0340 static void prepareForOdfInternal(KoXmlStreamReader &reader) 0341 { 0342 // This list of namespaces is taken from KoXmlNs.cpp 0343 // Maybe not all of them are expected in an ODF document? 0344 reader.addExpectedNamespace("office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0"); 0345 reader.addExpectedNamespace("meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"); 0346 reader.addExpectedNamespace("config", "urn:oasis:names:tc:opendocument:xmlns:config:1.0"); 0347 reader.addExpectedNamespace("text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0"); 0348 reader.addExpectedNamespace("table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0"); 0349 reader.addExpectedNamespace("draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"); 0350 reader.addExpectedNamespace("presentation", "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"); 0351 reader.addExpectedNamespace("dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"); 0352 reader.addExpectedNamespace("chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"); 0353 reader.addExpectedNamespace("form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0"); 0354 reader.addExpectedNamespace("script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0"); 0355 reader.addExpectedNamespace("style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0"); 0356 reader.addExpectedNamespace("number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"); 0357 reader.addExpectedNamespace("manifest", "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"); 0358 reader.addExpectedNamespace("anim", "urn:oasis:names:tc:opendocument:xmlns:animation:1.0"); 0359 0360 reader.addExpectedNamespace("math", "http://www.w3.org/1998/Math/MathML"); 0361 reader.addExpectedNamespace("svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"); 0362 reader.addExpectedNamespace("fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"); 0363 reader.addExpectedNamespace("dc", "http://purl.org/dc/elements/1.1/"); 0364 reader.addExpectedNamespace("xlink", "http://www.w3.org/1999/xlink"); 0365 reader.addExpectedNamespace("VL", "http://openoffice.org/2001/versions-list"); 0366 reader.addExpectedNamespace("smil", "urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"); 0367 reader.addExpectedNamespace("xhtml", "http://www.w3.org/1999/xhtml"); 0368 reader.addExpectedNamespace("xml", "http://www.w3.org/XML/1998/namespace"); 0369 0370 reader.addExpectedNamespace("calligra", "http://www.calligra.org/2005/"); 0371 reader.addExpectedNamespace("officeooo", "http://openoffice.org/2009/office"); 0372 reader.addExpectedNamespace("ooo", "http://openoffice.org/2004/office"); 0373 0374 reader.addExpectedNamespace("delta", "http://www.deltaxml.com/ns/track-changes/delta-namespace"); 0375 reader.addExpectedNamespace("split", "http://www.deltaxml.com/ns/track-changes/split-namespace"); 0376 reader.addExpectedNamespace("ac", "http://www.deltaxml.com/ns/track-changes/attribute-change-namespace"); 0377 0378 // This list of namespaces is taken from KoXmlReader::fixNamespace() 0379 // They were generated by old versions of OpenOffice.org. 0380 reader.addExtraNamespace("office", "http://openoffice.org/2000/office"); 0381 reader.addExtraNamespace("text", "http://openoffice.org/2000/text"); 0382 reader.addExtraNamespace("style", "http://openoffice.org/2000/style"); 0383 reader.addExtraNamespace("fo", "http://www.w3.org/1999/XSL/Format"); 0384 reader.addExtraNamespace("table", "http://openoffice.org/2000/table"); 0385 reader.addExtraNamespace("drawing", "http://openoffice.org/2000/drawing"); 0386 reader.addExtraNamespace("datastyle", "http://openoffice.org/2000/datastyle"); 0387 reader.addExtraNamespace("svg", "http://www.w3.org/2000/svg"); 0388 reader.addExtraNamespace("chart", "http://openoffice.org/2000/chart"); 0389 reader.addExtraNamespace("dr3d", "http://openoffice.org/2000/dr3d"); 0390 reader.addExtraNamespace("form", "http://openoffice.org/2000/form"); 0391 reader.addExtraNamespace("script", "http://openoffice.org/2000/script"); 0392 reader.addExtraNamespace("meta", "http://openoffice.org/2000/meta"); 0393 reader.addExtraNamespace("config", "http://openoffice.org/2001/config"); 0394 reader.addExtraNamespace("pres", "http://openoffice.org/2000/presentation"); 0395 reader.addExtraNamespace("manifest", "http://openoffice.org/2001/manifest"); 0396 }