File indexing completed on 2025-01-19 10:49:27
0001 /* This file is part of the KDE project 0002 0003 SPDX-FileCopyrightText: 2012-2014 Inge Wallin <inge@lysator.liu.se> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 0009 // Own 0010 #include "OdfReader.h" 0011 0012 // Qt 0013 #include <QStringList> 0014 #include <QBuffer> 0015 0016 // KF5 0017 #include <klocalizedstring.h> 0018 0019 // Calligra 0020 #include <KoStore.h> 0021 #include <KoXmlStreamReader.h> 0022 #include <KoXmlNS.h> 0023 #include <KoXmlWriter.h> // For copyXmlElement 0024 #include <KoOdfReadStore.h> 0025 0026 // Reader library 0027 #include "OdfReaderBackend.h" 0028 #include "OdfReaderContext.h" 0029 #include "OdfTextReader.h" 0030 #include "OdfDrawReader.h" 0031 #include "OdfReaderDebug.h" 0032 0033 0034 static void prepareForOdfInternal(KoXmlStreamReader &reader); 0035 0036 0037 #if 0 0038 static int debugIndent = 0; 0039 #define DEBUGSTART() \ 0040 ++debugIndent; \ 0041 DEBUG_READING("entering") 0042 #define DEBUGEND() \ 0043 DEBUG_READING("exiting"); \ 0044 --debugIndent 0045 #define DEBUG_READING(param) \ 0046 debugOdfReader << QString("%1").arg(" ", debugIndent * 2) << param << ": " \ 0047 << (reader.isStartElement() ? "start": (reader.isEndElement() ? "end" : "other")) \ 0048 << reader.qualifiedName().toString() 0049 #else 0050 #define DEBUGSTART() \ 0051 // NOTHING 0052 #define DEBUGEND() \ 0053 // NOTHING 0054 #define DEBUG_READING(param) \ 0055 // NOTHING 0056 #endif 0057 0058 0059 OdfReader::OdfReader() 0060 : m_backend(0) 0061 , m_context(0) 0062 , m_textReader(0) 0063 , m_drawReader(0) 0064 { 0065 } 0066 0067 OdfReader::~OdfReader() 0068 { 0069 } 0070 0071 0072 OdfTextReader *OdfReader::textReader() const 0073 { 0074 return m_textReader; 0075 } 0076 0077 void OdfReader::setTextReader(OdfTextReader *textReader) 0078 { 0079 m_textReader = textReader; 0080 if (textReader) { 0081 textReader->setParent(this); 0082 } 0083 } 0084 0085 OdfDrawReader *OdfReader::drawReader() const 0086 { 0087 return m_drawReader; 0088 } 0089 0090 void OdfReader::setDrawReader(OdfDrawReader *drawReader) 0091 { 0092 m_drawReader = drawReader; 0093 if (drawReader) { 0094 drawReader->setParent(this); 0095 } 0096 } 0097 0098 bool OdfReader::analyzeContent(OdfReaderContext *context) 0099 { 0100 // Extract styles, manifest, settings, etc 0101 if (context->analyzeOdfFile() != KoFilter::OK) { 0102 return false; 0103 } 0104 debugOdfReader << "analyze ok"; 0105 return true; 0106 } 0107 0108 bool OdfReader::readContent(OdfReaderBackend *backend, OdfReaderContext *context) 0109 { 0110 debugOdfReader << "entering"; 0111 0112 m_backend = backend; 0113 m_context = context; 0114 0115 if (m_textReader) { 0116 m_textReader->setContext(context); 0117 } 0118 0119 // ---------------------------------------------------------------- 0120 // Read the body from content.xml 0121 0122 KoStore *odfStore = m_context->odfStore(); 0123 0124 if (!odfStore->open("content.xml")) { 0125 errorOdfReader << "Unable to open input file content.xml" << endl; 0126 return false; 0127 } 0128 debugOdfReader << "open content.xml ok"; 0129 0130 KoXmlStreamReader reader; 0131 prepareForOdfInternal(reader); 0132 0133 reader.setDevice(odfStore->device()); 0134 bool foundContent = false; 0135 while (!reader.atEnd()) { 0136 reader.readNext(); 0137 0138 if (reader.isStartElement() && reader.qualifiedName() == "office:document-content") { 0139 foundContent = true; 0140 break; 0141 } 0142 } 0143 if (!foundContent) { 0144 errorOdfReader << "Couldn't find the content in content.xml" << endl; 0145 } 0146 0147 m_backend->elementOfficeDocumentcontent(reader, m_context); 0148 0149 // <office:document-content> has the following children in ODF 1.2: 0150 // <office:automatic-styles> 3.15.3 0151 // [done] <office:body> 3.3 0152 // <office:font-face-decls> 3.14 0153 // <office:scripts> 3.12. 0154 while (reader.readNextStartElement()) { 0155 QString tagName = reader.qualifiedName().toString(); 0156 0157 if (tagName == "office:automatic-styles") { 0158 // We already have the styles in the context. No need to read them again. 0159 reader.skipCurrentElement(); 0160 } 0161 else if (tagName == "office:body") { 0162 // This is the big one. 0163 readElementOfficeBody(reader); 0164 } 0165 else if (tagName == "office:font-face-decls") { 0166 // FIXME: Not yet implemented 0167 reader.skipCurrentElement(); 0168 } 0169 else if (tagName == "office:scripts") { 0170 // FIXME: Not yet implemented 0171 reader.skipCurrentElement(); 0172 } 0173 else { 0174 reader.skipCurrentElement(); 0175 } 0176 } 0177 0178 m_backend->elementOfficeDocumentcontent(reader, m_context); 0179 odfStore->close(); 0180 0181 return true; 0182 } 0183 0184 0185 #if 0 0186 // This is a template function for the reader library. 0187 // Copy this one and change the name and fill in the code. 0188 void OdfReader::readElementNamespaceTagname(KoXmlStreamReader &reader) 0189 { 0190 DEBUGSTART(); 0191 0192 // <namespace:tagname> has the following children in ODF 1.2: 0193 // FILL IN THE CHILDREN LIKE THIS EXAMPLE (taken from office:document-content): 0194 // <office:automatic-styles> 3.15.3 0195 // <office:body> 3.3 0196 // <office:font-face-decls> 3.14 0197 // <office:scripts> 3.12. 0198 while (reader.readNextStartElement()) { 0199 QString tagName = reader.qualifiedName().toString(); 0200 0201 if (tagName == "office:automatic-styles") { 0202 // FIXME: NYI 0203 reader.skipCurrentElement(); 0204 } 0205 else if (tagName == "office:body") { 0206 readElementOfficeBody(reader); 0207 } 0208 ... MORE else if () HERE 0209 else { 0210 reader.skipCurrentElement(); 0211 } 0212 } 0213 0214 m_backend->elementNamespaceTagname(reader, m_context); 0215 DEBUGEND(); 0216 } 0217 #endif 0218 0219 0220 void OdfReader::readElementOfficeBody(KoXmlStreamReader &reader) 0221 { 0222 DEBUGSTART(); 0223 m_backend->elementOfficeBody(reader, m_context); 0224 0225 // <office:body> has the following children in ODF 1.2: 0226 // <office:chart> 3.8, 0227 // <office:database> 12.1 0228 // <office:drawing> 3.5 0229 // <office:image> 3.9 0230 // [done] <office:presentation> 3.6 0231 // [done] <office:spreadsheet> 3.7 0232 // [done] <office:text> 3.4 0233 // 0234 // Of those only <office:text> is present in a text document (odf). 0235 while (reader.readNextStartElement()) { 0236 QString tagName = reader.qualifiedName().toString(); 0237 0238 if (tagName == "office:text") { 0239 readElementOfficeText(reader); 0240 } 0241 else if (tagName == "office:spreadsheet") { 0242 readElementOfficeSpreadsheet(reader); 0243 } 0244 else if (tagName == "office:presentation") { 0245 readElementOfficePresentation(reader); 0246 } 0247 else { 0248 reader.skipCurrentElement(); 0249 } 0250 } 0251 0252 m_backend->elementOfficeBody(reader, m_context); 0253 DEBUGEND(); 0254 } 0255 0256 0257 // ---------------------------------------------------------------- 0258 // 0259 // The following functions are just NULL versions of the actual functions. All 0260 // of these are virtual and the real functionality should be implemented in a 0261 // subclass. 0262 // 0263 0264 void OdfReader::readElementOfficeText(KoXmlStreamReader &reader) 0265 { 0266 DEBUGSTART(); 0267 0268 errorOdfReader << "Unimplemented function"; 0269 reader.skipCurrentElement(); 0270 0271 DEBUGEND(); 0272 } 0273 0274 void OdfReader::readElementOfficeSpreadsheet(KoXmlStreamReader &reader) 0275 { 0276 DEBUGSTART(); 0277 0278 errorOdfReader << "Unimplemented function"; 0279 reader.skipCurrentElement(); 0280 0281 DEBUGEND(); 0282 } 0283 0284 void OdfReader::readElementOfficePresentation(KoXmlStreamReader &reader) 0285 { 0286 DEBUGSTART(); 0287 0288 errorOdfReader << "Unimplemented function"; 0289 reader.skipCurrentElement(); 0290 0291 DEBUGEND(); 0292 } 0293 0294 0295 // ---------------------------------------------------------------- 0296 // Other functions 0297 0298 0299 void OdfReader::readUnknownElement(KoXmlStreamReader &reader) 0300 { 0301 DEBUGSTART(); 0302 0303 #if 1 0304 // FIXME: We need to handle this. 0305 reader.skipCurrentElement(); 0306 #else 0307 if (m_context->isInsideParagraph()) { 0308 // readParagraphContents expect to have the reader point to the 0309 // contents of the paragraph so we have to read past the text:p 0310 // start tag here. 0311 reader.readNext(); 0312 readParagraphContents(reader); 0313 } 0314 else { 0315 while (reader.readNextStartElement()) { 0316 readTextLevelElement(reader); 0317 } 0318 } 0319 #endif 0320 0321 DEBUGEND(); 0322 } 0323 0324 0325 // FIXME: Remove this function when it is exported from libs/odf/KoXmlStreamReader.cpp 0326 // 0327 static void prepareForOdfInternal(KoXmlStreamReader &reader) 0328 { 0329 // This list of namespaces is taken from KoXmlNs.cpp 0330 // Maybe not all of them are expected in an ODF document? 0331 reader.addExpectedNamespace("office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0"); 0332 reader.addExpectedNamespace("meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"); 0333 reader.addExpectedNamespace("config", "urn:oasis:names:tc:opendocument:xmlns:config:1.0"); 0334 reader.addExpectedNamespace("text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0"); 0335 reader.addExpectedNamespace("table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0"); 0336 reader.addExpectedNamespace("draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"); 0337 reader.addExpectedNamespace("presentation", "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"); 0338 reader.addExpectedNamespace("dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"); 0339 reader.addExpectedNamespace("chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"); 0340 reader.addExpectedNamespace("form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0"); 0341 reader.addExpectedNamespace("script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0"); 0342 reader.addExpectedNamespace("style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0"); 0343 reader.addExpectedNamespace("number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"); 0344 reader.addExpectedNamespace("manifest", "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"); 0345 reader.addExpectedNamespace("anim", "urn:oasis:names:tc:opendocument:xmlns:animation:1.0"); 0346 0347 reader.addExpectedNamespace("math", "http://www.w3.org/1998/Math/MathML"); 0348 reader.addExpectedNamespace("svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"); 0349 reader.addExpectedNamespace("fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"); 0350 reader.addExpectedNamespace("dc", "http://purl.org/dc/elements/1.1/"); 0351 reader.addExpectedNamespace("xlink", "http://www.w3.org/1999/xlink"); 0352 reader.addExpectedNamespace("VL", "http://openoffice.org/2001/versions-list"); 0353 reader.addExpectedNamespace("smil", "urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"); 0354 reader.addExpectedNamespace("xhtml", "http://www.w3.org/1999/xhtml"); 0355 reader.addExpectedNamespace("xml", "http://www.w3.org/XML/1998/namespace"); 0356 0357 reader.addExpectedNamespace("calligra", "http://www.calligra.org/2005/"); 0358 reader.addExpectedNamespace("officeooo", "http://openoffice.org/2009/office"); 0359 reader.addExpectedNamespace("ooo", "http://openoffice.org/2004/office"); 0360 0361 reader.addExpectedNamespace("delta", "http://www.deltaxml.com/ns/track-changes/delta-namespace"); 0362 reader.addExpectedNamespace("split", "http://www.deltaxml.com/ns/track-changes/split-namespace"); 0363 reader.addExpectedNamespace("ac", "http://www.deltaxml.com/ns/track-changes/attribute-change-namespace"); 0364 0365 // This list of namespaces is taken from KoXmlReader::fixNamespace() 0366 // They were generated by old versions of OpenOffice.org. 0367 reader.addExtraNamespace("office", "http://openoffice.org/2000/office"); 0368 reader.addExtraNamespace("text", "http://openoffice.org/2000/text"); 0369 reader.addExtraNamespace("style", "http://openoffice.org/2000/style"); 0370 reader.addExtraNamespace("fo", "http://www.w3.org/1999/XSL/Format"); 0371 reader.addExtraNamespace("table", "http://openoffice.org/2000/table"); 0372 reader.addExtraNamespace("drawing", "http://openoffice.org/2000/drawing"); 0373 reader.addExtraNamespace("datastyle", "http://openoffice.org/2000/datastyle"); 0374 reader.addExtraNamespace("svg", "http://www.w3.org/2000/svg"); 0375 reader.addExtraNamespace("chart", "http://openoffice.org/2000/chart"); 0376 reader.addExtraNamespace("dr3d", "http://openoffice.org/2000/dr3d"); 0377 reader.addExtraNamespace("form", "http://openoffice.org/2000/form"); 0378 reader.addExtraNamespace("script", "http://openoffice.org/2000/script"); 0379 reader.addExtraNamespace("meta", "http://openoffice.org/2000/meta"); 0380 reader.addExtraNamespace("config", "http://openoffice.org/2001/config"); 0381 reader.addExtraNamespace("pres", "http://openoffice.org/2000/presentation"); 0382 reader.addExtraNamespace("manifest", "http://openoffice.org/2001/manifest"); 0383 }