File indexing completed on 2025-01-19 10:49:27

0001 /* This file is part of the KDE project
0002 
0003    SPDX-FileCopyrightText: 2012-2014 Inge Wallin <inge@lysator.liu.se>
0004 
0005    SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 
0009 // Own
0010 #include "OdfReader.h"
0011 
0012 // Qt
0013 #include <QStringList>
0014 #include <QBuffer>
0015 
0016 // KF5
0017 #include <klocalizedstring.h>
0018 
0019 // Calligra
0020 #include <KoStore.h>
0021 #include <KoXmlStreamReader.h>
0022 #include <KoXmlNS.h>
0023 #include <KoXmlWriter.h>  // For copyXmlElement
0024 #include <KoOdfReadStore.h>
0025 
0026 // Reader library
0027 #include "OdfReaderBackend.h"
0028 #include "OdfReaderContext.h"
0029 #include "OdfTextReader.h"
0030 #include "OdfDrawReader.h"
0031 #include "OdfReaderDebug.h"
0032 
0033 
0034 static void prepareForOdfInternal(KoXmlStreamReader &reader);
0035 
0036 
// Tracing helpers used by the readElement*() functions.
//
// Change the condition below to 1 to get an indented enter/exit trace on
// debugOdfReader.  The enabled variants expect a variable called 'reader'
// to be in scope where they are used.  The disabled variants expand to
// nothing at all.
#if 0
static int debugIndent = 0;
#define DEBUG_READING(param) \
    debugOdfReader << QString("%1").arg(" ", debugIndent * 2) << param << ": " \
    << (reader.isStartElement() ? "start": (reader.isEndElement() ? "end" : "other")) \
    << reader.qualifiedName().toString()
#define DEBUGSTART() \
    ++debugIndent; \
    DEBUG_READING("entering")
#define DEBUGEND() \
    DEBUG_READING("exiting"); \
    --debugIndent
#else
#define DEBUGSTART()         /* nothing */
#define DEBUGEND()           /* nothing */
#define DEBUG_READING(param) /* nothing */
#endif
0057 
0058 
0059 OdfReader::OdfReader()
0060     : m_backend(0)
0061     , m_context(0)
0062     , m_textReader(0)
0063     , m_drawReader(0)
0064 {
0065 }
0066 
0067 OdfReader::~OdfReader()
0068 {
0069 }
0070 
0071 
0072 OdfTextReader *OdfReader::textReader() const
0073 {
0074     return m_textReader;
0075 }
0076 
0077 void OdfReader::setTextReader(OdfTextReader *textReader)
0078 {
0079     m_textReader = textReader;
0080     if (textReader) {
0081         textReader->setParent(this);
0082     }
0083 }
0084 
0085 OdfDrawReader *OdfReader::drawReader() const
0086 {
0087     return m_drawReader;
0088 }
0089 
0090 void OdfReader::setDrawReader(OdfDrawReader *drawReader)
0091 {
0092     m_drawReader = drawReader;
0093     if (drawReader) {
0094         drawReader->setParent(this);
0095     }
0096 }
0097 
0098 bool OdfReader::analyzeContent(OdfReaderContext *context)
0099 {
0100     // Extract styles, manifest, settings, etc
0101     if (context->analyzeOdfFile() != KoFilter::OK) {
0102         return false;
0103     }
0104     debugOdfReader << "analyze ok";
0105     return true;
0106 }
0107 
0108 bool OdfReader::readContent(OdfReaderBackend *backend, OdfReaderContext *context)
0109 {
0110     debugOdfReader << "entering";
0111 
0112     m_backend = backend;
0113     m_context = context;
0114 
0115     if (m_textReader) {
0116         m_textReader->setContext(context);
0117     }
0118 
0119     // ----------------------------------------------------------------
0120     // Read the body from content.xml
0121 
0122     KoStore *odfStore = m_context->odfStore();
0123 
0124     if (!odfStore->open("content.xml")) {
0125         errorOdfReader << "Unable to open input file content.xml" << endl;
0126         return false;
0127     }
0128     debugOdfReader << "open content.xml ok";
0129 
0130     KoXmlStreamReader reader;
0131     prepareForOdfInternal(reader);
0132 
0133     reader.setDevice(odfStore->device());
0134     bool  foundContent = false;
0135     while (!reader.atEnd()) {
0136         reader.readNext();
0137 
0138         if (reader.isStartElement() && reader.qualifiedName() == "office:document-content") {
0139             foundContent = true;
0140             break;
0141         }
0142     }
0143     if (!foundContent) {
0144         errorOdfReader << "Couldn't find the content in content.xml" << endl;
0145     }
0146 
0147     m_backend->elementOfficeDocumentcontent(reader, m_context);
0148 
0149     // <office:document-content> has the following children in ODF 1.2:
0150     //          <office:automatic-styles> 3.15.3
0151     //   [done] <office:body> 3.3
0152     //          <office:font-face-decls> 3.14
0153     //          <office:scripts> 3.12.
0154     while (reader.readNextStartElement()) {
0155         QString tagName = reader.qualifiedName().toString();
0156         
0157         if (tagName == "office:automatic-styles") {
0158             // We already have the styles in the context.  No need to read them again.
0159             reader.skipCurrentElement();
0160         }
0161         else if (tagName == "office:body") {
0162             // This is the big one.
0163             readElementOfficeBody(reader);
0164         }
0165         else if (tagName == "office:font-face-decls") {
0166             // FIXME: Not yet implemented
0167             reader.skipCurrentElement();
0168         }
0169         else if (tagName == "office:scripts") {
0170             // FIXME: Not yet implemented
0171             reader.skipCurrentElement();
0172         }
0173         else {
0174             reader.skipCurrentElement();
0175         }
0176     }
0177 
0178     m_backend->elementOfficeDocumentcontent(reader, m_context);
0179     odfStore->close();
0180 
0181     return true;
0182 }
0183 
0184 
#if 0
// This is a template function for the reader library.
// Copy this one, change the name and fill in the code.
//
// The shape is the same as in readElementOfficeBody(): the backend callback
// is called once before the children are read and once after.
void OdfReader::readElementNamespaceTagname(KoXmlStreamReader &reader)
{
    DEBUGSTART();
    m_backend->elementNamespaceTagname(reader, m_context);

    // <namespace:tagname> has the following children in ODF 1.2:
    //   FILL IN THE CHILDREN LIKE THIS EXAMPLE (taken from office:document-content):
    //          <office:automatic-styles> 3.15.3
    //          <office:body> 3.3
    //          <office:font-face-decls> 3.14
    //          <office:scripts> 3.12.
    while (reader.readNextStartElement()) {
        QString tagName = reader.qualifiedName().toString();

        if (tagName == "office:automatic-styles") {
            // FIXME: NYI
            reader.skipCurrentElement();
        }
        else if (tagName == "office:body") {
            readElementOfficeBody(reader);
        }
        ...  MORE else if () HERE
        else {
            // Children without a handler are skipped.
            reader.skipCurrentElement();
        }
    }

    m_backend->elementNamespaceTagname(reader, m_context);
    DEBUGEND();
}
#endif
0218 
0219 
0220 void OdfReader::readElementOfficeBody(KoXmlStreamReader &reader)
0221 {
0222     DEBUGSTART();
0223     m_backend->elementOfficeBody(reader, m_context);
0224 
0225     // <office:body> has the following children in ODF 1.2:
0226     //          <office:chart> 3.8,
0227     //          <office:database> 12.1
0228     //          <office:drawing> 3.5
0229     //          <office:image> 3.9
0230     //   [done] <office:presentation> 3.6
0231     //   [done] <office:spreadsheet> 3.7
0232     //   [done] <office:text> 3.4
0233     //
0234     // Of those only <office:text> is present in a text document (odf).
0235     while (reader.readNextStartElement()) {
0236         QString tagName = reader.qualifiedName().toString();
0237         
0238         if (tagName == "office:text") {
0239             readElementOfficeText(reader);
0240         }
0241         else if (tagName == "office:spreadsheet") {
0242             readElementOfficeSpreadsheet(reader);
0243         }
0244         else if (tagName == "office:presentation") {
0245             readElementOfficePresentation(reader);
0246         }
0247         else {
0248             reader.skipCurrentElement();
0249         }
0250     }
0251 
0252     m_backend->elementOfficeBody(reader, m_context);
0253     DEBUGEND();
0254 }
0255 
0256 
0257 // ----------------------------------------------------------------
0258 //
0259 // The following functions are just NULL versions of the actual functions. All
0260 // of these are virtual and the real functionality should be implemented in a
0261 // subclass.
0262 //
0263 
0264 void OdfReader::readElementOfficeText(KoXmlStreamReader &reader)
0265 {
0266     DEBUGSTART();
0267 
0268     errorOdfReader << "Unimplemented function";
0269     reader.skipCurrentElement();  
0270 
0271     DEBUGEND();
0272 }
0273 
0274 void OdfReader::readElementOfficeSpreadsheet(KoXmlStreamReader &reader)
0275 {
0276     DEBUGSTART();
0277 
0278     errorOdfReader << "Unimplemented function";
0279     reader.skipCurrentElement();  
0280 
0281     DEBUGEND();
0282 }
0283 
0284 void OdfReader::readElementOfficePresentation(KoXmlStreamReader &reader)
0285 {
0286     DEBUGSTART();
0287 
0288     errorOdfReader << "Unimplemented function";
0289     reader.skipCurrentElement();  
0290 
0291     DEBUGEND();
0292 }
0293 
0294 
0295 // ----------------------------------------------------------------
0296 //                             Other functions
0297 
0298 
0299 void OdfReader::readUnknownElement(KoXmlStreamReader &reader)
0300 {
0301     DEBUGSTART();
0302 
0303 #if 1
0304     // FIXME: We need to handle this.
0305     reader.skipCurrentElement();
0306 #else
0307     if (m_context->isInsideParagraph()) {
0308         // readParagraphContents expect to have the reader point to the
0309         // contents of the paragraph so we have to read past the text:p
0310         // start tag here.
0311         reader.readNext();
0312         readParagraphContents(reader);
0313     }
0314     else {
0315         while (reader.readNextStartElement()) {
0316             readTextLevelElement(reader);
0317         }
0318     }
0319 #endif
0320 
0321     DEBUGEND();
0322 }
0323 
0324 
0325 // FIXME: Remove this function when it is exported from libs/odf/KoXmlStreamReader.cpp
0326 //
0327 static void prepareForOdfInternal(KoXmlStreamReader &reader)
0328 {
0329     // This list of namespaces is taken from KoXmlNs.cpp
0330     // Maybe not all of them are expected in an ODF document?
0331     reader.addExpectedNamespace("office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0");
0332     reader.addExpectedNamespace("meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0");
0333     reader.addExpectedNamespace("config", "urn:oasis:names:tc:opendocument:xmlns:config:1.0");
0334     reader.addExpectedNamespace("text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0");
0335     reader.addExpectedNamespace("table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0");
0336     reader.addExpectedNamespace("draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0");
0337     reader.addExpectedNamespace("presentation", "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0");
0338     reader.addExpectedNamespace("dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0");
0339     reader.addExpectedNamespace("chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0");
0340     reader.addExpectedNamespace("form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0");
0341     reader.addExpectedNamespace("script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0");
0342     reader.addExpectedNamespace("style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0");
0343     reader.addExpectedNamespace("number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0");
0344     reader.addExpectedNamespace("manifest", "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0");
0345     reader.addExpectedNamespace("anim", "urn:oasis:names:tc:opendocument:xmlns:animation:1.0");
0346 
0347     reader.addExpectedNamespace("math", "http://www.w3.org/1998/Math/MathML");
0348     reader.addExpectedNamespace("svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
0349     reader.addExpectedNamespace("fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
0350     reader.addExpectedNamespace("dc", "http://purl.org/dc/elements/1.1/");
0351     reader.addExpectedNamespace("xlink", "http://www.w3.org/1999/xlink");
0352     reader.addExpectedNamespace("VL", "http://openoffice.org/2001/versions-list");
0353     reader.addExpectedNamespace("smil", "urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0");
0354     reader.addExpectedNamespace("xhtml", "http://www.w3.org/1999/xhtml");
0355     reader.addExpectedNamespace("xml", "http://www.w3.org/XML/1998/namespace");
0356 
0357     reader.addExpectedNamespace("calligra", "http://www.calligra.org/2005/");
0358     reader.addExpectedNamespace("officeooo", "http://openoffice.org/2009/office");
0359     reader.addExpectedNamespace("ooo", "http://openoffice.org/2004/office");
0360 
0361     reader.addExpectedNamespace("delta", "http://www.deltaxml.com/ns/track-changes/delta-namespace");
0362     reader.addExpectedNamespace("split", "http://www.deltaxml.com/ns/track-changes/split-namespace");
0363     reader.addExpectedNamespace("ac", "http://www.deltaxml.com/ns/track-changes/attribute-change-namespace");
0364 
0365     // This list of namespaces is taken from KoXmlReader::fixNamespace()
0366     // They were generated by old versions of OpenOffice.org.
0367     reader.addExtraNamespace("office",    "http://openoffice.org/2000/office");
0368     reader.addExtraNamespace("text",      "http://openoffice.org/2000/text");
0369     reader.addExtraNamespace("style",     "http://openoffice.org/2000/style");
0370     reader.addExtraNamespace("fo",        "http://www.w3.org/1999/XSL/Format");
0371     reader.addExtraNamespace("table",     "http://openoffice.org/2000/table");
0372     reader.addExtraNamespace("drawing",   "http://openoffice.org/2000/drawing");
0373     reader.addExtraNamespace("datastyle", "http://openoffice.org/2000/datastyle");
0374     reader.addExtraNamespace("svg",       "http://www.w3.org/2000/svg");
0375     reader.addExtraNamespace("chart",     "http://openoffice.org/2000/chart");
0376     reader.addExtraNamespace("dr3d",      "http://openoffice.org/2000/dr3d");
0377     reader.addExtraNamespace("form",      "http://openoffice.org/2000/form");
0378     reader.addExtraNamespace("script",    "http://openoffice.org/2000/script");
0379     reader.addExtraNamespace("meta",      "http://openoffice.org/2000/meta");
0380     reader.addExtraNamespace("config",    "http://openoffice.org/2001/config");
0381     reader.addExtraNamespace("pres",      "http://openoffice.org/2000/presentation");
0382     reader.addExtraNamespace("manifest",  "http://openoffice.org/2001/manifest");
0383 }