File indexing completed on 2024-05-12 16:29:19

0001 /* This file is part of the KDE project
0002 
0003    Copyright (C) 2012-2014 Inge Wallin            <inge@lysator.liu.se>
0004 
0005    This library is free software; you can redistribute it and/or
0006    modify it under the terms of the GNU Library General Public
0007    License as published by the Free Software Foundation; either
0008    version 2 of the License, or (at your option) any later version.
0009 
0010    This library is distributed in the hope that it will be useful,
0011    but WITHOUT ANY WARRANTY; without even the implied warranty of
0012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0013    Library General Public License for more details.
0014 
0015    You should have received a copy of the GNU Library General Public License
0016    along with this library; see the file COPYING.LIB.  If not, write to
0017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0018    Boston, MA 02110-1301, USA.
0019 */
0020 
0021 
0022 // Own
0023 #include "OdfReader.h"
0024 
0025 // Qt
0026 #include <QStringList>
0027 #include <QBuffer>
0028 
0029 // KF5
0030 #include <klocalizedstring.h>
0031 
0032 // Calligra
0033 #include <KoStore.h>
0034 #include <KoXmlStreamReader.h>
0035 #include <KoXmlNS.h>
0036 #include <KoXmlWriter.h>  // For copyXmlElement
0037 #include <KoOdfReadStore.h>
0038 
0039 // Reader library
0040 #include "OdfReaderBackend.h"
0041 #include "OdfReaderContext.h"
0042 #include "OdfTextReader.h"
0043 #include "OdfDrawReader.h"
0044 #include "OdfReaderDebug.h"
0045 
0046 
0047 static void prepareForOdfInternal(KoXmlStreamReader &reader);
0048 
0049 
// Tracing helpers used by the element handlers in this file.
//
// The tracing variant is compiled out.  Change the condition below to
// 1 to log an indented "entering"/"exiting" line for every handler
// that uses DEBUGSTART()/DEBUGEND().  The tracing macros refer to a
// variable named `reader`, so it must be in scope where they are used.
#if 0
static int debugIndent = 0;
#define DEBUG_READING(param) \
    debugOdfReader << QString("%1").arg(" ", debugIndent * 2) << param << ": " \
    << (reader.isStartElement() ? "start": (reader.isEndElement() ? "end" : "other")) \
    << reader.qualifiedName().toString()
#define DEBUGSTART() \
    ++debugIndent; \
    DEBUG_READING("entering")
#define DEBUGEND() \
    DEBUG_READING("exiting"); \
    --debugIndent
#else
// Tracing disabled: all three macros expand to nothing.
#define DEBUG_READING(param)
#define DEBUGSTART()
#define DEBUGEND()
#endif
0070 
0071 
0072 OdfReader::OdfReader()
0073     : m_backend(0)
0074     , m_context(0)
0075     , m_textReader(0)
0076     , m_drawReader(0)
0077 {
0078 }
0079 
0080 OdfReader::~OdfReader()
0081 {
0082 }
0083 
0084 
0085 OdfTextReader *OdfReader::textReader() const
0086 {
0087     return m_textReader;
0088 }
0089 
0090 void OdfReader::setTextReader(OdfTextReader *textReader)
0091 {
0092     m_textReader = textReader;
0093     if (textReader) {
0094         textReader->setParent(this);
0095     }
0096 }
0097 
0098 OdfDrawReader *OdfReader::drawReader() const
0099 {
0100     return m_drawReader;
0101 }
0102 
0103 void OdfReader::setDrawReader(OdfDrawReader *drawReader)
0104 {
0105     m_drawReader = drawReader;
0106     if (drawReader) {
0107         drawReader->setParent(this);
0108     }
0109 }
0110 
0111 bool OdfReader::analyzeContent(OdfReaderContext *context)
0112 {
0113     // Extract styles, manifest, settings, etc
0114     if (context->analyzeOdfFile() != KoFilter::OK) {
0115         return false;
0116     }
0117     debugOdfReader << "analyze ok";
0118     return true;
0119 }
0120 
0121 bool OdfReader::readContent(OdfReaderBackend *backend, OdfReaderContext *context)
0122 {
0123     debugOdfReader << "entering";
0124 
0125     m_backend = backend;
0126     m_context = context;
0127 
0128     if (m_textReader) {
0129         m_textReader->setContext(context);
0130     }
0131 
0132     // ----------------------------------------------------------------
0133     // Read the body from content.xml
0134 
0135     KoStore *odfStore = m_context->odfStore();
0136 
0137     if (!odfStore->open("content.xml")) {
0138         errorOdfReader << "Unable to open input file content.xml" << endl;
0139         return false;
0140     }
0141     debugOdfReader << "open content.xml ok";
0142 
0143     KoXmlStreamReader reader;
0144     prepareForOdfInternal(reader);
0145 
0146     reader.setDevice(odfStore->device());
0147     bool  foundContent = false;
0148     while (!reader.atEnd()) {
0149         reader.readNext();
0150 
0151         if (reader.isStartElement() && reader.qualifiedName() == "office:document-content") {
0152             foundContent = true;
0153             break;
0154         }
0155     }
0156     if (!foundContent) {
0157         errorOdfReader << "Couldn't find the content in content.xml" << endl;
0158     }
0159 
0160     m_backend->elementOfficeDocumentcontent(reader, m_context);
0161 
0162     // <office:document-content> has the following children in ODF 1.2:
0163     //          <office:automatic-styles> 3.15.3
0164     //   [done] <office:body> 3.3
0165     //          <office:font-face-decls> 3.14
0166     //          <office:scripts> 3.12.
0167     while (reader.readNextStartElement()) {
0168         QString tagName = reader.qualifiedName().toString();
0169         
0170         if (tagName == "office:automatic-styles") {
0171             // We already have the styles in the context.  No need to read them again.
0172             reader.skipCurrentElement();
0173         }
0174         else if (tagName == "office:body") {
0175             // This is the big one.
0176             readElementOfficeBody(reader);
0177         }
0178         else if (tagName == "office:font-face-decls") {
0179             // FIXME: Not yet implemented
0180             reader.skipCurrentElement();
0181         }
0182         else if (tagName == "office:scripts") {
0183             // FIXME: Not yet implemented
0184             reader.skipCurrentElement();
0185         }
0186         else {
0187             reader.skipCurrentElement();
0188         }
0189     }
0190 
0191     m_backend->elementOfficeDocumentcontent(reader, m_context);
0192     odfStore->close();
0193 
0194     return true;
0195 }
0196 
0197 
#if 0
// This is a template function for the reader library.
// Copy this one and change the name and fill in the code.
//
// It is kept inside "#if 0" on purpose: the skeleton contains a
// placeholder line and is not meant to compile as it stands.
//
// The pattern is the same one readElementOfficeBody() follows: dispatch
// on the qualified name of every child element, skip the children that
// are not handled, and call the backend for this element.
void OdfReader::readElementNamespaceTagname(KoXmlStreamReader &reader)
{ 
   DEBUGSTART();

    // <namespace:tagname> has the following children in ODF 1.2:
    //   FILL IN THE CHILDREN LIKE THIS EXAMPLE (taken from office:document-content):
    //          <office:automatic-styles> 3.15.3
    //          <office:body> 3.3
    //          <office:font-face-decls> 3.14
    //          <office:scripts> 3.12.
    while (reader.readNextStartElement()) {
        QString tagName = reader.qualifiedName().toString();
        
        if (tagName == "office:automatic-styles") {
            // FIXME: NYI (not yet implemented)
            reader.skipCurrentElement();
        }
        else if (tagName == "office:body") {
            readElementOfficeBody(reader);
        }
        ...  MORE else if () HERE
        else {
            // Children that are not handled must be skipped.
            reader.skipCurrentElement();
        }
    }

    m_backend->elementNamespaceTagname(reader, m_context);
    DEBUGEND();
}
#endif
0231 
0232 
0233 void OdfReader::readElementOfficeBody(KoXmlStreamReader &reader)
0234 {
0235     DEBUGSTART();
0236     m_backend->elementOfficeBody(reader, m_context);
0237 
0238     // <office:body> has the following children in ODF 1.2:
0239     //          <office:chart> 3.8,
0240     //          <office:database> 12.1
0241     //          <office:drawing> 3.5
0242     //          <office:image> 3.9
0243     //   [done] <office:presentation> 3.6
0244     //   [done] <office:spreadsheet> 3.7
0245     //   [done] <office:text> 3.4
0246     //
0247     // Of those only <office:text> is present in a text document (odf).
0248     while (reader.readNextStartElement()) {
0249         QString tagName = reader.qualifiedName().toString();
0250         
0251         if (tagName == "office:text") {
0252             readElementOfficeText(reader);
0253         }
0254         else if (tagName == "office:spreadsheet") {
0255             readElementOfficeSpreadsheet(reader);
0256         }
0257         else if (tagName == "office:presentation") {
0258             readElementOfficePresentation(reader);
0259         }
0260         else {
0261             reader.skipCurrentElement();
0262         }
0263     }
0264 
0265     m_backend->elementOfficeBody(reader, m_context);
0266     DEBUGEND();
0267 }
0268 
0269 
0270 // ----------------------------------------------------------------
0271 //
0272 // The following functions are just NULL versions of the actual functions. All
0273 // of these are virtual and the real functionality should be implemented in a
0274 // subclass.
0275 //
0276 
0277 void OdfReader::readElementOfficeText(KoXmlStreamReader &reader)
0278 {
0279     DEBUGSTART();
0280 
0281     errorOdfReader << "Unimplemented function";
0282     reader.skipCurrentElement();  
0283 
0284     DEBUGEND();
0285 }
0286 
0287 void OdfReader::readElementOfficeSpreadsheet(KoXmlStreamReader &reader)
0288 {
0289     DEBUGSTART();
0290 
0291     errorOdfReader << "Unimplemented function";
0292     reader.skipCurrentElement();  
0293 
0294     DEBUGEND();
0295 }
0296 
0297 void OdfReader::readElementOfficePresentation(KoXmlStreamReader &reader)
0298 {
0299     DEBUGSTART();
0300 
0301     errorOdfReader << "Unimplemented function";
0302     reader.skipCurrentElement();  
0303 
0304     DEBUGEND();
0305 }
0306 
0307 
0308 // ----------------------------------------------------------------
0309 //                             Other functions
0310 
0311 
0312 void OdfReader::readUnknownElement(KoXmlStreamReader &reader)
0313 {
0314     DEBUGSTART();
0315 
0316 #if 1
0317     // FIXME: We need to handle this.
0318     reader.skipCurrentElement();
0319 #else
0320     if (m_context->isInsideParagraph()) {
0321         // readParagraphContents expect to have the reader point to the
0322         // contents of the paragraph so we have to read past the text:p
0323         // start tag here.
0324         reader.readNext();
0325         readParagraphContents(reader);
0326     }
0327     else {
0328         while (reader.readNextStartElement()) {
0329             readTextLevelElement(reader);
0330         }
0331     }
0332 #endif
0333 
0334     DEBUGEND();
0335 }
0336 
0337 
0338 // FIXME: Remove this function when it is exported from libs/odf/KoXmlStreamReader.cpp
0339 //
0340 static void prepareForOdfInternal(KoXmlStreamReader &reader)
0341 {
0342     // This list of namespaces is taken from KoXmlNs.cpp
0343     // Maybe not all of them are expected in an ODF document?
0344     reader.addExpectedNamespace("office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0");
0345     reader.addExpectedNamespace("meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0");
0346     reader.addExpectedNamespace("config", "urn:oasis:names:tc:opendocument:xmlns:config:1.0");
0347     reader.addExpectedNamespace("text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0");
0348     reader.addExpectedNamespace("table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0");
0349     reader.addExpectedNamespace("draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0");
0350     reader.addExpectedNamespace("presentation", "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0");
0351     reader.addExpectedNamespace("dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0");
0352     reader.addExpectedNamespace("chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0");
0353     reader.addExpectedNamespace("form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0");
0354     reader.addExpectedNamespace("script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0");
0355     reader.addExpectedNamespace("style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0");
0356     reader.addExpectedNamespace("number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0");
0357     reader.addExpectedNamespace("manifest", "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0");
0358     reader.addExpectedNamespace("anim", "urn:oasis:names:tc:opendocument:xmlns:animation:1.0");
0359 
0360     reader.addExpectedNamespace("math", "http://www.w3.org/1998/Math/MathML");
0361     reader.addExpectedNamespace("svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
0362     reader.addExpectedNamespace("fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
0363     reader.addExpectedNamespace("dc", "http://purl.org/dc/elements/1.1/");
0364     reader.addExpectedNamespace("xlink", "http://www.w3.org/1999/xlink");
0365     reader.addExpectedNamespace("VL", "http://openoffice.org/2001/versions-list");
0366     reader.addExpectedNamespace("smil", "urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0");
0367     reader.addExpectedNamespace("xhtml", "http://www.w3.org/1999/xhtml");
0368     reader.addExpectedNamespace("xml", "http://www.w3.org/XML/1998/namespace");
0369 
0370     reader.addExpectedNamespace("calligra", "http://www.calligra.org/2005/");
0371     reader.addExpectedNamespace("officeooo", "http://openoffice.org/2009/office");
0372     reader.addExpectedNamespace("ooo", "http://openoffice.org/2004/office");
0373 
0374     reader.addExpectedNamespace("delta", "http://www.deltaxml.com/ns/track-changes/delta-namespace");
0375     reader.addExpectedNamespace("split", "http://www.deltaxml.com/ns/track-changes/split-namespace");
0376     reader.addExpectedNamespace("ac", "http://www.deltaxml.com/ns/track-changes/attribute-change-namespace");
0377 
0378     // This list of namespaces is taken from KoXmlReader::fixNamespace()
0379     // They were generated by old versions of OpenOffice.org.
0380     reader.addExtraNamespace("office",    "http://openoffice.org/2000/office");
0381     reader.addExtraNamespace("text",      "http://openoffice.org/2000/text");
0382     reader.addExtraNamespace("style",     "http://openoffice.org/2000/style");
0383     reader.addExtraNamespace("fo",        "http://www.w3.org/1999/XSL/Format");
0384     reader.addExtraNamespace("table",     "http://openoffice.org/2000/table");
0385     reader.addExtraNamespace("drawing",   "http://openoffice.org/2000/drawing");
0386     reader.addExtraNamespace("datastyle", "http://openoffice.org/2000/datastyle");
0387     reader.addExtraNamespace("svg",       "http://www.w3.org/2000/svg");
0388     reader.addExtraNamespace("chart",     "http://openoffice.org/2000/chart");
0389     reader.addExtraNamespace("dr3d",      "http://openoffice.org/2000/dr3d");
0390     reader.addExtraNamespace("form",      "http://openoffice.org/2000/form");
0391     reader.addExtraNamespace("script",    "http://openoffice.org/2000/script");
0392     reader.addExtraNamespace("meta",      "http://openoffice.org/2000/meta");
0393     reader.addExtraNamespace("config",    "http://openoffice.org/2001/config");
0394     reader.addExtraNamespace("pres",      "http://openoffice.org/2000/presentation");
0395     reader.addExtraNamespace("manifest",  "http://openoffice.org/2001/manifest");
0396 }