File indexing completed on 2025-01-19 13:27:35

0001 /*
0002  * This file is part of Office 2007 Filters for Calligra
0003  *
0004  * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
0005  *
0006  * Contact: Suresh Chande suresh.chande@nokia.com
0007  *
0008  * This library is free software; you can redistribute it and/or
0009  * modify it under the terms of the GNU Lesser General Public License
0010  * version 2.1 as published by the Free Software Foundation.
0011  *
0012  * This library is distributed in the hope that it will be useful, but
0013  * WITHOUT ANY WARRANTY; without even the implied warranty of
0014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0015  * Lesser General Public License for more details.
0016  *
0017  * You should have received a copy of the GNU Lesser General Public
0018  * License along with this library; if not, write to the Free Software
0019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
0020  * 02110-1301 USA
0021  *
0022  */
0023 
0024 // Own
0025 #include "XlsxUtils.h"
0026 #include "XlsxXmlDocumentReader.h"
0027 
0028 #include <KoBorder.h>  // needed by DrawingMLMethodso
0029 
0030 #include "XlsxXmlWorksheetReader.h"
0031 #include "XlsxXmlCommentsReader.h"
0032 #include "XlsxImport.h"
0033 #include <MsooXmlSchemas.h>
0034 #include <MsooXmlUtils.h>
0035 #include <MsooXmlRelationships.h>
0036 #include <KoXmlWriter.h>
0037 #include <KoFontFace.h>
0038 #include <VmlDrawingReader.h>
0039 
0040 #undef MSOOXML_CURRENT_NS
0041 #define MSOOXML_CURRENT_CLASS XlsxXmlDocumentReader
0042 #define BIND_READ_CLASS MSOOXML_CURRENT_CLASS
0043 
0044 #include <MsooXmlReader_p.h>
0045 
0046 XlsxXmlDocumentReaderContext::XlsxXmlDocumentReaderContext(
0047     XlsxImport& _import,
0048     MSOOXML::DrawingMLTheme* _themes,
0049     const QVector<QString>& _sharedStrings,
0050     const XlsxComments& _comments,
0051     const XlsxStyles& _styles,
0052     MSOOXML::MsooXmlRelationships& _relationships,
0053     const QString &_file,
0054     const QString &_path
0055     )
0056         : MSOOXML::MsooXmlReaderContext(&_relationships)
0057         , import(&_import)
0058         , themes(_themes)
0059         , sharedStrings(&_sharedStrings)
0060         , comments(&_comments)
0061         , styles(&_styles)
0062         , file(_file)
0063         , path(_path)
0064 {
0065 }
0066 
0067 class XlsxXmlDocumentReader::Private
0068 {
0069 public:
0070     Private()
0071             : worksheetNumber(0) {
0072     }
0073     ~Private() {
0074     }
0075     uint worksheetNumber;
0076 private:
0077 };
0078 
0079 XlsxXmlDocumentReader::XlsxXmlDocumentReader(KoOdfWriters *writers)
0080         : MSOOXML::MsooXmlReader(writers)
0081         , m_context(0)
0082         , d(new Private)
0083 {
0084     init();
0085 }
0086 
0087 XlsxXmlDocumentReader::~XlsxXmlDocumentReader()
0088 {
0089     delete d;
0090 }
0091 
0092 void XlsxXmlDocumentReader::init()
0093 {
0094     m_defaultNamespace = "";
0095 }
0096 
0097 KoFilter::ConversionStatus XlsxXmlDocumentReader::read(MSOOXML::MsooXmlReaderContext* context)
0098 {
0099     m_context = dynamic_cast<XlsxXmlDocumentReaderContext*>(context);
0100     Q_ASSERT(m_context);
0101     const KoFilter::ConversionStatus result = readInternal();
0102     m_context = 0;
0103     if (result == KoFilter::OK)
0104         return KoFilter::OK;
0105     return result;
0106 }
0107 
0108 KoFilter::ConversionStatus XlsxXmlDocumentReader::readInternal()
0109 {
0110     qCDebug(lcXlsxImport) << "=============================";
0111     readNext();
0112     if (!isStartDocument()) {
0113         return KoFilter::WrongFormat;
0114     }
0115 
0116     // workbook
0117     readNext();
0118     qCDebug(lcXlsxImport) << *this << namespaceUri();
0119 
0120     if (!expectEl("workbook")) {
0121         return KoFilter::WrongFormat;
0122     }
0123     if (!expectNS(MSOOXML::Schemas::spreadsheetml)) {
0124         return KoFilter::WrongFormat;
0125     }
0126     /*
0127         const QXmlStreamAttributes attrs( attributes() );
0128         for (int i=0; i<attrs.count(); i++) {
0129             qCDebug(lcXlsxImport) << "1 NS prefix:" << attrs[i].name() << "uri:" << attrs[i].namespaceUri();
0130         }*/
0131 
0132     QXmlStreamNamespaceDeclarations namespaces(namespaceDeclarations());
0133     for (int i = 0; i < namespaces.count(); i++) {
0134         qCDebug(lcXlsxImport) << "NS prefix:" << namespaces[i].prefix() << "uri:" << namespaces[i].namespaceUri();
0135     }
0136 //! @todo find out whether the namespace returned by namespaceUri()
0137 //!       is exactly the same ref as the element of namespaceDeclarations()
0138     if (!namespaces.contains(QXmlStreamNamespaceDeclaration(QString(), MSOOXML::Schemas::spreadsheetml))) {
0139         raiseError(i18n("Namespace \"%1\" not found", QLatin1String(MSOOXML::Schemas::spreadsheetml)));
0140         return KoFilter::WrongFormat;
0141     }
0142 //! @todo expect other namespaces too...
0143 
0144     TRY_READ(workbook)
0145 
0146 //! @todo hardcoded font face list; look at fonts used by theme
0147     mainStyles->insertFontFace(KoFontFace("Calibri"));
0148     mainStyles->insertFontFace(KoFontFace("Arial"));
0149     mainStyles->insertFontFace(KoFontFace("Tahoma"));
0150 
0151     qCDebug(lcXlsxImport) << "===========finished============";
0152     return KoFilter::OK;
0153 }
0154 
0155 #undef CURRENT_EL
0156 #define CURRENT_EL workbook
0157 //! workbook handler (Workbook)
0158 /*! ECMA-376, 18.2.27, p. 1746.
0159  Root element.
0160 
0161  Child elements:
0162  - bookViews (Workbook Views) §18.2.1
0163  - calcPr (Calculation Properties) §18.2.2
0164  - customWorkbookViews (Custom Workbook Views) §18.2.4
0165  - definedNames (Defined Names) §18.2.6
0166  - externalReferences (External References) §18.2.9
0167  - extLst (Future Feature Data Storage Area) §18.2.10
0168  - fileRecoveryPr (File Recovery Properties) §18.2.11
0169  - fileSharing (File Sharing) §18.2.12
0170  - fileVersion (File Version) §18.2.13
0171  - functionGroups (Function Groups) §18.2.15
0172  - oleSize (Embedded Object Size) §18.2.16
0173  - pivotCaches (PivotCaches) §18.2.18
0174  - [done] sheets (Sheets) §18.2.20
0175  - smartTagPr (Smart Tag Properties) §18.2.21
0176  - smartTagTypes (Smart Tag Types) §18.2.23
0177  - webPublishing (Web Publishing Properties) §18.2.24
0178  - webPublishObjects (Web Publish Objects) §18.2.26
0179  - workbookPr (Workbook Properties) §18.2.28
0180  - workbookProtection (Workbook Protection) §18.2.29
0181 
0182  @todo support all child elements
0183 */
0184 KoFilter::ConversionStatus XlsxXmlDocumentReader::read_workbook()
0185 {
0186     READ_PROLOGUE
0187 
0188     QXmlStreamNamespaceDeclarations namespaces = namespaceDeclarations();
0189     for (int i = 0; i < namespaces.count(); i++) {
0190         qCDebug(lcXlsxImport) << "NS prefix:" << namespaces[i].prefix() << "uri:" << namespaces[i].namespaceUri();
0191     }
0192 
0193     while (!atEnd()) {
0194         readNext();
0195         qCDebug(lcXlsxImport) << *this;
0196         BREAK_IF_END_OF(CURRENT_EL)
0197         if (isStartElement()) {
0198             TRY_READ_IF(sheets)
0199             SKIP_UNKNOWN
0200 //! @todo add ELSE_WRONG_FORMAT
0201         }
0202     }
0203 
0204     READ_EPILOGUE
0205 }
0206 
0207 #undef CURRENT_EL
0208 #define CURRENT_EL sheets
0209 //! sheets handler (Sheets)
0210 /*! ECMA-376, 18.2.20, p. 1740.
0211  This element represents the collection of sheets in the workbook.
0212 
0213  Parent elements:
0214  - [done] workbook (§18.2.27)
0215 
0216  Child elements:
0217  - [done] sheet (Sheet Information) §18.2.19
0218 */
0219 KoFilter::ConversionStatus XlsxXmlDocumentReader::read_sheets()
0220 {
0221     READ_PROLOGUE
0222 
0223     unsigned numberOfWorkSheets = m_context->relationships->targetCountWithWord("worksheets") +
0224         m_context->relationships->targetCountWithWord("dialogsheets") +
0225         m_context->relationships->targetCountWithWord("chartsheets");
0226     unsigned worksheet = 1;
0227 
0228     while (!atEnd()) {
0229         readNext();
0230         qCDebug(lcXlsxImport) << *this;
0231         BREAK_IF_END_OF(CURRENT_EL)
0232         if (isStartElement()) {
0233             if (name() == "sheet") {
0234                 TRY_READ(sheet)
0235                 ++worksheet;
0236                 m_context->import->reportProgress(45 + (55/numberOfWorkSheets) * worksheet);
0237             }
0238             ELSE_WRONG_FORMAT
0239         }
0240     }
0241 
0242     if (!m_context->autoFilters.isEmpty()) {
0243         body->startElement("table:database-ranges");
0244         int index = 0;
0245         while (index < m_context->autoFilters.size()) {
0246             body->startElement("table:database-range");
0247             body->addAttribute("table:target-range-address", m_context->autoFilters.at(index).area);
0248             body->addAttribute("table:display-filter-buttons", "true");
0249             body->addAttribute("table:name", QString("excel-database-%1").arg(index));
0250             QString type = m_context->autoFilters.at(index).type;
0251             int filterConditionSize = m_context->autoFilters.at(index).filterConditions.size();
0252             if (filterConditionSize > 0) {
0253                 if (type == "and") {
0254                     body->startElement("table:filter-and");
0255                 }
0256                 else if (type == "or") {
0257                     body->startElement("table:filter-or");
0258                 }
0259                 else {
0260                     body->startElement("table:filter");
0261                 }
0262                 int conditionIndex = 0;
0263                 while (conditionIndex < filterConditionSize) {
0264                     body->startElement("table:filter-condition");
0265                     body->addAttribute("table:field-number", m_context->autoFilters.at(index).filterConditions.at(conditionIndex).field);
0266                     body->addAttribute("table:value", m_context->autoFilters.at(index).filterConditions.at(conditionIndex).value);
0267                     body->addAttribute("table:operator", m_context->autoFilters.at(index).filterConditions.at(conditionIndex).opField);
0268                     body->endElement(); // table:filter-condition
0269                     ++conditionIndex;
0270                 }
0271                 body->endElement(); // table:filter | table:filter-or | table:filter-and
0272             }
0273             body->endElement(); // table:database-range
0274             ++index;
0275         }
0276 
0277         body->endElement(); // table:database-ranges
0278     }
0279 
0280 
0281     READ_EPILOGUE
0282 }
0283 
0284 #undef CURRENT_EL
0285 #define CURRENT_EL sheet
0286 //! sheet handler (Sheet Information)
0287 /*! ECMA-376, 18.2.19, p. 1740.
0288  This element defines a sheet in this workbook. Sheet data is stored in a separate part.
0289 
0290  Parent elements:
0291  - [done] sheets (§18.2.20)
0292 
0293  No child elements.
0294 */
0295 KoFilter::ConversionStatus XlsxXmlDocumentReader::read_sheet()
0296 {
0297     READ_PROLOGUE
0298 
0299     const QXmlStreamAttributes attrs(attributes());
0300     READ_ATTR_WITH_NS(r, id)
0301     READ_ATTR_WITHOUT_NS(sheetId)
0302     READ_ATTR_WITHOUT_NS(name)
0303     TRY_READ_ATTR_WITHOUT_NS(state)
0304     qCDebug(lcXlsxImport) << "r:id:" << r_id << "sheetId:" << sheetId << "name:" << name << "state:" << state;
0305 
0306     unsigned numberOfWorkSheets = m_context->relationships->targetCountWithWord("worksheets") +
0307         m_context->relationships->targetCountWithWord("dialogsheets") +
0308         m_context->relationships->targetCountWithWord("chartsheets");
0309     d->worksheetNumber++; // counted from 1
0310     QString path, file;
0311     QString filepath = m_context->relationships->target(m_context->path, m_context->file, r_id);
0312     MSOOXML::Utils::splitPathAndFile(filepath, &path, &file);
0313     qCDebug(lcXlsxImport) << "path:" << path << "file:" << file;
0314 
0315     // Loading potential ole replacements
0316     VmlDrawingReader vmlreader(this);
0317     QString vmlTarget = m_context->relationships->targetForType(path, file,
0318         "http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing");
0319 
0320     if (!vmlTarget.isEmpty()) {
0321         QString errorMessage, vmlPath, vmlFile;
0322 
0323         MSOOXML::Utils::splitPathAndFile(vmlTarget, &vmlPath, &vmlFile);
0324 
0325         VmlDrawingReaderContext vmlContext(*m_context->import,
0326             vmlPath, vmlFile, *m_context->relationships);
0327 
0328         const KoFilter::ConversionStatus status =
0329             m_context->import->loadAndParseDocument(&vmlreader, vmlTarget, errorMessage, &vmlContext);
0330         if (status != KoFilter::OK) {
0331             vmlreader.raiseError(errorMessage);
0332         }
0333     }
0334 
0335     XlsxXmlWorksheetReader worksheetReader(this);
0336     XlsxXmlWorksheetReaderContext context(d->worksheetNumber, numberOfWorkSheets, name, state, path, file,
0337                                           m_context->themes, *m_context->sharedStrings,
0338                                           *m_context->comments,
0339                                           *m_context->styles,
0340                                           *m_context->relationships, m_context->import,
0341                                           vmlreader.content(),
0342                                           vmlreader.frames(),
0343                                           m_context->autoFilters);
0344     // Due to some information being available only in the later part of the document, we have to read twice
0345     // In the first round we get the later information and in 2nd round we read the rest and use the information
0346     context.firstRoundOfReading = true;
0347     KoFilter::ConversionStatus status = m_context->import->loadAndParseDocument(&worksheetReader, filepath, &context);
0348     if (status != KoFilter::OK) {
0349         raiseError(worksheetReader.errorString());
0350         return status;
0351     }
0352     context.firstRoundOfReading = false;
0353     status = m_context->import->loadAndParseDocument(&worksheetReader, filepath, &context);
0354     if (status != KoFilter::OK) {
0355         raiseError(worksheetReader.errorString());
0356         return status;
0357     }
0358 
0359     readNext();
0360     READ_EPILOGUE
0361 }