Warning, file /office/calligra/filters/sheets/html/htmlimport.cc was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /* This file is part of the KDE project
0002    SPDX-FileCopyrightText: 2001 Eva Brucherseifer <eva@kde.org>
0003    SPDX-FileCopyrightText: 2005 Bram Schoenmakers <bramschoenmakers@kde.nl>
0004    based on kspread csv export filter by David Faure
0005 
0006    SPDX-License-Identifier: LGPL-2.0-or-later
0007 */
0008 
0009 #include "htmlimport.h"
0010 
0011 #include "HtmlImportDebug.h"
0012 //#include <exportdialog.h>
0013 
0014 #include <QFile>
0015 #include <QFileInfo>
0016 #include <QTextCodec>
0017 #include <QTextStream>
0018 #include <QByteArray>
0019 #include <QEventLoop>
0020 #include <kpluginfactory.h>
0021 #include <KoFilterChain.h>
0022 #include <KoXmlWriter.h>
0023 #include <KoOdfWriteStore.h>
0024 #include <KoGenStyles.h>
0025 #include <KoGenStyle.h>
0026 
0027 #include <QDomText>
0028 #include <QDomDocument>
0029 #include <QDomElement>
0030 #include <QString>
0031 
0032 //using namespace Calligra::Sheets;
0033 
0034 K_PLUGIN_FACTORY_WITH_JSON(HTMLImportFactory, "calligra_filter_html2ods.json",
0035                            registerPlugin<HTMLImport>();)
0036 
0037 HTMLImport::HTMLImport(QObject* parent, const QVariantList&)
0038     : KoFilter(parent)
0039 {
0040 }
0041 
0042 HTMLImport::~HTMLImport()
0043 {
0044 }
0045 
0046 KoFilter::ConversionStatus HTMLImport::convert(const QByteArray& from, const QByteArray& to)
0047 {
0048     if (to != "application/vnd.oasis.opendocument.spreadsheet" || from != "text/html") {
0049         warnHtml << "Invalid mimetypes " << to << " " << from;
0050         return KoFilter::NotImplemented;
0051     }
0052 
0053     QString inputFile = m_chain->inputFile();
0054     QString outputFile = m_chain->outputFile();
0055     debugHtml<<"inputFile="<<inputFile<<"outputFile="<<outputFile;
0056 
0057     // check if the inout file exists
0058     m_inputDir = QFileInfo(m_chain->inputFile()).dir();
0059     if(!m_inputDir.exists())
0060         return KoFilter::StupidError;
0061 
0062     // create output store
0063     KoStore* storeout = KoStore::createStore(outputFile, KoStore::Write, "application/vnd.oasis.opendocument.spreadsheet", KoStore::Zip);
0064     if (!storeout)
0065         return KoFilter::FileNotFound;
0066 
0067     KoOdfWriteStore oasisStore(storeout);
0068     m_manifestWriter = oasisStore.manifestWriter("application/vnd.oasis.opendocument.spreadsheet");
0069     m_store = &oasisStore;
0070 
0071     m_mainStyles = new KoGenStyles();
0072 
0073     KoXmlWriter* bodyWriter = m_store->bodyWriter();
0074     m_store->contentWriter(); // we need to create the instance even if the contentWriter is not used
0075 
0076     bodyWriter->startElement("office:body");
0077     KoFilter::ConversionStatus result = loadUrl(QUrl::fromLocalFile(m_chain->inputFile()));
0078     if(result != KoFilter::OK)
0079         warnHtml << "Failed to load url=" << m_chain->inputFile();
0080     bodyWriter->endElement(); // office:body
0081 
0082     if(m_store->closeContentWriter())
0083         m_manifestWriter->addManifestEntry("content.xml", "text/xml");
0084 
0085     if(createStyle())
0086         m_manifestWriter->addManifestEntry("styles.xml", "text/xml");
0087 
0088     if(createMeta())
0089         m_manifestWriter->addManifestEntry("meta.xml", "text/xml");
0090 
0091     m_store->closeManifestWriter();
0092     delete storeout;
0093     m_manifestWriter = 0;
0094     m_store = 0;
0095     return result;
0096 }
0097 
0098 bool HTMLImport::createStyle()
0099 {
0100     if (!m_store->store()->open("styles.xml"))
0101         return false;
0102     KoStoreDevice dev(m_store->store());
0103     KoXmlWriter* stylesWriter = new KoXmlWriter(&dev);
0104 
0105     stylesWriter->startDocument("office:document-styles");
0106     stylesWriter->startElement("office:document-styles");
0107     stylesWriter->addAttribute("xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0");
0108     stylesWriter->addAttribute("xmlns:style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0");
0109     stylesWriter->addAttribute("xmlns:text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0");
0110     stylesWriter->addAttribute("xmlns:table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0");
0111     stylesWriter->addAttribute("xmlns:draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0");
0112     stylesWriter->addAttribute("xmlns:fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
0113     stylesWriter->addAttribute("xmlns:svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
0114     stylesWriter->addAttribute("office:version", "1.0");
0115 
0116     m_mainStyles->saveOdfStyles(KoGenStyles::MasterStyles, stylesWriter);
0117     m_mainStyles->saveOdfStyles(KoGenStyles::DocumentStyles, stylesWriter); // office:style
0118     m_mainStyles->saveOdfStyles(KoGenStyles::DocumentAutomaticStyles, stylesWriter); // office:automatic-styles
0119 
0120     stylesWriter->endElement();  // office:document-styles
0121     stylesWriter->endDocument();
0122 
0123     delete stylesWriter;
0124     return m_store->store()->close();
0125 }
0126 
0127 bool HTMLImport::createMeta()
0128 {
0129     if (!m_store->store()->open("meta.xml"))
0130         return false;
0131 
0132     KoStoreDevice dev(m_store->store());
0133     KoXmlWriter* metaWriter = new KoXmlWriter(&dev);
0134     metaWriter->startDocument("office:document-meta");
0135     metaWriter->startElement("office:document-meta");
0136     metaWriter->addAttribute("xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0");
0137     metaWriter->addAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
0138     metaWriter->addAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");
0139     metaWriter->addAttribute("xmlns:meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0");
0140     metaWriter->startElement("office:meta");
0141 
0142     //metaWriter->startElement("dc:title");
0143     //metaWriter->addTextNode(workbook->property(Workbook::PIDSI_TITLE).toString());
0144     //metaWriter->endElement();
0145 
0146     //metaWriter->startElement("dc:subject", false);
0147     //metaWriter->addTextNode(workbook->property(Workbook::PIDSI_SUBJECT).toString());
0148     //metaWriter->endElement();
0149 
0150     metaWriter->endElement(); // office:meta
0151     metaWriter->endElement(); // office:document-meta
0152     metaWriter->endDocument();
0153 
0154     delete metaWriter;
0155     return m_store->store()->close();
0156 }
0157 
0158 KoFilter::ConversionStatus HTMLImport::loadUrl(const QUrl &url)
0159 {
0160     debugHtml << url;
0161 
0162     KoXmlWriter* bodyWriter = m_store->bodyWriter();
0163     //KoXmlWriter* contentWriter = m_store->contentWriter();
0164 
0165     QStringList sheets;
0166     {
0167         QDomDocument doc("mydocument");
0168         QFile file(url.toLocalFile());
0169         if (!file.open(QIODevice::ReadOnly))
0170             return KoFilter::ConversionStatus::StorageCreationError;
0171         if (!doc.setContent(&file)) {
0172             file.close();
0173             return KoFilter::ConversionStatus::FileNotFound;
0174         }
0175         file.close();
0176         QDomNodeList body = doc.elementsByTagName("body");
0177         QDomNode docbody = body.item(0);
0178 
0179         if (!docbody.isNull()) {
0180             m_states.push(InBody);
0181             bodyWriter->startElement("office:spreadsheet");
0182             parseNode(docbody);
0183             bodyWriter->endElement(); // office:spreadsheet
0184             m_states.pop();
0185         }
0186 
0187         // frames
0188         QDomNodeList frameset = doc.elementsByTagName("frameset");
0189         QDomNode frame = frameset.item(0);
0190 
0191         if (!frame.isNull()) {
0192             for(int i = 0; i < frameset.length(); ++i) {
0193                 for (QDomNode n = frameset.item(i).firstChild(); !n.isNull(); n = n.nextSibling()) {
0194                     QDomElement f = n.toElement();
0195                     if(!f.isNull() && f.nodeName().toLower() == "frame" && f.attribute("name") == "frSheet")
0196                         sheets.append(f.attribute("src"));
0197                 }
0198             }
0199         }
0200     }
0201 
0202     // the  QDOMDocument is no more and we can call us recursively now.
0203     if(!sheets.isEmpty()) {
0204         m_states.push(InFrameset);
0205         foreach(const QString &src, sheets) {
0206             const QUrl u = QUrl::fromLocalFile(QFileInfo(m_inputDir, src).absoluteFilePath());
0207             loadUrl(u);
0208         }
0209         m_states.pop();
0210     }
0211 
0212     return KoFilter::OK;
0213 }
0214 
0215 void HTMLImport::parseNode(QDomNode node)
0216 {
0217     KoXmlWriter* bodyWriter = m_store->bodyWriter();
0218     //KoXmlWriter* contentWriter = m_store->contentWriter();
0219 
0220     // check if this is a text node.
0221     if (node.isText()) {
0222         QDomText t = node.toText();
0223         if(!m_states.isEmpty() && m_states.top() == InCell) {
0224             const QString s = t.data().trimmed();
0225             if(!s.isEmpty()) {
0226                 //debugHtml<<"TEXT tagname=" << node.nodeName() << "TEXT="<<t.data().string();
0227                 bodyWriter->addAttribute("office:value-type", "string");
0228                 bodyWriter->addAttribute("office:string-value", s);
0229             }
0230         }
0231         return; // no children anymore...
0232     }
0233 
0234     QString tag = node.nodeName().toLower();
0235 
0236     if(tag == "table") {
0237         m_states.push(InTable);
0238         bodyWriter->startElement("table:table");
0239 
0240         // hack to get some name defined
0241         static int sheetCount = 0;
0242         bodyWriter->addAttribute("table:name", QString("Sheet %1").arg(++sheetCount));
0243     }
0244     else if(tag == "tr") {
0245         m_states.push(InRow);
0246         bodyWriter->startElement("table:table-row");
0247         //xmlWriter->addAttribute("table:number-columns-spanned", );
0248         //xmlWriter->addAttribute("table:number-rows-spanned", );
0249     }
0250     else if(tag == "td") {
0251         m_states.push(InCell);
0252         bodyWriter->startElement("table:table-cell");
0253     } else {
0254         m_states.push(InNone);
0255     }
0256 
0257     //debugHtml<<"...START nodeName="<<node.nodeName();
0258 
0259     QDomElement e = node.toElement();
0260     bool go_recursive = true;
0261     if (!e.isNull()) {
0262         //parseStyle(e); // get the CSS information
0263         go_recursive = parseTag(e); // get the tag information
0264     }
0265     if (go_recursive) {
0266         for (QDomNode n = node.firstChild(); !n.isNull(); n = n.nextSibling()) {
0267             parseNode(n);
0268         }
0269     }
0270 
0271     State state = m_states.pop();
0272     if(state == InTable || state == InRow || state == InCell) {
0273         bodyWriter->endElement();
0274     }
0275 
0276     //debugHtml<<"...END nodeName="<<node.nodeName();
0277 }
0278 
0279 bool HTMLImport::parseTag(QDomElement element)
0280 {
0281     QString tag = element.tagName().toLower();
0282 
0283     // Don't handle the content of comment- or script-nodes.
0284     return !(element.nodeType() == QDomNode::NodeType::CommentNode || tag == "script");
0285 }
0286 
0287 #include <htmlimport.moc>