Warning, file /office/calligra/filters/sheets/html/htmlimport.cc was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* This file is part of the KDE project 0002 SPDX-FileCopyrightText: 2001 Eva Brucherseifer <eva@kde.org> 0003 SPDX-FileCopyrightText: 2005 Bram Schoenmakers <bramschoenmakers@kde.nl> 0004 based on kspread csv export filter by David Faure 0005 0006 SPDX-License-Identifier: LGPL-2.0-or-later 0007 */ 0008 0009 #include "htmlimport.h" 0010 0011 #include "HtmlImportDebug.h" 0012 //#include <exportdialog.h> 0013 0014 #include <QFile> 0015 #include <QFileInfo> 0016 #include <QTextCodec> 0017 #include <QTextStream> 0018 #include <QByteArray> 0019 #include <QEventLoop> 0020 #include <kpluginfactory.h> 0021 #include <KoFilterChain.h> 0022 #include <KoXmlWriter.h> 0023 #include <KoOdfWriteStore.h> 0024 #include <KoGenStyles.h> 0025 #include <KoGenStyle.h> 0026 0027 #include <QDomText> 0028 #include <QDomDocument> 0029 #include <QDomElement> 0030 #include <QString> 0031 0032 //using namespace Calligra::Sheets; 0033 0034 K_PLUGIN_FACTORY_WITH_JSON(HTMLImportFactory, "calligra_filter_html2ods.json", 0035 registerPlugin<HTMLImport>();) 0036 0037 HTMLImport::HTMLImport(QObject* parent, const QVariantList&) 0038 : KoFilter(parent) 0039 { 0040 } 0041 0042 HTMLImport::~HTMLImport() 0043 { 0044 } 0045 0046 KoFilter::ConversionStatus HTMLImport::convert(const QByteArray& from, const QByteArray& to) 0047 { 0048 if (to != "application/vnd.oasis.opendocument.spreadsheet" || from != "text/html") { 0049 warnHtml << "Invalid mimetypes " << to << " " << from; 0050 return KoFilter::NotImplemented; 0051 } 0052 0053 QString inputFile = m_chain->inputFile(); 0054 QString outputFile = m_chain->outputFile(); 0055 debugHtml<<"inputFile="<<inputFile<<"outputFile="<<outputFile; 0056 0057 // check if the inout file exists 0058 m_inputDir = QFileInfo(m_chain->inputFile()).dir(); 0059 if(!m_inputDir.exists()) 0060 return KoFilter::StupidError; 0061 0062 // create output store 0063 KoStore* storeout = KoStore::createStore(outputFile, KoStore::Write, "application/vnd.oasis.opendocument.spreadsheet", KoStore::Zip); 0064 if (!storeout) 0065 return KoFilter::FileNotFound; 0066 0067 KoOdfWriteStore oasisStore(storeout); 0068 m_manifestWriter = oasisStore.manifestWriter("application/vnd.oasis.opendocument.spreadsheet"); 0069 m_store = &oasisStore; 0070 0071 m_mainStyles = new KoGenStyles(); 0072 0073 KoXmlWriter* bodyWriter = m_store->bodyWriter(); 0074 m_store->contentWriter(); // we need to create the instance even if the contentWriter is not used 0075 0076 bodyWriter->startElement("office:body"); 0077 KoFilter::ConversionStatus result = loadUrl(QUrl::fromLocalFile(m_chain->inputFile())); 0078 if(result != KoFilter::OK) 0079 warnHtml << "Failed to load url=" << m_chain->inputFile(); 0080 bodyWriter->endElement(); // office:body 0081 0082 if(m_store->closeContentWriter()) 0083 m_manifestWriter->addManifestEntry("content.xml", "text/xml"); 0084 0085 if(createStyle()) 0086 m_manifestWriter->addManifestEntry("styles.xml", "text/xml"); 0087 0088 if(createMeta()) 0089 m_manifestWriter->addManifestEntry("meta.xml", "text/xml"); 0090 0091 m_store->closeManifestWriter(); 0092 delete storeout; 0093 m_manifestWriter = 0; 0094 m_store = 0; 0095 return result; 0096 } 0097 0098 bool HTMLImport::createStyle() 0099 { 0100 if (!m_store->store()->open("styles.xml")) 0101 return false; 0102 KoStoreDevice dev(m_store->store()); 0103 KoXmlWriter* stylesWriter = new KoXmlWriter(&dev); 0104 0105 stylesWriter->startDocument("office:document-styles"); 0106 stylesWriter->startElement("office:document-styles"); 0107 stylesWriter->addAttribute("xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0"); 0108 stylesWriter->addAttribute("xmlns:style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0"); 0109 stylesWriter->addAttribute("xmlns:text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0"); 0110 stylesWriter->addAttribute("xmlns:table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0"); 0111 stylesWriter->addAttribute("xmlns:draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"); 0112 stylesWriter->addAttribute("xmlns:fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"); 0113 stylesWriter->addAttribute("xmlns:svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"); 0114 stylesWriter->addAttribute("office:version", "1.0"); 0115 0116 m_mainStyles->saveOdfStyles(KoGenStyles::MasterStyles, stylesWriter); 0117 m_mainStyles->saveOdfStyles(KoGenStyles::DocumentStyles, stylesWriter); // office:style 0118 m_mainStyles->saveOdfStyles(KoGenStyles::DocumentAutomaticStyles, stylesWriter); // office:automatic-styles 0119 0120 stylesWriter->endElement(); // office:document-styles 0121 stylesWriter->endDocument(); 0122 0123 delete stylesWriter; 0124 return m_store->store()->close(); 0125 } 0126 0127 bool HTMLImport::createMeta() 0128 { 0129 if (!m_store->store()->open("meta.xml")) 0130 return false; 0131 0132 KoStoreDevice dev(m_store->store()); 0133 KoXmlWriter* metaWriter = new KoXmlWriter(&dev); 0134 metaWriter->startDocument("office:document-meta"); 0135 metaWriter->startElement("office:document-meta"); 0136 metaWriter->addAttribute("xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0"); 0137 metaWriter->addAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink"); 0138 metaWriter->addAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/"); 0139 metaWriter->addAttribute("xmlns:meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"); 0140 metaWriter->startElement("office:meta"); 0141 0142 //metaWriter->startElement("dc:title"); 0143 //metaWriter->addTextNode(workbook->property(Workbook::PIDSI_TITLE).toString()); 0144 //metaWriter->endElement(); 0145 0146 //metaWriter->startElement("dc:subject", false); 0147 //metaWriter->addTextNode(workbook->property(Workbook::PIDSI_SUBJECT).toString()); 0148 //metaWriter->endElement(); 0149 0150 metaWriter->endElement(); // office:meta 0151 metaWriter->endElement(); // office:document-meta 0152 metaWriter->endDocument(); 0153 0154 delete metaWriter; 0155 return m_store->store()->close(); 0156 } 0157 0158 KoFilter::ConversionStatus HTMLImport::loadUrl(const QUrl &url) 0159 { 0160 debugHtml << url; 0161 0162 KoXmlWriter* bodyWriter = m_store->bodyWriter(); 0163 //KoXmlWriter* contentWriter = m_store->contentWriter(); 0164 0165 QStringList sheets; 0166 { 0167 QDomDocument doc("mydocument"); 0168 QFile file(url.toLocalFile()); 0169 if (!file.open(QIODevice::ReadOnly)) 0170 return KoFilter::ConversionStatus::StorageCreationError; 0171 if (!doc.setContent(&file)) { 0172 file.close(); 0173 return KoFilter::ConversionStatus::FileNotFound; 0174 } 0175 file.close(); 0176 QDomNodeList body = doc.elementsByTagName("body"); 0177 QDomNode docbody = body.item(0); 0178 0179 if (!docbody.isNull()) { 0180 m_states.push(InBody); 0181 bodyWriter->startElement("office:spreadsheet"); 0182 parseNode(docbody); 0183 bodyWriter->endElement(); // office:spreadsheet 0184 m_states.pop(); 0185 } 0186 0187 // frames 0188 QDomNodeList frameset = doc.elementsByTagName("frameset"); 0189 QDomNode frame = frameset.item(0); 0190 0191 if (!frame.isNull()) { 0192 for(int i = 0; i < frameset.length(); ++i) { 0193 for (QDomNode n = frameset.item(i).firstChild(); !n.isNull(); n = n.nextSibling()) { 0194 QDomElement f = n.toElement(); 0195 if(!f.isNull() && f.nodeName().toLower() == "frame" && f.attribute("name") == "frSheet") 0196 sheets.append(f.attribute("src")); 0197 } 0198 } 0199 } 0200 } 0201 0202 // the QDOMDocument is no more and we can call us recursively now. 0203 if(!sheets.isEmpty()) { 0204 m_states.push(InFrameset); 0205 foreach(const QString &src, sheets) { 0206 const QUrl u = QUrl::fromLocalFile(QFileInfo(m_inputDir, src).absoluteFilePath()); 0207 loadUrl(u); 0208 } 0209 m_states.pop(); 0210 } 0211 0212 return KoFilter::OK; 0213 } 0214 0215 void HTMLImport::parseNode(QDomNode node) 0216 { 0217 KoXmlWriter* bodyWriter = m_store->bodyWriter(); 0218 //KoXmlWriter* contentWriter = m_store->contentWriter(); 0219 0220 // check if this is a text node. 0221 if (node.isText()) { 0222 QDomText t = node.toText(); 0223 if(!m_states.isEmpty() && m_states.top() == InCell) { 0224 const QString s = t.data().trimmed(); 0225 if(!s.isEmpty()) { 0226 //debugHtml<<"TEXT tagname=" << node.nodeName() << "TEXT="<<t.data().string(); 0227 bodyWriter->addAttribute("office:value-type", "string"); 0228 bodyWriter->addAttribute("office:string-value", s); 0229 } 0230 } 0231 return; // no children anymore... 0232 } 0233 0234 QString tag = node.nodeName().toLower(); 0235 0236 if(tag == "table") { 0237 m_states.push(InTable); 0238 bodyWriter->startElement("table:table"); 0239 0240 // hack to get some name defined 0241 static int sheetCount = 0; 0242 bodyWriter->addAttribute("table:name", QString("Sheet %1").arg(++sheetCount)); 0243 } 0244 else if(tag == "tr") { 0245 m_states.push(InRow); 0246 bodyWriter->startElement("table:table-row"); 0247 //xmlWriter->addAttribute("table:number-columns-spanned", ); 0248 //xmlWriter->addAttribute("table:number-rows-spanned", ); 0249 } 0250 else if(tag == "td") { 0251 m_states.push(InCell); 0252 bodyWriter->startElement("table:table-cell"); 0253 } else { 0254 m_states.push(InNone); 0255 } 0256 0257 //debugHtml<<"...START nodeName="<<node.nodeName(); 0258 0259 QDomElement e = node.toElement(); 0260 bool go_recursive = true; 0261 if (!e.isNull()) { 0262 //parseStyle(e); // get the CSS information 0263 go_recursive = parseTag(e); // get the tag information 0264 } 0265 if (go_recursive) { 0266 for (QDomNode n = node.firstChild(); !n.isNull(); n = n.nextSibling()) { 0267 parseNode(n); 0268 } 0269 } 0270 0271 State state = m_states.pop(); 0272 if(state == InTable || state == InRow || state == InCell) { 0273 bodyWriter->endElement(); 0274 } 0275 0276 //debugHtml<<"...END nodeName="<<node.nodeName(); 0277 } 0278 0279 bool HTMLImport::parseTag(QDomElement element) 0280 { 0281 QString tag = element.tagName().toLower(); 0282 0283 // Don't handle the content of comment- or script-nodes. 0284 return !(element.nodeType() == QDomNode::NodeType::CommentNode || tag == "script"); 0285 } 0286 0287 #include <htmlimport.moc>