File indexing completed on 2024-06-16 03:42:48
0001 /* 0002 File : OdsFilter.cpp 0003 Project : LabPlot 0004 Description : Ods I/O-filter 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2023 Stefan Gerlach <stefan.gerlach@uni.kn> 0007 SPDX-License-Identifier: GPL-2.0-or-later 0008 */ 0009 0010 #include "backend/datasources/filters/OdsFilter.h" 0011 #include "backend/core/column/Column.h" 0012 #include "backend/datasources/AbstractDataSource.h" 0013 #include "backend/datasources/filters/OdsFilterPrivate.h" 0014 #include "backend/matrix/Matrix.h" 0015 #include "backend/spreadsheet/Spreadsheet.h" 0016 0017 #include <KLocalizedString> 0018 #include <QTreeWidgetItem> 0019 0020 #ifdef HAVE_ORCUS 0021 #include <orcus/orcus_ods.hpp> 0022 #include <orcus/spreadsheet/factory.hpp> 0023 #include <orcus/spreadsheet/sheet.hpp> 0024 0025 #include <ixion/model_context.hpp> 0026 0027 using namespace orcus; 0028 #endif 0029 0030 // TODO: 0031 // * export data when Orcus support is stable 0032 // * datetime support? 0033 OdsFilter::OdsFilter() 0034 : AbstractFileFilter(FileType::Ods) 0035 , d(new OdsFilterPrivate(this)) { 0036 } 0037 0038 OdsFilter::~OdsFilter() { 0039 } 0040 0041 QString OdsFilter::fileInfoString(const QString& fileName) { 0042 #ifdef HAVE_ORCUS 0043 spreadsheet::range_size_t ss{1048576, 16384}; 0044 spreadsheet::document doc{ss}; 0045 spreadsheet::import_factory factory{doc}; 0046 orcus_ods loader(&factory); 0047 0048 loader.read_file(fileName.toStdString()); 0049 0050 const size_t nrSheets = doc.get_sheet_count(); 0051 auto dt = doc.get_origin_date(); 0052 0053 QString info(i18n("Sheet count: %1", QString::number(nrSheets))); 0054 info += QStringLiteral("<br>"); 0055 0056 for (size_t i = 0; i < nrSheets; ++i) { 0057 auto name = doc.get_sheet_name(i); 0058 info += QString::fromStdString(std::string(name)); 0059 const auto* s = doc.get_sheet(i); 0060 auto r = s->get_data_range(); 0061 0062 info += QStringLiteral(" (") + QString::number(r.last.row - r.first.row + 1) + QStringLiteral(" x ") 0063 + QString::number(r.last.column - r.first.column + 1) + QStringLiteral(")"); 0064 if (i < nrSheets - 1) 0065 info += QStringLiteral(", "); 0066 } 0067 info += QStringLiteral("<br>"); 0068 0069 return info; 0070 #else 0071 Q_UNUSED(fileName) 0072 #endif 0073 return {}; 0074 } 0075 0076 void OdsFilter::readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) { 0077 d->readDataFromFile(fileName, dataSource, importMode); 0078 } 0079 void OdsFilter::write(const QString& fileName, AbstractDataSource* dataSource) { 0080 d->write(fileName, dataSource); 0081 } 0082 0083 /////////////////////////////////////////////////////////////////////// 0084 0085 void OdsFilter::setSelectedSheetNames(const QStringList& names) { 0086 d->currentSheetName = names.first(); 0087 d->selectedSheetNames = names; 0088 } 0089 const QStringList OdsFilter::selectedSheetNames() const { 0090 return d->selectedSheetNames; 0091 } 0092 0093 void OdsFilter::setFirstRowAsColumnNames(const bool b) { 0094 d->firstRowAsColumnNames = b; 0095 } 0096 0097 QVector<QStringList> OdsFilter::preview(const QString& sheetName, int lines) { 0098 return d->preview(sheetName, lines); 0099 } 0100 0101 void OdsFilter::parse(const QString& fileName, QTreeWidgetItem* root) { 0102 d->parse(fileName, root); 0103 } 0104 0105 /*! 0106 * \brief Sets the startColumn to \a column 0107 * \param column the column to be set 0108 */ 0109 void OdsFilter::setStartColumn(const int column) { 0110 d->startColumn = column; 0111 } 0112 0113 /*! 0114 * \brief Returns startColumn 0115 * \return The startColumn 0116 */ 0117 int OdsFilter::startColumn() const { 0118 return d->startColumn; 0119 } 0120 0121 /*! 0122 * \brief Sets the endColumn to \a column 0123 * \param column the column to be set 0124 */ 0125 void OdsFilter::setEndColumn(const int column) { 0126 d->endColumn = column; 0127 } 0128 0129 /*! 0130 * \brief Returns endColumn 0131 * \return The endColumn 0132 */ 0133 int OdsFilter::endColumn() const { 0134 return d->endColumn; 0135 } 0136 0137 /* actual start column (including range) */ 0138 int OdsFilter::firstColumn() const { 0139 return d->firstColumn; 0140 } 0141 0142 /*! 0143 * \brief Sets the startRow to \a row 0144 * \param row the row to be set 0145 */ 0146 void OdsFilter::setStartRow(const int row) { 0147 d->startRow = row; 0148 } 0149 0150 /*! 0151 * \brief Returns startRow 0152 * \return The startRow 0153 */ 0154 int OdsFilter::startRow() const { 0155 return d->startRow; 0156 } 0157 0158 /*! 0159 * \brief Sets the endRow to \a row 0160 * \param row the row to be set 0161 */ 0162 void OdsFilter::setEndRow(const int row) { 0163 d->endRow = row; 0164 } 0165 0166 /*! 0167 * \brief Returns endRow 0168 * \return The endRow 0169 */ 0170 int OdsFilter::endRow() const { 0171 return d->endRow; 0172 } 0173 0174 void OdsFilter::setCurrentSheetName(const QString& sheetName) { 0175 d->currentSheetName = sheetName; 0176 } 0177 0178 // ############################################################################## 0179 // ################## Serialization/Deserialization ########################### 0180 // ############################################################################## 0181 0182 /*! 0183 Saves as XML. 0184 */ 0185 0186 void OdsFilter::save(QXmlStreamWriter*) const { 0187 } 0188 0189 bool OdsFilter::load(XmlStreamReader*) { 0190 return true; 0191 } 0192 0193 // ##################################################################### 0194 // ################### Private implementation ########################## 0195 // ##################################################################### 0196 0197 OdsFilterPrivate::OdsFilterPrivate(OdsFilter*) { 0198 } 0199 0200 OdsFilterPrivate::~OdsFilterPrivate() { 0201 } 0202 0203 void OdsFilterPrivate::write(const QString& /*fileName*/, AbstractDataSource* /*dataSource*/) { 0204 DEBUG(Q_FUNC_INFO) 0205 // TODO: "The export functionality of the Orcus library is highly experimental." 0206 DEBUG(Q_FUNC_INFO << ", not implemented yet!") 0207 } 0208 0209 void OdsFilterPrivate::readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) { 0210 DEBUG(Q_FUNC_INFO) 0211 0212 if (selectedSheetNames.isEmpty()) { 0213 DEBUG(Q_FUNC_INFO << ", WARNING: no sheet selected"); 0214 return; 0215 } 0216 0217 // read data from selected sheets into dataSource using importMode 0218 // QDEBUG(Q_FUNC_INFO << ", Reading sheets" << selectedSheetNames) 0219 for (const auto& sheetName : selectedSheetNames) { 0220 // DEBUG(Q_FUNC_INFO << ", sheet " << sheetName.toStdString()) 0221 currentSheetName = sheetName.split(QLatin1Char('!')).last(); 0222 readCurrentSheet(fileName, dataSource, importMode); 0223 importMode = AbstractFileFilter::ImportMode::Append; // columns of other sheets are appended 0224 } 0225 } 0226 0227 void OdsFilterPrivate::readCurrentSheet(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) { 0228 DEBUG(Q_FUNC_INFO << ", current sheet name = " << currentSheetName.toStdString() << ", first row as column names = " << firstRowAsColumnNames) 0229 0230 if (!dataSource) 0231 return; 0232 0233 #ifdef HAVE_ORCUS 0234 DEBUG(Q_FUNC_INFO << ", sheet count = " << m_document.get_sheet_count()) 0235 if (m_document.get_sheet_count() == 0) { // not loaded yet 0236 DEBUG(Q_FUNC_INFO << ", loading file " << fileName.toStdString()) 0237 m_document.clear(); 0238 spreadsheet::import_factory factory{m_document}; 0239 orcus_ods loader(&factory); 0240 0241 loader.read_file(fileName.toStdString()); 0242 } 0243 0244 // get sheet index from name and read data into dataSource 0245 auto* sheet = m_document.get_sheet(currentSheetName.toStdString()); 0246 if (!sheet) { 0247 DEBUG(Q_FUNC_INFO << ", sheet not found: " << currentSheetName.toStdString()) 0248 return; 0249 } 0250 0251 const auto sheetIndex = sheet->get_index(); 0252 if (sheetIndex == ixion::invalid_sheet) { 0253 DEBUG(Q_FUNC_INFO << ", invalid sheet") 0254 return; 0255 } 0256 0257 auto ranges = sheet->get_data_range(); 0258 DEBUG(Q_FUNC_INFO << ", data range: col " << ranges.first.column << ".." << ranges.last.column << ", row " << ranges.first.row << ".." << ranges.last.row) 0259 if (firstRowAsColumnNames) // skip first row 0260 ranges.first.row++; 0261 size_t actualRows = ranges.last.row - ranges.first.row + 1; 0262 size_t actualEndRow = (endRow == -1 ? ranges.last.row + 1 : endRow); 0263 if ((size_t)startRow > actualRows) 0264 startRow = 1; // start from the begining 0265 DEBUG(Q_FUNC_INFO << ", start/end row = " << startRow << " " << endRow) 0266 DEBUG(Q_FUNC_INFO << ", start/end col = " << startColumn << " " << endColumn) 0267 actualRows = std::min(actualRows - startRow, (size_t)(actualEndRow - startRow)) + 1; 0268 0269 size_t actualCols = ranges.last.column - ranges.first.column + 1; 0270 size_t actualEndColumn = (endColumn == -1 ? ranges.last.column + 1 : endColumn); 0271 if ((size_t)startColumn > actualCols) 0272 startColumn = 1; // start from the begining 0273 actualCols = std::min(actualCols - startColumn, (size_t)(actualEndColumn - startColumn)) + 1; 0274 0275 DEBUG(Q_FUNC_INFO << ", actual rows/cols = " << actualRows << " " << actualCols) 0276 if (actualRows < 1 || actualCols < 1) { 0277 DEBUG(Q_FUNC_INFO << ", no actual rows of columns") 0278 return; 0279 } 0280 0281 // column modes 0282 QVector<AbstractColumn::ColumnMode> columnModes; 0283 columnModes.resize(actualCols); 0284 0285 // set column modes (only for spreadsheet, matrix uses default: Double) 0286 const auto& model = m_document.get_model_context(); 0287 if (dynamic_cast<Spreadsheet*>(dataSource)) { 0288 for (size_t col = 0; col < actualCols; col++) { 0289 // check start row 0290 ixion::abs_address_t pos(sheetIndex, ranges.first.row + startRow - 1, ranges.first.column + startColumn - 1 + col); 0291 0292 auto type = model.get_celltype(pos); 0293 switch (type) { 0294 case ixion::celltype_t::string: 0295 columnModes[col] = AbstractColumn::ColumnMode::Text; 0296 break; 0297 case ixion::celltype_t::numeric: // numeric values are always double (can't detect if integer) 0298 // default: Double 0299 break; 0300 case ixion::celltype_t::formula: { 0301 auto formula = model.get_formula_result(pos); 0302 switch (formula.get_type()) { // conside formula type 0303 case ixion::formula_result::result_type::value: 0304 columnModes[col] = AbstractColumn::ColumnMode::Double; 0305 break; 0306 case ixion::formula_result::result_type::string: 0307 columnModes[col] = AbstractColumn::ColumnMode::Text; 0308 break; 0309 case ixion::formula_result::result_type::error: 0310 // TODO: not available in ixion 0.17 ? 0311 // case ixion::formula_result::result_type::boolean: 0312 case ixion::formula_result::result_type::matrix: 0313 DEBUG(Q_FUNC_INFO << ", formula type not supported yet.") 0314 break; 0315 } 0316 break; 0317 } 0318 case ixion::celltype_t::boolean: 0319 case ixion::celltype_t::empty: 0320 case ixion::celltype_t::unknown: // default: Double 0321 break; 0322 } 0323 } 0324 } 0325 0326 QStringList vectorNames; 0327 if (firstRowAsColumnNames) { 0328 for (size_t col = 0; col < actualCols; col++) { 0329 ixion::abs_address_t pos(sheetIndex, ranges.first.row - 1 + startRow - 1, ranges.first.column + startColumn - 1 + col); 0330 0331 auto type = model.get_celltype(pos); 0332 switch (type) { 0333 case ixion::celltype_t::string: { 0334 auto value = model.get_string_value(pos); 0335 vectorNames << QString::fromStdString(std::string(value)); 0336 break; 0337 } 0338 case ixion::celltype_t::numeric: { 0339 double value = model.get_numeric_value(pos); 0340 vectorNames << QLocale().toString(value); 0341 break; 0342 } 0343 case ixion::celltype_t::formula: { 0344 auto formula = model.get_formula_result(pos); 0345 switch (formula.get_type()) { 0346 case ixion::formula_result::result_type::value: { 0347 auto value = formula.get_value(); 0348 vectorNames << QLocale().toString(value); 0349 break; 0350 } 0351 case ixion::formula_result::result_type::string: 0352 vectorNames << QString::fromStdString(formula.get_string()); 0353 break; 0354 case ixion::formula_result::result_type::error: 0355 // TODO: not available in ixion 0.17 ? 0356 // case ixion::formula_result::result_type::boolean: 0357 case ixion::formula_result::result_type::matrix: 0358 vectorNames << AbstractFileFilter::convertFromNumberToColumn(ranges.first.column + startColumn - 1 + col); 0359 ; 0360 break; 0361 } 0362 // TODO 0363 break; 0364 } 0365 case ixion::celltype_t::empty: 0366 case ixion::celltype_t::unknown: 0367 case ixion::celltype_t::boolean: 0368 vectorNames << AbstractFileFilter::convertFromNumberToColumn(ranges.first.column + startColumn - 1 + col); 0369 ; 0370 } 0371 } 0372 } else { 0373 for (size_t col = 0; col < actualCols; col++) 0374 vectorNames << AbstractFileFilter::convertFromNumberToColumn(ranges.first.column + startColumn - 1 + col); 0375 } 0376 0377 std::vector<void*> dataContainer; 0378 0379 // prepare import 0380 int columnOffset = dataSource->prepareImport(dataContainer, importMode, actualRows, actualCols, vectorNames, columnModes); 0381 DEBUG(Q_FUNC_INFO << ", column offset = " << columnOffset) 0382 0383 // import data 0384 for (size_t row = 0; row < actualRows; row++) { 0385 for (size_t col = 0; col < actualCols; col++) { 0386 ixion::abs_address_t pos(sheetIndex, ranges.first.row + row + startRow - 1, ranges.first.column + col + startColumn - 1); 0387 0388 auto type = model.get_celltype(pos); 0389 switch (type) { 0390 case ixion::celltype_t::numeric: { 0391 double value = model.get_numeric_value(pos); 0392 // column mode may be non-numeric 0393 if (columnModes.at(col) == AbstractColumn::ColumnMode::Double) 0394 (*static_cast<QVector<double>*>(dataContainer[col]))[row] = value; 0395 else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text) 0396 (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QLocale().toString(value); 0397 break; 0398 } 0399 case ixion::celltype_t::formula: { 0400 // read formula result. We can't handle formulas yet (?) 0401 auto formula = model.get_formula_result(pos); 0402 switch (formula.get_type()) { 0403 case ixion::formula_result::result_type::value: { 0404 DEBUG(Q_FUNC_INFO << ", value formula found") 0405 auto value = formula.get_value(); 0406 // text column may have value-type formula 0407 if (columnModes.at(col) == AbstractColumn::ColumnMode::Double) 0408 (*static_cast<QVector<double>*>(dataContainer[col]))[row] = value; 0409 else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text) 0410 (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QLocale().toString(value); 0411 break; 0412 } 0413 case ixion::formula_result::result_type::string: 0414 DEBUG(Q_FUNC_INFO << ", string formula found") 0415 // column mode may be numeric 0416 if (columnModes.at(col) == AbstractColumn::ColumnMode::Double) 0417 (*static_cast<QVector<double>*>(dataContainer[col]))[row] = formula.get_value(); 0418 else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text) 0419 (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QString::fromStdString(formula.get_string()); 0420 break; 0421 case ixion::formula_result::result_type::error: 0422 // TODO: not available in ixion 0.17 ? 0423 // case ixion::formula_result::result_type::boolean: 0424 case ixion::formula_result::result_type::matrix: 0425 DEBUG(Q_FUNC_INFO << ", formula type not supported yet.") 0426 break; 0427 } 0428 break; 0429 } 0430 case ixion::celltype_t::string: { 0431 // column mode may be numeric 0432 if (columnModes.at(col) == AbstractColumn::ColumnMode::Double) 0433 (*static_cast<QVector<double>*>(dataContainer[col]))[row] = model.get_numeric_value(pos); 0434 else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text) 0435 (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QString::fromStdString(std::string(model.get_string_value(pos))); 0436 break; 0437 } 0438 case ixion::celltype_t::empty: // nothing to do 0439 break; 0440 case ixion::celltype_t::unknown: 0441 case ixion::celltype_t::boolean: 0442 DEBUG(Q_FUNC_INFO << ", cell type unknown or boolean not supported yet.") 0443 } 0444 } 0445 } 0446 0447 dataSource->finalizeImport(columnOffset, 1, actualCols, QString(), importMode); 0448 #else 0449 Q_UNUSED(fileName) 0450 Q_UNUSED(dataSource) 0451 Q_UNUSED(importMode) 0452 #endif 0453 } 0454 0455 QVector<QStringList> OdsFilterPrivate::preview(const QString& sheetName, int lines) { 0456 QVector<QStringList> dataString; 0457 #ifdef HAVE_ORCUS 0458 // get sheet index by name and read lines of data into dataString 0459 const auto* sheet = m_document.get_sheet(sheetName.toStdString()); 0460 if (!sheet) { 0461 DEBUG(Q_FUNC_INFO << ", sheet not found: " << sheetName.toStdString()) 0462 return dataString; 0463 } 0464 0465 const auto sheetIndex = sheet->get_index(); 0466 if (sheetIndex == ixion::invalid_sheet) { 0467 DEBUG(Q_FUNC_INFO << ", invalid sheet index " << sheetIndex) 0468 return dataString; 0469 } 0470 0471 const auto ranges = sheet->get_data_range(); 0472 DEBUG(Q_FUNC_INFO << ", data range: col " << ranges.first.column << ".." << ranges.last.column << ", row " << ranges.first.row << ".." << ranges.last.row) 0473 0474 const int maxCols = 100; 0475 DEBUG(Q_FUNC_INFO << ", start/end row = " << startRow << " " << endRow) 0476 DEBUG(Q_FUNC_INFO << ", start/end col = " << startColumn << " " << endColumn) 0477 int actualStartRow = (startRow > (ranges.last.row - ranges.first.row + 1) ? ranges.first.row : ranges.first.row + startRow - 1); 0478 const int actualEndRow = (endRow == -1 ? ranges.last.row : std::min(ranges.last.row, ranges.first.row + endRow - 1)); 0479 int actualStartCol = (startColumn > (ranges.last.column - ranges.first.column + 1) ? ranges.first.column : ranges.first.column + startColumn - 1); 0480 firstColumn = actualStartCol; 0481 const int actualEndCol = (endColumn == -1 ? ranges.last.column : std::min(ranges.last.column, ranges.first.column + endColumn - 1)); 0482 0483 const auto& model = m_document.get_model_context(); 0484 for (ixion::row_t row = actualStartRow; row <= std::min(actualEndRow, actualStartRow + lines); row++) { 0485 DEBUG(Q_FUNC_INFO << ", row " << row) 0486 QStringList line; 0487 for (ixion::col_t col = actualStartCol; col <= std::min(actualEndCol, actualStartCol + maxCols); col++) { 0488 ixion::abs_address_t pos(sheetIndex, row, col); 0489 0490 auto type = model.get_celltype(pos); 0491 switch (type) { 0492 case ixion::celltype_t::string: { 0493 auto value = model.get_string_value(pos); 0494 DEBUG(Q_FUNC_INFO << " " << value) 0495 line << QString::fromStdString(std::string(value)); 0496 break; 0497 } 0498 case ixion::celltype_t::numeric: { 0499 double value = model.get_numeric_value(pos); 0500 DEBUG(Q_FUNC_INFO << " " << value) 0501 line << QLocale().toString(value); 0502 break; 0503 } 0504 case ixion::celltype_t::formula: { 0505 // read formula result. We can't handle formulas yet (?) 0506 auto formula = model.get_formula_result(pos); 0507 switch (formula.get_type()) { 0508 case ixion::formula_result::result_type::value: 0509 line << QLocale().toString(formula.get_value()); 0510 break; 0511 case ixion::formula_result::result_type::string: 0512 line << QString::fromStdString(formula.get_string()); 0513 break; 0514 case ixion::formula_result::result_type::error: 0515 // TODO: not available in ixion 0.17 ? 0516 // case ixion::formula_result::result_type::boolean: 0517 case ixion::formula_result::result_type::matrix: 0518 line << QString(); 0519 DEBUG(Q_FUNC_INFO << ", formula type error, boolean or matrix not implemented yet.") 0520 break; 0521 } 0522 break; 0523 } 0524 case ixion::celltype_t::empty: 0525 line << QString(); 0526 break; 0527 case ixion::celltype_t::unknown: 0528 case ixion::celltype_t::boolean: 0529 line << QString(); 0530 DEBUG(Q_FUNC_INFO << ", cell type unknown or boolean not implemented yet.") 0531 break; 0532 } 0533 } 0534 dataString << line; 0535 } 0536 #else 0537 Q_UNUSED(sheetName) 0538 Q_UNUSED(lines) 0539 #endif 0540 0541 return dataString; 0542 } 0543 0544 void OdsFilterPrivate::parse(const QString& fileName, QTreeWidgetItem* parentItem) { 0545 DEBUG(Q_FUNC_INFO) 0546 #ifdef HAVE_ORCUS 0547 m_document.clear(); 0548 spreadsheet::import_factory factory{m_document}; 0549 orcus_ods loader(&factory); 0550 0551 loader.read_file(fileName.toStdString()); 0552 0553 auto* fileNameItem = new QTreeWidgetItem(QStringList() << fileName); 0554 parentItem->addChild(fileNameItem); 0555 0556 const size_t nrSheets = m_document.get_sheet_count(); 0557 for (size_t i = 0; i < nrSheets; i++) { 0558 auto name = m_document.get_sheet_name(i); 0559 0560 auto* sheetItem = new QTreeWidgetItem(QStringList() << QString::fromStdString(std::string(name))); 0561 sheetItem->setIcon(0, QIcon::fromTheme(QStringLiteral("folder"))); 0562 0563 fileNameItem->addChild(sheetItem); 0564 } 0565 #else 0566 Q_UNUSED(fileName) 0567 Q_UNUSED(parentItem) 0568 #endif 0569 }