File indexing completed on 2024-06-16 03:42:48

0001 /*
0002     File                 : OdsFilter.cpp
0003     Project              : LabPlot
0004     Description          : Ods I/O-filter
0005     --------------------------------------------------------------------
0006     SPDX-FileCopyrightText: 2023 Stefan Gerlach <stefan.gerlach@uni.kn>
0007     SPDX-License-Identifier: GPL-2.0-or-later
0008 */
0009 
0010 #include "backend/datasources/filters/OdsFilter.h"
0011 #include "backend/core/column/Column.h"
0012 #include "backend/datasources/AbstractDataSource.h"
0013 #include "backend/datasources/filters/OdsFilterPrivate.h"
0014 #include "backend/matrix/Matrix.h"
0015 #include "backend/spreadsheet/Spreadsheet.h"
0016 
0017 #include <KLocalizedString>
0018 #include <QTreeWidgetItem>
0019 
0020 #ifdef HAVE_ORCUS
0021 #include <orcus/orcus_ods.hpp>
0022 #include <orcus/spreadsheet/factory.hpp>
0023 #include <orcus/spreadsheet/sheet.hpp>
0024 
0025 #include <ixion/model_context.hpp>
0026 
0027 using namespace orcus;
0028 #endif
0029 
0030 // TODO:
0031 // * export data when Orcus support is stable
0032 // * datetime support?
0033 OdsFilter::OdsFilter()
0034     : AbstractFileFilter(FileType::Ods)
0035     , d(new OdsFilterPrivate(this)) {
0036 }
0037 
0038 OdsFilter::~OdsFilter() {
0039 }
0040 
0041 QString OdsFilter::fileInfoString(const QString& fileName) {
0042 #ifdef HAVE_ORCUS
0043     spreadsheet::range_size_t ss{1048576, 16384};
0044     spreadsheet::document doc{ss};
0045     spreadsheet::import_factory factory{doc};
0046     orcus_ods loader(&factory);
0047 
0048     loader.read_file(fileName.toStdString());
0049 
0050     const size_t nrSheets = doc.get_sheet_count();
0051     auto dt = doc.get_origin_date();
0052 
0053     QString info(i18n("Sheet count: %1", QString::number(nrSheets)));
0054     info += QStringLiteral("<br>");
0055 
0056     for (size_t i = 0; i < nrSheets; ++i) {
0057         auto name = doc.get_sheet_name(i);
0058         info += QString::fromStdString(std::string(name));
0059         const auto* s = doc.get_sheet(i);
0060         auto r = s->get_data_range();
0061 
0062         info += QStringLiteral(" (") + QString::number(r.last.row - r.first.row + 1) + QStringLiteral(" x ")
0063             + QString::number(r.last.column - r.first.column + 1) + QStringLiteral(")");
0064         if (i < nrSheets - 1)
0065             info += QStringLiteral(", ");
0066     }
0067     info += QStringLiteral("<br>");
0068 
0069     return info;
0070 #else
0071     Q_UNUSED(fileName)
0072 #endif
0073     return {};
0074 }
0075 
0076 void OdsFilter::readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) {
0077     d->readDataFromFile(fileName, dataSource, importMode);
0078 }
0079 void OdsFilter::write(const QString& fileName, AbstractDataSource* dataSource) {
0080     d->write(fileName, dataSource);
0081 }
0082 
0083 ///////////////////////////////////////////////////////////////////////
0084 
0085 void OdsFilter::setSelectedSheetNames(const QStringList& names) {
0086     d->currentSheetName = names.first();
0087     d->selectedSheetNames = names;
0088 }
0089 const QStringList OdsFilter::selectedSheetNames() const {
0090     return d->selectedSheetNames;
0091 }
0092 
0093 void OdsFilter::setFirstRowAsColumnNames(const bool b) {
0094     d->firstRowAsColumnNames = b;
0095 }
0096 
0097 QVector<QStringList> OdsFilter::preview(const QString& sheetName, int lines) {
0098     return d->preview(sheetName, lines);
0099 }
0100 
0101 void OdsFilter::parse(const QString& fileName, QTreeWidgetItem* root) {
0102     d->parse(fileName, root);
0103 }
0104 
0105 /*!
0106  * \brief Sets the startColumn to \a column
0107  * \param column the column to be set
0108  */
0109 void OdsFilter::setStartColumn(const int column) {
0110     d->startColumn = column;
0111 }
0112 
0113 /*!
0114  * \brief Returns startColumn
0115  * \return The startColumn
0116  */
0117 int OdsFilter::startColumn() const {
0118     return d->startColumn;
0119 }
0120 
0121 /*!
0122  * \brief Sets the endColumn to \a column
0123  * \param column the column to be set
0124  */
0125 void OdsFilter::setEndColumn(const int column) {
0126     d->endColumn = column;
0127 }
0128 
0129 /*!
0130  * \brief Returns endColumn
0131  * \return The endColumn
0132  */
0133 int OdsFilter::endColumn() const {
0134     return d->endColumn;
0135 }
0136 
0137 /* actual start column (including range) */
0138 int OdsFilter::firstColumn() const {
0139     return d->firstColumn;
0140 }
0141 
0142 /*!
0143  * \brief Sets the startRow to \a row
0144  * \param row the row to be set
0145  */
0146 void OdsFilter::setStartRow(const int row) {
0147     d->startRow = row;
0148 }
0149 
0150 /*!
0151  * \brief Returns startRow
0152  * \return The startRow
0153  */
0154 int OdsFilter::startRow() const {
0155     return d->startRow;
0156 }
0157 
0158 /*!
0159  * \brief Sets the endRow to \a row
0160  * \param row the row to be set
0161  */
0162 void OdsFilter::setEndRow(const int row) {
0163     d->endRow = row;
0164 }
0165 
0166 /*!
0167  * \brief Returns endRow
0168  * \return The endRow
0169  */
0170 int OdsFilter::endRow() const {
0171     return d->endRow;
0172 }
0173 
0174 void OdsFilter::setCurrentSheetName(const QString& sheetName) {
0175     d->currentSheetName = sheetName;
0176 }
0177 
0178 // ##############################################################################
0179 // ##################  Serialization/Deserialization  ###########################
0180 // ##############################################################################
0181 
0182 /*!
0183   Saves as XML.
0184 */
0185 
0186 void OdsFilter::save(QXmlStreamWriter*) const {
0187 }
0188 
0189 bool OdsFilter::load(XmlStreamReader*) {
0190     return true;
0191 }
0192 
0193 // #####################################################################
0194 // ################### Private implementation ##########################
0195 // #####################################################################
0196 
0197 OdsFilterPrivate::OdsFilterPrivate(OdsFilter*) {
0198 }
0199 
0200 OdsFilterPrivate::~OdsFilterPrivate() {
0201 }
0202 
0203 void OdsFilterPrivate::write(const QString& /*fileName*/, AbstractDataSource* /*dataSource*/) {
0204     DEBUG(Q_FUNC_INFO)
0205     // TODO: "The export functionality of the Orcus library is highly experimental."
0206     DEBUG(Q_FUNC_INFO << ", not implemented yet!")
0207 }
0208 
0209 void OdsFilterPrivate::readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) {
0210     DEBUG(Q_FUNC_INFO)
0211 
0212     if (selectedSheetNames.isEmpty()) {
0213         DEBUG(Q_FUNC_INFO << ", WARNING: no sheet selected");
0214         return;
0215     }
0216 
0217     // read data from selected sheets into dataSource using importMode
0218     // QDEBUG(Q_FUNC_INFO << ", Reading sheets" << selectedSheetNames)
0219     for (const auto& sheetName : selectedSheetNames) {
0220         // DEBUG(Q_FUNC_INFO << ", sheet " << sheetName.toStdString())
0221         currentSheetName = sheetName.split(QLatin1Char('!')).last();
0222         readCurrentSheet(fileName, dataSource, importMode);
0223         importMode = AbstractFileFilter::ImportMode::Append; // columns of other sheets are appended
0224     }
0225 }
0226 
0227 void OdsFilterPrivate::readCurrentSheet(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode) {
0228     DEBUG(Q_FUNC_INFO << ", current sheet name = " << currentSheetName.toStdString() << ", first row as column names = " << firstRowAsColumnNames)
0229 
0230     if (!dataSource)
0231         return;
0232 
0233 #ifdef HAVE_ORCUS
0234     DEBUG(Q_FUNC_INFO << ", sheet count = " << m_document.get_sheet_count())
0235     if (m_document.get_sheet_count() == 0) { // not loaded yet
0236         DEBUG(Q_FUNC_INFO << ", loading file " << fileName.toStdString())
0237         m_document.clear();
0238         spreadsheet::import_factory factory{m_document};
0239         orcus_ods loader(&factory);
0240 
0241         loader.read_file(fileName.toStdString());
0242     }
0243 
0244     // get sheet index from name and read data into dataSource
0245     auto* sheet = m_document.get_sheet(currentSheetName.toStdString());
0246     if (!sheet) {
0247         DEBUG(Q_FUNC_INFO << ", sheet not found: " << currentSheetName.toStdString())
0248         return;
0249     }
0250 
0251     const auto sheetIndex = sheet->get_index();
0252     if (sheetIndex == ixion::invalid_sheet) {
0253         DEBUG(Q_FUNC_INFO << ", invalid sheet")
0254         return;
0255     }
0256 
0257     auto ranges = sheet->get_data_range();
0258     DEBUG(Q_FUNC_INFO << ", data range: col " << ranges.first.column << ".." << ranges.last.column << ", row " << ranges.first.row << ".." << ranges.last.row)
0259     if (firstRowAsColumnNames) // skip first row
0260         ranges.first.row++;
0261     size_t actualRows = ranges.last.row - ranges.first.row + 1;
0262     size_t actualEndRow = (endRow == -1 ? ranges.last.row + 1 : endRow);
0263     if ((size_t)startRow > actualRows)
0264         startRow = 1; // start from the begining
0265     DEBUG(Q_FUNC_INFO << ", start/end row = " << startRow << " " << endRow)
0266     DEBUG(Q_FUNC_INFO << ", start/end col = " << startColumn << " " << endColumn)
0267     actualRows = std::min(actualRows - startRow, (size_t)(actualEndRow - startRow)) + 1;
0268 
0269     size_t actualCols = ranges.last.column - ranges.first.column + 1;
0270     size_t actualEndColumn = (endColumn == -1 ? ranges.last.column + 1 : endColumn);
0271     if ((size_t)startColumn > actualCols)
0272         startColumn = 1; // start from the begining
0273     actualCols = std::min(actualCols - startColumn, (size_t)(actualEndColumn - startColumn)) + 1;
0274 
0275     DEBUG(Q_FUNC_INFO << ", actual rows/cols = " << actualRows << " " << actualCols)
0276     if (actualRows < 1 || actualCols < 1) {
0277         DEBUG(Q_FUNC_INFO << ", no actual rows of columns")
0278         return;
0279     }
0280 
0281     // column modes
0282     QVector<AbstractColumn::ColumnMode> columnModes;
0283     columnModes.resize(actualCols);
0284 
0285     // set column modes (only for spreadsheet, matrix uses default: Double)
0286     const auto& model = m_document.get_model_context();
0287     if (dynamic_cast<Spreadsheet*>(dataSource)) {
0288         for (size_t col = 0; col < actualCols; col++) {
0289             // check start row
0290             ixion::abs_address_t pos(sheetIndex, ranges.first.row + startRow - 1, ranges.first.column + startColumn - 1 + col);
0291 
0292             auto type = model.get_celltype(pos);
0293             switch (type) {
0294             case ixion::celltype_t::string:
0295                 columnModes[col] = AbstractColumn::ColumnMode::Text;
0296                 break;
0297             case ixion::celltype_t::numeric: // numeric values are always double (can't detect if integer)
0298                 // default: Double
0299                 break;
0300             case ixion::celltype_t::formula: {
0301                 auto formula = model.get_formula_result(pos);
0302                 switch (formula.get_type()) { // conside formula type
0303                 case ixion::formula_result::result_type::value:
0304                     columnModes[col] = AbstractColumn::ColumnMode::Double;
0305                     break;
0306                 case ixion::formula_result::result_type::string:
0307                     columnModes[col] = AbstractColumn::ColumnMode::Text;
0308                     break;
0309                 case ixion::formula_result::result_type::error:
0310                 // TODO: not available in ixion 0.17 ?
0311                 // case ixion::formula_result::result_type::boolean:
0312                 case ixion::formula_result::result_type::matrix:
0313                     DEBUG(Q_FUNC_INFO << ", formula type not supported yet.")
0314                     break;
0315                 }
0316                 break;
0317             }
0318             case ixion::celltype_t::boolean:
0319             case ixion::celltype_t::empty:
0320             case ixion::celltype_t::unknown: // default: Double
0321                 break;
0322             }
0323         }
0324     }
0325 
0326     QStringList vectorNames;
0327     if (firstRowAsColumnNames) {
0328         for (size_t col = 0; col < actualCols; col++) {
0329             ixion::abs_address_t pos(sheetIndex, ranges.first.row - 1 + startRow - 1, ranges.first.column + startColumn - 1 + col);
0330 
0331             auto type = model.get_celltype(pos);
0332             switch (type) {
0333             case ixion::celltype_t::string: {
0334                 auto value = model.get_string_value(pos);
0335                 vectorNames << QString::fromStdString(std::string(value));
0336                 break;
0337             }
0338             case ixion::celltype_t::numeric: {
0339                 double value = model.get_numeric_value(pos);
0340                 vectorNames << QLocale().toString(value);
0341                 break;
0342             }
0343             case ixion::celltype_t::formula: {
0344                 auto formula = model.get_formula_result(pos);
0345                 switch (formula.get_type()) {
0346                 case ixion::formula_result::result_type::value: {
0347                     auto value = formula.get_value();
0348                     vectorNames << QLocale().toString(value);
0349                     break;
0350                 }
0351                 case ixion::formula_result::result_type::string:
0352                     vectorNames << QString::fromStdString(formula.get_string());
0353                     break;
0354                 case ixion::formula_result::result_type::error:
0355                 // TODO: not available in ixion 0.17 ?
0356                 // case ixion::formula_result::result_type::boolean:
0357                 case ixion::formula_result::result_type::matrix:
0358                     vectorNames << AbstractFileFilter::convertFromNumberToColumn(ranges.first.column + startColumn - 1 + col);
0359                     ;
0360                     break;
0361                 }
0362                 // TODO
0363                 break;
0364             }
0365             case ixion::celltype_t::empty:
0366             case ixion::celltype_t::unknown:
0367             case ixion::celltype_t::boolean:
0368                 vectorNames << AbstractFileFilter::convertFromNumberToColumn(ranges.first.column + startColumn - 1 + col);
0369                 ;
0370             }
0371         }
0372     } else {
0373         for (size_t col = 0; col < actualCols; col++)
0374             vectorNames << AbstractFileFilter::convertFromNumberToColumn(ranges.first.column + startColumn - 1 + col);
0375     }
0376 
0377     std::vector<void*> dataContainer;
0378 
0379     // prepare import
0380     int columnOffset = dataSource->prepareImport(dataContainer, importMode, actualRows, actualCols, vectorNames, columnModes);
0381     DEBUG(Q_FUNC_INFO << ", column offset = " << columnOffset)
0382 
0383     // import data
0384     for (size_t row = 0; row < actualRows; row++) {
0385         for (size_t col = 0; col < actualCols; col++) {
0386             ixion::abs_address_t pos(sheetIndex, ranges.first.row + row + startRow - 1, ranges.first.column + col + startColumn - 1);
0387 
0388             auto type = model.get_celltype(pos);
0389             switch (type) {
0390             case ixion::celltype_t::numeric: {
0391                 double value = model.get_numeric_value(pos);
0392                 // column mode may be non-numeric
0393                 if (columnModes.at(col) == AbstractColumn::ColumnMode::Double)
0394                     (*static_cast<QVector<double>*>(dataContainer[col]))[row] = value;
0395                 else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text)
0396                     (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QLocale().toString(value);
0397                 break;
0398             }
0399             case ixion::celltype_t::formula: {
0400                 // read formula result. We can't handle formulas yet (?)
0401                 auto formula = model.get_formula_result(pos);
0402                 switch (formula.get_type()) {
0403                 case ixion::formula_result::result_type::value: {
0404                     DEBUG(Q_FUNC_INFO << ", value formula found")
0405                     auto value = formula.get_value();
0406                     // text column may have value-type formula
0407                     if (columnModes.at(col) == AbstractColumn::ColumnMode::Double)
0408                         (*static_cast<QVector<double>*>(dataContainer[col]))[row] = value;
0409                     else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text)
0410                         (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QLocale().toString(value);
0411                     break;
0412                 }
0413                 case ixion::formula_result::result_type::string:
0414                     DEBUG(Q_FUNC_INFO << ", string formula found")
0415                     // column mode may be numeric
0416                     if (columnModes.at(col) == AbstractColumn::ColumnMode::Double)
0417                         (*static_cast<QVector<double>*>(dataContainer[col]))[row] = formula.get_value();
0418                     else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text)
0419                         (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QString::fromStdString(formula.get_string());
0420                     break;
0421                 case ixion::formula_result::result_type::error:
0422                 // TODO: not available in ixion 0.17 ?
0423                 // case ixion::formula_result::result_type::boolean:
0424                 case ixion::formula_result::result_type::matrix:
0425                     DEBUG(Q_FUNC_INFO << ", formula type not supported yet.")
0426                     break;
0427                 }
0428                 break;
0429             }
0430             case ixion::celltype_t::string: {
0431                 // column mode may be numeric
0432                 if (columnModes.at(col) == AbstractColumn::ColumnMode::Double)
0433                     (*static_cast<QVector<double>*>(dataContainer[col]))[row] = model.get_numeric_value(pos);
0434                 else if (columnModes.at(col) == AbstractColumn::ColumnMode::Text)
0435                     (*static_cast<QVector<QString>*>(dataContainer[col]))[row] = QString::fromStdString(std::string(model.get_string_value(pos)));
0436                 break;
0437             }
0438             case ixion::celltype_t::empty: // nothing to do
0439                 break;
0440             case ixion::celltype_t::unknown:
0441             case ixion::celltype_t::boolean:
0442                 DEBUG(Q_FUNC_INFO << ", cell type unknown or boolean not supported yet.")
0443             }
0444         }
0445     }
0446 
0447     dataSource->finalizeImport(columnOffset, 1, actualCols, QString(), importMode);
0448 #else
0449     Q_UNUSED(fileName)
0450     Q_UNUSED(dataSource)
0451     Q_UNUSED(importMode)
0452 #endif
0453 }
0454 
0455 QVector<QStringList> OdsFilterPrivate::preview(const QString& sheetName, int lines) {
0456     QVector<QStringList> dataString;
0457 #ifdef HAVE_ORCUS
0458     // get sheet index by name and read lines of data into dataString
0459     const auto* sheet = m_document.get_sheet(sheetName.toStdString());
0460     if (!sheet) {
0461         DEBUG(Q_FUNC_INFO << ", sheet not found: " << sheetName.toStdString())
0462         return dataString;
0463     }
0464 
0465     const auto sheetIndex = sheet->get_index();
0466     if (sheetIndex == ixion::invalid_sheet) {
0467         DEBUG(Q_FUNC_INFO << ", invalid sheet index " << sheetIndex)
0468         return dataString;
0469     }
0470 
0471     const auto ranges = sheet->get_data_range();
0472     DEBUG(Q_FUNC_INFO << ", data range: col " << ranges.first.column << ".." << ranges.last.column << ", row " << ranges.first.row << ".." << ranges.last.row)
0473 
0474     const int maxCols = 100;
0475     DEBUG(Q_FUNC_INFO << ", start/end row = " << startRow << " " << endRow)
0476     DEBUG(Q_FUNC_INFO << ", start/end col = " << startColumn << " " << endColumn)
0477     int actualStartRow = (startRow > (ranges.last.row - ranges.first.row + 1) ? ranges.first.row : ranges.first.row + startRow - 1);
0478     const int actualEndRow = (endRow == -1 ? ranges.last.row : std::min(ranges.last.row, ranges.first.row + endRow - 1));
0479     int actualStartCol = (startColumn > (ranges.last.column - ranges.first.column + 1) ? ranges.first.column : ranges.first.column + startColumn - 1);
0480     firstColumn = actualStartCol;
0481     const int actualEndCol = (endColumn == -1 ? ranges.last.column : std::min(ranges.last.column, ranges.first.column + endColumn - 1));
0482 
0483     const auto& model = m_document.get_model_context();
0484     for (ixion::row_t row = actualStartRow; row <= std::min(actualEndRow, actualStartRow + lines); row++) {
0485         DEBUG(Q_FUNC_INFO << ", row " << row)
0486         QStringList line;
0487         for (ixion::col_t col = actualStartCol; col <= std::min(actualEndCol, actualStartCol + maxCols); col++) {
0488             ixion::abs_address_t pos(sheetIndex, row, col);
0489 
0490             auto type = model.get_celltype(pos);
0491             switch (type) {
0492             case ixion::celltype_t::string: {
0493                 auto value = model.get_string_value(pos);
0494                 DEBUG(Q_FUNC_INFO << " " << value)
0495                 line << QString::fromStdString(std::string(value));
0496                 break;
0497             }
0498             case ixion::celltype_t::numeric: {
0499                 double value = model.get_numeric_value(pos);
0500                 DEBUG(Q_FUNC_INFO << " " << value)
0501                 line << QLocale().toString(value);
0502                 break;
0503             }
0504             case ixion::celltype_t::formula: {
0505                 // read formula result. We can't handle formulas yet (?)
0506                 auto formula = model.get_formula_result(pos);
0507                 switch (formula.get_type()) {
0508                 case ixion::formula_result::result_type::value:
0509                     line << QLocale().toString(formula.get_value());
0510                     break;
0511                 case ixion::formula_result::result_type::string:
0512                     line << QString::fromStdString(formula.get_string());
0513                     break;
0514                 case ixion::formula_result::result_type::error:
0515                 // TODO: not available in ixion 0.17 ?
0516                 // case ixion::formula_result::result_type::boolean:
0517                 case ixion::formula_result::result_type::matrix:
0518                     line << QString();
0519                     DEBUG(Q_FUNC_INFO << ", formula type error, boolean or matrix not implemented yet.")
0520                     break;
0521                 }
0522                 break;
0523             }
0524             case ixion::celltype_t::empty:
0525                 line << QString();
0526                 break;
0527             case ixion::celltype_t::unknown:
0528             case ixion::celltype_t::boolean:
0529                 line << QString();
0530                 DEBUG(Q_FUNC_INFO << ", cell type unknown or boolean not implemented yet.")
0531                 break;
0532             }
0533         }
0534         dataString << line;
0535     }
0536 #else
0537     Q_UNUSED(sheetName)
0538     Q_UNUSED(lines)
0539 #endif
0540 
0541     return dataString;
0542 }
0543 
0544 void OdsFilterPrivate::parse(const QString& fileName, QTreeWidgetItem* parentItem) {
0545     DEBUG(Q_FUNC_INFO)
0546 #ifdef HAVE_ORCUS
0547     m_document.clear();
0548     spreadsheet::import_factory factory{m_document};
0549     orcus_ods loader(&factory);
0550 
0551     loader.read_file(fileName.toStdString());
0552 
0553     auto* fileNameItem = new QTreeWidgetItem(QStringList() << fileName);
0554     parentItem->addChild(fileNameItem);
0555 
0556     const size_t nrSheets = m_document.get_sheet_count();
0557     for (size_t i = 0; i < nrSheets; i++) {
0558         auto name = m_document.get_sheet_name(i);
0559 
0560         auto* sheetItem = new QTreeWidgetItem(QStringList() << QString::fromStdString(std::string(name)));
0561         sheetItem->setIcon(0, QIcon::fromTheme(QStringLiteral("folder")));
0562 
0563         fileNameItem->addChild(sheetItem);
0564     }
0565 #else
0566     Q_UNUSED(fileName)
0567     Q_UNUSED(parentItem)
0568 #endif
0569 }