File indexing completed on 2025-01-05 03:35:43

0001 /*
0002     File                 : StatisticsColumnWidget.cpp
0003     Project              : LabPlot
0004     Description          : Widget showing statistics for column values
0005     --------------------------------------------------------------------
0006     SPDX-FileCopyrightText: 2021-2023 Alexander Semke <alexander.semke@web.de>
0007     SPDX-FileCopyrightText: 2022 Stefan Gerlach <stefan.gerlach@uni.kn>
0008 
0009     SPDX-License-Identifier: GPL-2.0-or-later
0010 */
0011 
0012 #include "StatisticsColumnWidget.h"
0013 #include "backend/core/Project.h"
0014 #include "backend/core/column/Column.h"
0015 #include "backend/core/datatypes/DateTime2StringFilter.h"
0016 #include "backend/worksheet/Background.h"
0017 #include "backend/worksheet/Line.h"
0018 #include "backend/worksheet/TextLabel.h"
0019 #include "backend/worksheet/Worksheet.h"
0020 #include "backend/worksheet/plots/PlotArea.h"
0021 #include "backend/worksheet/plots/cartesian/Axis.h"
0022 #include "backend/worksheet/plots/cartesian/BarPlot.h"
0023 #include "backend/worksheet/plots/cartesian/BoxPlot.h"
0024 #include "backend/worksheet/plots/cartesian/CartesianPlot.h"
0025 #include "backend/worksheet/plots/cartesian/Histogram.h"
0026 #include "backend/worksheet/plots/cartesian/KDEPlot.h"
0027 #include "backend/worksheet/plots/cartesian/QQPlot.h"
0028 #include "backend/worksheet/plots/cartesian/Symbol.h"
0029 #include "backend/worksheet/plots/cartesian/Value.h"
0030 #include "backend/worksheet/plots/cartesian/XYCurve.h"
0031 #include "kdefrontend/GuiTools.h"
0032 
0033 #include <QTabWidget>
0034 #include <QTextEdit>
0035 #include <QTimer>
0036 #include <QVBoxLayout>
0037 
0038 #include <KLocalizedString>
0039 
0040 #include <algorithm> //for min_element and max_element
0041 
0042 extern "C" {
0043 #include "backend/nsl/nsl_kde.h"
0044 }
0045 #include <gsl/gsl_cdf.h>
0046 #include <gsl/gsl_math.h>
0047 #include <gsl/gsl_statistics.h>
0048 
0049 StatisticsColumnWidget::StatisticsColumnWidget(const Column* column, QWidget* parent)
0050     : QWidget(parent)
0051     , m_column(column)
0052     , m_project(new Project)
0053     , m_tabWidget(new QTabWidget) {
0054     auto* layout = new QVBoxLayout;
0055     layout->addWidget(m_tabWidget);
0056     setLayout(layout);
0057 
0058     const QString htmlColor = GuiTools::isDarkMode() ? QLatin1String("#5f5f5f") : QLatin1String("#D1D1D1");
0059     // clang-format off
0060     if (column->isNumeric()) {
0061         m_htmlOverview = QStringLiteral("<table border=0 width=100%><tr><td colspan=2 align=center bgcolor=") + htmlColor
0062             + QStringLiteral("><b><big>") + i18n("Location Measures") + QStringLiteral("</big><b></td></tr>")
0063             + QStringLiteral("<tr><td width=60%><b>") + i18n("Count") + QStringLiteral("<b></td><td>%1</td></tr>")
0064             + QStringLiteral("<tr><td><b>") + i18n("Minimum") + QStringLiteral("<b></td><td>%2</td></tr>")
0065             + QStringLiteral("<tr><td><b>") + i18n("Maximum") + QStringLiteral("<b></td><td>%3</td></tr>")
0066             + QStringLiteral("<tr><td><b>") + i18n("Arithmetic mean") + QStringLiteral("<b></td><td>%4</td></tr>")
0067             + QStringLiteral("<tr><td><b>") + i18n("Geometric mean") + QStringLiteral("<b></td><td>%5</td></tr>")
0068             + QStringLiteral("<tr><td><b>") + i18n("Harmonic mean") + QStringLiteral("<b></td><td>%6</td></tr>")
0069             + QStringLiteral("<tr><td><b>") + i18n("Contraharmonic mean") + QStringLiteral("<b></td><td>%7</td></tr>")
0070             + QStringLiteral("<tr><td><b>") + i18n("Mode") + QStringLiteral("<b></td><td>%8</td></tr>")
0071             + QStringLiteral("<tr><td><b>") + i18n("First Quartile") + QStringLiteral("<b></td><td>%9</td></tr>")
0072             + QStringLiteral("<tr><td><b>") + i18n("Median") + QStringLiteral("<b></td><td>%10</td></tr>")
0073             + QStringLiteral("<tr><td><b>") + i18n("Third Quartile") + QStringLiteral("<b></td><td>%11</td></tr>")
0074             + QStringLiteral("<tr><td><b>") + i18n("Trimean") + QStringLiteral("<b></td><td>%12</td></tr>")
0075             + QStringLiteral("<tr></tr>")
0076             + QStringLiteral("<tr><td colspan=2 align=center bgcolor=") + htmlColor + QStringLiteral("><b><big>")
0077             + i18n("Dispersion Measures") + QStringLiteral("</big></b></td></tr>")
0078             + QStringLiteral("<tr><td><b>") + i18n("Variance") + QStringLiteral("<b></td><td>%13</td></tr>")
0079             + QStringLiteral("<tr><td><b>") + i18n("Standard deviation") + QStringLiteral("<b></td><td>%14</td></tr>")
0080             + QStringLiteral("<tr><td><b>") + i18n("Mean absolute deviation around mean") + QStringLiteral("<b></td><td>%15</td></tr>")
0081             + QStringLiteral("<tr><td><b>") + i18n("Mean absolute deviation around median") + QStringLiteral("<b></td><td>%16</td></tr>")
0082             + QStringLiteral("<tr><td><b>") + i18n("Median absolute deviation") + QStringLiteral("<b></td><td>%17</td></tr>")
0083             + QStringLiteral("<tr><td><b>") + i18n("Interquartile Range") + QStringLiteral("<b></td><td>%18</td></tr>")
0084             + QStringLiteral("<tr></tr>")
0085             + QStringLiteral("<tr><td colspan=2 align=center bgcolor=") + htmlColor + QStringLiteral("><b><big>")
0086             + i18n("Shape Measures") + QStringLiteral("</big></b></td></tr>")
0087             + QStringLiteral("<tr><td><b>") + i18n("Skewness") + QStringLiteral("<b></td><td>%19</td></tr>")
0088             + QStringLiteral("<tr><td><b>") + i18n("Kurtosis") + QStringLiteral("<b></td><td>%20</td></tr>")
0089             + QStringLiteral("<tr><td><b>") + i18n("Entropy") + QStringLiteral("<b></td><td>%21</td></tr>")
0090             + QStringLiteral("</table>");
0091     } else if (column->columnMode() == AbstractColumn::ColumnMode::Text) {
0092         m_htmlOverview = QStringLiteral("<table border=0 width=100%><tr><td colspan=2 align=center bgcolor=")
0093             + htmlColor + QStringLiteral("><b><big>") + i18n("General") + QStringLiteral("</big><b></td></tr><tr>")
0094             + QStringLiteral("<td width=60%><b>") + i18n("Count") + QStringLiteral("<b></td><td>%1</td></tr>")
0095             + QStringLiteral("<tr><td><b>") + i18n("Unique Values") + QStringLiteral("<b></td><td>%2</td></tr>")
0096             + QStringLiteral("</table>");
0097     } else { // datetime
0098         m_htmlOverview = QStringLiteral("<table border=0 width=100%><tr><td colspan=2 align=center bgcolor=")
0099             + htmlColor + QStringLiteral("><b><big>") + i18n("General") + QStringLiteral("</big><b></td></tr>")
0100             + QStringLiteral("<tr><td width=60%><b>") + i18n("Count") + QStringLiteral("<b></td><td>%1</td></tr>")
0101             + QStringLiteral("<tr><td><b>") + i18n("Minimum") + QStringLiteral("<b></td><td>%2</td></tr>")
0102             + QStringLiteral("<tr><td><b>") + i18n("Maximum") + QStringLiteral("<b></td><td>%3</td></tr>")
0103             + QStringLiteral("</table>");
0104     }
0105     // clang-format on
0106 
0107     // create tab widgets for every column and show the initial text with the placeholders
0108     auto* vBoxLayout = new QVBoxLayout(&m_overviewWidget);
0109     vBoxLayout->setSpacing(0);
0110     m_overviewWidget.setLayout(vBoxLayout);
0111     m_overviewPlotWidget.setMaximumHeight(150);
0112     vBoxLayout->addWidget(&m_overviewPlotWidget);
0113 
0114     m_teOverview = new QTextEdit(this);
0115     m_teOverview->setReadOnly(true);
0116     vBoxLayout->addWidget(m_teOverview);
0117 
0118     m_tabWidget->addTab(&m_overviewWidget, i18n("Overview"));
0119 
0120     if (column->isNumeric()) {
0121         m_teOverview->setHtml(m_htmlOverview
0122                                   .arg(QLatin1String("-"),
0123                                        QLatin1String("-"),
0124                                        QLatin1String("-"),
0125                                        QLatin1String("-"),
0126                                        QLatin1String("-"),
0127                                        QLatin1String("-"),
0128                                        QLatin1String("-"),
0129                                        QLatin1String("-"),
0130                                        QLatin1String("-"))
0131                                   .arg(QLatin1String("-"),
0132                                        QLatin1String("-"),
0133                                        QLatin1String("-"),
0134                                        QLatin1String("-"),
0135                                        QLatin1String("-"),
0136                                        QLatin1String("-"),
0137                                        QLatin1String("-"),
0138                                        QLatin1String("-"),
0139                                        QLatin1String("-"))
0140                                   .arg(QLatin1String("-"), QLatin1String("-"), QLatin1String("-")));
0141         m_tabWidget->addTab(&m_histogramWidget, i18n("Histogram"));
0142         m_tabWidget->addTab(&m_kdePlotWidget, i18n("KDE Plot"));
0143         m_tabWidget->addTab(&m_qqPlotWidget, i18n("Normal Q-Q Plot"));
0144         m_tabWidget->addTab(&m_boxPlotWidget, i18n("Box Plot"));
0145     } else if (column->columnMode() == AbstractColumn::ColumnMode::Text) {
0146         m_teOverview->setHtml(m_htmlOverview.arg(QLatin1String("-"), QLatin1String("-")));
0147         m_tabWidget->addTab(&m_barPlotWidget, i18n("Bar Plot"));
0148         m_tabWidget->addTab(&m_paretoPlotWidget, i18n("Pareto Plot"));
0149     } else { // datetime
0150         m_teOverview->setHtml(m_htmlOverview.arg(QLatin1String("-"), QLatin1String("-"), QLatin1String("-")));
0151     }
0152 
0153     connect(m_tabWidget, &QTabWidget::currentChanged, this, &StatisticsColumnWidget::currentTabChanged);
0154 }
0155 
0156 StatisticsColumnWidget::~StatisticsColumnWidget() {
0157     disconnect(m_tabWidget, nullptr, this, nullptr); // don't react on currentChanged signal
0158     delete m_project;
0159 }
0160 
0161 void StatisticsColumnWidget::setCurrentTab(int index) {
0162     if (index == m_tabWidget->currentIndex())
0163         currentTabChanged(index); // manually call the slot so we get the data shown
0164     else
0165         m_tabWidget->setCurrentIndex(index);
0166 }
0167 
0168 void StatisticsColumnWidget::currentTabChanged(int index) {
0169     WAIT_CURSOR;
0170     if (m_column->isNumeric()) {
0171         if (index == 0 && !m_overviewInitialized)
0172             showOverview();
0173         else if (index == 1 && !m_histogramInitialized)
0174             showHistogram();
0175         else if (index == 2 && !m_kdePlotInitialized)
0176             showKDEPlot();
0177         else if (index == 3 && !m_qqPlotInitialized)
0178             showQQPlot();
0179         else if (index == 4 && !m_boxPlotInitialized)
0180             showBoxPlot();
0181     } else {
0182         if (index == 0 && !m_overviewInitialized)
0183             showOverview();
0184         else if (index == 1 && !m_barPlotInitialized)
0185             showBarPlot();
0186         else if (index == 2 && !m_paretoPlotInitialized)
0187             showParetoPlot();
0188     }
0189 
0190     Q_EMIT tabChanged(index);
0191     RESET_CURSOR;
0192 }
0193 
0194 void StatisticsColumnWidget::showOverview() {
0195     const Column::ColumnStatistics& statistics = m_column->statistics();
0196 
0197     if (m_column->isNumeric()) {
0198         m_teOverview->setHtml(m_htmlOverview
0199                                   .arg(QString::number(statistics.size),
0200                                        isNanValue(statistics.minimum == INFINITY ? NAN : statistics.minimum),
0201                                        isNanValue(statistics.maximum == -INFINITY ? NAN : statistics.maximum),
0202                                        isNanValue(statistics.arithmeticMean),
0203                                        isNanValue(statistics.geometricMean),
0204                                        isNanValue(statistics.harmonicMean),
0205                                        isNanValue(statistics.contraharmonicMean),
0206                                        modeValue(m_column, statistics.mode),
0207                                        isNanValue(statistics.firstQuartile))
0208                                   .arg(isNanValue(statistics.median),
0209                                        isNanValue(statistics.thirdQuartile),
0210                                        isNanValue(statistics.trimean),
0211                                        isNanValue(statistics.variance),
0212                                        isNanValue(statistics.standardDeviation),
0213                                        isNanValue(statistics.meanDeviation),
0214                                        isNanValue(statistics.meanDeviationAroundMedian),
0215                                        isNanValue(statistics.medianDeviation),
0216                                        isNanValue(statistics.iqr))
0217                                   .arg(isNanValue(statistics.skewness), isNanValue(statistics.kurtosis), isNanValue(statistics.entropy)));
0218     } else if (m_column->columnMode() == AbstractColumn::ColumnMode::Text) {
0219         // add the frequencies table
0220         const auto& frequencies = m_column->frequencies();
0221         const QString htmlColor = GuiTools::isDarkMode() ? QStringLiteral("#5f5f5f") : QStringLiteral("#D1D1D1");
0222         m_htmlOverview += QStringLiteral("<br><table border=0 width=100%>") + QStringLiteral("<tr>") + QStringLiteral("<td colspan=3 align=center bgcolor=")
0223             + htmlColor + QStringLiteral("><b><big>") + i18n("Frequency Table") + QStringLiteral("</big><b></td>") + QStringLiteral("</tr>")
0224             + QStringLiteral("<tr>") + QStringLiteral("<td width=60%></td>") + QStringLiteral("<td>") + i18n("Frequency") + QStringLiteral("</td>")
0225             + QStringLiteral("<td>") + i18n("Percent") + QStringLiteral("</td>") + QStringLiteral("</tr>");
0226 
0227         auto i = frequencies.constBegin();
0228         while (i != frequencies.constEnd()) {
0229             int count = i.value();
0230             double percent = (double)count / statistics.size * 100;
0231             m_htmlOverview += QStringLiteral("<tr>") + QStringLiteral("<td>") + i.key() + QStringLiteral("</td>") + QStringLiteral("<td>")
0232                 + QString::number(count) + QStringLiteral("</td>") + QStringLiteral("<td>") + QString::number(percent) + QStringLiteral("%</td>")
0233                 + QStringLiteral("</tr>");
0234             ++i;
0235         }
0236 
0237         m_htmlOverview += QStringLiteral("</table>");
0238         m_teOverview->setHtml(m_htmlOverview.arg(QString::number(statistics.size), QString::number(statistics.unique)));
0239     } else { // datetime
0240         auto* filter = static_cast<DateTime2StringFilter*>(m_column->outputFilter());
0241         m_teOverview->setHtml(m_htmlOverview.arg(QString::number(statistics.size),
0242                                                  QDateTime::fromMSecsSinceEpoch(statistics.minimum, Qt::UTC).toString(filter->format()),
0243                                                  QDateTime::fromMSecsSinceEpoch(statistics.maximum, Qt::UTC).toString(filter->format())));
0244     }
0245 
0246     showOverviewPlot();
0247     m_overviewInitialized = true;
0248 }
0249 
0250 void StatisticsColumnWidget::showOverviewPlot() {
0251     if (!m_column->isNumeric())
0252         return;
0253 
0254     // add plot
0255     auto* plot = addPlot(&m_overviewPlotWidget);
0256     plot->setSymmetricPadding(false);
0257     const double padding = Worksheet::convertToSceneUnits(0.5, Worksheet::Unit::Centimeter);
0258     plot->setHorizontalPadding(2 * padding);
0259     plot->setRightPadding(2 * padding);
0260     plot->setVerticalPadding(padding);
0261     plot->setBottomPadding(padding);
0262     plot->plotArea()->borderLine()->setStyle(Qt::NoPen);
0263 
0264     // set the axes labels
0265     auto axes = plot->children<Axis>();
0266     for (auto* axis : qAsConst(axes)) {
0267         axis->setSuppressRetransform(true);
0268         if (axis->orientation() == Axis::Orientation::Vertical)
0269             axis->title()->setText(QString());
0270         else {
0271             // TODO: set the font and the offset smaller and show the "Index" title after this
0272             // axis->title()->setText(i18n("Index"));
0273             axis->title()->setText(QString());
0274         }
0275 
0276         auto font = axis->labelsFont();
0277         font.setPixelSize(Worksheet::convertToSceneUnits(8, Worksheet::Unit::Point));
0278         axis->setLabelsFont(font);
0279         axis->setLabelsOffset(2);
0280         axis->setMajorTicksDirection(Axis::ticksIn);
0281         axis->majorGridLine()->setStyle(Qt::NoPen);
0282         axis->setMinorTicksDirection(Axis::noTicks);
0283         axis->setArrowType(Axis::ArrowType::NoArrow);
0284         axis->setSuppressRetransform(false);
0285     }
0286 
0287     QApplication::processEvents(QEventLoop::AllEvents, 100);
0288 
0289     // x
0290     auto* xColumn = new Column(QStringLiteral("x"), AbstractColumn::ColumnMode::Integer);
0291     m_project->addChild(xColumn);
0292     int rows = m_column->rowCount();
0293     QVector<int> xData;
0294     xData.resize(rows);
0295     for (int i = 0; i < rows; ++i)
0296         xData[i] = i;
0297     xColumn->setIntegers(xData);
0298 
0299     // add curve
0300     auto* curve = new XYCurve(QString());
0301     curve->setSuppressRetransform(false);
0302     plot->addChild(curve);
0303     curve->line()->setStyle(Qt::SolidLine);
0304     curve->symbol()->setStyle(Symbol::Style::NoSymbols);
0305     curve->background()->setPosition(Background::Position::No);
0306     curve->setXColumn(xColumn);
0307     curve->setYColumn(m_column);
0308 
0309     curve->setSuppressRetransform(false);
0310     plot->retransform();
0311 }
0312 
0313 void StatisticsColumnWidget::showHistogram() {
0314     // add plot
0315     auto* plot = addPlot(&m_histogramWidget);
0316 
0317     auto axes = plot->children<Axis>();
0318     for (auto* axis : qAsConst(axes)) {
0319         if (axis->orientation() == Axis::Orientation::Horizontal) {
0320             axis->title()->setText(m_column->name());
0321             axis->majorGridLine()->setStyle(Qt::NoPen);
0322         } else
0323             axis->title()->setText(i18n("Frequency"));
0324 
0325         axis->setMinorTicksDirection(Axis::noTicks);
0326     }
0327     QApplication::processEvents(QEventLoop::AllEvents, 100);
0328 
0329     auto* histogram = new Histogram(QString());
0330     plot->addChild(histogram);
0331     histogram->setDataColumn(m_column);
0332 
0333     plot->retransform();
0334     m_histogramInitialized = true;
0335 }
0336 
0337 void StatisticsColumnWidget::showKDEPlot() {
0338     // add plot
0339     auto* plot = addPlot(&m_kdePlotWidget);
0340 
0341     // set the axes labels
0342     auto axes = plot->children<Axis>();
0343     for (auto* axis : qAsConst(axes)) {
0344         if (axis->orientation() == Axis::Orientation::Horizontal)
0345             axis->title()->setText(m_column->name());
0346         else
0347             axis->title()->setText(i18n("Density"));
0348 
0349         axis->setMinorTicksDirection(Axis::noTicks);
0350     }
0351 
0352     QApplication::processEvents(QEventLoop::AllEvents, 100);
0353 
0354     // add normalized histogram
0355     auto* histogram = new Histogram(QString());
0356     plot->addChild(histogram);
0357     histogram->setNormalization(Histogram::ProbabilityDensity);
0358     histogram->setDataColumn(m_column);
0359 
0360     // add KDE Plot
0361     auto* kdePlot = new KDEPlot(QString());
0362     plot->addChild(kdePlot);
0363     kdePlot->setKernelType(nsl_kernel_gauss);
0364     kdePlot->setBandwidthType(nsl_kde_bandwidth_silverman);
0365     kdePlot->setDataColumn(m_column);
0366 
0367     plot->retransform();
0368     m_kdePlotInitialized = true;
0369 }
0370 
0371 void StatisticsColumnWidget::showQQPlot() {
0372     // add plot
0373     auto* plot = addPlot(&m_qqPlotWidget);
0374 
0375     auto axes = plot->children<Axis>();
0376     for (auto* axis : qAsConst(axes)) {
0377         if (axis->orientation() == Axis::Orientation::Horizontal)
0378             axis->title()->setText(i18n("Theoretical Quantiles"));
0379         else
0380             axis->title()->setText(i18n("Sample Quantiles"));
0381 
0382         axis->setMinorTicksDirection(Axis::noTicks);
0383     }
0384     QApplication::processEvents(QEventLoop::AllEvents, 100);
0385 
0386     auto* qqPlot = new QQPlot(QString());
0387     plot->addChild(qqPlot);
0388     qqPlot->setDataColumn(m_column);
0389 
0390     plot->retransform();
0391     m_qqPlotInitialized = true;
0392 }
0393 
0394 void StatisticsColumnWidget::showBoxPlot() {
0395     // add plot
0396     auto* plot = addPlot(&m_boxPlotWidget);
0397 
0398     auto axes = plot->children<Axis>();
0399     for (auto* axis : qAsConst(axes)) {
0400         if (axis->orientation() == Axis::Orientation::Horizontal) {
0401             axis->setLabelsPosition(Axis::LabelsPosition::NoLabels);
0402             axis->setMajorTicksDirection(Axis::noTicks);
0403             axis->majorGridLine()->setStyle(Qt::NoPen);
0404             axis->minorGridLine()->setStyle(Qt::NoPen);
0405             axis->title()->setText(QString());
0406         } else
0407             axis->title()->setText(m_column->name());
0408 
0409         axis->setMinorTicksDirection(Axis::noTicks);
0410     }
0411     QApplication::processEvents(QEventLoop::AllEvents, 100);
0412 
0413     auto* boxPlot = new BoxPlot(QString());
0414     boxPlot->setOrientation(BoxPlot::Orientation::Vertical);
0415     boxPlot->setWhiskersType(BoxPlot::WhiskersType::IQR);
0416     plot->addChild(boxPlot);
0417 
0418     QVector<const AbstractColumn*> columns;
0419     columns << const_cast<Column*>(m_column);
0420     boxPlot->setDataColumns(columns);
0421 
0422     plot->retransform();
0423     m_boxPlotInitialized = true;
0424 }
0425 
0426 void StatisticsColumnWidget::showBarPlot() {
0427     // add plot
0428     auto* plot = addPlot(&m_barPlotWidget);
0429     plot->title()->setText(m_column->name());
0430     QApplication::processEvents(QEventLoop::AllEvents, 100);
0431 
0432     auto* barPlot = new BarPlot(QString());
0433     plot->addChild(barPlot);
0434     barPlot->setOrientation(BoxPlot::Orientation::Vertical);
0435     barPlot->value()->setType(Value::Type::BinEntries);
0436     barPlot->value()->setPosition(Value::Position::Above);
0437 
0438     // generate columns holding the data and the labels
0439     auto* dataColumn = new Column(QStringLiteral("data"));
0440     dataColumn->setColumnMode(AbstractColumn::ColumnMode::Integer);
0441     m_project->addChild(dataColumn);
0442 
0443     auto* labelsColumn = new Column(QStringLiteral("labels"));
0444     labelsColumn->setColumnMode(AbstractColumn::ColumnMode::Text);
0445     m_project->addChild(labelsColumn);
0446 
0447     // sort the frequencies and the accompanying labels
0448     const auto& frequencies = m_column->frequencies();
0449     auto i = frequencies.constBegin();
0450     QVector<QPair<QString, int>> pairs;
0451     while (i != frequencies.constEnd()) {
0452         pairs << QPair<QString, int>(i.key(), i.value());
0453         ++i;
0454     }
0455 
0456     std::sort(pairs.begin(), pairs.end(), [](QPair<QString, int> a, QPair<QString, int> b) {
0457         return a.second > b.second;
0458     });
0459 
0460     QVector<int> data;
0461     QVector<QString> labels;
0462     for (const auto& pair : pairs) {
0463         labels << pair.first;
0464         data << pair.second;
0465     }
0466     dataColumn->replaceInteger(0, data);
0467     labelsColumn->replaceTexts(0, labels);
0468 
0469     QVector<const AbstractColumn*> columns;
0470     columns << dataColumn;
0471     barPlot->setDataColumns(columns);
0472 
0473     // axes properties
0474     auto axes = plot->children<Axis>();
0475     for (auto* axis : qAsConst(axes)) {
0476         if (axis->orientation() == Axis::Orientation::Horizontal) {
0477             axis->title()->setText(QString());
0478             axis->majorGridLine()->setStyle(Qt::NoPen);
0479             axis->setMajorTicksStartType(Axis::TicksStartType::Offset);
0480             axis->setMajorTickStartOffset(0.5);
0481             axis->setMajorTicksType(Axis::TicksType::Spacing);
0482             axis->setMajorTicksSpacing(1.);
0483             axis->setLabelsTextType(Axis::LabelsTextType::CustomValues);
0484             axis->setLabelsTextColumn(labelsColumn);
0485         } else {
0486             axis->title()->setText(i18n("Frequency"));
0487             axis->setTitleOffsetX(Worksheet::convertToSceneUnits(-5, Worksheet::Unit::Point));
0488         }
0489 
0490         axis->setMinorTicksDirection(Axis::noTicks);
0491         axis->setArrowType(Axis::ArrowType::NoArrow);
0492     }
0493 
0494     plot->retransform();
0495     m_barPlotInitialized = true;
0496 }
0497 
0498 void StatisticsColumnWidget::showParetoPlot() {
0499     DEBUG(Q_FUNC_INFO)
0500     auto* plot = addPlot(&m_paretoPlotWidget);
0501     plot->title()->setText(m_column->name());
0502     plot->setHorizontalPadding(Worksheet::convertToSceneUnits(2, Worksheet::Unit::Centimeter));
0503     plot->setRightPadding(Worksheet::convertToSceneUnits(3.2, Worksheet::Unit::Centimeter));
0504 
0505     // add second range for the cumulative percentage of the total number of occurences
0506     plot->addYRange(Range<double>(0, 100)); // add second y range
0507     plot->addCoordinateSystem(); // add cs for second y range
0508     plot->setCoordinateSystemRangeIndex(plot->coordinateSystemCount() - 1, Dimension::Y, 1); // specify new y range for new cs
0509     plot->enableAutoScale(Dimension::Y, 1, false); // disable auto scale to stay at 0 .. 100
0510 
0511     // add second y-axis
0512     auto* axis = new Axis(QLatin1String("y2"));
0513     plot->addChild(axis);
0514     axis->setOrientation(Axis::Orientation::Vertical);
0515     axis->setPosition(Axis::Position::Right);
0516     axis->setMajorTicksDirection(Axis::ticksBoth);
0517     axis->setLabelsPosition(Axis::LabelsPosition::In);
0518     axis->setLabelsSuffix(QLatin1String("%"));
0519     axis->title()->setRotationAngle(90);
0520     axis->setCoordinateSystemIndex(1);
0521 
0522     QApplication::processEvents(QEventLoop::AllEvents, 100);
0523 
0524     auto* barPlot = new BarPlot(QString());
0525     barPlot->setOrientation(BoxPlot::Orientation::Vertical);
0526     plot->addChild(barPlot);
0527 
0528     // generate columns holding the data and the labels
0529     int count = m_column->statistics().unique;
0530 
0531     auto* dataColumn = new Column(QStringLiteral("data"));
0532     dataColumn->setColumnMode(AbstractColumn::ColumnMode::Integer);
0533     m_project->addChild(dataColumn);
0534 
0535     auto* xColumn = new Column(QStringLiteral("x"));
0536     xColumn->setColumnMode(AbstractColumn::ColumnMode::Double);
0537     m_project->addChild(xColumn);
0538     QVector<double> xData(count);
0539 
0540     auto* yColumn = new Column(QStringLiteral("y"));
0541     m_project->addChild(yColumn);
0542     QVector<double> yData(count);
0543 
0544     auto* labelsColumn = new Column(QStringLiteral("labels"));
0545     labelsColumn->setColumnMode(AbstractColumn::ColumnMode::Text);
0546 
0547     // sort the frequencies and the accompanying labels and calculate the total sum of frequencies
0548     const auto& frequencies = m_column->frequencies();
0549     auto i = frequencies.constBegin();
0550     QVector<QPair<QString, int>> pairs;
0551     int row = 0;
0552     int totalSumOfFrequencies = 0;
0553     while (i != frequencies.constEnd()) {
0554         pairs << QPair<QString, int>(i.key(), i.value());
0555         xData[row] = 0.5 + row;
0556         totalSumOfFrequencies += i.value();
0557         ++row;
0558         ++i;
0559     }
0560 
0561     std::sort(pairs.begin(), pairs.end(), [](QPair<QString, int> a, QPair<QString, int> b) {
0562         return a.second > b.second;
0563     });
0564 
0565     QVector<int> data;
0566     QVector<QString> labels;
0567     for (const auto& pair : pairs) {
0568         labels << pair.first;
0569         data << pair.second;
0570     }
0571 
0572     // calculate the cummulative values
0573     int sum = 0;
0574     row = 0;
0575     for (auto value : data) {
0576         sum += value;
0577         if (totalSumOfFrequencies != 0)
0578             yData[row] = (double)sum / totalSumOfFrequencies * 100;
0579         ++row;
0580     }
0581 
0582     dataColumn->replaceInteger(0, data);
0583     labelsColumn->replaceTexts(0, labels);
0584     xColumn->setValues(xData);
0585     yColumn->setValues(yData);
0586 
0587     QVector<const AbstractColumn*> columns;
0588     columns << dataColumn;
0589     barPlot->setDataColumns(columns);
0590 
0591     // add cumulated percentage curve
0592     auto* curve = new XYCurve(QStringLiteral("curve"));
0593     curve->setCoordinateSystemIndex(1); // asign to the second y-range going from 0 to 100%
0594     curve->setXColumn(xColumn);
0595     curve->setYColumn(yColumn);
0596     curve->line()->setStyle(Qt::SolidLine);
0597     curve->symbol()->setStyle(Symbol::Style::Circle);
0598     plot->addChild(curve);
0599     curve->setValuesType(XYCurve::ValuesType::Y);
0600     curve->setValuesPosition(XYCurve::ValuesPosition::Right);
0601     curve->setValuesDistance(Worksheet::convertToSceneUnits(10, Worksheet::Unit::Point));
0602     curve->setValuesSuffix(QStringLiteral("%"));
0603 
0604     // resize the first y range to have the first point of the xy-curve at the top of the first bar
0605     if (yData.at(0) != 0) {
0606         const double max = (double)data.at(0) * 100. / yData.at(0);
0607         plot->setMax(Dimension::Y, 0, max);
0608     }
0609 
0610     // axes properties
0611     auto axes = plot->children<Axis>();
0612     bool firstYAxis = false;
0613     for (auto* axis : qAsConst(axes)) {
0614         if (axis->orientation() == Axis::Orientation::Horizontal) {
0615             axis->title()->setText(QString());
0616             axis->majorGridLine()->setStyle(Qt::NoPen);
0617             axis->setMajorTicksStartType(Axis::TicksStartType::Offset);
0618             axis->setMajorTickStartOffset(0.5);
0619             axis->setMajorTicksType(Axis::TicksType::Spacing);
0620             axis->setMajorTicksSpacing(1.);
0621             axis->setLabelsTextType(Axis::LabelsTextType::CustomValues);
0622             axis->setLabelsTextColumn(labelsColumn);
0623         } else {
0624             if (!firstYAxis) {
0625                 axis->title()->setText(i18n("Frequency"));
0626                 axis->setTitleOffsetX(Worksheet::convertToSceneUnits(-5, Worksheet::Unit::Point));
0627                 axis->setMajorTicksNumber(10 + 1); // same tick number as percentage axis
0628                 firstYAxis = true;
0629             } else {
0630                 axis->title()->setText(i18n("Cumulative Percentage"));
0631                 // TODO: work with the same offset as for the first axis after https://invent.kde.org/education/labplot/-/issues/368 was adressed
0632                 axis->setTitleOffsetX(Worksheet::convertToSceneUnits(1.8, Worksheet::Unit::Centimeter));
0633             }
0634         }
0635 
0636         axis->setMinorTicksDirection(Axis::noTicks);
0637         axis->setArrowType(Axis::ArrowType::NoArrow);
0638     }
0639 
0640     plot->retransform();
0641     m_paretoPlotInitialized = true;
0642 }
0643 
0644 CartesianPlot* StatisticsColumnWidget::addPlot(QWidget* widget) {
0645     auto* ws = new Worksheet(QString());
0646     ws->setUseViewSize(true);
0647     ws->setLayoutTopMargin(0.);
0648     ws->setLayoutBottomMargin(0.);
0649     ws->setLayoutLeftMargin(0.);
0650     ws->setLayoutRightMargin(0.);
0651     m_project->addChild(ws);
0652 
0653     auto* plot = new CartesianPlot(QString());
0654     plot->setSuppressRetransform(true);
0655     plot->setType(CartesianPlot::Type::TwoAxes);
0656     plot->setSymmetricPadding(false);
0657     const double padding = Worksheet::convertToSceneUnits(1.0, Worksheet::Unit::Centimeter);
0658     plot->setRightPadding(padding);
0659     plot->setVerticalPadding(padding);
0660     plot->plotArea()->borderLine()->setStyle(Qt::NoPen);
0661 
0662     ws->addChild(plot);
0663     plot->setSuppressRetransform(false);
0664 
0665     auto* layout = new QVBoxLayout(widget);
0666     layout->setSpacing(0);
0667     layout->addWidget(ws->view());
0668     ws->setInteractive(false);
0669     widget->setLayout(layout);
0670 
0671     return plot;
0672 }
0673 
0674 // helpers
0675 const QString StatisticsColumnWidget::isNanValue(const double value) const {
0676     return (std::isnan(value) ? QLatin1String("-") : QLocale().toString(value, 'f'));
0677 }
0678 
0679 QString StatisticsColumnWidget::modeValue(const Column* column, double value) const {
0680     if (std::isnan(value))
0681         return QLatin1String("-");
0682 
0683     const auto numberLocale = QLocale();
0684     switch (column->columnMode()) {
0685     case AbstractColumn::ColumnMode::Integer:
0686         return numberLocale.toString((int)value);
0687     case AbstractColumn::ColumnMode::BigInt:
0688         return numberLocale.toString((qint64)value);
0689     case AbstractColumn::ColumnMode::Text:
0690         // TODO
0691     case AbstractColumn::ColumnMode::DateTime:
0692         // TODO
0693     case AbstractColumn::ColumnMode::Day:
0694         // TODO
0695     case AbstractColumn::ColumnMode::Month:
0696         // TODO
0697     case AbstractColumn::ColumnMode::Double:
0698         return numberLocale.toString(value, 'f');
0699     }
0700 
0701     return {};
0702 }
0703 
0704 /*!
0705  * copy the non-nan and not masked values of the current column
0706  * into the vector \c data.
0707  */
0708 void StatisticsColumnWidget::copyValidData(QVector<double>& data) const {
0709     const int rowCount = m_column->rowCount();
0710     data.reserve(rowCount);
0711     double val;
0712     if (m_column->columnMode() == AbstractColumn::ColumnMode::Double) {
0713         auto* rowValues = reinterpret_cast<QVector<double>*>(m_column->data());
0714         for (int row = 0; row < rowCount; ++row) {
0715             val = rowValues->value(row);
0716             if (std::isnan(val) || m_column->isMasked(row))
0717                 continue;
0718 
0719             data.push_back(val);
0720         }
0721     } else if (m_column->columnMode() == AbstractColumn::ColumnMode::Integer) {
0722         auto* rowValues = reinterpret_cast<QVector<int>*>(m_column->data());
0723         for (int row = 0; row < rowCount; ++row) {
0724             val = rowValues->value(row);
0725             if (std::isnan(val) || m_column->isMasked(row))
0726                 continue;
0727 
0728             data.push_back(val);
0729         }
0730     } else if (m_column->columnMode() == AbstractColumn::ColumnMode::BigInt) {
0731         auto* rowValues = reinterpret_cast<QVector<qint64>*>(m_column->data());
0732         for (int row = 0; row < rowCount; ++row) {
0733             val = rowValues->value(row);
0734             if (std::isnan(val) || m_column->isMasked(row))
0735                 continue;
0736 
0737             data.push_back(val);
0738         }
0739     }
0740 
0741     if (data.size() < rowCount)
0742         data.squeeze();
0743 }