// File indexing completed on 2025-01-05 03:35:43
0001 /* 0002 File : StatisticsColumnWidget.cpp 0003 Project : LabPlot 0004 Description : Widget showing statistics for column values 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2021-2023 Alexander Semke <alexander.semke@web.de> 0007 SPDX-FileCopyrightText: 2022 Stefan Gerlach <stefan.gerlach@uni.kn> 0008 0009 SPDX-License-Identifier: GPL-2.0-or-later 0010 */ 0011 0012 #include "StatisticsColumnWidget.h" 0013 #include "backend/core/Project.h" 0014 #include "backend/core/column/Column.h" 0015 #include "backend/core/datatypes/DateTime2StringFilter.h" 0016 #include "backend/worksheet/Background.h" 0017 #include "backend/worksheet/Line.h" 0018 #include "backend/worksheet/TextLabel.h" 0019 #include "backend/worksheet/Worksheet.h" 0020 #include "backend/worksheet/plots/PlotArea.h" 0021 #include "backend/worksheet/plots/cartesian/Axis.h" 0022 #include "backend/worksheet/plots/cartesian/BarPlot.h" 0023 #include "backend/worksheet/plots/cartesian/BoxPlot.h" 0024 #include "backend/worksheet/plots/cartesian/CartesianPlot.h" 0025 #include "backend/worksheet/plots/cartesian/Histogram.h" 0026 #include "backend/worksheet/plots/cartesian/KDEPlot.h" 0027 #include "backend/worksheet/plots/cartesian/QQPlot.h" 0028 #include "backend/worksheet/plots/cartesian/Symbol.h" 0029 #include "backend/worksheet/plots/cartesian/Value.h" 0030 #include "backend/worksheet/plots/cartesian/XYCurve.h" 0031 #include "kdefrontend/GuiTools.h" 0032 0033 #include <QTabWidget> 0034 #include <QTextEdit> 0035 #include <QTimer> 0036 #include <QVBoxLayout> 0037 0038 #include <KLocalizedString> 0039 0040 #include <algorithm> //for min_element and max_element 0041 0042 extern "C" { 0043 #include "backend/nsl/nsl_kde.h" 0044 } 0045 #include <gsl/gsl_cdf.h> 0046 #include <gsl/gsl_math.h> 0047 #include <gsl/gsl_statistics.h> 0048 0049 StatisticsColumnWidget::StatisticsColumnWidget(const Column* column, QWidget* parent) 0050 : QWidget(parent) 0051 , 
m_column(column) 0052 , m_project(new Project) 0053 , m_tabWidget(new QTabWidget) { 0054 auto* layout = new QVBoxLayout; 0055 layout->addWidget(m_tabWidget); 0056 setLayout(layout); 0057 0058 const QString htmlColor = GuiTools::isDarkMode() ? QLatin1String("#5f5f5f") : QLatin1String("#D1D1D1"); 0059 // clang-format off 0060 if (column->isNumeric()) { 0061 m_htmlOverview = QStringLiteral("<table border=0 width=100%><tr><td colspan=2 align=center bgcolor=") + htmlColor 0062 + QStringLiteral("><b><big>") + i18n("Location Measures") + QStringLiteral("</big><b></td></tr>") 0063 + QStringLiteral("<tr><td width=60%><b>") + i18n("Count") + QStringLiteral("<b></td><td>%1</td></tr>") 0064 + QStringLiteral("<tr><td><b>") + i18n("Minimum") + QStringLiteral("<b></td><td>%2</td></tr>") 0065 + QStringLiteral("<tr><td><b>") + i18n("Maximum") + QStringLiteral("<b></td><td>%3</td></tr>") 0066 + QStringLiteral("<tr><td><b>") + i18n("Arithmetic mean") + QStringLiteral("<b></td><td>%4</td></tr>") 0067 + QStringLiteral("<tr><td><b>") + i18n("Geometric mean") + QStringLiteral("<b></td><td>%5</td></tr>") 0068 + QStringLiteral("<tr><td><b>") + i18n("Harmonic mean") + QStringLiteral("<b></td><td>%6</td></tr>") 0069 + QStringLiteral("<tr><td><b>") + i18n("Contraharmonic mean") + QStringLiteral("<b></td><td>%7</td></tr>") 0070 + QStringLiteral("<tr><td><b>") + i18n("Mode") + QStringLiteral("<b></td><td>%8</td></tr>") 0071 + QStringLiteral("<tr><td><b>") + i18n("First Quartile") + QStringLiteral("<b></td><td>%9</td></tr>") 0072 + QStringLiteral("<tr><td><b>") + i18n("Median") + QStringLiteral("<b></td><td>%10</td></tr>") 0073 + QStringLiteral("<tr><td><b>") + i18n("Third Quartile") + QStringLiteral("<b></td><td>%11</td></tr>") 0074 + QStringLiteral("<tr><td><b>") + i18n("Trimean") + QStringLiteral("<b></td><td>%12</td></tr>") 0075 + QStringLiteral("<tr></tr>") 0076 + QStringLiteral("<tr><td colspan=2 align=center bgcolor=") + htmlColor + QStringLiteral("><b><big>") 0077 + i18n("Dispersion 
Measures") + QStringLiteral("</big></b></td></tr>") 0078 + QStringLiteral("<tr><td><b>") + i18n("Variance") + QStringLiteral("<b></td><td>%13</td></tr>") 0079 + QStringLiteral("<tr><td><b>") + i18n("Standard deviation") + QStringLiteral("<b></td><td>%14</td></tr>") 0080 + QStringLiteral("<tr><td><b>") + i18n("Mean absolute deviation around mean") + QStringLiteral("<b></td><td>%15</td></tr>") 0081 + QStringLiteral("<tr><td><b>") + i18n("Mean absolute deviation around median") + QStringLiteral("<b></td><td>%16</td></tr>") 0082 + QStringLiteral("<tr><td><b>") + i18n("Median absolute deviation") + QStringLiteral("<b></td><td>%17</td></tr>") 0083 + QStringLiteral("<tr><td><b>") + i18n("Interquartile Range") + QStringLiteral("<b></td><td>%18</td></tr>") 0084 + QStringLiteral("<tr></tr>") 0085 + QStringLiteral("<tr><td colspan=2 align=center bgcolor=") + htmlColor + QStringLiteral("><b><big>") 0086 + i18n("Shape Measures") + QStringLiteral("</big></b></td></tr>") 0087 + QStringLiteral("<tr><td><b>") + i18n("Skewness") + QStringLiteral("<b></td><td>%19</td></tr>") 0088 + QStringLiteral("<tr><td><b>") + i18n("Kurtosis") + QStringLiteral("<b></td><td>%20</td></tr>") 0089 + QStringLiteral("<tr><td><b>") + i18n("Entropy") + QStringLiteral("<b></td><td>%21</td></tr>") 0090 + QStringLiteral("</table>"); 0091 } else if (column->columnMode() == AbstractColumn::ColumnMode::Text) { 0092 m_htmlOverview = QStringLiteral("<table border=0 width=100%><tr><td colspan=2 align=center bgcolor=") 0093 + htmlColor + QStringLiteral("><b><big>") + i18n("General") + QStringLiteral("</big><b></td></tr><tr>") 0094 + QStringLiteral("<td width=60%><b>") + i18n("Count") + QStringLiteral("<b></td><td>%1</td></tr>") 0095 + QStringLiteral("<tr><td><b>") + i18n("Unique Values") + QStringLiteral("<b></td><td>%2</td></tr>") 0096 + QStringLiteral("</table>"); 0097 } else { // datetime 0098 m_htmlOverview = QStringLiteral("<table border=0 width=100%><tr><td colspan=2 align=center bgcolor=") 0099 + htmlColor + 
QStringLiteral("><b><big>") + i18n("General") + QStringLiteral("</big><b></td></tr>") 0100 + QStringLiteral("<tr><td width=60%><b>") + i18n("Count") + QStringLiteral("<b></td><td>%1</td></tr>") 0101 + QStringLiteral("<tr><td><b>") + i18n("Minimum") + QStringLiteral("<b></td><td>%2</td></tr>") 0102 + QStringLiteral("<tr><td><b>") + i18n("Maximum") + QStringLiteral("<b></td><td>%3</td></tr>") 0103 + QStringLiteral("</table>"); 0104 } 0105 // clang-format on 0106 0107 // create tab widgets for every column and show the initial text with the placeholders 0108 auto* vBoxLayout = new QVBoxLayout(&m_overviewWidget); 0109 vBoxLayout->setSpacing(0); 0110 m_overviewWidget.setLayout(vBoxLayout); 0111 m_overviewPlotWidget.setMaximumHeight(150); 0112 vBoxLayout->addWidget(&m_overviewPlotWidget); 0113 0114 m_teOverview = new QTextEdit(this); 0115 m_teOverview->setReadOnly(true); 0116 vBoxLayout->addWidget(m_teOverview); 0117 0118 m_tabWidget->addTab(&m_overviewWidget, i18n("Overview")); 0119 0120 if (column->isNumeric()) { 0121 m_teOverview->setHtml(m_htmlOverview 0122 .arg(QLatin1String("-"), 0123 QLatin1String("-"), 0124 QLatin1String("-"), 0125 QLatin1String("-"), 0126 QLatin1String("-"), 0127 QLatin1String("-"), 0128 QLatin1String("-"), 0129 QLatin1String("-"), 0130 QLatin1String("-")) 0131 .arg(QLatin1String("-"), 0132 QLatin1String("-"), 0133 QLatin1String("-"), 0134 QLatin1String("-"), 0135 QLatin1String("-"), 0136 QLatin1String("-"), 0137 QLatin1String("-"), 0138 QLatin1String("-"), 0139 QLatin1String("-")) 0140 .arg(QLatin1String("-"), QLatin1String("-"), QLatin1String("-"))); 0141 m_tabWidget->addTab(&m_histogramWidget, i18n("Histogram")); 0142 m_tabWidget->addTab(&m_kdePlotWidget, i18n("KDE Plot")); 0143 m_tabWidget->addTab(&m_qqPlotWidget, i18n("Normal Q-Q Plot")); 0144 m_tabWidget->addTab(&m_boxPlotWidget, i18n("Box Plot")); 0145 } else if (column->columnMode() == AbstractColumn::ColumnMode::Text) { 0146 m_teOverview->setHtml(m_htmlOverview.arg(QLatin1String("-"), 
QLatin1String("-"))); 0147 m_tabWidget->addTab(&m_barPlotWidget, i18n("Bar Plot")); 0148 m_tabWidget->addTab(&m_paretoPlotWidget, i18n("Pareto Plot")); 0149 } else { // datetime 0150 m_teOverview->setHtml(m_htmlOverview.arg(QLatin1String("-"), QLatin1String("-"), QLatin1String("-"))); 0151 } 0152 0153 connect(m_tabWidget, &QTabWidget::currentChanged, this, &StatisticsColumnWidget::currentTabChanged); 0154 } 0155 0156 StatisticsColumnWidget::~StatisticsColumnWidget() { 0157 disconnect(m_tabWidget, nullptr, this, nullptr); // don't react on currentChanged signal 0158 delete m_project; 0159 } 0160 0161 void StatisticsColumnWidget::setCurrentTab(int index) { 0162 if (index == m_tabWidget->currentIndex()) 0163 currentTabChanged(index); // manually call the slot so we get the data shown 0164 else 0165 m_tabWidget->setCurrentIndex(index); 0166 } 0167 0168 void StatisticsColumnWidget::currentTabChanged(int index) { 0169 WAIT_CURSOR; 0170 if (m_column->isNumeric()) { 0171 if (index == 0 && !m_overviewInitialized) 0172 showOverview(); 0173 else if (index == 1 && !m_histogramInitialized) 0174 showHistogram(); 0175 else if (index == 2 && !m_kdePlotInitialized) 0176 showKDEPlot(); 0177 else if (index == 3 && !m_qqPlotInitialized) 0178 showQQPlot(); 0179 else if (index == 4 && !m_boxPlotInitialized) 0180 showBoxPlot(); 0181 } else { 0182 if (index == 0 && !m_overviewInitialized) 0183 showOverview(); 0184 else if (index == 1 && !m_barPlotInitialized) 0185 showBarPlot(); 0186 else if (index == 2 && !m_paretoPlotInitialized) 0187 showParetoPlot(); 0188 } 0189 0190 Q_EMIT tabChanged(index); 0191 RESET_CURSOR; 0192 } 0193 0194 void StatisticsColumnWidget::showOverview() { 0195 const Column::ColumnStatistics& statistics = m_column->statistics(); 0196 0197 if (m_column->isNumeric()) { 0198 m_teOverview->setHtml(m_htmlOverview 0199 .arg(QString::number(statistics.size), 0200 isNanValue(statistics.minimum == INFINITY ? 
NAN : statistics.minimum), 0201 isNanValue(statistics.maximum == -INFINITY ? NAN : statistics.maximum), 0202 isNanValue(statistics.arithmeticMean), 0203 isNanValue(statistics.geometricMean), 0204 isNanValue(statistics.harmonicMean), 0205 isNanValue(statistics.contraharmonicMean), 0206 modeValue(m_column, statistics.mode), 0207 isNanValue(statistics.firstQuartile)) 0208 .arg(isNanValue(statistics.median), 0209 isNanValue(statistics.thirdQuartile), 0210 isNanValue(statistics.trimean), 0211 isNanValue(statistics.variance), 0212 isNanValue(statistics.standardDeviation), 0213 isNanValue(statistics.meanDeviation), 0214 isNanValue(statistics.meanDeviationAroundMedian), 0215 isNanValue(statistics.medianDeviation), 0216 isNanValue(statistics.iqr)) 0217 .arg(isNanValue(statistics.skewness), isNanValue(statistics.kurtosis), isNanValue(statistics.entropy))); 0218 } else if (m_column->columnMode() == AbstractColumn::ColumnMode::Text) { 0219 // add the frequencies table 0220 const auto& frequencies = m_column->frequencies(); 0221 const QString htmlColor = GuiTools::isDarkMode() ? 
QStringLiteral("#5f5f5f") : QStringLiteral("#D1D1D1"); 0222 m_htmlOverview += QStringLiteral("<br><table border=0 width=100%>") + QStringLiteral("<tr>") + QStringLiteral("<td colspan=3 align=center bgcolor=") 0223 + htmlColor + QStringLiteral("><b><big>") + i18n("Frequency Table") + QStringLiteral("</big><b></td>") + QStringLiteral("</tr>") 0224 + QStringLiteral("<tr>") + QStringLiteral("<td width=60%></td>") + QStringLiteral("<td>") + i18n("Frequency") + QStringLiteral("</td>") 0225 + QStringLiteral("<td>") + i18n("Percent") + QStringLiteral("</td>") + QStringLiteral("</tr>"); 0226 0227 auto i = frequencies.constBegin(); 0228 while (i != frequencies.constEnd()) { 0229 int count = i.value(); 0230 double percent = (double)count / statistics.size * 100; 0231 m_htmlOverview += QStringLiteral("<tr>") + QStringLiteral("<td>") + i.key() + QStringLiteral("</td>") + QStringLiteral("<td>") 0232 + QString::number(count) + QStringLiteral("</td>") + QStringLiteral("<td>") + QString::number(percent) + QStringLiteral("%</td>") 0233 + QStringLiteral("</tr>"); 0234 ++i; 0235 } 0236 0237 m_htmlOverview += QStringLiteral("</table>"); 0238 m_teOverview->setHtml(m_htmlOverview.arg(QString::number(statistics.size), QString::number(statistics.unique))); 0239 } else { // datetime 0240 auto* filter = static_cast<DateTime2StringFilter*>(m_column->outputFilter()); 0241 m_teOverview->setHtml(m_htmlOverview.arg(QString::number(statistics.size), 0242 QDateTime::fromMSecsSinceEpoch(statistics.minimum, Qt::UTC).toString(filter->format()), 0243 QDateTime::fromMSecsSinceEpoch(statistics.maximum, Qt::UTC).toString(filter->format()))); 0244 } 0245 0246 showOverviewPlot(); 0247 m_overviewInitialized = true; 0248 } 0249 0250 void StatisticsColumnWidget::showOverviewPlot() { 0251 if (!m_column->isNumeric()) 0252 return; 0253 0254 // add plot 0255 auto* plot = addPlot(&m_overviewPlotWidget); 0256 plot->setSymmetricPadding(false); 0257 const double padding = Worksheet::convertToSceneUnits(0.5, 
Worksheet::Unit::Centimeter); 0258 plot->setHorizontalPadding(2 * padding); 0259 plot->setRightPadding(2 * padding); 0260 plot->setVerticalPadding(padding); 0261 plot->setBottomPadding(padding); 0262 plot->plotArea()->borderLine()->setStyle(Qt::NoPen); 0263 0264 // set the axes labels 0265 auto axes = plot->children<Axis>(); 0266 for (auto* axis : qAsConst(axes)) { 0267 axis->setSuppressRetransform(true); 0268 if (axis->orientation() == Axis::Orientation::Vertical) 0269 axis->title()->setText(QString()); 0270 else { 0271 // TODO: set the font and the offset smaller and show the "Index" title after this 0272 // axis->title()->setText(i18n("Index")); 0273 axis->title()->setText(QString()); 0274 } 0275 0276 auto font = axis->labelsFont(); 0277 font.setPixelSize(Worksheet::convertToSceneUnits(8, Worksheet::Unit::Point)); 0278 axis->setLabelsFont(font); 0279 axis->setLabelsOffset(2); 0280 axis->setMajorTicksDirection(Axis::ticksIn); 0281 axis->majorGridLine()->setStyle(Qt::NoPen); 0282 axis->setMinorTicksDirection(Axis::noTicks); 0283 axis->setArrowType(Axis::ArrowType::NoArrow); 0284 axis->setSuppressRetransform(false); 0285 } 0286 0287 QApplication::processEvents(QEventLoop::AllEvents, 100); 0288 0289 // x 0290 auto* xColumn = new Column(QStringLiteral("x"), AbstractColumn::ColumnMode::Integer); 0291 m_project->addChild(xColumn); 0292 int rows = m_column->rowCount(); 0293 QVector<int> xData; 0294 xData.resize(rows); 0295 for (int i = 0; i < rows; ++i) 0296 xData[i] = i; 0297 xColumn->setIntegers(xData); 0298 0299 // add curve 0300 auto* curve = new XYCurve(QString()); 0301 curve->setSuppressRetransform(false); 0302 plot->addChild(curve); 0303 curve->line()->setStyle(Qt::SolidLine); 0304 curve->symbol()->setStyle(Symbol::Style::NoSymbols); 0305 curve->background()->setPosition(Background::Position::No); 0306 curve->setXColumn(xColumn); 0307 curve->setYColumn(m_column); 0308 0309 curve->setSuppressRetransform(false); 0310 plot->retransform(); 0311 } 0312 0313 void 
StatisticsColumnWidget::showHistogram() { 0314 // add plot 0315 auto* plot = addPlot(&m_histogramWidget); 0316 0317 auto axes = plot->children<Axis>(); 0318 for (auto* axis : qAsConst(axes)) { 0319 if (axis->orientation() == Axis::Orientation::Horizontal) { 0320 axis->title()->setText(m_column->name()); 0321 axis->majorGridLine()->setStyle(Qt::NoPen); 0322 } else 0323 axis->title()->setText(i18n("Frequency")); 0324 0325 axis->setMinorTicksDirection(Axis::noTicks); 0326 } 0327 QApplication::processEvents(QEventLoop::AllEvents, 100); 0328 0329 auto* histogram = new Histogram(QString()); 0330 plot->addChild(histogram); 0331 histogram->setDataColumn(m_column); 0332 0333 plot->retransform(); 0334 m_histogramInitialized = true; 0335 } 0336 0337 void StatisticsColumnWidget::showKDEPlot() { 0338 // add plot 0339 auto* plot = addPlot(&m_kdePlotWidget); 0340 0341 // set the axes labels 0342 auto axes = plot->children<Axis>(); 0343 for (auto* axis : qAsConst(axes)) { 0344 if (axis->orientation() == Axis::Orientation::Horizontal) 0345 axis->title()->setText(m_column->name()); 0346 else 0347 axis->title()->setText(i18n("Density")); 0348 0349 axis->setMinorTicksDirection(Axis::noTicks); 0350 } 0351 0352 QApplication::processEvents(QEventLoop::AllEvents, 100); 0353 0354 // add normalized histogram 0355 auto* histogram = new Histogram(QString()); 0356 plot->addChild(histogram); 0357 histogram->setNormalization(Histogram::ProbabilityDensity); 0358 histogram->setDataColumn(m_column); 0359 0360 // add KDE Plot 0361 auto* kdePlot = new KDEPlot(QString()); 0362 plot->addChild(kdePlot); 0363 kdePlot->setKernelType(nsl_kernel_gauss); 0364 kdePlot->setBandwidthType(nsl_kde_bandwidth_silverman); 0365 kdePlot->setDataColumn(m_column); 0366 0367 plot->retransform(); 0368 m_kdePlotInitialized = true; 0369 } 0370 0371 void StatisticsColumnWidget::showQQPlot() { 0372 // add plot 0373 auto* plot = addPlot(&m_qqPlotWidget); 0374 0375 auto axes = plot->children<Axis>(); 0376 for (auto* axis : 
qAsConst(axes)) { 0377 if (axis->orientation() == Axis::Orientation::Horizontal) 0378 axis->title()->setText(i18n("Theoretical Quantiles")); 0379 else 0380 axis->title()->setText(i18n("Sample Quantiles")); 0381 0382 axis->setMinorTicksDirection(Axis::noTicks); 0383 } 0384 QApplication::processEvents(QEventLoop::AllEvents, 100); 0385 0386 auto* qqPlot = new QQPlot(QString()); 0387 plot->addChild(qqPlot); 0388 qqPlot->setDataColumn(m_column); 0389 0390 plot->retransform(); 0391 m_qqPlotInitialized = true; 0392 } 0393 0394 void StatisticsColumnWidget::showBoxPlot() { 0395 // add plot 0396 auto* plot = addPlot(&m_boxPlotWidget); 0397 0398 auto axes = plot->children<Axis>(); 0399 for (auto* axis : qAsConst(axes)) { 0400 if (axis->orientation() == Axis::Orientation::Horizontal) { 0401 axis->setLabelsPosition(Axis::LabelsPosition::NoLabels); 0402 axis->setMajorTicksDirection(Axis::noTicks); 0403 axis->majorGridLine()->setStyle(Qt::NoPen); 0404 axis->minorGridLine()->setStyle(Qt::NoPen); 0405 axis->title()->setText(QString()); 0406 } else 0407 axis->title()->setText(m_column->name()); 0408 0409 axis->setMinorTicksDirection(Axis::noTicks); 0410 } 0411 QApplication::processEvents(QEventLoop::AllEvents, 100); 0412 0413 auto* boxPlot = new BoxPlot(QString()); 0414 boxPlot->setOrientation(BoxPlot::Orientation::Vertical); 0415 boxPlot->setWhiskersType(BoxPlot::WhiskersType::IQR); 0416 plot->addChild(boxPlot); 0417 0418 QVector<const AbstractColumn*> columns; 0419 columns << const_cast<Column*>(m_column); 0420 boxPlot->setDataColumns(columns); 0421 0422 plot->retransform(); 0423 m_boxPlotInitialized = true; 0424 } 0425 0426 void StatisticsColumnWidget::showBarPlot() { 0427 // add plot 0428 auto* plot = addPlot(&m_barPlotWidget); 0429 plot->title()->setText(m_column->name()); 0430 QApplication::processEvents(QEventLoop::AllEvents, 100); 0431 0432 auto* barPlot = new BarPlot(QString()); 0433 plot->addChild(barPlot); 0434 barPlot->setOrientation(BoxPlot::Orientation::Vertical); 0435 
barPlot->value()->setType(Value::Type::BinEntries); 0436 barPlot->value()->setPosition(Value::Position::Above); 0437 0438 // generate columns holding the data and the labels 0439 auto* dataColumn = new Column(QStringLiteral("data")); 0440 dataColumn->setColumnMode(AbstractColumn::ColumnMode::Integer); 0441 m_project->addChild(dataColumn); 0442 0443 auto* labelsColumn = new Column(QStringLiteral("labels")); 0444 labelsColumn->setColumnMode(AbstractColumn::ColumnMode::Text); 0445 m_project->addChild(labelsColumn); 0446 0447 // sort the frequencies and the accompanying labels 0448 const auto& frequencies = m_column->frequencies(); 0449 auto i = frequencies.constBegin(); 0450 QVector<QPair<QString, int>> pairs; 0451 while (i != frequencies.constEnd()) { 0452 pairs << QPair<QString, int>(i.key(), i.value()); 0453 ++i; 0454 } 0455 0456 std::sort(pairs.begin(), pairs.end(), [](QPair<QString, int> a, QPair<QString, int> b) { 0457 return a.second > b.second; 0458 }); 0459 0460 QVector<int> data; 0461 QVector<QString> labels; 0462 for (const auto& pair : pairs) { 0463 labels << pair.first; 0464 data << pair.second; 0465 } 0466 dataColumn->replaceInteger(0, data); 0467 labelsColumn->replaceTexts(0, labels); 0468 0469 QVector<const AbstractColumn*> columns; 0470 columns << dataColumn; 0471 barPlot->setDataColumns(columns); 0472 0473 // axes properties 0474 auto axes = plot->children<Axis>(); 0475 for (auto* axis : qAsConst(axes)) { 0476 if (axis->orientation() == Axis::Orientation::Horizontal) { 0477 axis->title()->setText(QString()); 0478 axis->majorGridLine()->setStyle(Qt::NoPen); 0479 axis->setMajorTicksStartType(Axis::TicksStartType::Offset); 0480 axis->setMajorTickStartOffset(0.5); 0481 axis->setMajorTicksType(Axis::TicksType::Spacing); 0482 axis->setMajorTicksSpacing(1.); 0483 axis->setLabelsTextType(Axis::LabelsTextType::CustomValues); 0484 axis->setLabelsTextColumn(labelsColumn); 0485 } else { 0486 axis->title()->setText(i18n("Frequency")); 0487 
axis->setTitleOffsetX(Worksheet::convertToSceneUnits(-5, Worksheet::Unit::Point)); 0488 } 0489 0490 axis->setMinorTicksDirection(Axis::noTicks); 0491 axis->setArrowType(Axis::ArrowType::NoArrow); 0492 } 0493 0494 plot->retransform(); 0495 m_barPlotInitialized = true; 0496 } 0497 0498 void StatisticsColumnWidget::showParetoPlot() { 0499 DEBUG(Q_FUNC_INFO) 0500 auto* plot = addPlot(&m_paretoPlotWidget); 0501 plot->title()->setText(m_column->name()); 0502 plot->setHorizontalPadding(Worksheet::convertToSceneUnits(2, Worksheet::Unit::Centimeter)); 0503 plot->setRightPadding(Worksheet::convertToSceneUnits(3.2, Worksheet::Unit::Centimeter)); 0504 0505 // add second range for the cumulative percentage of the total number of occurences 0506 plot->addYRange(Range<double>(0, 100)); // add second y range 0507 plot->addCoordinateSystem(); // add cs for second y range 0508 plot->setCoordinateSystemRangeIndex(plot->coordinateSystemCount() - 1, Dimension::Y, 1); // specify new y range for new cs 0509 plot->enableAutoScale(Dimension::Y, 1, false); // disable auto scale to stay at 0 .. 
100 0510 0511 // add second y-axis 0512 auto* axis = new Axis(QLatin1String("y2")); 0513 plot->addChild(axis); 0514 axis->setOrientation(Axis::Orientation::Vertical); 0515 axis->setPosition(Axis::Position::Right); 0516 axis->setMajorTicksDirection(Axis::ticksBoth); 0517 axis->setLabelsPosition(Axis::LabelsPosition::In); 0518 axis->setLabelsSuffix(QLatin1String("%")); 0519 axis->title()->setRotationAngle(90); 0520 axis->setCoordinateSystemIndex(1); 0521 0522 QApplication::processEvents(QEventLoop::AllEvents, 100); 0523 0524 auto* barPlot = new BarPlot(QString()); 0525 barPlot->setOrientation(BoxPlot::Orientation::Vertical); 0526 plot->addChild(barPlot); 0527 0528 // generate columns holding the data and the labels 0529 int count = m_column->statistics().unique; 0530 0531 auto* dataColumn = new Column(QStringLiteral("data")); 0532 dataColumn->setColumnMode(AbstractColumn::ColumnMode::Integer); 0533 m_project->addChild(dataColumn); 0534 0535 auto* xColumn = new Column(QStringLiteral("x")); 0536 xColumn->setColumnMode(AbstractColumn::ColumnMode::Double); 0537 m_project->addChild(xColumn); 0538 QVector<double> xData(count); 0539 0540 auto* yColumn = new Column(QStringLiteral("y")); 0541 m_project->addChild(yColumn); 0542 QVector<double> yData(count); 0543 0544 auto* labelsColumn = new Column(QStringLiteral("labels")); 0545 labelsColumn->setColumnMode(AbstractColumn::ColumnMode::Text); 0546 0547 // sort the frequencies and the accompanying labels and calculate the total sum of frequencies 0548 const auto& frequencies = m_column->frequencies(); 0549 auto i = frequencies.constBegin(); 0550 QVector<QPair<QString, int>> pairs; 0551 int row = 0; 0552 int totalSumOfFrequencies = 0; 0553 while (i != frequencies.constEnd()) { 0554 pairs << QPair<QString, int>(i.key(), i.value()); 0555 xData[row] = 0.5 + row; 0556 totalSumOfFrequencies += i.value(); 0557 ++row; 0558 ++i; 0559 } 0560 0561 std::sort(pairs.begin(), pairs.end(), [](QPair<QString, int> a, QPair<QString, int> b) { 0562 
return a.second > b.second; 0563 }); 0564 0565 QVector<int> data; 0566 QVector<QString> labels; 0567 for (const auto& pair : pairs) { 0568 labels << pair.first; 0569 data << pair.second; 0570 } 0571 0572 // calculate the cummulative values 0573 int sum = 0; 0574 row = 0; 0575 for (auto value : data) { 0576 sum += value; 0577 if (totalSumOfFrequencies != 0) 0578 yData[row] = (double)sum / totalSumOfFrequencies * 100; 0579 ++row; 0580 } 0581 0582 dataColumn->replaceInteger(0, data); 0583 labelsColumn->replaceTexts(0, labels); 0584 xColumn->setValues(xData); 0585 yColumn->setValues(yData); 0586 0587 QVector<const AbstractColumn*> columns; 0588 columns << dataColumn; 0589 barPlot->setDataColumns(columns); 0590 0591 // add cumulated percentage curve 0592 auto* curve = new XYCurve(QStringLiteral("curve")); 0593 curve->setCoordinateSystemIndex(1); // asign to the second y-range going from 0 to 100% 0594 curve->setXColumn(xColumn); 0595 curve->setYColumn(yColumn); 0596 curve->line()->setStyle(Qt::SolidLine); 0597 curve->symbol()->setStyle(Symbol::Style::Circle); 0598 plot->addChild(curve); 0599 curve->setValuesType(XYCurve::ValuesType::Y); 0600 curve->setValuesPosition(XYCurve::ValuesPosition::Right); 0601 curve->setValuesDistance(Worksheet::convertToSceneUnits(10, Worksheet::Unit::Point)); 0602 curve->setValuesSuffix(QStringLiteral("%")); 0603 0604 // resize the first y range to have the first point of the xy-curve at the top of the first bar 0605 if (yData.at(0) != 0) { 0606 const double max = (double)data.at(0) * 100. 
/ yData.at(0); 0607 plot->setMax(Dimension::Y, 0, max); 0608 } 0609 0610 // axes properties 0611 auto axes = plot->children<Axis>(); 0612 bool firstYAxis = false; 0613 for (auto* axis : qAsConst(axes)) { 0614 if (axis->orientation() == Axis::Orientation::Horizontal) { 0615 axis->title()->setText(QString()); 0616 axis->majorGridLine()->setStyle(Qt::NoPen); 0617 axis->setMajorTicksStartType(Axis::TicksStartType::Offset); 0618 axis->setMajorTickStartOffset(0.5); 0619 axis->setMajorTicksType(Axis::TicksType::Spacing); 0620 axis->setMajorTicksSpacing(1.); 0621 axis->setLabelsTextType(Axis::LabelsTextType::CustomValues); 0622 axis->setLabelsTextColumn(labelsColumn); 0623 } else { 0624 if (!firstYAxis) { 0625 axis->title()->setText(i18n("Frequency")); 0626 axis->setTitleOffsetX(Worksheet::convertToSceneUnits(-5, Worksheet::Unit::Point)); 0627 axis->setMajorTicksNumber(10 + 1); // same tick number as percentage axis 0628 firstYAxis = true; 0629 } else { 0630 axis->title()->setText(i18n("Cumulative Percentage")); 0631 // TODO: work with the same offset as for the first axis after https://invent.kde.org/education/labplot/-/issues/368 was adressed 0632 axis->setTitleOffsetX(Worksheet::convertToSceneUnits(1.8, Worksheet::Unit::Centimeter)); 0633 } 0634 } 0635 0636 axis->setMinorTicksDirection(Axis::noTicks); 0637 axis->setArrowType(Axis::ArrowType::NoArrow); 0638 } 0639 0640 plot->retransform(); 0641 m_paretoPlotInitialized = true; 0642 } 0643 0644 CartesianPlot* StatisticsColumnWidget::addPlot(QWidget* widget) { 0645 auto* ws = new Worksheet(QString()); 0646 ws->setUseViewSize(true); 0647 ws->setLayoutTopMargin(0.); 0648 ws->setLayoutBottomMargin(0.); 0649 ws->setLayoutLeftMargin(0.); 0650 ws->setLayoutRightMargin(0.); 0651 m_project->addChild(ws); 0652 0653 auto* plot = new CartesianPlot(QString()); 0654 plot->setSuppressRetransform(true); 0655 plot->setType(CartesianPlot::Type::TwoAxes); 0656 plot->setSymmetricPadding(false); 0657 const double padding = 
Worksheet::convertToSceneUnits(1.0, Worksheet::Unit::Centimeter); 0658 plot->setRightPadding(padding); 0659 plot->setVerticalPadding(padding); 0660 plot->plotArea()->borderLine()->setStyle(Qt::NoPen); 0661 0662 ws->addChild(plot); 0663 plot->setSuppressRetransform(false); 0664 0665 auto* layout = new QVBoxLayout(widget); 0666 layout->setSpacing(0); 0667 layout->addWidget(ws->view()); 0668 ws->setInteractive(false); 0669 widget->setLayout(layout); 0670 0671 return plot; 0672 } 0673 0674 // helpers 0675 const QString StatisticsColumnWidget::isNanValue(const double value) const { 0676 return (std::isnan(value) ? QLatin1String("-") : QLocale().toString(value, 'f')); 0677 } 0678 0679 QString StatisticsColumnWidget::modeValue(const Column* column, double value) const { 0680 if (std::isnan(value)) 0681 return QLatin1String("-"); 0682 0683 const auto numberLocale = QLocale(); 0684 switch (column->columnMode()) { 0685 case AbstractColumn::ColumnMode::Integer: 0686 return numberLocale.toString((int)value); 0687 case AbstractColumn::ColumnMode::BigInt: 0688 return numberLocale.toString((qint64)value); 0689 case AbstractColumn::ColumnMode::Text: 0690 // TODO 0691 case AbstractColumn::ColumnMode::DateTime: 0692 // TODO 0693 case AbstractColumn::ColumnMode::Day: 0694 // TODO 0695 case AbstractColumn::ColumnMode::Month: 0696 // TODO 0697 case AbstractColumn::ColumnMode::Double: 0698 return numberLocale.toString(value, 'f'); 0699 } 0700 0701 return {}; 0702 } 0703 0704 /*! 0705 * copy the non-nan and not masked values of the current column 0706 * into the vector \c data. 
0707 */ 0708 void StatisticsColumnWidget::copyValidData(QVector<double>& data) const { 0709 const int rowCount = m_column->rowCount(); 0710 data.reserve(rowCount); 0711 double val; 0712 if (m_column->columnMode() == AbstractColumn::ColumnMode::Double) { 0713 auto* rowValues = reinterpret_cast<QVector<double>*>(m_column->data()); 0714 for (int row = 0; row < rowCount; ++row) { 0715 val = rowValues->value(row); 0716 if (std::isnan(val) || m_column->isMasked(row)) 0717 continue; 0718 0719 data.push_back(val); 0720 } 0721 } else if (m_column->columnMode() == AbstractColumn::ColumnMode::Integer) { 0722 auto* rowValues = reinterpret_cast<QVector<int>*>(m_column->data()); 0723 for (int row = 0; row < rowCount; ++row) { 0724 val = rowValues->value(row); 0725 if (std::isnan(val) || m_column->isMasked(row)) 0726 continue; 0727 0728 data.push_back(val); 0729 } 0730 } else if (m_column->columnMode() == AbstractColumn::ColumnMode::BigInt) { 0731 auto* rowValues = reinterpret_cast<QVector<qint64>*>(m_column->data()); 0732 for (int row = 0; row < rowCount; ++row) { 0733 val = rowValues->value(row); 0734 if (std::isnan(val) || m_column->isMasked(row)) 0735 continue; 0736 0737 data.push_back(val); 0738 } 0739 } 0740 0741 if (data.size() < rowCount) 0742 data.squeeze(); 0743 }