File indexing completed on 2024-04-28 11:20:47

0001 /*
0002     SPDX-License-Identifier: GPL-2.0-or-later
0003     SPDX-FileCopyrightText: 2019 Sirgienko Nikita <warquark@gmail.com>
0004 */
0005 
0006 #include "jupyterutils.h"
0007 #include "backend.h"
0008 
0009 #include <tuple>
0010 
0011 #include <QJsonValue>
0012 #include <QJsonObject>
0013 #include <QJsonArray>
0014 #include <QJsonDocument>
0015 #include <QSet>
0016 #include <QImageReader>
0017 #include <QImageWriter>
0018 #include <QBuffer>
0019 #include <QString>
0020 #include <QUrl>
0021 #include <QTemporaryFile>
0022 
0023 using namespace Cantor;
0024 
0025 const QString JupyterUtils::cellsKey = QLatin1String("cells");
0026 const QString JupyterUtils::metadataKey = QLatin1String("metadata");
0027 const QString JupyterUtils::cantorMetadataKey = QLatin1String("cantor");
0028 const QString JupyterUtils::nbformatKey = QLatin1String("nbformat");
0029 const QString JupyterUtils::nbformatMinorKey = QLatin1String("nbformat_minor");
0030 const QString JupyterUtils::cellTypeKey = QLatin1String("cell_type");
0031 const QString JupyterUtils::sourceKey = QLatin1String("source");
0032 const QString JupyterUtils::outputTypeKey = QLatin1String("output_type");
0033 const QString JupyterUtils::executionCountKey = QLatin1String("execution_count");
0034 const QString JupyterUtils::outputsKey = QLatin1String("outputs");
0035 const QString JupyterUtils::dataKey = QLatin1String("data");
0036 
0037 const QString JupyterUtils::pngMime = QLatin1String("image/png");
0038 const QString JupyterUtils::gifMime = QLatin1String("image/gif");
0039 const QString JupyterUtils::textMime = QLatin1String("text/plain");
0040 const QString JupyterUtils::htmlMime = QLatin1String("text/html");
0041 const QString JupyterUtils::latexMime = QLatin1String("text/latex");
0042 const QString JupyterUtils::svgMime = QLatin1String("image/svg+xml");
0043 
0044 const QMimeDatabase JupyterUtils::mimeDatabase;
0045 
0046 QJsonValue JupyterUtils::toJupyterMultiline(const QString& source)
0047 {
0048     if (source.contains(QLatin1Char('\n')))
0049     {
0050         QJsonArray text;
0051         const QStringList& lines = source.split(QLatin1Char('\n'));
0052         for (int i = 0; i < lines.size(); i++)
0053         {
0054             QString line = lines[i];
0055             // Don't add \n to last line
0056             if (i != lines.size() - 1)
0057                 line.append(QLatin1Char('\n'));
0058             // Ignore last line, if it is an empty line
0059             else if (line.isEmpty())
0060                 break;
0061 
0062             text.append(line);
0063         }
0064         return text;
0065     }
0066     else
0067         return QJsonArray::fromStringList(QStringList(source));
0068 }
0069 
0070 QString JupyterUtils::fromJupyterMultiline(const QJsonValue& source)
0071 {
0072     QString code;
0073     if (source.isString())
0074         code = source.toString();
0075     else if (source.isArray())
0076         for (const QJsonValue& line : source.toArray())
0077             code += line.toString();
0078     return code;
0079 }
0080 
0081 bool JupyterUtils::isJupyterNotebook(const QJsonDocument& doc)
0082 {
0083     static const QSet<QString> notebookScheme
0084         = QSet<QString>::fromList({cellsKey, metadataKey, nbformatKey, nbformatMinorKey});
0085 
0086     bool isNotebook =
0087             doc.isObject()
0088         && QSet<QString>::fromList(doc.object().keys()) == notebookScheme
0089         && doc.object().value(cellsKey).isArray()
0090         && doc.object().value(metadataKey).isObject()
0091         && doc.object().value(nbformatKey).isDouble()
0092         && doc.object().value(nbformatMinorKey).isDouble();
0093 
0094     return isNotebook;
0095 }
0096 
0097 bool JupyterUtils::isJupyterCell(const QJsonValue& cell)
0098 {
0099     bool isCell =
0100            cell.isObject()
0101         && cell.toObject().value(cellTypeKey).isString()
0102         &&
0103         (    cell.toObject().value(cellTypeKey).toString() == QLatin1String("markdown")
0104           || cell.toObject().value(cellTypeKey).toString() == QLatin1String("code")
0105           || cell.toObject().value(cellTypeKey).toString() == QLatin1String("raw")
0106         )
0107         && cell.toObject().value(metadataKey).isObject()
0108         &&
0109         (    cell.toObject().value(sourceKey).isString()
0110           || cell.toObject().value(sourceKey).isArray()
0111         );
0112 
0113     return isCell;
0114 }
0115 
0116 bool JupyterUtils::isJupyterOutput(const QJsonValue& output)
0117 {
0118     bool isOutput =
0119            output.isObject()
0120         && output.toObject().value(outputTypeKey).isString()
0121         &&
0122         (    output.toObject().value(outputTypeKey).toString() == QLatin1String("stream")
0123           || output.toObject().value(outputTypeKey).toString() == QLatin1String("display_data")
0124           || output.toObject().value(outputTypeKey).toString() == QLatin1String("execute_result")
0125           || output.toObject().value(outputTypeKey).toString() == QLatin1String("error")
0126         );
0127 
0128     return isOutput;
0129 }
0130 
0131 bool JupyterUtils::isJupyterTextOutput(const QJsonValue& output)
0132 {
0133     return
0134            isJupyterOutput(output)
0135         && output.toObject().value(outputTypeKey).toString() == QLatin1String("stream")
0136         && output.toObject().value(QLatin1String("name")).isString()
0137         && output.toObject().value(QLatin1String("text")).isArray();
0138 }
0139 
0140 bool JupyterUtils::isJupyterErrorOutput(const QJsonValue& output)
0141 {
0142     return
0143            isJupyterOutput(output)
0144         && output.toObject().value(outputTypeKey).toString() == QLatin1String("error")
0145         && output.toObject().value(QLatin1String("ename")).isString()
0146         && output.toObject().value(QLatin1String("evalue")).isString()
0147         && output.toObject().value(QLatin1String("traceback")).isArray();
0148 }
0149 
0150 bool JupyterUtils::isJupyterExecutionResult(const QJsonValue& output)
0151 {
0152     return
0153            isJupyterOutput(output)
0154         && output.toObject().value(outputTypeKey).toString() == QLatin1String("execute_result")
0155         && output.toObject().value(QLatin1String("execution_count")).isDouble()
0156         && output.toObject().value(metadataKey).isObject()
0157         && output.toObject().value(QLatin1String("data")).isObject();
0158 }
0159 
0160 bool JupyterUtils::isJupyterDisplayOutput(const QJsonValue& output)
0161 {
0162     return
0163            isJupyterOutput(output)
0164         && output.toObject().value(outputTypeKey).toString() == QLatin1String("display_data")
0165         && output.toObject().value(metadataKey).isObject()
0166         && output.toObject().value(QLatin1String("data")).isObject();
0167 }
0168 
0169 bool JupyterUtils::isMarkdownCell(const QJsonValue& cell)
0170 {
0171     return isJupyterCell(cell) && getCellType(cell.toObject()) == QLatin1String("markdown");
0172 }
0173 
0174 bool JupyterUtils::isCodeCell(const QJsonValue& cell)
0175 {
0176     return
0177            isJupyterCell(cell)
0178         && getCellType(cell.toObject()) == QLatin1String("code")
0179         &&
0180         (      cell.toObject().value(executionCountKey).isDouble()
0181             || cell.toObject().value(executionCountKey).isNull()
0182         )
0183         && cell.toObject().value(outputsKey).isArray();
0184 }
0185 
0186 bool JupyterUtils::isRawCell(const QJsonValue& cell)
0187 {
0188     return isJupyterCell(cell) && getCellType(cell.toObject()) == QLatin1String("raw");
0189 }
0190 
0191 QJsonObject JupyterUtils::getMetadata(const QJsonObject& object)
0192 {
0193     return object.value(metadataKey).toObject();
0194 }
0195 
0196 QJsonArray JupyterUtils::getCells(const QJsonObject notebook)
0197 {
0198     return notebook.value(cellsKey).toArray();
0199 }
0200 
0201 std::tuple<int, int> JupyterUtils::getNbformatVersion(const QJsonObject& notebook)
0202 {
0203     int nbformatMajor = notebook.value(nbformatKey).toInt();
0204     int nbformatMinor = notebook.value(nbformatMinorKey).toInt();
0205 
0206     return {nbformatMajor, nbformatMinor};
0207 }
0208 
0209 QString JupyterUtils::getCellType(const QJsonObject& cell)
0210 {
0211     return cell.value(cellTypeKey).toString();
0212 }
0213 
0214 QString JupyterUtils::getSource(const QJsonObject& cell)
0215 {
0216     return fromJupyterMultiline(cell.value(sourceKey));
0217 }
0218 
0219 void JupyterUtils::setSource(QJsonObject& cell, const QString& source)
0220 {
0221     cell.insert(sourceKey, toJupyterMultiline(source));
0222 }
0223 
0224 QString JupyterUtils::getOutputType(const QJsonObject& output)
0225 {
0226     return output.value(outputTypeKey).toString();
0227 }
0228 
0229 QJsonObject JupyterUtils::getCantorMetadata(const QJsonObject object)
0230 {
0231     return getMetadata(object).value(cantorMetadataKey).toObject();
0232 }
0233 
0234 QString JupyterUtils::getKernelName(const QJsonValue& kernelspecValue)
0235 {
0236     QString name;
0237 
0238     if (kernelspecValue.isObject())
0239     {
0240         const QJsonObject& kernelspec = kernelspecValue.toObject();
0241         QString kernelName = kernelspec.value(QLatin1String("name")).toString();
0242         if (!kernelName.isEmpty())
0243         {
0244             if (kernelName.startsWith(QLatin1String("julia")))
0245                 kernelName = QLatin1String("julia");
0246             else if (kernelName == QLatin1String("sagemath"))
0247                 kernelName = QLatin1String("sage");
0248             else if (kernelName == QLatin1String("ir"))
0249                 kernelName = QLatin1String("r");
0250             name = kernelName;
0251         }
0252         else
0253         {
0254             name = kernelspec.value(QLatin1String("language")).toString();
0255         }
0256     }
0257 
0258     return name;
0259 }
0260 
0261 QJsonObject JupyterUtils::getKernelspec(const Cantor::Backend* backend)
0262 {
0263     QJsonObject kernelspec;
0264 
0265     if (backend)
0266     {
0267         QString id = backend->id();
0268 
0269         if (id == QLatin1String("sage"))
0270             id = QLatin1String("sagemath");
0271         else if (id == QLatin1String("r"))
0272             id = QLatin1String("ir");
0273 
0274         kernelspec.insert(QLatin1String("name"), id);
0275 
0276         QString lang = backend->id();
0277         if (lang.startsWith(QLatin1String("python")))
0278             lang = QLatin1String("python");
0279         lang[0] = lang[0].toUpper();
0280 
0281         kernelspec.insert(QLatin1String("language"), lang);
0282 
0283         kernelspec.insert(QLatin1String("display_name"), backend->name());
0284     }
0285 
0286     return kernelspec;
0287 }
0288 
0289 QImage JupyterUtils::loadImage(const QJsonValue& mimeBundle, const QString& key)
0290 {
0291     QImage image;
0292 
0293     if (mimeBundle.isObject())
0294     {
0295         const QJsonObject& bundleObject = mimeBundle.toObject();
0296         const QJsonValue& data = bundleObject.value(key);
0297         if (data.isString() || data.isArray())
0298         {
0299             // In jupyter mime-bundle key for data is mime type of this data
0300             // So we need convert mimetype to format, for example "image/png" to "png"
0301             // for loading from data
0302             if (QImageReader::supportedMimeTypes().contains(key.toLatin1()))
0303             {
0304                 const QByteArray& format = mimeDatabase.mimeTypeForName(key).preferredSuffix().toLatin1();
0305                 // Handle svg separately, because Jupyter don't encode svg in base64
0306                 // and store as jupyter multiline text
0307                 if (key == QLatin1String("image/svg+xml") && data.isArray())
0308                 {
0309                     image.loadFromData(fromJupyterMultiline(data).toLatin1(), format.data());
0310                 }
0311                 else if (data.isString())
0312                 {
0313                     // https://doc.qt.io/qt-5/qimagereader.html#supportedImageFormats
0314                     // Maybe there is a better way to convert image key to image format
0315                     // but this is all that I could to do
0316                     const QString& base64 = data.toString();
0317                     image.loadFromData(QByteArray::fromBase64(base64.toLatin1()), format.data());
0318                 }
0319             }
0320         }
0321     }
0322 
0323     return image;
0324 }
0325 
0326 QJsonObject JupyterUtils::packMimeBundle(const QImage& image, const QString& mime)
0327 {
0328     QJsonObject mimeBundle;
0329 
0330     if (QImageWriter::supportedMimeTypes().contains(mime.toLatin1()))
0331     {
0332         const QByteArray& format = mimeDatabase.mimeTypeForName(mime).preferredSuffix().toLatin1();
0333 
0334         QByteArray ba;
0335         QBuffer buffer(&ba);
0336         buffer.open(QIODevice::WriteOnly);
0337         image.save(&buffer, format.data());
0338         mimeBundle.insert(mime, QString::fromLatin1(ba.toBase64()));
0339     }
0340 
0341     return mimeBundle;
0342 }
0343 
0344 QStringList JupyterUtils::imageKeys(const QJsonValue& mimeBundle)
0345 {
0346     QStringList imageKeys;
0347 
0348     if (mimeBundle.isObject())
0349     {
0350         const QStringList& keys = mimeBundle.toObject().keys();
0351         const QList<QByteArray>& mimes = QImageReader::supportedMimeTypes();
0352         for (const QString& key : keys)
0353             if (mimes.contains(key.toLatin1()))
0354                 imageKeys.append(key);
0355     }
0356 
0357     return imageKeys;
0358 }
0359 
0360 QString JupyterUtils::firstImageKey(const QJsonValue& mimeBundle)
0361 {
0362     const QStringList& keys = imageKeys(mimeBundle);
0363     return keys.size() >= 1 ? keys[0] : QString();
0364 }
0365 
0366 QString JupyterUtils::mainBundleKey(const QJsonValue& mimeBundle)
0367 {
0368     QString mainKey;
0369 
0370     if (mimeBundle.isObject())
0371     {
0372         const QStringList& keys = mimeBundle.toObject().keys();
0373         if (keys.size() == 1)
0374             mainKey = keys[0];
0375         else if (keys.size() == 2)
0376         {
0377             int idx = keys.indexOf(textMime);
0378             if (idx != -1)
0379                 // Getting not 'text/plain' key, because often it's just a caption
0380                 mainKey = keys[1 - idx];
0381             else
0382                 // Not sure, that this is valid, but return first keys
0383                 mainKey = keys[0];
0384         }
0385         else if (keys.size() > 2)
0386         {
0387             // Also not sure about it
0388             // Specification is not very clean on cases, such that
0389             // Just in case, if we will have duplications of information
0390             // Something like keys == {'image/png', 'image/bmp', 'text/plain'}
0391             // Or something like keys == {'text/html', 'text/latex', 'text/plain'}
0392             // Set priority for html->latex->plain (in this order)
0393             if (keys.contains(htmlMime))
0394                 mainKey = htmlMime;
0395             else if (keys.contains(latexMime))
0396                 mainKey = latexMime;
0397             else if (keys.contains(textMime))
0398                 mainKey = textMime;
0399             else
0400             {
0401                 // Search for image keys, if no
0402                 // then just use first key
0403                 mainKey = firstImageKey(mimeBundle);
0404                 if (mainKey.isEmpty())
0405                     mainKey = keys[0];
0406             }
0407         }
0408     }
0409 
0410     return mainKey;
0411 }
0412 
0413 bool JupyterUtils::isGifHtml(const QJsonValue& html)
0414 {
0415     return html.isString()
0416         && html.toString().startsWith(QLatin1String("<img src=\"data:image/gif;base64,"))
0417         && html.toString().endsWith(QLatin1String("/>"));
0418 }
0419 
0420 QUrl JupyterUtils::loadGifHtml(const QJsonValue& html)
0421 {
0422     QUrl gif;
0423 
0424     if (html.isString())
0425     {
0426         QString data = html.toString();
0427         data.remove(0, QString::fromLatin1("<img src=\"data:image/gif;base64,").size());
0428         data.chop(QString::fromLatin1("/>").size());
0429         const QByteArray& bytes = QByteArray::fromBase64(data.toLatin1());
0430 
0431         QTemporaryFile file;
0432         file.setAutoRemove(false);
0433         file.open();
0434         file.write(bytes);
0435         file.close();
0436 
0437         gif = QUrl::fromLocalFile(file.fileName());
0438     }
0439 
0440     return gif;
0441 }