File indexing completed on 2024-05-12 05:10:12

0001 /***************************************************************************
0002     Copyright (C) 2022 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "marcimporter.h"
0026 #include "../translators/xslthandler.h"
0027 #include "../translators/tellicoimporter.h"
0028 #include "../core/filehandler.h"
0029 #include "../utils/datafileregistry.h"
0030 #include "../tellico_debug.h"
0031 
0032 #include <KLocalizedString>
0033 #include <KComboBox>
0034 #include <KSharedConfig>
0035 #include <KConfigGroup>
0036 
0037 #include <QProcess>
0038 #include <QStandardPaths>
0039 #include <QGroupBox>
0040 #include <QVBoxLayout>
0041 #include <QFormLayout>
0042 
0043 using Tellico::Import::MarcImporter;
0044 
0045 MarcImporter::MarcImporter(const QUrl& url_) : Tellico::Import::Importer(url_)
0046     , m_coll(nullptr)
0047     , m_cancelled(false)
0048     , m_isUnimarc(false)
0049     , m_MARCHandler(nullptr)
0050     , m_MODSHandler(nullptr)
0051     , m_widget(nullptr)
0052     , m_charSetCombo(nullptr)
0053     , m_marcFormatCombo(nullptr) {
0054 }
0055 
0056 MarcImporter::~MarcImporter() {
0057   delete m_MARCHandler;
0058   m_MARCHandler = nullptr;
0059   delete m_MODSHandler;
0060   m_MODSHandler = nullptr;
0061 }
0062 
0063 bool MarcImporter::canImport(int type) const {
0064   return type == Data::Collection::Book || type == Data::Collection::Bibtex;
0065 }
0066 
0067 Tellico::Data::CollPtr MarcImporter::collection() {
0068   if(m_coll) {
0069     return m_coll;
0070   }
0071 
0072   m_marcdump = QStandardPaths::findExecutable(QStringLiteral("yaz-marcdump"));
0073   if(m_marcdump.isEmpty()) {
0074     myDebug() << "Could not find yaz-marcdump executable";
0075     return Data::CollPtr();
0076   }
0077 
0078   if(urls().count() > 1) {
0079     myDebug() << "MarcImporter only importing first file";
0080   }
0081 
0082   const QUrl url = this->url();
0083   if(url.isEmpty() || !url.isLocalFile()) {
0084     myDebug() << "MarcImporter can only read local files";
0085     return Data::CollPtr();
0086   }
0087 
0088   if(m_widget) {
0089     m_marcCharSet = m_charSetCombo->currentText().toUpper();
0090     QStringList charSets;
0091     for(int i = 0; i < m_charSetCombo->count(); ++i) {
0092       charSets += m_charSetCombo->itemText(i).toUpper();
0093     }
0094     charSets += m_marcCharSet;
0095     charSets.removeDuplicates();
0096     KConfigGroup config(KSharedConfig::openConfig(), QStringLiteral("ImportOptions - MARC"));
0097     config.writeEntry("CharacterSets", charSets);
0098     config.writeEntry("Last Character Set", m_marcCharSet);
0099 
0100     const QString format = m_marcFormatCombo->currentText();
0101     m_isUnimarc = format == QLatin1String("UNIMARC");
0102     config.writeEntry("Format", format);
0103   }
0104   if(m_marcCharSet.isEmpty()) {
0105     m_marcCharSet = QStringLiteral("UTF-8");
0106   }
0107   QStringList dumpArgs = { QStringLiteral("-f"),
0108                            m_marcCharSet,
0109                            QStringLiteral("-t"),
0110                            QStringLiteral("utf-8"),
0111                            QStringLiteral("-o"),
0112                            QStringLiteral("marcxml"),
0113                            url.toLocalFile()
0114   };
0115   QProcess dumpProc;
0116   dumpProc.start(m_marcdump, dumpArgs);
0117   if(!dumpProc.waitForStarted() || !dumpProc.waitForFinished()) {
0118     myDebug() << "yaz-marcdump failed to start or finish";
0119     myDebug() << "arguments:" << dumpArgs;
0120     return Data::CollPtr();
0121   }
0122 
0123   const QByteArray marcxml = dumpProc.readAllStandardOutput();
0124   if(!initMARCHandler() || !initMODSHandler()) {
0125     return Data::CollPtr();
0126   }
0127   if(m_cancelled) {
0128     return Data::CollPtr();
0129   }
0130   // reading a non-MARC file results in "<!-- Skipping bad byte"
0131   if(marcxml.isEmpty() || marcxml.startsWith("<!--")) {
0132     setStatusMessage(i18n("Tellico was unable to read any data."));
0133     return Data::CollPtr();
0134   }
0135 
0136   const QString mods = m_MARCHandler->applyStylesheet(QString::fromUtf8(marcxml));
0137   const QString output = m_MODSHandler->applyStylesheet(mods);
0138   Import::TellicoImporter imp(output);
0139   imp.setOptions(imp.options() ^ Import::ImportProgress); // no progress needed
0140   m_coll = imp.collection();
0141   return m_coll;
0142 }
0143 
0144 QWidget* MarcImporter::widget(QWidget* parent_) {
0145   if(m_widget) {
0146     return m_widget;
0147   }
0148   m_widget = new QWidget(parent_);
0149   QVBoxLayout* l = new QVBoxLayout(m_widget);
0150 
0151   QGroupBox* gbox = new QGroupBox(i18n("MARC Options"), m_widget);
0152   QFormLayout* lay = new QFormLayout(gbox);
0153 
0154   m_charSetCombo = new KComboBox(gbox);
0155   m_charSetCombo->setEditable(true);
0156   lay->addRow(i18n("Character set:"), m_charSetCombo);
0157 
0158   m_marcFormatCombo = new KComboBox(gbox);
0159   m_marcFormatCombo->addItem(QStringLiteral("MARC21"));
0160   m_marcFormatCombo->addItem(QStringLiteral("UNIMARC"));
0161   lay->addRow(i18n("MARC Format:"), m_marcFormatCombo);
0162 
0163   l->addWidget(gbox);
0164   l->addStretch(1);
0165 
0166   // now read config options
0167   KConfigGroup config(KSharedConfig::openConfig(), QStringLiteral("ImportOptions - MARC"));
0168   QStringList charSets = config.readEntry("Character Sets", QStringList());
0169   if(charSets.isEmpty()) {
0170     charSets += QStringLiteral("UTF-8");
0171     charSets += QStringLiteral("ISO-8859-1");
0172   }
0173 #if (QT_VERSION < QT_VERSION_CHECK(5, 11, 0))
0174   auto textWidth = parent_->fontMetrics().width(charSets.last());
0175 #else
0176   auto textWidth = parent_->fontMetrics().horizontalAdvance(charSets.last());
0177 #endif
0178   m_charSetCombo->setMinimumWidth(1.5*textWidth);
0179   QString lastCharSet = config.readEntry("Last Character Set");
0180   if(!lastCharSet.isEmpty()) {
0181     if(!charSets.contains(lastCharSet)) charSets += lastCharSet;
0182     m_charSetCombo->setCurrentText(lastCharSet);
0183   }
0184   m_charSetCombo->addItems(charSets);
0185   const QString marcFormat = config.readEntry("Format");
0186   if(!marcFormat.isEmpty()) {
0187     m_marcFormatCombo->setCurrentText(marcFormat);
0188   }
0189   return m_widget;
0190 }
0191 
0192 void MarcImporter::setCharacterSet(const QString& charSet_) {
0193   m_marcCharSet = charSet_;
0194   if(m_widget) {
0195     m_charSetCombo->setEditText(charSet_);
0196   }
0197 }
0198 
0199 void MarcImporter::slotCancel() {
0200   m_cancelled = true;
0201 }
0202 
0203 bool MarcImporter::initMARCHandler() {
0204   if(m_MARCHandler) {
0205     return true;
0206   }
0207 
0208   QString xsltName = m_isUnimarc ? QStringLiteral("UNIMARC2MODS3.xsl")
0209                                  : QStringLiteral("MARC21slim2MODS3.xsl");
0210   QString xsltfile = DataFileRegistry::self()->locate(xsltName);
0211   if(xsltfile.isEmpty()) {
0212     myWarning() << "can not locate" << xsltName;
0213     return false;
0214   }
0215 
0216   m_MARCHandler = new XSLTHandler(QUrl::fromLocalFile(xsltfile));
0217   if(!m_MARCHandler->isValid()) {
0218     myWarning() << "error in MARC21slim2MODS3.xsl.";
0219     delete m_MARCHandler;
0220     m_MARCHandler = nullptr;
0221     return false;
0222   }
0223   return true;
0224 }
0225 
0226 bool MarcImporter::initMODSHandler() {
0227   if(m_MODSHandler) {
0228     return true;
0229   }
0230 
0231   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("mods2tellico.xsl"));
0232   if(xsltfile.isEmpty()) {
0233     myWarning() << "can not locate mods2tellico.xsl.";
0234     return false;
0235   }
0236 
0237   m_MODSHandler = new XSLTHandler(QUrl::fromLocalFile(xsltfile));
0238   if(!m_MODSHandler->isValid()) {
0239     myWarning() << "error in mods2tellico.xsl.";
0240     delete m_MODSHandler;
0241     m_MODSHandler = nullptr;
0242     return false;
0243   }
0244   return true;
0245 }