File indexing completed on 2024-04-28 16:32:00

0001 /***************************************************************************
0002     Copyright (C) 2009-2020 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "fieldformat.h"
0026 #include "config/tellico_config.h"
0027 
0028 using Tellico::FieldFormat;
0029 
0030 QString FieldFormat::delimiterString() {
0031   static QString ds(QStringLiteral("; "));
0032   return ds;
0033 }
0034 
0035 QRegularExpression FieldFormat::delimiterRegularExpression() {
0036   static const QRegularExpression drx(QStringLiteral("\\s*;\\s*"));
0037   return drx;
0038 }
0039 
0040 QRegularExpression FieldFormat::commaSplitRegularExpression() {
0041   static const QRegularExpression commaSplitRx(QStringLiteral("\\s*,\\s*"));
0042   return commaSplitRx;
0043 }
0044 
0045 QString FieldFormat::fixupValue(const QString& value_) {
0046   QString value = value_;
0047   value.replace(delimiterRegularExpression(), delimiterString());
0048   return value;
0049 }
0050 
0051 QString FieldFormat::columnDelimiterString() {
0052   static QString cds(QStringLiteral("::"));
0053   return cds;
0054 }
0055 
0056 QString FieldFormat::rowDelimiterString() {
0057   return QChar(0x2028);
0058 }
0059 
0060 QString FieldFormat::matchValueRegularExpression(const QString& value_) {
0061   // The regular expression accounts for values serialized either with multiple values,
0062   // values in table columns, or values in table rows
0063   // Beginning characters don't have to include the column delimiter since the filter
0064   // only matches values in the first column
0065   static const QString beginChars = FieldFormat::delimiterString()
0066                                   + QLatin1String("|")
0067                                   + FieldFormat::rowDelimiterString();
0068   static const QString endChars = QLatin1String("[")
0069                                 + FieldFormat::delimiterString().at(0)
0070                                 + FieldFormat::columnDelimiterString().at(0)
0071                                 + FieldFormat::rowDelimiterString().at(0)
0072                                 + QLatin1String("]");
0073   return QLatin1String("(^|") + beginChars + QLatin1String(")") +
0074          QRegularExpression::escape(value_) +
0075          QLatin1String("($|") + endChars + QLatin1String(")");
0076 }
0077 
0078 QStringList FieldFormat::splitValue(const QString& string_, SplitParsing parsing_) {
0079   if(string_.isEmpty()) {
0080     return QStringList();
0081   }
0082 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0083   return parsing_ == StringSplit ? string_.split(delimiterString(), QString::KeepEmptyParts)
0084                                  : string_.split(delimiterRegularExpression(), QString::KeepEmptyParts);
0085 #else
0086   return parsing_ == StringSplit ? string_.split(delimiterString(), Qt::KeepEmptyParts)
0087                                  : string_.split(delimiterRegularExpression(), Qt::KeepEmptyParts);
0088 #endif
0089 }
0090 
0091 QStringList FieldFormat::splitRow(const QString& string_) {
0092 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0093   return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), QString::KeepEmptyParts);
0094 #else
0095   return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), Qt::KeepEmptyParts);
0096 #endif
0097 }
0098 
0099 QStringList FieldFormat::splitTable(const QString& string_) {
0100 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0101   return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), QString::KeepEmptyParts);
0102 #else
0103   return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), Qt::KeepEmptyParts);
0104 #endif
0105 }
0106 
0107 QString FieldFormat::sortKeyTitle(const QString& title_) {
0108   foreach(const QString& article, Config::articleList()) {
0109     // assume white space is already stripped
0110     // the articles are already in lower-case
0111     if(title_.startsWith(article + QLatin1Char(' '))) {
0112       return title_.mid(article.length() + 1);
0113     }
0114   }
0115   // check apostrophes, too
0116   foreach(const QString& article, Config::articleAposList()) {
0117     if(title_.startsWith(article)) {
0118       return title_.mid(article.length());
0119     }
0120   }
0121   return title_;
0122 }
0123 
0124 void FieldFormat::stripArticles(QString& value) {
0125   static QStringList oldArticleList;
0126   static QList<QRegularExpression> rxList;
0127   if(oldArticleList != Config::articleList()) {
0128     oldArticleList = Config::articleList();
0129     rxList.clear();
0130     foreach(const QString& article, oldArticleList) {
0131       rxList << QRegularExpression(QLatin1String("\\b") +
0132                                    QRegularExpression::escape(article) +
0133                                    QLatin1String("\\b"));
0134     }
0135   }
0136   foreach(const QRegularExpression& rx, rxList) {
0137     value.remove(rx);
0138   }
0139   value = value.trimmed();
0140   if(value.endsWith(QLatin1Char(','))) {
0141     value.chop(1);
0142   }
0143 }
0144 
0145 QString FieldFormat::format(const QString& value_, Type type_, Request request_) {
0146   if(value_.isEmpty()) {
0147     return value_;
0148   }
0149 
0150   Options options;
0151   if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoCapitalization())) {
0152     options |= FormatCapitalize;
0153   }
0154   if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoFormat())) {
0155     options |= FormatAuto;
0156   }
0157 
0158   QString text;
0159   switch(type_) {
0160     case FormatTitle:
0161       text = title(value_, options);
0162       break;
0163     case FormatName:
0164       text = name(value_, options);
0165       break;
0166     case FormatDate:
0167       text = date(value_);
0168       break;
0169     case FormatPlain:
0170       text = options.testFlag(FormatCapitalize) ? capitalize(value_) : value_;
0171       break;
0172     case FormatNone:
0173       text = value_;
0174       break;
0175   }
0176   return text;
0177 }
0178 
0179 QString FieldFormat::title(const QString& title_, Options opt_) {
0180   QString newTitle = title_;
0181   QString tail;
0182   if(opt_.testFlag(FormatAuto)) {
0183     // special case for multi-column tables, assume user never has column delimiter in a value
0184     const int pos = newTitle.indexOf(columnDelimiterString());
0185     if(pos > -1) {
0186       tail = columnDelimiterString() + newTitle.mid(pos + columnDelimiterString().length());
0187       newTitle = newTitle.left(pos);
0188     }
0189 
0190     // arbitrarily impose rule that a space must follow every comma
0191     // has to come before the capitalization since the space is significant
0192     newTitle.replace(commaSplitRegularExpression(), QStringLiteral(", "));
0193   }
0194 
0195   if(opt_.testFlag(FormatCapitalize)) {
0196     newTitle = capitalize(newTitle);
0197   }
0198 
0199   if(opt_.testFlag(FormatAuto)) {
0200     const QString lower = newTitle.toLower();
0201     // TODO if the title has ",the" at the end, put it at the front
0202     foreach(const QString& article, Config::articleList()) {
0203       // assume white space is already stripped
0204       // the articles are already in lower-case
0205       if(lower.startsWith(article + QLatin1Char(' '))) {
0206         QRegularExpression rx(QLatin1Char('^') + QRegularExpression::escape(article) + QLatin1String("\\s*"),
0207                               QRegularExpression::CaseInsensitiveOption);
0208         // can't just use article since it's in lower-case
0209         QString titleArticle = newTitle.left(article.length());
0210         newTitle = newTitle.remove(rx)
0211                            .append(QLatin1String(", "))
0212                            .append(titleArticle);
0213         break;
0214       }
0215     }
0216   }
0217 
0218   return newTitle + tail;
0219 }
0220 
0221 QString FieldFormat::name(const QString& name_, Options opt_) {
0222   static const QRegularExpression spaceComma(QLatin1String("[\\s,]"));
0223   // the ending look-ahead is so that a space is not added at the end
0224   static const QRegularExpression periodSpace(QLatin1String("\\.\\s*(?=.)"));
0225 
0226   QString name = name_;
0227   name.replace(periodSpace, QStringLiteral(". "));
0228   if(opt_.testFlag(FormatCapitalize)) {
0229     name = capitalize(name);
0230   }
0231 
0232   // split the name by white space and commas
0233 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0234   QStringList words = name.split(spaceComma, QString::SkipEmptyParts);
0235 #else
0236   QStringList words = name.split(spaceComma, Qt::SkipEmptyParts);
0237 #endif
0238   // psycho case where name == ","
0239   if(words.isEmpty()) {
0240     return name;
0241   }
0242 
0243   // if it contains a comma already and the last word is not a suffix, don't format it
0244   if(!opt_.testFlag(FormatAuto) ||
0245       (name.indexOf(QLatin1Char(',')) > -1 && !Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive))) {
0246     // arbitrarily impose rule that no spaces before a comma and
0247     // a single space after every comma
0248     name.replace(commaSplitRegularExpression(), QStringLiteral(", "));
0249   } else if(words.count() > 1) {
0250     // otherwise split it by white space, move the last word to the front
0251     // but only if there is more than one word
0252 
0253     // if the last word is a suffix, it has to be kept with last name
0254     if(Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive)) {
0255       words.prepend(words.last().append(QLatin1Char(',')));
0256       words.removeLast();
0257     }
0258 
0259     // now move the word
0260     // adding comma here when there had been a suffix is because it was originally split with space or comma
0261     words.prepend(words.last().append(QLatin1Char(',')));
0262     words.removeLast();
0263 
0264     // this is probably just something for me, limited to english
0265     // In a previous version of Tellico, using a prefix such as "van der" (with a space) would work
0266     // because QStringList::contains did substring matching, but now need to add a function for tokenizing
0267     // the list with whitespace as well as comma
0268     while(Config::surnamePrefixTokens().contains(words.last(), Qt::CaseInsensitive)) {
0269       words.prepend(words.last());
0270       words.removeLast();
0271     }
0272 
0273     name = words.join(QLatin1String(" "));
0274   }
0275 
0276   return name;
0277 }
0278 
0279 QString FieldFormat::date(const QString& date_) {
0280   // internally, this is "year-month-day"
0281   // any of the three may be empty
0282   // if they're not digits, return the original string
0283   bool empty = true;
0284   // for empty year, use current
0285   // for empty month or date, use 1
0286   QStringList s = date_.split(QLatin1Char('-'));
0287   bool ok = true;
0288   int y = s.count() > 0 ? s[0].toInt(&ok) : QDate::currentDate().year();
0289   if(ok) {
0290     empty = false;
0291   } else {
0292     y = QDate::currentDate().year();
0293   }
0294   int m = s.count() > 1 ? s[1].toInt(&ok) : 1;
0295   if(ok) {
0296     empty = false;
0297   } else {
0298     m = 1;
0299   }
0300   int d = s.count() > 2 ? s[2].toInt(&ok) : 1;
0301   if(ok) {
0302     empty = false;
0303   } else {
0304     d = 1;
0305   }
0306   // rather use ISO date formatting than locale formatting for now. Primarily, it makes sorting just work.
0307   return empty ? date_ : QDate(y, m, d).toString(Qt::ISODate);
0308 }
0309 
0310 QString FieldFormat::capitalize(QString str_) {
0311   if(str_.isEmpty()) {
0312     return str_;
0313   }
0314 
0315   // first letter is always capitalized
0316   str_.replace(0, 1, str_.at(0).toUpper());
0317 
0318   // regexp to split words
0319   static const QRegularExpression rx(QLatin1String("[-\\s,.;]"));
0320 
0321   // special case for french words like l'espace
0322   QRegularExpressionMatch match = rx.match(str_, 1);
0323   int pos = match.capturedStart();
0324   int nextPos;
0325 
0326   QString word = str_.mid(0, pos);
0327   // now check to see if words starts with apostrophe list
0328   foreach(const QString& aposArticle, Config::articleAposList()) {
0329     if(word.startsWith(aposArticle, Qt::CaseInsensitive)) {
0330       const uint l = aposArticle.length();
0331       str_.replace(l, 1, str_.at(l).toUpper());
0332       break;
0333     }
0334   }
0335 
0336   while(pos > -1) {
0337     // also need to compare against list of non-capitalized words
0338     match = rx.match(str_, pos+1);
0339     nextPos = match.capturedStart();
0340     if(nextPos == -1) {
0341       nextPos = str_.length();
0342     }
0343     word = str_.mid(pos+1, nextPos-pos-1);
0344     bool aposMatch = false;
0345     // now check to see if words starts with apostrophe list
0346     foreach(const QString& aposArticle, Config::articleAposList()) {
0347       if(word.startsWith(aposArticle, Qt::CaseInsensitive)) {
0348         const uint l = aposArticle.length();
0349         // if the word is not the end of the string, capitalize the letter after it
0350         if(int(pos+l+1) < str_.length()) {
0351           str_.replace(pos+l+1, 1, str_.at(pos+l+1).toUpper());
0352         }
0353         aposMatch = true;
0354         break;
0355       }
0356     }
0357 
0358     if(!aposMatch) {
0359       // check against the noCapitalization list AND the surnamePrefix list
0360       // does this hold true everywhere other than english?
0361       if(!Config::noCapitalizationList().contains(word, Qt::CaseInsensitive) &&
0362          !Config::surnamePrefixTokens().contains(word, Qt::CaseInsensitive) &&
0363          nextPos-pos > 1) {
0364         str_.replace(pos+1, 1, str_.at(pos+1).toUpper());
0365       }
0366     }
0367 
0368     match = rx.match(str_, pos+1);
0369     pos = match.capturedStart();
0370   }
0371   return str_;
0372 }