File indexing completed on 2024-04-28 05:08:22

0001 /***************************************************************************
0002     Copyright (C) 2009-2020 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "fieldformat.h"
0026 #include "config/tellico_config.h"
0027 
0028 using Tellico::FieldFormat;
0029 
0030 QString FieldFormat::delimiterString() {
0031   static QString ds(QStringLiteral("; "));
0032   return ds;
0033 }
0034 
0035 QRegularExpression FieldFormat::delimiterRegularExpression() {
0036   static const QRegularExpression drx(QStringLiteral("\\s*;\\s*"));
0037   return drx;
0038 }
0039 
0040 QRegularExpression FieldFormat::commaSplitRegularExpression() {
0041   static const QRegularExpression commaSplitRx(QStringLiteral("\\s*,\\s*"));
0042   return commaSplitRx;
0043 }
0044 
0045 QString FieldFormat::fixupValue(const QString& value_) {
0046   QString value = value_;
0047   value.replace(delimiterRegularExpression(), delimiterString());
0048   return value;
0049 }
0050 
0051 QString FieldFormat::columnDelimiterString() {
0052   static QString cds(QStringLiteral("::"));
0053   return cds;
0054 }
0055 
0056 QString FieldFormat::rowDelimiterString() {
0057   return QChar(0x2028);
0058 }
0059 
0060 QString FieldFormat::matchValueRegularExpression(const QString& value_) {
0061   // The regular expression accounts for values serialized either with multiple values,
0062   // values in table columns, or values in table rows
0063   // Beginning characters don't have to include the column delimiter since the filter
0064   // only matches values in the first column
0065   static const QString beginChars = FieldFormat::delimiterString()
0066                                   + QLatin1String("|")
0067                                   + FieldFormat::rowDelimiterString();
0068   static const QString endChars = QLatin1String("[")
0069                                 + FieldFormat::delimiterString().at(0)
0070                                 + FieldFormat::columnDelimiterString().at(0)
0071                                 + FieldFormat::rowDelimiterString().at(0)
0072                                 + QLatin1String("]");
0073   return QLatin1String("(^|") + beginChars + QLatin1String(")") +
0074          QRegularExpression::escape(value_) +
0075          QLatin1String("($|") + endChars + QLatin1String(")");
0076 }
0077 
0078 QStringList FieldFormat::splitValue(const QString& string_, SplitParsing parsing_) {
0079   if(string_.isEmpty()) {
0080     return QStringList();
0081   }
0082 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0083   const auto keepFlag = QString::KeepEmptyParts;
0084 #else
0085   const auto keepFlag = Qt::KeepEmptyParts;
0086 #endif
0087   switch(parsing_) {
0088     case StringSplit:
0089       return string_.split(delimiterString(), keepFlag);
0090     case RegExpSplit:
0091       return string_.split(delimiterRegularExpression(), keepFlag);
0092     case CommaRegExpSplit:
0093       return string_.split(commaSplitRegularExpression(), keepFlag);
0094   }
0095   // not needed, but stops warning messages
0096   return QStringList();
0097 }
0098 
0099 QStringList FieldFormat::splitRow(const QString& string_) {
0100 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0101   return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), QString::KeepEmptyParts);
0102 #else
0103   return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), Qt::KeepEmptyParts);
0104 #endif
0105 }
0106 
0107 QStringList FieldFormat::splitTable(const QString& string_) {
0108 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0109   return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), QString::KeepEmptyParts);
0110 #else
0111   return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), Qt::KeepEmptyParts);
0112 #endif
0113 }
0114 
0115 QString FieldFormat::sortKeyTitle(const QString& title_) {
0116   foreach(const QString& article, Config::articleList()) {
0117     // assume white space is already stripped
0118     // the articles are already in lower-case
0119     if(title_.startsWith(article + QLatin1Char(' '))) {
0120       return title_.mid(article.length() + 1);
0121     }
0122   }
0123   // check apostrophes, too
0124   foreach(const QString& article, Config::articleAposList()) {
0125     if(title_.startsWith(article)) {
0126       return title_.mid(article.length());
0127     }
0128   }
0129   return title_;
0130 }
0131 
0132 void FieldFormat::stripArticles(QString& value) {
0133   static QStringList oldArticleList;
0134   static QList<QRegularExpression> rxList;
0135   if(oldArticleList != Config::articleList()) {
0136     oldArticleList = Config::articleList();
0137     rxList.clear();
0138     foreach(const QString& article, oldArticleList) {
0139       rxList << QRegularExpression(QLatin1String("\\b") +
0140                                    QRegularExpression::escape(article) +
0141                                    QLatin1String("\\b"));
0142     }
0143   }
0144   foreach(const QRegularExpression& rx, rxList) {
0145     value.remove(rx);
0146   }
0147   value = value.trimmed();
0148   if(value.endsWith(QLatin1Char(','))) {
0149     value.chop(1);
0150   }
0151 }
0152 
0153 QString FieldFormat::format(const QString& value_, Type type_, Request request_) {
0154   if(value_.isEmpty()) {
0155     return value_;
0156   }
0157 
0158   Options options;
0159   if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoCapitalization())) {
0160     options |= FormatCapitalize;
0161   }
0162   if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoFormat())) {
0163     options |= FormatAuto;
0164   }
0165 
0166   QString text;
0167   switch(type_) {
0168     case FormatTitle:
0169       text = title(value_, options);
0170       break;
0171     case FormatName:
0172       text = name(value_, options);
0173       break;
0174     case FormatDate:
0175       text = date(value_);
0176       break;
0177     case FormatPlain:
0178       text = options.testFlag(FormatCapitalize) ? capitalize(value_) : value_;
0179       break;
0180     case FormatNone:
0181       text = value_;
0182       break;
0183   }
0184   return text;
0185 }
0186 
0187 QString FieldFormat::title(const QString& title_, Options opt_) {
0188   QString newTitle = title_;
0189   QString tail;
0190   if(opt_.testFlag(FormatAuto)) {
0191     // special case for multi-column tables, assume user never has column delimiter in a value
0192     const int pos = newTitle.indexOf(columnDelimiterString());
0193     if(pos > -1) {
0194       tail = columnDelimiterString() + newTitle.mid(pos + columnDelimiterString().length());
0195       newTitle = newTitle.left(pos);
0196     }
0197 
0198     // arbitrarily impose rule that a space must follow every comma
0199     // has to come before the capitalization since the space is significant
0200     newTitle.replace(commaSplitRegularExpression(), QStringLiteral(", "));
0201   }
0202 
0203   if(opt_.testFlag(FormatCapitalize)) {
0204     newTitle = capitalize(newTitle);
0205   }
0206 
0207   if(opt_.testFlag(FormatAuto)) {
0208     const QString lower = newTitle.toLower();
0209     // TODO if the title has ",the" at the end, put it at the front
0210     foreach(const QString& article, Config::articleList()) {
0211       // assume white space is already stripped
0212       // the articles are already in lower-case
0213       if(lower.startsWith(article + QLatin1Char(' '))) {
0214         QRegularExpression rx(QLatin1Char('^') + QRegularExpression::escape(article) + QLatin1String("\\s*"),
0215                               QRegularExpression::CaseInsensitiveOption);
0216         // can't just use article since it's in lower-case
0217         QString titleArticle = newTitle.left(article.length());
0218         newTitle = newTitle.remove(rx)
0219                            .append(QLatin1String(", "))
0220                            .append(titleArticle);
0221         break;
0222       }
0223     }
0224   }
0225 
0226   return newTitle + tail;
0227 }
0228 
0229 QString FieldFormat::name(const QString& name_, Options opt_) {
0230   static const QRegularExpression spaceComma(QLatin1String("[\\s,]"));
0231   // the ending look-ahead is so that a space is not added at the end
0232   static const QRegularExpression periodSpace(QLatin1String("\\.\\s*(?=.)"));
0233 
0234   QString name = name_;
0235   name.replace(periodSpace, QStringLiteral(". "));
0236   if(opt_.testFlag(FormatCapitalize)) {
0237     name = capitalize(name);
0238   }
0239 
0240   // split the name by white space and commas
0241 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0242   QStringList words = name.split(spaceComma, QString::SkipEmptyParts);
0243 #else
0244   QStringList words = name.split(spaceComma, Qt::SkipEmptyParts);
0245 #endif
0246   // psycho case where name == ","
0247   if(words.isEmpty()) {
0248     return name;
0249   }
0250 
0251   // if it contains a comma already and the last word is not a suffix, don't format it
0252   if(!opt_.testFlag(FormatAuto) ||
0253       (name.indexOf(QLatin1Char(',')) > -1 && !Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive))) {
0254     // arbitrarily impose rule that no spaces before a comma and
0255     // a single space after every comma
0256     name.replace(commaSplitRegularExpression(), QStringLiteral(", "));
0257   } else if(words.count() > 1) {
0258     // otherwise split it by white space, move the last word to the front
0259     // but only if there is more than one word
0260 
0261     // if the last word is a suffix, it has to be kept with last name
0262     if(Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive)) {
0263       words.prepend(words.last().append(QLatin1Char(',')));
0264       words.removeLast();
0265     }
0266 
0267     // now move the word
0268     // adding comma here when there had been a suffix is because it was originally split with space or comma
0269     words.prepend(words.last().append(QLatin1Char(',')));
0270     words.removeLast();
0271 
0272     // this is probably just something for me, limited to english
0273     // In a previous version of Tellico, using a prefix such as "van der" (with a space) would work
0274     // because QStringList::contains did substring matching, but now need to add a function for tokenizing
0275     // the list with whitespace as well as comma
0276     while(Config::surnamePrefixTokens().contains(words.last(), Qt::CaseInsensitive)) {
0277       words.prepend(words.last());
0278       words.removeLast();
0279     }
0280 
0281     name = words.join(QLatin1String(" "));
0282   }
0283 
0284   return name;
0285 }
0286 
0287 QString FieldFormat::date(const QString& date_) {
0288   // internally, this is "year-month-day"
0289   // any of the three may be empty
0290   // if they're not digits, return the original string
0291   bool empty = true;
0292   // for empty year, use current
0293   // for empty month or date, use 1
0294   QStringList s = date_.split(QLatin1Char('-'));
0295   bool ok = true;
0296   int y = s.count() > 0 ? s[0].toInt(&ok) : QDate::currentDate().year();
0297   if(ok) {
0298     empty = false;
0299   } else {
0300     y = QDate::currentDate().year();
0301   }
0302   int m = s.count() > 1 ? s[1].toInt(&ok) : 1;
0303   if(ok) {
0304     empty = false;
0305   } else {
0306     m = 1;
0307   }
0308   int d = s.count() > 2 ? s[2].toInt(&ok) : 1;
0309   if(ok) {
0310     empty = false;
0311   } else {
0312     d = 1;
0313   }
0314   // rather use ISO date formatting than locale formatting for now. Primarily, it makes sorting just work.
0315   return empty ? date_ : QDate(y, m, d).toString(Qt::ISODate);
0316 }
0317 
0318 QString FieldFormat::capitalize(QString str_) {
0319   if(str_.isEmpty()) {
0320     return str_;
0321   }
0322 
0323   // first letter is always capitalized
0324   str_.replace(0, 1, str_.at(0).toUpper());
0325 
0326   // regexp to split words
0327   static const QRegularExpression rx(QLatin1String("[-\\s,.;]"));
0328 
0329   // special case for french words like l'espace
0330   QRegularExpressionMatch match = rx.match(str_, 1);
0331   int pos = match.capturedStart();
0332   int nextPos;
0333 
0334   QString word = str_.mid(0, pos);
0335   // now check to see if words starts with apostrophe list
0336   foreach(const QString& aposArticle, Config::articleAposList()) {
0337     if(word.startsWith(aposArticle, Qt::CaseInsensitive)) {
0338       const uint l = aposArticle.length();
0339       str_.replace(l, 1, str_.at(l).toUpper());
0340       break;
0341     }
0342   }
0343 
0344   while(pos > -1) {
0345     // also need to compare against list of non-capitalized words
0346     match = rx.match(str_, pos+1);
0347     nextPos = match.capturedStart();
0348     if(nextPos == -1) {
0349       nextPos = str_.length();
0350     }
0351     word = str_.mid(pos+1, nextPos-pos-1);
0352     bool aposMatch = false;
0353     // now check to see if words starts with apostrophe list
0354     foreach(const QString& aposArticle, Config::articleAposList()) {
0355       if(word.startsWith(aposArticle, Qt::CaseInsensitive)) {
0356         const uint l = aposArticle.length();
0357         // if the word is not the end of the string, capitalize the letter after it
0358         if(int(pos+l+1) < str_.length()) {
0359           str_.replace(pos+l+1, 1, str_.at(pos+l+1).toUpper());
0360         }
0361         aposMatch = true;
0362         break;
0363       }
0364     }
0365 
0366     if(!aposMatch) {
0367       // check against the noCapitalization list AND the surnamePrefix list
0368       // does this hold true everywhere other than english?
0369       if(!Config::noCapitalizationList().contains(word, Qt::CaseInsensitive) &&
0370          !Config::surnamePrefixTokens().contains(word, Qt::CaseInsensitive) &&
0371          nextPos-pos > 1) {
0372         str_.replace(pos+1, 1, str_.at(pos+1).toUpper());
0373       }
0374     }
0375 
0376     match = rx.match(str_, pos+1);
0377     pos = match.capturedStart();
0378   }
0379   return str_;
0380 }