File indexing completed on 2024-04-28 05:08:22
0001 /*************************************************************************** 0002 Copyright (C) 2009-2020 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "fieldformat.h" 0026 #include "config/tellico_config.h" 0027 0028 using Tellico::FieldFormat; 0029 0030 QString FieldFormat::delimiterString() { 0031 static QString ds(QStringLiteral("; ")); 0032 return ds; 0033 } 0034 0035 QRegularExpression FieldFormat::delimiterRegularExpression() { 0036 static const QRegularExpression drx(QStringLiteral("\\s*;\\s*")); 0037 return drx; 0038 } 0039 0040 QRegularExpression FieldFormat::commaSplitRegularExpression() { 0041 static const QRegularExpression commaSplitRx(QStringLiteral("\\s*,\\s*")); 0042 return commaSplitRx; 0043 } 0044 0045 QString FieldFormat::fixupValue(const QString& value_) { 0046 QString value = value_; 0047 value.replace(delimiterRegularExpression(), delimiterString()); 0048 return value; 0049 } 0050 0051 QString FieldFormat::columnDelimiterString() { 0052 static QString cds(QStringLiteral("::")); 0053 return cds; 0054 } 0055 0056 QString FieldFormat::rowDelimiterString() { 0057 return QChar(0x2028); 0058 } 0059 0060 QString FieldFormat::matchValueRegularExpression(const QString& value_) { 0061 // The regular expression accounts for values serialized either with multiple values, 0062 // values in table columns, or values in table rows 0063 // Beginning characters don't have to include the column delimiter since the filter 0064 // only matches values in the first column 0065 static const QString beginChars = FieldFormat::delimiterString() 0066 + QLatin1String("|") 0067 + FieldFormat::rowDelimiterString(); 0068 static const QString endChars = QLatin1String("[") 0069 + FieldFormat::delimiterString().at(0) 0070 + FieldFormat::columnDelimiterString().at(0) 0071 + FieldFormat::rowDelimiterString().at(0) 0072 + QLatin1String("]"); 0073 return QLatin1String("(^|") + beginChars + QLatin1String(")") + 0074 QRegularExpression::escape(value_) + 0075 QLatin1String("($|") + endChars + QLatin1String(")"); 0076 } 0077 0078 QStringList FieldFormat::splitValue(const QString& string_, SplitParsing parsing_) { 0079 if(string_.isEmpty()) { 0080 return QStringList(); 0081 } 0082 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0083 const auto keepFlag = QString::KeepEmptyParts; 0084 #else 0085 const auto keepFlag = Qt::KeepEmptyParts; 0086 #endif 0087 switch(parsing_) { 0088 case StringSplit: 0089 return string_.split(delimiterString(), keepFlag); 0090 case RegExpSplit: 0091 return string_.split(delimiterRegularExpression(), keepFlag); 0092 case CommaRegExpSplit: 0093 return string_.split(commaSplitRegularExpression(), keepFlag); 0094 } 0095 // not needed, but stops warning messages 0096 return QStringList(); 0097 } 0098 0099 QStringList FieldFormat::splitRow(const QString& string_) { 0100 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0101 return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), QString::KeepEmptyParts); 0102 #else 0103 return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), Qt::KeepEmptyParts); 0104 #endif 0105 } 0106 0107 QStringList FieldFormat::splitTable(const QString& string_) { 0108 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0109 return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), QString::KeepEmptyParts); 0110 #else 0111 return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), Qt::KeepEmptyParts); 0112 #endif 0113 } 0114 0115 QString FieldFormat::sortKeyTitle(const QString& title_) { 0116 foreach(const QString& article, Config::articleList()) { 0117 // assume white space is already stripped 0118 // the articles are already in lower-case 0119 if(title_.startsWith(article + QLatin1Char(' '))) { 0120 return title_.mid(article.length() + 1); 0121 } 0122 } 0123 // check apostrophes, too 0124 foreach(const QString& article, Config::articleAposList()) { 0125 if(title_.startsWith(article)) { 0126 return title_.mid(article.length()); 0127 } 0128 } 0129 return title_; 0130 } 0131 0132 void FieldFormat::stripArticles(QString& value) { 0133 static QStringList oldArticleList; 0134 static QList<QRegularExpression> rxList; 0135 if(oldArticleList != Config::articleList()) { 0136 oldArticleList = Config::articleList(); 0137 rxList.clear(); 0138 foreach(const QString& article, oldArticleList) { 0139 rxList << QRegularExpression(QLatin1String("\\b") + 0140 QRegularExpression::escape(article) + 0141 QLatin1String("\\b")); 0142 } 0143 } 0144 foreach(const QRegularExpression& rx, rxList) { 0145 value.remove(rx); 0146 } 0147 value = value.trimmed(); 0148 if(value.endsWith(QLatin1Char(','))) { 0149 value.chop(1); 0150 } 0151 } 0152 0153 QString FieldFormat::format(const QString& value_, Type type_, Request request_) { 0154 if(value_.isEmpty()) { 0155 return value_; 0156 } 0157 0158 Options options; 0159 if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoCapitalization())) { 0160 options |= FormatCapitalize; 0161 } 0162 if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoFormat())) { 0163 options |= FormatAuto; 0164 } 0165 0166 QString text; 0167 switch(type_) { 0168 case FormatTitle: 0169 text = title(value_, options); 0170 break; 0171 case FormatName: 0172 text = name(value_, options); 0173 break; 0174 case FormatDate: 0175 text = date(value_); 0176 break; 0177 case FormatPlain: 0178 text = options.testFlag(FormatCapitalize) ? capitalize(value_) : value_; 0179 break; 0180 case FormatNone: 0181 text = value_; 0182 break; 0183 } 0184 return text; 0185 } 0186 0187 QString FieldFormat::title(const QString& title_, Options opt_) { 0188 QString newTitle = title_; 0189 QString tail; 0190 if(opt_.testFlag(FormatAuto)) { 0191 // special case for multi-column tables, assume user never has column delimiter in a value 0192 const int pos = newTitle.indexOf(columnDelimiterString()); 0193 if(pos > -1) { 0194 tail = columnDelimiterString() + newTitle.mid(pos + columnDelimiterString().length()); 0195 newTitle = newTitle.left(pos); 0196 } 0197 0198 // arbitrarily impose rule that a space must follow every comma 0199 // has to come before the capitalization since the space is significant 0200 newTitle.replace(commaSplitRegularExpression(), QStringLiteral(", ")); 0201 } 0202 0203 if(opt_.testFlag(FormatCapitalize)) { 0204 newTitle = capitalize(newTitle); 0205 } 0206 0207 if(opt_.testFlag(FormatAuto)) { 0208 const QString lower = newTitle.toLower(); 0209 // TODO if the title has ",the" at the end, put it at the front 0210 foreach(const QString& article, Config::articleList()) { 0211 // assume white space is already stripped 0212 // the articles are already in lower-case 0213 if(lower.startsWith(article + QLatin1Char(' '))) { 0214 QRegularExpression rx(QLatin1Char('^') + QRegularExpression::escape(article) + QLatin1String("\\s*"), 0215 QRegularExpression::CaseInsensitiveOption); 0216 // can't just use article since it's in lower-case 0217 QString titleArticle = newTitle.left(article.length()); 0218 newTitle = newTitle.remove(rx) 0219 .append(QLatin1String(", ")) 0220 .append(titleArticle); 0221 break; 0222 } 0223 } 0224 } 0225 0226 return newTitle + tail; 0227 } 0228 0229 QString FieldFormat::name(const QString& name_, Options opt_) { 0230 static const QRegularExpression spaceComma(QLatin1String("[\\s,]")); 0231 // the ending look-ahead is so that a space is not added at the end 0232 static const QRegularExpression periodSpace(QLatin1String("\\.\\s*(?=.)")); 0233 0234 QString name = name_; 0235 name.replace(periodSpace, QStringLiteral(". ")); 0236 if(opt_.testFlag(FormatCapitalize)) { 0237 name = capitalize(name); 0238 } 0239 0240 // split the name by white space and commas 0241 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0242 QStringList words = name.split(spaceComma, QString::SkipEmptyParts); 0243 #else 0244 QStringList words = name.split(spaceComma, Qt::SkipEmptyParts); 0245 #endif 0246 // psycho case where name == "," 0247 if(words.isEmpty()) { 0248 return name; 0249 } 0250 0251 // if it contains a comma already and the last word is not a suffix, don't format it 0252 if(!opt_.testFlag(FormatAuto) || 0253 (name.indexOf(QLatin1Char(',')) > -1 && !Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive))) { 0254 // arbitrarily impose rule that no spaces before a comma and 0255 // a single space after every comma 0256 name.replace(commaSplitRegularExpression(), QStringLiteral(", ")); 0257 } else if(words.count() > 1) { 0258 // otherwise split it by white space, move the last word to the front 0259 // but only if there is more than one word 0260 0261 // if the last word is a suffix, it has to be kept with last name 0262 if(Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive)) { 0263 words.prepend(words.last().append(QLatin1Char(','))); 0264 words.removeLast(); 0265 } 0266 0267 // now move the word 0268 // adding comma here when there had been a suffix is because it was originally split with space or comma 0269 words.prepend(words.last().append(QLatin1Char(','))); 0270 words.removeLast(); 0271 0272 // this is probably just something for me, limited to english 0273 // In a previous version of Tellico, using a prefix such as "van der" (with a space) would work 0274 // because QStringList::contains did substring matching, but now need to add a function for tokenizing 0275 // the list with whitespace as well as comma 0276 while(Config::surnamePrefixTokens().contains(words.last(), Qt::CaseInsensitive)) { 0277 words.prepend(words.last()); 0278 words.removeLast(); 0279 } 0280 0281 name = words.join(QLatin1String(" ")); 0282 } 0283 0284 return name; 0285 } 0286 0287 QString FieldFormat::date(const QString& date_) { 0288 // internally, this is "year-month-day" 0289 // any of the three may be empty 0290 // if they're not digits, return the original string 0291 bool empty = true; 0292 // for empty year, use current 0293 // for empty month or date, use 1 0294 QStringList s = date_.split(QLatin1Char('-')); 0295 bool ok = true; 0296 int y = s.count() > 0 ? s[0].toInt(&ok) : QDate::currentDate().year(); 0297 if(ok) { 0298 empty = false; 0299 } else { 0300 y = QDate::currentDate().year(); 0301 } 0302 int m = s.count() > 1 ? s[1].toInt(&ok) : 1; 0303 if(ok) { 0304 empty = false; 0305 } else { 0306 m = 1; 0307 } 0308 int d = s.count() > 2 ? s[2].toInt(&ok) : 1; 0309 if(ok) { 0310 empty = false; 0311 } else { 0312 d = 1; 0313 } 0314 // rather use ISO date formatting than locale formatting for now. Primarily, it makes sorting just work. 0315 return empty ? date_ : QDate(y, m, d).toString(Qt::ISODate); 0316 } 0317 0318 QString FieldFormat::capitalize(QString str_) { 0319 if(str_.isEmpty()) { 0320 return str_; 0321 } 0322 0323 // first letter is always capitalized 0324 str_.replace(0, 1, str_.at(0).toUpper()); 0325 0326 // regexp to split words 0327 static const QRegularExpression rx(QLatin1String("[-\\s,.;]")); 0328 0329 // special case for french words like l'espace 0330 QRegularExpressionMatch match = rx.match(str_, 1); 0331 int pos = match.capturedStart(); 0332 int nextPos; 0333 0334 QString word = str_.mid(0, pos); 0335 // now check to see if words starts with apostrophe list 0336 foreach(const QString& aposArticle, Config::articleAposList()) { 0337 if(word.startsWith(aposArticle, Qt::CaseInsensitive)) { 0338 const uint l = aposArticle.length(); 0339 str_.replace(l, 1, str_.at(l).toUpper()); 0340 break; 0341 } 0342 } 0343 0344 while(pos > -1) { 0345 // also need to compare against list of non-capitalized words 0346 match = rx.match(str_, pos+1); 0347 nextPos = match.capturedStart(); 0348 if(nextPos == -1) { 0349 nextPos = str_.length(); 0350 } 0351 word = str_.mid(pos+1, nextPos-pos-1); 0352 bool aposMatch = false; 0353 // now check to see if words starts with apostrophe list 0354 foreach(const QString& aposArticle, Config::articleAposList()) { 0355 if(word.startsWith(aposArticle, Qt::CaseInsensitive)) { 0356 const uint l = aposArticle.length(); 0357 // if the word is not the end of the string, capitalize the letter after it 0358 if(int(pos+l+1) < str_.length()) { 0359 str_.replace(pos+l+1, 1, str_.at(pos+l+1).toUpper()); 0360 } 0361 aposMatch = true; 0362 break; 0363 } 0364 } 0365 0366 if(!aposMatch) { 0367 // check against the noCapitalization list AND the surnamePrefix list 0368 // does this hold true everywhere other than english? 0369 if(!Config::noCapitalizationList().contains(word, Qt::CaseInsensitive) && 0370 !Config::surnamePrefixTokens().contains(word, Qt::CaseInsensitive) && 0371 nextPos-pos > 1) { 0372 str_.replace(pos+1, 1, str_.at(pos+1).toUpper()); 0373 } 0374 } 0375 0376 match = rx.match(str_, pos+1); 0377 pos = match.capturedStart(); 0378 } 0379 return str_; 0380 }