File indexing completed on 2024-04-28 16:32:00
0001 /*************************************************************************** 0002 Copyright (C) 2009-2020 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "fieldformat.h" 0026 #include "config/tellico_config.h" 0027 0028 using Tellico::FieldFormat; 0029 0030 QString FieldFormat::delimiterString() { 0031 static QString ds(QStringLiteral("; ")); 0032 return ds; 0033 } 0034 0035 QRegularExpression FieldFormat::delimiterRegularExpression() { 0036 static const QRegularExpression drx(QStringLiteral("\\s*;\\s*")); 0037 return drx; 0038 } 0039 0040 QRegularExpression FieldFormat::commaSplitRegularExpression() { 0041 static const QRegularExpression commaSplitRx(QStringLiteral("\\s*,\\s*")); 0042 return commaSplitRx; 0043 } 0044 0045 QString FieldFormat::fixupValue(const QString& value_) { 0046 QString value = value_; 0047 value.replace(delimiterRegularExpression(), delimiterString()); 0048 return value; 0049 } 0050 0051 QString FieldFormat::columnDelimiterString() { 0052 static QString cds(QStringLiteral("::")); 0053 return cds; 0054 } 0055 0056 QString FieldFormat::rowDelimiterString() { 0057 return QChar(0x2028); 0058 } 0059 0060 QString FieldFormat::matchValueRegularExpression(const QString& value_) { 0061 // The regular expression accounts for values serialized either with multiple values, 0062 // values in table columns, or values in table rows 0063 // Beginning characters don't have to include the column delimiter since the filter 0064 // only matches values in the first column 0065 static const QString beginChars = FieldFormat::delimiterString() 0066 + QLatin1String("|") 0067 + FieldFormat::rowDelimiterString(); 0068 static const QString endChars = QLatin1String("[") 0069 + FieldFormat::delimiterString().at(0) 0070 + FieldFormat::columnDelimiterString().at(0) 0071 + FieldFormat::rowDelimiterString().at(0) 0072 + QLatin1String("]"); 0073 return QLatin1String("(^|") + beginChars + QLatin1String(")") + 0074 QRegularExpression::escape(value_) + 0075 QLatin1String("($|") + endChars + QLatin1String(")"); 0076 } 0077 0078 QStringList FieldFormat::splitValue(const QString& string_, SplitParsing parsing_) { 0079 if(string_.isEmpty()) { 0080 return QStringList(); 0081 } 0082 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0083 return parsing_ == StringSplit ? string_.split(delimiterString(), QString::KeepEmptyParts) 0084 : string_.split(delimiterRegularExpression(), QString::KeepEmptyParts); 0085 #else 0086 return parsing_ == StringSplit ? string_.split(delimiterString(), Qt::KeepEmptyParts) 0087 : string_.split(delimiterRegularExpression(), Qt::KeepEmptyParts); 0088 #endif 0089 } 0090 0091 QStringList FieldFormat::splitRow(const QString& string_) { 0092 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0093 return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), QString::KeepEmptyParts); 0094 #else 0095 return string_.isEmpty() ? QStringList() : string_.split(columnDelimiterString(), Qt::KeepEmptyParts); 0096 #endif 0097 } 0098 0099 QStringList FieldFormat::splitTable(const QString& string_) { 0100 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0101 return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), QString::KeepEmptyParts); 0102 #else 0103 return string_.isEmpty() ? QStringList() : string_.split(rowDelimiterString(), Qt::KeepEmptyParts); 0104 #endif 0105 } 0106 0107 QString FieldFormat::sortKeyTitle(const QString& title_) { 0108 foreach(const QString& article, Config::articleList()) { 0109 // assume white space is already stripped 0110 // the articles are already in lower-case 0111 if(title_.startsWith(article + QLatin1Char(' '))) { 0112 return title_.mid(article.length() + 1); 0113 } 0114 } 0115 // check apostrophes, too 0116 foreach(const QString& article, Config::articleAposList()) { 0117 if(title_.startsWith(article)) { 0118 return title_.mid(article.length()); 0119 } 0120 } 0121 return title_; 0122 } 0123 0124 void FieldFormat::stripArticles(QString& value) { 0125 static QStringList oldArticleList; 0126 static QList<QRegularExpression> rxList; 0127 if(oldArticleList != Config::articleList()) { 0128 oldArticleList = Config::articleList(); 0129 rxList.clear(); 0130 foreach(const QString& article, oldArticleList) { 0131 rxList << QRegularExpression(QLatin1String("\\b") + 0132 QRegularExpression::escape(article) + 0133 QLatin1String("\\b")); 0134 } 0135 } 0136 foreach(const QRegularExpression& rx, rxList) { 0137 value.remove(rx); 0138 } 0139 value = value.trimmed(); 0140 if(value.endsWith(QLatin1Char(','))) { 0141 value.chop(1); 0142 } 0143 } 0144 0145 QString FieldFormat::format(const QString& value_, Type type_, Request request_) { 0146 if(value_.isEmpty()) { 0147 return value_; 0148 } 0149 0150 Options options; 0151 if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoCapitalization())) { 0152 options |= FormatCapitalize; 0153 } 0154 if(request_ == ForceFormat || (request_ != AsIsFormat && Config::autoFormat())) { 0155 options |= FormatAuto; 0156 } 0157 0158 QString text; 0159 switch(type_) { 0160 case FormatTitle: 0161 text = title(value_, options); 0162 break; 0163 case FormatName: 0164 text = name(value_, options); 0165 break; 0166 case FormatDate: 0167 text = date(value_); 0168 break; 0169 case FormatPlain: 0170 text = options.testFlag(FormatCapitalize) ? capitalize(value_) : value_; 0171 break; 0172 case FormatNone: 0173 text = value_; 0174 break; 0175 } 0176 return text; 0177 } 0178 0179 QString FieldFormat::title(const QString& title_, Options opt_) { 0180 QString newTitle = title_; 0181 QString tail; 0182 if(opt_.testFlag(FormatAuto)) { 0183 // special case for multi-column tables, assume user never has column delimiter in a value 0184 const int pos = newTitle.indexOf(columnDelimiterString()); 0185 if(pos > -1) { 0186 tail = columnDelimiterString() + newTitle.mid(pos + columnDelimiterString().length()); 0187 newTitle = newTitle.left(pos); 0188 } 0189 0190 // arbitrarily impose rule that a space must follow every comma 0191 // has to come before the capitalization since the space is significant 0192 newTitle.replace(commaSplitRegularExpression(), QStringLiteral(", ")); 0193 } 0194 0195 if(opt_.testFlag(FormatCapitalize)) { 0196 newTitle = capitalize(newTitle); 0197 } 0198 0199 if(opt_.testFlag(FormatAuto)) { 0200 const QString lower = newTitle.toLower(); 0201 // TODO if the title has ",the" at the end, put it at the front 0202 foreach(const QString& article, Config::articleList()) { 0203 // assume white space is already stripped 0204 // the articles are already in lower-case 0205 if(lower.startsWith(article + QLatin1Char(' '))) { 0206 QRegularExpression rx(QLatin1Char('^') + QRegularExpression::escape(article) + QLatin1String("\\s*"), 0207 QRegularExpression::CaseInsensitiveOption); 0208 // can't just use article since it's in lower-case 0209 QString titleArticle = newTitle.left(article.length()); 0210 newTitle = newTitle.remove(rx) 0211 .append(QLatin1String(", ")) 0212 .append(titleArticle); 0213 break; 0214 } 0215 } 0216 } 0217 0218 return newTitle + tail; 0219 } 0220 0221 QString FieldFormat::name(const QString& name_, Options opt_) { 0222 static const QRegularExpression spaceComma(QLatin1String("[\\s,]")); 0223 // the ending look-ahead is so that a space is not added at the end 0224 static const QRegularExpression periodSpace(QLatin1String("\\.\\s*(?=.)")); 0225 0226 QString name = name_; 0227 name.replace(periodSpace, QStringLiteral(". ")); 0228 if(opt_.testFlag(FormatCapitalize)) { 0229 name = capitalize(name); 0230 } 0231 0232 // split the name by white space and commas 0233 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0234 QStringList words = name.split(spaceComma, QString::SkipEmptyParts); 0235 #else 0236 QStringList words = name.split(spaceComma, Qt::SkipEmptyParts); 0237 #endif 0238 // psycho case where name == "," 0239 if(words.isEmpty()) { 0240 return name; 0241 } 0242 0243 // if it contains a comma already and the last word is not a suffix, don't format it 0244 if(!opt_.testFlag(FormatAuto) || 0245 (name.indexOf(QLatin1Char(',')) > -1 && !Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive))) { 0246 // arbitrarily impose rule that no spaces before a comma and 0247 // a single space after every comma 0248 name.replace(commaSplitRegularExpression(), QStringLiteral(", ")); 0249 } else if(words.count() > 1) { 0250 // otherwise split it by white space, move the last word to the front 0251 // but only if there is more than one word 0252 0253 // if the last word is a suffix, it has to be kept with last name 0254 if(Config::nameSuffixList().contains(words.last(), Qt::CaseInsensitive)) { 0255 words.prepend(words.last().append(QLatin1Char(','))); 0256 words.removeLast(); 0257 } 0258 0259 // now move the word 0260 // adding comma here when there had been a suffix is because it was originally split with space or comma 0261 words.prepend(words.last().append(QLatin1Char(','))); 0262 words.removeLast(); 0263 0264 // this is probably just something for me, limited to english 0265 // In a previous version of Tellico, using a prefix such as "van der" (with a space) would work 0266 // because QStringList::contains did substring matching, but now need to add a function for tokenizing 0267 // the list with whitespace as well as comma 0268 while(Config::surnamePrefixTokens().contains(words.last(), Qt::CaseInsensitive)) { 0269 words.prepend(words.last()); 0270 words.removeLast(); 0271 } 0272 0273 name = words.join(QLatin1String(" ")); 0274 } 0275 0276 return name; 0277 } 0278 0279 QString FieldFormat::date(const QString& date_) { 0280 // internally, this is "year-month-day" 0281 // any of the three may be empty 0282 // if they're not digits, return the original string 0283 bool empty = true; 0284 // for empty year, use current 0285 // for empty month or date, use 1 0286 QStringList s = date_.split(QLatin1Char('-')); 0287 bool ok = true; 0288 int y = s.count() > 0 ? s[0].toInt(&ok) : QDate::currentDate().year(); 0289 if(ok) { 0290 empty = false; 0291 } else { 0292 y = QDate::currentDate().year(); 0293 } 0294 int m = s.count() > 1 ? s[1].toInt(&ok) : 1; 0295 if(ok) { 0296 empty = false; 0297 } else { 0298 m = 1; 0299 } 0300 int d = s.count() > 2 ? s[2].toInt(&ok) : 1; 0301 if(ok) { 0302 empty = false; 0303 } else { 0304 d = 1; 0305 } 0306 // rather use ISO date formatting than locale formatting for now. Primarily, it makes sorting just work. 0307 return empty ? date_ : QDate(y, m, d).toString(Qt::ISODate); 0308 } 0309 0310 QString FieldFormat::capitalize(QString str_) { 0311 if(str_.isEmpty()) { 0312 return str_; 0313 } 0314 0315 // first letter is always capitalized 0316 str_.replace(0, 1, str_.at(0).toUpper()); 0317 0318 // regexp to split words 0319 static const QRegularExpression rx(QLatin1String("[-\\s,.;]")); 0320 0321 // special case for french words like l'espace 0322 QRegularExpressionMatch match = rx.match(str_, 1); 0323 int pos = match.capturedStart(); 0324 int nextPos; 0325 0326 QString word = str_.mid(0, pos); 0327 // now check to see if words starts with apostrophe list 0328 foreach(const QString& aposArticle, Config::articleAposList()) { 0329 if(word.startsWith(aposArticle, Qt::CaseInsensitive)) { 0330 const uint l = aposArticle.length(); 0331 str_.replace(l, 1, str_.at(l).toUpper()); 0332 break; 0333 } 0334 } 0335 0336 while(pos > -1) { 0337 // also need to compare against list of non-capitalized words 0338 match = rx.match(str_, pos+1); 0339 nextPos = match.capturedStart(); 0340 if(nextPos == -1) { 0341 nextPos = str_.length(); 0342 } 0343 word = str_.mid(pos+1, nextPos-pos-1); 0344 bool aposMatch = false; 0345 // now check to see if words starts with apostrophe list 0346 foreach(const QString& aposArticle, Config::articleAposList()) { 0347 if(word.startsWith(aposArticle, Qt::CaseInsensitive)) { 0348 const uint l = aposArticle.length(); 0349 // if the word is not the end of the string, capitalize the letter after it 0350 if(int(pos+l+1) < str_.length()) { 0351 str_.replace(pos+l+1, 1, str_.at(pos+l+1).toUpper()); 0352 } 0353 aposMatch = true; 0354 break; 0355 } 0356 } 0357 0358 if(!aposMatch) { 0359 // check against the noCapitalization list AND the surnamePrefix list 0360 // does this hold true everywhere other than english? 0361 if(!Config::noCapitalizationList().contains(word, Qt::CaseInsensitive) && 0362 !Config::surnamePrefixTokens().contains(word, Qt::CaseInsensitive) && 0363 nextPos-pos > 1) { 0364 str_.replace(pos+1, 1, str_.at(pos+1).toUpper()); 0365 } 0366 } 0367 0368 match = rx.match(str_, pos+1); 0369 pos = match.capturedStart(); 0370 } 0371 return str_; 0372 }