File indexing completed on 2024-12-29 04:51:06

0001 /*
0002     SPDX-FileCopyrightText: 2022 Volker Krause <vkrause@kde.org>
0003     SPDX-License-Identifier: LGPL-2.0-or-later
0004 */
0005 
0006 #include "addressparser_p.h"
0007 
0008 #include <KContacts/AddressFormat>
0009 
0010 #include <QDebug>
0011 #include <QRegularExpression>
0012 
0013 using namespace KItinerary;
0014 
0015 AddressParser::AddressParser() = default;
0016 AddressParser::~AddressParser() = default;
0017 
0018 void AddressParser::setFallbackCountry(const QString &countryCode)
0019 {
0020     m_fallbackCountry = countryCode;
0021 }
0022 
0023 void AddressParser::parse(PostalAddress addr)
0024 {
0025     m_address = addr;
0026     if ((m_address.postalCode().isEmpty() && !m_address.addressLocality().isEmpty()) ||
0027         (!m_address.postalCode().isEmpty() && m_address.addressLocality().contains(m_address.postalCode())))
0028     {
0029         splitPostalCode();
0030     }
0031 
0032     // crude fallback if the above doesn't work (yet), e.g. in the UK
0033     if (!m_address.postalCode().isEmpty() && m_address.addressLocality().contains(m_address.postalCode())) {
0034         m_address.setAddressLocality(m_address.addressLocality().remove(m_address.postalCode()).trimmed());
0035     }
0036 }
0037 
0038 PostalAddress AddressParser::result() const
0039 {
0040     return m_address;
0041 }
0042 
0043 KContacts::AddressFormat AddressParser::addressFormat() const
0044 {
0045     // TODO detect script
0046     return KContacts::AddressFormatRepository::formatForCountry(m_address.addressCountry().isEmpty() ? m_fallbackCountry : m_address.addressCountry(), KContacts::AddressFormatScriptPreference::Local);
0047 }
0048 
0049 static QString captureName(KContacts::AddressFormatField field)
0050 {
0051     switch (field) {
0052         case KContacts::AddressFormatField::PostalCode:
0053             return QStringLiteral("postalCode");
0054         case KContacts::AddressFormatField::Locality:
0055             return QStringLiteral("locality");
0056         default:
0057             return {};
0058     }
0059 }
0060 
0061 static QString captureExpression(KContacts::AddressFormatField field)
0062 {
0063   return QLatin1StringView("?<") + captureName(field) + QLatin1Char('>');
0064 }
0065 
0066 void AddressParser::splitPostalCode()
0067 {
0068     const auto format = addressFormat();
0069     if (format.elements().empty() || format.postalCodeRegularExpression().isEmpty()) {
0070         return;
0071     }
0072 
0073     // find the format line containing the postal code and locality
0074     using namespace KContacts;
0075     auto startIt = format.elements().begin();
0076     auto endIt = startIt;
0077     enum {
0078         None = 0,
0079         HasLocality = 1,
0080         HasPostalCode = 2,
0081         HasBoth = 3,
0082     };
0083     int inRelevantLine = None;
0084     for (auto it = format.elements().begin(); it != format.elements().end(); ++it) {
0085         if ((*it).isSeparator() && inRelevantLine != HasBoth) {
0086             startIt = endIt = it;
0087             inRelevantLine = None;
0088         }
0089         if ((*it).isSeparator() && inRelevantLine == HasBoth) {
0090             endIt = it;
0091             inRelevantLine = None;
0092             break;
0093         }
0094         if ((*it).isField() && (*it).field() == AddressFormatField::Locality) {
0095             inRelevantLine |= HasLocality;
0096         }
0097         if ((*it).isField() && (*it).field() == AddressFormatField::PostalCode) {
0098             inRelevantLine |= HasPostalCode;
0099         }
0100     }
0101     if (inRelevantLine == HasBoth) {
0102         endIt = format.elements().end();
0103     }
0104     std::vector<AddressFormatElement> line(startIt, endIt);
0105     // TODO also handle the case the region is part of the same line
0106     if (line.empty() || std::count_if(line.begin(), line.end(), std::mem_fn(&AddressFormatElement::isField)) > 2) {
0107         return;
0108     }
0109 
0110     // build regex for that format line
0111     QString regex;
0112     regex.push_back(QLatin1Char('^'));
0113     for (auto it = line.begin(); it != line.end(); ++it) {
0114         if ((*it).isField()) {
0115           regex += QLatin1Char('(') + captureExpression((*it).field()) +
0116                    ((*it).field() == AddressFormatField::PostalCode
0117                         ? format.postalCodeRegularExpression()
0118                         : QLatin1StringView("\\S.*")) +
0119                    QLatin1Char(')');
0120         }
0121         if ((*it).isLiteral()) {
0122             regex += (*it).literal();
0123         }
0124     }
0125 
0126     QRegularExpression re(regex);
0127     if (!re.isValid()) {
0128         qWarning() << "generated invalid address parsing pattern:" << regex;
0129         return;
0130     }
0131 
0132     // match against the input
0133     const auto match = re.match(m_address.addressLocality());
0134     if (!match.hasMatch()) {
0135         return;
0136     }
0137 
0138     const auto postalCode = match.captured(captureName(AddressFormatField::PostalCode));
0139     const auto locality = match.captured(captureName(AddressFormatField::Locality));
0140     if (!locality.isEmpty() && !postalCode.isEmpty()) {
0141         m_address.setPostalCode(postalCode);
0142         m_address.setAddressLocality(locality);
0143     }
0144 }