File indexing completed on 2024-12-29 04:51:06
0001 /* 0002 SPDX-FileCopyrightText: 2022 Volker Krause <vkrause@kde.org> 0003 SPDX-License-Identifier: LGPL-2.0-or-later 0004 */ 0005 0006 #include "addressparser_p.h" 0007 0008 #include <KContacts/AddressFormat> 0009 0010 #include <QDebug> 0011 #include <QRegularExpression> 0012 0013 using namespace KItinerary; 0014 0015 AddressParser::AddressParser() = default; 0016 AddressParser::~AddressParser() = default; 0017 0018 void AddressParser::setFallbackCountry(const QString &countryCode) 0019 { 0020 m_fallbackCountry = countryCode; 0021 } 0022 0023 void AddressParser::parse(PostalAddress addr) 0024 { 0025 m_address = addr; 0026 if ((m_address.postalCode().isEmpty() && !m_address.addressLocality().isEmpty()) || 0027 (!m_address.postalCode().isEmpty() && m_address.addressLocality().contains(m_address.postalCode()))) 0028 { 0029 splitPostalCode(); 0030 } 0031 0032 // crude fallback if the above doesn't work (yet), e.g. in the UK 0033 if (!m_address.postalCode().isEmpty() && m_address.addressLocality().contains(m_address.postalCode())) { 0034 m_address.setAddressLocality(m_address.addressLocality().remove(m_address.postalCode()).trimmed()); 0035 } 0036 } 0037 0038 PostalAddress AddressParser::result() const 0039 { 0040 return m_address; 0041 } 0042 0043 KContacts::AddressFormat AddressParser::addressFormat() const 0044 { 0045 // TODO detect script 0046 return KContacts::AddressFormatRepository::formatForCountry(m_address.addressCountry().isEmpty() ? m_fallbackCountry : m_address.addressCountry(), KContacts::AddressFormatScriptPreference::Local); 0047 } 0048 0049 static QString captureName(KContacts::AddressFormatField field) 0050 { 0051 switch (field) { 0052 case KContacts::AddressFormatField::PostalCode: 0053 return QStringLiteral("postalCode"); 0054 case KContacts::AddressFormatField::Locality: 0055 return QStringLiteral("locality"); 0056 default: 0057 return {}; 0058 } 0059 } 0060 0061 static QString captureExpression(KContacts::AddressFormatField field) 0062 { 0063 return QLatin1StringView("?<") + captureName(field) + QLatin1Char('>'); 0064 } 0065 0066 void AddressParser::splitPostalCode() 0067 { 0068 const auto format = addressFormat(); 0069 if (format.elements().empty() || format.postalCodeRegularExpression().isEmpty()) { 0070 return; 0071 } 0072 0073 // find the format line containing the postal code and locality 0074 using namespace KContacts; 0075 auto startIt = format.elements().begin(); 0076 auto endIt = startIt; 0077 enum { 0078 None = 0, 0079 HasLocality = 1, 0080 HasPostalCode = 2, 0081 HasBoth = 3, 0082 }; 0083 int inRelevantLine = None; 0084 for (auto it = format.elements().begin(); it != format.elements().end(); ++it) { 0085 if ((*it).isSeparator() && inRelevantLine != HasBoth) { 0086 startIt = endIt = it; 0087 inRelevantLine = None; 0088 } 0089 if ((*it).isSeparator() && inRelevantLine == HasBoth) { 0090 endIt = it; 0091 inRelevantLine = None; 0092 break; 0093 } 0094 if ((*it).isField() && (*it).field() == AddressFormatField::Locality) { 0095 inRelevantLine |= HasLocality; 0096 } 0097 if ((*it).isField() && (*it).field() == AddressFormatField::PostalCode) { 0098 inRelevantLine |= HasPostalCode; 0099 } 0100 } 0101 if (inRelevantLine == HasBoth) { 0102 endIt = format.elements().end(); 0103 } 0104 std::vector<AddressFormatElement> line(startIt, endIt); 0105 // TODO also handle the case the region is part of the same line 0106 if (line.empty() || std::count_if(line.begin(), line.end(), std::mem_fn(&AddressFormatElement::isField)) > 2) { 0107 return; 0108 } 0109 0110 // build regex for that format line 0111 QString regex; 0112 regex.push_back(QLatin1Char('^')); 0113 for (auto it = line.begin(); it != line.end(); ++it) { 0114 if ((*it).isField()) { 0115 regex += QLatin1Char('(') + captureExpression((*it).field()) + 0116 ((*it).field() == AddressFormatField::PostalCode 0117 ? format.postalCodeRegularExpression() 0118 : QLatin1StringView("\\S.*")) + 0119 QLatin1Char(')'); 0120 } 0121 if ((*it).isLiteral()) { 0122 regex += (*it).literal(); 0123 } 0124 } 0125 0126 QRegularExpression re(regex); 0127 if (!re.isValid()) { 0128 qWarning() << "generated invalid address parsing pattern:" << regex; 0129 return; 0130 } 0131 0132 // match against the input 0133 const auto match = re.match(m_address.addressLocality()); 0134 if (!match.hasMatch()) { 0135 return; 0136 } 0137 0138 const auto postalCode = match.captured(captureName(AddressFormatField::PostalCode)); 0139 const auto locality = match.captured(captureName(AddressFormatField::Locality)); 0140 if (!locality.isEmpty() && !postalCode.isEmpty()) { 0141 m_address.setPostalCode(postalCode); 0142 m_address.setAddressLocality(locality); 0143 } 0144 }