File indexing completed on 2024-04-21 03:41:36
0001 /* 0002 SPDX-FileCopyrightText: 2005 Inge Wallin <inge@lysator.liu.se> 0003 SPDX-License-Identifier: GPL-2.0-or-later 0004 */ 0005 0006 #include "moleculeparser.h" 0007 0008 #include <cctype> 0009 0010 #include "kalzium_libscience_debug.h" 0011 #include <QFile> 0012 #include <QStandardPaths> 0013 0014 // ================================================================ 0015 // class ElementCountMap 0016 0017 ElementCountMap::ElementCountMap() 0018 { 0019 m_map.clear(); 0020 } 0021 0022 ElementCountMap::~ElementCountMap() = default; 0023 0024 ElementCount *ElementCountMap::search(Element *_element) 0025 { 0026 for (ElementCount *c : std::as_const(m_map)) { 0027 if (c->element() == _element) { 0028 return c; 0029 } 0030 } 0031 0032 return nullptr; 0033 } 0034 0035 void ElementCountMap::add(ElementCountMap &_map) 0036 { 0037 for (ElementCount *c : std::as_const(_map.m_map)) { 0038 add(c->m_element, c->m_count); 0039 } 0040 } 0041 0042 QList<Element *> ElementCountMap::elements() const 0043 { 0044 QList<Element *> list; 0045 0046 for (ElementCount *c : std::as_const(m_map)) { 0047 Element *e = c->m_element; 0048 if (!list.contains(e)) { 0049 list << e; 0050 } 0051 } 0052 0053 return list; 0054 } 0055 0056 void ElementCountMap::add(Element *_element, int _count) 0057 { 0058 ElementCount *elemCount; 0059 0060 elemCount = search(_element); 0061 if (elemCount) { 0062 elemCount->m_count += _count; 0063 } else { 0064 m_map.append(new ElementCount(_element, _count)); 0065 } 0066 } 0067 0068 void ElementCountMap::multiply(int _factor) 0069 { 0070 for (ElementCount *count : std::as_const(m_map)) { 0071 count->multiply(_factor); 0072 } 0073 } 0074 0075 // ================================================================ 0076 // class MoleculeParser 0077 0078 MoleculeParser::MoleculeParser(const QList<Element *> &list) 0079 : Parser() 0080 { 0081 m_elementList = list; 0082 m_aliasList = new QSet<QString>; 0083 } 0084 0085 MoleculeParser::MoleculeParser(const QString &_str) 0086 : Parser(_str) 0087 { 0088 m_aliasList = new QSet<QString>; 0089 } 0090 0091 MoleculeParser::~MoleculeParser() 0092 { 0093 delete m_aliasList; 0094 } 0095 0096 // ---------------------------------------------------------------- 0097 // public methods 0098 0099 // Try to parse the molecule and get the weight of it. 0100 // 0101 // This method also acts as the main loop. 0102 0103 bool MoleculeParser::weight(const QString &_shortMoleculeString, double *_resultMass, ElementCountMap *_resultMap) 0104 { 0105 if (_shortMoleculeString.isEmpty()) { 0106 return false; 0107 } 0108 // Clear the list of aliases and start filling it again. 0109 0110 m_aliasList->clear(); 0111 QString _moleculeString; 0112 // Clear the result variables and set m_error to false 0113 _resultMap->clear(); 0114 m_error = false; 0115 *_resultMass = 0.0; 0116 0117 // Expand the molecule string 0118 // Example : MeOH -> (CH3)OH 0119 qCDebug(KALZIUM_LIBSCIENCE_LOG) << _shortMoleculeString << "is going to be expanded"; 0120 _moleculeString = expandFormula(_shortMoleculeString); 0121 qCDebug(KALZIUM_LIBSCIENCE_LOG) << _moleculeString << "is the expanded string"; 0122 0123 // Now set the expanded string 0124 // Initialize the parsing process, and parse te molecule. 0125 start(_moleculeString); 0126 parseSubmolecule(_resultMass, _resultMap); 0127 0128 if (nextToken() != -1) { 0129 return false; 0130 } 0131 0132 if (m_error) { // there was an error in the input... 0133 return false; 0134 } 0135 0136 return true; 0137 } 0138 0139 QSet<QString> MoleculeParser::aliasList() 0140 { 0141 return *m_aliasList; 0142 } 0143 // ---------------------------------------------------------------- 0144 // helper methods for the public methods 0145 0146 // Parse a submolecule. This is a list of terms. 0147 // 0148 0149 bool MoleculeParser::parseSubmolecule(double *_resultMass, ElementCountMap *_resultMap) 0150 { 0151 double subMass = 0.0; 0152 ElementCountMap subMap; 0153 0154 *_resultMass = 0.0; 0155 _resultMap->clear(); 0156 while (parseTerm(&subMass, &subMap)) { 0157 // qCDebug(KALZIUM_LIBSCIENCE_LOG) << "Parsed a term, weight = " << subresult; 0158 0159 // Add the mass and composition of the submolecule to the total. 0160 *_resultMass += subMass; 0161 _resultMap->add(subMap); 0162 } 0163 0164 return true; 0165 } 0166 0167 // Parse a term within the molecule, i.e. a single atom or a 0168 // submolecule within parenthesis followed by an optional number. 0169 // Examples: Bk, Mn2, (COOH)2 0170 // 0171 // Return true if correct, otherwise return false. 0172 0173 // If correct, the mass of the term is returned in *_resultMass, and 0174 // the flattened composition of the molecule in *_resultMap. 0175 // 0176 0177 bool MoleculeParser::parseTerm(double *_resultMass, ElementCountMap *_resultMap) 0178 { 0179 *_resultMass = 0.0; 0180 _resultMap->clear(); 0181 0182 if (nextToken() == ELEMENT_TOKEN) { 0183 // qCDebug(KALZIUM_LIBSCIENCE_LOG) << "Parsed an element: " << m_elementVal->symbol(); 0184 *_resultMass = m_elementVal->dataAsVariant(ChemicalDataObject::mass).toDouble(); 0185 _resultMap->add(m_elementVal, 1); 0186 0187 getNextToken(); 0188 } else if (nextToken() == '(') { 0189 // A submolecule. 0190 0191 getNextToken(); 0192 parseSubmolecule(_resultMass, _resultMap); 0193 0194 // Must end in a ")". 0195 if (nextToken() == ')') { 0196 // qCDebug(KALZIUM_LIBSCIENCE_LOG) << "Parsed a submolecule. weight = " << *_result; 0197 getNextToken(); 0198 } else { 0199 return false; 0200 } 0201 } else { 0202 // Neither an element nor a list within (). 0203 return false; 0204 } 0205 0206 // Optional number. 0207 if (nextToken() == INT_TOKEN) { 0208 // qCDebug(KALZIUM_LIBSCIENCE_LOG) << "Parsed a number: " << intVal(); 0209 0210 *_resultMass *= intVal(); 0211 _resultMap->multiply(intVal()); 0212 0213 getNextToken(); 0214 } 0215 0216 qCDebug(KALZIUM_LIBSCIENCE_LOG) << "Weight of term = " << *_resultMass; 0217 return true; 0218 } 0219 0220 // ---------------------------------------------------------------- 0221 // protected methods 0222 0223 // Extend Parser::getNextToken with elements. 0224 0225 int MoleculeParser::getNextToken() 0226 { 0227 QString name; 0228 0229 #if 0 0230 qCDebug(KALZIUM_LIBSCIENCE_LOG) << "getNextToken(): Next character = " 0231 << nextChar() << endl; 0232 #endif 0233 0234 // Check if the token is an element name. 0235 if ('A' <= nextChar() && nextChar() <= 'Z') { 0236 name = char(nextChar()); 0237 getNextChar(); 0238 0239 if ('a' <= nextChar() && nextChar() <= 'z') { 0240 name.append(char(nextChar())); 0241 getNextChar(); 0242 } 0243 0244 // Look up the element from the name.. 0245 m_elementVal = lookupElement(name); 0246 if (m_elementVal) { 0247 m_nextToken = ELEMENT_TOKEN; 0248 } else { 0249 m_nextToken = -1; 0250 } 0251 } else { 0252 return Parser::getNextToken(); 0253 } 0254 0255 return m_nextToken; 0256 } 0257 0258 // ---------------------------------------------------------------- 0259 // private methods 0260 0261 Element *MoleculeParser::lookupElement(const QString &_name) 0262 { 0263 qCDebug(KALZIUM_LIBSCIENCE_LOG) << "looking up " << _name; 0264 0265 for (Element *e : std::as_const(m_elementList)) { 0266 if (e->dataAsVariant(ChemicalDataObject::symbol) == _name) { 0267 qCDebug(KALZIUM_LIBSCIENCE_LOG) << "Found element " << _name; 0268 return e; 0269 } 0270 } 0271 0272 // if there is an error make m_error true. 0273 m_error = true; 0274 0275 qCDebug(KALZIUM_LIBSCIENCE_LOG) << "no such element!: " << _name; 0276 0277 return nullptr; 0278 } 0279 0280 QString MoleculeParser::expandFormula(const QString &_shortString) 0281 { 0282 QString _fullString; // the expanded string that will be returned 0283 QString::const_iterator i; // iterator 0284 QString temp; // a temporary string that will contain a single element/group 0285 QString expandedTerm; // expansion of a particular term 0286 0287 // Go through all letters in the string. 0288 for (i = _shortString.constBegin(); i != _shortString.constEnd();) { 0289 temp = QLatin1String(""); 0290 0291 // If a capital letter was found 0292 if ((*i).category() == QChar::Letter_Uppercase) { 0293 temp += (*i); 0294 ++i; 0295 0296 // A small letter following a capital letter 0297 if (i != _shortString.end() && (*i).category() == QChar::Letter_Lowercase) { 0298 temp += (*i); 0299 ++i; 0300 } 0301 0302 // If element is found, append it 0303 if (lookupElement(temp)) { 0304 _fullString += temp; 0305 } else if (!((expandedTerm = expandTerm(temp)).isEmpty())) { 0306 // If an expansion was made, return the expansion 0307 qCDebug(KALZIUM_LIBSCIENCE_LOG) << "expanded" << temp << "to" << expandedTerm; 0308 _fullString += '(' + expandedTerm + ')'; 0309 } else { // invalid term, append it. (Validation is done later anyway.) 0310 _fullString += temp; 0311 } 0312 } else if (*i == '(') { // Return parenthesis as and when found 0313 _fullString += '('; 0314 ++i; 0315 } else if (*i == ')') { 0316 _fullString += ')'; 0317 ++i; 0318 } else if (*i == '#') { // If # is found, we have a short-form eg #EDTA# 0319 ++i; // go to the next character 0320 // Get the term between # and # 0321 while (*i != '#' && i != _shortString.constEnd()) { 0322 temp += *i; 0323 ++i; 0324 } 0325 // If the string ended, just add the part that comes after # 0326 if (i == _shortString.constEnd()) { 0327 _fullString += temp; 0328 break; 0329 } else if (!temp.isEmpty()) { // else expand the term between # and # 0330 // if alias is not found, just add without expanding the term 0331 if ((expandedTerm = expandTerm(temp)).isEmpty()) { 0332 _fullString += temp; 0333 } else { // else add the expanded term 0334 _fullString += expandedTerm; 0335 } 0336 } 0337 ++i; 0338 } else if ((*i).category() == QChar::Number_DecimalDigit) { // If number was found, return it 0339 _fullString += *i; 0340 ++i; 0341 } else { // invalid character, return it, validation is done again later 0342 _fullString += *i; 0343 ++i; 0344 qCDebug(KALZIUM_LIBSCIENCE_LOG) << *i << "invalid character!"; 0345 } 0346 } 0347 0348 // Reset all "element not found" errors. 0349 m_error = false; 0350 return _fullString; 0351 } 0352 0353 QString MoleculeParser::expandTerm(const QString &_group) 0354 { 0355 QString shortForm, fullForm; // short form (symbol) and full form (expansion) 0356 QString temp; // A temporary QString used in Regular expressions 0357 0358 // Search in User defined aliases. 0359 QString fileName = QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("libkdeedu/data/symbols2.csv")); 0360 QFile file(fileName); 0361 0362 // Check file validity 0363 if (!(!file.open(QIODevice::ReadOnly | QIODevice::Text))) { 0364 qCDebug(KALZIUM_LIBSCIENCE_LOG) << fileName << " opened"; 0365 QTextStream in(&file); 0366 0367 // Get all shortForms and fullForms in the file. 0368 while (!in.atEnd()) { 0369 QString line = in.readLine(); 0370 shortForm = line.section(',', 0, 0); 0371 shortForm.remove(QChar('\"')); 0372 fullForm = line.section(',', 1, 1); 0373 fullForm.remove(QChar('\"')); 0374 0375 // If short term is found, return fullForm 0376 if (shortForm == _group) { 0377 *m_aliasList << (_group + " : " + fullForm); 0378 return (fullForm); 0379 } 0380 } 0381 } else { 0382 qCDebug(KALZIUM_LIBSCIENCE_LOG) << fileName << " could not be opened!"; 0383 } 0384 0385 // Find the system defined aliases 0386 // Open the file 0387 fileName = QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("libkdeedu/data/symbols.csv")); 0388 QFile file2(fileName); 0389 0390 // Check file validity 0391 if (file2.open(QIODevice::ReadOnly | QIODevice::Text)) { 0392 qCDebug(KALZIUM_LIBSCIENCE_LOG) << fileName << " opened"; 0393 QTextStream in(&file2); 0394 0395 // Get all shortForms and fullForms in the file. 0396 while (!in.atEnd()) { 0397 QString line = in.readLine(); 0398 shortForm = line.section(',', 0, 0); 0399 shortForm.remove(QChar('\"')); 0400 fullForm = line.section(',', 1, 1); 0401 fullForm.remove(QChar('\"')); 0402 0403 if (shortForm == _group) { 0404 *m_aliasList << (_group + " : " + fullForm); 0405 return (fullForm); 0406 } 0407 } 0408 } else { 0409 qCDebug(KALZIUM_LIBSCIENCE_LOG) << fileName << " could not be opened!"; 0410 } 0411 0412 // Sample expansions, work even when file is not found, testing purposes 0413 if (_group == QLatin1String("Me")) { 0414 return ("CH3"); 0415 } else if (_group == QLatin1String("Et")) { 0416 return ("C2H5"); 0417 } else { // If not found return an empty string. 0418 return (""); 0419 } 0420 }