// File indexing completed on 2024-05-05 04:48:35
0001 /**************************************************************************************** 0002 * Copyright (c) 2010 Sergey Ivanov <123kash@gmail.com> * 0003 * Copyright (c) 2013 Alberto Villa <avilla@FreeBSD.org> * 0004 * * 0005 * This program is free software; you can redistribute it and/or modify it under * 0006 * the terms of the GNU General Public License as published by the Free Software * 0007 * Foundation; either version 2 of the License, or (at your option) any later * 0008 * version. * 0009 * * 0010 * This program is distributed in the hope that it will be useful, but WITHOUT ANY * 0011 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * 0012 * PARTICULAR PURPOSE. See the GNU General Public License for more details. * 0013 * * 0014 * You should have received a copy of the GNU General Public License along with * 0015 * this program. If not, see <http://www.gnu.org/licenses/>. * 0016 ****************************************************************************************/ 0017 0018 #define DEBUG_PREFIX "MusicBrainzXmlParser" 0019 0020 #include "MusicBrainzXmlParser.h" 0021 0022 #include "core/meta/support/MetaConstants.h" 0023 #include "core/support/Debug.h" 0024 #include "MusicBrainzMeta.h" 0025 0026 #include <QStringList> 0027 #include <QVariantList> 0028 0029 MusicBrainzXmlParser::MusicBrainzXmlParser( const QString &doc ) 0030 : QObject() 0031 , ThreadWeaver::Job() 0032 , m_doc( "musicbrainz" ) 0033 , m_type( 0 ) 0034 { 0035 m_doc.setContent( doc ); 0036 } 0037 0038 void 0039 MusicBrainzXmlParser::run(ThreadWeaver::JobPointer self, ThreadWeaver::Thread *thread) 0040 { 0041 Q_UNUSED(self); 0042 Q_UNUSED(thread); 0043 DEBUG_BLOCK 0044 0045 QDomElement docElem = m_doc.documentElement(); 0046 parseElement( docElem ); 0047 } 0048 0049 void 0050 MusicBrainzXmlParser::defaultBegin(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread) 0051 { 0052 Q_EMIT started(self); 0053 ThreadWeaver::Job::defaultBegin(self, 
thread); 0054 } 0055 0056 void 0057 MusicBrainzXmlParser::defaultEnd(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread) 0058 { 0059 ThreadWeaver::Job::defaultEnd(self, thread); 0060 if (!self->success()) { 0061 Q_EMIT failed(self); 0062 } 0063 Q_EMIT done(self); 0064 } 0065 0066 int 0067 MusicBrainzXmlParser::type() 0068 { 0069 return m_type; 0070 } 0071 0072 void 0073 MusicBrainzXmlParser::parseElement( const QDomElement &e ) 0074 { 0075 QString elementName = e.tagName(); 0076 if( elementName == "recording-list" ) 0077 { 0078 m_type = TrackList; 0079 parseRecordingList( e ); 0080 } 0081 else if( elementName == "release-group" ) 0082 { 0083 m_type = ReleaseGroup; 0084 parseReleaseGroup( e ); 0085 } 0086 else 0087 parseChildren( e ); 0088 } 0089 0090 void 0091 MusicBrainzXmlParser::parseChildren( const QDomElement &e ) 0092 { 0093 QDomNode child = e.firstChild(); 0094 while( !child.isNull() ) 0095 { 0096 if( child.isElement() ) 0097 parseElement( child.toElement() ); 0098 child = child.nextSibling(); 0099 } 0100 } 0101 0102 QStringList 0103 MusicBrainzXmlParser::parseRecordingList( const QDomElement &e ) 0104 { 0105 QDomNode dNode = e.firstChild(); 0106 QDomElement dElement; 0107 QStringList list; 0108 0109 while( !dNode.isNull() ) 0110 { 0111 if( dNode.isElement() ) 0112 { 0113 dElement = dNode.toElement(); 0114 0115 if( dElement.tagName() == "recording" ) 0116 list << parseRecording( dElement ); 0117 } 0118 dNode = dNode.nextSibling(); 0119 } 0120 return list; 0121 } 0122 0123 QString 0124 MusicBrainzXmlParser::parseRecording( const QDomElement &e ) 0125 { 0126 QString id; 0127 QVariantMap track; 0128 0129 if( e.hasAttribute( "id" ) ) 0130 id = e.attribute( "id" ); 0131 if( id.isEmpty() ) 0132 return id; 0133 0134 if( tracks.contains( id ) ) 0135 track = tracks.value( id ); 0136 else 0137 track.insert( MusicBrainz::TRACKID, id ); 0138 if( track.isEmpty() ) 0139 return id; 0140 0141 if( e.hasAttribute( "ext:score" ) ) 0142 track.insert( 
Meta::Field::SCORE, e.attribute( "ext:score" ).toInt() ); 0143 0144 QDomNode dNode = e.firstChild(); 0145 QDomElement dElement; 0146 QString elementName; 0147 0148 while( !dNode.isNull() ) 0149 { 0150 if( dNode.isElement() ) 0151 { 0152 dElement = dNode.toElement(); 0153 elementName = dElement.tagName(); 0154 0155 if( elementName == "title" ) 0156 track.insert( Meta::Field::TITLE, dElement.text() ); 0157 else if( elementName == "length" ) 0158 { 0159 int length = dElement.text().toInt(); 0160 if( length > 0 ) 0161 track.insert( Meta::Field::LENGTH, length ); 0162 } 0163 else if( elementName == "artist-credit" ) 0164 { 0165 QStringList idList = parseArtist( dElement ); 0166 if( !idList.isEmpty() ) 0167 { 0168 QString artist; 0169 QVariantMap artistInfo; 0170 foreach( const QString &id, idList ) 0171 { 0172 if( artists.contains( id ) ) 0173 { 0174 artistInfo.insert( id, artists.value( id ) ); 0175 artist += artists.value( id ); 0176 } 0177 else 0178 // If it's not among IDs, it's a joinphrase attribute. 
0179 artist += id; 0180 } 0181 if( !artistInfo.isEmpty() ) 0182 { 0183 track.insert( MusicBrainz::ARTISTID, artistInfo ); 0184 track.insert( Meta::Field::ARTIST, artist ); 0185 } 0186 } 0187 } 0188 else if( elementName == "release-list" ) 0189 { 0190 m_currentTrackInfo.clear(); 0191 track.insert( MusicBrainz::RELEASELIST, parseReleaseList( dElement ) ); 0192 track.insert( MusicBrainz::TRACKINFO, m_currentTrackInfo ); 0193 } 0194 } 0195 dNode = dNode.nextSibling(); 0196 } 0197 0198 tracks.insert( id, track ); 0199 return id; 0200 } 0201 0202 QStringList 0203 MusicBrainzXmlParser::parseReleaseList( const QDomElement &e ) 0204 { 0205 QDomNode dNode = e.firstChild(); 0206 QDomElement dElement; 0207 QStringList list; 0208 0209 while( !dNode.isNull() ) 0210 { 0211 if( dNode.isElement() ) 0212 { 0213 dElement = dNode.toElement(); 0214 0215 if( dElement.tagName() == "release" ) 0216 list << parseRelease( dElement ); 0217 } 0218 dNode = dNode.nextSibling(); 0219 } 0220 list.removeDuplicates(); 0221 return list; 0222 } 0223 0224 QString 0225 MusicBrainzXmlParser::parseRelease( const QDomElement &e ) 0226 { 0227 QString id; 0228 QVariantMap release; 0229 0230 if( e.hasAttribute( "id" ) ) 0231 id = e.attribute( "id" ); 0232 if( id.isEmpty() ) 0233 return id; 0234 0235 if( releases.contains( id ) ) 0236 release = releases.value( id ); 0237 else 0238 release.insert( MusicBrainz::RELEASEID, id ); 0239 if( release.isEmpty() ) 0240 return id; 0241 0242 QDomNode dNode = e.firstChild(); 0243 QDomElement dElement; 0244 QString elementName; 0245 0246 while( !dNode.isNull() ) 0247 { 0248 if( dNode.isElement() ) 0249 { 0250 dElement = dNode.toElement(); 0251 elementName = dElement.tagName(); 0252 0253 if( elementName == "title" ) 0254 /* 0255 * Avoid checking for "(disc N)" string as it's not a safe way to detect 0256 * disc number. 
0257 */ 0258 release.insert( Meta::Field::TITLE, dElement.text() ); 0259 else if( elementName == "medium-list" ) 0260 { 0261 QVariantMap info = parseMediumList( dElement ); 0262 QVariantList trackCountList = info.values( MusicBrainz::TRACKCOUNT ); 0263 int trackCount = 0; 0264 foreach( const QVariant &count, trackCountList ) 0265 { 0266 trackCount += count.toInt(); 0267 if( count.toInt() > 0 ) 0268 release.insert( MusicBrainz::TRACKCOUNT, count.toInt() ); 0269 } 0270 if( info.contains( Meta::Field::DISCNUMBER ) ) 0271 { 0272 int discNumber = info.value( Meta::Field::DISCNUMBER ).toInt(); 0273 if( discNumber < 1 || ( discNumber == 1 && 0274 ( trackCount <= 0 || trackCountList.size() != 2 ) ) ) 0275 info.remove( Meta::Field::DISCNUMBER ); 0276 } 0277 QVariantList trackInfoList = m_currentTrackInfo.value( id ).toList(); 0278 trackInfoList.append( info ); 0279 m_currentTrackInfo.insert( id, trackInfoList ); 0280 } 0281 else if( elementName == "release-group" ) 0282 release.insert( MusicBrainz::RELEASEGROUPID, parseReleaseGroup( dElement ) ); 0283 } 0284 dNode = dNode.nextSibling(); 0285 } 0286 0287 releases.insert( id, release ); 0288 return id; 0289 } 0290 0291 QVariantMap 0292 MusicBrainzXmlParser::parseMediumList( const QDomElement &e ) 0293 { 0294 QDomNode dNode = e.firstChild(); 0295 QDomElement dElement; 0296 QString elementName; 0297 QVariantMap info; 0298 0299 while( !dNode.isNull() ) 0300 { 0301 if( dNode.isElement() ) 0302 { 0303 dElement = dNode.toElement(); 0304 elementName = dElement.tagName(); 0305 0306 if( elementName == "track-count" ) 0307 info.insert( MusicBrainz::TRACKCOUNT, dElement.text().toInt() ); 0308 else if( elementName == "medium" ) 0309 info.unite( parseMedium( dElement ) ); 0310 } 0311 dNode = dNode.nextSibling(); 0312 } 0313 return info; 0314 } 0315 0316 QVariantMap 0317 MusicBrainzXmlParser::parseMedium( const QDomElement &e ) 0318 { 0319 QDomNode dNode = e.firstChild(); 0320 QDomElement dElement; 0321 QString elementName; 0322 
QVariantMap info; 0323 0324 while( !dNode.isNull() ) 0325 { 0326 if( dNode.isElement() ) 0327 { 0328 dElement = dNode.toElement(); 0329 elementName = dElement.tagName(); 0330 0331 if( elementName == "position" ) 0332 { 0333 int discNumber = dElement.text().toInt(); 0334 if( discNumber > 0 ) 0335 info.insert( Meta::Field::DISCNUMBER, discNumber ); 0336 } 0337 else if( elementName == "track-list" ) 0338 { 0339 if( dElement.hasAttribute( "count" ) ) 0340 info.insert( MusicBrainz::TRACKCOUNT, 0341 -1 * dElement.attribute( "count" ).toInt() ); 0342 info.unite( parseTrackList( dElement ) ); 0343 } 0344 } 0345 dNode = dNode.nextSibling(); 0346 } 0347 return info; 0348 } 0349 0350 QVariantMap 0351 MusicBrainzXmlParser::parseTrackList( const QDomElement &e ) 0352 { 0353 QDomNode dNode = e.firstChild(); 0354 QDomElement dElement; 0355 QVariantMap info; 0356 0357 while( !dNode.isNull() ) 0358 { 0359 if( dNode.isElement() ) 0360 { 0361 dElement = dNode.toElement(); 0362 0363 if( dElement.tagName() == "track" ) 0364 info = parseTrack( dElement ); 0365 } 0366 dNode = dNode.nextSibling(); 0367 } 0368 return info; 0369 } 0370 0371 QVariantMap 0372 MusicBrainzXmlParser::parseTrack( const QDomElement &e ) 0373 { 0374 QDomNode dNode = e.firstChild(); 0375 QDomElement dElement; 0376 QString elementName; 0377 QVariantMap info; 0378 0379 while( !dNode.isNull() ) 0380 { 0381 if( dNode.isElement() ) 0382 { 0383 dElement = dNode.toElement(); 0384 elementName = dElement.tagName(); 0385 0386 /* 0387 * Ignore any <artist-credit /> tag because per track-artists are used 0388 * inconsistently (especially with classical). Composer tag should be used to 0389 * get more information. Should the tag differ from the main (<recording />'s) 0390 * one only by language, "joinphrase" attribute, etc., we better use the main 0391 * one (as confirmed by MusicBrainz developers). 
0392 */ 0393 if( elementName == "title" ) 0394 info.insert( Meta::Field::TITLE, dElement.text() ); 0395 else if( elementName == "length" ) 0396 { 0397 int length = dElement.text().toInt(); 0398 if( length > 0 ) 0399 info.insert( Meta::Field::LENGTH, length ); 0400 } 0401 else if( elementName == "number" ) 0402 { 0403 int number = dElement.text().toInt(); 0404 if( number > 0 ) 0405 info.insert( Meta::Field::TRACKNUMBER, number ); 0406 } 0407 } 0408 dNode = dNode.nextSibling(); 0409 } 0410 return info; 0411 } 0412 0413 QString 0414 MusicBrainzXmlParser::parseReleaseGroup( const QDomElement &e ) 0415 { 0416 QString id; 0417 QVariantMap releaseGroup; 0418 0419 if( e.hasAttribute( "id" ) ) 0420 id = e.attribute( "id" ); 0421 if( id.isEmpty() ) 0422 return id; 0423 0424 if( releaseGroups.contains( id ) ) 0425 releaseGroup = releaseGroups.value( id ); 0426 else 0427 releaseGroup.insert( MusicBrainz::RELEASEGROUPID, id ); 0428 if( releaseGroup.isEmpty() ) 0429 return id; 0430 0431 if( m_type != ReleaseGroup ) 0432 return id; 0433 0434 QDomNode dNode = e.firstChild(); 0435 QDomElement dElement; 0436 QString elementName; 0437 0438 while( !dNode.isNull() ) 0439 { 0440 if( dNode.isElement() ) 0441 { 0442 dElement = dNode.toElement(); 0443 elementName = dElement.tagName(); 0444 0445 if( elementName == "artist-credit" ) 0446 { 0447 QStringList idList = parseArtist( dElement ); 0448 if( !idList.isEmpty() ) 0449 { 0450 QString artist; 0451 QVariantMap artistInfo; 0452 foreach( const QString &id, idList ) 0453 { 0454 if( artists.contains( id ) ) 0455 { 0456 artistInfo.insert( id, artists.value( id ) ); 0457 artist += artists.value( id ); 0458 } 0459 else 0460 // If it's not among IDs, it's a joinphrase attribute. 
0461 artist += id; 0462 } 0463 if( !artistInfo.isEmpty() ) 0464 { 0465 releaseGroup.insert( MusicBrainz::ARTISTID, artistInfo ); 0466 releaseGroup.insert( Meta::Field::ARTIST, artist ); 0467 } 0468 } 0469 } 0470 else if( elementName == "first-release-date" ) 0471 { 0472 int year = 0; 0473 QRegExp yearMatcher( "^(\\d{4}).*$" ); 0474 if( yearMatcher.exactMatch( dElement.text() ) ) 0475 year = yearMatcher.cap( 1 ).toInt(); 0476 if( year > 0 ) 0477 releaseGroup.insert( Meta::Field::YEAR, year ); 0478 } 0479 } 0480 dNode = dNode.nextSibling(); 0481 } 0482 0483 releaseGroups.insert( id, releaseGroup ); 0484 return id; 0485 } 0486 0487 QStringList 0488 MusicBrainzXmlParser::parseArtist( const QDomElement &e ) 0489 { 0490 QDomNode dNode = e.firstChild(), dNode2, dNode3; 0491 QDomElement dElement, dElement2, dElement3; 0492 QStringList idList; 0493 QString id; 0494 0495 while( !dNode.isNull() ) 0496 { 0497 if( dNode.isElement() ) 0498 { 0499 dElement = dNode.toElement(); 0500 0501 if( dElement.tagName() == "name-credit" ) 0502 { 0503 /* 0504 * <name-credit /> can have a <name /> tag which overwrites the 0505 * <artist />'s one. It's set per track or per release, so it's better to 0506 * ignore it to avoid having the same artist twice, maybe spelled 0507 * differently, which is bad for library organization. The <name /> tag 0508 * under <artist /> is global, instead, so let's use it. 
0509 */ 0510 dNode2 = dNode.firstChild(); 0511 while( !dNode2.isNull() ) 0512 { 0513 if( dNode2.isElement() ) 0514 { 0515 dElement2 = dNode2.toElement(); 0516 0517 if( dElement2.tagName() == "artist" ) 0518 { 0519 dNode3 = dNode2.firstChild(); 0520 while( !dNode3.isNull() ) 0521 { 0522 if( dNode3.isElement() ) 0523 { 0524 dElement3 = dNode3.toElement(); 0525 0526 if( dElement3.tagName() == "name" ) 0527 { 0528 if( dElement2.hasAttribute( "id" ) ) 0529 id = dElement2.attribute( "id" ); 0530 if( id.isEmpty() ) 0531 return QStringList(); 0532 artists.insert( id, dElement3.text() ); 0533 idList.append( id ); 0534 if( dElement.hasAttribute( "joinphrase" ) ) 0535 idList.append( dElement.attribute( "joinphrase" ) ); 0536 } 0537 } 0538 dNode3 = dNode3.nextSibling(); 0539 } 0540 } 0541 } 0542 dNode2 = dNode2.nextSibling(); 0543 } 0544 } 0545 } 0546 dNode = dNode.nextSibling(); 0547 } 0548 0549 return idList; 0550 }