File indexing completed on 2024-05-05 04:48:35

0001 /****************************************************************************************
0002  * Copyright (c) 2010 Sergey Ivanov <123kash@gmail.com>                                 *
0003  * Copyright (c) 2013 Alberto Villa <avilla@FreeBSD.org>                                *
0004  *                                                                                      *
0005  * This program is free software; you can redistribute it and/or modify it under        *
0006  * the terms of the GNU General Public License as published by the Free Software        *
0007  * Foundation; either version 2 of the License, or (at your option) any later           *
0008  * version.                                                                             *
0009  *                                                                                      *
0010  * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
0011  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
0012  * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
0013  *                                                                                      *
0014  * You should have received a copy of the GNU General Public License along with         *
0015  * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
0016  ****************************************************************************************/
0017 
0018 #define DEBUG_PREFIX "MusicBrainzXmlParser"
0019 
0020 #include "MusicBrainzXmlParser.h"
0021 
0022 #include "core/meta/support/MetaConstants.h"
0023 #include "core/support/Debug.h"
0024 #include "MusicBrainzMeta.h"
0025 
0026 #include <QStringList>
0027 #include <QVariantList>
0028 
0029 MusicBrainzXmlParser::MusicBrainzXmlParser( const QString &doc )
0030     : QObject()
0031     , ThreadWeaver::Job()
0032     , m_doc( "musicbrainz" )
0033     , m_type( 0 )
0034 {
0035     m_doc.setContent( doc );
0036 }
0037 
0038 void
0039 MusicBrainzXmlParser::run(ThreadWeaver::JobPointer self, ThreadWeaver::Thread *thread)
0040 {
0041     Q_UNUSED(self);
0042     Q_UNUSED(thread);
0043     DEBUG_BLOCK
0044 
0045     QDomElement docElem = m_doc.documentElement();
0046     parseElement( docElem );
0047 }
0048 
0049 void
0050 MusicBrainzXmlParser::defaultBegin(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
0051 {
0052     Q_EMIT started(self);
0053     ThreadWeaver::Job::defaultBegin(self, thread);
0054 }
0055 
0056 void
0057 MusicBrainzXmlParser::defaultEnd(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
0058 {
0059     ThreadWeaver::Job::defaultEnd(self, thread);
0060     if (!self->success()) {
0061         Q_EMIT failed(self);
0062     }
0063     Q_EMIT done(self);
0064 }
0065 
0066 int
0067 MusicBrainzXmlParser::type()
0068 {
0069     return m_type;
0070 }
0071 
0072 void
0073 MusicBrainzXmlParser::parseElement( const QDomElement &e )
0074 {
0075     QString elementName = e.tagName();
0076     if( elementName == "recording-list" )
0077     {
0078         m_type = TrackList;
0079         parseRecordingList( e );
0080     }
0081     else if( elementName == "release-group" )
0082     {
0083         m_type = ReleaseGroup;
0084         parseReleaseGroup( e );
0085     }
0086     else
0087         parseChildren( e );
0088 }
0089 
0090 void
0091 MusicBrainzXmlParser::parseChildren( const QDomElement &e )
0092 {
0093     QDomNode child = e.firstChild();
0094     while( !child.isNull() )
0095     {
0096         if( child.isElement() )
0097             parseElement( child.toElement() );
0098         child = child.nextSibling();
0099     }
0100 }
0101 
0102 QStringList
0103 MusicBrainzXmlParser::parseRecordingList( const QDomElement &e )
0104 {
0105     QDomNode dNode = e.firstChild();
0106     QDomElement dElement;
0107     QStringList list;
0108 
0109     while( !dNode.isNull() )
0110     {
0111         if( dNode.isElement() )
0112         {
0113             dElement = dNode.toElement();
0114 
0115             if( dElement.tagName() == "recording" )
0116                 list << parseRecording( dElement );
0117         }
0118         dNode = dNode.nextSibling();
0119     }
0120     return list;
0121 }
0122 
0123 QString
0124 MusicBrainzXmlParser::parseRecording( const QDomElement &e )
0125 {
0126     QString id;
0127     QVariantMap track;
0128 
0129     if( e.hasAttribute( "id" ) )
0130         id = e.attribute( "id" );
0131     if( id.isEmpty() )
0132         return id;
0133 
0134     if( tracks.contains( id ) )
0135         track = tracks.value( id );
0136     else
0137         track.insert( MusicBrainz::TRACKID, id );
0138     if( track.isEmpty() )
0139         return id;
0140 
0141     if( e.hasAttribute( "ext:score" ) )
0142         track.insert( Meta::Field::SCORE, e.attribute( "ext:score" ).toInt() );
0143 
0144     QDomNode dNode = e.firstChild();
0145     QDomElement dElement;
0146     QString elementName;
0147 
0148     while( !dNode.isNull() )
0149     {
0150         if( dNode.isElement() )
0151         {
0152             dElement = dNode.toElement();
0153             elementName = dElement.tagName();
0154 
0155             if( elementName == "title" )
0156                 track.insert( Meta::Field::TITLE, dElement.text() );
0157             else if( elementName == "length" )
0158             {
0159                 int length = dElement.text().toInt();
0160                 if( length > 0 )
0161                     track.insert( Meta::Field::LENGTH, length );
0162             }
0163             else if( elementName == "artist-credit" )
0164             {
0165                 QStringList idList = parseArtist( dElement );
0166                 if( !idList.isEmpty() )
0167                 {
0168                     QString artist;
0169                     QVariantMap artistInfo;
0170                     foreach( const QString &id, idList )
0171                     {
0172                         if( artists.contains( id ) )
0173                         {
0174                             artistInfo.insert( id, artists.value( id ) );
0175                             artist += artists.value( id );
0176                         }
0177                         else
0178                             // If it's not among IDs, it's a joinphrase attribute.
0179                             artist += id;
0180                     }
0181                     if( !artistInfo.isEmpty() )
0182                     {
0183                         track.insert( MusicBrainz::ARTISTID, artistInfo );
0184                         track.insert( Meta::Field::ARTIST, artist );
0185                     }
0186                 }
0187             }
0188             else if( elementName == "release-list" )
0189             {
0190                 m_currentTrackInfo.clear();
0191                 track.insert( MusicBrainz::RELEASELIST, parseReleaseList( dElement ) );
0192                 track.insert( MusicBrainz::TRACKINFO, m_currentTrackInfo );
0193             }
0194         }
0195         dNode = dNode.nextSibling();
0196     }
0197 
0198     tracks.insert( id, track );
0199     return id;
0200 }
0201 
0202 QStringList
0203 MusicBrainzXmlParser::parseReleaseList( const QDomElement &e )
0204 {
0205     QDomNode dNode = e.firstChild();
0206     QDomElement dElement;
0207     QStringList list;
0208 
0209     while( !dNode.isNull() )
0210     {
0211         if( dNode.isElement() )
0212         {
0213             dElement = dNode.toElement();
0214 
0215             if( dElement.tagName() == "release" )
0216                 list << parseRelease( dElement );
0217         }
0218         dNode = dNode.nextSibling();
0219     }
0220     list.removeDuplicates();
0221     return list;
0222 }
0223 
0224 QString
0225 MusicBrainzXmlParser::parseRelease( const QDomElement &e )
0226 {
0227     QString id;
0228     QVariantMap release;
0229 
0230     if( e.hasAttribute( "id" ) )
0231         id = e.attribute( "id" );
0232     if( id.isEmpty() )
0233         return id;
0234 
0235     if( releases.contains( id ) )
0236         release = releases.value( id );
0237     else
0238         release.insert( MusicBrainz::RELEASEID, id );
0239     if( release.isEmpty() )
0240         return id;
0241 
0242     QDomNode dNode = e.firstChild();
0243     QDomElement dElement;
0244     QString elementName;
0245 
0246     while( !dNode.isNull() )
0247     {
0248         if( dNode.isElement() )
0249         {
0250             dElement = dNode.toElement();
0251             elementName = dElement.tagName();
0252 
0253             if( elementName == "title" )
0254                 /*
0255                  * Avoid checking for "(disc N)" string as it's not a safe way to detect
0256                  * disc number.
0257                  */
0258                 release.insert( Meta::Field::TITLE, dElement.text() );
0259             else if( elementName == "medium-list" )
0260             {
0261                 QVariantMap info = parseMediumList( dElement );
0262                 QVariantList trackCountList = info.values( MusicBrainz::TRACKCOUNT );
0263                 int trackCount = 0;
0264                 foreach( const QVariant &count, trackCountList )
0265                 {
0266                     trackCount += count.toInt();
0267                     if( count.toInt() > 0 )
0268                         release.insert( MusicBrainz::TRACKCOUNT, count.toInt() );
0269                 }
0270                 if( info.contains( Meta::Field::DISCNUMBER ) )
0271                 {
0272                     int discNumber = info.value( Meta::Field::DISCNUMBER ).toInt();
0273                     if( discNumber < 1 || ( discNumber == 1 &&
0274                         ( trackCount <= 0 || trackCountList.size() != 2 ) ) )
0275                         info.remove( Meta::Field::DISCNUMBER );
0276                 }
0277                 QVariantList trackInfoList = m_currentTrackInfo.value( id ).toList();
0278                 trackInfoList.append( info );
0279                 m_currentTrackInfo.insert( id, trackInfoList );
0280             }
0281             else if( elementName == "release-group" )
0282                 release.insert( MusicBrainz::RELEASEGROUPID, parseReleaseGroup( dElement ) );
0283         }
0284         dNode = dNode.nextSibling();
0285     }
0286 
0287     releases.insert( id, release );
0288     return id;
0289 }
0290 
0291 QVariantMap
0292 MusicBrainzXmlParser::parseMediumList( const QDomElement &e )
0293 {
0294     QDomNode dNode = e.firstChild();
0295     QDomElement dElement;
0296     QString elementName;
0297     QVariantMap info;
0298 
0299     while( !dNode.isNull() )
0300     {
0301         if( dNode.isElement() )
0302         {
0303             dElement = dNode.toElement();
0304             elementName = dElement.tagName();
0305 
0306             if( elementName == "track-count" )
0307                 info.insert( MusicBrainz::TRACKCOUNT, dElement.text().toInt() );
0308             else if( elementName == "medium" )
0309                 info.unite( parseMedium( dElement ) );
0310         }
0311         dNode = dNode.nextSibling();
0312     }
0313     return info;
0314 }
0315 
0316 QVariantMap
0317 MusicBrainzXmlParser::parseMedium( const QDomElement &e )
0318 {
0319     QDomNode dNode = e.firstChild();
0320     QDomElement dElement;
0321     QString elementName;
0322     QVariantMap info;
0323 
0324     while( !dNode.isNull() )
0325     {
0326         if( dNode.isElement() )
0327         {
0328             dElement = dNode.toElement();
0329             elementName = dElement.tagName();
0330 
0331             if( elementName == "position" )
0332             {
0333                 int discNumber = dElement.text().toInt();
0334                 if( discNumber > 0 )
0335                     info.insert( Meta::Field::DISCNUMBER, discNumber );
0336             }
0337             else if( elementName == "track-list" )
0338             {
0339                 if( dElement.hasAttribute( "count" ) )
0340                     info.insert( MusicBrainz::TRACKCOUNT,
0341                                  -1 * dElement.attribute( "count" ).toInt() );
0342                 info.unite( parseTrackList( dElement ) );
0343             }
0344         }
0345         dNode = dNode.nextSibling();
0346     }
0347     return info;
0348 }
0349 
0350 QVariantMap
0351 MusicBrainzXmlParser::parseTrackList( const QDomElement &e )
0352 {
0353     QDomNode dNode = e.firstChild();
0354     QDomElement dElement;
0355     QVariantMap info;
0356 
0357     while( !dNode.isNull() )
0358     {
0359         if( dNode.isElement() )
0360         {
0361             dElement = dNode.toElement();
0362 
0363             if( dElement.tagName() == "track" )
0364                 info = parseTrack( dElement );
0365         }
0366         dNode = dNode.nextSibling();
0367     }
0368     return info;
0369 }
0370 
0371 QVariantMap
0372 MusicBrainzXmlParser::parseTrack( const QDomElement &e )
0373 {
0374     QDomNode dNode = e.firstChild();
0375     QDomElement dElement;
0376     QString elementName;
0377     QVariantMap info;
0378 
0379     while( !dNode.isNull() )
0380     {
0381         if( dNode.isElement() )
0382         {
0383             dElement = dNode.toElement();
0384             elementName = dElement.tagName();
0385 
0386             /*
0387              * Ignore any <artist-credit /> tag because per track-artists are used
0388              * inconsistently (especially with classical). Composer tag should be used to
0389              * get more information. Should the tag differ from the main (<recording />'s)
0390              * one only by language, "joinphrase" attribute, etc., we better use the main
0391              * one (as confirmed by MusicBrainz developers).
0392              */
0393             if( elementName == "title" )
0394                 info.insert( Meta::Field::TITLE, dElement.text() );
0395             else if( elementName == "length" )
0396             {
0397                 int length = dElement.text().toInt();
0398                 if( length > 0 )
0399                     info.insert( Meta::Field::LENGTH, length );
0400             }
0401             else if( elementName == "number" )
0402             {
0403                 int number = dElement.text().toInt();
0404                 if( number > 0 )
0405                     info.insert( Meta::Field::TRACKNUMBER, number );
0406             }
0407         }
0408         dNode = dNode.nextSibling();
0409     }
0410     return info;
0411 }
0412 
0413 QString
0414 MusicBrainzXmlParser::parseReleaseGroup( const QDomElement &e )
0415 {
0416     QString id;
0417     QVariantMap releaseGroup;
0418 
0419     if( e.hasAttribute( "id" ) )
0420         id = e.attribute( "id" );
0421     if( id.isEmpty() )
0422         return id;
0423 
0424     if( releaseGroups.contains( id ) )
0425         releaseGroup = releaseGroups.value( id );
0426     else
0427         releaseGroup.insert( MusicBrainz::RELEASEGROUPID, id );
0428     if( releaseGroup.isEmpty() )
0429         return id;
0430 
0431     if( m_type != ReleaseGroup )
0432         return id;
0433 
0434     QDomNode dNode = e.firstChild();
0435     QDomElement dElement;
0436     QString elementName;
0437 
0438     while( !dNode.isNull() )
0439     {
0440         if( dNode.isElement() )
0441         {
0442             dElement = dNode.toElement();
0443             elementName = dElement.tagName();
0444 
0445             if( elementName == "artist-credit" )
0446             {
0447                 QStringList idList = parseArtist( dElement );
0448                 if( !idList.isEmpty() )
0449                 {
0450                     QString artist;
0451                     QVariantMap artistInfo;
0452                     foreach( const QString &id, idList )
0453                     {
0454                         if( artists.contains( id ) )
0455                         {
0456                             artistInfo.insert( id, artists.value( id ) );
0457                             artist += artists.value( id );
0458                         }
0459                         else
0460                             // If it's not among IDs, it's a joinphrase attribute.
0461                             artist += id;
0462                     }
0463                     if( !artistInfo.isEmpty() )
0464                     {
0465                         releaseGroup.insert( MusicBrainz::ARTISTID, artistInfo );
0466                         releaseGroup.insert( Meta::Field::ARTIST, artist );
0467                     }
0468                 }
0469             }
0470             else if( elementName == "first-release-date" )
0471             {
0472                 int year = 0;
0473                 QRegExp yearMatcher( "^(\\d{4}).*$" );
0474                 if( yearMatcher.exactMatch( dElement.text() ) )
0475                     year = yearMatcher.cap( 1 ).toInt();
0476                 if( year > 0 )
0477                     releaseGroup.insert( Meta::Field::YEAR, year );
0478             }
0479         }
0480         dNode = dNode.nextSibling();
0481     }
0482 
0483     releaseGroups.insert( id, releaseGroup );
0484     return id;
0485 }
0486 
0487 QStringList
0488 MusicBrainzXmlParser::parseArtist( const QDomElement &e )
0489 {
0490     QDomNode dNode = e.firstChild(), dNode2, dNode3;
0491     QDomElement dElement, dElement2, dElement3;
0492     QStringList idList;
0493     QString id;
0494 
0495     while( !dNode.isNull() )
0496     {
0497         if( dNode.isElement() )
0498         {
0499             dElement = dNode.toElement();
0500 
0501             if( dElement.tagName() == "name-credit" )
0502             {
0503                 /*
0504                  * <name-credit /> can have a <name /> tag which overwrites the
0505                  * <artist />'s one. It's set per track or per release, so it's better to
0506                  * ignore it to avoid having the same artist twice, maybe spelled
0507                  * differently, which is bad for library organization. The <name /> tag
0508                  * under <artist /> is global, instead, so let's use it.
0509                  */
0510                 dNode2 = dNode.firstChild();
0511                 while( !dNode2.isNull() )
0512                 {
0513                     if( dNode2.isElement() )
0514                     {
0515                         dElement2 = dNode2.toElement();
0516 
0517                         if( dElement2.tagName() == "artist" )
0518                         {
0519                             dNode3 = dNode2.firstChild();
0520                             while( !dNode3.isNull() )
0521                             {
0522                                 if( dNode3.isElement() )
0523                                 {
0524                                     dElement3 = dNode3.toElement();
0525 
0526                                     if( dElement3.tagName() == "name" )
0527                                     {
0528                                         if( dElement2.hasAttribute( "id" ) )
0529                                             id = dElement2.attribute( "id" );
0530                                         if( id.isEmpty() )
0531                                             return QStringList();
0532                                         artists.insert( id, dElement3.text() );
0533                                         idList.append( id );
0534                                         if( dElement.hasAttribute( "joinphrase" ) )
0535                                             idList.append( dElement.attribute( "joinphrase" ) );
0536                                     }
0537                                 }
0538                                 dNode3 = dNode3.nextSibling();
0539                             }
0540                         }
0541                     }
0542                     dNode2 = dNode2.nextSibling();
0543                 }
0544             }
0545         }
0546         dNode = dNode.nextSibling();
0547     }
0548 
0549     return idList;
0550 }