File indexing completed on 2024-05-19 04:50:14

0001 /****************************************************************************************
0002  * Copyright (c) 2007 Nikolaj Hald Nielsen <nhn@kde.org>                                *
0003  *                                                                                      *
0004  * This program is free software; you can redistribute it and/or modify it under        *
0005  * the terms of the GNU General Public License as published by the Free Software        *
0006  * Foundation; either version 2 of the License, or (at your option) any later           *
0007  * version.                                                                             *
0008  *                                                                                      *
0009  * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
0010  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
0011  * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
0012  *                                                                                      *
0013  * You should have received a copy of the GNU General Public License along with         *
0014  * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
0015  ****************************************************************************************/
0016 
0017 #include "JamendoXmlParser.h"
0018 
0019 #include "core/support/Amarok.h"
0020 #include "core/support/Debug.h"
0021 #include "core/support/Components.h"
0022 #include "core/logger/Logger.h"
0023 
0024 #include <QFile>
0025 
0026 #include <KFilterDev>
0027 #include <KLocalizedString>
0028 
0029 using namespace Meta;
0030 
0031 static const QString COVERURL_BASE = "http://api.jamendo.com/get2/image/album/redirect/?id=%1&imagesize=100";
0032 
0033 JamendoXmlParser::JamendoXmlParser( const QString &filename )
0034     : QObject()
0035     , ThreadWeaver::Job()
0036     , m_sFileName( filename )
0037     , n_numberOfTransactions ( 0 )
0038     , n_maxNumberOfTransactions ( 5000 )
0039     , m_aborted( false )
0040 {
0041     DEBUG_BLOCK
0042 
0043     // From: http://www.linuxselfhelp.com/HOWTO/MP3-HOWTO-13.html#ss13.3
0044     m_id3GenreHash.insert(  0, "Blues"             );
0045     m_id3GenreHash.insert(  1, "Classic Rock"      );
0046     m_id3GenreHash.insert(  2, "Country"           );
0047     m_id3GenreHash.insert(  3, "Dance"             );
0048     m_id3GenreHash.insert(  4, "Disco"             );
0049     m_id3GenreHash.insert(  5, "Funk"              );
0050     m_id3GenreHash.insert(  6, "Grunge"            );
0051     m_id3GenreHash.insert(  7, "Hip-Hop"           );
0052     m_id3GenreHash.insert(  8, "Jazz"              );
0053     m_id3GenreHash.insert(  9, "Metal"             );
0054     m_id3GenreHash.insert( 10, "New Age"           );
0055     m_id3GenreHash.insert( 11, "Oldies"            );
0056     m_id3GenreHash.insert( 12, "Other"             );
0057     m_id3GenreHash.insert( 13, "Pop"               );
0058     m_id3GenreHash.insert( 14, "R&B"               );
0059     m_id3GenreHash.insert( 15, "Rap"               );
0060     m_id3GenreHash.insert( 16, "Reggae"            );
0061     m_id3GenreHash.insert( 17, "Rock"              );
0062     m_id3GenreHash.insert( 18, "Techno"            );
0063     m_id3GenreHash.insert( 19, "Industrial"        );
0064     m_id3GenreHash.insert( 20, "Alternative"       );
0065     m_id3GenreHash.insert( 21, "Ska"               );
0066     m_id3GenreHash.insert( 22, "Death Metal"       );
0067     m_id3GenreHash.insert( 23, "Pranks"            );
0068     m_id3GenreHash.insert( 24, "Soundtrack"        );
0069     m_id3GenreHash.insert( 25, "Euro-Techno"       );
0070     m_id3GenreHash.insert( 26, "Ambient"           );
0071     m_id3GenreHash.insert( 27, "Trip-Hop"          );
0072     m_id3GenreHash.insert( 28, "Vocal"             );
0073     m_id3GenreHash.insert( 29, "Jazz+Funk"         );
0074     m_id3GenreHash.insert( 30, "Fusion"            );
0075     m_id3GenreHash.insert( 31, "Trance"            );
0076     m_id3GenreHash.insert( 32, "Classical"         );
0077     m_id3GenreHash.insert( 33, "Instrumental"      );
0078     m_id3GenreHash.insert( 34, "Acid"              );
0079     m_id3GenreHash.insert( 35, "House"             );
0080     m_id3GenreHash.insert( 36, "Game"              );
0081     m_id3GenreHash.insert( 37, "Sound Clip"        );
0082     m_id3GenreHash.insert( 38, "Gospel"            );
0083     m_id3GenreHash.insert( 39, "Noise"             );
0084     m_id3GenreHash.insert( 40, "AlternRock"        );
0085     m_id3GenreHash.insert( 41, "Bass"              );
0086     m_id3GenreHash.insert( 42, "Soul"              );
0087     m_id3GenreHash.insert( 43, "Punk"              );
0088     m_id3GenreHash.insert( 44, "Space"             );
0089     m_id3GenreHash.insert( 45, "Meditative"        );
0090     m_id3GenreHash.insert( 46, "Instrumental Pop"  );
0091     m_id3GenreHash.insert( 47, "Instrumental Rock" );
0092     m_id3GenreHash.insert( 48, "Ethnic"            );
0093     m_id3GenreHash.insert( 49, "Gothic"            );
0094     m_id3GenreHash.insert( 50, "Darkwave"          );
0095     m_id3GenreHash.insert( 51, "Techno-Industrial" );
0096     m_id3GenreHash.insert( 52, "Electronic"        );
0097     m_id3GenreHash.insert( 53, "Pop-Folk"          );
0098     m_id3GenreHash.insert( 54, "Eurodance"         );
0099     m_id3GenreHash.insert( 55, "Dream"             );
0100     m_id3GenreHash.insert( 56, "Southern Rock"     );
0101     m_id3GenreHash.insert( 57, "Comedy"            );
0102     m_id3GenreHash.insert( 58, "Cult"              );
0103     m_id3GenreHash.insert( 59, "Gangsta"           );
0104     m_id3GenreHash.insert( 60, "Top 40"            );
0105     m_id3GenreHash.insert( 61, "Christian Rap"     );
0106     m_id3GenreHash.insert( 62, "Pop/Funk"          );
0107     m_id3GenreHash.insert( 63, "Jungle"            );
0108     m_id3GenreHash.insert( 64, "Native American"   );
0109     m_id3GenreHash.insert( 65, "Cabaret"           );
0110     m_id3GenreHash.insert( 66, "New Wave"          );
0111     m_id3GenreHash.insert( 67, "Psychedelic"       );
0112     m_id3GenreHash.insert( 68, "Rave"              );
0113     m_id3GenreHash.insert( 69, "Showtunes"         );
0114     m_id3GenreHash.insert( 70, "Trailer"           );
0115     m_id3GenreHash.insert( 71, "Lo-Fi"             );
0116     m_id3GenreHash.insert( 72, "Tribal"            );
0117     m_id3GenreHash.insert( 73, "Acid Punk"         );
0118     m_id3GenreHash.insert( 74, "Acid Jazz"         );
0119     m_id3GenreHash.insert( 75, "Polka"             );
0120     m_id3GenreHash.insert( 76, "Retro"             );
0121     m_id3GenreHash.insert( 77, "Musical"           );
0122     m_id3GenreHash.insert( 78, "Rock & Roll"       );
0123     m_id3GenreHash.insert( 79, "Hard Rock"         );
0124 
0125     albumTags.clear();
0126     m_dbHandler = new JamendoDatabaseHandler();
0127     connect( this, &JamendoXmlParser::done, this, &JamendoXmlParser::completeJob );
0128 }
0129 
0130 JamendoXmlParser::~JamendoXmlParser()
0131 {
0132     DEBUG_BLOCK
0133     m_reader.clear();
0134     delete m_dbHandler;
0135 }
0136 
0137 void
0138 JamendoXmlParser::run(ThreadWeaver::JobPointer self, ThreadWeaver::Thread *thread)
0139 {
0140     Q_UNUSED(self);
0141     Q_UNUSED(thread);
0142     if( m_aborted )
0143         return;
0144     
0145     readConfigFile( m_sFileName );
0146 }
0147 void
0148 JamendoXmlParser::defaultBegin(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
0149 {
0150     Q_EMIT started(self);
0151     ThreadWeaver::Job::defaultBegin(self, thread);
0152 }
0153 
0154 void
0155 JamendoXmlParser::defaultEnd(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
0156 {
0157     ThreadWeaver::Job::defaultEnd(self, thread);
0158     if (!self->success()) {
0159         Q_EMIT failed(self);
0160     }
0161     Q_EMIT done(self);
0162 }
0163 
0164 void
0165 JamendoXmlParser::completeJob()
0166 {
0167     if( m_aborted )
0168         return;
0169     
0170     Amarok::Logger::longMessage(
0171           i18ncp( "First part of: Jamendo.com database update complete. Added 3 tracks on 4 albums from 5 artists.", "Jamendo.com database update complete. Added 1 track on ", "Jamendo.com database update complete. Added %1 tracks on ", m_nNumberOfTracks)
0172         + i18ncp( "Middle part of: Jamendo.com database update complete. Added 3 tracks on 4 albums from 5 artists.", "1 album from ", "%1 albums from ", m_nNumberOfAlbums)
0173         + i18ncp( "Last part of: Jamendo.com database update complete. Added 3 tracks on 4 albums from 5 artists.", "1 artist.", "%1 artists.", m_nNumberOfArtists )
0174         , Amarok::Logger::Information );
0175 
0176     debug() << "JamendoXmlParser: total number of artists: " << m_nNumberOfArtists;
0177     debug() << "JamendoXmlParser: total number of albums: " << m_nNumberOfAlbums;
0178     debug() << "JamendoXmlParser: total number of tracks: " << m_nNumberOfTracks;
0179     emit doneParsing();
0180     deleteLater();
0181 }
0182 
0183 void
0184 JamendoXmlParser::readConfigFile( const QString &filename )
0185 {
0186     if( m_aborted )
0187         return;
0188  
0189     m_nNumberOfTracks = 0;
0190     m_nNumberOfAlbums = 0;
0191     m_nNumberOfArtists = 0;
0192 
0193     if( !QFile::exists( filename ) )
0194     {
0195         debug() << "jamendo xml file does not exist";
0196         return;
0197     }
0198 
0199     KFilterDev *file = new KFilterDev( filename );
0200 
0201     if( !file->open( QIODevice::ReadOnly ) )
0202     {
0203         debug() << "JamendoXmlParser::readConfigFile error reading file";
0204         return;
0205     }
0206 
0207     m_reader.setDevice( file );
0208 
0209     m_dbHandler->destroyDatabase();
0210     m_dbHandler->createDatabase();
0211 
0212     m_dbHandler->begin(); //start transaction (MAJOR speedup!!)
0213     while( !m_reader.atEnd() )
0214     {
0215         m_reader.readNext();
0216         if( m_reader.isStartElement() )
0217         {
0218             QStringRef localname = m_reader.name();
0219             if( localname == "artist" )
0220             {
0221                 readArtist();
0222             }
0223         }
0224     }
0225 
0226 
0227     m_dbHandler->commit(); //complete transaction
0228     //as genres are just user tags, remove any that are not applied to at least 10 albums to weed out the worst crap
0229     //perhaps make this a config option
0230     m_dbHandler->trimGenres( 10 );
0231 
0232     file->close();
0233     delete file;
0234     QFile::remove( filename );
0235 }
0236 
0237 void
0238 JamendoXmlParser::readArtist()
0239 {
0240     if( m_aborted )
0241         return;
0242     
0243     Q_ASSERT( m_reader.isStartElement() && m_reader.name() == "artist" );
0244 
0245 //     debug() << "Found artist: ";
0246     m_nNumberOfArtists++;
0247 
0248     QString name;
0249     QString description;
0250     QString imageUrl;
0251     QString jamendoUrl;
0252 
0253     while( !m_reader.atEnd() )
0254     {
0255         m_reader.readNext();
0256 
0257         if( m_reader.isEndElement() && m_reader.name() == "artist" )
0258             break;
0259         if( m_reader.isStartElement() )
0260         {
0261             QStringRef localname = m_reader.name();
0262             if( localname == "id" )
0263                 m_currentArtistId = m_reader.readElementText().toInt();
0264             else if ( localname == "name" )
0265                 name = m_reader.readElementText();
0266             else if( localname == "url" )
0267                 jamendoUrl = m_reader.readElementText();
0268             else if( localname == "image" )
0269                 imageUrl = m_reader.readElementText();
0270             else if( localname == "album" )
0271                 readAlbum();
0272         }
0273     }
0274 
0275     JamendoArtist currentArtist( name );
0276     currentArtist.setDescription( description );
0277 
0278     currentArtist.setId( m_currentArtistId );
0279     currentArtist.setPhotoURL( imageUrl );
0280     currentArtist.setJamendoURL( jamendoUrl );
0281 
0282     m_dbHandler->insertArtist( &currentArtist );
0283     countTransaction();
0284 
0285 //     debug() << "    Name:       " << currentArtist.name();
0286 //     debug() << "    Id:         " << currentArtist.id();
0287 //     debug() << "    Photo:      " << currentArtist.photoURL();
0288 //     debug() << "    J_url:      " << currentArtist.jamendoURL();
0289 //     debug() << "    H_url:      " << currentArtist.homeURL();
0290 //     debug() << "    Description:" << currentArtist.description();
0291 
0292 }
0293 
0294 void
0295 JamendoXmlParser::readAlbum()
0296 {
0297     if( m_aborted )
0298         return;
0299     
0300     Q_ASSERT( m_reader.isStartElement() && m_reader.name() == "album" );
0301 
0302     //debug() << "Found album: ";
0303     
0304 
0305     QString name;
0306     QString genre;
0307     QString description;
0308     QStringList tags;
0309     QString coverUrl;
0310     QString releaseDate;
0311 
0312    while( !m_reader.atEnd() )
0313     {
0314         m_reader.readNext();
0315 
0316         if( m_reader.isEndElement() && m_reader.name() == "album" )
0317             break;
0318         if( m_reader.isStartElement() )
0319         {
0320             QStringRef localname = m_reader.name();
0321 
0322             if( localname == "id" )
0323                 m_currentAlbumId = m_reader.readElementText().toInt();
0324             else if ( localname == "name" )
0325                 name = m_reader.readElementText();
0326             else if( localname == "id3genre" )
0327                 genre = m_id3GenreHash.value( m_reader.readElementText().toInt() );
0328             else if( localname == "releasedate" )
0329                 releaseDate = m_reader.readElementText();
0330             else if( localname == "track" )
0331                 readTrack();
0332 //             else if ( currentChildElement.tagName() == "description" )
0333 //                  description = currentChildElement.text();
0334             //we use tags instead of genres for creating genres in the database, as the
0335             //Jamendo.com genres are messy at best
0336 //             else if ( currentChildElement.tagName() == "tags" )
0337 //                 tags = currentChildElement.text().split(' ', Qt::SkipEmptyParts);
0338 //             n = n.nextSibling();
0339         }
0340     }
0341 
0342     //We really do not like albums with no genres, makes the service freeze, so simply ignore this.
0343     if( !genre.isEmpty() && genre != "Unknown" )
0344     {
0345         m_nNumberOfAlbums++;
0346         JamendoAlbum currentAlbum( name );
0347         currentAlbum.setGenre( genre );
0348         currentAlbum.setDescription( description );
0349         currentAlbum.setId( m_currentAlbumId );
0350         currentAlbum.setArtistId( m_currentArtistId );
0351         currentAlbum.setLaunchYear( releaseDate.left( 4 ).toInt() );
0352         currentAlbum.setCoverUrl( COVERURL_BASE.arg( m_currentAlbumId ) );
0353         m_albumArtistMap.insert( currentAlbum.id(), currentAlbum.artistId() );
0354 
0355         int newId = m_dbHandler->insertAlbum( &currentAlbum );
0356         countTransaction();
0357 
0358         //debug() << "inserting genre with album_id = " << newId << " and name = " << genreName;
0359         ServiceGenre currentGenre( genre );
0360         currentGenre.setAlbumId( newId );
0361         m_dbHandler->insertGenre( &currentGenre );
0362         countTransaction();
0363     }
0364 }
0365 
0366 void
0367 JamendoXmlParser::readTrack()
0368 {
0369     if( m_aborted )
0370         return;
0371     
0372     Q_ASSERT( m_reader.isStartElement() && m_reader.name() == "track" );
0373     //debug() << "Found track: ";
0374     m_nNumberOfTracks++;
0375 
0376     QString name;
0377     QString id;
0378     qint64     length = 0LL;
0379     QString trackNumber;
0380     QString genre;
0381 
0382     while( !m_reader.atEnd() )
0383     {
0384         m_reader.readNext();
0385 
0386         if( m_reader.isEndElement() && m_reader.name() == "track" )
0387             break;
0388         if( m_reader.isStartElement() )
0389         {
0390             QStringRef localname = m_reader.name();
0391             if( localname == "name" )
0392                 name = m_reader.readElementText();
0393             else if( localname == "id" )
0394                 id = m_reader.readElementText();
0395             else if( localname == "duration" )
0396                 length = m_reader.readElementText().toFloat() * 1000;
0397             else if ( localname == "numalbum" )
0398                 trackNumber = m_reader.readElementText();
0399             else if ( localname == "id3genre" )
0400                 genre = m_id3GenreHash.value( m_reader.readElementText().toInt() );
0401         }
0402     }
0403 
0404     static const QString previewUrl =
0405         "http://api.jamendo.com/get2/stream/track/redirect/?id=%1&streamencoding=mp32";
0406 
0407     JamendoTrack currentTrack( name );
0408     currentTrack.setId( id.toInt() );
0409     currentTrack.setUidUrl( previewUrl.arg( id ) );
0410     currentTrack.setAlbumId( m_currentAlbumId );
0411     currentTrack.setArtistId( m_currentArtistId );
0412     currentTrack.setLength( length );
0413     currentTrack.setTrackNumber( trackNumber.toInt() );
0414     currentTrack.setGenre( genre );
0415 
0416     if( m_albumArtistMap.contains( currentTrack.albumId() ) )
0417         currentTrack.setArtistId( m_albumArtistMap.value( currentTrack.albumId() ) );
0418 
0419    // debug() << "inserting track with artist id: " << currentTrack.artistId();
0420 
0421     m_dbHandler->insertTrack( &currentTrack );
0422     countTransaction();
0423 }
0424 
0425 void
0426 JamendoXmlParser::countTransaction()
0427 {
0428     n_numberOfTransactions++;
0429     if ( n_numberOfTransactions >= n_maxNumberOfTransactions )
0430     {
0431         m_dbHandler->commit();
0432         m_dbHandler->begin();
0433         n_numberOfTransactions = 0;
0434     }
0435 }
0436 
0437 void
0438 JamendoXmlParser::requestAbort()
0439 {
0440     m_aborted = true;
0441 }
0442 
0443