libk3b/plugin/k3baudiodecoder.cpp

0001 /*
0002     SPDX-FileCopyrightText: 1998-2009 Sebastian Trueg <trueg@k3b.org>
0003     SPDX-License-Identifier: GPL-2.0-or-later
0004 */
0005 #include <config-libk3b.h>
0006
0007 #include "k3bcore.h"
0008 #include "k3baudiodecoder.h"
0009 #include "k3bpluginmanager.h"
0010 #include "k3b_i18n.h"
0011
0012 #include <KFileMetaData/ExtractionResult>
0013 #include <KFileMetaData/Extractor>
0014 #include <KFileMetaData/ExtractorCollection>
0015 #include <KFileMetaData/Properties>
0016
0017 #include <QDebug>
0018 #include <QMap>
0019 #include <QMimeDatabase>
0020 #include <QMimeType>
0021
0022 #include <math.h>
0023
0024 #include <samplerate.h>
0025
// Fallbacks for platforms without lrint()/lrintf(): round to the nearest
// integer. floor(x + 0.5) is used instead of a plain integer cast of
// (x + 0.5) because the cast truncates toward zero and therefore rounds
// negative values in the wrong direction (-1.7 became -1, -5.0 became -4).
// The macro argument is parenthesized so that compound expressions expand
// as expected.
#if !(HAVE_LRINT && HAVE_LRINTF)
#define lrint(dbl)              (static_cast<int>( floor( (dbl) + 0.5 ) ))
#define lrintf(flt)             (static_cast<int>( floorf( (flt) + 0.5f ) ))
#endif
0030
// size of the internal decoding buffer: one second of audio data
// (75 blocks of 2352 bytes each)
static constexpr int DECODING_BUFFER_SIZE = 2352 * 75;
0033
0034 namespace
0035 {
0036
0037 typedef QMap<K3b::AudioDecoder::MetaDataField, QString> MetaInfoMap;
0038
0039 class ExtractionResult : public KFileMetaData::ExtractionResult
0040 {
0041 public:
0042     ExtractionResult( const QString& filename, const QString& mimetype, MetaInfoMap& metaInfoMap )
0043         : KFileMetaData::ExtractionResult( filename, mimetype, KFileMetaData::ExtractionResult::ExtractMetaData ),
0044           metaInfoMap_( metaInfoMap ) {}
0045
0046     void append(const QString& /*text*/) override {}
0047
0048     void addType(KFileMetaData::Type::Type /*type*/) override {}
0049
0050     void add(KFileMetaData::Property::Property property, const QVariant& value) override
0051     {
0052         switch(property)
0053         {
0054         case KFileMetaData::Property::Title:
0055             metaInfoMap_.insert( K3b::AudioDecoder::META_TITLE, value.toString() );
0056             break;
0057         case KFileMetaData::Property::Artist:
0058             metaInfoMap_.insert( K3b::AudioDecoder::META_ARTIST, value.toString() );
0059             break;
0060         case KFileMetaData::Property::Lyricist:
0061             metaInfoMap_.insert( K3b::AudioDecoder::META_SONGWRITER, value.toString() );
0062             break;
0063         case KFileMetaData::Property::Composer:
0064             metaInfoMap_.insert( K3b::AudioDecoder::META_COMPOSER, value.toString() );
0065             break;
0066         case KFileMetaData::Property::Comment:
0067             metaInfoMap_.insert( K3b::AudioDecoder::META_COMMENT, value.toString() );
0068             break;
0069         default:
0070             break;
0071         }
0072     }
0073
0074 private:
0075     MetaInfoMap& metaInfoMap_;
0076 };
0077
0078 } // namespace
0079
0080 class K3b::AudioDecoder::Private
0081 {
0082 public:
0083     Private()
0084         : metaDataCollection(NULL),
0085           resampleState(0),
0086           resampleData(0),
0087           inBuffer(0),
0088           inBufferPos(0),
0089           inBufferFill(0),
0090           outBuffer(0),
0091           monoBuffer(0),
0092           decodingBufferPos(0),
0093           decodingBufferFill(0),
0094           valid(true) {
0095     }
0096
0097     // the current position of the decoder
0098     // This does NOT include the decodingBuffer
0099     K3b::Msf currentPos;
0100
0101     // since the current position above is measured in frames
0102     // there might be a little offset since the decoded data is not
0103     // always a multiple of 2353 bytes
0104     int currentPosOffset;
0105
0106     // already decoded bytes from last init or last seek
0107     // TODO: replace alreadyDecoded with currentPos
0108     unsigned long alreadyDecoded;
0109
0110     K3b::Msf decodingStartPos;
0111
0112     KFileMetaData::ExtractorCollection *metaDataCollection;
0113     QMimeDatabase mimeDatabase;
0114     QMimeType mimeType;
0115
0116     // set to true once decodeInternal() returned 0
0117     bool decoderFinished;
0118
0119     // resampling
0120     SRC_STATE* resampleState;
0121     SRC_DATA* resampleData;
0122
0123     float* inBuffer;
0124     float* inBufferPos;
0125     int inBufferFill;
0126
0127     float* outBuffer;
0128
0129     int samplerate;
0130     int channels;
0131
0132     // mono -> stereo conversion
0133     char* monoBuffer;
0134
0135     char decodingBuffer[DECODING_BUFFER_SIZE];
0136     char* decodingBufferPos;
0137     int decodingBufferFill;
0138
0139     QMap<QString, QString> technicalInfoMap;
0140     MetaInfoMap metaInfoMap;
0141
0142     bool valid;
0143 };
0144
0145
0146
0147 K3b::AudioDecoder::AudioDecoder( QObject* parent )
0148     : QObject( parent )
0149 {
0150     d = new Private();
0151 }
0152
0153
0154 K3b::AudioDecoder::~AudioDecoder()
0155 {
0156     cleanup();
0157
0158     if( d->inBuffer ) delete [] d->inBuffer;
0159     if( d->outBuffer ) delete [] d->outBuffer;
0160     if( d->monoBuffer ) delete [] d->monoBuffer;
0161
0162     delete d->resampleData;
0163     if (d->resampleState) {
0164         src_delete(d->resampleState);
0165         d->resampleState = NULL;
0166     }
0167     delete d;
0168 }
0169
0170
0171 void K3b::AudioDecoder::setFilename( const QString& filename )
0172 {
0173     m_fileName = filename;
0174     d->mimeType = QMimeType();
0175 }
0176
0177
0178 bool K3b::AudioDecoder::isValid() const
0179 {
0180     return d->valid;
0181 }
0182
0183
0184 bool K3b::AudioDecoder::analyseFile()
0185 {
0186     d->technicalInfoMap.clear();
0187     d->metaInfoMap.clear();
0188     d->mimeType = QMimeType();
0189
0190     cleanup();
0191
0192     bool ret = analyseFileInternal( m_length, d->samplerate, d->channels );
0193     if( ret && ( d->channels == 1 || d->channels == 2 ) && m_length > 0 ) {
0194         d->valid = initDecoder();
0195         return d->valid;
0196     }
0197     else {
0198         d->valid = false;
0199         return false;
0200     }
0201 }
0202
0203
0204 bool K3b::AudioDecoder::initDecoder( const K3b::Msf& startOffset )
0205 {
0206     if( initDecoder() ) {
0207         if( startOffset > 0 )
0208             return seek( startOffset );
0209         else
0210             return true;
0211     }
0212     else
0213         return false;
0214 }
0215
0216
0217 bool K3b::AudioDecoder::initDecoder()
0218 {
0219     cleanup();
0220
0221     if( d->resampleState )
0222         src_reset( d->resampleState );
0223
0224     d->alreadyDecoded = 0;
0225     d->currentPos = 0;
0226     d->currentPosOffset = 0;
0227     d->decodingBufferFill = 0;
0228     d->decodingBufferPos = 0;
0229     d->decodingStartPos = 0;
0230     d->inBufferFill = 0;
0231
0232     d->decoderFinished = false;
0233
0234     return initDecoderInternal();
0235 }
0236
0237
0238 int K3b::AudioDecoder::decode( char* _data, int maxLen )
0239 {
0240     unsigned long lengthToDecode = (m_length - d->decodingStartPos).audioBytes();
0241
0242     if( d->alreadyDecoded >= lengthToDecode )
0243         return 0;
0244
0245     if( maxLen <= 0 )
0246         return 0;
0247
0248     int read = 0;
0249
0250     if( d->decodingBufferFill == 0 ) {
0251         //
0252         // now we decode into the decoding buffer
0253         // to ensure a minimum buffer size
0254         //
0255         d->decodingBufferFill = 0;
0256         d->decodingBufferPos = d->decodingBuffer;
0257
0258         if( !d->decoderFinished ) {
0259             if( d->samplerate != 44100 ) {
0260
0261                 // check if we have data left from some previous conversion
0262                 if( d->inBufferFill > 0 ) {
0263                     read = resample( d->decodingBuffer, DECODING_BUFFER_SIZE );
0264                 }
0265                 else {
0266                     if( !d->inBuffer ) {
0267                         d->inBuffer = new float[DECODING_BUFFER_SIZE/2];
0268                     }
0269
0270                     if( (read = decodeInternal( d->decodingBuffer, DECODING_BUFFER_SIZE )) == 0 )
0271                         d->decoderFinished = true;
0272
0273                     d->inBufferFill = read/2;
0274                     d->inBufferPos = d->inBuffer;
0275                     from16bitBeSignedToFloat( d->decodingBuffer, d->inBuffer, d->inBufferFill );
0276
0277                     read = resample( d->decodingBuffer, DECODING_BUFFER_SIZE );
0278                 }
0279             }
0280             else if( d->channels == 1 ) {
0281                 if( !d->monoBuffer ) {
0282                     d->monoBuffer = new char[DECODING_BUFFER_SIZE/2];
0283                 }
0284
0285                 // we simply duplicate every frame
0286                 if( (read = decodeInternal( d->monoBuffer, DECODING_BUFFER_SIZE/2 )) == 0 )
0287                     d->decoderFinished = true;
0288
0289                 for( int i = 0; i < read; i+=2 ) {
0290                     d->decodingBuffer[2*i] = d->decodingBuffer[2*i+2] = d->monoBuffer[i];
0291                     d->decodingBuffer[2*i+1] = d->decodingBuffer[2*i+3] = d->monoBuffer[i+1];
0292                 }
0293
0294                 read *= 2;
0295             }
0296             else {
0297                 if( (read = decodeInternal( d->decodingBuffer, DECODING_BUFFER_SIZE )) == 0 )
0298                     d->decoderFinished = true;
0299             }
0300         }
0301
0302         if( read < 0 ) {
0303             return -1;
0304         }
0305         else if( read == 0 ) {
0306             // check if we need to pad
0307             int bytesToPad = lengthToDecode - d->alreadyDecoded;
0308             if( bytesToPad > 0 ) {
0309                 qDebug() << "(K3b::AudioDecoder) track length: " << lengthToDecode
0310                          << "; decoded module data: " << d->alreadyDecoded
0311                          << "; we need to pad " << bytesToPad << " bytes." << Qt::endl;
0312
0313                 if( DECODING_BUFFER_SIZE < bytesToPad )
0314                     bytesToPad = DECODING_BUFFER_SIZE;
0315
0316                 ::memset( d->decodingBuffer, 0, bytesToPad );
0317
0318                 qDebug() << "(K3b::AudioDecoder) padded " << bytesToPad << " bytes.";
0319
0320                 read = bytesToPad;
0321             }
0322             else {
0323                 qDebug() << "(K3b::AudioDecoder) decoded " << d->alreadyDecoded << " bytes.";
0324                 return 0;
0325             }
0326         }
0327         else {
0328
0329             // check if we decoded too much
0330             if( d->alreadyDecoded + read > lengthToDecode ) {
0331                 qDebug() << "(K3b::AudioDecoder) we decoded too much. Cutting output by "
0332                          << (read + d->alreadyDecoded - lengthToDecode) << Qt::endl;
0333                 read = lengthToDecode - d->alreadyDecoded;
0334             }
0335         }
0336
0337         d->decodingBufferFill = read;
0338     }
0339
0340
0341     // clear out the decoding buffer
0342     read = qMin( maxLen, d->decodingBufferFill );
0343     ::memcpy( _data, d->decodingBufferPos, read );
0344     d->decodingBufferPos += read;
0345     d->decodingBufferFill -= read;
0346
0347     d->alreadyDecoded += read;
0348     d->currentPos += (read+d->currentPosOffset)/2352;
0349     d->currentPosOffset = (read+d->currentPosOffset)%2352;
0350
0351     return read;
0352 }
0353
0354
0355 // resample data in d->inBufferPos and save the result to data
0356 //
0357 //
0358 int K3b::AudioDecoder::resample( char* data, int maxLen )
0359 {
0360     if( !d->resampleState ) {
0361         d->resampleState = src_new( SRC_SINC_MEDIUM_QUALITY, d->channels, 0 );
0362         if( !d->resampleState ) {
0363             qDebug() << "(K3b::AudioDecoder) unable to initialize resampler.";
0364             return -1;
0365         }
0366         d->resampleData = new SRC_DATA;
0367     }
0368
0369     if( !d->outBuffer ) {
0370         d->outBuffer = new float[DECODING_BUFFER_SIZE/2];
0371     }
0372
0373     d->resampleData->data_in = d->inBufferPos;
0374     d->resampleData->data_out = d->outBuffer;
0375     d->resampleData->input_frames = d->inBufferFill/d->channels;
0376     d->resampleData->output_frames = maxLen/2/2;  // in case of mono files we need the space anyway
0377     d->resampleData->src_ratio = 44100.0/(double)d->samplerate;
0378     if( d->inBufferFill == 0 )
0379         d->resampleData->end_of_input = 1;  // this should force libsamplerate to output the last frames
0380     else
0381         d->resampleData->end_of_input = 0;
0382
0383     int len = 0;
0384     if( (len = src_process( d->resampleState, d->resampleData ) ) ) {
0385         qDebug() << "(K3b::AudioDecoder) error while resampling: " << src_strerror(len);
0386         return -1;
0387     }
0388
0389     if( d->channels == 2 )
0390         fromFloatTo16BitBeSigned( d->outBuffer, data, d->resampleData->output_frames_gen*d->channels );
0391     else {
0392         for( int i = 0; i < d->resampleData->output_frames_gen; ++i ) {
0393             fromFloatTo16BitBeSigned( &d->outBuffer[i], &data[4*i], 1 );
0394             fromFloatTo16BitBeSigned( &d->outBuffer[i], &data[4*i+2], 1 );
0395         }
0396     }
0397
0398     d->inBufferPos += d->resampleData->input_frames_used*d->channels;
0399     d->inBufferFill -= d->resampleData->input_frames_used*d->channels;
0400     if( d->inBufferFill <= 0 ) {
0401         d->inBufferPos = d->inBuffer;
0402         d->inBufferFill = 0;
0403     }
0404
0405     // 16 bit frames, so we need to multiply by 2
0406     // and we always have two channels
0407     return d->resampleData->output_frames_gen*2*2;
0408 }
0409
0410
0411 void K3b::AudioDecoder::from16bitBeSignedToFloat( char* src, float* dest, int samples )
0412 {
0413     while( samples ) {
0414         samples--;
0415         dest[samples] = static_cast<float>( qint16(((src[2*samples]<<8)&0xff00)|(src[2*samples+1]&0x00ff)) / 32768.0 );
0416     }
0417 }
0418
0419
0420 void K3b::AudioDecoder::fromFloatTo16BitBeSigned( float* src, char* dest, int samples )
0421 {
0422     while( samples ) {
0423         samples--;
0424
0425         float scaled = src[samples] * 32768.0;
0426         qint16 val = 0;
0427
0428         // clipping
0429         if( scaled >= ( 1.0 * 0x7FFF ) )
0430             val = 32767;
0431         else if( scaled <= ( -8.0 * 0x1000 ) )
0432             val = -32768;
0433         else
0434             val = lrintf(scaled);
0435
0436         dest[2*samples]   = val>>8;
0437         dest[2*samples+1] = val;
0438     }
0439 }
0440
0441
0442 void K3b::AudioDecoder::from8BitTo16BitBeSigned( char* src, char* dest, int samples )
0443 {
0444     while( samples ) {
0445         samples--;
0446
0447         float scaled = static_cast<float>(quint8(src[samples])-128) / 128.0 * 32768.0;
0448         qint16 val = 0;
0449
0450         // clipping
0451         if( scaled >= ( 1.0 * 0x7FFF ) )
0452             val = 32767;
0453         else if( scaled <= ( -8.0 * 0x1000 ) )
0454             val = -32768;
0455         else
0456             val = lrintf(scaled);
0457
0458         dest[2*samples]   = val>>8;
0459         dest[2*samples+1] = val;
0460     }
0461 }
0462
0463
0464 bool K3b::AudioDecoder::seek( const K3b::Msf& pos )
0465 {
0466     qDebug() << "(K3b::AudioDecoder) seek from " << d->currentPos.toString() << " (+" << d->currentPosOffset
0467              << ") to " << pos.toString() << Qt::endl;
0468
0469     if( pos > length() )
0470         return false;
0471
0472     d->decoderFinished = false;
0473
0474     if( pos == d->currentPos && d->currentPosOffset == 0 )
0475         return true;
0476
0477     if( pos == 0 )
0478         return initDecoder();
0479
0480     bool success = false;
0481
0482     //
0483     // First check if we may do a "perfect seek".
0484     // We cannot rely on the decoding plugins to seek perfectly. Especially
0485     // the mp3 decoder does not. But in case we want to split a live recording
0486     // it is absolutely necessary to perform a perfect seek.
0487     // So if we did not already decode past the seek position and the difference
0488     // between the current position and the seek position is less than some fixed
0489     // value we simply decode up to the seek position.
0490     //
0491     if( ( pos > d->currentPos ||
0492           ( pos == d->currentPos && d->currentPosOffset == 0 ) )
0493         &&
0494         ( pos - d->currentPos < K3b::Msf(0,10,0) ) ) {  // < 10 seconds is ok
0495         qDebug() << "(K3b::AudioDecoder) performing perfect seek from " << d->currentPos.toString()
0496                  << " to " << pos.toString() << ". :)" << Qt::endl;
0497
0498         qint64 bytesToDecode = pos.audioBytes() - d->currentPos.audioBytes() - d->currentPosOffset;
0499         qDebug() << "(K3b::AudioDecoder) seeking " << bytesToDecode << " bytes.";
0500         char buffi[10*2352];
0501         while( bytesToDecode > 0 ) {
0502             int read = decode( buffi, qMin(( qint64 )( 10*2352 ), bytesToDecode) );
0503             if( read <= 0 )
0504                 return false;
0505
0506             bytesToDecode -= read;
0507         }
0508
0509         qDebug() << "(K3b::AudioDecoder) perfect seek done.";
0510
0511         success = true;
0512     }
0513     else {
0514         //
0515         // Here we have to reset the resampling stuff since we restart decoding at another position.
0516         //
0517         if( d->resampleState )
0518             src_reset( d->resampleState );
0519         d->inBufferFill = 0;
0520
0521         //
0522         // And also reset the decoding buffer to not return any garbage from previous decoding.
0523         //
0524         d->decodingBufferFill = 0;
0525
0526         success = seekInternal( pos );
0527     }
0528
0529     d->alreadyDecoded = 0;
0530     d->currentPos = d->decodingStartPos = pos;
0531     d->currentPosOffset = 0;
0532
0533     return success;
0534 }
0535
0536
0537 void K3b::AudioDecoder::cleanup()
0538 {
0539     if (d->metaDataCollection) {
0540         delete d->metaDataCollection;
0541         d->metaDataCollection = NULL;
0542     }
0543 }
0544
0545
0546 QString K3b::AudioDecoder::metaInfo( MetaDataField f )
0547 {
0548     if( d->metaInfoMap.contains( f ) )
0549         return d->metaInfoMap[f];
0550
0551     // fall back to KFileMetaData
0552     if( !d->mimeType.isValid() )
0553     {
0554         d->mimeType = d->mimeDatabase.mimeTypeForFile( m_fileName );
0555         if (!d->metaDataCollection)
0556             d->metaDataCollection = new KFileMetaData::ExtractorCollection;
0557         for( KFileMetaData::Extractor* plugin : d->metaDataCollection->fetchExtractors( d->mimeType.name() ) )
0558         {
0559             ExtractionResult extractionResult(m_fileName, d->mimeType.name(), d->metaInfoMap);
0560             plugin->extract(&extractionResult);
0561         }
0562
0563         if( d->metaInfoMap.contains( f ) )
0564             return d->metaInfoMap[f];
0565     }
0566
0567     return QString();
0568 }
0569
0570
0571 void K3b::AudioDecoder::addMetaInfo( MetaDataField f, const QString& value )
0572 {
0573     if( !value.isEmpty() )
0574         d->metaInfoMap[f] = value;
0575     else
0576         qDebug() << "(K3b::AudioDecoder) empty meta data field.";
0577 }
0578
0579
0580 QStringList K3b::AudioDecoder::supportedTechnicalInfos() const
0581 {
0582     QStringList l;
0583     for( QMap<QString, QString>::const_iterator it = d->technicalInfoMap.constBegin();
0584          it != d->technicalInfoMap.constEnd(); ++it )
0585         l.append( it.key() );
0586     return l;
0587 }
0588
0589
0590 QString K3b::AudioDecoder::technicalInfo( const QString& key ) const
0591 {
0592     return d->technicalInfoMap[key];
0593 }
0594
0595
0596 void K3b::AudioDecoder::addTechnicalInfo( const QString& key, const QString& value )
0597 {
0598     d->technicalInfoMap[key] = value;
0599 }
0600
0601
0602 K3b::AudioDecoder* K3b::AudioDecoderFactory::createDecoder( const QUrl& url )
0603 {
0604     qDebug() << "(K3b::AudioDecoderFactory::createDecoder( " << url.toLocalFile() << " )";
0605     QList<K3b::Plugin*> fl = k3bcore->pluginManager()->plugins( "AudioDecoder" );
0606
0607     // first search for a single format decoder
0608     Q_FOREACH( K3b::Plugin* plugin, fl ) {
0609         K3b::AudioDecoderFactory* f = dynamic_cast<K3b::AudioDecoderFactory*>( plugin );
0610         if( f && !f->multiFormatDecoder() && f->canDecode( url ) ) {
0611             qDebug() << "1"; return f->createDecoder();}
0612     }
0613
0614     // no single format decoder. Search for a multi format decoder
0615     Q_FOREACH( K3b::Plugin* plugin, fl ) {
0616         K3b::AudioDecoderFactory* f = dynamic_cast<K3b::AudioDecoderFactory*>( plugin );
0617         if( f && f->multiFormatDecoder() && f->canDecode( url ) ) {
0618             qDebug() << "2"; return f->createDecoder();}
0619     }
0620
0621     qDebug() << "(K3b::AudioDecoderFactory::createDecoder( " << url.toLocalFile() << " ) no success";
0622
0623     // nothing found
0624     return 0;
0625 }
0626
0627
0628 QString K3b::AudioDecoderFactory::categoryName() const
0629 {
0630     return i18nc( "plugin type", "Audio Decoder" );
0631 }
0632
0633 #include "moc_k3baudiodecoder.cpp"