File indexing completed on 2024-12-29 04:11:44

0001 /***************************************************************************
0002  *                                                                         *
0003  *   Copyright : (C) 2012 Peter Kümmel                                     *
0004  *   email     : syntheticpp@gmx.net                                       *
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  ***************************************************************************/
0012 
0013 #include "asciifilebuffer.h"
0014 #include "debug.h"
0015 
0016 #include <QFile>
0017 #include <QDebug>
0018 #include <QVarLengthArray>
0019 
0020 
0021 //-------------------------------------------------------------------------------------------
0022 extern int MB;
0023 extern size_t maxAllocate;
0024 
0025 //-------------------------------------------------------------------------------------------
0026 AsciiFileBuffer::AsciiFileBuffer() : 
0027   _file(0), _begin(-1), _bytesRead(0)
0028 {
0029 }
0030 
0031 //-------------------------------------------------------------------------------------------
0032 AsciiFileBuffer::~AsciiFileBuffer()
0033 {
0034   clear();
0035 }
0036 
0037 //-------------------------------------------------------------------------------------------
0038 void AsciiFileBuffer::setFile(QFile* file)
0039 {
0040   delete _file;
0041   _file = file; 
0042 }
0043 
0044 //-------------------------------------------------------------------------------------------
0045 bool AsciiFileBuffer::openFile(QFile &file) 
0046 {
0047   // Don't use 'QIODevice::Text'!
0048   // Because CR LF line ending breaks row offset calculation
0049   return file.open(QIODevice::ReadOnly);
0050 }
0051 
0052 //-------------------------------------------------------------------------------------------
0053 void AsciiFileBuffer::clear()
0054 {
0055   _fileData.clear();
0056   _begin = -1;
0057   _bytesRead = 0;
0058 }
0059 
0060 //-------------------------------------------------------------------------------------------
0061 qint64 AsciiFileBuffer::findRowOfPosition(const AsciiFileBuffer::RowIndex& rowIndex, qint64 searchStart, qint64 pos) const
0062 {
0063   if (rowIndex.isEmpty() ||
0064       pos < 0 || pos >= rowIndex[rowIndex.size() - 1] || // within the file
0065       searchStart > rowIndex.size()-1 || pos < rowIndex[searchStart]) //within the search region
0066     return -1;
0067 
0068   // get close with a binary search...
0069   const qint64 indexOfLastRow = rowIndex.size() - 2;
0070 
0071   qint64 i0 = searchStart;
0072   qint64 i1 = indexOfLastRow;
0073   qint64 im = (i0+i1)/2;
0074 
0075   while (i1 -  i0 > 1L ) {
0076     if (pos < rowIndex[im]) {
0077       i1 = im;
0078     } else {
0079       i0 = im;
0080     }
0081     im = (i0+i1)/2;
0082   }
0083 
0084   // now find the exact row... (FIXME - could be cleaner!)
0085   im = qMax(im-4, searchStart);
0086   for (qint64 row = im; row <= indexOfLastRow; ++row) {
0087     if (pos < rowIndex[row]) {
0088       return row - 1;
0089     }
0090   }
0091   if (pos < rowIndex[indexOfLastRow + 1]) // length of file in the last element
0092     return indexOfLastRow;
0093   return -1;
0094 }
0095 
0096 //-------------------------------------------------------------------------------------------
0097 const QVector<AsciiFileData> AsciiFileBuffer::splitFile(qint64 chunkSize, const RowIndex& rowIndex, qint64 start, qint64 bytesToRead) const
0098 {
0099   const qint64 end = start + bytesToRead; // position behind last valid seekable byte in file
0100   if (chunkSize <= 0 || rowIndex.isEmpty() || start >= end || start < 0
0101       || bytesToRead <= 0 || start + bytesToRead > rowIndex[rowIndex.size() - 1])
0102     return QVector<AsciiFileData>();
0103 
0104   qint64 nextRow = findRowOfPosition(rowIndex, 0, start);
0105   QVector<AsciiFileData> chunks;
0106   chunks.reserve(bytesToRead / chunkSize);
0107   qint64 pos = start;
0108   qint64 rows = rowIndex.size();
0109   while (pos < end) {
0110     // use for storing reading information only
0111     AsciiFileData chunk;
0112     // error if chunkSize is too small for one row
0113     if (nextRow + 1 < rows && rowIndex[nextRow + 1] - rowIndex[nextRow] > chunkSize)
0114       return  QVector<AsciiFileData>();
0115     // read complete chunk or to end of file
0116     qint64 endRead = (pos + chunkSize < end ? pos + chunkSize : end);
0117     // adjust to row end: pos + chunkRead is in the middle of a row, find index of this row
0118     const qint64 rowBegin = nextRow;
0119     nextRow = findRowOfPosition(rowIndex, nextRow, endRead - 1);
0120     if (nextRow == -1 || nextRow >= rows)
0121       return  QVector<AsciiFileData>();
0122     // read until the beginning of the found row
0123     if (nextRow == rows - 2) { // last valid row
0124       // if exactly at the end of the row, read this row
0125       if (endRead == rowIndex[rows - 1]) {
0126         nextRow++;
0127         endRead = end;
0128       }  else {
0129         // find complete last row next time
0130         endRead = end - 1;
0131       }
0132     } else {
0133       // if exactly at the end of the row, read this row
0134       if (endRead == rowIndex[nextRow + 1])
0135         nextRow++;
0136       endRead = rowIndex[nextRow];
0137     }
0138     // set information about positions in the file
0139     chunk.setBegin(rowIndex[rowBegin]);
0140     chunk.setBytesRead(rowIndex[nextRow] - rowIndex[rowBegin]);
0141     // set information about rows
0142     chunk.setRowBegin(rowBegin);
0143     chunk.setRowsRead(nextRow - rowBegin);
0144     chunks << chunk;
0145     pos = rowIndex[nextRow];
0146   }
0147   //qDebug() << "File split into " << chunks.size() << " chunks:"; AsciiFileData::logData(chunks);
0148   return chunks;
0149 }
0150 
0151 //-------------------------------------------------------------------------------------------
0152 void AsciiFileBuffer::useOneWindowWithChunks(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, int numChunks)
0153 {
0154   useSlidingWindowWithChunks(rowIndex, start, bytesToRead, bytesToRead, numChunks, false);
0155 }
0156 
0157 //-------------------------------------------------------------------------------------------
0158 void AsciiFileBuffer::useSlidingWindow(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, qint64 windowSize)
0159 {
0160   useSlidingWindowWithChunks(rowIndex, start, bytesToRead, windowSize, 1, true);
0161 }
0162 
0163 //-------------------------------------------------------------------------------------------
0164 void AsciiFileBuffer::useSlidingWindowWithChunks(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, qint64 windowSize, int numWindowChunks)
0165 {
0166   useSlidingWindowWithChunks(rowIndex, start, bytesToRead, windowSize, numWindowChunks, true);
0167 }
0168 
0169 //-------------------------------------------------------------------------------------------
0170 void AsciiFileBuffer::useSlidingWindowWithChunks(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, qint64 windowSize, int numWindowChunks, bool reread)
0171 {
0172   clear();
0173   if (!_file)
0174     return;
0175 
0176   if (bytesToRead <= 0 || numWindowChunks <= 0 || windowSize <= 0)
0177     return;
0178 
0179   qint64 chunkSize = windowSize / numWindowChunks;
0180   QVector<AsciiFileData> chunks = splitFile(chunkSize, rowIndex, start, bytesToRead);
0181   // chunks.size() could be greater than numWindowChunks!
0182 
0183   // no sliding window
0184   if (bytesToRead == windowSize)
0185   {
0186     for (int i = 0; i < chunks.size(); i++) {
0187       chunks[i].setFile(_file);
0188       chunks[i].setReread(reread);
0189       _bytesRead += chunks[i].bytesRead();
0190     }
0191     _fileData.push_back(chunks);
0192   }
0193   else
0194   {
0195     // sliding window
0196     // prepare window with numSubChunks chunks
0197     QVector<AsciiFileData> window;
0198     window.reserve(numWindowChunks);
0199     for (int i = 0; i < numWindowChunks; i++) {
0200       AsciiFileData sharedArray;
0201       if (!sharedArray.resize(chunkSize)) {
0202         Kst::Debug::self()->log(QString("AsciiFileBuffer: not enough memory available for sliding window"));
0203         return;
0204       }
0205       sharedArray.setFile(_file);
0206       window.push_back(sharedArray);
0207     }
0208 
0209     _fileData.reserve(bytesToRead / windowSize);
0210     int i = 0;
0211     while (i < chunks.size()) {
0212       QVector<AsciiFileData> windowChunks;
0213       windowChunks.reserve(window.size());
0214       for (int s = 0; s < window.size(); s++) {
0215         AsciiFileData chunk = chunks[i];
0216         chunk.setSharedArray(window[s]);
0217         chunk.setFile(_file);
0218         chunk.setReread(reread);
0219         _bytesRead += chunk.bytesRead();
0220         windowChunks.push_back(chunk);
0221         i++;
0222         if (i >= chunks.size())
0223           break;
0224       }
0225       // each entry is one slide of the window
0226       _fileData.push_back(windowChunks);
0227       //qDebug() << "Window chunks:"; AsciiFileData::logData(windowChunks);
0228     }
0229   }
0230 
0231   _begin = start;
0232   if (_bytesRead != bytesToRead) {
0233     clear();
0234     Kst::Debug::self()->log(QString("AsciiFileBuffer: error while splitting into file %1 chunks").arg(_fileData.size()));
0235   }
0236 }
0237 
0238 //-------------------------------------------------------------------------------------------
0239 bool AsciiFileBuffer::readWindow(QVector<AsciiFileData>& window) const
0240 {
0241   for (int i = 0; i < window.size(); i++) {
0242     if (!window[i].read()) {
0243       return false;
0244     }
0245   }
0246   return true;
0247 }
0248