File indexing completed on 2025-10-26 04:14:49
0001 /*************************************************************************** 0002 * * 0003 * Copyright : (C) 2012 Peter Kümmel * 0004 * email : syntheticpp@gmx.net * 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 ***************************************************************************/ 0012 0013 #include "asciifilebuffer.h" 0014 #include "debug.h" 0015 0016 #include <QFile> 0017 #include <QDebug> 0018 #include <QVarLengthArray> 0019 0020 0021 //------------------------------------------------------------------------------------------- 0022 extern int MB; 0023 extern size_t maxAllocate; 0024 0025 //------------------------------------------------------------------------------------------- 0026 AsciiFileBuffer::AsciiFileBuffer() : 0027 _file(0), _begin(-1), _bytesRead(0) 0028 { 0029 } 0030 0031 //------------------------------------------------------------------------------------------- 0032 AsciiFileBuffer::~AsciiFileBuffer() 0033 { 0034 clear(); 0035 } 0036 0037 //------------------------------------------------------------------------------------------- 0038 void AsciiFileBuffer::setFile(QFile* file) 0039 { 0040 delete _file; 0041 _file = file; 0042 } 0043 0044 //------------------------------------------------------------------------------------------- 0045 bool AsciiFileBuffer::openFile(QFile &file) 0046 { 0047 // Don't use 'QIODevice::Text'! 0048 // Because CR LF line ending breaks row offset calculation 0049 return file.open(QIODevice::ReadOnly); 0050 } 0051 0052 //------------------------------------------------------------------------------------------- 0053 void AsciiFileBuffer::clear() 0054 { 0055 _fileData.clear(); 0056 _begin = -1; 0057 _bytesRead = 0; 0058 } 0059 0060 //------------------------------------------------------------------------------------------- 0061 qint64 AsciiFileBuffer::findRowOfPosition(const AsciiFileBuffer::RowIndex& rowIndex, qint64 searchStart, qint64 pos) const 0062 { 0063 if (rowIndex.isEmpty() || 0064 pos < 0 || pos >= rowIndex[rowIndex.size() - 1] || // within the file 0065 searchStart > rowIndex.size()-1 || pos < rowIndex[searchStart]) //within the search region 0066 return -1; 0067 0068 // get close with a binary search... 0069 const qint64 indexOfLastRow = rowIndex.size() - 2; 0070 0071 qint64 i0 = searchStart; 0072 qint64 i1 = indexOfLastRow; 0073 qint64 im = (i0+i1)/2; 0074 0075 while (i1 - i0 > 1L ) { 0076 if (pos < rowIndex[im]) { 0077 i1 = im; 0078 } else { 0079 i0 = im; 0080 } 0081 im = (i0+i1)/2; 0082 } 0083 0084 // now find the exact row... (FIXME - could be cleaner!) 0085 im = qMax(im-4, searchStart); 0086 for (qint64 row = im; row <= indexOfLastRow; ++row) { 0087 if (pos < rowIndex[row]) { 0088 return row - 1; 0089 } 0090 } 0091 if (pos < rowIndex[indexOfLastRow + 1]) // length of file in the last element 0092 return indexOfLastRow; 0093 return -1; 0094 } 0095 0096 //------------------------------------------------------------------------------------------- 0097 const QVector<AsciiFileData> AsciiFileBuffer::splitFile(qint64 chunkSize, const RowIndex& rowIndex, qint64 start, qint64 bytesToRead) const 0098 { 0099 const qint64 end = start + bytesToRead; // position behind last valid seekable byte in file 0100 if (chunkSize <= 0 || rowIndex.isEmpty() || start >= end || start < 0 0101 || bytesToRead <= 0 || start + bytesToRead > rowIndex[rowIndex.size() - 1]) 0102 return QVector<AsciiFileData>(); 0103 0104 qint64 nextRow = findRowOfPosition(rowIndex, 0, start); 0105 QVector<AsciiFileData> chunks; 0106 chunks.reserve(bytesToRead / chunkSize); 0107 qint64 pos = start; 0108 qint64 rows = rowIndex.size(); 0109 while (pos < end) { 0110 // use for storing reading information only 0111 AsciiFileData chunk; 0112 // error if chunkSize is too small for one row 0113 if (nextRow + 1 < rows && rowIndex[nextRow + 1] - rowIndex[nextRow] > chunkSize) 0114 return QVector<AsciiFileData>(); 0115 // read complete chunk or to end of file 0116 qint64 endRead = (pos + chunkSize < end ? pos + chunkSize : end); 0117 // adjust to row end: pos + chunkRead is in the middle of a row, find index of this row 0118 const qint64 rowBegin = nextRow; 0119 nextRow = findRowOfPosition(rowIndex, nextRow, endRead - 1); 0120 if (nextRow == -1 || nextRow >= rows) 0121 return QVector<AsciiFileData>(); 0122 // read until the beginning of the found row 0123 if (nextRow == rows - 2) { // last valid row 0124 // if exactly at the end of the row, read this row 0125 if (endRead == rowIndex[rows - 1]) { 0126 nextRow++; 0127 endRead = end; 0128 } else { 0129 // find complete last row next time 0130 endRead = end - 1; 0131 } 0132 } else { 0133 // if exactly at the end of the row, read this row 0134 if (endRead == rowIndex[nextRow + 1]) 0135 nextRow++; 0136 endRead = rowIndex[nextRow]; 0137 } 0138 // set information about positions in the file 0139 chunk.setBegin(rowIndex[rowBegin]); 0140 chunk.setBytesRead(rowIndex[nextRow] - rowIndex[rowBegin]); 0141 // set information about rows 0142 chunk.setRowBegin(rowBegin); 0143 chunk.setRowsRead(nextRow - rowBegin); 0144 chunks << chunk; 0145 pos = rowIndex[nextRow]; 0146 } 0147 //qDebug() << "File split into " << chunks.size() << " chunks:"; AsciiFileData::logData(chunks); 0148 return chunks; 0149 } 0150 0151 //------------------------------------------------------------------------------------------- 0152 void AsciiFileBuffer::useOneWindowWithChunks(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, int numChunks) 0153 { 0154 useSlidingWindowWithChunks(rowIndex, start, bytesToRead, bytesToRead, numChunks, false); 0155 } 0156 0157 //------------------------------------------------------------------------------------------- 0158 void AsciiFileBuffer::useSlidingWindow(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, qint64 windowSize) 0159 { 0160 useSlidingWindowWithChunks(rowIndex, start, bytesToRead, windowSize, 1, true); 0161 } 0162 0163 //------------------------------------------------------------------------------------------- 0164 void AsciiFileBuffer::useSlidingWindowWithChunks(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, qint64 windowSize, int numWindowChunks) 0165 { 0166 useSlidingWindowWithChunks(rowIndex, start, bytesToRead, windowSize, numWindowChunks, true); 0167 } 0168 0169 //------------------------------------------------------------------------------------------- 0170 void AsciiFileBuffer::useSlidingWindowWithChunks(const RowIndex& rowIndex, qint64 start, qint64 bytesToRead, qint64 windowSize, int numWindowChunks, bool reread) 0171 { 0172 clear(); 0173 if (!_file) 0174 return; 0175 0176 if (bytesToRead <= 0 || numWindowChunks <= 0 || windowSize <= 0) 0177 return; 0178 0179 qint64 chunkSize = windowSize / numWindowChunks; 0180 QVector<AsciiFileData> chunks = splitFile(chunkSize, rowIndex, start, bytesToRead); 0181 // chunks.size() could be greater than numWindowChunks! 0182 0183 // no sliding window 0184 if (bytesToRead == windowSize) 0185 { 0186 for (int i = 0; i < chunks.size(); i++) { 0187 chunks[i].setFile(_file); 0188 chunks[i].setReread(reread); 0189 _bytesRead += chunks[i].bytesRead(); 0190 } 0191 _fileData.push_back(chunks); 0192 } 0193 else 0194 { 0195 // sliding window 0196 // prepare window with numSubChunks chunks 0197 QVector<AsciiFileData> window; 0198 window.reserve(numWindowChunks); 0199 for (int i = 0; i < numWindowChunks; i++) { 0200 AsciiFileData sharedArray; 0201 if (!sharedArray.resize(chunkSize)) { 0202 Kst::Debug::self()->log(QString("AsciiFileBuffer: not enough memory available for sliding window")); 0203 return; 0204 } 0205 sharedArray.setFile(_file); 0206 window.push_back(sharedArray); 0207 } 0208 0209 _fileData.reserve(bytesToRead / windowSize); 0210 int i = 0; 0211 while (i < chunks.size()) { 0212 QVector<AsciiFileData> windowChunks; 0213 windowChunks.reserve(window.size()); 0214 for (int s = 0; s < window.size(); s++) { 0215 AsciiFileData chunk = chunks[i]; 0216 chunk.setSharedArray(window[s]); 0217 chunk.setFile(_file); 0218 chunk.setReread(reread); 0219 _bytesRead += chunk.bytesRead(); 0220 windowChunks.push_back(chunk); 0221 i++; 0222 if (i >= chunks.size()) 0223 break; 0224 } 0225 // each entry is one slide of the window 0226 _fileData.push_back(windowChunks); 0227 //qDebug() << "Window chunks:"; AsciiFileData::logData(windowChunks); 0228 } 0229 } 0230 0231 _begin = start; 0232 if (_bytesRead != bytesToRead) { 0233 clear(); 0234 Kst::Debug::self()->log(QString("AsciiFileBuffer: error while splitting into file %1 chunks").arg(_fileData.size())); 0235 } 0236 } 0237 0238 //------------------------------------------------------------------------------------------- 0239 bool AsciiFileBuffer::readWindow(QVector<AsciiFileData>& window) const 0240 { 0241 for (int i = 0; i < window.size(); i++) { 0242 if (!window[i].read()) { 0243 return false; 0244 } 0245 } 0246 return true; 0247 } 0248