File indexing completed on 2024-03-24 03:45:20
/*
    SPDX-FileCopyrightText: 2017-2021 Kristofer Berggren <https://github.com/d99kris/rapidcsv>
    SPDX-License-Identifier: BSD-3-Clause
*/

#pragma once

#include <algorithm>
#include <cassert>
#include <cmath>
#ifdef HAS_CODECVT
#include <codecvt>
#endif
#include <exception>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <vector>

#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#endif

namespace rapidcsv
{
  // Platform default for line endings of newly created documents:
  // true (CR/LF) when built with MSVC, false (LF only) otherwise.
#if defined(_MSC_VER)
  static const bool sPlatformHasCR = true;
#else
  static const bool sPlatformHasCR = false;
#endif

  /**
   * @brief     Datastructure holding parameters controlling how invalid numbers (including
   *            empty strings) should be handled.
   */
  struct ConverterParams
  {
    /**
     * @brief   Constructor
     * @param   pHasDefaultConverter  specifies if conversion of non-numerical strings shall be
     *                                converted to a default numerical value, instead of causing
     *                                an exception to be thrown (default).
     * @param   pDefaultFloat         floating-point default value to represent invalid numbers.
     * @param   pDefaultInteger       integer default value to represent invalid numbers.
     */
    explicit ConverterParams(const bool pHasDefaultConverter = false,
                             const long double pDefaultFloat =
                               std::numeric_limits<long double>::signaling_NaN(),
                             const long long pDefaultInteger = 0)
      : mHasDefaultConverter(pHasDefaultConverter)
      , mDefaultFloat(pDefaultFloat)
      , mDefaultInteger(pDefaultInteger)
    {
    }

    /**
     * @brief   specifies if conversion of non-numerical strings shall be converted to a default
     *          numerical value, instead of causing an exception to be thrown (default).
     */
    bool mHasDefaultConverter;

    /**
     * @brief   floating-point default value to represent invalid numbers.
     */
    long double mDefaultFloat;

    /**
     * @brief   integer default value to represent invalid numbers.
     */
    long long mDefaultInteger;
  };

  /**
   * @brief     Exception thrown when attempting to access Document data in a datatype which
   *            is not supported by the Converter class.
   */
  class no_converter : public std::exception
  {
  public:
    /**
     * @brief   Provides details about the exception
     * @returns an explanatory string
     */
    const char* what() const noexcept override
    {
      return "unsupported conversion datatype";
    }
  };

  /**
   * @brief     Class providing conversion to/from numerical datatypes and strings. Only
   *            intended for rapidcsv internal usage, but exposed externally to allow
   *            specialization for custom datatype conversions.
   */
  template<typename T>
  class Converter
  {
  public:
    /**
     * @brief   Constructor
     * @param   pConverterParams      specifies how conversion of non-numerical values to
     *                                numerical datatype shall be handled. The parameters are
     *                                copied, so the argument may be a temporary.
     */
    Converter(const ConverterParams& pConverterParams)
      : mConverterParams(pConverterParams)
    {
    }

    /**
     * @brief   Converts numerical value to string representation.
     * @param   pVal                  numerical value (input).
     * @param   pStr                  output string, overwritten with the stream-formatted
     *                                value.
     * @throws  no_converter          if T is not one of the built-in integer, floating-point
     *                                or char types handled here.
     */
    void ToStr(const T& pVal, std::string& pStr) const
    {
      if (typeid(T) == typeid(int) ||
          typeid(T) == typeid(long) ||
          typeid(T) == typeid(long long) ||
          typeid(T) == typeid(unsigned) ||
          typeid(T) == typeid(unsigned long) ||
          typeid(T) == typeid(unsigned long long) ||
          typeid(T) == typeid(float) ||
          typeid(T) == typeid(double) ||
          typeid(T) == typeid(long double) ||
          typeid(T) == typeid(char))
      {
        std::ostringstream out;
        out << pVal;
        pStr = out.str();
      }
      else
      {
        throw no_converter();
      }
    }

    /**
     * @brief   Converts string holding a numerical value to numerical datatype representation.
     * @param   pStr                  input string.
     * @param   pVal                  output value, overwritten on success or with the
     *                                configured default when parsing fails and
     *                                mHasDefaultConverter is set.
     * @throws  std::invalid_argument / std::out_of_range  propagated from the std::sto*
     *                                parsers when parsing fails and no default converter is
     *                                configured.
     * @throws  no_converter          if T is not a supported datatype.
     */
    void ToVal(const std::string& pStr, T& pVal) const
    {
      // Integer types: any parse failure maps to mDefaultInteger (or is rethrown).
      try
      {
        if (typeid(T) == typeid(int))
        {
          pVal = static_cast<T>(std::stoi(pStr));
          return;
        }
        else if (typeid(T) == typeid(long))
        {
          pVal = static_cast<T>(std::stol(pStr));
          return;
        }
        else if (typeid(T) == typeid(long long))
        {
          pVal = static_cast<T>(std::stoll(pStr));
          return;
        }
        else if (typeid(T) == typeid(unsigned))
        {
          // There is no std::stou; parse as unsigned long and reject values that would be
          // silently truncated by the narrowing cast.
          const unsigned long parsed = std::stoul(pStr);
          if (parsed > std::numeric_limits<unsigned>::max())
          {
            throw std::out_of_range("stoul");
          }
          pVal = static_cast<T>(parsed);
          return;
        }
        else if (typeid(T) == typeid(unsigned long))
        {
          pVal = static_cast<T>(std::stoul(pStr));
          return;
        }
        else if (typeid(T) == typeid(unsigned long long))
        {
          pVal = static_cast<T>(std::stoull(pStr));
          return;
        }
      }
      catch (...)
      {
        if (!mConverterParams.mHasDefaultConverter)
        {
          throw;
        }

        pVal = static_cast<T>(mConverterParams.mDefaultInteger);
        return;
      }

      // Floating-point types: any parse failure maps to mDefaultFloat (or is rethrown).
      try
      {
        if (typeid(T) == typeid(float))
        {
          pVal = static_cast<T>(std::stof(pStr));
          return;
        }
        else if (typeid(T) == typeid(double))
        {
          pVal = static_cast<T>(std::stod(pStr));
          return;
        }
        else if (typeid(T) == typeid(long double))
        {
          pVal = static_cast<T>(std::stold(pStr));
          return;
        }
      }
      catch (...)
      {
        if (!mConverterParams.mHasDefaultConverter)
        {
          throw;
        }

        pVal = static_cast<T>(mConverterParams.mDefaultFloat);
        return;
      }

      if (typeid(T) == typeid(char))
      {
        // For an empty string, operator[] on a const std::string yields '\0'.
        pVal = static_cast<T>(pStr[0]);
        return;
      }

      throw no_converter();
    }

  private:
    // Held by value so a Converter built from a temporary ConverterParams stays valid.
    ConverterParams mConverterParams;
  };

  /**
   * @brief     Specialized implementation handling string to string conversion.
   * @param     pVal                  input string.
   * @param     pStr                  output string, receives a copy of pVal.
   */
  template<>
  inline void Converter<std::string>::ToStr(const std::string& pVal, std::string& pStr) const
  {
    pStr = pVal;
  }

  /**
   * @brief     Specialized implementation handling string to string conversion.
   * @param     pStr                  input string.
   * @param     pVal                  output string, receives a copy of pStr.
   */
  template<>
  inline void Converter<std::string>::ToVal(const std::string& pStr, std::string& pVal) const
  {
    pVal = pStr;
  }

  /**
   * @brief     Signature of a user-supplied conversion function: reads pStr and writes the
   *            converted value to pVal.
   */
  template<typename T>
  using ConvFunc = std::function<void (const std::string& pStr, T& pVal)>;
} // namespace rapidcsv
0261 */ 0262 struct LabelParams 0263 { 0264 /** 0265 * @brief Constructor 0266 * @param pColumnNameIdx specifies the zero-based row index of the column labels, setting 0267 * it to -1 prevents column lookup by label name, and gives access 0268 * to all rows as document data. Default: 0 0269 * @param pRowNameIdx specifies the zero-based column index of the row labels, setting 0270 * it to -1 prevents row lookup by label name, and gives access 0271 * to all columns as document data. Default: -1 0272 */ 0273 explicit LabelParams(const int pColumnNameIdx = 0, const int pRowNameIdx = -1) 0274 : mColumnNameIdx(pColumnNameIdx), mRowNameIdx(pRowNameIdx) 0275 { 0276 } 0277 0278 /** 0279 * @brief specifies the zero-based row index of the column labels. 0280 */ 0281 int mColumnNameIdx; 0282 0283 /** 0284 * @brief specifies the zero-based column index of the row labels. 0285 */ 0286 int mRowNameIdx; 0287 }; 0288 0289 /** 0290 * @brief Datastructure holding parameters controlling how the CSV data fields are separated. 0291 */ 0292 struct SeparatorParams 0293 { 0294 /** 0295 * @brief Constructor 0296 * @param pSeparator specifies the column separator (default ','). 0297 * @param pTrim specifies whether to trim leading and trailing spaces from 0298 * cells read (default false). 0299 * @param pHasCR specifies whether a new document (i.e. not an existing document read) 0300 * should use CR/LF instead of only LF (default is to use standard 0301 * behavior of underlying platforms - CR/LF for Win, and LF for others). 0302 * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false) 0303 * @param pAutoQuote specifies whether to automatically dequote data during read, and add 0304 * quotes during write (default true). 
0305 */ 0306 explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false, 0307 const bool pHasCR = sPlatformHasCR, 0308 const bool pQuotedLinebreaks = false, 0309 const bool pAutoQuote = true) 0310 : mSeparator(pSeparator), mTrim(pTrim), mHasCR(pHasCR), 0311 mQuotedLinebreaks(pQuotedLinebreaks), mAutoQuote(pAutoQuote) 0312 { 0313 } 0314 0315 /** 0316 * @brief specifies the column separator. 0317 */ 0318 char mSeparator; 0319 0320 /** 0321 * @brief specifies whether to trim leading and trailing spaces from cells read. 0322 */ 0323 bool mTrim; 0324 0325 /** 0326 * @brief specifies whether new documents should use CR/LF instead of LF. 0327 */ 0328 bool mHasCR; 0329 0330 /** 0331 * @brief specifies whether to allow line breaks in quoted text. 0332 */ 0333 bool mQuotedLinebreaks; 0334 0335 /** 0336 * @brief specifies whether to automatically dequote cell data. 0337 */ 0338 bool mAutoQuote; 0339 }; 0340 0341 /** 0342 * @brief Datastructure holding parameters controlling how special line formats should be 0343 * treated. 0344 */ 0345 struct LineReaderParams 0346 { 0347 /** 0348 * @brief Constructor 0349 * @param pSkipCommentLines specifies whether to skip lines prefixed with 0350 * mCommentPrefix. Default: false 0351 * @param pCommentPrefix specifies which prefix character to indicate a comment 0352 * line. Default: # 0353 * @param pSkipEmptyLines specifies whether to skip empty lines. Default: false 0354 */ 0355 explicit LineReaderParams(const bool pSkipCommentLines = false, 0356 const char pCommentPrefix = '#', 0357 const bool pSkipEmptyLines = false) 0358 : mSkipCommentLines(pSkipCommentLines), mCommentPrefix(pCommentPrefix), 0359 mSkipEmptyLines(pSkipEmptyLines) 0360 { 0361 } 0362 0363 /** 0364 * @brief specifies whether to skip lines prefixed with mCommentPrefix. 0365 */ 0366 bool mSkipCommentLines; 0367 0368 /** 0369 * @brief specifies which prefix character to indicate a comment line. 
0370 */ 0371 char mCommentPrefix; 0372 0373 /** 0374 * @brief specifies whether to skip empty lines. 0375 */ 0376 bool mSkipEmptyLines; 0377 }; 0378 0379 /** 0380 * @brief Class representing a CSV document. 0381 */ 0382 class Document 0383 { 0384 public: 0385 /** 0386 * @brief Constructor 0387 * @param pPath specifies the path of an existing CSV-file to populate the Document 0388 * data with. 0389 * @param pLabelParams specifies which row and column should be treated as labels. 0390 * @param pSeparatorParams specifies which field and row separators should be used. 0391 * @param pConverterParams specifies how invalid numbers (including empty strings) should be 0392 * handled. 0393 * @param pLineReaderParams specifies how special line formats should be treated. 0394 */ 0395 explicit Document(const std::string &pPath = std::string(), 0396 const LabelParams &pLabelParams = LabelParams(), 0397 const SeparatorParams &pSeparatorParams = SeparatorParams(), 0398 const ConverterParams &pConverterParams = ConverterParams(), 0399 const LineReaderParams &pLineReaderParams = LineReaderParams()) 0400 : mPath(pPath), mLabelParams(pLabelParams), mSeparatorParams(pSeparatorParams), 0401 mConverterParams(pConverterParams), mLineReaderParams(pLineReaderParams) 0402 { 0403 if (!mPath.empty()) 0404 { 0405 ReadCsv(); 0406 } 0407 } 0408 0409 /** 0410 * @brief Constructor 0411 * @param pStream specifies an input stream to read CSV data from. 0412 * @param pLabelParams specifies which row and column should be treated as labels. 0413 * @param pSeparatorParams specifies which field and row separators should be used. 0414 * @param pConverterParams specifies how invalid numbers (including empty strings) should be 0415 * handled. 0416 * @param pLineReaderParams specifies how special line formats should be treated. 
0417 */ 0418 explicit Document(std::istream &pStream, 0419 const LabelParams &pLabelParams = LabelParams(), 0420 const SeparatorParams &pSeparatorParams = SeparatorParams(), 0421 const ConverterParams &pConverterParams = ConverterParams(), 0422 const LineReaderParams &pLineReaderParams = LineReaderParams()) 0423 : mPath(), mLabelParams(pLabelParams), mSeparatorParams(pSeparatorParams), 0424 mConverterParams(pConverterParams), mLineReaderParams(pLineReaderParams) 0425 { 0426 ReadCsv(pStream); 0427 } 0428 0429 /** 0430 * @brief Read Document data from file. 0431 * @param pPath specifies the path of an existing CSV-file to populate the Document 0432 * data with. 0433 * @param pLabelParams specifies which row and column should be treated as labels. 0434 * @param pSeparatorParams specifies which field and row separators should be used. 0435 * @param pConverterParams specifies how invalid numbers (including empty strings) should be 0436 * handled. 0437 * @param pLineReaderParams specifies how special line formats should be treated. 0438 */ 0439 void Load(const std::string &pPath, const LabelParams &pLabelParams = LabelParams(), 0440 const SeparatorParams &pSeparatorParams = SeparatorParams(), 0441 const ConverterParams &pConverterParams = ConverterParams(), 0442 const LineReaderParams &pLineReaderParams = LineReaderParams()) 0443 { 0444 mPath = pPath; 0445 mLabelParams = pLabelParams; 0446 mSeparatorParams = pSeparatorParams; 0447 mConverterParams = pConverterParams; 0448 mLineReaderParams = pLineReaderParams; 0449 ReadCsv(); 0450 } 0451 0452 /** 0453 * @brief Read Document data from stream. 0454 * @param pStream specifies an input stream to read CSV data from. 0455 * @param pLabelParams specifies which row and column should be treated as labels. 0456 * @param pSeparatorParams specifies which field and row separators should be used. 0457 * @param pConverterParams specifies how invalid numbers (including empty strings) should be 0458 * handled. 
0459 * @param pLineReaderParams specifies how special line formats should be treated. 0460 */ 0461 void Load(std::istream &pStream, const LabelParams &pLabelParams = LabelParams(), 0462 const SeparatorParams &pSeparatorParams = SeparatorParams(), 0463 const ConverterParams &pConverterParams = ConverterParams(), 0464 const LineReaderParams &pLineReaderParams = LineReaderParams()) 0465 { 0466 mPath = ""; 0467 mLabelParams = pLabelParams; 0468 mSeparatorParams = pSeparatorParams; 0469 mConverterParams = pConverterParams; 0470 mLineReaderParams = pLineReaderParams; 0471 ReadCsv(pStream); 0472 } 0473 0474 /** 0475 * @brief Write Document data to file. 0476 * @param pPath optionally specifies the path where the CSV-file will be created 0477 * (if not specified, the original path provided when creating or 0478 * loading the Document data will be used). 0479 */ 0480 void Save(const std::string &pPath = std::string()) 0481 { 0482 if (!pPath.empty()) 0483 { 0484 mPath = pPath; 0485 } 0486 WriteCsv(); 0487 } 0488 0489 /** 0490 * @brief Write Document data to stream. 0491 * @param pStream specifies an output stream to write the data to. 0492 */ 0493 void Save(std::ostream &pStream) { WriteCsv(pStream); } 0494 0495 /** 0496 * @brief Clears loaded Document data. 0497 * 0498 */ 0499 void Clear() 0500 { 0501 mData.clear(); 0502 mColumnNames.clear(); 0503 mRowNames.clear(); 0504 #ifdef HAS_CODECVT 0505 mIsUtf16 = false; 0506 mIsLE = false; 0507 #endif 0508 } 0509 0510 /** 0511 * @brief Get column index by name. 0512 * @param pColumnName column label name. 0513 * @returns zero-based column index. 0514 */ 0515 ssize_t GetColumnIdx(const std::string &pColumnName) const 0516 { 0517 if (mLabelParams.mColumnNameIdx >= 0) 0518 { 0519 if (mColumnNames.find(pColumnName) != mColumnNames.end()) 0520 { 0521 return mColumnNames.at(pColumnName) - (mLabelParams.mRowNameIdx + 1); 0522 } 0523 } 0524 return -1; 0525 } 0526 0527 /** 0528 * @brief Get column by index. 
0529 * @param pColumnIdx zero-based column index. 0530 * @returns vector of column data. 0531 */ 0532 template <typename T> 0533 std::vector<T> GetColumn(const size_t pColumnIdx) const 0534 { 0535 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 0536 std::vector<T> column; 0537 Converter<T> converter(mConverterParams); 0538 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 0539 { 0540 if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) 0541 { 0542 T val; 0543 converter.ToVal(itRow->at(columnIdx), val); 0544 column.push_back(val); 0545 } 0546 } 0547 return column; 0548 } 0549 0550 /** 0551 * @brief Get column by index. 0552 * @param pColumnIdx zero-based column index. 0553 * @param pToVal conversion function. 0554 * @returns vector of column data. 0555 */ 0556 template <typename T> 0557 std::vector<T> GetColumn(const size_t pColumnIdx, ConvFunc<T> pToVal) const 0558 { 0559 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 0560 std::vector<T> column; 0561 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 0562 { 0563 if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) 0564 { 0565 T val; 0566 pToVal(itRow->at(columnIdx), val); 0567 column.push_back(val); 0568 } 0569 } 0570 return column; 0571 } 0572 0573 /** 0574 * @brief Get column by name. 0575 * @param pColumnName column label name. 0576 * @returns vector of column data. 0577 */ 0578 template <typename T> 0579 std::vector<T> GetColumn(const std::string &pColumnName) const 0580 { 0581 const ssize_t columnIdx = GetColumnIdx(pColumnName); 0582 if (columnIdx < 0) 0583 { 0584 throw std::out_of_range("column not found: " + pColumnName); 0585 } 0586 return GetColumn<T>(columnIdx); 0587 } 0588 0589 /** 0590 * @brief Get column by name. 0591 * @param pColumnName column label name. 0592 * @param pToVal conversion function. 0593 * @returns vector of column data. 
0594 */ 0595 template <typename T> 0596 std::vector<T> GetColumn(const std::string &pColumnName, ConvFunc<T> pToVal) const 0597 { 0598 const ssize_t columnIdx = GetColumnIdx(pColumnName); 0599 if (columnIdx < 0) 0600 { 0601 throw std::out_of_range("column not found: " + pColumnName); 0602 } 0603 return GetColumn<T>(columnIdx, pToVal); 0604 } 0605 0606 /** 0607 * @brief Set column by index. 0608 * @param pColumnIdx zero-based column index. 0609 * @param pColumn vector of column data. 0610 */ 0611 template <typename T> 0612 void SetColumn(const size_t pColumnIdx, const std::vector<T> &pColumn) 0613 { 0614 const size_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 0615 0616 while (pColumn.size() + (mLabelParams.mColumnNameIdx + 1) > GetDataRowCount()) 0617 { 0618 std::vector<std::string> row; 0619 row.resize(GetDataColumnCount()); 0620 mData.push_back(row); 0621 } 0622 0623 if ((columnIdx + 1) > GetDataColumnCount()) 0624 { 0625 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 0626 { 0627 itRow->resize(columnIdx + 1 + (mLabelParams.mRowNameIdx + 1)); 0628 } 0629 } 0630 0631 Converter<T> converter(mConverterParams); 0632 for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) 0633 { 0634 std::string str; 0635 converter.ToStr(*itRow, str); 0636 mData 0637 .at(std::distance(pColumn.begin(), itRow) + 0638 (mLabelParams.mColumnNameIdx + 1)) 0639 .at(columnIdx) = str; 0640 } 0641 } 0642 0643 /** 0644 * @brief Set column by name. 0645 * @param pColumnName column label name. 0646 * @param pColumn vector of column data. 0647 */ 0648 template <typename T> 0649 void SetColumn(const std::string &pColumnName, const std::vector<T> &pColumn) 0650 { 0651 const ssize_t columnIdx = GetColumnIdx(pColumnName); 0652 if (columnIdx < 0) 0653 { 0654 throw std::out_of_range("column not found: " + pColumnName); 0655 } 0656 SetColumn<T>(columnIdx, pColumn); 0657 } 0658 0659 /** 0660 * @brief Remove column by index. 
0661 * @param pColumnIdx zero-based column index. 0662 */ 0663 void RemoveColumn(const size_t pColumnIdx) 0664 { 0665 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 0666 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 0667 { 0668 itRow->erase(itRow->begin() + columnIdx); 0669 } 0670 } 0671 0672 /** 0673 * @brief Remove column by name. 0674 * @param pColumnName column label name. 0675 */ 0676 void RemoveColumn(const std::string &pColumnName) 0677 { 0678 ssize_t columnIdx = GetColumnIdx(pColumnName); 0679 if (columnIdx < 0) 0680 { 0681 throw std::out_of_range("column not found: " + pColumnName); 0682 } 0683 0684 RemoveColumn(columnIdx); 0685 } 0686 0687 /** 0688 * @brief Insert column at specified index. 0689 * @param pColumnIdx zero-based column index. 0690 * @param pColumn vector of column data (optional argument). 0691 * @param pColumnName column label name (optional argument). 0692 */ 0693 template <typename T> 0694 void InsertColumn(const size_t pColumnIdx, 0695 const std::vector<T> &pColumn = std::vector<T>(), 0696 const std::string &pColumnName = std::string()) 0697 { 0698 const size_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 0699 0700 std::vector<std::string> column; 0701 if (pColumn.empty()) 0702 { 0703 column.resize(GetDataRowCount()); 0704 } 0705 else 0706 { 0707 column.resize(pColumn.size() + (mLabelParams.mColumnNameIdx + 1)); 0708 Converter<T> converter(mConverterParams); 0709 for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) 0710 { 0711 std::string str; 0712 converter.ToStr(*itRow, str); 0713 const size_t rowIdx = std::distance(pColumn.begin(), itRow) + 0714 (mLabelParams.mColumnNameIdx + 1); 0715 column.at(rowIdx) = str; 0716 } 0717 } 0718 0719 while (column.size() > GetDataRowCount()) 0720 { 0721 std::vector<std::string> row; 0722 const size_t columnCount = 0723 std::max(static_cast<size_t>(mLabelParams.mColumnNameIdx + 1), 0724 GetDataColumnCount()); 0725 
row.resize(columnCount); 0726 mData.push_back(row); 0727 } 0728 0729 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 0730 { 0731 const size_t rowIdx = std::distance(mData.begin(), itRow); 0732 itRow->insert(itRow->begin() + columnIdx, column.at(rowIdx)); 0733 } 0734 0735 if (!pColumnName.empty()) 0736 { 0737 SetColumnName(pColumnIdx, pColumnName); 0738 } 0739 } 0740 0741 /** 0742 * @brief Get number of data columns (excluding label columns). 0743 * @returns column count. 0744 */ 0745 size_t GetColumnCount() const 0746 { 0747 const ssize_t count = 0748 static_cast<ssize_t>((mData.size() > 0) ? mData.at(0).size() : 0) - 0749 (mLabelParams.mRowNameIdx + 1); 0750 return (count >= 0) ? count : 0; 0751 } 0752 0753 /** 0754 * @brief Get row index by name. 0755 * @param pRowName row label name. 0756 * @returns zero-based row index. 0757 */ 0758 ssize_t GetRowIdx(const std::string &pRowName) const 0759 { 0760 if (mLabelParams.mRowNameIdx >= 0) 0761 { 0762 if (mRowNames.find(pRowName) != mRowNames.end()) 0763 { 0764 return mRowNames.at(pRowName) - (mLabelParams.mColumnNameIdx + 1); 0765 } 0766 } 0767 return -1; 0768 } 0769 0770 /** 0771 * @brief Get row by index. 0772 * @param pRowIdx zero-based row index. 0773 * @returns vector of row data. 0774 */ 0775 template <typename T> 0776 std::vector<T> GetRow(const size_t pRowIdx) const 0777 { 0778 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 0779 std::vector<T> row; 0780 Converter<T> converter(mConverterParams); 0781 for (auto itCol = mData.at(rowIdx).begin(); itCol != mData.at(rowIdx).end(); 0782 ++itCol) 0783 { 0784 if (std::distance(mData.at(rowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) 0785 { 0786 T val; 0787 converter.ToVal(*itCol, val); 0788 row.push_back(val); 0789 } 0790 } 0791 return row; 0792 } 0793 0794 /** 0795 * @brief Get row by index. 0796 * @param pRowIdx zero-based row index. 0797 * @param pToVal conversion function. 0798 * @returns vector of row data. 
0799 */ 0800 template <typename T> 0801 std::vector<T> GetRow(const size_t pRowIdx, ConvFunc<T> pToVal) const 0802 { 0803 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 0804 std::vector<T> row; 0805 Converter<T> converter(mConverterParams); 0806 for (auto itCol = mData.at(rowIdx).begin(); itCol != mData.at(rowIdx).end(); 0807 ++itCol) 0808 { 0809 if (std::distance(mData.at(rowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) 0810 { 0811 T val; 0812 pToVal(*itCol, val); 0813 row.push_back(val); 0814 } 0815 } 0816 return row; 0817 } 0818 0819 /** 0820 * @brief Get row by name. 0821 * @param pRowName row label name. 0822 * @returns vector of row data. 0823 */ 0824 template <typename T> 0825 std::vector<T> GetRow(const std::string &pRowName) const 0826 { 0827 ssize_t rowIdx = GetRowIdx(pRowName); 0828 if (rowIdx < 0) 0829 { 0830 throw std::out_of_range("row not found: " + pRowName); 0831 } 0832 return GetRow<T>(rowIdx); 0833 } 0834 0835 /** 0836 * @brief Get row by name. 0837 * @param pRowName row label name. 0838 * @param pToVal conversion function. 0839 * @returns vector of row data. 0840 */ 0841 template <typename T> 0842 std::vector<T> GetRow(const std::string &pRowName, ConvFunc<T> pToVal) const 0843 { 0844 ssize_t rowIdx = GetRowIdx(pRowName); 0845 if (rowIdx < 0) 0846 { 0847 throw std::out_of_range("row not found: " + pRowName); 0848 } 0849 return GetRow<T>(rowIdx, pToVal); 0850 } 0851 0852 /** 0853 * @brief Set row by index. 0854 * @param pRowIdx zero-based row index. 0855 * @param pRow vector of row data. 
0856 */ 0857 template <typename T> 0858 void SetRow(const size_t pRowIdx, const std::vector<T> &pRow) 0859 { 0860 const size_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 0861 0862 while ((rowIdx + 1) > GetDataRowCount()) 0863 { 0864 std::vector<std::string> row; 0865 row.resize(GetDataColumnCount()); 0866 mData.push_back(row); 0867 } 0868 0869 if (pRow.size() > GetDataColumnCount()) 0870 { 0871 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 0872 { 0873 itRow->resize(pRow.size() + (mLabelParams.mRowNameIdx + 1)); 0874 } 0875 } 0876 0877 Converter<T> converter(mConverterParams); 0878 for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) 0879 { 0880 std::string str; 0881 converter.ToStr(*itCol, str); 0882 mData.at(rowIdx).at(std::distance(pRow.begin(), itCol) + 0883 (mLabelParams.mRowNameIdx + 1)) = str; 0884 } 0885 } 0886 0887 /** 0888 * @brief Set row by name. 0889 * @param pRowName row label name. 0890 * @param pRow vector of row data. 0891 */ 0892 template <typename T> 0893 void SetRow(const std::string &pRowName, const std::vector<T> &pRow) 0894 { 0895 ssize_t rowIdx = GetRowIdx(pRowName); 0896 if (rowIdx < 0) 0897 { 0898 throw std::out_of_range("row not found: " + pRowName); 0899 } 0900 return SetRow<T>(rowIdx, pRow); 0901 } 0902 0903 /** 0904 * @brief Remove row by index. 0905 * @param pRowIdx zero-based row index. 0906 */ 0907 void RemoveRow(const size_t pRowIdx) 0908 { 0909 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 0910 mData.erase(mData.begin() + rowIdx); 0911 } 0912 0913 /** 0914 * @brief Remove row by name. 0915 * @param pRowName row label name. 0916 */ 0917 void RemoveRow(const std::string &pRowName) 0918 { 0919 ssize_t rowIdx = GetRowIdx(pRowName); 0920 if (rowIdx < 0) 0921 { 0922 throw std::out_of_range("row not found: " + pRowName); 0923 } 0924 0925 RemoveRow(rowIdx); 0926 } 0927 0928 /** 0929 * @brief Insert row at specified index. 0930 * @param pRowIdx zero-based row index. 
0931 * @param pRow vector of row data (optional argument). 0932 * @param pRowName row label name (optional argument). 0933 */ 0934 template <typename T> 0935 void InsertRow(const size_t pRowIdx, const std::vector<T> &pRow = std::vector<T>(), 0936 const std::string &pRowName = std::string()) 0937 { 0938 const size_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 0939 0940 std::vector<std::string> row; 0941 if (pRow.empty()) 0942 { 0943 row.resize(GetDataColumnCount()); 0944 } 0945 else 0946 { 0947 row.resize(pRow.size() + (mLabelParams.mRowNameIdx + 1)); 0948 Converter<T> converter(mConverterParams); 0949 for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) 0950 { 0951 std::string str; 0952 converter.ToStr(*itCol, str); 0953 row.at(std::distance(pRow.begin(), itCol) + 0954 (mLabelParams.mRowNameIdx + 1)) = str; 0955 } 0956 } 0957 0958 while (rowIdx > GetDataRowCount()) 0959 { 0960 std::vector<std::string> tempRow; 0961 tempRow.resize(GetDataColumnCount()); 0962 mData.push_back(tempRow); 0963 } 0964 0965 mData.insert(mData.begin() + rowIdx, row); 0966 0967 if (!pRowName.empty()) 0968 { 0969 SetRowName(pRowIdx, pRowName); 0970 } 0971 } 0972 0973 /** 0974 * @brief Get number of data rows (excluding label rows). 0975 * @returns row count. 0976 */ 0977 size_t GetRowCount() const 0978 { 0979 const ssize_t count = 0980 static_cast<ssize_t>(mData.size()) - (mLabelParams.mColumnNameIdx + 1); 0981 return (count >= 0) ? count : 0; 0982 } 0983 0984 /** 0985 * @brief Get cell by index. 0986 * @param pColumnIdx zero-based column index. 0987 * @param pRowIdx zero-based row index. 0988 * @returns cell data. 
0989 */ 0990 template <typename T> 0991 T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const 0992 { 0993 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 0994 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 0995 0996 T val; 0997 Converter<T> converter(mConverterParams); 0998 converter.ToVal(mData.at(rowIdx).at(columnIdx), val); 0999 return val; 1000 } 1001 1002 /** 1003 * @brief Get cell by index. 1004 * @param pColumnIdx zero-based column index. 1005 * @param pRowIdx zero-based row index. 1006 * @param pToVal conversion function. 1007 * @returns cell data. 1008 */ 1009 template <typename T> 1010 T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc<T> pToVal) const 1011 { 1012 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 1013 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 1014 1015 T val; 1016 pToVal(mData.at(rowIdx).at(columnIdx), val); 1017 return val; 1018 } 1019 1020 /** 1021 * @brief Get cell by name. 1022 * @param pColumnName column label name. 1023 * @param pRowName row label name. 1024 * @returns cell data. 1025 */ 1026 template <typename T> 1027 T GetCell(const std::string &pColumnName, const std::string &pRowName) const 1028 { 1029 const ssize_t columnIdx = GetColumnIdx(pColumnName); 1030 if (columnIdx < 0) 1031 { 1032 throw std::out_of_range("column not found: " + pColumnName); 1033 } 1034 1035 const ssize_t rowIdx = GetRowIdx(pRowName); 1036 if (rowIdx < 0) 1037 { 1038 throw std::out_of_range("row not found: " + pRowName); 1039 } 1040 1041 return GetCell<T>(columnIdx, rowIdx); 1042 } 1043 1044 /** 1045 * @brief Get cell by name. 1046 * @param pColumnName column label name. 1047 * @param pRowName row label name. 1048 * @param pToVal conversion function. 1049 * @returns cell data. 
1050 */ 1051 template <typename T> 1052 T GetCell(const std::string &pColumnName, const std::string &pRowName, 1053 ConvFunc<T> pToVal) const 1054 { 1055 const ssize_t columnIdx = GetColumnIdx(pColumnName); 1056 if (columnIdx < 0) 1057 { 1058 throw std::out_of_range("column not found: " + pColumnName); 1059 } 1060 1061 const ssize_t rowIdx = GetRowIdx(pRowName); 1062 if (rowIdx < 0) 1063 { 1064 throw std::out_of_range("row not found: " + pRowName); 1065 } 1066 1067 return GetCell<T>(columnIdx, rowIdx, pToVal); 1068 } 1069 1070 /** 1071 * @brief Get cell by column name and row index. 1072 * @param pColumnName column label name. 1073 * @param pRowIdx zero-based row index. 1074 * @returns cell data. 1075 */ 1076 template <typename T> 1077 T GetCell(const std::string &pColumnName, const size_t pRowIdx) const 1078 { 1079 const ssize_t columnIdx = GetColumnIdx(pColumnName); 1080 if (columnIdx < 0) 1081 { 1082 throw std::out_of_range("column not found: " + pColumnName); 1083 } 1084 1085 return GetCell<T>(columnIdx, pRowIdx); 1086 } 1087 1088 /** 1089 * @brief Get cell by column name and row index. 1090 * @param pColumnName column label name. 1091 * @param pRowIdx zero-based row index. 1092 * @param pToVal conversion function. 1093 * @returns cell data. 1094 */ 1095 template <typename T> 1096 T GetCell(const std::string &pColumnName, const size_t pRowIdx, 1097 ConvFunc<T> pToVal) const 1098 { 1099 const ssize_t columnIdx = GetColumnIdx(pColumnName); 1100 if (columnIdx < 0) 1101 { 1102 throw std::out_of_range("column not found: " + pColumnName); 1103 } 1104 1105 return GetCell<T>(columnIdx, pRowIdx, pToVal); 1106 } 1107 1108 /** 1109 * @brief Get cell by column index and row name. 1110 * @param pColumnIdx zero-based column index. 1111 * @param pRowName row label name. 1112 * @returns cell data. 
1113 */ 1114 template <typename T> 1115 T GetCell(const size_t pColumnIdx, const std::string &pRowName) const 1116 { 1117 const ssize_t rowIdx = GetRowIdx(pRowName); 1118 if (rowIdx < 0) 1119 { 1120 throw std::out_of_range("row not found: " + pRowName); 1121 } 1122 1123 return GetCell<T>(pColumnIdx, rowIdx); 1124 } 1125 1126 /** 1127 * @brief Get cell by column index and row name. 1128 * @param pColumnIdx zero-based column index. 1129 * @param pRowName row label name. 1130 * @param pToVal conversion function. 1131 * @returns cell data. 1132 */ 1133 template <typename T> 1134 T GetCell(const size_t pColumnIdx, const std::string &pRowName, 1135 ConvFunc<T> pToVal) const 1136 { 1137 const ssize_t rowIdx = GetRowIdx(pRowName); 1138 if (rowIdx < 0) 1139 { 1140 throw std::out_of_range("row not found: " + pRowName); 1141 } 1142 1143 return GetCell<T>(pColumnIdx, rowIdx, pToVal); 1144 } 1145 1146 /** 1147 * @brief Set cell by index. 1148 * @param pRowIdx zero-based row index. 1149 * @param pColumnIdx zero-based column index. 1150 * @param pCell cell data. 1151 */ 1152 template <typename T> 1153 void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T &pCell) 1154 { 1155 const size_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 1156 const size_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 1157 1158 while ((rowIdx + 1) > GetDataRowCount()) 1159 { 1160 std::vector<std::string> row; 1161 row.resize(GetDataColumnCount()); 1162 mData.push_back(row); 1163 } 1164 1165 if ((columnIdx + 1) > GetDataColumnCount()) 1166 { 1167 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 1168 { 1169 itRow->resize(columnIdx + 1); 1170 } 1171 } 1172 1173 std::string str; 1174 Converter<T> converter(mConverterParams); 1175 converter.ToStr(pCell, str); 1176 mData.at(rowIdx).at(columnIdx) = str; 1177 } 1178 1179 /** 1180 * @brief Set cell by name. 1181 * @param pColumnName column label name. 1182 * @param pRowName row label name. 
1183 * @param pCell cell data. 1184 */ 1185 template <typename T> 1186 void SetCell(const std::string &pColumnName, const std::string &pRowName, 1187 const T &pCell) 1188 { 1189 const ssize_t columnIdx = GetColumnIdx(pColumnName); 1190 if (columnIdx < 0) 1191 { 1192 throw std::out_of_range("column not found: " + pColumnName); 1193 } 1194 1195 const ssize_t rowIdx = GetRowIdx(pRowName); 1196 if (rowIdx < 0) 1197 { 1198 throw std::out_of_range("row not found: " + pRowName); 1199 } 1200 1201 SetCell<T>(columnIdx, rowIdx, pCell); 1202 } 1203 1204 /** 1205 * @brief Get column name 1206 * @param pColumnIdx zero-based column index. 1207 * @returns column name. 1208 */ 1209 std::string GetColumnName(const ssize_t pColumnIdx) 1210 { 1211 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 1212 if (mLabelParams.mColumnNameIdx < 0) 1213 { 1214 throw std::out_of_range("column name row index < 0: " + 1215 std::to_string(mLabelParams.mColumnNameIdx)); 1216 } 1217 1218 return mData.at(mLabelParams.mColumnNameIdx).at(columnIdx); 1219 } 1220 1221 /** 1222 * @brief Set column name 1223 * @param pColumnIdx zero-based column index. 1224 * @param pColumnName column name. 
1225 */ 1226 void SetColumnName(size_t pColumnIdx, const std::string &pColumnName) 1227 { 1228 const ssize_t columnIdx = pColumnIdx + (mLabelParams.mRowNameIdx + 1); 1229 mColumnNames[pColumnName] = columnIdx; 1230 if (mLabelParams.mColumnNameIdx < 0) 1231 { 1232 throw std::out_of_range("column name row index < 0: " + 1233 std::to_string(mLabelParams.mColumnNameIdx)); 1234 } 1235 1236 // increase table size if necessary: 1237 const int rowIdx = mLabelParams.mColumnNameIdx; 1238 if (rowIdx >= static_cast<int>(mData.size())) 1239 { 1240 mData.resize(rowIdx + 1); 1241 } 1242 auto &row = mData[rowIdx]; 1243 if (columnIdx >= static_cast<int>(row.size())) 1244 { 1245 row.resize(columnIdx + 1); 1246 } 1247 1248 mData.at(mLabelParams.mColumnNameIdx).at(columnIdx) = pColumnName; 1249 } 1250 1251 /** 1252 * @brief Get column names 1253 * @returns vector of column names. 1254 */ 1255 std::vector<std::string> GetColumnNames() 1256 { 1257 if (mLabelParams.mColumnNameIdx >= 0) 1258 { 1259 return std::vector<std::string>( 1260 mData.at(mLabelParams.mColumnNameIdx).begin() + 1261 (mLabelParams.mRowNameIdx + 1), 1262 mData.at(mLabelParams.mColumnNameIdx).end()); 1263 } 1264 1265 return std::vector<std::string>(); 1266 } 1267 1268 /** 1269 * @brief Get row name 1270 * @param pRowIdx zero-based column index. 1271 * @returns row name. 1272 */ 1273 std::string GetRowName(const ssize_t pRowIdx) 1274 { 1275 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 1276 if (mLabelParams.mRowNameIdx < 0) 1277 { 1278 throw std::out_of_range("row name column index < 0: " + 1279 std::to_string(mLabelParams.mRowNameIdx)); 1280 } 1281 1282 return mData.at(rowIdx).at(mLabelParams.mRowNameIdx); 1283 } 1284 1285 /** 1286 * @brief Set row name 1287 * @param pRowIdx zero-based row index. 1288 * @param pRowName row name. 
1289 */ 1290 void SetRowName(size_t pRowIdx, const std::string &pRowName) 1291 { 1292 const ssize_t rowIdx = pRowIdx + (mLabelParams.mColumnNameIdx + 1); 1293 mRowNames[pRowName] = rowIdx; 1294 if (mLabelParams.mRowNameIdx < 0) 1295 { 1296 throw std::out_of_range("row name column index < 0: " + 1297 std::to_string(mLabelParams.mRowNameIdx)); 1298 } 1299 1300 // increase table size if necessary: 1301 if (rowIdx >= static_cast<int>(mData.size())) 1302 { 1303 mData.resize(rowIdx + 1); 1304 } 1305 auto &row = mData[rowIdx]; 1306 if (mLabelParams.mRowNameIdx >= static_cast<int>(row.size())) 1307 { 1308 row.resize(mLabelParams.mRowNameIdx + 1); 1309 } 1310 1311 mData.at(rowIdx).at(mLabelParams.mRowNameIdx) = pRowName; 1312 } 1313 1314 /** 1315 * @brief Get row names 1316 * @returns vector of row names. 1317 */ 1318 std::vector<std::string> GetRowNames() 1319 { 1320 std::vector<std::string> rownames; 1321 if (mLabelParams.mRowNameIdx >= 0) 1322 { 1323 for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) 1324 { 1325 if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) 1326 { 1327 rownames.push_back(itRow->at(mLabelParams.mRowNameIdx)); 1328 } 1329 } 1330 } 1331 return rownames; 1332 } 1333 1334 private: 1335 void ReadCsv() 1336 { 1337 std::ifstream stream; 1338 stream.exceptions(std::ifstream::failbit | std::ifstream::badbit); 1339 stream.open(mPath, std::ios::binary); 1340 ReadCsv(stream); 1341 } 1342 1343 void ReadCsv(std::istream &pStream) 1344 { 1345 Clear(); 1346 pStream.seekg(0, std::ios::end); 1347 std::streamsize length = pStream.tellg(); 1348 pStream.seekg(0, std::ios::beg); 1349 1350 #ifdef HAS_CODECVT 1351 std::vector<char> bom2b(2, '\0'); 1352 if (length >= 2) 1353 { 1354 pStream.read(bom2b.data(), 2); 1355 pStream.seekg(0, std::ios::beg); 1356 } 1357 1358 static const std::vector<char> bomU16le = { '\xff', '\xfe' }; 1359 static const std::vector<char> bomU16be = { '\xfe', '\xff' }; 1360 if ((bom2b == bomU16le) || (bom2b == 
bomU16be)) 1361 { 1362 mIsUtf16 = true; 1363 mIsLE = (bom2b == bomU16le); 1364 1365 std::wifstream wstream; 1366 wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit); 1367 wstream.open(mPath, std::ios::binary); 1368 if (mIsLE) 1369 { 1370 wstream.imbue( 1371 std::locale(wstream.getloc(), 1372 new std::codecvt_utf16<wchar_t, 0x10ffff, 1373 static_cast<std::codecvt_mode>( 1374 std::consume_header | 1375 std::little_endian)>)); 1376 } 1377 else 1378 { 1379 wstream.imbue(std::locale( 1380 wstream.getloc(), 1381 new std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>)); 1382 } 1383 std::wstringstream wss; 1384 wss << wstream.rdbuf(); 1385 std::string utf8 = ToString(wss.str()); 1386 std::stringstream ss(utf8); 1387 ParseCsv(ss, utf8.size()); 1388 } 1389 else 1390 #endif 1391 { 1392 // check for UTF-8 Byte order mark and skip it when found 1393 if (length >= 3) 1394 { 1395 std::vector<char> bom3b(3, '\0'); 1396 pStream.read(bom3b.data(), 3); 1397 static const std::vector<char> bomU8 = { '\xef', '\xbb', '\xbf' }; 1398 if (bom3b != bomU8) 1399 { 1400 // file does not start with a UTF-8 Byte order mark 1401 pStream.seekg(0, std::ios::beg); 1402 } 1403 else 1404 { 1405 // file did start with a UTF-8 Byte order mark, simply skip it 1406 length -= 3; 1407 } 1408 } 1409 1410 ParseCsv(pStream, length); 1411 } 1412 } 1413 1414 void ParseCsv(std::istream &pStream, std::streamsize p_FileLength) 1415 { 1416 const std::streamsize bufLength = 64 * 1024; 1417 std::vector<char> buffer(bufLength); 1418 std::vector<std::string> row; 1419 std::string cell; 1420 bool quoted = false; 1421 int cr = 0; 1422 int lf = 0; 1423 1424 while (p_FileLength > 0) 1425 { 1426 std::streamsize readLength = 1427 std::min<std::streamsize>(p_FileLength, bufLength); 1428 pStream.read(buffer.data(), readLength); 1429 for (int i = 0; i < readLength; ++i) 1430 { 1431 if (buffer[i] == '"') 1432 { 1433 if (cell.empty() || cell[0] == '"') 1434 { 1435 quoted = !quoted; 1436 } 1437 cell += 
buffer[i]; 1438 } 1439 else if (buffer[i] == mSeparatorParams.mSeparator) 1440 { 1441 if (!quoted) 1442 { 1443 row.push_back(Unquote(Trim(cell))); 1444 cell.clear(); 1445 } 1446 else 1447 { 1448 cell += buffer[i]; 1449 } 1450 } 1451 else if (buffer[i] == '\r') 1452 { 1453 if (mSeparatorParams.mQuotedLinebreaks && quoted) 1454 { 1455 cell += buffer[i]; 1456 } 1457 else 1458 { 1459 ++cr; 1460 } 1461 } 1462 else if (buffer[i] == '\n') 1463 { 1464 if (mSeparatorParams.mQuotedLinebreaks && quoted) 1465 { 1466 cell += buffer[i]; 1467 } 1468 else 1469 { 1470 ++lf; 1471 if (mLineReaderParams.mSkipEmptyLines && row.empty() && 1472 cell.empty()) 1473 { 1474 // skip empty line 1475 } 1476 else 1477 { 1478 row.push_back(Unquote(Trim(cell))); 1479 1480 if (mLineReaderParams.mSkipCommentLines && 1481 !row.at(0).empty() && 1482 (row.at(0)[0] == mLineReaderParams.mCommentPrefix)) 1483 { 1484 // skip comment line 1485 } 1486 else 1487 { 1488 mData.push_back(row); 1489 } 1490 1491 cell.clear(); 1492 row.clear(); 1493 quoted = false; 1494 } 1495 } 1496 } 1497 else 1498 { 1499 cell += buffer[i]; 1500 } 1501 } 1502 p_FileLength -= readLength; 1503 } 1504 1505 // Handle last line without linebreak 1506 if (!cell.empty() || !row.empty()) 1507 { 1508 row.push_back(Unquote(Trim(cell))); 1509 cell.clear(); 1510 mData.push_back(row); 1511 row.clear(); 1512 } 1513 1514 // Assume CR/LF if at least half the linebreaks have CR 1515 mSeparatorParams.mHasCR = (cr > (lf / 2)); 1516 1517 // Set up column labels 1518 if ((mLabelParams.mColumnNameIdx >= 0) && 1519 (static_cast<ssize_t>(mData.size()) > mLabelParams.mColumnNameIdx)) 1520 { 1521 int i = 0; 1522 for (auto &columnName : mData[mLabelParams.mColumnNameIdx]) 1523 { 1524 mColumnNames[columnName] = i++; 1525 } 1526 } 1527 1528 // Set up row labels 1529 if ((mLabelParams.mRowNameIdx >= 0) && 1530 (static_cast<ssize_t>(mData.size()) > (mLabelParams.mColumnNameIdx + 1))) 1531 { 1532 int i = 0; 1533 for (auto &dataRow : mData) 1534 { 1535 if 
(static_cast<ssize_t>(dataRow.size()) > mLabelParams.mRowNameIdx) 1536 { 1537 mRowNames[dataRow[mLabelParams.mRowNameIdx]] = i++; 1538 } 1539 } 1540 } 1541 } 1542 1543 void WriteCsv() const 1544 { 1545 #ifdef HAS_CODECVT 1546 if (mIsUtf16) 1547 { 1548 std::stringstream ss; 1549 WriteCsv(ss); 1550 std::string utf8 = ss.str(); 1551 std::wstring wstr = ToWString(utf8); 1552 1553 std::wofstream wstream; 1554 wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit); 1555 wstream.open(mPath, std::ios::binary | std::ios::trunc); 1556 1557 if (mIsLE) 1558 { 1559 wstream.imbue( 1560 std::locale(wstream.getloc(), 1561 new std::codecvt_utf16<wchar_t, 0x10ffff, 1562 static_cast<std::codecvt_mode>( 1563 std::little_endian)>)); 1564 } 1565 else 1566 { 1567 wstream.imbue(std::locale(wstream.getloc(), 1568 new std::codecvt_utf16<wchar_t, 0x10ffff>)); 1569 } 1570 1571 wstream << static_cast<wchar_t>(0xfeff); 1572 wstream << wstr; 1573 } 1574 else 1575 #endif 1576 { 1577 std::ofstream stream; 1578 stream.exceptions(std::ofstream::failbit | std::ofstream::badbit); 1579 stream.open(mPath, std::ios::binary | std::ios::trunc); 1580 WriteCsv(stream); 1581 } 1582 } 1583 1584 void WriteCsv(std::ostream &pStream) const 1585 { 1586 for (auto itr = mData.begin(); itr != mData.end(); ++itr) 1587 { 1588 for (auto itc = itr->begin(); itc != itr->end(); ++itc) 1589 { 1590 if (mSeparatorParams.mAutoQuote && 1591 ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) || 1592 (itc->find(' ') != std::string::npos))) 1593 { 1594 // escape quotes in string 1595 std::string str = *itc; 1596 ReplaceString(str, "\"", "\"\""); 1597 1598 pStream << "\"" << str << "\""; 1599 } 1600 else 1601 { 1602 pStream << *itc; 1603 } 1604 1605 if (std::distance(itc, itr->end()) > 1) 1606 { 1607 pStream << mSeparatorParams.mSeparator; 1608 } 1609 } 1610 pStream << (mSeparatorParams.mHasCR ? 
"\r\n" : "\n"); 1611 } 1612 } 1613 1614 size_t GetDataRowCount() const { return mData.size(); } 1615 1616 size_t GetDataColumnCount() const 1617 { 1618 return (mData.size() > 0) ? mData.at(0).size() : 0; 1619 } 1620 1621 std::string Trim(const std::string &pStr) 1622 { 1623 if (mSeparatorParams.mTrim) 1624 { 1625 std::string str = pStr; 1626 1627 // ltrim 1628 str.erase(str.begin(), std::find_if(str.begin(), str.end(), 1629 [](int ch) { return !isspace(ch); })); 1630 1631 // rtrim 1632 str.erase(std::find_if(str.rbegin(), str.rend(), 1633 [](int ch) { return !isspace(ch); }) 1634 .base(), 1635 str.end()); 1636 1637 return str; 1638 } 1639 else 1640 { 1641 return pStr; 1642 } 1643 } 1644 1645 std::string Unquote(const std::string &pStr) 1646 { 1647 if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) && (pStr.front() == '"') && 1648 (pStr.back() == '"')) 1649 { 1650 // remove start/end quotes 1651 std::string str = pStr.substr(1, pStr.size() - 2); 1652 1653 // unescape quotes in string 1654 ReplaceString(str, "\"\"", "\""); 1655 1656 return str; 1657 } 1658 else 1659 { 1660 return pStr; 1661 } 1662 } 1663 1664 #ifdef HAS_CODECVT 1665 #if defined(_MSC_VER) 1666 #pragma warning(disable : 4996) 1667 #endif 1668 static std::string ToString(const std::wstring &pWStr) 1669 { 1670 size_t len = std::wcstombs(nullptr, pWStr.c_str(), 0) + 1; 1671 char *cstr = new char[len]; 1672 std::wcstombs(cstr, pWStr.c_str(), len); 1673 std::string str(cstr); 1674 delete[] cstr; 1675 return str; 1676 } 1677 1678 static std::wstring ToWString(const std::string &pStr) 1679 { 1680 size_t len = 1 + mbstowcs(nullptr, pStr.c_str(), 0); 1681 wchar_t *wcstr = new wchar_t[len]; 1682 std::mbstowcs(wcstr, pStr.c_str(), len); 1683 std::wstring wstr(wcstr); 1684 delete[] wcstr; 1685 return wstr; 1686 } 1687 #if defined(_MSC_VER) 1688 #pragma warning(default : 4996) 1689 #endif 1690 #endif 1691 1692 static void ReplaceString(std::string &pStr, const std::string &pSearch, 1693 const std::string 
&pReplace) 1694 { 1695 size_t pos = 0; 1696 1697 while ((pos = pStr.find(pSearch, pos)) != std::string::npos) 1698 { 1699 pStr.replace(pos, pSearch.size(), pReplace); 1700 pos += pReplace.size(); 1701 } 1702 } 1703 1704 private: 1705 std::string mPath; 1706 LabelParams mLabelParams; 1707 SeparatorParams mSeparatorParams; 1708 ConverterParams mConverterParams; 1709 LineReaderParams mLineReaderParams; 1710 std::vector<std::vector<std::string>> mData; 1711 std::map<std::string, size_t> mColumnNames; 1712 std::map<std::string, size_t> mRowNames; 1713 #ifdef HAS_CODECVT 1714 bool mIsUtf16 = false; 1715 bool mIsLE = false; 1716 #endif 1717 }; 1718 } // namespace rapidcsv