File indexing completed on 2024-12-22 03:35:49

0001 /*
0002     File                 : ROOTFilterPrivate.h
0003     Project              : LabPlot
0004     Description          : Private implementation class for ROOTFilter.
0005     --------------------------------------------------------------------
0006     SPDX-FileCopyrightText: 2018 Christoph Roick <chrisito@gmx.de>
0007 
0008     SPDX-License-Identifier: GPL-2.0-or-later
0009 */
0010 
0011 #ifndef ROOTFILTERPRIVATE_H
0012 #define ROOTFILTERPRIVATE_H
0013 
#include "ROOTFilter.h"

#include <QDateTime>
#include <QString>
#include <QStringList>
#include <QVector>

#include <iosfwd>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <vector>
0022 
0023 class AbstractDataSource;
0024 class AbstractColumn;
0025 
/**
 * @brief Read TH1 histograms and TTrees from ROOT files without depending on ROOT libraries
 */
class ROOTData {
public:
    /**
     * @brief Open ROOT file and save file positions of histograms and trees
     *
     * Also checks for the compression level. Currently the default ZLIB and LZ4 compression
     * types are supported. The TStreamerInfo is read if it is available, otherwise the
     * data structure as of ROOT v6.15 is used. No tests were performed with data written
     * prior to ROOT v5.34.
     *
     * @param[in] filename ROOT file to be read
     */
    explicit ROOTData(const std::string& filename);

    /// Parameters to describe a bin. A default-constructed instance is all zeros.
    struct BinPars {
        double content = 0.0;
        double sumw2 = 0.0;
        double lowedge = 0.0;
    };

    /**
     * @brief Identifiers for different data types
     *
     * Histograms are identified by their bin type. For the numeric types the lowest
     * hexadecimal digit of the identifier matches the size of the type
     * (e.g. Double = 0x48 -> 8, Short = 0x82 -> 2); it is used for cross checks
     * during the import.
     */
    enum class ContentType {
        Invalid = 0,
        Tree = 0x10,
        NTuple = 0x11,
        Basket = 0x20,
        Streamer = 0x30,
        Double = 0x48,
        Float = 0x54,
        Long = 0x68,
        Int = 0x74,
        Short = 0x82,
        Byte = 0x91,
        Bool = 0xA1,
        CString = 0xB0
    };

    /// Information about leaf contents. A default-constructed instance describes no valid leaf.
    struct LeafInfo {
        std::string branch;
        std::string leaf;
        ContentType type = ContentType::Invalid;
        bool issigned = false;
        size_t elements = 0;
    };

    /// Directory structure in a ROOT file where seek positions to the objects inside the file are stored
    struct Directory {
        /// Create an unnamed directory whose parent is 0
        Directory() = default;
        /// Create a directory with the given name and parent
        Directory(const std::string& name, long int parent)
            : name(name)
            , parent(parent) {
        }
        std::string name;
        long int parent = 0;
        std::vector<long int> content;
    };

    /// Return directory structure of file content with Histograms
    const std::map<long int, Directory>& listHistograms() const {
        return histdirs;
    }

    /// Return directory structure of file content with Trees
    const std::map<long int, Directory>& listTrees() const {
        return treedirs;
    }

    /**
     * @brief List information about data contained in leaves
     *
     * @param[in] pos Position of the tree inside the file
     *
     * @return One LeafInfo per leaf
     */
    std::vector<LeafInfo> listLeaves(long int pos) const;

    /**
     * @brief Get entries of a leaf
     *
     * @param[in] pos Position of the tree inside the file
     * @param[in] branchname Name of the branch
     * @param[in] leafname Name of the leaf
     * @param[in] element Index, if leaf is an array
     * @param[in] nentries Maximum number of entries to be read (unlimited by default)
     */
    template<typename T>
    std::vector<T> listEntries(long int pos,
                               const std::string& branchname,
                               const std::string& leafname,
                               size_t element = 0,
                               size_t nentries = std::numeric_limits<size_t>::max()) const;
    /**
     * @brief Get entries of a leaf with the same name as its branch
     *
     * Forwards to the five-argument overload, passing @p branchname as the leaf name.
     *
     * @param[in] pos Position of the tree inside the file
     * @param[in] branchname Name of the branch (and of the leaf)
     * @param[in] element Index, if leaf is an array
     * @param[in] nentries Maximum number of entries to be read (unlimited by default)
     */
    template<typename T>
    std::vector<T> listEntries(long int pos, const std::string& branchname, size_t element = 0, size_t nentries = std::numeric_limits<size_t>::max()) const {
        return listEntries<T>(pos, branchname, branchname, element, nentries);
    }

    /**
     * @brief Read histogram from file
     *
     * Jumps to memoized file position, decompresses the object if required and analyzes
     * the buffer. Overflow and underflow bins are included.
     *
     * @param[in] pos Position of the histogram inside the file
     */
    std::vector<BinPars> readHistogram(long int pos);

    /**
     * @brief Get name of the histogram at a position in the file
     *
     * The name is stored in the buffer. No file access required.
     *
     * @param[in] pos Position of the histogram inside the file
     */
    std::string histogramName(long int pos);

    /**
     * @brief Get number of bins in histogram
     *
     * The number of bins is stored in the buffer. No file access required.
     *
     * @param[in] pos Position of the histogram inside the file
     */
    int histogramBins(long int pos);

    /**
     * @brief Get name of the tree at a position in the file
     *
     * The name is stored in the buffer. No file access required.
     *
     * @param[in] pos Position of the tree inside the file
     */
    std::string treeName(long int pos);

    /**
     * @brief Get number of entries in tree
     *
     * The number of entries is stored in the buffer. No file access required.
     *
     * @param[in] pos Position of the tree inside the file
     */
    int treeEntries(long int pos);

private:
    /// Key information of an object stored in the file. All scalar members default to zero / none.
    struct KeyBuffer {
        enum class CompressionType { none, zlib, lz4 };

        ContentType type = ContentType::Invalid;
        std::string name;
        std::string title;
        int cycle = 0;
        size_t keylength = 0;
        CompressionType compression = CompressionType::none;
        size_t start = 0;
        size_t compressed_count = 0;
        size_t count = 0;
        int nrows = 0;
    };

    /// Description of one object inside a class as given by the streamer information
    struct StreamerInfo {
        std::string name;
        size_t size = 0;
        std::string counter;
        bool iscounter = false;
        bool ispointer = false;
    };

    /// Get data type from histogram identifier
    static ContentType histType(char type);
    /// Get data type from leaf identifier
    static ContentType leafType(char type);
    /**
     * @brief Get function to read a buffer of the specified type
     *
     * @return Pointer to a function that takes a `char*&` and returns a value of type T
     */
    template<class T>
    T (*readType(ContentType type, bool sign = true) const)(char*&);

    /// Get the number of bins contained in a histogram
    void readNBins(KeyBuffer& buffer);
    /// Get the number of entries contained in a tree
    void readNEntries(KeyBuffer& buffer);
    /// Get buffer from file content at histogram position
    std::string data(const KeyBuffer& buffer) const;
    /// Get buffer from file content at histogram position, uses already opened stream
    std::string data(const KeyBuffer& buffer, std::ifstream& is) const;
    /// Load streamer information
    void readStreamerInfo(const KeyBuffer& buffer);
    /**
     * @brief Advance to an object inside a class according to streamer information
     *
     * @param[in,out] buf Pointer to the current position in the class object
     * @param[in] objects A list of objects in the class defined by the streamer information
     * @param[in] current The name of the current object
     * @param[in] target The name of the object to be advanced to
     * @param[in,out] counts A list of the number of entries in objects of dynamic length; updated while reading
     *
     * @return NOTE(review): presumably whether the target object was reached - confirm against the implementation
     */
    static bool advanceTo(char*& buf,
                          const std::vector<StreamerInfo>& objects,
                          const std::string& current,
                          const std::string& target,
                          std::map<std::string, size_t>& counts);

    std::string filename;
    std::map<long int, Directory> histdirs, treedirs;
    std::map<long int, KeyBuffer> histkeys, treekeys;
    std::map<long int, KeyBuffer> basketkeys;

    std::map<std::string, std::vector<StreamerInfo>> streamerInfo;
};
0250 
0251 class ROOTFilterPrivate {
0252 public:
0253     ROOTFilterPrivate();
0254     /**
0255      * @brief Read data from the currently selected histogram
0256      *
0257      * The ROOT file is kept open until the file name is changed
0258      */
0259     void readDataFromFile(const QString& fileName, AbstractDataSource* dataSource, AbstractFileFilter::ImportMode importMode);
0260     /// Currently writing to ROOT files is not supported
0261     void write(const QString& fileName, AbstractDataSource*);
0262 
0263     /// List names of histograms contained in ROOT file
0264     ROOTFilter::Directory listHistograms(const QString& fileName);
0265     /// List names of trees contained in ROOT file
0266     ROOTFilter::Directory listTrees(const QString& fileName);
0267     /// List names of leaves contained in ROOT tree
0268     QVector<QStringList> listLeaves(const QString& fileName, quint64 pos);
0269 
0270     /// Get preview data of the currently set histogram
0271     QVector<QStringList> previewCurrentObject(const QString& fileName, int first, int last);
0272 
0273     /// Get the number of bins in the current histogram
0274     int rowsInCurrentObject(const QString& fileName);
0275 
0276     // TODO: needs to be public?
0277     /// Identifier of the current histogram
0278     QString currentObject;
0279     /// First row to read (can be -1, skips the underflow bin 0)
0280     int startRow = -1;
0281     /// Last row to read (can be -1, skips the overflow bin)
0282     int endRow = -1;
0283     /// Columns to read
0284     QVector<QStringList> columns;
0285 
0286 private:
0287     enum class FileType { Invalid = 0, Hist, Tree };
0288     /**
0289      * @brief Parse currentObject to find the corresponding position in the file
0290      *
0291      * @param[in] fileName Name of the file that contains currentObject
0292      * @param[out] pos Position in the file
0293      *
0294      * @return Type of the object
0295      */
0296     FileType currentObjectPosition(const QString& fileName, long int& pos);
0297 
0298     /**
0299      * @brief Parse the internal directory structure of the ROOT file and return a human readable version
0300      *
0301      * @param[in] dataContent Reference to the internal map of directories
0302      * @param[in] nameFunc Pointer to the function that returns a name corresponding to an object position in the file
0303      */
0304     ROOTFilter::Directory listContent(const std::map<long int, ROOTData::Directory>& dataContent, std::string (ROOTData::*nameFunc)(long int));
0305 
0306     /// Checks and updates the current ROOT file path
0307     bool setFile(const QString& fileName);
0308     /// Calls ReadHistogram from ROOTData
0309     std::vector<ROOTData::BinPars> readHistogram(quint64 pos);
0310     /// Calls listEntries from ROOTData
0311     std::vector<double> readTree(quint64 pos, const QString& branchName, const QString& leafName, int element, int last);
0312 
0313     /// Information about currently set ROOT file
0314     struct {
0315         QString name;
0316         QDateTime modified;
0317         qint64 size;
0318     } currentFile;
0319     /// ROOTData instance kept alive while currentFile does not change
0320     std::unique_ptr<ROOTData> currentROOTData;
0321 };
0322 
0323 #endif