File indexing completed on 2024-05-19 05:01:19

0001 /*
0002     This file is part of the KDE project.
0003 
0004     SPDX-FileCopyrightText: 2022 Stefano Crocco <stefano.crocco@alice.it>
0005 
0006     SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
0007 */
0008 
0009 #ifndef NAVIGATIONRECORDER_H
0010 #define NAVIGATIONRECORDER_H
0011 
0012 #include <QObject>
0013 #include <QUrl>
0014 #include <QPointer>
0015 #include <QWebEngineUrlRequestInfo>
0016 #include <QMultiHash>
0017 
0018 class WebEnginePage;
0019 
0020 /**
0021  * Class used to integrate information from WebEnginePage::acceptNavigationRequest() and from
0022  * WebEngineUrlRequestInterceptor::interceptRequest()
0023  *
0024  * Sometimes it is important to know both which page requested navigating to a given URL and more
0025  * detailed information about the request itself
0026  * (for example, whether it is a GET or a POST request). Unfortunately, this kind information is only
0027  * available to WebEngineUrlRequestInterceptor::interceptRequest() which, in turn, does not have
0028  * information about which page made a given request. This class provides a workaround for such situation.
0029  *
0030  * When a WebEnginePage emits the \link WebEnginePage::mainFrameNavigationRequested mainFrameNavigationRequested() \endlink
0031  * signal, the corresponding URL, together with the page, is stored in the ::m_pendingNavigations instance variable.
0032  * WebEngineUrlRequestInterceptor::interceptRequest() calls recordRequestDetails() providing
0033  * information about the request. This information is associated which made a request for the given
0034  * URL and which is still pending and the relevant information is stored. That request is then
0035  * removed from ::m_pendingNavigations. When the page emits the `loadFinished()` signal, or when the
0036  * page is deleted, all information corresponding to its URL are removed.
0037  *
0038  * @note Currently, this class only records whether the request used the POST method.
0039  * @warning The algorithm described above is heuristic and it cannot guarantee that the association
0040  * between page and request information is always correct. In particular, if two different pages
0041  * request the same URL before \link WebEngineUrlRequestInterceptor::interceptRequest interceptRequest() \endlink
0042  * is called, when \link WebEngineUrlRequestInterceptor::interceptRequest interceptRequest() \endlink is called,
0043  * there will be no way to know which page the request refers to.
0044  * In this case, NavigationRecorder makes
0045  * the arbitrary assumption that the first call to \link WebEngineUrlRequestInterceptor::interceptRequest interceptRequest() \endlink
0046  * corresponds to the first call to \link WebEnginePage::acceptNavigationRequest() acceptNavigationRequest()\endlink.
0047  */
0048 class NavigationRecorder : public QObject
0049 {
0050     Q_OBJECT
0051 
0052 public:
0053     /**
0054      * @brief Constructor
0055      * @param parent the parent object
0056      */
0057     NavigationRecorder(QObject *parent=nullptr);
0058 
0059     /**
0060      * @brief Destructor
0061      */
0062     ~NavigationRecorder();
0063 
0064     /**
0065      * @brief Registers a page so that the NavigationRecorder can connect to its signals
0066      * @param page the page to register
0067      */
0068     void registerPage(WebEnginePage *page);
0069 
0070     /**
0071      * @brief Records details about a navigation request
0072      *
0073      * This method is called by WebEngineUrlRequestInterceptor when it intercepts a request for
0074      * a main frame. It matches the URL of the request with the URL of a previous call to
0075      * \link WebEnginePage::acceptNavigationRequest acceptNavigationRequest()\endlink and stores
0076      * information about it.
0077      * @param info the information about the request
0078      */
0079     void recordRequestDetails(const QWebEngineUrlRequestInfo &info);
0080 
0081     /**
0082      * @brief Whether the request made by a given page for a given URl is a POST request or not
0083      * @param url the requested URL
0084      * @param page the page which made the request
0085      * @return @c true if a request for @p url by @p page with POST method has been found and `false`
0086      * otherwise
0087      * @note a `false` return value may mean that no request for @p url by @p page has been
0088      * encountered or that it used a different method (typically GET).
0089      * @warning as explained in the general description of the class, the algorithm to associate
0090      * requests with pages need not always be 100% accurate
0091      */
0092     bool isPostRequest(const QUrl &url, WebEnginePage *page) const;
0093 
0094 public slots:
0095     /**
0096      * @brief Method called in response to the `WebEnginePage::loadFinished()` signal
0097      *
0098      * It removes all information about the given navigation.
0099      * @param page the page which finished loading
0100      * @param url the url
0101      */
0102     void recordNavigationFinished(WebEnginePage *page, const QUrl &url);
0103 
0104 private slots:
0105     /**
0106      * @brief Removes all references to deleted pages from the stored data
0107      */
0108     void removePage(QObject*);
0109 
0110     /**
0111      * @brief Records a navigation request from a page
0112      * @param page the page which requested the navigation
0113      * @param url the requested URL
0114      */
0115     void recordNavigation(WebEnginePage *page, const QUrl &url);
0116 
0117 private:
0118     /**
0119      * @brief A hash containing all POST requests and the pages which made them, grouped by URL
0120      */
0121     QMultiHash<QUrl, QPointer<WebEnginePage>> m_postNavigations;
0122 
0123     /**
0124      * @brief A hash containing all the navigation requests for which no call to
0125      * \link WebEnginePage::acceptNavigationRequest acceptNavigationRequest()\endlink has yet been
0126      * made, * grouped by URLs
0127      */
0128     QMultiHash<QUrl, QPointer<WebEnginePage>> m_pendingNavigations;
0129 };
0130 
0131 #endif // NAVIGATIONRECORDER_H