File indexing completed on 2024-12-22 05:36:24
/**
 *  ocs-webserver
 *
 *  Copyright 2016 by pling GmbH.
 *
 *    This file is part of ocs-webserver.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU Affero General Public License as
 *    published by the Free Software Foundation, either version 3 of the
 *    License, or (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **/

/**
 * Decide whether a user-agent string belongs to a known crawler or automated client.
 *
 * The user agent is searched, case-insensitively, for each signature in the
 * table below; the first signature found anywhere in the string ends the search.
 *
 * @param string|null $USER_AGENT the user-agent value to inspect
 * @return bool true when any signature occurs in the user agent; false when
 *              the user agent is empty or no signature matches
 */
function crawlerDetect($USER_AGENT)
{
    // An empty user agent is treated as "not a bot".
    if (!$USER_AGENT) {
        return false;
    }

    // Each entry is a pair: [signature to search for, label or sample user-agent string].
    // Only the signature (index 0) is read by this function.
    $knownCrawlers = [
        ['Googlebot', 'Googlebot'],
        ['MSN', 'MSN'],
        ['msnbot-media', 'MSN'],
        ['bingbot', 'MSN'],
        ['MegaIndex.ru', 'MegaIndex.ru'],
        ['Baiduspider', 'Baiduspider'],
        ['YandexBot', 'YandexBot'],
        ['AhrefsBot', 'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)'],
        ['ltx71', 'ltx71'],
        ['msnbot', 'MSN'],
        ['Rambler', 'Rambler'],
        ['Yahoo', 'Yahoo'],
        ['AbachoBOT', 'AbachoBOT'],
        ['accoona', 'Accoona'],
        ['AcoiRobot', 'AcoiRobot'],
        ['ASPSeek', 'ASPSeek'],
        ['CrocCrawler', 'CrocCrawler'],
        ['Dumbot', 'Dumbot'],
        ['FAST-WebCrawler', 'FAST-WebCrawler'],
        ['GeonaBot', 'GeonaBot'],
        ['Gigabot', 'Gigabot'],
        ['Lycos', 'Lycos spider'],
        ['MSRBOT', 'MSRBOT'],
        ['Scooter', 'Altavista robot'],
        ['AltaVista', 'Altavista robot'],
        ['IDBot', 'ID-Search Bot'],
        ['eStyle', 'eStyle Bot'],
        ['Scrubby', 'Scrubby robot'],
        ['MJ12bot', 'Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)'],
        ['SemrushBot', 'SemrushBot'],
        ['bingbot', 'bingbot'],
        ['DotBot', 'http://www.opensiteexplorer.org/dotbot'],
        ['SEOkicks', 'https://www.seokicks.de/robot.html'],
        ['CCBot', 'CCBot/2.0 (https://commoncrawl.org/faq/)'],
        ['Sogou', 'Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)'],
        ['Bytespider', 'Bytespider;https://zhanzhang.toutiao.com/'],
        ['BLEXBot', 'BLEXBot/1.0; +http://webmeup-crawler.com/'],
        ['Applebot', 'Applebot/0.1; +http://www.apple.com/go/applebot'],
        ['serpstatbot', 'serpstatbot/1.0 (advanced backlink tracking bot; curl/7.58.0; http://serpstatbot.com/; abuse@serpstatbot.com)'],
        ['Linespider', 'Linespider/1.1;+https://lin.ee/4dwXkTH'],
        ['Yeti', 'Yeti/1.1; +http://naver.me/spd'],
        ['Feedspot', 'Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher; like FeedFetcher-Google)'],
        ['fantastic_search_engine_crawler', 'fantastic_search_engine_crawler/2.0 (Linux) fantastic-crawler@umich.edu'],
        ['Qwantify', 'Qwantify/Bleriot/1.1; +https://help.qwant.com/bot'],
        ['coccocbot', 'coccocbot-web/1.0; +http://help.coccoc.com/searchengine'],
        ['nagios-plugins', 'check_http/v2.2.1 (nagios-plugins 2.2.1)'],
        ['urlwatch', 'urlwatch/2.17 (+https://thp.io/2008/urlwatch/info.html)'],
        ['Buck', 'Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html)'],
        ['Anitya', 'Anitya 0.17.2 at release-monitoring.org'],
        ['MauiBot', 'MauiBot (crawler.feedback+dc@gmail.com)'],
        ['istellabot', 'istellabot/t.1.13'],
        ['SeznamBot', 'Mozilla/5.0 (compatible; SeznamBot/3.2-test1; +http://napoveda.seznam.cz/en/seznambot-intro/)'],
        ['TelegramBot', 'TelegramBot (like TwitterBot)'],
        ['Synapse', 'Synapse/1.0.0'],
        ['VelenPublicWebCrawler', 'Mozilla/5.0 (compatible; VelenPublicWebCrawler/1.0; +https://velen.io)'],
        ['MagiBot', 'Mozilla/5.0 (compatible; MagiBot/1.0.0; Matarael; +https://magi.com/bots)'],
        ['linkfluence', 'Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)'],
        ['repology', 'repology-linkchecker/1 (+https://repology.org/bots)'],
        ['yacybot', 'Mozilla/5.0 (compatible; yacybot/1.921/custom +https://searx.everdot.org/about)'],
        ['facebookexternalhit', 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'],
        ['ZoominfoBot', 'ZoominfoBot (zoominfobot at zoominfo dot com)'],
        ['curl', 'curl/7.66.0'],
        ['ZoomBot', 'ZoomBot (Linkbot 1.0 http://suite.seozoom.it/bot.html)'],
        ['PaperLiBot', 'Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)'],
        ['python-requests', 'python-requests/2.22.0'],
        ['Cliqzbot', 'Mozilla/5.0 (compatible; Cliqzbot/3.0; +http://cliqz.com/company/cliqzbot)'],
        ['YisouSpider', 'YisouSpider'],
        ['trendictionbot', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20170101 Firefox/67.0'],
        ['Jetslide', 'Mozilla/5.0 (compatible; Jetslide; +http://jetsli.de/crawler)'],
        ['Seekport', 'Mozilla/5.0 (compatible; Seekport Crawler; http://seekport.com/)'],
        ['GarlikCrawler', 'GarlikCrawler/1.2 (http://garlik.com/, crawler@garlik.com)'],
        ['Mb2345Browser', 'Mozilla/5.0(Linux;Android 5.1.1;OPPO A33 Build/LMY47V;wv) AppleWebKit/537.36(KHTML,link Gecko) Version/4.0 Chrome/42.0.2311.138 Mobile Safari/537.36 Mb2345Browser/9.0'],
        ['LieBaoFast', 'Mozilla/5.0(Linux;Android 5.1.1;OPPO A33 Build/LMY47V;wv) AppleWebKit/537.36(KHTML,link Gecko) Version/4.0 Chrome/43.0.2357.121 Mobile Safari/537.36 LieBaoFast/4.51.3'],
        ['TBS/043602', 'Mozilla/5.0 (Linux; Android 7.0; FRD-AL00 Build/HUAWEIFRD-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043602 Safari/537.36 MicroMessenger/6.5.16.1120 NetType/WIFI Language/zh_CN'],
        ['zh-CN;OPPO A33 Build/LMY47V', 'Mozilla/5.0(Linux;U;Android 5.1.1;zh-CN;OPPO A33 Build/LMY47V) AppleWebKit/537.36(KHTML,like Gecko) Version/4.0 Chrome/40.0.2214.89 UCBrowser/11.7.0.953 Mobile Safari/537.36'],
    ];

    foreach ($knownCrawlers as $entry) {
        $signature = $entry[0];

        // Case-insensitive substring search; a hit at offset 0 is still a hit,
        // hence the strict comparison against false.
        if (stripos($USER_AGENT, $signature) !== false) {
            return true;
        }
    }

    return false;
}