File indexing completed on 2024-11-17 04:55:18
0001 /* 0002 SPDX-License-Identifier: MPL-2.0 0003 */ 0004 0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 0006 * This Source Code Form is subject to the terms of the Mozilla Public 0007 * License, v. 2.0. If a copy of the MPL was not distributed with this 0008 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 0009 0010 #include <ctype.h> 0011 #include "./protocol.h" 0012 0013 enum ProtocolParseState { 0014 ProtocolParseStateStart, 0015 ProtocolParseStateReadingBlob, 0016 ProtocolParseStatePostBlob, 0017 ProtocolParseStateReadingProtoWebSocket, 0018 ProtocolParseStateReadingProtoHTTP, 0019 ProtocolParseStatePostProto, 0020 ProtocolParseStateReadingSeperator, 0021 }; 0022 0023 /** 0024 * Checks to see if a URL is "blockable". 0025 * 0026 * Blockable URLs are ones that use one of the following protocols (any of 0027 * which can be prefixed by "blob:") 0028 * - http 0029 * - https 0030 * - ws 0031 * - wss 0032 */ 0033 bool isBlockableProtocol(const char *url, int urlLen) { 0034 // First check to see if this is a blob URL. If the URL is very short, 0035 // then trivially it isn't of the above protocols. 0036 if (urlLen <= 5) { 0037 return false; 0038 } 0039 0040 const char *curChar = url; 0041 int totalCharsRead = 0; 0042 int numCharsReadInState; 0043 char lowerChar; 0044 ProtocolParseState parseState = ProtocolParseStateStart; 0045 0046 // The below loop encodes a state machine. Free transitions between states 0047 // are continues. States that consume input "break" so that the can 0048 // share the common incrementing statements at the bottom of the loop. 0049 // 0050 // Its not quite as optimized as possible (some state transitions could 0051 // be collapsed) but its written in this _slightly_ more verbose way 0052 // to make it easier to grok. 0053 while (true) { 0054 switch (parseState) { 0055 case ProtocolParseStateStart: 0056 if (tolower(*curChar) == 'b') { 0057 parseState = ProtocolParseStateReadingBlob; 0058 continue; 0059 } 0060 // Intentional fall through 0061 [[fallthrough]]; 0062 case ProtocolParseStatePostBlob: 0063 lowerChar = tolower(*curChar); 0064 if (lowerChar == 'w') { 0065 parseState = ProtocolParseStateReadingProtoWebSocket; 0066 continue; 0067 } 0068 if (lowerChar == 'h') { 0069 parseState = ProtocolParseStateReadingProtoHTTP; 0070 continue; 0071 } 0072 // If we're in ProtocolParseStateStart and didn't see "blob:", 0073 // "ws" or "http", or in ProtocolParseStatePostBlob 0074 // and don't see "ws" or "http" starting, then the URL doesn't match 0075 // any protocol we're interested in. 0076 return false; 0077 0078 case ProtocolParseStateReadingBlob: 0079 if (tolower(*curChar) == 'b' && 0080 tolower(*(curChar + 1)) == 'l' && 0081 tolower(*(curChar + 2)) == 'o' && 0082 tolower(*(curChar + 3)) == 'b' && 0083 tolower(*(curChar + 4)) == ':') { 0084 parseState = ProtocolParseStatePostBlob; 0085 numCharsReadInState = 5; 0086 break; 0087 } 0088 // Unexpected character read when consuming "blob:" 0089 return false; 0090 0091 case ProtocolParseStateReadingProtoHTTP: 0092 if (tolower(*curChar) == 'h' && 0093 tolower(*(curChar + 1)) == 't' && 0094 tolower(*(curChar + 2)) == 't' && 0095 tolower(*(curChar + 3)) == 'p') { 0096 parseState = ProtocolParseStatePostProto; 0097 numCharsReadInState = 4; 0098 break; 0099 } 0100 // Unexpected character read when consuming "http" 0101 return false; 0102 0103 case ProtocolParseStateReadingProtoWebSocket: 0104 if (tolower(*curChar) == 'w' && 0105 tolower(*(curChar + 1)) == 's') { 0106 parseState = ProtocolParseStatePostProto; 0107 numCharsReadInState = 2; 0108 break; 0109 } 0110 // Unexpected character read when consuming "ws" 0111 return false; 0112 0113 case ProtocolParseStatePostProto: 0114 if (tolower(*curChar) == 's') { 0115 parseState = ProtocolParseStateReadingSeperator; 0116 numCharsReadInState = 1; 0117 break; 0118 } 0119 [[fallthrough]]; 0120 // Intentional fall through 0121 case ProtocolParseStateReadingSeperator: 0122 if (*curChar == ':' && 0123 (*(curChar + 1)) == '/' && 0124 (*(curChar + 2)) == '/') { 0125 return true; 0126 } 0127 // Unexpected character read when consuming "://" 0128 return false; 0129 } 0130 0131 // If we've read the entire URL and we haven't been able to determine 0132 // the protocol, then its trivially not a blockable protocol. 0133 totalCharsRead += numCharsReadInState; 0134 if (totalCharsRead >= urlLen) { 0135 return false; 0136 } 0137 curChar += numCharsReadInState; 0138 } 0139 } 0140