File indexing completed on 2024-05-19 04:19:32
#!/usr/bin/env python3
import os
import re
import sys
import time
import hashlib
import argparse
import requests
from urllib.parse import urljoin, urlparse
from xml.etree import ElementTree


# Seconds to wait for the server to connect / send data before giving up,
# so that a stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 60


def download(indent, url):
    """Fetch ``url`` and return the response body as bytes.

    Any failure (network error, timeout, or a status code other than 200)
    is reported through ``error()`` at the given ``indent`` level and
    ``None`` is returned instead of raising.
    """
    try:
        res = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        error(indent, "Failed to fetch %s (%s)" % (url, e))
        return None

    if res.status_code != 200:
        error(indent, "Failed to fetch %s (%s)" % (url, res.status_code))
        return None

    # Short pause between successful requests.
    time.sleep(0.1)
    return res.content


def log(indent, msg):
    """Print ``msg`` to stdout, indented by ``indent`` levels of 4 spaces."""
    print(" " * (indent*4) + msg)


def error(indent, msg):
    """Write ``msg`` to stderr in red and increment the global error count."""
    global retcode
    retcode += 1
    sys.stderr.write(" " * (indent*4) + "\x1b[31m%s\x1b[m\n" % msg)


class ReleaseRow:
    """One row of the download table: a package, its checksum and its notes."""

    def __init__(self, name, package_url, hash_algo, hash_url, notes_url):
        self.name = name
        self.package_url = package_url
        self.hash_algo = hash_algo
        self.hash_url = hash_url
        self.notes_url = notes_url

    def print(self):
        """Log the name and the URLs stored in this row."""
        log(1, self.name)
        log(2, self.package_url)
        if self.notes_url:
            log(2, self.notes_url)
        log(2, self.hash_algo)
        log(3, self.hash_url)

    def check_hash(self):
        """Download the package and its checksum file and compare the hashes.

        Problems are reported through ``error()``; nothing is returned.
        """
        log(1, self.name)

        # The link text may be missing (None), treat that as an unknown
        # algorithm rather than crashing on ``None.lower()``.
        hasher = getattr(hashlib, (self.hash_algo or "").lower(), None)
        if not hasher:
            error(2, "Unknown hashing algorithm: %s" % self.hash_algo)
            return

        checksum = download(2, self.hash_url)
        if checksum is None:
            return

        data = download(2, self.package_url)
        if data is None:
            return

        if self.notes_url:
            # The content is not used: download() reports an error by
            # itself if the notes link cannot be fetched.
            download(2, self.notes_url)

        eval_hash = hasher(data).hexdigest()

        # The checksum file is expected to start with the hex digest.
        # Accept leading whitespace and upper-case digits, and report a
        # malformed file instead of crashing on a failed match.
        match = re.match(rb"\s*([0-9a-fA-F]+)", checksum)
        if not match:
            error(2, "No hash found in %s" % self.hash_url)
            return

        dl_hash = match.group(1).decode("ascii").lower()
        if dl_hash != eval_hash:
            error(2, "Hash mismatch: got %s, should be %s" % (dl_hash, eval_hash))

    def download_url(self, path, url, basename):
        """Download ``url`` and save it as ``basename`` inside ``path``.

        If the download fails nothing is written (the failure has already
        been reported by ``download()``).
        """
        data = download(1, url)
        if data is None:
            return

        full_path = os.path.join(path, basename)
        with open(full_path, "wb") as f:
            f.write(data)

    def download(self, path):
        """Save the package and its checksum file into ``path``."""
        basename = os.path.basename(urlparse(self.package_url).path)
        print(basename)
        self.download_url(path, self.package_url, basename)
        basename_hash = "%s.%s.txt" % (basename, self.hash_algo)
        self.download_url(path, self.hash_url, basename_hash)


class ReleaseTable:
    """Parses the HTML download table and collects its rows as ReleaseRow."""

    def __init__(self):
        self.rows = []

    def get_link(self, td, row, col):
        """Return ``[href, text]`` of the only ``<a>`` directly inside ``td``.

        Reports an error and returns ``None`` if the cell has no link, more
        than one link, or a link without an ``href`` attribute.
        """
        found = None

        for a in td:
            if a.tag == "a":
                if found is not None:
                    error(2, "Too many links in row %s col %s" % (row, col))
                    return None
                found = a

        # An anchor without href is not a usable link.
        if found is None or found.get("href") is None:
            error(2, "No link in row %s col %s" % (row, col))
            return None

        return [found.attrib["href"], found.text]

    def check_download_table_row_common(self, e, i):
        """Check that ``e`` is a ``<tr>`` with exactly 3 children."""
        if e.tag != "tr":
            error(1, "Expected <tr> for row %s" % i)
            return False
        if len(e) != 3:
            error(1, "Unexpected number of elements in row %s" % i)
            return False
        return True

    def check_download_table_row_head(self, e):
        """Check that the header row starts with a ``<th>`` cell."""
        if self.check_download_table_row_common(e, 0):
            if e[0].tag != "th":
                error(1, "First row should be headers")

    def check_download_table_row_body(self, e, i, rel_url):
        """Validate body row ``e`` (number ``i``) and append it to ``self.rows``.

        The three cells are expected to hold, in order, the package link,
        the checksum link (whose text is the algorithm name) and the
        release notes link.
        """
        if self.check_download_table_row_common(e, i):
            if any(c.tag != "td" for c in e):
                error(1, "Invalid elements in row %s" % i)

            links = [self.get_link(td, i, col) for col, td in enumerate(e)]
            if any(x is None for x in links):
                return

            download_link, download_name = links[0]
            sha_link, sha_algo = links[1]
            notes_link = links[2][0]

            # Only same-page anchors are kept as notes links (resolved
            # against rel_url); any other target is dropped.
            if notes_link.startswith("#"):
                notes_link = urljoin(rel_url, notes_link)
            else:
                notes_link = None

            self.rows.append(
                ReleaseRow(download_name, download_link, sha_algo, sha_link, notes_link)
            )

    def check_download_table(self, description, rel_url=""):
        """Find the first ``<table>`` in ``description`` and validate it.

        Both a flat list of ``<tr>`` and a ``<thead>``/``<tbody>`` layout
        are accepted. Valid body rows are appended to ``self.rows``.
        """
        match_s = re.search("<table", description)
        match_e = re.search("</table>", description)
        if not match_s or not match_e:
            error(1, "No download table")
            return

        html_text = description[match_s.start(0):match_e.end(0)]
        try:
            html = ElementTree.fromstring(html_text)
        except ElementTree.ParseError as e:
            print(html_text)
            error(1, "Invalid download table: %s" % e)
            return

        if len(html) < 2:
            error(1, "Too few row in the table")
            return

        if html[0].tag == "tr":
            # Rows are direct children of <table>, the first is the header
            for i, e in enumerate(html):
                if i == 0:
                    self.check_download_table_row_head(e)
                else:
                    self.check_download_table_row_body(e, i, rel_url)
        else:
            if html[0].tag != "thead":
                error(1, "Invalid html table (missing head)")
            elif len(html[0]) != 1:
                error(1, "Wrong number of rows in the table header")
            else:
                self.check_download_table_row_head(html[0][0])

            if html[1].tag != "tbody":
                error(1, "Invalid html table (missing body)")
            elif len(html[1]) < 1:
                error(1, "Wrong number of rows in the table body")
            else:
                for i, row in enumerate(html[1]):
                    self.check_download_table_row_body(row, i, rel_url)

    def check_download_page(self):
        """Download the website download page and validate its table."""
        url = "https://glaxnimate.mattbas.org/download"
        html = download(1, url)
        if not html:
            return
        self.check_download_table(html.decode("utf-8"), url)


#def check_tag():
    #log(0, "Checking tag")
    #response, status = api.project_request("GET", ["repository", "tags"], ns.version)
    #if status != 200:
        #error(1, "No tag")
        #return

    #if "release" not in response or "description" not in response["release"]:
        #error(1, "No release")
        #return

    #log(0, "Validating Release Page Downloads")
    #description = response["release"]["description"]
    #check_download_table(description)


# Number of errors reported so far, incremented by error() and used as the
# exit status of the script.
retcode = 0
parser = argparse.ArgumentParser()
#parser.add_argument("version")
parser.add_argument("--action", default="check_hash", choices=["check_hash", "list", "download"])
parser.add_argument("--download-path", default=".")
parser.add_argument("--package", default=None, nargs="+")

ns = parser.parse_args()

#api = GitlabApi()
#log(0, "Checking %s" % ns.version)
#check_tag()
if retcode == 0:
    release = ReleaseTable()
    release.check_download_page()

    rows = release.rows
    if ns.package:
        rows = [row for row in rows if row.name in ns.package]

        if len(rows) != len(ns.package):
            error(0, "Not all packages found")
            log(0, "Available packages")
            for row in release.rows:
                log(1, row.name)
            log(0, "Requested packages")
            for name in ns.package:
                log(1, name)

    if ns.action == "check_hash":
        log(0, "Validating Website Download Page")
        for row in rows:
            row.check_hash()
    elif ns.action == "list":
        for row in rows:
            row.print()
    elif ns.action == "download":
        for row in rows:
            row.download(ns.download_path)

# Exit statuses are truncated to 8 bits on most systems: clamp the error
# count so that e.g. 256 errors cannot be reported as success.
sys.exit(min(retcode, 255))