File indexing completed on 2024-05-19 15:24:57

0001 #!/usr/bin/env python3
0002 import os
0003 import re
0004 import sys
0005 import time
0006 import hashlib
0007 import argparse
0008 import requests
0009 from urllib.parse import urljoin, urlparse
0010 from xml.etree import ElementTree
0011 
0012 
def download(indent, url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the raw response body.

    Failures are reported through ``error`` (at nesting level ``indent``)
    instead of being raised, so callers only need to check for ``None``.

    Args:
        indent: Nesting level passed to ``error`` when reporting a failure.
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely on a stalled server.

    Returns:
        The response body as ``bytes``, or ``None`` when the request could
        not be completed or the status code is not 200.
    """
    try:
        res = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ...: report them the
        # same way as a bad status code instead of aborting with a traceback.
        error(indent, "Failed to fetch %s (%s)" % (url, exc))
        return None

    if res.status_code != 200:
        error(indent, "Failed to fetch %s (%s)" % (url, res.status_code))
        return None

    # Short pause after every successful request
    # (presumably to go easy on the server - confirm).
    time.sleep(0.1)
    return res.content
0020 
0021 
def log(indent, msg):
    """Print ``msg`` on standard output, indented by ``indent`` levels.

    Each indentation level is four spaces wide.
    """
    padding = " " * (4 * indent)
    print(padding + msg)
0024 
0025 
def error(indent, msg):
    """Report a problem on standard error and count it.

    The message is indented by ``indent`` levels of four spaces and wrapped
    in the ANSI escape sequences for red text. Every call adds one to the
    module-level ``retcode`` counter.
    """
    global retcode
    retcode = retcode + 1
    padding = " " * (4 * indent)
    sys.stderr.write(padding + "\x1b[31m%s\x1b[m\n" % msg)
0030 
0031 
class ReleaseRow:
    """One entry of the download table: a package, its checksum and notes."""

    def __init__(self, name, package_url, hash_algo, hash_url, notes_url):
        """Store the fields describing the row.

        Args:
            name: Name of the package.
            package_url: URL of the package file.
            hash_algo: Name of the hashing algorithm; it is lower-cased and
                looked up as an attribute of ``hashlib``.
            hash_url: URL of the file holding the published checksum.
            notes_url: URL of the release notes, or a falsy value if none.
        """
        self.name = name
        self.package_url = package_url
        self.hash_algo = hash_algo
        self.hash_url = hash_url
        self.notes_url = notes_url

    def print(self):
        """Log the row's name, URLs and hashing algorithm."""
        log(1, self.name)
        log(2, self.package_url)
        if self.notes_url:
            log(2, self.notes_url)
        log(2, self.hash_algo)
        log(3, self.hash_url)

    @staticmethod
    def _parse_checksum(checksum):
        """Return the hex digest at the start of ``checksum``, lower-cased.

        Args:
            checksum: Raw ``bytes`` content of a checksum file.

        Returns:
            The leading run of hexadecimal digits as a lower-case ``str``,
            or ``None`` when the content does not start with a hex digit.
        """
        match = re.match(rb"[0-9a-fA-F]+", checksum)
        if match is None:
            return None
        # ``hexdigest()`` is lower case, so normalise before comparing.
        return match.group(0).decode("utf-8").lower()

    def check_hash(self):
        """Download the package and verify it against its published checksum.

        Every problem (unknown algorithm, failed download, unreadable
        checksum file, mismatch) is reported through ``error``; nothing is
        returned or raised for them.
        """
        log(1, self.name)

        # ``hash_algo`` comes from link text and may be missing altogether.
        hasher = getattr(hashlib, (self.hash_algo or "").lower(), None)
        if not hasher:
            error(2, "Unknown hashing algorithm: %s" % self.hash_algo)
            return

        checksum = download(2, self.hash_url)
        if checksum is None:
            return

        data = download(2, self.package_url)
        if data is None:
            return

        # The notes are fetched only so that a broken link gets reported by
        # ``download``; their content is not used.
        if self.notes_url:
            download(2, self.notes_url)

        dl_hash = self._parse_checksum(checksum)
        if dl_hash is None:
            # E.g. an empty file or an HTML page served instead of the hash.
            error(2, "No checksum found in %s" % self.hash_url)
            return

        eval_hash = hasher(data).hexdigest()
        if dl_hash != eval_hash:
            error(2, "Hash mismatch: got %s, should be %s" % (dl_hash, eval_hash))

    def download_url(self, path, url, basename):
        """Download ``url`` and save it as ``basename`` inside ``path``.

        Nothing is written when the download fails (``download`` has already
        reported the problem), so no empty file is left behind.
        """
        content = download(1, url)
        if content is None:
            return

        full_path = os.path.join(path, basename)
        with open(full_path, "wb") as f:
            f.write(content)

    def download(self, path):
        """Save the package and its checksum file into directory ``path``.

        The package keeps the file name found in its URL; the checksum is
        stored next to it as ``<file name>.<hash_algo>.txt``.
        """
        basename = os.path.basename(urlparse(self.package_url).path)
        print(basename)
        self.download_url(path, self.package_url, basename)
        basename_hash = "%s.%s.txt" % (basename, self.hash_algo)
        self.download_url(path, self.hash_url, basename_hash)
0083 
0084 
class ReleaseTable:
    """Parses and validates the HTML download table into ``ReleaseRow`` items."""

    def __init__(self):
        """Start with an empty list of rows."""
        # ReleaseRow objects collected by check_download_table_row_body().
        self.rows = []

    def get_link(self, td, row, col):
        """Return the single link found directly inside a table cell.

        Args:
            td: Cell element whose direct children are inspected.
            row: Row number, used only in error messages.
            col: Column number, used only in error messages.

        Returns:
            ``[href, text]`` of the only ``<a>`` child, or ``None`` (after
            reporting through ``error``) when the cell holds no link, more
            than one link, or a link without an ``href`` attribute.
        """
        anchors = [child for child in td if child.tag == "a"]

        if len(anchors) > 1:
            error(2, "Too many links in row %s col %s" % (row, col))
            return None

        if not anchors:
            error(2, "No link in row %s col %s" % (row, col))
            return None

        found = anchors[0]
        href = found.attrib.get("href")
        if href is None:
            # An <a> used as a plain anchor carries no target to check.
            error(2, "Link without href in row %s col %s" % (row, col))
            return None

        return [href, found.text]

    def check_download_table_row_common(self, e, i):
        """Check that ``e`` is a ``<tr>`` with exactly three children.

        Args:
            e: Element to check.
            i: Row number, used only in error messages.

        Returns:
            ``True`` when the row is well formed, ``False`` otherwise (the
            problem is reported through ``error``).
        """
        if e.tag != "tr":
            error(1, "Expected <tr> for row %s" % i)
            return False
        if len(e) != 3:
            error(1, "Unexpected number of elements in row %s" % i)
            return False
        return True

    def check_download_table_row_head(self, e):
        """Validate the header row; its first cell must be a ``<th>``."""
        if self.check_download_table_row_common(e, 0):
            if e[0].tag != "th":
                error(1, "First row should be headers")

    def check_download_table_row_body(self, e, i, rel_url):
        """Validate one body row and append it to ``self.rows``.

        The three cells are read as: package link (its text is the package
        name), checksum link (its text is the algorithm name) and notes link.

        Args:
            e: The ``<tr>`` element.
            i: Row number, used only in error messages.
            rel_url: Base URL against which a notes anchor is resolved.
        """
        if not self.check_download_table_row_common(e, i):
            return

        if any(c.tag != "td" for c in e):
            error(1, "Invalid elements in row %s" % i)

        links = [self.get_link(td, i, col) for col, td in enumerate(e)]
        if any(x is None for x in links):
            return

        download_link, download_name = links[0]
        sha_link, sha_algo = links[1]
        notes_link = links[2][0]

        # Only same-page anchors ("#...") are kept, resolved against
        # rel_url; any other notes target is dropped.
        # NOTE(review): confirm that discarding non-anchor links is intended.
        if notes_link.startswith("#"):
            notes_link = urljoin(rel_url, notes_link)
        else:
            notes_link = None

        self.rows.append(
            ReleaseRow(download_name, download_link, sha_algo, sha_link, notes_link)
        )

    def check_download_table(self, description, rel_url=""):
        """Find the download table inside ``description`` and validate it.

        Two layouts are accepted: ``<tr>`` rows directly under ``<table>``
        (the first one being the header), or a ``<thead>`` with one row
        followed by a ``<tbody>``. Valid body rows end up in ``self.rows``;
        problems are reported through ``error``.

        Args:
            description: HTML text containing the table.
            rel_url: Base URL handed to the row parser.
        """
        match_s = re.search("<table", description)
        match_e = re.search("</table>", description)
        if not match_s or not match_e:
            error(1, "No download table")
            return

        # Parse just the table markup as XML.
        html_text = description[match_s.start(0):match_e.end(0)]
        try:
            html = ElementTree.fromstring(html_text)
        except ElementTree.ParseError as e:
            print(html_text)
            error(1, "Invalid download table: %s" % e)
            return

        if len(html) < 2:
            error(1, "Too few row in the table")
            return

        if html[0].tag == "tr":
            for i, e in enumerate(html):
                if i == 0:
                    self.check_download_table_row_head(e)
                else:
                    self.check_download_table_row_body(e, i, rel_url)
            return

        if html[0].tag != "thead":
            error(1, "Invalid html table (missing head)")
        elif len(html[0]) != 1:
            error(1, "Wrong number of rows in the table header")
        else:
            self.check_download_table_row_head(html[0][0])

        if html[1].tag != "tbody":
            error(1, "Invalid html table (missing body)")
        elif len(html[1]) < 1:
            error(1, "Wrong number of rows in the table body")
        else:
            # Number body rows from 1: the header is reported as row 0, and
            # this matches the numbering used for the flat <tr> layout.
            for i, row in enumerate(html[1], start=1):
                self.check_download_table_row_body(row, i, rel_url)

    def check_download_page(self):
        """Download the website's download page and validate its table."""
        url = "https://glaxnimate.mattbas.org/download"
        html = download(1, url)
        if not html:
            return
        self.check_download_table(html.decode("utf-8"), url)
0188 
0189 
0190 #def check_tag():
0191     #log(0, "Checking tag")
0192     #response, status = api.project_request("GET", ["repository", "tags"], ns.version)
0193     #if status != 200:
0194         #error(1, "No tag")
0195         #return
0196 
0197     #if "release" not in response or "description" not in response["release"]:
0198         #error(1, "No release")
0199         #return
0200 
0201     #log(0, "Validating Release Page Downloads")
0202     #description = response["release"]["description"]
0203     #check_download_table(description)
0204 
0205 
# Number of problems reported so far; error() increments it and it becomes
# the process exit status.
retcode = 0
parser = argparse.ArgumentParser()
#parser.add_argument("version")
parser.add_argument("--action", default="check_hash", choices=["check_hash", "list", "download"])
parser.add_argument("--download-path", default=".")
parser.add_argument("--package", default=None, nargs="+")

ns = parser.parse_args()

#api = GitlabApi()
#log(0, "Checking %s" % ns.version)
#check_tag()
# While the tag check above is disabled nothing can have failed yet, so this
# guard currently always passes.
if retcode == 0:
    release = ReleaseTable()
    release.check_download_page()

    rows = release.rows
    if ns.package:
        rows = [row for row in rows if row.name in ns.package]

        # Compare by name rather than by count, so that a package requested
        # twice or a name listed twice in the table is not misreported.
        available = {row.name for row in release.rows}
        if any(name not in available for name in ns.package):
            error(0, "Not all packages found")
            log(0, "Available packages")
            for row in release.rows:
                log(1, row.name)
            log(0, "Requested packages")
            for package_name in ns.package:
                log(1, package_name)

    if ns.action == "check_hash":
        log(0, "Validating Website Download Page")
        for row in rows:
            row.check_hash()
    elif ns.action == "list":
        for row in rows:
            row.print()
    elif ns.action == "download":
        for row in rows:
            row.download(ns.download_path)

# Exit statuses are truncated to 8 bits on POSIX, so an unclamped count of
# 256 errors would be reported as success.
sys.exit(min(retcode, 255))