File indexing completed on 2024-11-24 04:44:41
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2022 Volker Krause <vkrause@kde.org>
# SPDX-License-Identifier: LGPL-2.0-or-later

"""Download the WHO ICD-11 simple tabulation and extract disease/vaccine code maps."""

import argparse
import csv
import json
import os
import subprocess
import zipfile

ICD11_URL = 'https://icd.who.int/browse11/Downloads/Download?fileName=simpletabulation.zip'


def writeToFile(fileName, content):
    """Write the bytes in ``content`` to ``fileName``, replacing any existing file."""
    # The context manager closes the handle even when write() raises.
    with open(fileName, 'wb') as f:
        f.write(content)


def countDepth(n):
    """Return the stack depth for a title prefixed with zero or more '- ' markers.

    The result is the number of leading '- ' markers plus 2. The tree builder
    pops its ancestor stack while ``len(stack) >= countDepth(title)``, so a
    title without markers ends up directly below the root (stack length 1).
    """
    depth = 0
    while n[2 * depth:].startswith('- '):
        depth += 1
    return depth + 2


def findNodes(node, func):
    """Collect the topmost nodes below ``node`` for which ``func`` is true.

    ``node`` is either a node dict or a list of node dicts. Once a node
    matches, its descendants are not searched any further.
    """
    res = []
    if isinstance(node, list):
        for child in node:
            res += findNodes(child, func)
    elif func(node):
        res.append(node)
    else:
        res += findNodes(node.get('children', []), func)
    return res


def toCodeMap(node):
    """Return ``(code, name)`` tuples for every node carrying a 'code' key.

    ``node`` is either a node dict or a list of node dicts; the whole subtree
    is traversed depth-first, parents before their children.
    """
    l = []
    if isinstance(node, list):
        for child in node:
            l += toCodeMap(child)
    else:
        if 'code' in node:
            l.append((node['code'], node['name']))
        l += toCodeMap(node.get('children', []))
    return l


def buildTree(rows):
    """Build the ICD-11 hierarchy from the CSV rows (header already removed).

    Columns read per row: 2 (code), 3 (block id), 4 (title with '- ' nesting
    markers), 5 (kind), 8 (chapter number) and 10 (compared against 'True';
    presumably the is-leaf flag -- verify against the table header).
    Rows whose kind is not 'category', 'block' or 'chapter' are ignored, as
    are rows too short to contain column 10 (e.g. blank lines).

    Returns the synthetic root node; children are stored under 'children'.
    """
    root = {'children': [], 'kind': 'root', 'name': ''}
    stack = [root]
    for row in rows:
        if len(row) <= 10:
            continue

        kind = row[5]
        node = {'kind': kind, 'name': row[4].lstrip('- ')}
        if kind == 'category':
            node['code'] = row[2]
        elif kind == 'block':
            node['block'] = row[3]
        elif kind == 'chapter':
            node['chapter'] = row[8]
        else:
            continue
        if row[10] != 'True':
            node['children'] = []

        # Unwind to the ancestor this row belongs to; countDepth() is always
        # >= 2, so the root is never popped.
        depth = countDepth(row[4])
        while len(stack) >= depth:
            stack.pop()
        # setdefault: a node flagged as leaf has no 'children' list yet, but
        # must not crash the import if a deeper row follows it anyway.
        stack[-1].setdefault('children', []).append(node)
        stack.append(node)
    return root


def downloadTable():
    """Download the ICD-11 archive, convert the contained sheet and return its rows.

    Writes 'simpletabulation.zip', 'simpletabulation.xlsx' and (through
    LibreOffice) 'simpletabulation.csv' into the current working directory.
    Returns the CSV rows as lists of strings, without the header row.

    Raises requests.HTTPError on a failed download and
    subprocess.CalledProcessError when the LibreOffice conversion fails.
    """
    # Imported lazily so the tree helpers above stay importable on systems
    # that do not have the third-party 'requests' package installed.
    import requests

    # download WHO ICD-11 archive
    icd11archive = 'simpletabulation.zip'
    req = requests.get(ICD11_URL, timeout=60)
    req.raise_for_status()  # do not store an HTTP error page as the archive
    writeToFile(icd11archive, req.content)

    # unpack the xls file in there
    icd11xls = 'simpletabulation.xlsx'
    with zipfile.ZipFile(icd11archive, 'r') as zipFile:
        writeToFile(icd11xls, zipFile.read(icd11xls))

    # convert xls to csv; argument list instead of a shell string, and
    # check=True so a failed conversion is not followed by reading a stale file
    icd11csv = 'simpletabulation.csv'
    subprocess.run(
        ['libreoffice', '--headless', '--convert-to', 'csv', '--infilter=CSV:44,34,76', icd11xls],
        check=True,
    )
    with open(icd11csv, newline='') as f:
        return list(csv.reader(f, delimiter=',', quotechar='"'))[1:]


def main():
    """Parse the command line, fetch the ICD-11 table and write the JSON code maps."""
    parser = argparse.ArgumentParser(description='Download and filter WHO ICD-11 data.')
    parser.add_argument('--output', type=str, required=True, help='Path to which the output should be written to')
    arguments = parser.parse_args()

    # load ICD-11 tree
    root = buildTree(downloadTable())

    # extract relevant disease codes
    # also: block == Virus?
    diseases = findNodes(root, lambda n: n['kind'] == 'chapter' and n['chapter'] in ('01', '25'))
    diseaseCodes = [entry for entry in toCodeMap(diseases) if len(entry[0]) == 4]  # drop sub-categories
    diseaseCodes.sort(key=lambda entry: entry[0])
    print("Diseases: ", len(diseaseCodes))
    writeToFile(os.path.join(arguments.output, 'diseases.json'), json.dumps(dict(diseaseCodes)).encode('utf-8'))

    # extract relevant medication codes
    vaccines = findNodes(root, lambda n: n['kind'] == 'block' and n['name'] == 'Vaccines')
    vaccineCodes = toCodeMap(vaccines)
    vaccineCodes.sort(key=lambda entry: entry[0])
    print("Vaccines:", len(vaccineCodes))
    writeToFile(os.path.join(arguments.output, 'vaccines.json'), json.dumps(dict(vaccineCodes)).encode('utf-8'))


if __name__ == '__main__':
    main()