File indexing completed on 2024-11-24 04:44:41

0001 #!/usr/bin/env python3
0002 # SPDX-FileCopyrightText: 2022 Volker Krause <vkrause@kde.org>
0003 # SPDX-License-Identifier: LGPL-2.0-or-later
0004 
import argparse
import csv
import json
import os
import subprocess
import zipfile

import requests
0011 
def writeToFile(fileName, content):
    """Write *content* to *fileName*, replacing any existing file.

    The file is opened in binary mode, so *content* must be a bytes-like
    object. Returns None.
    """
    # The context manager closes the handle even when write() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(fileName, 'wb') as f:
        f.write(content)
0016 
def countDepth(n):
    """Return 2 plus the number of consecutive leading '- ' markers in *n*.

    A string without any leading '- ' marker therefore yields 2.
    """
    offset = 0
    # Each marker is exactly two characters wide; walk over them in place.
    while n.startswith('- ', offset):
        offset += 2
    return offset // 2 + 2
0022 
def findNodes(node, func):
    """Collect the topmost nodes below (and including) *node* accepted by *func*.

    *node* is either a dict, optionally carrying a 'children' list, or a list
    of such dicts. The search does not descend into a node that *func*
    accepts, so matches nested inside another match are not reported.
    Results are returned as a list in depth-first order.
    """
    if not isinstance(node, list):
        if func(node):
            return [node]
        # Not a match itself: keep looking among its children, if any.
        return findNodes(node.get('children', []), func)

    matches = []
    for child in node:
        matches.extend(findNodes(child, func))
    return matches
0034 
def toCodeMap(node):
    """Flatten a tree into a list of (code, name) tuples.

    *node* is either a dict, optionally carrying a 'children' list, or a list
    of such dicts. Every dict with a 'code' key contributes one tuple built
    from its 'code' and 'name'; its children are always visited as well.
    Tuples are returned in depth-first order.
    """
    if isinstance(node, list):
        return [pair for child in node for pair in toCodeMap(child)]

    pairs = [(node['code'], node['name'])] if 'code' in node else []
    pairs.extend(toCodeMap(node.get('children', [])))
    return pairs
0045 
0046 
# Command line: the only option is the directory receiving the generated JSON files.
parser = argparse.ArgumentParser(description='Download and filter WHO ICD-11 data.')
parser.add_argument('--output', type=str, required=True, help='Path to which the output should be written to')
arguments = parser.parse_args()

# Create the output directory up front so a missing directory does not make
# the script fail only after the download and conversion have finished.
os.makedirs(arguments.output, exist_ok=True)

# download WHO ICD-11 archive
icd11archive = 'simpletabulation.zip'
req = requests.get('https://icd.who.int/browse11/Downloads/Download?fileName=simpletabulation.zip', timeout=60)
# Abort on an HTTP error instead of saving an error page as the archive.
req.raise_for_status()
writeToFile(icd11archive, req.content)

# unpack the xls file in there
icd11xls = 'simpletabulation.xlsx'
with zipfile.ZipFile(icd11archive, 'r') as zipFile:
    writeToFile(icd11xls, zipFile.read(icd11xls))

# convert xls to csv
icd11csv = 'simpletabulation.csv'
# check=True stops the script when the conversion fails; otherwise a stale
# CSV left over from an earlier run could be picked up silently.
subprocess.run(
    ['libreoffice', '--headless', '--convert-to', 'csv', '--infilter=CSV:44,34,76', icd11xls],
    check=True,
)
with open(icd11csv, newline='') as f:
    # [1:] drops the first row (presumably the column header line).
    icd11table = list(csv.reader(f, delimiter=',', quotechar='"'))[1:]

# load ICD-11 tree
root = { 'children': [], 'kind': 'root', 'name': '' }
# stack[-1] is the most recently added node on the current path from the root.
stack = [root]
for row in icd11table:
    kind = row[5]
    # Column 4 is the title, prefixed with one '- ' per nesting level.
    depth = countDepth(row[4])

    node = {'kind': kind, 'name': row[4].lstrip('- ')}
    if kind == 'category':
        node['code'] = row[2]
    elif kind == 'block':
        node['block'] = row[3]
    elif kind == 'chapter':
        node['chapter'] = row[8]
    else:
        # rows of any other kind are not part of the tree
        continue
    # NOTE(review): column 10 is presumably the "is leaf" flag - confirm against
    # the sheet header. Rows flagged 'True' get no 'children' list.
    if row[10] != 'True':
        node['children'] = []

    # countDepth() yields 2 for an unprefixed title, so trimming the stack to
    # fewer than `depth` entries leaves this node's parent on top.
    while len(stack) >= depth:
        stack.pop()
    parent = stack[-1]
    parent['children'].append(node)
    stack.append(node)

# extract relevant disease codes
# also: block == Virus?
diseases = findNodes(root, lambda n: n['kind'] == 'chapter' and n['chapter'] in ('01', '25'))
diseaseCodes = toCodeMap(diseases)
diseaseCodes = [entry for entry in diseaseCodes if len(entry[0]) == 4] # drop sub-categories
diseaseCodes.sort(key=lambda entry: entry[0])
print("Diseases: ", len(diseaseCodes))
writeToFile(os.path.join(arguments.output, 'diseases.json'), json.dumps(dict(diseaseCodes)).encode('utf-8'))

# extract relevant medication codes
vaccines = findNodes(root, lambda n: n['kind'] == 'block' and n['name'] == 'Vaccines')
vaccineCodes = toCodeMap(vaccines)
vaccineCodes.sort(key=lambda entry: entry[0])
print("Vaccines:", len(vaccineCodes))
writeToFile(os.path.join(arguments.output, 'vaccines.json'), json.dumps(dict(vaccineCodes)).encode('utf-8'))