File indexing completed on 2023-05-30 10:40:45
#!/usr/bin/python
#
# GCompris - check_voices.py
#
# Copyright (C) 2015 Bruno Coudoin <bruno.coudoin@gcompris.net>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, see <https://www.gnu.org/licenses/>.
#
#
# The output is in markdown. A web page can be generated with:
#   ./check_voices.py ../gcompris-kde
#
# (Requires python-markdown to be installed)
#
# The script must be run from the root of the voices directory: the voice
# files are looked up relative to the current directory (<locale>/<voiceset>/)
# while the reference data is read from the GCompris source tree given as
# first argument.
#
import os
import sys
import re
import copy
import json
import codecs
from io import StringIO
from datetime import date
import glob

import markdown
import polib
from PyQt5.QtCore import QCoreApplication, QUrl
from PyQt5.QtQml import QQmlComponent, QQmlEngine

if len(sys.argv) < 2:
    print("Usage: check_voices.py path_to_gcompris [-v] [-nn]")
    print(" -v: verbose, show also files that are fine")
    print(" -nn: not needed, show extra file in the voice directory")
    sys.exit(1)

verbose = '-v' in sys.argv
notneeded = '-nn' in sys.argv
gcompris_qt = sys.argv[1]

# Keep a reference to the real stdout: every report section temporarily
# redirects sys.stdout to a StringIO buffer and it is restored at the end.
# (stdout is not wrapped in a codecs writer: on Python 3 it is already a
# text stream and a bytes writer on top of it fails on the first write.)
ref_stdout = sys.stdout

# A global hash to hold a description on a key file like the UTF-8 char of
# the file.
descriptions = {}


def get_html_header():
    '''Return the opening HTML of a generated report page.'''
    return """<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
  <meta charset="utf-8"/>
  <title>GCompris Voice Recording Status</title>
</head>
<body>
"""


def get_html_footer():
    '''Return the closing HTML of a generated report page, stamped with today's date.'''
    today = date.today()
    return """
<hr>
<p>Page generated the {:s}</p>
</body>
""".format(today.isoformat())


def get_html_progress_bar(ratio):
    '''Return a table cell holding a progress bar for ratio.

    ratio is a number (or its string form) between 0 and 1.
    '''
    # Round instead of truncating: float('0.29') * 100 is 28.999... and
    # would otherwise be displayed as 28%.
    percent = int(round(float(ratio) * 100))
    return '<td width=200 height=30pt>' + \
        '<div style="border: 2px solid silver;background-color:#c00"><div style="background-color:#0c0;height:15px;width:{:d}%"></div></div>'.format(percent)


def title1(title):
    '''Print a level 1 markdown title.'''
    print(title)
    print('=' * len(title))
    print('')


def title2(title):
    '''Print a level 2 markdown title.'''
    print(title)
    print('-' * len(title))
    print('')


def title3(title):
    '''Print a level 3 markdown title.'''
    print('### ' + title)
    print('')


def _list_activities(activity_dir):
    '''Return the activity directory names found in activity_dir, skipping template and menu.'''
    return [activity for activity in os.listdir(activity_dir)
            if activity not in ('template', 'menu')
            and os.path.isdir(activity_dir + "/" + activity)]


def _translated(po, text):
    '''Return the translation of text found in po, or text itself when there is none.'''
    if not po:
        return text
    entry = po.find(text)
    # An entry with an empty msgstr is untranslated: keep the source text
    return entry.msgstr if entry and entry.msgstr != "" else text


def get_intro_from_code():
    '''Return a set for activities as found in GCompris ActivityInfo.qml'''
    activity_dir = gcompris_qt + "/src/activities"
    return {activity + '.ogg' for activity in _list_activities(activity_dir)}


def init_intro_description_from_code(locale, gcompris_po):
    '''Init the intro description as found in GCompris ActivityInfo.qml
    in the global descriptions hash.

    locale is used to find the voices po file, gcompris_po is the already
    loaded application po file (or None) used to translate the title and
    the description of each activity.
    '''
    voices_po_path = '/po/' + locale + '/gcompris_voices.po'
    voices_po = None
    try:
        voices_po = polib.pofile(gcompris_qt + voices_po_path, encoding='utf-8')
    except OSError:
        print("**ERROR: Failed to load po file %s**" % (voices_po_path))
        print('')

    title_re = re.compile(r'.*title:.*"(.*)"')
    description_re = re.compile(r'.*description:.*"(.*)"')
    intro_re = re.compile(r'.*intro:.*"(.*)"')

    activity_dir = gcompris_qt + "/src/activities"
    for activity in _list_activities(activity_dir):
        key = activity + '.ogg'
        descriptions[key] = ''
        try:
            with open(activity_dir + "/" + activity + "/ActivityInfo.qml", encoding='utf-8') as f:
                content = f.readlines()
        except IOError:
            # No ActivityInfo.qml, nothing to describe for this directory
            continue

        has_intro = False
        for line in content:
            m = title_re.match(line)
            if m:
                descriptions[key] += ' title: ' + _translated(gcompris_po, m.group(1))

            m = description_re.match(line)
            if m:
                descriptions[key] += ' description: ' + _translated(gcompris_po, m.group(1))

            m = intro_re.match(line)
            if m:
                has_intro = True
                descriptions[key] += ' voice: ' + _translated(voices_po, m.group(1))

        if not has_intro:
            print("**ERROR: Missing intro tag in %s**" % (activity + "/ActivityInfo.qml"))

    print('')


def _geography_levels(component):
    '''Yield (sound file name, level) for each geography board level that has a soundFile.'''
    for qml in glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml'):
        component.loadUrl(QUrl(qml))
        board = component.create()
        levels = board.property('levels')
        for level in levels.toVariant():
            if 'soundFile' in level:
                # Only the file name matters, '$CA' stands for the audio extension
                yield level['soundFile'].split('/')[-1].replace('$CA', 'ogg'), level


def init_country_names_from_code(component, locale, gcompris_po):
    '''Init the country description as found in GCompris geography/resource/board/board*.qml
    in the global descriptions hash.

    component is the QQmlComponent used to load the boards, gcompris_po is
    the already loaded application po file (or None) used to translate the
    tooltips. locale is not used.
    '''
    for sound, level in _geography_levels(component):
        if 'toolTipText' in level:
            descriptions[sound] = _translated(gcompris_po, level['toolTipText'])


def get_locales_from_config():
    '''Return a set for locales as found in GCompris src/core/LanguageList.qml'''

    locales = set()

    source = gcompris_qt + "/src/core/LanguageList.qml"
    try:
        with open(source, encoding='utf-8') as f:
            for line in f:
                m = re.match(r'.*"locale":.*"(.*)"', line)
                if m:
                    # Drop the encoding part (fr_FR.UTF-8 -> fr_FR)
                    locale = m.group(1).split('.')[0]
                    if locale not in ('system', 'en_US'):
                        locales.add(locale)
    except IOError as e:
        print(f"ERROR: Failed to parse {source}: {e.strerror}")

    return locales


def get_locales_from_po_files():
    '''Return a set for locales for which we have a po file '''
    return set(os.listdir(gcompris_qt + "/poqm"))


def get_translation_status_from_po_files():
    '''Return the translation status from the po file

    For each locale as key we provide a list:
      [ translated_entries, untranslated_entries, fuzzy_entries, percent ]

    As a side effect the translation team of each locale is saved in the
    global descriptions hash.
    '''

    # en locale has no translation file but mark it 100% done
    locales = {'en': [0, 0, 0, 1]}

    descriptions['en'] = 'US English'

    locales_dir = gcompris_qt + "/poqm"
    for locale in os.listdir(locales_dir):
        po = polib.pofile(locales_dir + '/' + locale + '/gcompris_qt.po', encoding='utf-8')
        # Calc a global translation percent
        untranslated = len(po.untranslated_entries())
        translated = len(po.translated_entries())
        fuzzy = len(po.fuzzy_entries())
        total = translated + untranslated + fuzzy
        # A po file without any entry counts as not translated at all
        percent = 1 - (float(untranslated + fuzzy) / total) if total else 0
        locales[locale] = [translated, untranslated, fuzzy, percent]

        # Save the translation team in the global descriptions
        if 'Language-Team' in po.metadata:
            team = po.metadata['Language-Team']
            # Drop the mailing list address
            descriptions[locale] = re.sub(r' <.*>', '', team)
        else:
            descriptions[locale] = ''

    return locales


def get_words_from_code():
    '''Return a set for words as found in GCompris lang/resource/content-<locale>.json

    The locale is read from the module level variable set by the main loop.
    '''
    resource = '/src/activities/lang/resource/content-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: missing resource file %s**" % (resource))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt#Lang_word_list'))
        print('')
        return set()

    # Consolidate letters
    words = set()
    for word in data:
        # Skip alphabet letter, they are already handled by the alphabet set
        if word.startswith(('U', '1')):
            continue
        words.add(word)
        descriptions[word] = '[{:s}](https://gcompris.net/incoming/lang/words.html#{:s})'.format(data[word], word.replace('.ogg', ''))

    return words


def get_wordsgame_from_code():
    '''Return nothing but tells if the required GCompris wordsgame/resource/default-<locale>.json is there

    The locale is read from the module level variable set by the main loop.
    '''
    resource = '/src/activities/wordsgame/resource/default-' + locale + '.json'
    if not os.path.isfile(gcompris_qt + resource):
        print('')
        print("**ERROR: missing resource file %s**" % (resource))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Wordsgame_.28Typing_words.29'))

    # We don't really have voices needs here, just check the file exists
    return set()


def get_click_on_letter_from_code():
    '''Return nothing but tells if the required GCompris click_on_letter/resource/levels-<locale>.json is there

    The locale is read from the module level variable set by the main loop.
    '''
    resource = '/src/activities/click_on_letter/resource/levels-' + locale + '.json'
    if not os.path.isfile(gcompris_qt + resource):
        print('')
        print("**ERROR: missing resource file %s**" % (resource))
        print('[Instructions to create this file TBD](%s)' % ('TBD'))

    # We don't really have voices needs here, just check the file exists
    return set()


def get_geography_on_letter_from_code(component):
    '''Return all the country sound files referenced by geography/resource/board/*.qml'''
    return {sound for sound, level in _geography_levels(component)
            if level.get('type') != "SHAPE_BACKGROUND"}


def get_files(locale, voiceset):
    '''Return the set of files found in <locale>/<voiceset>, empty if the directory cannot be read.'''
    to_remove = {'README'}
    try:
        return set(os.listdir(locale + '/' + voiceset)) - to_remove
    except OSError:
        return set()


def get_locales_from_file():
    '''Return the set of locales having a voice directory in the current directory.'''
    return {entry for entry in os.listdir('.')
            if os.path.isdir(entry)
            and not os.path.islink(entry)
            and not entry.startswith('.')}


def get_gletter_alphabet():
    '''Return the set of letter voice files needed by gletters/resource/default-<locale>.json

    The locale is read from the module level variable set by the main loop.
    '''
    resource = '/src/activities/gletters/resource/default-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: Missing resource file %s**" % (resource))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Simple_Letters_.28Typing_letters.29_level_design'))
        print('')
        return set()

    # Consolidate letters
    letters = set()
    for level in data['levels']:
        for w in level['words']:
            # The voice file is named after the code points of the letters
            multiletters = ''.join('U{:04X}'.format(ord(one_char)) for one_char in w.lower())
            letters.add(multiletters + '.ogg')
            descriptions[multiletters + '.ogg'] = w.lower()

    # Add numbers needed for words
    for i in range(10, 21):
        letters.add(str(i) + '.ogg')

    return letters


def _print_file_table(heading, files, described_row):
    '''Print a markdown table of files with their description under heading.'''
    title3(heading)
    print('| File | Description |')
    print('|------|-------------|')
    for f in sorted(files):
        if f in descriptions:
            print(described_row % (f, descriptions[f]))
        else:
            print('|%s | |' % (f))
    print('')


def diff_set(title, code, files):
    '''Returns a stat from 0 to 1 for this report set

    code is the set of files the application needs, files the set of files
    actually recorded. The stat is the share of files that are not missing.
    '''

    if not code and not files:
        return 0

    title2(title)

    if verbose and code & files:
        _print_file_table("These files are correct", code & files, '| %s | %s |')

    if code - files:
        _print_file_table("These files are missing", code - files, '| %s | %s |')

    if notneeded and files - code:
        _print_file_table("These files are not needed", files - code, '|%s | %s|')

    return 1 - float(len(code - files)) / len(code | files)


def diff_locale_set(title, code, files):
    '''Print the locales of code that have, and that miss, a voice directory.'''

    if not code and not files:
        return

    title2(title)
    if verbose:
        title3("We have voices for these locales:")
    missing = []
    for locale in code:
        # Accept the full locale (pt_BR) or its shortened form (pt)
        if os.path.isdir(locale) or os.path.isdir(locale.split('_')[0]):
            if verbose:
                print('* ' + locale)
        else:
            missing.append(locale)
    print('')
    print("We miss voices for these locales:")
    for f in missing:
        print('* ' + f)
    print('')


def check_locale_config(title, stats, locale_config):
    '''Display and return locales that are translated above a fixed threshold

    stats is the hash returned by get_translation_status_from_po_files(),
    locale_config the locales to check. The locales below the threshold are
    printed, the description of the others is returned as a sorted list.
    '''
    title2(title)
    LIMIT = 0.8
    good_locale = []
    for locale in sorted(locale_config):
        # Use the shortened locale (pt_BR -> pt) when the full one is unknown
        key = locale if locale in stats else locale.split('_')[0]
        if key not in stats:
            print("* %s no translation at all" % (locale))
            continue

        name = descriptions.get(key, '')
        if stats[key][3] < LIMIT:
            print('* {:s} ({:s})'.format(name, key))
        else:
            good_locale.append(name)

    print('')
    good_locale.sort()
    print('There are %d locales above %d%% translation: %s' % (len(good_locale), LIMIT * 100,
                                                               ', '.join(good_locale)))

    return good_locale

#
# main
# ===

# Each report is printed in its own buffer and converted to HTML at the end
reports = {}
sys.stdout = reports['stats'] = StringIO()

string_stats = get_translation_status_from_po_files()
check_locale_config("Locales to remove from LanguageList.qml (translation level < 80%)",
                    string_stats, get_locales_from_config())

print('\n[Guide to contribute recording files](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt'))

# Calc the big list of locales we have to check
all_locales = sorted(get_locales_from_po_files() | get_locales_from_file())

stats = {}
global_descriptions = copy.deepcopy(descriptions)

# A Qt application is required to load the geography boards with the QML engine
app = QCoreApplication(sys.argv)
engine = QQmlEngine()
component = QQmlComponent(engine)

for locale in all_locales:
    sys.stdout = reports[locale] = StringIO()

    # Start again from the locale independent descriptions
    descriptions = copy.deepcopy(global_descriptions)
    gcompris_po = None
    try:
        gcompris_po = polib.pofile(gcompris_qt + '/poqm/' + locale + '/gcompris_qt.po', encoding='utf-8')
    except OSError:
        print("**ERROR: Failed to load po file %s**" % ('/poqm/' + locale + '/gcompris_qt.po'))
        print('')

    init_intro_description_from_code(locale, gcompris_po)
    init_country_names_from_code(component, locale, gcompris_po)

    title1('{:s} ({:s})'.format(descriptions.get(locale, ''), locale))

    lstats = {'locale': locale}
    lstats['intro'] = diff_set("Intro ({:s}/intro/)".format(locale), get_intro_from_code(), get_files(locale, 'intro'))
    lstats['letter'] = diff_set("Letters ({:s}/alphabet/)".format(locale), get_gletter_alphabet(), get_files(locale, 'alphabet'))

    descriptions['click_on_letter.ogg'] = "Must contains the voice: 'Click on the letter:'"
    # The English voices are the reference for the misc and colors sets
    lstats['misc'] = diff_set("Misc ({:s}/misc/)".format(locale), get_files('en', 'misc'), get_files(locale, 'misc'))

    lstats['color'] = diff_set("Colors ({:s}/colors/)".format(locale), get_files('en', 'colors'), get_files(locale, 'colors'))
    lstats['geography'] = diff_set("Geography ({:s}/geography/)".format(locale), get_geography_on_letter_from_code(component), get_files(locale, 'geography'))
    lstats['words'] = diff_set("Words ({:s}/words/)".format(locale), get_words_from_code(), get_files(locale, 'words'))
    lstats['wordsgame'] = diff_set("Wordsgame", get_wordsgame_from_code(), set())
    lstats['click_on_letter'] = diff_set("Click on letter", get_click_on_letter_from_code(), set())
    stats[locale] = lstats

sys.stdout = reports['summary'] = StringIO()
sorted_keys = sorted(stats)

title1("GCompris Voice Recording Status Summary")
print('| Locale | Strings | Misc | Letters | Colors | Geography | Words | Intro|')
print('|--------|---------|------|---------|--------|-----------|-------|------|')
for locale in sorted_keys:
    stat = stats[locale]
    print('| [{:s} ({:s})](voice_status_{:s}.html) | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |'
          .format(descriptions.get(locale, ''), stat['locale'], locale,
                  string_stats[locale][3] if locale in string_stats else 0,
                  stat['misc'], stat['letter'], stat['color'], stat['geography'],
                  stat['words'], stat['intro']))

#
# Now we have all the reports
#

extensions = ['markdown.extensions.tables']

sys.stdout = ref_stdout

with codecs.open("index.html", "w",
                 encoding="utf-8",
                 errors="xmlcharrefreplace"
                 ) as f:
    f.write(get_html_header())

    summary = markdown.markdown(reports['summary'].getvalue(), extensions=extensions)
    summary2 = ""
    for line in summary.split('\n'):
        # Replace each ratio cell of the summary table by a progress bar
        m = re.match(r'<td>(\d\.\d\d)</td>', line)
        if m:
            rate = m.group(1)
            summary2 += get_html_progress_bar(rate)
        else:
            summary2 += line

        summary2 += '\n'

    f.write(summary2 + '\n')

    f.write(markdown.markdown(reports['stats'].getvalue(), extensions=extensions))
    f.write(get_html_footer())

for locale in sorted_keys:
    with codecs.open("voice_status_{:s}.html".format(locale), "w",
                     encoding="utf-8",
                     errors="xmlcharrefreplace"
                     ) as f:
        f.write(get_html_header())
        f.write(markdown.markdown(reports[locale].getvalue(), extensions=extensions))
        f.write(get_html_footer())