File indexing completed on 2024-12-08 09:26:30
#!/usr/bin/python3
#
# GCompris - check_voices.py
#
# Copyright (C) 2015 Bruno Coudoin <bruno.coudoin@gcompris.net>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, see <https://www.gnu.org/licenses/>.
#
#
# The output is in markdown. A web page can be generated with:
#   ./check_voices.py ../gcompris-kde
#
# (Requires python-markdown to be installed)
#
import os
import sys
import re
import copy
import json
import codecs
from io import StringIO
from datetime import date
import glob

import markdown
import polib
from PyQt5.QtCore import QCoreApplication, QUrl
from PyQt5.QtQml import QQmlComponent, QQmlEngine

if len(sys.argv) < 2:
    print("Usage: check_voices.py path_to_gcompris [-v] [-nn]")
    print(" -v: verbose, show also files that are fine")
    print(" -nn: not needed, show extra file in the voice directory")
    sys.exit(1)

# Command line: the flags may appear anywhere, the GCompris source tree
# path must be the first argument.
verbose = '-v' in sys.argv
notneeded = '-nn' in sys.argv
gcompris_qt = sys.argv[1]

# Force output as UTF-8
# Keep the original stream so it can be restored once the reports, which
# are captured by redirecting sys.stdout, have all been generated.
ref_stdout = sys.stdout
# A codecs writer emits bytes, so it has to sit on top of the underlying
# byte stream: wrapping the text-mode sys.stdout itself would raise a
# TypeError on the first write under Python 3.
if hasattr(sys.stdout, 'buffer'):
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)

# A global hash to hold a description on a key file like the UTF-8 char of
# the file.
descriptions = {}  # file name or locale -> human readable description


def get_html_header():
    '''Return the HTML prologue shared by every generated page'''
    return """<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
  <meta charset="utf-8"/>
  <title>GCompris Voice Recording Status</title>
</head>
<body>
"""


def get_html_footer():
    '''Return the HTML epilogue, stamped with today's date'''
    today = date.today()
    # The header opens both <html> and <body>, so close both here.
    return """
<hr>
<p>Page generated the {:s}</p>
</body>
</html>
""".format(today.isoformat())


def get_html_progress_bar(ratio):
    '''Return a table cell showing a green-on-red bar filled at ratio

    ratio is a number between 0 and 1, or its string representation.
    '''
    # round() instead of plain truncation: float('0.29') * 100 evaluates
    # to 28.999..., which int() alone would display as 28%.
    percent = int(round(float(ratio) * 100))
    return ('<td width=200 height=30pt>'
            '<div style="border: 2px solid silver;background-color:#c00">'
            '<div style="background-color:#0c0;height:15px;width:{:d}%"></div>'
            '</div></td>').format(percent)


def title1(title):
    '''Print title as a markdown level 1 heading'''
    print(title)
    print('=' * len(title))
    print('')


def title2(title):
    '''Print title as a markdown level 2 heading'''
    print(title)
    print('-' * len(title))
    print('')


def title3(title):
    '''Print title as a markdown level 3 heading'''
    print('### ' + title)
    print('')


def _activity_names():
    '''Return the sorted names of the activity directories to check'''
    activity_dir = gcompris_qt + "/src/activities"
    # Skip irrelevant entries: 'template', 'menu' and anything that is not
    # a directory.
    return sorted(
        name for name in os.listdir(activity_dir)
        if name not in ('template', 'menu')
        and os.path.isdir(activity_dir + "/" + name))


def _translate(po, text):
    '''Return the translation of text found in po

    Falls back to text itself when po is missing or empty, when text is
    not in the catalogue or when its translation is empty.
    '''
    if not po:
        return text
    entry = po.find(text)
    if entry and entry.msgstr != "":
        return entry.msgstr
    return text


def get_intro_from_code():
    '''Return a set for activities as found in GCompris ActivityInfo.qml'''
    return {activity + '.ogg' for activity in _activity_names()}


def init_intro_description_from_code(locale, gcompris_po):
    '''Init the intro description as found in GCompris ActivityInfo.qml
    in the global descriptions hash

    locale selects the po/<locale>/gcompris_voices.po catalogue used for
    the intro text. gcompris_po is the already loaded catalogue used for
    titles and descriptions; it may be None, in which case the source
    strings are kept.
    '''
    voices_po_path = '/po/' + locale + '/gcompris_voices.po'
    voices_po = None
    try:
        voices_po = polib.pofile(gcompris_qt + voices_po_path, encoding='utf-8')
    except OSError:
        print("**ERROR: Failed to load po file %s**" % voices_po_path)
        print('')

    activity_dir = gcompris_qt + "/src/activities"
    for activity in _activity_names():
        key = activity + '.ogg'
        descriptions[key] = ''
        try:
            with open(activity_dir + "/" + activity + "/ActivityInfo.qml",
                      encoding='utf-8') as f:
                content = f.readlines()
        except IOError:
            # Best effort: an activity without a readable ActivityInfo.qml
            # just keeps an empty description.
            continue

        has_intro = False
        for line in content:
            m = re.match(r'.*title:.*"(.*)"', line)
            if m:
                descriptions[key] += ' title: ' + _translate(gcompris_po, m.group(1))

            m = re.match(r'.*description:.*"(.*)"', line)
            if m:
                descriptions[key] += ' description: ' + _translate(gcompris_po, m.group(1))

            m = re.match(r'.*intro:.*"(.*)"', line)
            if m:
                has_intro = True
                descriptions[key] += ' voice: ' + _translate(voices_po, m.group(1))

        # The key is always present in descriptions at this point, so the
        # presence of the intro line itself has to be tracked.
        if not has_intro:
            print("**ERROR: Missing intro tag in %s**" % (activity + "/ActivityInfo.qml"))

    print('')


def init_country_names_from_code(component, locale, gcompris_po):
    '''Init the country description as found in GCompris
    geography/resource/board/board*.qml in the global descriptions hash

    component is the QQmlComponent used to load each board. locale is not
    used here. gcompris_po is the catalogue used to translate the tooltip
    texts and may be None.
    '''
    for qml in glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml'):
        component.loadUrl(QUrl(qml))
        board = component.create()
        levels = board.property('levels')
        for level in levels.toVariant():
            if 'soundFile' in level and 'toolTipText' in level:
                # Keep only the file name and turn the '$CA' placeholder
                # into the 'ogg' extension.
                sound = level['soundFile'].split('/')[-1].replace('$CA', 'ogg')
                descriptions[sound] = _translate(gcompris_po, level['toolTipText'])


def get_locales_from_config():
    '''Return a set for locales as found in GCompris src/core/LanguageList.qml'''
    locales = set()

    source = gcompris_qt + "/src/core/LanguageList.qml"
    try:
        with open(source, encoding='utf-8') as f:
            for line in f:
                m = re.match(r'.*"locale":.*"(.*)"', line)
                if not m:
                    continue
                # Drop the encoding suffix, e.g. 'fr_FR.UTF-8' -> 'fr_FR'
                locale = m.group(1).split('.')[0]
                if locale not in ('system', 'en_US'):
                    locales.add(locale)
    except IOError as e:
        print(f"ERROR: Failed to parse {source}: {e.strerror}")

    return locales


def get_locales_from_po_files():
    '''Return a set for locales for which we have a po file'''
    return set(os.listdir(gcompris_qt + "/poqm"))


def get_translation_status_from_po_files():
    '''Return the translation status from the po file

    For each locale as key we provide a list:
      [ translated_entries, untranslated_entries, fuzzy_entries, percent ]
    where percent is a ratio between 0 and 1. The translation team of each
    locale is saved in the global descriptions hash as a side effect.
    '''
    # en locale has no translation file but mark it 100% done
    locales = {'en': [0, 0, 0, 1]}

    descriptions['en'] = 'US English'

    locales_dir = gcompris_qt + "/poqm"
    for locale in os.listdir(locales_dir):
        po = polib.pofile(locales_dir + '/' + locale + '/gcompris_qt.po', encoding='utf-8')
        # Calc a global translation percent
        untranslated = len(po.untranslated_entries())
        translated = len(po.translated_entries())
        fuzzy = len(po.fuzzy_entries())
        total = translated + untranslated + fuzzy
        # An empty catalogue counts as not translated at all instead of
        # raising a ZeroDivisionError.
        percent = 1 - float(untranslated + fuzzy) / total if total else 0
        locales[locale] = [translated, untranslated, fuzzy, percent]

        # Save the translation team in the global descriptions
        if 'Language-Team' in po.metadata:
            team = po.metadata['Language-Team']
            # Strip the mailing list address, e.g. ' <list@example.org>'
            descriptions[locale] = re.sub(r' <.*>', '', team)
        else:
            descriptions[locale] = ''

    return locales


def get_words_from_code():
    '''Return a set for words as found in GCompris lang/resource/content-<locale>.json

    The locale is read from the module level variable of that name. Each
    word description is saved in the global descriptions hash.
    '''
    resource = '/src/activities/lang/resource/content-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: missing resource file %s**" % resource)
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt#Lang_word_list'))
        print('')
        return set()

    # Consolidate letters
    words = set()
    for word in data:
        # Skip alphabet letter, they are already handled by the alphabet set
        if word.startswith(('U', '1')):
            continue
        words.add(word)
        descriptions[word] = '[{:s}](https://gcompris.net/incoming/lang/words.html#{:s})'.format(data[word], word.replace('.ogg', ''))

    return words


def check_file_existence(filename, instructions):
    '''Print an error when filename is missing from the GCompris tree

    filename is relative to the GCompris source directory and instructions
    is the URL explaining how to create it. Always returns an empty set.
    '''
    if not os.path.isfile(gcompris_qt + filename):
        print('')
        print("**ERROR: missing resource file %s**" % filename)
        print('[Instructions to create this file](%s)' % instructions)

    # We don't really have voices needs here, just check the file exists
    return set()


def get_grammar_analysis_from_code():
    '''Return nothing but tells if the required GCompris grammar_analysis/resource/grammar_analysis-<locale>.json is there'''
    return check_file_existence('/src/activities/grammar_analysis/resource/grammar_analysis-' + locale + '.json',
                                'https://gcompris.net/wiki/How_to_translate#Dataset_to_translate')


def get_grammar_classes_from_code():
    '''Return nothing but tells if the required GCompris grammar_classes/resource/grammar_classes-<locale>.json is there'''
    return check_file_existence('/src/activities/grammar_classes/resource/grammar_classes-' + locale + '.json',
                                'https://gcompris.net/wiki/How_to_translate#Dataset_to_translate')


def get_wordsgame_from_code():
    '''Return nothing but tells if the required GCompris wordsgame/resource/default-<locale>.json is there'''
    return check_file_existence('/src/activities/wordsgame/resource/default-' + locale + '.json',
                                'https://gcompris.net/wiki/Word_Lists_Qt#Wordsgame_.28Typing_words.29')


def get_click_on_letter_from_code():
    '''Return nothing but tells if the required GCompris click_on_letter/resource/levels-<locale>.json is there'''
    return check_file_existence('/src/activities/click_on_letter/resource/levels-' + locale + '.json',
                                'https://gcompris.net/wiki/How_to_translate#Dataset_to_translate')


def get_geography_on_letter_from_code(component):
    '''Return all the sound files referenced by geography/resource/board/*.qml

    Levels whose type is SHAPE_BACKGROUND are ignored.
    '''
    words = set()

    for qml in glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml'):
        component.loadUrl(QUrl(qml))
        board = component.create()
        levels = board.property('levels')
        for level in levels.toVariant():
            if 'soundFile' in level and ('type' not in level or level['type'] != "SHAPE_BACKGROUND"):
                sound = level['soundFile'].split('/')[-1].replace('$CA', 'ogg')
                words.add(sound)
    return words


def get_files(locale, voiceset):
    '''Return the set of files in <locale>/<voiceset>, README excluded

    An empty set is returned when the directory cannot be listed.
    '''
    to_remove = {'README'}
    try:
        return set(os.listdir(locale + '/' + voiceset)) - to_remove
    except OSError:
        return set()


def get_locales_from_file():
    '''Return the set of locales having a directory in the current directory

    Symbolic links and hidden directories are ignored.
    '''
    locales = set()
    for entry in os.listdir('.'):
        if os.path.isdir(entry) \
           and not os.path.islink(entry) \
           and entry[0] != '.':
            locales.add(entry)

    return locales


def get_gletter_alphabet():
    '''Return the set of letter voice files needed by gletters

    Reads gletters/resource/default-<locale>.json, the locale being the
    module level variable of that name. Each entry is named after the
    code points of its lowercase letters (UXXXX...) and described in the
    global descriptions hash.
    '''
    resource = '/src/activities/gletters/resource/default-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: Missing resource file %s**" % resource)
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Simple_Letters_.28Typing_letters.29_level_design'))
        print('')
        return set()

    # Consolidate letters
    letters = set()
    for level in data['levels']:
        for w in level['words']:
            multiletters = ''.join('U{:04X}'.format(ord(one_char)) for one_char in w.lower())
            letters.add(multiletters + '.ogg')
            descriptions[multiletters + '.ogg'] = w.lower()

    # Add numbers needed for words
    for i in range(10, 21):
        letters.add(str(i) + '.ogg')

    return letters


def _print_file_table(heading, names):
    '''Print a markdown table listing names along with their description'''
    title3(heading)
    print('| File | Description |')
    print('|------|-------------|')
    for name in sorted(names):
        print('| %s | %s |' % (name, descriptions.get(name, '')))
    print('')


def diff_set(title, code, files):
    '''Returns a stat from 0 to 1 for this report set

    code is the set of files required by the code and files the set of
    files actually found. The stat is the share of the files that are not
    missing; 0 is returned when both sets are empty.
    '''
    if not code and not files:
        return 0

    title2(title)

    correct = code & files
    missing = code - files
    extra = files - code

    if verbose and correct:
        _print_file_table("These files are correct", correct)

    if missing:
        _print_file_table("These files are missing", missing)

    if notneeded and extra:
        _print_file_table("These files are not needed", extra)

    return 1 - float(len(missing)) / len(code | files)


def diff_locale_set(title, code, files):
    '''Print the locales of code that have no voice directory

    A locale is considered present when a directory named after it, or
    after its short form (the part before '_'), exists in the current
    directory. Present locales are listed in verbose mode only.
    '''
    if not code and not files:
        return

    title2(title)
    if verbose:
        title3("We have voices for these locales:")

    # Always computed, so that the report below works in non verbose mode
    missing = []
    for locale in code:
        # Try the full locale first, then its short form
        if os.path.isdir(locale) or os.path.isdir(locale.split('_')[0]):
            if verbose:
                print('* ' + locale)
        else:
            missing.append(locale)

    print('')
    print("We miss voices for these locales:")
    for f in missing:
        print('* ' + f)
    print('')


def check_locale_config(title, stats, locale_config):
    '''Display and return locales that are translated above a fixed threshold

    stats is the hash returned by get_translation_status_from_po_files()
    and locale_config the locales to check. Locales below the threshold
    or without any translation are printed; the descriptions of the
    others are returned as a sorted list.
    '''
    title2(title)
    LIMIT = 0.8
    good_locale = []
    for locale in sorted(locale_config):
        # Use the full locale when known, else shorten it and test again
        key = locale if locale in stats else locale.split('_')[0]
        if key not in stats:
            print("* %s no translation at all" % (locale))
            continue

        name = descriptions.get(key, '')
        if stats[key][3] < LIMIT:
            print('* {:s} ({:s})'.format(name, key))
        else:
            good_locale.append(name)

    print('')
    good_locale.sort()
    print('There are %d locales above %d%% translation: %s' % (len(good_locale), LIMIT * 100,
                                                               ', '.join(good_locale)))

    return good_locale


#
# main
# ===

# Each report is captured by pointing sys.stdout to its own buffer
reports = {}
sys.stdout = reports['stats'] = StringIO()

string_stats = get_translation_status_from_po_files()
check_locale_config("Locales to remove from LanguageList.qml (translation level < 80%)",
                    string_stats, get_locales_from_config())

print('\n[Guide to contribute recording files](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt'))

# Calc the big list of locales we have to check
all_locales = sorted(get_locales_from_po_files() | get_locales_from_file())

stats = {}
global_descriptions = copy.deepcopy(descriptions)

# NOTE(review): app is never used directly; presumably the QML engine
# needs a live application object, so keep the reference - confirm.
app = QCoreApplication(sys.argv)
engine = QQmlEngine()
component = QQmlComponent(engine)

# The loop variable must stay a module level variable named 'locale':
# several get_*_from_code() functions read it as a global.
for locale in all_locales:
    sys.stdout = reports[locale] = StringIO()

    # Start again from the locale independent descriptions
    descriptions = copy.deepcopy(global_descriptions)
    gcompris_po = None
    try:
        gcompris_po = polib.pofile(gcompris_qt + '/poqm/' + locale + '/gcompris_qt.po', encoding='utf-8')
    except OSError:
        print("**ERROR: Failed to load po file %s**" % ('/poqm/' + locale + '/gcompris_qt.po'))
        print('')

    init_intro_description_from_code(locale, gcompris_po)
    init_country_names_from_code(component, locale, gcompris_po)

    title1('{:s} ({:s})'.format(descriptions.get(locale, ''), locale))

    lstats = {'locale': locale}
    lstats['intro'] = diff_set("Intro ({:s}/intro/)".format(locale), get_intro_from_code(), get_files(locale, 'intro'))
    lstats['letter'] = diff_set("Letters ({:s}/alphabet/)".format(locale), get_gletter_alphabet(), get_files(locale, 'alphabet'))

    descriptions['click_on_letter.ogg'] = "Must contains the voice: 'Click on the letter:'"
    # The English voices are the reference for the misc and colors sets
    lstats['misc'] = diff_set("Misc ({:s}/misc/)".format(locale), get_files('en', 'misc'), get_files(locale, 'misc'))

    lstats['color'] = diff_set("Colors ({:s}/colors/)".format(locale), get_files('en', 'colors'), get_files(locale, 'colors'))
    lstats['geography'] = diff_set("Geography ({:s}/geography/)".format(locale), get_geography_on_letter_from_code(component), get_files(locale, 'geography'))
    lstats['words'] = diff_set("Words ({:s}/words/)".format(locale), get_words_from_code(), get_files(locale, 'words'))
    lstats['wordsgame'] = diff_set("Wordsgame", get_wordsgame_from_code(), set())
    lstats['grammar_analysis'] = diff_set("Grammar Analysis", get_grammar_analysis_from_code(), set())
    lstats['grammar_classes'] = diff_set("Grammar Classes", get_grammar_classes_from_code(), set())
    lstats['click_on_letter'] = diff_set("Click on letter", get_click_on_letter_from_code(), set())
    stats[locale] = lstats

sys.stdout = reports['summary'] = StringIO()
sorted_keys = sorted(stats)

title1("GCompris Voice Recording Status Summary")
print('| Locale | Strings | Misc | Letters | Colors | Geography | Words | Intro|')
print('|--------|---------|------|---------|--------|-----------|-------|------|')
for locale in sorted_keys:
    stat = stats[locale]
    print('| [{:s} ({:s})](voice_status_{:s}.html) | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |'
          .format(descriptions.get(locale, ''), stat['locale'], locale,
                  string_stats[locale][3] if locale in string_stats else 0,
                  stat['misc'], stat['letter'], stat['color'], stat['geography'],
                  stat['words'], stat['intro']))

#
# Now we have all the reports
#

extensions = ['markdown.extensions.tables']

sys.stdout = ref_stdout

with codecs.open("index.html", "w",
                 encoding="utf-8",
                 errors="xmlcharrefreplace"
                 ) as f:
    f.write(get_html_header())

    summary = markdown.markdown(reports['summary'].getvalue(), extensions=extensions)
    # Turn every cell holding only a ratio (e.g. <td>0.75</td>) into a
    # progress bar cell.
    summary_lines = []
    for line in summary.split('\n'):
        m = re.match(r'<td>(\d\.\d\d)</td>', line)
        summary_lines.append(get_html_progress_bar(m.group(1)) if m else line)

    f.write('\n'.join(summary_lines) + '\n\n')

    f.write(markdown.markdown(reports['stats'].getvalue(), extensions=extensions))
    f.write(get_html_footer())

for locale in sorted_keys:
    with codecs.open("voice_status_{:s}.html".format(locale), "w",
                     encoding="utf-8",
                     errors="xmlcharrefreplace"
                     ) as f:
        f.write(get_html_header())
        f.write(markdown.markdown(reports[locale].getvalue(), extensions=extensions))
        f.write(get_html_footer())