Warning, file /education/gcompris-data/voices/check_voices.py was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
#!/usr/bin/python
#
# GCompris - check_voices.py
#
# Copyright (C) 2015 Bruno Coudoin <bruno.coudoin@gcompris.net>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, see <https://www.gnu.org/licenses/>.
#
#
# The output is in markdown. A web page can be generated with:
# ./check_voices.py ../gcompris-kde
#
# (Requires python-markdown to be installed)
#
import os
import sys
import re
import copy
import json
import polib
import codecs
from io import StringIO
import markdown
from datetime import date
import glob

from PyQt5.QtCore import QCoreApplication, QUrl
from PyQt5.QtQml import QQmlComponent, QQmlEngine

if len(sys.argv) < 2:
    print("Usage: check_voices.py path_to_gcompris [-v] [-nn]")
    print(" -v: verbose, show also files that are fine")
    print(" -nn: not needed, show extra file in the voice directory")
    sys.exit(1)

verbose = '-v' in sys.argv
notneeded = '-nn' in sys.argv
gcompris_qt = sys.argv[1]

# Keep a handle on the real stdout: the report sections below are captured
# by temporarily pointing sys.stdout at StringIO buffers.  (No codec wrapper
# is installed on stdout: under Python 3 it would fail on the first write,
# and the generated HTML files are explicitly written as UTF-8 anyway.)
ref_stdout = sys.stdout

# A global hash to hold a description on a key file like the UTF-8 char of
# the file.
descriptions = {}

# Directories under src/activities that are not real activities.
SKIPPED_ACTIVITIES = ('template', 'menu')

# Patterns used to pick values out of the QML sources, compiled once.
TITLE_RE = re.compile(r'.*title:.*"(.*)"')
DESCRIPTION_RE = re.compile(r'.*description:.*"(.*)"')
INTRO_RE = re.compile(r'.*intro:.*"(.*)"')
LOCALE_RE = re.compile(r'.*"locale":.*"(.*)"')
# A summary table cell that only holds a rate such as "0.85".
RATE_CELL_RE = re.compile(r'<td>(\d\.\d\d)</td>')


def get_html_header():
    '''Return the opening HTML (doctype, head and <body> tag) of a page'''
    return """<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta charset="utf-8"/>
<title>GCompris Voice Recording Status</title>
</head>
<body>
"""


def get_html_footer():
    '''Return the closing HTML of a page, stamped with today's date'''
    today = date.today()
    return """
<hr>
<p>Page generated the {:s}</p>
</body>
</html>
""".format(today.isoformat())


def get_html_progress_bar(rate):
    '''Return a table cell showing rate (a number or string in 0..1) as a bar'''
    # Round instead of truncating: float("0.29") * 100 is 28.999..., which
    # int() alone would display as 28%.
    percent = int(round(float(rate) * 100))
    template = '<td width=200 height=30pt>' + \
        '<div style="border: 2px solid silver;background-color:#c00">' + \
        '<div style="background-color:#0c0;height:15px;width:{:d}%"></div>' + \
        '</div></td>'
    return template.format(percent)


def title1(title):
    '''Print title as a markdown level 1 heading'''
    print(title)
    print('=' * len(title))
    print('')


def title2(title):
    '''Print title as a markdown level 2 heading'''
    print(title)
    print('-' * len(title))
    print('')


def title3(title):
    '''Print title as a markdown level 3 heading'''
    print('### ' + title)
    print('')


def _iter_activities():
    '''Yield (activity name, path to its ActivityInfo.qml) for each activity
    directory, skipping the entries listed in SKIPPED_ACTIVITIES'''
    activity_dir = gcompris_qt + "/src/activities"
    for activity in os.listdir(activity_dir):
        # Skip unrelevant activities
        if activity in SKIPPED_ACTIVITIES or \
           not os.path.isdir(activity_dir + "/" + activity):
            continue
        yield activity, activity_dir + "/" + activity + "/ActivityInfo.qml"


def _load_po(relative_path):
    '''Load the po file at gcompris_qt + relative_path.

    Print an error in the current report and return None on failure.'''
    try:
        return polib.pofile(gcompris_qt + relative_path)
    except OSError:
        print("**ERROR: Failed to load po file %s**" % (relative_path))
        print('')
        return None


def _translate(po, text, skip_empty=False):
    '''Return the msgstr of text in po, or text itself when po is missing or
    has no such entry.  With skip_empty, an empty msgstr also falls back to
    text.'''
    if not po:
        return text
    entry = po.find(text)
    if not entry:
        return text
    if skip_empty and entry.msgstr == "":
        return text
    return entry.msgstr


def get_intro_from_code():
    '''Return a set for activities as found in GCompris ActivityInfo.qml'''

    # An activity needs an intro voice as soon as it ships an ActivityInfo.qml
    return set(activity + '.ogg'
               for activity, info_file in _iter_activities()
               if os.path.isfile(info_file))


def init_intro_description_from_code(locale):
    '''Init the intro description as found in GCompris ActivityInfo.qml
    in the global descriptions hash'''

    gcomprisPo = _load_po('/poqm/' + locale + '/gcompris_qt.po')
    voicesPo = _load_po('/po/' + locale + '/gcompris_voices.po')

    for activity, info_file in _iter_activities():
        key = activity + '.ogg'
        descriptions[key] = ''
        try:
            with open(info_file, encoding='utf-8') as f:
                content = f.readlines()
        except IOError:
            # No readable ActivityInfo.qml: keep the empty description
            continue

        has_intro = False
        for line in content:
            m = TITLE_RE.match(line)
            if m:
                descriptions[key] += ' title: ' + \
                    _translate(gcomprisPo, m.group(1))

            m = DESCRIPTION_RE.match(line)
            if m:
                descriptions[key] += ' description: ' + \
                    _translate(gcomprisPo, m.group(1))

            m = INTRO_RE.match(line)
            if m:
                has_intro = True
                # An untranslated voice text falls back to the source text
                descriptions[key] += ' voice: ' + \
                    _translate(voicesPo, m.group(1), skip_empty=True)

        if not has_intro:
            print("**ERROR: Missing intro tag in %s**" % (activity + "/ActivityInfo.qml"))

    print('')


def _load_geography_levels():
    '''Return the list of level entries found in the 'levels' property of
    every geography/resource/board/*.qml file'''
    # The QML engine needs a Qt application object to exist while it runs
    app = QCoreApplication(sys.argv)
    engine = QQmlEngine()
    component = QQmlComponent(engine)

    levels = []
    for qml in glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml'):
        component.loadUrl(QUrl(qml))
        board = component.create()
        if board is None:
            # NOTE(review): assumes create() yields None when the QML file
            # cannot be loaded -- confirm against the PyQt5 documentation.
            print("**ERROR: Failed to load %s**" % (qml))
            print('')
            continue
        levels.extend(board.property('levels').toVariant())
    return levels


def init_country_names_from_code(locale):
    '''Init the country description as found in GCompris
    geography/resource/board/board*.qml in the global descriptions hash'''

    po = _load_po('/poqm/' + locale + '/gcompris_qt.po')

    for level in _load_geography_levels():
        if 'soundFile' in level and 'toolTipText' in level:
            sound = level['soundFile'].split('/')[-1].replace('$CA', 'ogg')
            descriptions[sound] = _translate(po, level['toolTipText'])


def get_locales_from_config():
    '''Return a set for locales as found in GCompris src/core/LanguageList.qml'''

    locales = set()

    source = gcompris_qt + "/src/core/LanguageList.qml"
    try:
        with open(source, encoding='utf-8') as f:
            for line in f:
                m = LOCALE_RE.match(line)
                if not m:
                    continue
                # Drop the encoding suffix, e.g. "fr_FR.UTF-8" -> "fr_FR"
                locale = m.group(1).split('.')[0]
                if locale != 'system' and locale != 'en_US':
                    locales.add(locale)
    except IOError as e:
        print("ERROR: Failed to parse %s: %s" % (source, e.strerror))

    return locales


def get_locales_from_po_files():
    '''Return a set for locales for which we have a po file '''

    return set(os.listdir(gcompris_qt + "/poqm"))


def get_translation_status_from_po_files():
    '''Return the translation status from the po file

    For each locale as key we provide a list:
       [ translated_entries, untranslated_entries, fuzzy_entries, percent ]

    Also stores each locale's translation team name in the global
    descriptions hash.'''

    # en locale has no translation file but mark it 100% done
    locales = {'en': [0, 0, 0, 1]}

    descriptions['en'] = 'US English'

    locales_dir = gcompris_qt + "/poqm"
    for locale in os.listdir(locales_dir):
        try:
            po = polib.pofile(locales_dir + '/' + locale + '/gcompris_qt.po')
        except OSError:
            print("**ERROR: Failed to load po file %s**" % ('/poqm/' + locale + '/gcompris_qt.po'))
            print('')
            continue

        translated = len(po.translated_entries())
        untranslated = len(po.untranslated_entries())
        fuzzy = len(po.fuzzy_entries())
        total = translated + untranslated + fuzzy

        # Calc a global translation percent; an empty catalog counts as 0
        if total:
            percent = 1 - float(untranslated + fuzzy) / total
        else:
            percent = 0
        locales[locale] = [translated, untranslated, fuzzy, percent]

        # Save the translation team in the global descriptions
        if 'Language-Team' in po.metadata:
            team = po.metadata['Language-Team']
            # Strip the contact address, e.g. "French <list@example.org>"
            descriptions[locale] = re.sub(r' <.*>', '', team)
        else:
            descriptions[locale] = ''

    return locales


def get_words_from_code():
    '''Return a set for words as found in GCompris lang/resource/content-<locale>.json

    The locale is read from the module level 'locale' variable set by the
    main loop.'''
    try:
        with open(gcompris_qt + '/src/activities/lang/resource/content-' + locale + '.json',
                  encoding='utf-8') as data_file:
            data = json.load(data_file)
    except (IOError, ValueError):
        # Missing, unreadable or malformed file
        print('')
        print("**ERROR: missing resource file %s**" % ('/src/activities/lang/resource/content-' + locale + '.json'))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt#Lang_word_list'))
        print('')
        return set()

    # Consolidate letters
    words = set()
    for word, text in data.items():
        # Skip alphabet letter, they are already handled by the alphabet set
        if word.startswith(('U', '1')):
            continue
        words.add(word)
        descriptions[word] = u'[{:s}](https://gcompris.net/incoming/lang/words.html#{:s})'.format(text, word.replace('.ogg', ''))

    return words


def get_wordsgame_from_code():
    '''Return nothing but tells if the required GCompris
    wordsgame/resource/default-<locale>.json is there

    The locale is read from the module level 'locale' variable.'''

    if not os.path.isfile(gcompris_qt + '/src/activities/wordsgame/resource/default-' + locale + '.json'):
        print('')
        print("**ERROR: missing resource file %s**" % ('/src/activities/wordsgame/resource/default-' + locale + '.json'))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Wordsgame_.28Typing_words.29'))

    # We don't really have voices needs here, just check the file exists
    return set()


def get_click_on_letter_from_code():
    '''Return nothing but tells if the required GCompris
    click_on_letter/resource/levels-<locale>.json is there

    The locale is read from the module level 'locale' variable.'''

    if not os.path.isfile(gcompris_qt + '/src/activities/click_on_letter/resource/levels-' + locale + '.json'):
        print('')
        print("**ERROR: missing resource file %s**" % ('/src/activities/click_on_letter/resource/levels-' + locale + '.json'))
        print('[Instructions to create this file TBD](%s)' % ('TBD'))

    # We don't really have voices needs here, just check the file exists
    return set()


def get_geography_on_letter_from_code():
    '''Return the sound file names of all the countries found in
    geography/resource/board/*.qml'''
    words = set()

    for level in _load_geography_levels():
        # Background shapes carry no voice
        if 'soundFile' in level and \
           ('type' not in level or level['type'] != "SHAPE_BACKGROUND"):
            words.add(level['soundFile'].split('/')[-1].replace('$CA', 'ogg'))
    return words


def get_files(locale, voiceset):
    '''Return the set of file names in <locale>/<voiceset>/ (relative to the
    current directory) without README, or an empty set if it can't be listed'''
    try:
        return set(os.listdir(locale + '/' + voiceset)) - set(['README'])
    except OSError:
        return set()


def get_locales_from_file():
    '''Return the set of non hidden, non symlink directories found in the
    current directory; each one is taken as a locale having voices'''
    locales = set()
    for entry in os.listdir('.'):
        if os.path.isdir(entry) \
           and not os.path.islink(entry) \
           and entry[0] != '.':
            locales.add(entry)

    return locales


def get_gletter_alphabet():
    '''Return the set of voice files needed for the letters found in GCompris
    gletters/resource/default-<locale>.json, plus the numbers 10 to 20

    The locale is read from the module level 'locale' variable.'''
    try:
        with open(gcompris_qt + '/src/activities/gletters/resource/default-' + locale + '.json',
                  encoding='utf-8') as data_file:
            data = json.load(data_file)
    except (IOError, ValueError):
        # Missing, unreadable or malformed file
        print('')
        print("**ERROR: Missing resource file %s**" % ('/src/activities/gletters/resource/default-' + locale + '.json'))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Simple_Letters_.28Typing_letters.29_level_design'))
        print('')
        return set()

    # Consolidate letters
    letters = set()
    for level in data['levels']:
        for w in level['words']:
            # The voice file is named after the code points of the lowercase
            # letters, e.g. "a" -> U0061.ogg
            multiletters = ''.join('U{:04X}'.format(ord(one_char))
                                   for one_char in w.lower())
            letters.add(multiletters + '.ogg')
            descriptions[multiletters + '.ogg'] = w.lower()

    # Add numbers needed for words
    for i in range(10, 21):
        letters.add(str(i) + '.ogg')

    return letters


def _print_file_table(heading, names):
    '''Print a level 3 heading followed by a markdown table listing names
    sorted, each with its entry from the global descriptions hash if any'''
    title3(heading)
    print('| File | Description |')
    print('|------|-------------|')
    for f in sorted(names):
        print(u'| %s | %s |' % (f, descriptions.get(f, '')))
    print('')


def diff_set(title, code, files):
    '''Print the report comparing the files needed by the code with the
    files we have and return a stat from 0 to 1 for this report set'''

    if not code and not files:
        return 0

    title2(title)

    if verbose and code & files:
        _print_file_table("These files are correct", code & files)

    if code - files:
        _print_file_table("These files are missing", code - files)

    if notneeded and files - code:
        _print_file_table("These files are not needed", files - code)

    return 1 - float(len(code - files)) / len(code | files)


def diff_locale_set(title, code, files):
    '''Print which locales of code have a voice directory in the current
    directory (only listed in verbose mode) and which ones miss it'''

    if not code and not files:
        return

    title2(title)
    if verbose:
        title3("We have voices for these locales:")

    # Always computed so the missing list does not depend on the verbose flag
    missing = []
    for locale in code:
        # Accept either the full locale or its language part (fr_CA -> fr)
        if os.path.isdir(locale) or os.path.isdir(locale.split('_')[0]):
            if verbose:
                print('* ' + locale)
        else:
            missing.append(locale)

    print('')
    print("We miss voices for these locales:")
    for f in missing:
        print('* ' + f)
    print('')


def check_locale_config(title, stats, locale_config):
    '''Display the locales translated below a fixed threshold and return
    the sorted names of the ones translated above it'''
    title2(title)
    LIMIT = 0.8
    good_locale = []
    for locale in sorted(locale_config):
        # Fall back to the language part of the locale (fr_CA -> fr)
        key = locale if locale in stats else locale.split('_')[0]
        if key not in stats:
            print("* %s no translation at all" % (locale))
            continue

        name = descriptions.get(key, '')
        if stats[key][3] < LIMIT:
            print(u'* {:s} ({:s})'.format(name, key))
        else:
            good_locale.append(name)

    print('')
    good_locale.sort()
    print('There are %d locales above %d%% translation: %s' % (len(good_locale), LIMIT * 100,
                                                               ', '.join(good_locale)))

    return good_locale


def _write_html(filename, body):
    '''Write body between the HTML header and footer in filename as UTF-8'''
    with codecs.open(filename, "w",
                     encoding="utf-8",
                     errors="xmlcharrefreplace"
                     ) as f:
        f.write(get_html_header())
        f.write(body)
        f.write(get_html_footer())


#
# main
# ===

# Each report section is captured by redirecting stdout to its own buffer
reports = {}
sys.stdout = reports['stats'] = StringIO()

string_stats = get_translation_status_from_po_files()
check_locale_config("Locales to remove from LanguageList.qml (translation level < 80%)",
                    string_stats, get_locales_from_config())

print('\n[Guide to contribute recording files](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt'))

# Calc the big list of locales we have to check
all_locales = sorted(get_locales_from_po_files() | get_locales_from_file())

stats = {}
global_descriptions = copy.deepcopy(descriptions)

# NOTE: several get_*_from_code() helpers read this module level 'locale'
# loop variable, so it must keep this name.
for locale in all_locales:
    sys.stdout = reports[locale] = StringIO()

    # Start each locale from the locale independent descriptions
    descriptions = copy.deepcopy(global_descriptions)
    init_intro_description_from_code(locale)
    init_country_names_from_code(locale)

    title1(u'{:s} ({:s})'.format(descriptions.get(locale, ''), locale))

    lstats = {'locale': locale}
    lstats['intro'] = diff_set("Intro ({:s}/intro/)".format(locale), get_intro_from_code(), get_files(locale, 'intro'))
    lstats['letter'] = diff_set("Letters ({:s}/alphabet/)".format(locale), get_gletter_alphabet(), get_files(locale, 'alphabet'))

    descriptions['click_on_letter.ogg'] = "Must contains the voice: 'Click on the letter:'"
    lstats['misc'] = diff_set("Misc ({:s}/misc/)".format(locale), get_files('en', 'misc'), get_files(locale, 'misc'))

    lstats['color'] = diff_set("Colors ({:s}/colors/)".format(locale), get_files('en', 'colors'), get_files(locale, 'colors'))
    lstats['geography'] = diff_set("Geography ({:s}/geography/)".format(locale), get_geography_on_letter_from_code(), get_files(locale, 'geography'))
    lstats['words'] = diff_set("Words ({:s}/words/)".format(locale), get_words_from_code(), get_files(locale, 'words'))
    lstats['wordsgame'] = diff_set("Wordsgame", get_wordsgame_from_code(), set())
    lstats['click_on_letter'] = diff_set("Click on letter", get_click_on_letter_from_code(), set())
    stats[locale] = lstats

sys.stdout = reports['summary'] = StringIO()
sorted_keys = sorted(stats)

title1("GCompris Voice Recording Status Summary")
print('| Locale | Strings | Misc | Letters | Colors | Geography | Words | Intro|')
print('|--------|---------|------|---------|--------|-----------|-------|------|')
for locale in sorted_keys:
    stat = stats[locale]
    print(u'| [{:s} ({:s})](voice_status_{:s}.html) | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |'
          .format(descriptions.get(locale, ''), stat['locale'], locale,
                  string_stats[locale][3] if locale in string_stats else 0,
                  stat['misc'], stat['letter'], stat['color'], stat['geography'],
                  stat['words'], stat['intro']))

#
# Now we have all the reports
#

extensions = ['markdown.extensions.tables']

sys.stdout = ref_stdout

# In the summary table, turn each cell holding only a rate into a progress bar
summary = markdown.markdown(reports['summary'].getvalue(), extensions=extensions)
summary2 = ""
for line in summary.split('\n'):
    m = RATE_CELL_RE.match(line)
    if m:
        summary2 += get_html_progress_bar(m.group(1))
    else:
        summary2 += line
    summary2 += '\n'

_write_html("index.html",
            summary2 + '\n' +
            markdown.markdown(reports['stats'].getvalue(), extensions=extensions))

for locale in sorted_keys:
    _write_html("voice_status_{:s}.html".format(locale),
                markdown.markdown(reports[locale].getvalue(), extensions=extensions))