Warning, file /education/gcompris-data/voices/check_voices.py was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 #!/usr/bin/python
0002 #
0003 # GCompris - check_voices.py
0004 #
0005 # Copyright (C) 2015 Bruno Coudoin <bruno.coudoin@gcompris.net>
0006 #
0007 #   This program is free software; you can redistribute it and/or modify
0008 #   it under the terms of the GNU General Public License as published by
0009 #   the Free Software Foundation; either version 3 of the License, or
0010 #   (at your option) any later version.
0011 #
0012 #   This program is distributed in the hope that it will be useful,
0013 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
0014 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0015 #   GNU General Public License for more details.
0016 #
0017 #   You should have received a copy of the GNU General Public License
0018 #   along with this program; if not, see <https://www.gnu.org/licenses/>.
0019 #
0020 #
0021 # The output is in markdown. A web page can be generated with:
0022 # ./check_voices.py ../gcompris-kde
0023 #
0024 # (Requires python-markdown to be installed)
0025 #
0026 import os
0027 import sys
0028 import re
0029 import copy
0030 import json
0031 import polib
0032 import codecs
0033 from io import StringIO
0034 import markdown
0035 from datetime import date
0036 import glob
0037 
0038 from PyQt5.QtCore import QCoreApplication, QUrl
0039 from PyQt5.QtQml import QQmlComponent, QQmlEngine
0040 
# The path to the GCompris source tree is mandatory, bail out with the
# usage if it is missing.
if len(sys.argv) < 2:
    print("Usage: check_voices.py path_to_gcompris [-v] [-nn]")
    print("  -v:  verbose, show also files that are fine")
    print("  -nn: not needed, show extra file in the voice directory")
    sys.exit(1)

# The switches are looked up anywhere on the command line
verbose = '-v' in sys.argv
notneeded = '-nn' in sys.argv
# First argument: root directory of the GCompris source tree
gcompris_qt = sys.argv[1]

# Force output as UTF-8
# Keep the real stdout, it is put back before the html files are written.
ref_stdout = sys.stdout
# A codecs writer produces bytes, so it must wrap the binary buffer of the
# stream. Wrapping the text stream itself makes every print() fail with a
# TypeError on Python 3.
if hasattr(sys.stdout, 'buffer'):
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)

# A global hash to hold a description on a key file like the UTF-8 char of
# the file.
descriptions = {}
0058 
def get_html_header():
    '''Return the opening html markup shared by all the generated pages'''
    markup = (
        '<!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml" lang="en">',
        '<head>',
        '  <meta charset="utf-8"/>',
        '  <title>GCompris Voice Recording Status</title>',
        '</head>',
        '<body>',
        # The empty last item makes the header end with a newline
        '',
    )
    return '\n'.join(markup)
0068 
def get_html_footer():
    '''Return the closing html markup shared by all the generated pages

    The footer shows the generation date (ISO format) and closes the
    body and html elements.
    '''
    today = date.today()
    return """
<hr>
<p>Page generated the {:s}</p>
</body>
</html>
""".format(today.isoformat())
0076 
def get_html_progress_bar(rate):
    '''Return a html table cell holding a progress bar

    rate is a completion ratio (number or string, 1 means 100%).
    '''
    # Round instead of truncating: float('0.57') * 100 is 56.99999...
    percent = int(round(float(rate) * 100))
    return '<td width=200 height=30pt>' + \
        '<div style="border: 2px solid silver;background-color:#c00"><div style="background-color:#0c0;height:15px;width:{:d}%"></div></div>'.format(percent) + \
        '</td>'
0080 
0081 # '<hr style="color:#0c0;background-color:#0c0;height:15px; border:none;margin:0;" align="left" width={:d}% /></td>'.format(int(float(rate) * 100))
0082 
def title1(title):
    '''Print title as a markdown level 1 heading (underlined with =)'''
    underline = len(title) * '='
    for text in (title, underline, ''):
        print(text)
0087 
def title2(title):
    '''Print title as a markdown level 2 heading (underlined with -)'''
    underline = len(title) * '-'
    for text in (title, underline, ''):
        print(text)
0092 
def title3(title):
    '''Print title as a markdown level 3 heading followed by an empty line'''
    heading = '### ' + title
    print(heading, end='\n\n')
0096 
def get_intro_from_code():
    '''Return the set of intro voice files expected by the activities

    Each directory of src/activities (but template and menu) in which an
    ActivityInfo.qml can be opened gives one '<activity>.ogg' entry.
    '''
    skipped = ('template', 'menu')
    activity_dir = gcompris_qt + "/src/activities"

    intro_files = set()
    for name in os.listdir(activity_dir):
        activity_path = activity_dir + "/" + name
        # Skip unrelevant activities and plain files
        if name in skipped or not os.path.isdir(activity_path):
            continue

        try:
            # The file is only opened to check it is there and readable.
            # TODO parse the 'intro:' line if we want to grab the string
            # to translate
            with open(activity_path + "/ActivityInfo.qml"):
                intro_files.add(name + '.ogg')
        except IOError:
            # No ActivityInfo.qml, nothing to record for this directory
            continue

    return intro_files
0124 
def init_intro_description_from_code(locale):
    '''Init the intro description as found in GCompris ActivityInfo.qml
    in the global descriptions hash

    For each activity directory (but template and menu) the entry
    '<activity>.ogg' of descriptions is built from the title, description
    and intro lines of its ActivityInfo.qml. The texts are translated with
    the po files of the given locale when they can be loaded, else the
    source text is kept. Errors are reported with print().
    '''

    gcomprisPo = None
    voicesPo = None
    try:
        gcomprisPo = polib.pofile(gcompris_qt + '/poqm/'+locale+'/gcompris_qt.po')
    except OSError:
        print("**ERROR: Failed to load po file %s**" %('/poqm/'+locale+'/gcompris_qt.po'))
        print('')
    try:
        voicesPo = polib.pofile(gcompris_qt + '/po/'+locale+'/gcompris_voices.po')
    except OSError:
        print("**ERROR: Failed to load po file %s**" %('/po/'+locale+'/gcompris_voices.po'))
        print('')

    activity_dir = gcompris_qt + "/src/activities"
    for activity in os.listdir(activity_dir):
        # Skip unrelevant activities
        if activity == 'template' or \
           activity == 'menu' or \
           not os.path.isdir(activity_dir + "/" + activity):
            continue

        key = activity + '.ogg'
        descriptions[key] = ''
        # The key above always exists, so the missing intro has to be
        # tracked with its own flag
        has_intro = False
        try:
            with open(activity_dir + "/" + activity + "/ActivityInfo.qml") as f:
                content = f.readlines()

            for line in content:
                m = re.match(r'.*title:.*"(.*)"', line)
                if m:
                    title = m.group(1)
                    if gcomprisPo:
                        entry = gcomprisPo.find(title)
                        if entry:
                            title = entry.msgstr
                    descriptions[key] += ' title: ' + title

                m = re.match(r'.*description:.*"(.*)"', line)
                if m:
                    description = m.group(1)
                    if gcomprisPo:
                        entry = gcomprisPo.find(description)
                        if entry:
                            description = entry.msgstr
                    descriptions[key] += ' description: ' + description

                m = re.match(r'.*intro:.*"(.*)"', line)
                if m:
                    has_intro = True
                    voiceText = m.group(1)
                    if voicesPo:
                        entry = voicesPo.find(voiceText)
                        # For the voice an empty translation is ignored
                        if entry and entry.msgstr != "":
                            voiceText = entry.msgstr
                    descriptions[key] += ' voice: ' + voiceText

            if not has_intro:
                print("**ERROR: Missing intro tag in %s**" %(activity + "/ActivityInfo.qml"))
        except IOError:
            # No readable ActivityInfo.qml, keep the empty description
            pass

    print('')
0184 
0185 
def init_country_names_from_code(locale):
    '''Init the country description as found in GCompris geography/resource/board/board*.qml
    in the global descriptions hash

    Each level holding both a soundFile and a toolTipText gives one entry:
    the key is the sound file name (with '$CA' replaced by 'ogg') and the
    value is the tooltip, translated with the po file of the given locale
    when a translation entry is found.
    '''

    po = None
    try:
        po = polib.pofile( gcompris_qt + '/poqm/'+locale+'/gcompris_qt.po')
    except OSError:
        print("**ERROR: Failed to load po file %s**" %('/poqm/'+locale+'/gcompris_qt.po'))
        print('')

    # NOTE(review): app is not used below, presumably the application
    # object must exist while the QML engine is used - confirm
    app = QCoreApplication(sys.argv)
    engine = QQmlEngine()
    component = QQmlComponent(engine)

    for qml in glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml'):
        component.loadUrl(QUrl(qml))
        board = component.create()
        levels = board.property('levels')
        for level in levels.toVariant():
            if 'soundFile' in level and 'toolTipText' in level:
                sound = level['soundFile'].split('/')[-1].replace('$CA', 'ogg')
                tooltip = level['toolTipText']
                if po:
                    entry = po.find(tooltip)
                    if entry:
                        tooltip = entry.msgstr
                descriptions[sound] = tooltip
0212 
0213 
def get_locales_from_config():
    '''Return the set of locales declared in GCompris src/core/LanguageList.qml

    The part after the first '.' of a locale is dropped and the 'system'
    and 'en_US' entries are left out. A read failure is reported with
    print() and gives the locales found so far (none).
    '''
    ignored = ('system', 'en_US')
    source = gcompris_qt + "/src/core/LanguageList.qml"

    found = set()
    try:
        with open(source) as f:
            for line in f.readlines():
                m = re.match('.*\"locale\":.*\"(.*)\"', line)
                if not m:
                    continue
                name = m.group(1).partition('.')[0]
                if name not in ignored:
                    found.add(name)
    except IOError as e:
        print("ERROR: Failed to parse %s: %s" %(source, e.strerror))

    return found
0233 
0234 
def get_locales_from_po_files():
    '''Return the set of locales having an entry in the poqm directory'''
    return set(os.listdir(gcompris_qt + "/poqm"))
0245 
def get_translation_status_from_po_files():
    '''Return the translation status from the po file

    For each locale as key we provide a list:
      [ translated_entries, untranslated_entries, fuzzy_entries, percent ]
    where percent is the ratio (0 to 1) of translated entries.

    As a side effect the global descriptions hash gets for each locale the
    Language-Team of its po file (without the <...> part) or an empty
    string.
    '''

    # en locale has no translation file but mark it 100% done
    locales = {'en': [0, 0, 0, 1]}

    descriptions['en'] = 'US English'

    locales_dir = gcompris_qt + "/poqm"
    for locale in os.listdir(locales_dir):
        po = polib.pofile(locales_dir + '/' + locale + '/gcompris_qt.po')

        # Each call walks the whole po file, so count only once
        translated = len(po.translated_entries())
        untranslated = len(po.untranslated_entries())
        fuzzy = len(po.fuzzy_entries())
        total = translated + untranslated + fuzzy

        # Calc a global translation percent, a po file without any entry
        # is reported as not translated instead of dividing by zero
        if total:
            percent = 1 - (float(untranslated + fuzzy) / total)
        else:
            percent = 0.0
        locales[locale] = [ translated, untranslated, fuzzy, percent ]

        # Save the translation team in the global descriptions
        if 'Language-Team' in po.metadata:
            team = po.metadata['Language-Team']
            team = re.sub(r' <.*>', '', team)
            descriptions[locale] = team
        else:
            descriptions[locale] = ''

    return locales
0281 
def get_words_from_code():
    '''Return a set for words as found in GCompris lang/resource/content-<locale>.json

    The locale is read from the global variable of the same name. If the
    file cannot be read or parsed an error is printed and an empty set is
    returned. For each word returned, a markdown link is saved in the
    global descriptions hash.
    '''
    resource = '/src/activities/lang/resource/content-' + locale + '.json'
    try:
        # json files are utf-8, do not depend on the platform encoding
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except (IOError, ValueError):
        # ValueError covers invalid json and invalid utf-8
        print('')
        print("**ERROR: missing resource file %s**" %(resource))
        print('[Instructions to create this file](%s)' %('https://gcompris.net/wiki/Voice_translation_Qt#Lang_word_list'))
        print('')
        return set()

    # Consolidate letters
    words = set()
    for word, text in data.items():
        # Skip alphabet letter, they are already handled by the alphabet set
        if word.startswith(('U', '1')):
            continue
        words.add(word)
        descriptions[word] = u'[{:s}](https://gcompris.net/incoming/lang/words.html#{:s})'.format(text, word.replace('.ogg', ''))

    return words
0304 
def get_wordsgame_from_code():
    '''Report a missing GCompris wordsgame/resource/default-<locale>.json

    The locale is read from the global variable of the same name. The
    result is always an empty set, there is no voice to check here.
    '''
    resource = '/src/activities/wordsgame/resource/default-' + locale + '.json'

    if os.path.isfile(gcompris_qt + resource):
        # We don't really have voices needs here, just check the file exists
        return set()

    print('')
    print("**ERROR: missing resource file %s**" %(resource))
    print('[Instructions to create this file](%s)' %('https://gcompris.net/wiki/Word_Lists_Qt#Wordsgame_.28Typing_words.29'))
    return set()
0317 
def get_click_on_letter_from_code():
    '''Report a missing GCompris click_on_letter/resource/levels-<locale>.json

    The locale is read from the global variable of the same name. The
    result is always an empty set, there is no voice to check here.
    '''
    resource = '/src/activities/click_on_letter/resource/levels-' + locale + '.json'

    if os.path.isfile(gcompris_qt + resource):
        # We don't really have voices needs here, just check the file exists
        return set()

    print('')
    print("**ERROR: missing resource file %s**" %(resource))
    print('[Instructions to create this file TBD](%s)' %('TBD'))
    return set()
0330 
def get_geography_on_letter_from_code():
    '''Return the set of sound files used in geography/resource/board/*.qml

    Every level with a soundFile is kept unless its type is
    SHAPE_BACKGROUND. Only the file name is kept, with '$CA' replaced by
    'ogg'.
    '''
    app = QCoreApplication(sys.argv)
    engine = QQmlEngine()
    component = QQmlComponent(engine)

    sounds = set()
    boards = glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml')
    for qml in boards:
        component.loadUrl(QUrl(qml))
        board = component.create()
        levels = board.property('levels')
        for level in levels.toVariant():
            if 'soundFile' not in level:
                continue
            if 'type' in level and level['type'] == "SHAPE_BACKGROUND":
                continue
            file_name = level['soundFile'].split('/')[-1]
            sounds.add(file_name.replace('$CA', 'ogg'))
    return sounds
0347 
def get_files(locale, voiceset):
    '''Return the set of file names found in the directory <locale>/<voiceset>

    The README file is left out. An empty set is returned when the
    directory cannot be listed (missing, not a directory, ...).
    '''
    to_remove = {'README'}
    try:
        return set(os.listdir(locale + '/' + voiceset)) - to_remove
    except OSError:
        # Only a listing failure means "no voices", let real bugs show up
        return set()
0354 
def get_locales_from_file():
    '''Return the set of directories of the current directory

    Symbolic links and names starting with a dot are left out.
    '''
    return {
        entry
        for entry in os.listdir('.')
        if os.path.isdir(entry)
        and not os.path.islink(entry)
        and not entry.startswith('.')
    }
0364 
def get_gletter_alphabet():
    '''Return the set of letter voice files needed by gletters/resource/default-<locale>.json

    The locale is read from the global variable of the same name. Each
    word of each level gives a file named after the code points of its
    lower case characters (UXXXX...) and its text is saved in the global
    descriptions hash. The files 10.ogg to 20.ogg are always added. If the
    file cannot be read or parsed an error is printed and an empty set is
    returned.
    '''
    resource = '/src/activities/gletters/resource/default-' + locale + '.json'
    try:
        # json files are utf-8, do not depend on the platform encoding
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except (IOError, ValueError):
        # ValueError covers invalid json and invalid utf-8
        print('')
        print("**ERROR: Missing resource file %s**" %(resource))
        print('[Instructions to create this file](%s)' %('https://gcompris.net/wiki/Word_Lists_Qt#Simple_Letters_.28Typing_letters.29_level_design'))
        print('')
        return set()

    # Consolidate letters
    letters = set()
    for level in data['levels']:
        for w in level['words']:
            lowered = w.lower()
            multiletters = ''.join('U{:04X}'.format(ord(one_char)) for one_char in lowered)
            letters.add(multiletters + '.ogg')
            descriptions[multiletters + '.ogg'] = lowered

    # Add numbers needed for words
    for i in range(10, 21):
        letters.add(str(i) + '.ogg')

    return letters
0391 
def _print_file_table(heading, names, described_row):
    '''Print a sorted markdown table of files under a level 3 heading

    described_row is the %-format used for a file having a description.
    '''
    title3(heading)
    print('| File | Description |')
    print('|------|-------------|')
    for f in sorted(names):
        if f in descriptions:
            print(described_row % (f, descriptions[f]))
        else:
            print('|%s |  |' % (f))
    print('')

def diff_set(title, code, files):
    '''Returns a stat from 0 to 1 for this report set

    code is the set of files the code needs and files the set of files
    we have. Nothing is printed and 0 is returned when both are empty.
    Else the missing files are reported, the correct ones too in verbose
    mode and the extra ones in notneeded mode. The stat is the share of
    all the files (needed or present) that are not missing.
    '''

    if not code and not files:
        return 0

    title2(title)

    correct = code & files
    missing = code - files
    extra = files - code

    if verbose and correct:
        _print_file_table("These files are correct", correct, u'| %s | %s |')

    if missing:
        _print_file_table("These files are missing", missing, u'| %s | %s |')

    if notneeded and extra:
        _print_file_table("These files are not needed", extra, u'|%s | %s|')

    return 1 - float(len(missing)) / len(code | files)
0440 
def diff_locale_set(title, code, files):
    '''Print the locales of code for which we miss a voice directory

    A locale has voices when a directory named after it, or after its
    short form (the part before the first '_'), exists in the current
    directory. In verbose mode the locales having voices are listed too.
    Nothing is printed when both code and files are empty.
    '''

    if not code and not files:
        return

    title2(title)

    # Sort out the locales whatever the verbose mode, the missing list is
    # always printed
    available = []
    missing = []
    for locale in code:
        # Test the full locale first then its short form
        if os.path.isdir(locale) or os.path.isdir(locale.split('_')[0]):
            available.append(locale)
        else:
            missing.append(locale)

    if verbose:
        title3("We have voices for these locales:")
        for locale in available:
            print('* ' + locale)

    print('')
    print("We miss voices for these locales:")
    for f in missing:
        print('* ' + f)
    print('')
0465 
def check_locale_config(title, stats, locale_config):
    '''Print the locales translated below a fixed threshold

    stats is the hash returned by get_translation_status_from_po_files
    and locale_config the locales to check. A locale missing in stats is
    looked up again with its short form (the part before the first '_').
    Return the sorted descriptions of the locales at or above the
    threshold.
    '''
    title2(title)
    LIMIT = 0.8
    good_locale = []
    for locale in sorted(locale_config):
        # Fall back on the short locale when the full one has no stats
        key = locale if locale in stats else locale.split('_')[0]
        if key not in stats:
            print("* %s no translation at all" % (locale))
            continue

        label = descriptions.get(key, '')
        if stats[key][3] < LIMIT:
            print(u'* {:s} ({:s})'.format(label, key))
        else:
            good_locale.append(label)

    print('')
    good_locale.sort()
    locale_list = ', '.join(good_locale)
    print('There are %d locales above %d%% translation: %s' %(len(good_locale), LIMIT * 100,
                                                              locale_list))

    return good_locale
0496 
0497 #
0498 # main
0499 # ===
0500 
# Each report is written with print() in its own StringIO buffer, stdout is
# redirected to the buffer of the report being built. The keys of reports
# are 'stats', 'summary' and one per locale.
reports = {}
sys.stdout = reports['stats'] = StringIO()

# This call also saves the team name of each locale in descriptions
string_stats = get_translation_status_from_po_files()
check_locale_config("Locales to remove from LanguageList.qml (translation level < 80%)",
                    string_stats, get_locales_from_config())

print('\n[Guide to contribute recording files](%s)' %('https://gcompris.net/wiki/Voice_translation_Qt'))

# Calc the big list of locales we have to check
all_locales = get_locales_from_po_files() | get_locales_from_file()
all_locales = list(all_locales)
all_locales.sort()

stats = {}
# Snapshot of the descriptions shared by all the locales (team names)
global_descriptions = copy.deepcopy(descriptions)

# locale is a global here: some of the functions called below, like
# get_words_from_code, read it instead of taking a parameter
for locale in all_locales:
    sys.stdout = reports[locale] = StringIO()

    # Start again from the snapshot so that the descriptions of a locale
    # do not leak in the next one
    descriptions = copy.deepcopy(global_descriptions)
    init_intro_description_from_code(locale)
    init_country_names_from_code(locale)

    title1(u'{:s} ({:s})'.format((descriptions[locale] if locale in descriptions else ''), locale))

    lstats = {'locale': locale}
    lstats['intro'] = diff_set("Intro ({:s}/intro/)".format(locale), get_intro_from_code(), get_files(locale, 'intro'))
    lstats['letter'] = diff_set("Letters ({:s}/alphabet/)".format(locale), get_gletter_alphabet(), get_files(locale, 'alphabet'))

    descriptions['click_on_letter.ogg'] = "Must contains the voice: 'Click on the letter:'"
    # For misc and colors the files of the 'en' voices are the reference
    lstats['misc'] = diff_set("Misc ({:s}/misc/)".format(locale), get_files('en', 'misc'), get_files(locale, 'misc'))

    lstats['color'] = diff_set("Colors ({:s}/colors/)".format(locale), get_files('en', 'colors'), get_files(locale, 'colors'))
    lstats['geography'] = diff_set("Geography ({:s}/geography/)".format(locale), get_geography_on_letter_from_code(), get_files(locale, 'geography'))
    lstats['words'] = diff_set("Words ({:s}/words/)".format(locale), get_words_from_code(), get_files(locale, 'words'))
    # The two calls below only check that a resource file exists: both
    # sets are empty so diff_set prints nothing and returns 0. These two
    # stats are not shown in the summary table.
    lstats['wordsgame'] = diff_set("Wordsgame", get_wordsgame_from_code(), set())
    lstats['click_on_letter'] = diff_set("Click on letter", get_click_on_letter_from_code(), set())
    stats[locale] = lstats

sys.stdout = reports['summary'] = StringIO()
sorted_keys = sorted(stats)

title1("GCompris Voice Recording Status Summary")
print('| Locale | Strings | Misc | Letters | Colors | Geography | Words | Intro|')
print('|--------|---------|------|---------|--------|-----------|-------|------|')
# One row per locale. descriptions is here the copy made for the last
# locale of the loop above, it still holds the entries of the snapshot.
for locale in sorted_keys:
    stat = stats[locale]
    print(u'| [{:s} ({:s})](voice_status_{:s}.html) | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |' \
        .format((descriptions[locale] if locale in descriptions else ''), stat['locale'], locale,
                string_stats[locale][3] if locale in string_stats else 0,
                stat['misc'], stat['letter'], stat['color'], stat['geography'],
                stat['words'], stat['intro']))
0554 
0555 #
0556 # Now we have all the reports
0557 #
0558 
# The markdown tables need this extension to be converted in html tables
extensions=['markdown.extensions.tables']

# All the reports are built, give back the real stdout
sys.stdout = ref_stdout

# index.html holds the summary table followed by the 'stats' report
with codecs.open("index.html", "w",
                 encoding="utf-8",
                 errors="xmlcharrefreplace"
             ) as f:
    f.write(get_html_header())

    summary = markdown.markdown(reports['summary'].getvalue(), extensions=extensions)
    # Replace each table cell holding only a rate (like 0.75) by a
    # progress bar, the other lines are kept as is
    summary_lines = []
    for line in summary.split('\n'):
        # Raw string: \d is not a valid escape in a plain string literal
        m = re.match(r'<td>(\d\.\d\d)</td>', line)
        if m:
            rate = m.group(1)
            summary_lines.append(get_html_progress_bar(rate))
        else:
            summary_lines.append(line)
    # Each line is ended by a newline
    summary2 = '\n'.join(summary_lines) + '\n'

    f.write(summary2 + '\n')

    f.write(markdown.markdown(reports['stats'].getvalue(), extensions=extensions))
    f.write(get_html_footer())

# One page per locale, linked from the summary table
for locale in sorted_keys:
    with codecs.open("voice_status_{:s}.html".format(locale), "w",
                     encoding="utf-8",
                     errors="xmlcharrefreplace"
                 ) as f:
        f.write(get_html_header())
        f.write(markdown.markdown(reports[locale].getvalue(), extensions=extensions))
        f.write(get_html_footer())