File indexing completed on 2023-05-30 10:40:45

0001 #!/usr/bin/python
0002 #
0003 # GCompris - check_voices.py
0004 #
0005 # Copyright (C) 2015 Bruno Coudoin <bruno.coudoin@gcompris.net>
0006 #
0007 #   This program is free software; you can redistribute it and/or modify
0008 #   it under the terms of the GNU General Public License as published by
0009 #   the Free Software Foundation; either version 3 of the License, or
0010 #   (at your option) any later version.
0011 #
0012 #   This program is distributed in the hope that it will be useful,
0013 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
0014 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0015 #   GNU General Public License for more details.
0016 #
0017 #   You should have received a copy of the GNU General Public License
0018 #   along with this program; if not, see <https://www.gnu.org/licenses/>.
0019 #
0020 #
0021 # The output is in markdown. A web page can be generated with:
0022 # ./check_voices.py ../gcompris-kde
0023 #
0024 # (Requires python-markdown to be installed)
0025 #
0026 import os
0027 import sys
0028 import re
0029 import copy
0030 import json
0031 import codecs
0032 from io import StringIO
0033 from datetime import date
0034 import glob
0035 
0036 import markdown
0037 import polib
0038 from PyQt5.QtCore import QCoreApplication, QUrl
0039 from PyQt5.QtQml import QQmlComponent, QQmlEngine
0040 
# Command line handling: the path to the GCompris sources is mandatory,
# the two flags may appear anywhere on the command line.
if len(sys.argv) < 2:
    for usage_line in (
            "Usage: check_voices.py path_to_gcompris [-v] [-nn]",
            "  -v:  verbose, show also files that are fine",
            "  -nn: not needed, show extra file in the voice directory"):
        print(usage_line)
    sys.exit(1)

gcompris_qt = sys.argv[1]
verbose = '-v' in sys.argv
notneeded = '-nn' in sys.argv
0050 
# Force output as UTF-8
# codecs.getwriter() builds a byte-oriented writer: wrapping the text-mode
# sys.stdout of Python 3 with it makes every write raise a TypeError.
# Switch the encoding of the existing stream instead, when it supports it.
# ref_stdout keeps the real stream so it can be restored once the reports
# have been captured.
ref_stdout = sys.stdout
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# A global hash to hold a description on a key file like the UTF-8 char of
# the file.
descriptions = {}
0058 
def get_html_header():
    '''Return the opening part of a report page, up to and including <body>.'''
    header_lines = (
        '<!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml" lang="en">',
        '<head>',
        '  <meta charset="utf-8"/>',
        '  <title>GCompris Voice Recording Status</title>',
        '</head>',
        '<body>',
    )
    # Every line, including the last one, is newline terminated
    return '\n'.join(header_lines) + '\n'
0068 
def get_html_footer():
    '''Return the closing part of a report page.

    The footer shows today's date in ISO format and closes both the <body>
    and the <html> elements opened by get_html_header().
    '''
    today = date.today()
    return """
<hr>
<p>Page generated the {:s}</p>
</body>
</html>
""".format(today.isoformat())
0076 
def get_html_progress_bar(ratio):
    '''Return a complete HTML table cell showing ratio as a progress bar.

    ratio is a number, or a string holding a number, between 0 and 1. It is
    rounded (not truncated) to a whole percentage: "0.29" * 100 is
    28.999... in floating point and must still be drawn as 29%.
    '''
    percent = int(round(float(ratio) * 100))
    return '<td width=200 height=30pt>' + \
        '<div style="border: 2px solid silver;background-color:#c00"><div style="background-color:#0c0;height:15px;width:{:d}%"></div></div></td>'.format(percent)
0080 
0081 # '<hr style="color:#0c0;background-color:#0c0;height:15px; border:none;margin:0;" align="left" width={:d}% /></td>'.format(int(float(ratio) * 100))
0082 
def title1(title):
    '''Print title as a markdown heading underlined with '=' and a blank line.'''
    print(title)
    print(len(title) * '=', end='\n\n')
0087 
def title2(title):
    '''Print title as a markdown heading underlined with '-' and a blank line.'''
    print(title)
    print(len(title) * '-', end='\n\n')
0092 
def title3(title):
    '''Print title as a '###' markdown heading followed by a blank line.'''
    print('### ' + title, end='\n\n')
0096 
def get_intro_from_code():
    '''Return the set of intro voice file names expected by the activities.

    There is one '<activity>.ogg' entry for each directory found in
    src/activities, except for the 'template' and 'menu' ones.
    '''
    activity_dir = gcompris_qt + "/src/activities"
    # Unrelevant activities
    skipped = ('template', 'menu')
    return {
        name + '.ogg'
        for name in os.listdir(activity_dir)
        if name not in skipped and os.path.isdir(activity_dir + "/" + name)
    }
0111 
def _translated(po, text):
    '''Return the msgstr of text in po, or text itself when po is missing
    or holds no non-empty translation for it.'''
    if not po:
        return text
    entry = po.find(text)
    return entry.msgstr if entry and entry.msgstr else text


def init_intro_description_from_code(locale, gcompris_po):
    '''Init the intro description as found in GCompris ActivityInfo.qml
    in the global descriptions hash.

    For each activity directory (except 'template' and 'menu') the entry
    descriptions['<activity>.ogg'] is built from the title, description and
    intro lines of its ActivityInfo.qml. Title and description are
    translated with gcompris_po (may be None), the intro text with the
    po/<locale>/gcompris_voices.po file. A text without translation is kept
    as found in the qml file.

    Errors (voices po file not loadable, activity without intro line) are
    printed on the current standard output.
    '''

    voices_po = None
    try:
        voices_po = polib.pofile(gcompris_qt + '/po/'+locale+'/gcompris_voices.po', encoding='utf-8')
    except OSError:
        print("**ERROR: Failed to load po file %s**" % ('/po/'+locale+'/gcompris_voices.po'))
        print('')

    title_re = re.compile(r'.*title:.*"(.*)"')
    description_re = re.compile(r'.*description:.*"(.*)"')
    intro_re = re.compile(r'.*intro:.*"(.*)"')

    activity_dir = gcompris_qt + "/src/activities"
    for activity in os.listdir(activity_dir):
        # Skip unrelevant activities
        if activity in ('template', 'menu') or \
           not os.path.isdir(activity_dir + "/" + activity):
            continue

        key = activity + '.ogg'
        descriptions[key] = ''
        has_intro = False
        try:
            with open(activity_dir + "/" + activity + "/ActivityInfo.qml", encoding='utf-8') as f:
                for line in f:
                    m = title_re.match(line)
                    if m:
                        descriptions[key] += ' title: ' + _translated(gcompris_po, m.group(1))

                    m = description_re.match(line)
                    if m:
                        descriptions[key] += ' description: ' + _translated(gcompris_po, m.group(1))

                    m = intro_re.match(line)
                    if m:
                        has_intro = True
                        descriptions[key] += ' voice: ' + _translated(voices_po, m.group(1))
        except IOError:
            # Best effort: a directory without a readable ActivityInfo.qml
            # is silently ignored.
            continue

        # The key is always present in descriptions at this point, so the
        # missing intro must be detected from what was found in the file.
        if not has_intro:
            print("**ERROR: Missing intro tag in %s**" % (activity + "/ActivityInfo.qml"))

    print('')
0165 
0166     print('')
0167 
0168 
def init_country_names_from_code(component, locale, gcompris_po):
    '''Store the tooltip of each geography sound file in the global
    descriptions hash.

    Each qml file in geography/resource/board is loaded through component
    and its 'levels' property is walked. Levels holding both a 'soundFile'
    and a 'toolTipText' are recorded under the sound file base name, with
    '$CA' replaced by 'ogg'. The tooltip is replaced by its msgstr when
    gcompris_po has an entry for it. locale is not used here.
    '''
    board_files = glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml')
    for board_file in board_files:
        component.loadUrl(QUrl(board_file))
        board = component.create()
        for entry in board.property('levels').toVariant():
            if not ('soundFile' in entry and 'toolTipText' in entry):
                continue
            ogg_name = entry['soundFile'].split('/')[-1].replace('$CA', 'ogg')
            label = entry['toolTipText']
            if gcompris_po:
                po_entry = gcompris_po.find(label)
                if po_entry:
                    label = po_entry.msgstr
            descriptions[ogg_name] = label
0185 
0186 
def get_locales_from_config():
    '''Return the set of locales declared in GCompris src/core/LanguageList.qml.

    The part after the first '.' of a locale is dropped, and the 'system'
    and 'en_US' entries are left out. When the file cannot be read an error
    is printed and the locales collected so far are returned.
    '''
    source = gcompris_qt + "/src/core/LanguageList.qml"
    ignored = ('system', 'en_US')
    found = set()

    try:
        with open(source, encoding='utf-8') as qml:
            for text in qml.readlines():
                hit = re.match('.*\"locale\":.*\"(.*)\"', text)
                if not hit:
                    continue
                name = hit.group(1).split('.')[0]
                if name in ignored:
                    continue
                found.add(name)
    except IOError as e:
        print(f"ERROR: Failed to parse {source}: {e.strerror}")

    return found
0206 
0207 
def get_locales_from_po_files():
    '''Return the set of entry names found in the poqm directory, one per
    locale for which we have a po file.'''
    return set(os.listdir(gcompris_qt + "/poqm"))
0218 
def get_translation_status_from_po_files():
    '''Return the translation status from the po files.

    The result maps each locale found in the poqm directory to a list:
      [ translated_entries, untranslated_entries, fuzzy_entries, percent ]
    where percent is the share of translated entries, from 0 to 1. A po
    file without any entry is reported as 0 instead of failing with a
    division by zero.

    As a side effect the translation team of each locale (the
    Language-Team metadata without its <address> part) is saved in the
    global descriptions hash.
    '''

    # en locale has no translation file but mark it 100% done
    locales = {'en': [0, 0, 0, 1]}

    descriptions['en'] = 'US English'

    locales_dir = gcompris_qt + "/poqm"
    for locale in os.listdir(locales_dir):
        po = polib.pofile(locales_dir + '/' + locale + '/gcompris_qt.po', encoding='utf-8')
        # Calc a global translation percent
        untranslated = len(po.untranslated_entries())
        translated = len(po.translated_entries())
        fuzzy = len(po.fuzzy_entries())
        total = translated + untranslated + fuzzy
        percent = (1 - float(untranslated + fuzzy) / total) if total else 0.0
        locales[locale] = [translated, untranslated, fuzzy, percent]

        # Save the translation team in the global descriptions
        if 'Language-Team' in po.metadata:
            team = po.metadata['Language-Team']
            team = re.sub(r' <.*>', '', team)
            descriptions[locale] = team
        else:
            descriptions[locale] = ''

    return locales
0248 
def get_words_from_code():
    '''Return the set of word voice files listed in GCompris
    lang/resource/content-<locale>.json for the global locale.

    Keys starting with 'U' or '1' are left out. Each kept key gets a
    markdown link to the words page as its entry in the global
    descriptions hash. When the file cannot be opened an error is printed
    and an empty set is returned.
    '''
    resource = '/src/activities/lang/resource/content-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: missing resource file %s**" % (resource))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt#Lang_word_list'))
        print('')
        return set()

    words = set()
    for word, text in data.items():
        # Skip alphabet letter, they are already handled by the alphabet set
        if word[0] in ('U', '1'):
            continue
        words.add(word)
        anchor = word.replace('.ogg', '')
        descriptions[word] = '[{:s}](https://gcompris.net/incoming/lang/words.html#{:s})'.format(text, anchor)

    return words
0271 
def get_wordsgame_from_code():
    '''Check that GCompris wordsgame/resource/default-<locale>.json exists
    for the global locale and print an error when it does not.

    The returned set is always empty: no voice is needed here.
    '''
    resource = '/src/activities/wordsgame/resource/default-' + locale + '.json'

    if os.path.isfile(gcompris_qt + resource):
        # We don't really have voices needs here, just check the file exists
        return set()

    print('')
    print("**ERROR: missing resource file %s**" % (resource))
    print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Wordsgame_.28Typing_words.29'))
    return set()
0284 
def get_click_on_letter_from_code():
    '''Check that GCompris click_on_letter/resource/levels-<locale>.json
    exists for the global locale and print an error when it does not.

    The returned set is always empty: no voice is needed here.
    '''
    resource = '/src/activities/click_on_letter/resource/levels-' + locale + '.json'

    if os.path.isfile(gcompris_qt + resource):
        # We don't really have voices needs here, just check the file exists
        return set()

    print('')
    print("**ERROR: missing resource file %s**" % (resource))
    print('[Instructions to create this file TBD](%s)' % ('TBD'))
    return set()
0297 
def get_geography_on_letter_from_code(component):
    '''Return the set of sound files used by the qml boards in
    geography/resource/board.

    Each board is loaded through component and its 'levels' property is
    walked. A level contributes the base name of its 'soundFile', with
    '$CA' replaced by 'ogg', unless its 'type' is "SHAPE_BACKGROUND".
    '''
    sounds = set()

    board_files = glob.glob(gcompris_qt + '/src/activities/geography/resource/board/*.qml')
    for board_file in board_files:
        component.loadUrl(QUrl(board_file))
        board = component.create()
        for entry in board.property('levels').toVariant():
            if 'soundFile' not in entry:
                continue
            if 'type' in entry and entry['type'] == "SHAPE_BACKGROUND":
                continue
            sounds.add(entry['soundFile'].split('/')[-1].replace('$CA', 'ogg'))
    return sounds
0311 
def get_files(locale, voiceset):
    '''Return the set of file names found in the <locale>/<voiceset>
    directory, without its README.

    An empty set is returned when the directory cannot be listed (missing,
    not a directory, not readable). Only OSError is handled so that an
    interruption or a programming error is not silently swallowed.
    '''
    to_remove = {'README'}
    try:
        return set(os.listdir(locale + '/' + voiceset)) - to_remove
    except OSError:
        return set()
0318 
def get_locales_from_file():
    '''Return the set of directory names found in the current directory.

    Symbolic links and names starting with a '.' are left out.
    '''
    return {
        entry
        for entry in os.listdir('.')
        if not entry.startswith('.')
        and os.path.isdir(entry)
        and not os.path.islink(entry)
    }
0328 
def get_gletter_alphabet():
    '''Return the set of alphabet voice files needed for the global locale.

    Each word of each level in gletters/resource/default-<locale>.json is
    lowercased and turned into a file name made of the 'UXXXX' code of each
    of its characters plus '.ogg'. The lowercased word is saved as its
    entry in the global descriptions hash. The files '10.ogg' to '20.ogg'
    are always added. When the file cannot be opened an error is printed
    and an empty set is returned.
    '''
    resource = '/src/activities/gletters/resource/default-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: Missing resource file %s**" % (resource))
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Simple_Letters_.28Typing_letters.29_level_design'))
        print('')
        return set()

    # Consolidate letters
    letters = set()
    for level in data['levels']:
        for w in level['words']:
            lowered = w.lower()
            ogg_name = ''.join('U{:04X}'.format(ord(ch)) for ch in lowered) + '.ogg'
            letters.add(ogg_name)
            descriptions[ogg_name] = lowered

    # Add numbers needed for words
    letters.update(str(number) + '.ogg' for number in range(10, 21))

    return letters
0355 
def _print_file_table(heading, names):
    '''Print heading then a markdown table of the sorted names, each with
    its entry of the global descriptions hash when there is one.'''
    title3(heading)
    print('| File | Description |')
    print('|------|-------------|')
    for name in sorted(names):
        print('| %s | %s |' % (name, descriptions.get(name, '')))
    print('')


def diff_set(title, code, files):
    '''Print the report of a voice set and return its stat from 0 to 1.

    code is the set of file names that are needed, files the set of file
    names that are present. The report lists the correct files (verbose
    mode only), the missing files, and the files that are not needed
    (-nn mode only).

    Returns 0, without printing anything, when both sets are empty.
    Otherwise returns 1 minus the number of missing files divided by the
    number of files in the union of both sets.
    '''

    if not code and not files:
        return 0

    title2(title)

    correct = code & files
    missing = code - files
    extra = files - code

    if verbose and correct:
        _print_file_table("These files are correct", correct)

    if missing:
        _print_file_table("These files are missing", missing)

    if notneeded and extra:
        _print_file_table("These files are not needed", extra)

    return 1 - float(len(missing)) / len(code | files)
0404 
def diff_locale_set(title, code, files):
    '''Print which locales of code have a voice directory and which do not.

    A locale has voices when a directory of that name, or of its name cut
    at the first '_', exists in the current directory. The locales with
    voices are listed in verbose mode only, the others are always listed.
    Nothing is printed when both code and files are empty.
    '''

    if not code and not files:
        return

    title2(title)

    # The split is computed whatever the verbose mode is: the list of
    # missing locales is always printed.
    available = []
    missing = []
    for locale in code:
        # Test the full locale first, then its shortened form
        if os.path.isdir(locale) or os.path.isdir(locale.split('_')[0]):
            available.append(locale)
        else:
            missing.append(locale)

    if verbose:
        title3("We have voices for these locales:")
        for locale in available:
            print('* ' + locale)

    print('')
    print("We miss voices for these locales:")
    for locale in missing:
        print('* ' + locale)
    print('')
0429 
def check_locale_config(title, stats, locale_config):
    '''Print the locales of locale_config translated below a fixed
    threshold and return the descriptions of the ones at or above it.

    stats maps a locale to a list whose fourth item is its translation
    ratio. A locale missing from stats is looked up again under its name
    cut at the first '_', and is reported as not translated at all when
    that fails too. The returned list is sorted.
    '''
    LIMIT = 0.8
    title2(title)

    good_locale = []
    for locale in sorted(locale_config):
        # Use the full locale when known, else its shortened form
        key = locale if locale in stats else locale.split('_')[0]
        if key not in stats:
            print("* %s no translation at all" % (locale))
            continue

        label = descriptions.get(key, '')
        if stats[key][3] < LIMIT:
            print('* {:s} ({:s})'.format(label, key))
        else:
            good_locale.append(label)

    print('')
    good_locale.sort()
    print('There are %d locales above %d%% translation: %s' % (len(good_locale), LIMIT * 100,
                                                               ', '.join(good_locale)))

    return good_locale
0460 
0461 #
0462 # main
0463 # ===
0464 
# Each report is produced as markdown in its own in-memory buffer: the
# print() calls land in it because sys.stdout is pointed at the buffer.
reports = {'stats': StringIO()}
sys.stdout = reports['stats']

string_stats = get_translation_status_from_po_files()
locales_in_config = get_locales_from_config()
check_locale_config("Locales to remove from LanguageList.qml (translation level < 80%)",
                    string_stats, locales_in_config)

print('\n[Guide to contribute recording files](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt'))

# Calc the big list of locales we have to check
all_locales = sorted(get_locales_from_po_files() | get_locales_from_file())

stats = {}
# Snapshot of the descriptions common to all the locales, each locale
# report starts from a fresh copy of it.
global_descriptions = copy.deepcopy(descriptions)

# Qt objects used to load the qml files
app = QCoreApplication(sys.argv)
engine = QQmlEngine()
component = QQmlComponent(engine)
0485 
# Build one report per locale, each in its own buffer, and collect the
# stat of each voice set in stats[locale].
for locale in all_locales:
    sys.stdout = reports[locale] = StringIO()

    # Start from the descriptions common to all the locales
    descriptions = copy.deepcopy(global_descriptions)
    po_path = '/poqm/' + locale + '/gcompris_qt.po'
    try:
        gcompris_po = polib.pofile(gcompris_qt + po_path, encoding='utf-8')
    except OSError:
        # Go on without translation, the report tells why
        gcompris_po = None
        print("**ERROR: Failed to load po file %s**" % (po_path))
        print('')

    init_intro_description_from_code(locale, gcompris_po)
    init_country_names_from_code(component, locale, gcompris_po)

    title1('{:s} ({:s})'.format((descriptions[locale] if locale in descriptions else ''), locale))

    lstats = {'locale': locale}
    lstats['intro'] = diff_set("Intro ({:s}/intro/)".format(locale), get_intro_from_code(), get_files(locale, 'intro'))
    lstats['letter'] = diff_set("Letters ({:s}/alphabet/)".format(locale), get_gletter_alphabet(), get_files(locale, 'alphabet'))

    descriptions['click_on_letter.ogg'] = "Must contains the voice: 'Click on the letter:'"
    # The misc and colors sets are compared with the 'en' voices
    lstats['misc'] = diff_set("Misc ({:s}/misc/)".format(locale), get_files('en', 'misc'), get_files(locale, 'misc'))

    lstats['color'] = diff_set("Colors ({:s}/colors/)".format(locale), get_files('en', 'colors'), get_files(locale, 'colors'))
    lstats['geography'] = diff_set("Geography ({:s}/geography/)".format(locale), get_geography_on_letter_from_code(component), get_files(locale, 'geography'))
    lstats['words'] = diff_set("Words ({:s}/words/)".format(locale), get_words_from_code(), get_files(locale, 'words'))
    lstats['wordsgame'] = diff_set("Wordsgame", get_wordsgame_from_code(), set())
    lstats['click_on_letter'] = diff_set("Click on letter", get_click_on_letter_from_code(), set())
    stats[locale] = lstats
0516 
# The summary report: one table row per locale with the ratio of each set
reports['summary'] = StringIO()
sys.stdout = reports['summary']
sorted_keys = sorted(stats)

title1("GCompris Voice Recording Status Summary")
print('| Locale | Strings | Misc | Letters | Colors | Geography | Words | Intro|')
print('|--------|---------|------|---------|--------|-----------|-------|------|')
row_format = '| [{:s} ({:s})](voice_status_{:s}.html) | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |'
for locale in sorted_keys:
    stat = stats[locale]
    team = descriptions[locale] if locale in descriptions else ''
    # Locales without a po file have no string stat
    strings_ratio = string_stats[locale][3] if locale in string_stats else 0
    print(row_format.format(team, stat['locale'], locale,
                            strings_ratio,
                            stat['misc'], stat['letter'], stat['color'], stat['geography'],
                            stat['words'], stat['intro']))
0530 
0531 #
0532 # Now we have all the reports
0533 #
0534 
extensions = ['markdown.extensions.tables']

# The reports are complete, give the standard output back
sys.stdout = ref_stdout

# index.html holds the summary followed by the translation stats
with codecs.open("index.html", "w",
                 encoding="utf-8",
                 errors="xmlcharrefreplace"
                 ) as f:
    f.write(get_html_header())

    summary = markdown.markdown(reports['summary'].getvalue(), extensions=extensions)
    # A line made of a single ratio cell is replaced by a progress bar
    rendered_lines = []
    for line in summary.split('\n'):
        m = re.match(r'<td>(\d\.\d\d)</td>', line)
        rendered_lines.append(get_html_progress_bar(m.group(1)) if m else line)
    summary2 = ''.join(rendered + '\n' for rendered in rendered_lines)

    f.write(summary2 + '\n')

    f.write(markdown.markdown(reports['stats'].getvalue(), extensions=extensions))
    f.write(get_html_footer())

# One page per locale, linked from the summary table
for locale in sorted_keys:
    page_name = "voice_status_{:s}.html".format(locale)
    with codecs.open(page_name, "w",
                     encoding="utf-8",
                     errors="xmlcharrefreplace"
                     ) as f:
        f.write(get_html_header())
        f.write(markdown.markdown(reports[locale].getvalue(), extensions=extensions))
        f.write(get_html_footer())