File indexing completed on 2024-04-21 14:43:10

0001 #!/usr/bin/python
0002 #
0003 # GCompris - check_voices.py
0004 #
0005 # Copyright (C) 2015 Bruno Coudoin <bruno.coudoin@gcompris.net>
0006 #
0007 #   This program is free software; you can redistribute it and/or modify
0008 #   it under the terms of the GNU General Public License as published by
0009 #   the Free Software Foundation; either version 3 of the License, or
0010 #   (at your option) any later version.
0011 #
0012 #   This program is distributed in the hope that it will be useful,
0013 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
0014 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0015 #   GNU General Public License for more details.
0016 #
0017 #   You should have received a copy of the GNU General Public License
0018 #   along with this program; if not, see <https://www.gnu.org/licenses/>.
0019 #
0020 #
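# The reports are built as markdown and converted to HTML pages
# (index.html and voice_status_<locale>.html, written to the current
# directory). They can be generated with:
# ./check_voices.py ../gcompris-kde
#
# (Requires python-markdown, polib and PyQt5 to be installed)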
0025 #
0026 import os
0027 import sys
0028 import re
0029 import copy
0030 import json
0031 import codecs
0032 from io import StringIO
0033 from datetime import date
0034 import glob
0035 
0036 import markdown
0037 import polib
0038 from PyQt5.QtCore import QCoreApplication, QUrl
0039 from PyQt5.QtQml import QQmlComponent, QQmlEngine
0040 
if len(sys.argv) < 2:
    print("Usage: check_voices.py path_to_gcompris [-v] [-nn]")
    print("  -v:  verbose, show also files that are fine")
    print("  -nn: not needed, show extra file in the voice directory")
    sys.exit(1)

# The GCompris source tree is the first argument; the optional flags are
# looked up anywhere on the command line.
verbose = '-v' in sys.argv
notneeded = '-nn' in sys.argv
gcompris_qt = sys.argv[1]

# Force output as UTF-8.
# A codecs writer encodes str to bytes, so it has to wrap the binary layer
# of stdout: wrapping the text stream itself raises TypeError on the first
# write. A stream without a binary layer is left untouched.
ref_stdout = sys.stdout
_stdout_bytes = getattr(sys.stdout, 'buffer', None)
if _stdout_bytes is not None:
    sys.stdout = codecs.getwriter('utf-8')(_stdout_bytes)

# A global hash mapping a key (a voice file name or a locale code) to a
# human readable description, like the UTF-8 char a letter file stands for.
descriptions = {}
0058 
def get_html_header():
    '''Return the opening HTML markup shared by every generated page'''
    header_lines = (
        '<!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml" lang="en">',
        '<head>',
        '  <meta charset="utf-8"/>',
        '  <title>GCompris Voice Recording Status</title>',
        '</head>',
        '<body>',
    )
    # Each line, the last one included, ends with a newline
    return '\n'.join(header_lines) + '\n'
0068 
def get_html_footer():
    '''Return the closing HTML markup of a generated page.

    The footer shows today's date in ISO format and closes the <body> and
    <html> elements opened by the page header.
    '''
    today = date.today()
    return """
<hr>
<p>Page generated the {:s}</p>
</body>
</html>
""".format(today.isoformat())
0076 
def get_html_progress_bar(ratio):
    '''Return a complete HTML table cell showing ratio as a progress bar.

    ratio: completion rate as a number or a numeric string such as '0.29';
           it is multiplied by 100 to get the width of the green bar.
    '''
    # Round instead of truncating: 0.29 * 100 is 28.999999999999996 in
    # floating point and int() would turn it into 28.
    percent = int(round(float(ratio) * 100))
    return '<td width=200 height=30pt>' + \
        '<div style="border: 2px solid silver;background-color:#c00"><div style="background-color:#0c0;height:15px;width:{:d}%"></div></div>'.format(percent) + \
        '</td>'
0080 
0081 # '<hr style="color:#0c0;background-color:#0c0;height:15px; border:none;margin:0;" align="left" width={:d}% /></td>'.format(int(float(ratio) * 100))
0082 
def title1(title):
    '''Print title as a markdown level 1 heading, followed by an empty line'''
    underline = '=' * len(title)
    print(title, underline, '', sep='\n')
0087 
def title2(title):
    '''Print title as a markdown level 2 heading, followed by an empty line'''
    underline = '-' * len(title)
    print(title, underline, '', sep='\n')
0092 
def title3(title):
    '''Print title as a markdown level 3 heading, followed by an empty line'''
    heading = '### ' + title
    print(heading, '', sep='\n')
0096 
def get_intro_from_code():
    '''Return the set of intro voice files expected by GCompris.

    Each directory found in src/activities gives one '<activity>.ogg'
    entry, except for the 'template' and 'menu' directories.
    '''
    activities_root = gcompris_qt + "/src/activities"
    # Directories that are not real activities
    skipped = ('template', 'menu')
    return {
        name + '.ogg'
        for name in os.listdir(activities_root)
        if name not in skipped and os.path.isdir(activities_root + "/" + name)
    }
0111 
def init_intro_description_from_code(locale, gcompris_po):
    '''Init the intro description as found in GCompris ActivityInfo.qml
    in the global descriptions hash.

    For each activity directory (except 'template' and 'menu') the entry
    '<activity>.ogg' is built from the title, description and intro strings
    matched in its ActivityInfo.qml.

    locale: locale code, used to find po/<locale>/gcompris_voices.po which
            translates the intro text.
    gcompris_po: po catalogue used to translate the title and description;
                 when it is None or empty the source strings are kept.
    '''
    voices_po_name = '/po/' + locale + '/gcompris_voices.po'
    voices_po = None
    try:
        voices_po = polib.pofile(gcompris_qt + voices_po_name, encoding='utf-8')
    except OSError:
        print("**ERROR: Failed to load po file %s**" % voices_po_name)
        print('')

    # Compiled once, they are applied to every line of every activity
    title_re = re.compile(r'.*title:.*"(.*)"')
    description_re = re.compile(r'.*description:.*"(.*)"')
    intro_re = re.compile(r'.*intro:.*"(.*)"')

    activity_dir = gcompris_qt + "/src/activities"
    for activity in os.listdir(activity_dir):
        # Skip irrelevant activities
        if activity in ('template', 'menu') or \
           not os.path.isdir(activity_dir + "/" + activity):
            continue

        key = activity + '.ogg'
        descriptions[key] = ''
        # The key always exists from here on, so the presence of the intro
        # tag has to be tracked separately to be able to report it.
        intro_found = False
        try:
            with open(activity_dir + "/" + activity + "/ActivityInfo.qml",
                      encoding='utf-8') as f:
                for line in f:
                    m = title_re.match(line)
                    if m:
                        title = m.group(1)
                        if gcompris_po:
                            title_po = gcompris_po.find(title)
                            title = title_po.msgstr if title_po else title
                        descriptions[key] += ' title: ' + title

                    m = description_re.match(line)
                    if m:
                        description = m.group(1)
                        if gcompris_po:
                            description_po = gcompris_po.find(description)
                            description = description_po.msgstr if description_po else description
                        descriptions[key] += ' description: ' + description

                    m = intro_re.match(line)
                    if m:
                        intro_found = True
                        voiceText = m.group(1)
                        if voices_po:
                            voice_text_po = voices_po.find(voiceText)
                            # An empty translation falls back to the source text
                            voiceText = voice_text_po.msgstr if voice_text_po and voice_text_po.msgstr != "" else voiceText
                        descriptions[key] += ' voice: ' + voiceText
        except IOError:
            # Best effort: an activity whose ActivityInfo.qml cannot be read
            # keeps whatever description was gathered so far.
            continue

        if not intro_found:
            print("**ERROR: Missing intro tag in %s**" % (activity + "/ActivityInfo.qml"))

    print('')
0167 
0168 
def init_country_names_from_code(component, locale, gcompris_po):
    '''Init the country description as found in GCompris
    geography/resource/board/*.qml in the global descriptions hash.

    component: QML component used to load and instantiate each board file.
    locale: not used by this function.
    gcompris_po: po catalogue used to translate the tooltip texts; when it
                 is None or empty the source texts are kept.
    '''
    board_pattern = gcompris_qt + '/src/activities/geography/resource/board/*.qml'
    for board_file in glob.glob(board_pattern):
        component.loadUrl(QUrl(board_file))
        board = component.create()
        for level in board.property('levels').toVariant():
            # Only the entries having both a sound and a tooltip are described
            if not ('soundFile' in level and 'toolTipText' in level):
                continue
            # Keep the file name only; '$CA' stands for the audio extension
            sound_name = level['soundFile'].split('/')[-1].replace('$CA', 'ogg')
            text = level['toolTipText']
            if gcompris_po:
                entry = gcompris_po.find(text)
                if entry:
                    text = entry.msgstr
            descriptions[sound_name] = text
0185 
0186 
def get_locales_from_config():
    '''Return the set of locales declared in GCompris src/core/LanguageList.qml.

    The part of the locale following the first '.' is dropped and the
    'system' and 'en_US' entries are left out. A failure to read the file
    is reported on the standard output and gives the locales found so far.
    '''
    source = gcompris_qt + "/src/core/LanguageList.qml"
    ignored = ('system', 'en_US')
    found = set()

    try:
        with open(source, encoding='utf-8') as f:
            for line in f.readlines():
                m = re.match(r'.*"locale":.*"(.*)"', line)
                if m is None:
                    continue
                # 'fr_FR.UTF-8' is reduced to 'fr_FR'
                name = m.group(1).partition('.')[0]
                if name in ignored:
                    continue
                found.add(name)
    except IOError as e:
        print(f"ERROR: Failed to parse {source}: {e.strerror}")

    return found
0206 
0207 
def get_locales_from_po_files():
    '''Return the set of locales having an entry in the poqm directory'''
    return set(os.listdir(gcompris_qt + "/poqm"))
0218 
def get_translation_status_from_po_files():
    '''Return the translation status from the po files.

    For each locale as key we provide a list:
      [ translated_entries, untranslated_entries, fuzzy_entries, percent ]
    where percent is the share of translated entries, from 0 to 1.

    The translation team of each locale, taken from the 'Language-Team'
    metadata, is saved in the global descriptions hash. A locale whose po
    file cannot be loaded is reported on stderr and left out of the result.
    '''
    # en locale has no translation file but mark it 100% done
    locales = {'en': [0, 0, 0, 1]}

    descriptions['en'] = 'US English'

    locales_dir = gcompris_qt + "/poqm"
    for locale in os.listdir(locales_dir):
        po_name = locales_dir + '/' + locale + '/gcompris_qt.po'
        try:
            po = polib.pofile(po_name, encoding='utf-8')
        except OSError:
            # The per locale report tolerates an unloadable po file as well;
            # stderr is used because stdout is captured for the reports.
            print("ERROR: Failed to load po file %s" % po_name, file=sys.stderr)
            continue

        # Calc a global translation percent
        untranslated = len(po.untranslated_entries())
        translated = len(po.translated_entries())
        fuzzy = len(po.fuzzy_entries())
        total = translated + untranslated + fuzzy
        if total:
            percent = 1 - (float((untranslated + fuzzy)) / total)
        else:
            # An empty catalogue has nothing translated
            percent = 0.0
        locales[locale] = [translated, untranslated, fuzzy, percent]

        # Save the translation team in the global descriptions
        if 'Language-Team' in po.metadata:
            team = po.metadata['Language-Team']
            # Drop the contact address, like ' <team@example.org>'
            team = re.sub(r' <.*>', '', team)
            descriptions[locale] = team
        else:
            descriptions[locale] = ''

    return locales
0248 
def get_words_from_code():
    '''Return the set of words found in GCompris lang/resource/content-<locale>.json.

    The locale is read from the module level 'locale' variable. Each word
    kept gets a link in the global descriptions hash. When the resource
    file cannot be read an error is printed and an empty set is returned.
    '''
    resource = '/src/activities/lang/resource/content-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: missing resource file %s**" % resource)
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt#Lang_word_list'))
        print('')
        return set()

    words = set()
    for word, text in data.items():
        # Skip alphabet letter, they are already handled by the alphabet set
        if word[0] in ('U', '1'):
            continue
        words.add(word)
        anchor = word.replace('.ogg', '')
        descriptions[word] = '[{:s}](https://gcompris.net/incoming/lang/words.html#{:s})'.format(text, anchor)

    return words
0271 
0272 
def check_file_existence(filename, instructions):
    '''Print an error when filename is missing from the GCompris tree.

    filename: path of the file, relative to the GCompris source tree.
    instructions: URL explaining how to create the file.

    Always return an empty set: there is no voice needed here, only the
    existence of the file is checked.
    '''
    found = os.path.isfile(gcompris_qt + filename)
    if not found:
        print('',
              "**ERROR: missing resource file %s**" % filename,
              '[Instructions to create this file](%s)' % instructions,
              sep='\n')

    return set()
0281 
0282 
def get_grammar_analysis_from_code():
    '''Check that grammar_analysis/resource/grammar_analysis-<locale>.json exists.

    The locale is read from the module level 'locale' variable. Return the
    result of check_file_existence().
    '''
    resource = '/src/activities/grammar_analysis/resource/grammar_analysis-' + locale + '.json'
    wiki = 'https://gcompris.net/wiki/How_to_translate#Dataset_to_translate'
    return check_file_existence(resource, wiki)
0286 
0287 
def get_grammar_classes_from_code():
    '''Check that grammar_classes/resource/grammar_classes-<locale>.json exists.

    The locale is read from the module level 'locale' variable. Return the
    result of check_file_existence().
    '''
    resource = '/src/activities/grammar_classes/resource/grammar_classes-' + locale + '.json'
    wiki = 'https://gcompris.net/wiki/How_to_translate#Dataset_to_translate'
    return check_file_existence(resource, wiki)
0291 
0292 
def get_wordsgame_from_code():
    '''Check that wordsgame/resource/default-<locale>.json exists.

    The locale is read from the module level 'locale' variable. Return the
    result of check_file_existence().
    '''
    resource = '/src/activities/wordsgame/resource/default-' + locale + '.json'
    wiki = 'https://gcompris.net/wiki/Word_Lists_Qt#Wordsgame_.28Typing_words.29'
    return check_file_existence(resource, wiki)
0296 
0297 
def get_click_on_letter_from_code():
    '''Check that click_on_letter/resource/levels-<locale>.json exists.

    The locale is read from the module level 'locale' variable. Return the
    result of check_file_existence().
    '''
    resource = '/src/activities/click_on_letter/resource/levels-' + locale + '.json'
    wiki = 'https://gcompris.net/wiki/How_to_translate#Dataset_to_translate'
    return check_file_existence(resource, wiki)
0301 
0302 
def get_geography_on_letter_from_code(component):
    '''Return the set of sound files used by geography/resource/board/*.qml.

    component: QML component used to load and instantiate each board file.

    The entries whose type is SHAPE_BACKGROUND are left out.
    '''
    expected = set()

    board_pattern = gcompris_qt + '/src/activities/geography/resource/board/*.qml'
    for board_file in glob.glob(board_pattern):
        component.loadUrl(QUrl(board_file))
        board = component.create()
        for level in board.property('levels').toVariant():
            if 'soundFile' not in level:
                continue
            if 'type' in level and level['type'] == "SHAPE_BACKGROUND":
                continue
            # Keep the file name only; '$CA' stands for the audio extension
            expected.add(level['soundFile'].split('/')[-1].replace('$CA', 'ogg'))
    return expected
0316 
def get_files(locale, voiceset):
    '''Return the set of file names found in the <locale>/<voiceset> directory.

    The README file is not part of the result. A directory that is missing
    or cannot be listed gives an empty set.
    '''
    to_remove = {'README'}
    try:
        return set(os.listdir(locale + '/' + voiceset)) - to_remove
    except OSError:
        # Only filesystem errors mean "no voices here"; a bare except would
        # also hide programming errors and swallow KeyboardInterrupt.
        return set()
0323 
def get_locales_from_file():
    '''Return the set of directories found in the current working directory.

    Symbolic links and the names starting with a dot are left out.
    '''
    return {
        entry
        for entry in os.listdir('.')
        if os.path.isdir(entry)
        and not os.path.islink(entry)
        and entry[0] != '.'
    }
0333 
def get_gletter_alphabet():
    '''Return the set of letter voice files needed by the gletters activity.

    The letters come from gletters/resource/default-<locale>.json, the
    locale being read from the module level 'locale' variable. Each entry
    is named after the code points of its lowercased characters, like
    'U0061.ogg', and is described in the global descriptions hash. The
    files '10.ogg' to '20.ogg' are always added. When the resource file
    cannot be read an error is printed and an empty set is returned.
    '''
    resource = '/src/activities/gletters/resource/default-' + locale + '.json'
    try:
        with open(gcompris_qt + resource, encoding='utf-8') as data_file:
            data = json.load(data_file)
    except IOError:
        print('')
        print("**ERROR: Missing resource file %s**" % resource)
        print('[Instructions to create this file](%s)' % ('https://gcompris.net/wiki/Word_Lists_Qt#Simple_Letters_.28Typing_letters.29_level_design'))
        print('')
        return set()

    # Consolidate letters
    letters = set()
    for level in data['levels']:
        for w in level['words']:
            lowered = w.lower()
            # One 'Uxxxx' group per character of the entry
            voice_file = ''.join('U{:04X}'.format(ord(c)) for c in lowered) + '.ogg'
            letters.add(voice_file)
            descriptions[voice_file] = lowered

    # Add numbers needed for words
    letters.update(str(number) + '.ogg' for number in range(10, 21))

    return letters
0360 
def diff_set(title, code, files):
    '''Print the report for one voice set and return its stat from 0 to 1.

    title: heading of the report section.
    code: set of the file names expected by GCompris.
    files: set of the file names found in the voice directory.

    The stat is the share of the names of both sets that are not missing.
    Nothing is printed and 0 is returned when both sets are empty.
    '''

    if not code and not files:
        return 0

    def print_table(heading, names, described_row):
        '''Print a markdown table of names, sorted, with their description'''
        title3(heading)
        print('| File | Description |')
        print('|------|-------------|')
        for name in sorted(names):
            if name in descriptions:
                print(described_row % (name, descriptions[name]))
            else:
                print('|%s |  |' % (name))
        print('')

    title2(title)

    correct = code & files
    missing = code - files
    extra = files - code

    # The correct files are shown with the -v option only
    if verbose and correct:
        print_table("These files are correct", correct, '| %s | %s |')

    if missing:
        print_table("These files are missing", missing, '| %s | %s |')

    # The extra files are shown with the -nn option only
    if notneeded and extra:
        print_table("These files are not needed", extra, '|%s | %s|')

    return 1 - float(len(missing)) / len(code | files)
0409 
def diff_locale_set(title, code, files):
    '''Print which locales of code have a voice directory and which miss it.

    title: heading of the report section.
    code: iterable of locale codes to look for.
    files: only used, along with code, to skip the report when both are
           empty.

    A locale has voices when a directory named after it, or after its part
    before the first '_', exists in the current working directory. The
    locales having voices are listed in verbose mode only.
    '''

    if not code and not files:
        return

    title2(title)

    # Sort out the locales whatever the verbose mode, the missing ones are
    # always reported.
    available = []
    missing = []
    for name in code:
        # Test the full locale first, then its shortened form
        if os.path.isdir(name) or os.path.isdir(name.split('_')[0]):
            available.append(name)
        else:
            missing.append(name)

    if verbose:
        title3("We have voices for these locales:")
        for name in available:
            print('* ' + name)
    print('')
    print("We miss voices for these locales:")
    for name in missing:
        print('* ' + name)
    print('')
0434 
def check_locale_config(title, stats, locale_config):
    '''Display the locales translated below a fixed threshold and return
    the others.

    title: heading of the report section.
    stats: translation status by locale, the rate being the item 3.
    locale_config: iterable of the locale codes to check.

    A locale missing from stats is looked up again by its part before the
    first '_'. Return the sorted descriptions of the locales whose rate
    reaches the threshold.
    '''
    title2(title)
    LIMIT = 0.8
    good_locale = []
    for locale in sorted(locale_config):
        if locale in stats:
            key = locale
        else:
            # Shorten the locale and test again
            key = locale.split('_')[0]
            if key not in stats:
                print("* %s no translation at all" % (locale))
                continue

        label = descriptions.get(key, '')
        if stats[key][3] < LIMIT:
            print('* {:s} ({:s})'.format(label, key))
        else:
            good_locale.append(label)

    print('')
    good_locale.sort()
    summary = ', '.join(good_locale)
    print('There are %d locales above %d%% translation: %s' % (len(good_locale), LIMIT * 100,
                                                               summary))

    return good_locale
0465 
0466 #
0467 # main
0468 # ===
0469 
# Each report is captured in memory as markdown, under its own key
reports = {'stats': StringIO()}
sys.stdout = reports['stats']

string_stats = get_translation_status_from_po_files()
check_locale_config("Locales to remove from LanguageList.qml (translation level < 80%)",
                    string_stats, get_locales_from_config())

print('\n[Guide to contribute recording files](%s)' % ('https://gcompris.net/wiki/Voice_translation_Qt'))

# Calc the big list of locales we have to check: the ones having a po
# directory and the directories of the current working directory
all_locales = sorted(get_locales_from_po_files() | get_locales_from_file())

stats = {}
# Reference copy of the descriptions gathered so far
global_descriptions = copy.deepcopy(descriptions)

# Qt objects used to load the geography QML boards
app = QCoreApplication(sys.argv)
engine = QQmlEngine()
component = QQmlComponent(engine)
0490 
# Build one report per locale.
# The loop variable must be named 'locale': several helpers, like
# get_words_from_code() or get_gletter_alphabet(), read it as a global.
for locale in all_locales:
    sys.stdout = reports[locale] = StringIO()

    # Start again from the descriptions shared by all the locales
    descriptions = copy.deepcopy(global_descriptions)
    po_name = '/poqm/' + locale + '/gcompris_qt.po'
    gcompris_po = None
    try:
        gcompris_po = polib.pofile(gcompris_qt + po_name, encoding='utf-8')
    except OSError:
        # Go on without translation, the source strings are used instead
        print("**ERROR: Failed to load po file %s**" % po_name)
        print('')

    init_intro_description_from_code(locale, gcompris_po)
    init_country_names_from_code(component, locale, gcompris_po)

    title1('{:s} ({:s})'.format(descriptions.get(locale, ''), locale))

    lstats = {'locale': locale}
    lstats['intro'] = diff_set("Intro ({:s}/intro/)".format(locale), get_intro_from_code(), get_files(locale, 'intro'))
    lstats['letter'] = diff_set("Letters ({:s}/alphabet/)".format(locale), get_gletter_alphabet(), get_files(locale, 'alphabet'))

    descriptions['click_on_letter.ogg'] = "Must contains the voice: 'Click on the letter:'"
    # The 'en' voices are the reference for the misc and colors sets
    lstats['misc'] = diff_set("Misc ({:s}/misc/)".format(locale), get_files('en', 'misc'), get_files(locale, 'misc'))

    lstats['color'] = diff_set("Colors ({:s}/colors/)".format(locale), get_files('en', 'colors'), get_files(locale, 'colors'))
    lstats['geography'] = diff_set("Geography ({:s}/geography/)".format(locale), get_geography_on_letter_from_code(component), get_files(locale, 'geography'))
    lstats['words'] = diff_set("Words ({:s}/words/)".format(locale), get_words_from_code(), get_files(locale, 'words'))
    # The checks below only report a missing resource file, they never
    # compare any voice file
    lstats['wordsgame'] = diff_set("Wordsgame", get_wordsgame_from_code(), set())
    lstats['grammar_analysis'] = diff_set("Grammar Analysis", get_grammar_analysis_from_code(), set())
    lstats['grammar_classes'] = diff_set("Grammar Classes", get_grammar_classes_from_code(), set())
    lstats['click_on_letter'] = diff_set("Click on letter", get_click_on_letter_from_code(), set())
    stats[locale] = lstats
0523 
# The summary is one markdown table having one row per locale
sys.stdout = reports['summary'] = StringIO()
sorted_keys = sorted(stats)

title1("GCompris Voice Recording Status Summary")
print('| Locale | Strings | Misc | Letters | Colors | Geography | Words | Intro|')
print('|--------|---------|------|---------|--------|-----------|-------|------|')
summary_row = '| [{:s} ({:s})](voice_status_{:s}.html) | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |'
for locale in sorted_keys:
    stat = stats[locale]
    # A locale without translation status counts as 0
    string_rate = string_stats[locale][3] if locale in string_stats else 0
    print(summary_row.format(descriptions.get(locale, ''), stat['locale'], locale,
                             string_rate,
                             stat['misc'], stat['letter'], stat['color'], stat['geography'],
                             stat['words'], stat['intro']))
0537 
0538 #
0539 # Now we have all the reports
0540 #
0541 
extensions = ['markdown.extensions.tables']

# The reports are complete, give the standard output back
sys.stdout = ref_stdout

# index.html holds the summary followed by the translation stats
with codecs.open("index.html", "w",
                 encoding="utf-8",
                 errors="xmlcharrefreplace"
                 ) as f:
    f.write(get_html_header())

    summary = markdown.markdown(reports['summary'].getvalue(), extensions=extensions)
    summary_lines = []
    for line in summary.split('\n'):
        # A cell holding only a rate like 0.75 is shown as a progress bar
        m = re.match(r'<td>(\d\.\d\d)</td>', line)
        summary_lines.append(get_html_progress_bar(m.group(1)) if m else line)
    summary2 = ''.join(html_line + '\n' for html_line in summary_lines)

    f.write(summary2 + '\n')

    f.write(markdown.markdown(reports['stats'].getvalue(), extensions=extensions))
    f.write(get_html_footer())

# One page per locale, the summary table links to them
for locale in sorted_keys:
    page_name = "voice_status_{:s}.html".format(locale)
    with codecs.open(page_name, "w",
                     encoding="utf-8",
                     errors="xmlcharrefreplace"
                     ) as f:
        f.write(get_html_header())
        f.write(markdown.markdown(reports[locale].getvalue(), extensions=extensions))
        f.write(get_html_footer())
0575         f.write(markdown.markdown(reports[locale].getvalue(), extensions=extensions))
0576         f.write(get_html_footer())