File indexing completed on 2024-05-05 16:47:23

0001 #!/bin/bash
0002 #
0003 #   Copyright (C) 2006-2018 Jarosław Staniek <staniek@kde.org>
0004 #
0005 #   Based on the original script by Michal Svec <rebel@atrey.karlin.mff.cuni.cz>
0006 #
0007 #   This program is free software; you can redistribute it and/or
0008 #   modify it under the terms of the GNU General Public
0009 #   License as published by the Free Software Foundation; either
0010 #   version 2 of the License, or (at your option) any later version.
0011 #
0012 #   This program is distributed in the hope that it will be useful,
0013 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
0014 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015 #    General Public License for more details.
0016 #
0017 #   You should have received a copy of the GNU General Public License
0018 #   along with this program; see the file COPYING.  If not, write to
0019 #   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0020 #   Boston, MA 02110-1301, USA.
0021 
#
# Generates the transliteration_table.h and transliteration_table.cpp files.
# python-Unidecode is used internally (https://pypi.org/project/Unidecode).
#
# TODO: port to Python; the bash version can take 4 hours to run.
#
0028 
# Abort on the first unhandled command failure.
set -e

# Cleanup handler: removes the temporary output files, if any.
# Globals: out_cpp_temp, out_h_temp (read).
# The ${var:+"$var"} form passes each path as exactly one argument (safe for
# whitespace and glob characters) and passes nothing at all when the variable
# is still unset or empty, e.g. if the script exits before it is assigned.
function finish {
    rm -f -- ${out_cpp_temp:+"$out_cpp_temp"} ${out_h_temp:+"$out_h_temp"}
}
# Run the cleanup on every exit path, successful or not.
trap finish EXIT
0034 
# Final output files and the temporary files they are built in.
out_cpp="transliteration_table.cpp"
out_h="transliteration_table.h"
out_cpp_temp="${out_cpp}.tmp"
out_h_temp="${out_h}.tmp"
# Highest code point that gets a table entry (entries cover 0..max).
max=65534

# C++ declaration shared by the header (as "extern") and the definition.
decl="const char *const transliteration_table[TRANSLITERATION_TABLE_SIZE + 1]"

# Number of code points covered by the table.
table_size=$(( max + 1 ))

# Comment banner placed at the top of both generated files.
# "$0" is quoted so a script path containing whitespace does not break basename.
header="/* Transliteration table of ${table_size} unicode characters
   Do not edit this file, it is generated
   by $(basename -- "$0") script. */
"

# Header file: banner, table size macro and the extern declaration.
printf '%s\n' "${header}
#define TRANSLITERATION_TABLE_SIZE ${table_size}
extern ${decl};" > "${out_h_temp}"

# Source file preamble; deliberately written without a trailing newline
# because the table entries are appended right after the opening brace.
printf '%s' "${header}
#include \"${out_h}\"
#define N nullptr
${decl} = {" > "${out_cpp_temp}"
0056 
# Producer: for every code point 0..max emit three lines:
#   1. the decimal code point
#   2. a C comment holding the 4-digit hex code point
#   3. the character itself, or "_" as a placeholder
# Shell builtins and arithmetic evaluation are used instead of seq/expr and
# command substitutions to avoid several process forks per code point.
for (( i = 0; i <= max; i++ )); do
    printf -v f '%04x' "$i"
    if (( i < 16 || i == 92 )); then
        # Code points below 16 and the backslash (92) get a placeholder.
        printf '%d\n/*%s*/\n_\n' "$i" "$f"
    elif (( i < 128 && i != 32 )); then
        # Remaining code points below 128, except the space (32), are emitted
        # through an octal escape in the printf format string.
        printf -v oct '%03o' "$i"
        printf "${i}\n/*${f}*/\n\\${oct}\n"
    else
        # Everything else (including the space) goes through the external
        # printf and its \uXXXX escape. If that command fails, "_" is emitted
        # instead. NOTE(review): this presumably relies on the first two
        # lines having been written before the failure - confirm.
        { /usr/bin/printf "${i}\n/*${f}*/\n\u${f}\n" 2>&- || echo "_"; }
    fi
done | \
# Consumer: reads the three-line records and appends one table entry each.
while read -r i && read -r f && read -r ch; do
    if (( i % 8 == 0 )); then
        # Start a new output line every 8 entries and keep the hex comment;
        # every 320 entries also report the progress percentage on stderr.
        (( i % 320 )) || printf '..%d%%' "$(( i * 100 / max ))" >&2 #progress
        echo
    else
        f= # <-- comment to add /*numbers*/ everywhere
    fi
    # Transliterate with unidecode and keep alphanumeric characters only.
    # NOTE(review): "_" is not in [:alnum:], so the first substitution already
    # removes underscores; the second substitution and the "_" comparison
    # below appear to have no effect - confirm the intended pattern.
    r=$(unidecode -c "${ch}" | sed -r -e 's/[^[:alnum:]]//g;s/_+/_/g')
    if [[ -z "$r" || "$r" == "_" ]]; then
        # No usable transliteration: emit the N (nullptr) placeholder.
        printf '%s' "${f}N/*${ch}*/,"
    else
        printf '%s' "${f}\"$r\"/*${ch}*/,"
    fi
done >> "$out_cpp_temp"
# Final progress marker; also terminates the progress line with a newline.
printf '%s\n' '..100%'

# Close the table: a blank line, the terminating N (nullptr) entry with the
# closing brace, and removal of the helper macro.
printf '\n%s\n%s\n' 'N};' '#undef N' >> "$out_cpp_temp"

# Move the finished temporary files into place. Paths are quoted and
# preceded by "--" so they are always treated as single file operands.
mv -- "$out_cpp_temp" "$out_cpp"
mv -- "$out_h_temp" "$out_h"