File indexing completed on 2024-12-01 10:29:17

0001 #!/bin/bash
0002 ################################################################################
0003 # sql_keywords.sh
0004 #
0005 # Generate sets of driver-specific keywords.
0006 # This program generates files that can be used as part of KDb drivers
0007 # that list keywords specific to that driver, i.e. words that have to be
0008 # escaped if they are to be used as identifiers in the database.
0009 #
0010 # It extracts keywords from the lexer of the DB sources, deletes keywords that
0011 # are already going to be escaped because they are part of KDb's SQL dialect,
0012 # and writes the resulting keywords to a "char *keywords[]" construct in a .cpp 
0013 # file that can then be used in the driver.
0014 #
0015 # To use:
0016 # Put the DB source tarballs/sources (e.g. mysql-4.1.7.tar.gz, 
0017 # postgresql-base-7.4.6.tar.gz, SQLite's tokenize.c) in the current directory
0018 # then run.
0019 #
0020 # sed, awk, grep have been used without much thought -
0021 # CHECK THE OUTPUT BEFORE INCLUDING IT IN A DRIVER!
0022 #
0023 # 2004 Martin Ellis <martin.ellis@kdemail.net>
0024 # 2011 Jarosław Staniek <staniek@kde.org> - adopted for KDb
0025 
# Stop at the first command that fails.
set -o errexit
# Name of this script; header() embeds it in the banner of generated files.
progname='sql_keywords.sh'
0028 
0029 ################################################################################
# C++ file generator
#
# header
# Starts a fresh generated C++ file: licence banner, optional #include line
# and the opening line of the keyword array initializer.
# params : array   - scoped datatype and name of the array to generate
#          include - a file to include (or "" if none)
#          inFile  - name of the keyword source, quoted in the banner
#          outFile - file to write (any previous content is replaced)
# reads  : progname - script name quoted in the banner
header () {
  local arrayDecl="$1" includeFile="$2" keywordSource="$3" target="$4"

  echo "Writing keywords in $keywordSource to $target"

  # Everything inside the group goes to the target through one redirection.
  {
    cat <<LICENSE
/* This file is part of the KDE project
   Copyright (C) 2004 Martin Ellis <martin.ellis@kdemail.net>
   Copyright (C) 2004 Jarosław Staniek <staniek@kde.org>

   This file has been automatically generated from
   tools/$progname and $keywordSource.

   Please edit the $progname, not this file!

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
*/
LICENSE
    # The #include line is only emitted when a header name was given.
    if [ -n "$includeFile" ] ; then
      echo "#include <$includeFile>"
    fi
    cat <<ARRAY_OPEN

  ${arrayDecl}[] = {
ARRAY_OPEN
  } > "$target"
}
0075 
# body
# Appends one quoted, comma-terminated C string literal per keyword.
# params : inFile  - file containing raw keywords, one per line
#          outFile - generated C++ file to append to
body() {
  local inFile="$1"
  local outFile="$2"
  # "+" (not "*") so that blank lines do not produce empty "" entries, and
  # plain $1 so that the keyword itself is printed: "$$1" means "$($1)" in
  # awk, which yields the wrong field for keywords that start with a digit.
  awk '/^[a-zA-Z_0-9]+/ { print "\t\t\"" $1 "\","; }' "$inFile" >> "$outFile"
}
0081 
# footer
# Terminates the keyword array in a generated C++ file: a 0 sentinel entry
# followed by the closing "};".
# params : outFile - generated C++ file to append to
footer() {
  local outFile="$1"
  # header() opens only the array initializer, so only that one brace is
  # closed here; an extra "}" would leave the generated file unbalanced.
  cat <<EOF >> "$outFile"
                0
  };
EOF
}
0091 
0092 ################################################################################
0093 # Keyword comparison functions
0094 # Globals: keywords
0095 
# readKeywords
# params: filename - file of keywords to read, one keyword per line
# sets:   keywords - array of keywords in the file, in file order (any
#                    previous content of the array is discarded)
readKeywords () {
  local filename="$1"
  local keyword

  keywords=()
  # Appending with += avoids a counter: "(( i++ ))" returns status 1 when i
  # is 0, which aborts the whole script under "set -e".
  # The "|| [ -n ... ]" part keeps a final line that lacks a trailing newline.
  while read -r keyword || [ -n "$keyword" ] ; do
    keywords+=("$keyword")
  done < "$filename"
}
0108 
# compareKeywords
# reads: kdbSQL    - array of keywords already handled by KDb
#        driverSQL - array of all keywords of the driver
# sets:  keywords  - driver keywords that are not keywords in KDb, in the
#                    order they appear in driverSQL
#        numFound  - number of entries stored in keywords
compareKeywords () {
  local candidate known found

  keywords=()
  for candidate in "${driverSQL[@]}" ; do
    found="no"
    for known in "${kdbSQL[@]}" ; do
      if [ "$candidate" == "$known" ] ; then
        found="yes"
        break
      fi
    done
    if [ "$found" == "no" ] ; then
      keywords+=("$candidate")
    fi
  done
  # Derived from the array instead of "(( numFound++ ))", whose status 1 on
  # the first increment aborts the script under "set -e".
  numFound=${#keywords[@]}
}
0128 
0129 
# getDriverKeywords
# params : kdb     - file listing KDb's own keywords, one per line
#          driver  - file listing all keywords of the driver, one per line
#          outFile - file to (re)write with the driver keywords that are
#                    not in the kdb file, one per line
# uses   : keywords - global scratch array filled by readKeywords and
#                     compareKeywords; unset again before returning
getDriverKeywords () {
  local kdb="$1"
  local driver="$2"
  local outFile="$3"

  # Local arrays; compareKeywords reads them through dynamic scoping.
  local -a kdbSQL=()
  local -a driverSQL=()

  echo "Looking for driver-specific keywords in \"$driver\""
  readKeywords "$kdb"
  kdbSQL=("${keywords[@]}")
  unset keywords

  readKeywords "$driver"
  driverSQL=("${keywords[@]}")
  unset keywords

  compareKeywords
  echo "Writing driver-specific keywords for \"$driver\" to \"$outFile\""
  # Always create the output file, even when there is nothing to write, so
  # that later steps can read it and timestamp checks against it work.
  : > "$outFile"
  if [ "${#keywords[@]}" -gt 0 ] ; then
    printf '%s\n' "${keywords[@]}" >> "$outFile"
  fi
  unset keywords
}
0163 ################################################################################
0164 
0165 
0166 ################################################################################
0167 # KDb lexer
0168 
# checkKDbKeywords
# (Re)builds "kdb.all", the list of KDb's own keywords: the quoted tokens
# found in the scanner source (sorted, duplicates removed) followed by the
# words listed in kdb_keywords.txt.
# The file is rebuilt when it is missing or when either input is newer.
checkKDbKeywords () {
  local scanner="../src/parser/KDbSqlScanner.l"
  local extraKeywords="kdb_keywords.txt"

  # Both inputs feed kdb.all, so a change to either one makes it stale.
  if [ ! -r "kdb.all" ] || [ "$scanner" -nt "kdb.all" ] \
     || [ "$extraKeywords" -nt "kdb.all" ] ; then
    echo "Getting keywords"
    # Rule lines start with an optional "(" and a quoted word; sed puts each
    # quoted word on its own line and awk keeps the purely alphanumeric ones.
    grep '^(\?"[a-zA-Z_0-9]' "$scanner" | \
       sed 's/(\?"\([^"]*\)"[^"]*/\1\n/g' | \
       awk '/^[a-zA-Z_0-9]+$/ {print $1;}' |
       sort -u > "kdb.all"
    awk '/^[a-zA-Z_0-9]+$/ {print $1;}' "$extraKeywords" >> "kdb.all"
  fi
}
0180 
0181 ################################################################################
0182 # DB lexer functions
0183 # These functions munge the extracted lexers from DBs and write the collected
0184 # keywords to file
0185 
# getSQLiteKeywords
# params : inFile  - SQLite3 lexer file
#          outFile - all SQLite3 keywords, one per line
getSQLiteKeywords () {
  local inFile="$1"
  local outFile="$2"

  echo "Getting SQLite keywords ($inFile -> $outFile)"
  # Only the aKeywordTable initializer is scanned; the second field of each
  # matching entry is the quoted keyword. File names are quoted so that
  # paths containing spaces work.
  sed -n '/^static Keyword aKeywordTable/,/};/p' "$inFile" | \
    awk '/  { "[a-zA-Z_0-9]*"/ { print $2;}' | \
    sed 's/"\(.*\)".*/\1/g' > "$outFile"
}
0198 
# getPostgreSQLKeywords
# params : inFile  - PostgreSQL keyword table source file
#          outFile - all PostgreSQL keywords, upper-cased, one per line
getPostgreSQLKeywords () {
  local inFile="$1"
  local outFile="$2"

  echo "Getting PostgreSQL keywords ($inFile -> $outFile)"
  # Only the ScanKeywords initializer is scanned. The keywords are converted
  # to upper case on the way out. File names are quoted so that paths
  # containing spaces work.
  sed -n '/^static const ScanKeyword ScanKeywords/,/};/p' "$inFile" | \
    awk '/\t{"[a-zA-Z_0-9]*"/ { print $1;}' | \
    sed 's/.*"\(.*\)".*/\1/g' | tr 'a-z' 'A-Z' > "$outFile"
}
0208 
# getMySQLKeywords
# params : inFile  - MySQL lexer file
#          outFile - all MySQL keywords, one per line
getMySQLKeywords () {
  local inFile="$1"
  local outFile="$2"

  echo "Getting MySQL keywords ($inFile -> $outFile)"
  # Only the "symbols" initializer is scanned; entries whose name is not
  # purely alphanumeric (operators such as "&&") do not match the awk
  # pattern. File names are quoted so that paths containing spaces work.
  sed -n '/^static SYMBOL symbols/,/};/p' "$inFile" | \
    awk '/  { "[a-zA-Z_0-9]*"/ { print $2;}' | \
    sed 's/"\(.*\)".*/\1/g' > "$outFile"
}
0221 
0222 ################################################################################
0223 # DB tarball functions
0224 # These functions extract the lexer files from the DB source tarballs
0225 
# checkExtracted
# Unpacks a single file from a gzip-compressed tarball unless a readable
# copy of it already exists at the same relative path.
# params : tarball - tarball containing backend DB source
#          file - file in tarball containing DB's lexer
checkExtracted () {
  local archive="$1" member="$2"

  # Already on disk: nothing to extract.
  if [ -r "$member" ] ; then
    return 0
  fi

  printf 'Getting file "%s" from "%s"\n' "$member" "$archive"
  tar -zxf "$archive" "$member"
}
0238 
# _writeDriverKeywords (private helper of checkTarballs)
# Regenerates "<appVer>.new" and "<filePrefix>keywords.cpp" when
# "<appVer>.all" is newer than "<appVer>.new" (or the latter is missing).
# params : appName    - driver class prefix used in the array name
#          filePrefix - prefix of the generated .cpp and included .h file
#          appVer     - base name of the .all/.new keyword files
#          lexer      - name of the lexer source quoted in the file banner
_writeDriverKeywords () {
  local appName="$1"
  local filePrefix="$2"
  local appVer="$3"
  local lexer="$4"

  if [ "$appVer.all" -nt "$appVer.new" ] ; then
    getDriverKeywords "kdb.all" "$appVer.all" "$appVer.new"
    header "const char* const ${appName}Driver::keywords" "${filePrefix}driver.h" "$lexer" "${filePrefix}keywords.cpp"
    body   "$appVer.new" "${filePrefix}keywords.cpp"
    footer "${filePrefix}keywords.cpp"
  fi
}

# checkTarballs
# Generates the driver keyword files for SQLite (from tokenize.c in the
# current directory) and for every recognised mysql-*.tar.gz and
# postgresql-*.tar.gz tarball in the current directory.
checkTarballs () {
  local pathInTar
  local appName
  local appVer
  local inFile
  local filePrefix
  local extractor
  local tarball

  # SQLite (native DB backend) keywords
  appName="SQLite"
  appVer=sqlite
  inFile="tokenize.c"
  filePrefix="sqlite"
  if [ ! -r "$appVer.all" ] || [ ! -r "$appVer.new" ] ; then
    getSQLiteKeywords "$inFile" "$appVer.all"
  fi
  _writeDriverKeywords "$appName" "$filePrefix" "$appVer" "$inFile"

  # Plain globs instead of parsing "ls"; a pattern that matches nothing is
  # left as a literal string, which the -e test below skips.
  for tarball in mysql-*.tar.gz postgresql-*.tar.gz ; do
    [ -e "$tarball" ] || continue

    case "$tarball" in
      mysql-4.1.[0-9\.]*.tar.gz)
        pathInTar="sql/lex.h"
        appName="MySql"
        filePrefix="mysql"
        appVer="${tarball%.tar.gz}"
        extractor=getMySQLKeywords
        ;;

      postgresql-base-7.4.[0-9\.]*.tar.gz)
        pathInTar="src/backend/parser/keywords.c"
        appName="pqxxSql"
        filePrefix="pqxx"
        # The "-base" part of the tarball name is dropped to form the
        # directory name used for paths inside the tarball.
        appVer="${tarball%.tar.gz}"
        appVer="${appVer/-base/}"
        extractor=getPostgreSQLKeywords
        ;;

      *)
        echo "Don't know how to deal with $tarball - ignoring"
        continue
        ;;
    esac

    if [ ! -r "$appVer.all" ] || [ ! -r "$appVer.new" ] ; then
      checkExtracted "$tarball" "$appVer/$pathInTar"
      "$extractor" "$appVer/$pathInTar" "$appVer.all"
      # Remove the extracted tree again; ":?" guards against an empty name.
      rm -rf -- "${appVer:?}"
    fi

    _writeDriverKeywords "$appName" "$filePrefix" "$appVer" "$appVer/$pathInTar"
  done
}
0306 
# Build the list of KDb's own keywords and the C++ table generated from it.
checkKDbKeywords
# Two-line description of the keyword sources, quoted in the file banner.
src=$'src/parser/KDbSqlScanner.l\n   and tools/kdb_keywords.txt'
header "const char* const KDbDriverPrivate::kdbSQLKeywords" "Driver_p.h" "$src" "keywords.cpp"
body "kdb.all" "keywords.cpp"
footer "keywords.cpp"

# Driver-specific keyword files, then a "name<TAB>line count" summary of
# all keyword lists sorted by name.
checkTarballs
wc -l *.all *.new \
  | awk '{print $2" "$1}' \
  | sort \
  | awk '{print $1"\t"$2}'