File indexing completed on 2024-05-12 15:43:16

0001 #! /usr/bin/perl -w
0002 #
0003 # Static Hashtable Generator
0004 #
0005 # (c) 2000-2002 by Harri Porten <porten@kde.org> and
0006 #                  David Faure <faure@kde.org>
0007 # Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org>
0008 #
0009 # Part of the KJS library.
0010 #
0011 # This library is free software; you can redistribute it and/or
0012 # modify it under the terms of the GNU Lesser General Public
0013 # License as published by the Free Software Foundation; either
0014 # version 2 of the License, or (at your option) any later version.
0015 #
0016 # This library is distributed in the hope that it will be useful,
0017 # but WITHOUT ANY WARRANTY; without even the implied warranty of
0018 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0019 # Lesser General Public License for more details.
0020 #
0021 # You should have received a copy of the GNU Lesser General Public
0022 # License along with this library; if not, write to the Free Software
0023 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
0024 #
0025 
0026 use strict;
0027 
0028 my $file = $ARGV[0];
0029 shift;
0030 my $findSize = 0;
0031 my $includelookup = 0;
0032 my $useNameSpace;
0033 
0034 # Use -s as second argument to make it try many hash sizes
0035 if (defined($ARGV[0]) && $ARGV[0] eq "-s") { $findSize = 1; shift; }
0036 
0037 # Use -i as second argument to make it include "lookup.h"
0038 if (defined($ARGV[0]) && $ARGV[0] eq "-i") { $includelookup = 1; shift; }
0039 
0040 # Use -n as second argument to make it use the third argument as namespace parameter ie. -n KDOM
0041 if (defined($ARGV[0]) && $ARGV[0] eq "-n") { $useNameSpace = $ARGV[1]; shift; shift; }
0042 
0043 open(IN, $file) or die "No such file $file";
0044 
0045 my @keys = ();
0046 my @values = ();
0047 my @attrs = ();
0048 my @params = ();
0049 my @hashes = ();
0050 my @table = ();
0051 my @links = ();
0052 
0053 my $inside = 0;
0054 my $name;
0055 my $size;
0056 my $hashsize;
0057 my $banner = 0;
0058 sub calcTable();
0059 sub output();
0060 sub hashValue($);
0061 
0062 while (<IN>) {
0063   chop;
0064   s/^\s*//g;
0065   if (/^\#|^$/) {
0066       # comment. do nothing
0067     } elsif (/^\@begin/ && !$inside) {
0068       if (/^\@begin\s*([:_\w]+)\s*(\d+)\s*$/) {
0069         $inside = 1;
0070         $name = $1;
0071         $hashsize = $2;
0072       } else {
0073          printf STDERR "WARNING: \@begin without table name and hashsize, skipping $_\n";
0074       }
0075     } elsif (/^\@end\s*$/ && $inside) {
0076 
0077       if($findSize) {
0078     my $entriesnum=@keys;
0079         print STDERR "Table: $name   $entriesnum entries\n";
0080     for( my $i=3 ; $i<79 ; ++$i) { $hashsize=$i ; calcTable(); }
0081       } else {
0082         calcTable();
0083       }
0084 
0085       output();
0086       @keys = ();
0087       @values = ();
0088       @attrs = ();
0089       @params = ();
0090       @table = ();
0091       @links = ();
0092       @hashes = ();
0093       $inside = 0;
0094     } elsif (/^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*$/ && $inside) {
0095       my $key = $1;
0096       my $val = $2;
0097       my $att = $3;
0098       my $param = $4;
0099       push(@keys, $key);
0100       push(@values, $val);
0101       push(@hashes, hashValue($key));
0102       printf STDERR "WARNING: Number of arguments missing for $key/$val\n"
0103         if ( $att =~ m/Function/ && length($param) == 0);
0104       push(@attrs, length($att) > 0 ? $att : "0");
0105       push(@params, length($param) > 0 ? $param : "0");
0106     } elsif ($inside) {
0107       die "invalid data {" . $_ . "}";
0108     }
0109 }
0110 
0111 die "missing closing \@end" if ($inside);
0112 
0113 sub calcTable() {
0114   $size = $hashsize;
0115   my $collisions = 0;
0116   my $maxdepth = 0;
0117   my $i = 0;
0118   foreach my $key (@keys) {
0119     my $depth = 0;
0120     my $h = hashValue($key) % $hashsize;
0121     while (defined($table[$h])) {
0122       if (defined($links[$h])) {
0123     $h = $links[$h];
0124     $depth++;
0125       } else {
0126     $collisions++;
0127     $links[$h] = $size;
0128     $h = $size;
0129     $size++;
0130       }
0131     }
0132     #print STDERR "table[$h] = $i\n";
0133     $table[$h] = $i;
0134     $i++;
0135     $maxdepth = $depth if ( $depth > $maxdepth);
0136   }
0137 
0138   # Ensure table is big enough (in case of undef entries at the end)
0139   if ( $#table+1 < $size ) {
0140     $#table = $size-1;
0141   }
0142   #print STDERR "After loop: size=$size table=".($#table+1)."\n";
0143 
0144   if ($findSize) {
0145     my $emptycount = 0;
0146     foreach my $entry (@table) {
0147       $emptycount++ if (!defined($entry));
0148     }
0149     print STDERR "Hashsize: $hashsize  Total Size: $size Empty: $emptycount MaxDepth: $maxdepth Collisions: $collisions\n";
0150   }
0151 #  my $debugtable = 0;
0152 #  foreach my $entry (@table) {
0153 #    print STDERR "$debugtable " . (defined $entry ? $entry : '<undefined>');
0154 #    print STDERR " -> " . $links[$debugtable] if (defined($links[$debugtable]));
0155 #    print STDERR "\n";
0156 #    $debugtable++;
0157 #  }
0158 }
0159 
0160 sub leftShift($$) {
0161     my ($value, $distance) = @_;
0162     return (($value << $distance) & 0xFFFFFFFF);
0163 }
0164 
0165 # Paul Hsieh's SuperFastHash
0166 # http://www.azillionmonkeys.com/qed/hash.html
0167 # Ported from UString..
0168 sub hashValue($) {
0169   my @chars = split(/ */, $_[0]);
0170 
0171   # This hash is designed to work on 16-bit chunks at a time. But since the normal case
0172   # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
0173   # were 16-bit chunks, which should give matching results
0174 
0175   my $EXP2_32 = 4294967296;
0176 
0177   my $hash = 0x9e3779b9;
0178   my $l    = scalar @chars; #I wish this was in Ruby --- Maks
0179   my $rem  = $l & 1;
0180   $l = $l >> 1;
0181 
0182   my $s = 0;
0183 
0184   # Main loop
0185   for (; $l > 0; $l--) {
0186     $hash   += ord($chars[$s]);
0187     my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
0188     $hash   = (leftShift($hash, 16)% $EXP2_32) ^ $tmp;
0189     $s += 2;
0190     $hash += $hash >> 11;
0191     $hash %= $EXP2_32;
0192   }
0193 
0194   # Handle end case
0195   if ($rem !=0) {
0196     $hash += ord($chars[$s]);
0197     $hash ^= (leftShift($hash, 11)% $EXP2_32);
0198     $hash += $hash >> 17;
0199   }
0200 
0201   # Force "avalanching" of final 127 bits
0202   $hash ^= leftShift($hash, 3);
0203   $hash += ($hash >> 5);
0204   $hash = ($hash% $EXP2_32);
0205   $hash ^= (leftShift($hash, 2)% $EXP2_32);
0206   $hash += ($hash >> 15);
0207   $hash = $hash% $EXP2_32;
0208   $hash ^= (leftShift($hash, 10)% $EXP2_32);
0209 
0210   # this avoids ever returning a hash code of 0, since that is used to
0211   # signal "hash not computed yet", using a value that is likely to be
0212   # effectively the same as 0 when the low bits are masked
0213   $hash = 0x80000000  if ($hash == 0);
0214 
0215   return $hash;
0216 }
0217 
0218 sub output() {
0219   if (!$banner) {
0220     $banner = 1;
0221     print "/* Automatically generated from $file using $0. DO NOT EDIT ! */\n";
0222   }
0223 
0224   my $nameEntries = "${name}Entries";
0225   $nameEntries =~ s/:/_/g;
0226 
0227   print "\n#include \"lookup.h\"\n" if ($includelookup);
0228   if ($useNameSpace) {
0229     print "\nnamespace ${useNameSpace}\n{\n";
0230   } else {
0231     print "\nnamespace KJS {\n";
0232   }
0233   print "\nstatic const struct KJS::HashEntry ".$nameEntries."[] = {\n";
0234   my $i = 0;
0235   #print STDERR "writing out table with ".($#table+1)." entries\n";
0236 
0237   if ($hashsize eq 0) {
0238     # To make the hash table lookup code fast, we don't allow tables of size 0.
0239     # That way it can do a modulo by the size without a special case to avoid division by 0.
0240     print "   \{ nullptr, 0, 0, 0, nullptr \}\n";
0241     $hashsize = 1;
0242     $size = 1;
0243   } else {
0244     foreach my $entry (@table) {
0245       if (defined($entry)) {
0246         my $key = $keys[$entry];
0247         print "   \{ \"" . $key . "\"";
0248         print ", static_cast<int>(" . $values[$entry] . ")";
0249         my $kjsattrs = $attrs[$entry];
0250         if ($kjsattrs ne "0") {
0251          $kjsattrs =~ s/([^|]+)/KJS::$1/g; # DontDelete|Function -> KJS::DontDelete|KJS::Function
0252         }
0253         print ", " . $kjsattrs;
0254         print ", " . $params[$entry];
0255         print ", ";
0256         if (defined($links[$i])) {
0257           print "&" . $nameEntries . "[" . $links[$i] . "]" . " \}";
0258         } else {
0259           print "nullptr \}"
0260         }
0261         print "/* " . $hashes[$entry] . " */ ";
0262       } else {
0263         print "   \{ nullptr, 0, 0, 0, nullptr \}";
0264       }
0265       print "," unless ($i == $size - 1);
0266       print "\n";
0267       $i++;
0268     }
0269   }
0270   print "};\n\n";
0271   print "const struct KJS::HashTable $name = ";
0272   print "\{ 2, $size, ".$nameEntries.", $hashsize \};\n\n";
0273   print "} // namespace\n";
0274 }