tools/gen_keywordlist.pl (108 lines of code) (raw):

#
# For PostgreSQL Database Management System:
# (formerly known as Postgres, then as Postgres95)
#
# Portions Copyright (c) 1996-2010, The PostgreSQL Global Development Group
#
# Portions Copyright (c) 1994, The Regents of the University of California
#
# Permission to use, copy, modify, and distribute this software and its documentation for any purpose,
# without fee, and without a written agreement is hereby granted, provided that the above copyright notice
# and this paragraph and the following two paragraphs appear in all copies.
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY
# OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA
# HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#----------------------------------------------------------------------
#
# gen_keywordlist.pl
#    Perl script that transforms a list of keywords into a ScanKeywordList
#    data structure that can be passed to ScanKeywordLookup().
#
# The input is a C header file containing a series of macro calls
#    PG_KEYWORD("keyword", ...)
# Lines not starting with PG_KEYWORD are ignored.  The keywords are
# implicitly numbered 0..N-1 in order of appearance in the header file.
# Currently, the keywords are required to appear in ASCII order.
#
# The output is a C header file that defines a "const ScanKeywordList"
# variable named according to the -v switch ("ScanKeywords" by default).
# The variable is marked "static" unless the -e switch is given.
#
# ScanKeywordList uses hash-based lookup, so this script also selects
# a minimal perfect hash function for the keyword set, and emits a
# static hash function that is referenced in the ScanKeywordList struct.
# The hash function is case-insensitive unless --no-case-fold is specified.
# Note that case folding works correctly only for all-ASCII keywords!
#
#
# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/tools/gen_keywordlist.pl
#
#----------------------------------------------------------------------

use strict;
use warnings;
use Getopt::Long;

use FindBin;
use lib $FindBin::RealBin;

use PerfectHash;

my $output_path = '';
my $extern      = 0;
my $case_fold   = 1;
my $varname     = 'ScanKeywords';

GetOptions(
    'output:s'   => \$output_path,
    'extern'     => \$extern,
    'case-fold!' => \$case_fold,
    'varname:s'  => \$varname) || usage();

my $kw_input_file = shift @ARGV || die "No input file.\n";

# Make sure output_path ends in a slash if needed.
if ($output_path ne '' && substr($output_path, -1) ne '/')
{
    $output_path .= '/';
}

$kw_input_file =~ /(\w+)\.h$/
  || die "Input file must be named something.h.\n";
my $base_filename = $1 . '_d';
my $kw_def_file   = $output_path . $base_filename . '.h';

#
# Phase 1: read and validate the input, and compute everything that will be
# emitted.  All of this is done BEFORE the output file is opened, so that a
# failure here does not leave a truncated or partial header on disk.
#

# Parse input file for keyword names.
open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n";

my @keywords;
while (<$kif>)
{
    if (/^PG_KEYWORD\("(\w+)"/)
    {
        push @keywords, $1;
    }
}

close($kif);

# When being case-insensitive, insist that the input be all-lower-case.
if ($case_fold)
{
    foreach my $kw (@keywords)
    {
        die qq|The keyword "$kw" is not lower-case in $kw_input_file\n|
          if ($kw ne lc $kw);
    }
}

# Error out if the keyword names are not in ASCII order.
#
# While this isn't really necessary with hash-based lookup, it's still
# helpful because it provides a cheap way to reject duplicate keywords.
# Also, insisting on sorted order ensures that code that scans the keyword
# table linearly will see the keywords in a canonical order.
for my $i (0 .. $#keywords - 1)
{
    die
      qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n|
      if ($keywords[$i] cmp $keywords[ $i + 1 ]) >= 0;
}

# Compute the numerical offsets which will be used to index into the
# keyword string.  Also determine max keyword length.
my @offsets;
my $offset  = 0;
my $max_len = 0;
foreach my $name (@keywords)
{
    my $this_length = length($name);

    # The offsets are emitted into a uint16 array below, so refuse to
    # generate a table whose offsets would not fit.
    die "Keywords in $kw_input_file are too long in total for uint16 offsets\n"
      if $offset > 0xFFFF;

    push @offsets, $offset;

    # Calculate the cumulative offset of the next keyword,
    # taking into account the null terminator.
    $offset += $this_length + 1;

    # Update max keyword length.
    $max_len = $this_length if $max_len < $this_length;
}

# Build the definition of the hash function.
my $funcname = $varname . "_hash_func";

my $f = PerfectHash::generate_hash_function(\@keywords, $funcname,
    case_fold => $case_fold);

#
# Phase 2: write the output header.
#

open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n";

# Opening boilerplate for keyword definition header.
printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
/*-------------------------------------------------------------------------
 *
 * %s.h
 *    List of keywords represented as a ScanKeywordList.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES
 *  ******************************
 *  *** DO NOT EDIT THIS FILE! ***
 *  ******************************
 *
 *  It has been GENERATED by src/tools/gen_keywordlist.pl
 *
 *-------------------------------------------------------------------------
 */

#ifndef %s_H
#define %s_H

#include "common/kwlookup.h"

EOM

# Emit the string containing all the keywords.
printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
print $kwdef join qq|\\0"\n\t"|, @keywords;
print $kwdef qq|";\n\n|;

# Emit the array of numerical offsets computed above.
printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname;

foreach my $kw_offset (@offsets)
{
    print $kwdef "\t$kw_offset,\n";
}

print $kwdef "};\n\n";

# Emit a macro defining the number of keywords.
# (In some places it's useful to have access to that as a constant.)
printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;

# Emit the definition of the hash function.
printf $kwdef qq|static %s\n|, $f;

# Emit the struct that wraps all this lookup info into one variable.
print $kwdef "static " if !$extern;
printf $kwdef "const ScanKeywordList %s = {\n", $varname;
printf $kwdef qq|\t%s_kw_string,\n|,            $varname;
printf $kwdef qq|\t%s_kw_offsets,\n|,           $varname;
printf $kwdef qq|\t%s,\n|,                      $funcname;
printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|,         uc $varname;
printf $kwdef qq|\t%d\n|,                       $max_len;
print $kwdef "};\n\n";

printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;

# Buffered write errors (e.g. disk full) are only reported at close time,
# so check the result rather than relying on the implicit close at exit.
close($kwdef) || die "$kw_def_file: $!\n";


# Print a usage message and exit with an error status (via die).
sub usage
{
    die <<EOM;
Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] [--[no-]case-fold] input_file
    --output        Output directory (default '.')
    --varname       Name for ScanKeywordList variable (default 'ScanKeywords')
    --extern        Allow the ScanKeywordList variable to be globally visible
    --no-case-fold  Keyword matching is to be case-sensitive

gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList.
The output filename is derived from the input file by inserting _d,
for example kwlist_d.h is produced from kwlist.h.
EOM
}