#!/usr/bin/perl -w # # @(#)$Id: bnf2html.pl,v 3.7 2005/07/13 18:32:35 jleffler Exp $ # # Convert SQL-92, SQL-99 BNF plain text file into hyperlinked HTML. use strict; use POSIX qw(strftime); my(%rules); # Indexed by rule names w/o angle-brackets; each entry is a ref to a hash. my(%keywords); # Index by keywords; each entry is a ref to a hash. use constant debug => 0; sub top { print "

Top

\n\n"; } # Usage: add_entry(\%keywords, $keyword, $rule); # Usage: add_entry(\%rules, $rhs, $rule); sub add_entry { my($reflist, $lhs, $rhs) = @_; ${$reflist}{$lhs} = {} unless defined ${$reflist}{$lhs}; ${$reflist}{$lhs}{$rhs} = 1; } sub add_refs { my($def, $tail) = @_; print "\n\n" if debug; return if $tail =~ m/!!/; while ($tail) { $tail =~ s/^\s*//; if ($tail =~ m%^\<([-:/\w\s]+)\>%) { print "\n" if debug; add_entry(\%rules, $1, $def); $tail =~ s%^\<([-:/\w\s]+)\>%%; } elsif ($tail =~ m%^([-:/\w]+)%) { my($token) = $1; print "\n" if debug; add_entry(\%keywords, $token, $def) if $token =~ m%[[:alpha:]][[:alpha:]]% || $token eq 'C'; $tail =~ s%^[-:/\w]+%%; } else { # Otherwise, it is punctuation (such as the BNF metacharacters). $tail =~ s%^[^-:/\w]%%; } } } # NB: webcode replaces tabs with blanks! open WEBCODE, "webcode @ARGV |" or die "$!"; $_ = ; exit 0 unless defined($_); chomp; # Is it wicked to use double quoting with single quotes, as in qq'text'? # It is used quite extensively in this script - beware! print qq'\n'; print "\n"; print "\n\n"; print " $_ \n\n\n\n"; print "

$_

\n\n"; print qq' \n'; print "
\n"; print qq' Cross-Reference: rules \n'; print "
\n"; print qq' Cross-Reference: keywords \n'; print "
\n"; sub rcs_id { my($id) = @_; $id =~ s%^(@\(#\))?\$[I]d: %%o; $id =~ s% \$$%%o; $id =~ s%,v % %o; $id =~ s%\w+ Exp( \w+)?$%%o; my(@words) = split / /, $id; my($version) = "file $words[0] version $words[1] dated $words[2] $words[3]"; return $version; } sub iso8601_format { my($tm) = @_; my $today = strftime("%Y-%m-%d %H:%M:%S+00:00", gmtime($tm)); return($today); } # Print hrefs for non-terminals and keywords. # Also substitute /* Nothing */ for an absence of productions between alternatives. sub print_tail { my($tail, $tcount) = @_; while ($tail) { my($newtail); if ($tail =~ m%^\s+%) { my($spaces) = $&; $newtail = $'; print "\n" if debug; $spaces =~ s% {4,8}%    %g; print $spaces; # Spaces are not a token - don't count them! } elsif ($tail =~ m%^'[^']*'% || $tail =~ m%^"[^"]*"% || $tail =~ m%^!!.*$%) { # Quoted literal - print and ignore. # Or meta-expression... my($quote) = $&; $newtail = $'; print "\n" if debug; $quote =~ s%!!.*% $quote %; print $quote; $tcount++; } elsif ($tail =~ m%^\<([-:/\w\s]+)\>%) { my($nonterm) = $&; $newtail = $'; print "\n" if debug; $nonterm =~ s%\<([-:/\w\s]+)\>%\<$1\>%; print " $nonterm"; $tcount++; } elsif ($tail =~ m%^[\w_]+%) { # Keyword my($keyword) = $&; $newtail = $'; print "\n" if debug; print qq' $keyword '; $tcount++; } else { # Metacharacter, string literal, etc. $tail =~ m%\S+%; my($symbol) = $&; $newtail = $'; print "\n" if debug; if ($symbol eq '|') { print "/* Nothing */ " if $tcount == 0; $tcount = 0; } else { $symbol =~ s%...omitted...%/* $& */%i; $tcount++; } print " $symbol"; } $tail = $newtail; } return($tcount); } my $hr_count = 0; my $tcount = 0; # Ick! my $def; # Current rule # Don't forget - the input has been web-encoded! while () { chomp; next if /^===*$/o; s/\s+$//o; # Remove trailing white space if (/^[ ]*$/) { print "\n"; } elsif (/^---*$/) { print "
\n"; } elsif (/^@.#..Id:/) { # Convert what(1) string identifier into version information my $id = '$Id: bnf2html.pl,v 3.7 2005/07/13 18:32:35 jleffler Exp $'; my($v1) = rcs_id($_); my $v2 = rcs_id($id); print "

\n"; print "Derived from $v1\n"; my $today = iso8601_format(time); print "
\n"; print "Generated on $today by $v2\n"; print "

\n"; } elsif (/ ::=/) { # Definition line $def = $_; $def =~ s%\<([-:/()\w\s]+)\>.*%$1%; my($tail) = $_; $tail =~ s%.*::=\s*%%; print qq'

  \n'; print qq' <$def>    ::='; $tcount = 0; if ($tail) { add_refs($def, $tail); print "  "; $tcount = print_tail($tail, $tcount); } print "\n"; } elsif (/^\s/) { # Expansion line add_refs($def, $_); print "
"; $tcount = print_tail($_, $tcount); } elsif (m/^--[\/]?(\w+)/) { # Pseudo-directive line in lower-case # Print a 'Top' link before


tags except first. top if /--hr/ && $hr_count++ > 0; s%--(/?[a-z][a-z\d]*)%<$1>%; s%\<([-:/\w\s]+)\>%\<$1\>%g; print "$_\n"; } elsif (m%^--##%) { # Undo web-coding s%>%>%g; s%<%<%g; s%&%&%g; s%^--##\s*%%; print "$_\n"; } elsif (m/^--%start\s+(\w+)/) { # Designated start symbol my $start = $1; print qq'

Start symbol: $start

\n'; } else { # Anything unrecognized passed through unchanged! print "$_\n"; } } # Print index of initial letters for keywords. sub print_index_key { my($prefix, @keys) = @_; my %letters = (); foreach my $keyword (@keys) { my $initial = uc substr $keyword, 0, 1; $letters{$initial} = 1; } foreach my $letter ('A' .. 'Z') { if (defined($letters{$letter})) { print qq' $letter \n'; } else { print qq'$letter\n'; } } print "\n"; } ### Generate cross-reference tables { print "
\n\n"; print "
\n"; print qq'\n'; print "

Cross-Reference Table: Rules

\n"; print_index_key("rules", keys %rules); print "\n"; print "\n"; my %letters = (); foreach my $rule (sort { uc $a cmp uc $b } keys %rules) { my $initial = uc substr $rule, 0, 1; my $label = ""; if (!defined($letters{$initial})) { $letters{$initial} = 1; $label = qq' '; } print qq'\n \n\n"; } print "
Rule (non-terminal) Rules using it
$label $rule '; my $pad = ""; foreach my $ref (sort { uc $a cmp uc $b } keys %{$rules{$rule}}) { print qq'$pad <$ref> \n'; $pad = " "; } print "
\n"; print "
\n"; top; } { print "
\n"; print qq'\n'; print "

Cross-Reference Table: Keywords

\n"; print_index_key("keywords", keys %keywords); print "\n"; print "\n"; my %letters = (); foreach my $keyword (sort { uc $a cmp uc $b } keys %keywords) { my $initial = uc substr $keyword, 0, 1; my $label = ""; if (!defined($letters{$initial})) { $letters{$initial} = 1; $label = qq' '; } print qq'\n \n\n"; } print "
Keyword Rules using it
$label $keyword '; my $pad = ""; foreach my $ref (sort { uc $a cmp uc $b } keys %{$keywords{$keyword}}) { print qq'$pad <$ref> \n'; $pad = " "; } print "
\n"; print "
\n"; top; print "
\n"; } printf "%s\n", q'Please send feedback to Jonathan Leffler, variously:'; printf "%s\n", q' jleffler@us.ibm.com or'; printf "%s\n", q' jonathan.leffler@gmail.com .'; print "\n\n\n";