#!/usr/bin/env perl
#
# @(#)$Id: bnf2html.pl,v 3.11 2011/07/11 19:57:38 jleffler Exp $
#
# Convert SQL-92, SQL-99 BNF plain text file into hyperlinked HTML.
#
# NOTE(review): the copy of this script that was reviewed had lost the HTML
# markup inside its output strings and had its HTML entities decoded (the
# "undo web-coding" substitutions had become no-ops).  The markup below was
# rebuilt to match the behaviour described in the POD at the end of the
# file; compare it against the upstream release before relying on the exact
# tags and anchor names.

use strict;
use warnings;
use POSIX qw(strftime);

my(%rules);     # Indexed by rule names w/o angle-brackets; each entry is a ref to a hash.
my(%keywords);  # Index by keywords; each entry is a ref to a hash.

use constant debug => 0;

# Print a link back to the 'top' anchor emitted at the start of the body.
sub top
{
    print qq'<p><a href="#top">Top</a></p>\n\n';
}

# Record that $rhs refers to $lhs in the given cross-reference hash.
# Usage: add_entry(\%keywords, $keyword, $rule);
# Usage: add_entry(\%rules, $rhs, $rule);
sub add_entry
{
    my($reflist, $lhs, $rhs) = @_;
    $reflist->{$lhs}{$rhs} = 1;     # The inner hash is autovivified on first use.
}

# Scan the (web-encoded) right-hand side $tail of rule $def and record every
# non-terminal in %rules and every keyword in %keywords.
# Lines containing a '!!' meta-expression are not scanned at all.
sub add_refs
{
    my($def, $tail) = @_;
    print "\n<!-- add_refs: def = '$def', tail = '$tail' -->\n" if debug;
    return if $tail =~ m/!!/;
    # Test the length, not the truth, of $tail: a remaining tail of "0" is
    # false as a string but still has to be consumed.
    while (length $tail)
    {
        $tail =~ s/^\s+//;
        if ($tail =~ s%^&lt;([-:/\w\s]+)&gt;%%)
        {
            # Non-terminal: <rule name> in its web-encoded form.
            print "<!-- Rule: $1 -->\n" if debug;
            add_entry(\%rules, $1, $def);
        }
        elsif ($tail =~ s%^&#?\w+;%%)
        {
            # A stray HTML entity (an encoded <, > or &) is punctuation.
            # Skipping it whole stops 'lt', 'gt' and 'amp' being recorded
            # as keywords once the '&' and ';' have been stripped.
        }
        elsif ($tail =~ s%^([-:/\w]+)%%)
        {
            my $token = $1;
            print "<!-- KyWd: $token -->\n" if debug;
            # Keywords contain at least two adjacent letters; 'C' is the
            # only single-letter keyword that is recorded.
            add_entry(\%keywords, $token, $def)
                if $token =~ m%[[:alpha:]][[:alpha:]]% || $token eq 'C';
        }
        else
        {
            # Otherwise, it is punctuation (such as the BNF metacharacters).
            $tail =~ s%^[^-:/\w]%%;
        }
    }
}

# Convert an RCS/SCCS identification string into a readable description of
# the form "file NAME version VERSION dated DATE TIME".
sub rcs_id
{
    my($id) = @_;
    $id =~ s%^(?:\@\(#\))?\$[I]d: %%;   # [I] keeps RCS from expanding this pattern
    $id =~ s% \$$%%;
    $id =~ s%,v % %;
    $id =~ s%\w+ Exp( \w+)?$%%;
    my @words = split / /, $id;
    push @words, '' while @words < 4;   # Malformed IDs must not trigger undef warnings.
    return "file $words[0] version $words[1] dated $words[2] $words[3]";
}

# Format the epoch time $tm as an ISO 8601 timestamp in UTC.
sub iso8601_format
{
    my($tm) = @_;
    return strftime("%Y-%m-%d %H:%M:%S+00:00", gmtime($tm));
}

# Print hrefs for non-terminals and keywords.
# Also substitute /* Nothing */ for an absence of productions between alternatives.
# $tail is the web-encoded text to print; $tcount is the number of tokens
# seen so far in the current alternative.  Returns the updated token count.
sub print_tail
{
    my($tail, $tcount) = @_;
    # As in add_refs, test the length so that a tail of "0" is not dropped.
    while (length $tail)
    {
        my $newtail;
        if ($tail =~ m%^(\s+)%)
        {
            my $spaces = $1;
            $newtail = substr($tail, $+[0]);
            print "<!-- print_tail: SPACES = '$spaces', newtail = '$newtail' -->\n" if debug;
            # Preserve indentation, which HTML would otherwise collapse.
            $spaces =~ s% {4,8}%&nbsp;&nbsp;&nbsp;&nbsp;%g;
            print $spaces;
            # Spaces are not a token - don't count them!
        }
        elsif ($tail =~ m%^('[^']*')% || $tail =~ m%^("[^"]*")% || $tail =~ m%^(!!.*)$%)
        {
            # Quoted literal - print and ignore.
            # Or meta-expression...
            my $quote = $1;
            $newtail = substr($tail, $+[0]);
            print "<!-- print_tail: QUOTE = '$quote', newtail = '$newtail' -->\n" if debug;
            $quote =~ s%!!.*%<font color="red"> $quote </font>%;
            print $quote;
            $tcount++;
        }
        elsif ($tail =~ m%^&lt;([-:/\w\s]+)&gt;%)
        {
            # Non-terminal - link to the rule that defines it.
            my $name = $1;
            $newtail = substr($tail, $+[0]);
            print "<!-- print_tail: NONTERM = '$name', newtail = '$newtail' -->\n" if debug;
            print qq' <a href="#$name">&lt;$name&gt;</a>';
            $tcount++;
        }
        elsif ($tail =~ m%^(\w(?:[-.\w]*\w)?)%)
        {
            # Keyword - link to its cross-reference entry (numbers are not linked).
            my $keyword = $1;
            $newtail = substr($tail, $+[0]);
            print "<!-- print_tail: KEYWORD = '$keyword', newtail = '$newtail' -->\n" if debug;
            print(($keyword =~ m/^\d\d+$/) ? $keyword : qq' <a href="#xref-$keyword"> $keyword </a>');
            $tcount++;
        }
        else
        {
            # Metacharacter, string literal, etc.
            # $tail is non-empty and does not start with white space here,
            # so this match cannot fail; 'last' is purely defensive.
            $tail =~ m%^(\S+)% or last;
            my $symbol = $1;
            $newtail = substr($tail, $+[0]);
            print "<!-- print_tail: SYMBOL = '$symbol', newtail = '$newtail' -->\n" if debug;
            if ($symbol eq '|')
            {
                # An alternative with no tokens before the bar is empty.
                print qq'<font color="red">/* Nothing */</font> ' if $tcount == 0;
                $tcount = 0;
            }
            else
            {
                $symbol =~ s%(...omitted...)%<font color="red">/* $1 */</font>%i;
                $tcount++;
            }
            print " $symbol";
        }
        $tail = $newtail;
    }
    return($tcount);
}

# Print index of initial letters for keywords (or rules).
# Letters that start at least one entry link to the '$prefix-LETTER' anchor.
sub print_index_key
{
    my($prefix, @keys) = @_;
    my %letters = map { uc(substr $_, 0, 1) => 1 } @keys;
    foreach my $letter ('A' .. 'Z')
    {
        if ($letters{$letter})
        {
            print qq'<a href="#$prefix-$letter"> $letter </a>\n';
        }
        else
        {
            print qq'$letter\n';
        }
    }
    print "\n";
}

# Print one cross-reference table.
#   $prefix     - anchor prefix ('rules' or 'keywords')
#   $title      - text for the section heading
#   $heading    - heading of the first table column
#   $table      - ref to %rules or %keywords
#   $link_names - true if each entry should link to a rule definition
sub print_xref_table
{
    my($prefix, $title, $heading, $table, $link_names) = @_;
    print "<hr>\n";
    print qq'<a name="xref-$prefix"></a>\n';
    print "<h2> Cross-Reference Table: $title </h2>\n";
    print_index_key($prefix, keys %$table);
    print "<br>\n";
    print qq'<table border="1">\n';
    print "<tr><th> $heading </th><th> Rules using it </th></tr>\n";
    my %seen;
    foreach my $name (sort { uc $a cmp uc $b } keys %$table)
    {
        # The first entry for each initial letter carries the index anchor.
        my $initial = uc substr $name, 0, 1;
        my $label = $seen{$initial}++ ? "" : qq'<a name="$prefix-$initial"></a>';
        my $href = $link_names ? qq' href="#$name"' : "";
        print qq'<tr><td> $label <a$href name="xref-$name"> $name </a> </td>\n     <td> ';
        my $pad = "";
        foreach my $ref (sort { uc $a cmp uc $b } keys %{$table->{$name}})
        {
            print qq'$pad<a href="#$ref"> &lt;$ref&gt; </a>\n';
            $pad = "     ";
        }
        print "</td></tr>\n";
    }
    print "</table>\n";
    print "<br>\n";
    top();
}

# NB: webcode replaces tabs with blanks!
# Use the list form of the piped open so that file names are passed to
# webcode directly and are never interpreted by a shell.
open(my $WEBCODE, "-|", "webcode", @ARGV)
    or die "$0: cannot run webcode: $!\n";

# Read first line of file - use as title in head and in H1 heading in body
my $title = <$WEBCODE>;
exit 0 unless defined($title);
chomp $title;

# Is it wicked to use double quoting with single quotes, as in qq'text'?
# It is used quite extensively in this script - beware!
print qq'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n';
print "<!-- Generated HTML - Modify at your own peril! -->\n";
print "<html>\n<head>\n";
print "<title> $title </title>\n</head>\n\n<body>\n\n";
print "<h1> $title </h1>\n\n";
print qq'<a name="top">&nbsp;</a>\n';
print "<br>\n";
print qq'<a href="#xref-rules"> Cross-Reference: rules </a>\n';
print "<br>\n";
print qq'<a href="#xref-keywords"> Cross-Reference: keywords </a>\n';
print "<br>\n";

my $hr_count = 0;
my $tcount = 0;     # Ick!
my $def;            # Current rule

# Don't forget - the input has been web-encoded!
while (my $line = <$WEBCODE>)
{
    chomp $line;
    next if $line =~ /^===*$/;
    $line =~ s/\s+$//;      # Remove trailing white space
    if ($line eq '')
    {
        print "\n";
    }
    elsif ($line =~ /^---*$/)
    {
        print "<hr>\n";
    }
    elsif ($line =~ /^\@.#..Id:/)
    {
        # Convert what(1) string identifier into version information
        my $id = '$Id: bnf2html.pl,v 3.11 2011/07/11 19:57:38 jleffler Exp $';
        my $v1 = rcs_id($line);
        my $v2 = rcs_id($id);
        my $today = iso8601_format(time);
        print "<p><small><i>\n";
        print "Derived from $v1\n";
        print "<br>\n";
        print "Generated on $today by $v2\n";
        print "</i></small></p>\n";
    }
    elsif ($line =~ / ::=/)
    {
        # Definition line
        ($def = $line) =~ s%&lt;([-:/()\w\s]+)&gt;.*%$1%;
        (my $tail = $line) =~ s%.*::=\s*%%;
        # The rule is the link target for its uses, and itself links to its
        # entry in the rules cross-reference table.
        print qq'<p><a href="#xref-$def" name="$def"> &lt;$def&gt; </a>&nbsp;&nbsp;&nbsp;::=';
        $tcount = 0;
        if (length $tail)
        {
            add_refs($def, $tail);
            print "&nbsp;&nbsp;";
            $tcount = print_tail($tail, $tcount);
        }
        print "\n";
    }
    elsif ($line =~ /^\s/)
    {
        # Expansion line
        # An indented line before the first rule has nothing to refer to.
        add_refs($def, $line) if defined $def;
        print "<br>";
        $tcount = print_tail($line, $tcount);
    }
    elsif ($line =~ m/^--[\/]?(\w+)/)
    {
        # Pseudo-directive line in lower-case
        # Print a 'Top' link before <hr> tags except first.
        top() if $line =~ /--hr/ && $hr_count++ > 0;
        $line =~ s%--(/?[a-z][a-z\d]*)%<$1>%;
        $line =~ s%&lt;([-:/\w\s]+)&gt;%<a href="#$1">&lt;$1&gt;</a>%g;
        print "$line\n";
    }
    elsif ($line =~ m%^--##%)
    {
        # Undo web-coding
        # Decode '&amp;' last so that an encoded '&amp;lt;' becomes '&lt;'
        # rather than '<'.
        $line =~ s%&gt;%>%g;
        $line =~ s%&lt;%<%g;
        $line =~ s%&amp;%&%g;
        $line =~ s%^--##\s*%%;
        print "$line\n";
    }
    elsif ($line =~ m/^--%start\s+(\w+)/)
    {
        # Designated start symbol
        my $start = $1;
        print qq'<p><b>Start symbol: </b> $start</p>\n';
    }
    else
    {
        # Anything unrecognized passed through unchanged!
        print "$line\n";
    }
}

# Closing a pipe fails if the child could not be reaped or exited non-zero;
# report it, but still emit the cross-reference tables for what was read.
close $WEBCODE
    or warn "$0: " . ($! ? "error closing webcode pipe: $!" : "webcode exited with status $?") . "\n";

### Generate cross-reference tables
print "<br>\n\n";
print_xref_table("rules",    "Rules",    "Rule (non-terminal)", \%rules,    1);
print_xref_table("keywords", "Keywords", "Keyword",             \%keywords, 0);
print "<hr>\n";

printf "%s\n", q'Please send feedback to Jonathan Leffler, variously:';
printf "%s\n", q'<a href="mailto:jleffler@us.ibm.com"> jleffler@us.ibm.com </a> or';
printf "%s\n", q'<a href="mailto:jonathan.leffler@gmail.com"> jonathan.leffler@gmail.com </a>.';

print "\n</body>\n</html>\n";

__END__

=pod

=head1 PROGRAM

bnf2html - Convert (ISO SQL) BNF Notation to Hyperlinked HTML

=head1 SYNTAX

bnf2html [file ...]

=head1 DESCRIPTION

The bnf2html script filters the annotated BNF (Backus-Naur Form) from
its input files and converts it into HTML on standard output.
The HTML is heavily hyperlinked.
Each rule (LHS) links to a table of other rules where it is used on the
RHS.
Similarly, each symbol on the RHS is linked to the rule that defines it.
Thus, it is possible to find where items are used and defined quite
easily.

=head1 INPUT FORMAT

This script is adapted to the BNF notation used in the SQL standard
(ISO/IEC 9075:2003, for example).
It also takes various forms of annotations.

The first line of the file is used as the title in the head section.
It is also used as the text for a H1 header at the top of the body.

Lines consisting of two or more equal signs are ignored.

Lines consisting of two or more dashes are converted to a horizontal
rule.

Lines starting with the SCCS identification string '@(#)' are used to
print version information about the file converted and the script doing
the converting.

Lines containing space, colon, colon, equals are treated as rules.

Lines starting with white space are treated as continuations of a rule.

Lines starting dash, dash, (optionally a slash) and then one or more tag
letters are converted into an HTML start or end tag.

Any line starting dash, dash, hash, hash has any HTML entities
introduced by the WEBCODE program removed.

There should be at most one line starting '--%start'; this indicates the
start symbol for the bnf2yacc converter, but is effectively ignored by
bnf2html.

Any other line is passed through verbatim.

=head1 AUTHOR

Jonathan Leffler

=cut