#--geezer.pl
#-- "englishizer" -
#-- Produces printable ASCII of the House provided
#-- composition code format, using the format documentation set collected
#-- under QuickGuide.doc. Several other forms of the data are produced.
#--
#-- The COMMAND LINE has not been formalized -
#-- the way it works currently assumes a source file naming convention, eg,
#--     perl -w geezer.pl 1 1
#-- to process ../data/uscode01.bell
#-- or
#--     perl -w geezer.pl 3 17
#-- to process ../data/uscode03.bell through ../data/uscode17.bell
#--
#-- In addition to the one-to-one filter output mentioned above,
#-- the OUTPUT is a directory for each input file (USC title) containing
#-- one HTML file for each USC fragment found, with fragmentation level of
#-- (Super-)Section, plus a number of pop-up index HTMLs
#--
#-- Each HTML page produced here points to the XML page for the corresponding
#-- fragment in ../emely/uscodenn
#--
#-- David Shetland Dec-2001 ff
#******************************************************************************************

use strict;
# use IO::File;
use LDMS_NG::USCRinger;
use LDMS_NG::USCXrefs;
use Roman; #-- from CPAN

#-- mySQL --
# use DBI;
# $|= 1; # Autoflush
# my $host = "";
# my $opt_user = "";
# my $opt_password = "";
# my $usc_db="usc_process";
# print "Connection to database $usc_db\n";
# my $dbh = DBI->connect("DBI:mysql:$usc_db:$host",$opt_user,$opt_password) || die "Can't connect: $DBI::errstr\n";
# my $table="title_summary";
#-----------

#-- titles to process (first and, optionally, last title number)
my $titleFirst = shift(@ARGV);
my $titleLast  = shift(@ARGV);
if (!$titleFirst) {
  print( "oops - need an initial title number \(1-50\)\n" );
  exit;
}
if (!$titleLast ) {
  # print( "using initial title number for terminal as well\n" );
  $titleLast=$titleFirst;
}
#-- both bounds drive a numeric loop and a %02d file name below; a
#-- non-numeric argument would otherwise be silently treated as 0
foreach my $titleArg ($titleFirst, $titleLast) {
  if ($titleArg !~ /^[0-9]+$/) {
    print( "oops - title numbers must be whole numbers \(1-50\), got '$titleArg'\n" );
    exit;
  }
}

#-- state shared with the subs further down in this file
my $titlenum = "00";     #-- two digit title id, with an "a" suffix for an appendix file
my $chunk = "";          #-- current input line / its translated form
my $tmpkey = "00T00";

#-- file fragment counters
my $ctrFragStr = "";
my $ctrFrag = 0;
my $flgFragStr = sprintf("%05d",$ctrFrag); #-- marker for last flagged fragment
my $rCode = 0;
my @headlist = (); #-- expcite style list of super-section heads maintained as a stack
my @hdtykeys = (); #-- ssdiv keys that match headlist entries
# my $prevLvl = 0;
my $pushHead = 0;
my $replHead = 0;
my $popHead = 0;
my $ctrTOFEntry = 0;
my $ctrTOFEntStr = "";
my $ctrTOTEntry = 0;
my $ctrTOTEntStr = "";
my $ctrAREFEntry = 0;
my $ctrAREFEntStr = "";
my $uscExists = 0;
my $curfmt = "5800";
my $fmttbl = "5800";
my $inTable = 0;
my $centering=0;
my $curcod = "00";
my $inNotes = 0;
my @curFrag = ("","","","","","","","","","","","","","","","","","","","");
my @cntFrag = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
my $artNum =0;
my $lcLen = length("[=LC=1234567]"); #-- length of locator code

#-------------------------------------
#-- Loop through the files
#-------------------------------------
#-- NB: subs are called with a leading & throughout; the prototyped subs are
#-- defined after this point in the file, so the & form avoids the
#-- "called too early to check prototype" warning under -w.
for (my $i=$titleFirst; $i<=$titleLast; $i++) {

  #-- try both base name and appendix...
  #-- leading zero file numbering
  foreach my $titlenumalt (sprintf( "%02d",$i ), sprintf( "%02da",$i ) ) {
    $titlenum = $titlenumalt;
    # print ("looking for uscode".$titlenum.".bell\n");

    #-- file open US Code data in -
    #-- a file that cannot be opened is skipped, not treated as an error
    $uscExists = open (USCODE, "<", "../data/uscode".$titlenum.".bell") ? 1 : 0;

    if ($uscExists) {
      # print ("found uscode".$titlenum.".bell\n");

      #-- tell gizer about the new $titlenum, and reset the gizer level tracker
      &set_titlenum($titlenum);
      &reset_prevLvl();

      #-- make new directory, if needed, for this title
      if (mkdir "uscode$titlenum") {
        print("Started new directory: uscode$titlenum\n");
      }

      #-- start data tables of scope "input file"
      &startTXTFile();
      &startLBLFile(); #-- just the header records detected
      # &startREFFile();

      #-- start link tables of scope "input file"
      &startTOCPage();
      &startTOTPage();
      #-- one abstract-reference index page per type 1..14
      #-- (own loop variable, so the title counter $i is not shadowed)
      for (my $artPage=1; $artPage<15; $artPage++) {
        &startAREFPage($artPage);
      }

      #-- reset file fragment counters
      $ctrFragStr = "";
      $ctrFrag = 0;
      $flgFragStr = sprintf("%05d",$ctrFrag); #-- marker for last flagged fragment
      $rCode = 0;
      @headlist = (); #-- expcite style list of super-section heads maintained as a stack
      @hdtykeys = (); #-- ssdiv keys that match headlist entries
      # $prevLvl = 0;
      $pushHead = 0;
      $replHead = 0;
      $popHead = 0;
      $ctrTOFEntry = 0;
      $ctrTOFEntStr = "";
      $ctrTOTEntry = 0;
      $ctrTOTEntStr = "";
      $ctrAREFEntry = 0;
      $ctrAREFEntStr = "";

      &genHeaderPage();
      &startDataPage(); #-- leader DataPage to collect file header oddments

      #-- buffers for checking transitions
      my $lineCntr = 0;
      my $dataRecTot = 0;
      # $ta = "T"; #-- Title vs Appendix flag, used in document level hash key
      &reset_ta(); #-- (USCRinger.pm) Title vs Appendix flag, used in document level hash key
      my $cur_expcite;
      my $cur_head;
      my $rCodeChunk = "";
      $curfmt = "5800";
      $fmttbl = "5800";
      $inTable = 0;
      $centering=0;
      @curFrag = ("","","","","","","","","","","","","","","","","","","","");
      @cntFrag = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
      $artNum = 0;

      #-------------------------------------
      #-- Loop through the lines of the file
      #-- chuncking at each newline
      #-------------------------------------
      $dataRecTot = 0;
      #-- read USCODE one line at a time into $_ (the loop previously had an
      #-- empty condition and so never read from the file)
      while (<USCODE>) {
        $lineCntr++;
        $chunk = $_;
        # print ("debug: length-chunk: ".length($chunk)."\n");

        #-------------------------------------
        #-- TRANSLATE to readable codes --
        #-- NB: set_titlenum is called once per title, above
        #-------------------------------------
        my $outChunk = "";
        $curcod = "00";
        my $curlab = "";

        #-- empty line (new record): only the line terminator was read
        if (length($chunk)==1) {
          $chunk = "[=newrec]";
        }
        #-- otherwise, geezerize
        else {
          $chunk = &gizer($chunk); #-- first translate, then
          #-- sync with various initial-scan-related flags and variables --
          $rCode = &get_rCode();
          $curcod = &get_curcod();
          ($pushHead, $replHead, $popHead) = &get_head_flags();
          $tmpkey = &get_tmpkey();
          $inTable = &get_inTable();
          # print ("debug: tmpkey, push, repl, pop: ".$tmpkey.$pushHead.$replHead.$popHead."\n");
        }

        #-- the translated end-of-file marker closes the header page and
        #-- opens the final data page
        if ( $chunk =~ /\[=EOF\]/ ) {
          &lastDataPage();
        }

        #---------------------------------------------
        #---------- OUTPUT ---------------------------
        #---------------------------------------------
#-- Line Break
#-- (Still inside the per-line while loop of the main script.)  The tests in
#-- this section look at the current format ($curfmt), the locator code of the
#-- chunk ($curcod), the table flag ($inTable) and the translated chunk text,
#-- and write separator/label text to HTMOUT ahead of the chunk itself.
#-- NOTE(review): many double-quoted strings below contain literal line breaks
#-- and the q~ ~ bodies hold a single space; this looks like markup that was
#-- lost from the strings - confirm against a pristine copy of the script.
#-- NOTE(review): in the code visible in this file $curfmt is only ever
#-- assigned "5800", so the 5802/5804 branches look unreachable - confirm.
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(06)|(07)|([1-3][0-8])|(45)|(47)|(48)|(53)|(55)|(70)|(74)|(79)|(81)/ ) ) {
          print(HTMOUT "
");
        }
        #-- NOTE(review): numeric == against a string; the neighbouring tests use =~
        if ( ($curfmt == "5804") and ( $curcod =~ /([2-3]7)|(70)/ ) ) {
          print(HTMOUT "
");
        }
        if ( ($inTable ) and ( $curcod =~ /(01)/ ) ) {
          print(HTMOUT "
");
        }
        if ( $chunk =~ /(\[=table)|(\[=endtable)|(\[=fn)|(\[=boxhead)|(\[=endboxheads)|(\[=vertSpace)/ ) {
          print(HTMOUT "
");
        }

        #-- Leader note
        #-- each of these locator codes sets $inNotes and prints a fixed label
        #-- naming the kind of note that follows
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(53)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.53 - Source Credits
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(06)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.06 - Running Head
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(73)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.73 - Referred to in other
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(75)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.75 - References in text
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(76)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.76 - Codification
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(78)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.78 - Change of Name
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(84)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.84 - Italic Amendment Note
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(85)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.85 - Transfer of functions
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(86)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.86 - Ex, Ord. No. etc.
\n");
        }
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(87)/ ) ) {
          $inNotes=1;
          print(HTMOUT "
loc580x.87 - Federal Rules, cross refs
\n");
        }

        #-- mySQL ----------------------
        #if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /04/ ) ) {
        #  my $tmp1 = "\'".$titlenum."\'";
        #  my $tmp2 = "\'".substr( $chunk, $lcLen )."\'";
        #  $tmp2 =~ s/\[=md\]/\ \-\ /g; #-- translate ndash
        #
        #  $dbh->do("insert into $table (titlenum, titlenam) values ($tmp1, $tmp2)") or die $DBI::errstr;
        #  $dbh->do("update $table set titlenam=$tmp2 where titlenum=$tmp1" ) or die $DBI::errstr;
        #}
        #-- end mySQL ----------------------

        #-- Super-Section header update
        #-- For the heading locator codes, and only when the translator has
        #-- flagged an rCode for this chunk, update the heading stack
        #-- (@headlist with its parallel key stack @hdtykeys) according to the
        #-- push/replace/pop flags, then start a new fragment page and add a
        #-- table-of-contents entry for the heading.
        if ( ( ($curfmt =~ /5800/) and ( $curcod =~ /(81)|(52)|(04)/ ) ) || ( ($curfmt =~ /5802/) and ( $curcod =~ /(52)|(04)/ ) ) || ( ($curfmt =~ /5804/) and ( $curcod =~ /(83)/ ) ) ) {
          if ( $rCode ) {
            if ($pushHead) {
              push ( @headlist, $chunk);
              push ( @hdtykeys, $tmpkey);
              $pushHead=0;
              &reset_pushHead();
            }
            if ($replHead) {
              #-- same level: swap the top of both stacks
              pop ( @headlist );
              pop (@hdtykeys);
              push ( @headlist, $chunk);
              push ( @hdtykeys, $tmpkey);
              $replHead=0;
              &reset_replHead();
            }
            if ($popHead) {
              #-- unwind until a key equal to $tmpkey has been popped or
              #-- fewer than three keys remain; each failed comparison has
              #-- already popped one key, and the loop body pops the
              #-- matching head
              until ( (scalar(@hdtykeys)<3) || ($tmpkey eq pop(@hdtykeys)) ) { pop(@headlist) }
              pop(@headlist);
              push(@headlist, $chunk);
              push ( @hdtykeys, $tmpkey);
              $popHead=0;
              &reset_popHead();
            }
            #-- indentation prefix: one underscore per heading above the
            #-- first; addTOCEntry turns each underscore into "..."
            my $headsize = @headlist;
            if ($headsize>0) {$headsize--;}
            my $headIndent = "";
            if ( "________________" =~ /((_){$headsize})/ ) { $headIndent = $1; }
            &startDataPage();
            &outDataChunk($rCodeChunk);
            &addTOCEntry($headIndent.$chunk);
          }
        }

        #-- Section
        #-- a section head starts a fresh fragment page and loses its
        #-- trailing line terminator
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(80)/ ) ) {
          &startDataPage();
          print(HTMOUT "

Section

\n");
          chomp($chunk);
        }

        #-- Tables
        if ( $chunk =~ /(\[=table)/ ) {
          print(HTMOUT "\n
Table
");
        }
        #-- a table title (code 95) gets the next table-of-tables number
        if ( ($inTable ) and ( $curcod =~ /(95)/ ) ) {
          $ctrTOTEntry++;
          $ctrTOTEntStr = sprintf("T%05d",$ctrTOTEntry);
        }
        if ( ($inTable ) and ( $curcod =~ /(95)/ ) ) {
          print(HTMOUT "
SGL0.95 - Table Title
\n");
        }
        if ( ($inTable ) and ( $curcod =~ /(96)|(97)/ ) ) {
          print(HTMOUT "
SGL0.96(97?) - Headnote
\n");
        }

        #-- Table of Tables entry
        if ( ($inTable ) and ( $curcod =~ /(95)/ ) ) {
          &addTOTEntry($chunk);
        }

        #-- Labels
        #-- capture up to four leading enumerators ("A." or "(a)" forms, each
        #-- optionally followed by a typeface-tagged phrase) that directly
        #-- follow the locator code
        #-- NOTE(review): both alternatives of this || are the same pattern
        if ( ( $chunk =~ /\[=LC=[0-9=]{7}\]((((?:[A-Z0-9]{1,5}\.)|(?:\([A-Za-z0-9]{1,5}\)))(\ \[=typeface[a-zA-Z0-9\ \[\]\=\/\,]+?\.\[=md\])?){1,4})/ ) || ( $chunk =~ /\[=LC=[0-9=]{7}\]((((?:[A-Z0-9]{1,5}\.)|(?:\([A-Za-z0-9]{1,5}\)))(\ \[=typeface[a-zA-Z0-9\ \[\]\=\/\,]+?\.\[=md\])?){1,4})/ ) ) {
          $curlab = $1;
        }
        #-- for these codes the label is everything after the locator code
        if ( ($curcod == 18) || ($curcod == 19) || ($curcod == 20) || ($curcod == 79) ) {
          $curlab = substr( $chunk, $lcLen );
        } #-- $lcLen is length of locator code
        if ( $curlab ) {
          # print(HTMOUT "
          # LocLab: [$curcod]$curlab
          # \n");
          print(HTMOUT "
\n");
          &addLBLFileLine("[$curcod]$curlab\n");
        }
        # print(HTMOUT "" );

        #-- Centered things
        #-- $centering is cleared again after the chunk has been written
        if ( ($curfmt =~ /(5800)|(5802)/) and ( $curcod =~ /(04)|(51)|(52)|(54)|(61)|(71)|(74)|(81)|(83)/ ) ) {
          print(HTMOUT "
\n
");
          $centering=1;
        }
        if ( ($inTable ) and ( $curcod =~ /(95)/ ) ) {
          print(HTMOUT "
\n");
          $centering=1;
        }

        #-- remember the chunk that carried the rCode
        if ( $rCode ) { $rCodeChunk = $chunk; }

        #-------------------------------------
        #-- output every chunk -----------------
        &outDataChunk($chunk);
        #-- send the chunk to the plain text filtered file
        &addTXTFileLine($chunk);
        #-- send reference chunk to the reference candidate file
        # if ( $curcod == 82) { &addREFFileLine($chunk); }
        #---------------------------------------

        #-------------------------------------
        #-------------------------------------
        #--- end centering
        if ( $centering ) {
          print(HTMOUT "<\/CENTER>\n");
          $centering=0;
        }
        #-------------------------------------

      } #-- end (while more lines in file)

      close (USCODE);

    } #-- file existence check
  } #-- try both base name and appendix
} #-- end of loop through files

#-- final closings
#-- (TITLHD is closed in lastDataPage; LBLOUT and the AREFnn handles are not
#-- closed explicitly anywhere in this file)
close (TXTOUT);
close (HTMOUT);
close (HTMTOC);
close (HTMTOT);

#-- debug: per-type abstract reference counts of the last file processed
for (my $i=1; $i<16; $i++){
  print ( "debug cntFrag $i: ".$cntFrag[$i]."\n" );
}

#-- end geezer main (subs below) --
#####################################################################################################
#####################################################################################################

#---------------------------------------------------
sub genHeaderPage{
#-- Opens the title header page uscode<titlenum>/T<titlenum>F<ctrFrag>.HTML
#-- on TITLHD and writes its heading, navigation labels, timestamp and notes;
#-- also prints a progress line to STDOUT.  TITLHD is left open (it is
#-- finished and closed by lastDataPage).  Finally opens a placeholder
#-- HTMOUT so that the first startDataPage has something to close.
#-- Reads $titlenum, $ctrFrag, $titleFirst, $titleLast; updates $ctrFragStr.
#-- Dies if either file cannot be opened.
#---------------------------------------------------
  $ctrFragStr = sprintf("%05d",$ctrFrag);
  open (TITLHD, "> uscode".$titlenum."\/T".$titlenum."F".$ctrFragStr.".HTML") || die("\ncan't open output file uscode".$titlenum."\\T".$titlenum."F".$ctrFragStr.".HTML\n");
  print(TITLHD q~ ~ );
  print(TITLHD "\nT".$titlenum."F".$ctrFragStr."\n");
  #-- make keyboard script
  print(TITLHD q~ ~ );
  $ctrFragStr = sprintf("%05d",$ctrFrag);
  print(TITLHD "\nT".$titlenum."F".$ctrFragStr."\n");
  #-- make Next/Prev buttons --
  $ctrFragStr = sprintf("%05d",$ctrFrag+1);
  print(TITLHD "(N)ext\n");
  print(TITLHD "(M)ain\n");
  print(TITLHD "---".localtime(time())."\n");
  print(TITLHD "" );
  print (TITLHD "*** uscode".$titlenum);
  print (TITLHD " (of titles $titleFirst through $titleLast, 
".localtime(time()).")\n");
  #-- same progress line on STDOUT
  print ( "*** uscode".$titlenum);
  print ( " (of titles $titleFirst through $titleLast, ".localtime(time()).")\n");
  print (TITLHD "
\n");
  print (TITLHD "Notes on the HTML
\n");
  print (TITLHD "Highlights in purple, vertical space, and H-rules are added metadata.
\n");
  print (TITLHD "Original data is either black (bold or not) for printing ASCII, or blue square braketted non-printing codes as translated.
\n");
  print (TITLHD "
\n");
  print (TITLHD "
\n");
  print(TITLHD "
" );
  print (TITLHD "
Table of Contents\n");
  print (TITLHD "
Table of Tables, Flags, etc.\n");
  print (TITLHD "
Flags on labels, etc.\n");
  #-- close with last page (stats available)

  #-- dummy file to give startDataPage something to close the first time.
  open (HTMOUT, "> uscode".$titlenum."\/z.HTML") || die("\ncan't open output file uscode".$titlenum."\\T".$titlenum."\/z.HTML\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub startDataPage{
#-- starts a fresh data fragment, currently for
#-- (1) an Rcode, or
#-- (2) a Section
#-- Finishes and closes the current HTMOUT, increments $ctrFrag and opens
#-- uscode<titlenum>/T<titlenum>F<ctrFrag>.HTML as the new HTMOUT (dies on
#-- failure).  Writes the page heading, navigation labels and timestamp,
#-- then the current heading stack (@headlist) with tags stripped.  When
#-- $rCode is set it also prints the heading-type name returned by
#-- getRcodeNomen($tmpkey) - or records a flag entry if none is returned -
#-- and clears $rCode.  Always clears $inNotes.
#---------------------------------------------------
  print(HTMOUT "\n\n");
  close(HTMOUT);
  $ctrFrag++;
  $ctrFragStr = sprintf("%05d",$ctrFrag);
  open (HTMOUT, "> uscode".$titlenum."\/T".$titlenum."F".$ctrFragStr.".HTML") || die("\ncan't open output file uscode".$titlenum."\\T".$titlenum."F".$ctrFragStr.".HTML\n");
  print(HTMOUT q~ ~ );
  print(HTMOUT "\nT".$titlenum."F".$ctrFragStr."\n");
  #-- make keyboard script
  print(HTMOUT q~ ~ );
  $ctrFragStr = sprintf("%05d",$ctrFrag);
  print(HTMOUT "T".$titlenum."F".$ctrFragStr."\n");
  #-- make control buttons --
  #-- ($ctrFragStr is stepped to the previous/next number and then set back
  #-- to the current fragment number)
  $ctrFragStr = sprintf("%05d",$ctrFrag-1);
  print(HTMOUT "(p)rev\n");
  $ctrFragStr = sprintf("%05d",$ctrFrag+1);
  print(HTMOUT "(n)ext\n");
  $ctrFragStr = sprintf("%05d",$ctrFrag);
  print(HTMOUT "(x)ml\n");
  print(HTMOUT "(t)OC\n");
  # print(HTMOUT "(m)ain\n");
  # print(HTMOUT "(F)lagged\n");
  print(HTMOUT "---".localtime(time())."\n");

  #-- heading trail: the stacked heads, one per line, with dash codes
  #-- translated and footnote numbers and remaining bracketed tags removed
  my $tmphead = join("\n
",@headlist);
  $tmphead =~ s/\[=md\]/\ \-\ /g;
  $tmphead =~ s/\[=nd\]/\-/g;
  $tmphead =~ s/\[=fn\][0-9]\[=fn\]//g; #-- discard footnote numbers
  $tmphead =~ s/\[[a-zA-Z0-9\ \=\.]+\]//g;
  if ($rCode) {
    #-- trim the last entry, since it duplicates the current code head
    $tmphead =~ s/
.+?$//;
  }
  print(HTMOUT "\n

".$tmphead."<\/FONT>\n
");
  if ($rCode) {
    my $RcodeNomenTmp = &getRcodeNomen("$tmpkey");
    if ($RcodeNomenTmp) {
      print(HTMOUT "

".$RcodeNomenTmp."

<\/FONT>\n");
    }
    else {
      #-- unknown heading key: flag it in the table of tables and on the page
      $ctrTOFEntry++;
      &addTOFEntry( "Heading Type not found; Key=".$tmpkey );
      print(HTMOUT "

Heading Type not found; Key=".$tmpkey."

<\/FONT>\n");
    }
    $rCode = 0;
    &reset_rCode();
  }
  $inNotes=0;
}
#---------------------------------------------------

#---------------------------------------------------
sub outDataChunk($){
#-- filters a chunk for HTML and writes it
#-- Argument: the translated chunk text.  The chunk is passed through
#-- markArefs, each marked reference is counted and indexed, an uncaptured
#-- bell code is flagged, bracketed [=...] codes are wrapped, and the result
#-- is printed to HTMOUT.
#---------------------------------------------------
  my $nextOut = shift;

  #-- experimental xref detection (highlights and collects stats)
  $nextOut = &markArefs($nextOut);
  # $nextOut =~ s/<\/AREF>/<\/U>/g;
  #-- NOTE(review): the pattern has a single capture group although $2 is
  #-- used below and in the replacement; it looks as if the opening tag part
  #-- of the pattern has been lost - confirm against a pristine copy.
  while ( $nextOut =~ s/(.*?)<\/AREF>/\{ART\-$1\}<\/FONT>$2<\/U>/g ) {
    # print("debug: $1-$2\n");
    $artNum = $1;
    my $curAREF = $2;
    #-- stats --
    $curFrag[$artNum] = "T".$titlenum."F".$ctrFragStr;
    $cntFrag[$artNum]++;
    #-- increment and stringify AREF counter
    $ctrAREFEntry++;
    $ctrAREFEntStr = sprintf("R%05d",$ctrAREFEntry);
    #-- use AREF counter as a label for (here) in the html
    print(HTMOUT "\n\n");
    #-- add to the appropriate toc
    if ( $cntFrag[$artNum] < 99 ) { #-- don't make lists too long (fancier thinning idea, anyone?
      &addAREFEntry( $artNum, $curAREF );
    }
  }

  #-- flag uncaptured bell codes...
  #-- (the local $ctrTOFEntStr is computed but not used in the text printed here)
  if ( $nextOut =~ /(\[=bell\-[A-Za-z]\])/ ) {
    $ctrTOFEntry++;
    my $ctrTOFEntStr = sprintf("Flg%05d",$ctrTOFEntry);
    print(HTMOUT "\n
Bell code not captured; code=".$1."
\n" );
    &addTOFEntry( "Bell code not captured; code=".$1 );
  }

  #-- look for indicative index noun phrase candidates
  #-- current guess: simple one to four word phrase in double quotes
  #while ( !$inNotes && $nextOut =~ /\[=ldq\]([a-zA-Z]+(?:\ [a-zA-Z]+){0,3})\[=rdq\]/g ) {
  #print ("[T".$titlenum."F".$ctrFragStr."] $1\n");
  #}

  #-- sample the strange new codes
  #while ( $nextOut =~ /\[=(AEAF CD=[A-Z]{4})\]/g ) {
  #print ("debug: $1\n");
  #}

  #-- translating to blue indirectly through CSS override of
  #-- (NOTE(review): the element name is missing from the comment above; the
  #-- replacement below closes an I element)
  $nextOut =~ s/(\[=.+?\])/$1<\/I>/g;
  print(HTMOUT $nextOut);
}
#---------------------------------------------------

#---------------------------------------------------
sub lastDataPage{
#-- Called from the main loop when the [=EOF] marker is seen.  Finishes the
#-- title header page (one line per abstract reference type 1..14 with its
#-- count from @cntFrag) and closes TITLHD, then closes the current HTMOUT
#-- and opens one more fragment page carrying the "End of File" notice.
#-- Increments $ctrFrag; dies if the new page cannot be opened.
#---------------------------------------------------
  #-- First, finish up
  print (TITLHD "
\n");
  for ( my $i=1; $i<15; $i++ ) {
    my $artNumStr = sprintf("%02d",$i);
    print (TITLHD "
Table of Abstract Reference Type ".$artNumStr." (".$cntFrag[$i].")\n");
  }
  print (TITLHD "\n");
  close (TITLHD);
  #---------------------------------------------------
  print(HTMOUT "\n\n");
  close(HTMOUT);
  $ctrFragStr = sprintf("%05d",++$ctrFrag);
  open (HTMOUT, "> uscode".$titlenum."\/T".$titlenum."F".$ctrFragStr.".HTML") || die("\ncan't open output file uscode".$titlenum."\\T".$titlenum."F".$ctrFragStr.".HTML\n");
  print(HTMOUT q~ ~ );
  print(HTMOUT "\nT".$titlenum."F".$ctrFragStr."\n");
  #-- make keyboard script
  print(HTMOUT q~ ~ );
  $ctrFragStr = sprintf("%05d",$ctrFrag);
  print(HTMOUT "\nT".$titlenum."F".$ctrFragStr."\n");
  #-- make Next/Prev buttons --
  $ctrFragStr = sprintf("%05d",$ctrFrag-1);
  print(HTMOUT "(P)rev\n");
  # $ctrFragStr = sprintf("%05d",$ctrFrag+1);
  # print(HTMOUT "(N)ext\n");
  print(HTMOUT "(M)ain\n");
  # print(HTMOUT "(F)lagged\n");
  print(HTMOUT "---".localtime(time())."\n");
  print(HTMOUT "

End of File

<\/FONT>\n");
}
#---------------------------------------------------

############## Table of Contents ###########################################
#---------------------------------------------------
sub startTOCPage{
#-- Opens uscode<titlenum>/T<titlenum>TOC.HTML on HTMTOC (dies on failure)
#-- and writes the page heading and timestamp.
#---------------------------------------------------
  open (HTMTOC, "> uscode".$titlenum."\/T".$titlenum."TOC.HTML") || die("\ncan't open output file\n");
  print(HTMTOC q~ ~ );
  print(HTMTOC "\nT".$titlenum."TOC\n");
  print(HTMTOC q~ ~);
  print(HTMTOC "\n\nT".$titlenum."TOC\n");
  print(HTMTOC "---".localtime(time())."\n");
  print(HTMTOC "TOC

<\/FONT>\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub addTOCEntry($){
#-- Writes one table-of-contents line to HTMTOC.
#-- Argument: heading text, optionally prefixed with underscores for
#-- indentation; codes and tags are translated or removed before printing.
#-- (the local $fn, the current fragment's file name, is computed but not
#-- used in the text printed here)
#---------------------------------------------------
  my $ctrFragStr = sprintf("%05d",$ctrFrag);
  my $nextEntry = shift;
  $nextEntry =~ s/_/\.\.\./g; #-- play with indentation
  $nextEntry =~ s/\[=md\]/\ \-\ /g; #-- translate emdashes
  $nextEntry =~ s/\[=fn\][0-9]\[=fn\]//g; #-- discard footnote numbers
  $nextEntry =~ s/\[=[a-zA-Z0-9\ \=\.]+\]//g; #-- discard all other tags
  my $fn = "T".$titlenum."F".$ctrFragStr.".HTML";
  print(HTMTOC "
".$nextEntry."<\/FONT>\n");
}
#---------------------------------------------------

############## Table of Flagged Things ###########################################
#---------------------------------------------------
sub startTOTPage{
#-- Opens uscode<titlenum>/T<titlenum>TOT.HTML on HTMTOT (dies on failure)
#-- and writes the page heading and timestamp.
#---------------------------------------------------
  open (HTMTOT, "> uscode".$titlenum."\/T".$titlenum."TOT.HTML") || die("\ncan't open output file\n");
  print(HTMTOT q~ ~ );
  print(HTMTOT "\nT".$titlenum."TOT\n");
  print(HTMTOT q~ ~);
  print(HTMTOT "\n\nT".$titlenum."TOT\n");
  print(HTMTOT "---".localtime(time())."\n");
  print(HTMTOT "Table of Tables and other flagged things

<\/FONT>\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub addTOTEntry($){
#-- Writes one table entry line to HTMTOT, labelled with the current table
#-- counter ($ctrTOTEntry).  Argument: the table title chunk; bracketed tags
#-- are removed before printing.
#-- (the local $fnp, fragment file name plus entry label, is computed but
#-- not used in the text printed here)
#---------------------------------------------------
  my $ctrTOTEntStr = sprintf("T%05d",$ctrTOTEntry);
  my $ctrFragStr = sprintf("%05d",$ctrFrag);
  my $nextEntry = shift;
  $nextEntry =~ s/\[[a-zA-Z0-9\ \=\.]+\]//g; #-- discard all other tags
  my $fnp = "T".$titlenum."F".$ctrFragStr.".HTML#".$ctrTOTEntStr;
  print(HTMTOT "
".$ctrTOTEntStr." - ".$nextEntry."<\/FONT>\n");
}
#---------------------------------------------------

#---------------------------------------------------
#--- for now, put flags in the table of tables
sub addTOFEntry($){
#-- Writes one flag entry line to HTMTOT, labelled with the current flag
#-- counter ($ctrTOFEntry; callers increment it before calling).
#-- Argument: the flag message; bracketed tags are removed before printing.
#---------------------------------------------------
  my $ctrTOFEntStr = sprintf("Flg%05d",$ctrTOFEntry);
  my $ctrFragStr = sprintf("%05d",$ctrFrag);
  my $nextEntry = shift;
  $nextEntry =~ s/\[[a-zA-Z0-9\ \=\.]+\]//g; #-- discard all other tags
  # print(HTMOUT "
  # ".$ctrTOFEntStr." - ".$nextEntry."<\/FONT>\n");
  my $fnp = "T".$titlenum."F".$ctrFragStr.".HTML#".$ctrTOFEntStr;
  print(HTMTOT "
".$ctrTOFEntStr." - ".$nextEntry."<\/FONT>\n");
}
#---------------------------------------------------

############## Table of Abstract References ###########################################
#---------------------------------------------------
sub startAREFPage($){
#-- Opens uscode<titlenum>/T<titlenum>AREF<nn>.HTML (dies on failure) and
#-- writes its heading and timestamp.  Argument: the abstract reference type
#-- number; the filehandle is the symbolic name "AREF<nn>" built from it.
#---------------------------------------------------
  no strict 'refs'; #-- strict refs doesn't let the variable filehandle thing work
  my $artNum = shift;
  my $arfNam = sprintf("AREF%02d",$artNum);
  open ($arfNam, "> uscode".$titlenum."\/T".$titlenum.$arfNam."\.HTML") || die("\ncan't open output file\n");
  print($arfNam q~ ~ );
  print($arfNam "\nT".$titlenum.$arfNam."\n");
  print($arfNam q~ ~);
  print($arfNam "\n\nT".$titlenum.$arfNam."\n");
  print($arfNam "---".localtime(time())."\n");
  print($arfNam "Table of Abstract References

<\/FONT>\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub addAREFEntry(@){
#-- Writes one entry line to the index page of an abstract reference type.
#-- Arguments: the type number (selects the "AREF<nn>" handle opened by
#-- startAREFPage) and the reference text.  An empty or false reference text
#-- writes nothing.  The entry is labelled with the current $ctrAREFEntry.
#-- (the local $fnp, fragment file name plus entry label, is computed but
#-- not used in the text printed here)
#---------------------------------------------------
  no strict 'refs'; #-- strict refs doesn't let the variable filehandle thing work
  my $artNum = shift;
  my $arfNam = sprintf("AREF%02d",$artNum);
  my $ctrAREFEntStr = sprintf("R%05d",$ctrAREFEntry);
  my $ctrFragStr = sprintf("%05d",$ctrFrag);
  my $nextEntry = shift;
  if ($nextEntry){
    $nextEntry =~ s/\[=st\]\[=ts\]/§/g; #-- translate section twist
    $nextEntry =~ s/\[=nd\]/\-/g; #-- translate ndash
    $nextEntry =~ s/\[[a-zA-Z0-9\ \=\.]+\]//g; #-- discard all other tags
    my $fnp = "T".$titlenum."F".$ctrFragStr.".HTML#".$ctrAREFEntStr;
    print($arfNam "
".$ctrAREFEntStr." - ".$nextEntry."<\/FONT>\n");
  }
}
#---------------------------------------------------

############## Plain Text File ###########################################
#---------------------------------------------------
sub startTXTFile{
#-- Opens uscode<titlenum>/T<titlenum>.TXT on TXTOUT (dies on failure) and
#-- writes the explanatory [=comment] header.
#---------------------------------------------------
  open (TXTOUT, "> uscode".$titlenum."\/T".$titlenum.".TXT") || die("\ncan't open output file uscode".$titlenum."\/T".$titlenum.".TXT\n");
  print(TXTOUT "[=comment]\nThis file contains USC composition code data,\nwith original codes translated into readable ascii\nin [=square brackets] (<<[=>> to distinguish from ordinary text <<[>>)\n[=endcomment]\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub addTXTFileLine($){
#-- Appends one translated chunk to TXTOUT; a line break is added after
#-- each [=newrec] marker.
#---------------------------------------------------
  my $nextEntry = shift;
  $nextEntry =~ s/\[=newrec\]/\[=newrec\]\n/g; #-- symbolic for empty line (new record)
  print(TXTOUT $nextEntry );
}
#---------------------------------------------------

############## Reference Candidates File ###########################################
#---------------------------------------------------
sub startREFFile{
#-- Opens R<titlenum>.TXT in the current directory on REFOUT and writes the
#-- explanatory [=comment] header.  Its only call in this file is commented
#-- out.
#-- NOTE(review): the die message names the T<titlenum>.TXT path rather than
#-- the file actually being opened.
#---------------------------------------------------
  open (REFOUT, "> R".$titlenum.".TXT") || die("\ncan't open output file uscode".$titlenum."\/T".$titlenum.".TXT\n");
  print(REFOUT "[=comment]\nThis file contains USC composition code data,\nwith original codes translated into readable ascii\nin [=square brackets] (<<[=>> to distinguish from ordinary text <<[>>)\n*** Reference Candidates Only ***\n[=endcomment]\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub addREFFileLine($){
#-- Appends one chunk to REFOUT; a line break is added after each [=newrec]
#-- marker.  Its only call in this file is commented out.
#---------------------------------------------------
  my $nextEntry = shift;
  $nextEntry =~ s/\[=newrec\]/\[=newrec\]\n/g; #-- symbolic for empty line (new record)
  print(REFOUT $nextEntry );
}
#---------------------------------------------------

##############
############## Header record File ###########################################

#---------------------------------------------------
sub startLBLFile{
#-- Opens (truncating) the label file uscode<titlenum>/T<titlenum>LBL.TXT on
#-- the LBLOUT filehandle, using the file-level $titlenum.
#-- Dies, reporting the path and the system error, if the file cannot be
#-- opened.
#---------------------------------------------------
  my $lblPath = "uscode".$titlenum."\/T".$titlenum."LBL.TXT";
  #-- 3-arg open: the path is never parsed for a mode; $! says why it failed
  open (LBLOUT, ">", $lblPath) || die("\ncan't open output file ".$lblPath.": $!\n");
}
#---------------------------------------------------

#---------------------------------------------------
sub addLBLFileLine($){
#-- Appends one entry to the label file opened by startLBLFile.
#-- Argument: the text to write; it is printed as given (the caller
#-- supplies any line terminator).
#---------------------------------------------------
  my $nextEntry = shift;
  print(LBLOUT $nextEntry );
}
#---------------------------------------------------

1;
__END__;