# $Id$ $VERSION{''.__FILE__} = '$Revision$'; # # >>Title:: SGML Format Driver # # >>Copyright:: # Copyright (c) 1992-1997, Ian Clatworthy (ianc@mincom.com). # You may distribute under the terms specified in the LICENSE file. # # >>History:: # ----------------------------------------------------------------------- # Date Who Change # 14-Aug-97 ianc SDF 2.000 # ----------------------------------------------------------------------- # # >>Purpose:: # This library provides an [[SDF_DRIVER]] which generates # [[SGML]] files. # # >>Description:: # # >>Limitations:: # Cross-references and URLs aren't there yet. # # Indented tables within a bulleted list don't work yet. # # Tagged lists aren't mapped as well as they could be. # # Special character support still needs some work (for dagger and # doubledagger, if not a few others). # # Lists which have ordered items, then unordered items, then # ordered items all at the same level are output as three # separate lists. As a result, the numbering in the third list # restarts even if you don't want it to. # # >>Resources:: # # >>Implementation:: # ##### Constants ##### # These are the tags which don't have/need a closing tag %_SGML_NOENDTAG = ( 'title', 1, 'author', 1, 'date', 1, 'sect', 1, 'sect1', 1, 'sect2', 1, 'sect3', 1, 'sect4', 1, 'sect5', 1, 'p', 1, ); # Mapping table for characters %_SGML_CHAR = ( 'bullet', '.', 'c', '©', 'cent', '¢', 'dagger', '^', 'doubledagger', '#', 'emdash', '—', 'endash', '–', 'emspace', ' ', 'enspace', ' ', 'lbrace', '{', 'lbracket', '[', 'nbdash', '-', 'nbspace', ' ', 'nl', '&nl;', 'pound', '£', 'r', '®', 'rbrace', '}', 'rbracket', ']', 'tab', ' ', 'tm', '™', # not sure about this 'yen', '¥', ); # Directive mapping table %_SGML_HANDLER = ( 'tuning', '_SgmlHandlerTuning', 'endtuning', '_SgmlHandlerEndTuning', 'table', '_SgmlHandlerTable', 'row', '_SgmlHandlerRow', 'cell', '_SgmlHandlerCell', 'endtable', '_SgmlHandlerEndTable', 'import', '_SgmlHandlerImport', 'inline', '_SgmlHandlerInline', 'output', '_SgmlHandlerOutput', 'object', '_SgmlHandlerObject', 'stylesheet', '', ); # Phrase directive mapping table %_SGML_PHRASE_HANDLER = ( 'char', '_SgmlPhraseHandlerChar', 'import', '_SgmlPhraseHandlerImport', 'inline', '_SgmlPhraseHandlerInline', 'variable', '_SgmlPhraseHandlerVariable', ); # Table states $_SGML_INTABLE = 1; $_SGML_INROW = 2; $_SGML_INCELL = 3; ##### Variables ##### # Table states @_sgml_tbl_state = (); @_sgml_tbl_endtokens = (); @_sgml_tbl_previndent = (); @_sgml_tbl_title = (); ##### Routines ##### # # >>Description:: # {{Y:SgmlFormat}} is an SDF driver which outputs SGML. # sub SgmlFormat { local(*data) = @_; local(@result); # Format the paragraphs @result = &_SgmlFormatSection(*data); # Build the final result. @result = &_SgmlFinalise(*result); # Return the result return @result; } # # >>_Description:: # {{Y:_SgmlFormatSection}} formats a set of SDF paragraphs into SGML. # sub _SgmlFormatSection { local(*data) = @_; local(@result); local($prev_tag, $prev_indent); local($para_tag, $para_text, %para_attrs); local($directive); # Process the paragraphs @result = (); $prev_tag = ''; $prev_indent = ''; while (($para_text, $para_tag, %para_attrs) = &SdfNextPara(*data)) { # handle directives if ($para_tag =~ /^__(\w+)$/) { $directive = $_SGML_HANDLER{$1}; if (defined &$directive) { &$directive(*result, $para_text, %para_attrs); } else { &AppTrace("sgml", 5, "ignoring internal directive '$1'"); } next; } # Add the paragraph &_SgmlParaAdd(*result, $para_tag, $para_text, *para_attrs, $prev_tag, $prev_indent); } # Do this stuff before starting next loop iteration continue { $prev_tag = $para_tag; $prev_indent = $para_attrs{'in'}; } # Return result return @result; } # # >>_Description:: # {{Y:_SgmlParaAdd}} adds a paragraph. # sub _SgmlParaAdd { local(*result, $para_tag, $para_text, *para_attrs, $prev_tag, $prev_indent) = @_; # local(); local($is_example); local($para_fmt); local($para_override); local($para); local($hdg_level); local($label); local($indent); local($list_tag); # Get the example flag $is_example = $SDF_USER'parastyles_category{$para_tag} eq 'example'; # Enumerated lists are the same as list paragraphs at the previous level, # except that we bold the text if ($para_tag =~ /^LI(\d)$/) { $para_tag = $1 > 1 ? "L" . ($1 - 1) : 'N'; $para_attrs{'bold'} = 1; } # Get the target format name $para_fmt = $SDF_USER'parastyles_to{$para_tag}; $para_fmt = $is_example ? 'tscreen' : 'p' if $para_fmt eq ''; # Map the attributes &SdfAttrMap(*para_attrs, 'sgml', *SDF_USER'paraattrs_to, *SDF_USER'paraattrs_map, *SDF_USER'paraattrs_attrs, $SDF_USER'parastyles_attrs{$para_tag}); # Handle headings if ($para_tag =~ /^[HAP](\d)$/) { $hdg_level = $1 - 1; $para_fmt = $hdg_level ? "sect$hdg_level" : "sect"; } # Handle lists elsif ($para_tag =~ /^(L[FUN]?)(\d)$/) { $para_attrs{'in'} = $2; if ($1 eq 'LU') { $para_fmt = 'itemize'; } elsif ($1 eq 'L') { $para_fmt = 'list'; } else { $para_fmt = 'enum'; } } # Prepend the label, if any (replacing tabs with spaces) $label = $para_attrs{'label'}; $label = 'Note: ' if ($para_tag eq 'Note' || $para_tag eq 'NB') && $label eq ''; $label =~ s/\\t/ /g; $para_text = "{{2:$label}}$para_text" if $label ne ''; # Indent examples, if necessary if ($is_example && $para_attrs{'in'}) { $para_text = " " x ($para_attrs{'in'} * 4) . $para_text; delete $para_attrs{'in'}; } # Format the paragraph body if ($para_attrs{'verbatim'}) { $para = &_SgmlEscape($para_text); delete $para_attrs{'verbatim'}; } else { $para = &_SgmlParaText($para_text); } ## Examples with change bars currently come out as separate ## paragraphs - this fixes the problem, for now #delete $para_attrs{'changed'} if $para_attrs{'changed'}; # Build result $indent = $para_attrs{'in'}; #if ($is_example && $para_tag eq $prev_tag && !%para_attrs) { if ($is_example && $para_tag eq $prev_tag) { &_SgmlParaAppend(*result, $para); } elsif ($indent && $prev_indent != 0) { $item = &_SgmlElement($para_fmt, $para, %para_attrs); &_SgmlItemAppend(*result, $item, $indent, $prev_indent, $para_tag, $prev_tag, *para_attrs); } # Plain paragraphs inside tables are not preceded by

elsif (@_sgml_tbl_state && $para_fmt eq 'p') { push(@result, $para); } else { # After a heading, make sure the next entity is another heading # or a plain paragraph if ($prev_tag =~ /^[HAP]\d$/ && $para_fmt ne 'p' && $para_fmt !~ /^sect\d?$/) { push(@result, '

'); } # Add this element, handling lists which begin at an indent # greater than 1 $para = &_SgmlElement($para_fmt, $para, %para_attrs); $list_tag = $para_fmt; while (--$indent > 0) { $para = "<$list_tag>$para"; } push(@result, $para); } } # # >>_Description:: # {{Y:_SgmlParaText}} converts SDF paragraph text into SGML. # sub _SgmlParaText { local($para_text) = @_; local($para); local($state); local($sect_type, $char_tag, $text, %sect_attrs); local($url); local($added_anchors); local(@char_fonts); local($char_font); local($directive); # Process the text $para = ''; $state = 0; while (($sect_type, $text, $char_tag, %sect_attrs) = &SdfNextSection(*para_text, *state)) { # Build the paragraph if ($sect_type eq 'string') { $para .= &_SgmlEscape($text); } elsif ($sect_type eq 'phrase') { # Expand out link phrases if ($char_tag eq 'L') { ($text, $url) = &SDF_USER'ExpandLink($text); $sect_attrs{'jump'} = $url; } # Escape any special characters $text = &_SgmlEscape($text); # Expand non-breaking spaces, if necessary if ($char_tag eq 'S') { $text =~ s/ /~/g; } # Add hypertext stuff $added_anchors = &_SgmlAddAnchors(*text, *sect_attrs); # Process formatting attributes &SdfAttrMap(*sect_attrs, 'sgml', *SDF_USER'phraseattrs_to, *SDF_USER'phraseattrs_map, *SDF_USER'phraseattrs_attrs, $SDF_USER'phrasestyles_attrs{$char_tag}); # Map the font $char_font = $SDF_USER'phrasestyles_to{$char_tag}; $char_font = 'em' if $char_font eq '' && !$added_anchors; # Add the text for this phrase push(@char_fonts, $char_font); if ($char_font ne '' && $char_font !~ /^SDF/) { $para .= "<$char_font>$text"; } else { $para .= $text; } } elsif ($sect_type eq 'phrase_end') { $char_font = pop(@char_fonts); $para .= "" if $char_font ne '' && $char_font !~ /^SDF/; } elsif ($sect_type eq 'special') { $directive = $_SGML_PHRASE_HANDLER{$char_tag}; if (defined &$directive) { &$directive(*para, $text, %sect_attrs); } else { &AppMsg("warning", "ignoring special phrase '$1' in SGML driver"); } } else { &AppMsg("warning", "unknown section type '$sect_type' in SGML driver"); } } # Return result return $para; } # # >>_Description:: # {{Y:_SgmlFinalise}} generates the final SGML file. # sub _SgmlFinalise { local(*body) = @_; # local(@result); local(@head); # Build the preamble my $dtd = $var{'SGML_DTD'} || 'linuxdoc'; my @head = ( "", '', '

', '', ); # Add the ending stuff push(@body, '', '
'); # Return result return (@head, @body); } # # >>_Description:: # {{Y:_SgmlEscape}} escapes special symbols in SGML text. # sub _SgmlEscape { local($text) = @_; # local($result); local($old_match_flag); # Enable multi-line matching $old_match_flag = $*; $* = 1; # Escape the special symbols. Note that it isn't exactly clear # from the SGML-Tools and/or QWERTZ DTD documentation as to # whether all of these are mandatory, but they shouldn't cause # any harm (I hope!) $text =~ s/\&/&/g; $text =~ s/\/>/g; $text =~ s/\"/&dquot;/g; $text =~ s/\$/$/g; $text =~ s/\~/˜/g; $text =~ s/\#/#/g; $text =~ s/\%/%/g; $text =~ s/\\/\/g; $text =~ s/\|/|/g; $text =~ s/\[/&ftag;/g; # Reset multi-line matching flag $* = $old_match_flag; # Return result $text; } # # >>_Description:: # {{Y:_SgmlAttr}} formats a set of attributes into SGML. # sub _SgmlAttr { local(*attrs) = @_; local($sgml); local($attr, $value, $type); for $attr (sort keys %attrs) { # get the attribute value $value = $attrs{$attr}; # get the attribute type if ($attr =~ s/^sgml\.//) { $type = $_SGML_ATTR_TYPES{$attr}; $type = "string" if $type eq ''; } else { $type = $_SGML_ATTR_TYPES{$attr}; } next unless $type; # Map the attribute name to uppercase $attr =~ tr/a-z/A-Z/; # build the result if ($type eq 'string') { $sgml .= " $attr=\"" . &_SgmlEscape($value) . '"'; } else { $sgml .= " $attr=$value"; } } # Return result $sgml; } # # >>_Description:: # {{Y:_SgmlElement}} formats a SGML element from a # tag, text and set of attributes. # sub _SgmlElement { local($tag, $text, %attr) = @_; # local($sgml); # For preformatted sections, tags go on separate lines $text = "\n$text\n" if $tag eq 'tscreen'; # Add hypertext stuff &_SgmlAddAnchors(*text, *attr); # Bold the text, if requested if ($attr{'bold'}) { $text = "$text"; } # For list items, add the item stuff $text = "\n$text" if $tag =~ /^(itemize|enum|list)$/; # Return result if ($_SGML_NOENDTAG{$tag}) { return "<$tag>$text"; } else { #return "<$tag" . &_SgmlAttr(*attr) . ">$text"; return "<$tag>$text"; } } # # >>_Description:: # {{Y:_SgmlAddAnchors}} adds hypertext jumps and ids to a section of text. # of text. It returns true if anchors were added. # sub _SgmlAddAnchors { local(*text, *attr) = @_; local($result); local($value); local($user_ext); local($old_match_flag); # Skip this routine for now return 0; # Enable multi-line matching $old_match_flag = $*; $* = 1; # For hypertext jumps, surround the text. If the # text contains a jump, the existing jump is removed. if ($attr{'jump'} ne '') { # Get the jump value. If an extension other than sgml is # requested, change the jump value accordingly. Also, # we make sure than any special characters are escaped. $value = $attr{'jump'}; $user_ext = $SDF_USER'var{'SGML_EXT'}; if ($user_ext) { $value =~ s/\.sgml/.$user_ext/; } $value = &_SgmlEscape($value); $text =~ s/\]+\>(.*)\<\/A\>/$1/; $text = "$text"; delete $attr{'jump'}; $result++; } # For hypertext ids, surround the text if it doesn't already contain # a jump. Otherwise, prefix the text with a dummy target so that # jump and id definitions don't clash. if ($attr{'id'} ne '') { $value = &_SgmlEscape($attr{'id'}); if ($text =~ /\ $text"; } else { $text = "$text"; } delete $attr{'id'}; $result++; } # Reset multi-line matching flag $* = $old_match_flag; # Return result return $result; } # # >>_Description:: # {{Y:_SgmlParaAppend}} merges {{para}} into the last paragraph # in {{@result}}. Both paragraphs are assumed to be fixed-width. # sub _SgmlParaAppend { local(*result, $para) = @_; # local(); #$para = " " if $para eq ''; substr($result[$#result], -10) = "$para&nl;\n"; } # # >>_Description:: # {{Y:_SgmlItemAppend}} merges a list item {{item}} into the current # output. The item before is assumed to be a list item too. # sub _SgmlItemAppend { local(*result, $item, $indent, $prev_indent, $para_tag, $prev_tag, *para_attrs) = @_; # local(); local($type, $prev_type); local($posn, $end_tokens); # Get the list type and previous type if ($item =~ /^<(\w+)/) { $type = $1; } else { &AppMsg("warning", "unable to get list type during item merge"); } if ($result[$#result] =~ /(\w+)>$/) { $prev_type = $1; } else { &AppMsg("warning", "unable to get previous type during item merge"); } # Indent is increasing if ($indent > $prev_indent) { if ($result[$#result] =~ s/(<\/\w+>){$prev_indent}$//) { $end_tokens = $&; } else { &AppMsg("warning", "unable to get endtokens during item merge"); } while (++$prev_indent < $indent) { $item = "<$type>$item"; } } # Indent is descreasing or the same else { # handle items of an existing list if ($type eq $prev_type) { if ($result[$#result] =~ s/(<\/\w+>){$indent}$//) { $end_tokens = $&; } else { &AppMsg("warning", "unable to get endtokens during item merge"); } if ($item =~ /^<\w+>(.+)<\/\w+>$/s) { $item = $1; } else { &AppMsg("warning", "unable to get item during item merge"); } } # item is not compatible with the current list - start a new one else { $end_tokens = ''; $indent--; if ($indent) { if ($result[$#result] =~ s/(<\/\w+>){$indent}$//) { $end_tokens = $&; } else { &AppMsg("warning", "unable to get endtokens during item merge"); } } } } # Merge the item $result[$#result] .= "$item$end_tokens"; } # # >>_Description:: # {{Y:_SgmlHandlerTuning}} handles the 'tuning' directive. # sub _SgmlHandlerTuning { local(*outbuffer, $style, %attr) = @_; # local(); # do nothing } # # >>_Description:: # {{Y:_SgmlHandlerEndTuning}} handles the 'endtuning' directive. # sub _SgmlHandlerEndTuning { local(*outbuffer, $style, %attr) = @_; # local(); # do nothing } # # >>_Description:: # {{Y:_SgmlHandlerTable}} handles the 'table' directive. # sub _SgmlHandlerTable { local(*outbuffer, $columns, %attr) = @_; # local(); local($indent, $previous_indent, $posn, $begin_tokens, $end_tokens); # Handle tables inside a list # Note: the previous indent is available as a dynamically # scoped variable in &SgmlFormatSection $indent = $attr{'listitem'}; $begin_tokens = ''; $end_tokens = ''; if ($indent) { $previous_indent = $prev_indent; # get dynamically scoped var if ($indent > $previous_indent) { $posn = -7 * $previous_indent; while ($previous_indent++ < $indent) { $begin_tokens .= ""; $end_tokens .= ""; } } else { $posn = -$indent * 7; } if ($posn < 0) { $end_tokens .= substr($outbuffer[$#outbuffer], $posn); substr($outbuffer[$#outbuffer], $posn) = $begin_tokens; } else { push(@outbuffer, $begin_tokens); } } # Update the state push(@_sgml_tbl_state, $_SGML_INTABLE); push(@_sgml_tbl_endtokens, $end_tokens); push(@_sgml_tbl_previndent, $indent); push(@_sgml_tbl_title, $attr{'title'}); # Build the layout my @col_aligns = split(//, 'l' x $columns); my @user_aligns = split(/,/, $attr{'colaligns'}); my $i; for ($i = 0; $i <= $#user_aligns; $i++) { $col_aligns[$i] = lc(substr($user_aligns[$i], 0, 1)); } my $col_sep = $attr{'style'} eq 'plain' ? '' : "|"; my $layout = join($col_sep, @col_aligns); # Update the output buffer push(@outbuffer, "", ""); } # # >>_Description:: # {{Y:_SgmlHandlerRow}} handles the 'row' directive. # sub _SgmlHandlerRow { local(*outbuffer, $text, %attr) = @_; # local(); local($state); # Finalise the old cell/row, if any $state = $_sgml_tbl_state[$#_sgml_tbl_state]; if ($state eq $_SGML_INCELL) { push(@outbuffer, "@"); } elsif ($state eq $_SGML_INROW) { push(@outbuffer, "@"); } # Update the state $_sgml_tbl_state[$#_sgml_tbl_state] = $_SGML_INROW; } # # >>_Description:: # {{Y:_SgmlHandlerCell}} handles the 'cell' directive. # sub _SgmlHandlerCell { local(*outbuffer, $text, %attr) = @_; # local(); local($state); # If the cell is hidden, output nothing return if $attr{'hidden'}; # Finalise the old cell, if any $state = $_sgml_tbl_state[$#_sgml_tbl_state]; if ($state eq $_SGML_INCELL) { push(@outbuffer, "|"); } # Update the state $_sgml_tbl_state[$#_sgml_tbl_state] = $_SGML_INCELL; } # # >>_Description:: # {{Y:_SgmlHandlerEndTable}} handles the 'endtable' directive. # sub _SgmlHandlerEndTable { local(*outbuffer, $text, %attr) = @_; # local(); # Update the state my $state = pop(@_sgml_tbl_state); my $tbl_title = pop(@_sgml_tbl_title); my $end_tokens = pop(@_sgml_tbl_endtokens); # Finalise the table push(@outbuffer, ""); if ($tbl_title ne '') { push(@outbuffer, ""); } push(@outbuffer, "
" . $tbl_title . "
"); # Terminate the list, if any push(@outbuffer, $end_tokens); # Restore the previous indent. We do this by hacking the # %para_attrs hash dynamically scoped in &SgmlFormatSection. :-( $para_attrs{'in'} = pop(@_sgml_tbl_previndent); } # # >>_Description:: # {{Y:_SgmlHandlerImport}} handles the import directive. # sub _SgmlHandlerImport { local(*outbuffer, $filepath, %attr) = @_; # local(); local($para); # Build the result &_SgmlPhraseHandlerImport(*para, $filepath, %attr); push(@outbuffer, $para); } # # >>_Description:: # {{Y:_SgmlHandlerInline}} handles the inline directive. # sub _SgmlHandlerInline { local(*outbuffer, $text, %attr) = @_; # local(); # Check we can handle this format my $target = $attr{'target'}; return unless $target eq 'sgml'; # Build the result push(@outbuffer, $text); } # # >>_Description:: # {{Y:_SgmlHandlerOutput}} handles the output directive. # sub _SgmlHandlerOutput { local(*outbuffer, $text, %attr) = @_; # local(); # do nothing } # # >>_Description:: # {{Y:_SgmlHandlerObject}} handles the 'object' directive. # sub _SgmlHandlerObject { local(*outbuffer, $text, %attrs) = @_; # local(); # do nothing } # # >>_Description:: # {{Y:_SgmlPhraseHandlerChar}} handles the 'char' phrase directive. # sub _SgmlPhraseHandlerChar { local(*para, $text, %attr) = @_; # local(); # Map the symbolic names if (defined($_SGML_CHAR{$text})) { $para .= $_SGML_CHAR{$text}; } else { # Numbers are ISO character codes $para .= $text =~ /\D/ ? "&$text;" : "&#$text;"; } } # # >>_Description:: # {{Y:_SgmlPhraseHandlerImport}} handles the 'import' phrase directive. # sub _SgmlPhraseHandlerImport { local(*para, $filepath, %attr) = @_; # local(); # Trim the extension off the filepath $filepath =~ s/\.\w+$//; # Build the result $para .= "
\n\n
"; } # # >>_Description:: # {{Y:_SgmlPhraseHandlerInline}} handles the 'inline' phrase directive. # sub _SgmlPhraseHandlerInline { local(*para, $text, %attr) = @_; # local(); # Build the result $para .= $text; } # # >>_Description:: # {{Y:_SgmlPhraseHandlerVariable}} handles the 'variable' phrase directive. # sub _SgmlPhraseHandlerVariable { local(*para, $text, %attr) = @_; # local(); # do nothing } # package return value 1;