6 $usage = "Usage: $0 <opts> file
\r
8 -p Link to points in the document. The default is to link
\r
9 to the closest containing section.
\r
10 -g Group terms with IndexDiv based on the first letter
\r
11 of the term (or its sortas attribute).
\r
12 (This probably doesn't handle i10n particularly well)
\r
13 -s name Name the IndexDiv that contains symbols. The default
\r
14 is 'Symbols'. Meaningless if -g is not used.
\r
15 -t name Title for the index.
\r
16 -P file Read a preamble from file. The content of file will
\r
17 be inserted before the <index> tag.
\r
18 -i id The ID for the <index> tag.
\r
19 -o file Output to file. Defaults to stdout.
\r
20 -S scope Scope of the index, must be 'all', 'local', or 'global'.
\r
21 If unspecified, 'all' is assumed.
\r
22 -I scope The implied scope, must be 'all', 'local', or 'global'.
\r
23 IndexTerms which do not specify a scope will have the
\r
24 implied scope. If unspecified, 'all' is assumed.
\r
26 -f Force the output file to be written, even if it appears
\r
27 to have been edited by hand.
\r
28 -N New index (generates an empty index file).
\r
29 file The file containing index data generated by Jade
\r
30 with the DocBook HTML Stylesheet.\n";
\r
32 die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:x');
\r
34 $linkpoints = $opt_p;
\r
35 $lettergroups = $opt_g;
\r
36 $symbolsname = $opt_s || "Symbols";
\r
39 $outfile = $opt_o || '-';
\r
41 $scope = uc($opt_S) || 'ALL';
\r
42 $impliedscope = uc($opt_I) || 'ALL';
\r
44 $forceoutput = $opt_f;
\r
48 $indextag = $setindex ? 'setindex' : 'index';
\r
51 safe_open(*OUT, $outfile);
\r
53 print OUT "<$indextag id='$indexid'>\n\n";
\r
55 print OUT "<$indextag>\n\n";
\r
58 print OUT "<!-- This file was produced by collateindex.pl. -->\n";
\r
59 print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
\r
61 print OUT "</$indextag>\n";
\r
65 $dat = shift @ARGV || die $usage;
\r
66 die "$0: cannot find $dat.\n" if ! -f $dat;
\r
68 %legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1);
\r
69 if ($scope && !$legal_scopes{$scope}) {
\r
70 die "Invalid scope.\n$usage\n";
\r
72 if ($impliedscope && !$legal_scopes{$impliedscope}) {
\r
73 die "Invalid implied scope.\n$usage\n";
\r
81 print STDERR "Processing $dat...\n";
\r
83 # Read the index file, creating an array of objects. Each object
\r
84 # represents and indexterm and has fields for the content of the
\r
91 if (/^\/indexterm/i) {
\r
96 if (/^indexterm (.*)$/i) {
\r
99 $idx->{'zone'} = {};
\r
100 $idx->{'href'} = $1;
\r
101 $idx->{'count'} = $termcount;
\r
102 $idx->{'scope'} = $impliedscope;
\r
106 if (/^indexpoint (.*)$/i) {
\r
107 $idx->{'hrefpoint'} = $1;
\r
111 if (/^title (.*)$/i) {
\r
112 $idx->{'title'} = $1;
\r
116 if (/^primary[\[ ](.*)$/i) {
\r
117 if (/^primary\[(.*?)\] (.*)$/i) {
\r
118 $idx->{'psortas'} = $1;
\r
119 $idx->{'primary'} = $2;
\r
121 $idx->{'psortas'} = $1;
\r
122 $idx->{'primary'} = $1;
\r
127 if (/^secondary[\[ ](.*)$/i) {
\r
128 if (/^secondary\[(.*?)\] (.*)$/i) {
\r
129 $idx->{'ssortas'} = $1;
\r
130 $idx->{'secondary'} = $2;
\r
132 $idx->{'ssortas'} = $1;
\r
133 $idx->{'secondary'} = $1;
\r
138 if (/^tertiary[\[ ](.*)$/i) {
\r
139 if (/^tertiary\[(.*?)\] (.*)$/i) {
\r
140 $idx->{'tsortas'} = $1;
\r
141 $idx->{'tertiary'} = $2;
\r
143 $idx->{'tsortas'} = $1;
\r
144 $idx->{'tertiary'} = $1;
\r
149 if (/^see (.*)$/i) {
\r
150 $idx->{'see'} = $1;
\r
154 if (/^seealso (.*)$/i) {
\r
155 $idx->{'seealso'} = $1;
\r
159 if (/^significance (.*)$/i) {
\r
160 $idx->{'significance'} = $1;
\r
164 if (/^class (.*)$/i) {
\r
165 $idx->{'class'} = $1;
\r
169 if (/^scope (.*)$/i) {
\r
170 $idx->{'scope'} = uc($1);
\r
174 if (/^startref (.*)$/i) {
\r
175 $idx->{'startref'} = $1;
\r
179 if (/^id (.*)$/i) {
\r
185 if (/^zone (.*)$/i) {
\r
189 die "Bad zone: $_\n" if !/^title (.*)$/i;
\r
190 $idx->{'zone'}->{$href} = $1;
\r
194 die "Unrecognized: $_\n";
\r
198 print STDERR "$termcount entries loaded...\n";
\r
200 # Fixup the startrefs...
\r
201 # In DocBook, STARTREF is a #CONREF attribute; support this by copying
\r
202 # all of the fields from the indexterm with the id specified by STARTREF
\r
203 # to the indexterm that has the STARTREF.
\r
204 foreach $idx (@term) {
\r
205 my($ididx, $field);
\r
206 if ($idx->{'startref'}) {
\r
207 $ididx = $id{$idx->{'startref'}};
\r
208 foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso',
\r
209 'psortas', 'ssortas', 'tsortas', 'significance',
\r
210 'class', 'scope') {
\r
211 $idx->{$field} = $ididx->{$field};
\r
216 # Sort the index terms
\r
217 @term = sort termsort @term;
\r
219 # Move all of the non-alphabetic entries to the front of the index.
\r
220 @term = sortsymbols(@term);
\r
222 safe_open(*OUT, $outfile);
\r
224 # Write the index...
\r
226 print OUT "<$indextag id='$indexid'>\n\n";
\r
228 print OUT "<$indextag>\n\n";
\r
231 print OUT "<!-- This file was produced by collateindex.pl. -->\n";
\r
232 print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
\r
234 print OUT "<!-- ULINK is abused here.
\r
236 The URL attribute holds the URL that points from the index entry
\r
237 back to the appropriate place in the output produced by the HTML
\r
238 stylesheet. (It's much easier to calculate this URL in the first
\r
241 The Role attribute holds the ID (either real or manufactured) of
\r
242 the corresponding INDEXTERM. This is used by the print backends
\r
243 to produce page numbers.
\r
245 The entries below are sorted and collated into the correct order.
\r
246 Duplicates may be removed in the HTML backend, but in the print
\r
247 backends, it is impossible to suppress duplicate pages or coalesce
\r
248 sequences of pages into a range.
\r
251 print OUT "<title>$title</title>\n\n" if $title;
\r
253 $last = {}; # the last indexterm we processed
\r
254 $first = 1; # this is the first one
\r
255 $group = ""; # we're not in a group yet
\r
256 $lastout = ""; # we've not put anything out yet
\r
258 foreach $idx (@term) {
\r
259 next if $idx->{'startref'}; # no way to represent spans...
\r
260 next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL');
\r
261 next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL');
\r
262 next if &same($idx, $last); # suppress duplicates
\r
266 # If primary changes, output a whole new index term, otherwise just
\r
267 # output another secondary or tertiary, as appropriate. We know from
\r
268 # sorting that the terms will always be in the right order.
\r
269 if (!&tsame($last, $idx, 'primary')) {
\r
270 print "DIFF PRIM\n" if $debug;
\r
271 &end_entry() if not $first;
\r
273 if ($lettergroups) {
\r
274 # If we're grouping, make the right indexdivs
\r
275 $letter = $idx->{'psortas'};
\r
276 $letter = $idx->{'primary'} if !$letter;
\r
277 $letter = uc(substr($letter, 0, 1));
\r
279 # symbols are a special case
\r
280 if (($letter lt 'A') || ($letter gt 'Z')) {
\r
282 || (($group ge 'A') && ($group le 'Z'))) {
\r
283 print OUT "</indexdiv>\n" if !$first;
\r
284 print OUT "<indexdiv><title>$symbolsname</title>\n\n";
\r
287 } elsif (($group eq '') || ($group ne $letter)) {
\r
288 print OUT "</indexdiv>\n" if !$first;
\r
289 print OUT "<indexdiv><title>$letter</title>\n\n";
\r
294 $first = 0; # there can only be on first ;-)
\r
296 print OUT "<indexentry>\n";
\r
297 print OUT " <primaryie>", $idx->{'primary'};
\r
298 $lastout = "primaryie";
\r
300 if ($idx->{'secondary'}) {
\r
301 print OUT "\n </primaryie>\n";
\r
302 print OUT " <secondaryie>", $idx->{'secondary'};
\r
303 $lastout = "secondaryie";
\r
306 if ($idx->{'tertiary'}) {
\r
307 print OUT "\n </secondaryie>\n";
\r
308 print OUT " <tertiaryie>", $idx->{'tertiary'};
\r
309 $lastout = "tertiaryie";
\r
311 } elsif (!&tsame($last, $idx, 'secondary')) {
\r
312 print "DIFF SEC\n" if $debug;
\r
314 print OUT "\n </$lastout>\n" if $lastout;
\r
316 print OUT " <secondaryie>", $idx->{'secondary'};
\r
317 $lastout = "secondaryie";
\r
318 if ($idx->{'tertiary'}) {
\r
319 print OUT "\n </secondaryie>\n";
\r
320 print OUT " <tertiaryie>", $idx->{'tertiary'};
\r
321 $lastout = "tertiaryie";
\r
323 } elsif (!&tsame($last, $idx, 'tertiary')) {
\r
324 print "DIFF TERT\n" if $debug;
\r
326 print OUT "\n </$lastout>\n" if $lastout;
\r
328 if ($idx->{'tertiary'}) {
\r
329 print OUT " <tertiaryie>", $idx->{'tertiary'};
\r
330 $lastout = "tertiaryie";
\r
339 # Termcount is > 0 iff some entries were skipped.
\r
340 print STDERR "$termcount entries ignored...\n";
\r
344 print OUT "</indexdiv>\n" if $lettergroups;
\r
345 print OUT "</$indextag>\n";
\r
349 print STDERR "Done.\n";
\r
355 my($aP) = $a->{'psortas'} || $a->{'primary'};
\r
356 my($aS) = $a->{'ssortas'} || $a->{'secondary'};
\r
357 my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
\r
359 my($bP) = $b->{'psortas'} || $b->{'primary'};
\r
360 my($bS) = $b->{'ssortas'} || $b->{'secondary'};
\r
361 my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
\r
365 $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
\r
366 $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
\r
367 $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
\r
368 $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
\r
369 $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
\r
370 $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
\r
372 # print "[$aP]=[$bP]\n";
\r
373 # print "[$aS]=[$bS]\n";
\r
374 # print "[$aT]=[$bT]\n";
\r
376 # Two index terms are the same if:
\r
377 # 1. the primary, secondary, and tertiary entries are the same
\r
378 # (or have the same SORTAS)
\r
380 # 2. They occur in the same titled section
\r
382 # 3. They point to the same place
\r
384 # Notes: Scope is used to suppress some entries, but can't be used
\r
385 # for comparing duplicates.
\r
386 # Interpretation of "the same place" depends on whether or
\r
387 # not $linkpoints is true.
\r
389 $same = (($aP eq $bP)
\r
392 && ($a->{'title'} eq $b->{'title'})
\r
393 && ($a->{'href'} eq $b->{'href'}));
\r
395 # If we're linking to points, they're only the same if they link
\r
396 # to exactly the same spot. (surely this is redundant?)
\r
397 $same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'})
\r
404 # Unlike same(), tsame only compares a single term
\r
408 my($sterm) = substr($term, 0, 1) . "sortas";
\r
411 $A = $a->{$sterm} || $a->{$term};
\r
412 $B = $b->{$sterm} || $b->{$term};
\r
414 $A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A);
\r
415 $B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B);
\r
421 # End any open elements...
\r
422 print OUT "\n </$lastout>\n" if $lastout;
\r
423 print OUT "</indexentry>\n\n";
\r
428 # Print out the links for an indexterm. There can be more than
\r
429 # one if the term has a ZONE that points to more than one place.
\r
430 # (do we do the right thing in that case?)
\r
432 my($key, $indent, @hrefs);
\r
438 if ($idx->{'see'}) {
\r
439 # it'd be nice to make this a link...
\r
441 print OUT "\n </$lastout>\n";
\r
444 print OUT $indent, "<seeie>", $idx->{'see'}, "</seeie>\n";
\r
448 if ($idx->{'seealso'}) {
\r
449 # it'd be nice to make this a link...
\r
451 print OUT "\n </$lastout>\n";
\r
454 print OUT $indent, "<seealsoie>", $idx->{'seealso'}, "</seealsoie>\n";
\r
458 if (keys %{$idx->{'zone'}}) {
\r
459 foreach $key (keys %{$idx->{'zone'}}) {
\r
460 $href{$key} = $idx->{'zone'}->{$key};
\r
461 $phref{$key} = $idx->{'zone'}->{$key};
\r
464 $href{$idx->{'href'}} = $idx->{'title'};
\r
465 $phref{$idx->{'href'}} = $idx->{'hrefpoint'};
\r
468 # We can't use <LINK> because we don't know the ID of the term in the
\r
469 # original source (and, in fact, it might not have one).
\r
471 @hrefs = keys %href;
\r
475 $key = shift @hrefs;
\r
477 $linkend = $phref{$key};
\r
483 $role = $1 if $role =~ /\#(.*)$/;
\r
486 print OUT "<ulink url=\"$linkend\" role=\"$role\">";
\r
487 print OUT "<emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
\r
488 print OUT $href{$key};
\r
489 print OUT "</emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
\r
490 print OUT "</ulink>";
\r
495 my($aP) = $a->{'psortas'} || $a->{'primary'};
\r
496 my($aS) = $a->{'ssortas'} || $a->{'secondary'};
\r
497 my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
\r
498 my($ap) = $a->{'count'};
\r
500 my($bP) = $b->{'psortas'} || $b->{'primary'};
\r
501 my($bS) = $b->{'ssortas'} || $b->{'secondary'};
\r
502 my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
\r
503 my($bp) = $b->{'count'};
\r
505 $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
\r
506 $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
\r
507 $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
\r
508 $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
\r
509 $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
\r
510 $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
\r
515 # make sure seealso's always sort to the bottom
\r
516 return 1 if ($a->{'seealso'});
\r
517 return -1 if ($b->{'seealso'});
\r
518 # if everything else is the same, keep these elements
\r
519 # in document order (so the index links are in the right
\r
521 return $ap <=> $bp;
\r
523 return $aT cmp $bT;
\r
526 return $aS cmp $bS;
\r
529 return $aP cmp $bP;
\r
540 # Move the non-letter things to the front. Should digits be thier
\r
541 # own group? Maybe...
\r
542 foreach $idx (@term) {
\r
543 $letter = $idx->{'psortas'};
\r
544 $letter = $idx->{'primary'} if !$letter;
\r
545 $letter = uc(substr($letter, 0, 1));
\r
547 if (($letter lt 'A') || ($letter gt 'Z')) {
\r
554 return (@sym, @new);
\r
558 local(*OUT) = shift;
\r
561 if (($outfile ne '-') && (!$forceoutput)) {
\r
563 if (open (OUT, $outfile)) {
\r
565 if (/<!-- Remove this comment if you edit this file by hand! -->/){
\r
576 print "\n$outfile appears to have been edited by hand; use -f or\n";
\r
577 print " change the output file.\n";
\r
582 open (OUT, ">$outfile") || die "$usage\nCannot write to $outfile.\n";
\r
585 # Copy the preamble
\r
586 if (open(F, $preamble)) {
\r
592 warn "$0: cannot open preamble $preamble.\n";
\r