#! /usr/bin/perl
#
# Copyright 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
#
# Make a DocBook chart showing compose combinations for a locale
#
# See perldoc at end (or run with --help or --man options) for details
# of command-line options.
#
# Compose file grammar is defined in modules/im/ximcp/imLcPrs.c
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
my $error_count = 0;
my $charset;
my $locale_name;
my $output_filename = '-';
my $man = 0;
my $help = 0;
my $make_index = 0;
GetOptions ('charset:s' => \$charset,
	    'locale=s' => \$locale_name,
	    'output=s' => \$output_filename,
	    'index' => \$make_index,
	    'help|?' => \$help,
	    'man' => \$man)
    or pod2usage(2);
pod2usage(1) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
if (!defined($charset) || ($charset eq "")) {
  if (defined($locale_name)) {
    my $guessed_charset = $locale_name;
    $guessed_charset =~ s{^.*\.}{};
    if ($guessed_charset =~ m{^(utf-8|gbk|gb18030)$}i) {
      $charset = $1;
    } elsif ($guessed_charset =~ m{iso8859-(\d+)}i) {
      $charset = "iso-8859-$1";
    } elsif ($guessed_charset =~ m{^microsoft-cp(125\d)$}) {
      $charset = "windows-$1";
    }
  }
  if (!defined($charset) || ($charset eq "")) {
    $charset = "utf-8";
  }
}
if ($make_index) {
  # Print Docbook output
  open my $OUTPUT, '>', $output_filename
      or die "Could not create $output_filename: $!";
  print $OUTPUT
      join ("\n",
	    qq(),
	    q(),
	    q(),
	    q(  ),
	    q(    Xlib Compose Key Charts),
	    q(  ),
	    ( map { qq(  \
    \
  ) }
	      @ARGV ),
	    q(),
	    "\n"
      );
  close $OUTPUT or die "Couldn't write $output_filename: $!";
  exit(0);
}
foreach my $a (@ARGV) {
  $error_count += make_compose_chart($a);
}
exit($error_count);
sub make_compose_chart {
  my ($filename) = @_;
  my $errors = 0;
  my @compose_table = ();
  my @included_files = ();
  my $line = 0;
  my $pre_file = ($filename =~ m{\.pre$}) ? 1 : 0;
  my $in_c_comment = 0;
  my $in_comment = 0;
  my $keyseq_count = 0;
  open my $COMPOSE, '<', $filename or die "Could not open $filename: $!";
 COMPOSE_LINE:
  while (my $cl = <$COMPOSE>) {
    $line++;
    chomp($cl);
    my $original_line = $cl;
    # Special handling for changes cpp makes to .pre files
    if ($pre_file == 1) {
      if ($in_c_comment) {		# Look for end of multi-line C comment
	if ($cl =~ m{\*/(.*)$}) {
	  $cl = $1;
	  $in_c_comment = 0;
	} else {
	  next;
	}
      }
      $cl =~ s{/\*.\**/}{};		# Remove single line C comments
      if ($cl =~ m{^(.*)/\*}) {		# Start of a multi-line C comment
	$cl = $1;
	$in_c_comment = 1;
      }
      $cl =~ s{^\s*XCOMM}{#};		# Translate pre-processing comments
    }
    chomp($cl);
    if ($cl =~ m{^\s*#\s*(.*)$}) {	# Comment only lines
      # Combine comment blocks
      my $comment = $1;
      if ($in_comment) {
	my $prev_comment = pop @compose_table;
	$comment = join(' ', $prev_comment->{-comment}, $comment);
      } else {
	$in_comment = 1;
      }
      push @compose_table, { -type => 'comment', -comment => $comment };
      next COMPOSE_LINE;
    }
    $in_comment = 0;
    if ($cl =~ m{^\s*$}) {		# Skip blank lines
      next COMPOSE_LINE;
    }
    elsif ($cl =~ m{^(STATE\s+|END_STATE)}) {
      # Sun extension to compose file syntax
      next COMPOSE_LINE;
    }
    elsif ($cl =~ m{^([^:]+)\s*:\s*(.+)$}) {
      my ($seq, $action) = ($1, $2);
      $seq =~ s{\s+$}{};
      my @keys = grep { $_ !~ m/^\s*$/ } split /[\s\<\>]+/, $seq;
      push @compose_table, {
	-type => 'keyseq',
	-keys => [ @keys ],
	-action => $action
      };
      $keyseq_count++;
      next COMPOSE_LINE;
    } elsif ($cl =~ m{^(STATE_TYPE:|\@StartDeadKeyMap|\@EndDeadKeyMap)}) {
      # ignore
      next COMPOSE_LINE;
    } elsif ($cl =~ m{^include "(.*)"}) {
      my $incpath = $1;
      $incpath =~ s{^X11_LOCALEDATADIR/(.*)/Compose}{the $1 compose table};
      push @included_files, $incpath;
      next COMPOSE_LINE;
    } else {
      print STDERR ('Unrecognized pattern in ', $filename,
		    ' on line #', $line, ":\n  ", $cl, "\n");
    }
  }
  close $COMPOSE;
  if ($errors > 0) {
    return $errors;
  }
  # Print Docbook output
  open my $OUTPUT, '>', $output_filename
      or die "Could not create $output_filename: $!";
  print $OUTPUT
      join ("\n",
	    qq(),
	    q(),
	    qq(),
	    qq(Xlib Compose Keys for $locale_name),
	    q(Applications using Xlib input handling should recognize),
	    q( these compose key sequences in locales using the),
	    qq( $locale_name compose table.),
	    "\n"
      );
  if (@included_files) {
    print $OUTPUT
	q(This compose table includes the non-conflicting),
	q( entries from: ),
	join(',', @included_files),
	q(.  Those entries are not shown here - see those charts for the),
	q( included key sequences.),
	"\n";
  }
  my @pretable_comments = ();
  if ($keyseq_count == 0) {
    @pretable_comments = @compose_table;
  } elsif ($compose_table[0]->{-type} eq 'comment') {
    push @pretable_comments, shift @compose_table;
  }
  foreach my $comment_ref (@pretable_comments) {
    print $OUTPUT
	qq(), xml_escape($comment_ref->{-comment}), qq(\n);
  }
  if ($keyseq_count > 0) {
    start_table($OUTPUT);
    my $row_count = 0;
    foreach my $cr (@compose_table) {
      if ($row_count++ > 750) {
	# Break tables every 750 rows to avoid overflowing
	# xmlto/xsltproc limits on the largest tables
	end_table($OUTPUT);
	start_table($OUTPUT);
	$row_count = 0;
      }
      if ($cr->{-type} eq 'comment') {
	print $OUTPUT
	    qq(),
	    xml_escape($cr->{-comment}), qq(
\n);
      } elsif ($cr->{-type} eq 'keyseq') {
	my $action = join(" ", xml_escape($cr->{-action}));
	if ($action =~ m{^\s*"\\([0-7]+)"}) {
	  my $char = oct($1);
	  if ($char >= 32) {
	    $action =~ s{^\s*"\\[0-7]+"}{"$char;"};
	  }
	}
	$action =~ s{^\s*"(.+)"}{"$1"};
	print $OUTPUT
	    qq(),
	    qq(),
	    (map { qq($_) } xml_escape(@{$cr->{-keys}})),
	    qq(),
	    qq(),
	    $action,
	    qq(
\n);
      }
    }
    end_table($OUTPUT);
  } else {
    print $OUTPUT
	qq(),
	qq(This compose table defines no sequences of its own.),
	qq(\n);
  }
  print $OUTPUT "\n";
  close $OUTPUT or die "Couldn't write $output_filename: $!";
  return $errors;
}
sub xml_escape {
  my @output;
  foreach my $l (@_) {
      $l =~ s{\&}{&}g;
      $l =~ s{\<}{<}g;
      $l =~ s{\>}{>}g;
      push @output, $l;
  }
  return @output;
}
sub start_table {
  my ($OUTPUT) = @_;
  print $OUTPUT
      join("\n",
	   qq(
Compose Key Sequences for $locale_name),
	   qq(),
	   qq( ),
	   qq( ),
	   qq(  Key SequenceAction),
	   qq( 
),
	   qq( \n),
      );
}
sub end_table {
  my ($OUTPUT) = @_;
  print $OUTPUT "\n\n
\n";
}
__END__
=head1 NAME
compose-chart - Make DocBook/XML charts of compose table entries
=head1 SYNOPSIS
compose-chart [options] [file ...]
 Options:
    --charset[=]	character set to specify in XML doctype
    --locale=	name of locale to display in chart
    --output=	filename to output chart to
    --index		make index of charts instead of individual chart
    --help		brief help message
    --man		full documentation
=head1 OPTIONS
=over 8
=item B<--charset>[=I]
Specify a character set to list in the doctype declaration in the XML output.
If not specified, attempts to guess from the locale name, else default to
"utf-8".
=item B<--locale>=I
Specify the locale name to use in the chart titles and introductory text.
=item B<--output>=I
Specify the output file to write the DocBook output to.
=item B<--index>
Generate an index of the listed locale charts instead of a chart for a
specific locale.
=item B<--help>
Print a brief help message and exit.
=item B<--man>
Print the manual page and exit.
=back
=head1 DESCRIPTION
This program will read the given compose table file(s) and generate
DocBook/XML charts listing the available characters for end-user reference.
=cut