
# Apply a set of rules to tagged output files

# Constants

$tagsep = "_";			# Separator between a word and its tag

# Command line options (must precede the input files)
#
# -rewrite	- rewrite each input file in place if any of its lines changed
#		  (the default, "stdout", prints every file to stdout)
# -rules FILE	- read the rules from FILE; this also clears the rule limit,
#		  so a -limit meant to apply must come after -rules
# -home DIR	- installation directory used to locate the default rule file
# -limit N	- read at most the first N rules (0 means no limit)

$option_output = "stdout";	# Default output
$option_limit = 20;		# Default max number of rules to apply

$rule_file = "";
$install_dir = "";

# Consume leading options.  A lone "-" is not an option: it names stdin as an
# input file.  Each option is shifted off first and dispatched through a
# single if/elsif chain, so the value taken by one option is never re-tested
# as if it were another option name.  Unknown options are silently skipped.

while (@ARGV && substr($ARGV[0],0,1) eq "-" && $ARGV[0] ne "-")
{
	my $opt = shift @ARGV;

	if ($opt eq "-rewrite")
	{
		$option_output = "rewrite";
	} elsif ($opt eq "-rules")
	{
		$option_limit = 0;
		$rule_file = shift @ARGV;
	} elsif ($opt eq "-home")
	{
		$install_dir = shift @ARGV;
	} elsif ($opt eq "-limit")
	{
		$option_limit = shift @ARGV;
	}
}

# Get a default installation directory
#
# When -home was not given and the script was started through a path ending
# in util/NAME.perl, the installation directory is whatever precedes "util/".
# The dot before "perl" is escaped so that only a literal ".perl" suffix
# qualifies.  Anything that leaves the directory empty falls back to ".".

if ($install_dir eq "" && $0 =~ /util\/[^\/]+\.perl$/)
{
	$install_dir = $0;
	$install_dir =~ s/util\/[^\/]+\.perl$//;
}
$install_dir = "." if (! $install_dir);

# Get a default rule file

$rule_file = "$install_dir/models/rules.cur" if (! $rule_file);

# Set the default input to be stdin

push @ARGV, "-" if (! scalar(@ARGV));

# Read the rules, up to the default limit
#
# Each rule line has the form
#
#	prevword_prevtag|word_tag|nextword_nexttag->newtag
#
# and is stored column-wise in the parallel @rule_* arrays, indexed by rule
# number.  Reading stops once $option_limit rules are loaded (a limit of 0 or
# less means "no limit").
#
# The file is opened directly rather than through a shell "cat", so file
# names containing spaces or shell metacharacters are handled safely.  As
# before, an unreadable rule file is reported on stderr and leaves the rule
# set empty.  A malformed rule is fatal: the message goes to stderr, so it
# cannot be mistaken for tagged output, and the exit status is non-zero.

$num_rules = 0;
if (open(my $rules_fh, "<", $rule_file))
{
	while (defined(my $r = <$rules_fh>))
	{
		chomp $r;

		if ($r =~ /([^$tagsep\|]*)$tagsep*([^$tagsep\|]*)\|([^$tagsep\|]*)$tagsep*([^$tagsep\|]*)\|([^$tagsep\|]*)$tagsep*([^$tagsep\|]*)->([^ ]*)/)
		{
			$rule_wb1[$num_rules] = $1;	# word before
			$rule_tb1[$num_rules] = $2;	# tag before
			$rule_w[$num_rules] = $3;	# current word
			$rule_f[$num_rules] = $4;	# current ("from") tag
			$rule_wa1[$num_rules] = $5;	# word after
			$rule_ta1[$num_rules] = $6;	# tag after
			$rule_tag[$num_rules] = $7;	# replacement tag
			$num_rules++;
		} else
		{
			print STDERR "Rule $r not recognized.\n";
			exit 1;
		}

		last if ($option_limit > 0 && $num_rules >= $option_limit);
	}
	close $rules_fh;
} else
{
	warn "Cannot read rule file $rule_file: $!\n";
}

# print "$rule_wb1 $rule_tb1 | $rule_w $rule_f | $rule_wa1 $rule_ta1 -> $rule_tag\n";

# Process all of the files on the command line
#
# For every input ("-" is stdin) each line is split into word/tag pairs, the
# rules are applied to every pair in rule order, and lines whose tags changed
# are rebuilt as "word_tag word_tag ...".  Lines that no rule changed are
# passed through byte-for-byte.  In "stdout" mode every file is printed; in
# "rewrite" mode a file is written back only if it changed, and stdin is
# never rewritten (nothing is printed for it in that mode).

for my $f (@ARGV)
{
	# Read the whole input directly instead of through a shell "cat".
	my @lines;
	if ($f eq "-")
	{
		@lines = <STDIN>;
	} elsif (open(my $in_fh, "<", $f))
	{
		@lines = <$in_fh>;
		close $in_fh;
	} else
	{
		warn "Cannot read $f: $!\n";
		next;
	}

	my $change = 0;

	for my $line_num (0 .. $#lines)
	{
		# Split the line into parallel word and tag arrays.
		my (@w, @t);
		while ($lines[$line_num] =~ /([^\s${tagsep}]*)${tagsep}([^\s]*)/g)
		{
			push @w, $1;
			push @t, $2;
		}
		my $n = scalar(@w);

		# Apply the rules.  An empty rule field means "no constraint";
		# fields are compared against "" rather than tested for truth
		# so that a word or tag of "0" is still a real constraint.
		# Later rules see tags already rewritten by earlier ones.
		my $line_change = 0;
		for my $i (0 .. $n - 1)
		{
			for my $j (0 .. $num_rules - 1)
			{
				next if ($rule_wb1[$j] ne "" && ($i == 0 || $rule_wb1[$j] ne $w[$i-1]));
				next if ($rule_tb1[$j] ne "" && ($i == 0 || $rule_tb1[$j] ne $t[$i-1]));
				next if ($rule_w[$j] ne "" && $rule_w[$j] ne $w[$i]);
				next if ($rule_f[$j] ne "" && $rule_f[$j] ne $t[$i]);
				next if ($rule_wa1[$j] ne "" && ($i == $n - 1 || $rule_wa1[$j] ne $w[$i+1]));
				next if ($rule_ta1[$j] ne "" && ($i == $n - 1 || $rule_ta1[$j] ne $t[$i+1]));

				if ($t[$i] ne $rule_tag[$j])
				{
					$t[$i] = $rule_tag[$j];
					$line_change = 1;
				}
			}
		}

		# The tags are only written back for changed lines, so there
		# is nothing more to do for an untouched line.  This also
		# covers lines without any word_tag token ($n == 0), where
		# touching $t[$n-1] would be a fatal error.
		next if (! $line_change);

		# Fixup any multi-word tags
		#
		# A tag containing "+" takes the tag of the next token whose
		# tag has no "+", followed by "+".  The last token has no
		# successor, so its "+" is dropped first; that way a run of
		# "+" tags reaching the end of the line resolves to the last
		# token's tag instead of reading past the end of the array.

		$t[$n-1] =~ s/[+]//;
		for my $i (0 .. $n - 2)
		{
			next if ($t[$i] !~ /[+]/);

			my $j = $i + 1;
			$j++ while ($j < $n && $t[$j] =~ /[+]/);
			$t[$i] = "$t[$j]+" if ($j < $n);
		}

		# Rebuild the line from the updated pairs.
		$change = 1;
		$lines[$line_num] = join(" ", map { "$w[$_]$tagsep$t[$_]" } 0 .. $n - 1) . "\n";
	}

	if ($change && $option_output eq "rewrite" && $f ne "-")
	{
		if (open(my $out_fh, ">", $f))
		{
			print $out_fh @lines;
			close($out_fh) or warn "Cannot finish writing $f: $!\n";
		} else
		{
			warn "Cannot rewrite $f: $!\n";
		}
	} elsif ($option_output eq "stdout")
	{
		print @lines;
	}
}
