#include <stdio.h>
#include <strings.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>

#include <Btree.h>

#include "tagger.h"

// Text printed in place of a tag by Sentence::print(2) when a word has
// no computed tag (comp_tag < 0).
char option_untag[1000];

// Numeric type used for the forward/backward values (alpha, beta) and for
// the sentence probability. The commented-out lines below select an
// alternative type named "huge" instead of double.
// #include "huge.h"
// #define RTYPE huge
#define RTYPE double

#include "lex.h"

// Lexicon handles. In the code visible here only `lex` is consulted
// (idiom lookup while a sentence is being scanned).
Lexicon *lex = NULL;
Lexicon *trans = NULL;
Lexicon *cons = NULL;

int use_codes = 1;	// Whether to use codes in the lexicon

int verbose = 1;	// Echo commands if verbose == 1

// Backoff defaults for tag counts

// Per-tag backoff weight used by Sentence::normalize() for tags a word
// was never seen with.
double lex_backoff[MAX_TAGS];
// When non-zero, Sentence::normalize() skips Witten-Bell smoothing; a
// positive value is used as an additive smoothing constant.
double add_smoothing = 0.0;

// Upper bound on the number of HMM states: a state encodes the last
// NGRAMS-1 tags, so there are MAX_TAGS^(NGRAMS-1) of them.
// NOTE(review): there is no #else branch, so MAX_STATES is left undefined
// for any NGRAMS value other than 2, 3 or 4.
#if NGRAMS==2
#define MAX_STATES MAX_TAGS
#elif NGRAMS==3
#define MAX_STATES (MAX_TAGS*MAX_TAGS)
#elif NGRAMS==4
#define MAX_STATES (MAX_TAGS*MAX_TAGS*MAX_TAGS)
#endif

// Predefined Markov chain for original language

int num_tags = 0;		// The number of tags (read by scangram)
int num_states = 0;		// The number of states (set by init)
int end_tag = 0;		// The tag for end-of-sentence (scangram looks up ".")

// The tags

// Tag names, indexed by tag number.
// NOTE(review): the second dimension is MAX_WORDS, not a string-length
// constant -- confirm this is the intended row size.
char tag_str[MAX_TAGS][MAX_WORDS];	// The actual tags

// Shared dummy cells: the Sentence accessors return references to these
// (after resetting them) when asked for an out-of-range state.
double zero = 0.0;		// Zeros, etc
int z = 0;

// This data stores the n-grams

// countN holds the N-gram counts as a flat array (num_tags^N entries are
// filled by scangram); count0[0] is the grand total. Storage for these
// pointers is not allocated in the code visible here.
double *count0;
double *count1;
double *count2;
double *count3;
#if NGRAMS>=4
double *count4;
#endif
// Accumulated expected transition counts (num_states * num_tags entries
// are used); updated by Sentence::compute().
double *train;

double pr_tag[MAX_TAGS];			// count1[t] / count0[0], set by scangram
double pr_state[MAX_STATES];			// initial state probabilities
double pr_trans[MAX_STATES][MAX_STATES];	// state transition probabilities

// These are the n-gram counts

// cN(i) takes a 1-based n-gram number and wraps it into the table size.
#define c0 count0[0]
#define c1(i) count1[(i-1) % (num_tags)]
#define c2(i) count2[(i-1) % (num_tags*num_tags)]
#define c3(i) count3[(i-1) % (num_tags*num_tags*num_tags)]
#if NGRAMS>=4
#define c4(i) count4[(i-1) % (num_tags*num_tags*num_tags*num_tags)]
#endif

// Count the transitions from i to j by reducing to n-grams

// The (N)-gram for a transition is formed from state i followed by the
// last tag of state j. Note t4 is defined unconditionally but expands to
// c4, which only exists when NGRAMS>=4.
#define t2(i,j) c2((i-1)*num_tags+((j-1)%num_tags)+1)
#define t3(i,j) c3((i-1)*num_tags+((j-1)%num_tags)+1)
#define t4(i,j) c4((i-1)*num_tags+((j-1)%num_tags)+1)

// Given a state i, return the tag at the position in the n-gram

// Decode one tag out of a 1-based state number. The state is treated as a
// base-num_tags number: pos 0 selects the last (least significant) tag,
// pos -1 the tag before it, and so on. Positive pos behaves like 0.
int tag_at(int i, int pos)
{
	int	code = i - 1;
	int	shift;

	// Drop one base-num_tags digit for every step back in the n-gram
	for (shift = pos; shift < 0; ++shift)
		code /= num_tags;

	return code % num_tags;
}

// Return the (mutable) name buffer of tag number i. No range check is
// performed on i.
char *tag_to_str(int i)
{
	return &tag_str[i][0];
}

// Look up a tag name and return its number, or -1 when the name is not
// among the first num_tags entries of tag_str.
int str_to_tag(char *str)
{
	int	t = 0;

	while (t < num_tags)
	{
		if (! strcmp(tag_str[t], str))
			return t;
		++t;
	}
	return -1;
}

// Reset the tag and normalize

// Force or forbid a tag in a word's tag distribution.
//
//   word  the word being adjusted (unused; kept for the callers' signature)
//   dw    the word's per-tag weights (num_tags entries), modified in place
//   tag   name of the tag; unknown names leave dw untouched
//   cond  non-zero: dw becomes 1.0 for `tag` and 0.0 elsewhere
//         zero:     `tag` is removed and the remaining weights renormalized
void tag_set(char *word, double *dw, char *tag, int cond)
{
	int	i;
	double	m;
	int	target = str_to_tag(tag);	// look the tag up once

	(void) word;

	if (target < 0) return;

	if (cond)
	{
		// printf("Forcing %s to %s\n", word, tag);
		for (i = 0; i < num_tags; ++i)
			dw[i] = 0.0;
		dw[target] = 1.0;
		return;
	}

	dw[target] = 0.0;
	m = 0.0;
	for (i = 0; i < num_tags; ++i)
		m += dw[i];

	// Removing the tag may leave no weight at all; dividing by zero
	// would turn every entry into NaN, so leave the zeros in place.
	if (m != 0.0)
	{
		for (i = 0; i < num_tags; ++i)
			dw[i] /= m;
	}
}

// Forbid a tag unless a condition holds.
//
//   word  the word being adjusted (unused; kept for the callers' signature)
//   dw    the word's per-tag weights (num_tags entries), modified in place
//   tag   name of the tag; unknown names leave dw untouched
//   cond  non-zero: nothing happens
//         zero:     `tag` is removed and the remaining weights renormalized
void tag_ok(char *word, double *dw, char *tag, int cond)
{
	int	i;
	double	m;
	int	target = str_to_tag(tag);	// look the tag up once

	(void) word;

	if (target < 0 || cond) return;

	dw[target] = 0.0;
	m = 0.0;
	for (i = 0; i < num_tags; ++i)
		m += dw[i];

	// Removing the tag may leave no weight at all; dividing by zero
	// would turn every entry into NaN, so leave the zeros in place.
	if (m != 0.0)
	{
		for (i = 0; i < num_tags; ++i)
			dw[i] /= m;
	}
}


// Return the 1-based state reached from state i when the next tag is s:
// the oldest tag of i is dropped and s is appended.
int state_next(int i, int s)
{
	int	succ = s + 1;

#if NGRAMS==3
	succ += num_tags * tag_at(i,0);
#elif NGRAMS>=4
	succ += num_tags * (tag_at(i,0) + num_tags * tag_at(i,-1));
#endif

	return succ;
}

// Return the 1-based state that precedes state i when the tag that was
// shifted out is s: s is prepended and the newest tag of i is dropped.
int state_prev(int i, int s)
{
	int	pred = s;

#if NGRAMS==3
	pred = pred * num_tags + tag_at(i,-1);
#elif NGRAMS>=4
	pred = (pred * num_tags + tag_at(i,-2)) * num_tags + tag_at(i,-1);
#endif

	return pred + 1;
}

// Build the initial state and transition probabilities from the n-gram
// counts. The argument is the requested n-gram order; a value other than
// the compiled-in NGRAMS is reported and replaced by NGRAMS. Exits if no
// n-gram file has been read yet (num_tags == 0).
void init(int i)
{
	int	from, to;

	if (num_tags == 0)
	{
		printf("ngrams must be read prior to init\n");
		exit(0);
	}

	if (i != NGRAMS)
	{
		printf("This version of the tagger was compiled for ``init %d''.\n", NGRAMS);
		i = NGRAMS;
	}

	// One state per sequence of (order - 1) tags
	switch (i) {
	case 2: num_states = num_tags; break;
	case 3: num_states = num_tags * num_tags; break;
	case 4: num_states = num_tags * num_tags * num_tags; break;
	}

	for (from = 1; from <= num_states; ++from)
	{
		// Count of the (NGRAMS-1)-gram that defines the source state
#if NGRAMS==2
		const double from_count = c1(from);
#elif NGRAMS==3
		const double from_count = c2(from);
#elif NGRAMS==4
		const double from_count = c3(from);
#endif

#if NGRAMS>=2 && NGRAMS<=4
		// Relative frequency of the state, or uniform without a total
		if (c0 > 0.0)
			pr_state[from-1] = from_count / c0;
		else
			pr_state[from-1] = 1.0 / (double) num_states;
#endif

		for (to = 1; to <= num_states; ++to)
		{
			// Transitions between states whose tags do not overlap stay 0
			double	prob = 0.0;

#if NGRAMS==2
			prob = (from_count > 0.0) ? t2(from,to) / from_count : 1.0 / (double) num_tags;
#elif NGRAMS==3
			if (tag_at(from,0) == tag_at(to,-1))
				prob = (from_count > 0.0) ? t3(from,to) / from_count : 1.0 / (double) num_tags;
#elif NGRAMS==4
			if (tag_at(from,0) == tag_at(to,-1) && tag_at(from,-1) == tag_at(to,-2))
				prob = (from_count > 0.0) ? t4(from,to) / from_count : 1.0 / (double) num_tags;
#endif

			pr_trans[from-1][to-1] = prob;
		}
	}
}

// One tagged (or to-be-tagged) sentence together with the work arrays of
// the tagging algorithms.
//
// NOTE(review): the class owns five heap arrays through raw pointers and
// declares no copy constructor or assignment operator, so copying a
// Sentence would free the arrays twice.
class Sentence
{
public:
	Sentence(char *str);		// scan "word<sep>tag ..." text
	~Sentence();

	void print(int);		// print the sentence
	void compute();			// forward-backward algorithms
	void viterbi();			// viterbi algorithm
	void baseline();		// baseline algorithm
	void maxprob();			// use current probabilities to estimate word
	void syntax();			// use actual tags to predict next tag

	void normalize();		// Normalize dw
	void transform(char *);		// Apply Brill transformation to pr

	int	num_words;		// The number of words in the sentence
	char	words[MAX_WORDS][MAX_WLEN];	// The words in the sentence

	double	dw[MAX_WORDS][MAX_TAGS];	// The d_w(t) for each word and tag
	double	pr[MAX_WORDS][MAX_TAGS];	// The current tag probability estimates
	double	count[MAX_WORDS];	// Count of occurrences (only if whole word was in lexicon)

	int	comp_tag[MAX_WORDS];	// The computed tag (-1 when none)
	int	actual_tag[MAX_WORDS];	// The actual tag, when known (-1 otherwise)

	RTYPE	p_sent;			// Probability of sentence

	// Access routines. t is a 1-based word position and i a 1-based state;
	// an out-of-range i yields a reference to a shared zeroed dummy.
	RTYPE &alpha(int t, int i);
	RTYPE &beta(int t, int i);
	double &delta(int t, int i);
	int &psi(int t, int i);

private:
	// Parse a Brill rule. Declared without the "Sentence::" qualifier
	// (which is ill-formed inside the class) and with the parameter order
	// of the definition: old tag first, then new tag.
	void brill_args(char *rule, int& t_old, int& t_new, int& op, char *a1, char *a2);

	RTYPE *alpha_array;		// Alpha, beta, etc
	RTYPE *beta_array;
	double *delta_array;
	int *psi_array;
	double *xi;
};

// Scan whitespace-separated "word<cTAGSEP>tag" tokens from str into the
// sentence and allocate the per-sentence work arrays.
//
// - Tokens without a tag separator are ignored.
// - When a lexicon is loaded, a token that forms a known 2-, 3- or 4-word
//   idiom with the preceding word(s) is merged into the earlier slot.
// - Scanning stops after a token whose tag is ".", at the end of str, or
//   once MAX_WORDS words have been stored.
// - A token longer than MAX_WLEN-1 characters is truncated; if that cuts
//   off its tag separator the token is ignored like any untagged token.
Sentence::Sentence(char *str)
{
	char	*s;
	char	idiom[MAX_WLEN + 1];

	num_words = 0;
	p_sent = 0.0;	// defined value for print(1) before compute() runs

	// Scan the string for a complete sentence

	while (*str)
	{
		// Look for the next word

		while (isspace((unsigned char) *str)) ++str;

		if (! *str) break;

		// No free slot left: ignore the rest rather than overrun words[]

		if (num_words >= MAX_WORDS) break;

		// Copy this word, keeping it inside its MAX_WLEN-byte slot

		const char *token = str;
		size_t len = 0;
		while (*str && (! isspace((unsigned char) *str))) ++str, ++len;
		if (len > (size_t) (MAX_WLEN - 1)) len = (size_t) (MAX_WLEN - 1);
		memcpy(words[num_words], token, len);
		words[num_words][len] = '\0';

		// If there is a tag separator, get the tag

		comp_tag[num_words] = -1;
		actual_tag[num_words] = -1;
		s = strchr(words[num_words], cTAGSEP);
		if (s == NULL) continue;

		*s = '\0';

		// Idiom merging. snprintf truncates to MAX_WLEN characters, and a
		// candidate is only used when it also fits a word slot (strictly
		// fewer than MAX_WLEN characters), so truncated text is never
		// looked up or stored.

		if (lex && num_words > 0)
		{
			snprintf(idiom, sizeof(idiom), "%.10s %.10s", words[num_words-1], words[num_words]);
			if (strlen(idiom) < (size_t) MAX_WLEN && lex->exists(idiom))
			{
				strcpy(words[num_words-1], idiom);
				continue;
			}
		}
		if (lex && num_words > 1)
		{
			snprintf(idiom, sizeof(idiom), "%.10s %.10s %.10s", words[num_words-2], words[num_words-1], words[num_words]);
			if (strlen(idiom) < (size_t) MAX_WLEN && lex->exists(idiom))
			{
				strcpy(words[num_words-2], idiom);
				--num_words;
				continue;
			}
		}
		if (lex && num_words > 2)
		{
			snprintf(idiom, sizeof(idiom), "%.10s %.10s %.10s %.10s", words[num_words-3], words[num_words-2], words[num_words-1], words[num_words]);
			if (strlen(idiom) < (size_t) MAX_WLEN && lex->exists(idiom))
			{
				strcpy(words[num_words-3], idiom);
				num_words -= 2;
				continue;
			}
		}

		actual_tag[num_words] = str_to_tag(s+1);

		// printf("Scanning %s\n", words[num_words]);

		++num_words;

		// End sentence at a period-tag

		if (strcmp(s+1, ".") == 0)
			break;
	}

	// Allocate arrays (xi is a double array regardless of RTYPE)

	alpha_array = new RTYPE[num_words*MAX_STATES];
	beta_array = new RTYPE[num_words*MAX_STATES];
	xi = new double[MAX_STATES*num_tags];
	delta_array = new double[num_words*MAX_STATES];
	psi_array = new int[num_words*MAX_STATES];

	// printf("num_words: %d\n", num_words);

	if (alpha_array == NULL || beta_array == NULL || xi == NULL
	|| delta_array == NULL || psi_array == NULL)
	{
		printf("Could not allocate sentence\n");
		exit(1);
	}
}

// Release the work arrays allocated by the constructor. delete[] accepts
// a null pointer, so no per-pointer test is needed.
Sentence::~Sentence()
{
	delete[] alpha_array;
	delete[] beta_array;
	delete[] delta_array;
	delete[] psi_array;
	delete[] xi;
}

// Forward value for word t (1-based) and state i (1-based). A state
// outside 1..num_states yields the shared dummy cell, reset to zero.
// The word position t is not range-checked.
RTYPE &Sentence::alpha(int t, int i)
{
	if (i >= 1 && i <= num_states)
		return alpha_array[(t-1) * num_states + (i-1)];

	RTYPE &dummy = (RTYPE&) zero;
	dummy = 0.0;
	return dummy;
}

// Backward value for word t (1-based) and state i (1-based). A state
// outside 1..num_states yields the shared dummy cell, reset to zero.
// The word position t is not range-checked.
RTYPE &Sentence::beta(int t, int i)
{
	if (i >= 1 && i <= num_states)
		return beta_array[(t-1) * num_states + (i-1)];

	RTYPE &dummy = (RTYPE&) zero;
	dummy = 0.0;
	return dummy;
}

// Viterbi score for word t (1-based) and state i (1-based). A state
// outside 1..num_states yields the shared dummy cell, reset to zero.
// The word position t is not range-checked.
double &Sentence::delta(int t, int i)
{
	if (i >= 1 && i <= num_states)
		return delta_array[(t-1) * num_states + (i-1)];

	zero = 0.0;
	return zero;
}

// Viterbi back-pointer for word t (1-based) and state i (1-based). A
// state outside 1..num_states yields the shared dummy cell, reset to 0.
// The word position t is not range-checked.
int &Sentence::psi(int t, int i)
{
	if (i >= 1 && i <= num_states)
		return psi_array[(t-1) * num_states + (i-1)];

	z = 0;
	return z;
}

// Number of space-separated parts in s: one more than the number of ' '
// characters (so an empty string counts as one part).
static int count_words(char *s)
{
	int	total = 1;
	const char *p;

	for (p = s; *p != '\0'; ++p)
		total += (*p == ' ');

	return total;
}

// Return the i-th (0-based) space-separated part of s.
//
// The result lives in a static buffer that is overwritten by the next
// call. Only the requested part is copied, and never more than
// MAX_WLEN-1 characters, so the buffer cannot be overrun whatever the
// length of s. An index past the last part yields an empty string.
static char *get_word(char *s, int i)
{
	static char buff[MAX_WLEN];
	int	n = 0;

	// Skip i separators to reach the start of the requested part
	for (; i > 0 && *s; ++s)
	{
		if (*s == ' ') --i;
	}

	// Copy up to the next separator or the end of the buffer
	while (*s && *s != ' ' && n < MAX_WLEN - 1)
		buff[n++] = *s++;
	buff[n] = '\0';

	return &buff[0];
}

// Print the sentence, one line per word part or on a single line.
//
//   how == 0  "word<sep>actual" followed by " untagged", " ok" or
//             " tagged <computed>", one part per line
//   how == 1  the sentence log probability, then per part every tag whose
//             probability rounds to at least 0.01, as "<sep>tag:prob"
//   how == 2  the whole sentence on one line as "word<sep>tag" pairs;
//             untagged words get option_untag
//
// Words that are idioms (contain spaces) are printed part by part; a "+"
// follows the tag of every part but the last.
void Sentence::print(int how)
{
	int	i, j, w, parts;
	double	v;

	if (how == 1)
		printf("sentence log10 probability %g\n", p_sent);
	for (i = 0; i < num_words; ++i)
	{
		// Get the words from an idiom (the part count is fixed per word)

		parts = count_words(words[i]);

		for (w = 0; w < parts; ++w)
		{
			if (how == 2 && i + w > 0)
				printf(" ");

			printf("%s", get_word(words[i], w));

			if (how == 0)
			{
				if (actual_tag[i] >= 0)
					printf("%s%s", sTAGSEP, tag_to_str(actual_tag[i]));

				// The fixed messages take no arguments; in particular
				// tag_to_str() must not be evaluated for a negative tag.
				if (comp_tag[i] < 0)
					printf(" untagged");
				else if (comp_tag[i] == actual_tag[i])
					printf(" ok");
				else
				{
					printf(" tagged %s", tag_to_str(comp_tag[i]));
					if (w < parts - 1) printf("+");
				}
				printf("\n");
			} else if (how == 1)
			{
				for (j = 0; j < num_tags; ++j)
				{
					// Round to two decimals and hide tags that round to 0
					v = floor(0.5 + 100.0 * pr[i][j]) / 100.0;
					if (v > 0)
					{
						printf("%s%s", sTAGSEP, tag_to_str(j));
						if (w < parts - 1) printf("+");
						printf(":%g", v);
					}
				}
				printf("\n");
			} else if (how == 2)
			{
				if (comp_tag[i] < 0)
					printf("%s%s", sTAGSEP, option_untag);
				else
				{
					printf("%s%s", sTAGSEP, tag_to_str(comp_tag[i]));
					if (w < parts - 1) printf("+");
				}
			}
		}
	}
	if (how == 2)
		printf("\n");
}

// This is where smoothing should be performed!
// To this end, the sentence should retrieve default
// back-off probabilities

// Turn the raw per-word tag weights dw into smoothed probabilities.
//
// For each word: N is the total weight, T the number of tags with weight,
// Z the summed lex_backoff of the tags without weight.
// - add_smoothing > 0: additive smoothing; add_smoothing < 0: dw is left
//   as it is.
// - otherwise Witten-Bell: seen tags share d = N/(N+T) in proportion to
//   their weight, unseen tags share 1-d in proportion to lex_backoff.
// - "known" words (count or N above 999) keep d just below 1 so that the
//   backoff only breaks ties.
// Exits if a word ends up with no possible tag.
void Sentence::normalize()
{
	int	i, t;
	double	T, N, Z, d, sum;
	int	known;

	for (i = 0; i < num_words; ++i)
	{
		N = T = Z = 0.0;
		for (t = 0; t < num_tags; ++t)
		{
			N += dw[i][t];
			if (dw[i][t] > 0.0) T++; else Z += lex_backoff[t];
		}

		// A word with no weight at all has N + T == 0; 0/0 would be NaN
		// and slip past the "no possible tags" check below. Use d = 0 so
		// that such a word is tagged purely from the backoff.

		d = (N + T > 0.0) ? N / (N + T) : 0.0;

		// If the word was seen enough (or if it was manually tagged)
		// then don't back off, but average with an insignificant amount
		// of the backoff in case it is needed to break ties.

#define INSIGNIF (10e-10)

		known = (count[i] > 999.0 || N > 999.0);

		if (known) d = 1.0 - INSIGNIF;

		// Witten-Bell smoothing with uniform backoff priors

		sum = 0.0;
		for (t = 0; t < num_tags; ++t)
		{
#if 0
			if (dw[i][t] > 0) printf("%s %s%s:%g before\n", words[i], sTAGSEP, tag_to_str(t), dw[i][t]);
#endif

			if (add_smoothing != 0.0)
			{
				if (add_smoothing > 0.0)
					dw[i][t] = (dw[i][t] + add_smoothing) / (N + add_smoothing * (double) num_tags);
			} else if (known)
			{
				if (N > 0.0 && dw[i][t] > 0.0)
					dw[i][t] = d * dw[i][t] / N;
				else
					dw[i][t] = 0.0;
				if (Z > 0.0 && dw[i][t] > 0.0)
					dw[i][t] += (1.0 - d) * lex_backoff[t] / Z;
			} else
			{
				if (N > 0.0 && dw[i][t] > 0.0) dw[i][t] = d * dw[i][t] / N;
				else if (Z > 0.0) dw[i][t] = (1.0 - d) * lex_backoff[t] / Z;
				else dw[i][t] = 0.0;
			}
			sum += dw[i][t];
#if 0
			if (dw[i][t] > 0) printf("%s %s%s:%g after\n", words[i], sTAGSEP, tag_to_str(t), dw[i][t]);
#endif
		}

		if (sum == 0.0)
		{
			printf("Cannot continue because word '%s' has no possible tags.\n", words[i]);
			exit(1);
		}
	}
}

// Operation codes of the Brill rule templates. The codes start at 1 so
// that 0 can mean "no/unknown operation"; the order must match rule_ops
// below, because brill_args() derives the code from the table index + 1.
enum { CURWD = 1, LBIGRAM, NEXT1OR2OR3TAG, NEXT1OR2TAG, NEXT1OR2WD, NEXT2TAG,
	NEXTBIGRAM, NEXTTAG, NEXTWD, PREV1OR2OR3TAG, PREV1OR2TAG, PREV1OR2WD, PREV2TAG,
	PREVBIGRAM, PREVTAG, PREVWD, RBIGRAM, SURROUNDTAG, WDAND2AFT, WDAND2TAGAFT,
	WDAND2TAGBFR, WDNEXTTAG, WDPREVTAG};

// Number of entries in rule_ops
#define MAX_OPS 23

// Names of the operations as they appear in a rule, in enum order
char *rule_ops[MAX_OPS] = { "CURWD", "LBIGRAM", "NEXT1OR2OR3TAG", "NEXT1OR2TAG", "NEXT1OR2WD", "NEXT2TAG",
	"NEXTBIGRAM", "NEXTTAG", "NEXTWD", "PREV1OR2OR3TAG", "PREV1OR2TAG", "PREV1OR2WD", "PREV2TAG",
	"PREVBIGRAM", "PREVTAG", "PREVWD", "RBIGRAM", "SURROUNDTAG", "WDAND2AFT", "WDAND2TAGAFT",
	"WDAND2TAGBFR", "WDNEXTTAG", "WDPREVTAG"};

// Split a Brill rule "old new OP arg1 [arg2]" into its parts.
//
//   rule   the rule text (not modified; a bounded private copy is parsed)
//   t_old  receives the old tag number, or -1 if the name is unknown
//   t_new  receives the new tag number, or -1 if the name is unknown
//   op     receives the operation code (1..MAX_OPS); it stays 0 whenever
//          the rule is incomplete, a tag is unknown or the operation
//          name is not recognized
//   a1,a2  receive the first and the optional second argument ("" when
//          absent)
//
// NOTE(review): a1 and a2 are filled with strcpy because their capacity
// is not passed in; the caller must size them for the longest token.
void Sentence::brill_args(char *rule, int& t_old, int& t_new, int& op, char *a1, char *a2)
{
	static const char delims[] = " \t\n\r";
	char	*a;
	int	i;
	char	tmp[BUF_SIZE];

	t_new = 0;
	t_old = 0;
	op = 0;
	a1[0] = '\0';
	a2[0] = '\0';

	// strtok modifies its input, so work on a copy; the copy is bounded
	// so that an over-long rule cannot overrun tmp

	strncpy(tmp, rule, BUF_SIZE - 1);
	tmp[BUF_SIZE - 1] = '\0';

#if 0
	printf("Scanning rule %s\n", tmp);
#endif

	// The old tag

	a = strtok(tmp, delims);
	if (! a) return;
	t_old = str_to_tag(a);
	if (t_old < 0) return;

	// The new tag

	a = strtok(NULL, delims);
	if (! a) return;
	t_new = str_to_tag(a);
	if (t_new < 0) return;

	// The operation

	a = strtok(NULL, delims);
	if (! a) return;
	for (i = 0; i < MAX_OPS; ++i)
	{
		if (strcmp(a, rule_ops[i]) == 0)
		{
			op = i + 1;
			break;
		}
	}

	if (op == 0) return;

	// The first arg

	a = strtok(NULL, delims);
	if (! a) return;
	strcpy(a1, a);

	// The second arg, if any

	a = strtok(NULL, delims);
	if (! a) return;
	strcpy(a2, a);
}

// The probability of the new tag must exceed this threshold
// or the rule will not be applied

#define THRESHOLD_PROB 1e-5

void Sentence::transform(char *rule)
{
	int	r;
	double	p;
	double	pr_new[MAX_WORDS][MAX_TAGS];
	int	t;
	int	t_old, t_new, op, ta, tb;
	char	a1[100], a2[100];

	// Parse the rule to get the old and new tags
	// and the context definition

	brill_args(rule, t_old, t_new, op, a1, a2);
	ta = str_to_tag(a1);
	tb = str_to_tag(a2);

	if (op == 0) return;

	// Apply the rule at each position

	for (r = 1; r <= num_words; ++r)
	{
		// probability of context

		p = pr[r-1][t_old];

		switch (op) {
		case CURWD:		// w : w_0 = 0
			if (strcmp(words[r-1], a1) != 0) p = 0.0;
			break;
		case LBIGRAM:		// w_a w_b : w_{-1} = w_a and w_0 = w_b
			if (r <= 1 || strcmp(words[r-2], a1) != 0 || strcmp(words[r-1], a2) != 0) p = 0.0;
			break;
		case NEXT1OR2OR3TAG:	// t : t_1 = t or t_2 = t or t_3 = t
			if (r > num_words-1 || ta < 0) p = 0.0;
			else p = p * (1.0 - (r > num_words-1 ? 1.0 : 1.0 - pr[r][ta]) * (r+1 > num_words-1 ? 1.0 : 1.0 - pr[r+1][ta]) * (r+2 > num_words-1 ? 0.0 : 1.0 - pr[r+2][ta]));
			break;
		case NEXT1OR2TAG:	// t : t_1 = t or t_2 = t
			if (r > num_words-1 || ta < 0) p = 0.0;
			else p = p * (1.0 - (r > num_words-1 ? 1.0 : 1.0 - pr[r][ta]) * (r+1 > num_words-1 ? 1.0 : 1.0 - pr[r+1][ta]));
			break;
		case NEXT1OR2WD:	// w : w_1 = w or w_2 = w
			if ((r > num_words-1 || strcmp(words[r], a1) != 0) && (r+1 > num_words-1 || strcmp(words[r+1], a1) != 0)) p = 0.0;
			break;
		case NEXT2TAG:		// t : t_2 = t
			if (r+1 > num_words-1 || ta < 0) p = 0.0;
			else p = p * pr[r+1][ta];
			break;
		case NEXTBIGRAM:	// t_a t_b : t_1 = t_a and t_2 = t_b
			if (r+1 > num_words-1 || ta < 0 || tb < 0) p = 0.0;
			else p = p * pr[r][ta] * pr[r+1][ta];
			break;
		case NEXTTAG:		// t : t_1 = t
			if (r > num_words-1 || ta < 0) p = 0.0;
			else p = p * pr[r][ta];
			break;
		case NEXTWD:		// w : w_1 = w
			if (r > num_words-1 || strcmp(words[r], a1) != 0) p = 0.0;
			break;
		case PREV1OR2OR3TAG:	// t : t_{-1} = t or t_{-2} = t or t_{-3} = t
			if (r <= 1 || ta < 0) p = 0.0;
			else p = p * (1.0 - (r-2 >= 0 ? 1.0 - pr[r-2][ta] : 1.0) * (r-3 >= 0 ? 1.0 - pr[r-3][ta] : 1.0) * (r-4 >= 0 ? 1.0 - pr[r-4][ta] : 1.0));
			break;
		case PREV1OR2TAG:	// t : t_{-1} = t or t_{-2} = t
			if (r <= 1 || ta < 0) p = 0.0;
			else p = p * (1.0 - (r-2 >= 0 ? 1.0 - pr[r-2][ta] : 1.0) * (r-3 >= 0 ? 1.0 - pr[r-3][ta] : 1.0));
			break;
		case PREV1OR2WD:	// w : w_{-1} = w or w_{-2} = w
			if ((r-2 < 0 || strcmp(words[r-2], a1) != 0) && (r-3 < 0 || strcmp(words[r-3], a1) != 0)) p = 0.0;
			break;
		case PREV2TAG:		// t : t_{-2} = t
			if (r-3 < 0 || ta < 0) p = 0.0;
			else p = p * pr[r-3][ta];
			break;
		case PREVBIGRAM:	// t_a t_b : t_{-2} = t_a and t_{-1} = t_b
			if (r-3 < 0 || ta < 0 || tb < 0) p = 0.0;
			else p = p * pr[r-2][ta] * pr[r-3][ta];
			break;
		case PREVTAG:		// t : t_{-1} = t
			if (r-2 < 0 || ta < 0) p = 0.0;
			else p = p * pr[r-2][ta];
			break;
		case PREVWD:		// w : w_{-1} = w
			if (r-2 < 0 || strcmp(words[r-2], a1) != 0) p = 0.0;
			break;
		case RBIGRAM:		// w_a w_b : w_0 = w_a and w_1 = w_b
			if (r+1 > num_words-1 || strcmp(words[r], a1) != 0 || strcmp(words[r+1], a2) != 0) p = 0.0;
			break;
		case SURROUNDTAG:	// t_a t_b : t_{-1} = t_a and t_1 = t_b
			if (r-2 < 0 || r >= num_tags || ta < 0 || tb < 0) p = 0.0;
			else p = p * pr[r-2][ta] * pr[r][tb];
			break;
		case WDAND2AFT:		// w_a w_b : w_0 = w_a and w_2 = w_b
			if (strcmp(words[r-1], a1) != 0 || r+1 > num_words-1 || strcmp(words[r+1], a2) != 0) p = 0.0;
			break;
		case WDAND2TAGAFT:	// w t : w_0 = w and t_2 = t
			if (strcmp(words[r-1], a1) != 0 || r+1 > num_words-1 || tb < 0) p = 0.0;
			else p = p * pr[r+1][tb];
			break;
		case WDAND2TAGBFR:	// w t : w_0 = w and t_{-2} = t
			if (strcmp(words[r-1], a1) != 0 || r-2 < 0 || tb < 0) p = 0.0;
			else p = p * pr[r-2][tb];
			break;
		case WDNEXTTAG:		// w t : w_0 = w and t_1 = t
			if (strcmp(words[r-1], a1) != 0 || r+1 > num_words-1 || tb < 0) p = 0.0;
			else p = p * pr[r+1][tb];
			break;
		case WDPREVTAG:		// w t : w_0 = w and t_{-1} = t
			if (strcmp(words[r-1], a1) != 0 || r-1 < 0 || tb < 0) p = 0.0;
			else p = p * pr[r-1][tb];
			break;
		}

		if (t_new < 0 || pr[r-1][t_new] < THRESHOLD_PROB) p = 0.0;

#if 0
		if (p > 0.0)
			printf("Applying %s at %d (%s,%s%s->%s) with probability %g\n",
				rule, r, words[r-1], sTAGSEP, tag_to_str(t_old), tag_to_str(t_new), p);
#endif

		// Update the probabilities

		for (t = 0; t < num_tags; ++t)
			pr_new[r-1][t] = p * (t == t_new ? 1.0 : 0.0) + (1.0 - p) * pr[r-1][t];
	}

#if 0

	int cnt = 0;
	for (r = 1; r <= num_words; ++r)
	{
		for (t = 0; t < num_tags; ++t)
		{
			if ((pr[r-1][t] > 1e-10 || pr_new[r-1][t] > 1e-10) && fabs((pr[r-1][t] - pr_new[r-1][t]) / (pr[r-1][t] + pr_new[r-1][t])) > 0.01)
				++cnt;
		}
	}
	if (cnt > 0) printf("Applied rule %s\n", rule);
	for (r = 1; r <= num_words; ++r)
	{
		for (t = 0; t < num_tags; ++t)
		{
			if ((pr[r-1][t] > 1e-10 || pr_new[r-1][t] > 1e-10) && fabs((pr[r-1][t] - pr_new[r-1][t]) / (pr[r-1][t] + pr_new[r-1][t])) > 0.01)
				printf("%s%s%s : %g -> %g\n", words[r-1], sTAGSEP, tag_to_str(t), pr[r-1][t], pr_new[r-1][t]);
		}
	}
#endif

	// store probabilities

	for (r = 1; r <= num_words; ++r)
		for (t = 0; t < num_tags; ++t)
			pr[r-1][t] = pr_new[r-1][t];

}

// Forward-backward pass over the sentence.
//
// 1. Computes the scaled forward values alpha and accumulates the log10
//    of the scale factors in p_sent.
// 2. Computes the scaled backward values beta.
// 3. Adds the normalized expected transition counts of this sentence to
//    the global train[] array.
// 4. Stores the posterior tag probabilities in pr and picks the most
//    probable tag per word (maxprob).
//
// Word positions r and states i, j are 1-based. A normalizer of zero
// (no path with positive probability) is never divided by, so the
// affected values stay 0 instead of becoming NaN.
void Sentence::compute()
{
	int	r, i, j, k;
	double	m, v;
	double	scale;

	if (verbose) printf("compute\n");

	// Compute alpha

	p_sent = 0.0;
	for (r = 1; r <= num_words; ++r)
	{
		// printf("computing alpha(%d/%d)\n", r, num_words);
		scale = 0.0;

		for (i = 1; i <= num_states; ++i)
		{
			alpha(r, i) = 0.0;

			// States whose last tag is impossible for this word stay 0

			if (dw[r-1][tag_at(i,0)] <= 0.0) continue;

			if (r == 1)
			{
				// if (tag_at(i,-1) == end_tag)
					alpha(r, i) = pr_state[i-1] * dw[r-1][tag_at(i,0)] / pr_tag[tag_at(i,0)];
			} else
			{
				// Sum over the num_tags possible predecessor states

				for (k = 0; k < num_tags; ++k)
				{
					j = state_prev(i, k);
					alpha(r,i) += alpha(r-1,j)
						* (RTYPE) (pr_trans[j-1][i-1] * dw[r-1][tag_at(i,0)] / pr_tag[tag_at(i,0)]);
#if 0
// if (alpha(r-1,j) > 0.0)
	printf("(%s %s) -> (%s %s), a=%g, dw=%g\n",
		tag_to_str(tag_at(j,-1)),
		tag_to_str(tag_at(j,0)),
		tag_to_str(tag_at(i,-1)),
		tag_to_str(tag_at(i,0)),
		pr_trans[j-1][i-1], dw[r-1][tag_at(i,0)]);
#endif
				}
			}
#if 0
// if (alpha(r,i) > 0.0)
	printf("alpha(%d,%d) = %g (%s%s%s)\n", r, i, log10(alpha(r,i)), words[r-1], sTAGSEP, tag_to_str(tag_at(i,0)));
#endif

			scale += alpha(r, i);
		}

		// Rescale the column to sum to 1 to avoid underflow

		if (scale > 0.0)
		{
			for (i = 1; i <= num_states; i++)
				alpha(r, i) /= scale;
		}

		// The product of the scale factors is the sentence probability
		// (a zero scale makes this -infinity)

		p_sent += log10(scale);
	}

#if 0
printf("p_sent = %g, num_states = %d\n", p_sent, num_states);
#endif

	// Compute beta

	for (r = num_words; r >= 1; --r)
	{
		scale = 0.0;
		for (i = 1; i <= num_states; ++i)
		{
			beta(r, i) = 0.0;

			if (r == num_words)
				beta(r, i) = 1.0;
			else
			{
				// Sum over the num_tags possible successor states

				for (k = 0; k < num_tags; ++k)
				{
					j = state_next(i, k);
					beta(r,i) += ((RTYPE) (pr_trans[i-1][j-1] * dw[r][tag_at(j,0)] / pr_tag[tag_at(j,0)]))
						* beta(r+1, j);
				}
			}

			scale += beta(r, i);
		}

		if (scale > 0.0)
		{
			for (i = 1; i <= num_states; i++)
				beta(r, i) /= scale;
		}
	}

	// For each ngram k

	// printf("Computing training data\n");
	for (r = 1; r <= num_words-1; ++r)
	{
		m = 0.0;

		for (k = 0; k < num_states * num_tags; ++k)
		{
			// The ngram corresponds to a transition from i to j

			i = (k / num_tags) % num_states + 1;
			j = k % num_states + 1;

			// Accumulate the training probabilities

			xi[k] = alpha(r, i) * ((RTYPE) (pr_trans[i-1][j-1] * dw[r][tag_at(j,0)] / pr_tag[tag_at(j,0)])) * beta(r+1, j);
			m += xi[k];
		}

		if (m > 0.0)
		{
			for (k = 0; k < num_states * num_tags; ++k)
			{
				xi[k] /= m;
				train[k] += xi[k];
			}
		}
	}

	// Compute maximum likelihood for each tag, and normalize!

	for (r = 1; r <= num_words; ++r)
	{
		for (i = 0; i < num_tags; ++i)
			pr[r-1][i] = 0.0;

		// v is the total posterior mass. Each state's own contribution is
		// added exactly once; adding the running per-tag sum instead
		// would over-count whenever several states end in the same tag.

		v = 0.0;
		for (i = 1; i <= num_states; ++i)
		{
			double	mass = alpha(r,i)*beta(r,i);

			pr[r-1][tag_at(i,0)] += mass;
			v += mass;
		}

		if (v > 0.0)
		{
			for (i = 0; i < num_tags; ++i)
				pr[r-1][i] /= v;
		}
	}

	maxprob();

#if 0
	printf("Log probability of sentence: %g (prob %g)\n", log10(p_sent), p_sent);
	for (r = 0; r < num_words; ++r)
		printf("%s%s%s\n", words[r], sTAGSEP, tag_to_str(comp_tag[r]));
#endif
}

// #define DEBUG

// With VSCALE the Viterbi scores are kept as probabilities that are
// rescaled per word so that the best score is 1; without it the scores
// are log10 probabilities.
#define VSCALE

// Find the most probable state sequence for the sentence and store the
// last tag of each state on that path in comp_tag.
//
// Word positions r and states i, j are 1-based. delta(r,j) is the score
// of the best path ending in state j at word r and psi(r,j) the
// predecessor state on that path. An empty sentence is left untouched.
void Sentence::viterbi()
{
	int	i, j, k, r, p;
	double	m, max_delta;
#ifdef VSCALE
	double scale = 0.0;
#endif

	if (verbose) printf("viterbi\n");

	// Nothing to tag. The work arrays have no elements for an empty
	// sentence and comp_tag[num_words-1] would be comp_tag[-1].

	if (num_words <= 0) return;

	// Initialisation: scores of every state for the first word

	r = 1;
	for (i = 1; i <= num_states; ++i)
	{
#ifdef VSCALE
		delta(r, i) = pr_state[i-1] * dw[r-1][tag_at(i,0)] / pr_tag[tag_at(i,0)];
		if (delta(r, i) > scale)
		{
			scale = delta(r, i);
		}
#else
		delta(r, i) = log10(pr_state[i-1] * dw[r-1][tag_at(i,0)] / pr_tag[tag_at(i,0)]);
#endif
		psi(r, i) = 0;

#if 0
		printf("delta(%d,%s) = %g\n", r, tag_to_str(tag_at(i,0)), delta(r,i));
		printf("pr_state(%s) = %g\n", tag_to_str(tag_at(i,0)), pr_state[i-1]);
		printf("dw[%s][%s] = %g\n", words[r-1], tag_to_str(tag_at(i,0)), dw[r-1][tag_at(i,0)]);
		printf("count1[%s] = %g\n", tag_to_str(tag_at(i,0)), count1[tag_at(i,0)]);
#endif
	}

#ifdef VSCALE
	if (scale > 0.0)
		for (i = 1; i <= num_states; ++i)
			delta(r, i) /= scale;
#endif

	// Recursion: extend the best paths word by word

	for (r = 2; r <= num_words; ++r)
	{
		// printf("viterbi(delta(%d,*))\n", r);

#ifdef VSCALE
		scale = 0.0;
#endif

		for (j = 1; j <= num_states; ++j)
		{
#ifdef VSCALE
			max_delta = 0.0;
#else
			max_delta = -1.0e100;
#endif
			p = -1;

			// Only the num_tags states that can precede j are tried

			for (k = 0; k < num_tags; ++k)
			{
				i = state_prev(j, k);

#ifdef VSCALE
				m = delta(r-1,i) * pr_trans[i-1][j-1] * dw[r-1][tag_at(j,0)] / pr_tag[tag_at(j,0)];
#else
				m = delta(r-1,i) + log10(pr_trans[i-1][j-1] * dw[r-1][tag_at(j,0)] / pr_tag[tag_at(j,0)]);
#endif
				if (m > max_delta)
				{
					max_delta = m;
					p = i;
				}
			}

			// No predecessor scored above the floor: fall back to
			// state 1 so that the back-pointer stays valid

			if (p < 1) p = 1;

			delta(r, j) = max_delta;
			psi(r, j) = p;

#ifdef VSCALE
			if (delta(r, j) > scale)
				scale = delta(r, j);
#endif
		}

#ifdef VSCALE
		if (scale > 0.0)
			for (j = 1; j <= num_states; ++j)
				delta(r, j) /= scale;
#endif
	}

	// Termination: best final state, then follow the back-pointers

	max_delta = delta(num_words, 1);
	p = 1;
	for (i = 1; i <= num_states; ++i)
	{
		m = delta(num_words, i);
		if (m > max_delta)
		{
			max_delta = m;
			p = i;
		}
	}
	comp_tag[num_words-1] = tag_at(p, 0);
	for (r = num_words-1; r > 0; --r)
	{
		p = psi(r+1, p);
		comp_tag[r-1] = tag_at(p, 0);
	}

#if 0
	for (r = 0; r < num_words; ++r)
		printf("%s%s%s\n", words[r], sTAGSEP, tag_to_str(comp_tag[r]));
#endif
}

void Sentence::baseline()
{
	int	i, j;

	if (verbose) printf("baseline\n");

	for (i = 0; i < num_words; ++i)
		for (j = 0; j < num_tags; ++j)
			pr[i][j] = dw[i][j];
	maxprob();
}

// Compute the maximum likelihood tag based on whatever is in
// the pr field.

// For every word store in comp_tag the tag with the largest value in pr.
// Ties keep the lowest tag number; a word whose probabilities are all
// zero (or not greater than zero) keeps its previous comp_tag.
void Sentence::maxprob()
{
	int	w, t;

	if (verbose) printf("maxprob\n");

	for (w = 0; w < num_words; ++w)
	{
		const double	*row = pr[w];
		double		best = 0.0;

		for (t = 0; t < num_tags; ++t)
		{
			if (! (row[t] > best)) continue;

			best = row[t];
			comp_tag[w] = t;
		}
	}
}

// Compute the maximum likelihood tag given the previous tags
// (up to ngram-1). This is supposed to give the syntactic estimate
// of the next tag.

// However, to do it more correctly it should use
// the forward-backward algorithm for each word in which the probability
// of the output from a state is 0 for all words except the word
// under consideration if the state does not agree with the actual
// state, and the output from states for the word in question are
// either uniformly probable or given by the unconditioned
// tag probabilities.

// Predict every word's tag from the actual tags of the preceding words
// only: the tag with the highest n-gram count after the up to NGRAMS-1
// previous actual tags is stored in comp_tag. Near the start of the
// sentence, where fewer previous words exist, a shorter n-gram is used.
// Requires that every word has a known actual tag; otherwise a message is
// printed and nothing is changed. A word whose candidate counts are all
// zero keeps its previous comp_tag.
void Sentence::syntax()
{
#if (NGRAMS<2)||(NGRAMS>4)
	// The preprocessor treats an undefined NGRAMS as 0, so this branch is
	// also taken when the macro is missing altogether.
#ifdef NGRAMS
	printf("Syntax not implemented for ngram %d\n", NGRAMS);
#else
	printf("Syntax not implemented: NGRAMS is not defined\n");
#endif
	return;
#else
	int	i, j, s, k, n;
	double	m, v;

	if (verbose) printf("syntax\n");

	for (i = 0; i < num_words; i++)
	{
		if (actual_tag[i] < 0)
		{
			printf("Syntax requires that all tags are known.\n");
			return;
		}
	}

	for (i = 0; i < num_words; ++i)
	{
		// The actual ngram to use (one less because that is how the state is defined)
		// n = min(NGRAMS-1, i) is the number of previous words available

		for (n = 0; n < NGRAMS-1 && i-n > 0; ++n)
			;

		// get the actual tag of the previous word(s)

		// printf("syntax for ");

		s = 0;
		for (k = 0; k < n; ++k)
		{
			s = s * num_tags + actual_tag[i-n+k];
			// printf("%s%s", sTAGSEP, tag_to_str(actual_tag[i-n+k]));
		}
		// printf(" -> ");

		// use it to predict the most likely next word


		m = 0.0;

		for (j = 0; j < num_tags; ++j)
		{
			// Index of the (n+1)-gram "previous tags, then tag j"

			k = s * num_tags + j;

			switch (n+1) {
			case 1: v = count1[k]; break;
			case 2: v = count2[k]; break;
			case 3: v = count3[k]; break;
#if NGRAMS==4
			case 4: v = count4[k]; break;
#endif
			default:
				// printf("Error in syntax, ngram = %d\n", n);
				return;
			}

			if (v > m)
			{
				m = v;
				comp_tag[i] = j;
			}
		}

		// printf("%s (%s)\n", tag_to_str(comp_tag[i]),
		// (comp_tag[i] == actual_tag[i]) ? "good" : tag_to_str(actual_tag[i]));
	}
#endif
}

// Pseudo-random number in [0, 1): the low 20 bits of random() divided by
// 2^20.
inline double urand()
{
	const long	low_bits = 0xfffff;
	const double	range = (double) 0x100000;

	return (double) (random() & low_bits) / range;
}

// NOTE(review): not referenced in the code visible here -- presumably the
// sentence currently being processed; confirm against the command loop.
Sentence *o = NULL;

// Stack of open command/input files. command_input() pushes a file and
// read_line() reads from the top entry. When the top file is exhausted,
// read_line() pops it (unless it is the only one); its `pop` argument
// decides whether reading then continues with the file below or 0 is
// returned.

FILE *file_stack[MAX_FILES];
int num_files = 0;	// number of entries in use on file_stack

int command_input(char *file)
{
	if (verbose) printf("input %s\n", file);

	file_stack[num_files] = fopen(file, "r");
	if (file_stack[num_files] == NULL)
	{
		printf("Could not open %s for input\n", file);
		return 0;
	}
	++num_files;
	return 1;
}

// Strip all trailing '\n' and '\r' characters from line, in place.
//
// The length test guards both character tests, so an empty string (or a
// string consisting only of line endings) never causes an access before
// the start of the buffer.
void chomp(char *line)
{
	size_t	len = strlen(line);

	while (len > 0 && (line[len-1] == '\n' || line[len-1] == '\r'))
		line[--len] = '\0';
}


// Read the next usable line from the file on top of the input stack.
//
//   line      receives the line with trailing line endings removed
//   pop       when the top file is exhausted it is closed and popped (the
//             last remaining file is never popped); with pop == 0 the
//             function then returns 0, otherwise reading continues
//   blank_ok  when 0, lines of at most one character are skipped
//
// Lines starting with '#' are always skipped; lines starting with '$' are
// echoed. Returns 1 when a line was delivered, 0 when no input is left.
int read_line(char *line, int pop, int blank_ok)
{
	for (;;)
	{
		if (num_files <= 0) return 0;

		line[0] = '\0';

		FILE	*top = file_stack[num_files-1];

		if (fgets(line, MAX_LLEN, top) == NULL)
		{
			// End of the current file
			if (num_files == 1) return 0;
			fclose(top);
			--num_files;
			if (pop == 0) return 0;
		}

		if (line[0] == '$') printf("%s", line);

		if (line[0] == '#') continue;				// comment line
		if (blank_ok == 0 && strlen(line) <= 1) continue;	// blank line

		break;
	}

	chomp(line);

	return 1;
}

// Token-scanning helpers. They expect a `char *line` cursor and a scratch
// `char *s` in scope: GET_ARG copies up to the next whitespace, GET_REST
// copies the remainder, SKIP_SPACE advances past whitespace. The
// definitions are kept as they were; get_command below spells the same
// loops out instead of using them.
#define GET_ARG(what) for (s = what; *line && isspace(*line) == 0; s++, line++) *s = *line; *s = '\0';
#define GET_REST(what) for (s = what; *line; s++, line++) *s = *line; *s = '\0';
#define SKIP_SPACE() while (isspace(*line)) ++line;

// Split a command line into the command word and up to two arguments,
// all separated by whitespace. For the "alphabet" command the whole rest
// of the line (spaces included) becomes arg1 and arg2 stays empty.
// Missing parts are returned as empty strings. The output buffers must be
// large enough for their tokens; no length is checked.
void get_command(char *line, char *com, char *arg1, char *arg2)
{
	char	*dst;

	com[0] = '\0';
	arg1[0] = '\0';
	arg2[0] = '\0';

	// The command word

	while (isspace(*line)) line++;
	dst = com;
	while (*line != '\0' && ! isspace(*line)) *dst++ = *line++;
	*dst = '\0';
	while (isspace(*line)) line++;

	// "alphabet" takes the remainder of the line verbatim

	if (! strcmp(com, "alphabet"))
	{
		dst = arg1;
		while (*line != '\0') *dst++ = *line++;
		*dst = '\0';
		return;
	}

	// First argument

	dst = arg1;
	while (*line != '\0' && ! isspace(*line)) *dst++ = *line++;
	*dst = '\0';
	while (isspace(*line)) line++;

	// Second argument

	dst = arg2;
	while (*line != '\0' && ! isspace(*line)) *dst++ = *line++;
	*dst = '\0';
}

// qsort() comparison function that orders doubles from largest to
// smallest. Returns 0 for equal values so that the ordering is
// consistent, as qsort requires (cmp(x, x) must not be negative).
int cmp(const void *a, const void *b)
{
	const double	lhs = * (const double *) a;
	const double	rhs = * (const double *) b;

	if (lhs < rhs) return 1;
	if (lhs > rhs) return -1;
	return 0;
}

void scangram(char *file)
{
	FILE	*fp;
	char	line[BUF_SIZE];
	int	i, j, k;
	double	v;

	if (verbose) printf("ngrams %s\n", file);

	fp = fopen(file, "r");
	if (fp == NULL)
	{
		printf("could not open %s\n", file);
		exit(1);
	}

	fgets(line, BUF_SIZE, fp);
	num_tags = atoi(line);

	k = -1;
	i = j = 0;
	while (fgets(line, BUF_SIZE, fp))
	{
		chomp(line);

		v = atof(line);

		if (k == -1)
		{
			strcpy(tag_to_str(i), line);
			if (i-j >= num_tags-1)
			{
				j = i + 1;
				++k;
			}
		} else if (k == 0)
		{
			count0[0] = v;
			j = i+1;
			k = 1;
		} else if (k == 1)
		{
			count1[i-j] = v;
			if (v <= 0.0)
			{
				printf("Tag %s has probability 0.\n", tag_to_str(i-j));
				// exit(1);
			}
			if (i-j >= num_tags-1)
			{
				j = i+1;
				++k;
			}
		} else if (k == 2)
		{
#if 0
			printf("scangram(%s,%s) = count2[%d] = %g\n",
				tag_to_str(tag_at(i-j+1,-1)),
				tag_to_str(tag_at(i-j+1,0)),
				i-j, v);
#endif
			count2[i-j] = v;
			if (i-j >= num_tags*num_tags-1)
			{
				j = i+1;
				++k;
			}
		} else if (k == 3)
		{
			count3[i-j] = v;
			if (i-j >= num_tags*num_tags*num_tags-1)
			{
				j = i+1;
				++k;
			}
#if NGRAMS>=4
		} else if (k == 4)
		{
			count4[i-j] = v;
			if (i-j >= num_tags*num_tags*num_tags*num_tags-1)
			{
				j = i+1;
				++k;
			}
#endif
		}

		++i;
	}

	fclose(fp);

	end_tag = str_to_tag(".");

	for (i = 0; i < num_tags; ++i)
		pr_tag[i] = count1[i] / count0[0];
}

// Load the training accumulator train[] from a file written by
// save_train(): one value per line.
//
// The num_states * num_tags entries are cleared first. At most that many
// lines are read, so a longer file cannot write past the end of train[];
// entries for which the file has no line stay 0. If the file cannot be
// opened a message is printed and train[] is left cleared.
void read_train(char *file)
{
	FILE	*fp;
	int	i;
	int	limit = num_states * num_tags;
	char	line[BUF_SIZE];

	if (verbose) printf("read_train %s\n", file);

	for (i = 0; i < limit; ++i) train[i] = 0.0;

	fp = fopen(file, "r");
	if (fp == NULL)
	{
		printf("could not open %s\n", file);
		return;
	}

	i = 0;
	while (i < limit && fgets(line, BUF_SIZE, fp))
	{
		train[i] = atof(line);
		++i;
	}
	fclose(fp);
}

// Write the num_states * num_tags entries of the training accumulator
// train[] to `file`, one "%g" value per line. If the file cannot be
// opened a message is printed and nothing is written.
void save_train(char *file)
{
	FILE	*out;
	int	k, total;

	if (verbose) printf("save_train %s\n", file);

	out = fopen(file, "w");
	if (out == NULL)
	{
		printf("could not open %s\n", file);
		return;
	}

	total = num_states * num_tags;
	k = 0;
	while (k < total)
	{
		fprintf(out, "%g\n", train[k]);
		++k;
	}

	fclose(out);
}

// Rebuild the transition matrix pr_trans from the training accumulator
// train[]: entry k of train is the weight of the transition from state
// (k / num_tags) % num_states to state k % num_states. Each row is then
// normalized to sum to 1; a row without positive total weight becomes
// uniform over all states.
void restore_train()
{
	int	row, col, k;
	int	total = num_states * num_tags;
	double	sum;

	if (verbose) printf("restore_train\n");

	// Start from an all-zero matrix

	for (row = 0; row < num_states; ++row)
		for (col = 0; col < num_states; ++col)
			pr_trans[row][col] = 0.0;

	// use the training probabilities

	for (k = 0; k < total; ++k)
		pr_trans[(k / num_tags) % num_states][k % num_states] = train[k];

	// normalize

	for (row = 0; row < num_states; ++row)
	{
		sum = 0.0;
		for (col = 0; col < num_states; ++col)
			sum += pr_trans[row][col];

		if (! (sum > 0.0))
		{
			for (col = 0; col < num_states; ++col)
				pr_trans[row][col] = 1.0 / (double) num_states;
			continue;
		}

		for (col = 0; col < num_states; ++col)
			pr_trans[row][col] /= sum;
	}
}

// Replace pr_state and pr_trans by smoothed estimates computed from the
// n-gram counts.
//
// For each order, seen n-grams get a discounted relative frequency with
// discount d = N / (N + T) (N = total count of the context, T = number of
// seen continuations); unseen n-grams take the probability of the next
// lower order, rescaled so that each context's distribution sums to 1.
// The largest absolute change is tracked in m (only printed by disabled
// debug code).
void smooth()
{

	// Compute discounted probabilities, then backoff

	// Work tables for the 1-, 2- and 3-gram probabilities, sized for the
	// maximum number of tags
	double *p1 = new double[MAX_TAGS];
	double *p2 = new double[MAX_TAGS*MAX_TAGS];
	double *p3 = new double[MAX_TAGS*MAX_TAGS*MAX_TAGS];

	double	a, b, d;
	double	N, T, Z;

	int	i, j, k, w;

	double p, m;

	// 1-grams: N = total count, T = tags seen, Z = tags never seen
	N = T = Z = 0;
	for (i = 0; i < num_tags; ++i)
	{
		N += count1[i];
		if (count1[i] > 0) T++; else Z++;
	}
	d = N / (N + T);

	// The discounted probability for the 1-grams is not backed-off

	// Unseen tags share the remaining mass 1 - d equally
	for (i = 0; i < num_tags; ++i)
		p1[i] = (count1[i] > 0) ? d * count1[i] / count0[0] : (1 - d) / Z;


	// The 2-gram probabilities (i -> j)

	for (i = 0; i < num_tags; ++i)
	{

		N = T = Z = 0;
		for (j = 0; j < num_tags; ++j)
		{
			k = i * num_tags + j;
			N += count2[k];
			if (count2[k] > 0) T++; else Z++;
		}
		d = N > 0 ? N / (N + T) : 0.0;

#if 0
		printf("Discount factor for %s = %g\n", tag_to_str(i), d);
#endif

		// a = mass given to seen continuations, b = unscaled backoff
		// mass of the unseen ones
		a = b = 0.0;
		for (j = 0; j < num_tags; ++j)
		{
			k = i * num_tags + j;

			// Use the discounted probability, if possible, otherwise use
			// the backoff probability. This will be adjusted afterwards to
			// get a probability

			p2[k] = (count2[k] > 0) ? d * count2[k] / count1[i] : p1[j];

			if (count2[k] > 0) a += p2[k]; else b += p2[k];

#if 0
			printf("p2(%s->%s) = %g, count2 = %g\n", tag_to_str(i), tag_to_str(j), p2[k], count2[k]);
#endif
		}

		// Scale the backoff entries so that the row sums to 1.
		// NOTE(review): b is 0 when every continuation was seen; the
		// resulting factor is then not applied to any entry.
		a = (1.0 - a) / b;
		for (j = 0; j < num_tags; ++j)
		{
			k = i * num_tags + j;

			if (count2[k] == 0) p2[k] *= a;

#if 0
			printf("p2(%s->%s) = %g, count2 = %g\n", tag_to_str(i), tag_to_str(j), p2[k], count2[k]);
#endif
		}
	}

	// The 3-gram probabilities

	// NOTE(review): this section runs for every NGRAMS setting and reads
	// count3 -- confirm count3 is populated when compiled with NGRAMS==2.
	for (i = 0; i < num_tags*num_tags; ++i)
	{

		// w is the last tag of the 2-tag context i; it selects the
		// 2-gram row used as backoff
		w = i % num_tags;

		N = T = Z = 0;
		for (j = 0; j < num_tags; ++j)
		{
			k = i * num_tags + j;
			N += count3[k];
			if (count3[k] > 0) T++; else Z++;
		}
		d = N > 0 ? N / (N + T) : 0.0;

		a = b = 0.0;
		for (j = 0; j < num_tags; ++j)
		{
			k = i * num_tags + j;

			// Use the discounted probability, if possible, otherwise use
			// the backoff probability. This will be adjusted afterwards to
			// get a probability

			p3[k] = (count3[k] > 0) ? d * count3[k] / count2[i] : p2[w * num_tags + j];

			if (count3[k] > 0) a += p3[k]; else b += p3[k];
		}

		// Scale the backoff entries so that the row sums to 1
		a = (1.0 - a) / b;
		for (j = 0; j < num_tags; ++j)
		{
			k = i * num_tags + j;

			if (count3[k] == 0) p3[k] *= a;
		}
	}

	// Install the smoothed values; m tracks the largest absolute change
	m = 0.0;
	for (i = 0; i < num_states; ++i)
	{

		// NOTE(review): p is only assigned here for NGRAMS==2, and for
		// NGRAMS==3 when the state's first tag is end_tag. In every
		// other case p still holds the value from the previous inner
		// loop (or is uninitialized on the first pass) when it is
		// stored in pr_state[i] below -- confirm the intended value.
#if NGRAMS==2
		p = p1[i];
#elif NGRAMS==3
		if (end_tag == tag_at(i+1,-1))
			p = p2[end_tag * num_tags + tag_at(i+1,0)];
#endif

		if (fabs(p - pr_state[i]) > m)
			m = fabs(p - pr_state[i]);

		pr_state[i] = p;

		for (j = 0; j < num_states; ++j)
		{

			// Transition i -> j: for trigrams only states whose tags
			// overlap are connected
#if NGRAMS==2
			p = p2[i * num_tags + j];
#elif NGRAMS==3
			if (tag_at(i+1,0) == tag_at(j+1,-1))
				p = p3[i * num_tags + (j % num_tags)];
			else
				p = 0.0;
#endif

#if 0
			if (p != pr_trans[i][j])
			{
				printf("pr(%s->%s) was %g now %g\n", tag_to_str(i), tag_to_str(j), pr_trans[i][j], p);
			} else
			{
				printf("pr(%s->%s) = %g unch\n", tag_to_str(i), tag_to_str(j), pr_trans[i][j]);
			}
#endif

			if (fabs(p - pr_trans[i][j]) > m)
				m = fabs(p - pr_trans[i][j]);

			pr_trans[i][j] = p;
		}
	}

#if 0
	printf("Maximum absolute difference %g\n", m);
#endif

	delete [] p1;
	delete [] p2;
	delete [] p3;
}

// Read a file of Brill transformation rules, one rule per line, and apply
// each of them to the current sentence through o->transform() (per the
// original note, this rewrites the tag probabilities in pr).
//
// file: path of the rule file. A message is printed if it cannot be opened.
// Nothing is done when no sentence has been loaded yet.

void command_brill(char *file)
{
	FILE	*fp;
	char	rule[BUF_SIZE];

	if (verbose) printf("brill %s\n", file);

	// Without a sentence there is nothing to transform, and o->transform()
	// would dereference a null pointer

	if (o == NULL) return;

	fp = fopen(file, "r");
	if (fp == NULL)
	{
		printf("Rule file %s could not be opened.\n", file);
		return;
	}

	while (fgets(rule, BUF_SIZE, fp))
	{
		chomp(rule);
#if 0
		printf("Transforming with %s\n", rule);
#endif
		o->transform(rule);
	}

	fclose(fp);
}

// Lower-case fragments that mark a word as a possible spelled-out or
// ordinal number when they occur at its start or end. NULL terminated.

char *spelled_numbers[] = {
"first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth",
"one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
"twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety", "hundred",
"1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "0th",
NULL };

// Results of numstringok()

enum {NUMBER_NOTOK=0, NUMBER_OK, NUMBER_DEFINITE, NUMBER_HYPHEN};

// Decide whether str looks like a number. Letter case is ignored.
//
// Returns
//   NUMBER_DEFINITE  at least one digit, and nothing but digits, the
//                    characters ",.:" and an optional leading '=', '+' or '-'
//   NUMBER_HYPHEN    as above, but also with '-' or '+' after the first
//                    character
//   NUMBER_OK        str begins or ends with an entry of spelled_numbers
//   NUMBER_NOTOK     none of the above
//
// The string is examined in place, so there is no limit on its length.

int numstringok(char *str)
{
	const size_t	len = strlen(str);
	size_t	digits = 0, marks = 0, hyphens = 0;
	size_t	i;

	for (i = 0; i < len; ++i)
	{
		const char	c = str[i];

		if (c >= '0' && c <= '9')
			++digits;
		else if ((i == 0 && strchr("=+-", c)) || strchr(",.:", c))
			++marks;
		else if (i > 0 && (c == '-' || c == '+'))
			++hyphens;
	}

	// This is a number

	if (digits > 0 && digits + marks == len)
		return NUMBER_DEFINITE;

	// This is a hyphenated number

	if (digits > 0 && digits + marks + hyphens == len)
		return NUMBER_HYPHEN;

	// This could be a spelled number: look for a known fragment as a
	// prefix or as a suffix of the word

	for (i = 0; spelled_numbers[i]; ++i)
	{
		const char	*frag = spelled_numbers[i];
		const size_t	flen = strlen(frag);

		if (strncasecmp(str, frag, flen) == 0)
			return NUMBER_OK;

		// The suffix starts flen characters before the end of the word,
		// which only exists if the word is at least that long

		if (len >= flen && strcasecmp(str + (len - flen), frag) == 0)
			return NUMBER_OK;
	}

	return NUMBER_NOTOK;
}


// Which set of ad hoc post-lexicon tag constraints command_sentence()
// applies: none, those for the MedPost tags (MC, VV*) or those for the
// Penn treebank tags (CD, VB*)

enum { ADHOC_NONE = 0, ADHOC_MEDPOST, ADHOC_PENN};

// The current setting, changed by the "adhoc" command. command_sentence()
// resets it to ADHOC_NONE whenever a translation lexicon (trans) is loaded.
int option_adhoc = ADHOC_MEDPOST;

void command_sentence(char *file)
{
	char	line[MAX_LLEN];
	int	i, t, j;
	char	word[MAX_WLEN+1], *str, *entry;

	if (trans)
		option_adhoc = ADHOC_NONE;

	if (strlen(file) > 0)
	{
		command_input(file);
		strcpy(line, "");

		// Gather lines together

		while (strlen(line) < 9000 && read_line(line+strlen(line), 0, 0))
		{
			strcat(line, " ");
		}

		if (o) delete o;
		o = new Sentence(line);
	} else
	{
		// Read a single line from the current input file to get a sentence

		if (read_line(line, 0, 0))
		{
			if (o) delete o;
			o = new Sentence(line);
		}
	}

	if (o == NULL) return;

	for (i = 0; i < o->num_words; i++)
	{
		strncpy(word, o->words[i], MAX_WLEN);
		word[MAX_WLEN] = '\0';

		// Lower case the first letter only if the remaining letters are all lower case

		if (i == 0)
		{
			for (j = 1; j < strlen(word); ++j)
				if (! islower(word[j]))
					break;
			if (j == strlen(word))
				word[0] = tolower(word[0]);
		}

		str = &word[0];

		if (trans)
		{
			str = trans->get(str);
			if (str && str[0] == cTAGSEP) ++str;
		}

		if (str == NULL)
		{
			strcpy(word, "");
			str = &word[0];
		}

		entry = lex->get(str);
		o->count[i] = lex->count(str);

		for (t = 0; t < num_tags; ++t)
		{
			o->dw[i][t] = lex->scan_lex(entry, tag_to_str(t));
#if 0
			printf("%s (%s) %s%s:%g\n", o->words[i], str, sTAGSEP, tag_to_str(t), o->dw[i][t]);
#endif
		}
	}

	o->normalize();

	// Check post-lexicon constraints

	if (option_adhoc != ADHOC_NONE)
	{
		char	essential[MAX_WLEN];
		int	ok, flag;

		for (i = 0; i < o->num_words; i++)
		{
			strncpy(word, o->words[i], MAX_WLEN);
			word[MAX_WLEN] = '\0';

			if (o->count[i] > 999.0) continue;	// This means the lexicon entry is definite.

			tag_set(word, o->dw[i], "$", strcmp(word, "$") == 0);
			tag_set(word, o->dw[i], "''", strcmp(word, "'") == 0 || strcmp(word, "''") == 0);
			tag_set(word, o->dw[i], "(", strcmp(word, "(") == 0 || strcmp(word, "[") == 0 || strcmp(word, "{") == 0);
			tag_set(word, o->dw[i], ")", strcmp(word, ")") == 0 || strcmp(word, "]") == 0 || strcmp(word, "}") == 0);
			tag_set(word, o->dw[i], ",", strcmp(word, ",") == 0);
			tag_set(word, o->dw[i], ".", strcmp(word, ".") == 0 || strcmp(word, "!") == 0 || strcmp(word, "?") == 0);
			tag_set(word, o->dw[i], ":", strcmp(word, "-") == 0 || strcmp(word, "--") == 0 || strcmp(word, ":") == 0 || strcmp(word, ";") == 0);
			tag_set(word, o->dw[i], "``", strcmp(word, "`") == 0 || strcmp(word, "``") == 0);


			// Numbers are easily recognized,
			// Verbs cannot be hyphenated, this takes care of hyphenated participles
			// which must be tagged as JJ
			// These are the only tags with constraints (so far) that are different
			// in the MedPost and Penn treebank tag set

			if (option_adhoc == ADHOC_MEDPOST)
			{
				switch (numstringok(word)) {
				case NUMBER_DEFINITE: tag_set(word, o->dw[i], "MC", 1); break;
				case NUMBER_OK: tag_ok(word, o->dw[i], "MC", 1); break;
				case NUMBER_HYPHEN: tag_set(word, o->dw[i], "MC", 1); break;
				default: tag_ok(word, o->dw[i], "MC", 0); break;
				}

				tag_ok(word, o->dw[i], "VVB", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VVD", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VVG", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VVI", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VVN", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VVZ", strchr(word, '-') == NULL);
			} else if (option_adhoc == ADHOC_PENN)
			{
				switch (numstringok(word)) {
				case NUMBER_DEFINITE: tag_set(word, o->dw[i], "CD", 1); break;
				case NUMBER_OK: tag_ok(word, o->dw[i], "CD", 1); break;
				case NUMBER_HYPHEN: tag_set(word, o->dw[i], "CD", 1); break;
				default: tag_ok(word, o->dw[i], "CD", 0); break;
				}

				tag_ok(word, o->dw[i], "VBP", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VBD", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VBG", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VB", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VBN", strchr(word, '-') == NULL);
				tag_ok(word, o->dw[i], "VBZ", strchr(word, '-') == NULL);
			}

			// If the word is hyphenated, look at the last word, call this
			// the essential word

			str = strrchr(word, '-');
			if (str == NULL || strlen(str) < 3) str = &word[0]; else ++str;
			strcpy(essential, str);

			// If the essential word has at least one letter, and has an embedded cap
			// or number, make it an NN or NNS. NNP is possible but we accept this error.

			ok = 0;
			flag = 0;
			for (j = 0; j < strlen(essential); ++j)
			{
				if ((j > 0 && isupper(essential[j])) || isdigit(essential[j])) flag = 1;
				if (isalpha(essential[j])) ok = 1;
			}
			if (flag == 1 && j > 0 && essential[j - 1] == 's') flag = 2;
			if (ok == 1 && flag == 1) tag_set(word, o->dw[i], "NN", 1);
			if (ok == 1 && flag == 2) tag_set(word, o->dw[i], "NNS", 1);

			// Require any NNP to be capitalized

			tag_ok(word, o->dw[i], "NNP", isupper(essential[0]));

			if (cons)
			{
				// Impose constraints if there is an entry

				if (entry = cons->get(word))
				{
					if (verbose) printf("constrain %s%s\n", word, entry);

					for (t = 0; t < num_tags; ++t)
					{
						if (cons->scan_lex(entry, tag_to_str(t)) < 0.5)
						{
							tag_ok(word, o->dw[i], tag_to_str(t), 0);
						}
					}
				}
			}
#if 0
			for (t = 0; t < num_tags; ++t)
				printf("%s%s%s:%g after constraints\n", word, sTAGSEP, tag_to_str(t), o->dw[i][t]);
#endif

		}
	}

	for (i = 0; i < o->num_words; i++)
		for (t = 0; t < num_tags; ++t)
			o->pr[i][t] = o->dw[i][t];

}

// Copy the tag probabilities o->pr[][] of the current sentence back into
// its weights o->dw[][]. Does nothing (apart from echoing the command)
// when no sentence has been loaded.

void command_copypr()
{
	int	i, t;

	if (verbose) printf("copypr\n");

	// The command may be given before any sentence exists

	if (o == NULL) return;

	for (i = 0; i < o->num_words; i++)
		for (t = 0; t < num_tags; ++t)
			o->dw[i][t] = o->pr[i][t];
}

// Print the current sentence with o->print(0). Does nothing when no
// sentence has been loaded, instead of dereferencing a null pointer.

void command_print()
{
	if (o) o->print(0);
}

// Print the current sentence with o->print(1). Does nothing when no
// sentence has been loaded, instead of dereferencing a null pointer.

void command_printfull()
{
	if (o) o->print(1);
}

// Print the current sentence with o->print(2). Does nothing when no
// sentence has been loaded, instead of dereferencing a null pointer.

void command_printsent()
{
	if (o) o->print(2);
}

// Program entry point: allocate the n-gram count tables, select the input
// (the first argument not starting with '-', otherwise stdin) and execute
// the commands read from it, one per line, until the input is exhausted or
// an "exit" command is seen. Unknown commands are ignored.

int main(int argc, char **argv)
{
	int	i, t;
	char	line[MAX_LLEN], com[MAX_LLEN], arg1[MAX_LLEN], arg2[MAX_LLEN];
	char	*entry;

	srandom(time(NULL));

	strcpy(option_untag, "UNTAGGED");

	file_stack[0] = stdin;
	num_files = 1;

	count0 = new double;
	count1 = new double[MAX_TAGS];
	count2 = new double[MAX_TAGS * MAX_TAGS];
	count3 = new double[MAX_TAGS * MAX_TAGS * MAX_TAGS];
#if NGRAMS>=4
	count4 = new double[MAX_TAGS * MAX_TAGS * MAX_TAGS * MAX_TAGS];
#endif
	num_tags = 0;

	train = new double[MAX_TAGS * MAX_TAGS * MAX_TAGS];

	for (i = 0; i < MAX_TAGS * MAX_TAGS * MAX_TAGS; ++i) train[i] = 0.0;

	for (i = 0; i < MAX_TAGS; ++i)
		lex_backoff[i] = 1.0;

// #define INTERNAL_TOKENIZE

extern int process_tokenizer_args(int argc, char **argv);
extern int run_tokenizer(FILE *ofp);

#ifdef INTERNAL_TOKENIZE
	process_tokenizer_args(argc, argv);
	int fd[2];
	pipe(fd);
	FILE *ifp = fdopen(fd[0], "r");
	FILE *ofp = fdopen(fd[1], "w");
	run_tokenizer(ofp);
	fclose(ofp);
	file_stack[0] = ifp;
	num_files = 1;
#else
	// Only the first argument that is not an option is used as input file

	for (i = 1; i < argc; ++i)
	{
		if (argv[i][0] != '-' && num_files == 1)
		{
			num_files = 0;
			command_input(argv[i]);
		}
	}
#endif

	// Read the input file

	while (read_line(line,1,0))
	{
		get_command(line, com, arg1, arg2);
		fflush(stdout);
		if (strcmp(com, "input") == 0)
		{
			command_input(arg1);
		} else if (strcmp(com, "ngrams") == 0)
		{
			scangram(arg1);
		} else if (strcmp(com, "code") == 0)
		{
			use_codes = atoi(arg1);
			if (verbose) printf("code %d\n", use_codes);
		} else if (strcmp(com, "lex") == 0)
		{
			if (verbose) printf("lex %s %s\n", arg1, arg2);
			if (lex) delete lex;
			lex = new Lexicon(num_tags, atoi(arg1), arg2, use_codes);
			for (i = 0; i < num_tags; ++i)
				lex_backoff[i] = 1.0;
		} else if (strcmp(com, "addlex") == 0)
		{
			if (verbose) printf("addlex %s\n", arg1);
			if (lex)
				lex->addfile(arg1);
		} else if (strcmp(com, "rmlex") == 0)
		{
			if (verbose) printf("rmlex %s\n", arg1);
			if (lex)
				lex->rmfile(arg1);
		} else if (strcmp(com, "translate") == 0)
		{
			if (verbose) printf("translate %s\n", arg1);
			if (trans) delete trans;
			trans = new Lexicon(num_tags, atoi(arg1), arg2, use_codes);
		} else if (strcmp(com, "constrain") == 0)
		{
			if (verbose) printf("constrain %s\n", arg1);
			if (cons) delete cons;
			cons = new Lexicon(num_tags, 0, arg1, use_codes);
		} else if (strcmp(com, "addsmoothing") == 0)
		{
			add_smoothing = atof(arg1);
			if (verbose) printf("add %g\n", add_smoothing);
		} else if (strcmp(com, "backoff") == 0)
		{
			add_smoothing = 0.0;
			if (verbose) printf("backoff %s\n", arg1);

			// As for addlex and rmlex, there is nothing to look up
			// until a lexicon has been loaded

			if (lex)
			{
				entry = lex->get(arg1);
				for (t = 0; t < num_tags; ++t)
					lex_backoff[t] = lex->scan_lex(entry, tag_to_str(t));
			}
		} else if (strcmp(com, "sentence") == 0)
		{
			command_sentence(arg1);
		} else if (strcmp(com, "brill") == 0)
		{
			command_brill(arg1);
		} else if (strcmp(com, "copypr") == 0)
		{
			command_copypr();
		} else if (strcmp(com, "print") == 0)
		{
			command_print();
		} else if (strcmp(com, "printfull") == 0)
		{
			command_printfull();
		} else if (strcmp(com, "printsent") == 0)
		{
			if (verbose) printf("printsent\n");
			command_printsent();
		} else if (strcmp(com, "init") == 0)
		{
			if (verbose) printf("init %s\n", arg1);
			init(atoi(arg1));
		} else if (strcmp(com, "smooth") == 0)
		{
			if (verbose) printf("smooth\n");
			smooth();
		} else if (strcmp(com, "compute") == 0)
		{
			if (o) o->compute();
		} else if (strcmp(com, "viterbi") == 0)
		{
			if (o) o->viterbi();
		} else if (strcmp(com, "baseline") == 0)
		{
			if (o) o->baseline();
		} else if (strcmp(com, "maxprob") == 0)
		{
			if (o) o->maxprob();
		} else if (strcmp(com, "syntax") == 0)
		{
			if (o) o->syntax();
		} else if (strcmp(com, "read_train") == 0)
		{
			read_train(arg1);
		} else if (strcmp(com, "save_train") == 0)
		{
			save_train(arg1);
		} else if (strcmp(com, "restore_train") == 0)
		{
			restore_train();
		} else if (strcmp(com, "echo") == 0)
		{
			// The text starts after "echo "; a line shorter than that
			// has no text, and line + 5 would point past its end

			printf("%s\n", strlen(line) >= 5 ? line + 5 : "");
		} else if (strcmp(com, "verbose") == 0)
		{
			verbose = atoi(arg1);
			if (verbose) printf("verbose %s\n", arg1);
		} else if (strcmp(com, "adhoc") == 0)
		{
			if (verbose) printf("adhoc %s\n", arg1);
			if (strcmp(arg1, "none") == 0) option_adhoc = ADHOC_NONE;
			else if (strcmp(arg1, "medpost") == 0) option_adhoc = ADHOC_MEDPOST;
			else if (strcmp(arg1, "penn") == 0) option_adhoc = ADHOC_PENN;
		} else if (strcmp(com, "untag") == 0)
		{
			// arg1 can be longer than option_untag, so truncate

			strncpy(option_untag, arg1, sizeof(option_untag) - 1);
			option_untag[sizeof(option_untag) - 1] = '\0';
		} else if (strcmp(com, "exit") == 0)
		{
			exit(0);
		}
	}

	return 0;
}
