<?
class spelling extends module {

    //
    // Author: Herman Tolentino (unless specified)
    // ngram method author: Mikko Saari
    // ngram source url: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // AEFI NLP Project
    // Vaccine Analytic Unit - Brighton Collaboration - National Library of Medicine
    //
    // IMPORTANT:
    // This assumes that the UMLS Lexicon tables have been set up and that the LRAGR table
    // has been indexed using STR.
    //

    function spelling() {
        // Constructor: fills in the metadata that describes this module.
        // Remember to bump the release prefix of the version string
        // whenever the module changes.
        // 0.1: BEGIN
        $this->module = "spelling";
        $this->description = "AEFI Module - Spelling Checker";
        $this->author = 'Herman Tolentino MD';
        // version is "<release>-<current date as YYYY-MM-DD>"
        $this->version = "0.1-".date("Y-m-d");
    }

    function init_deps() {
        // Registers, via module::set_dep(), each module this one depends on.
        $required = array("module", "textlab");
        foreach ($required as $dependency) {
            module::set_dep($this->module, $dependency);
        }
    }

    function init_lang() {
        // Registers the English user-interface strings of this module.
        // Keys are the language directive names, values the display text;
        // registration order is the order listed here.
        $labels = array(
            "FTITLE_SPELLING_CHECK"      => "SPELLING CHECK",
            "INSTR_SPELLING_CHECK"       => "CLICK ON THE HIGHLIGHTED TERMS ABOVE",
            "LBL_POSSIBLE_MISPELLED"     => "POSSIBLE MISPELLED WORD",
            "LBL_POSSIBLE_CORRECTIONS"   => "POSSIBLE CORRECTIONS",
            "LBL_REPLACEMENT_TERM"       => "REPLACEMENT TERM",
            "INSTR_SPELLCORRECTION_LIST" => "PICK ONE FROM THE LIST AND CLICK ON <b>Replace from Correction List</b>",
            "LBL_SPELLING_SOURCE"        => "SPELLING SOURCE",
            "LBL_BUILD_DICT"             => "BUILD DICTIONARY"
        );
        foreach ($labels as $directive => $text) {
            module::set_lang($directive, "english", $text, "Y");
        }
    }

    function init_menu() {
        // Registers the module's menu entry and its descriptive details.
        // An optional first argument (a module id) is read but not used.
        if (func_num_args() > 0) {
            $module_id = func_get_arg(0);
        }

        // "Build Dictionary" entry under LIBRARIES, handled by _build_dict()
        module::set_menu($this->module, "Build Dictionary", "LIBRARIES", "_build_dict");

        // description / version / author as set in the constructor
        module::set_detail($this->description, $this->version, $this->author, $this->module);
    }

    function init_stats() {
        // No-op: this module performs no statistics initialisation.
        // NOTE(review): presumably a hook expected by the module framework - confirm.
    }

    function init_help() {
        // No-op: this module performs no help initialisation.
        // NOTE(review): presumably a hook expected by the module framework - confirm.
    }

    function init_sql() {
        //
        // Creates the three tables used by the spelling module:
        //   m_spelling_dictionary  - main word list with lookup keys
        //                            (ngram, header, metaphone, anterior,
        //                            posterior, fragment)
        //   m_spelling_dictaux     - auxiliary word list with the same keys
        //                            plus frequency and neighbors
        //   m_spelling_ignorelist  - terms excluded from spell checking
        //
        // An optional module id argument may be passed by the caller; it is
        // not needed here.
        //

        // The main and auxiliary dictionaries share these column and index
        // definitions.
        $word_columns =
            "`word_id` float NOT NULL auto_increment,".
            "`word_str` varchar(255) NOT NULL default '',".
            "`word_ngram` varchar(255) default '',".
            "`word_header` varchar(4) default '',".
            "`word_metaphone` varchar(25) default '',".
            "`word_anterior` varchar(5) default '',".
            "`word_posterior` varchar(5) default '',".
            "`word_fragment` varchar(10) default '',";
        $lookup_indexes =
            "INDEX (`word_anterior`),".
            "INDEX (`word_posterior`),".
            "INDEX (`word_header`),".
            "INDEX (`word_metaphone`),".
            "INDEX (`word_fragment`)";

        module::execsql("CREATE TABLE `m_spelling_dictionary` (".
            $word_columns.
            "PRIMARY KEY  (`word_id`), ".
            "UNIQUE INDEX (`word_str`),".
            $lookup_indexes.
            ") ENGINE=MyISAM ");

        // Each column of the composite unique key is quoted separately;
        // quoting both names inside one pair of backticks names a single,
        // non-existent column and makes the CREATE TABLE fail.
        module::execsql("CREATE TABLE `m_spelling_dictaux` (".
            $word_columns.
            "`frequency` int(11) default 0, ".
            "`neighbors` varchar(20) not null default '', ".
            "PRIMARY KEY  (`word_id`), ".
            "UNIQUE INDEX (`word_str`, `neighbors`),".
            $lookup_indexes.
            ") ENGINE=MyISAM ");

        module::execsql("CREATE TABLE `m_spelling_ignorelist` (".
            "`ignore_str` varchar(50) NOT NULL default '',".
            "PRIMARY KEY  (`ignore_str`)".
            ") ENGINE=MyISAM");

    }

    function drop_tables() {
        // Drops every table that init_sql() creates, in creation order.
        $tables = array(
            "m_spelling_dictionary",
            "m_spelling_dictaux",
            "m_spelling_ignorelist"
        );
        foreach ($tables as $table) {
            module::execsql("DROP TABLE `".$table."`;");
        }
    }


    // --------------- CUSTOM MODULE FUNCTIONS ------------------

   function mark_text() {
   //
   // Highlights the misspelled words of a text by wrapping each one in
   // <span class='highlight'>...</span>. When given spellchecked text it
   // shows the misspellings that remain.
   //
   // args:   [0] request (GET) variables - accepted for interface
   //             compatibility, not used here
   //         [1] cleaned or spellchecked text
   // output: the text with misspelled words marked up; nothing when called
   //         without arguments
   //
   // A word counts as misspelled when spelling::is_correct_spelling()
   // rejects its lower-cased form and spelling::in_ignore_list() does not
   // list it.
   //
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $cleaned_text = $arg_list[1];

        // STEP 1: TAG cleaned text temporarily
        $tagged_text = tagger::tagtext($cleaned_text, "text");

        // STEP 2: REMOVE end-of-sentence markers, symbols, numbers
        $tagged_text = preg_replace("/(\._\.)|(\!_\.)|(\?_\.)|(:_:)|(-_:)|(;_:)|(\/_SYM)|(-_SYM)|(,_,)|(x_SYM)|(.+_MC)/", "", $tagged_text);

        // STEP 3: TOKENIZE tagged text
        $tokens = preg_split("/[\s,-]+/", $tagged_text);

        // STEP 4: COLLECT each distinct misspelled term once, so a repeated
        // word costs a single dictionary lookup and is marked up only once.
        $checked = array();
        $misspelled = array();
        foreach ($tokens as $token) {
            // strip the tag: a token looks like "word_TAG" (the tag may be absent)
            $parts = explode("_", $token);
            $searchterm = $parts[0];
            if (strlen(trim($searchterm)) == 0) {
                continue;
            }
            $searchterm = strtolower($searchterm);
            if (isset($checked[$searchterm])) {
                continue;
            }
            $checked[$searchterm] = true;

            if (spelling::is_correct_spelling($searchterm)==false
                && spelling::in_ignore_list($searchterm)==false) {
                $misspelled[] = preg_quote($searchterm, "/");
            }
        }

        if (count($misspelled) == 0) {
            return $cleaned_text;
        }

        // Longest terms first so the alternation prefers the longest match.
        $lengths = array_map("strlen", $misspelled);
        array_multisort($lengths, SORT_DESC, $misspelled);

        // STEP 5: MARK all terms in ONE pass. A single pass cannot re-match
        // inside markup that an earlier replacement inserted. The
        // lookarounds restrict matches to whole words, and the /i flag also
        // catches capitalised occurrences (terms were lower-cased for the
        // lookup) while keeping the original capitalisation in the output.
        $pattern = "/(?<![A-Za-z0-9_])(".implode("|", $misspelled).")(?![A-Za-z0-9_])/i";
        $marked_text = preg_replace($pattern, "<span class='highlight'>\$1</span>", $cleaned_text);

        // preg_replace() yields null on a PCRE error; fall back to the
        // unmarked text rather than returning nothing.
        if ($marked_text === null) {
            return $cleaned_text;
        }
        return $marked_text;
    }

   function misspelled_list() {
   //
   // Lists the misspelled words found in cleaned text.
   //
   // args:   [0] request (GET) variables; menu_id, document_id and tab are
   //             copied into each generated link
   //         [1] cleaned text
   // output: HTML - one link per distinct misspelled word, each pointing to
   //         the spell checker (&spellcheck=<word>#spellcheck), or a red
   //         "None" when nothing is misspelled; nothing when called
   //         without arguments
   //
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = $arg_list[0];
        $cleaned_text = $arg_list[1];

        // STEP 1: TAG cleaned text temporarily
        $tagged_text = tagger::tagtext($cleaned_text, "text");

        // STEP 2: REMOVE end-of-sentence markers, symbols, numbers
        $tagged_text = preg_replace("/(\._\.)|(\!_\.)|(\?_\.)|(:_:)|(-_:)|(;_:)|(\/_SYM)|(-_SYM)|(,_,)|(x_SYM)|(.+_MC)/", "", $tagged_text);

        // STEP 3: TOKENIZE tagged text
        $tokens = preg_split("/[\s,-]+/", $tagged_text);

        // Link prefix shared by every entry; values are URL-encoded because
        // they originate from the request.
        $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING".
                     "&menu_id=".urlencode($get_vars["menu_id"]).
                     "&document_id=".urlencode($get_vars["document_id"]).
                     "&tab=".urlencode($get_vars["tab"]);

        $seen = array();             // terms already examined
        $misspelled_list = array();  // generated links, first-occurrence order
        foreach ($tokens as $token) {
            // strip the tag: a token looks like "word_TAG" (the tag may be absent)
            $parts = explode("_", $token);
            $searchterm = $parts[0];
            if (strlen(trim($searchterm)) == 0 || isset($seen[$searchterm])) {
                continue;
            }
            $seen[$searchterm] = true;

            // STEP 4: SEARCH the dictionary (term is checked in its original case)
            if (spelling::is_correct_spelling($searchterm)==false) {
                // STEP 5: MAKE SURE term IS NOT IN IGNORE LIST
                if (spelling::in_ignore_list($searchterm)==false) {
                    // STEP 6: SHOW MISSPELLED TERM AS A LINK TO SPELL CHECKER
                    $misspelled_list[] = "<a class='ptmenu' href='".$link_base.
                        "&spellcheck=".urlencode($searchterm)."#spellcheck'>".
                        htmlspecialchars($searchterm, ENT_QUOTES)."</a><br>";
                }
            }
        }

        if (count($misspelled_list)>0) {
            return implode("", $misspelled_list);
        }
        return "<font color='red'>None</font>";
    }

    function manage_term() {
        // Entry point of the term-management panel.
        // args: [0] menu id, [1] GET variables, [2] POST variables,
        //       [3] textlab document data
        // Handles a submitted action first (if any), then always renders
        // the form. Does nothing when called without arguments.
        if (!func_num_args()) {
            return;
        }

        $args = func_get_args();
        $menu_id = $args[0];
        $get_vars = $args[1];
        $post_vars = $args[2];
        $textlab = $args[3];

        // a pressed button arrives as the "submitdict" POST value
        $submitted = $post_vars["submitdict"];
        if ($submitted) {
            spelling::process_manage_term($menu_id, $get_vars, $post_vars, $textlab);
        }

        spelling::form_manage_term($menu_id, $get_vars, $post_vars, $textlab);
    }

    function process_manage_term() {
        //
        // Executes the action selected in the term-management form.
        //
        // args: [0] menu id (not used here)
        //       [1] GET variables  (page, menu_id, document_id, tab)
        //       [2] POST variables (submitdict, misspelled, correction_list,
        //                           replacement)
        //       [3] textlab document data (cleaned_text, spellchecked_text)
        //
        // $post_vars["submitdict"] carries the caption of the pressed button:
        //   "Change Dictionary"              - nothing to do here
        //   "Add to Dictionary"              - store the term in the auxiliary
        //                                      dictionary
        //   "Replace from Correction List"   - replace the term in the document
        //   "Replace using Replacement Term"   with the chosen / typed word
        //   "Add to Ignore List"             - store the term in the ignore list
        //   "Cancel"                         - just return to the document
        // Every action except "Change Dictionary" ends with a redirect header.
        //
        // NOTE(review): values are escaped here with mysql_real_escape_string();
        // if magic_quotes_gpc is enabled on the deployment, request values
        // arrive pre-escaped - confirm.
        //
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = $arg_list[1];
        $post_vars = $arg_list[2];
        $textlab = $arg_list[3];

        // Redirect targets. Values are URL-encoded because they come from
        // the request.
        $params = array();
        foreach (array("page", "menu_id", "document_id", "tab") as $key) {
            $params[$key] = isset($get_vars[$key]) ? urlencode($get_vars[$key]) : "";
        }
        $document_url = $_SERVER["PHP_SELF"]."?page=".$params["page"].
                        "&menu_id=".$params["menu_id"].
                        "&document_id=".$params["document_id"];
        $tab_url = $document_url."&tab=".$params["tab"];

        $action = $post_vars["submitdict"];

        switch ($action) {
        case "Change Dictionary":
            break;

        case "Add to Dictionary":
            if ($post_vars["misspelled"]) {
                if (!spellcheck::aux_insert($post_vars["misspelled"])) {
                    spellcheck::aux_update($post_vars["misspelled"]);
                }
                // Redirect once the term is stored. (The redirect used to
                // depend on a query built from an undefined variable and
                // therefore never happened.)
                header("location: ".$document_url);
            }
            break;

        case "Replace from Correction List":
        case "Replace using Replacement Term":
            $from_list = ($action == "Replace from Correction List");
            $new_term = ($from_list ? $post_vars["correction_list"] : $post_vars["replacement"]);

            if ($get_vars["document_id"] && $post_vars["misspelled"] && $new_term && $textlab["cleaned_text"]) {
                // continue from earlier corrections when there are any
                $replacement_base = ($textlab["spellchecked_text"]?$textlab["spellchecked_text"]:$textlab["cleaned_text"]);

                // Match the misspelled word as a whole word, with its first
                // letter in either case. The first letter is captured so the
                // capitalisation of each occurrence can be carried over to
                // the replacement. preg_quote() keeps characters of the term
                // from being read as regex syntax.
                $misspelled = $post_vars["misspelled"];
                $first = substr($misspelled, 0, 1);
                $upper = strtoupper($first);
                $lower = strtolower($first);
                $search = "/(?<![A-Za-z0-9_])".
                          "([".preg_quote($upper, "/").preg_quote($lower, "/")."])".
                          preg_quote((string) substr($misspelled, 1), "/").
                          "(?![A-Za-z0-9_])/";

                // Split on the matches: odd-numbered pieces are the captured
                // first letters, even-numbered pieces the text in between.
                // Substituting whole pieces keeps "$" or "\" in the new term
                // from being read as back-references, and inserts the new
                // term complete (previously its first letter was dropped in
                // favour of the misspelled word's first letter).
                $pieces = preg_split($search, $replacement_base, -1, PREG_SPLIT_DELIM_CAPTURE);
                if ($pieces === false) {
                    // pattern error: leave the text unchanged
                    $spellchecked_text = $replacement_base;
                } else {
                    $piece_count = count($pieces);
                    for ($i = 1; $i < $piece_count; $i += 2) {
                        $capitalised = ($upper !== $lower && $pieces[$i] === $upper);
                        $pieces[$i] = ($capitalised ? ucfirst($new_term) : $new_term);
                    }
                    $spellchecked_text = implode("", $pieces);
                }

                $sql = "update m_textlab_document set ".
                       "spellchecked_text = '".mysql_real_escape_string($spellchecked_text)."' ".
                       "where document_id = '".mysql_real_escape_string($get_vars["document_id"])."'";
                if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                    if (!$from_list) {
                        // re-run prediction on the corrected text
                        // (return value is not used here)
                        $spellcheck = new spellcheck($spellchecked_text);
                        $corrected_array = $spellcheck->predict();
                    }
                    if (!spellcheck::aux_insert($new_term, $get_vars["document_id"])) {
                        spellcheck::aux_update($new_term, $get_vars["document_id"]);
                    }
                    header("location: ".$tab_url);
                }
            }
            break;

        case "Add to Ignore List":
            if ($post_vars["misspelled"]) {
                // INSERT IGNORE: re-adding an already ignored term is not an error
                $sql = "insert ignore into m_spelling_ignorelist (ignore_str) values ('".
                       mysql_real_escape_string($post_vars["misspelled"])."')";
                if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                    // store the current working text as the spellchecked text
                    $replacement_base = ($textlab["spellchecked_text"]?$textlab["spellchecked_text"]:$textlab["cleaned_text"]);
                    $sql_update = "update m_textlab_document set ".
                                  "spellchecked_text = '".mysql_real_escape_string($replacement_base)."' ".
                                  "where document_id = '".mysql_real_escape_string($get_vars["document_id"])."'";
                    $result_update = mysql_query($sql_update);
                    header("location: ".$tab_url);
                }
            }
            break;

        case "Cancel":
            header("location: ".$tab_url);
            break;
        }
    }

    function form_manage_term() {
        //
        // Prints the term-management form: spelling source selector, the
        // possibly misspelled word, the list of suggested corrections, a
        // free-text replacement field and the action buttons.
        //
        // args: [0] menu id (not used here)
        //       [1] GET variables  (menu_id, document_id, tab, spellcheck,
        //                           context)
        //       [2] POST variables (spellsource, misspelled)
        //       [3] textlab document data
        //
        // The button captions are the values process_manage_term() switches
        // on, so they must not be changed independently.
        // Request-derived values are HTML-escaped before being printed.
        //
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = $arg_list[1];
        $post_vars = $arg_list[2];
        $textlab = $arg_list[3];

        $term = $get_vars["spellcheck"];
        // word shown in the text fields: the clicked term, else the posted one
        $shown_term = htmlspecialchars(($term?$term:$post_vars["misspelled"]), ENT_QUOTES);
        // Lexicon is default source
        $spellsource = (isset($post_vars["spellsource"])?$post_vars["spellsource"]:"L");

        $action = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".urlencode($get_vars["menu_id"]).
                  ($get_vars["document_id"]?"&document_id=".urlencode($get_vars["document_id"]):"").
                  "&tab=".urlencode($get_vars["tab"]).
                  "&spellcheck=".urlencode($term)."#spellcheck";

        print "<a name='spellcheck'></a>";
        print "<span class='newstitle'>".FTITLE_SPELLING_CHECK."</span><br>";
        print "<span class='tinylight'>".INSTR_SPELLING_CHECK."</span><br>";
        print "<table width='450' cellpadding='5' cellspacing='1' style='border: 1px solid black'>";
        print "<form action = '".htmlspecialchars($action, ENT_QUOTES)."' name='form_manage_term' method='post'>";
        print "<tr valign='top'><td>";

        // ---- left column: input fields ----
        print "<table cellpadding='2' cellspacing='1'>";
        print "<tr><td>";
        print "<span class='boxtitle'>".LBL_SPELLING_SOURCE."</span><br> ";
        print "<input type='radio' name='spellsource' value='P' ".($spellsource=="P"?"checked":"")."> PSpell<br>";
        print "<input type='radio' name='spellsource' value='L' ".($spellsource=="L"?"checked":"")."> UMLS Lexicon<br>";
        print "</td></tr>";
        print "<tr><td>";
        print "<span class='boxtitle'>".LBL_POSSIBLE_MISPELLED."</span><br> ";
        print "<input type='text' class='textbox' size='20' maxlength='20' name='misspelled' value='".$shown_term."' style='border: 1px solid #000000'><br>";
        print "</td></tr>";
        print "<tr><td>";
        print "<span class='boxtitle'>".LBL_POSSIBLE_CORRECTIONS."</span><br> ";
        $spellcheck = new spellcheck($textlab["cleaned_text"]);
        if ($word_list = $spellcheck->word_list($get_vars["spellcheck"], $get_vars["context"])) {
            print "<span class='tinylight'>".INSTR_SPELLCORRECTION_LIST."</span><br>";
            print "<select size='10' name='correction_list'>";
            foreach ($word_list as $value) {
                $option = htmlspecialchars($value, ENT_QUOTES);
                print "<option value='".$option."'>".$option."</option>";
            }
            print "</select>";
        } else {
            print "<font color='red'>None</font>";
        }
        print "</td></tr>";
        print "<tr><td><br>";
        print "<span class='boxtitle'>".LBL_REPLACEMENT_TERM."</span><br> ";
        print "<input type='text' class='textbox' size='20' maxlength='20' name='replacement' value='".$shown_term."' style='border: 1px solid #000000'><br>";
        print "</td></tr>";
        print "</table>";

        // ---- right column: hidden fields and action buttons ----
        print "</td><td>";
        print "<table cellpadding='2' cellspacing='1'>";

        // caption => tooltip (empty tooltip: no title attribute)
        $buttons = array(
            "Change Dictionary" => "Changes dictionary used for spelling check.",
            "Add to Dictionary" => "Adds to the dictionary the word from the MISSPELLED WORD field.",
            "Replace from Correction List" => "Replaces misspelled word with word selected from the CORRECTION LIST.",
            "Replace using Replacement Term" => "Replaces misspelled word with word selected from the REPLACEMENT TERM field.",
            "Add to Ignore List" => "Adds the word in the MISPELLED WORD field to the Ignore list.",
            "Cancel" => ""
        );
        $first_row = true;
        foreach ($buttons as $caption => $tooltip) {
            print "<tr><td>";
            if ($first_row) {
                // hidden fields travel in the first button cell
                print "<input type='hidden' name='spellcheck' value='".htmlspecialchars($get_vars["spellcheck"], ENT_QUOTES)."'>";
                // NOTE(review): reads $textlab["cleaned"] while the rest of the
                // class uses $textlab["cleaned_text"] - confirm which key is meant.
                print "<input type='hidden' name='cleaned_text' value='".htmlspecialchars($textlab["cleaned"], ENT_QUOTES)."'>";
                $first_row = false;
            }
            print "<input type='submit' value = '".$caption."' class='textbox' name='submitdict' ".
                  ($tooltip != "" ? "title='".$tooltip."' " : "").
                  "style='border: 1px solid #000000'> ";
            print "</td></tr>";
        }

        print "</table>";
        print "</td></tr>";
        print "</form>";
        print "</table>";
    }

    function _build_dict() {
        //
        // Menu handler that (re)builds the spelling dictionary in steps,
        // redirecting to itself between steps:
        //   Start       - truncate the dictionary and load the UMLS Lexicon
        //                 terms (LRAGR, LRABR, LRMOD, LRPRP)
        //   dictstep=2  - load the WordNet terms
        //   dictstep=3  - compute metaphones, n-grams and other lookup keys
        //   dictstep=4  - show the final summary
        // The record counts of steps 1 and 2 travel in the URL as recs1/recs2.
        //
        // args: [0] menu id, [1] POST variables, [2] GET variables
        //       (note: POST precedes GET here)
        //
        // NOTE(review): the redirect headers are sent after output has been
        // printed, which only works when output is buffered - confirm that
        // the framework buffers page output.
        //
        $post_vars = array();
        $get_vars = array();
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
        }

        $submitted = (isset($post_vars["submitdict"]) ? $post_vars["submitdict"] : "");
        $dictstep = (isset($get_vars["dictstep"]) ? $get_vars["dictstep"] : "");
        // counters come from the URL: force them to integers before echoing
        $recs1 = (isset($get_vars["recs1"]) ? intval($get_vars["recs1"]) : 0);
        $recs2 = (isset($get_vars["recs2"]) ? intval($get_vars["recs2"]) : 0);

        $form_action = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".urlencode($get_vars["menu_id"]).
                       ($get_vars["document_id"]?"&document_id=".urlencode($get_vars["document_id"]):"");
        // target of the step-to-step redirects
        $step_url = $_SERVER["PHP_SELF"]."?page=".urlencode($get_vars["page"]).
                    "&menu_id=".urlencode($get_vars["menu_id"]);

        print "<table width='500'>";
        print "<form action = '".htmlspecialchars($form_action, ENT_QUOTES)."' name='form_dumpdata' method='post'>";
        print "<tr valign='top'><td>";
        print "<span class='boxtitle'>".LBL_BUILD_DICT."</span><br><br> ";
        print "This module builds the online dictionary used for checking spelling. It requires that the UMLS LRAGR table ".
              "and WordNet have been set up. This module uses the 2005AB version of the UMLS Lexicon and the 2.0 version of ".
              "Wordnet from Princeton University. To build the dictionary click on <b>Start</b>.";
        print "</td></tr>";
        print "<tr><td><br>";
        print "<input type='submit' value = 'Start' class='textbox' name='submitdict' style='border: 1px solid #000000'> ";
        print "<input type='submit' value = 'Cancel' class='textbox' name='submitdict' style='border: 1px solid #000000'> ";
        print "</td></tr>";
        print "</form>";
        print "</table><br>";

        if ($submitted == "Start") {
            // STEP 1: load the UMLS Lexicon word lists
            print "<font color='red'>Loading LRAGR terms...</font><br>";
            module::execsql("TRUNCATE m_spelling_dictionary");
            // the LRAGR load is run once; its success gates the other loads
            if (module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT STR AS word_str FROM umls.LRAGR GROUP BY STR")) {
                module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT ABR AS word_str FROM umls.LRABR GROUP BY ABR");
                module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT BAS AS word_str FROM umls.LRMOD GROUP BY BAS");
                module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT STR AS word_str FROM umls.LRPRP GROUP BY STR");

                // known errors
                module::execsql("delete from m_spelling_dictionary where word_str = 'occurence'");
                module::execsql("delete from m_spelling_dictionary where word_str = 'diphterias'");

                $loaded = spelling::record_count();
                header("location: ".$step_url."&dictstep=2&recs1=".$loaded);
            }
        }
        if ($dictstep==2 && $recs1>0) {
            // STEP 2: load the WordNet word list
            print "<font color='red'>Loaded LRAGR terms...".$recs1." records.</font><br>";
            print "<font color='red'>Loading Wordnet terms...</font><br>";
            if (module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT word FROM wn.wn_synset;")) {
                // WordNet contribution = total minus what step 1 loaded
                $added = spelling::record_count()-$recs1;
                header("location: ".$step_url."&dictstep=3&recs1=".$recs1."&recs2=".$added);
            }
        }
        if ($dictstep==3 && $recs2>0) {
            // STEP 3: build special fields (ngram, metaphone, header)
            print "<font color='red'>Loaded LRAGR terms...".$recs1." records.</font><br>";
            print "<font color='red'>Loaded Wordnet terms...".$recs2." records.</font><br>";
            print "<font color='red'>Computing metaphones and n-grams...</font><br>";
            if (spelling::init_dictionary()) {
                header("location: ".$step_url."&dictstep=4&recs1=".$recs1."&recs2=".$recs2);
            }
        }
        if ($dictstep==4) {
            // STEP 4: summary
            print "<font color='red'>Loaded LRAGR terms...".$recs1." records.</font><br>";
            print "<font color='red'>Loaded Wordnet terms...".$recs2." records.</font><br>";
            print "<font color='red'>Metaphones and N-grams complete.</font><br>";
            print "<font color='red'>Dictionary has ".spelling::record_count()." records.</font><br>";
        }

    }

    function init_dictionary() {
        //
        // Computes the lookup keys of every word in m_spelling_dictionary
        // and stores them back on the row:
        //   word_str       - the word, with "_" separators turned into spaces
        //   word_metaphone - metaphone() of the word
        //   word_ngram     - its 2-grams joined by "|"
        //   word_header    - first 4 characters
        //   word_anterior  - 4 characters starting at the second character
        //   word_posterior - 4 characters starting 5 from the end
        //   word_fragment  - first 10 characters, or for shorter words the
        //                    whole word when spellcheck::modulus(length, 2)
        //                    is 0, otherwise the word minus its last character
        //
        // output: true when the sentinel word 'zygomorphic' ends up with a
        //         metaphone (taken as proof the run reached the end of the
        //         alphabetically ordered list), false otherwise
        //
        $sql = "SELECT word_id, word_str from m_spelling_dictionary order by word_str";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return false;
        }

        while ($row = mysql_fetch_array($result)) {
            $id = $row[0];
            $word = $row[1];

            // compound words are stored with "_" between the parts
            if (strpos($word, "_") !== false) {
                $word = str_replace("_", " ", trim($word));
            }

            $metaphone = metaphone($word);
            $ngram_string = implode("|", spelling::create_ngram($word, 2));
            $header = substr($word, 0, 4);
            $anterior = substr($word, 1, 4);
            $posterior = substr($word, -5, 4);
            if (strlen($word)>10) {
                $fragment = substr($word, 0, 10);
            } else if (spellcheck::modulus(strlen($word),2)===0) {
                $fragment = $word;
            } else {
                $fragment = substr($word, 0, strlen($word)-1);
            }

            // Every value is escaped: dictionary words may contain
            // apostrophes, which would otherwise break the statement and
            // leave the row without lookup keys.
            $sql_update = "update m_spelling_dictionary set ".
                          "word_str = '".mysql_real_escape_string($word)."',".
                          "word_anterior = '".mysql_real_escape_string($anterior)."',".
                          "word_posterior = '".mysql_real_escape_string($posterior)."', ".
                          "word_metaphone = '".mysql_real_escape_string($metaphone)."', ".
                          "word_ngram = '".mysql_real_escape_string($ngram_string)."', ".
                          "word_header = '".mysql_real_escape_string($header)."', ".
                          "word_fragment = '".mysql_real_escape_string($fragment)."' ".
                          "where word_id = '".mysql_real_escape_string($id)."'";

            // a failed update (e.g. the renamed word collides with the
            // unique word_str index) is skipped and the loop continues
            $result_update = mysql_query($sql_update);
        }

        // test that the dictionary set up reaches the end
        $sql_metaphone = "select word_metaphone from m_spelling_dictionary where word_str = 'zygomorphic' limit 1";
        $result_metaphone = mysql_query($sql_metaphone);
        if ($result_metaphone && mysql_num_rows($result_metaphone)) {
            $sentinel = mysql_fetch_array($result_metaphone);
            if ($sentinel && strlen($sentinel[0])>0) {
                return true;
            }
        }
        return false;
    }

    function record_count() {
        // Returns the number of rows in m_spelling_dictionary as delivered
        // by the database; returns nothing when the query fails.
        $result = mysql_query("select count(*) from m_spelling_dictionary");
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        $row = mysql_fetch_array($result);
        return $row[0];
    }

    function baseform() {
        //
        // Looks up the base form (BAS) of a term in the UMLS LRAGR table.
        //
        // args:   [0] search term, matched against STR with LIKE
        // output: BAS of the first matching row; nothing when there is no
        //         match, the query fails, or no argument is given
        //
        // NOTE(review): the term is escaped here; if magic_quotes_gpc is
        // enabled and callers pass raw request values, they arrive
        // pre-escaped - confirm.
        //
        if (!func_num_args()) {
            return;
        }
        $searchterm = func_get_arg(0);

        // escaped so that a quote in the term cannot break or alter the query
        $sql = "select BAS from umls.LRAGR where STR like '".
               mysql_real_escape_string($searchterm)."' limit 10";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        // only the first row is used
        $row = mysql_fetch_array($result);
        if ($row) {
            return $row[0];
        }
    }

    function word_list() {
    //
    // This function comes up with a word list of
    // possible corrections for the misspelled word.
    // input: possible misspelled word
    // output: list of corrections as an array
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
            $term = $arg_list[3];

            $spellsource = ($post_vars["spellsource"]?$post_vars["spellsource"]:$get_vars["spellsource"]); // spell source

            switch ($spellsource) {

            case "L": // Lexicon and Wordnet

                // STEP 1: CREATE N-GRAM AND METAPHONE OF INPUT TERM (DIGRAM)
                $ngram_input = spelling::create_ngram($term, 2);
                $metaphone = metaphone($term);
                $possible_array = array();

                // STEP 2: LOOK UP SIMILAR METAPHONE IN CUSTOM DICTIONARY
                // join main and auxiliary dictionaries
                $sql = "(select word_str, word_ngram, word_metaphone ".
                       "from m_spelling_dictionary ".
                       "where word_metaphone like '$metaphone') ".
                       "union distinct ".
                       "(select word_str, word_ngram, word_metaphone ".
                       "from m_spelling_dictaux ".
                       "where word_metaphone like '$metaphone') ".
                       "order by word_metaphone limit 1000";
                if ($result = mysql_query($sql)) {
                    if (mysql_num_rows($result)) {
                        $possible_array = array();
                        while (list($word, $ngram_string, $metaphone) = mysql_fetch_array($result)) {
                            $ngram_db = explode("|", $ngram_string);
                            // STEP 3: COMPARE N-GRAMS AND GET SCORE
                            $score = spelling::compare_ngrams($ngram_db, $ngram_input);
                            // ADD TO LIST IF > 0.3
                            if ($score>0.3) {
                                $possible_array[] .= $word;
                            }
                        }
                    }
                }
                // STEP 4: IF WORD IS MORE THN 3 CHARACTERS GET DICTIONARY TERM WITH
                // SAME HEADER AS INPUT TERM
                if (strlen($term)>=3) {

                    $pattern[0] = "/(".$term.")/";
                    $pattern[1] = "/(wi)/";

                    $replacement[0] = "$1";
                    $replacement[1] = "whi";

                    for($i=0; $i<count($pattern); $i++) {

                        $modified = preg_replace($pattern[$i], $replacement[$i], $term);

                        $sql = "select word_str, word_ngram ".
                               "from m_spelling_dictionary ".
                               "where word_header = substr('$modified', 1,4) limit 10";
                        if ($result = mysql_query($sql)) {
                            if (mysql_num_rows($result)) {
                                while (list($word, $ngram_string) = mysql_fetch_array($result)) {
                                    // STEP 5: COMPARE N-GRAMS AGAIN
                                    $ngram_db = explode("|", $ngram_string);
                                    $score = spelling::compare_ngrams($ngram_db, $ngram_input);
                                    // assume misspell would be + or - 3 characters
                                    $maxlen = strlen($term)+3;
                                    $minlen = strlen($term)-3;
                                    if ($score>0.3 && (strlen($word)<=$maxlen && strlen($word)>=$minlen)) {
                                        // ADD TO LIST IF >0.3
                                        $possible_array[] .= $word;
                                    }
                                }
                            }
                        }
                    }
                }
                // STEP 6: SORT ACCORDING TO LEVENSHTEIN DISTANCE
                $levarray = array();
                $possible_array = array_unique($possible_array);
                foreach($possible_array as $key=>$value) {
                    $lev = levenshtein($term, $value);
                    $levarray[] .= "$lev|$value";
                    //print "$value: $lev<br>";
                }
                sort($levarray);
                foreach($levarray as $key=>$value) {
                    $levsort[] .= preg_replace("/([0-9]{0,2}\|)/","", $value);
                }
                return $levsort;
                break;

            case "P": // PSpell

                $pspell_link = pspell_new("en", "", "", "", PSPELL_NORMAL);
                $suggestions = pspell_suggest($pspell_link, $term);
                return $suggestions;

                break;

            }

        }
    }

    function predict_spelling() {
    //
    // Placeholder: the body is empty, so calling it does nothing and
    // returns nothing.
    //
    }

    function spellcheck() {
    //
    // other function for spelling
    // not used anymore in application
    //
    // input:  first argument - space-separated tokens, each written "word_TAG"
    // output: one line per word rejected by pspell_check(), formatted as
    //         "<font color='red'>word</font>: suggestion suggestion ..." and
    //         terminated by "\n"; an empty string when no word is rejected.
    //         Tokens tagged "MC" are skipped.
    //         Called without arguments it prints a usage line and returns nothing.
    //
        if (!func_num_args()) {
            print "SYNTAX: spellcheck(inputString String)";
            return;
        }

        $arg_list = func_get_args();
        $inputstring = $arg_list[0];
        $pspell_link = pspell_new("en", "", "", "", PSPELL_FAST);

        // start from an empty string so that clean input yields "" instead of
        // reading variables that were never assigned
        $retval = "";
        foreach (explode(" ", $inputstring) as $token) {
            // a token without "_" carries no tag; use an empty tag rather than
            // reading an undefined offset
            $parts = explode("_", $token);
            $word = $parts[0];
            $tag = isset($parts[1]) ? $parts[1] : "";

            if ($tag == "MC") {
                continue;
            }
            if (pspell_check($pspell_link, $word)) {
                continue;
            }

            $list = "";
            foreach (pspell_suggest($pspell_link, $word) as $list_item) {
                $list .= $list_item." ";
            }
            $retval .= "<font color='red'>".$word."</font>: ".$list."\n";
        }
        return $retval;
    }

    function is_correct_spelling() {
    //
    // Looks the word up in the main and auxiliary spelling dictionaries.
    //
    // input: potentially misspelled word
    // output: true or false
    //         true  word found in m_spelling_dictionary or m_spelling_dictaux
    //         false word not found, blank or missing input, or query failure
    //
    // NOTE(review): an earlier version carried a PSpell fallback, but it sat in
    // a branch that could only run when a non-empty result set failed to yield
    // its first row, so it never executed. It is left out to keep the effective
    // behaviour; confirm whether a PSpell lookup on a dictionary miss is wanted.
    //
        if (!func_num_args()) {
            return false;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0]; // input string

        if (strlen(trim($inputstring)) == 0) {
            return false;
        }

        // escape the word so an apostrophe or backslash cannot break the statement
        $word = mysql_real_escape_string($inputstring);

        //
        // select from both main and auxiliary dictionary
        //
        $sql = "(select word_str ".
               "from m_spelling_dictionary ".
               "where word_str = '$word') ".
               "union distinct ".
               "(select word_str ".
               "from m_spelling_dictaux ".
               "where word_str = '$word')";
        $result = mysql_query($sql);
        if (!$result) {
            return false;
        }
        return mysql_num_rows($result) > 0;
    }

    function in_ignore_list() {
    //
    // Checks whether a term is present in m_spelling_ignorelist.
    // The lower-cased term is compared with LIKE, so "%" and "_" inside the
    // term act as wildcards.
    //
    // input: term to look up
    // output: true  at least one row matches
    //         false no match, blank or missing input, or query failure
    //
        if (!func_num_args()) {
            return false;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0]; // input string

        if (strlen(trim($inputstring)) == 0) {
            return false;
        }

        // escape the term so an apostrophe or backslash cannot break the statement
        $term = mysql_real_escape_string(strtolower($inputstring));
        $sql = "select ignore_str ".
               "from m_spelling_ignorelist ".
               "where lower(ignore_str) like '".$term."'";
        $result = mysql_query($sql);
        if (!$result) {
            return false;
        }
        return mysql_num_rows($result) > 0;
    }

    function create_ngram($string, $n) {

    //
    // Author: Mikko Saari
    // Source: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // Pads $string with spaces on both sides ($n extra characters in total)
    // and returns every $n-character window of the padded text, left to right.
    //
        $padded = str_pad($string, strlen($string) + $n, " ", STR_PAD_BOTH);
        $window_count = strlen($padded) - $n + 1;

        $offset = 0;
        while ($offset < $window_count) {
            $ngrams[] = substr($padded, $offset, $n);
            $offset++;
        }

        return $ngrams;
    }

    function compare_ngrams($ngram1, $ngram2) {

    //
    // Author: Mikko Saari
    // Source: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // Similarity score: number of entries of $ngram1 that also occur in
    // $ngram2, divided by the number of distinct n-grams in both lists.
    //
        $combined = array_merge($ngram1, $ngram2);
        $distinct_total = count(array_unique($combined));
        $shared_total = count(array_intersect($ngram1, $ngram2));

        return $shared_total / $distinct_total;
    }
}

/** ************************************************
** SPELLING CLASS
** $Author: Herman Tolentino MD
** $Last update: 9/15/05
*/

class spellcheck {

    // text handed to the constructor
    var $input;
    // output of tagger::tagtext() for $input
    var $tagged;
    // $tagged split into tokens by tokenize()
    var $token_array;
    // not assigned in the part of the class reviewed here
    var $misspelled_array;
    // running bias totals, keyed by bias name (concept, homonym, ngram,
    // metaphone, length, pos, history); updated by the bias_* methods
    var $decision_array;
    // per search method: ["array"] collected candidate words and ["count"]
    // how many were collected; updated by the *_search methods
    var $source_array;
    // not assigned in the part of the class reviewed here
    var $source_stats;
    // not assigned in the part of the class reviewed here
    var $decision_stats;
    // set to an empty array by the constructor
    var $levarray;

    function spellcheck($inputstring) {
    //
    // Constructor: tags and tokenizes the input, then resets the bookkeeping
    // used by the *_search and bias_* methods.
    //
    // assume $inputstring is cleaned text
    //

        $this->input = $inputstring;
        $this->levarray = array();
        // A utf8_decode($this->tagged) call used to precede the tagging; it ran
        // on the still-unset property and its result was overwritten by the
        // tagger output, so it had no effect and has been dropped.
        // NOTE(review): if the text was meant to be UTF-8 decoded, decode
        // $this->input before tagging instead - confirm.
        $this->tagged = tagger::tagtext($this->input, "text");
        $this->token_array = $this->tokenize($this->tagged);

        // candidate words and running totals per search method
        // ("pspell" is not registered as a source here)
        $sources = array("metaphone", "ngram", "header", "transposition",
                         "deletion", "insertion", "substitution");
        foreach ($sources as $source) {
            $this->source_array[$source] = array("array" => array(), "count" => 0);
        }

        // accumulated bias per decision criterion
        $criteria = array("concept", "homonym", "ngram", "metaphone",
                          "length", "pos", "history");
        foreach ($criteria as $criterion) {
            $this->decision_array[$criterion] = 0;
        }

    }

    function cleanstr($string){
    //
    // Drops every byte that is neither a space (32) nor in the ASCII range
    // 65..122. That range covers A-Z and a-z plus the six characters that sit
    // between them ([ \ ] ^ _ `), which are therefore kept as well.
    //
    // input: string to filter
    // output: filtered string; "" when nothing survives
    //
        // initialised up front so empty or fully filtered input returns ""
        $ret = "";
        $len = strlen($string);
        for ($a = 0; $a < $len; $a++) {
            $p = ord($string[$a]);
            # chr(32) is space, it is preserved..
            if (($p > 64 && $p < 123) || $p == 32) {
                $ret .= $string[$a];
            }
        }
        return $ret;
    }

    function tokenize($tagged) {
    //
    // Splits tagged text into tokens on runs of whitespace, commas and hyphens.
    //
        $separator_pattern = "/[\s,-]+/";
        $tokens = preg_split($separator_pattern, $tagged);
        return $tokens;
    }

    function remove_tag($token) {
    //
    // Returns the part of a "term_TAG" token that precedes the first
    // underscore; a token without an underscore is returned unchanged.
    //
        // split once only: everything after the first "_" is discarded anyway,
        // and no second element is read, so untagged tokens raise no notice
        $parts = explode("_", $token, 2);
        return $parts[0];
    }

    function in_ignore_list($term) {
    //
    // Checks whether $term is present in m_spelling_ignorelist.
    // The lower-cased term is compared with LIKE, so "%" and "_" inside the
    // term act as wildcards.
    //
    // input: term to look up
    // output: true  at least one row matches
    //         false no match, blank input, or query failure
    //
        if (strlen(trim($term)) == 0) {
            return false;
        }

        // escape the term so an apostrophe or backslash cannot break the statement
        $needle = mysql_real_escape_string(strtolower($term));
        $sql = "select ignore_str ".
               "from m_spelling_ignorelist ".
               "where lower(ignore_str) like '".$needle."'";
        $result = mysql_query($sql);
        if (!$result) {
            return false;
        }
        return mysql_num_rows($result) > 0;
    }

    function create_ngram($string, $n) {

    //
    // Author: Mikko Saari
    // Source: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // The input is space-padded on both sides by $n characters in total; the
    // result lists each $n-character slice of the padded text in order.
    //
        $target_length = strlen($string) + $n;
        $padded = str_pad($string, $target_length, " ", STR_PAD_BOTH);

        $last_start = strlen($padded) - $n;
        for ($start = 0; $start <= $last_start; $start++) {
            $ngrams[] = substr($padded, $start, $n);
        }

        return $ngrams;
    }

    function compare_ngrams($ngram1, $ngram2) {

    //
    // Author: Mikko Saari
    // Source: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // Score = entries of $ngram1 also present in $ngram2, divided by the
    // number of distinct n-grams across both lists.
    //
        $shared = array_intersect($ngram1, $ngram2);
        $distinct = array_unique(array_merge($ngram1, $ngram2));

        return count($shared) / count($distinct);
    }

    function metaphone_search($misspelled) {
    //
    // Collects words from the main and auxiliary dictionaries whose stored
    // metaphone key matches metaphone($misspelled) and whose digram similarity
    // to the misspelled word is above 0.3.
    //
    // input: misspelled word
    // output: array of candidate words (possibly empty); the candidates are
    //         also appended to the "metaphone" source and added to its count
    //

        $possible_array = array();

        $ngram_input = $this->create_ngram($misspelled, 2);
        $metaphone = metaphone($misspelled);

        $sql = "(select word_str, word_ngram, word_metaphone ".
               "from m_spelling_dictionary ".
               "where word_metaphone like '$metaphone') ".
               "union distinct ".
               "(select word_str, word_ngram, word_metaphone ".
               "from m_spelling_dictaux ".
               "where word_metaphone like '$metaphone') ".
               "order by word_metaphone limit 1000";
        if ($result = mysql_query($sql)) {
            while ($row = mysql_fetch_array($result)) {
                // stored n-grams are "|"-separated
                $ngram_db = explode("|", $row[1]);
                // STEP 3: COMPARE N-GRAMS AND GET SCORE
                $score = $this->compare_ngrams($ngram_db, $ngram_input);
                // ADD TO LIST IF > 0.3
                if ($score>0.3) {
                    $possible_array[] = (string) $row[0];
                }
            }
        }

        // The count stays a running integer total; it is no longer overwritten
        // with the candidate array after being updated.
        $this->source_array["metaphone"]["array"] = array_merge($this->source_array["metaphone"]["array"], $possible_array);
        $this->source_array["metaphone"]["count"] = (int) $this->source_array["metaphone"]["count"] + count($possible_array);

        return $possible_array;
    }

    function modulus($op1, $op2) {
    //
    // Despite its name this returns the fractional part of $op1/$op2
    // (0 when $op2 divides $op1 evenly), not the remainder.
    //
        $quotient = $op1/$op2;
        return $quotient - (int) $quotient;
    }

    function chunk($string) {
    //
    // Splits a string into an array of its single characters.
    //
    // input: string
    // output: array with one element per character; empty array for ""
    //
        // initialised so an empty string yields array() rather than an
        // undefined variable
        $tokens = array();
        $length = strlen($string);
        for ($i = 0; $i < $length; $i++) {
            $tokens[] = substr($string, $i, 1);
        }
        return $tokens;
    }

    function pspell_search($misspelled) {
    //
    // Asks PSpell (English dictionary, PSPELL_NORMAL mode) for suggestions and
    // records them under the "pspell" source.
    //
    // input: misspelled word
    // output: whatever pspell_suggest() returns for the word
    //

        // the mode is the fifth argument of pspell_new(); the second one is the
        // spelling variant, so passing the mode there did not select it
        $pspell_link = pspell_new("en", "", "", "", PSPELL_NORMAL);
        $possible_array = pspell_suggest($pspell_link, $misspelled);
        if (is_array($possible_array)) {
            // create the "pspell" source on first use so array_merge() always
            // receives an array and the count starts from 0
            if (!isset($this->source_array["pspell"]["array"]) || !is_array($this->source_array["pspell"]["array"])) {
                $this->source_array["pspell"]["array"] = array();
            }
            if (!isset($this->source_array["pspell"]["count"])) {
                $this->source_array["pspell"]["count"] = 0;
            }
            $this->source_array["pspell"]["array"] = array_merge($this->source_array["pspell"]["array"], $possible_array);
            $this->source_array["pspell"]["count"] = (int) $this->source_array["pspell"]["count"] + count($possible_array);
        }
        return $possible_array;
    }

    function transposition_search($misspelled) {
    //
    // misspelling assumption: two adjacent characters were swapped
    // examples: concsiousness (consciousness), teh (the)
    //
    // A fragment of the word is taken (the first 10 characters when the word
    // is longer than 10; otherwise the whole word when its length is even, or
    // the word without its last character when odd). Starting at the second
    // character, each adjacent pair in the fragment is swapped in turn and the
    // lower-cased result is matched with LIKE against word_fragment in both
    // dictionaries. Hits whose digram score against the misspelled word is
    // above 0.3 are kept.
    //
    // input: misspelled word
    // output: array of candidate words (possibly empty); the candidates are
    //         also appended to the "transposition" source and added to its count
    //

        $possible_array = array();

        $word_length = strlen($misspelled);
        if ($word_length > 10) {
            $fragment = substr($misspelled, 0, 10);
        } elseif ($this->modulus($word_length, 2) === 0) {
            $fragment = $misspelled;
        } else {
            $fragment = substr($misspelled, 0, $word_length - 1);
        }

        $ngram_input = $this->create_ngram($misspelled, 2);

        $fragment_length = strlen($fragment);
        for ($i = 1; $i < $fragment_length; $i++) {
            $pair = substr($fragment, $i, 2);
            if (strlen($pair) < 2) {
                // last position: there is no following character to swap with
                continue;
            }
            $swapped = substr_replace($fragment, $pair[1].$pair[0], $i, 2);
            // lower-case once, and escape so an apostrophe or backslash in the
            // word cannot break the statement
            $searchterm = mysql_real_escape_string(strtolower($swapped));

            $sql = "(select word_str, word_ngram, word_metaphone ".
                   "from m_spelling_dictionary ".
                   "where word_fragment like '".$searchterm."') ".
                   "union distinct ".
                   "(select word_str, word_ngram, word_metaphone ".
                   "from m_spelling_dictaux ".
                   "where word_fragment like '".$searchterm."') ".
                   "limit 1000";
            if ($result = mysql_query($sql)) {
                while ($row = mysql_fetch_array($result)) {
                    $ngram_db = explode("|", $row[1]);
                    $score = $this->compare_ngrams($ngram_db, $ngram_input);
                    if ($score>0.3) {
                        $possible_array[] = (string) $row[0];
                    }
                }
            }
        }

        $this->source_array["transposition"]["array"] = array_merge($this->source_array["transposition"]["array"], $possible_array);
        $this->source_array["transposition"]["count"] = (int) $this->source_array["transposition"]["count"] + count($possible_array);

        return $possible_array;
    }

    function ngram_search($misspelled) {
    //
    // For words longer than 5 characters, runs two dictionary lookups:
    //   1. entries whose word_anterior equals the first four characters;
    //   2. entries whose word_posterior equals the four characters that start
    //      five from the end, and whose first letter matches.
    // Hits whose digram score against the misspelled word is above 0.3 are
    // kept; duplicates are removed after each lookup.
    //
    // input: misspelled word
    // output: array of candidate words (possibly empty, keys not necessarily
    //         consecutive); the candidates are also appended to the "ngram"
    //         source and added to its count
    //

        $possible_array = array();

        if (strlen($misspelled)>5) {
            $ngram_input = $this->create_ngram($misspelled, 2);

            // cut the fragments first, then escape each one, so an apostrophe
            // or backslash in the word cannot break the statements
            $anterior = mysql_real_escape_string(substr($misspelled, 0, 4));
            $posterior = mysql_real_escape_string(substr($misspelled, -5, 4));
            $first_letter = mysql_real_escape_string(substr($misspelled, 0, 1));

            // both lookups share the same shape and differ only in the filter
            $conditions = array(
                "word_anterior = '$anterior'",
                "word_posterior = '$posterior' and substr(word_str,1,1) = '$first_letter'"
            );

            foreach ($conditions as $condition) {
                $sql = "(select word_str, word_ngram, word_metaphone ".
                       "from m_spelling_dictionary ".
                       "where $condition) ".
                       "union distinct ".
                       "(select word_str, word_ngram, word_metaphone ".
                       "from m_spelling_dictaux ".
                       "where $condition) ".
                       "order by word_metaphone limit 1000";
                if ($result = mysql_query($sql)) {
                    while ($row = mysql_fetch_array($result)) {
                        $ngram_db = explode("|", $row[1]);
                        $score = $this->compare_ngrams($ngram_db, $ngram_input);
                        if ($score>0.3) {
                            $possible_array[] = (string) $row[0];
                        }
                    }
                    $possible_array = array_unique($possible_array);
                }
            }
        }

        $this->source_array["ngram"]["array"] = array_merge($this->source_array["ngram"]["array"], $possible_array);
        $this->source_array["ngram"]["count"] = (int) $this->source_array["ngram"]["count"] + count($possible_array);

        return $possible_array;
    }

    function header_search($misspelled) {
    //
    // Looks for dictionary entries that share their header (first four
    // characters) with the misspelled word, or with the word after every "wi"
    // has been respelled "whi". Only words longer than 3 characters are
    // searched. A hit is kept when it is a single word, its length is within
    // 2 characters of the misspelled word and its digram score is above 0.2.
    //
    // input: misspelled word
    // output: array of candidate words; an empty array when the word does not
    //         start with a letter or digit. Candidates are also appended to
    //         the "header" source and added to its count.
    //

        if (!preg_match("/^[A-Za-z0-9]+/", $misspelled)) {
            return array();
        }

        $possible_array = array();
        if (strlen($misspelled)>3) {

            $ngram_input = $this->create_ngram($misspelled, 2);
            $maxlen = strlen($misspelled)+2;
            $minlen = strlen($misspelled)-2;

            // The first variant is the word itself. It used to be produced by a
            // regex built from the raw word that replaced the match with
            // itself, which broke on regex metacharacters. The second variant
            // respells "wi" as "whi".
            $variants = array($misspelled, str_replace("wi", "whi", $misspelled));

            foreach ($variants as $modified) {

                $header = substr($modified, 0, 4);
                // escape each spelling of the header so an apostrophe or
                // backslash cannot break the statement
                $as_is = mysql_real_escape_string($header);
                $lower = mysql_real_escape_string(strtolower($header));
                $upper_first = mysql_real_escape_string(ucfirst($header));
                $condition = "word_header like '$as_is' or word_header like '".$lower."' or word_header like '".$upper_first."'";

                $sql = "(select word_str, word_ngram ".
                       "from m_spelling_dictionary ".
                       "where $condition) union distinct ".
                       "(select word_str, word_ngram ".
                       "from m_spelling_dictaux ".
                       "where $condition) ".
                       "limit 1000";
                if ($result = mysql_query($sql)) {
                    while ($row = mysql_fetch_array($result)) {
                        $word = $row[0];
                        // skip multi-word entries
                        if (count(explode(" ", $word)) != 1) {
                            continue;
                        }
                        $ngram_db = explode("|", $row[1]);
                        $score = $this->compare_ngrams($ngram_db, $ngram_input);
                        if ($score>0.2 && (strlen($word)<=$maxlen && strlen($word)>=$minlen)) {
                            $possible_array[] = (string) $word;
                        }
                    }
                    $possible_array = array_unique($possible_array);
                }
            }
        }

        $this->source_array["header"]["array"] = array_merge($this->source_array["header"]["array"], $possible_array);
        $this->source_array["header"]["count"] = (int) $this->source_array["header"]["count"] + count($possible_array);

        return $possible_array;
    }

    function deletion_search($misspelled) {
    //
    // assumes a character was dropped from the misspelled word
    // examples: consciosness (consciousness), th (the)
    //
    // For every position a single-character LIKE wildcard ("_") is inserted
    // after that character and the pattern (as given, lower-cased and with an
    // upper-cased first letter) is matched against word_str in both
    // dictionaries. Hits whose digram score against the pattern is at least
    // 0.2 are kept.
    //
    // input: misspelled word
    // output: array of candidate words; an empty array when the word does not
    //         start with a letter or digit. Candidates are also appended to
    //         the "deletion" source and added to its count.
    //

        if (!preg_match("/^[A-Za-z0-9]+/", $misspelled)) {
            return array();
        }

        $possible_array = array();

        $length = strlen($misspelled);
        for ($i = 0; $i < $length; $i++) {

            $char = substr($misspelled, $i, 1);
            $regexp = substr_replace($misspelled, $char."_", $i, 1);
            $ngram_input = $this->create_ngram($regexp, 2);

            // escaping leaves the "_" wildcard intact but keeps an apostrophe
            // or backslash in the word from breaking the statement
            $as_is = mysql_real_escape_string($regexp);
            $lower = mysql_real_escape_string(strtolower($regexp));
            $upper_first = mysql_real_escape_string(ucfirst($regexp));
            // LIKE is used for both tables: the pattern carries a wildcard, so
            // an "=" comparison could never match it
            $condition = "word_str like '$as_is' or word_str like '".$lower."' or word_str like '".$upper_first."'";

            $sql_anterior = "(select word_str, word_ngram, word_metaphone ".
                            "from m_spelling_dictionary ".
                            "where $condition order by word_str) ".
                            "union distinct ".
                            "(select word_str, word_ngram, word_metaphone ".
                            "from m_spelling_dictaux ".
                            "where $condition order by word_str) ".
                            "limit 1000";
            if ($result_anterior = mysql_query($sql_anterior)) {
                while ($row = mysql_fetch_array($result_anterior)) {
                    $ngram_db = explode("|", $row[1]);
                    $score = $this->compare_ngrams($ngram_db, $ngram_input);
                    if ($score>=0.2) {
                        $possible_array[] = (string) $row[0];
                    }
                }
                $possible_array = array_unique($possible_array);
            }
        }

        $this->source_array["deletion"]["array"] = array_merge($this->source_array["deletion"]["array"], $possible_array);
        $this->source_array["deletion"]["count"] = (int) $this->source_array["deletion"]["count"] + count($possible_array);

        return $possible_array;
    }

    function insertion_search($misspelled) {
    //
    // assumes an extra character was inserted into the misspelled word
    // example: consciousnness (consciousness)
    //
    // Each character is removed in turn and the shortened word is looked up in
    // word_str of both dictionaries. Hits whose digram score against the
    // shortened word is above 0.3 are kept.
    //
    // input: misspelled word
    // output: array of candidate words; an empty array when the word does not
    //         start with a letter or digit. Candidates are also appended to
    //         the "insertion" source and added to its count.
    //

        if (!preg_match("/^[A-Za-z0-9]+/", $misspelled)) {
            return array();
        }

        $possible_array = array();

        $length = strlen($misspelled);
        for ($i = 0; $i < $length; $i++) {

            $regexp = substr_replace($misspelled, "", $i, 1);
            $ngram_input = $this->create_ngram($regexp, 2);

            // escape so an apostrophe or backslash cannot break the statement
            $candidate = mysql_real_escape_string($regexp);

            $sql_anterior = "(select word_str, word_ngram, word_metaphone ".
                            "from m_spelling_dictionary ".
                            "where word_str like '$candidate' order by word_str) ".
                            "union distinct ".
                            "(select word_str, word_ngram, word_metaphone ".
                            "from m_spelling_dictaux ".
                            "where word_str = '$candidate' order by word_str) ".
                            "limit 1000";
            if ($result_anterior = mysql_query($sql_anterior)) {
                while ($row = mysql_fetch_array($result_anterior)) {
                    $ngram_db = explode("|", $row[1]);
                    $score = $this->compare_ngrams($ngram_db, $ngram_input);
                    if ($score>0.3) {
                        $possible_array[] = (string) $row[0];
                    }
                }
                $possible_array = array_unique($possible_array);
            }
        }

        $this->source_array["insertion"]["array"] = array_merge($this->source_array["insertion"]["array"], $possible_array);
        $this->source_array["insertion"]["count"] = (int) $this->source_array["insertion"]["count"] + count($possible_array);

        return $possible_array;
    }

    function substitution_search($misspelled) {
    //
    // assumes one character of the misspelled word was replaced by another
    // example: conscieusness (consciousness)
    //
    // Each character is replaced in turn by the single-character LIKE
    // wildcard ("_") and the pattern is matched against word_str in both
    // dictionaries. Hits whose digram score against the pattern is above 0.3
    // are kept, accumulated over all positions.
    //
    // input: misspelled word
    // output: array of candidate words; an empty array when the word does not
    //         start with a letter or digit. Candidates are also appended to
    //         the "substitution" source and added to its count.
    //

        if (!preg_match("/^[A-Za-z0-9]+/", $misspelled)) {
            return array();
        }

        // initialised once, before the loop, so hits from every position are
        // collected and the list exists even when nothing matches
        $possible_array = array();

        $length = strlen($misspelled);
        for ($i = 0; $i < $length; $i++) {

            $regexp = substr_replace($misspelled, "_", $i, 1);
            $ngram_input = $this->create_ngram($regexp, 2);

            // escaping leaves the "_" wildcard intact but keeps an apostrophe
            // or backslash in the word from breaking the statement
            $pattern = mysql_real_escape_string($regexp);

            // LIKE is used for both tables: the pattern carries a wildcard, so
            // an "=" comparison could never match it
            $sql_anterior = "(select word_str, word_ngram, word_metaphone ".
                            "from m_spelling_dictionary ".
                            "where word_str like '$pattern' order by word_str) ".
                            "union distinct ".
                            "(select word_str, word_ngram, word_metaphone ".
                            "from m_spelling_dictaux ".
                            "where word_str like '$pattern' order by word_str) ".
                            "limit 1000";
            if ($result_anterior = mysql_query($sql_anterior)) {
                while ($row = mysql_fetch_array($result_anterior)) {
                    $ngram_db = explode("|", $row[1]);
                    $score = $this->compare_ngrams($ngram_db, $ngram_input);
                    if ($score>0.3) {
                        $possible_array[] = (string) $row[0];
                    }
                }
                $possible_array = array_unique($possible_array);
            }
        }

        $this->source_array["substitution"]["array"] = array_merge($this->source_array["substitution"]["array"], $possible_array);
        $this->source_array["substitution"]["count"] = (int) $this->source_array["substitution"]["count"] + count($possible_array);

        return $possible_array;
    }

    function bias_concept($possible) {
    //
    // Reduces the candidate to its base form and checks whether that form
    // occurs as an English entry (NWD) in umls.MRXNW_ENG.
    //
    // input: candidate word
    // output: -0.3 when a row is found, 0.2 when none is found; the value is
    //         also added to the "concept" decision total.
    //         Nothing is returned or recorded when the query fails.
    //

        $possible = $this->baseform($possible);
        // escape so an apostrophe or backslash cannot break the statement
        $needle = mysql_real_escape_string($possible);
        $sql = "select * from umls.MRXNW_ENG where LAT = 'ENG' and NWD = '$needle'";
        $result = mysql_query($sql);
        if (!$result) {
            return;
        }

        $bias = mysql_num_rows($result) ? -0.3 : 0.2;
        $this->decision_array["concept"] = (float) $this->decision_array["concept"] + $bias;
        return $bias;
    }

    function baseform($searchterm) {
    //
    // Looks up the base form (BAS) of a term in umls.LRAGR, matching the
    // lower-cased term against STR with LIKE.
    //
    // input: term
    // output: the BAS value of the first matching row, or the term itself
    //         when nothing matches or the query fails
    //

        // escape so an apostrophe or backslash cannot break the statement
        $needle = mysql_real_escape_string(strtolower($searchterm));
        $sql = "select BAS from umls.LRAGR where STR like '".$needle."' limit 1";
        if ($result = mysql_query($sql)) {
            if ($row = mysql_fetch_array($result)) {
                return $row[0];
            }
        }
        return $searchterm;
    }

    function bias_homonym($misspelled, $possible) {
    //
    // Applies only when the two words start with different characters:
    // returns -0.2 if their metaphone keys are equal, 0 otherwise, and adds
    // that value to the "homonym" decision total. When the first characters
    // are the same nothing is recorded and nothing is returned.
    //

        if (substr($misspelled,0,1) == substr($possible,0,1)) {
            return;
        }

        $same_sound = (metaphone($misspelled) == metaphone($possible));
        $bias = $same_sound ? -0.2 : 0;

        $this->decision_array["homonym"] = (float) $this->decision_array["homonym"] + $bias;
        return $bias;
    }

    function bias_ngram_char($misspelled, $possible) {
    //
    // Scores the trigram similarity of the two words and returns it negated;
    // the negated score is also added to the "ngram" decision total.
    //

        $similarity = $this->compare_ngrams(
            $this->create_ngram($misspelled, 3),
            $this->create_ngram($possible, 3)
        );
        $bias = -$similarity;

        $this->decision_array["ngram"] = (float) $this->decision_array["ngram"] + $bias;
        return $bias;
    }

    function bias_metaphone($misspelled, $possible) {
    //
    // Returns -0.2 when both words have the same metaphone key and 0
    // otherwise; the value is also added to the "metaphone" decision total.
    //

        $bias = (metaphone($misspelled) == metaphone($possible)) ? -0.2 : 0;

        $this->decision_array["metaphone"] = (float) $this->decision_array["metaphone"] + $bias;
        return $bias;
    }

    function bias_length($misspelled, $possible, $list_deletion) {
    //
    // Length-based bias. When the candidate's length differs from the
    // misspelled word's by at most one character the result is -0.3 if
    // $list_deletion is non-empty and -0.05 if it is empty; for larger
    // differences it is 0. The value is also added to the "length" decision
    // total.
    //

        $diff = strlen($possible) - strlen($misspelled);

        if ($diff < -1 || $diff > 1) {
            $bias = 0;
        } elseif (count($list_deletion) > 0) {
            $bias = -0.3;
        } else {
            $bias = -0.05;
        }

        $this->decision_array["length"] = (float) $this->decision_array["length"] + $bias;
        return $bias;
    }

    function bias_pos($phrase, $possible) {
    //
    // Part-of-speech bias: tags the context phrase and the candidate
    // with tagger::tagtext() and compares the candidate's tag with the
    // tag of the phrase's second token.
    //
    //   identical tags                  -> -0.2
    //   same first two tag characters   -> -0.1
    //   anything else                   ->  0.2
    //
    // The amount is added to $this->decision_array["pos"] and returned.
    //
    // NOTE(review): the parsing assumes the tagger returns space
    // separated "word_TAG" tokens - confirm against tagger::tagtext().
    //
        $tagged_phrase = tagger::tagtext($phrase, "text");
        list($before, $center, $after) = explode(" ", $tagged_phrase);
        list($ignored, $context_tag) = explode("_", $center);

        $tagged_candidate = tagger::tagtext($possible, "text");
        list($ignored, $candidate_tag) = explode("_", $tagged_candidate);

        if ($candidate_tag==$context_tag) {
            $adjustment = -0.2;
        } elseif (substr($candidate_tag,0,2)==substr($context_tag,0,2)) {
            $adjustment = -0.1;
        } else {
            $adjustment = 0.2;
        }

        $this->decision_array["pos"] = (float) $this->decision_array["pos"] + $adjustment;

        return $adjustment;
    }

    function bias_history($possible) {
    //
    // History bias: looks the candidate up in m_spelling_dictaux and
    // returns -0.2 times its stored frequency. When a row is found the
    // same amount is added to $this->decision_array["history"].
    //
    // Fixes:
    // - the candidate is escaped before it is placed in the SQL string
    //   (a word containing a quote used to break the query)
    // - the function returns 0 instead of NULL when the query fails or
    //   the word is not in the table
    //
    // Relies on the default MySQL link already being open, exactly as
    // the mysql_query() call does.
    //
        $sql = "select word_str, frequency from m_spelling_dictaux ".
               "where word_str = '".mysql_real_escape_string($possible)."'";

        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            // no history for this word; as before, the tally is untouched
            return 0;
        }

        $running = isset($this->decision_array["history"]) ? (float) $this->decision_array["history"] : 0.0;

        $bias = 0;
        if (list($word, $frequency) = mysql_fetch_array($result)) {
            $bias = -0.2*$frequency;
        }
        $this->decision_array["history"] = $running + $bias;

        return $bias;
    }

    function word_list($misspelled, $context) {
    //
    // Builds the ranked list of replacement candidates for $misspelled.
    //
    // $context is a "|" separated string; predict() builds it as
    // "left|term|right". It is turned into a plain phrase for bias_pos().
    //
    // Candidates are gathered from the seven *_search() methods,
    // de-duplicated, and scored as levenshtein distance plus the sum of
    // all bias_*() adjustments. Candidates scoring below 5 are kept and
    // returned best (lowest score) first.
    //
    // Fixes:
    // - always returns an array; the original returned an undefined
    //   variable (NULL) when no candidate scored below 5
    // - the seven copy-pasted search/merge stanzas are one loop
    // - removed the always-true is_array() test with its unreachable
    //   else branch, and the unused locals
    //
        $phrase = implode(" ", explode("|", $context));

        // order matters: array_unique() keeps the first occurrence
        $search_methods = array(
            "metaphone_search",
            "ngram_search",
            "header_search",
            "transposition_search",
            "deletion_search",
            "insertion_search",
            "substitution_search"
        );

        $possible_array = array();
        $list_deletion = null;
        foreach ($search_methods as $method) {
            $found = $this->$method($misspelled);
            if ($method == "deletion_search") {
                // bias_length() needs the raw deletion result
                $list_deletion = $found;
            }
            if (is_array($found)) {
                $possible_array = array_merge($possible_array, $found);
            }
        }
        //$list_pspell = $this->pspell_search($misspelled);

        $possible_array = array_unique($possible_array);

        $levarray = array();
        foreach ($possible_array as $value) {
            // the bias methods also update $this->decision_array, so they
            // are called for every candidate, in the original order
            $bias_concept = $this->bias_concept($value);
            $bias_homonym = $this->bias_homonym($misspelled, $value);
            $bias_ngram_char = $this->bias_ngram_char($misspelled, $value);
            $bias_metaphone = $this->bias_metaphone($misspelled, $value);
            $bias_length = $this->bias_length($misspelled, $value, $list_deletion);
            $bias_pos = $this->bias_pos($phrase, $value);
            $bias_history = $this->bias_history($value);
            $distance = levenshtein($misspelled, $value);

            $score = $bias_concept + $bias_homonym + $bias_ngram_char + $bias_metaphone + $bias_length + $bias_pos + $bias_history + $distance;
            if ($score < 5) {
                $levarray["$value"] = $score;
            }
        }

        // lowest score first; the candidate words are the keys
        asort($levarray);
        return array_keys($levarray);
    }

    function is_correct_spelling($term) {
    //
    // Returns true when $term is considered correctly spelled.
    //
    // Lookup order:
    //   1. the ignore list (in_ignore_list)
    //   2. m_spelling_dictaux
    //   3. m_spelling_dictionary - a hit here is copied into the
    //      auxiliary table via aux_insert() ("learning part")
    //   4. pspell with the "en" dictionary
    //
    // Returns false when none of them knows the term or when a query
    // fails.
    //
    // Fixes:
    // - the term is escaped before it is placed in the SQL strings
    //   (a term containing a quote used to break both queries)
    // - pspell_check() is only called when pspell_new() returned a link
    //
    // Relies on the default MySQL link already being open, exactly as
    // the mysql_query() calls do.
    //
        if ($this->in_ignore_list($term)) {
            return true;
        }

        $lookup = mysql_real_escape_string(strtolower($term));

        $sql_aux = "select word_str ".
                   "from m_spelling_dictaux ".
                   "where word_str = '".$lookup."'";
        $result_aux = mysql_query($sql_aux);
        if (!$result_aux) {
            return false;
        }
        if (mysql_num_rows($result_aux)) {
            return mysql_fetch_array($result_aux) ? true : false;
        }

        $sql_dict = "select word_str ".
                    "from m_spelling_dictionary ".
                    "where word_str = '".$lookup."'";
        $result_dict = mysql_query($sql_dict);
        if (!$result_dict) {
            return false;
        }
        if (mysql_num_rows($result_dict)) {
            if (list($word) = mysql_fetch_array($result_dict)) {
                // learning part
                $this->aux_insert($word);
                return true;
            }
            return false;
        }

        // last resort: pspell is checked with the term as given, not lowercased
        $pspell_link = pspell_new("en");
        if ($pspell_link && pspell_check($pspell_link, $term)) {
            return true;
        }
        return false;
    }

    function pick_word($misspelled, $context) {
    //
    // Returns the first entry of word_list() for the misspelled word,
    // or the misspelled word itself when word_list() gives no
    // candidates.
    //
        $candidates = $this->word_list($misspelled, $context);

        if (!is_array($candidates) || count($candidates) == 0) {
            return $misspelled;
        }

        return reset($candidates);
    }

    function square($value) {
    //
    // Returns $value multiplied by itself, computed as a float.
    //
        $as_float = (float) $value;

        return $as_float * $as_float;
    }

    function predict() {
    //
    // Scans $this->token_array for misspelled words, picks a correction
    // for each one and builds the two contribution reports.
    //
    // Pass 1: every token whose tag matches _NN, _VV, _JJ or _RR
    //         (presumably noun/verb/adjective/adverb tags - confirm
    //         against the tagger) is stripped of its tag. Non-numeric,
    //         non-hyphenated terms that fail is_correct_spelling() are
    //         appended to $this->misspelled_array together with a
    //         "left|term|right" entry in $this->context_array.
    // Pass 2: every collected term that does not contain a run of 2 or
    //         more capital letters (each optionally followed by ".")
    //         gets a correction from pick_word(). Each source word list
    //         containing that correction has its count incremented, and
    //         "misspelled|||correction|||context" is appended to
    //         $this->corrected_array.
    // Finally $this->source_stats and $this->decision_stats are filled
    // with each entry's share of the absolute totals.
    //
    // Returns $this->corrected_array.
    //
    // Fixes:
    // - array_search() returns the matching key, so a correction stored
    //   at index 0 of a source list was treated as "not found" and
    //   never counted; the result is now compared against false
    // - the left/right neighbour terms are reset for every token; they
    //   used to keep the value from the previous iteration at the text
    //   boundaries, so the right context of the last token was the
    //   token itself
    // - sqrt(square(x)) replaced by abs(x); count() hoisted out of the
    //   loop conditions
    //
        $token_count = count($this->token_array);
        for ($i=0; $i<$token_count; $i++) {
            $this->term_left = "";
            $this->term_right = "";
            if (isset($this->token_array[$i-1])) {
                $myleft = new token($this->token_array[$i-1]);
                $this->term_left = $myleft->get_term();
            }
            if (isset($this->token_array[$i+1])) {
                $myright = new token($this->token_array[$i+1]);
                $this->term_right = $myright->get_term();
            }
            if (!preg_match("/(_NN)|(_VV)|(_JJ)|(_RR)/", $this->token_array[$i])) {
                continue;
            }
            $me = new token($this->token_array[$i]);
            $this->term_me = $me->get_term();

            $term = $this->remove_tag($this->token_array[$i]);
            if (is_numeric($term)) {
                continue;
            }
            // the spelling lookup runs before the hyphen test because it
            // may add the word to the auxiliary dictionary
            if (!$this->is_correct_spelling(strtolower($term))) {
                if (!preg_match("/-/", $term)) {
                    $this->misspelled_array[] = $term;
                    $this->context_array[] = $this->term_left."|".$this->term_me."|".$this->term_right;
                }
            }
        }

        $misspelled_count = count($this->misspelled_array);
        for ($i=0; $i<$misspelled_count; $i++) {
            //if (!preg_match("/([A-Z.]+)/", $this->misspelled_array[$i]) && !preg_match("/\b([A-Z][a-z]{2,4}[ ]+)/",$this->misspelled_array[$i])) {
            if (preg_match("/([A-Z]\.?){2,7}/", $this->misspelled_array[$i])) {
                // contains a run of capitals: left alone
                continue;
            }
            $correction = $this->pick_word($this->misspelled_array[$i], $this->context_array[$i]);
            if (strlen($correction)>0) {
                foreach ($this->source_array as $key=>$value) {
                    // "!== false": key 0 is a valid hit
                    if (array_search($correction, $this->source_array[$key]["array"]) !== false) {
                        $this->source_array[$key]["count"] = (int) $this->source_array[$key]["count"]+1;
                    }
                }
                $this->corrected_array[] = $this->misspelled_array[$i]."|||".$correction."|||".$this->context_array[$i];
            }
        }

        $total_count = 0;
        foreach ($this->source_array as $key=>$value) {
            $total_count = $total_count + (int) abs((float) $this->source_array[$key]["count"]);
        }
        if ($total_count<>0) {
            $this->source_stats = "<span class='boxtitle'>WORDLIST CONTRIBUTIONS</span><br>";
            foreach ($this->source_array as $key=>$value) {
                $abs = abs((float) $this->source_array[$key]["count"]);
                $this->source_stats .= "$key: ".($abs/$total_count)."<br>";
            }
        }

        $total_bias = 0;
        foreach ($this->decision_array as $key => $value) {
            $total_bias = $total_bias + abs((float) $value);
        }
        if ($total_bias<>0) {
            $this->decision_stats = "<span class='boxtitle'>BIAS CONTRIBUTIONS</span><br>";
            foreach ($this->decision_array as $key => $value) {
                $abs = abs((float) $value);
                $this->decision_stats .= "$key: ".($abs/$total_bias)."<br>";
            }
        }

        return $this->corrected_array;

    }

    function spellstats() {
    //
    // Prints the word list contribution report, a line break, and then
    // the bias contribution report.
    //
        print $this->source_stats."<br>".$this->decision_stats;
    }

    function format($corrected_array) {
    //
    // Prints the list of corrections as "misspelled -> corrected" lines
    // under a CORRECTED title, or a red "No corrections." message when
    // there is nothing to show.
    //
    // $corrected_array holds "misspelled|||corrected|||context" strings.
    //
    // Fixes:
    // - array_unique() returns the de-duplicated array; the original
    //   discarded the result, so repeated corrections were printed
    //   once per occurrence
    // - $formatted is initialised, and a non-array argument is treated
    //   as "no corrections"
    //
        if (!is_array($corrected_array) || count($corrected_array) == 0) {
            print "<font color='red'>No corrections.</font><br>";
            return;
        }

        $formatted = array();
        foreach ($corrected_array as $value) {
            $parts = explode("|||", $value);
            $misspelled = $parts[0];
            $corrected = isset($parts[1]) ? $parts[1] : "";
            $formatted[] = stripslashes("$misspelled -> $corrected<br>");
        }
        $formatted = array_unique($formatted);

        print "<span class='boxtitle'>CORRECTED</span><br>";
        foreach ($formatted as $text) {
            print "$text";
        }
        print "<br>";
    }

    function replace($text_input, $corrected_input) {
    //
    // Replaces misspelled terms in $text_input, working token by token
    // (tokens are split on single spaces).
    //
    // $corrected_input is either
    //   - an array of "misspelled|||corrected|||context" strings: every
    //     whole-word occurrence of each misspelled term is replaced, or
    //   - a "misspelled|corrected" string: the term is replaced where a
    //     token starts with it.
    // Any other type returns $text_input unchanged.
    //
    // Fixes:
    // - the misspelled term is passed through preg_quote(), so
    //   characters such as "." or "(" are matched literally instead of
    //   acting as (or breaking) a regular expression
    // - "$" and "\" in the correction are escaped so they are inserted
    //   literally instead of being read as $1 / \1 references
    // - entries with an empty term or without a correction are skipped;
    //   they used to insert text at every word boundary or delete the
    //   word
    // - parsing and pattern building moved out of the per-token loops
    //
    // Terms containing "+" are still skipped in the array form, as
    // before.
    //
        if (is_array($corrected_input)) {
            $tokens = explode(" ", $text_input);
            foreach ($corrected_input as $pair) {
                $parts = explode("|||", $pair);
                if (count($parts) < 2 || strlen($parts[0]) == 0) {
                    continue;
                }
                $misspelled = $parts[0];
                if (preg_match("/([+]+)/", $misspelled)) {
                    continue;
                }
                $pattern = '/\b('.preg_quote($misspelled, '/').')\b/';
                $replacement = addcslashes($parts[1], '\\$');
                foreach ($tokens as $key=>$term) {
                    $tokens[$key] = preg_replace($pattern, $replacement, $term);
                }
            }
            return implode(" ", $tokens);
        } elseif (is_string($corrected_input)) {
            $parts = explode("|", $corrected_input);
            if (count($parts) < 2 || strlen($parts[0]) == 0) {
                return $text_input;
            }
            $pattern = '/^'.preg_quote($parts[0], '/').'/';
            $replacement = addcslashes($parts[1], '\\$');
            $tokens = explode(" ", $text_input);
            foreach ($tokens as $key=>$term) {
                $tokens[$key] = preg_replace($pattern, $replacement, $term);
            }
            return implode(" ", $tokens);
        } else {
            return $text_input;
        }
    }

    function mark_text($cleaned_text, $corrected_array) {
    //
    // Wraps every misspelled term found in $cleaned_text in
    // <span class='highlight'>...</span> and returns the marked text.
    //
    // $corrected_array holds "misspelled|||corrected|||context"
    // strings; only the misspelled part is used. The text is processed
    // token by token (split on single spaces).
    //
    // Fixes:
    // - $misspelled_list is initialised; it was undefined when
    //   $corrected_array was empty
    // - the term is passed through preg_quote(), so characters such as
    //   "." are matched literally
    // - a term is also highlighted when it ends the token. The original
    //   pattern required one of ",.!? " after the term, but tokens are
    //   split on spaces, so any word not followed by punctuation was
    //   never highlighted
    // - the matched text ($1) is re-inserted instead of interpolating
    //   the term into the replacement string, where "$" and "\" would
    //   have been read as references
    //
    // Terms containing "+" are still skipped, as before.
    //
        $misspelled_list = array();
        if (is_array($corrected_array)) {
            foreach ($corrected_array as $value) {
                $parts = explode("|||", $value);
                $searchterm = $parts[0];
                if (strlen($searchterm) == 0 || preg_match("/([+]+)/", $searchterm)) {
                    continue;
                }
                $misspelled_list[] = $searchterm;
            }
        }
        if (count($misspelled_list) == 0) {
            return $cleaned_text;
        }

        // an already highlighted token cannot match again, so duplicates
        // only cost time
        $misspelled_list = array_unique($misspelled_list);

        $tokens = explode(" ", $cleaned_text);
        foreach ($misspelled_list as $searchterm) {
            $pattern = '/\b('.preg_quote($searchterm, '/').')([,.!? ]|$)/';
            foreach ($tokens as $key => $value) {
                $tokens[$key] = preg_replace($pattern, '<span class=\'highlight\'>$1</span>$2', $value);
            }
        }

        return implode(" ", $tokens);
    }

    function misspelled_list($get_vars, $corrected_array) {
    //
    // Returns the misspelled terms as a block of HTML links, one per
    // line, each pointing back to the current page with the term and
    // its context in the spellcheck/context query parameters. Returns
    // a red "None" when there are no terms.
    //
    // $get_vars supplies menu_id, document_id and tab for the link;
    // $corrected_array holds "misspelled|||corrected|||context" strings.
    //
    // Fixes:
    // - $misspelled_list is initialised; it was undefined when
    //   $corrected_array was empty
    // - query values are urlencode()d and PHP_SELF / the link text are
    //   HTML-escaped. They come from the request and from document
    //   text and were written into the page as-is; a "+" in a term was
    //   also decoded as a space on the receiving side
    // - missing $get_vars keys or a missing context no longer raise
    //   notices
    //
        $misspelled_list = array();

        if (is_array($corrected_array) && count($corrected_array)>0) {
            $self = htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES);
            $menu_id = isset($get_vars["menu_id"]) ? urlencode($get_vars["menu_id"]) : "";
            $document_id = isset($get_vars["document_id"]) ? urlencode($get_vars["document_id"]) : "";
            $tab = isset($get_vars["tab"]) ? urlencode($get_vars["tab"]) : "";

            foreach ($corrected_array as $value) {
                $parts = explode("|||", $value);
                $searchterm = $parts[0];
                $context = isset($parts[2]) ? $parts[2] : "";
                $misspelled_list[] = "<a class='ptmenu' href='".$self."?page=PROCESSING&menu_id=".$menu_id."&document_id=".$document_id."&tab=".$tab."&spellcheck=".urlencode($searchterm)."&context=".urlencode($context)."#spellcheck'>".htmlspecialchars($searchterm, ENT_QUOTES)."</a><br>";
            }
        }

        if (count($misspelled_list)>0) {
            return implode("", array_unique($misspelled_list));
        } else {
            return "<font color='red'>None</font>";
        }

    }

    function decision_tally() {
    //
    // For every key of $this->source_array, sets the "pct" and "count"
    // entries of $this->decision_array[key] to 0 and its "total" entry
    // to the number of elements stored under that key.
    //
        foreach ($this->source_array as $source=>$entries) {
            $entry_total = count($entries);
            $this->decision_array[$source]["pct"] = 0;
            $this->decision_array[$source]["count"] = 0;
            $this->decision_array[$source]["total"] = $entry_total;
        }
    }

    function aux_update($word) {
    //
    // Increments the frequency of $word (lowercased) in
    // m_spelling_dictaux. Returns true when the UPDATE statement ran,
    // false when mysql_query() failed.
    //
    // Fix: the word is escaped before it is placed in the SQL string
    // (a word containing a quote used to break the statement).
    //
    // Relies on the default MySQL link already being open, exactly as
    // the mysql_query() call does.
    //
        $word = mysql_real_escape_string(strtolower($word));

        $sql_update = "update m_spelling_dictaux set ".
                      "frequency = frequency+1 ".
                      "where word_str = '$word'";

        if ($result_update = mysql_query($sql_update)) {
            return true;
        }
        return false;

    }

    function aux_insert($word) {
    //
    // Inserts $word into m_spelling_dictaux with frequency 1, together
    // with the lookup keys derived from it:
    //   word_metaphone  metaphone key
    //   word_ngram      2-grams joined with "|"
    //   word_header     first 4 characters
    //   word_anterior   4 characters starting at the second character
    //   word_posterior  4 characters starting 5 from the end
    //   word_fragment   first 10 characters for words longer than 10;
    //                   otherwise the whole word when its length is
    //                   even, or the word without its last character
    //
    // Returns true when the INSERT statement ran, false when
    // mysql_query() failed.
    //
    // Fix: every value is escaped before it is placed in the SQL
    // string (a word containing a quote used to break the statement).
    //
    // NOTE(review): create_ngram() and modulus() are called statically
    // on a class named "spellcheck", while the other methods here call
    // $this->create_ngram(); confirm that class exists.
    //
        $metaphone = metaphone($word);
        $ngram_array = spellcheck::create_ngram($word, 2);
        $ngram_string = implode("|", $ngram_array);
        $header = substr($word, 0, 4);
        $anterior = substr($word, 1, 4);
        $posterior = substr($word, -5, 4);
        if (strlen($word)>10) {
            $fragment = substr($word, 0, 10);
        } else {
            if (spellcheck::modulus(strlen($word),2)===0) {
                $fragment = $word;
            } else {
                $fragment = substr($word, 0, strlen($word)-1);
            }
        }

        // same column order as the insert statement below
        $values = array($word, $metaphone, $ngram_string, $anterior, $posterior, $header, $fragment);
        foreach ($values as $key=>$value) {
            $values[$key] = mysql_real_escape_string($value);
        }

        $sql_insert = "insert into m_spelling_dictaux (word_str, word_metaphone, word_ngram, word_anterior, word_posterior, word_header, word_fragment, frequency) ".
                      "values ('".implode("', '", $values)."', 1)";
        if ($result_insert = mysql_query($sql_insert)) {
            return true;
        }
        return false;
    }

    // end of spelling class

}

?>
