<?
class spelling extends module {

    //
    // Author: Herman Tolentino (unless specified)
    // ngram method author: Mikko Saari
    // ngram source url: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // AEFI NLP Project
    // Vaccine Analytic Unit - Brighton Collaboration - National Library of Medicine
    //
    // IMPORTANT:
    // This assumes that the UMLS Lexicon tables have been set up and that the LRAGR table
    // has been indexed using STR.
    //

    function spelling() {
        //
        // Constructor (PHP 4 style: the method carries the class name).
        // Fills in the descriptive properties of this module.
        // Do not forget to update the version prefix on each release.
        //
        $this->module = "spelling";
        $this->description = "AEFI Module - Spelling Checker";
        $this->author = 'Herman Tolentino MD';

        // version = release prefix + the date on which the object is created
        // 0.1: BEGIN
        $release_prefix = "0.1-";
        $this->version = $release_prefix.date("Y-m-d");
    }

    function init_deps() {
    //
    // Registers, through module::set_dep(), every module this one
    // depends on ("module" and "textlab", in that order).
    //
        $required_modules = array("module", "textlab");

        foreach ($required_modules as $required_module) {
            module::set_dep($this->module, $required_module);
        }
    }

    function init_lang() {
    //
    // Registers the English user-interface strings of this module
    // through module::set_lang(). Keys are the label identifiers,
    // values the text shown to the user.
    //
        $english_labels = array(
            "FTITLE_SPELLING_CHECK"      => "SPELLING CHECK",
            "INSTR_SPELLING_CHECK"       => "CLICK ON THE HIGHLIGHTED TERMS ABOVE",
            "LBL_POSSIBLE_MISPELLED"     => "POSSIBLE MISPELLED WORD",
            "LBL_POSSIBLE_CORRECTIONS"   => "POSSIBLE CORRECTIONS",
            "LBL_REPLACEMENT_TERM"       => "REPLACEMENT TERM",
            "INSTR_SPELLCORRECTION_LIST" => "PICK ONE FROM THE LIST AND CLICK ON <b>Replace from Correction List</b>",
            "LBL_SPELLING_SOURCE"        => "SPELLING SOURCE",
            "LBL_BUILD_DICT"             => "BUILD DICTIONARY",
        );

        // registration order follows the order of the table above
        foreach ($english_labels as $label_id => $label_text) {
            module::set_lang($label_id, "english", $label_text, "Y");
        }
    }

    function init_menu() {
    //
    // Registers the menu entry and the descriptive details of this module.
    // optional input: module id as first argument (read but not used here)
    //
        $arg_list = func_get_args();
        if (count($arg_list) > 0) {
            $module_id = $arg_list[0];
        }

        // menu entry "Build Dictionary" -> handler _build_dict, filed under LIBRARIES
        module::set_menu($this->module, "Build Dictionary", "LIBRARIES", "_build_dict");

        // description, version, author and module name
        module::set_detail(
            $this->description,
            $this->version,
            $this->author,
            $this->module
        );
    }

    function init_stats() {
        // No-op: the body is empty, so this module registers nothing here.
        // NOTE(review): presumably a hook called by the module framework - confirm.
    }

    function init_help() {
        // No-op: the body is empty, so this module registers nothing here.
        // NOTE(review): presumably a hook called by the module framework - confirm.
    }

    function init_sql() {
    //
    // Creates the tables of this module through module::execsql():
    //   m_spelling_dictionary  main dictionary
    //   m_spelling_dictaux     auxiliary dictionary (same layout as the main one)
    //   m_spelling_ignorelist  terms that must not be reported
    //   m_textlab_nounphrase   noun phrases
    // optional input: module id as first argument (read but not used here)
    //
        $arg_list = func_get_args();
        if (count($arg_list) > 0) {
            $module_id = $arg_list[0];
        }

        // column layout shared by the two dictionary tables
        $dictionary_layout =
            "`word_id` float NOT NULL auto_increment,".
            "`word_str` varchar(255) NOT NULL default '',".
            "`word_ngram` varchar(255) default '',".
            "`word_header` varchar(4) default '',".
            "`word_metaphone` varchar(25) default '',".
            "PRIMARY KEY  (`word_id`), ".
            "UNIQUE INDEX (`word_str`),".
            "INDEX (`word_header`),".
            "INDEX (`word_metaphone`)".
            ") ENGINE=MyISAM ";

        foreach (array("m_spelling_dictionary", "m_spelling_dictaux") as $dictionary_table) {
            module::execsql("CREATE TABLE `".$dictionary_table."` (".$dictionary_layout);
        }

        $ignorelist_sql = implode("", array(
            "CREATE TABLE `m_spelling_ignorelist` (",
            "`ignore_str` varchar(50) NOT NULL default '',",
            "PRIMARY KEY  (`ignore_str`)",
            ") ENGINE=MyISAM",
        ));
        module::execsql($ignorelist_sql);

        $nounphrase_sql = implode("", array(
            "CREATE TABLE `m_textlab_nounphrase` (",
            "`phrase_id` float NOT NULL auto_increment,",
            "`phrase_str` varchar(50) NOT NULL default '',",
            "`phrase_tagged` varchar(70) default '', ",
            "PRIMARY KEY  (`phrase_id`), ",
            "UNIQUE INDEX (`phrase_str`)",
            ") ENGINE=MyISAM",
        ));
        module::execsql($nounphrase_sql);
    }

    function drop_tables() {
    //
    // Drops every table created by init_sql().
    // m_spelling_dictaux is included: init_sql() creates it, so leaving it
    // behind would make a later CREATE TABLE for it fail.
    //
        $module_tables = array(
            "m_spelling_dictionary",
            "m_spelling_dictaux",
            "m_spelling_ignorelist",
            "m_textlab_nounphrase",
        );

        foreach ($module_tables as $table_name) {
            module::execsql("DROP TABLE `".$table_name."`;");
        }
    }


    // --------------- CUSTOM MODULE FUNCTIONS ------------------

   function mark_text() {
   //
   // This function marks or highlights (yellow) misspelled words
   // in cleaned / spellchecked text. If used in spellchecked text,
   // it shows remaining misspelled words.
   // input: (1) GET variables (not used here), (2) cleaned or spellchecked text
   // output: marked up text showing misspelled words; nothing when called
   //         without arguments
   //
   // All misspelled terms are highlighted in ONE pass over the text, so a term
   // that occurs several times, or that also occurs inside the inserted markup
   // (e.g. "span"), can never be wrapped twice. Matching is case-insensitive
   // and limited to whole words.
   //
        if (func_num_args()) {
            $arg_list = func_get_args();

            $get_vars = $arg_list[0];
            $cleaned_text = $arg_list[1];

            // STEP 1: TAG cleaned text temporarily
            // (the tagger output is presumably a list of word_TAG tokens)
            $tagged_text = tagger::tagtext($cleaned_text, "text");

            // STEP 2: REMOVE end-of-sentence markers, symbols, numbers
            $tagged_text = preg_replace("/(\._\.)|(\!_\.)|(\?_\.)|(:_:)|(-_:)|(;_:)|(\/_SYM)|(-_SYM)|(,_,)|(x_SYM)|([\.0-9]+_MC)/", "", $tagged_text);

            // STEP 3: TOKENIZE tagged text and make sure terms do not repeat
            $tokens = preg_split("/[\s,-]+/", $tagged_text);

            $checked = array();     // lower-cased terms already looked up
            $misspelled = array();  // regex-quoted misspelled terms
            foreach ($tokens as $token) {

                // REMOVE TAGS (a token without "_" is used as it is)
                $token_parts = explode("_", $token);
                $searchterm = strtolower(trim($token_parts[0]));

                if (strlen($searchterm)==0 || isset($checked[$searchterm])) {
                    continue;
                }
                $checked[$searchterm] = true;

                // STEP 4: SEARCH the dictionary
                // STEP 5: MAKE SURE term IS NOT IN IGNORE LIST
                if (spelling::is_correct_spelling($searchterm)==false &&
                    spelling::in_ignore_list($searchterm)==false) {
                    $misspelled[] = preg_quote($searchterm, "/");
                }
            }

            // STEP 6: HIGHLIGHT all misspelled terms at once
            if (count($misspelled)>0) {
                // look-arounds instead of \b so that terms made of punctuation still match
                $search = "/(?<!\w)(".implode("|", $misspelled).")(?!\w)/i";
                $marked_text = preg_replace($search, "<span class='highlight'>$1</span>", $cleaned_text);
                if ($marked_text !== null) {
                    // null would mean a PCRE failure; keep the unmarked text then
                    $cleaned_text = $marked_text;
                }
            }
            return $cleaned_text;
        }
    }

   function misspelled_list() {
   //
   // Lists down misspelled words found in cleaned text.
   // input: (1) GET variables (menu_id, document_id, tab), (2) cleaned text
   // output: HTML with one link per unique misspelled word, each pointing to
   //         the spell checker; a red "None" when nothing is misspelled;
   //         nothing when called without arguments
   //
   // Terms and GET values are URL- and HTML-escaped before they are written
   // into the links.
   //
        if (func_num_args()) {
            $arg_list = func_get_args();

            $get_vars = $arg_list[0];
            $cleaned_text = $arg_list[1];

            // STEP 1: TAG cleaned text temporarily
            // (the tagger output is presumably a list of word_TAG tokens)
            $tagged_text = tagger::tagtext($cleaned_text, "text");

            // STEP 2: REMOVE end-of-sentence markers, symbols, numbers
            $tagged_text = preg_replace("/(\._\.)|(\!_\.)|(\?_\.)|(:_:)|(-_:)|(;_:)|(\/_SYM)|(-_SYM)|(,_,)|(x_SYM)|([\.0-9]+_MC)/", "", $tagged_text);

            // STEP 3: TOKENIZE tagged text and make sure terms do not repeat
            $tokens = preg_split("/[\s,-]+/", $tagged_text);

            // part of the link that is the same for every term
            $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING".
                         "&menu_id=".urlencode(isset($get_vars["menu_id"])?$get_vars["menu_id"]:"").
                         "&document_id=".urlencode(isset($get_vars["document_id"])?$get_vars["document_id"]:"").
                         "&tab=".urlencode(isset($get_vars["tab"])?$get_vars["tab"]:"");

            $checked = array();          // terms already looked up (case-sensitive)
            $misspelled_list = array();  // always an array, even when nothing is found
            foreach ($tokens as $token) {

                // REMOVE TAGS (a token without "_" is used as it is)
                $token_parts = explode("_", $token);
                $searchterm = $token_parts[0];

                if (strlen(trim($searchterm))==0 || isset($checked[$searchterm])) {
                    continue;
                }
                $checked[$searchterm] = true;

                // STEP 4: SEARCH the dictionary
                // STEP 5: MAKE SURE term IS NOT IN IGNORE LIST
                if (spelling::is_correct_spelling($searchterm)==false &&
                    spelling::in_ignore_list($searchterm)==false) {
                    // STEP 6: SHOW MISSPELLED TERM AS A LINK TO SPELL CHECKER
                    $href = $link_base."&spellcheck=".urlencode($searchterm)."#spellcheck";
                    $misspelled_list[] = "<a class='ptmenu' href='".htmlspecialchars($href, ENT_QUOTES)."'>".
                                         htmlspecialchars($searchterm, ENT_QUOTES)."</a><br>";
                }
            }

            if (count($misspelled_list)>0) {
                return implode("", $misspelled_list);
            } else {
                return "<font color='red'>None</font>";
            }
        }
    }

    function manage_term() {
    //
    // Entry point of the spelling form: handles a submitted form first
    // (when the "submitdict" button value is present), then always
    // displays the form.
    // input: menu id, GET variables, POST variables, textlab record
    //
        $arg_list = func_get_args();
        if (!count($arg_list)) {
            return;
        }

        list($menu_id, $get_vars, $post_vars, $textlab) = $arg_list;

        $form_submitted = $post_vars["submitdict"];
        if ($form_submitted) {
            spelling::process_manage_term($menu_id, $get_vars, $post_vars, $textlab);
        }

        spelling::form_manage_term($menu_id, $get_vars, $post_vars, $textlab);
    }

    function process_manage_term() {
    //
    // Executes the action chosen in the spelling form. The action is the
    // value of the pressed "submitdict" button:
    //   Change Dictionary              nothing to do here
    //   Add to Dictionary              stores the term in m_spelling_dictaux
    //   Replace from Correction List   replaces the term by the selected correction
    //   Replace using Replacement Term replaces the term by the typed replacement
    //                                  and stores the replacement in m_spelling_dictaux
    //   Add to Ignore List             stores the term in m_spelling_ignorelist
    //   Cancel                         returns to the document
    // input: menu id, GET variables, POST variables, textlab record
    //
    // Nothing is printed here, because every action ends with a header()
    // redirect. All values placed into SQL are escaped with
    // mysql_real_escape_string().
    // NOTE(review): this assumes magic_quotes_gpc is off; with it on, POST
    // values would be escaped twice - confirm the server setting.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $get_vars = $arg_list[1];
            $post_vars = $arg_list[2];
            $textlab = $arg_list[3];

            // redirect targets (the "Add to Dictionary" target carries no tab)
            $return_url = $_SERVER["PHP_SELF"].
                          "?page=".(isset($get_vars["page"])?$get_vars["page"]:"").
                          "&menu_id=".(isset($get_vars["menu_id"])?$get_vars["menu_id"]:"").
                          "&document_id=".(isset($get_vars["document_id"])?$get_vars["document_id"]:"");
            $return_url_tab = $return_url."&tab=".(isset($get_vars["tab"])?$get_vars["tab"]:"");

            // text the replacements work on: spellchecked text when present, cleaned text otherwise
            if (!empty($textlab["spellchecked_text"])) {
                $replacement_base = $textlab["spellchecked_text"];
            } else {
                $replacement_base = (isset($textlab["cleaned_text"])?$textlab["cleaned_text"]:"");
            }

            $has_document = !empty($get_vars["document_id"]);
            $has_misspelled = !empty($post_vars["misspelled"]);
            $action = (isset($post_vars["submitdict"])?$post_vars["submitdict"]:"");

            switch ($action) {
            case "Change Dictionary":
                break;
            case "Add to Dictionary":
                if ($has_misspelled) {
                    $sql = spelling::_dictaux_insert_sql($post_vars["misspelled"]);
                    if ($result = mysql_query($sql)) {
                        header("location: ".$return_url);
                    }
                }
                break;
            case "Replace from Correction List":
                if ($has_document && $has_misspelled && !empty($post_vars["correction_list"]) && !empty($textlab["cleaned_text"])) {
                    $spellchecked_text = str_replace($post_vars["misspelled"], $post_vars["correction_list"], $replacement_base);
                    $sql = spelling::_spellchecked_update_sql($spellchecked_text, $get_vars["document_id"]);
                    if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                        header("location: ".$return_url_tab);
                    }
                }
                break;
            case "Replace using Replacement Term":
                if ($has_document && $has_misspelled && !empty($post_vars["replacement"]) && !empty($textlab["cleaned_text"])) {
                    $spellchecked_text = spelling::_replace_preserving_case($replacement_base, $post_vars["misspelled"], $post_vars["replacement"]);
                    $sql = spelling::_spellchecked_update_sql($spellchecked_text, $get_vars["document_id"]);
                    if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                        // best effort: the insert fails silently when the word is already stored
                        $result_insert = mysql_query(spelling::_dictaux_insert_sql($post_vars["replacement"]));
                    }
                    header("location: ".$return_url_tab);
                }
                break;
            case "Add to Ignore List":
                if ($has_misspelled) {
                    $sql = "insert into m_spelling_ignorelist (ignore_str) values ('".mysql_real_escape_string($post_vars["misspelled"])."')";
                    if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                        $sql_update = spelling::_spellchecked_update_sql($replacement_base, (isset($get_vars["document_id"])?$get_vars["document_id"]:""));
                        $result_update = mysql_query($sql_update);
                        header("location: ".$return_url_tab);
                    }
                }
                break;
            case "Cancel":
                header("location: ".$return_url_tab);
                break;
            }
        }
    }

    function _dictaux_insert_sql($word) {
    //
    // Helper of process_manage_term(): builds the INSERT that stores $word,
    // together with its metaphone, digrams and 4-character header, in the
    // auxiliary dictionary.
    //
        $ngram_string = implode("|", spelling::create_ngram($word, 2));

        return "insert into m_spelling_dictaux (word_str, word_metaphone, word_ngram, word_header) ".
               "values ('".mysql_real_escape_string($word)."', ".
               "'".mysql_real_escape_string(metaphone($word))."', ".
               "'".mysql_real_escape_string($ngram_string)."', ".
               "'".mysql_real_escape_string(substr($word, 0, 4))."')";
    }

    function _spellchecked_update_sql($text, $document_id) {
    //
    // Helper of process_manage_term(): builds the UPDATE that stores $text
    // as the spellchecked text of document $document_id.
    //
        return "update m_textlab_document set ".
               "spellchecked_text = '".mysql_real_escape_string($text)."' ".
               "where document_id = '".mysql_real_escape_string($document_id)."'";
    }

    function _replace_preserving_case($text, $misspelled, $replacement) {
    //
    // Helper of process_manage_term(): replaces every whole-word occurrence
    // of $misspelled in $text by $replacement. The first letter is matched
    // in upper and lower case, the rest of the word exactly.
    //   occurrence spelled as typed in $misspelled -> $replacement as typed
    //   occurrence starting with the upper-case letter -> ucfirst($replacement)
    //   occurrence starting with the lower-case letter -> $replacement with
    //                                                     lower-case first letter
    // The text is split instead of using preg_replace(), so "$" or "\" in the
    // replacement are never interpreted as back-references.
    //
        $typed_first = substr($misspelled, 0, 1);
        $upper_first = strtoupper($typed_first);
        $lower_first = strtolower($typed_first);
        $rest = (string)substr($misspelled, 1);

        $search = "/\b((?:".preg_quote($upper_first, "/")."|".preg_quote($lower_first, "/").")".
                  preg_quote($rest, "/").")\b/";

        // with PREG_SPLIT_DELIM_CAPTURE the matched words sit at the odd positions
        $pieces = preg_split($search, $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $text;
        }

        for ($i=1; $i<count($pieces); $i+=2) {
            $matched_first = substr($pieces[$i], 0, 1);
            if ($matched_first === $typed_first) {
                $pieces[$i] = $replacement;
            } elseif ($matched_first === $upper_first) {
                $pieces[$i] = ucfirst($replacement);
            } else {
                $pieces[$i] = strtolower(substr($replacement, 0, 1)).(string)substr($replacement, 1);
            }
        }
        return implode("", $pieces);
    }

    function form_manage_term() {
    //
    // Prints the spelling form: spelling source, possibly misspelled word,
    // list of possible corrections, replacement term and the action buttons
    // handled by process_manage_term().
    // input: menu id, GET variables, POST variables, textlab record
    //
    // Every value written into the page is escaped with htmlspecialchars()
    // (ENT_QUOTES, because the attributes use single quotes).
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $get_vars = $arg_list[1];
            $post_vars = $arg_list[2];
            $textlab = $arg_list[3];

            $term = (isset($get_vars["spellcheck"])?$get_vars["spellcheck"]:"");

            // both text fields start with the clicked term, or else the term posted last
            $prefill = ($term?$term:(isset($post_vars["misspelled"])?$post_vars["misspelled"]:""));
            $prefill_html = htmlspecialchars($prefill, ENT_QUOTES);

            $action = $_SERVER["PHP_SELF"]."?page=PROCESSING".
                      "&menu_id=".urlencode(isset($get_vars["menu_id"])?$get_vars["menu_id"]:"").
                      (!empty($get_vars["document_id"])?"&document_id=".urlencode($get_vars["document_id"]):"").
                      "&tab=".urlencode(isset($get_vars["tab"])?$get_vars["tab"]:"").
                      "&spellcheck=".urlencode($term)."#spellcheck";

            print "<a name='spellcheck'></a>";
            print "<span class='newstitle'>".FTITLE_SPELLING_CHECK."</span><br>";
            print "<span class='tinylight'>".INSTR_SPELLING_CHECK."</span><br>";
            print "<table width='450' cellpadding='5' cellspacing='1' style='border: 1px solid black'>";
            print "<form action = '".htmlspecialchars($action, ENT_QUOTES)."' name='form_manage_term' method='post'>";
            print "<tr valign='top'><td>";
            print "<table cellpadding='2' cellspacing='1'>";
            print "<tr><td>";
            print "<span class='boxtitle'>".LBL_SPELLING_SOURCE."</span><br> ";
            $post_vars["spellsource"] = (isset($post_vars["spellsource"])?$post_vars["spellsource"]:"L"); // Lexicon is default source
            print "<input type='radio' name='spellsource' value='P' ".($post_vars["spellsource"]=="P"?"checked":"")."> PSpell<br>";
            print "<input type='radio' name='spellsource' value='L' ".($post_vars["spellsource"]=="L"?"checked":"")."> UMLS Lexicon<br>";
            print "</td></tr>";
            print "<tr><td>";
            print "<span class='boxtitle'>".LBL_POSSIBLE_MISPELLED."</span><br> ";
            print "<input type='text' class='textbox' size='20' maxlength='20' name='misspelled' value='".$prefill_html."' style='border: 1px solid #000000'><br>";
            print "</td></tr>";
            print "<tr><td>";
            print "<span class='boxtitle'>".LBL_POSSIBLE_CORRECTIONS."</span><br> ";
            if ($word_list = spelling::word_list($menu_id, $post_vars, $get_vars, $term)) {
                print "<span class='tinylight'>".INSTR_SPELLCORRECTION_LIST."</span><br>";
                print "<select size='10' name='correction_list'>";
                foreach ($word_list as $key=>$value) {
                    $value_html = htmlspecialchars($value, ENT_QUOTES);
                    print "<option value='".$value_html."'>".$value_html."</option>";
                }
                print "</select>";
            } else {
                print "<font color='red'>None</font>";
            }
            print "</td></tr>";
            print "<tr><td><br>";
            print "<span class='boxtitle'>".LBL_REPLACEMENT_TERM."</span><br> ";
            print "<input type='text' class='textbox' size='20' maxlength='20' name='replacement' value='".$prefill_html."' style='border: 1px solid #000000'><br>";
            print "</td></tr>";
            print "</table>";
            print "</td><td>";
            print "<table cellpadding='2' cellspacing='1'>";
            print "<tr><td>";
            print "<input type='hidden' name='spellcheck' value='".htmlspecialchars($term, ENT_QUOTES)."'>";
            // NOTE(review): other methods read $textlab["cleaned_text"]; the key "cleaned"
            // used here may be a typo. It is kept because the posted field is not read
            // by process_manage_term() - confirm before changing.
            print "<input type='hidden' name='cleaned_text' value='".htmlspecialchars((isset($textlab["cleaned"])?$textlab["cleaned"]:""), ENT_QUOTES)."'>";
            print "<input type='submit' value = 'Change Dictionary' class='textbox' name='submitdict' title='Changes dictionary used for spelling check.' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "<tr><td>";
            print "<input type='submit' value = 'Add to Dictionary' class='textbox' name='submitdict' title='Adds to the dictionary the word from the MISSPELLED WORD field.' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "<tr><td>";
            print "<input type='submit' value = 'Replace from Correction List' class='textbox' name='submitdict' title='Replaces misspelled word with word selected from the CORRECTION LIST.' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "<tr><td>";
            print "<input type='submit' value = 'Replace using Replacement Term' class='textbox' name='submitdict' title='Replaces misspelled word with word selected from the REPLACEMENT TERM field.' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "<tr><td>";
            print "<input type='submit' value = 'Add to Ignore List' class='textbox' name='submitdict' title='Adds the word in the MISPELLED WORD field to the Ignore list.' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "<tr><td>";
            print "<input type='submit' value = 'Cancel' class='textbox' name='submitdict' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "</table>";
            print "</td></tr>";
            print "</form>";
            print "</table>";
        }
    }

    function _build_dict() {
    //
    // Menu handler "Build Dictionary": prints the start form and builds the
    // spelling dictionary in steps. Each finished step redirects to the next
    // one; the step number and the record counts travel in the URL.
    //   Start      empties m_spelling_dictionary and loads umls.LRAGR.STR
    //   dictstep=2 adds the words of wn.wn_synset
    //   dictstep=3 computes metaphone, n-grams and header (init_dictionary)
    //   dictstep=4 prints the summary
    // input: menu id, POST variables, GET variables
    //
    // NOTE(review): the redirects are issued after output has been printed,
    // so they presumably rely on output buffering being active - confirm.
    //
        $post_vars = array();
        $get_vars = array();
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
        }

        // values taken from the request; the counts are shown on the page, hence the int casts
        $page = (isset($get_vars["page"])?$get_vars["page"]:"");
        $menu = (isset($get_vars["menu_id"])?$get_vars["menu_id"]:"");
        $dictstep = (isset($get_vars["dictstep"])?$get_vars["dictstep"]:"");
        $recs1 = (isset($get_vars["recs1"])?(int)$get_vars["recs1"]:0);
        $recs2 = (isset($get_vars["recs2"])?(int)$get_vars["recs2"]:0);
        $submit = (isset($post_vars["submitdict"])?$post_vars["submitdict"]:"");

        $step_url = $_SERVER["PHP_SELF"]."?page=".$page."&menu_id=".$menu;

        // NOTE(review): the form posts to page=PROCESSING although the redirects
        // below use the current page - confirm that this is intended.
        $action = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".urlencode($menu).
                  (!empty($get_vars["document_id"])?"&document_id=".urlencode($get_vars["document_id"]):"");

        print "<table width='500'>";
        print "<form action = '".htmlspecialchars($action, ENT_QUOTES)."' name='form_dumpdata' method='post'>";
        print "<tr valign='top'><td>";
        print "<span class='boxtitle'>".LBL_BUILD_DICT."</span><br><br> ";
        print "This module builds the online dictionary used for checking spelling. It requires that the UMLS LRAGR table ".
              "and WordNet have been set up. This module uses the 2005AB version of the UMLS Lexicon and the 2.0 version of ".
              "Wordnet from Princeton University. To build the dictionary click on <b>Start</b>.";
        print "</td></tr>";
        print "<tr><td><br>";
        print "<input type='submit' value = 'Start' class='textbox' name='submitdict' style='border: 1px solid #000000'> ";
        print "<input type='submit' value = 'Cancel' class='textbox' name='submitdict' style='border: 1px solid #000000'> ";
        print "</td></tr>";
        print "</form>";
        print "</table><br>";

        if ($submit == "Start") {
            // Load LRAGR word list
            print "<font color='red'>Loading LRAGR terms...</font><br>";
            module::execsql("TRUNCATE m_spelling_dictionary");
            if (module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT STR AS word_str FROM umls.LRAGR GROUP BY STR")) {
                $loaded_lragr = spelling::record_count();
                header("location: ".$step_url."&dictstep=2&recs1=".$loaded_lragr);
            }
        }
        if ($dictstep==2 && $recs1>0) {
            // Load Wordnet word list
            print "<font color='red'>Loaded LRAGR terms...".$recs1." records.</font><br>";
            print "<font color='red'>Loading Wordnet terms...</font><br>";
            if (module::execsql("INSERT IGNORE INTO m_spelling_dictionary (word_str) ".
                "SELECT word FROM wn.wn_synset;")) {
                // records added by this step = total - records of step 1
                $loaded_wordnet = spelling::record_count()-$recs1;
                header("location: ".$step_url."&dictstep=3&recs1=".$recs1."&recs2=".$loaded_wordnet);
            }
        }
        if ($dictstep==3 && $recs2>0) {
            // build special fields (ngram, metaphone, header)
            print "<font color='red'>Loaded LRAGR terms...".$recs1." records.</font><br>";
            print "<font color='red'>Loaded Wordnet terms...".$recs2." records.</font><br>";
            print "<font color='red'>Computing metaphones and n-grams...</font><br>";
            if (spelling::init_dictionary()) {
                header("location: ".$step_url."&dictstep=4&recs1=".$recs1."&recs2=".$recs2);
            }
        }
        if ($dictstep==4) {
            print "<font color='red'>Loaded LRAGR terms...".$recs1." records.</font><br>";
            print "<font color='red'>Loaded Wordnet terms...".$recs2." records.</font><br>";
            print "<font color='red'>Metaphones and N-grams complete.</font><br>";
            print "<font color='red'>Dictionary has ".spelling::record_count()." records.</font><br>";
        }

    }

    function init_dictionary() {
    //
    // Fills in the derived columns of every word in m_spelling_dictionary:
    // metaphone, digrams joined by "|" and the first four characters (header).
    // Underscores in a word are replaced by spaces first.
    // output: true when the control word 'zygomorphic' carries a metaphone
    //         afterwards (i.e. the run reached the end of the alphabet),
    //         false otherwise
    //
    // All values are escaped before they go into the UPDATE; without this any
    // word containing an apostrophe would be skipped silently.
    //
        $sql = "SELECT word_id, word_str from m_spelling_dictionary order by word_str";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return false;
        }

        while (list($id, $word) = mysql_fetch_array($result)) {
            // compound words are stored with "_" between their parts
            if (strpos($word, "_") !== false) {
                $word = str_replace("_", " ", trim($word));
            }

            $metaphone = metaphone($word);
            $ngram_string = implode("|", spelling::create_ngram($word, 2));
            $header = substr($word, 0,4);

            $sql_update = "update m_spelling_dictionary set ".
                          "word_str = '".mysql_real_escape_string($word)."',".
                          "word_metaphone = '".mysql_real_escape_string($metaphone)."', ".
                          "word_ngram = '".mysql_real_escape_string($ngram_string)."', ".
                          "word_header = '".mysql_real_escape_string($header)."' ".
                          "where word_id = '".mysql_real_escape_string($id)."'";

            // best effort: a failing update (e.g. a duplicate word_str after
            // the "_" replacement) does not stop the run
            $result_update = mysql_query($sql_update);
        }

        // test that the dictionary set up reaches the end
        $sql_metaphone = "select word_metaphone from m_spelling_dictionary where word_str = 'zygomorphic' limit 1";
        $result_metaphone = mysql_query($sql_metaphone);
        if ($result_metaphone && mysql_num_rows($result_metaphone)) {
            if (list($metaphone) = mysql_fetch_array($result_metaphone)) {
                if (strlen($metaphone)>0) {
                    return true;
                }
            }
        }
        return false;
    }

    function record_count() {
    //
    // Counts the rows of m_spelling_dictionary.
    // output: the count as delivered by MySQL; nothing when the query fails
    //
        $count_result = mysql_query("select count(*) from m_spelling_dictionary");
        if (!$count_result) {
            return;
        }
        if (!mysql_num_rows($count_result)) {
            return;
        }

        list($total) = mysql_fetch_array($count_result);
        return $total;
    }

    function baseform() {
    //
    // Looks up the base form (column BAS) of a term in umls.LRAGR.
    // input: search term; it is compared with LIKE, so "%" and "_" in the
    //        term act as wildcards
    // output: BAS of the first matching row; nothing when there is no match,
    //         the query fails or no argument is given
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $searchterm = $arg_list[0];

            // only the first row is used, hence limit 1
            $sql = "select BAS from umls.LRAGR ".
                   "where STR like '".mysql_real_escape_string($searchterm)."' limit 1";
            if ($result = mysql_query($sql)) {
                if (mysql_num_rows($result)) {
                    if (list($baseform) = mysql_fetch_array($result)) {
                        return $baseform;
                    }
                }
            }
        }
    }

    function word_list() {
    //
    // This function comes up with a word list of
    // possible corrections for the misspelled word.
    // input: menu id, POST variables, GET variables, possible misspelled word
    //        ("spellsource" is taken from POST, or else from GET:
    //         "L" = custom dictionary built from Lexicon and Wordnet,
    //         "P" = PSpell)
    // output: list of corrections as an array (empty when nothing was found);
    //         nothing for an unknown spelling source or without arguments
    //
    // For source "L" the candidates are ordered by Levenshtein distance to
    // the term (smallest first), words at the same distance alphabetically.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
            $term = $arg_list[3];

            if (!empty($post_vars["spellsource"])) {
                $spellsource = $post_vars["spellsource"];
            } else {
                $spellsource = (isset($get_vars["spellsource"])?$get_vars["spellsource"]:"");
            }

            switch ($spellsource) {

            case "L": // Lexicon and Wordnet

                // STEP 1: CREATE N-GRAM AND METAPHONE OF INPUT TERM (DIGRAM)
                $ngram_input = spelling::create_ngram($term, 2);
                $metaphone = mysql_real_escape_string(metaphone($term));
                $possible_array = array();

                // STEP 2: LOOK UP SIMILAR METAPHONE IN CUSTOM DICTIONARY
                // join main and auxiliary dictionaries
                $sql = "(select word_str, word_ngram, word_metaphone ".
                       "from m_spelling_dictionary ".
                       "where word_metaphone like '$metaphone') ".
                       "union distinct ".
                       "(select word_str, word_ngram, word_metaphone ".
                       "from m_spelling_dictaux ".
                       "where word_metaphone like '$metaphone') ".
                       "order by word_metaphone limit 1000";
                if ($result = mysql_query($sql)) {
                    while (list($word, $ngram_string, $word_metaphone) = mysql_fetch_array($result)) {
                        // STEP 3: COMPARE N-GRAMS AND GET SCORE
                        $score = spelling::compare_ngrams(explode("|", $ngram_string), $ngram_input);
                        // ADD TO LIST IF > 0.3
                        if ($score>0.3) {
                            $possible_array[] = $word;
                        }
                    }
                }

                // STEP 4: IF WORD HAS AT LEAST 3 CHARACTERS GET DICTIONARY TERMS WITH
                // SAME HEADER AS INPUT TERM
                if (strlen($term)>=3) {

                    // headers to look up: the term itself and the term with "wi" -> "whi"
                    $variants = array_unique(array($term, preg_replace("/(wi)/", "whi", $term)));

                    // assume misspell would be + or - 3 characters
                    $maxlen = strlen($term)+3;
                    $minlen = strlen($term)-3;

                    foreach ($variants as $modified) {
                        $sql = "select word_str, word_ngram ".
                               "from m_spelling_dictionary ".
                               "where word_header = substr('".mysql_real_escape_string($modified)."', 1,4) limit 10";
                        if ($result = mysql_query($sql)) {
                            while (list($word, $ngram_string) = mysql_fetch_array($result)) {
                                // STEP 5: COMPARE N-GRAMS AGAIN
                                $score = spelling::compare_ngrams(explode("|", $ngram_string), $ngram_input);
                                if ($score>0.3 && (strlen($word)<=$maxlen && strlen($word)>=$minlen)) {
                                    // ADD TO LIST IF >0.3
                                    $possible_array[] = $word;
                                }
                            }
                        }
                    }
                }

                // STEP 6: SORT ACCORDING TO LEVENSHTEIN DISTANCE
                // The distance is written as a zero-padded prefix of fixed width, so
                // that the string sort orders it numerically (2 before 10).
                $levarray = array();
                foreach (array_unique($possible_array) as $candidate) {
                    $lev = levenshtein($term, $candidate);
                    if ($lev<0) {
                        // levenshtein() may report -1 for over-long strings; rank those last
                        $lev = 99999;
                    }
                    $levarray[] = sprintf("%05d|%s", $lev, $candidate);
                }
                sort($levarray, SORT_STRING);

                $levsort = array();
                foreach ($levarray as $entry) {
                    // cut off the 5 digits and the "|"
                    $levsort[] = substr($entry, 6);
                }
                return $levsort;

            case "P": // PSpell

                $pspell_link = pspell_new("en", "", "", "", PSPELL_NORMAL);
                if (!$pspell_link) {
                    // dictionary could not be opened: no suggestions
                    return array();
                }
                return pspell_suggest($pspell_link, $term);

            }

        }
    }

    function spellcheck() {
    //
    // other function for spelling
    // not used anymore in application
    //
    // input:  string of space-separated tokens, each written as word_TAG
    // output: string with one line per word rejected by PSpell; each line is
    //         the word wrapped in a red <font> tag, a colon, and the PSpell
    //         suggestions separated by spaces. Empty string when no word
    //         was rejected.
    //         Called without arguments it prints a syntax reminder and
    //         returns nothing.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $inputstring = $arg_list[0];
            // opened on first use so input with nothing to check never loads PSpell
            $pspell_link = null;
            // initialised up front: input with no rejected word yields "" instead
            // of iterating over / returning undefined variables
            $retval = "";
            $tokens = explode(" ", $inputstring);
            foreach($tokens as $key=>$value) {
                // a token without "_" has no tag; pad so list() never reads a
                // missing offset (the word is then checked like any untagged word)
                list($word, $tag) = array_pad(explode("_", $value), 2, "");
                // tokens tagged MC are never spell checked
                if ($tag === "MC") {
                    continue;
                }
                if ($pspell_link === null) {
                    $pspell_link = pspell_new("en", "", "", "", PSPELL_FAST);
                }
                if (!pspell_check($pspell_link, $word)) {
                    $list = "";
                    $suggestions = pspell_suggest($pspell_link, $word);
                    if (is_array($suggestions)) {
                        foreach ($suggestions as $list_item) {
                            $list .= $list_item." ";
                        }
                    }
                    $retval .= "<font color='red'>".$word."</font>: ".$list."\n";
                }
            }
            return $retval;
        } else {
            print "SYNTAX: spellcheck(inputString String)";
        }
    }

    function is_correct_spelling() {
    //
    // Looks the word up in the main (m_spelling_dictionary) and the
    // auxiliary (m_spelling_dictaux) dictionary tables.
    //
    // input: potentially misspelled word
    // output: true or false
    //         true correct spelling (word found in either table)
    //         false wrong spelling or word not found; also returned for
    //               a missing or blank argument and when the query fails
    //
    // NOTE(review): the original header said this uses both the UMLS
    // Lexicon and PSpell, but the PSpell check sat in the else-branch of a
    // row fetch that always succeeds once the query reports rows, so it
    // never ran. Results therefore remain dictionary-only here; confirm
    // whether a PSpell fallback for words missing from the tables was
    // intended before adding one.
    //
        if (!func_num_args()) {
            return false;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0]; // input string

        if (strlen(trim($inputstring)) == 0) {
            return false;
        }

        // escape before interpolating: words such as "don't" otherwise
        // terminate the SQL string literal
        $escaped = mysql_real_escape_string($inputstring);

        //
        // select from both main and auxiliary dictionary
        //
        $sql = "(select word_str ".
               "from m_spelling_dictionary ".
               "where word_str = '$escaped') ".
               "union distinct ".
               "(select word_str ".
               "from m_spelling_dictaux ".
               "where word_str = '$escaped')";
        $result = mysql_query($sql);
        if (!$result) {
            return false;
        }
        return (mysql_num_rows($result) > 0);
    }

    function in_ignore_list() {
    //
    // Checks whether a term is listed in m_spelling_ignorelist.
    // Both sides of the comparison are lowercased before matching.
    //
    // input: term to look up
    // output: true or false
    //         true term matches an ignore_str entry
    //         false no match; also returned for a missing or blank
    //               argument and when the query fails
    //
        if (!func_num_args()) {
            return false;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0]; // input string

        if (strlen(trim($inputstring)) == 0) {
            return false;
        }

        // The term is matched with LIKE, so "%" and "_" inside it would act
        // as wildcards (a term "%" would match every entry). Neutralise the
        // LIKE metacharacters first (backslash first, so the escapes added
        // for % and _ are not doubled), then escape for the SQL literal.
        $needle = str_replace(
            array("\\", "%", "_"),
            array("\\\\", "\\%", "\\_"),
            strtolower($inputstring)
        );
        $needle = mysql_real_escape_string($needle);

        $sql = "select ignore_str ".
               "from m_spelling_ignorelist ".
               "where lower(ignore_str) like '".$needle."'";
        $result = mysql_query($sql);
        if (!$result) {
            return false;
        }
        return (mysql_num_rows($result) > 0);
    }

    function create_ngram($string, $n) {

    //
    // Author: Mikko Saari
    // Source: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // Splits $string into overlapping substrings of $n characters.
    // The string is first padded with $n spaces in total, shared between
    // both ends by str_pad(), so its first and last characters also show
    // up in n-grams that contain blanks.
    //
    // input:  $string  text to split
    //         $n       size of each n-gram
    // output: array of n-grams, in left-to-right order
    //
        $padded = str_pad($string, strlen($string) + $n, " ", STR_PAD_BOTH);

        // offset of the last n-gram that still fits in the padded string
        $last_start = strlen($padded) - $n;

        $ngrams = array();
        $offset = 0;
        while ($offset <= $last_start) {
            $ngrams[] = substr($padded, $offset, $n);
            $offset++;
        }

        return $ngrams;
    }

    function compare_ngrams($ngram1, $ngram2) {

    //
    // Author: Mikko Saari
    // Source: http://www.melankolia.net/archives/2004/11/ngram_string_ma.html
    //
    // Similarity score of two n-gram lists: the number of entries of
    // $ngram1 that also occur in $ngram2, divided by the number of
    // distinct n-grams found in the two lists together.
    // Lists sharing nothing score 0; identical lists without repeated
    // n-grams score 1.
    //
    // input:  $ngram1, $ngram2  arrays of n-gram strings
    // output: numeric score; 0 when both lists are empty
    //
    // NOTE: array_intersect() keeps repeated entries of $ngram1, so a list
    // with repeated n-grams can score above 1 and swapping the arguments
    // can change the result. Left as is because callers compare the score
    // against fixed thresholds.
    //

        $sum = array_unique(array_merge($ngram1, $ngram2));
        $total = count($sum);
        if ($total == 0) {
            // nothing to compare; avoids dividing by zero
            return 0;
        }
        $intersection = array_intersect($ngram1, $ngram2);
        $score = count($intersection) / $total;
        return $score;
    }
}
?>
