<?
class umlstools extends module {

    //
    // Author: Herman Tolentino (unless specified)
    //
    // AEFI NLP Project
    // Vaccine Analytic Unit - Brighton Collaboration - National Library of Medicine
    //
    // IMPORTANT:
    // This assumes that the UMLS Lexicon tables have been set up and that the LRAGR table
    // has been indexed using STR.
    //

    /**
     * Constructor: stores the module's identifying metadata on the instance
     * (module name, description, author and a version string that ends with
     * today's date).
     *
     * Do not forget to update the version prefix when the module changes.
     */
    function umlstools() {
        // 0.1: BEGIN
        $this->module = "umlstools";
        $this->description = "AEFI Module - UMLS Tools";
        $this->author = 'Herman Tolentino MD';
        $this->version = "0.1-".date("Y-m-d");
    }

    /**
     * Inserts this module's dependencies (module_dependencies) by calling
     * module::set_dep() once per required module, in a fixed order.
     */
    function init_deps() {
        $required_modules = array("module", "textlab");

        foreach ($required_modules as $dependency) {
            module::set_dep($this->module, $dependency);
        }
    }

    /**
     * Inserts the language directives (label key => English text) this module
     * needs, through module::set_lang().
     *
     * NOTE(review): the first key ends with a colon, unlike the others;
     * confirm against the code that reads these labels.
     */
    function init_lang() {
        $language = "english";

        // each row: label key, displayed text
        $directives = array(
            array("LBL_CUI_DETAIL_FOR_TOKEN:", "CUI DETAIL FOR TOKEN"),
            array("LBL_CHANGE_TYPE", "TYPE OF CHANGE"),
            array("INSTR_CHANGE_TYPE", "SELECT WHETHER CHANGE WILL BE GLOBAL OR TO SPECIFIC ITEM ONLY."),
            array("FTITLE_MANAGE_CUI", "DOCUMENT CONCEPT MANAGER"),
            array("INSTR_MANAGE_CUI", "SELECT APPROPRIATE CONCEPT AND PART-OF-SPEECH TAG TO ASSIGN TO TERM. THE FIGURE AFTER THE CUI IS ITS AVERAGE DISTANCE FROM ALL OTHER CONCEPTS IN THE DOCUMENT."),
        );

        foreach ($directives as $entry) {
            module::set_lang($entry[0], $language, $entry[1], "Y");
        }
    }

    /**
     * Registers the module details (description, version, author, module
     * name) through module::set_detail().
     *
     * An optional first argument is read as the module id; it is not used
     * by the visible code.
     */
    function init_menu() {
        $arg_list = func_get_args();
        if (count($arg_list) > 0) {
            $module_id = $arg_list[0];
        }

        // put in more details
        module::set_detail(
            $this->description,
            $this->version,
            $this->author,
            $this->module
        );
    }

    // Currently a no-op: this module defines no statistics setup.
    // NOTE(review): presumably a hook called by the module framework - confirm.
    function init_stats() {
    }

    // Currently a no-op: this module defines no help setup.
    // NOTE(review): presumably a hook called by the module framework - confirm.
    function init_help() {
    }

    /**
     * SQL setup entry point. Reads an optional first argument as the module
     * id and otherwise does nothing: no statements are executed here.
     */
    function init_sql() {
        $arg_list = func_get_args();
        if (count($arg_list) > 0) {
            $module_id = $arg_list[0];
        }
    }

    // Currently a no-op: no tables are dropped by this module.
    function drop_tables() {

    }

    // --------------- CUSTOM MODULE FUNCTIONS ------------------


    /**
     * Counts pairs of distinct concepts (CUIs) found in the cuitagged_text of
     * every row of m_textlab_document and writes each count to
     * m_textlab_bigrams.sentence_frequency, for pairs that already have a row
     * in that table.
     *
     * Tokens are expected in the form term_tag_CUI. Empty concepts and the
     * placeholder 'C0000000' are ignored.
     */
    function sentence_bigram_inventory() {

        // pair counts: $cui_pair[concept1][concept2] => frequency
        $cui_pair = array();

        $sql = "select cuitagged_text from m_textlab_document";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        // PROCESS DOCUMENT
        while (list($inputstring) = mysql_fetch_array($result)) {
            $sentences = preg_split("/(\._\.)|(!_\.)|(\?_\.)|(\n\r)/", $inputstring);

            // NOTE(review): the concept list is reset per document, not per
            // sentence, so pairs are re-counted cumulatively after every
            // sentence. Kept as in the original; confirm whether a
            // per-sentence reset was intended.
            $cui_array = array();

            // PROCESS SENTENCE
            foreach ($sentences as $sentence) {
                $tokens = explode(" ", $sentence);

                // CREATE CUI ARRAY
                foreach ($tokens as $token) {
                    if (!preg_match("/(C[0-9]{7})/", $token)) {
                        continue;
                    }
                    // split the CURRENT token (the original indexed the token
                    // list with the sentence counter); pad so that a token
                    // with fewer than three parts yields an empty concept
                    $parts = array_pad(explode("_", $token), 3, "");
                    $concept = $parts[2];
                    if ($concept !== "C0000000" && $concept !== "") {
                        $cui_array[] = $concept;
                    }
                }

                // CREATE CUI PAIRS
                foreach ($cui_array as $concept1) {
                    foreach ($cui_array as $concept2) {
                        if ($concept1 === $concept2) {
                            continue;
                        }
                        if (!isset($cui_pair[$concept1][$concept2])) {
                            $cui_pair[$concept1][$concept2] = 0;
                        }
                        $cui_pair[$concept1][$concept2]++;
                    }
                }
            } // END SENTENCE
        } // END INPUTSTRING

        // store the frequency for every pair that already exists in the table
        foreach ($cui_pair as $cui1 => $cui2_array) {
            foreach ($cui2_array as $cui2 => $frequency) {
                $id1 = mysql_real_escape_string($cui1);
                $id2 = mysql_real_escape_string($cui2);
                $count = (int) $frequency;

                $sql_pair = "select * from m_textlab_bigrams where concept1_id = '$id1' and concept2_id = '$id2'";
                $result_pair = mysql_query($sql_pair);
                if ($result_pair && mysql_num_rows($result_pair)) {
                    $sql_update = "update m_textlab_bigrams set ".
                                  "sentence_frequency = $count ".
                                  "where concept1_id = '$id1' and concept2_id = '$id2'";
                    $result_update = mysql_query($sql_update);
                }
            }
        }

    }

    /**
    *   Creates the bigram inventory, a bigram being a pair of concepts
    *   together with the pair frequency and the semantic node distance.
    *
    *   The m_textlab_bigrams table is truncated first. Pairs come from a
    *   self-join of m_textlab_terms on document_id; a pair is skipped when
    *   its reverse (concept2, concept1) is already stored.
    */
    function bigram_inventory() {

        // STEP 1: delete all table contents of the m_textlab_bigrams table
        module::execsql("truncate m_textlab_bigrams");

        // STEP 2: self-join the m_textlab_terms table and get the
        // frequency of co-occurrence within the same document
        $sql1 = "select t1.concept_id, t2.concept_id, count(t1.concept_id) ".
                "from m_textlab_terms t1, m_textlab_terms t2  ".
                "where t1.concept_id <> t2.concept_id ".
                "and t1.concept_id <> '' and t2.concept_id <> '' ".
                "and t1.concept_id <> 'C0000000' and t2.concept_id <> 'C0000000' ".
                "and t1.document_id = t2.document_id ".
                "group by t1.concept_id, t2.concept_id;";
        $result1 = mysql_query($sql1);
        if (!$result1 || !mysql_num_rows($result1)) {
            return;
        }

        while (list($concept1_id, $concept2_id, $frequency) = mysql_fetch_array($result1)) {
            // STEP 3: determine node distance between 2 concepts
            $c = new concept();
            $distance = $c->semantic_distance($concept1_id, $concept2_id);

            // escape every value that is placed inside a SQL literal
            $id1 = mysql_real_escape_string($concept1_id);
            $id2 = mysql_real_escape_string($concept2_id);
            $freq = mysql_real_escape_string((string) $frequency);
            $dist = mysql_real_escape_string((string) $distance);

            // STEP 4: update m_textlab_bigrams table
            // skip the pair when the reversed pair is already stored
            $sql_pair = "select * from m_textlab_bigrams where concept1_id = '$id2' and concept2_id = '$id1'";
            $result_pair = mysql_query($sql_pair);
            if (!$result_pair || mysql_num_rows($result_pair)) {
                continue;
            }

            $sql_insert = "insert into m_textlab_bigrams (concept1_id, concept2_id, frequency, distance) ".
                          "values ('$id1', '$id2', '$freq', '$dist')";
            if (!mysql_query($sql_insert)) {
                // insert failed: fall back to bumping the existing row.
                // (The original had a stray comma before "where", which made
                // this statement a SQL syntax error.)
                $sql_update = "update m_textlab_bigrams set ".
                              "frequency = frequency + 1 ".
                              "where concept1_id = '$id1' and concept2_id = '$id2'";
                $result_update = mysql_query($sql_update);
            }
        }
    }

    /**
     * Builds compound terms from the token at $current_pos and the tokens
     * that follow it, and looks each one up in umls.MRXNS_ENG.
     *
     * The words of the compound term are sorted before each lookup.
     *
     * @param array $tokens        tagged tokens; the part before the first "_" is the term
     * @param int   $current_pos   index of the first token of the phrase
     * @param int   $lookahead_num look-ahead bound; tokens at offsets 1 .. $lookahead_num-1 are appended
     * @return array list of "CUI|normalized string" entries for multi-word
     *               matches; always an array, empty when nothing matched
     */
    function lookup_noun_phrases($tokens, $current_pos, $lookahead_num) {

        $phrases = array();

        if (!isset($tokens[$current_pos])) {
            return $phrases;
        }

        $first_parts = explode("_", $tokens[$current_pos]);
        $compound_term = $first_parts[0];

        for ($j=1; $j<$lookahead_num; $j++) {
            // stop at the end of the token list instead of reading past it
            // (the original repeated the previous lookup, producing duplicates)
            if (!isset($tokens[$current_pos+$j])) {
                break;
            }
            $next_parts = explode("_", $tokens[$current_pos+$j]);

            // STEP 3: create compound token
            $compound_term .= " ".$next_parts[0];

            // STEP 4: normalize compound term
            $norm_compound = explode(" ", $compound_term);
            if (sort($norm_compound)) {
                $compound_term = implode(" ", $norm_compound);
            }

            // STEP 5: look it up in normalized strings
            $needle = mysql_real_escape_string(trim(strtolower($compound_term)));
            $sql = "select NSTR, CUI ".
                   "from umls.MRXNS_ENG ".
                   "where LAT = 'ENG' and NSTR like '".$needle."'";
            $result = mysql_query($sql);
            if (!$result || !mysql_num_rows($result)) {
                continue;
            }
            while (list($ns, $cui) = mysql_fetch_array($result)) {
                // keep multi-word normalized strings only
                if (count(explode(" ", $ns))>1) {
                    $phrases[] = "$cui|$ns";
                }
            }
        }
        return $phrases;

    }

    /**
     * Renders space-separated phrase_tag_CUI tokens as HTML, one per line,
     * with the CUI turned into a link back to the PROCESSING page.
     *
     * Arguments (positional): menu id, post vars, get vars, input string.
     * Returns the HTML string, or nothing when called without arguments.
     */
    function nounphrase_format() {
        if (!func_num_args()) {
            return;
        }

        list($menu_id, $post_vars, $get_vars, $inputstring) = func_get_args();

        // part of the link that is the same for every token
        $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".$get_vars["menu_id"]."&document_id=".$get_vars["document_id"]."&tab=".$get_vars["tab"];

        $outputstring = "";
        foreach (explode(" ", $inputstring) as $index => $token) {
            $sequence = $index + 1; // links are numbered from 1
            list($phrase, $tag, $cui) = explode("_", $token);
            $outputstring .= $phrase."_".$tag."_<a href='".$link_base."&cuitagged=".$phrase."_".$tag."_".$cui."&phr=".$sequence."#cuidetail'><b>".$cui."</b></a><br>";
        }

        return $outputstring;
    }

    /**
     * Turns concept-tagged text into HTML-marked text.
     *
     * input: concept tagged text
     * output: html marked text
     *
     * Arguments (positional): menu id, post vars, get vars, input string,
     * text type. Each space-separated token is run through an ordered list
     * of regular expressions that wrap it in coloured spans; unless the text
     * type is "predicted", a trailing CUI is also turned into a link that
     * carries the token's 1-based position.
     */
    function tag_highlight() {
        if (!func_num_args()) {
            return;
        }

        list($menu_id, $post_vars, $get_vars, $inputstring, $text_type) = func_get_args();

        // patterns are applied in this order, each one to the output of the previous
        $search = array(
            0 => "/\b([a-zA-Z0-9-]+)(_N)([a-zA-Z0-9_]+)\b/", // nouns
            1 => "/\b([a-zA-Z0-9]+)(_V)([a-zA-Z0-9_]+)\b/", // verbs
            2 => "/\b([a-zA-Z0-9]+)(_J)([a-zA-Z0-9_]+)\b/", // adjectives
            3 => "/\b([a-zA-Z0-9]+)(_R)([a-zA-Z0-9_+]+)\b/", // adverbs
            4 => "/\b([Nn]o_D[_DBC0-9]{0,10})\b/", // negatives
            5 => "/\b([Uu]nlikely_RR)\b/", // negatives
            6 => "/\b([Dd]enie[sd]_[_0-9A-Za-z])\b/", // negatives
            7 => "/\b([Nn]ot_RR)\b/", // negatives
            8 => "/\b([Ww]ithout_[_0-9A-Za-z])\b/", // negatives
            9 => "/([a-zA-Z0-9-]+)(_[A-Z]{2,4})(_)(C[0-9]{7})\b/", // trailing CUI
            10 => "/\b([0-9H]+_TIME[UDH])\b/",
            11 => "/\b([.^0-9CF]+_TEMP[CF])\b/",
        );

        $negative = "<span style='background-color:#FF99FF'>$1$2$3</span>";
        $replace = array(
            0 => "<span style='background-color:#99CCFF'>$1$2$3</span>",
            1 => "<span style='background-color:#FFCC00'>$1$2$3</span>",
            2 => "<span style='background-color:#CCFF66'>$1$2$3</span>",
            3 => "<span style='background-color:#FF6600'>$1$2$3</span>",
            4 => $negative,
            5 => $negative,
            6 => $negative,
            7 => $negative,
            8 => $negative,
            9 => "$1$2$3$4", // replaced per token below unless text is "predicted"
            10 => "<span style='background-color:#000000'>&nbsp;<font color='#FFFF00'><b>$1</b></font>&nbsp;</span>",
            11 => "<span style='background-color:#FF3300'>&nbsp;<font color='#FFFFFF'><b>$1</b></font>&nbsp;</span>",
        );

        $linked = ($text_type != "predicted");
        if ($linked) {
            // everything in the CUI link except the token sequence number
            $link_head = "$1$2$3<a href='".$_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".$get_vars["menu_id"]."&document_id=".$get_vars["document_id"]."&tab=".$get_vars["tab"]."&cuitagged=$1$2$3$4&seq=";
            $link_tail = "&text=$text_type#cuidetail'><b>$4</b></a>";
        }

        $colored_text = array();
        foreach (explode(" ", $inputstring) as $index => $item) {
            if ($linked) {
                $replace[9] = $link_head.($index + 1).$link_tail;
            }
            $colored_text[] = preg_replace($search, $replace, $item);
        }

        return implode(" ", $colored_text);
    }

    /**
     * Lists the non-empty concept ids stored in m_textlab_terms for the
     * document named by $get_vars["document_id"], each rendered as a link to
     * the PROCESSING page followed by <br>.
     *
     * Arguments (positional): menu id, post vars, get vars.
     * Returns the HTML string ("" when the document has no concepts or the
     * query fails), or nothing when called without arguments.
     */
    function concept_list() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = $arg_list[2];

        // always defined, so callers get a string even when nothing is found
        $concept_list = "";

        // document_id comes from request variables: escape it for SQL
        $sql = "select concept_id from m_textlab_terms where concept_id <> '' and document_id = '".
               mysql_real_escape_string($get_vars["document_id"])."'";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return $concept_list;
        }

        // request values are URL-encoded before they are placed in the href
        $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".urlencode($get_vars["menu_id"]).
                     "&document_id=".urlencode($get_vars["document_id"]).
                     "&tab=".urlencode($get_vars["tab"]);

        while (list($concept) = mysql_fetch_array($result)) {
            $concept_list .= "<a href='".$link_base."&cui=".urlencode($concept)."' class='groupmenu'>".
                             htmlspecialchars($concept, ENT_QUOTES)."</a><br>";
        }
        return $concept_list;
    }

    /**
     * Counts the part-of-speech markers in tagged text: every "_" followed
     * by one of N, V, D, J, R or I.
     *
     * Takes the tagged text as first argument; returns the count, or nothing
     * when called without arguments.
     */
    function token_count() {
        $arg_list = func_get_args();
        if (count($arg_list) == 0) {
            return;
        }

        $tagged_text = $arg_list[0];
        preg_match_all("/(_[NVDJRI])/", $tagged_text, $found);

        return count($found[0]);
    }

    /**
     * Counts the concept identifiers in tagged text: every "_C" followed by
     * exactly seven digits and a word boundary.
     *
     * Takes the tagged text as first argument; returns the count, or nothing
     * when called without arguments.
     */
    function concept_count() {
        $arg_list = func_get_args();
        if (count($arg_list) == 0) {
            return;
        }

        $tagged_text = $arg_list[0];
        preg_match_all("/(_)(C[0-9]{7})\b/", $tagged_text, $found);

        // group 2 holds the identifiers themselves
        return count($found[2]);
    }

    /**
     * Counts occurrences of the placeholder concept "_C0000000" in tagged
     * text.
     *
     * Takes the tagged text as first argument; returns the count, or nothing
     * when called without arguments.
     */
    function false_concept_count() {
        $arg_list = func_get_args();
        if (count($arg_list) == 0) {
            return;
        }

        $tagged_text = $arg_list[0];
        preg_match_all("/(_)(C[0]{7})\b/", $tagged_text, $found);

        return count($found[0]);
    }

    /**
     * Wraps every token of concept-tagged text that ends in a CUI in a link
     * to the token's detail anchor; other tokens are left untouched.
     *
     * Arguments (positional): menu id, post vars, get vars, concept tagged
     * text. Returns the re-joined text, or nothing when called without
     * arguments.
     */
    function cuilinks() {
        if (!func_num_args()) {
            return;
        }

        list($menu_id, $post_vars, $get_vars, $concept_text) = func_get_args();

        $linked_tokens = array();
        foreach (explode(" ", $concept_text) as $token) {
            // only tokens that carry a "_C" marker are candidates
            if (strpos($token, "_C") !== false) {
                $anchor_html = "<a href='".$_SERVER["PHP_SELF"]."?page=".$get_vars["page"]."&menu_id=".$get_vars["menu_id"]."&document_id=".$get_vars["document_id"]."&token=$1#cuidetail'>$1</a>";
                $token = preg_replace("/^([a-zA-Z0-9_]+_(C[0-9]{7}))$/", $anchor_html, $token);
            }
            $linked_tokens[] = $token;
        }

        return implode(" ", $linked_tokens);
    }


    /**
     * Tells whether m_textlab_terms holds at least one row for a document.
     *
     * NOTE(review): $document_id is placed in the SQL text as-is; confirm
     * callers only pass trusted values.
     *
     * @param mixed $document_id document identifier
     * @return string "YES" when a row exists, "NO" otherwise (also when the query fails)
     */
    function inventory($document_id) {

        $answer = "NO";

        $result = mysql_query("select * from m_textlab_terms where document_id = '$document_id' limit 1");
        if ($result && mysql_num_rows($result)) {
            $answer = "YES";
        }

        return $answer;
    }

    /**
     * Prints an HTML table with the EUI, STR, SCA, AGR and BAS columns of
     * every umls.LRAGR row whose STR equals the given term. Prints nothing
     * when the query fails or finds no rows.
     *
     * NOTE(review): $term is placed in the SQL text as-is; confirm callers
     * only pass trusted values.
     *
     * @param string $term string to look up
     */
    function lexicon_lookup($term) {

        $sql = "select EUI, STR, SCA, AGR, BAS from umls.LRAGR where STR = '$term'";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        print "<table width='100%' cellpadding='2' cellspacing='0'>";
        print "<tr valign='top' bgcolor='#CCFF99'><td>EUI</td><td>STR</td><td>SCA</td><td>AGR</td><td>BAS</td></tr>";
        while (list($eui, $str, $sca, $agr, $bas) = mysql_fetch_array($result)) {
            $cells = "";
            foreach (array($eui, $str, $sca, $agr, $bas) as $value) {
                $cells .= "<td class='tabletext'>$value</td>";
            }
            print "<tr>".$cells."</tr>";
        }
        print "</table>";
    }

    /**
     * Fetches the first definition stored in umls.MRDEF for a CUI.
     *
     * NOTE(review): $cui is placed in the SQL text as-is; confirm callers
     * only pass trusted values.
     *
     * @param string $cui concept identifier
     * @return string|null "DEFINITION: ... [SOURCE: ...]<br>" for the first
     *                     row, nothing when there is no row or the query fails
     */
    function mrdef_lookup($cui) {
        $sql = "select SAB, DEF ".
               "from umls.MRDEF ".
               "where CUI = '$cui' ";

        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        // only the first row is used
        $row = mysql_fetch_array($result);
        if (!$row) {
            return;
        }
        list($source, $definition) = $row;

        return "DEFINITION: $definition [SOURCE: $source]<br>";
    }

    /**
     * Prints an HTML table with word, sense number, type and gloss for every
     * wn.wn_synset row of the given word joined to its wn.wn_gloss entry,
     * ordered by sense number. Prints nothing when the query fails or finds
     * no rows.
     *
     * NOTE(review): $term is placed in the SQL text as-is; confirm callers
     * only pass trusted values.
     *
     * @param string $term word to look up
     */
    function wordnet_lookup($term) {

        $sql = "select s.word, s.sense_number, s.ss_type, g.gloss ".
               "from wn.wn_synset s, wn.wn_gloss g ".
               "where s.synset_id = g.synset_id and s.word = '$term' ".
               "order by s.sense_number";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        print "<table width='100%' cellpadding='2' cellspacing='0'>";
        print "<tr valign='top' bgcolor='#CCFF99'><td>WORD</td><td>SENSE</td><td>TYPE</td><td>GLOSSARY</td></tr>";
        while (list($word, $sense, $type, $gloss) = mysql_fetch_array($result)) {
            $cells = "";
            foreach (array($word, $sense, $type, $gloss) as $value) {
                $cells .= "<td class='tabletext'>$value</td>";
            }
            print "<tr valign='top'>".$cells."</tr>";
        }
        print "</table>";
    }


    /**
     * Wraps a sentence and its phrases in a <filtered> XML fragment.
     *
     * NOTE(review): values are inserted verbatim, without XML escaping -
     * confirm the consumer expects that.
     *
     * @param string $inputstring text placed inside <sentence>
     * @param mixed  $phrases     array of phrase strings; anything that is
     *                            not an array (e.g. null when no phrases were
     *                            found) yields an empty <phrases> element
     * @return string the XML fragment
     */
    function xml_wrap($inputstring, $phrases) {

        // start from a defined string instead of appending to an unset variable
        $xml = "<filtered>\n";
        $xml .= "<sentence>\n";
        $xml .= "$inputstring\n";
        $xml .= "</sentence>\n";
        $xml .= "<phrases>\n";
        // count() on a non-array raises a TypeError on PHP 8, so test the type
        if (is_array($phrases)) {
            foreach ($phrases as $value) {
                $xml .= "<phrase_item>$value</phrase_item>\n";
            }
        }
        $xml .= "</phrases>\n";
        $xml .= "</filtered>";

        return $xml;
    }

    /**
     * Keeps the sentences of tagged text that contain an anchor word, scrubs
     * symbols and punctuation tags from them, and passes each distinct
     * scrubbed sentence through umlstools::phrase_detect_inline().
     *
     * Takes the tagged text as first argument. Returns the concatenated
     * results ("" when no sentence contains an anchor), or nothing when
     * called without arguments.
     */
    function filter() {

        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];

        // Anchor patterns. Built as a list so that no entry can overwrite
        // another: the original assigned index 13 twice, which silently
        // dropped the "symptom" anchor.
        $anchor = array(
            "/\b([Rr]egard)(ing)?(_)/",
            "/\b([Ee]xperience)(d|s)?(_)/",
            "/\b([Rr]eport)(s|ed)?(_)/",
            "/\b([Cc]omplain)(s|ed|t)(_)/",
            "/\b([Ff]ollow)(s|ing|ed)(_)/",
            "/\b([Nn]otic)(e|ing|ed)(_)/",
            "/\b(([Ff]eel)(s|ing)|(felt))(_)/",
            "/\b([Nn]ot)(ed|es|ing)(_)/",
            "/\b([Cc]onfirm)(ed|s|ing)?(_)/",
            "/\b([Ss]tate)(ed|s)?(_)/",
            "/\b([Dd]en)(y|ies|ied)(_)/",
            "/\b([Ii]ndicative)(_)/",
            // accepts the correct spelling as well as the original "ocurrence"
            "/\b([Oo]cc?urrence)(_)/",
            "/\b([Ss]ymptom)(s|atology)?(_)/",
            "/\b([Ss]ign)(s)?(_)/",
            "/\b([Dd]ifficult)(y)?(_)/",
            "/\b([Nn])(o)(_)/",
            "/\b([Uu]n)?([Ll]ikely)(_)/",
            "/\b([Tt]here_EX )(is_VBZ)(_C[0-9]{7})?/",
            "/\b(injection_NN_C0021485 site_NN_C0205145)/",
            "/\b([Pp])(ossible)\b/",
        );

        // STEP 2 tables: take out symbols, remaining punctuation marks
        $searches = array(
            "/(-_:)/",
            "/(:_:)/",
            "/(-_SYM)/",
            "/(,_,)/",
            // NOTE(review): "(A-Z]+" looks like a typo for "([A-Z]+"; as
            // written this pattern only matches the literal text "A-Z]".
            // Left unchanged because the intended pattern is not certain.
            "/\b(A-Z]+[a-z])(_NN)\b/",
            "/([a-zA-Z]+)(_PN[GDR]?)/",
            "/([0-9.]+)(_MC)/",
        );
        $replacements = array(
            "",
            "",
            "",
            "",
            "***$2",
            "***$2",
            "@@@$2",
        );

        $sentences = preg_split("/(\._\.)|(!_\.)|(\?_\.)|(\n\r)/", $inputstring);
        // array_unique() keeps the original keys; re-index so that every
        // remaining sentence is visited exactly once
        $sentences = array_values(array_unique($sentences));

        $filtered_sentences = array();
        foreach ($sentences as $sentence) {
            foreach ($anchor as $pattern) {
                // STEP 1: GET ALL THE SENTENCES WITH ANCHOR WORDS
                if (preg_match($pattern, $sentence)) {
                    $filtered_sentences[] = preg_replace($searches, $replacements, $sentence);
                    // one hit is enough; further hits would only add duplicates
                    break;
                }
            }
        }

        $filtered_text = "";
        foreach (array_unique($filtered_sentences) as $value) {
            $filtered_text .= umlstools::phrase_detect_inline($value);
        }

        return $filtered_text;
    }

    /**
     * Maps a level from 1 to 5 to an HTML colour code; any other value
     * gives white.
     *
     * @param mixed $level level, compared loosely against 1..5 in order
     * @return string colour as "#RRGGBB"
     */
    function colorlevel($level) {

        $palette = array(
            1 => "#FFFFCC",
            2 => "#FFFF99",
            3 => "#FFCC00",
            4 => "#FF9900",
            5 => "#FF6633",
        );

        // loose comparison in ascending order, like the cases of a switch
        foreach ($palette as $rank => $hex) {
            if ($level == $rank) {
                return $hex;
            }
        }

        return "#FFFFFF";
    }

    /**
     * Replaces adjective tokens by their nominal form when the nominal form
     * followed by the next word is a known normalized string.
     *
     * Takes in one phrase/sentence at a time (first argument). For every
     * token whose tag contains "JJ", each nominal term returned by
     * umlstools::get_nominal_term() is combined with the following token's
     * word and looked up in umls.MRXNS_ENG; on a hit every "<word>_JJ" in
     * the text is rewritten to "<nominal>_NN".
     *
     * Returns the (possibly rewritten) text, or nothing when called without
     * arguments.
     */
    function nominalize() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];

        // tokenize
        $tokens = preg_split("/[\s]+/", $inputstring);
        $maxtokens = 1; // look ahead x tokens

        for ($i=0; $i<count($tokens)-$maxtokens; $i++) {

            // STEP 1: get base token; skip tokens without a tag part
            $base_token = explode("_", $tokens[$i]);
            if (!isset($base_token[1]) || !preg_match("/JJ/", $base_token[1])) {
                continue;
            }

            $nom_array = umlstools::get_nominal_term($base_token[0]);
            // the helper may return nothing when there is no nominal form
            if (!is_array($nom_array)) {
                continue;
            }

            foreach ($nom_array as $nom) {
                $compound_term = $nom;

                for ($j=1; $j<=$maxtokens; $j++) {
                    $next_token = explode("_", $tokens[$i+$j]);

                    // STEP 4: normalize compound term
                    $compound_term = trim($compound_term." ".$next_token[0]);
                    if (count(explode(" ", $compound_term)) < 2) {
                        continue;
                    }

                    // STEP 5: look it up in normalized strings
                    $needle = mysql_real_escape_string(trim(strtolower($compound_term)));
                    $sql = "select NSTR, CUI ".
                           "from umls.MRXNS_ENG ".
                           "where LAT = 'ENG' and NSTR like '".$needle."' ".
                           "group by NSTR, CUI";
                    $result = mysql_query($sql);
                    if (!$result || !mysql_num_rows($result)) {
                        continue;
                    }

                    // quote the word so regex metacharacters in it are literal,
                    // and protect "$" and "\" in the replacement text
                    $search = "/(".preg_quote($base_token[0], "/")."_JJ)/";
                    $replace = addcslashes($nom, '\\$')."_NN";
                    $inputstring = preg_replace($search, $replace, $inputstring);
                } // $j loop
            } // foreach
        } // $i loop
        return $inputstring;
    }

    /**
     * Returns the BAS1 values of umls.LRNOM rows whose BAS2 equals the search
     * term and whose SCA2 is 'adj', joined to umls.LRAGR on EUI2.
     *
     * @param string $searchterm base form to look up
     * @return array list of BAS1 strings; always an array, empty when the
     *               query fails or finds nothing
     */
    function get_nominal_term($searchterm) {

        $nom_array = array();

        $sql = "select n.BAS1 from umls.LRNOM n, umls.LRAGR a ".
               "where a.EUI = n.EUI2 and n.SCA2 = 'adj' and n.BAS2 = '".
               mysql_real_escape_string($searchterm)."'";
        $result = mysql_query($sql);
        if ($result && mysql_num_rows($result)) {
            while (list($nom) = mysql_fetch_array($result)) {
                $nom_array[] = $nom;
            }
        }
        return $nom_array;
    }

    function scrubber() {
    //
    // STAGE 1 TRANSFORMATION
    // Uses staged cleaning with regular expressions
    // Input: Raw Text
    // Output: Scrubbed Text
    //
    // IMPORTANT: Unicode can't be handled by regular expressions. If you encounter
    // bugs in quotes removal, suspect unicode quotes. Just change the text to regular
    // ASCII quotes.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $raw_text = $arg_list[0];

            // --------------------------------------
            // SET A: UNITS OF MEASUREMENT

            $patterns[0] = "/\b([0-9\.]+)(ml)\b/";
            $patterns[1] = '/(\d+)(")(\s{0,1})(X|x)(\s{0,1})(\d+)(")/'; // n inch by n inch
            $patterns[2] = '/(\d+)(")/';
            $patterns[3] = "/\b(degr[es]+)(\s{1})([Cc])([., ])/";
            $patterns[4] = "/\b(degr[es]+)(\s{1})([Ff])([., ])/";
            $patterns[5] = "/\b(q)(\s{0,1})([0-9]+)(\s{0,1})([Hh])/";
            $patterns[6] = "/\b((\({0,1})[0-9]+)(\))/";
            $patterns[7] = "/( #'s )/";
            $patterns[8] = "/([.0-9]+)([cm][glm])/";
            $patterns[9] = "/\b([0-9]+)([Xx])/";
            $patterns[10] = "/\b([Xx])([0-9]+)\b/";
            $patterns[11] = "/\b(area)(\s{0,1})(measured|measuring|size){0,1}(\s{0,1})([Ww])(=|-)([0-9]+ ?[0-9]?\/?[0-9]?)(\s{0,1})(cm|mm|inches|inches)([,]{0,1})(\s{0,1})([Ll])(=|-)([0-9]+ ?[0-9]?\/?[0-9]?)(\s{0,1})(cm|mm|inches|inches)/";
            $patterns[12] = "/\b(area)(\s{0,1})(measured|measuring|size){0,1}(\s{0,1})([Ll])(=|-)([0-9]+ ?[0-9]?\/?[0-9]?)(\s{0,1})(cm|mm|inches|inches)([,]{0,1})(\s{0,1})([Ww])(=|-)([0-9]+ ?[0-9]?\/?[0-9]?)(\s{0,1})(cm|mm|inches|inches)/";
            $patterns[13] = "/\b(\s+)([.])([0-9]+)/";
            $patterns[14] = "/\b([0-9]+)(\s{0,1})(m[Ll])/";

            $replacements[0] = "$1 $2";
            $replacements[1] = "$1-inch by $6-inch";
            $replacements[2] = "$1 inches ";
            $replacements[3] = "$1 Celsius$4";
            $replacements[4] = "$1 Fahrenheit$4";
            $replacements[5] = "every $3 $5ours";
            $replacements[6] = "$1$2 - ";
            $replacements[7] = " numbers ";
            $replacements[8] = "$1 $2";
            $replacements[9] = "$1 times";
            $replacements[10] = "$1 $2";
            $replacements[11] = "$1$2$3$4width$6$7$8$9$10$11length$13$14$15$16$17";
            $replacements[12] = "$1$2$3$4length$6$7$8$9$10$11width$13$14$15$16$17";
            $replacements[13] = " 0.$3";
            $replacements[14] = "$1 $3";

            $corrected = preg_replace($patterns, $replacements, $raw_text);

            // --------------------------------------
            // SET B: QUOTED STUFF

            $patterns[0] = '/(\s{0,1})(-\>)(\s{0,1})/'; // -> symbol
            $patterns[1] = "/(\(-\))/";
            $patterns[2] = '/(\()(S|s)(\))/'; // S in parentheses
            $patterns[3] = "/\b([A-Za-z]+)('s)\b/";
            $patterns[4] = "/\b(W|w)(on't)\b/";
            $patterns[5] = "/\b(D|d)(oes|o)(n't)\b/";
            $patterns[6] = "/(@)/";
            $patterns[7] = "/(\s+)(\+)([0-9]+)/";
            $patterns[8] = "/\b(C|c)(an't)\b/";
            $patterns[9] = "/\b(D|d)(id)(n't)\b/";
            $patterns[10] = "/\b(D|d)(\/)(c'd)\b/";
            $patterns[11] = "/\b(B|b)(abe's)\b/";
            $patterns[12] = "/\b(C|c)(ont'd)\b/";
            $patterns[13] = "/\b(C|c)(ouldn't)\b/";
            $patterns[14] = "/\b(W|w)(ouldn't)\b/";
            $patterns[15] = "/\b(H|h)(asn't)\b/";
            $patterns[16] = "/\b(H|h)(aven't)\b/";
            $patterns[17] = "/\b(W|w)(asn't)\b/";
            $patterns[18] = "/\b(W|w)(eren't)\b/";
            $patterns[19] = "/\b(I|i)(sn't)\b/";
            $patterns[20] = "/\b(A|a)(ren't)\b/";
            $patterns[21] = "/\b(S|s)(houldn't)\b/";
            $patterns[22] = "/\b([Aa])(woke)(\s+)(=)(\s+)/";
            $patterns[23] = "/(=)(\s{0,1})([0-9]+)/";
            $patterns[24] = "/(=\>|\>=)(\s{0,1})([0-9]+)/";
            $patterns[25] = "/\b(I'd)\b/";
            $patterns[26] = "/((\+){2})/";

            $replacements[0] = "$1- $3";
            $replacements[1] = "negative";
            $replacements[2] = "$2";
            $replacements[3] = "$1";
            $replacements[4] = "$1ould not";
            $replacements[5] = "$1$2 not";
            $replacements[6] = "at";
            $replacements[7] = "$1more than $3";
            $replacements[8] = "$1annot";
            $replacements[9] = "$1id not";
            $replacements[10] = "$1ischarged";
            $replacements[11] = "$1aby's";
            $replacements[12] = "$1ontinued";
            $replacements[13] = "$1ould not";
            $replacements[14] = "$1ould not";
            $replacements[15] = "$1as not";
            $replacements[16] = "$1ave not";
            $replacements[17] = "$1as not";
            $replacements[18] = "$1ere not";
            $replacements[19] = "$1s not";
            $replacements[20] = "$1re not";
            $replacements[21] = "$1hould not";
            $replacements[22] = "$1 around ";
            $replacements[23] = " equal to $3";
            $replacements[24] = " more than or equal to $3";
            $replacements[25] = "I would";
            $replacements[26] = "moderately";

            $corrected = preg_replace($patterns, $replacements, $corrected);


            $patterns[0] = "/(\s+)(-)([a-zA-Z]+)/"; // phrases that begin with dashes
            $patterns[1] = "/(\()/";
            $patterns[2] = "/(\))/";
            $patterns[3] = "/(\*)/";
            $patterns[4] = "/(#)([0-9]+)\b/";
            $patterns[5] = "/(~)/";
            $patterns[6] = "/( >)(\s{0,1})([0-9]+)\b/";
            $patterns[7] = '/(\()([\sa-zA-Z0-9]+)(\))/'; // anything enclosed in parentheses

            $replacements[0] = "$1$2 $3";
            $replacements[1] = "";
            $replacements[2] = "";
            $replacements[3] = "-";
            $replacements[4] = "number $2";
            $replacements[5] = "around";
            $replacements[6] = " more than $3";
            $replacements[7] = "$2";

            $corrected = preg_replace($patterns, $replacements, $corrected);


            $patterns[0] = '/(")/'; // unquote quoted words
            $patterns[1] = "/(')/";

            $replacements[0] = "";
            $replacements[1] = "";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            // --------------------------------------
            // SET C: NUMBER ISSUES

            $patterns[1] = "/\b(1st)\b/";
            $patterns[2] = "/\b(2nd)\b/";
            $patterns[3] = "/\b(3rd)\b/";
            $patterns[4] = "/\b([Bb][Ii][Dd])\b/";
            $patterns[5] = "/\b([Tt][Ii][Dd])\b/";

            $replacements[1] = "first";
            $replacements[2] = "second";
            $replacements[3] = "third";
            $replacements[4] = "twice a day";
            $replacements[5] = "three times a day";


            // ---------------------
            // SET D: MEDICAL ABBREVIATIONS

            $patterns[0] = "/\b([Ww]ith|[Ww]\/)(\s+)([Cc])(\/)([Oo])/";
            $replacements[0] = "with complaint of";
            $corrected = preg_replace($patterns, $replacements, $corrected);

            $patterns[0] = '/\b(re)([:]{0,1})(\s{0,1})\b/';
            $patterns[1] = '/\b([Cc])(\/)(o)\b/';
            $patterns[2] = '/&/';
            $patterns[3] = '/( (L|l)t?)(\.{0,1})(\s+)/';
            $patterns[4] = '/( (R|r)t?)(\.{0,1})(\s+)/';
            $patterns[5] = '/\b(Hep|hep)(\.{0,1})(\s{0,1})([A-Z]{0,1})\b/';
            $patterns[6] = "/\b(d\/c)(\s{1})(to)/";
            $patterns[7] = "/\b([Ii])(mm)(n{0,1})(s{0,1})(\s+|[.,])/";
            $patterns[8] = "/\b(I)(\.{0,1})(M)(\.{0,1})(\s+)\b/";
            $patterns[9] = "/\b([Ii])(\.{0,1})([Vv])(\.{0,1})(\s+)/";
            $patterns[10] = "/\b([Pp]\/c)(\s{1})(to)\b/";
            $patterns[11] = "/\b(ER)\b/";
            $patterns[12] = "/\b(D|d)(x)\b/";
            $patterns[13] = "/\b(D)(r)(s?)(\.?)('?)(\s{1})/";
            $patterns[14] = "/\b(M|m)(om)\b/";
            $patterns[15] = "/\b(O2 )([Ss]ats? )/";
            $patterns[16] = "/\b(RN)\b/";
            $patterns[17] = "/\b(P)(-)([0-9]{2,3})\b/";
            $patterns[18] = "/\b(R|r)(esp)(\.?)(\s?)([Ss]ystem)/";
            $patterns[19] = "/\b(H|h)(osp)(\.{0,1})\b/";
            $patterns[20] = "/\b(T|t)(el\.{0,1})(\s{1})(call)\b/";
            $patterns[21] = "/\b(B)(\.{0,1})(P)(\.{0,1})(\s+)/";
            $patterns[22] = "/\b(E|e)(merg)(\.?)\b/";
            $patterns[23] = "/\b(T\/P)|(TPC)|(T\/C)\b/";
            $patterns[24] = "/\b(P\.?H\.?N\.?)\b/";
            $patterns[25] = "/\b(T|t)(emp)\b/";
            $patterns[26] = "/\b(D|d)(ad)\b/";
            $patterns[27] = "/(given )(in |on )(LA)/";
            $patterns[28] = "/(given )(in |on )(RA)/";
            $patterns[29] = "/\b(H|h)(x)\b/";
            $patterns[30] = "/(A|a)(ccdg\.{0,1} to)/";
            $patterns[31] = "/\b(P|p)(t)(\.*)/";
            $patterns[32] = "/\b(P|p)(\.?)(\/?)(C|c)(\.?)/";
            $patterns[33] = "/\b(F\.?)(\s?)(Doctor)/";
            $patterns[34] = "/\b(A|a)(pprox)([.]{0,1})(\s+)/";
            $patterns[35] = "/\b(I|i)(nj)([.]{0,1})(\s+)/";
            $patterns[36] = "/\b([Hh])([Aa])(,?)(\s?)([Nn])(,?)(\s?)([Vv])/";
            $patterns[37] = "/\b(Resps)\b/";
            $patterns[38] = "/\b(resps)\b/";
            $patterns[39] = "/\b([Pp]oss)\b/";
            $patterns[40] = "/\b([Ss])(x)\b/";
            $patterns[41] = "/\b(on\+off)\b/";
            $patterns[42] = "/\b(paed)(\s{1})(dept)/";
            $patterns[43] = "/\b(paed)\b/";
            $patterns[44] = "/\b([Tt]yl)(\s+)(supp|rectal|with codeine)/";
            $patterns[45] = "/\b(neg)\b/";
            $patterns[46] = "(or\/and)";
            $patterns[47] = "/\b([Dd]ep)(t)(\.{0,1})\b/";
            $patterns[48] = "/\b(Enc to have)/";
            $patterns[49] = "/\b([Rr]espiration)( N at )([0-9]+)/";
            $patterns[50] = "/(B)(\.{0,1})(W)(\.{0,1})/";
            $patterns[51] = "/\b(P|p)(ed)(\.{0,1})(\s{1})|(p)(eads?)(\.{0,1})(\s{1})/";
            $patterns[52] = "/\b(reaction to )(vac)(\.{0,1})(\s{1})/";
            $patterns[53] = "/\b(R|r)(xn)(s)(\s+|[.,])/";
            $patterns[54] = "/(in |on )(his |her )(RA)/";
            $patterns[55] = "/(in |on )(his |her )(LA)/";
            $patterns[56] = "/\b(H|h)(as )(been )(diag\. )(with)/";
            $patterns[57] = "/\b(2ry)\b/";
            $patterns[58] = "/\b(B|b)(abe)\b/";
            $patterns[59] = "/\b([Aa])(pp)(t{0,1})(\s+|[.,])/";
            $patterns[60] = "/(B|b)(ab[ey])(\w{0,1})( BF)/";
            $patterns[61] = "/\b(to )(BF)/";
            $patterns[62] = "/( has )(\w+)( BF)/";
            $patterns[63] = "/\b(prior to )(immun\.? )/";
            $patterns[64] = "/\b(M|m)(eds)\b/";
            $patterns[65] = "/\b(dose )(for )(wt\.? )/";
            $patterns[66] = "/\b(pre)([ -]?)(med )(with )/";
            $patterns[67] = "/\b(neuro )(changes)/";
            $patterns[68] = "/\b(^blood)(\s+)(shot)/";
            $patterns[69] = "/ (M\.M\.R\.)/";
            $patterns[70] = "/\b([Aa])(mt)\b/";
            $patterns[71] = "/\b([Gg])([Rr])( )([0-9]{1,2})/";
            $patterns[72] = "/\b([Aa]fter )([Ii])(mm)\b/";
            $patterns[73] = "/\b([Bb])(ld)\b/";
            $patterns[74] = "/\b([Bb])(\/[wn])\b/";
            $patterns[75] = "/\b([Dd])(\/[Cc])\b/";
            $patterns[76] = "/(T )(taken|obtained)/";
            $patterns[77] = "/\b([Oo])(2)\b/";
            $patterns[78] = "/\b([Pp]hone )(call )(msg )/";
            $patterns[79] = "/\b(on )(the )(abd )\b/";
            $patterns[80] = "/\b([Tt])(x )(with|w\/)( )/";
            $patterns[81] = "/\b([Uu])(\/)([Ss])\b/";
            $patterns[82] = "/\b([Ss]een )(by )([Dd])(r\. )/";
            $patterns[83] = "/\b([Ff])(\/[Uu] )(with )([Dd])(r\.? )/";
            $patterns[84] = "/\b([0-9]{1})(\s{0,1})(tsp )/";
            $patterns[85] = "/\b([Ss]aw )([Dd])(r\.{0,1})([,-]{0,1})/";
            $patterns[86] = "/\b(degrees )([Ff])(\s+|[,])/";
            $patterns[87] = "/\b(degrees )([Cc])(\s+|[,])/";
            $patterns[88] = "/\b(had )(a ){0,1}([Bb])([Mm])(\s+)/";
            $patterns[89] = "/\b(take)(\s{0,1})(\w{0,1})(\s{0,1})(T)([., ])/";
            $patterns[90] = "/\b(Emerg|Emergency)(\s+)([Dd])(oc)(\s+)/";
            $patterns[91] = "/\b(axi)(\s+)([0-9.]+)(\s+)(degrees)/";
            $patterns[92] = "/\b(seen in)(\s+)([Ee])(\.?)([Rr])(\.?)(\s+)/";
            $patterns[93] = "/\b([Ii])([Mm])([Pp])(:{0,1})(\s+)(unlikely|is)(\s+)/";
            $patterns[94] = "/\b([Rr]efer)(red){0,1}(\s+)(to)(\s+)(MD)/";
            $patterns[95] = "/\b([Aa]ssoc)('d){0,1}(\s+)(with|w|w\/)(\s+)/";
            $patterns[96] = "/\b([Aa])(bdo?\.?)(\s+)(pain)(\s+)/";
            $patterns[97] = "/\b(assocd)\b/";
            $patterns[98] = "/\b([Hh])([Ss])(\s+)(normal)(\s+|[,.])\b/";
            $patterns[99] = "/\b([Aa])(bd\.?)(\s+)(soft)(\s+|[,.])/";
            $patterns[100] = "/\b([Vv])(ag)(\s+)([Dd]ischarge)(\s+|[.,])/";
            $patterns[101] = "/\b([Tt])(C)(\s+)(from)/";
            $patterns[102] = "/\b(large|small|big)(\s+)(amts)/";
            $patterns[103] = "/\b(SOB)\b/";
            $patterns[104] = "/\b(every)(\s+)([0-9]+)(\s+)(h)(\s+|[.,])/";
            $patterns[105] = "/\b([Nn])(euro)(\s+)(consult)/";
            $patterns[106] = "/\b(YF)\b/";
            $patterns[107] = "/\b(ORS)\b/";
            $patterns[108] = "/\b([H])(\/{0,1})([A])(s?)(\s+|[.,-])/";
            $patterns[109] = "/\b([Dd])(\/)([Tt])\b/";
            $patterns[110] = "/\b([Tt])(x)(\s+|[.])/";

            $replacements[0] = "regarding$3 ";
            $replacements[1] = "$1omplained of";
            $replacements[2] = "and";
            $replacements[3] = " $2eft$4";
            $replacements[4] = " $2ight$4";
            $replacements[5] = "$1atitis $4";
            $replacements[6] = "discharge $3";
            $replacements[7] = "$1mmunization$4 ";
            $replacements[8] = "$1ntramuscular$5";
            $replacements[9] = "$1ntravenous$5";
            $replacements[10] = "Phone call $3";
            $replacements[11] = "Emergency Room";
            $replacements[12] = "$1iagnosis";
            $replacements[13] = "$1octor$3$6";
            $replacements[14] = "$1other";
            $replacements[15] = "Oxygen saturation ";
            $replacements[16] = "Registered Nurse";
            $replacements[17] = "$1ressure is $3";
            $replacements[18] = "$1espiratory $5";
            $replacements[19] = "$1ospital";
            $replacements[20] = "$1elephone $4";
            $replacements[21] = "$1lood $3ressure$5";
            $replacements[22] = "$1mergency$3";
            $replacements[23] = "Telephone call";
            $replacements[24] = "Public Health Nurse";
            $replacements[25] = "$1emperature";
            $replacements[26] = "father";
            $replacements[27] = "given $2 Left Arm";
            $replacements[28] = "given $2 Right Arm";
            $replacements[29] = "$1istory";
            $replacements[30] = "$1ccording to";
            $replacements[31] = "$1atient";
            $replacements[32] = "$1hone call";
            $replacements[33] = "Family Doctor";
            $replacements[34] = "$1pproximately ";
            $replacements[35] = "$1njection ";
            $replacements[36] = "$1eadache$3$4$5ausea$6$7$8omiting";
            $replacements[37] = "Breathes";
            $replacements[38] = "breathes";
            $replacements[39] = "$1ible";
            $replacements[40] = "$1ymptoms";
            $replacements[41] = "on and off";
            $replacements[42] = "pediatric department";
            $replacements[43] = "pediatrician";
            $replacements[44] = "$1enol $2$3";
            $replacements[45] = "negative";
            $replacements[46] = "and/or";
            $replacements[47] = "$1artmen$2$3";
            $replacements[48] = "Encouraged to have";
            $replacements[49] = "$1 normal at $3";
            $replacements[50] = "$1lood $3ork";
            $replacements[51] = "pediatrician ";
            $replacements[52] = "$1vaccination$3$4";
            $replacements[53] = "$1eaction$3$4";
            $replacements[54] = "$1$2right arm";
            $replacements[55] = "$1$2left arm";
            $replacements[56] = "$1$2$3diagnosed $5";
            $replacements[57] = "secondary";
            $replacements[58] = "$1aby";
            $replacements[59] = "$1ppointment ";
            $replacements[60] = "$1$2$3 breastfed";
            $replacements[61] = "$1 breastfeed";
            $replacements[62] = "$1$2 breastfed";
            $replacements[63] = "$1immunization ";
            $replacements[64] = "$1edications";
            $replacements[65] = "$1$2weight ";
            $replacements[66] = "premedicate $4";
            $replacements[67] = "neurologic $2";
            $replacements[68] = "vaccination";
            $replacements[69] = " MMR";
            $replacements[70] = "$1mount";
            $replacements[71] = "$1rade $4";
            $replacements[72] = "$1 $2mmunization";
            $replacements[73] = "$1lood";
            $replacements[74] = "$1etween";
            $replacements[75] = "$1ischarge";
            $replacements[76] = "temperature $2";
            $replacements[77] = "$1xygen";
            $replacements[78] = "$1$2message ";
            $replacements[79] = "$1$2abdomen ";
            $replacements[80] = "$1reated $3$4";
            $replacements[81] = "$1ltrasound";
            $replacements[82] = "$1$2$3octor ";
            $replacements[83] = "$1ollow up $3 $4octor ";
            $replacements[84] = "$1 teaspoon ";
            $replacements[85] = "$1 $2octor $4";
            $replacements[86] = "$1$2ahrenheit$3";
            $replacements[87] = "$1$2elsius$3";
            $replacements[88] = "$1 $3owel $4ovement $5";
            $replacements[89] = "$1$2$3$4temperature$6";
            $replacements[90] = "$1$2doctor$5";
            $replacements[91] = "axilla $2$3$4$5$6";
            $replacements[92] = "$1$2$3mergency $5oom$7";
            $replacements[93] = "$1mpression$4$5$6$7";
            $replacements[94] = "$1$2$3$4$5doctor";
            $replacements[95] = "$1iated$3$4$5";
            $replacements[96] = "$1bdominal pain$5";
            $replacements[97] = "associated";
            $replacements[98] = "$1eart sounds $4$5";
            $replacements[99] = "$1bdomen $4$5";
            $replacements[100] = "$1aginal discharge$5";
            $replacements[101] = "$1elephone call $4";
            $replacements[102] = "$1$2amounts";
            $replacements[103] = "shortness of breath";
            $replacements[104] = "$1 $3 hours$4";
            $replacements[105] = "$1eurology $4";
            $replacements[106] = "Yellow Fever";
            $replacements[107] = "Oculo-respiratory Syndrome";
            $replacements[108] = "$1eadache$4$5";
            $replacements[109] = "due to";
            $replacements[110] = "$1reated ";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            // Shorter terms that will not be caught up there.

            $patterns[0] = "/\b([Rr])(esp)(\.?)\b/";
            $patterns[1] = "/\b(RUQ)\b/";
            $patterns[2] = "/\b(PO)|([Pp]\.[Oo]\.)/";
            $patterns[3] = "/\b(EP)\b/";
            $patterns[4] = "/\b(C|c)(t\.?)(\s{0,1})\b/";
            $patterns[5] = "/\b([Bb])((\/)|([Pp]))([Mm])\b/";
            $patterns[6] = "/\b(FP)\b/";
            $patterns[7] = "/\b(C|c)(k)\b/";
            $patterns[8] = "/\b(C|c)(kd)\b/";
            $patterns[9] = "/\b([Ss])(\s{0,1})(\+)(\s{0,1})([Ss])/";
            $patterns[10] = "/\b(A|a)(cc to)\b/";
            $patterns[11] = "/\b([Ff])(\/)([Uu])\b/";
            $patterns[12] = "/( \+ )/";
            $patterns[13] = "/\b([Ll]ot)(#)/";
            $patterns[14] = "/\b(p\/u)\b/";
            $patterns[15] = "/\b(f\/u)\b/";
            $patterns[16] = "/\b([Ss])(\s{0,1})(\/|and)(\s{0,1})([Ss])(\s{0,1})/";
            $patterns[17] = "/\b(O2)\b/";
            $patterns[18] = "/\b(PRN)\b/";
            $patterns[19] = "/\b(supp)\b/";
            $patterns[20] = "/\b(PM )(History)/";
            $patterns[21] = "/\b([Rr])(ec'?d)\b/";
            $patterns[22] = "/\b(b\/c)(\s+of)\b/";

            $replacements[0] = "$1espiration";
            $replacements[1] = "right upper quadrant";
            $replacements[2] = "per orem";
            $replacements[3] = "Emergency Physician";
            $replacements[4] = "$1lient$3";
            $replacements[5] = "beats per minute";
            $replacements[6] = "Family Physician";
            $replacements[7] = "$1heck";
            $replacements[8] = "$1hecked";
            $replacements[9] = "$1igns and $5ymptoms";
            $replacements[10] = "$1ccording to";
            $replacements[11] = "$1ollow up";
            $replacements[12] = " and ";
            $replacements[13] = "$1 number";
            $replacements[14] = "pick up";
            $replacements[15] = "follow up";
            $replacements[16] = "signs and symptoms ";
            $replacements[17] = "Oxygen";
            $replacements[18] = "as needed";
            $replacements[19] = "suppository";
            $replacements[20] = "Past Medical History";
            $replacements[21] = "$1eceived";
            $replacements[22] = "because$2";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            $patterns[0] = "/\b((N)([,]|\/)(D)([,]|\/)(V)(\s+|[,.]))|((N)([,]|\/)(V)([,]|\/)(D)(\s+|[,.]))|((N)([,]|\/)(V)(\s+|[,.]))/e";
            $replacements[0] = "umlstools::symptom_parse('\\1').' '";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            // ---------------------
            // SET E: DATE/TIME ELEMENTS

            $patterns[0] = "/\b([0-9]{1,2})(-|\/)([A-Za-z]{3,9})(-|\/)([0-9]{4})\b/";
            $patterns[1] = "/\b(JAN)\b/";
            $patterns[2] = "/\b(FEB)\b/";
            $patterns[3] = "/\b(MAR)\b/";
            $patterns[4] = "/\b(APR)\b/";
            $patterns[5] = "/\b(MAY)\b/";
            $patterns[6] = "/\b(JUN)\b/";
            $patterns[7] = "/\b(JUL)\b/";
            $patterns[8] = "/\b(AUG)\b/";
            $patterns[9] = "/\b(SEP|SEPT)\b/";
            $patterns[10] = "/\b(OCT)\b/";
            $patterns[11] = "/\b(NOV)\b/";
            $patterns[12] = "/\b(DEC)\b/";

            $replacements[0] = "$1 $3 $5";
            $replacements[1] = "January";
            $replacements[2] = "February";
            $replacements[3] = "March";
            $replacements[4] = "April";
            $replacements[5] = "May";
            $replacements[6] = "June";
            $replacements[7] = "July";
            $replacements[8] = "August";
            $replacements[9] = "September";
            $replacements[10] = "October";
            $replacements[11] = "November";
            $replacements[12] = "December";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            $patterns[0] = "/\b(Mon\.{0,1})\b/";
            $patterns[1] = "/\b((Tue)|(Tues)(\.?))\b/";
            $patterns[2] = "/\b(Wed)\b/";
            $patterns[3] = "/\b((Thu)|(Thur)|(Thurs))\b/";
            $patterns[4] = "/\b(Fri)\b/";
            $patterns[5] = "/\b(Sat)\b/";
            $patterns[6] = "/\b(Sun)\b/";
            $patterns[7] = "/\b([Jj]an\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[8] = "/\b([Ff]eb\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[9] = "/\b([Mm]ar\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[10] = "/\b([Aa]pr\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[11] = "/\b([Jj]un\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[12] = "/\b([Jj]ul\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[13] = "/\b([Aa]ug\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[14] = "/\b([Ss]ept?\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[15] = "/\b([Oo]ct\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[16] = "/\b([Nn]ov\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[17] = "/\b([Dd]ec\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
            $patterns[18] = "/(\s|:)([0-9]{0,2})(\s{0,1})(H|h)(r)(s?)(\s+|[.,])/";
            $patterns[19] = "/\b((Jan|January)|(Feb|February)|(Mar|March)|(Apr|April)|(May)|(Jun|June)|(Jul|July)|(Aug|August)|(Sep|Sept|September)|(Oct|October)|(Nov|November)|(Dec|December))\b/";
            $patterns[20] = "/\b([0-9]+)(\s{0,1})([ap]\.{0,1}m\.{0,1})/"; // Separate am/pm from the time
            $patterns[21] = "/\b([0-9]+)(\s{0,1})(min)(s?)(\.{0,1})(\s{1})/"; // 20 mins.
            $patterns[22] = "/\b([0-9]+)(days)\b/";
            $patterns[23] = "/\b([0-9]+)(\s{0,1})(wk)/";
            $patterns[24] = "/\b([0-9]{4})(hrs)\b/";
            $patterns[25] = "/(q)(\s{0,1})([0-9]+)(-{0,1})([0-9]{0,1})(\s{0,1})/";
            $patterns[26] = "/\b([Aa])(\.)([Mm])(\.)\s/";
            $patterns[27] = "/\b([Pp])(\.)([Mm])(\.)\s/";
            $patterns[28] = "/\b([0-9]+)(hr?)\b/";
            $patterns[29] = "/\b(nite)\b/";
            $patterns[30] = "/\b(started)(\s+)(\w+)/e";
            $patterns[31] = "/\b(M)(-)(Friday)/";
            $patterns[32] = "/\b([0-9]+)(\s+)(yr[s.]+)/";
            $patterns[33] = "/\b([0-9]+[-][0-9]+)(\s?)(h|hrs)(\s+|[.,])/";
            $patterns[34] = "/\b(q)(\.?)([0-9]+)/";

            $replacements[0] = "Monday";
            $replacements[1] = "Tuesday";
            $replacements[2] = "Wednesday";
            $replacements[3] = "Thursday";
            $replacements[4] = "Friday";
            $replacements[5] = "Saturday";
            $replacements[6] = "Sunday";
            $replacements[7] = "January $3";
            $replacements[8] = "February $3";
            $replacements[9] = "March $3";
            $replacements[10] = "April $3";
            $replacements[11] = "June $3";
            $replacements[12] = "July $3";
            $replacements[13] = "August $3";
            $replacements[14] = "September $3";
            $replacements[15] = "October $3";
            $replacements[16] = "November $3";
            $replacements[17] = "December $3";
            $replacements[18] = "$1$2 $4our$6 ";
            $replacements[19] = "$1";
            $replacements[20] = "$1 $3";
            $replacements[21] = "$1$2 minutes$6";
            $replacements[22] = "$1 $2";
            $replacements[23] = "$1$2week";
            $replacements[24] = "$1 hours";
            $replacements[25] = "every $3$4$5 ";
            $replacements[26] = " AM ";
            $replacements[27] = " PM ";
            $replacements[28] = "$1 hours";
            $replacements[29] = "night";
            $replacements[30] = "'\\1'.'\\2'.umlstools::process_day_term('\\3')";
            $replacements[31] = "Monday to Friday";
            $replacements[32] = "$1 years";
            $replacements[33] = "$1 hours ";
            $replacements[34] = "every $3";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            // DOCTOR/PARENT IDENTIFIERS

            $patterns[0] = "/(for ){0,1}(MHO|Dr|Dr\.|Doctor|Ms\.?|Mr\.?|Mrs\.?)(\s+)(([A-Z][A-Za-z-]+)|[A-Z]\.?)(\s+){0,1}([A-Z][a-z]+){0,1}/e";
            $replacements[0] = "'\\1'.umlstools::deidentify_name('\\1', '\\2', '\\3', '\\5', '1')";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            // NUMBER IDENTIFIERS

            $patterns[0] = "/\b(\({0,1})([0-9]{0,3})(\){0,1})(-?)([0-9]{3})(-?)([0-9]{4})/e";
            $replacements[0] = "umlstools::deidentify_numbers('\\2','\\5','\\7')";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            // DATES
            $patterns[0] = "/\b(([JFMASOND][a-z]+)(\s+)([0-9]{1,2})((st|nd|rd|th){0,1})([.-]|\/|\s+)([0-9]{2,4}))/e";
            $patterns[1] = "/\b(([JFMASOND][a-z]+)(\s+)([0-9]{1,2})(,)(\s{0,1})([0-9]{2,4}))/e";
            $patterns[2] = "/\b(([0-9]{2,4})([.-]|\/)([0-9]{1,2})([.-]|\/)([0-9]{1,2}))/e";
            $patterns[3] = "/\b(([0-9]{1,2})(\s+|[-])([JFMASOND][A-Za-z]+)(\s+|[-])([0-9]{2,4}))/e";
            $patterns[4] = "/\b(([0-9]{2})([JFMASOND][a-z]+)([0-9]{2}))/e";
            $patterns[5] = "/((.*)([0-9]+)(st|th|nd|rd)(.*))/e";

            $replacements[0] = "umlstools::extract_date('\\1', '1')";
            $replacements[1] = "umlstools::extract_date('\\1', '2')";
            $replacements[2] = "umlstools::extract_date('\\1', '3')";
            $replacements[3] = "umlstools::extract_date('\\1', '4')";
            $replacements[4] = "umlstools::extract_date('\\1', '5')";
            $replacements[5] = "umlstools::extract_date('\\1', '6')";

            $corrected = preg_replace($patterns, $replacements, $corrected);

            return $corrected;

        }
    }

    function symptom_parse($var) {
    //
    // parameters: $var - abbreviation text such as "N/V" or "N,D,V"
    // action: expands the capital letters N, V and D found in $var into symptom names
    // returns: the names joined with ", " in the fixed order nausea, vomiting, diarrhea
    //          (the order does not follow $var); empty string when none of the letters occur
    //
        $symptom_names = array(
            "N" => "nausea",
            "V" => "vomiting",
            "D" => "diarrhea",
        );

        // start from an empty list so input without any marker letter is
        // handled without touching undefined variables
        $found = array();
        foreach ($symptom_names as $letter => $name) {
            if (strpos((string) $var, $letter) !== false) {
                $found[] = $name;
            }
        }

        return implode(", ", $found);
    }

    function extract_date($var, $case) {
    //
    // parameters: $var  - the date text to normalize (case "6": a whole stretch of text)
    //             $case - string naming the layout of $var:
    //                 "1"  Month D[st|nd|rd|th] YYYY, or Month D<sep>YYYY with <sep> one of . - / ,
    //                 "2"  Month D, YYYY
    //                 "3"  YYYY<sep>MM<sep>DD with <sep> one of . - /
    //                 "4"  D Month YYYY, parts separated by whitespace or dashes
    //                 "5"  DDMonYY written without separators
    //                 "6"  text holding a complete "YYYY Month D" date plus a "Month Nth" mention
    //                 "7"  YYYY-MM.DD
    // returns: "YYYY Month DD" with a zero-padded day; for case "6" the text with the first
    //          "Month Nth" rewritten as "YYYY Month DD" using the year of the complete date.
    //          $var is returned unchanged whenever it cannot be parsed, the month is not
    //          recognized, or the year fails the checks of that case.
    //
    // Two-digit years 41-99 become 19xx, 00-40 become 20xx.
    // Cases 1, 2 and 5 keep the month as written (capitalized); cases 3, 4, 6 and 7 emit
    // the full month name.
    //
        $month_full = array("January", "February", "March", "April", "May", "June",
                       "July", "August", "September", "October", "November", "December");

        // abbreviation => index into $month_full ("Sep" and "Sept" both mean September)
        $month_abbrev = array("Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4,
                       "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Sept" => 8,
                       "Oct" => 9, "Nov" => 10, "Dec" => 11);

        $day = "";
        $month = "";
        $year = "";
        $numeric_month = false; // month is a number rather than a name
        $reject_future = false; // leave text alone when the year is after the current year
        $min_year = 0;          // leave text alone when the year is before this
        $full_name = false;     // emit the full month name instead of the month as written

        // Each case only splits $var into day/month/year and sets the flags above;
        // validation and formatting are shared below the switch. Case "6" is the
        // exception and returns directly.
        switch ($case) {
        case "1":
            $parts = preg_split('/\s+/', trim($var));
            if (count($parts) > 2) {
                list($month, $day, $year) = $parts;
            } elseif (count($parts) == 2) {
                // day and year are glued together, e.g. "5.2005" or "5th-05"
                $pieces = preg_split('/[.,\/-]/', $parts[1]);
                if (count($pieces) < 2) {
                    return $var;
                }
                $month = $parts[0];
                $day = $pieces[0];
                $year = $pieces[1];
            } else {
                return $var;
            }
            // drop an ordinal suffix; anchored so the digits themselves are never removed
            $day = preg_replace('/(st|nd|rd|th)$/', "", $day);
            $reject_future = true;
            $min_year = 1940;
            break;

        case "2":
            $pieces = explode(",", $var);
            if (count($pieces) < 2) {
                return $var;
            }
            $parts = preg_split('/\s+/', trim($pieces[0]));
            if (count($parts) < 2) {
                return $var;
            }
            list($month, $day) = $parts;
            // trim first: the space after the comma must not count toward the year length
            $year = trim($pieces[1]);
            break;

        case "3":
            $parts = preg_split('/[.\/-]/', $var);
            if (count($parts) < 3) {
                return $var;
            }
            list($year, $month, $day) = $parts;
            $numeric_month = true;
            $reject_future = true;
            $full_name = true;
            break;

        case "4":
            $parts = preg_split('/[\s-]+/', trim($var));
            if (count($parts) < 3) {
                return $var;
            }
            list($day, $month, $year) = $parts;
            $full_name = true;
            break;

        case "5":
            if (!preg_match('/([0-9]{2})([JFMASOND][a-z]+)([0-9]{2})/', $var, $items)) {
                return $var;
            }
            $day = $items[1];
            $month = $items[2];
            $year = $items[3];
            break;

        case "6":
            // the year is borrowed from the first complete date in the text
            if (!preg_match('/([0-9]{4}) [JFMASOND][a-z]+ [0-9]{1,2}/', $var, $complete)) {
                return $var;
            }
            if (!preg_match_all('/[0-9]+(?:st|th|nd|rd)/', $var, $ordinals)) {
                return $var;
            }
            $cyear = $complete[1];
            // punctuation is stripped only for tokenizing; the replacement is done on $var
            $tokens = explode(" ", preg_replace('/[.,:]/', "", $var));
            foreach ($tokens as $position => $token) {
                if ($position == 0 || !in_array($token, $ordinals[0], true)) {
                    continue;
                }
                $word = $tokens[$position - 1];
                $name = ucfirst(strtolower($word));
                // compare against false explicitly: index 0 (January) is a valid hit
                $index = array_search($name, $month_full, true);
                if ($index === false && isset($month_abbrev[$name])) {
                    $index = $month_abbrev[$name];
                }
                if ($index === false) {
                    continue;
                }
                $day = preg_replace('/(st|nd|rd|th)$/', "", $token);
                $day = str_pad($day, 2, "0", STR_PAD_LEFT);
                // literal replacement, so tokens containing regex characters are safe
                return str_replace($word." ".$token, "$cyear ".$month_full[$index]." $day", $var);
            }
            // nothing to rewrite: hand the text back instead of returning nothing
            return $var;

        case "7":
            $pieces = explode(".", $var);
            if (count($pieces) < 2) {
                return $var;
            }
            $year_month = explode("-", $pieces[0]);
            if (count($year_month) < 2) {
                return $var;
            }
            list($year, $month) = $year_month;
            $day = $pieces[1];
            $numeric_month = true;
            $reject_future = true;
            $full_name = true;
            break;

        default:
            return $var;
        }

        // resolve the month to an index into $month_full, or false when unknown
        if ($numeric_month) {
            $month_number = (int) $month;
            $key = ($month_number >= 1 && $month_number <= 12) ? $month_number - 1 : false;
        } else {
            $month = ucfirst(strtolower($month));
            $key = array_search($month, $month_full, true);
            if ($key === false && isset($month_abbrev[$month])) {
                $key = $month_abbrev[$month];
            }
        }
        if ($key === false) {
            return $var;
        }

        // expand a two-digit year around the 1941-2040 window
        if (preg_match('/^[0-9]{2}$/', $year)) {
            $year = ((int) $year > 40 ? "19" : "20").$year;
        }

        if ($reject_future && (int) $year > (int) date("Y")) {
            return $var;
        }
        if ((int) $year < $min_year) {
            return $var;
        }

        $day = str_pad($day, 2, "0", STR_PAD_LEFT);
        $month_label = $full_name ? $month_full[$key] : $month;

        return "$year $month_label $day";
    }

    function process_day_term($var) {
    //
    // parameters: $var - a single word that may be a weekday name or abbreviation
    // returns: the full capitalized weekday name when $var (ignoring case and one
    //          trailing "s") is at least three letters long and is the beginning of
    //          a weekday name, e.g. "mon", "Tues", "thurs", "fridays";
    //          otherwise $var unchanged
    //
    // Requiring the whole word to be the start of the day name keeps ordinary
    // words that merely begin with the same three letters ("monitoring",
    // "sunbathing") from being turned into weekdays.
    //
        $day_full = array("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday");

        $word = strtolower($var);
        // tolerate one trailing "s" so "tues", "thurs", "weds" and plurals resolve
        if (substr($word, -1) == "s") {
            $word = substr($word, 0, -1);
        }
        if (strlen($word) < 3) {
            return $var;
        }

        foreach ($day_full as $day) {
            // === 0: the word must match from the first character of the day name
            if (strpos(strtolower($day), $word) === 0) {
                return $day;
            }
        }

        return $var;
    }

    function deidentify_name($for, $prefix, $first, $last) {
    //
    // parameters: $for    - text captured ahead of the title; only the exact value "for " matters
    //             $prefix - the title (Dr, Doctor, MHO, Ms, Mrs, Mr, ...)
    //             $first, $last - accepted but not used
    // returns: $prefix followed by a fixed placeholder name and a trailing space;
    //          nothing when $prefix starts with none of the known titles
    //
        // physician-style titles: the placeholder depends on whether "for " came first
        foreach (array("Dr", "Doc", "MHO") as $title) {
            if (substr($prefix, 0, strlen($title)) == $title) {
                $placeholder = ($for == "for ") ? "Patche Adams " : "Marcus Welby ";
                return $prefix." ".$placeholder;
            }
        }

        // female titles are tested before "Mr" because "Mrs" also starts with "Mr"
        foreach (array("Miss", "Ms", "Mrs") as $title) {
            if (substr($prefix, 0, strlen($title)) == $title) {
                return $prefix." "."Jane MeNobody ";
            }
        }

        if (substr($prefix, 0, 2) == "Mr") {
            return $prefix." John MeNobody ";
        }
    }

    function deidentify_numbers($areacode, $three, $four) {
    //
    // parameters: $areacode - optional leading digit group (may be empty)
    //             $three, $four - the two remaining digit groups
    // returns: the groups joined with "-" with every digit replaced by "9";
    //          the area code group is left out when $areacode is empty or "0"
    //
        $groups = array($three, $four);
        if ($areacode) {
            array_unshift($groups, $areacode);
        }

        // preg_replace accepts an array subject and masks each group in turn
        $masked = preg_replace("/[0-9]/", "9", $groups);

        return implode("-", $masked);
    }

    function getmicrotime() {
    //
    // returns: current Unix time in seconds as a float, including the
    //          fractional (microsecond) part
    //
        // microtime() without arguments yields the string "<fraction> <seconds>"
        $parts = explode(" ", microtime());
        $fraction = (float) $parts[0];
        $seconds = (float) $parts[1];

        return ($fraction + $seconds);
    }

    function term_inventory() {
    //
    // parameters: concept tagged text, document id
    // action: builds an inventory object for the document, clears its existing
    //         terms and calls update() (per the original note: saves terms,
    //         tags and treebank id's in table)
    // returns: true when called with arguments, nothing otherwise
    //
        if (!func_num_args()) {
            return;
        }

        $args = func_get_args();
        $tagged_text = $args[0];
        $doc_id = $args[1];

        $inventory = new inventory($tagged_text, $doc_id);
        $inventory->clear_terms();
        $inventory->update();

        return true;
    }


// END OF CLASS
}
/** ************************************************
** CONCEPT MANAGER CLASS
** $Author: Herman Tolentino MD
** $Last update: 10/05/05
*/
class cuimanager {

    var $arglist;

    function cuimanager() {
    //
    // parameters: menu id, POST variables, GET variables, textlab data (positional)
    // action: keeps the argument list for the other methods, processes the
    //         form first when it was submitted, then prints the form
    //
        if (func_num_args()) {
            $this->arglist = func_get_args();
            list($menu_id, $post_vars, $get_vars, $textlab) = $this->arglist;
        }

        // "submitcui" is the name shared by all submit buttons of the form
        $submitted = isset($post_vars["submitcui"]);
        if ($submitted) {
            $this->process_manage_cui();
        }

        $this->form_manage_cui();
    }

    function mark_tagged($term, $text, $get_vars) {
    //
    // parameters: $term, $text, $get_vars - kept for existing callers; all
    //             three are recomputed below from the constructor arguments
    // action: numbers every space-separated token of the tagged text (the
    //         phrased text when "phr" is set and non-empty), puts the token
    //         at the current sequence in bold and wraps occurrences of the
    //         term in a highlight span
    // returns: the marked-up text as an HTML string
    //
        $arg_list = $this->arglist;
        $post_vars = $arg_list[1];
        $get_vars = $arg_list[2];
        $textlab = $arg_list[3];

        $term = ($get_vars["cuitagged"]?$get_vars["cuitagged"]:$post_vars["cuitagged"]);
        if (isset($get_vars["phr"]) && $get_vars["phr"]) {
            $text = ($textlab["phrased_text"]?$textlab["phrased_text"]:$post_vars["phrased_text"]);
        } else {
            $text = ($textlab["cuitagged_text"]?$textlab["cuitagged_text"]:$post_vars["cuitagged_text"]);
        }
        $sequence = (isset($get_vars["seq"])?$get_vars["seq"]:$get_vars["phr"]);

        $new_text = array();
        $i = 0;
        foreach (explode(" ", $text) as $unit) {
            $i++;
            $new_text[] = ($sequence==$i?"<b>$unit</b>":$unit)."<span style='color:#A0A0A0; font-weight: bold;'>$i</span>";
        }
        $itemized = implode(" ", $new_text);

        // nothing to highlight without a term
        if (strlen($term)==0) {
            return $itemized;
        }

        // the term is matched literally: preg_quote() escapes "|" (phrase
        // separator) as well as ".", "/", "(" and the other metacharacters
        return preg_replace("/\b(".preg_quote($term, "/").")/", "<span class='highlight'>$1</span>", $itemized);
    }

    function form_manage_cui() {
    //
    // parameters: none; works on the menu id, POST variables, GET variables
    //             and textlab data stored by the constructor
    // action: prints the concept manager form for the token or phrase named
    //         by "cuitagged": the marked-up text, lexicon and WordNet lookups,
    //         the candidate concepts with their source vocabulary rows, the
    //         "no CUI" and override choices, the tag selector and the
    //         submit buttons
    //
        $arg_list = $this->arglist;
        $post_vars = $arg_list[1];
        $get_vars = $arg_list[2];
        $textlab = $arg_list[3];
        $sequence = (isset($get_vars["seq"])?$get_vars["seq"]:$get_vars["phr"]);
        $is_phrase = isset($get_vars["phr"]);

        $c = new concept();

        print "<a name='cuitagged'>";
        print "<span class='newstitle'>".FTITLE_MANAGE_CUI."</span><br>";
        print "<span class='tinylight'>".INSTR_MANAGE_CUI."</span><br>";
        print "<table width='600' cellpadding='3' cellspacing='1' style='border: 1px solid black'>";
        // MANAGER FORM
        // every query-string value is URL-encoded, then the whole URL is
        // escaped for use inside the single-quoted attribute
        $action = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".urlencode($get_vars["menu_id"]).
                  ($get_vars["document_id"]?"&document_id=".urlencode($get_vars["document_id"]):"").
                  "&tab=".urlencode($get_vars["tab"]).
                  "&cuitagged=".urlencode($get_vars["cuitagged"]).
                  "&seq=".urlencode($sequence).
                  (isset($get_vars["text"])?"&text=".urlencode($get_vars["text"]):"").
                  "#cuidetail";
        print "<form action = '".$this->_escape_html($action)."' name='form_manage_cui' method='post'>";
        //
        print "<tr valign='top'><td colspan='4'>";

        // a tagged token has the form term_tag_cui
        $cuitagged = ($get_vars["cuitagged"]?$get_vars["cuitagged"]:$post_vars["cuitagged"]);
        list($input_term,$input_tag,$input_cui) = explode("_", $cuitagged);

        if ($is_phrase) {
            // normalize phrase: "|"-separated words, sorted, joined by spaces
            $phrase_tokens = explode("|", $input_term);
            sort($phrase_tokens);
            $input_term = implode(" ", $phrase_tokens);

            print "<div id='phrase' style='visibility:show;'>";
            print $this->mark_tagged($cuitagged, $textlab["phrased_text"], $get_vars);
            print "</div>";
        } else {
            print "<div id='cuitagged' style='visibility: show;'>";
            print $this->mark_tagged($cuitagged, $textlab["cuitagged_text"], $get_vars);
            print "</div>";
        }

        print "</td></tr>";
        print "<tr valign='top' bgcolor='#99CCFF'><td colspan='4'>";
        print "UMLS LEXICON LOOKUP<br>";
        print "</td></tr>";
        print "<tr valign='top' bgcolor='white'><td colspan='4'>";
        print umlstools::lexicon_lookup($input_term);
        print "</td></tr>";
        print "<tr valign='top' bgcolor='#99CCFF'><td colspan='4'>";
        print "WORDNET LOOKUP<br>";
        print "</td></tr>";
        print "<tr valign='top' bgcolor='white'><td colspan='4' class='tabletext'>";
        print umlstools::wordnet_lookup($input_term);
        print "</td></tr>";
        print "<tr valign='top'><td colspan='4'>";
        print "<span class='error'>".$this->_escape_html($cuitagged).": <font color='red'><b>".$this->_escape_html($sequence)."</b></font></span><br> ";
        print "</td></tr>";
        print "<tr valign='top' bgcolor='#99CCFF'><td colspan='4'>";
        print "CONCEPT LOOKUP RESULTS<br>";
        print "</td></tr>";
        print "<tr valign='top'><td colspan='4'>";
        print "<a name='cuidetail'><br>";

        // candidate concepts: cached list first, then the three UMLS lookups;
        // a lookup that does not return an array contributes nothing
        $baseform = $c->baseform($input_term);
        $searchterm = ($baseform?$baseform:$input_term);
        $lookups = array(
            $c->get_list_cached_cui($searchterm),
            $c->get_list_umls_nscui($searchterm),
            $c->get_list_umls_cscui($searchterm),
            $c->get_list_umls_norm($searchterm)
        );
        $concept_list = array();
        foreach ($lookups as $found) {
            if (is_array($found)) {
                $concept_list = array_merge($concept_list, $found);
            }
        }
        $concept_list = array_unique($concept_list);
        if (count($concept_list)==0) {
            // fall back to the CUI already attached to the token
            $concept_list = array($input_cui);
        }

        print $this->corpus_list($input_term, $get_vars["document_id"]);

        // reflects the last concept looked up: "checked" when it had no rows
        $checked_override = "";
        foreach ($concept_list as $concept) {
            $checked_override = "";
            $sql = "select s.NSTR, c.CUI, c.AUI, c.SAB, v.SON, c.CODE, c.STR, t.TUI, t.STY ".
                   "from umls.MRXNS_ENG s, umls.MRCONSO c, umls.MRSTY t, umls.MRSAB v ".
                   "where c.CUI =s.CUI and s.CUI = t.CUI and c.SAB = v.RSAB and c.ISPREF = 'Y' ".
                   "and c.LAT = 'ENG' and c.CUI = '".mysql_real_escape_string($concept)."' ".
                   "group by c.SAB, t.STY, c.CODE, c.STR";
            if ($result = mysql_query($sql)) {
                if (mysql_num_rows($result)) {
                    print "<table width='600' bgcolor='white' cellpadding='2' cellspacing='0'>";
                    print "<tr valign='top' bgcolor='#CCFF99'>".
                          "<td class='tabletext'>SAB</td>".
                          "<td class='tabletext'>CODE</td>".
                          "<td class='tabletext'>STR</td>".
                          "<td class='tabletext'>STY</td></tr>";
                    $prev_cui = "";
                    while(list($nstr, $cui, $aui, $sab, $son, $code, $str, $tui, $sty) = mysql_fetch_array($result)) {
                        if ($prev_cui <> $cui) {
                            // header row of a concept: radio button plus its weight
                            $semantic_distance = $c->corpus_semantic_distance($cui);
                            // NOTE(review): a zero distance used to divide by zero; 0 keeps the
                            // weight that PHP 5 produced in that case - confirm this is intended
                            $distance = ($semantic_distance ? 1/$semantic_distance : 0);
                            $bigram_prob = $c->corpus_bigram_probability($cui);
                            $left_token = new token($this->token_by_position($sequence-1, $textlab["cuitagged_text"]));
                            $left_tag = $left_token->get_tag();
                            $right_token = new token($this->token_by_position($sequence+1, $textlab["cuitagged_text"]));
                            $right_tag = $right_token->get_tag();
                            $pos_prob = $c->corpus_pos_probability($concept, $input_term, $input_tag, $left_tag, $right_tag);
                            $weight = "<font color='red'>".($distance * $bigram_prob * $pos_prob)."</font>";
                            $checked = ($input_cui==$cui?"checked":"");
                            print "<tr valign='top' bgcolor='#FFCC33'>".
                                  "<td colspan='4'><input type='radio' name='cui' $checked value='".$this->_escape_html($cui)."'>".
                                  "<span class='service'>".$this->_escape_html($cui)."</span> ".$weight."</td></tr>";
                            print "<tr valign='top' bgcolor='white'><td colspan='4'>".umlstools::mrdef_lookup($cui)."</td></tr>";
                        }
                        print "<tr valign='top' bgcolor='white'>".
                              "<td class='tabletext'><font color='blue'><a name='".$this->_escape_html($sab)."' title='".$this->_escape_html($son)."'>".$this->_escape_html($sab)."</a></font></td>".
                              "<td class='tabletext'>".$this->_escape_html($code)."</td>".
                              "<td class='tabletext'>".$this->_escape_html($str)."</td>".
                              "<td class='tabletext' nowrap><b>".$this->_escape_html($tui)."</b> ".$this->_escape_html($sty)."</td></tr>";
                        $prev_cui = $cui;
                    }
                    print "</table><br>";
                } else {
                    $checked_override = "checked";
                }
            }
        }
        print "</td></tr>";
        print "<tr valign='top' bgcolor='#FFCC33'>".
              "<td colspan='4'><input type='radio' name='cui' ".($input_cui==="C0000000"?"checked":"")." value='C0000000'>".
              "<span class='service'>C0000000 (NO CUI)</span></td></tr>";
        $mytoken = new token($get_vars["cuitagged"]);
        $mycui = $mytoken->get_cui();
        print "<tr valign='top' bgcolor='#FFCC33'>".
              "<td colspan='4'>".
              "<input type='radio' name='cui' value=''>".
              "<span class='service'>OVERRIDE CUI</span> ".
              "<input type='text' class='textbox' size='8' maxlength='8' name='override' $checked_override value='".($checked_override?$this->_escape_html($mycui):"")."' style='border: 1px solid #000000'>".
              "</td></tr>";
        print "<tr valign='top' bgcolor='white'><td colspan='4'>";
        print tagger::treebank_select($input_tag);
        print "<span class='boxtitle'>".LBL_CHANGE_TYPE."</span><br> ";
        print "<span class='tinylight'>".INSTR_CHANGE_TYPE."</span><br>";
        print "<span class='service'>";
        print "<input type='radio' class='textbox' name='change_type' value='item' checked > ITEM-SPECIFIC <br>";
        print "<input type='radio' class='textbox' name='change_type' value='global' > GLOBAL <br>";
        print "</span><br>";
        print "<table cellpadding='2' cellspacing='1'>";
        print "<tr><td>";

        // hidden state; values are escaped so that quotes in the text cannot
        // end the attribute early
        $hidden = array();
        // the document id is always sent, also when no concept produced rows
        $hidden["document_id"] = ($get_vars["document_id"]?$get_vars["document_id"]:$textlab["document_id"]);
        $hidden["input_term"] = $input_term;
        $hidden["cuitagged"] = (isset($get_vars["cuitagged"])?$get_vars["cuitagged"]:$post_vars["cuitagged"]);
        $hidden["cuitagged_text"] = (isset($textlab["cuitagged_text"])?$textlab["cuitagged_text"]:$post_vars["cuitagged_text"]);
        $hidden["phrased_text"] = (isset($textlab["phrased_text"])?$textlab["phrased_text"]:$post_vars["phrased_text"]);
        $hidden["sequence"] = $sequence;
        if ($is_phrase) {
            $hidden["phrase_seq"] = $get_vars["phr"];
        }
        if (isset($get_vars["text"])) {
            $hidden["text"] = $get_vars["text"];
        }
        foreach ($hidden as $field => $value) {
            print "<input type='hidden' name='".$field."' value='".$this->_escape_html($value)."'>";
        }

        print "<input type='submit' value = 'Adopt Configuration' class='textbox' name='submitcui' title='Changes the token in tagged text.' style='border: 1px solid #000000'> ";
        //print "<input type='submit' value = 'Previous' class='textbox' name='submitcui' title='Go to previous tagged token' style='border: 1px solid #000000'> ";
        print "<input type='submit' value = 'Next' class='textbox' name='submitcui' title='Go to next tagged token' style='border: 1px solid #000000'> ";
        print "</td></tr>";
        print "<tr><td>";
        print "<input type='submit' value = 'Cancel' class='textbox' name='submitcui' style='border: 1px solid #000000'> ";
        print "</td></tr>";
        print "</table>";

        print "</td></tr>";
        print "</form>";
        print "</table>";
    }

    function _escape_html($value) {
    //
    // parameters: $value - text destined for HTML output or an attribute value
    // returns: $value with & < > " and ' replaced by entities.
    //          ISO-8859-1 is named so that only those ASCII characters are
    //          touched and bytes of any other encoding pass through unchanged
    //
        return htmlspecialchars($value, ENT_QUOTES, "ISO-8859-1");
    }

    function corpus_list($searchterm, $document_id) {
    //
    // parameters: $searchterm - term to look for
    //             $document_id - document to leave out of the result
    // action: looks up the term, its capitalized form and its base form in
    //         m_textlab_terms for all documents other than $document_id
    // returns: an HTML table listing document, term, concept, treebank id
    //          and summed frequency; nothing when there are no rows or the
    //          query fails
    //
        $baseform = concept::baseform($searchterm);

        // escaped copies for the query; terms may contain apostrophes
        $term_sql = mysql_real_escape_string($searchterm);
        $capitalized_sql = mysql_real_escape_string(ucfirst($searchterm));
        $baseform_sql = mysql_real_escape_string($baseform);
        $document_sql = mysql_real_escape_string($document_id);

        // the term alternatives are parenthesized: AND binds tighter than OR,
        // so without them the document filter applied to the base form only
        $sql = "select document_id, term, concept_id, treebank_id, sum(frequency) ".
               "from m_textlab_terms ".
               "where (term = '$term_sql' or term = '$capitalized_sql' or term = '$baseform_sql') ".
               "and document_id <> '$document_sql' ".
               "group by document_id, term, concept_id, treebank_id";
        if ($result = mysql_query($sql)) {
            $return = "";
            if (mysql_num_rows($result)) {
                $return .= "<table width='100%'>";
                $return .= "<tr valign='top'><td>";
                $return .= "<span class='boxtitle'>CONSISTENCY CHECK:</span><br>";
                $return .= "<span class='textbox'>This term is found in these other documents.&gt;&gt;</span>";
                $return .= "</td><td>";
                $return .= "<table width='300' cellpadding='2' cellspacing='1'>";
                $return .= "<tr bgcolor='#9999FF'><td nowrap class='filter'>DOC ID</td><td class='filter'>TERM</td>".
                           "<td class='filter'>CONCEPT</td><td nowrap class='filter'>TREEBANK ID</td><td class='filter'>FREQ</td></tr>";
                while (list($doc_id, $term, $concept, $tree_id, $count) = mysql_fetch_array($result)) {
                    $return .= "<tr><td class='filter'>$doc_id</td><td class='filter'>$term</td>".
                               "<td class='filter'><font color='red'>$concept</font></td>".
                               "<td class='filter'><font color='blue'>$tree_id</font></td>".
                               "<td class='filter'>$count</td></tr>";
                }
                $return .= "</table>";
                $return .= "</td></tr></table>";
                return $return;
            }
        }
    }

    function process_manage_cui() {
    //
    // parameters: none; works on the POST variables, GET variables and
    //             textlab data stored by the constructor
    // action: handles the submit button of the concept manager form
    //         "Adopt Configuration": builds the new token (term_tag_cui),
    //             changes the phrased or concept tagged text of the document
    //             and redirects to the next tagged token
    //         "Next" / "Previous": redirects to the neighbouring tagged token
    //         anything else (e.g. "Cancel"): does nothing
    //
        $arg_list = $this->arglist;
        $post_vars = $arg_list[1];
        $get_vars = $arg_list[2];
        $textlab = $arg_list[3];

        // a posted phrase_seq means the form was opened on a phrase
        $is_phrase = ($post_vars["phrase_seq"]?true:false);
        $column = ($is_phrase?"phrased_text":"cuitagged_text");
        $sequence = ($post_vars["sequence"]?$post_vars["sequence"]:$post_vars["phrase_seq"]);

        switch($post_vars["submitcui"]) {
        case "Adopt Configuration":
            if ($post_vars["input_term"] && $post_vars["cuitagged"] && $textlab["cuitagged_text"]) {
                // a CUI typed into the override box wins over the radio selection
                $chosen_cui = ($post_vars["override"]?$post_vars["override"]:$post_vars["cui"]);
                $new_token = $post_vars["input_term"]."_".$post_vars["treebank"].($chosen_cui?"_".$chosen_cui:"");
                $position = ($is_phrase?$post_vars["phrase_seq"]:$post_vars["sequence"]);

                if ($post_vars["cui"]=="NOCUI") {
                    $changed_text = $this->remove_by_tagnumber($position, $get_vars["cuitagged"], $textlab[$column]);
                } elseif ($post_vars["change_type"]=="item") {
                    $changed_text = $this->replace_by_tagnumber($position, $new_token, $textlab[$column]);
                } else {
                    // global change: the old token is matched literally and "\" and "$"
                    // in the new token are not read as backreferences
                    $changed_text = preg_replace("/\b(".preg_quote($post_vars["cuitagged"], "/").")\b/",
                                                 addcslashes($new_token, "\\$"),
                                                 $textlab[$column]);
                }

                // values are escaped: document text regularly contains apostrophes
                $sql = "update m_textlab_document set ".
                       "$column = '".mysql_real_escape_string($changed_text)."' ".
                       "where document_id = '".mysql_real_escape_string($post_vars["document_id"])."'";
                if (!mysql_query($sql)) {
                    die(mysql_errno().": ".mysql_error());
                }
                $next_token = $this->retrieve_token($sequence, $textlab[$column]);
                header($this->_token_location($get_vars, $post_vars["document_id"], $next_token, $is_phrase));
            }
            break;

        case "Next":
        case "Previous":
            // no output may precede header(), so nothing is printed here
            $target = null;
            if (($post_vars["sequence"] || $post_vars["phrase_seq"]) && ($textlab["cuitagged_text"] || $textlab["cuitagged_text_predicted"])) {
                if ($post_vars["submitcui"]=="Next") {
                    $target = $this->retrieve_token($sequence, $textlab[$column]);
                } else {
                    $target = $this->retrieve_previous_token($sequence, $textlab[$column]);
                }
            }
            header($this->_token_location($get_vars, $post_vars["document_id"], $target, $is_phrase));
            break;

        default:
        }
    }

    function _token_location($get_vars, $document_id, $token, $is_phrase) {
    //
    // parameters: $get_vars - GET variables of the current request
    //             $document_id - document being edited
    //             $token - array(token, position), or null when there is none
    //             $is_phrase - true for the phrase view ("phr"), false for
    //                          the token view ("seq" plus "text")
    // returns: the complete "location:" header line pointing at the token
    //
        $tagged = (is_array($token)?$token[0]:"");
        $position = (is_array($token)?$token[1]:"");

        $url = $_SERVER["PHP_SELF"].
               "?page=".urlencode($get_vars["page"]).
               "&menu_id=".urlencode($get_vars["menu_id"]).
               "&document_id=".urlencode($document_id).
               "&tab=".urlencode($get_vars["tab"]).
               "&cuitagged=".urlencode($tagged);
        if ($is_phrase) {
            $url .= "&phr=".urlencode($position);
        } else {
            $url .= "&seq=".urlencode($position).
                    "&text=".urlencode(isset($get_vars["text"])?$get_vars["text"]:"");
        }
        return "location: ".$url."#cuitagged";
    }

    function replace_by_tagnumber($tagnumber, $new_token, $text) {
    //
    // parameters: $tagnumber - 1-based position of the token to replace
    //             $new_token - replacement token
    //             $text - space-separated tagged text
    // returns: the text with the token at that position replaced; the text
    //          unchanged when no token has that position
    //
        $tokens = explode(" ", $text);
        foreach (array_keys($tokens) as $index) {
            // positions are 1-based, array keys 0-based
            if (($index + 1)==$tagnumber) {
                $tokens[$index] = $new_token;
                break;
            }
        }
        return implode(" ", $tokens);
    }

    function remove_by_tagnumber($tagnumber, $new_token, $text) {
    //
    // parameters: $tagnumber - 1-based position of the token to remove
    //             $new_token - not used; kept for existing callers
    //             $text - space-separated tagged text
    // returns: the text without the token at that position; the text
    //          unchanged when no token has that position
    //
        $tokens = explode(" ", $text);
        foreach (array_keys($tokens) as $index) {
            // positions are 1-based, array keys 0-based
            if (($index + 1)==$tagnumber) {
                unset($tokens[$index]);
                break;
            }
        }
        return implode(" ", $tokens);
    }

    function retrieve_token($sequence, $text) {
    //
    // parameters: $sequence - 1-based position of the current token
    //             $text - space-separated tagged text
    // returns: array(token, position) for the first token after $sequence
    //          that carries a CUI ("_C" followed by seven digits); null when
    //          there is none
    //
        $tokens = explode(" ", $text);
        foreach ($tokens as $index => $item) {
            $position = $index + 1;
            if ($position>$sequence && preg_match("/(_C[0-9]{7})/", $item)) {
                return array($item, $position);
            }
        }
        return null;
    }

    function token_by_position($sequence, $text) {
    //
    // parameters: $sequence - 1-based position of the wanted token
    //             $text - space-separated tagged text
    // returns: the token at that position, the first token included; null
    //          when the position lies outside the text
    //
        $tokens = explode(" ", $text);
        foreach ($tokens as $index => $item) {
            // positions are 1-based, array keys 0-based
            if ($sequence==($index + 1)) {
                return $item;
            }
        }
        return null;
    }

    function retrieve_previous_token($sequence, $text) {
    //
    // parameters: $sequence - 1-based position of the current token
    //             $text - space-separated tagged text
    // returns: array(token, position) for the nearest token before $sequence
    //          that carries a CUI ("_C" followed by seven digits); null when
    //          there is none. Mirrors retrieve_token(), searching backwards.
    //
        $tokens = explode(" ", $text);
        // start just before the current token, but never beyond the text
        $start = min(((int)$sequence) - 1, count($tokens));
        for ($position = $start; $position >= 1; $position--) {
            $item = $tokens[$position - 1];
            if (preg_match("/(_C[0-9]{7})/", $item)) {
                return array($item, $position);
            }
        }
        return null;
    }

// end
// CONCEPT MANAGER CLASS
}

/** ************************************************
** PARSER CLASS
** Finite State Machine parser
**
** The lexicon is MySQL table m_textlab_terms which contains
** words from adverse event free text reports.
**
** $Author: Herman Tolentino MD
** $Last update: 11/15/05
*/
class parser {

    var $tree;
    var $raw_input;
    var $sentences;
    var $class;
    var $state;

    function parser() {
    //
    // constructor: starts with an empty parse tree
    //
        $this->tree = array();
    }

    function input($cuitagged) {
    //
    // parameters: $cuitagged - concept tagged text
    // action: keeps the text as raw input and splits it into sentences
    //
        $this->raw_input = $cuitagged;
        $this->sentence_split($cuitagged);
    }

    function sentence_split($raw_input) {
    //
    // parameters: $raw_input - concept tagged text
    // action: splits the given text at every period token ("._.") and
    //         stores the pieces in $this->sentences
    //
        $this->sentences = preg_split("/(\._\.)/", $raw_input);
    }

    function parse() {
    //
    // action: applies the lead-in rules below to every sentence, in order.
    //         When a rule matches, the sentence is replaced by the part the
    //         rule keeps and handed to deep_parse(); later rules then see
    //         the replaced sentence.
    // returns: $this->tree
    //
        // each rule: pattern, replacement. The same pattern is used for the
        // test and for the replacement, so capitalized sentence-initial
        // forms ("Noticed", "Complained") are handled like lower-case ones.
        $rules = array(
            // drop everything up to and including the lead-in
            array("/(.*)([Ww]ithin_II_C0332285 hours_NNS_C0439227 of_II vaccination_NN_C0042196)/", ""),
            // keep what stands between "regarding" and "following"
            array("/(.*)([Cc]ame_VVD to_TO see_VVI_C0000000)(.*)(regarding_VVG)(.*)(following_VVG_C0332282)(.*)/", "$5"),
            // keep what follows "noticed"
            array("/(.*)([Nn]oticed_VVD)/", ""),
            // keep what follows "complained ... of"
            array("/(.*)([Cc]omplained_VVN)(.*)(of_II)(.*)/", "$5")
        );

        for ($i=0; $i<count($this->sentences); $i++) {
            foreach ($rules as $rule) {
                if (preg_match($rule[0], $this->sentences[$i])) {
                    $this->sentences[$i] = preg_replace($rule[0], $rule[1], $this->sentences[$i]);
                    $this->deep_parse($this->sentences[$i]);
                }
            }
        }
        return $this->tree;
    }

    function deep_parse($sentence, $state="START") {

    // top-down finite-state left-right parsing

        $this->state = $state;
        print "PARSING: ".$sentence."<br>";
        while ($this->state <> "EOS") {
            print "<font color='red'>state: ".$this->state."</font><br>";
            switch ($this->state) {
            case "START":
                // look for verb
                if (preg_match("/\b(was_VBD_C0000000)|(noticed_VVD)\b/", $sentence, $matches)) {
                    list($np, $vp) = preg_split("/\b(".$matches[0].")\b/", $sentence);
                    $this->state = "NP";
                } elseif (preg_match("/([A-Za-z]+_V[VGDJN]{2,5}[_C0-9]{0,8})/", $sentence, $matches)) {
                    list($np, $vp) = preg_split("/([A-Za-z]+_V[VGDJN]{2,5}[_C0-9]{0,10})/", $sentence);
                    $this->state = "NP";
                } elseif (preg_match("/([A-Za-z]+_JJ[_CR0-9]{0,10})(.*)([A-Za-z]+_NN[_CS0-9]{0,10})(.*)([A-Za-z]+_CC)(.*)([A-Za-z]+_NN[_CS0-9]{0,10})/", $sentence, $matches)) {
                    $conj_algebra = $matches[0];
                    $this->state = "VP2_CONJ_ALG2";
                } elseif (preg_match("/(([A-Za-z]+_JJ[_CR0-9]{0,10})(\s)){1,3}([A-Za-z]+_NN[S]{0,1}[_C0-9]+)/", $sentence, $matches)) {
                    print_r($matches);
                    $conj_algebra = $matches[0];
                    $this->state = "VP2_NP";
                } else {
                    $this->state = "EOS";
                }
                break;
            case "NP":
                $np_fragments = preg_split("/(\s+)/", $np);
                for ($i=0; $i<count($np_fragments); $i++) {
                    if (preg_match_all("/\b(.*)(_N)(.*)\b/", $np_fragments[$i], $matches)) {
                        $token = new token($np_fragments[$i]);
                        $cui = $token->cui;
                        $aefi = new aefi();
                        if ($aefi->is_bodypart($cui)) {
                            $this->tree["BODYPART"][] = $cui;
                        }
                        if($aefi->is_ssx($cui)) {
                            $this->tree["SSX"][] = $cui;
                        }
                    }
                }
                $this->state = "VP1";
                break;
            case "VP1":
                $vp_fragments = preg_split("/(-_:)|(-_SYM)|([A-Za-z]+_II)|([A-Za-z]+_DD)|([A-Za-z]+_PN[G]+)/", $vp);
                for($i=0; $i<count($vp_fragments); $i++) {
                    if (preg_match("/([A-Za-z]+_JJ[_CR0-9]{0,10})(.*)([A-Za-z]+_CC)(.*)([A-Za-z]+_JJ[_CR0-9]{0,10})(.*)([A-Za-z]+_NN[_CS0-9]{0,10})/", $vp_fragments[$i], $matches)) {
                        $conj_algebra = $vp_fragments[$i];
                        unset($vp_fragments[$i]);
                        $this->state = "VP2_CONJ_ALG1";
                        break;
                    } elseif (preg_match_all("/([A-Za-z]+_NN[_CS0-9]{0,10})/", $vp_fragments[$i], $matches)) {
                        foreach ($matches[0] as $value) {
                            $token = new token($value);
                            $cui = $token->cui;
                            $aefi = new aefi();
                            if ($aefi->is_bodypart($cui)) {
                                $this->tree["BODYPART"][] = $cui;
                            }
                            if($aefi->is_ssx($cui)) {
                                $this->tree["SSX"][] = $cui;
                            }
                        }
                    } else {
                        $this->state = "VP2_CONJ_ALG2";
                    }
                }
                $vp2_fragments = array();
                foreach ($vp_fragments as $value) {
                    if (strlen(trim($value))>0) {
                        $vp2_fragments[] = $value;
                    }
                }
                $vp_fragments = $vp2_fragments;
                break;
            case "VP2_CONJ_ALG1":
                $distributed = preg_replace("/([A-Za-z]+_JJ[_RC0-9]{0,10})(\s+)(and_CC)(\s+)([A-Za-z]+_JJ[_RC0-9]{0,10})(\s+)([A-Za-z]+_NN[_SC0-9]{0,10})/","$1 $7|$5 $7", $conj_algebra);
                $split = preg_split("/(\|)/", $distributed);
                foreach ($split as $compound_noun) {
                    $phrase = new phrase($compound_noun);
                    if ($token = new token($phrase->detect(array($compound_noun)))) {
                        $aefi = new aefi();
                        if ($cui = $token->get_cui()) {
                            if ($aefi->is_bodypart($cui)) {
                                $this->tree["BODYPART"][] = $cui;
                            } elseif ($aefi->is_ssx($cui)) {
                                $this->tree["SSX"][] = $cui;
                            }
                            break;
                        }
                    }
                }
                $this->state = "VP2_NP";
                break;
            case "VP2_CONJ_ALG2":
                $distributed = preg_replace("/([A-Za-z]+_JJ[_RC0-9]{0,10})(\s+)([A-Za-z]+_NN[_SC0-9]{0,10})(\s+)(and_CC)(\s+)([A-Za-z]+_NN[_SC0-9]{0,10})/","$1 $3|$1 $7", $conj_algebra);
                $split = preg_split("/(\|)/", $distributed);
                foreach ($split as $compound_noun) {
                    $phrase = new phrase($compound_noun);
                    if ($phrase_detected = $phrase->detect(array($compound_noun))) {
                        $token = new token($phrase_detected);
                        $aefi = new aefi();
                        if ($cui = $token->get_cui()) {
                            if ($aefi->is_bodypart($cui)) {
                                $this->tree["BODYPART"][] = $cui;
                            } elseif ($aefi->is_ssx($cui)) {
                                $this->tree["SSX"][] = $cui;
                            }
                            $this->state = "EOS";
                        }
                    } else {
                        if ($parts = explode(" ", trim($compound_noun))) {
                            foreach ($parts as $value) {
                                if (strlen(trim($value))>0) {
                                    $vp_fragments[] = $value;
                                }
                            }
                            if (is_array($vp_fragments)) {
                                $vp_fragments = array_unique($vp_fragments);
                                $vp2_fragments = array();
                                foreach ($vp_fragments as $value) {
                                    if (strlen(trim($value))>0) {
                                        $vp2_fragments[] = $value;
                                    }
                                }
                                $vp_fragments = $vp2_fragments;
                            }
                            $this->state = "VP2_FRAGMENTS";
                        } else {
                            $this->state = "EOS";
                        }
                    }
                }
                break;
            case "VP2_NP":
                print_r($vp_fragments);
                for ($i=0; $i<count($vp_fragments); $i++) {
                    if (preg_match("/(([A-Za-z]+_JJ[_CR0-9]{0,10})|([A-Za-z]+_VV[_GJC0-9]{0,10}))(.*)([A-Za-z]+_NN[S]{0,1}[_C0-9]+)/", $vp_fragments[$i], $matches) ||
                        preg_match("/(([A-Za-z]+_JJ[_CR0-9]{0,10})(\s)){1,3}([A-Za-z]+_NN[S]{0,1}[_C0-9]+)/", $vp_fragments[$i], $matches)) {
                        $compound_noun = $vp_fragments[$i];
                        $phrase = new phrase($compound_noun);
                        $detected_phrase = $phrase->detect(array($compound_noun));
                        if (strlen(trim($detected_phrase))>0) {
                            if ($token = new token($detected_phrase)) {
                                $aefi = new aefi();
                                if ($cui = $token->get_cui()) {
                                    if ($aefi->is_bodypart($cui)) {
                                        $this->tree["BODYPART"][] = $cui;
                                    } elseif ($aefi->is_ssx($cui)) {
                                        $this->tree["SSX"][] = $cui;
                                    }
                                    unset($vp_fragments[$i]);
                                }
                            }
                        } else {
                            $temp_array = explode(" ", trim($vp_fragments[$i]));
                            $vp_fragments[$i] = $temp_array[1];
                        }
                    }
                }
                $vp2_fragments = array();
                foreach ($vp_fragments as $value) {
                    if (strlen(trim($value))>0) {
                        $vp2_fragments[] = $value;
                    }
                }
                $vp_fragments = $vp2_fragments;
                if (count($vp_fragments)>0) {
                    $this->state = "VP2_NN";
                } else {
                    $this->state = "EOS";
                }
                break;
            case "VP2_NN":
                for($i=0; $i<count($vp_fragments); $i++) {
                    if ($fragments = preg_split("/(,_,)/", $vp_fragments[$i])) {
                        unset($vp_fragments[$i]);
                        foreach ($fragments as $value) {
                            $temp_array = explode(" ", trim($value));
                            if (count($temp_array)>1) {
                                $back_job[] = $value;
                            } else {
                                if ($token = new token($value)) {
                                    $aefi = new aefi();
                                    if ($cui = $token->get_cui()) {
                                        if ($aefi->is_bodypart($cui)) {
                                            $this->tree["BODYPART"][] = $cui;
                                        } elseif ($aefi->is_ssx($cui)) {
                                            $this->tree["SSX"][] = $cui;
                                        }
                                        continue;
                                    }
                                }
                            }
                        }
                    } else {
                        if ($token = new token($vp_fragments[$i])) {
                            $aefi = new aefi();
                            if ($cui = $token->get_cui()) {
                                if ($aefi->is_bodypart($cui)) {
                                    $this->tree["BODYPART"][] = $cui;
                                } elseif ($aefi->is_ssx($cui)) {
                                    $this->tree["SSX"][] = $cui;
                                }
                            }
                        }
                    }
                }
                $vp2_fragments = array();
                foreach ($vp_fragments as $value) {
                    if (strlen(trim($value))>0) {
                        $vp2_fragments[] = $value;
                    }
                }
                $vp_fragments = $vp2_fragments;
                if ($back_job) {
                    foreach ($back_job as $value) {
                        $vp_fragments[] = $value;
                    }
                    unset($back_job);
                    $this->state = "VP2_NP";
                } else {
                    $this->state = "EOS";
                }
                break;
            case "VP2_FRAGMENTS":
                for($i=0; $i<=count($vp_fragments); $i++) {
                    $token = new token($vp_fragments[$i]);
                    $cui = $token->cui;
                    $aefi = new aefi();
                    if ($aefi->is_bodypart($cui)) {
                        $this->tree["BODYPART"][] = $cui;
                    }
                    if($aefi->is_ssx($cui)) {
                        $this->tree["SSX"][] = $cui;
                    }
                }
                $this->state = "EOS";
                break;
            default:
                $this->state = "EOS";
            }
        }
        if (is_array($this->tree["SSX"])) {
            $this->tree["SSX"] = array_unique($this->tree["SSX"]);
        }
        if (is_array($this->tree["BODYPART"])) {
            $this->tree["BODYPART"] = array_unique($this->tree["BODYPART"]);
        }
    }

}

/** ************************************************
** KFILTER CLASS
** $Author: Herman Tolentino MD
** $Last update: 11/11/05
**
** NOTES:
** 1. Keyword filter class: filters essential concepts
**    for document summarization
** 2. Uses concept bigram inventory
*/

class kfilter {

    // text the registered patterns are matched against
    var $input_string;
    // list of array("class" => ..., "pattern" => ...) entries; starts out as
    // an empty array so get_filter() is safe before any add_filter() call
    var $word_filters = array();

    function kfilter($input_string) {
    //
    // constructor (PHP 4 style): stores the text to filter and echoes it
    //
        print $this->input_string = $input_string;
    }

    function add_filter($class, $pattern) {
    //
    // registers $pattern (a regular expression body without delimiters)
    // under the name $class, then dumps the current filter list
    //
        $this->word_filters[] = array("class" => $class, "pattern"=>$pattern);
        print_r($this->word_filters);
    }

    function get_filter($filter) {
    //
    // matches every pattern registered under $filter against the input
    // string and dumps each match array; nothing is returned
    //
        if (!is_array($this->word_filters)) {
            // the property was overwritten with something that cannot be iterated
            return;
        }
        foreach ($this->word_filters as $key=>$value) {
            if ($value["class"]==$filter) {
                preg_match("/".$value["pattern"]."/", $this->input_string, $matches);
                print_r($matches);
            }

        }
    }

}

/** ************************************************
** QTCLUSTER CLASS
** $Author: Herman Tolentino MD
** $Last update: 11/11/05
**
** NOTES:
** 1. AI clustering algorithm
** 2. Uses concept bigram inventory
*/

class qtcluster {

    // document whose concepts are clustered
    var $document_id;
    // largest accumulated distance a concept may collect from its partners
    var $diameter;
    // detect() keeps going while the picked concept has more connections than this
    var $elements;
    // concept ids picked by detect(), in the order they were picked
    var $qtclusters;
    // one array("concept", "elements", "distance") entry per picked concept
    var $qtcluster_stats;

    function qtcluster($document_id, $diameter=10, $elements=25) {
    //
    // constructor (PHP 4 style): stores the document id and both thresholds
    //
        $this->document_id = $document_id;
        $this->diameter = $diameter;
        $this->elements = $elements;
    }

    function get_cluster_stats() {
    //
    // returns the statistics collected by detect() (unset until detect()
    // has picked at least one concept)
    //
        return $this->qtcluster_stats;
    }

    function detect() {
    //
    // Repeatedly rebuilds m_textlab_clusters for the document and picks the
    // concept with the most connections.
    //
    // Each pass:
    //   1. selects the document's concepts that carry one of the listed
    //      TUIs, minus the concepts picked by earlier passes
    //   2. for every such concept walks its bigram partners ordered by
    //      distance * (1/frequency), adding up that metric and storing the
    //      running connections/distance for as long as the sum stays within
    //      $this->diameter
    //   3. picks the stored row with the most connections, records it and
    //      excludes it from later passes
    //
    // Passes continue while the picked row has more connections than
    // $this->elements. Returns the list of picked concept ids (unset when
    // nothing was ever picked).
    //
        // connection count of the most recent pick; nothing picked yet
        $elements = 0;
        $start = true;
        $filter_condition = "";
        while ($elements > $this->elements || $start == true) {
            $start = false;
            // becomes true once this pass has picked a new concept
            $picked = false;
            $sql = "select t.concept_id ".
                   "from m_textlab_terms t, umls.MRSTY s ".
                   "where t.concept_id = s.CUI and t.document_id = '".$this->document_id."' ".
                   "and (s.TUI = 'T184' or s.TUI = 'T046' or s.TUI = 'T047' or s.TUI = 'T048' or s.TUI = 'T022') ".
                   "and t.concept_id <> '' and t.concept_id <> 'C0000000' ".
                   "and term <> 'signs' and term <> 'symptoms' ".
                   $filter_condition.
                   "order by t.concept_id";
            if ($result = mysql_query($sql)) {
                if (mysql_num_rows($result)) {
                    // start every pass from an empty cluster table for this document
                    $sql_delete = "delete from m_textlab_clusters where document_id = '".$this->document_id."'";
                    $result_delete = mysql_query($sql_delete);
                    while (list($concept) = mysql_fetch_array($result)) {
                        $sql_l2 = "select b.concept2_id, (b.distance * (1/b.frequency)) dmetric ".
                                  "from m_textlab_bigrams b, m_textlab_terms t ".
                                  "where t.concept_id = b.concept2_id and b.concept1_id = '$concept' ".
                                  "order by (b.distance * (1/b.frequency))";
                        if ($result_l2 = mysql_query($sql_l2)) {
                            if (mysql_num_rows($result_l2)) {
                                $connections = 0;
                                $distance = 0;
                                while (list($c2, $d) = mysql_fetch_array($result_l2)) {
                                    $connections++;
                                    $distance = $distance + $d;
                                    if ($distance <= $this->diameter) {
                                        // try to insert first; when that fails (presumably because
                                        // the row already exists) update the stored row instead
                                        $sql_insert = "insert into m_textlab_clusters (document_id, concept_id, connections, distance) ".
                                                      "values ('".$this->document_id."', '$concept', '$connections', '$distance')";
                                        if ($result_insert = mysql_query($sql_insert)) {
                                        } else {
                                            $sql_update = "update m_textlab_clusters set connections = '$connections', distance = '$distance' ".
                                                          "where concept_id = '$concept' and document_id = '".$this->document_id."'";
                                            $result_update = mysql_query($sql_update);
                                        }
                                    } else {
                                        // partners are ordered by the metric, so the sum only grows from here
                                        break;
                                    }
                                }
                            }
                        }
                    }
                    // exclude matched concepts
                    // with each iteration by updating
                    // $filter_condition
                    $sql_last = "select concept_id, connections, distance ".
                                "from m_textlab_clusters ".
                                "where document_id = '".$this->document_id."' ".
                                "order by connections desc";

                    if ($result_last = mysql_query($sql_last)) {
                        if (mysql_num_rows($result_last)) {
                            list($concept, $elements, $distance) = mysql_fetch_array($result_last);
                            $this->qtcluster_stats[] = array("concept" => $concept, "elements" => $elements, "distance" => $distance);
                            $this->qtclusters[] = $concept;
                            $filter_condition .= " and t.concept_id <> '$concept' ";
                            $picked = true;
                        }
                    }
                    //print_r($this->qtcluster_stats);
                }
            }
            if (!$picked) {
                // Neither $elements nor $filter_condition changed, so the next
                // pass would run the very same queries again: stop here
                // instead of looping forever.
                break;
            }
        }
        return $this->qtclusters;
    }

    function set_diameter($diameter) {
    //
    // replaces the distance limit used by detect()
    //
        $this->diameter = $diameter;
    }

    function get_filter($filter) {
    //
    // Same body as kfilter::get_filter(). This class never sets
    // $word_filters or $input_string, so unless a caller assigns them
    // from outside there is nothing to match and the method does nothing.
    //
        if (!isset($this->word_filters) || !is_array($this->word_filters)) {
            return;
        }
        $subject = isset($this->input_string) ? $this->input_string : "";
        foreach ($this->word_filters as $key=>$value) {
            if ($value["class"]==$filter) {
                preg_match("/".$value["pattern"]."/", $subject, $matches);
                print_r($matches);
            }

        }
    }

}

/** ************************************************
** CONCEPT CLASS
** $Author: Herman Tolentino MD
** $Last update: 11/11/05
**
** NOTES:
** 1. We stopped using normalized word search
**    because it was too database- and resource-intensive.
*/
class concept {

    var $postagged;
    var $tokens;
    var $document_id;
    var $track_disambig_array;
    var $track_source_array;
    var $elapsed;

    function concept() {
    //
    // constructor (PHP 4 style): resets the bookkeeping used for statistics
    //
    // track_source_array[<source>]["array"] collects the CUIs returned by a
    // lookup source and ["count"] counts them; track_disambig_array holds
    // the running totals updated by disambiguate_concept()
    //
        // the "word" source is currently disabled and therefore not tracked
        foreach (array("cache", "string", "norm", "concept") as $source) {
            $this->track_source_array[$source]["array"] = array();
            $this->track_source_array[$source]["count"] = 0;
        }

        $this->track_disambig_array["distance"] = 0;
        $this->track_disambig_array["bigram"] = 0;
        $this->track_disambig_array["pos"] = 0;
        // read by disambiguate_concept() and collect_stats(); it used to be
        // left undefined until the first disambiguation
        $this->track_disambig_array["points"] = 0;
    }

    function set_text($postagged_text) {
    //
    // keeps the tagged text as given and also as a list of tokens obtained
    // by splitting it on runs of whitespace
    //
        $pieces = preg_split("/[\s]+/", $postagged_text);
        $this->postagged = $postagged_text;
        $this->tokens = $pieces;
    }

    function set_document_id($document_id) {
    //
    // remembers which document the tagged text belongs to; collect_stats()
    // uses this id to decide which m_textlab_document row to update
    //
        $this->document_id = $document_id;
    }

    function concept_relation ($concept1, $concept2) {
        // not implemented: empty placeholder that ignores both arguments
        // and returns nothing
    }

    function tag() {
    //
    // makes use of UMLS MRXNS_ENG
    // make sure input is POS tagged text
    // NOTE: this routine also changes POS tag if incorrect
    //
    // Every token carrying one of the selected tags is looked up through
    // get_list_cached_cui(), get_list_umls_nscui(), get_list_umls_norm()
    // and get_list_umls_cscui(), in that order; the first lookup that
    // returns CUIs decides:
    //   - exactly one CUI: it is used as is
    //   - several CUIs from the cache and no_cui_probability() above 0.6:
    //     the token is marked with C0000000
    //   - several CUIs otherwise: disambiguate_concept() picks one; if it
    //     cannot, the token is left untouched
    // The token is then rewritten as term_tag_CUI with the tag returned by
    // change_tag(). Afterwards timing/statistics are stored and date, time
    // and temperature expressions in the joined string are rewritten by
    // date_screen() and vital_signs_screen().
    //
    // Returns the tagged string.
    //
        $start = umlstools::getmicrotime();
        $lookups = array("get_list_cached_cui", "get_list_umls_nscui", "get_list_umls_norm", "get_list_umls_cscui");
        $token_count = count($this->tokens);
        for($i=0; $i<$token_count; $i++) {
            // match only selected tags
            if (!preg_match("/(_[NVJRID])/", $this->tokens[$i])) {
                continue;
            }
            $mytoken = new token($this->tokens[$i]);
            $term = $mytoken->get_term();
            $tag = $mytoken->get_tag();

            // the first/last token has no left/right neighbour: hand NULL to
            // token() explicitly rather than reading an undefined offset
            $left_token = new token(isset($this->tokens[$i-1]) ? $this->tokens[$i-1] : NULL);
            $left_tag = $left_token->get_tag();

            $right_token = new token(isset($this->tokens[$i+1]) ? $this->tokens[$i+1] : NULL);
            $right_tag = $right_token->get_tag();

            foreach ($lookups as $lookup) {
                $cui_list = $this->$lookup($term);
                // the lookups return nothing at all when they find no match
                $found = (is_array($cui_list) ? count($cui_list) : 0);
                if ($found==0) {
                    continue;
                }
                if ($found==1) {
                    $cui = $cui_list[0];
                } elseif ($lookup=="get_list_cached_cui" && $this->no_cui_probability($term)>0.6) {
                    $cui = "C0000000";
                } else {
                    $cui = $this->disambiguate_concept($cui_list, $term, $tag, $left_tag, $right_tag);
                    if (!$cui) {
                        // no candidate could be ranked: keep the token as it is
                        break;
                    }
                }
                $this->_tag_assign_cui($i, $term, $tag, $left_tag, $right_tag, $cui);
                break;
            }
        } // end for loop

        $this->elapsed = umlstools::getmicrotime() - $start;
        $this->collect_stats();

        $taggedstring = implode(" ", $this->tokens);

        // preg_replace_callback() replaces the former /e (eval) patterns;
        // each callback receives the whole match, which is also group 1
        $date_patterns = array(
            "/(([0-9]{4}_MC)(\s+)([A-za-z]+(ry|ch|il|ay|ly|ne|ber)_NN)(\s+)([0-9-]{2}_MC))/",
            "/((DAY_NN_C[0-9]{7})(\s+)([0-9]+)([+-]{0,1}_MC))/",
            "/(([0-9]{4}_MC)(\s+)(hours_NNS_C0439227))/"
        );
        $taggedstring = preg_replace_callback($date_patterns, array($this, "_date_screen_match"), $taggedstring);

        // a separate array keeps the "hours" pattern above from leaking
        // into the vital signs pass
        $vital_patterns = array(
            "/(([0-9]{2,3}_MC)(\s{1})(degrees_NNS_C0542560))/",
            "/(((38|39|40|41|101|102|103|104)(\.)[1-9]_MC))/"
        );
        $taggedstring = preg_replace_callback($vital_patterns, array($this, "_vital_signs_screen_match"), $taggedstring);

        return $taggedstring;
    }

    function _tag_assign_cui($i, $term, $tag, $left_tag, $right_tag, $cui) {
    //
    // helper for tag(): rewrites token $i as term_tag_cui, using the tag
    // returned by change_tag()
    //
        $tag = $this->change_tag($term, $tag, $left_tag, $right_tag, $cui);
        $this->tokens[$i] = $term."_".$tag."_".$cui;
    }

    function _date_screen_match($matches) {
    //
    // helper for tag(): preg_replace_callback adapter for date_screen()
    //
        return $this->date_screen($matches[1]);
    }

    function _vital_signs_screen_match($matches) {
    //
    // helper for tag(): preg_replace_callback adapter for vital_signs_screen()
    //
        return $this->vital_signs_screen($matches[1]);
    }

    function date_screen($datestring) {
    //
    // rewrites a date/time expression made of tagged tokens into a single
    // typed token:
    //   "<yyyy>_MC <Month>_NN <dd>_MC"  -> "<unix timestamp>_TIMEU"
    //                                      (only for a valid calendar date)
    //   "DAY_NN_C<7 digits> <n>[+-]_MC" -> "<n>_TIMED"
    //   "<hhmm>_MC hours_NNS_C0439227"  -> "<hhmm>H_TIMEH hours_NNS_C0439227"
    // anything else is returned unchanged
    //
    // NOTE(review): neither this pattern nor the one in tag() accepts a
    // month name ending in "st", so August dates are never converted.
    //
        // "ly" is part of the suffix list so that July, which tag() already
        // passes in, is recognised here as well
        if (preg_match("/(([0-9]{4}_MC)(\s{1})([A-za-z]+(ry|ch|il|ay|ly|ne|ber)_NN)(\s{1})([0-9-]{2}_MC))/", $datestring)) {
            $date_tokens = explode(" ", $datestring);
            $array_month = array(
                "01" => "January",
                "02" => "February",
                "03" => "March",
                "04" => "April",
                "05" => "May",
                "06" => "June",
                "07" => "July",
                "08" => "August",
                "09" => "September",
                "10" => "October",
                "11" => "November",
                "12" => "December"
            );
            $year = new token($date_tokens[0]);
            $month_temp = new token($date_tokens[1]);
            // stays 0 for an unknown month name, which checkdate() rejects
            $month = 0;
            foreach ($array_month as $month_value=>$month_string) {
                if ($month_string==$month_temp->term) {
                    $month = $month_value;
                    break;
                }
            }
            $day = new token($date_tokens[2]);
            $int_month = (int) $month;
            $int_day = (int) $day->term;
            $int_year = (int) $year->term;
            if (checkdate($int_month, $int_day, $int_year)) {
                $time = mktime(0, 0, 0, $int_month, $int_day, $int_year);
                return $time."_TIMEU";
            }
        }
        if (preg_match("/((DAY_NN_C[0-9]{7})(\s+)([0-9]+)([+-]{0,1}_MC))/", $datestring)) {
            // keep only the day count (group 4)
            $day = preg_replace("/((DAY_NN_C[0-9]{7})(\s+)([0-9]+)([+-]{0,1}_MC))/", "$4", $datestring);
            return $day."_TIMED";
        }
        if (preg_match("/(([0-9]{4}_MC)(\s+)(hours_NNS_C0439227))/", $datestring)) {
            // only the number token is retagged; the "hours" token is kept
            $hours = preg_replace("/([0-9]{4})(_MC)(\s+)(hours_NNS_C0439227)/", "$1H_TIMEH$3$4", $datestring);
            return $hours;
        }
        return $datestring;
    }

    function vital_signs_screen($vital_signs_string) {
    //
    // turns a temperature reading into a single typed token:
    //   "<n>_MC degrees_NNS_C0542560", n from 37 to 45   -> "<n>C_TEMPC"
    //   "<n>_MC degrees_NNS_C0542560", n from 97 to 107  -> "<n>F_TEMPF"
    //   "<n>.<d>_MC", n one of 38 39 40 41               -> "<n>.<d>C_TEMPC"
    //   "<n>.<d>_MC", n one of 101 102 103 104           -> "<n>.<d>F_TEMPF"
    // anything else is returned unchanged
    //
        $whole_degrees = "/([0-9]{2,3})(_MC)(\s{1})(degrees_NNS_C0542560)/";
        if (preg_match($whole_degrees, $vital_signs_string)) {
            // reduce the matched expression to its number, then classify it
            $reading = (int) preg_replace($whole_degrees, "$1", $vital_signs_string);
            if ($reading >= 37 && $reading <= 45) {
                return $reading."C_TEMPC";
            }
            if ($reading >= 97 && $reading <= 107) {
                return $reading."F_TEMPF";
            }
        }

        if (!preg_match("/(((38|39|40|41|101|102|103|104)(\.)[1-9]_MC))/", $vital_signs_string)) {
            return $vital_signs_string;
        }
        $decimal = array();
        if (preg_match("/((38|39|40|41)(\.)[1-9])(_MC)/", $vital_signs_string, $decimal)) {
            return $decimal[1]."C_TEMPC";
        }
        if (preg_match("/((101|102|103|104)(\.)[1-9])(_MC)/", $vital_signs_string, $decimal)) {
            return $decimal[1]."F_TEMPF";
        }

        return $vital_signs_string;
    }

    function baseform($searchterm) {
    //
    // looks up the base form (BAS) stored in umls.LRAGR for the given
    // string (STR) and returns the first one found; returns nothing when
    // the query fails or no row matches
    //
        // the term comes from document text, so escape it before it is
        // placed inside the quoted SQL literal
        $sql = "select BAS from umls.LRAGR where STR like '".mysql_real_escape_string($searchterm)."' limit 10";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                if (list($baseform) = mysql_fetch_array($result)) {
                    return $baseform;
                }
            }
        }
    }

    function disambiguate_concept($concept_list, $term, $tag=NULL, $left_tag=NULL, $right_tag=NULL) {
    //
    // picks one concept out of $concept_list for $term
    //
    // Each candidate is weighted as
    //   (1 / corpus_semantic_distance) * corpus_bigram_probability * POS probability
    // where the POS probability comes from corpus_pos_probability() when
    // all three tags are given and is 0.01 otherwise. Candidates whose
    // distance is not positive get no weight. The running totals in
    // track_disambig_array are updated and a trace is printed for every
    // candidate.
    //
    // Returns the concept with the highest weight, or false when no
    // candidate received a weight.
    //
        $points = 0;
        $weight = array();
        if (!isset($this->track_disambig_array["points"])) {
            $this->track_disambig_array["points"] = 0;
        }
        foreach($concept_list as $concept) {
            $distance = $this->corpus_semantic_distance($concept);
            $points++;
            $bigram_prob = $this->corpus_bigram_probability($concept);
            if ($tag<>NULL && $left_tag<>NULL && $right_tag<>NULL) {
                $pos_prob = $this->corpus_pos_probability($concept, $term, $tag, $left_tag, $right_tag);
            } else {
                $pos_prob = 0.01;
            }

            $this->track_disambig_array["distance"] = (float) $this->track_disambig_array["distance"] + $distance;
            $this->track_disambig_array["points"] = (float) $this->track_disambig_array["points"] + 1;
            $this->track_disambig_array["bigram"] = (float) $this->track_disambig_array["bigram"] + $bigram_prob;
            $this->track_disambig_array["pos"] = (float) $this->track_disambig_array["pos"] + $pos_prob;

            //$frequency = $this->corpus_cooccurrence_frequency($concept);
            //$weight["$concept"] = (float) 1/$distance * (float) $frequency;
            if ($distance>0) {
                $weight["$concept"] = (float) 1/$distance * (float) $bigram_prob * (float) $pos_prob;
            }
            print "<br>TERM: $term<br>";
            print "CONCEPT: $concept<br>";
            print "DISTANCE: ".($distance>0?(1/$distance):"<font color='red'>NA</font>")." CUM=".$this->track_disambig_array["distance"]."<br>";
            // the counter is only printed here; this line used to assign to
            // it as well, which turned it into a string such as "3<br>"
            print "DISAMBIGUATION POINTS: ".($points>0?$points:0)."CUM=".$this->track_disambig_array["points"]."<br>";
            print "BIGRAM PROB: $bigram_prob CUM=".$this->track_disambig_array["bigram"]."<br>";
            print "POS PROB: $pos_prob CUM=".$this->track_disambig_array["pos"]."<br>";
            // no weight exists for a candidate without a positive distance
            print "WEIGHT: ".(isset($weight["$concept"])?$weight["$concept"]:"")."<br>";
        }
        if (count($weight)==0) {
            return false;
        }
        arsort($weight);
        print ">>> <font color='green'><b>DISAMBIGUATION RESULTS [$term]:</b></font><br>";
        $i=1;
        foreach ($weight as $key=>$value) {
            print "RANK=$i CUI=$key SCORE=$value<br>";
            $i++;
        }
        print "<br>";
        // after arsort() the first key belongs to the highest weight
        reset($weight);
        return key($weight);
    }

    function collect_stats() {
    //
    // builds an HTML summary of the last tag() run and stores it in the
    // stats column of the document's m_textlab_document row:
    //   - share of CUIs delivered by each lookup source
    //   - number of disambiguated candidates and the share of each
    //     disambiguation total (distance, bigram, POS)
    //   - elapsed time
    //
        $source_cache_total = count($this->track_source_array["cache"]["array"]);
        $source_string_total = count($this->track_source_array["string"]["array"]);
        //$source_word_total = count($this->track_source_array["word"]["array"]);
        $source_norm_total = count($this->track_source_array["norm"]["array"]);
        $source_concept_total = count($this->track_source_array["concept"]["array"]);
        $source_total = ($source_cache_total + $source_string_total + $source_norm_total + $source_concept_total);
        // when nothing was looked up every share is reported as 0 instead of
        // dividing by zero
        $source_divisor = ($source_total>0?$source_total:1);
        $source_stats = "<b>SOURCE RESULTS:</b><br>".
                        "CACHE: ".($source_cache_total/$source_divisor)."<br>".
                        "STRING: ".($source_string_total/$source_divisor)."<br>".
                        //"WORD: ".($source_word_total/$source_total)."<br>".
                        "NORM: ".($source_norm_total/$source_divisor)."<br>".
                        "CONCEPT: ".($source_concept_total/$source_divisor)."<br>";
        $disambig_distance_total = (float) $this->track_disambig_array["distance"];
        $disambig_bigram_total = (float) $this->track_disambig_array["bigram"];
        $disambig_pos_total = (float) $this->track_disambig_array["pos"];
        // the counter may not exist yet if nothing was ever disambiguated
        $disambig_total_points = (isset($this->track_disambig_array["points"]) ? (float) $this->track_disambig_array["points"] : 0.0);
        $disambig_total = ($disambig_distance_total + $disambig_bigram_total + $disambig_pos_total);
        // same zero guard as for the source shares
        $disambig_divisor = ($disambig_total!=0?$disambig_total:1);
        $disambig_stats = "<b>DISAMBIGUATION RESULTS:</b><br>".
                          "TOTAL DISAMBIGUATED: ".$disambig_total_points."<br>".
                          "DISTANCE: ".($disambig_distance_total/$disambig_divisor)."<br>".
                          "BIGRAM: ".($disambig_bigram_total/$disambig_divisor)."<br>".
                          "POS: ".($disambig_pos_total/$disambig_divisor)."<br><br>".
                          "<b>ELAPSED TIME (seconds):</b><br>".
                          $this->elapsed." seconds<br>";
        $sql_string = $source_stats."<br>".$disambig_stats;
        module::execsql("update m_textlab_document set stats = '$sql_string' where document_id = '".$this->document_id."'");

    }

    function corpus_pos_probability($concept, $myterm, $tag, $left_tag, $right_tag) {
    //
    // estimates from m_textlab_pos how often $concept was recorded for
    // $myterm with the same tag and the same right-hand neighbour tag
    //
    // The result is (summed frequency of matching rows) / (summed frequency
    // of all rows selected for the concept). For C0000000 only rows of
    // $myterm are selected. $left_tag is accepted but currently not part
    // of the comparison. Returns 0.001 when there is nothing to count, when
    // the ratio is 0, or when the query fails.
    //
        $safe_concept = mysql_real_escape_string($concept);
        if ($concept=="C0000000") {
            // NOTE(review): this branch also prints the statement it runs;
            // it looks like leftover debugging output - confirm before removing
            print $sql = "select frequency, term, term_tag, left_tag, right_tag, concept_id from m_textlab_pos where ".
                   "concept_id = '$safe_concept' and term = '".mysql_real_escape_string($myterm)."'";
        } else {
            $sql = "select frequency, term, term_tag, left_tag, right_tag, concept_id from m_textlab_pos where ".
                   "concept_id = '$safe_concept'";
               //"term = '$myterm'";
        }
        $total = 0;
        $pos_freq = 0;
        if ($result = mysql_query($sql)) {
            while (list($frequency, $term, $self, $left, $right, $concept_id) = mysql_fetch_array($result)) {
                $total = $total + $frequency;
                //if ($left == $left_tag && $right == $right_tag && $term==$myterm && $tag==$self) {
                if ($right == $right_tag && $term==$myterm && $tag==$self) {
                    $pos_freq = $pos_freq + $frequency;
                }
            }
        }
        if ($total==0) {
            return 0.001;
        }
        $pos_prob = $pos_freq/$total;
        return ($pos_prob>0?$pos_prob:0.001);

    }

    function corpus_cooccurrence_frequency($concept) {
    //
    // averages umls_cooccurrence($concept, <cui>) over every row of
    // m_textlab_terms that has a real concept id; returns 0.01 when the
    // average is not positive and nothing when the query fails or the
    // table has no such rows
    //
        $result = mysql_query("select concept_id from m_textlab_terms where concept_id <> '' and concept_id<>'C0000000'");
        if (!$result) {
            return;
        }
        if (!mysql_num_rows($result)) {
            return;
        }
        $rows_seen = 0;
        while (list($other_cui) = mysql_fetch_array($result)) {
            $rows_seen++;
            $frequency[] = $this->umls_cooccurrence($concept, $other_cui);
        }
        $mean_frequency = (float) array_sum($frequency)/$rows_seen;
        if ($mean_frequency>0) {
            return $mean_frequency;
        }
        return 0.01;
    }

    function get_list_cached_cui($searchterm) {
    //
    // looks the term up in the local cache table m_textlab_terms
    //
    // Rows are matched on the lower-cased and the capitalised form of the
    // base form (or of the term itself when baseform() finds none), on the
    // term as given and, when there is one, on the base form. The CUIs
    // found are added to the "cache" tracking entry and printed.
    //
    // Returns the unique CUIs, or nothing when the query fails or no row
    // matches.
    //
        $baseform = $this->baseform(strtolower($searchterm));
        $sqlterm = trim(strlen($baseform)>0?$baseform:$searchterm);

        $candidates = array(strtolower($sqlterm), ucfirst($sqlterm), $searchterm);
        if (strlen($baseform)>0) {
            // without a base form this clause would read "term like ''" and
            // pull in the concepts of rows whose term is blank
            $candidates[] = $baseform;
        }
        $clauses = array();
        foreach ($candidates as $candidate) {
            // terms come from document text: escape before quoting
            $clauses[] = "term like '".mysql_real_escape_string($candidate)."'";
        }
        $sql_cache = "select concept_id, sum(frequency) ".
                     "from m_textlab_terms ".
                     "where (".implode(" or ", $clauses).") ".
                     "and concept_id <> '' ".
                     "group by concept_id, frequency desc";
        if ($result_cache = mysql_query($sql_cache)) {
            if (mysql_num_rows($result_cache)) {
                $cui_list = array();
                while (list($concept, $frequency) = mysql_fetch_array($result_cache)) {
                    $cui_list[] = $concept;
                }
                $cui_list = array_unique($cui_list);
                $this->track_source_array["cache"]["array"] = array_merge($this->track_source_array["cache"]["array"], $cui_list);
                $this->track_source_array["cache"]["count"] = (int) $this->track_source_array["cache"]["count"] + count($cui_list);
                print "CACHED [$searchterm]: ".implode(" ", $cui_list)."<br>";
                return $cui_list;
            }
        }
    }

    function get_list_umls_nscui($searchterm) {
    //
    // looks the term up in the normalized strings (NSTR) of umls.MRXNS_ENG
    //
    // Rows are matched on the lower-cased and the capitalised term and,
    // for a single-word term that has one, on its base form. The CUIs found
    // are added to the "string" tracking entry and printed.
    //
    // Returns the unique CUIs, or nothing when the query fails or no row
    // matches.
    //
        $baseform = "";
        if (count(explode(" ", $searchterm))<=1) {
            // base forms are only looked up for single words
            $baseform = $this->baseform(strtolower($searchterm));
        }

        // terms come from document text: escape before quoting
        $sql = "select s.CUI ".
               "from umls.MRXNS_ENG s ".
               "where (s.NSTR like '".mysql_real_escape_string(strtolower($searchterm))."' ".
               "or s.NSTR like '".mysql_real_escape_string(ucfirst($searchterm))."'";
        if (strlen($baseform)>0) {
            // without a base form this clause would compare NSTR with ''
            $sql .= " or s.NSTR = '".mysql_real_escape_string($baseform)."'";
        }
        $sql .= ")";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                $cui_list = array();
                while (list($cui) = mysql_fetch_array($result)) {
                    $cui_list[] = $cui;
                }
                $cui_list = array_unique($cui_list);
                $this->track_source_array["string"]["array"] = array_merge($this->track_source_array["string"]["array"], $cui_list);
                $this->track_source_array["string"]["count"] = (int) $this->track_source_array["string"]["count"] + count($cui_list);
                print "STRING [$searchterm]: ".implode(" ", $cui_list)."<br>";
                return $cui_list;
            }
        }
    }

    function get_list_umls_cscui($searchterm) {
    //
    // looks the term up in the concept strings (STR) of umls.MRCONSO
    //
    // Rows are matched on the lower-cased and the capitalised term and,
    // for a single-word term that has one, on its base form. The CUIs found
    // are added to the "concept" tracking entry and printed.
    //
    // Returns the unique CUIs, or nothing when the query fails or no row
    // matches.
    //
        $baseform = "";
        if (count(explode(" ", $searchterm))<=1) {
            // base forms are only looked up for single words
            $baseform = $this->baseform(strtolower($searchterm));
        }

        // terms come from document text: escape before quoting
        $sql = "select c.CUI ".
               "from umls.MRCONSO c ".
               "where (c.STR like '".mysql_real_escape_string(strtolower($searchterm))."' ".
               "or c.STR like '".mysql_real_escape_string(ucfirst($searchterm))."'";
        if (strlen($baseform)>0) {
            $sql .= " or c.STR like '".mysql_real_escape_string($baseform)."'";
        }
        $sql .= ")";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                $cui_list = array();
                while (list($cui) = mysql_fetch_array($result)) {
                    $cui_list[] = $cui;
                }
                $cui_list = array_unique($cui_list);
                $this->track_source_array["concept"]["array"] = array_merge($this->track_source_array["concept"]["array"], $cui_list);
                $this->track_source_array["concept"]["count"] = (int) $this->track_source_array["concept"]["count"] + count($cui_list);
                print "CONCEPT [$searchterm]: ".implode(" ", $cui_list)."<br>";
                return $cui_list;
            }
        }
    }

    function get_nominal_term($searchterm) {
    //
    // Fetch the BAS1 values of umls.LRNOM rows whose BAS2 equals the search
    // term (joined to umls.LRAGR on EUI2).
    // input: term to look up in LRNOM.BAS2
    // output: array of unique BAS1 values; empty array when the query fails
    //         or nothing matches
    //

        // escape the term so a quote inside it cannot break the query
        $sql = "select n.BAS1 from umls.LRNOM n, umls.LRAGR a ".
               "where a.EUI = n.EUI2 and n.BAS2 = '".mysql_real_escape_string($searchterm)."'";
        $nom_array = array();
        if ($result = mysql_query($sql)) {
            while (list($nom) = mysql_fetch_array($result)) {
                $nom_array[] = $nom;
            }
        }
        // the join can repeat a BAS1 value once per LRAGR row
        return array_unique($nom_array);
    }

    function get_list_umls_norm($searchterm) {
    //
    // Look up candidate CUIs through the nominal forms of a term.
    // input: a single term
    // output: array of unique CUIs for the first nominal form whose query
    //         matches; nothing (null) when there is no nominal form or no
    //         query matches
    //
    // The nominal forms come from get_nominal_term() applied to the base
    // form (or to the term itself when no base form is returned). Each
    // query also matches the capitalised original term and the base form.
    // Hits are appended to $this->track_source_array["norm"] and echoed as
    // a debug line.
    //

        $baseform = (string) $this->baseform(strtolower($searchterm));
        $sqlterm = (strlen($baseform)>0?$baseform:$searchterm);

        // these two clauses are the same for every nominal form
        $ucfirst_sql = mysql_real_escape_string(ucfirst($searchterm));
        $base_sql = mysql_real_escape_string($baseform);

        $nom_array = $this->get_nominal_term($sqlterm);
        if (count($nom_array)>0) {
            $cui_list = array();
            foreach ($nom_array as $search) {
                $sql = "select s.CUI ".
                       "from umls.MRXNS_ENG s ".
                       "where (s.NSTR like '".mysql_real_escape_string(trim(strtolower($search)))."' ".
                       "or s.NSTR like '".$ucfirst_sql."' ".
                       "or s.NSTR like '".$base_sql."')";
                if ($result = mysql_query($sql)) {
                    if (mysql_num_rows($result)) {
                        while (list($cui) = mysql_fetch_array($result)) {
                            $cui_list[] = $cui;
                        }
                        $cui_list = array_unique($cui_list);

                        // the tracker may not have been initialised yet for this source
                        $tracked = array();
                        if (isset($this->track_source_array["norm"]["array"]) && is_array($this->track_source_array["norm"]["array"])) {
                            $tracked = $this->track_source_array["norm"]["array"];
                        }
                        $seen = (isset($this->track_source_array["norm"]["count"]) ? (int) $this->track_source_array["norm"]["count"] : 0);
                        $this->track_source_array["norm"]["array"] = array_merge($tracked, $cui_list);
                        $this->track_source_array["norm"]["count"] = $seen + count($cui_list);

                        print "NORM [$searchterm]: ".implode(" ", $cui_list)."<br>";
                        // first nominal form with hits wins
                        return $cui_list;
                    }
                }
            }
        }
    }

    function get_list_umls_nwcui($searchterm) {
    //
    // Look up candidate CUIs for a term in umls.MRXNW_ENG by its NWD column.
    // limited use because database intensive
    // input: a single term
    // output: array of unique CUIs when the query yields between 1 and 5
    //         rows; nothing (null) otherwise
    //
    // The query is capped at 10 grouped CUIs and results are only used when
    // at most 5 come back. Hits are appended to
    // $this->track_source_array["word"] and echoed as a debug line.
    //

        $baseform = (string) $this->baseform(strtolower($searchterm));
        $sqlterm = (strlen($baseform)>0?$baseform:$searchterm);

        // escape every value so a quote inside the term cannot break the query
        $sqlw = "select w.CUI ".
                "from umls.MRXNW_ENG w ".
                "where w.NWD like '".mysql_real_escape_string(strtolower($sqlterm))."' ".
                "or w.NWD like '".mysql_real_escape_string(ucfirst($sqlterm))."' ".
                "group by w.CUI limit 10";
        if ($result = mysql_query($sqlw)) {
            $rows = mysql_num_rows($result);
            if ($rows > 0 && $rows <=5) {
                $cui_list = array();
                while (list($cui) = mysql_fetch_array($result)) {
                    $cui_list[] = $cui;
                }
                $cui_list = array_unique($cui_list);

                // the tracker may not have been initialised yet for this source
                $tracked = array();
                if (isset($this->track_source_array["word"]["array"]) && is_array($this->track_source_array["word"]["array"])) {
                    $tracked = $this->track_source_array["word"]["array"];
                }
                $seen = (isset($this->track_source_array["word"]["count"]) ? (int) $this->track_source_array["word"]["count"] : 0);
                $this->track_source_array["word"]["array"] = array_merge($tracked, $cui_list);
                $this->track_source_array["word"]["count"] = $seen + count($cui_list);

                print "WORD [$searchterm]: ".implode(" ", $cui_list)."<br>";
                return $cui_list;
            }
        }
    }

    function no_cui_probability($searchterm) {
    //
    // Fraction of m_textlab_terms rows for a term that carry the
    // placeholder concept "C0000000".
    // input: term (matched as given and with its first letter capitalised)
    // output: value between 0 and 1; nothing (null) when the query fails or
    //         the term has no rows
    //

        // escape the term so a quote inside it cannot break the query
        $sql = "select term, concept_id from m_textlab_terms ".
               "where term='".mysql_real_escape_string($searchterm)."' ".
               "or term='".mysql_real_escape_string(ucfirst($searchterm))."'";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                // counters start at zero so a term with no placeholder rows
                // yields 0 instead of relying on an undefined variable
                $total = 0;
                $freq = 0;
                while ($row = mysql_fetch_array($result)) {
                    $total++;
                    if ($row[1] == "C0000000") {
                        $freq++;
                    }
                }
                return $freq/$total;
            }
        }
    }

    function umls_cooccurrence($concept1, $concept2) {
    //
    // Summed COF value from umls.MRCOC for an ordered pair of concepts.
    // input: two CUIs (CUI1, CUI2)
    // output: sum(COF) as fetched from the database, 0 when the pair has no
    //         rows, nothing (null) when the query fails
    //

        $query = "select sum(COF) from umls.MRCOC where CUI1 = '$concept1' and CUI2 = '$concept2' group by CUI1, CUI2";
        $rows = mysql_query($query);
        if (!$rows) {
            return;
        }
        if (!mysql_num_rows($rows)) {
            return 0;
        }
        $first = mysql_fetch_array($rows);
        if ($first) {
            return $first[0];
        }
    }

    function corpus_bigram_probability($concept) {
    //
    // Share of the total bigram frequency in m_textlab_bigrams that belongs
    // to bigrams containing the given concept on either side.
    // input: a CUI
    // output: ratio (involved frequency / total frequency); 0.01 when the
    //         table is empty, the total frequency is zero, or the ratio is
    //         not positive; nothing (null) when the query fails
    //

        $sql = "select concept1_id, concept2_id, distance, frequency ".
               "from m_textlab_bigrams ";
               //"where concept1_id <> 'C0000000' and concept2_id <> 'C0000000'";
        $total_frequency = 0;
        $concept_frequency = 0;
        if ($result = mysql_query($sql)) {
            while (list($cui1, $cui2, $distance, $frequency) = mysql_fetch_array($result)) {
                $total_frequency += $frequency;
                if ($cui1==$concept || $cui2==$concept) {
                    $concept_frequency += $frequency;
                }
            }
            // guard the division: rows may exist while every frequency is 0
            if ($total_frequency <= 0) {
                return 0.01;
            }
            $mean_probability = $concept_frequency/$total_frequency;
            return ($mean_probability>0?$mean_probability:0.01);
        }
    }

    function learned_semantic_distance($concept1, $concept2) {
    //
    // Stored distance between an ordered pair of concepts in
    // m_textlab_bigrams.
    // input: two CUIs (concept1_id, concept2_id)
    // output: the distance of the first matching row; nothing (null) when
    //         the query fails or the pair is not stored
    //

        // the raw SQL is no longer echoed to the page; values are escaped
        $sql = "select distance from m_textlab_bigrams ".
               "where concept1_id = '".mysql_real_escape_string($concept1)."' ".
               "and concept2_id = '".mysql_real_escape_string($concept2)."'";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                list($distance) = mysql_fetch_array($result);
                return $distance;
            }
        }
    }

    function semantic_distance($concept1, $concept2) {

    //
    // Semantic distance between two concepts, measured on the semantic
    // tree number (STN) stored in umls.MRSTY.
    // input: two CUIs
    // output: node count as described below; nothing (null) when either
    //         concept has no MRSTY row
    //
    // For each concept the shortest STN is split on "." into a path of
    // levels (a leading "A1." / "B1." is first rewritten to "A.1." /
    // "B.1."). Starting from both path lengths, one is subtracted from each
    // for every position (up to the shorter path) where the two paths hold
    // the identical level. The result is the two remainders, plus the
    // difference in path length, plus 1 when the paths start at different
    // top nodes.
    //

        // collect the STN path of each concept, keyed by the CUI the database returns
        $paths = array();
        foreach (array($concept1, $concept2) as $wanted) {
            $query = "select CUI, STN from umls.MRSTY where CUI = '$wanted' order by char_length(STN)";
            $rows = mysql_query($query);
            if (!$rows || !mysql_num_rows($rows)) {
                continue;
            }
            if (list($found_cui, $tree_number) = mysql_fetch_array($rows)) {
                $tree_number = preg_replace("/\b(A|B)([0-9]{1})(\.)/", "$1.$2.", $tree_number);
                $paths["$found_cui"] = explode(".", $tree_number);
            }
        }

        // both concepts need a path
        if (!isset($paths["$concept1"]) || !isset($paths["$concept2"])) {
            return;
        }
        $path_a = $paths["$concept1"];
        $path_b = $paths["$concept2"];

        $remaining_a = count($path_a);
        $remaining_b = count($path_b);
        $length_gap = abs($remaining_a - $remaining_b);
        $shared_depth = min($remaining_a, $remaining_b);

        // one extra step when the two paths start at different top nodes
        $root_step = ($path_a[0] <> $path_b[0]) ? 1 : 0;

        // every level that is identical in both paths shortens both remainders
        for ($level = 0; $level < $shared_depth; $level++) {
            if ($path_a[$level] === $path_b[$level]) {
                $remaining_a--;
                $remaining_b--;
            }
        }

        return $remaining_a + $remaining_b + $length_gap + $root_step;
    }

    function corpus_semantic_distance($concept) {
    //
    // Mean semantic_distance() between a concept and every concept_id row
    // in m_textlab_terms.
    // input: a CUI; an optional second argument restricts the rows to one
    //        document_id
    // output: mean distance, 0 when there are no rows, nothing (null) when
    //         the query fails
    //

        $query = "select concept_id from m_textlab_terms";
        if (func_num_args() > 1) {
            $passed = func_get_args();
            if (isset($passed[1])) {
                $query .= " where document_id = '".$passed[1]."'";
            }
        }

        $rows = mysql_query($query);
        if (!$rows) {
            return;
        }

        $distances = array();
        if (mysql_num_rows($rows)) {
            while (list($other_cui) = mysql_fetch_array($rows)) {
                $distances[] = $this->semantic_distance($other_cui, $concept);
            }
        }

        $sample_size = count($distances);
        if ($sample_size == 0) {
            return 0;
        }
        return array_sum($distances)/$sample_size;
    }

    function tagprob ($term, $cui, $tag, $param, $value) {
    //
    // Relative frequency of one tag value for a term/concept/tag
    // combination in m_textlab_pos.
    // input: term, CUI and term tag selecting the rows; $param picks the
    //        column to group on ("RTAG" = right_tag, "LTAG" = left_tag,
    //        "STAG" = term_tag); $value is the tag value whose share is
    //        wanted
    // output: frequency of $value divided by the total frequency; 0.001
    //         when $param is unknown, the query fails, no rows match or
    //         $value does not occur
    //

        $probability = 0.001;
        switch ($param) {
        case "RTAG":
            $field = "right_tag";
            break;
        case "LTAG":
            $field = "left_tag";
            break;
        case "STAG":
            $field = "term_tag";
            break;
        default:
            // unknown selector: no column to group on, so skip the query
            return $probability;
        }

        // escape every value so a quote inside the term cannot break the query
        $lower_sql = mysql_real_escape_string(strtolower($term));
        $upper_sql = mysql_real_escape_string(strtoupper($term));
        $ucfirst_sql = mysql_real_escape_string(ucfirst($term));
        $sql = "select $field, sum(frequency) from m_textlab_pos ".
               "where (term = '".$lower_sql."' or term = '".$upper_sql."' or term = '".$ucfirst_sql."') ".
               "and concept_id = '".mysql_real_escape_string($cui)."' ".
               "and term_tag = '".mysql_real_escape_string($tag)."' group by $field";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                $total = 0;
                $frequencies = array();
                while (list($tag_value, $freq) = mysql_fetch_array($result)) {
                    $frequencies["$tag_value"] = $freq;
                    $total = $total+$freq;
                }
                foreach ($frequencies as $tag_value=>$frequency) {
                    if ($tag_value==$value) {
                        $probability = $frequency/$total;
                        print "$param PROB [$value] = $frequency/$total: $probability<br>";
                        return $probability;
                    }
                }
            }
        }
        return $probability;
    }

    function change_tag($input_term, $input_tag, $left_tag, $right_tag, $input_cui) {
    //
    // Pick the part-of-speech tag for a term from the tags stored for it
    // in m_textlab_pos under the given concept.
    // input: term, its current tag, the left and right tags used for
    //        scoring, and the CUI assigned to the term
    // output: the stored tag with the highest score when more than one
    //         term/tag group exists; otherwise the input tag; nothing
    //         (null) when the query fails
    //
    // Each stored tag is scored as the sum of three tagprob() values
    // (left, right and self). Progress is echoed as debug output.
    //

        print "<b>CHANGE POS TAG ROUTINE:</b><br>";
        print "INPUT TERM: [$left_tag] $input_term [$input_tag] [$right_tag]<br>";

        // escape every value so a quote inside the term cannot break the query
        $lower_sql = mysql_real_escape_string(strtolower($input_term));
        $ucfirst_sql = mysql_real_escape_string(ucfirst($input_term));
        $upper_sql = mysql_real_escape_string(strtoupper($input_term));
        $sql = "select term, term_tag, left_tag, right_tag, sum(frequency) freq ".
               "from m_textlab_pos ".
               "where concept_id = '".mysql_real_escape_string($input_cui)."' ".
               "and (term like '".$lower_sql."' or term like '".$ucfirst_sql."' or term like '".$upper_sql."') ".
               "group by term, term_tag";
        if ($result = mysql_query($sql)) {
            // only worth scoring when there is more than one candidate
            if (mysql_num_rows($result)>1) {
                $tree = array();
                while (list($term, $self, $left, $right, $freq) = mysql_fetch_array($result)) {
                    $different = ($self<>$input_tag?"<font color='red'>DIFFERENT</font>":"<font color='blue'>SAME</font>");
                    print "DB TERM: [$left] $term [$self] [$right]<br>";
                    print "DB TAG: $self [$freq] $different<br>";
                    $left_prob = $this->tagprob($term, $input_cui, $self, "LTAG", $left_tag);
                    $right_prob = $this->tagprob($term, $input_cui, $self, "RTAG", $right_tag);
                    $self_prob = $this->tagprob($term, $input_cui, $self, "STAG", $input_tag);
                    $tree["$self"] = $left_prob + $right_prob + $self_prob;
                }
                // highest score first; the first entry is the answer
                arsort($tree);
                print_r($tree);
                foreach($tree as $final=>$value) {
                    print "FINAL TAG: $final [$value]<br>";
                    return $final;
                }
            }
            print "FINAL TAG: $input_tag<br>";
            return $input_tag;
        }

    }

    function get_cached_cui($searchterm) {
    //
    // Choose a single CUI for a term from the concepts already stored for
    // it in m_textlab_terms.
    // input: a term (reduced to its base form when spellcheck::baseform()
    //        returns one)
    // output: "C0000000" as soon as that concept is met and
    //         no_cui_probability() exceeds 0.6; otherwise the concept with
    //         the highest corpus_bigram_probability(), ties resolved by the
    //         smaller corpus_semantic_distance(); nothing (null) when the
    //         query fails or the term has no rows
    //
    // NOTE(review): the query groups by concept_id AND frequency, so one
    // concept can come back in several rows - confirm whether
    // "group by concept_id" alone was intended.
    //

        $baseform = spellcheck::baseform(strtolower($searchterm));
        $searchterm = (strlen($baseform)>0?$baseform:$searchterm);

        // escape the term so a quote inside it cannot break the query
        $sql_cache = "select concept_id, sum(frequency) ".
                     "from m_textlab_terms ".
                     "where term like '".mysql_real_escape_string($searchterm)."' ".
                     "group by concept_id, frequency desc";
        if ($result_cache = mysql_query($sql_cache)) {
            if (mysql_num_rows($result_cache)) {
                $final_cui = "";
                $max_prob = 0;
                while (list($cui) = mysql_fetch_array($result_cache)) {
                    if ($cui=="C0000000") {
                        $no_cui_prob = $this->no_cui_probability($searchterm);
                        if ($no_cui_prob>0.6) {
                            return "C0000000";
                        }
                    }
                    $probability = $this->corpus_bigram_probability($cui);
                    if ($probability > $max_prob) {
                        $max_prob = $probability;
                        $final_cui = $cui;
                    } elseif ($probability == $max_prob) {
                        // genuine tie only: a new maximum used to fall into
                        // this branch too and compared the concept with
                        // itself, costing two full corpus scans for nothing
                        $distance_final_cui = $this->corpus_semantic_distance($final_cui);
                        $distance_cui = $this->corpus_semantic_distance($cui);
                        $final_cui = ($distance_final_cui<$distance_cui?$final_cui:$cui);
                    }
                }
                return $final_cui;
            }
        }
    }

    function get_ns_cui($searchterm) {
    //
    // Choose a single CUI for a term from umls.MRXNS_ENG (NSTR column,
    // LAT = 'ENG').
    // input: a term (reduced to its base form when spellcheck::baseform()
    //        returns one)
    // output: the matching CUI with the smallest
    //         corpus_semantic_distance(); nothing (null) when the query
    //         fails or no row matches
    //

        $baseform = spellcheck::baseform(strtolower($searchterm));
        $searchterm = (strlen($baseform)>0?$baseform:$searchterm);

        // escape the term so a quote inside it cannot break the query
        $sql = "select s.CUI ".
               "from umls.MRXNS_ENG s ".
               "where s.LAT = 'ENG' ".
               "and s.NSTR like '".mysql_real_escape_string(strtolower($searchterm))."'";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                // parallel arrays: $d[$k] is the distance of concept $c[$k]
                $d = array();
                $c = array();
                while (list($cui) = mysql_fetch_array($result)) {
                    $d[] = $this->corpus_semantic_distance($cui);
                    $c[] = $cui;
                }
                return $this->get_final_cui($d, $c);
            }
        }
    }

    function get_nw_cui($searchterm) {
    //
    // Choose a single CUI for a term from umls.MRXNW_ENG (NWD column,
    // LAT = 'ENG').
    // input: a term (reduced to its base form when $this->baseform()
    //        returns one)
    // output: the matching CUI with the smallest
    //         corpus_semantic_distance(), only when the query yields
    //         between 1 and 5 rows; nothing (null) otherwise
    //

        $baseform = $this->baseform(strtolower($searchterm));
        $searchterm = (strlen($baseform)>0?$baseform:$searchterm);

        // escape the term so a quote inside it cannot break the query
        $sqlw = "select w.CUI ".
                "from umls.MRXNW_ENG w ".
                "where w.LAT = 'ENG' ".
                "and w.NWD like '".mysql_real_escape_string(strtolower($searchterm))."'";
        // run the query that was just built ($sqlw); the previous code
        // passed an undefined $sql, so this lookup could never succeed
        if ($result = mysql_query($sqlw)) {
            $rows = mysql_num_rows($result);
            if ($rows<=5 && $rows>0) {
                // parallel arrays: $d[$k] is the distance of concept $c[$k]
                $d = array();
                $c = array();
                while (list($cui) = mysql_fetch_array($result)) {
                    $d[] = $this->corpus_semantic_distance($cui);
                    $c[] = $cui;
                }
                return $this->get_final_cui($d, $c);
            }
        }
    }
    function get_final_cui($array_distance, $array_cui) {
    //
    // Pick the concept with the smallest distance.
    // input: two parallel arrays - distances, and the CUI for each distance
    //        under the same key
    // output: the CUI whose distance sorts first; nothing (null) when the
    //         distance array is empty
    //

        asort($array_distance);
        if (empty($array_distance)) {
            return;
        }
        // asort keeps the keys, so the first key points at the nearest concept
        reset($array_distance);
        $nearest_key = key($array_distance);
        return $array_cui[$nearest_key];
    }

    function predict() {
    //
    // Assign a concept to every token in $this->tokens whose tag starts
    // with N, V, J or R, and rebuild the tagged string.
    // output: all tokens joined with single spaces
    //
    // The concept comes from the first lookup that returns a value:
    // get_cached_cui(), then get_ns_cui(), then get_nw_cui(). A token with
    // a real concept becomes term_tag_CUI, where the tag may be replaced by
    // change_tag(); a token with no concept or with "C0000000" becomes
    // term_tag.
    //

        $token_count = count($this->tokens);
        for($i=0; $i<$token_count; $i++) {

            // match only selected tags
            if (!preg_match("/(_[NVJR])/", $this->tokens[$i])) {
                continue;
            }
            $mytoken = new token($this->tokens[$i]);
            $searchterm = $mytoken->get_term();
            $tag = $mytoken->get_tag();

            // lookup cascade: stop at the first source that answers
            $final_cui = $this->get_cached_cui($searchterm);
            if (!$final_cui) {
                $final_cui = $this->get_ns_cui($searchterm);
            }
            if (!$final_cui) {
                $final_cui = $this->get_nw_cui($searchterm);
            }

            if ($final_cui && $final_cui<>"C0000000") {
                // change_tag() takes five arguments; it used to be called
                // with three, which put the CUI into its $left_tag slot.
                // The neighbouring tokens supply the left and right tags.
                $left_tag = "";
                if ($i > 0) {
                    $left_token = new token($this->tokens[$i-1]);
                    $left_tag = (string) $left_token->get_tag();
                }
                $right_tag = "";
                if ($i+1 < $token_count) {
                    $right_token = new token($this->tokens[$i+1]);
                    $right_tag = (string) $right_token->get_tag();
                }
                $change_tag = $this->change_tag($searchterm, $tag, $left_tag, $right_tag, $final_cui);
                $newtoken = $searchterm."_".($change_tag?$change_tag:$tag)."_".$final_cui;
            } else {
                $newtoken = $searchterm."_".$tag;
            }
            $this->tokens[$i] = $newtoken;
        } // end for loop
        $taggedstring = implode(" ", $this->tokens);
        return $taggedstring;
    }

    // end of concept class
}

/** ************************************************
** TOKEN CLASS
** $Author: Herman Tolentino MD
** $Last update: 9/15/05
*/
class token {

    // the token split on "_": [0] term, [1] tag, [2] CUI when present
    var $token_array;
    // the token string exactly as given
    var $token;
    var $term;
    var $tag;
    // third field of the token, "" when the token has fewer than three fields
    var $cui;

    //
    // Split a "term_tag" or "term_tag_CUI" token into its fields.
    // input: the token string
    //
    // __construct is what PHP 5 and later call; PHP 8 no longer treats a
    // method named after the class as the constructor.
    //
    function __construct($input) {
        $this->token = $input;
        $this->token_array = explode("_", $input);
        if (isset($this->token_array[0])) {
            $this->term = $this->token_array[0];
        }
        if (isset($this->token_array[1])) {
            $this->tag = $this->token_array[1];
        }
        if (count($this->token_array)>2) {
            $this->cui = $this->token_array[2];
        } else {
            $this->cui = "";
        }
    }

    //
    // Old-style constructor, kept so PHP 4 and any direct caller of
    // token() keep working.
    //
    function token($input) {
        $this->__construct($input);
    }

    // output: the first field of the token; nothing (null) when absent
    function get_term() {
        if (isset($this->token_array[0])) {
            return $this->token_array[0];
        }
    }

    // output: the second field of the token; nothing (null) when absent
    function get_tag() {
        if (isset($this->token_array[1])) {
            return $this->token_array[1];
        }
    }

    // output: the third field of the token when the token string contains
    //         a CUI pattern and a third field exists; "" otherwise
    function get_cui() {
        if ($this->_is_cui_tagged($this->token) && isset($this->token_array[2])) {
            return $this->token_array[2];
        }
        return "";
    }

    // output: true when the string contains "C" followed by seven digits
    //         anywhere in it, false otherwise
    function _is_cui_tagged($token_string) {

        if (preg_match("/(C[0-9]{7})/", $token_string)) {
            return true;
        }
        return false;
    }

    // end of token class
}

/** ************************************************
** PHRASE CLASS
** $Author: Herman Tolentino MD
** $Last update: 9/15/05
*/
class phrase {

    // the tagged input string
    var $input;
    // $input split into sentences by sentence_split()
    var $sentences;
    var $phrases;
    // lookahead limit handed to get_phrase() by detect()
    var $lookahead;
    var $sentence_count;
    // filled by detect(): "phrase_terms" and "phrase_tokens"
    var $phrase_stats;

    //
    // Store the tagged input and split it into sentences.
    //
    // __construct is what PHP 5 and later call; PHP 8 no longer treats a
    // method named after the class as the constructor.
    //
    function __construct($inputstring) {
        $this->input = $inputstring;
        $this->sentence_split();
    }

    //
    // Old-style constructor, kept so PHP 4 and any direct caller of
    // phrase() keep working.
    //
    function phrase($inputstring) {
        $this->__construct($inputstring);
    }

    function get_sentences() {
        return $this->sentences;
    }

    //
    // Split the input on the sentence-end tokens "._." and "?_." and record
    // the number of pieces.
    //
    function sentence_split() {
        $this->sentences = preg_split("/(\._\.)|(\?_\.)/", $this->input);
        $this->sentence_count = count($this->sentences);
    }

    //
    // Build the growing phrases that start at one token.
    // input: start index, array of tokens, lookahead limit
    // output: array of strings, each one term longer than the one before
    //         and each ending in a space; the loop stops once more than
    //         $lookahead terms have been taken; empty array when the start
    //         index is past the end
    //
    function get_phrase($current_index, $token_array, $lookahead) {

        $phrase = array();
        $taken = 0;
        $term = "";
        for ($i=$current_index; $i<count($token_array); $i++) {
            $token = new token($token_array[$i]);
            $term .= $token->get_term()." ";
            $phrase[] = $term;
            $taken++;
            if ($taken>$lookahead) {
                break;
            }
        }
        return $phrase;
    }

    //
    // Turn a phrase and its concept into one token.
    // input: phrase (words separated by spaces), CUI
    // output: the words joined with "|" followed by "_NNPHR_" and the CUI
    //
    function compose($phrase, $cui) {

        $cleaned = array();
        foreach(explode(" ", $phrase) as $term) {
            $cleaned[] = trim($term);
        }
        return implode("|", $cleaned)."_NNPHR_".$cui;
    }

    //
    // Render a space-separated string of term_tag_CUI tokens as HTML, one
    // token per line, with the CUI linking to its detail view.
    // input (positional): menu id, POST vars, GET vars, tagged string
    // output: HTML string; nothing (null) when called without arguments
    //
    // NOTE(review): the token fields, $_SERVER["PHP_SELF"] and the GET
    // values are written into the markup unescaped - confirm they are
    // trusted or escape them before display.
    //
    function format() {

        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
            $inputstring = $arg_list[3];

            $lines = explode(" ", $inputstring);
            $new_string = array();
            $i = 0;
            foreach($lines as $token) {
                $i++;
                // pad so a token with fewer than three fields still unpacks
                list($phrase, $tag, $cui) = array_pad(explode("_", $token), 3, "");
                $new_string[] = $phrase."_".$tag."_<a href='".$_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".$get_vars["menu_id"]."&document_id=".$get_vars["document_id"]."&tab=".$get_vars["tab"]."&cuitagged=".$phrase."_".$tag."_".$cui."&phr=".$i."#cuidetail'><b>".$cui."</b></a><br>";
            }

            $outputstring = implode(" ", $new_string);

            return $outputstring;
        }
    }

    //
    // output: the phrase tokens composed by the last detect() call;
    //         nothing (null) before detect() has run
    //
    function get_phrase_tokens() {
        if (isset($this->phrase_stats["phrase_tokens"])) {
            return $this->phrase_stats["phrase_tokens"];
        }
    }

    //
    // output: the phrase terms matched by detect(); nothing (null) when
    //         none have been recorded
    //
    function get_phrase_terms() {
        if (isset($this->phrase_stats["phrase_terms"])) {
            return $this->phrase_stats["phrase_terms"];
        }
    }

    //
    // Find multi-word phrases that map to a concept.
    // input: optional array of sentences; defaults to the sentences of the
    //        input string
    // output: the unique composed phrase tokens joined with spaces
    //
    // For every start position the growing phrases from get_phrase() are
    // tried shortest first; the first multi-word phrase that resolves to a
    // concept is kept. A phrase is resolved from the cached concepts
    // first and, only when none are cached, from the UMLS normalized
    // strings using the word-sorted phrase.
    //
    function detect() {

        $sentence_phrases = array();

        $sentence_array = $this->sentences;
        if (func_num_args()>0) {
            $arglist = func_get_args();
            if (isset($arglist[0])) {
                $sentence_array = $arglist[0];
            }
        }
        print_r($sentence_array);
        if (!is_array($sentence_array)) {
            $sentence_array = array();
        }

        $this->lookahead = 4;
        $c = new concept();
        foreach ($sentence_array as $sentence) {
            $token_array = preg_split("/(:_:)|(,_,)|(-_SYM)|(\/_SYM)|(\s)/", $sentence);
            for ($i=0; $i<count($token_array); $i++) {
                $phrase = array_unique($this->get_phrase($i, $token_array, $this->lookahead));
                foreach($phrase as $term) {
                    // no need to sort this as cached cui table has string in
                    // original sort order
                    $term = trim($term);
                    if (count(explode(" ", $term))<=1) {
                        continue;
                    }

                    $final_cui = false;
                    $list_cached = $c->get_list_cached_cui($term);
                    // the lookups return nothing when they find no match
                    $cached_count = (is_array($list_cached) ? count($list_cached) : 0);
                    if ($cached_count==1) {
                        $final_cui = reset($list_cached);
                    } elseif ($cached_count>1) {
                        $final_cui = $c->disambiguate_concept($list_cached, $term);
                    } else {
                        $sort_string = $this->sort_string($term);
                        $list_umls_ns = $c->get_list_umls_nscui($sort_string);
                        $umls_count = (is_array($list_umls_ns) ? count($list_umls_ns) : 0);
                        if ($umls_count==1) {
                            $final_cui = reset($list_umls_ns);
                        } elseif ($umls_count>1) {
                            $final_cui = $c->disambiguate_concept($list_umls_ns, $sort_string);
                        }
                    }

                    if ($final_cui) {
                        $sentence_phrases[] = $this->compose($term, $final_cui);
                        $this->phrase_stats["phrase_terms"][] = $term;
                        break;
                    }
                }
            }
        }

        // "phrase_tokens" is the key get_phrase_tokens() reads; the old
        // misspelt key is still written for any code that reads it directly
        $this->phrase_stats["phrase_tokens"] = $sentence_phrases;
        $this->phrase_stats["phrases_tokens"] = $sentence_phrases;
        return implode(" ", array_unique($sentence_phrases));

    }

    //
    // Put the words of a phrase in sorted order.
    // input: phrase (words separated by spaces)
    // output: all words sorted as strings and joined with single spaces
    //
    // Every word takes part in the sort; the previous code returned from
    // inside the loop after the second word and dropped the rest.
    //
    function sort_string($term) {

        $words = array();
        foreach (explode(" ", trim($term)) as $word) {
            if (strlen($word)>0) {
                $words[] = $word;
            }
        }
        sort($words, SORT_STRING);
        return implode(" ", $words);
    }

    // end of phrase class

}



/** ************************************************
** INVENTORY CLASS
** $Author: Herman Tolentino MD
** $Last update: 9/15/05
*/

class inventory {

    //
    // Maintains the per-document term, part-of-speech and vocabulary
    // counts stored in m_textlab_terms, m_textlab_pos and
    // m_textlab_vocabulary, and the summary counters kept in
    // m_textlab_document.
    //
    // All queries go through the mysql_* functions using the default
    // (most recently opened) connection.
    //

    // text handed to the constructor
    var $input;
    // document_id used as key in every m_textlab_* statement
    var $docid;
    // result of tokenize($input)
    var $term_tokens;
    // filled in update() by splitting $phrase_text on single spaces
    var $phrase_tokens;
    // declared but not referenced by any method of this class
    var $token_count;
    // phrased_text column of m_textlab_document for $docid
    var $phrase_text;

    //
    // Constructor: stores the arguments, tokenizes the input text and
    // loads the phrased text already stored for the document.
    //
    function inventory($inputstring, $document_id) {

        $this->input = $inputstring;
        $this->docid = $document_id;
        $this->term_tokens = $this->tokenize($this->input);
        $this->phrase_text = $this->get_phrase_text($this->docid);
    }

    //
    // Splits $input on runs of whitespace and on the literal sequences
    // "._.", ",_," and ":_:"; the delimiters themselves are discarded.
    // Empty strings between adjacent delimiters are kept in the result.
    //
    function tokenize($input) {
        $tokens = preg_split("/(\s+)|(\._\.)|(,_,)|(:_:)|( )/", $input);
        return $tokens;
    }

    //
    // Internal helper: escapes a value for use inside a single-quoted SQL
    // string literal. Terms and tags come from document text and may
    // contain quote characters, which previously broke the statements.
    // NOTE(review): if any caller already passes pre-escaped values this
    // will escape them a second time -- confirm against callers.
    //
    function _esc($value) {
        return mysql_real_escape_string((string)$value);
    }

    //
    // Internal helper: runs a statement, returns true on success and
    // false on failure.
    //
    function _exec($sql) {
        return mysql_query($sql) ? true : false;
    }

    //
    // Internal helper: true when the query succeeds and yields at least
    // one row, false otherwise.
    //
    function _has_rows($sql) {
        $result = mysql_query($sql);
        if ($result && mysql_num_rows($result)) {
            return true;
        }
        return false;
    }

    //
    // Internal helper: first column of the first row of the query, or
    // $default when the query fails, yields no row, or the value is NULL.
    //
    function _first_value($sql, $default) {
        $result = mysql_query($sql);
        if ($result && mysql_num_rows($result)) {
            $row = mysql_fetch_array($result);
            if (is_array($row) && isset($row[0])) {
                return $row[0];
            }
        }
        return $default;
    }

    //
    // Deletes every terms, pos and vocabulary row of this document.
    //
    function clear_terms() {
        $docid = $this->_esc($this->docid);
        module::execsql("delete from m_textlab_terms where document_id = '".$docid."'");
        module::execsql("delete from m_textlab_pos where document_id = '".$docid."'");
        module::execsql("delete from m_textlab_vocabulary where document_id = '".$docid."'");
    }

    //
    // Returns the term of the first m_textlab_terms row matching the
    // document and concept id, or false when there is none.
    //
    function term_search($document_id, $cui) {
        $sql = "select term from m_textlab_terms ".
               "where document_id = '".$this->_esc($document_id)."' and ".
               "concept_id = '".$this->_esc($cui)."'";
        return $this->_first_value($sql, false);
    }

    //
    // True when a terms row exists for (document, lower-cased term,
    // concept id, tag), false otherwise.
    //
    function term_lookup($document_id, $term, $tag, $cui) {

        $sql = "select * from m_textlab_terms ".
               "where document_id = '".$this->_esc($document_id)."' and ".
               "term = '".$this->_esc(strtolower($term))."' and ".
               "concept_id = '".$this->_esc($cui)."' and ".
               "treebank_id = '".$this->_esc($tag)."'";
        return $this->_has_rows($sql);
    }

    //
    // Increments frequency on the matching terms row.
    // Returns true when the statement ran, false otherwise.
    //
    function term_update($document_id, $term, $tag, $cui) {

        $sql_update = "update m_textlab_terms ".
                      "set frequency = frequency +1 ".
                      "where document_id = '".$this->_esc($document_id)."' and ".
                      "term = '".$this->_esc(strtolower($term))."' and ".
                      "concept_id = '".$this->_esc($cui)."' and ".
                      "treebank_id = '".$this->_esc($tag)."'";
        return $this->_exec($sql_update);
    }

    //
    // Inserts a terms row with frequency 1. Nothing is inserted for a
    // blank term or for the tags "MC", ":" and "SYM".
    // Returns true when a row was inserted, false otherwise.
    //
    function term_add($document_id, $term, $tag, $cui) {

        if (strlen(trim($term))<>0 && $tag<>"MC" && $tag<>":" && $tag<>"SYM") {
            $sql_insert = "insert into m_textlab_terms (document_id, term, concept_id, treebank_id, frequency) ".
                          "values ('".$this->_esc($document_id)."', '".$this->_esc(strtolower($term))."', '".
                          $this->_esc($cui)."', '".$this->_esc($tag)."', '1')";
            return $this->_exec($sql_insert);
        }
        return false;
    }

    //
    // Sum of frequency over all terms rows of all documents; 0 when the
    // table is empty or the query fails.
    //
    function get_global_term_count() {

        $sql = "select sum(frequency) from m_textlab_terms";
        return $this->_first_value($sql, 0);
    }

    //
    // Number of this document's terms rows that carry a concept id other
    // than '' / 'C0000000' and whose tag is not 'NNPHR'; 0 on failure.
    //
    function get_concept_count_terms() {

        $sql = "select count(*) from m_textlab_terms ".
               "where concept_id<>'' and concept_id<>'C0000000' ".
               "and treebank_id<>'NNPHR' and document_id = '".$this->_esc($this->docid)."'";
        return $this->_first_value($sql, 0);
    }

    //
    // Same as get_concept_count_terms() but restricted to rows tagged
    // 'NNPHR'; 0 on failure.
    //
    function get_concept_count_phrases() {

        $sql = "select count(*) from m_textlab_terms ".
               "where concept_id<>'' and concept_id<>'C0000000' ".
               "and treebank_id='NNPHR' and document_id = '".$this->_esc($this->docid)."'";
        return $this->_first_value($sql, 0);
    }

    //
    // Number of terms rows stored for this document; 0 on failure.
    //
    function get_term_count() {

        $sql = "select count(*) from m_textlab_terms ".
               "where document_id = '".$this->_esc($this->docid)."'";
        return $this->_first_value($sql, 0);
    }

    //
    // Writes the three counters above into the document's
    // m_textlab_document row. Returns true when the statement ran.
    //
    function update_stats() {

        $concept_count_terms = $this->get_concept_count_terms();
        $concept_count_phrases = $this->get_concept_count_phrases();
        $term_count = $this->get_term_count();

        $sql = "update m_textlab_document set ".
               "term_count = '".$this->_esc($term_count)."', ".
               "concept_count_terms = '".$this->_esc($concept_count_terms)."', ".
               "concept_count_phrases = '".$this->_esc($concept_count_phrases)."' ".
               "where document_id = '".$this->_esc($this->docid)."'";
        return $this->_exec($sql);
    }

    //
    // Returns the phrased_text stored for the document, or "" when there
    // is no such row, the column is NULL, or the query fails (so that
    // update() always has a string to split).
    //
    function get_phrase_text($document_id) {

        $sql = "select phrased_text from m_textlab_document where document_id = '".$this->_esc($document_id)."'";
        return $this->_first_value($sql, "");
    }

    //
    // True when a pos row exists for (document, lower-cased term, tag,
    // left tag, right tag, concept id).
    //
    function pos_lookup($doc_id, $term, $tag, $ltag, $rtag, $cui) {

        $sql = "select * from m_textlab_pos where ".
               "document_id = '".$this->_esc($doc_id)."' and ".
               "term ='".$this->_esc(strtolower($term))."' and ".
               "term_tag = '".$this->_esc($tag)."' and ".
               "left_tag = '".$this->_esc($ltag)."' and ".
               "right_tag = '".$this->_esc($rtag)."' and ".
               "concept_id = '".$this->_esc($cui)."'";
        return $this->_has_rows($sql);
    }

    //
    // Increments frequency on the matching pos row.
    // Returns true when the statement ran, false otherwise.
    //
    function pos_update($doc_id, $term, $tag, $ltag, $rtag, $cui) {

        $sql = "update m_textlab_pos set ".
               "frequency = frequency + 1 where ".
               "document_id = '".$this->_esc($doc_id)."' and ".
               "term ='".$this->_esc(strtolower($term))."' and ".
               "term_tag = '".$this->_esc($tag)."' and ".
               "left_tag = '".$this->_esc($ltag)."' and ".
               "right_tag = '".$this->_esc($rtag)."' and ".
               "concept_id = '".$this->_esc($cui)."'";
        return $this->_exec($sql);
    }

    //
    // Inserts a pos row with frequency 1. Requires non-empty document id,
    // term, tag and concept id; the neighbour tags may be empty.
    // Returns true when a row was inserted, false otherwise.
    //
    function pos_add($doc_id, $term, $tag, $ltag, $rtag, $cui) {

        if ($doc_id && $term && $tag && $cui) {
            $sql = "insert into m_textlab_pos (document_id, term, term_tag, left_tag, right_tag, frequency, concept_id) ".
                   "values ('".$this->_esc($doc_id)."', '".$this->_esc(strtolower($term))."', '".
                   $this->_esc($tag)."', '".$this->_esc($ltag)."', '".$this->_esc($rtag)."', '1', '".
                   $this->_esc($cui)."')";
            return $this->_exec($sql);
        }
        return false;
    }

    //
    // True when a vocabulary row exists for (document, source, stype).
    //
    function vocab_lookup($doc_id, $src, $type) {

        $sql = "select * from m_textlab_vocabulary where ".
               "document_id = '".$this->_esc($doc_id)."' and ".
               "source = '".$this->_esc($src)."' and ".
               "stype = '".$this->_esc($type)."'";
        return $this->_has_rows($sql);
    }

    //
    // Increments frequency on the matching vocabulary row.
    // Returns true when the statement ran, false otherwise.
    //
    function vocab_update($doc_id, $src, $type) {

        $sql = "update m_textlab_vocabulary set ".
               "frequency = frequency + 1 where ".
               "document_id = '".$this->_esc($doc_id)."' and ".
               "source = '".$this->_esc($src)."' and ".
               "stype = '".$this->_esc($type)."'";
        return $this->_exec($sql);
    }

    //
    // Inserts a vocabulary row with frequency 1.
    // Returns true when the statement ran, false otherwise.
    //
    function vocab_add($doc_id, $src, $type) {

        $sql = "insert into m_textlab_vocabulary (document_id, source, stype, frequency) ".
               "values ('".$this->_esc($doc_id)."', '".$this->_esc($src)."', '".$this->_esc($type)."', '1')";
        return $this->_exec($sql);
    }

    //
    // Returns the TUI of the first umls.MRSTY row for the concept, or
    // null when none is found. Only the first row is used even when the
    // query matches several.
    //
    function get_semantic_type($cui) {

        $sql = "select TUI from umls.MRSTY where CUI = '".$this->_esc($cui)."'";
        return $this->_first_value($sql, null);
    }

    //
    // Returns an array with the SAB value of every English (LAT = 'ENG')
    // umls.MRCONSO row of the concept, one entry per row (duplicates are
    // not removed), or null when there is none.
    //
    function get_source($cui) {

        $sql = "select SAB from umls.MRCONSO where CUI = '".$this->_esc($cui)."' and LAT = 'ENG'";
        $source_array = array();
        if ($result = mysql_query($sql)) {
            while ($row = mysql_fetch_array($result)) {
                $source_array[] = $row[0];
            }
        }
        if (count($source_array)>0) {
            return $source_array;
        }
        return null;
    }

    //
    // Rebuilds the counts for this document:
    //   1. for every term token (except those tagged "MC") bump or create
    //      its terms row, its pos row (keyed by the tags of the tokens on
    //      either side) and one vocabulary row per source of its concept;
    //   2. for every token of the phrased text bump or create a terms row,
    //      with "|" in the phrase replaced by a space;
    //   3. refresh the document counters via update_stats().
    // Always returns true.
    //
    function update() {

        $token_total = count($this->term_tokens);
        for ($i = 0; $i < $token_total; $i++) {
            $term_token = new token($this->term_tokens[$i]);
            $term = $term_token->get_term();
            $tag = $term_token->get_tag();
            $cui = $term_token->get_cui();

            // The first token has no left neighbour and the last has no
            // right neighbour; token receives NULL in those cases, which is
            // what the former out-of-range array reads evaluated to.
            $left_raw = ($i > 0) ? $this->term_tokens[$i-1] : null;
            $left_token = new token($left_raw);
            $left_tag = $left_token->get_tag();

            $right_raw = ($i + 1 < $token_total) ? $this->term_tokens[$i+1] : null;
            $right_token = new token($right_raw);
            $right_tag = $right_token->get_tag();

            if ($tag == "MC") {
                continue;
            }

            // TERMS TABLE: increment existing row or insert a new one
            if ($this->term_lookup($this->docid, $term, $tag, $cui)) {
                $this->term_update($this->docid, $term, $tag, $cui);
            } else {
                $this->term_add($this->docid, $term, $tag, $cui);
            }

            // POS TABLE
            if ($this->pos_lookup($this->docid, $term, $tag, $left_tag, $right_tag, $cui)) {
                $this->pos_update($this->docid, $term, $tag, $left_tag, $right_tag, $cui);
            } else {
                $this->pos_add($this->docid, $term, $tag, $left_tag, $right_tag, $cui);
            }

            // VOCABULARY TABLE: one update per source row of the concept
            $semtype = $this->get_semantic_type($cui);
            if ($source_array = $this->get_source($cui)) {
                foreach ($source_array as $source) {
                    if ($this->vocab_lookup($this->docid, $source, $semtype)) {
                        $this->vocab_update($this->docid, $source, $semtype);
                    } else {
                        $this->vocab_add($this->docid, $source, $semtype);
                    }
                }
            }
        }

        // PHRASES: stored in the terms table alongside single terms
        $this->phrase_tokens = explode(" ", $this->phrase_text);
        $phrase_total = count($this->phrase_tokens);
        for ($i = 0; $i < $phrase_total; $i++) {
            $phrase_token = new token($this->phrase_tokens[$i]);
            $phrase = $phrase_token->get_term();
            $tag = $phrase_token->get_tag();
            $cui = $phrase_token->get_cui();
            $phrase = str_replace("|", " ", $phrase);
            if ($this->term_lookup($this->docid, $phrase, $tag, $cui)) {
                $this->term_update($this->docid, $phrase, $tag, $cui);
            } else {
                $this->term_add($this->docid, $phrase, $tag, $cui);
            }
        }

        $this->update_stats();
        return true;
    }

    // end of inventory class
}
?>