<?
class umlstools extends module {

    //
    // Author: Herman Tolentino (unless specified)
    //
    // AEFI NLP Project
    // Vaccine Analytic Unit - Brighton Collaboration - National Library of Medicine
    //
    // IMPORTANT:
    // This assumes that the UMLS Lexicon tables have been set up and that the LRAGR table
    // has been indexed using STR.
    //

    /**
     * PHP4-style constructor: fills in the descriptive properties of the module.
     *
     * The version string is the release prefix followed by a dash and the
     * current date (Y-m-d). Do not forget to update the release prefix when
     * the module changes.
     */
    function umlstools() {
        $this->module      = 'umlstools';
        $this->description = 'AEFI Module - UMLS Tools';
        $this->author      = 'Herman Tolentino MD';

        // 0.1: BEGIN
        $release = '0.1';
        $this->version = $release . '-' . date('Y-m-d');
    }

    /**
     * Registers this module's dependencies through module::set_dep().
     *
     * One call is made per dependency name, in the order listed below.
     */
    function init_deps() {
        $required = array("module", "textlab");

        foreach ($required as $dependency) {
            module::set_dep($this->module, $dependency);
        }
    }

    /**
     * Registers the language directives (label id => display text) used by
     * this module through module::set_lang().
     *
     * Every entry is registered for "english" with "Y" as the last argument,
     * in the order listed.
     */
    function init_lang() {
        $labels = array(
            "LBL_CUI_DETAIL_FOR_TOKEN:" => "CUI DETAIL FOR TOKEN",
            "LBL_CHANGE_TYPE"           => "TYPE OF CHANGE",
            "INSTR_CHANGE_TYPE"         => "SELECT WHETHER CHANGE WILL BE GLOBAL OR TO SPECIFIC ITEM ONLY.",
            "FTITLE_MANAGE_CUI"         => "DOCUMENT CONCEPT MANAGER",
            "INSTR_MANAGE_CUI"          => "SELECT APPROPRIATE CONCEPT AND PART-OF-SPEECH TAG TO ASSIGN TO TERM. THE FIGURE AFTER THE CUI IS ITS AVERAGE DISTANCE FROM ALL OTHER CONCEPTS IN THE DOCUMENT.",
        );

        foreach ($labels as $label_id => $text) {
            module::set_lang($label_id, "english", $text, "Y");
        }
    }

    /**
     * Menu initialisation: this module adds no menu entries and only
     * registers its descriptive details through module::set_detail().
     *
     * An optional first argument (module id) is accepted and captured, but it
     * is not used by this method.
     */
    function init_menu() {
        $passed = func_get_args();
        if (count($passed) > 0) {
            $module_id = $passed[0];
        }

        // put in more details
        module::set_detail(
            $this->description,
            $this->version,
            $this->author,
            $this->module
        );
    }

    //
    // statistics initialisation: intentionally empty, this module does
    // nothing here
    //
    function init_stats() {
    }

    //
    // help initialisation: intentionally empty, this module does
    // nothing here
    //
    function init_help() {
    }

    /**
     * SQL initialisation: this module executes no statements here.
     *
     * An optional first argument (module id) is accepted and captured, but it
     * is not used by this method.
     */
    function init_sql() {
        $passed = func_get_args();
        if (count($passed) > 0) {
            $module_id = $passed[0];
        }
    }

    //
    // table removal: intentionally empty, no statements are executed here
    //
    function drop_tables() {

    }


    // --------------- CUSTOM MODULE FUNCTIONS ------------------


    /**
     * Checks whether a term/concept/tag combination is already recorded for a
     * document in m_textlab_terms.
     *
     * Positional arguments (read through func_get_args()):
     *   0: document id
     *   1: term text
     *   2: concept id (CUI), may be empty
     *   3: treebank (part-of-speech) tag
     *
     * Returns true when at least one matching row exists; false otherwise,
     * including when no arguments are passed or the query fails.
     */
    function inventory_lookup() {
        if (!func_num_args()) {
            return false;
        }
        $arg_list = func_get_args();
        // pad so a short argument list does not raise undefined-offset notices
        $arg_list = array_pad($arg_list, 4, "");

        // terms are taken from document text and may contain quotes, so every
        // value is escaped before it is placed inside the statement
        $document_id = mysql_real_escape_string((string) $arg_list[0]);
        $term        = mysql_real_escape_string((string) $arg_list[1]);
        $cui         = mysql_real_escape_string((string) $arg_list[2]);
        $tag         = mysql_real_escape_string((string) $arg_list[3]);

        $sql = "select * from m_textlab_terms ".
               "where document_id = '$document_id' and ".
               "term = '$term' and ".
               "concept_id = '$cui' and ".
               "treebank_id = '$tag'";
        $result = mysql_query($sql);
        if (!$result) {
            return false;
        }
        return mysql_num_rows($result) > 0;
    }

    /**
     * Increments the frequency of an existing m_textlab_terms row identified
     * by document, term, concept id and treebank tag.
     *
     * Returns true when the UPDATE statement executed, false when it failed.
     */
    function inventory_term_update($document_id, $term, $cui, $tag) {

        // terms are taken from document text and may contain quotes, so every
        // value is escaped before it is placed inside the statement
        $sql_update = "update m_textlab_terms ".
                      "set frequency = frequency +1 ".
                      "where document_id = '".mysql_real_escape_string((string) $document_id)."' and ".
                      "term = '".mysql_real_escape_string((string) $term)."' and ".
                      "concept_id = '".mysql_real_escape_string((string) $cui)."' and ".
                      "treebank_id = '".mysql_real_escape_string((string) $tag)."'";
        if (mysql_query($sql_update)) {
            return true;
        }
        return false;
    }

    /**
     * Inserts a new m_textlab_terms row with frequency 1.
     *
     * Nothing is inserted when the term is blank or when the tag is one of
     * "MC", ":" or "SYM".
     *
     * Returns true when a row was inserted, false when the term was skipped
     * or the INSERT statement failed.
     */
    function inventory_term_add($document_id, $term, $cui, $tag) {

        if (strlen(trim($term)) == 0) {
            return false;
        }
        if ($tag == "MC" || $tag == ":" || $tag == "SYM") {
            return false;
        }

        // terms are taken from document text and may contain quotes, so every
        // value is escaped before it is placed inside the statement
        $sql_insert = "insert into m_textlab_terms (document_id, term, concept_id, treebank_id, frequency) ".
                      "values ('".mysql_real_escape_string((string) $document_id)."', ".
                      "'".mysql_real_escape_string((string) $term)."', ".
                      "'".mysql_real_escape_string((string) $cui)."', ".
                      "'".mysql_real_escape_string((string) $tag)."', '1')";
        if (mysql_query($sql_insert)) {
            return true;
        }
        return false;
    }

    /**
     *   creates bigram inventory, bigram being concept pairs and
     *   their frequency in being found as pairs
     *
     *   The m_textlab_bigrams table is emptied first and then rebuilt from a
     *   self-join of m_textlab_terms: every pair of distinct, non-empty
     *   concept ids that occur in the same document is counted, and the pair
     *   is stored together with its node_distance().
     */
    function bigram_inventory() {

        // STEP 1: delete all table contents of the m_textlab_bigrams table
        module::execsql("truncate m_textlab_bigrams");

        // STEP 2: self-join m_textlab_terms on document_id and get
        // frequency of co-occurrence for each concept pair
        $sql1 = "select t1.concept_id, t2.concept_id, count(t1.concept_id) ".
                "from m_textlab_terms t1, m_textlab_terms t2  ".
                "where t1.concept_id <> t2.concept_id and t1.concept_id <> '' ".
                "and t2.concept_id <> '' and t1.document_id = t2.document_id ".
                "group by t1.concept_id, t2.concept_id;";
        $result1 = mysql_query($sql1);
        if (!$result1 || !mysql_num_rows($result1)) {
            return;
        }

        while (list($concept1_id, $concept2_id, $frequency) = mysql_fetch_array($result1)) {
            // STEP 3: determine node distance between 2 concepts
            $distance = umlstools::node_distance($concept1_id, $concept2_id);

            // STEP 4: update m_textlab_bigrams table
            // try an insert first; when it fails, fall back to an update
            $sql_insert = "insert into m_textlab_bigrams (concept1_id, concept2_id, frequency, distance) ".
                          "values ('$concept1_id', '$concept2_id', '$frequency', '$distance')";
            if (!mysql_query($sql_insert)) {
                // no comma may precede "where": the previous statement had one
                // and was therefore rejected by the server every time
                $sql_update = "update m_textlab_bigrams set ".
                              "frequency = frequency + 1 ".
                              "where concept1_id = '$concept1_id' and concept2_id = '$concept2_id'";
                mysql_query($sql_update);
            }
        }
    }

    /**
     * Rebuilds the term inventory (m_textlab_terms) of one document from
     * concept tagged text.
     *
     * Positional arguments (read through func_get_args()):
     *   0: concept tagged text, tokens of the form term_TAG or term_TAG_CUI
     *   1: document id
     *
     * The existing inventory rows of the document are deleted, every token is
     * counted (new row, or frequency + 1 for a row that already exists) and
     * finally the phrases of the document are added through
     * inventory_phrase_update().
     *
     * Returns true when processing ran, false when no arguments were passed.
     */
    function term_inventory() {
        if (!func_num_args()) {
            return false;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];
        $document_id = isset($arg_list[1]) ? $arg_list[1] : "";

        // tagged punctuation (._. ,_, :_:) is treated as a delimiter; adjacent
        // delimiters would yield empty tokens, which inventory_term_add()
        // rejects anyway, so they are dropped here to save the lookups
        $tokens = preg_split("/(\s+)|(\._\.)|(,_,)|(:_:)|( )/", $inputstring, -1, PREG_SPLIT_NO_EMPTY);

        // STEP 1: CLEAR DOCUMENT ENTRIES FIRST FROM INVENTORY
        module::execsql("delete from m_textlab_terms where document_id = '".mysql_real_escape_string((string) $document_id)."'");

        foreach ($tokens as $token) {

            // STEP 2: TOKENIZE INPUT TAGGED TERM
            // a token may lack the tag and/or the concept id
            $parts = explode("_", $token);
            $term = $parts[0];
            $tag  = isset($parts[1]) ? $parts[1] : "";
            $cui  = isset($parts[2]) ? trim($parts[2]) : "";

            // STEP 3: LOOKUP TERM IF IT IS ALREADY THERE
            if (umlstools::inventory_lookup($document_id, $term, $cui, $tag)) {
                umlstools::inventory_term_update($document_id, $term, $cui, $tag);
            } else {
                // STEP 3B: IF NOT THERE INSERT A NEW RECORD
                umlstools::inventory_term_add($document_id, $term, $cui, $tag);
            }
        }

        umlstools::inventory_phrase_update($document_id);
        return true;
    }

    /**
     * Adds the phrases stored in m_textlab_document.phrased_text of a
     * document to its term inventory.
     *
     * phrased_text is read as a space separated list of tokens of the form
     * phrase_TAG_CUI in which the words of a phrase are joined by "|"; the
     * pipes are turned back into spaces before the phrase is counted (new
     * row, or frequency + 1 for a row that already exists).
     */
    function inventory_phrase_update($document_id) {

        $sql = "select phrased_text from m_textlab_document ".
               "where document_id = '".mysql_real_escape_string((string) $document_id)."'";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }
        if (!(list($phrased_text) = mysql_fetch_array($result))) {
            return;
        }

        foreach (explode(" ", $phrased_text) as $token) {
            if ($token == "") {
                // empty phrased_text or doubled blanks: nothing to count
                continue;
            }
            // pad so tokens without a tag or concept id do not raise
            // undefined-offset notices
            $parts = array_pad(explode("_", $token), 3, "");
            list($phrase, $tag, $cui) = $parts;
            $phrase = str_replace("|", " ", $phrase);

            if (umlstools::inventory_lookup($document_id, $phrase, $cui, $tag)) {
                umlstools::inventory_term_update($document_id, $phrase, $cui, $tag);
            } else {
                umlstools::inventory_term_add($document_id, $phrase, $cui, $tag);
            }
        }
    }

    /**
     * Looks for multi-word UMLS normalized strings that start at a token.
     *
     * Starting with the term of $tokens[$current_pos], the terms of the
     * following tokens are appended one at a time (up to $lookahead_num - 1
     * of them). After each addition the words are sorted and the lowercased
     * result is looked up in umls.MRXNS_ENG.NSTR.
     *
     * @param array $tokens        tagged tokens of the form term_TAG[_CUI]
     * @param int   $current_pos   index of the token to start from
     * @param int   $lookahead_num look-ahead limit (the loop runs for 1 <= j < $lookahead_num)
     * @return array list of "CUI|normalized string" entries for matches with
     *               more than one word; empty when nothing matched
     */
    function lookup_noun_phrases($tokens, $current_pos, $lookahead_num) {

        $phrases = array();
        if (!isset($tokens[$current_pos])) {
            return $phrases;
        }

        $parts = explode("_", $tokens[$current_pos]);
        $compound_term = $parts[0];

        for ($j = 1; $j < $lookahead_num; $j++) {
            if (!isset($tokens[$current_pos + $j])) {
                // end of the token list: looking further would only repeat
                // the previous lookup and duplicate its results
                break;
            }
            $parts = explode("_", $tokens[$current_pos + $j]);

            // STEP 3: create compound token
            $compound_term .= " ".$parts[0];

            // STEP 4: normalize compound term (sorted words)
            $norm_compound = explode(" ", $compound_term);
            sort($norm_compound);
            $compound_term = implode(" ", $norm_compound);

            // STEP 5: look it up in normalized strings
            // the operand comes from document text, so it is escaped
            $sql = "select NSTR, CUI ".
                   "from umls.MRXNS_ENG ".
                   "where LAT = 'ENG' and NSTR like '".mysql_real_escape_string(trim(strtolower($compound_term)))."'";
            $result = mysql_query($sql);
            if (!$result) {
                continue;
            }
            while (list($ns, $cui) = mysql_fetch_array($result)) {
                // keep phrases only, single-word strings are skipped
                if (count(explode(" ", $ns)) > 1) {
                    //$ns = tagger::tagtext($ns, "text");
                    $phrases[] = "$cui|$ns";
                }
            }
        }
        return $phrases;

    }

    /**
     * Formats phrase tokens as one HTML line each, with the concept id
     * rendered as a link to the concept detail of that phrase.
     *
     * Positional arguments (read through func_get_args()):
     *   0: menu id (not used here)
     *   1: POST variables (not used here)
     *   2: GET variables; menu_id, document_id and tab are copied into the link
     *   3: space separated phrase tokens of the form phrase_TAG_CUI
     *
     * The "phr" link parameter is the 1-based position of the token.
     * Returns the concatenated HTML, or nothing when called without arguments.
     */
    function nounphrase_format() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = isset($arg_list[2]) ? $arg_list[2] : array();
        $inputstring = isset($arg_list[3]) ? $arg_list[3] : "";

        // the array key must be a quoted string: a bare PHP_SELF is an
        // undefined constant and stops execution on current PHP versions
        $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".$get_vars["menu_id"].
                     "&document_id=".$get_vars["document_id"]."&tab=".$get_vars["tab"];

        $i = 0;
        $new_string = array();
        foreach (explode(" ", $inputstring) as $token) {
            $i++;
            // pad so tokens without a tag or concept id do not raise notices
            $parts = array_pad(explode("_", $token), 3, "");
            list($phrase, $tag, $cui) = $parts;
            $new_string[] = $phrase."_".$tag."_<a href='".$link_base."&cuitagged=".$phrase."_".$tag."_".$cui."&phr=".$i."#cuidetail'><b>".$cui."</b></a><br>";
        }

        return implode("", $new_string);
    }

    /**
     * Turns concept tagged text into HTML marked text.
     *
     * Positional arguments (read through func_get_args()):
     *   0: menu id (not used here)
     *   1: POST variables (not used here)
     *   2: GET variables; menu_id, document_id and tab are copied into the links
     *   3: concept tagged text, space separated tokens
     *   4: text type, copied into the "text" parameter of the links
     *
     * Tokens tagged _N..., _V... and _J... get a background colour, as do the
     * negation tokens matched by the two dedicated patterns. A concept id of
     * the form C followed by seven digits becomes a link whose "seq"
     * parameter is the 1-based position of the token.
     *
     * Returns the marked-up text, or nothing when called without arguments.
     */
    function tag_highlight() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = isset($arg_list[2]) ? $arg_list[2] : array();
        $inputstring = isset($arg_list[3]) ? $arg_list[3] : "";
        $text_type = isset($arg_list[4]) ? $arg_list[4] : "";

        // patterns are applied in this order, the concept link last
        $search = array(
            "/\b([a-zA-Z0-9]+)(_N)([a-zA-Z0-9_]+)\b/", // nouns
            "/\b([a-zA-Z0-9]+)(_V)([a-zA-Z0-9_]+)\b/", // verbs
            "/\b([a-zA-Z0-9]+)(_J)([a-zA-Z0-9_]+)\b/", // adjectives
            "/\b([Nn]o_D[DB])\b/", // negatives
            "/\b([Uu]nlikely_RR)\b/", // negatives
            "/\b([a-zA-Z0-9]+)(_[A-Z]{2,4})(_)(C[0-9]{7})\b/",
        );
        $replace = array(
            "<span style='background-color:#99CCFF'>$1$2$3</span>",
            "<span style='background-color:#FFCC00'>$1$2$3</span>",
            "<span style='background-color:#CCFF66'>$1$2$3</span>",
            "<span style='background-color:#FF99FF'>$1$2$3</span>",
            "<span style='background-color:#FF99FF'>$1$2$3</span>",
            "", // concept link, filled in per token because it carries the position
        );

        // the array key must be a quoted string: a bare PHP_SELF is an
        // undefined constant and stops execution on current PHP versions
        $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".$get_vars["menu_id"].
                     "&document_id=".$get_vars["document_id"]."&tab=".$get_vars["tab"];

        $i = 0;
        $colored_text = array();
        foreach (explode(" ", $inputstring) as $item) {
            $i++;
            $replace[5] = "$1$2$3<a href='".$link_base."&cuitagged=$1$2$3$4&seq=".$i."&text=$text_type#cuidetail'><b>$4</b></a>";
            $colored_text[] = preg_replace($search, $replace, $item);
        }

        return implode(" ", $colored_text);
    }

    /**
     * Builds an HTML list of links, one per non-empty concept id stored in
     * m_textlab_terms for the current document.
     *
     * Positional arguments (read through func_get_args()):
     *   0: menu id (not used here)
     *   1: POST variables (not used here)
     *   2: GET variables; document_id selects the rows, menu_id, document_id
     *      and tab are copied into the links
     *
     * Returns the HTML (an empty string when the document has no concepts),
     * or nothing when called without arguments.
     */
    function concept_list() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $get_vars = isset($arg_list[2]) ? $arg_list[2] : array();

        // the array key must be a quoted string: a bare PHP_SELF is an
        // undefined constant and stops execution on current PHP versions
        $link_base = $_SERVER["PHP_SELF"]."?page=PROCESSING&menu_id=".$get_vars["menu_id"].
                     "&document_id=".$get_vars["document_id"]."&tab=".$get_vars["tab"]."&cui=";

        // document_id arrives with the request, so it is escaped
        $sql = "select concept_id from m_textlab_terms where concept_id <> '' and document_id = '".mysql_real_escape_string((string) $get_vars["document_id"])."'";

        $concept_list = "";
        if ($result = mysql_query($sql)) {
            while (list($concept) = mysql_fetch_array($result)) {
                $concept_list .= "<a href='".$link_base.$concept."' class='groupmenu'>$concept</a><br>";
            }
        }
        return $concept_list;
    }

    /**
     * Counts the distinct concept ids in a text.
     *
     * The first argument is the text to scan; a concept id is an underscore
     * followed by "C" and seven digits, ending at a word boundary.
     *
     * Returns the number of different ids found, or nothing when called
     * without arguments.
     */
    function concept_count() {
        $supplied = func_get_args();
        if (count($supplied) == 0) {
            return;
        }

        $text = $supplied[0];
        $found = array();
        preg_match_all("/(_)(C[0-9]{7})\b/", $text, $found);

        // group 2 holds the ids without the leading underscore
        $distinct = array_unique($found[2]);
        return count($distinct);
    }

    /**
     * Appends a UMLS concept id to the tokens of POS tagged text.
     *
     * makes use of UMLS MRXNS_ENG and MRXNW_ENG
     * make sure input is POS tagged text
     *
     * The first argument is the tagged text. Only tokens that contain _N, _V,
     * _J, _R or _C are looked up. The lowercased base form of the term (from
     * baseform(); the term itself when there is none) is searched in the
     * normalized strings (MRXNS_ENG.NSTR) and, when that returns no row, in
     * the normalized words (MRXNW_ENG.NWD). The first CUI returned is
     * appended to the token as term_TAG_CUI.
     *
     * Returns the re-joined text, or nothing when called without arguments.
     */
    function concept_tag() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];
        $tokens = preg_split("/[\s]+/", $inputstring);

        for ($i = 0; $i < count($tokens); $i++) {
            // match only selected tags
            if (!preg_match("/(_[NVJRC])/", $tokens[$i])) {
                continue;
            }
            list($searchterm, $tag) = explode("_", $tokens[$i]);

            // prefer the base form of the term when the lexicon has one
            $needle = strtolower($searchterm);
            if ($baseform = umlstools::baseform($needle)) {
                $needle = strtolower($baseform);
            }
            // the term comes from document text, so it is escaped
            $needle = mysql_real_escape_string($needle);

            $sql = "select s.CUI ".
                   "from umls.MRXNS_ENG s ".
                   "where s.LAT = 'ENG' ".
                   "and s.NSTR like '".$needle."'";
            // lookup normalized words
            $sqlw = "select w.CUI ".
                    "from umls.MRXNW_ENG w ".
                    "where w.LAT = 'ENG' ".
                    "and w.NWD like '".$needle."'";

            // strings first, words second. The word query used to be built
            // but never executed (the string query was run a second time).
            foreach (array($sql, $sqlw) as $query) {
                $result = mysql_query($query);
                if (!$result) {
                    // a failed query ends the lookup for this token
                    break;
                }
                if (!mysql_num_rows($result)) {
                    // nothing found, try the next index
                    continue;
                }
                if (list($cui) = mysql_fetch_array($result)) {
                    $tokens[$i] = $searchterm."_".$tag."_".$cui;
                }
                break;
            }
        } // end for loop

        return implode(" ", $tokens);
    }

    /**
     * Picks the concept id that belongs to the smallest distance.
     *
     * @param array $array_distance distances, keyed like $array_cui
     * @param array $array_cui      concept ids
     * @return mixed the entry of $array_cui stored under the key of the
     *               smallest distance; nothing when $array_distance is empty
     */
    function get_final_cui($array_distance, $array_cui) {

        // ascending sort that keeps the keys, so the first key points at the
        // concept with the smallest distance
        asort($array_distance);
        reset($array_distance);
        $nearest = key($array_distance);

        if ($nearest === null) {
            return;
        }
        return $array_cui[$nearest];
    }

    /**
     * Appends the best fitting UMLS concept id to the tokens of POS tagged
     * text.
     *
     * makes use of UMLS MRXNS_ENG and MRXNW_ENG
     * make sure input is POS tagged text
     *
     * The first argument is the tagged text. Only tokens that contain _N, _V,
     * _J or _R are looked up. The lowercased base form of the term (from
     * baseform(); the term itself when there is none) is searched in the
     * normalized strings (MRXNS_ENG.NSTR) and, when that returns no row, in
     * the normalized words (MRXNW_ENG.NWD). Of all CUIs returned, the one
     * with the smallest document_distance() is appended as term_TAG_CUI.
     *
     * Returns the re-joined text, or nothing when called without arguments.
     */
    function predict_concept_tags() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];
        $tokens = preg_split("/[\s]+/", $inputstring);

        for ($i = 0; $i < count($tokens); $i++) {
            // match only selected tags
            if (!preg_match("/(_[NVJR])/", $tokens[$i])) {
                continue;
            }
            list($searchterm, $tag) = explode("_", $tokens[$i]);

            // prefer the base form of the term when the lexicon has one
            $needle = strtolower($searchterm);
            if ($baseform = umlstools::baseform($needle)) {
                $needle = strtolower($baseform);
            }
            // the term comes from document text, so it is escaped
            $needle = mysql_real_escape_string($needle);

            $sql = "select s.CUI ".
                   "from umls.MRXNS_ENG s ".
                   "where s.LAT = 'ENG' ".
                   "and s.NSTR like '".$needle."'";
            // lookup normalized words
            $sqlw = "select w.CUI ".
                    "from umls.MRXNW_ENG w ".
                    "where w.LAT = 'ENG' ".
                    "and w.NWD like '".$needle."'";

            // strings first, words second. The word query used to be built
            // but never executed (the string query was run a second time).
            foreach (array($sql, $sqlw) as $query) {
                $result = mysql_query($query);
                if (!$result) {
                    // a failed query ends the lookup for this token
                    break;
                }
                if (!mysql_num_rows($result)) {
                    // nothing found, try the next index
                    continue;
                }

                // collect every candidate with its mean distance
                $d = array();
                $c = array();
                while (list($cui) = mysql_fetch_array($result)) {
                    $d[] = umlstools::document_distance($cui);
                    $c[] = $cui;
                }
                $final_cui = umlstools::get_final_cui($d, $c);
                $tokens[$i] = $searchterm."_".$tag."_".$final_cui;
                break;
            }
        } // end for loop

        return implode(" ", $tokens);
    }

    /**
     * Detects multi-word phrases in concept tagged text.
     *
     * input: concept tagged text (first argument)
     * output: space separated list of distinct phrase tokens of the form
     *         word|word_NNPHR_CUI; nothing when no phrase was found or when
     *         called without arguments
     *
     * For each start token the terms of the next $maxtokens tokens are
     * appended one at a time. After each addition the words are sorted and
     * the lowercased result is looked up in umls.MRXNS_ENG.NSTR; a hit whose
     * normalized string has more than one word is recorded with the CUI of
     * the first row returned.
     */
    function phrase_detect() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];

        // tokenize
        $tokens = preg_split("/[\s,]+/", $inputstring);
        $maxtokens = 4; // number of following tokens to look ahead
        $phrases = array();

        // NOTE(review): the last $maxtokens tokens are never used as a start
        // token, so phrases near the end of the text are not detected -
        // confirm whether this is intended
        for ($i = 0; $i < count($tokens) - $maxtokens; $i++) {

            // STEP 1: get base token
            list($term) = explode("_", $tokens[$i]);
            $compound_term = $term;

            // STEP 2: get the next token
            // $maxtokens set look ahead number of x tokens
            for ($j = 1; $j <= $maxtokens; $j++) {
                list($term) = explode("_", $tokens[$i + $j]);

                // STEP 3: create compound token
                $compound_term .= " ".$term;

                // STEP 4: normalize compound term (sorted words)
                $norm_compound = explode(" ", $compound_term);
                sort($norm_compound);
                $norm_term = implode(" ", $norm_compound);

                // STEP 5: look it up in normalized strings
                // the operand comes from document text, so it is escaped
                $sql = "select NSTR, CUI ".
                       "from umls.MRXNS_ENG ".
                       "where LAT = 'ENG' and NSTR like '".mysql_real_escape_string(trim(strtolower($norm_term)))."'";
                $result = mysql_query($sql);
                if (!$result || !mysql_num_rows($result)) {
                    continue;
                }
                if (list($ns, $cui) = mysql_fetch_array($result)) {
                    if (count(explode(" ", $ns)) > 1) {
                        // build the pipe-joined label in its own variable:
                        // writing it back into $compound_term would glue the
                        // words together and break the lookups of the
                        // remaining look-ahead steps
                        $label = str_replace(" ", "|", trim($compound_term));
                        $phrases[] = $label."_NNPHR_".$cui;
                    }
                }

            } // $j loop

        } // $i loop

        if (count($phrases) > 0) {
            return implode(" ", array_unique($phrases));
        }
    }

    /**
     * Mean bigram probability of a concept against the stored concepts.
     *
     * Positional arguments (read through func_get_args()):
     *   0: concept id to evaluate
     *   1: document id - accepted but not used, the query below is not
     *      filtered by document
     *
     * For every row of m_textlab_terms with a non-empty concept id,
     * bigram_probability(row concept, given concept) is computed; the result
     * is the arithmetic mean of those values.
     *
     * Returns 0 when there are no rows or the query fails.
     */
    function document_bigram_probability() {
        $arg_list = func_get_args();
        $concept = isset($arg_list[0]) ? $arg_list[0] : "";

        $sql = "select concept_id from m_textlab_terms where concept_id <> ''";
        $result = mysql_query($sql);
        if (!$result) {
            return 0;
        }

        $cui_array = array();
        while (list($cui) = mysql_fetch_array($result)) {
            $probability = umlstools::bigram_probability($cui, $concept);
            if (!$probability) {
                $probability = 0;
            }
            $cui_array[] = $probability;
        }

        if (count($cui_array) == 0) {
            return 0;
        }
        return array_sum($cui_array) / count($cui_array);
    }

    /**
     * Mean distance of a concept from the concepts stored in the inventory.
     *
     * Positional arguments (read through func_get_args()):
     *   0: concept id to evaluate
     *   1: document id (optional); when given, only the rows of that
     *      document are used, otherwise all rows of m_textlab_terms
     *
     * For every row the stored distance from learned_node_distance() is used
     * when it is available and non-zero, otherwise node_distance() is
     * computed; the result is the arithmetic mean of those values.
     *
     * Returns 0 when there are no rows or the query fails.
     */
    function document_distance() {
        $arg_list = func_get_args();
        // callers may pass the concept only, so both arguments are optional
        $concept = isset($arg_list[0]) ? $arg_list[0] : "";
        $document_id = isset($arg_list[1]) ? $arg_list[1] : "";

        // NOTE(review): rows with an empty concept_id are not excluded here,
        // unlike in document_bigram_probability() - confirm whether intended
        if ($document_id) {
            $sql = "select concept_id from m_textlab_terms where document_id = '".mysql_real_escape_string((string) $document_id)."'";
        } else {
            $sql = "select concept_id from m_textlab_terms";
        }

        $result = mysql_query($sql);
        if (!$result) {
            return 0;
        }

        $cui_array = array();
        while (list($cui) = mysql_fetch_array($result)) {
            $distance = umlstools::learned_node_distance($cui, $concept);
            if (!$distance) {
                $distance = umlstools::node_distance($cui, $concept);
            }
            $cui_array[] = $distance;
        }

        if (count($cui_array) == 0) {
            return 0;
        }
        return array_sum($cui_array) / count($cui_array);
    }

    /**
     * Relative frequency of a concept pair among all stored bigrams.
     *
     * @param string $concept1 first concept id
     * @param string $concept2 second concept id
     * @return int|float frequency of the pair divided by the sum of all
     *                   frequencies in m_textlab_bigrams; 0 when the pair is
     *                   not stored, the table is empty or a query fails
     */
    function bigram_probability($concept1, $concept2) {

        // both values start at 0 so that a failed query or a missing row
        // leads to the "return 0" branch instead of an undefined variable
        $n = 0;
        $frequency = 0;

        // total of all bigram frequencies
        $sql_n = "select sum(frequency) from m_textlab_bigrams";
        if ($result_n = mysql_query($sql_n)) {
            if (mysql_num_rows($result_n)) {
                list($n) = mysql_fetch_array($result_n);
            }
        }

        // frequency of the requested pair
        $sql = "select frequency from m_textlab_bigrams ".
               "where concept1_id = '$concept1' and concept2_id = '$concept2'";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                list($frequency) = mysql_fetch_array($result);
            }
        }

        if ($frequency > 0 && $n > 0) {
            return ($frequency / $n);
        }
        return 0;

    }

    /**
     * Reads the stored distance of a concept pair from m_textlab_bigrams.
     *
     * @param string $concept1 first concept id
     * @param string $concept2 second concept id
     * @return mixed the distance column of the first matching row; nothing
     *               when the pair is not stored or the query fails
     */
    function learned_node_distance($concept1, $concept2) {

        $query = "select distance from m_textlab_bigrams ".
                 "where concept1_id = '$concept1' and concept2_id = '$concept2'";

        $rows = mysql_query($query);
        if (!$rows || !mysql_num_rows($rows)) {
            return;
        }

        list($stored) = mysql_fetch_array($rows);
        return $stored;
    }

    /**
     * compute semantic distance between two concepts
     * input: two concepts
     * output: number of nodes distance using semantic tree number
     *
     * concept: Traversing the Semantic Network tree nodes and counting
     * How many nodes you need to traverse to get to the other concept?
     * This uses the "isa" class hierarchy of the semantic network as
     * represented by the semantic tree number, STN.
     *
     * For each concept the shortest STN in umls.MRSTY is split into its
     * levels. The result is: levels of concept 1 + levels of concept 2, each
     * reduced by one for every position (up to the shorter length) at which
     * both have the same level, plus the difference in length, plus 1 when
     * the top levels differ. A concept without an MRSTY row counts as having
     * no levels.
     *
     * @param string $concept1 first concept id
     * @param string $concept2 second concept id
     * @return int the computed distance
     */
    function node_distance($concept1, $concept2) {

        // STEP 1: GET THE TREE NUMBERS OF BOTH CONCEPTS
        // $paths[0] belongs to $concept1, $paths[1] to $concept2; an empty
        // list is kept for a concept that has no row, so count() always
        // receives an array
        $paths = array();
        foreach (array($concept1, $concept2) as $concept) {
            $levels = array();
            $sql = "select CUI, STN from umls.MRSTY where CUI = '$concept' order by char_length(STN)";
            if ($result = mysql_query($sql)) {
                if (mysql_num_rows($result)) {
                    if (list($cui, $stn) = mysql_fetch_array($result)) {
                        // STEP 2: SPLIT INTO LEVELS
                        // the leading letter gets a level of its own: "A1." -> "A.1."
                        $stn = preg_replace("/\b(A|B)([0-9]{1})(\.)/", "$1.$2.", $stn);
                        $levels = explode(".", $stn);
                    }
                }
            }
            $paths[] = $levels;
        }

        // STEP 3: GET ARRAY COUNTS, ASSUME TOTAL IS DISTANCE FROM END TO END
        $d1 = count($paths[0]);
        $d2 = count($paths[1]);
        $diff = abs($d1 - $d2);

        // STEP 4: USE LEAST ARRAY COUNT AS MAX FOR LOOP
        $least = min($d1, $d2);

        // the top levels are compared once; with an empty path there is
        // nothing to compare and no penalty is added
        $topnode = 0;
        if ($least > 0 && $paths[0][0] != $paths[1][0]) {
            $topnode = 1;
        }

        // STEP 5: IMPLEMENT TREE LEVEL LOOP TO STEP FROM ONE LEVEL TO ANOTHER
        for ($i = 0; $i < $least; $i++) {
            if ($paths[0][$i] === $paths[1][$i]) {
                // STEP 6: IF LEVELS ARE EQUAL DECREMENT DISTANCE
                $d1--;
                $d2--;
            }
        }

        // STEP 7: RETURN REMAINING NODES
        return $d1 + $d2 + $diff + $topnode;
    }

    /**
     * Looks up the base form of a word in the UMLS lexicon table umls.LRAGR.
     *
     * The first argument is the word to search in the STR column.
     *
     * Returns the BAS value of the first row found; nothing when there is no
     * row, the query fails or no argument was passed.
     */
    function baseform() {
        if (!func_num_args()) {
            return;
        }
        $arg_list = func_get_args();
        // the word comes from document text, so it is escaped
        $searchterm = mysql_real_escape_string((string) $arg_list[0]);

        $sql = "select BAS from umls.LRAGR where STR like '$searchterm' limit 10";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }
        if (list($baseform) = mysql_fetch_array($result)) {
            return $baseform;
        }
    }

    /**
     * Wraps concept tagged tokens in links to their concept detail.
     *
     * Positional arguments (read through func_get_args()):
     *   0: menu id
     *   1: POST variables
     *   2: GET variables; page, menu_id and document_id are copied into the links
     *   3: concept tagged text, space separated tokens
     *
     * A token is linked when it consists only of letters, digits and
     * underscores and ends in an underscore followed by "C" and seven digits;
     * every other token is passed through unchanged.
     *
     * Returns the re-joined text, or nothing when called without arguments.
     */
    function cuilinks() {
        $params = func_get_args();
        if (count($params) == 0) {
            return;
        }
        $menu_id = $params[0];
        $post_vars = $params[1];
        $get_vars = $params[2];
        $concept_text = $params[3];

        $linked = array();
        foreach (explode(" ", $concept_text) as $piece) {
            // cheap pre-check before the full pattern is applied
            if (preg_match("/_C/", $piece)) {
                $anchor = "<a href='".$_SERVER["PHP_SELF"]."?page=".$get_vars["page"]."&menu_id=".$get_vars["menu_id"]."&document_id=".$get_vars["document_id"]."&token=$1#cuidetail'>$1</a>";
                $piece = preg_replace("/^([a-zA-Z0-9_]+_(C[0-9]{7}))$/", $anchor, $piece);
            }
            $linked[] = $piece;
        }

        return implode(" ", $linked);
    }

    /**
     * Entry point of the concept manager.
     *
     * Positional arguments (read through func_get_args()):
     *   0: menu id
     *   1: POST variables
     *   2: GET variables
     *   3: textlab data, passed on unchanged
     *
     * When the form was submitted (POST "submitcui" is set) the submission is
     * handled by process_manage_cui() first; form_manage_cui() is then called
     * in either case. Nothing happens when called without arguments.
     */
    function cui_manager() {

        $params = func_get_args();
        if (count($params) == 0) {
            return;
        }
        $menu_id = $params[0];
        $post_vars = $params[1];
        $get_vars = $params[2];
        $textlab = $params[3];

        $submitted = $post_vars["submitcui"];
        if ($submitted) {
            umlstools::process_manage_cui($menu_id, $post_vars, $get_vars, $textlab);
        }

        umlstools::form_manage_cui($menu_id, $post_vars, $get_vars, $textlab);
    }

    /**
     * Numbers the tokens of a text and highlights a term in it.
     *
     * Every space separated token of $text is followed by its 1-based
     * position in a grey, bold span. Each occurrence of $term that starts at
     * a word boundary is then wrapped in a span of class "highlight".
     *
     * @param string $term literal text to highlight; it is not a pattern
     * @param string $text text whose tokens are numbered
     * @return string the HTML marked text
     */
    function mark_tagged($term, $text) {

        $i = 0;
        $new_text = array();
        foreach (explode(" ", $text) as $unit) {
            $i++;
            $new_text[] = $unit."<span style='color:#A0A0A0; font-weight: bold;'>$i</span>";
        }
        $itemized = implode(" ", $new_text);

        if ((string) $term === "") {
            // an empty term would match at every word boundary
            return $itemized;
        }

        // the term is quoted before it goes into the pattern: phrase tokens
        // contain "|", which would otherwise act as alternation and highlight
        // the individual words anywhere in the text
        $pattern = "/\b(".preg_quote($term, "/").")/";
        return preg_replace($pattern, "<span class='highlight'>$1</span>", $itemized);
    }

    function process_manage_cui() {
    //
    // Handles a submission of the concept manager form.
    //
    // Arguments (positional, read through func_get_args):
    //   0: menu_id   - not used here
    //   1: post_vars - posted form fields (submitcui, treebank, input_term,
    //                  cuitagged, cui, change_type, sequence, phrase_seq,
    //                  document_id)
    //   2: get_vars  - request variables (page, menu_id, tab, text)
    //   3: textlab   - document record (phrased_text, cuitagged_text,
    //                  cuitagged_text_predicted)
    //
    // "Adopt Configuration" builds the new token term_tag[_cui], writes it
    // into the phrased text (when phrase_seq is set) or into the actual or
    // predicted concept tagged text, stores the result in
    // m_textlab_document and redirects back to the form. The change touches
    // one position (change_type "item") or every occurrence of the old token.
    // For phrases, the cui NOCUI removes the token at that position instead.
    //
    // "Next" redirects to the next concept tagged token after the current
    // position.
    //
    // Any other button value does nothing. Nothing is returned.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
            $textlab = $arg_list[3];

            switch($post_vars["submitcui"]) {
            case "Adopt Configuration":
                if ($post_vars["treebank"] && $post_vars["input_term"] && $post_vars["cuitagged"] && $textlab["cuitagged_text"]) {
                    $new_token = $post_vars["input_term"]."_".$post_vars["treebank"].($post_vars["cui"]?"_".$post_vars["cui"]:"");

                    // the old token is matched literally; phrase tokens contain |
                    $token_pattern = "/\b(".preg_quote($post_vars["cuitagged"], "/").")\b/";

                    $redirect = "location: ".$_SERVER["PHP_SELF"]."?page=".$get_vars["page"]."&menu_id=".$get_vars["menu_id"]."&document_id=".$post_vars["document_id"]."&tab=".$get_vars["tab"]."&cuitagged=".$new_token;
                    $where = "where document_id = '".mysql_real_escape_string($post_vars["document_id"])."'";

                    if ($post_vars["phrase_seq"]) {
                        if ($post_vars["cui"]=="NOCUI") {
                            $phrased_text = umlstools::remove_by_tagnumber($post_vars["phrase_seq"], $get_vars["cuitagged"], $textlab["phrased_text"]);
                        } elseif ($post_vars["change_type"]=="item") {
                            $phrased_text = umlstools::replace_by_tagnumber($post_vars["phrase_seq"], $new_token, $textlab["phrased_text"]);
                        } else {
                            $phrased_text = preg_replace($token_pattern, "$new_token", $textlab["phrased_text"]);
                        }
                        // document text may contain quotes, so escape it for SQL
                        $sql = "update m_textlab_document set ".
                               "phrased_text = '".mysql_real_escape_string($phrased_text)."' ".
                               $where;
                        if ($result = mysql_query($sql)) {
                            // stay in phrase mode: phr carries the phrase position
                            header($redirect."&phr=".$post_vars["phrase_seq"]."&text=".$get_vars["text"]."#cuitagged");
                        }
                    } else {
                        // the request selects the actual or the predicted text
                        $column = ($get_vars["text"]=="actual") ? "cuitagged_text" : "cuitagged_text_predicted";
                        if ($post_vars["change_type"]=="item") {
                            $updated_text = umlstools::replace_by_tagnumber($post_vars["sequence"], $new_token, $textlab[$column]);
                        } else {
                            $updated_text = preg_replace($token_pattern, "$new_token", $textlab[$column]);
                        }
                        $sql = "update m_textlab_document set ".
                               $column." = '".mysql_real_escape_string($updated_text)."' ".
                               $where;
                        if ($result = mysql_query($sql)) {
                            header($redirect."&seq=".$post_vars["sequence"]."&text=".$get_vars["text"]."#cuitagged");
                        }
                    }
                }
                break;
            case "Next":
                // token and position stay empty when there is no next token
                $next_token = array("", "");
                if (($post_vars["sequence"] || $post_vars["phrase_seq"]) && ($textlab["cuitagged_text"] || $textlab["cuitagged_text_predicted"])) {
                    $sequence = ($post_vars["sequence"]?$post_vars["sequence"]:$post_vars["phrase_seq"]);
                    if ($post_vars["phrase_seq"]) {
                        $source_text = $textlab["phrased_text"];
                    } elseif ($get_vars["text"]=="actual") {
                        $source_text = $textlab["cuitagged_text"];
                    } else {
                        $source_text = $textlab["cuitagged_text_predicted"];
                    }
                    $found = umlstools::retrieve_token($sequence, $source_text);
                    if (is_array($found)) {
                        $next_token = $found;
                    }
                }
                $redirect = "location: ".$_SERVER["PHP_SELF"]."?page=".$get_vars["page"]."&menu_id=".$get_vars["menu_id"]."&document_id=".$post_vars["document_id"]."&tab=".$get_vars["tab"]."&cuitagged=".$next_token[0];
                if ($post_vars["phrase_seq"]) {
                    header($redirect."&phr=".$next_token[1]."#cuitagged");
                } else {
                    header($redirect."&seq=".$next_token[1]."&text=".$get_vars["text"]."#cuitagged");
                }
                break;
            }
        }
    }

    function replace_by_tagnumber($tagnumber, $new_token, $text) {
    //
    // Replaces the token at a given position of a space delimited text.
    //
    // $tagnumber - 1-based position of the token to replace
    // $new_token - replacement token
    // $text      - space delimited text
    //
    // Returns the changed text. The text comes back unchanged when no token
    // has that position.
    //
        $tokens = explode(" ", $text);
        foreach ($tokens as $index => $item) {
            // loose comparison on purpose: the position may arrive as a string
            if ($index + 1 == $tagnumber) {
                $tokens[$index] = $new_token;
                break;
            }
        }
        return implode(" ", $tokens);
    }

    function remove_by_tagnumber($tagnumber, $new_token, $text) {
    //
    // Removes the token at a given position of a space delimited text.
    //
    // $tagnumber - 1-based position of the token to remove
    // $new_token - not used; kept so existing calls keep working
    // $text      - space delimited text
    //
    // Returns the text without that token. The text comes back unchanged
    // when no token has that position.
    //
        $tokens = explode(" ", $text);
        foreach ($tokens as $index => $item) {
            // loose comparison on purpose: the position may arrive as a string
            if ($index + 1 == $tagnumber) {
                unset($tokens[$index]);
                break;
            }
        }
        return implode(" ", $tokens);
    }

    function retrieve_token($sequence, $text) {
    //
    // Finds the next concept tagged token after a given position.
    //
    // $sequence - 1-based position to start after
    // $text     - space delimited tagged text
    //
    // A token counts as concept tagged when it contains _C followed by seven
    // digits. Returns array(token, position) for the first such token whose
    // position is greater than $sequence, or nothing when there is none.
    //
        $tokens = explode(" ", $text);
        foreach ($tokens as $index => $item) {
            $position = $index + 1;
            if ($position > $sequence && preg_match("/(_C[0-9]{7})/", $item)) {
                return array($item, $position);
            }
        }
    }

    function form_manage_cui() {
    //
    // Prints the concept manager form for the token named in
    // get_vars["cuitagged"].
    //
    // Arguments (positional, read through func_get_args):
    //   0: menu_id   - not used here
    //   1: post_vars - not used here
    //   2: get_vars  - request variables (menu_id, document_id, tab,
    //                  cuitagged, text, seq, phr)
    //   3: textlab   - document record; phrased_text, cuitagged_text,
    //                  cuitagged_text_predicted and document_id are read
    //
    // Output, in order: the numbered document text with the token
    // highlighted, the candidate concepts for the term together with the
    // lexicon and WordNet lookups, the output of tagger::treebank_select
    // (presumably the part-of-speech selector - confirm against tagger), the
    // change type radios and the submit buttons. Nothing is returned.
    //
    // Values taken from the request or from the document are escaped before
    // they are placed in SQL, in the form action or in HTML attributes.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $get_vars = $arg_list[2];
            $textlab = $arg_list[3];

            print "<a name='cuitagged'>";
            print "<span class='newstitle'>".FTITLE_MANAGE_CUI."</span><br>";
            print "<span class='tinylight'>".INSTR_MANAGE_CUI."</span><br>";
            print "<table width='550' cellpadding='5' cellspacing='1' style='border: 1px solid black'>";
            print "<form action = '".htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES).
                  "?page=PROCESSING&menu_id=".urlencode($get_vars["menu_id"]).
                  ($get_vars["document_id"]?"&document_id=".urlencode($get_vars["document_id"]):"").
                  "&tab=".urlencode($get_vars["tab"]).
                  "&cuitagged=".urlencode($get_vars["cuitagged"]).
                  "&text=".urlencode($get_vars["text"]).
                  "#cuitagged' name='form_manage_cui' method='post'>";
            print "<tr valign='top'><td colspan='2'>";

            $cuitagged = $get_vars["cuitagged"];
            // a token is term_tag_cui; tag and cui stay null when absent
            list($input_term,$input_tag,$input_cui) = array_pad(explode("_", $cuitagged), 3, null);

            if ($get_vars["phr"]) {
                // normalize phrase: its words are joined with | inside the token
                $phrase_tokens = explode("|", $input_term);
                sort($phrase_tokens);
                $input_term = implode(" ", $phrase_tokens);

                print "<div id='phrase' style='visibility:show;'>";
                print umlstools::mark_tagged($cuitagged, $textlab["phrased_text"]);
                print "</div>";
            } else {
                // the request selects the actual or the predicted text
                $shown_text = ($get_vars["text"]=="actual") ? $textlab["cuitagged_text"] : $textlab["cuitagged_text_predicted"];
                print "<div id='cuitagged' style='visibility: show;'>";
                print umlstools::mark_tagged($cuitagged, $shown_text);
                print "</div>";
            }

            print "</td></tr>";
            print "<tr valign='top'><td>";

            // look the term up by its base form when the lexicon has one
            $baseform = umlstools::baseform($input_term);
            $lookup_term = $baseform ? $baseform : $input_term;
            $sql = "select s.NSTR, c.CUI, c.SAB, c.CODE, c.STR, t.STY ".
                   "from umls.MRXNS_ENG s, umls.MRCONSO c, umls.MRSTY t ".
                   "where c.CUI = s.CUI and s.CUI = t.CUI and c.ISPREF = 'Y' and c.LAT = 'ENG' ".
                   "and s.NSTR = '".mysql_real_escape_string(strtolower($lookup_term))."' group by c.CUI, c.SAB, c.CODE, c.STR, t.STY";

            if ($result = mysql_query($sql)) {
                if (mysql_num_rows($result)) {
                    print "<a name='cuidetail'><br>";
                    // the bold element was always empty: no token variable is set in this function
                    print "<span class='error'>".htmlspecialchars($cuitagged, ENT_QUOTES).": <font color='red'><b></b></font></span><br> ";
                    print "<table width='550' bgcolor='white' cellpadding='2' cellspacing='0'>";
                    print "<tr valign='top' bgcolor='#CCFF99'><td colspan='4'>";
                    print "UMLS LEXICON LOOKUP<br>";
                    print "</td></tr>";
                    print "<tr valign='top' bgcolor='white'><td colspan='4'>";
                    print umlstools::lexicon_lookup($input_term);
                    print "</td></tr>";
                    print "<tr valign='top' bgcolor='#CCFF99'><td colspan='4'>";
                    print "WORDNET LOOKUP<br>";
                    print "</td></tr>";
                    print "<tr valign='top' bgcolor='white'><td colspan='4'>";
                    print umlstools::wordnet_lookup($input_term);
                    print "</td></tr>";
                    print "<tr valign='top' bgcolor='#CCFF99'>".
                               "<td class='tabletext'>SAB</td>".
                               "<td class='tabletext'>CODE</td>".
                               "<td class='tabletext'>STR</td>".
                               "<td class='tabletext'>STY</td></tr>";
                    // one radio row per concept, followed by its source rows
                    $prev_cui = null;
                    while(list($nstr, $cui, $sab, $code, $str, $sty) = mysql_fetch_array($result)) {
                        if ($prev_cui <> $cui) {
                            print "<tr valign='top' bgcolor='#FFCC33'>".
                                  "<td colspan='5'><input type='radio' name='cui' ".($input_cui==$cui?"checked":"")." value='$cui'>".
                                  "<span class='service'>$cui</span> ".umlstools::document_distance($cui)."</td></tr>";
                        }
                        print "<tr valign='top' bgcolor='white'>".
                                   "<td class='tabletext'>$sab</td>".
                                   "<td class='tabletext'>$code</td>".
                                   "<td class='tabletext'>$str</td>".
                                   "<td class='tabletext'>$sty</td></tr>";
                        $prev_cui = $cui;
                    }
                    print "<tr valign='top' bgcolor='#FFCC33'>".
                          "<td colspan='5'><input type='radio' name='cui' value='NOCUI'>".
                          "<span class='service'>NO CUI</span></td></tr>";
                    print "</table><br>";
                    print "<input type='hidden' name='document_id' value='".htmlspecialchars(($get_vars["document_id"]?$get_vars["document_id"]:$textlab["document_id"]), ENT_QUOTES)."'>";
                } else {
                    print "<a name='cuidetail'><br>";
                    print "<font color='red'>CUI has no counterpart in selected controlled vocabularies.</font>";
                }
            }
            print tagger::treebank_select($input_tag);
            print "<span class='boxtitle'>".LBL_CHANGE_TYPE."</span><br> ";
            print "<span class='tinylight'>".INSTR_CHANGE_TYPE."</span><br>";
            print "<span class='service'>";
            print "<input type='radio' class='textbox' name='change_type' value='item' checked > ITEM-SPECIFIC <br>";
            print "<input type='radio' class='textbox' name='change_type' value='global' > GLOBAL <br>";
            print "</span><br>";
            print "<table cellpadding='2' cellspacing='1'>";
            print "<tr><td>";
            // attribute values are single quoted, so quotes in the text must be escaped
            print "<input type='hidden' name='input_term' value='".htmlspecialchars($input_term, ENT_QUOTES)."'>";
            print "<input type='hidden' name='cuitagged' value='".htmlspecialchars($get_vars["cuitagged"], ENT_QUOTES)."'>";
            print "<input type='hidden' name='cuitagged_text' value='".htmlspecialchars($textlab["cuitagged_text"], ENT_QUOTES)."'>";
            print "<input type='hidden' name='phrased_text' value='".htmlspecialchars($textlab["phrased_text"], ENT_QUOTES)."'>";
            print "<input type='hidden' name='sequence' value='".htmlspecialchars($get_vars["seq"], ENT_QUOTES)."'>";
            print "<input type='hidden' name='phrase_seq' value='".htmlspecialchars($get_vars["phr"], ENT_QUOTES)."'>";
            print "<input type='hidden' name='text' value='".htmlspecialchars($get_vars["text"], ENT_QUOTES)."'>";
            print "<input type='submit' value = 'Adopt Configuration' class='textbox' name='submitcui' title='Changes the token in tagged text.' style='border: 1px solid #000000'> ";
            print "<input type='submit' value = 'Next' class='textbox' name='submitcui' title='Go to next tagged token' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "<tr><td>";
            print "<input type='submit' value = 'Cancel' class='textbox' name='submitcui' style='border: 1px solid #000000'> ";
            print "</td></tr>";
            print "</table>";

            print "</td></tr>";
            print "</form>";
            print "</table>";
        }
    }

    function inventory($document_id) {
    //
    // Tells whether m_textlab_terms holds at least one row for a document.
    //
    // Returns the string "YES" when a row exists, "NO" when none exists or
    // the query fails.
    //
        $query = "select * from m_textlab_terms where document_id = '$document_id' limit 1";
        $rows = mysql_query($query);

        return ($rows && mysql_num_rows($rows)) ? "YES" : "NO";
    }

    function lexicon_lookup($term) {
    //
    // Prints the rows of umls.LRAGR whose STR equals $term as an HTML table
    // with the columns EUI, STR, SCA, AGR and BAS.
    //
    // Prints nothing when there is no such row or the query fails. The
    // output is printed directly; nothing is returned.
    //
        // terms are free text and may contain quotes, so escape them for SQL
        $sql = "select EUI, STR, SCA, AGR, BAS from umls.LRAGR where STR = '".mysql_real_escape_string($term)."'";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                print "<table>";
                print "<tr valign='top' bgcolor='#CCFF99'><td>EUI</td><td>STR</td><td>SCA</td><td>AGR</td><td>BAS</td></tr>";
                while (list($eui, $str, $sca, $agr, $bas) = mysql_fetch_array($result)) {
                    print "<tr><td>$eui</td><td>$str</td><td>$sca</td><td>$agr</td><td>$bas</td></tr>";
                }
                print "</table>";
            }
        }
    }

    function wordnet_lookup($term) {
    //
    // Prints the WordNet senses of $term as an HTML table with the columns
    // WORD, SENSE, TYPE and GLOSSARY, ordered by sense number. The data is
    // read from wn.wn_synset joined to wn.wn_gloss on synset_id.
    //
    // Prints nothing when the word has no entry or the query fails. The
    // output is printed directly; nothing is returned.
    //
        // terms are free text and may contain quotes, so escape them for SQL
        $sql = "select s.word, s.sense_number, s.ss_type, g.gloss ".
               "from wn.wn_synset s, wn.wn_gloss g ".
               "where s.synset_id = g.synset_id and s.word = '".mysql_real_escape_string($term)."' ".
               "order by s.sense_number";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                print "<table>";
                print "<tr valign='top' bgcolor='#CCFF99'><td>WORD</td><td>SENSE</td><td>TYPE</td><td>GLOSSARY</td></tr>";
                while (list($word, $sense, $type, $gloss) = mysql_fetch_array($result)) {
                    print "<tr valign='top'><td>$word</td><td>$sense</td><td>$type</td><td>$gloss</td></tr>";
                }
                print "</table>";
            }
        }
    }

    function cui_details() {
    //
    // Builds the concept detail form for the token in get_vars["cuitagged"].
    //
    // Arguments (positional, read through func_get_args):
    //   0: menu_id   - not used here
    //   1: post_vars - not used here
    //   2: get_vars  - request variables (cuitagged, menu_id, document_id, tab)
    //   3: textlab   - optional document record; its document_id is used
    //                  when the request carries none
    //
    // The term part of the token is looked up by its base form when the
    // lexicon has one, otherwise as is. Every matching concept is listed
    // with a radio button.
    //
    // Returns the HTML as a string; the script dies with the MySQL error
    // when the query fails. Returns nothing when called without arguments.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $get_vars = $arg_list[2];
            $textlab = (count($arg_list) > 3 && is_array($arg_list[3])) ? $arg_list[3] : array();

            $token = $get_vars["cuitagged"];
            $retval = "";

            // a token is term_tag_cui; only the term is needed here
            list($term, $tag, $concept) = array_pad(explode("_", $token), 3, null);

            $baseform = umlstools::baseform($term);
            $lookup_term = $baseform ? $baseform : $term;
            $sql = "select s.NSTR, c.CUI, c.SAB, c.CODE, c.STR, t.STY ".
                   "from umls.MRXNS_ENG s, umls.MRCONSO c, umls.MRSTY t ".
                   "where c.CUI = s.CUI and s.CUI = t.CUI and c.ISPREF = 'Y' and c.LAT = 'ENG' ".
                   //"and (c.SAB = 'MDR' or c.SAB = 'CST' or c.SAB = 'WHO' or c.SAB = 'ICD9CM' or c.SAB = 'MSH' or c.SAB = 'SNOMEDCT') ".
                   "and s.NSTR = '".mysql_real_escape_string($lookup_term)."'";

            if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                if (mysql_num_rows($result)) {
                    $document_id = $get_vars["document_id"] ? $get_vars["document_id"] : (isset($textlab["document_id"]) ? $textlab["document_id"] : "");

                    $retval .= "<a name='cuidetail'><br>";
                    $retval .= "<form action = '".htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES).
                               "?page=PROCESSING&menu_id=".urlencode($get_vars["menu_id"]).
                               ($get_vars["document_id"]?"&document_id=".urlencode($get_vars["document_id"]):"").
                               ($get_vars["tab"]?"&tab=".urlencode($get_vars["tab"]):"").
                               "' name='form_cuidetail' method='post'>";
                    $retval .= "<span class='boxtitle'>".LBL_CUI_DETAIL_FOR_TOKEN.": <font color='red'><b>".htmlspecialchars($token, ENT_QUOTES)."</b></font></span><br> ";
                    $retval .= "<table width='500' bgcolor='black' cellpadding='2' cellspacing='1'>";
                    $retval .= "<tr valign='top' bgcolor='yellow'>".
                               "<td class='tabletext'>&nbsp;</td>".
                               "<td class='tabletext'>NSTR</td>".
                               "<td class='tabletext'>CUI</td>".
                               "<td class='tabletext'>SAB</td>".
                               "<td class='tabletext'>CODE</td>".
                               "<td class='tabletext'>STR</td>".
                               "<td class='tabletext'>STY</td></tr>";
                    while(list($nstr, $cui, $sab, $code, $str, $sty) = mysql_fetch_array($result)) {
                        $retval .= "<tr valign='top' bgcolor='white'>".
                                   "<td class='tabletext'><input type='radio' name='cui' value='$cui'></td>".
                                   "<td class='tabletext'>$nstr</td>".
                                   "<td class='tabletext'>$cui</td>".
                                   "<td class='tabletext'>$sab</td>".
                                   "<td class='tabletext'>$code</td>".
                                   "<td class='tabletext'>$str</td>".
                                   "<td class='tabletext'>$sty</td></tr>";
                    }
                    $retval .= "</table><br>";
                    $retval .= "<input type='hidden' name='document_id' value='".htmlspecialchars($document_id, ENT_QUOTES)."'>";
                    $retval .= "<input type='submit' value = 'Replace CUI' class='textbox' name='submitcuidetail' style='border: 1px solid #000000'> ";
                    $retval .= "<input type='submit' value = 'Cancel' class='textbox' name='submitcuidetail' style='border: 1px solid #000000'> ";
                    $retval .= "</form>";
                } else {
                    $retval .= "<a name='cuidetail'><br>";
                    $retval .= "<font color='red'>CUI has no counterpart in selected controlled vocabularies.</font>";
                }
                return $retval;
            }

        }
    }

    function phrase_detect_inline() {
    //
    // Detects multi-word UMLS phrases in one tagged phrase/sentence.
    //
    // Argument 0 is the tagged sentence (tokens of the form word_TAG[_CUI]).
    // The sentence is nominalized first. Then, for each start token, the
    // words of the following one to three tokens are added one at a time;
    // after each addition the collected words are sorted alphabetically and
    // looked up in umls.MRXNS_ENG. Every hit is recorded as "words|CUI".
    //
    // Returns the sentence and the distinct hits wrapped by xml_wrap and
    // passed through nl2br, or nothing when no phrase was found.
    //
    // NOTE(review): start tokens stop $maxtokens short of the end, so
    // phrases made only of the last tokens of a sentence are never looked
    // up - confirm whether that is intended.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $inputstring = umlstools::nominalize($arg_list[0]);

            // tokenize
            $tokens = preg_split("/[\s]+/", $inputstring);
            $maxtokens = 3; // look ahead x tokens
            $phrases = array();
            $last_start = count($tokens) - $maxtokens;

            for($i=0; $i<$last_start; $i++) {

                // STEP 1: the word of the start token opens the compound
                $base_token = explode("_", $tokens[$i]);
                $compound_term = $base_token[0];

                for ($j=1; $j<=$maxtokens; $j++) {
                    // STEP 2: add the word of the next token
                    $next_token = explode("_", $tokens[$i+$j]);
                    $compound_term = trim($compound_term." ".$next_token[0]);

                    // STEP 3: normalize by sorting the words
                    $norm_compound = explode(" ", $compound_term);
                    if (count($norm_compound)>1) {
                        sort($norm_compound);
                        $compound_term = implode(" ", $norm_compound);

                        // STEP 4: look it up in normalized strings
                        $sql = "select NSTR, CUI ".
                               "from umls.MRXNS_ENG ".
                               "where LAT = 'ENG' and NSTR like '".mysql_real_escape_string(trim(strtolower($compound_term)))."' ".
                               "group by NSTR, CUI";
                        if ($result = mysql_query($sql)) {
                            while (list($umls_string, $umls_cui) = mysql_fetch_array($result)) {
                                $phrases[] = "$compound_term|$umls_cui";
                            }
                        }
                    }
                } // $j loop
            } // $i loop

            if (count($phrases)>0) {
                // overlapping windows find the same phrase more than once
                return nl2br(umlstools::xml_wrap($inputstring, array_unique($phrases)));
            }
        }
    }

    function xml_wrap($inputstring, $phrases) {
    //
    // Wraps a sentence and its detected phrases in a small XML fragment.
    //
    // $inputstring - the sentence; placed in <sentence> as is
    // $phrases     - array of phrase strings; each becomes a <phrase_item>
    //
    // Returns the fragment as a string. Neither the sentence nor the phrases
    // are escaped. The <phrases> element stays empty when $phrases is empty
    // or not an array.
    //
        $xml = "<filtered>\n";
        $xml .= "<sentence>\n";
        $xml .= "$inputstring\n";
        $xml .= "</sentence>\n";
        $xml .= "<phrases>\n";
        if (is_array($phrases)) {
            foreach ($phrases as $value) {
                $xml .= "<phrase_item>$value</phrase_item>\n";
            }
        }
        $xml .= "</phrases>\n";
        $xml .= "</filtered>";

        return $xml;
    }

    function filter() {
    //
    // Selects the sentences of a tagged text that contain an anchor word,
    // scrubs them and runs phrase detection on each.
    //
    // Argument 0 is the tagged text. It is split into sentences at the
    // tagged sentence terminators ._. !_. ?_. and duplicates are dropped.
    // A sentence is kept when at least one anchor pattern matches it. Kept
    // sentences have leftover punctuation tokens removed, the word of _PN*
    // tokens replaced by *** and the number of _MC tokens replaced by @@@.
    //
    // Returns the concatenated phrase_detect_inline output of the distinct
    // scrubbed sentences; an empty string when nothing qualifies. Returns
    // nothing when called without arguments.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $inputstring = $arg_list[0];

            // Anchor words. Appending with [] keeps every pattern: with
            // hand-written indexes one of them was assigned twice, so the
            // symptom pattern was overwritten by the sign pattern.
            $anchor = array();
            $anchor[] = "/\b([Rr]egard)(ing)?(_)/";
            $anchor[] = "/\b([Ee]xperience)(d|s)?(_)/";
            $anchor[] = "/\b([Rr]eport)(s|ed)?(_)/";
            $anchor[] = "/\b([Cc]omplain)(s|ed|t)(_)/";
            $anchor[] = "/\b([Ff]ollow)(s|ing|ed)(_)/";
            $anchor[] = "/\b([Nn]otic)(e|ing|ed)(_)/";
            $anchor[] = "/\b(([Ff]eel)(s|ing)|(felt))(_)/";
            $anchor[] = "/\b([Nn]ot)(ed|es|ing)(_)/";
            $anchor[] = "/\b([Cc]onfirm)(ed|s|ing)?(_)/";
            // state, stated, states (the suffix "ed" could never match "stated")
            $anchor[] = "/\b([Ss]tate)(d|s)?(_)/";
            $anchor[] = "/\b([Dd]en)(y|ies|ied)(_)/";
            $anchor[] = "/\b([Ii]ndicative)(_)/";
            // accepts the correct spelling as well as the single-c one
            $anchor[] = "/\b([Oo]cc?urrence)(_)/";
            $anchor[] = "/\b([Ss]ymptom)(s|atology)?(_)/";
            $anchor[] = "/\b([Ss]ign)(s)?(_)/";
            $anchor[] = "/\b([Dd]ifficult)(y)?(_)/";
            $anchor[] = "/\b([Nn])(o)(_)/";
            $anchor[] = "/\b([Uu]n)?([Ll]ikely)(_)/";
            $anchor[] = "/\b([Tt]here_EX )(is_VBZ)(_C[0-9]{7})?/";
            $anchor[] = "/\b(injection_NN_C0021485 site_NN_C0205145)/";
            $anchor[] = "/\b([Pp])(ossible)\b/";

            // Symbols and remaining punctuation marks to take out of a kept
            // sentence. The lists do not depend on the sentence, so they are
            // built once.
            $searches = array();
            $replacements = array();
            $searches[0] = "/(-_:)/";
            $searches[1] = "/(:_:)/";
            $searches[2] = "/(-_SYM)/";
            $searches[3] = "/(,_,)/";
            // NOTE(review): the opening [ of a character class looks to be
            // missing here, so this only matches the literal text "A-Z]".
            // Left unchanged until the intended pattern is confirmed.
            $searches[4] = "/\b(A-Z]+[a-z])(_NN)\b/";
            $searches[5] = "/([a-zA-Z]+)(_PN[GDR]?)/";
            $searches[6] = "/([0-9.]+)(_MC)/";

            $replacements[0] = "";
            $replacements[1] = "";
            $replacements[2] = "";
            $replacements[3] = "";
            $replacements[4] = "***$2";
            $replacements[5] = "***$2";
            $replacements[6] = "@@@$2";

            // NOTE(review): the last alternative is LF followed by CR; CR LF
            // may have been intended - confirm against the stored text.
            $sentences = preg_split("/(\._\.)|(!_\.)|(\?_\.)|(\n\r)/", $inputstring);

            // array_unique keeps the original keys, so iterate over the
            // values; an index loop up to count() skips sentences after a
            // removed duplicate.
            $filtered_sentences = array();
            foreach (array_unique($sentences) as $sentence) {
                foreach ($anchor as $pattern) {
                    // STEP 1: keep the sentence when any anchor word occurs
                    if (preg_match($pattern, $sentence)) {
                        // STEP 2: take out symbols and punctuation marks
                        $filtered_sentences[] = preg_replace($searches, $replacements, $sentence);
                        break;
                    }
                }
            }

            $filtered_text = "";
            foreach (array_unique($filtered_sentences) as $value) {
                $filtered_text .= umlstools::phrase_detect_inline($value);
            }

            return $filtered_text;
        }
    }

    function nominalize() {
    //
    // Replaces adjectives by their nominal form where that yields a known
    // UMLS phrase. Takes in one phrase/sentence at a time.
    //
    // Argument 0 is the tagged sentence (tokens of the form word_TAG[_CUI]).
    // For every token whose tag contains JJ, each nominal form returned by
    // get_nominal_term is combined with the word of the following token and
    // looked up in umls.MRXNS_ENG. On a hit every occurrence of word_JJ in
    // the sentence is replaced by nominal_NN.
    //
    // Returns the possibly changed sentence, or nothing when called without
    // arguments.
    //
        if (func_num_args()) {
            $arg_list = func_get_args();
            $inputstring = $arg_list[0];

            // tokenize
            $tokens = preg_split("/[\s]+/", $inputstring);
            $maxtokens = 1; // look ahead x tokens

            for($i=0; $i<count($tokens)-$maxtokens; $i++) {

                // STEP 1: get base token; only tagged adjectives are of interest
                $base_token = explode("_", $tokens[$i]);
                if (count($base_token) < 2 || !preg_match("/JJ/", $base_token[1])) {
                    continue;
                }

                // STEP 2: nominal forms of the adjective, if the lexicon has any
                $nom_array = umlstools::get_nominal_term($base_token[0]);
                if (!is_array($nom_array)) {
                    continue;
                }

                foreach ($nom_array as $nom) {
                    $compound_term = $nom;

                    for ($j=1; $j<=$maxtokens; $j++) {
                        // STEP 3: create compound term with the next word
                        $next_token = explode("_", $tokens[$i+$j]);
                        $compound_term = trim($compound_term." ".$next_token[0]);

                        if (count(explode(" ", $compound_term))>1) {
                            // STEP 4: look it up in normalized strings
                            $sql = "select NSTR, CUI ".
                                   "from umls.MRXNS_ENG ".
                                   "where LAT = 'ENG' and NSTR like '".mysql_real_escape_string(trim(strtolower($compound_term)))."' ".
                                   "group by NSTR, CUI";
                            if ($result = mysql_query($sql)) {
                                if (mysql_num_rows($result)) {
                                    // STEP 5: known phrase, so nominalize the
                                    // adjective; the word is matched literally
                                    $search = "/(".preg_quote($base_token[0], "/")."_JJ)/";
                                    $replace = $nom."_NN";
                                    $inputstring = preg_replace($search, $replace, $inputstring);
                                }
                            }
                        }
                    } // $j loop
                } // foreach
            } // $i loop
            return $inputstring;
        }
    }

    function get_nominal_term($searchterm) {
    //
    // Looks up the nominal forms of an adjective.
    //
    // Reads BAS1 from umls.LRNOM for the rows whose BAS2 equals $searchterm
    // with SCA2 = 'adj', joined to umls.LRAGR on EUI2. The join can return
    // the same form more than once.
    //
    // Returns an array of the forms found, or nothing when there is none or
    // the query fails.
    //
        // the term is a word taken from free text, so escape it for SQL
        $sql = "select n.BAS1 from umls.LRNOM n, umls.LRAGR a ".
               "where a.EUI = n.EUI2 and n.SCA2 = 'adj' and n.BAS2 = '".mysql_real_escape_string($searchterm)."'";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                $nom_array = array();
                while (list($nom) = mysql_fetch_array($result)) {
                    $nom_array[] = $nom;
                }
                return $nom_array;
            }
        }
    }

    function scrubber() {
    //
    // STAGE 1 TRANSFORMATION
    // Uses staged cleaning with regular expressions
    // Input: Raw Text (first positional argument)
    // Output: Scrubbed Text. Nothing (NULL) is returned when the method is
    //         called without arguments.
    //
    // The stages run in a fixed order:
    //   SET A  units of measurement
    //   SET B  quoted stuff and stray symbols (three passes)
    //   SET C  ordinal numbers
    //   SET D  medical abbreviations (long list, then shorter terms)
    //   SET E  date/time elements
    // Every stage works on the output of the previous one, so both the order
    // of the stages and the order of the rules inside a stage matter.
    //
    // IMPORTANT: The patterns are byte-oriented (no /u modifier) and only know
    // ASCII quotes. If you encounter bugs in quotes removal, suspect unicode
    // quotes. Just change the text to regular ASCII quotes.
    //
    // NOTE ON RULE CARRY-OVER: $patterns and $replacements are reused by all
    // stages and are never emptied. preg_replace() applies every entry that is
    // in the arrays, so a stage that defines fewer rules than an earlier stage
    // also re-applies the leftover higher-numbered rules of that earlier stage
    // after its own rules. Some inputs depend on this: "resp N at 20" only
    // becomes "respiration normal at 20" because leftover SET D rule 49 runs
    // again after the "shorter terms" pass expanded "resp". Do not clear the
    // arrays between stages without re-checking such cases.
    //
        if (!func_num_args()) {
            return;
        }

        $arg_list = func_get_args();
        $raw_text = $arg_list[0];

        $patterns = array();
        $replacements = array();

        // --------------------------------------
        // SET A: UNITS OF MEASUREMENT

        $patterns[0] = "/\b([0-9\.]+)(ml)\b/";
        $patterns[1] = '/(\d+)(")(\s{0,1})(X|x)(\s{0,1})(\d+)(")/'; // n inch by n inch
        $patterns[2] = '/(\d+)(")/';
        $patterns[3] = "/\b(degrees?)(\s{1})(C)(\s{1})/";
        $patterns[4] = "/\b(degrees?)(\s{1})(F)(\s{1})/";
        $patterns[5] = "/\b(q)(\s{0,1})([0-9]+)(\s{0,1})([Hh])/";
        $patterns[6] = "/\b((\({0,1})[0-9]+)(\))/";
        $patterns[7] = "/( #'s )/";

        $replacements[0] = "$1 $2";
        $replacements[1] = "$1-inch by $6-inch";
        $replacements[2] = "$1 inches";
        $replacements[3] = "$1 Celsius ";
        $replacements[4] = "$1 Fahrenheit ";
        $replacements[5] = "every $3 $5ours";
        $replacements[6] = "$1$2 - ";
        $replacements[7] = " numbers ";

        $corrected = preg_replace($patterns, $replacements, $raw_text);

        // --------------------------------------
        // SET B: QUOTED STUFF

        // Pass 1: contractions, possessives and symbols that carry meaning.
        // NOTE(review): rule 3 rewrites every "'s" before rule 11 runs, so
        // rule 11 (babe's) looks unreachable - confirm before relying on it.
        $patterns[0] = '/(\s{0,1})(-\>)(\s{0,1})/'; // -> symbol
        $patterns[1] = '/(\()([\sa-zA-Z0-9]+)(\))/'; // anything enclosed in parentheses
        $patterns[2] = '/(\()(S|s)(\))/'; // S in parentheses
        $patterns[3] = "/('s)\b/";
        $patterns[4] = "/\b(W|w)(on't)\b/";
        $patterns[5] = "/\b(D|d)(oes|o)(n't)\b/";
        $patterns[6] = "/(@)/";
        $patterns[7] = "/(\s+)(\+)([0-9]+)/";
        $patterns[8] = "/\b(C|c)(an't)\b/";
        $patterns[9] = "/\b(D|d)(id)(n't)\b/";
        $patterns[10] = "/\b(D|d)(\/)(c'd)\b/";
        $patterns[11] = "/\b(B|b)(abe's)\b/";

        $replacements[0] = "$1- $3";
        $replacements[1] = "$2";
        $replacements[2] = "$2";
        $replacements[3] = "s";
        $replacements[4] = "$1ould not";
        $replacements[5] = "$1$2 not";
        $replacements[6] = "at";
        $replacements[7] = "$1more than $3";
        $replacements[8] = "$1annot";
        $replacements[9] = "$1id not";
        $replacements[10] = "$1ischarged";
        $replacements[11] = "$1aby's";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // Pass 2: leftover punctuation. Rules 7-11 of pass 1 are still in the
        // arrays and run again after these (see carry-over note above).
        $patterns[0] = "/(\s+)(-)([a-zA-Z]+)/"; // phrases that begin with dashes
        $patterns[1] = "/(\()/";
        $patterns[2] = "/(\))/";
        $patterns[3] = "/(\*)/";
        $patterns[4] = "/(#)([0-9]+)\b/";
        $patterns[5] = "/(~)/";
        $patterns[6] = "/( >)(\s{0,1})([0-9]+)\b/";

        $replacements[0] = "$1$2 $3";
        $replacements[1] = "";
        $replacements[2] = "";
        $replacements[3] = "-";
        $replacements[4] = "number $2";
        $replacements[5] = "around";
        $replacements[6] = " more than $3";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // Pass 3: drop whatever quote characters are still there.
        $patterns[0] = '/(")/'; // unquote quoted words
        $patterns[1] = "/(')/";

        $replacements[0] = "";
        $replacements[1] = "";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // --------------------------------------
        // SET C: NUMBER ISSUES

        // Index 0 is intentionally left alone here; it still holds the
        // double-quote rule of the previous pass.
        $patterns[1] = "/\b(1st)\b/";
        $patterns[2] = "/\b(2nd)\b/";
        $patterns[3] = "/\b(3rd)\b/";

        $replacements[1] = "first";
        $replacements[2] = "second";
        $replacements[3] = "third";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // ---------------------
        // SET D: MEDICAL ABBREVIATIONS
        $patterns[0] = '/\b(re)([:]{0,1})(\s{0,1})\b/';
        $patterns[1] = '/\bc\/o\b/';
        $patterns[2] = '/&/';
        $patterns[3] = '/\b((L|l)t?)(\.{0,1})(\s+)/';
        $patterns[4] = '/\b((R|r)t?)(\.{0,1})(\s+)/';
        $patterns[5] = '/\b(Hep|hep)(\.{0,1})(\s{0,1})([A-Z]{0,1})\b/';
        $patterns[6] = "/\b(d\/c)(\s{1})(to)/";
        $patterns[7] = "/\b(imm)(s{0,1})\b/";
        $patterns[8] = "/\b(I)(\.{0,1})(M)(\.{0,1})\b/";
        $patterns[9] = "/\b(IV)\b/";
        $patterns[10] = "/\b([Pp]\/c)(\s{1})(to)\b/";
        $patterns[11] = "/\b(ER)\b/";
        $patterns[12] = "/\b(D|d)(x)\b/";
        // The \b after (s?) keeps ordinary words that merely start with "Dr"
        // (Drowsy, Dry, Drink ...) from being turned into "Doctor...".
        $patterns[13] = "/\b(D)(r)(s?)\b(\.?)('?)(\s{0,1})/";
        $patterns[14] = "/\b(M|m)(om)\b/";
        $patterns[15] = "/\b(O2 )([Ss]ats? )/";
        $patterns[16] = "/\b(RN)\b/";
        $patterns[17] = "/\b(P)(-)([0-9]{2,3})\b/";
        $patterns[18] = "/\b(R|r)(esp)(\.?)(\s?)([Ss]ystem)/";
        $patterns[19] = "/\b(H|h)(osp)(\.{0,1})\b/";
        $patterns[20] = "/\b(T|t)(el\.{0,1})(\s{1})(call)\b/";
        $patterns[21] = "/\b(B\.?)(P\.?)\b/";
        $patterns[22] = "/\b(E|e)(merg)(\.?)\b/";
        // Alternatives are grouped so that both word boundaries apply to each
        // of them, not only \b to the first and \b to the last one.
        $patterns[23] = "/\b((T\/P)|(TPC)|(T\/C))\b/";
        $patterns[24] = "/\b(P\.?H\.?N\.?)\b/";
        $patterns[25] = "/\b(T|t)(emp)\b/";
        $patterns[26] = "/\b(D|d)(ad)\b/";
        $patterns[27] = "/(given )(in |on )(LA)/";
        $patterns[28] = "/(given )(in |on )(RA)/";
        $patterns[29] = "/\b(H|h)(x)\b/";
        $patterns[30] = "/(A|a)(ccdg\.{0,1} to)/";
        // The lookahead accepts "pt", "pt." and the plural "pts" but rejects
        // longer words that start with "pt" (ptosis would become patientosis).
        $patterns[31] = "/\b(P|p)(t)(?=s?\b)(\.*)/";
        // The \b after (C|c) stops tokens such as PCV7 or PCN from being
        // rewritten to "Phone callV7" / "Phone callN".
        $patterns[32] = "/\b(P|p)(\.?)(\/?)(C|c)\b(\.?)/";
        $patterns[33] = "/\b(F\.?)(\s?)(Doctor)/";
        $patterns[34] = "/\b(A|a)(pprox)([.]{0,1})(\s+)/";
        $patterns[35] = "/\b(I|i)(nj)([.]{0,1})(\s+)/";
        $patterns[36] = "/\b([Hh])([Aa])(,?)(\s?)([Nn])(,?)(\s?)([Vv])/";
        $patterns[37] = "/\b(Resps)\b/";
        $patterns[38] = "/\b(resps)\b/";
        $patterns[39] = "/\b([Pp]oss)\b/";
        $patterns[40] = "/\b(Sx)\b/";
        $patterns[41] = "/\b(on\+off)\b/";
        $patterns[42] = "/\b(paed)(\s{1})(dept)/";
        $patterns[43] = "/\b(paed)\b/";
        $patterns[44] = "/(Tyl supp)/";
        $patterns[45] = "/\b(neg)\b/";
        // Same match as before; written with the "/" delimiters used by every
        // other rule instead of relying on "(" ... ")" acting as delimiters.
        $patterns[46] = "/(or\/and)/";
        $patterns[47] = "/\b([Dd]ep)(t)(\.{0,1})\b/";
        $patterns[48] = "/\b(Enc to have)/";
        $patterns[49] = "/\b([Rr]espiration)( N at )([0-9]+)/";
        $patterns[50] = "/(B)(\.{0,1})(W)(\.{0,1})/";
        $patterns[51] = "/\b(P|p)(ed)(\.{0,1})(\s{1})|(p)(eads?)(\.{0,1})(\s{1})/";
        $patterns[52] = "/\b(reaction to )(vac)(\.{0,1})(\s{1})/";
        $patterns[53] = "/\b(R|r)(xn)\b/";
        $patterns[54] = "/(in |on )(his |her )(RA)/";
        $patterns[55] = "/(in |on )(his |her )(LA)/";
        $patterns[56] = "/\b(H|h)(as )(been )(diag\. )(with)/";
        $patterns[57] = "/\b(2ry)\b/";
        $patterns[58] = "/\b(B|b)(abe)\b/";
        $patterns[59] = "/\b(A|a)(ppt\.{0,1} )\b/";
        $patterns[60] = "/(B|b)(ab[ey])(\w{0,1})( BF)/";
        $patterns[61] = "/\b(to )(BF)/";
        $patterns[62] = "/( has )(\w+)( BF)/";
        $patterns[63] = "/\b(prior to )(immun\.? )/";
        $patterns[64] = "/\b(M|m)(eds)\b/";
        $patterns[65] = "/\b(dose )(for )(wt\.? )/";
        $patterns[66] = "/\b(pre)([ -]?)(med )(with )/";
        $patterns[67] = "/\b(neuro )(changes)/";
        $patterns[68] = "/\b(shot)\b/";

        $replacements[0] = "regarding$3 ";
        $replacements[1] = "complained of";
        $replacements[2] = "and";
        $replacements[3] = "left$4";
        $replacements[4] = "right$4";
        $replacements[5] = "$1atitis $4";
        $replacements[6] = "discharge $3";
        $replacements[7] = "immunization$2";
        $replacements[8] = "intramuscular";
        $replacements[9] = "Intravenous";
        $replacements[10] = "Phone call $3";
        $replacements[11] = "Emergency Room";
        $replacements[12] = "$1iagnosis";
        $replacements[13] = "$1octor$3$6";
        $replacements[14] = "$1other";
        $replacements[15] = "Oxygen saturation ";
        $replacements[16] = "Registered Nurse";
        $replacements[17] = "$1ressure is $3";
        $replacements[18] = "$1espiratory $5";
        $replacements[19] = "$1ospital";
        $replacements[20] = "$1elephone $4";
        $replacements[21] = "Blood Pressure";
        $replacements[22] = "$1mergency$3";
        $replacements[23] = "Telephone call";
        $replacements[24] = "Public Health Nurse";
        $replacements[25] = "$1emperature";
        $replacements[26] = "father";
        $replacements[27] = "given $2 Left Arm";
        $replacements[28] = "given $2 Right Arm";
        $replacements[29] = "$1istory";
        $replacements[30] = "$1ccording to";
        $replacements[31] = "$1atient";
        $replacements[32] = "$1hone call";
        $replacements[33] = "Family Doctor";
        $replacements[34] = "$1pproximately ";
        $replacements[35] = "$1njection ";
        $replacements[36] = "$1eadache$3$4$5ausea$6$7$8omiting";
        $replacements[37] = "Breathes";
        $replacements[38] = "breathes";
        $replacements[39] = "$1ible";
        $replacements[40] = "symptoms";
        $replacements[41] = "on and off";
        $replacements[42] = "pediatric department";
        $replacements[43] = "pediatrician";
        $replacements[44] = "Tylenol suppository";
        $replacements[45] = "negative";
        $replacements[46] = "and/or";
        $replacements[47] = "$1artmen$2$3";
        $replacements[48] = "Encouraged to have";
        $replacements[49] = "$1 normal at $3";
        $replacements[50] = "$1lood $3ork";
        $replacements[51] = "pediatrician ";
        $replacements[52] = "$1vaccination$3$4";
        $replacements[53] = "$1eaction";
        $replacements[54] = "$1$2right arm";
        $replacements[55] = "$1$2left arm";
        $replacements[56] = "$1$2$3diagnosed $5";
        $replacements[57] = "secondary";
        $replacements[58] = "$1aby";
        $replacements[59] = "$1ppointment ";
        $replacements[60] = "$1$2$3 breastfed";
        $replacements[61] = "$1 breastfeed";
        $replacements[62] = "$1$2 breastfed";
        $replacements[63] = "$1immunization ";
        $replacements[64] = "$1edications";
        $replacements[65] = "$1$2weight ";
        $replacements[66] = "premedicate $4";
        $replacements[67] = "neurologic $2";
        $replacements[68] = "injection";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // Shorter terms that will not be caught up there.
        // SET D rules 19-68 are still in the arrays and run again after
        // these (see carry-over note above).

        $patterns[0] = "/\b([Rr])(esp)(\.?)\b/";
        $patterns[1] = "/\b(RUQ)\b/";
        $patterns[2] = "/\b(PO)\b/";
        $patterns[3] = "/\b(EP)\b/";
        $patterns[4] = "/\b(C|c)(t\.?)(\s{0,1})\b/";
        $patterns[5] = "/\b([Bb])((\/)|([Pp]))([Mm])\b/";
        $patterns[6] = "/\b(FP)\b/";
        $patterns[7] = "/\b(C|c)(k)\b/";
        $patterns[8] = "/\b(C|c)(kd)\b/";
        $patterns[9] = "/\b([Ss])(\s{0,1})(\+)(\s{0,1})([Ss])/";
        $patterns[10] = "/\b(A|a)(cc to)\b/";
        $patterns[11] = "/\b([Ff])(\/)([Uu])\b/";
        $patterns[12] = "/( \+ )/";
        $patterns[13] = "/\b([Ll]ot)(#)/";
        $patterns[14] = "/\b(p\/u)\b/";
        $patterns[15] = "/\b(f\/u)\b/";
        $patterns[16] = "/\b(s\/s)\b/";
        $patterns[17] = "/(\+\/-)/";
        $patterns[18] = "/\b(O2)\b/";

        $replacements[0] = "$1espiration";
        $replacements[1] = "right upper quadrant";
        $replacements[2] = "per orem";
        $replacements[3] = "Emergency Physician";
        $replacements[4] = "$1lient$3";
        $replacements[5] = "beats per minute";
        $replacements[6] = "Family Physician";
        $replacements[7] = "$1heck";
        $replacements[8] = "$1hecked";
        $replacements[9] = "$1igns and $5ymptoms";
        $replacements[10] = "$1ccording to";
        $replacements[11] = "$1ollow up";
        $replacements[12] = " and ";
        $replacements[13] = "$1 number";
        $replacements[14] = "pick up";
        $replacements[15] = "follow up";
        $replacements[16] = "signs and symptoms";
        $replacements[17] = "about";
        $replacements[18] = "Oxygen";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // ---------------------
        // SET E: DATE/TIME ELEMENTS
        // SET D rules 26-68 are still in the arrays and run once more after
        // these (see carry-over note above).

        $patterns[0] = "/\b(Mon\.{0,1})\b/";
        $patterns[1] = "/\b((Tue)|(Tues)(\.?))\b/";
        $patterns[2] = "/\b(Wed)\b/";
        $patterns[3] = "/\b((Thu)|(Thur)|(Thurs))\b/";
        $patterns[4] = "/\b(Fri)\b/";
        $patterns[5] = "/\b(Sat)\b/";
        $patterns[6] = "/\b(Sun)\b/";
        $patterns[7] = "/\b([Jj]an\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[8] = "/\b([Ff]eb\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[9] = "/\b([Mm]ar\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[10] = "/\b([Aa]pr\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[11] = "/\b([Jj]un\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[12] = "/\b([Jj]ul\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[13] = "/\b([Aa]ug\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[14] = "/\b([Ss]ept?\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[15] = "/\b([Oo]ct\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[16] = "/\b([Nn]ov\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[17] = "/\b([Dd]ec\.{0,1})(\s{0,1})([0-9]{0,2})\b/";
        $patterns[18] = "/(\s|:)([0-9]{0,2})(\s{0,1})(H|h)(r)(s?)/";
        $patterns[19] = "/\b((Jan|January)|(Feb|February)|(Mar|March)|(Apr|April)|(May)|(Jun|June)|(Jul|July)|(Aug|August)|(Sep|Sept|September)|(Oct|October)|(Nov|November)|(Dec|December))\b/";
        $patterns[20] = "/\b([0-9]+)(\s{0,1})([ap]\.{0,1}m\.{0,1})/"; // Separate am/pm from the time
        $patterns[21] = "/\b([0-9]+)(\s{0,1})(min)(s?)(\.{0,1})(\s{1})/"; // 20 mins.
        $patterns[22] = "/\b([0-9]+)(days)\b/";
        $patterns[23] = "/\b([0-9]+)(\s{0,1})(wk)/";
        $patterns[24] = "/\b([0-9]{4})(hrs)\b/";
        $patterns[25] = "/(q)(\s{0,1})([0-9]+)(-{0,1})([0-9]{0,1})(\s{0,1})/";

        $replacements[0] = "Monday";
        $replacements[1] = "Tuesday";
        $replacements[2] = "Wednesday";
        $replacements[3] = "Thursday";
        $replacements[4] = "Friday";
        $replacements[5] = "Saturday";
        $replacements[6] = "Sunday";
        $replacements[7] = "January $3";
        $replacements[8] = "February $3";
        $replacements[9] = "March $3";
        $replacements[10] = "April $3";
        $replacements[11] = "June $3";
        $replacements[12] = "July $3";
        $replacements[13] = "August $3";
        $replacements[14] = "September $3";
        $replacements[15] = "October $3";
        $replacements[16] = "November $3";
        $replacements[17] = "December $3";
        $replacements[18] = "$1$2 $4our$6";
        $replacements[19] = "$1";
        $replacements[20] = "$1 $3";
        $replacements[21] = "$1$2 minute$4$5$6";
        $replacements[22] = "$1 $2";
        $replacements[23] = "$1$2week";
        $replacements[24] = "$1 hours";
        $replacements[25] = "every $3$4$5 ";

        $corrected = preg_replace($patterns, $replacements, $corrected);

        // IDENTIFIERS

        return $corrected;
    }

// END OF CLASS
}
?>