<?
class tagger extends module {

    // Author: Herman Tolentino MD
    // NLP Project 2005
    // License: GNU GPL
    //
    // IMPORTANT: This module uses the MedPost part-of-speech tagger developed by
    // Larry Smith. Please quote the following article when using this application:
    //
    // Smith L, Rindflesch T, Wilbur WJ. MedPost: a part-of-speech tagger for bioMedical text.
    // Bioinformatics. 2004 Sep 22;20(14):2320-1. Epub 2004 Apr 8.
    //
    // Note that MedPost does not use the stock Penn Treebank tag set: it uses a
    // modified form of it with additional tags.
    //

    // Constructor (PHP 4 style: same name as the class).
    // Fills in the module metadata and prints a warning when the MEDPOST_DIR
    // constant has not been defined.
    function tagger() {
        // remember to bump the version prefix on release
        // 0.1: BEGIN
        $this->author = 'Herman Tolentino MD';
        $this->version = "0.1-".date("Y-m-d");
        $this->module = "tagger";
        $this->description = "AEFI Module - POS Tagger";

        if (defined("MEDPOST_DIR")) {
            return;
        }
        return print("<font color='red'>MEDPOST_DIR not defined.</font>");
    }

    // --------------- STANDARD MODULE FUNCTIONS ------------------

    // Registers this module's dependencies (module_dependencies):
    // the tagger depends on "module".
    function init_deps() {
        $dependent = $this->module;
        module::set_dep($dependent, "module");
    }

    // Registers the English language directives used by the Penn tags
    // form and the Penn tags list.
    function init_lang() {
        // directive name => english text, registered in this order
        $directives = array(
            "FTITLE_PENN_TAGS_FORM"  => "PENN TAGS FORM",
            "LBL_TREEBANK_ID"        => "Treebank ID",
            "LBL_TREEBANK_NAME"      => "Treebank Name",
            "INSTR_TREEBANK_STATUS"  => "Check to activate",
            "LBL_TREEBANK_EXAMPLE"   => "Treebank Example",
            "FTITLE_TAG_LIST"        => "PENN TAGS LIST",
            "THEAD_TREEBANK_ID"      => "TREEBANK ID",
            "THEAD_TREEBANK_NAME"    => "NAME",
            "THEAD_TREEBANK_STATUS"  => "STATUS",
            "THEAD_TREEBANK_EXAMPLE" => "EXAMPLE"
        );
        foreach ($directives as $directive => $text) {
            module::set_lang($directive, "english", $text, "Y");
        }
    }

    // Adds the "Penn Treebank" entry under the LIBRARIES menu (handled by
    // _penntreebank) and records the module's description, version and author.
    // An optional first argument (module id) is accepted but not used here.
    function init_menu() {
        $module_id = (func_num_args() > 0) ? func_get_arg(0) : null;

        $name = $this->module;
        module::set_menu($name, "Penn Treebank", "LIBRARIES", "_penntreebank");

        // put in more details
        module::set_detail($this->description, $this->version, $this->author, $name);
    }

    // Standard module function; empty in this module (nothing is done here).
    function init_stats() {
    }

    // Standard module function; empty in this module (nothing is done here).
    function init_help() {
    }

    // Creates the m_lib_penncode library table and seeds it with the tag set.
    //
    // An optional first argument (module id) may be passed by the caller; it is
    // not needed here.
    //
    // Fixes relative to the earlier version:
    //   - uses ENGINE=InnoDB (the TYPE= table option is no longer accepted by MySQL)
    //   - 'VDI' was inserted twice (primary key clash); the second row is 'VDN'
    //   - left/right quote both used '"' as key (primary key clash); they now use
    //     the distinct `` and '' tags
    function init_sql() {
        module::execsql("CREATE TABLE `m_lib_penncode` (".
            "`treebank_id` varchar(10) NOT NULL default '',".
            "`treebank_name` varchar(100) NOT NULL default '',".
            "`treebank_example` varchar(100),".
            "`treebank_status` char(1) default 'Y',".
            "PRIMARY KEY  (`treebank_id`)".
            ") ENGINE=InnoDB");

        // each entry: treebank_id, treebank_name, treebank_example
        $tags = array(
            array('CC', 'Coordinating conjunction', 'and, but, or'),
            array('CS', 'Subordinating conjunction', ''),
            array('CSN', 'Comparative conjunction', 'than'),
            array('CST', 'Complementizer', 'that'),
            array('DB', 'Predeterminer', ''),
            array('DD', 'Determiner', ''),
            array('EX', 'Existential there', ''),
            array('GE', 'Genitive marker', ''),
            array('II', 'Preposition or subordinating conjunction', ''),
            array('JJ', 'Adjective', ''),
            array('JJR', 'Adjective comparative', ''),
            array('JJT', 'Adjective superlative', ''),
            array('MC', 'Number or numeric', ''),
            array('NN', 'Noun, singular or mass', ''),
            array('NNP', 'Proper noun, singular', ''),
            array('NNS', 'Noun, plural', ''),
            array('PN', 'Pronoun', ''),
            array('PND', 'Determiner as pronoun', ''),
            array('PNG', 'Genitive pronoun', ''),
            array('PNR', 'Relative pronoun', ''),
            array('RR', 'Adverb', ''),
            array('RRR', 'Comparative adverb', ''),
            array('RRT', 'Superlative adverb', ''),
            array('SYM', 'Symbol', 'Should be used for mathematical, scientific or technical symbols'),
            array('TO', 'to', ''),
            array('VM', 'modal', ''),
            array('VBB', 'Verb base form', 'be, am, are'),
            array('VBD', 'Verb, past tense', 'was, were'),
            array('VBG', 'Verb, gerund or present participle', 'being'),
            array('VBI', 'Verb, infinitive', 'be'),
            array('VBN', 'Verb, past participle', 'been'),
            array('VBZ', 'Verb, 3rd person singular', 'is'),
            array('VDB', 'Verb, base do', ''),
            array('VDD', 'Verb, past did', ''),
            array('VDG', 'Verb, present participle doing', ''),
            array('VDI', 'Verb, infinitive do', ''),
            array('VDN', 'Verb, past participle done', ''),
            array('VDZ', 'Verb, 3rd person singular does', ''),
            array('VHB', 'Verb, base have', ''),
            array('VHD', 'Verb, past had', ''),
            array('VHG', 'Verb, present participle having', ''),
            array('VHI', 'Verb, infinitive have', ''),
            array('VHN', 'Verb, past participle had', ''),
            array('VHZ', 'Verb, 3rd person singular has', ''),
            array('VVB', 'Verb, base form lexical verb', ''),
            array('VVD', 'Verb, past tense', ''),
            array('VVG', 'Verb, present participle', ''),
            array('VVI', 'Verb, infinitive lexical verb', ''),
            array('VVN', 'Verb, past participle', ''),
            array('VVZ', 'Verb, 3rd person singular', ''),
            array('VVNJ', 'Verb, prenominal past participle', ''),
            array('VVGJ', 'Verb, prenominal present participle', ''),
            array('VVGN', 'Verb, nominal gerund', ''),
            array('(', 'Left parenthesis', ''),
            array(')', 'Right parenthesis', ''),
            array(',', 'Comma', ''),
            array('.', 'End of sentence period', ''),
            array(':', 'Dashes, colons', ''),
            // NOTE(review): quote tags follow the Penn Treebank convention
            // (`` and ''); confirm against the MedPost tagger output.
            array('``', 'Left quote', ''),
            array("''", 'Right quote', '')
        );

        foreach ($tags as $tag) {
            // addslashes keeps the '' tag (and any future quote-bearing value)
            // from terminating the SQL string literal early
            module::execsql("INSERT INTO `m_lib_penncode` (`treebank_id`, `treebank_name`, `treebank_example`) VALUES ('".
                addslashes($tag[0])."','".addslashes($tag[1])."','".addslashes($tag[2])."')");
        }
    }

    // Removes the m_lib_penncode table created by init_sql.
    function drop_tables() {
        $statement = "DROP TABLE `m_lib_penncode`;";
        module::execsql($statement);
    }

    // --------------- CUSTOM MODULE FUNCTIONS ------------------


    // Menu handler for the "Penn Treebank" library page.
    //
    // Arguments (positional, read via func_get_args):
    //   menu id, POST variables, GET variables, valid-user flag, admin flag.
    // Processes a submitted form (if any), then prints the tag form followed
    // by the tag list.
    function _penntreebank () {
        $arg_list = func_get_args();
        if (count($arg_list) > 0) {
            $menu_id = $arg_list[0];
            $post_vars = $arg_list[1];
            $get_vars = $arg_list[2];
            $validuser = $arg_list[3];
            $isadmin = $arg_list[4];
        }

        // always check dependencies
        $exitinfo = module::missing_dependencies('module');
        if ($exitinfo) {
            return print($exitinfo);
        }

        $submitted = $post_vars["submittag"];
        if ($submitted) {
            tagger::process_tag($menu_id,$post_vars,$get_vars);
        }

        tagger::form_tag($menu_id,$post_vars,$get_vars);
        tagger::display_tag($menu_id,$post_vars,$get_vars);
    }

    // Prints the add/edit form for a single tag in m_lib_penncode.
    //
    // Arguments (positional, read via func_get_args):
    //   0: menu id (unused here)
    //   1: POST variables (unused here)
    //   2: GET variables; "menu_id" is echoed into the form action and, when
    //      "treebank_id" is set, the matching row is loaded to pre-fill the form
    //      and the Update/Delete/New buttons are shown instead of Add.
    //
    // All values written into the page are HTML-escaped, and the id used in the
    // lookup query is SQL-escaped.
    function form_tag() {
        $get_vars = array();
        // blank defaults so the form renders cleanly when nothing is being edited
        $penn = array(
            "treebank_id" => "",
            "treebank_name" => "",
            "treebank_status" => "",
            "treebank_example" => ""
        );
        if (func_num_args()) {
            $arg_list = func_get_args();
            if (isset($arg_list[2]) && is_array($arg_list[2])) {
                $get_vars = $arg_list[2];
            }
        }
        $edit_id = isset($get_vars["treebank_id"]) ? $get_vars["treebank_id"] : "";
        $menu_ref = isset($get_vars["menu_id"]) ? $get_vars["menu_id"] : "";

        if ($edit_id) {
            // NOTE(review): if magic_quotes_gpc is enabled upstream the value is
            // already slashed; confirm how $get_vars is populated.
            $sql = "select treebank_id, treebank_name, treebank_status, treebank_example ".
                   "from m_lib_penncode where treebank_id = '".mysql_real_escape_string($edit_id)."'";
            if ($result = mysql_query($sql)) {
                if (mysql_num_rows($result)) {
                    $penn = mysql_fetch_array($result);
                }
            }
        }

        // PHP_SELF (not the non-existent "SELF" key) so the form posts back to this script
        $action = $_SERVER["PHP_SELF"]."?page=LIBRARIES&menu_id=".urlencode($menu_ref).
                  ($edit_id ? "&treebank_id=".urlencode($edit_id) : "");

        print "<table width='500'>";
        print "<form action = '".htmlspecialchars($action, ENT_QUOTES)."' name='form_penncode' method='post'>";
        print "<tr valign='top'><td>";
        print "<span class='library'>".FTITLE_PENN_TAGS_FORM."</span><br><br>";
        print "</td></tr>";
        print "<tr valign='top'><td>";
        print "<span class='boxtitle'>".LBL_TREEBANK_ID."</span><br> ";
        print "<input type='text' class='textbox' size='10' maxlength='10' name='treebank_id' value='".htmlspecialchars($penn["treebank_id"], ENT_QUOTES)."' style='border: 1px solid #000000'><br>";
        print "</td></tr>";
        print "<tr valign='top'><td>";
        print "<span class='boxtitle'>".LBL_TREEBANK_NAME."</span><br> ";
        // maxlength matches the varchar(100) column so existing long names are not truncated on edit
        print "<input type='text' class='textbox' size='30' maxlength='100' name='treebank_name' value='".htmlspecialchars($penn["treebank_name"], ENT_QUOTES)."' style='border: 1px solid #000000'><br>";
        print "</td></tr>";
        if ($penn["treebank_id"]) {
            // status is only editable for existing tags; new tags are assumed active
            print "<tr valign='top'><td>";
            print "<input type='checkbox' class='textbox' name='treebank_status' ".($penn["treebank_status"]=='Y'?"checked":"")." style='border: 1px solid #000000'> ";
            print "<span class='boxtitle'>".INSTR_TREEBANK_STATUS."</span><br> ";
            print "</td></tr>";
        }
        print "<tr valign='top'><td>";
        print "<span class='boxtitle'>".LBL_TREEBANK_EXAMPLE."</span><br> ";
        print "<textarea class='textbox' rows='10' cols='60' name='treebank_example' style='border: 1px solid #000000'>";
        print htmlspecialchars($penn["treebank_example"], ENT_QUOTES);
        print "</textarea><br>";
        print "</td></tr>";
        print "<tr><td><br>";
        if ($edit_id) {
            // the hidden field comes after the text box, so the id from the URL
            // is the one that gets posted
            print "<input type='hidden' name='treebank_id' value='".htmlspecialchars($edit_id, ENT_QUOTES)."'>";
            print "<input type='submit' value = 'Update Tag' class='textbox' name='submittag' style='border: 1px solid #000000'> ";
            print "<input type='submit' value = 'Delete Tag' class='textbox' name='submittag' style='border: 1px solid #000000'> ";
            print "<input type='submit' value = 'New Tag' class='textbox' name='submittag' style='border: 1px solid #000000'> ";
        } else {
            print "<input type='submit' value = 'Add Tag' class='textbox' name='submittag' style='border: 1px solid #000000'> ";
        }
        print "</td></tr>";
        print "</form>";
        print "</table><br>";
    }

    // Handles a submitted tag form and redirects back to the library page.
    //
    // Arguments (positional, read via func_get_args):
    //   0: menu id
    //   1: POST variables ("submittag" selects the action; "treebank_id",
    //      "treebank_name", "treebank_example", "treebank_status" carry the data)
    //   2: GET variables ("page" and "menu_id" are used for the redirect)
    //
    // Actions: "New Tag" (just redirect), "Add Tag", "Update Tag", "Delete Tag".
    // Nothing is printed here so that the header() redirects are not preceded
    // by output from this method.
    function process_tag() {
        $menu_id = null;
        $post_vars = array();
        $get_vars = array();
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $post_vars = isset($arg_list[1]) ? $arg_list[1] : array();
            $get_vars = isset($arg_list[2]) ? $arg_list[2] : array();
        }

        $action = isset($post_vars["submittag"]) ? $post_vars["submittag"] : "";
        $tag_id = isset($post_vars["treebank_id"]) ? $post_vars["treebank_id"] : "";
        $tag_name = isset($post_vars["treebank_name"]) ? $post_vars["treebank_name"] : "";
        $tag_example = isset($post_vars["treebank_example"]) ? $post_vars["treebank_example"] : "";

        $redirect = "location: ".$_SERVER["PHP_SELF"].
                    "?page=".(isset($get_vars["page"]) ? $get_vars["page"] : "").
                    "&menu_id=".(isset($get_vars["menu_id"]) ? $get_vars["menu_id"] : "");

        // NOTE(review): values are escaped here for SQL; if magic_quotes_gpc is
        // enabled upstream they arrive pre-slashed - confirm how $post_vars is built.
        switch($action) {
        case "New Tag":
            header($redirect);
            break;
        case "Add Tag":
            if ($tag_id && $tag_name) {
                $sql = "insert into m_lib_penncode (treebank_id, treebank_name, treebank_example) ".
                       "values ('".mysql_real_escape_string(strtoupper($tag_id))."', '".
                       mysql_real_escape_string($tag_name)."', '".
                       mysql_real_escape_string($tag_example)."')";
                if ($result = mysql_query($sql) or die(mysql_errno().": ".mysql_error())) {
                    header($redirect);
                }
            }
            break;
        case "Update Tag":
            if ($tag_id) {
                $status = (!empty($post_vars["treebank_status"])?"Y":"N");
                // rows are keyed by treebank_id (the table has no document_id column)
                $sql = "update m_lib_penncode set ".
                       "treebank_name = '".mysql_real_escape_string($tag_name)."', ".
                       "treebank_status = '$status', ".
                       "treebank_example = '".mysql_real_escape_string($tag_example)."' ".
                       "where treebank_id = '".mysql_real_escape_string($tag_id)."'";
                if ($result = mysql_query($sql)) {
                    header($redirect);
                }
            }
            break;
        case "Delete Tag":
            if (module::confirm_delete($menu_id, $post_vars, $get_vars)) {
                $sql = "delete from m_lib_penncode where treebank_id = '".mysql_real_escape_string($tag_id)."'";
                if ($result = mysql_query($sql)) {
                    header($redirect);
                }
            } else {
                if (isset($post_vars["confirm_delete"]) && $post_vars["confirm_delete"]=="No") {
                    header($redirect);
                }
            }
            break;
        }
    }

    // Prints the list of all tags in m_lib_penncode as an HTML table, ordered
    // by treebank_id. Each id links back to this page with treebank_id set.
    //
    // Arguments (positional, read via func_get_args):
    //   0: menu id (used in the edit links)
    //   1: POST variables (unused here)
    //   2: GET variables ("page" is used in the edit links)
    function display_tag() {
        $menu_id = "";
        $get_vars = array();
        if (func_num_args()) {
            $arg_list = func_get_args();
            $menu_id = $arg_list[0];
            $get_vars = isset($arg_list[2]) ? $arg_list[2] : array();
        }
        $page = isset($get_vars["page"]) ? $get_vars["page"] : "";
        // PHP_SELF (not the non-existent "SELF" key) so links point at this script
        $base_url = $_SERVER["PHP_SELF"]."?page=".urlencode($page)."&menu_id=".urlencode($menu_id);

        print "<table width='600'>";
        // four columns below, so the title spans four
        print "<tr valign='top'><td colspan='4'>";
        print "<span class='library'>".FTITLE_TAG_LIST."</span><br>";
        print "</td></tr>";
        print "<tr valign='top'><td><b>".THEAD_TREEBANK_ID."</b></td><td><b>".THEAD_TREEBANK_NAME."</b></td><td><b>".THEAD_TREEBANK_STATUS."</b></td><td><b>".THEAD_TREEBANK_EXAMPLE."</b></td></tr>";
        $sql = "select treebank_id, treebank_name, treebank_status, treebank_example ".
               "from m_lib_penncode order by treebank_id";
        if ($result = mysql_query($sql)) {
            if (mysql_num_rows($result)) {
                while ($row = mysql_fetch_array($result)) {
                    list($id, $name, $status, $example) = $row;
                    // ids include punctuation such as ( , : so they must be URL-encoded
                    $href = htmlspecialchars($base_url."&treebank_id=".urlencode($id), ENT_QUOTES);
                    print "<tr valign='top'><td><a href='".$href."'>".htmlspecialchars($id, ENT_QUOTES)."</a></td>".
                          "<td>".htmlspecialchars($name, ENT_QUOTES)."</td>".
                          "<td>".htmlspecialchars($status, ENT_QUOTES)."</td>".
                          "<td>".htmlspecialchars($example, ENT_QUOTES)."</td></tr>";
                }
            } else {
                // keep the message inside a row so the table markup stays valid
                print "<tr><td colspan='4'><font color='red'>No tags found.</font><br></td></tr>";
            }
        }
        print "</table><br>";
    }

    // Runs the MedPost tagger over a string and returns its raw output.
    // Requires the medpost executable in MEDPOST_DIR.
    //
    // Arguments (positional, read via func_get_args):
    //   0: input text, piped to medpost on standard input
    //   1: option name, passed to medpost as "-<option>"
    //
    // Returns the tagger output as a string, or false when the command could
    // not be started. When called without arguments a syntax hint is printed
    // and nothing is returned.
    function tagtext() {
        if (!func_num_args()) {
            print "SYNTAX: array tagtext(inputString String, option String)";
            return;
        }
        $arg_list = func_get_args();
        $inputstring = $arg_list[0];
        $option = isset($arg_list[1]) ? $arg_list[1] : "";

        // every piece is shell-quoted so the text to tag cannot be interpreted
        // by the shell (quotes, $variables, backticks, pipes, ...)
        $commandstring = "echo ".escapeshellarg($inputstring)." | ".
                         escapeshellarg(MEDPOST_DIR."/medpost")." ".escapeshellarg("-".$option);

        $handle = popen($commandstring, "r");
        if (!$handle) {
            // without this check feof() on a failed handle never becomes true
            return false;
        }
        $buffer = "";
        while (!feof($handle)) {
            $chunk = fread($handle, 8192);
            if ($chunk === false) {
                break;
            }
            $buffer .= $chunk;
        }
        pclose($handle);
        return $buffer;
    }

    // Replaces selected tag suffixes in tagger output with coloured HTML labels:
    // end-of-sentence tags become EOS, noun tags NOUN, verb tags VERB.
    //
    // Takes the tagged text as its first argument and returns the marked-up
    // string. Called without arguments it prints a syntax hint instead.
    function highlight_tags() {
        if (!func_num_args()) {
            print "SYNTAX: highlight_tags(inputString String)";
            return;
        }

        $tagged = func_get_arg(0);

        // pattern => replacement, applied in this order
        $rules = array(
            "/(\!_\.)|(\._\.)/" => "<font color='red'><b>EOS</b></font>",
            "/(_NN )|(_NNS )/" => " <font color='blue'><b>NOUN</b></font> ",
            "/(_VVZ )|(_VVG )|(_VVGN )|(_VVNJ )|(_VVN )|(_VVD )|(_VDD )|(_VBD )|(_VVI )|(_VHD )|(_VHZ )/" => " <font color='green'><b>VERB</b></font> "
        );

        return preg_replace(array_keys($rules), array_values($rules), $tagged);
    }

    // Prints a <select name='treebank'> listing the tags whose id starts with
    // V, N or J, ordered by name. The option whose id equals $input_tag
    // (compared after trimming both) is pre-selected. Prints nothing when the
    // query fails or returns no rows.
    function treebank_select($input_tag) {
        $query = "select treebank_id, treebank_name from m_lib_penncode where instr('VNJ', substr(treebank_id, 1,1)) order by treebank_name";
        $rows = mysql_query($query);
        if (!$rows) {
            return;
        }
        if (!mysql_num_rows($rows)) {
            return;
        }

        $wanted = trim($input_tag);
        print "<select name='treebank' class='service'>";
        print "<option value='0'>Select Treebank ID</option>";
        while (list($id, $name) = mysql_fetch_array($rows)) {
            $marker = ($wanted == trim($id)) ? "selected" : "";
            // id is padded with dashes to a width of five before the name
            $label = str_pad($id, 5, "-", STR_PAD_RIGHT);
            print "<option value='$id' ".$marker.">".$label." $name</option>";
        }
        print "</select><br><br>";
    }

// end of class
}
?>
