<?
class visualize extends module {

    //
    // Author: Herman Tolentino (unless specified)
    //
    // AEFI NLP Project
    // Vaccine Analytic Unit - Brighton Collaboration - National Library of Medicine
    //
    // IMPORTANT:
    // This class assumes constant JPGRAPH_DIR has been defined in index.php
    //
    // All queries run through the legacy mysql_* API on the default
    // connection. Document ids are escaped with mysql_real_escape_string()
    // before being placed in SQL and with htmlspecialchars() before being
    // printed.
    //

    function __construct() {
        //
        // do not forget to update version
        //
        $this->author = 'Herman Tolentino MD';
        $this->version = "0.1-".date("Y-m-d");
        $this->module = "visualize";
        $this->description = "AEFI Module - Visualization";
        // 0.1: BEGIN
    }

    function visualize() {
        //
        // Legacy (class-named) constructor. Kept so that explicit calls
        // to visualize() keep working; newer PHP versions no longer treat
        // a class-named method as the constructor, hence __construct().
        //
        $this->__construct();
    }

    function init_deps() {
    //
    // insert dependencies in module_dependencies
    //
        module::set_dep($this->module, "module");
        module::set_dep($this->module, "textlab");
        module::set_dep($this->module, "umlstools");

    }

    function init_lang() {
    //
    // insert necessary language directives
    //
        module::set_lang("FTITLE_SPELLING_CHECK", "english", "SPELLING CHECK", "Y");
        module::set_lang("INSTR_SPELLING_CHECK", "english", "CLICK ON THE HIGHLIGHTED TERMS ABOVE", "Y");
        module::set_lang("LBL_POSSIBLE_MISPELLED", "english", "POSSIBLE MISPELLED WORD", "Y");
        module::set_lang("LBL_POSSIBLE_CORRECTIONS", "english", "POSSIBLE CORRECTIONS", "Y");
        module::set_lang("LBL_REPLACEMENT_TERM", "english", "REPLACEMENT TERM", "Y");
        module::set_lang("INSTR_SPELLCORRECTION_LIST", "english", "PICK ONE FROM THE LIST AND CLICK ON <b>Replace from Correction List</b>", "Y");
        module::set_lang("LBL_SPELLING_SOURCE", "english", "SPELLING SOURCE", "Y");
        module::set_lang("LBL_BUILD_DICT", "english", "BUILD DICTIONARY", "Y");
    }

    function init_menu() {
    //
    // registers the module details; an optional first argument
    // (a module id) is accepted but not used by this module
    //
        module::set_detail($this->description, $this->version, $this->author, $this->module);
    }

    function init_stats() {
    }

    function init_help() {
    }

    function init_sql() {
    //
    // this module creates no tables; an optional first argument
    // (a module id) is accepted but not used
    //
    }

    function drop_tables() {

    }


    // --------------- CUSTOM MODULE FUNCTIONS ------------------

    function show_entropy($document_id) {
    //
    // prints an HTML table with the entropy values (bits, maxent, ratio)
    // of the "term", "tag" and "concept" subsets of one document
    // input: document id
    // output: none (HTML is printed directly)
    //
        $doc_label = htmlspecialchars((string) $document_id, ENT_QUOTES);

        print "<table width='90%' cellpadding='2' cellspacing='0'>";
        print "<tr bgcolor='#FF9900'><td>SUBSET</td><td>BITS</td><td>MAXENT</td><td>RATIO</td></tr>";
        print "<tr><td colspan='4' bgcolor='#FFCC33'>DOCUMENT ".$doc_label."</td></tr>";
        foreach (array("term", "tag", "concept") as $token_subset) {
            $params = visualize::entropy_params($token_subset, $document_id);
            if (is_array($params)) {
                $e = new Entropy($params["tokens"], $params["frequencies"]);
                $values = $e->get_EntropyValues();
            } else {
                // entropy_params() returns null when the query fails or
                // yields no rows; there is nothing to compute then
                $values = array("bits" => "n/a", "maxent" => "n/a", "ratio" => "n/a");
            }
            print "<tr>";
            print "<td>$token_subset</td>";
            print "<td>".$values["bits"]."</td>";
            print "<td>".$values["maxent"]."</td>";
            print "<td>".$values["ratio"]."</td>";
            print "</tr>";
        }
        print "</table>";

    }

    function get_frequencies($document_id, $category) {
    //
    // input: document id and category ("STY", "TAG" or "CONCEPT")
    // output: array with parallel lists under "token" and "frequency",
    //         or null for an unknown category, a failed query or an
    //         empty result
    //
        $doc = mysql_real_escape_string($document_id);

        switch($category) {
        case "STY":
            $sql = "select t.STY, sum(m.frequency) ".
                   "from m_textlab_terms m, umls.MRSTY t ".
                   "where m.concept_id = t.CUI and m.document_id = '$doc' ".
                   "group by t.STY";
            break;
        case "TAG":
            $sql = "select p.treebank_name, sum(m.frequency) ".
                   "from m_textlab_terms m, m_lib_penncode p ".
                   "where m.treebank_id = p.treebank_id and m.document_id = '$doc' ".
                   "group by p.treebank_name;";
            break;
        case "CONCEPT":
            $sql = "select m.concept_id, sum(m.frequency) ".
                   "from m_textlab_terms m ".
                   "where m.document_id = '$doc' and concept_id<>'' ".
                   "group by concept_id;";
            break;
        default:
            // unknown category: no query to run
            return null;
        }

        $pairs = visualize::_fetch_pairs($sql);
        if ($pairs === null) {
            return null;
        }
        return array("token" => $pairs[0], "frequency" => $pairs[1]);
    }

    function write_graph($document_id, $graphtype, $datatype, $graph_title, $data, $width, $height) {
    //
    // renders $data (as returned by get_frequencies) with JpGraph and
    // prints an <img> tag pointing at the generated file
    // input: document id, graph type (only "HBAR1" is handled), data type
    //        (used in the file name), title, data, width and height
    // output: none
    //
        // nothing to plot; handing an empty series to JpGraph is an error
        if (!is_array($data) || empty($data["token"]) || empty($data["frequency"])) {
            return;
        }

        switch($graphtype) {
        case "HBAR1":
            $graph_name = "HBAR1_".$datatype."_".$document_id.".png";

            $datax = $data["token"];
            $datay = $data["frequency"];
            $n = array_sum($datay);

            // Set the basic parameters of the graph
            $graph = new Graph($width, $height, $graph_name, 0, 0);
            $graph->SetScale("textlin");

            // Rotate graph 90 degrees and set margin
            $graph->Set90AndMargin(250,20,90,30);

            // Setup title
            $graph->title->Set($graph_title." DOCNUM $document_id");
            $graph->title->SetFont(FF_VERDANA,FS_NORMAL,11);
            $graph->subtitle->Set("n = $n");

            // Setup X-axis
            $graph->xaxis->SetTickLabels($datax);
            $graph->xaxis->SetFont(FF_VERDANA,FS_NORMAL,8);

            // Some extra margin looks nicer
            $graph->xaxis->SetLabelMargin(10);

            // Label align for X-axis
            $graph->xaxis->SetLabelAlign('right','center');

            // Add some grace to y-axis so the bars don't go
            // all the way to the end of the plot area
            $graph->yaxis->scale->SetGrace(20);

            // Now create a bar plot
            $bplot = new BarPlot($datay);
            $bplot->SetFillColor("orange");

            // You can change the width of the bars if you like
            $bplot->SetWidth(0.7);

            // We want to display the value of each bar at the top
            $bplot->value->Show();
            $bplot->value->SetFont(FF_VERDANA,FS_NORMAL,8);
            $bplot->value->SetAlign('left','center');
            $bplot->value->SetColor("black","darkred");
            $bplot->value->SetFormat('%.1f');

            // Add the bar to the graph
            $graph->Add($bplot);

            // .. and stroke the graph
            $graph->Stroke();
            print "<img src='./game/graph/".htmlspecialchars($graph_name, ENT_QUOTES)."'>";
            break;
        }
    }

    function entropy_params ($column, $document_id) {
    //
    // input: column name from terms ("term", "tag" or "concept")
    // output: array with parallel lists under "tokens" and "frequencies",
    //         or null for an unknown column, a failed query or an
    //         empty result
    //
        $doc = mysql_real_escape_string($document_id);

        switch ($column) {
        case "term":
            $sql = "select term, sum(frequency) ".
                   "from m_textlab_terms where document_id = '$doc' ".
                   "group by term";
            break;
        case "tag":
            $sql = "select treebank_id, sum(frequency) ".
                   "from m_textlab_terms where document_id = '$doc' ".
                   "group by treebank_id";
            break;
        case "concept":
            $sql = "select concept_id, sum(frequency) ".
                   "from m_textlab_terms ".
                   "where document_id = '$doc' and concept_id<>'' ".
                   "group by concept_id";
            break;
        default:
            // unknown column: no query to run
            return null;
        }

        $pairs = visualize::_fetch_pairs($sql);
        if ($pairs === null) {
            return null;
        }
        return array("tokens" => $pairs[0], "frequencies" => $pairs[1]);
    }

    function cluster($document_id) {
    //
    // experimental: starting from a seed concept near the middle of the
    // document's concept list, prints the distance of every concept to
    // the seed and the indexes of the closest ("best match") concepts.
    // The distance combines umlstools::node_distance() with the
    // complement of the concept's relative frequency.
    // input: document id
    // output: none (diagnostics are printed directly)
    //
        $doc = mysql_real_escape_string($document_id);
        $sql = "select concept_id, sum(frequency) from m_textlab_terms where concept_id <> '' and document_id = '$doc' group by concept_id";

        $pairs = visualize::_fetch_pairs($sql);
        if ($pairs === null) {
            return;
        }
        $concept_array = $pairs[0];
        $concept_frequency = $pairs[1];
        $concept_count = count($concept_array);
        $total = array_sum($concept_frequency);
        if ($total <= 0) {
            // relative frequencies are undefined without any events
            return;
        }

        // Seed index. The expression yields a fractional value for odd
        // counts; it is truncated explicitly here because it is used as
        // an array index.
        // NOTE(review): for odd counts this is one below the true middle
        // (e.g. 5 concepts -> index 1) - confirm this is intended.
        $midpt = (int) (($concept_count / 2) - ($concept_count % 2));

        // Only a single pass is performed (the outer iteration loop is
        // disabled), so the iteration index is fixed at 0.
        $i = 0;
        $new_nodes = array($midpt);
        $best_concepts = array();

        print "ITERATION: $i<br>";

        // iterate over a snapshot: nodes appended below must not be
        // visited during this pass
        $seed_nodes = $new_nodes;
        foreach ($seed_nodes as $input_node) {
            $distance_array = array();
            for ($j=0; $j<$concept_count; $j++) {
                $node_dist = umlstools::node_distance($concept_array[$j], $concept_array[$input_node]);
                $freq_term = 1-($concept_frequency[$j]/$total);
                $squared_freq = $freq_term * $freq_term;
                $squared_node_dist = ($node_dist * $node_dist);
                $distance = sqrt($squared_freq + $squared_node_dist);
                print "NODE: $j<br>";
                print " node distance: $node_dist<br>";
                print " squared frequency: $squared_freq<br>";
                print " squared node distance: $squared_node_dist<br>";
                print " euclidean distance: $distance<br><br>";
                $distance_array[$j] = $distance;
            }

            // the best matching unit is the node with the smallest distance
            $best_match = min($distance_array);
            print "best match: ".$best_match."<br>";
            for ($j=0; $j<$concept_count; $j++) {
                if ($distance_array[$j]==$best_match) {
                    $new_nodes[] = $j;
                }
            }

            $string = "best nodes: ";
            foreach($new_nodes as $node_index) {
                $string .= $node_index." ";
                $best_concepts[$i] = $node_index;
            }
            print $string."<br>";
        }
        print_r($best_concepts);
    }

    function _fetch_pairs($sql) {
    //
    // private helper: runs a two-column query and splits the rows
    // input: SQL selecting (label, value)
    // output: array(list of labels, list of values), or null when the
    //         query fails or returns no rows
    //
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return null;
        }
        $labels = array();
        $values = array();
        while ($row = mysql_fetch_array($result)) {
            $labels[] = $row[0];
            $values[] = $row[1];
        }
        return array($labels, $values);
    }
}

class Entropy {

    //
    // Shannon entropy (base 2) of a discrete frequency distribution.
    //
    // tokens      : the token labels handed to the constructor
    // num_events  : total number of observations (sum of all frequencies)
    // token_freqs : frequency per token, as handed to the constructor
    // token_probs : relative frequency per token (same keys as token_freqs)
    // num_tokens  : number of entries in token_freqs
    // bits        : entropy in bits
    // maxent      : log2(num_tokens), the entropy of a uniform distribution
    // ratio       : bits / maxent (0 when maxent is 0)
    //
    var $tokens      = array();
    var $num_events  = 0;
    var $token_freqs = array();
    var $token_probs = array();
    var $num_tokens  = 0;
    var $bits        = 0.0;
    var $maxent      = 0.0;
    var $ratio       = 0.0;

    function __construct($tokens, $freq) {
        //
        // input: list of tokens and the list of their frequencies
        // non-array input is treated as an empty distribution
        //
        $this->tokens      = is_array($tokens) ? $tokens : array();
        $this->token_freqs = is_array($freq) ? $freq : array();
        $this->num_tokens  = count($this->token_freqs);
        // probabilities must be relative to the number of observations,
        // not to the number of distinct tokens, or they do not sum to 1
        $this->num_events  = array_sum($this->token_freqs);

        $entropy = 0.0;
        if ($this->num_events > 0) {
            foreach ($this->token_freqs as $token => $count) {
                $prob = $count / $this->num_events;
                $this->token_probs[$token] = $prob;
                // a zero probability contributes nothing (0 * log 0 := 0)
                if ($prob > 0) {
                    $entropy -= $prob * log($prob, 2);
                }
            }
        }
        $this->bits   = $entropy;
        // log2 of 0 or 1 tokens is not a usable maximum
        $this->maxent = ($this->num_tokens > 1) ? log($this->num_tokens, 2) : 0.0;
        $this->ratio  = ($this->maxent > 0) ? $this->bits / $this->maxent : 0.0;
    }

    function Entropy($tokens, $freq) {
        //
        // Legacy (class-named) constructor, kept for explicit callers;
        // newer PHP versions only treat __construct() as the constructor.
        //
        $this->__construct($tokens, $freq);
    }

    function get_EntropyValues() {
        //
        // output: array with keys "bits", "maxent" and "ratio"
        //
        $retval["bits"] = $this->bits;
        $retval["maxent"] = $this->maxent;
        $retval["ratio"] = $this->ratio;
        return $retval;
    }

}

class TableEntropy {

  //
  // Shannon entropy (base 2) of the value distribution of one table
  // column. Configure with setTable()/setColumn() (optionally setWhere()
  // and setGroupBy()), then call getEntropy(); results are left in
  // num_events, num_tokens, token_probs, bits, maxent and ratio.
  //
  // The table, column, where and group-by fragments are placed in the
  // SQL verbatim, so they must come from trusted code.
  //

  var $table       = "";
  var $column      = "";
  var $select      = "";
  var $where       = "";
  var $group_by    = "";

  var $num_events  = 0;
  var $token_freqs = array();
  var $token_probs = array();
  var $num_tokens  = 0;
  var $bits        = 0.0;
  var $maxent      = 0.0;
  var $ratio       = 0.0;

  function setTable($table) {
    $this->table = $table;
  }

  function setColumn($column) {
    $this->column = $column;
  }

  // stored only; not used when building the frequency query
  function setSelect($sql) {
    $this->select = $sql;
  }

  function setWhere($sql) {
    $this->where = $sql;
  }

  function setGroupBy($sql) {
    $this->group_by = $sql;
  }

  //
  // Queries the per-value counts of the configured column through the
  // global $db object and stores whatever $db->getCol() returns in
  // token_freqs. Groups by the explicit group-by fragment if one was
  // set, otherwise by the column itself.
  //
  function getTokenFrequencies() {
    global $db;
    $sql  = " SELECT $this->column, count($this->column) as freq ";
    $sql .= " FROM ". $this->table;
    if ($this->where != "")
      $sql .= " WHERE ". $this->where;
    if ($this->group_by != "")
      $sql .= " GROUP BY ". $this->group_by;
    else
      $sql .= " GROUP BY ". $this->column;
    $this->token_freqs = $db->getCol($sql, "freq");
  }

  //
  // Refreshes the frequencies and recomputes all entropy figures.
  // A result that is not an array, or that holds no events, yields
  // zero for bits, maxent and ratio.
  //
  function getEntropy() {
    $this->getTokenFrequencies();
    // token_freqs is left untouched so callers can still inspect a
    // non-array (e.g. error) result from the database layer
    $freqs = is_array($this->token_freqs) ? $this->token_freqs : array();
    $this->num_events  = array_sum($freqs);
    $this->num_tokens  = count($freqs);
    // start clean so a repeated call does not keep stale entries
    $this->token_probs = array();
    $entropy = 0.0;
    if ($this->num_events > 0) {
      foreach ($freqs as $token => $freq) {
        $prob = $freq / $this->num_events;
        $this->token_probs[$token] = $prob;
        // a zero probability contributes nothing (0 * log 0 := 0)
        if ($prob > 0) {
          $entropy -= $prob * log($prob, 2);
        }
      }
    }
    $this->bits   = $entropy;
    // log2 of 0 or 1 tokens is not a usable maximum
    $this->maxent = ($this->num_tokens > 1) ? log($this->num_tokens, 2) : 0.0;
    $this->ratio  = ($this->maxent > 0) ? $this->bits / $this->maxent : 0.0;
  }

}

?>
