<?
class visualize extends module {

    //
    // Author: Herman Tolentino (unless specified)
    //
    // AEFI NLP Project
    // Vaccine Analytic Unit - Brighton Collaboration - National Library of Medicine
    //
    // IMPORTANT:
    // This class assumes constant JPGRAPH_DIR has been defined in index.php
    //
    // Reporting helpers for one document: entropy table, horizontal bar
    // charts (Graph / BarPlot / GroupBarPlot), vocabulary summary and a
    // concept neighbor graph (graphviz).
    //
    // NOTE(review): $document_id is interpolated into the SQL statements
    // below without escaping - confirm that callers only pass trusted ids.
    //

    //
    // Sets the module metadata (author, version, module name, description).
    //
    function __construct() {
        //
        // do not forget to update version
        //
        $this->author = 'Herman Tolentino MD';
        $this->version = "0.1-".date("Y-m-d");
        $this->module = "visualize";
        $this->description = "AEFI Module - Visualization";
        // 0.1: BEGIN
    }

    //
    // Class-named constructor kept for runtimes that only know this form
    // (and for any explicit visualize() call); the work is in __construct().
    //
    function visualize() {
        $this->__construct();
    }

    function init_deps() {
    //
    // insert dependencies in module_dependencies
    //
        module::set_dep($this->module, "module");
        module::set_dep($this->module, "textlab");
        module::set_dep($this->module, "umlstools");

    }

    function init_lang() {
    //
    // insert necessary language directives
    //
        module::set_lang("FTITLE_SPELLING_CHECK", "english", "SPELLING CHECK", "Y");
        module::set_lang("INSTR_SPELLING_CHECK", "english", "CLICK ON THE HIGHLIGHTED TERMS ABOVE", "Y");
        module::set_lang("LBL_POSSIBLE_MISPELLED", "english", "POSSIBLE MISPELLED WORD", "Y");
        module::set_lang("LBL_POSSIBLE_CORRECTIONS", "english", "POSSIBLE CORRECTIONS", "Y");
        module::set_lang("LBL_REPLACEMENT_TERM", "english", "REPLACEMENT TERM", "Y");
        module::set_lang("INSTR_SPELLCORRECTION_LIST", "english", "PICK ONE FROM THE LIST AND CLICK ON <b>Replace from Correction List</b>", "Y");
        module::set_lang("LBL_SPELLING_SOURCE", "english", "SPELLING SOURCE", "Y");
        module::set_lang("LBL_BUILD_DICT", "english", "BUILD DICTIONARY", "Y");
    }

    function init_menu() {
    //
    // registers the module details; an optional first argument (module id)
    // is accepted but not used here
    //
        if (func_num_args()>0) {
            $arg_list = func_get_args();
            $module_id = $arg_list[0];
        }

        // put in more details
        module::set_detail($this->description, $this->version, $this->author, $this->module);
    }

    function init_stats() {
    }

    function init_help() {
    }

    function init_sql() {
    //
    // no tables to create; an optional first argument (module id) is
    // accepted but not used here
    //
        if (func_num_args()>0) {
            $arg_list = func_get_args();
            $module_id = $arg_list[0];
        }

    }

    function drop_tables() {

    }


    // --------------- CUSTOM MODULE FUNCTIONS ------------------

    function show_entropy($document_id) {
    //
    // prints an HTML table with bits / maxent / ratio for the term, tag
    // and concept subsets of one document
    //
        print "<table width='90%' cellpadding='2' cellspacing='0'>";
        print "<tr bgcolor='#FF9900'><td>SUBSET</td><td>BITS</td><td>MAXENT</td><td>RATIO</td></tr>";
        print "<tr><td colspan='4' bgcolor='#FFCC33'>DOCUMENT ".$document_id."</td></tr>";
        foreach (array("term", "tag", "concept") as $token_subset) {
            $params = visualize::entropy_params($token_subset, $document_id);
            print "<tr>";
            print "<td>$token_subset</td>";
            if (is_array($params)) {
                $e = new Entropy($params["tokens"], $params["frequencies"]);
                $values = $e->get_EntropyValues();
                print "<td>".$values["bits"]."</td>";
                print "<td>".$values["maxent"]."</td>";
                print "<td>".$values["ratio"]."</td>";
            } else {
                // entropy_params() returns nothing when the subset has no
                // rows; show placeholders instead of computing on null
                print "<td>-</td><td>-</td><td>-</td>";
            }
            print "</tr>";
        }
        print "</table>";

    }

    function get_frequencies($document_id, $category) {
    //
    // input: document id and one of STY, TAG, TAG2, VOCAB2, CONCEPT
    // output: array("token", "frequency") for the one-series categories,
    //         array("token", "frequency1", "frequency2") for TAG2/VOCAB2;
    //         nothing (null) for an unknown category, a failed query or
    //         an empty result
    //
        switch($category) {
        case "STY":
            $series = 1;
            $sql = "select t.STY, sum(m.frequency) ".
                   "from m_textlab_terms m, umls.MRSTY t ".
                   "where m.concept_id = t.CUI and document_id = '$document_id' ".
                   "group by t.STY";
            break;
        case "TAG":
            $series = 1;
            $sql = "select p.treebank_name, sum(m.frequency) ".
                   "from m_textlab_terms m, m_lib_penncode p ".
                   "where m.treebank_id = p.treebank_id and m.document_id = '$document_id' ".
                   "group by p.treebank_name;";
            break;
        case "TAG2":
            $series = 2;
            $sql = "select p.treebank_name, sum(m.frequency) tagcount, sum(if(m.concept_id<>'',m.frequency,0)) conceptcount ".
                   "from m_textlab_terms m, m_lib_penncode p ".
                   "where m.treebank_id = p.treebank_id and document_id = '$document_id' ".
                   "group by m.treebank_id;";
            break;
        case "VOCAB2":
            $series = 2;
            $sql = "select source, count(stype), sum(frequency) ".
                   "from m_textlab_vocabulary ".
                   "where document_id = '$document_id' ".
                   "group by source";
            break;
        case "CONCEPT":
            $series = 1;
            $sql = "select m.concept_id, sum(m.frequency) ".
                   "from m_textlab_terms m ".
                   "where m.document_id = '$document_id' and concept_id<>'' ".
                   "group by concept_id;";
            break;
        default:
            // unknown category: there is no statement to run
            return;
        }

        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        $tokens = array();
        $first  = array();
        $second = array();
        while ($row = mysql_fetch_array($result)) {
            $tokens[] = $row[0];
            $first[]  = $row[1];
            if ($series == 2) {
                $second[] = $row[2];
            }
        }

        $retval = array();
        $retval["token"] = $tokens;
        if ($series == 1) {
            $retval["frequency"] = $first;
        } else {
            $retval["frequency1"] = $first;
            $retval["frequency2"] = $second;
        }
        return $retval;
    }

    function write_graph($document_id, $graphtype, $datatype, $graph_title, $data, $width, $height) {
    //
    // input: $graphtype HBAR1 (one value series) or HBAR2 (two series),
    //        $data as returned by get_frequencies()
    // output: strokes the graph under the name
    //         <graphtype>_<datatype>_<document_id>.png and prints an <img>
    //         tag pointing at ./game/graph/<name>; does nothing when $data
    //         is unusable or $graphtype is unknown
    //
        $graph_name = $graphtype."_".$datatype."_".$document_id.".png";

        // get_frequencies() returns nothing for an empty result set
        if (!is_array($data) || !isset($data["token"])) {
            return;
        }

        switch($graphtype) {

        case "HBAR1": // 1 set of values for y
            if (!isset($data["frequency"])) {
                return;
            }
            $graph = new Graph($width, $height, $graph_name, 0, 0);
            visualize::_setup_hbar_graph($graph, $graph_title, $document_id, $data["token"], array_sum($data["frequency"]));

            $bplot = new BarPlot($data["frequency"]);
            visualize::_style_hbar_plot($bplot, "orange");
            $graph->Add($bplot);
            break;

        case "HBAR2": // 2 sets of values for y
            if (!isset($data["frequency1"]) || !isset($data["frequency2"])) {
                return;
            }
            $graph = new Graph($width, $height, $graph_name, 0, 0);
            // the subtitle total is taken from the first series only
            visualize::_setup_hbar_graph($graph, $graph_title, $document_id, $data["token"], array_sum($data["frequency1"]));

            $b1plot = new BarPlot($data["frequency1"]);
            visualize::_style_hbar_plot($b1plot, "orange");
            $b2plot = new BarPlot($data["frequency2"]);
            visualize::_style_hbar_plot($b2plot, "blue");

            // Create the grouped bar plot and add it to the graph
            $gbplot = new GroupBarPlot(array($b1plot,$b2plot));
            $graph->Add($gbplot);
            break;

        default:
            return;
        }

        // .. and stroke the graph
        $graph->Stroke();
        print "<img src='./game/graph/$graph_name'>";
    }

    function _setup_hbar_graph(&$graph, $graph_title, $document_id, $labels, $n) {
    //
    // private: applies the layout shared by HBAR1 and HBAR2 to $graph
    // (taken by reference so the caller's object is the one configured)
    //
        $graph->SetScale("textlin");

        // Rotate graph 90 degrees and set margin
        $graph->Set90AndMargin(250,20,90,30);

        // Setup title
        $graph->title->Set($graph_title." DOCNUM $document_id");
        $graph->title->SetFont(FF_VERDANA,FS_NORMAL,11);
        $graph->subtitle->Set("n = $n");

        // Setup X-axis: labels, font, extra margin, alignment
        $graph->xaxis->SetTickLabels($labels);
        $graph->xaxis->SetFont(FF_VERDANA,FS_NORMAL,8);
        $graph->xaxis->SetLabelMargin(10);
        $graph->xaxis->SetLabelAlign('right','center');

        // Add some grace to the y-axis so the bars do not go
        // all the way to the end of the plot area
        $graph->yaxis->scale->SetGrace(20);
    }

    function _style_hbar_plot(&$plot, $color) {
    //
    // private: fill colour, bar width and value labels for one bar plot
    //
        $plot->SetFillColor($color);
        $plot->SetWidth(0.7);

        // display the value of each bar next to it
        $plot->value->Show();
        $plot->value->SetFont(FF_VERDANA,FS_NORMAL,8);
        $plot->value->SetAlign('left','center');
        $plot->value->SetColor("black","darkred");
        $plot->value->SetFormat('%.1f');
    }

    function entropy_params ($column, $document_id) {
    //
    // input: column name from terms (term, tag or concept)
    // output: array of column names ("tokens") and frequencies
    //         ("frequencies"); nothing (null) for an unknown column,
    //         a failed query or an empty result
    //
        switch ($column) {
        case "term":
            $sql = "select term, sum(frequency) ".
                   "from m_textlab_terms where document_id = '$document_id' ".
                   "group by term";
            break;
        case "tag":
            $sql = "select treebank_id, sum(frequency) ".
                   "from m_textlab_terms where document_id = '$document_id' ".
                   "group by treebank_id";
            break;
        case "concept":
            $sql = "select concept_id, sum(frequency) ".
                   "from m_textlab_terms ".
                   "where document_id = '$document_id' and concept_id<>'' ".
                   "group by concept_id";
            break;
        default:
            // unknown column: there is no statement to run
            return;
        }

        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        $token_items = array();
        $token_frequencies = array();
        while ($row = mysql_fetch_array($result)) {
            $token_items[] = $row[0];
            $token_frequencies[] = $row[1];
        }
        $retval = array();
        $retval["tokens"] = $token_items;
        $retval["frequencies"] = $token_frequencies;
        return $retval;
    }

    function vocabulary_summary($document_id, $type) {
    //
    // prints an HTML table with, per vocabulary source of the document,
    // count(stype) and sum(frequency); the source's SON value becomes the
    // tooltip of the source cell.
    // $type is accepted for interface compatibility and is not used.
    //
        $sql = "select v.source, s.SON, count(v.stype), sum(v.frequency) ".
               "from m_textlab_vocabulary v, umls.MRSAB s ".
               "where s.RSAB = v.source and document_id = '$document_id' ".
               "group by source;";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        print "<table>";
        print "<tr>";
        print "<td>SOURCE</td><td>SEMANTIC TYPE</td><td>FREQUENCY</td>";
        print "</tr>";
        while (list($source, $son, $stype_count, $frequency) = mysql_fetch_array($result)) {
            // the title attribute is single-quoted, so a quote inside the
            // name would end it early; escape it
            $tooltip = htmlspecialchars($son, ENT_QUOTES);
            print "<tr><td><span title='$tooltip'>$source</span></td><td>$stype_count</td><td>$frequency</td></tr>";
        }
        print "</table>";
    }

    function nearest_neighbor($document_id) {
    //
    // selects concept pairs of the document where at least one side has
    // TUI T184, weights each pair as (1/distance) * (frequency/total
    // frequency), keeps pairs with weight > 0.0009 as triples, writes and
    // draws them with graphviz (neato) and hands them to Symptoms
    //
        $sql = "select b.concept1_id, b.concept2_id, b.distance, b.frequency ".
               "from m_textlab_bigrams b, m_textlab_terms t1, m_textlab_terms t2, umls.MRSTY s1, umls.MRSTY s2 ".
               "where b.concept1_id = s1.CUI and b.concept2_id = s2.CUI and (s1.TUI = 'T184' or s2.TUI = 'T184') ".
               "and b.concept1_id <> 'C0000000' and b.concept2_id <> 'C0000000' and b.concept1_id = t1.concept_id ".
               "and b.concept2_id = t2.concept_id and b.distance <> 'NULL' ".
               "and t1.document_id = t2.document_id and t1.document_id = '$document_id' ".
               "group by concept2_id, concept1_id;";
        $result = mysql_query($sql);
        if (!$result || !mysql_num_rows($result)) {
            return;
        }

        $pairs = array();
        $total = 0;
        while (list($concept1, $concept2, $distance, $frequency) = mysql_fetch_array($result)) {
            $pairs[] = array("c1"=>$concept1, "c2"=>$concept2, "d"=>$distance, "f"=>$frequency);
            $total = $total + $frequency;
        }

        // always an array, even when no pair passes the cut-off
        $triples = array();
        foreach ($pairs as $pair) {
            if ($total <= 0 || $pair["d"] == 0) {
                // the weight is undefined without a distance or a total
                continue;
            }
            $weight = (1/$pair["d"]) * ($pair["f"]/$total);
            if ($weight > 0.0009) {
                $triples[] = array("subject"=>$pair["c1"], "predicate"=>"", "object"=>$pair["c2"]);
            }
        }

        $graph = new graphviz($triples);
        $graph->filename($document_id.".dot");
        $graph->set_tool("neato");
        $graph->set_node_fontname("Helvetica");
        $graph->set_size("150,150");
        $graph->set_overlap("orthoxy");
        $graph->write_dot();
        $graph->draw();

        $nn = new Symptoms($triples);

        // NOTE(review): the result is not used, and 3 looks like a cluster
        // count although kmeans takes its points here - confirm intent
        $kmeans = new kmeans(3);
    }

}

//
// Holder for the inputs of a k-means run: the points and the number of
// clusters. No clustering is implemented in this class.
//
class kmeans {

    var $k;       // number of clusters, see set_k()
    var $points;  // value handed to the constructor

    //
    // input: the points to keep
    //
    function __construct($points) {
        $this->points = $points;
    }

    //
    // Class-named constructor kept for runtimes that only know this form;
    // without __construct() newer runtimes would silently drop $points.
    //
    function kmeans($points) {
        $this->__construct($points);
    }

    //
    // input: number of clusters
    //
    function set_k($k) {
        $this->k = $k;
    }
}

//
// Builds one node per entity found in a list of triples (semantic type,
// term, number of connections, parents) and prints the nodes as an HTML
// table.
//
class symptoms {

    var $nodes;    // entity => array("stype", "term", "connections", "tree")
    var $triples;  // triples given to neighbor()
    var $tree;     // parent list of the last successful get_parents() call

    //
    // input: optional array of triples (arrays with "subject" and "object")
    // When triples are given they are processed at once, so that
    // new symptoms($triples) builds and prints the node table instead of
    // discarding its argument.
    //
    function __construct($triples = null) {
        if (is_array($triples)) {
            $this->neighbor($triples);
        }
    }

    //
    // Class-named constructor kept for runtimes that only know this form.
    //
    function symptoms($triples = null) {
        $this->__construct($triples);
    }

    //
    // input: array of triples
    // output: fills $this->nodes for every subject/object and prints the
    //         node table
    //
    function neighbor($triples) {
        $this->triples = is_array($triples) ? $triples : array();
        if (!is_array($this->nodes)) {
            $this->nodes = array();
        }

        // each entity is looked up once, however many triples mention it;
        // get_connections() scans all triples, so the result is the same
        $seen = array();
        foreach ($this->triples as $triple) {
            foreach (array("subject", "object") as $role) {
                $entity = $triple[$role];
                if (isset($seen["$entity"])) {
                    continue;
                }
                $seen["$entity"] = true;
                $this->add_node($entity);
                $this->get_connections($entity);
            }
        }

        print "<table width='600' cellspacing='1' bgcolor='black'>";
        foreach ($this->nodes as $entity=>$node) {
            print "<tr bgcolor='#99CCFF'><td colspan='4'><b>$entity</b></td></tr>";
            print "<tr bgcolor='#FFFF99' valign='top'>";
            foreach ($node as $name=>$value) {
                if (!is_array($value) && $name<>"tree") {
                    print "<td>$name <b>$value</b></td>";
                }
            }
            print "</tr>";
            // the parents get a row of their own, read from the node itself
            // and opened only after the detail row has been closed
            if (isset($node["tree"]) && is_array($node["tree"])) {
                $parents = implode("<br>", $node["tree"]);
                print "<tr bgcolor='#FFCC66' valign='top'><td colspan='3'>$parents</td></tr>";
            }
        }
        print "</table>";
    }

    function get_nodes() {
        return $this->nodes;
    }

    //
    // input: concept id (CUI)
    // output: STY of the first umls.MRSTY row for it, nothing if none
    //
    function get_type_name($entity) {
        $sql = "select STY from umls.MRSTY where CUI = '$entity'";
        $result = mysql_query($sql);
        if ($result && mysql_num_rows($result)) {
            list($type) = mysql_fetch_array($result);
            return $type;
        }
    }

    //
    // input: concept id
    // output: term of the first m_textlab_terms row for it, nothing if none
    //
    function get_term($entity) {
        $sql = "select term from m_textlab_terms where concept_id = '$entity'";
        $result = mysql_query($sql);
        if ($result && mysql_num_rows($result)) {
            list($term) = mysql_fetch_array($result);
            return $term;
        }
    }

    //
    // input: concept id (CUI)
    // output: array of "PAUI SAB (CODE) STR" strings, one per parent row;
    //         nothing if the query fails or finds no rows. The array is
    //         also kept in $this->tree.
    //
    function get_parents($entity) {

        $sql = "select h.PAUI, h.SAB, c.CODE, c.STR ".
               "from umls.MRHIER h, umls.MRCONSO c ".
               "where h.PAUI = c.AUI and h.CUI = '$entity' ".
               "group by h.PAUI, h.SAB, c.STR";
        $result = mysql_query($sql);
        if ($result && mysql_num_rows($result)) {
            $this->tree = array();
            while (list($aui, $sab, $code, $str) = mysql_fetch_array($result)) {
                $this->tree[] = "$aui $sab ($code) $str";
            }
            return $this->tree;
        }
    }

    //
    // (re)creates an empty node for the entity
    //
    function add_node($entity) {
        $this->nodes["$entity"] = array();
    }

    //
    // input: entity id
    // output: fills the entity's node; "connections" counts how often the
    //         entity appears as subject or object in $this->triples
    //
    function get_connections($entity) {
        $count = 0;
        foreach ($this->triples as $triple) {
            if ($entity==$triple["subject"]) {
                $count++;
            }
            if ($entity==$triple["object"]) {
                $count++;
            }
        }
        $this->nodes["$entity"]["stype"] = $this->get_type_name($entity);
        $this->nodes["$entity"]["term"] = $this->get_term($entity);
        $this->nodes["$entity"]["connections"] = $count;
        $this->nodes["$entity"]["tree"] = $this->get_parents($entity);
    }

}

//
// Shannon entropy, in bits, of a token distribution given as a list of
// tokens and a parallel list of their frequencies.
//
class Entropy {

    var $tokens      = array();  // the tokens as given
    var $num_events  = 0;        // sum of all frequencies
    var $token_freqs = array();  // the frequencies as given
    var $token_probs = array();  // frequency / num_events, same keys as token_freqs
    var $num_tokens  = 0;        // number of frequency entries
    var $bits        = 0.0;      // -sum(p * log2(p))
    var $maxent      = 0.0;      // log2(num_tokens)
    var $ratio       = 0.0;      // bits / maxent

    //
    // input: $tokens array of tokens, $freq array of their frequencies
    //        (anything that is not an array is treated as empty)
    // output: fills num_events, token_probs, bits, maxent and ratio
    //
    function __construct($tokens, $freq) {
        $this->tokens      = is_array($tokens) ? $tokens : array();
        $this->token_freqs = is_array($freq) ? $freq : array();
        // an "event" is one occurrence of a token, so the probabilities
        // have to be taken over the frequency total, not the token count
        $this->num_events  = array_sum($this->token_freqs);
        $this->num_tokens  = count($this->token_freqs);

        $entropy = 0.0;
        foreach ($this->token_freqs as $token => $count) {
            $p = ($this->num_events > 0) ? $count / $this->num_events : 0.0;
            $this->token_probs[$token] = $p;
            // a token with probability 0 contributes nothing; log(0) would
            // turn the sum into NAN
            if ($p > 0) {
                $entropy += $p * log($p, 2);
            }
        }
        // written as a subtraction so that an empty sum gives 0 and not -0
        $this->bits   = 0.0 - $entropy;
        $this->maxent = ($this->num_tokens > 0) ? log($this->num_tokens, 2) : 0.0;
        // maxent is 0 for zero or one token; the ratio is reported as 0 then
        $this->ratio  = ($this->maxent > 0) ? $this->bits / $this->maxent : 0.0;
    }

    //
    // Class-named constructor kept for runtimes that only know this form.
    //
    function Entropy($tokens, $freq) {
        $this->__construct($tokens, $freq);
    }

    //
    // output: array with the keys "bits", "maxent" and "ratio"
    //
    function get_EntropyValues() {
        $retval = array();
        $retval["bits"] = $this->bits;
        $retval["maxent"] = $this->maxent;
        $retval["ratio"] = $this->ratio;
        return $retval;
    }

}

//
// Entropy, in bits, of the value distribution of one table column. The
// frequencies are read through the global $db object (getCol()).
//
class TableEntropy {

  var $table       = "";  // table to read from
  var $column      = "";  // column whose values are counted
  var $select      = "";  // stored by setSelect(); not used by the query
  var $where       = "";  // optional WHERE condition (without the keyword)
  var $group_by    = "";  // optional GROUP BY expression, default: $column

  var $num_events  = 0;        // sum of all frequencies
  var $token_freqs = array();  // result of the last frequency query
  var $token_probs = array();  // frequency / num_events, same keys as token_freqs
  var $num_tokens  = 0;        // number of frequency entries
  var $bits        = 0.0;      // -sum(p * log2(p))
  var $maxent      = 0.0;      // log2(num_tokens)
  var $ratio       = 0.0;      // bits / maxent

  function setTable($table) {
    $this->table = $table;
  }

  function setColumn($column) {
    $this->column = $column;
  }

  function setSelect($sql) {
    $this->select = $sql;
  }

  function setWhere($sql) {
    $this->where = $sql;
  }

  function setGroupBy($sql) {
    $this->group_by = $sql;
  }

  //
  // Runs "SELECT column, count(column) as freq FROM table [WHERE ...]
  // GROUP BY ..." and stores the freq column in $this->token_freqs.
  //
  function getTokenFrequencies() {
    global $db;
    $sql  = " SELECT $this->column, count($this->column) as freq ";
    $sql .= " FROM ". $this->table;
    if ($this->where != "") {
      $sql .= " WHERE ". $this->where;
    }
    // without an explicit grouping the counted column itself is used
    $grouping = ($this->group_by != "") ? $this->group_by : $this->column;
    $sql .= " GROUP BY ". $grouping;
    $this->token_freqs = $db->getCol($sql, "freq");
  }

  //
  // Reloads the frequencies and recomputes num_events, num_tokens,
  // token_probs, bits, maxent and ratio.
  //
  function getEntropy() {
    $this->getTokenFrequencies();
    // NOTE(review): getCol() may hand back something other than an array
    // on failure - confirm; such a result is treated as "no rows" here
    $freqs = is_array($this->token_freqs) ? $this->token_freqs : array();
    $this->num_events  = array_sum($freqs);
    $this->num_tokens  = count($freqs);
    // start clean so a second call does not keep stale probabilities
    $this->token_probs = array();

    $entropy = 0.0;
    foreach ($freqs as $token => $freq) {
      $p = ($this->num_events > 0) ? $freq / $this->num_events : 0.0;
      $this->token_probs[$token] = $p;
      if ($p > 0) {
        $entropy += $p * log($p, 2);
      }
    }
    // written as a subtraction so that an empty sum gives 0 and not -0
    $this->bits   = 0.0 - $entropy;
    $this->maxent = ($this->num_tokens > 0) ? log($this->num_tokens, 2) : 0.0;
    // maxent is 0 for zero or one token; the ratio is reported as 0 then
    $this->ratio  = ($this->maxent > 0) ? $this->bits / $this->maxent : 0.0;
  }

}

?>
