#!/local/bin/perl

#
# html4blast - Convert blast output to HTML hypertext.
#
# It displays eventually a Gif image made with a modified version
# of Alessandro Guffanti (guffanti@tigem.it) PaintBlast package
# drawimage function.
#
# v1.3  - 04/01/1998 - njoly@pasteur.fr
# v1.2  - 20/10/1998 - njoly@pasteur.fr
# v1.1  - 08/09/1998 - njoly@pasteur.fr
# v1.0  - 21/08/1998 - njoly@pasteur.fr

use strict;
use Getopt::Std;
use File::Basename;
use GD;

## WWW servers configuration
my $wwwefetch = "http://bioweb.pasteur.fr/cgi-bin/efetch";
my $wwwsrs = "http://bioweb.pasteur.fr/cgi-bin/srs5/wgetz";

my $wwwgenbank = "http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query";
my $wwwsprot = "http://www.expasy.ch/cgi-bin/get-sprot-entry";
my $wwwenzyme = "http://www.expasy.ch/cgi-bin/get-enzyme-entry";
my $wwwembl = "http://www.ebi.ac.uk/htbin/expasyfetch";
my $wwwpir = "http://www-nbrf.georgetown.edu/cgi-bin/nbrfget";

## Databases configuration
## 'label => [ "efetch_tag", "srs_names+...", "extern_query" ],
my %db = ( 'gb' => [ "gb", "genbanknew+genbank",
		     "$wwwgenbank?db=n&form=6&dopt=g&uid=" ],
	   'gi' => [ "", "", "$wwwgenbank?db=s&form=6&dopt=g&uid=" ],
	   'emb' => [ "em", "emblnew+embl", "$wwwembl?" ],
	   'em' => [ "em", "emblnew+embl", "$wwwembl?" ],
           'sp' => [ "sp", "swissprot+sptrembl+swissnew", "$wwwsprot?" ],
           'gp' => [ "gp", "", "" ],
           'pir' => [ "pir", "pir", "$wwwpir?" ],
           'nrl3d' => [ "nrl3d", "nrl3d", "" ],
           'epd' => [ "epd", "", "" ],
           'enz' => [ "", "enzyme", "$wwwenzyme?" ] );

## Init default values
my $outfile = "blast.html";

## Check command line
my %options = ();
if (! getopts("o:gnse",\%options)) { usage(); exit(1); }
if (@ARGV != 1) { usage(); exit(1); }
if ($options{'o'}) { $outfile = $options{'o'}; }
my $infile = shift(@ARGV);

## Open HTML outfile
open(OUT,"> $outfile") || die("Can't open $outfile\n");
print OUT "<HTML>";
print OUT "<TITLE>$outfile</TITLE>";
print OUT "<BODY><PRE>";

## Parsing values
my $resnb = 0; my $proba = ""; my $pronb = 0;

## Open & Parse Blast report file
open(IN,"< $infile") || die("Can't open $infile\n");
while(<IN>) {
    ## Init databases refs values
    my $gi = "";
    my $label = ""; my $acc = ""; my $loc = "";
    my $enz = "";

    ## Quote '<' character for HTML
    s/</&lt/g;

    ## Check for new blast results
    if (/^T?BLAST[NX]?/) { $resnb++; }
    next if ($resnb == 0);

    ## Check for databases references
    if (/^>?\s?gi\|(\S*)\|(\S+)\|(\S*)\|(\S*)\s/) {
	$gi = $1; $label = $2; $acc = $3; $loc = $4; }
    elsif (/^>?\s?(\S+)\|(\S*)\|(\S*)\s/) {
	$label = $1; $acc = $2; $loc = $3; }
    elsif (/^>?\s?gi\|(\S*)\s/) {
	$gi = $1; }
    my $key = "$label\_$acc\_$loc\_$resnb";
    if ($gi ne "") { $key = "$gi\_$resnb"; }

    ## Check for enzyme database reference
    if (/\(EC\s(\d+\.\d+\.\d+\.\d+)\)/) { $enz = $1; }

    ## Check for proba/expect
    if (/^(gi|$label)\|.* \d+\s\s(\S+)(\s+\d+)?$/) {
	$proba = $2; $pronb++; }

    ## Make databases links
    if (! $options{'n'} && $gi.$acc.$loc ne "") {
	my $qgi = ""; my $qacc = ""; my $qloc = "";
	## Efetch links (default)
	if ($db{$label}->[0] ne "") {
	    my $efetch = $db{$label}->[0];
	    $qacc = "$wwwefetch?-a%20$efetch:$acc";
	    $qloc = "$wwwefetch?$efetch:$loc"; }
	## Srs links (-s flag)
	if ($options{'s'} && $db{$label}->[1] ne "") {
	    my $srs = $db{$label}->[1]; $srs =~ s/\+/%20/g;
	    $qacc = "$wwwsrs?-e+[{$srs}-AccNumber:$acc]";
	    $qloc = "$wwwsrs?-e+[{$srs}-ID:$loc]"; }
	## External links (-e flag)
	if ($options{'e'}) {
	    if ($db{$label}->[2] ne "") {
		my $xtern = $db{$label}->[2];
		$qacc = "$xtern$acc";
		$qloc = ""; }
	    if ($db{'gi'}) { $qgi = $db{'gi'}->[2].$gi; }
	}
	if ($qacc ne "" && $acc ne "") {
	    s/\|$acc\|/\|<A HREF="$qacc">$acc<\/A>\|/; }
	if ($qloc ne "" && $loc ne "") {
	    s/\|$loc /\|<A HREF="$qloc">$loc<\/A> /; }
	if ($qgi ne "" && $gi ne "" && $db{'gi'}) {
	    s/gi\|$gi/gi\|<A HREF="$qgi">$gi<\/A>/; }
    }
    
    ## Make enzyme links
    if (! $options{'n'} && $enz ne "" && $db{'enz'}) {
	my $qenz = "";
	if ($options{'s'} && $db{'enz'}->[1] ne "") {
	    my $enzym = $db{'enz'}->[1]; $enzym =~ s/\+/%20/g;
	    $qenz = "$wwwsrs?-e+[{$enzym}-ID:$enz]"; }
	if ($options{'e'} && $db{'enz'}->[2] ne "") {
	    my $enzym = $db{'enz'}->[2];
	    $qenz = "$enzym$enz"; }
	if ($qenz ne "") { s/EC $enz/EC <A HREF="$qenz">$enz<\/A>/; }
    }

    ## Make proba/expect links
    if ($proba ne "" && $key ne "___$resnb") {
	s/  $proba/  <A NAME="P_$key"><\/A><A HREF="#H_$key">$proba<\/A>/;
	s/^(>|\s)/<A NAME="H_$key"><\/A><A HREF="#P_$key">$1<\/A>/; }

    ## Display alignment graphical summary
    if ($options{'g'} && /^$/ && $pronb != 0) {
	print OUT "</PRE>\n";
	select OUT;
	my $image = drawimage($infile,$resnb);
        print OUT "<PRE>\n";
        $pronb = 0; }

    ## Write definitve line
    print OUT $_; }

## Close Blast & HTML file
print OUT "</PRE></BODY></HTML>";
close(IN);
close(OUT);

## Normal end
exit(0);


## Usage display
sub usage {
    my $prog = basename($0);
    print STDERR <<USAGE
usage: $prog [options] file

options [default]:
    -o <file>   Output file name [$outfile].
    -g          Graphical alignment summary.
    -n          No databases links.
    -s          Srs databases links.
    -e          External sites databases links.
USAGE
};



## This is a modified version of Alessandro Guffanti (guffanti@tigem.it)
## original PaintBlast package, for local use at the Pasteur Institute.
sub drawimage {
    my $file = shift;
    my $nb = shift;

    ## Init values
    my $res = 0;
    my $qname = "Query"; my $qlen = 0;
    my $seqid = "";
    my %seqs = ();
    my @seqlist = ();

    ## Parse blast file
    open(INB,"< $file") || die("Can't open file $file");
    while(<INB>) {
	
	## Check for Blast result start
	if (/^T?BLAST[NX]?/) { $res++; next; }
	
	## Skip line if not in wanted result
	next if ($res != $nb);

	## Get query name & length
	if (/^Query=\s+\S+\|(\S*)\|/) { $qname = $1; next; }
	if (/\(([\d,]+) letters\)/) {
	    $qlen = $1; $qlen =~ s/,//g; next; }

	## Get match sequence id
	if (/^[>\s]gi\|(\w*)\|/) {
	    $seqid = $1;
	    next if ($seqid eq "");
	    next if (grep(/$seqid/,@seqlist) != ());
	    push(@seqlist,$seqid);
	    next; }
	elsif (/^[>\s](\w*)\|(\w*)\|(\w*)/) {
	    $seqid = "$1&$2&$3";
	    next if ($seqid eq "");
	    next if (grep(/$seqid/,@seqlist) != ());
	    push(@seqlist,$seqid);
	    next; }

	## Get align score
	if (/Score =\s+(\d+(\.\d+)?)/) {
	    push(@{$seqs{$seqid}},$1);
	    push(@{$seqs{$seqid}},0);
	    push(@{$seqs{$seqid}},0);
	    next; }

	## Get query align start & end
	if (/^Query:\s+(\d+)\s+\S+\s+(\d+)$/) {
	    my $stop  = pop(@{$seqs{$seqid}});
	    my $start = pop(@{$seqs{$seqid}});
	    my $beg; my $end;
	    
	    ## Check for reversed query (NCBI) or not (Wash. Univ.)
	    if ($1 < $2) { $beg = $1; $end = $2; }
	    else { $beg = $2; $end = $1; }
	    if ($start == 0 || $beg < $start) {
		push(@{$seqs{$seqid}},$beg); }
	    else { push(@{$seqs{$seqid}},$start); }
	    if ($stop == 0 || $end > $stop) {
		push(@{$seqs{$seqid}},$end); }
	    else { push(@{$seqs{$seqid}},$stop); }
	    next; }
	
    }
    close(INB);

    ## Init alignments summary image
    my $imgname = "paintblast$nb.$$.gif";
    my $font = gdMediumBoldFont; my $fonth = $font->height;
    my $seqnb = scalar @seqlist; my $spos = 15 * $font->width;
    my $imgw = 632; my $imgh = (4 + $seqnb + 7) * $fonth;
    my $img = new GD::Image($imgw,$imgh);
    $img->interlaced('true');

    ## Allocate colors
    my $white = $img->colorAllocate(255,255,255);
    my $black = $img->colorAllocate(0,0,0);
    my $red = $img->colorAllocate(255,0,0);
    my $green = $img->colorAllocate(0,205,25);
    my $yellow = $img->colorAllocate(247,174,0);
    my $grey = $img->colorAllocate(130,130,130);
    my $blue = $img->colorAllocate(0,0,255);

    ## Scoring colors intervals (min value key).
    my %colors = ( 0 => $blue, 50 => $grey, 100 => $yellow, 150 => $green,
		   200 => $red );

    ## Black frame round image
    $img->rectangle(0,0,$imgw-1,$imgh-1,$black);

    ## Draw top query
    my $start = $spos; my $stop = $imgw - $fonth;
    my $line = $fonth;
    $img->string($font,$fonth,$line,$qname,$red);
    $img->filledRectangle($start,$line,$stop,$line+$fonth/2-1,$red);
    $line += $fonth/2;

    ## Rule scaling calc
    my $unit = ($stop - $start) / $qlen;
    my $delta = 1;
    for (my $i=1; 1; $i*=10) {
	my $tmp = $qlen/$i;
	if (($tmp/5) > 10) { $delta = $i * 5; next }
	if ($tmp > 10) { $delta = $i; next; }
	last; }

    ## Draw top rule
    for (my $i=0; $i<=$qlen/$delta; $i++) {
	my $pos = $i*$delta*$unit;
	$img->line($spos+$pos,$line,$spos+$pos,$line+$fonth/2-1,$red);
	next if ($i%5 != 0);
	$img->string($font,$spos+$pos,$line+$fonth/2,$i*$delta,$black); }
    $line += $fonth * 2.5;

    ## Draw all matched sequences & image hrefs
    print "<MAP NAME=HSPMAP$nb>\n";
    foreach (@seqlist) {
	## Sequence name
	my $id = "";
	if (/(\S*)&(\S*)&(\S*)/ && $1 ne "gi") { 
	    if ($2 ne "") { $id = $2; }
	    else { $id = "$3 (ID)"; }
	}
	else { $id = $_; }
	$img->string($font,$fonth,$line,$id,$black);
	my $seqhref = $_; $seqhref =~ s/&/_/g;
	my $line2 = $line + $fonth * 3/4 - 1;
	print "<AREA SHAPE=RECT COORDS=$fonth,$line,$spos,$line2 HREF=#H_$seqhref\_$nb>\n";
	## Sequence HSPs (printed in reverse appearance order).
	my @hsp = @{$seqs{$_}};
	while (@hsp != ()) {
	    my $end = pop(@hsp) * $unit + $start;
	    my $begin = pop(@hsp) * $unit + $start;
	    my $score = pop(@hsp);
	    my $color = 0;
	    ## Aligned sequences printed in reverse appearance order
	    foreach (sort bynumber keys %colors) {
		if ($score > $_) { $color = $colors{$_}; next; }
		last; }
	    $img->filledRectangle($begin,$line+$fonth/4,
				  $end,$line+$fonth*3/4,$color); }
	$line += $fonth; }
    print "</MAP>\n";
    print "<CENTER>";
    print "<IMG BORDER=0 USEMAP=#HSPMAP$nb SRC=$imgname ISMAP>";
    print "</CENTER>\n";

    ## Draw bottom query & rule
    $line += $fonth;
    for (my $i=0; $i*$delta<=$qlen; $i++) {
	my $pos = $i*$delta*$unit;
	$img->line($spos+$pos,$line+$fonth,$spos+$pos,$line+$fonth*1.5-1,$red);
	next if ($i%5 != 0);
	$img->string($font,$spos+$pos,$line,$i*$delta,$black); }
    $line += $fonth;
    $img->string($font,$fonth,$line,$qname,$red);
    $img->filledRectangle($start,$line+$fonth/2,$stop,$line+$fonth-1,$red);
    $line += $fonth;

    ## Draw color legend
    $line += $fonth;
    $img->string($font,$fonth,$line,"Scoring",$red);
    $img->string($font,$fonth,$line+$fonth,"colors",$red);
    my $nbcol = scalar keys %colors;
    my $lencol = ($stop - $start) / $nbcol;
    my $pos = $start; my $prec = 0;
    foreach (sort bynumber keys %colors) {
	my $color = $colors{$_};
	$img->filledRectangle($pos,$line,$pos+$lencol,$line+$fonth,$color);
	$img->string($font,$pos,$line+$fonth,"S>$_",$black);
	$pos += $lencol; }

    ## Convert image to GIF and save
    my $image = $img->gif;
    open(IMG,"> $imgname");
    print IMG $image;
    close(IMG);

    return $imgname; }

## Sort by numbers
sub bynumber { $a <=> $b; }
