#!/local/bin/perl

#
# html4blast - Convert blast output to HTML hypertext.
#
# It displays eventually a Gif image made by Alessandro Guffanti
# (guffanti@tigem.it) PaintBlast package.
#
# v1.1  - 08/09/1998 - njoly@pasteur.fr
# v1.0  - 21/08/1998 - njoly@pasteur.fr

use strict;
use Getopt::Std;
use File::Basename;
use IPaintBlast;

## www servers configuration
my $wwwsrs = "http://bioweb.pasteur.fr/cgi-bin/srs5/wgetz";
my $wwwefetch = "http://bioweb.pasteur.fr/cgi-bin/efetch";
my $wwwgenbank = "http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query";
my $wwwsprot = "http://www.expasy.ch/cgi-bin/get-sprot-entry";
my $wwwenzyme = "http://www.expasy.ch/cgi-bin/get-enzyme-entry";
my $wwwembl = "http://www.ebi.ac.uk/htbin/expasyfetch";
my $wwwpir = "http://www-nbrf.georgetown.edu/cgi-bin/nbrfget";

## databases configuration
## 'label_name' => [ "srs_db_names+...", "efetch_name", "extern_site" ]
my %db = ( 'gb' => [ "genbank+genbanknew", "gb",
		     "$wwwgenbank?db=n&form=6&dopt=g&uid=" ],
	   'emb' => [ "embl+emblnew", "em", "$wwwembl?" ],
	   'em' => [ "embl+emblnew", "em", "$wwwembl?" ],
	   'sp' => [ "swissprot+trembl+swissnew", "sp", "$wwwsprot?" ],
	   'gp' => [ "", "gp", "" ],
	   'pir' => [ "pir", "pir", "$wwwpir?" ],
	   'nrl3d' => [ "nrl3d", "nrl3d", "" ],
	   'epd' => [ "", "epd", "" ],
	   'enz' => [ "enzyme", "", "$wwwenzyme?" ] );

## Command line options & args
my %options = (); my $outfile = "blast.html";
getopts("o:gse", \%options);
if (@ARGV != 1) { usage(); exit(1); }
if ($options{'o'}) { $outfile = $options{'o'}; }
my $infile = shift(@ARGV);
my $pcount = 0; my $res = 0;

## Proceed each blast output file line
open(OUT,"> $outfile") || die("Can't open file $outfile\n");
print OUT "<HTML>";
print OUT "<TITLE>$outfile</TITLE>";
print OUT "<BODY><PRE>";

open(IN,"< $infile") || die("Can't open file $infile\n");
while(<IN>) {
    my $label = ""; my $acc = ""; my $loc = "";
    my $proba = ""; my $enz = "";

    ## Check for new blast result
    if (/^T?BLAST[NX]?/) { $res++; }

    ## Quote '<' characters
    s/</&lt/g;

    ## Check for 'label|acc|loc' line
    if (/^>?\s?(\S+)\|(\S*)\|(\S*) /) {
	$label = $1; $acc = $2; $loc = $3; }
    ## Check for enzyme ID
    if (/\(EC (\d+\.\d+\.\d+\.\d+)\)/) {
	$enz = $1; }
    ## Check for proba line
    if ($label ne "" && /^$label.* \d+\s\s(\S+)(\s+\d+)?$/) {
	$proba = $1; }

    ## Make acc|loc links
    if ($db{$label}) {
	my $accquery = ""; my $locquery = "";
	## Make efetch query args
	if (@{$db{$label}}->[1] ne "") {
	    $accquery = "$wwwefetch?-a%20@{$db{$label}}->[1]:$acc";
	    $locquery = "$wwwefetch?@{$db{$label}}->[1]:$loc"; }
	## Make SRS query args
	if ($options{'s'} && @{$db{$label}}->[0] ne "") {
	    my $dblist = @{$db{$label}}->[0]; $dblist =~ s/\+/%20/g;
	    $accquery = "$wwwsrs?-e+-lv+1+[{$dblist}-AccNumber:$acc]";
	    $locquery = "$wwwsrs?-e+-lv+1+[{$dblist}-ID:$loc]"; }
	## Make external acc links
	if ($options{'e'} && @{$db{$label}}->[2] ne "") {
	    $accquery = @{$db{$label}}->[2].$acc; }
	## Do links
	if ($accquery ne "" && $acc ne "") { 
	    s/\|$acc\|/\|<A HREF="$accquery">$acc<\/A>\|/; }
	if ($locquery ne "" && $loc ne "") {
	    s/\|$loc /\|<A HREF="$locquery">$loc<\/A> /; }
    }
    ## Make enzyme links
    if ($db{'enz'} && $enz ne "") {
	my $enzquery = "";
	if ($options{'s'} && @{$db{'enz'}}->[0] ne "") {
	    my $dblist = @{$db{'enz'}}->[0]; $dblist =~ s/\+/%20/g;
	    $enzquery = "$wwwsrs?-e+-lv+1+[{$dblist}-ID:$enz]"; }
	if ($options{'e'} && @{$db{'enz'}}->[2] ne "") {
	    $enzquery = @{$db{'enz'}}->[2].$enz; }
	if ($enzquery ne "") { s/EC $enz/EC <A HREF="$enzquery">$enz<\/A>/; }
    }
    ## Make proba links
    if ($proba ne "" && ($acc ne "" || $loc ne "")) {
	my $key = "$label\_$acc\_$loc\_$res";
	s/  $proba(\s+\d+)?$/  <A NAME="H_$key"><\/A><A HREF="#$key">$proba<\/A>$1/;
	$pcount++; }
    ## Make alignment header links
    if ($label ne "" && ($acc ne "" || $loc ne "") && /^>\s?$label/) {
	my $key = "$label\_$acc\_$loc\_$res";
	s/^>/<A NAME="$key"><\/A><A HREF="#H_$key">><\/A>/; }
    
    ## Eventually insert aligned sequence graph
    if ($options{'g'} && /^$/ && $pcount) {
	print OUT "</PRE>\n";
	select(OUT);
	my $image = IPaintBlast::drawimage($infile,$res);
	print OUT "<PRE>\n";
	$pcount = 0; }

    ## Print output line
    print OUT $_;

}
close(IN);

print OUT "</PRE></BODY></HTML>";
close(OUT);


sub usage {
    print STDERR "usage: ".basename($0)." [options] <file>\n\n";
    print STDERR "options [default]:\n";
    print STDERR "\t-o <file>\tOutput file name [$outfile].\n";
    print STDERR "\t-g\t\tGraphical alignment summary.\n";
    print STDERR "\t-s\t\tSrs www links (if possible).\n";
    print STDERR "\t-e\t\tExternal sites links (if possible).\n"; }
