package blast;

use URI::Escape;
use LWP::UserAgent;
use LWP::Simple;
use HTTP::Request::Common qw(POST);
use HTTP::Headers;
use Term::ANSIColor;
require LWP::UserAgent;
use XML::XPath;
use XML::Validate;
use XML::XPath::XMLParser;
use DBI;
use datab;			# Database module

#BLAST 1# - Main function for blasting query sequence - function(%params)			
#Return: id_query result or 0
sub blasting 
{			
	#1 Local parameters
	my %vstup = @_;			# Input field of values
	my $dbi = undef;		# Database for insert values
	my $vystup = undef;		# Output from blast
	my $blocal;				# Order for local blast 
	my $xml = undef;		# XML output for parsing
	my %pole = ();			# Field for HIT insert
	my $id_hit = 0;			# Counter for HIT identificator
	my $e_experiment = 0;	# Experimental e-value
	my $e_cut_off = 0.001;	# User defined e-value
	my $count_hits = 0;		# Counter of hits for validation
	my $prikaz;				# Order for DB
	my $align_cut_off = 0;	# User defined align-length
	my $align_value = 0;	# Experimental align-length
	my $hits_found = 1;		# Are HITS FOUNDED or NOT

	#2 Get DB connection
	$dbi = &datab::pristup($vstup{'db_host'}, $vstup{'db_name'}, $vstup{'db_user'}, $vstup{'db_heslo'});
	return 0 unless defined($dbi);

	#3 SET PROCEDURE LOCAL BLAST
	$blocal = 'echo -e "' . $vstup{'sequence'} . '" | ' . $vstup{'blast_mode'} . ' -query - -db ' . $vstup{'db_blast'} . ' -outfmt 5';
	
	#4A IF LOCAL GIVE LOCAL BLAST 
	if ($vstup{'local'}) 
	{
		$vystup = `$blocal` . "\n";
	}
	#4B REMOTE BLAST
	else 
	{	
			$vystup = &blast($vstup{'sequence'}, $vstup{'blast_mode'}, $vstup{'db_blast'}, $vstup{'seq_id'});
	}
	
	#5 If ERROR in SEARCH
	return 0 unless (defined($vystup));
	# If no hits found
	if ($hits_found == 0)
	{
		return $vstup{'seq_id'};
	}

	#6 Get XML data from output
	eval {
		$xml = XML::XPath->new(xml => $vystup);
	};	

	#7 If ERROR DETECTED
	if ($@) 
	{
 		print STDERR color("red"), "ERROR: Problem with XML file for sequence name ", $vstup{'seq_id'}, ". XML output is invalid.\n", color("reset");
		&datab::err_message("ERROR: Problem with XML file for sequence name " . $vstup{'seq_id'} .". XML output is invalid.\n");
		return 0;
	}

	#8 Define CUTOFF values
	$e_cut_off = sprintf("%.10g", $vstup{'e_value'});
	$align_cut_off = sprintf("%.10g", $vstup{'align'});

	#9 Extract values from XML field
	while ($id_hit < $vstup{'max_count_hit'} + 1)
	{
		# Increase iterator ID_HIT
		$id_hit++;
		# Verify XML input
		if (&is_exists_xml($xml, "/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]"))
		{
			#A Get values from XML and insert into field
			$pole{'name_hit'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_def");
			$pole{'AC_num'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_accession");
			$pole{'Hit_length'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_len");
			$pole{'Bit_score'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_bit-score");
			$pole{'Score'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_score");
			$pole{'E_value'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_evalue");
			$pole{'Query_from'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_query-from");
			$pole{'Query_to'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_query-to");
			$pole{'Identity'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_identity");
			$pole{'Positive'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_positive");
			$pole{'Align_length'} = $xml->getNodeText("/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit[$id_hit]/Hit_hsps/Hsp[1]/Hsp_align-len");

			#B Transfer text into numbers
			$e_experiment = sprintf("%.10g", $pole{'E_value'});
			$align_value = sprintf("%.10g", $pole{'Align_length'});

			#C Verify E-VALUE and ALIGN-LENGTH
			if (($e_experiment < $e_cut_off) && (!($pole{'name_hit'} eq '')) && ($align_value > $align_cut_off)) 
			{			
				#a INSERT OPERATION
				$prikaz = $dbi->prepare("INSERT INTO 2_Hits (	2_seq_name, 2_hit_definition, 2_hit_accession, 2_hit_len, 
										2_bit_score, 2_score, 2_evalue, 2_query_from, 2_query_to, 
										2_identity, 2_positive, 2_align_length) 
							VALUES (?,?,?,?,?,?,?,?,?,?,?,?) ON DUPLICATE KEY UPDATE 2_seq_name = 2_seq_name;");
				if (!($prikaz->execute(	$vstup{'seq_id'}, $pole{'name_hit'}, $pole{'AC_num'}, $pole{'Hit_length'},
							$pole{'Bit_score'}, $pole{'Score'}, $pole{'E_value'}, $pole{'Query_from'},
							$pole{'Query_to'}, $pole{'Identity'}, $pole{'Positive'}, $pole{'Align_length'}))) 
				{
					# If error print ERROR MESSAGE
					print STDERR color("red"), "ERROR: Problem with insert hit record about sequence name ", $vstup{'seq_id'}, " into database.\n", color("reset");
					&datab::warr_message("ERROR: Problem with insert hit record about sequence name " . $vstup{'seq_id'} ." into database.\n");
				}
				#b Searching GO_numbers from NCBI
				# &GO_blast_search($pole{'AC_num'}, %vstup);
			}
			#D Increase count of hits
			$count_hits++;
		}
		# If count of processed hit is more than 2
		elsif ($id_hit > 2)
		{
			$id_hit = $vstup{'max_count_hit'} + 5;
		}
	}
	
	#10 Disconnect DB system
	$dbi->disconnect;

	#11 Verify searching and give return value
	if ($count_hits) 
	{
		return $vstup{'seq_id'};	# Success blast
	}
	else 
	{
		return 0;
	}	
}

#BLAST 4# - Help function for validating XML document
#Return: 1 if succes and 0 if error
sub is_exists_xml 
{
        #1 Local variables
        my $xml = shift;
        my $cesta = shift;

        #2 If not defined return undef value
        return 0 unless defined $xml;
        eval
        {
                $xml->exists($cesta)
        };
        #3 Get test result and give 1 for succes and 0 for error
        if ( $@ )
        {
                return 0;
        }
        else
        {
                return 1;
        }
}


#BLAST 2# - Blast function for REMOTE BLAST - function(sequence, blast_mode, db_blast, seq_name)			
#Return: blast result, undef value if fail
sub blast
{
	#1 Define parameters
	my $query = shift;		# Query SEQ
	my $program = shift;		# Blast_mode
	my $database = shift;		# NCBI databbase for searching
	my $name = shift;		# Name of query SEQ
	my $ua;				# User agent
	my $encoded_query;		# Encoded query for UA
	my $urlx = 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
	my $req;			# Request for QUERY SEQ
	my $response;			# Response from NCBI server
	my $rid;			# Blast variable 1
	my $rtoe;			# Blast variable 2
	my $pc_time = 0;		# PC time for computing

	#2 Create client for web comunications
	$ua = LWP::UserAgent->new;

	#3 Read and encode the query
	$encoded_query = uri_escape($query);

	#4 SEND QUERY and sleep
	$req = POST $urlx,[
						PROGRAM => "$program",
						DATABASE => "$database",
						QUERY => "$encoded_query",
						CMD => "Put"
				];
	sleep 3;
	
	#5 GET RESPONSE IF SEND OK
	$response = $ua->request($req);
	if (!($response->is_success)) 
	{
		#A Print ERROR MESSAGE
		print STDERR color("red"), "ERROR: Problem with retrieving result for sequence name $name.\n", color("reset");
		&datab::err_message("ERROR: Problem with retrieving result for sequence name $name.\n");
		#B Print MESSAGE FROM SEARCH
		print STDERR color("red"), $name, ": ", $response->status_line, "\n", color("reset");
		&datab::err_message($name . ": " . $response->status_line . "\n");
		#C Return UNDEF VALUE
		return undef;
	}

	#6 Parse out the request id
	$response->content =~ /^    RID = (.*$)/m;
	$rid = $1;

	#7 Parse out the estimated time to completion
	$response->content =~ /^    RTOE = (.*$)/m;
	$rtoe = $1;

	#8 Wait for search to complete
	sleep $rtoe;
	
	#9 Set PC_TIME COUNTER
	$pc_time = 0;	

	#10 Loop for results
	while (1)
    	{
		#A Get request
    		$req = new HTTP::Request GET => "https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
    		$response = $ua->request($req);
		
		#B If response is error
		if ($response->is_error) 
		{
			# Print ERROR MESSAGE
			print STDERR color("red"), "ERROR: Problem with retrieving result for sequence name $name.\n", color("reset");
			&datab::err_message("ERROR: Problem with retrieving result for sequence name $name.\n");
			print STDERR color("red"), $name, ": ", $response->status_line, "\n", color("reset");
			&datab::err_message($name . ": " .$response->status_line . "\n");
			# Return UNDEF VALUE
			return undef;
		}
		
		#C If response is waiting
    		if ($response->content =~ /\s+Status=WAITING/m)
        	{
			if ($pc_time > 300) 
			{
				# Print ERROR MESSAGE
				print STDERR color("red"), "ERROR: Time for blasting sequence name $name elapsed.\n", color("reset");
				&datab::err_message("ERROR: Time for blasting sequence name $name elapsed.\n");
				# Return UNDEF VALUE
				return undef;  
			}				
			else 
			{
				$pc_time++;
				sleep 5;
				next;
			}
        	}
		
		#D If response is failed
	    	if ($response->content =~ /\s+Status=FAILED/m)
        	{
			# Print ERROR MESSAGE and return UNDEF VALUE
        		print STDERR color("red"), "ERROR: Search $rid for sequence name $name failed. Please report to blast-help\@ncbi.nlm.nih.gov.\n", color("reset");
			&datab::err_message("ERROR: Search $rid for sequence name $name failed. Please report to blast-help\@ncbi.nlm.nih.gov.\n");
        		return undef;
        	}
		
		#E If response status is unknown
    		if ($response->content =~ /\s+Status=UNKNOWN/m)
        	{
			# Print ERROR MESSAGE and return UNDEF VALUE
        		print STDERR color("red"), "ERROR: Search $rid expired for sequence name $name expired.\n", color("reset");
			&datab::err_message("ERROR: Search $rid expired for sequence name $name expired.\n");
        		return undef;
        	}
		
		#F If response status is ready
    		if ($response->content =~ /\s+Status=READY/m) 
        	{
       			if ($response->content =~ /\s+ThereAreHits=yes/m)
        		{
        			last;
        		}
        		else
            		{	
				# If NO HITS FOUND
            			print STDERR "No hits found for sequence $name.\n";
				&datab::warr_message("No hits found for sequence $name.\n");
				$hits_found = 0;
            			return 0;
            		}
        	}	
    		#G if we get here, something unexpected happened.
		return undef;
	} # end loop

	#11 Retrieve and give results
	$req = new HTTP::Request GET => "https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=XML&RID=$rid";
	eval {
		$response = $ua->request($req);
	};
	
	#12A If response is SUCCESS
	if ($response->is_success) 
	{
		return $response->content;
	}
	#12B If PROBLEM
	else 
	{
		#A Print ERROR MESSAGE
		print STDERR color("red"), "ERROR: Problem with retrieving results for sequence name $name.\n", color("reset");		
		print STDERR color("red"), $name, ": ", $response->status_line, "\n", color("reset");
		&datab::err_message("ERROR: Problem with retrieving result for sequence name $name.\n");
		&datab::err_message($name . ": " . $response->status_line . "\n");
		return undef;
	}
}

1;
