#!/usr/local/bin/perl
#
# Copyright 2005-2009 Genes to Cognition Programme (G2C) and 
# Genome Research Limited (GRL)
#
# Contact: webmaster@genes2cognition.org
# See    : www.genes2cognition.org/software/southern_blot
#
# You may distribute this file/module under the terms of the Perl artistic
# licence: http://www.perlfoundation.org/artistic_license_2_0

=pod

=head1 NAME - get_probe_search_cpu_time

=head1 COMMAND LINE PARAMETERS

Required parameters
  --config_file
  --probe_name
  
Optional parameters

  --debug
  --help|h              

=head1 CONFIGURATION FILE

See get_probe_search_cpu_time.ini for more configuration details.

=head1 DESCRIPTION

Programme to report the total CPU time taken to run a Southern blot
design. Calculates this by checking the LSF output files generated by
each run_probe_search job.

Requires a configuration file allowing it to find the GeneTargeting
db to connect to, and the directory holding the LSF output files.

=head1 EXAMPLE RUN

 ./get_probe_search_cpu_time --config_file=get_probe_search_cpu_time.ini \
     --probe_name=test

GeneTargeting analysis database --
Connected to host: host, as user: user
Database       : southern_blot_design

Total cpu time: 3313.43 sec.
Total cpu time: 0.9 hrs

=head1 CONTACT

G2C B<email> webmaster@genes2cognition.org

=cut


use strict;
use Carp;
use Getopt::Long;
use GeneTargetingDB;
use GeneTargeting::Utils qw( show_perldoc );
use GeneTargeting::Utils::Config;

#Globals
# Debug flag, populated from --debug by GetOptions in the main block below.
my ($debug);

# Main programme: parse the command line, build the analysis database
# adaptor from the configuration file, fetch the named DNAProbe and report
# the total CPU time of its jobs via get_total_cpu_time().
{
    # $debug is deliberately NOT redeclared here: a second "my" would shadow
    # the file-level variable and leave it permanently undefined.
    my ( $probe_name, $config_file );
    GetOptions(
        "probe_name=s"      => \$probe_name,
        "config_file=s"     => \$config_file,
        "debug"             => \$debug,
        # GetOptions calls a handler with (option name, value). The other
        # call sites pass show_perldoc an error message as first argument,
        # so wrap the call to avoid handing it "help" in that position.
        "help|h"            => sub { show_perldoc() },
    ) or show_perldoc();

    # Both parameters are documented as required in the POD.
    show_perldoc("Must set --probe_name")  unless $probe_name;
    show_perldoc("Must set --config_file") unless $config_file;

    print "$0 : ", scalar(localtime), "\n\n";
    print "GeneTargeting analysis database --\n";
    my $cfg = GeneTargeting::Utils::Config->new($config_file, $debug);

    # Called in list context as before; only the adaptor (first element) is
    # needed here.
    my ($GeneTargeting_dba) = $cfg->make_analysis_DBAdaptor_from_config(1);

    #Get the adaptor we need (get_adaptors confesses if either is missing).
    my ($probe_aptr) = get_adaptors($GeneTargeting_dba);

    #Retrieve the DNAProbe
    my $probe = $probe_aptr->fetch_by_name($probe_name)
        or confess "Could not fetch probe '$probe_name'";

    get_total_cpu_time($GeneTargeting_dba, $cfg, $probe);
}


# Report the total CPU time used by all jobs belonging to a DNAProbe.
#
# Arguments:
#   $dba   - database adaptor, passed to get_adaptors() to obtain the
#            JobAdaptor
#   $cfg   - configuration object; its lsf_output_dir and
#            analysis_database_db values are read
#   $probe - DNAProbe object; only its name is used
#
# For every job id returned for the probe, the LSF output file
#   <lsf_output_dir>/<db name>_job_id_<job id>.out
# is read and its CPU time added to a running total. The total is printed
# to STDOUT in seconds and in hours (one decimal place).
#
# Confesses if the output directory is not a directory, if no job ids are
# returned, if an output file is missing or empty, or if no CPU time can be
# found in an output file.
sub get_total_cpu_time {
    my ( $dba, $cfg, $probe ) = @_;

    # Only the JobAdaptor is needed here.
    my ( undef, $job_aptr ) = get_adaptors($dba);

    # Both values come from the configuration file.
    my $lsf_output_dir = $cfg->lsf_output_dir;
    unless (-d $lsf_output_dir) {
        confess "Can't read directory '$lsf_output_dir' $!";
    }
    my $db_name = $cfg->analysis_database_db;

    my $job_ids = $job_aptr->get_ids_by_DNAProbe_name($probe->name)
        or confess "Got no job ids for ", $probe->name;

    my $total_cpu_time = 0;
    foreach my $job_id (@$job_ids) {

        my $file_name = $lsf_output_dir . '/' . $db_name . '_job_id_' . $job_id . '.out';
        unless (-s $file_name) {
            confess "Can't read file '$file_name' $!";
        }

        $total_cpu_time += _read_lsf_cpu_time($file_name);
    }

    my $cpu_hrs = $total_cpu_time / 3600;
    print "Total cpu time: $total_cpu_time sec.\n";
    print "Total cpu time: ", sprintf("%.1f", $cpu_hrs), " hrs\n";
}

# Return the CPU time in seconds from the first "CPU time : <number> sec"
# line of an LSF output file. The file is read directly rather than through
# a shell "grep", so the file name is never interpreted by a shell and a
# missing value is detected instead of silently counting as zero.
# Confesses if the file cannot be opened or contains no such line.
sub _read_lsf_cpu_time {
    my ($file_name) = @_;

    open my $fh, '<', $file_name
        or confess "Can't open file '$file_name': $!";

    my $cpu_time;
    while (my $line = <$fh>) {
        if ($line =~ /CPU time\s*:\s*(\d+(?:\.\d+)?)\s+sec/) {
            $cpu_time = $1;
            last;    # only the first match counts
        }
    }
    close $fh;

    unless (defined $cpu_time) {
        confess "Failed to grep for 'CPU time' for file $file_name";
    }
    return $cpu_time;
}

# Fetch the two object adaptors this script works with.
#
# Argument:
#   $dba - object providing get_DNAProbeAdaptor() and get_JobAdaptor()
#
# Returns the list (DNAProbe adaptor, Job adaptor). Confesses as soon as
# one of the lookups yields a false value; the Job adaptor is not requested
# when the DNAProbe adaptor is unavailable.
sub get_adaptors {
    my $dba = shift;

    my $dna_probe_adaptor = $dba->get_DNAProbeAdaptor();
    confess "Could not get DNAProbeAdaptor" unless $dna_probe_adaptor;

    my $job_adaptor = $dba->get_JobAdaptor();
    confess "Could not get JobAdaptor" unless $job_adaptor;

    return ($dna_probe_adaptor, $job_adaptor);
}

# Describe the configuration parameters of this script.
#
# Returns two hash references, (mandatory, optional). Each maps a name to
# an array reference of names listed under it:
#   mandatory: analysis_database => db, host, user;  lsf => output_dir
#   optional : analysis_database => pass, port
sub configure_parameters {

    my %required = (
        'analysis_database' => [ 'db', 'host', 'user' ],
        'lsf'               => [ 'output_dir' ],
    );

    my %allowed = (
        'analysis_database' => [ 'pass', 'port' ],
    );

    return ( \%required, \%allowed );
}
