#!/usr/local/ensembl/bin/perl -w
#
# Copyright 2005-2009 Genes to Cognition Programme (G2C) and 
# Genome Research Limited (GRL)
#
# Contact: webmaster@genes2cognition.org
# See    : www.genes2cognition.org/software/southern_blot
#
# You may distribute this file/module under the terms of the Perl artistic
# licence: http://www.perlfoundation.org/artistic_license_2_0

=pod

=head1 NAME - submit_probe_search

=head1 COMMAND LINE PARAMETERS

Required parameters
   --probe_name
 
Optional parameters
  --config_file         Otherwise the default is used (see below)
  --conf_name           Reserved for future use
  --lsf_queue           defaults to first one specified in .ini file
  --lsf_output_dir      Fully qualified path; defaults to output_dir in .ini file
  --host_type           LSF '-m' parameter
  --additional_params   anything else you want added to the bsub
  --max_jobs            Maximum number of jobs to submit
  --fast_submit         Omits the 1 second delay between LSF bsubs
  --debug               Switch on debugging
  --h|--help            Output documentation

=head1 CONFIGURATION FILE

See submit_probe_search.ini for more configuration details.

=head1 DESCRIPTION

Programme to submit GeneTargeting jobs (such as those made with
create_probe_search) to a compute farm with LSF.

Only jobs whose state is 'created' are submitted; their state is then updated
to 'submitted'. Jobs in any other state (for example those already 'running',
'success' or failed) are skipped.

=head1 EXAMPLE RUN

./submit_probe_search --config_file=submit_probe_search.ini
  --probe_name=test --max_jobs=1

Connected to host: host, as user: user
Database       : southern_blot_design

Using GeneTargetingBaseDir: '/southern_blot/'
Submission host ok : 'host'
Using default queue: 'normal'
Using default lsf output dir: '/southern_blot/lsf_out'

Fetched 23 job ids for probe: 'test'
Job <685054> is submitted to queue <normal>.
Submitted job id=1
  Reached --max_jobs=1
Total skipped  : 0/23 jobs
Total submitted: 1/23 jobs
    
=head1 CONTACT

G2C B<email> webmaster@genes2cognition.org

=cut


use strict;
use Carp;
use GeneTargetingDB;
use GeneTargeting::Utils qw (show_perldoc);
use GeneTargeting::Utils::Config;
use Getopt::Long;
use Sys::Hostname;

# Debug flag, set by --debug. File-scoped so that the helper subs further
# down (e.g. set_and_verify_lsf_output_dir) can read it.
my $debug;

# Main programme: parse the command line, load the configuration, verify the
# submission environment (host, queue, output dir) and hand the job ids of
# the requested probe to submit_jobs().
{
    my ($config_file, $conf_name, $probe_name, $queue, $host_type
        , $max_jobs, $lsf_output_dir, $fast_submit, $additional);

    GetOptions(
        "config_file=s"         => \$config_file,
        "conf_name=s"           => \$conf_name,
        "probe_name=s"          => \$probe_name,
        "lsf_queue=s"           => \$queue,
        "host_type=s"           => \$host_type,
        "max_jobs=i"            => \$max_jobs,
        "lsf_output_dir=s"      => \$lsf_output_dir,
        "fast_submit"           => \$fast_submit,
        "debug"                 => \$debug,
        "additional_params=s"   => \$additional,
        "h|help"                => \&show_perldoc,
    ) or show_perldoc();

    # Exactly one of --conf_name / --probe_name must be given.
    unless ($conf_name or $probe_name) {
        show_perldoc("Must set one of --conf_name --probe_name");
    }
    if ($conf_name and $probe_name) {
        show_perldoc("Must set only one of --conf_name --probe_name");
    }

    # --host_type is documented as the LSF '-m' parameter but used to be
    # parsed and then silently dropped. Pass it on to bsub by placing it in
    # front of any --additional_params, which submit_jobs() appends to the
    # bsub command line. Single quotes are escaped because the command is
    # run through the shell.
    if (defined $host_type and length $host_type) {
        (my $shell_safe_type = $host_type) =~ s/'/'\\''/g;
        my $host_option = "-m '$shell_safe_type'";
        $additional = (defined $additional and length $additional)
            ? "$host_option $additional"
            : $host_option;
        print STDERR "Adding to bsub: $host_option\n" if $debug;
    }

    my $cfg = GeneTargeting::Utils::Config->new($config_file, $debug);
    my ($analysis_dba) = $cfg->make_analysis_DBAdaptor_from_config(1);
    set_runner_script_names();

    # Each of these confesses if the environment is not usable.
    verify_submission_hostname($cfg);
    set_and_verify_lsf_queue($cfg, $queue);
    set_and_verify_lsf_output_dir($cfg, $lsf_output_dir);

    my $job_aptr = $analysis_dba->get_JobAdaptor
        or confess "Could not get JobAdaptor";

    # Only submission by probe name is implemented; --conf_name is reserved.
    my $job_ids;
    if ($probe_name) {
        $job_ids = $job_aptr->get_ids_by_DNAProbe_name($probe_name);
    } else {
        confess "Currently can only submit by --probe_name";
    }

    confess "Fetched no job ids for probe name: '$probe_name'"
        unless $job_ids;
    print "Fetched ", scalar(@$job_ids)
        , " job ids for probe: '$probe_name'\n";

    submit_jobs($analysis_dba, $cfg, $job_ids
        , $max_jobs, $fast_submit, $additional);
}    

{
    # Lexicals shared by set_runner_script_names() and submit_jobs():
    #   $scripts - hashref mapping a conf class name to its runner script name
    #   $run_dir - directory that holds the runner scripts
    # Declared without initialisers on purpose: both subs are called from the
    # main block above, i.e. before this statement is reached at run time.
    my ($scripts, $run_dir);

    # set_runner_script_names()
    #
    # Derives the runner script directory ('<base>/scripts/run') from the
    # GeneTargetingBaseDir environment variable and records which runner
    # script handles which conf class. Must be called before submit_jobs().
    # Confesses if the variable is unset or is not a fully qualified path.
    sub set_runner_script_names {

        my $base_dir = $ENV{GeneTargetingBaseDir}
            or confess 'Must set $GeneTargetingBaseDir env variable';

        verify_full_path($base_dir);
        $run_dir = $base_dir . '/scripts/run';
        $run_dir =~ s/\/\//\//g;    # collapse '//' left by a trailing slash
        print STDERR "Using GeneTargetingBaseDir: '$base_dir'\n";

        $scripts = {};
        $scripts->{'GeneTargeting::DBEntry::Conf::Exonerate'}
            = 'run_probe_search';
    }

    # submit_jobs($dba, $cfg, $job_ids, $max_jobs, $fast_submit,
    #             $additional_params)
    #
    # Submits each job in the arrayref $job_ids to LSF with bsub and moves
    # its state from 'created' to 'submitted'.
    #
    #   $dba               - supplies the Conf and Job adaptors and the db name
    #   $cfg               - supplies run_config_file, lsf_output_dir, lsf_queues
    #   $job_ids           - arrayref of job ids to consider
    #   $max_jobs          - optional; stop after this many submissions
    #   $fast_submit       - true to omit the 1 second pause between bsubs
    #   $additional_params - optional string inserted verbatim into the bsub
    #
    # Jobs not in state 'created' are skipped and counted. Confesses if an
    # adaptor, conf or job cannot be obtained, if the conf class has no
    # runner script, or if bsub fails; in the last case the job is left in
    # state 'created' so that it can be resubmitted later.
    sub submit_jobs {
        my ( $dba, $cfg, $job_ids, $max_jobs, $fast_submit
            , $additional_params ) = @_;

        my $conf_aptr = $dba->get_ConfAdaptor
            or confess "Could not get ConfAdaptor";
        my $job_aptr = $dba->get_JobAdaptor
            or confess "Could not get JobAdaptor";
        my $run_config_file = $cfg->run_config_file
            or confess "config_file not set in [run] section of .ini file";

        my $submitted_job_count = 0;
        my $skipped_job_count = 0;
        foreach my $job_id (@$job_ids) {
            my $conf = $conf_aptr->fetch_by_Job_id($job_id)
                or confess "Failed to fetch conf by job id: '$job_id'";

            my $job = $job_aptr->fetch_by_db_id($job_id)
                or confess "Failed to retrieve job with fetch_by_db_id: '$job_id'";

            #Check the job state
            unless ($job->state eq 'created') {
                print "Skipping job id=", $job->id, " as already '"
                    . $job->state . "'\n";
                $skipped_job_count++;
                next;
            }

            # The runner script is chosen by the class of the conf object.
            my $conf_class  = ref($conf);
            my $script_name = $scripts->{$conf_class}
                or confess "Don't know how to submit a job of conf class '"
                    . $conf_class . "'";

            my $output_file = $cfg->lsf_output_dir . '/' . $dba->db
                . '_job_id_' . $job_id . '.out';
            $output_file =~ s/\/\//\//g;

            my $runner_string = 'bsub -o ' . $output_file
                . ' -q ' . $cfg->lsf_queues;
            $runner_string .= " $additional_params" if $additional_params;
            $runner_string .= " $run_dir" . '/' . $script_name
                . ' --job_id=' . $job_id . ' --config_file='
                . $run_config_file;

            # Only record the job as submitted when bsub really succeeded.
            # system() returns -1 if the command could not be started and
            # the wait status otherwise (0 on success).
            my $status = system $runner_string;
            if ($status != 0) {
                my $reason = $status == -1
                    ? "could not be started: $!"
                    : 'failed with exit code ' . ($status >> 8)
                        . " (wait status $status)";
                confess "bsub for job id '$job_id' $reason"
                    . "\nCommand was: $runner_string";
            }

            #Update the job state here.
            $job->state('submitted');
            $job_aptr->update($job);
            print "Submitted job id=", $job->id, "\n";

            $submitted_job_count++;
            if ($max_jobs and $submitted_job_count >= $max_jobs) {
                # No newline here: the summary below starts with one.
                print "  Reached --max_jobs=$max_jobs";
                last;
            }
            sleep 1 unless $fast_submit;
        }
        print "\nTotal skipped  : $skipped_job_count" . '/' . scalar(@$job_ids)
            , " jobs\n";
        print "Total submitted: $submitted_job_count" . '/' . scalar(@$job_ids)
            , " jobs\n";
    }
}

# set_and_verify_lsf_output_dir($cfg, $specified_output_dir)
#
# Decides which directory the LSF output files go to. A directory given on
# the command line is checked and stored in $cfg; otherwise the value already
# held by $cfg->lsf_output_dir is checked and used. Either way the directory
# must be a fully qualified path (verify_full_path confesses otherwise).
# Confesses if neither source provides a directory. Returns 1.
sub set_and_verify_lsf_output_dir {
    my ( $cfg, $specified_output_dir ) = @_;

    # Nothing on the command line: fall back to the configured default.
    unless ($specified_output_dir) {
        my $default_dir = $cfg->lsf_output_dir;
        confess "No output_dir specified in [lsf] section of .ini file"
            unless $default_dir;

        verify_full_path($default_dir);
        print STDERR "Using default lsf output dir: '$default_dir'\n";
        return 1;
    }

    # Command-line value overrides whatever the configuration holds.
    verify_full_path($specified_output_dir);
    $cfg->lsf_output_dir($specified_output_dir);
    if ($debug) {
        print STDERR "Set LSF output dir to: '$specified_output_dir'\n";
    }
    return 1;
}

# verify_full_path($path)
#
# Returns 1 when $path begins with '/', i.e. is a fully qualified path;
# confesses otherwise.
sub verify_full_path {
    my ( $path ) = @_;

    my $leading_char = substr($path, 0, 1);
    return 1 if $leading_char eq '/';

    confess "Must specify a fully qualified path : '$path'";
}

# verify_submission_hostname($cfg)
#
# Checks that the machine this script runs on may submit LSF jobs: the
# current hostname must start with one of the whitespace-separated names
# returned by $cfg->lsf_submission_host_names. Returns 1 when it does;
# confesses otherwise.
sub verify_submission_hostname {
    my ( $cfg ) = @_;

    my $current_host = hostname();
    my $host_names   = $cfg->lsf_submission_host_names;
    $host_names = '' unless defined $host_names;

    # split ' ' (unlike split /\s+/) ignores leading whitespace, so no empty
    # name is produced; an empty name would match every host.
    my @acceptable_hosts = split ' ', $host_names;

    my $valid;
    foreach my $acceptable (@acceptable_hosts) {
        # \Q...\E: configured names are literal prefixes, not patterns, so a
        # '.' in a host name must only match a '.'.
        if ($current_host =~ /^\Q$acceptable\E/) {
            $valid++;
            last;
        }
    }
    if ($valid) {
        print "Submission host ok : '$current_host'\n";
        return 1;
    } else {
        confess "Host '$current_host' not acceptable for submission"
            . "\nName should start as one of: "
            , $host_names;
    }
}

# set_and_verify_lsf_queue($cfg, $specified_queue)
#
# Chooses the LSF queue to submit to and stores it in $cfg->lsf_queues,
# replacing the whitespace-separated list of allowed queues held there.
# Without $specified_queue the first configured queue is used; otherwise
# $specified_queue must be one of the configured queues.
# Returns 1 on success. Confesses if no queues are configured or if the
# specified queue is not among them.
sub set_and_verify_lsf_queue {
    my ( $cfg, $specified_queue ) = @_;

    my $configured = $cfg->lsf_queues;
    $configured = '' unless defined $configured;

    # split ' ' (unlike split /\s+/) ignores leading whitespace; otherwise a
    # value such as " normal long" would make the default queue ''.
    my @queues = split ' ', $configured;
    unless (@queues) {
        confess "No queues specified in [lsf] section of .ini file";
    }

    #Return the first queue in the .ini file as a default
    unless ($specified_queue) {
        $cfg->lsf_queues($queues[0]);
        print "Using default queue: '$queues[0]'\n";
        return 1;
    }

    #Otherwise verify the one specified on command line
    if (grep { $_ eq $specified_queue } @queues) {
        $cfg->lsf_queues($specified_queue);
        print "Specified queue ok : '$specified_queue'\n";
        return 1;
    }

    confess "Could not verify lsf_queue: '$specified_queue'"
        . "\nShould be one of: ", $configured;
}

# configure_parameters()
#
# Returns two hashrefs, ($mandatory, $optional), each mapping a configuration
# section name to an arrayref of parameter names in that section.
# NOTE(review): not called anywhere in this file; presumably looked up by the
# configuration loader - confirm before changing or removing.
sub configure_parameters {

    my %mandatory = (
        'analysis_database' => [ 'db', 'host', 'user' ],
        'lsf'               => [ 'output_dir', 'queues'
            , 'execution_host_type', 'submission_host_names' ],
        'run'               => [ 'config_file' ],
    );

    my %optional = (
        'analysis_database' => [ 'pass', 'port' ],
    );

    return (\%mandatory, \%optional);
}


