#!/usr/local/bin/perl
#
# Copyright 2005-2009 Genes to Cognition Programme (G2C) and 
# Genome Research Limited (GRL)
#
# Contact: webmaster@genes2cognition.org
# See    : www.genes2cognition.org/software/southern_blot
#
# You may distribute this file/module under the terms of the Perl artistic
# licence: http://www.perlfoundation.org/artistic_license_2_0

=pod

=head1 NAME - create_probe_search_db_tables

=head1 COMMAND LINE PARAMETERS

Required parameters
  --db                          database name
  --host                        MySQL host
  --user                        MySQL user name

Optional parameters

  --pass                        password (if needed for user)
  --port                        else defaults to 3306

=head1 CONFIGURATION FILE

Not required

=head1 DESCRIPTION

Creates the tables necessary in the specified MySQL database for
automated design of Southern blot probes.

You should create the db first (manually), then run this script

Example command line:

./create_probe_search_db_tables --host=host --user=user
  --pass=pass --db=southern_blot_design

Connected to mysql on: host, port 3306

Made table: 'conf'
Made table: 'external_db'
Made table: 'hit'
Made table: 'hit_xref'
Made table: 'job'
Made table: 'job_error'
Made table: 'job_sequence'
Made table: 'probe'
Made table: 'probe_sequence'
Made table: 'sequence'
Made table: 'sequence_hit'
Made table: 'xref'

=head1 CONTACT

G2C B<email> webmaster@genes2cognition.org

=cut

use strict;
use warnings;
use DBI;
use Carp;
use Getopt::Long 'GetOptions';
use GeneTargeting::Utils qw(
    show_perldoc
);

{
    my ($db, $user, $pass, $host, $port, $driver);

    GetOptions(
        'host=s'    => \$host,
        'port'      => \$port,
        'user=s'    => \$user,
        'db=s'      => \$db,
	'pass=s'    => \$pass,
    ) or usage();

    $driver = 'mysql';
    $port   ||= 3306;
                
    show_perldoc("Must specify --db") unless $db;

    my $dbh;
    my $dsn = "DBI:$driver:database=$db;host=$host;port=$port";
    
    eval {
        $dbh = DBI->connect($dsn, $user, $pass, {'RaiseError' => 1});
    };
    confess  "Can't connect to database '$db' : $@" if $@ or ! $dbh;
    $pass = undef;
    print "Connected to $driver on: $host, port $port\n\n";
    
    my %existing_table = map {$_->[0], 1}
       @{ $dbh->selectall_arrayref("show tables") };

    my %tables = (

        conf => q{
            conf_id             INT unsigned not null primary key auto_increment
            , conf_name         VARCHAR(40) not null
            , conf_class        VARCHAR(80) not null
            , conf_description  VARCHAR(120)
            , conf_text         TEXT not null

            , unique(conf_name)
            },

        external_db => q{
            external_db_id      INT unsigned not null primary key auto_increment
            , db_name           VARCHAR(30) not null
            , description       VARCHAR(255)
            , display_label     VARCHAR(40)
            , sequence_source   VARCHAR(255)

            , unique (db_name)
            },

        hit => q{
            hit_id          INT unsigned not null primary key auto_increment
            , xref_id       INT unsigned not null
            , md5hex        CHAR(32)
            , seq_length    INT unsigned not null

            , key xref_hit (xref_id, hit_id)
            },

        hit_xref => q{
            hit_id          INT unsigned not null
            , xref_id       INT unsigned not null

            , key (xref_id, hit_id)
            , key (hit_id, xref_id)
            },

        job => q{
            job_id              INT unsigned not null primary key auto_increment
            , conf_id           INT unsigned not null
            , state             ENUM('created', 'submitted', 'running', 'success'
                                    , 'failed') not null
            , key(conf_id)
            },

        job_error => q{
            job_id              INT unsigned not null
            , error_string      VARCHAR(80) not null
            },
            
        job_sequence => q{
        
            job_id             INT unsigned not null 
            , sequence_id      INT unsigned not null

            , primary key (job_id, sequence_id)
            , unique(sequence_id, job_id)
            },

        probe => q{
            probe_id            INT unsigned not null primary key auto_increment
            , name              VARCHAR(30) not null
            , description       VARCHAR(255)
            , assembly          VARCHAR(10) not null
            , chromosome        VARCHAR(10) not null
            , start             INT unsigned not null
            , end               INT unsigned not null
            , strand            enum('1','-1') not null
            , end_bias          enum('5prime','3prime')
            , unique (name)
            },

        probe_sequence => q{
            probe_id            INT unsigned not null
            , sequence_id       INT unsigned not null
            
            , key (sequence_id, probe_id)
            , key (probe_id, sequence_id)
            },

        sequence => q{
            sequence_id         INT unsigned not null primary key auto_increment
            , assembly          VARCHAR(10) not null
            , chromosome        VARCHAR(10) not null
            , start             INT unsigned not null
            , end               INT unsigned not null
            , strand            enum('1','-1') not null
            , dna               TEXT not null
            , key (assembly)
            , key (chromosome)
            , key (start)
            , key (end)
            , key (strand)
            },

        sequence_hit => q{
            sequence_id         INT unsigned not null 
            , hit_id            INT unsigned not null
            , conf_id           INT unsigned not null
            , score             INT unsigned not null
            , P_value           DOUBLE
            , query_start       INT unsigned not null
            , query_end         INT unsigned not null
            , query_frame       TINYINT unsigned
            , query_strand      TINYINT not null
            , subject_start     INT unsigned not null
            , subject_end       INT unsigned not null
            , subject_frame     TINYINT unsigned
            , subject_strand    TINYINT not null
            , cigar_line        TEXT
            , percent_id        FLOAT
            , soft_masked       INT unsigned

            , key (sequence_id, hit_id)
            , key (hit_id, sequence_id)
            , key (sequence_id, conf_id)
            },

        xref => q{
            xref_id             INT unsigned not null primary key auto_increment
            , accession         VARCHAR(40) not null
            , external_db_id    INT unsigned not null
            , description       VARCHAR(255)
            , gene_name         VARCHAR(60)

            , unique acc_db_id (accession, external_db_id)
            , fulltext (description, gene_name)
            },        
    );

    foreach my $tab (sort keys %tables) {
        if ($existing_table{$tab}) {
            carp "Table already exists: '$tab'\n";
        } else {
            $dbh->do("create table $tab ($tables{$tab})");
            print "Made table: '$tab'\n";
        }
    }


    END {
        $dbh->disconnect if $dbh;
    }
}
