package QtlRead; =head1 NAME QtlRead -- Read a QTL file and save the pertinent information into the database =head1 SYNOPSIS my $boolean = QtlRead -> new; =head1 DESCRIPTION This package reads a QTL file and saves the pertinent data into the database. QtlRead, itself, is not an object and cannot be instantiated. =head1 VERSION 0.001 (last update: 6/30/04) =head1 AUTHOR Chet Langin, clangin@siu.edu SIU Plant Biotechnology and Genomics Core-facility =head1 BUGS None known. =head1 SEE ALSO extropy ExtropyConstants ExtropyUtils Extropy::MenuMain =head1 COPYRIGHT Copyright 2004, Chet Langin, All Rights Reserved. This program is free software. You may copy or redistribute it under the same terms as Perl itself. =head1 METHODS The remainder of this document describes the methods available to the programmer. =cut # load the pragmas use warnings; use strict; # load other modules use ExtropyConstants; use ExtropyUtils; =head2 new() my $boolean = QtlRead->new; Reads a QTL file and loads the data into the database. =cut # package variables my $composite_counter = 0; my $qtl_locationless_counter = 0; my $previous_line_blank = 0; my $qtl_name = ""; my $qtl_counter = 0; my $gene = ""; my $locus_name = ""; my $locus_counter = 0; my $start = -1; my $end = -1; my $cm_start = -1; my $cm_end = -1; my $mlg = ""; my $longest_qtl = 0; my $zero_length_qtls = 0; my $shortest_qtl = 100000000; my $summate_qtl = 0; my $qtl_divisor = 0; my $longest_qtl_name = 0; # ******************************** new ****************************** sub new { my $self = shift; my $configuration = shift; my $db_manager = shift; my $project = shift; my $file_name = ""; my $file_obtained = TRUE; my $refresh = FALSE; message_start; if($project->{current_project} eq "") { message("You must activate a project, first."); $file_obtained = FALSE; } # if else { # A project has been activated if($project->{qtl_file} ne "") { message("Data from QTL file $project->{qtl_file}"); message("is already in the database."); if(yes("Refresh the database from this file?")) { $file_name = $project->{qtl_file}; $refresh = TRUE; } # if elsif(!yes("Delete this existing data in the database and continue?")) { return FALSE; } # elsif } # if my $loop = TRUE; while($loop) { if(!$refresh) { message("Give the /path/to/file.txt."); blank_line; $file_name = prompt("Enter path and QTL file name"); } # if if($file_name eq "Q") { $loop = FALSE; $file_obtained = FALSE; } # if elsif(file_name_ok($file_name, $configuration, $db_manager)) { $loop = FALSE; $file_obtained = TRUE; } # elsif else { $file_obtained = FALSE; if(!yes("Try another file name?")) { $loop = FALSE; } # if } # else } # while } # else(a project has been activated) if($file_obtained) { # save the configuration $project->{qtl_file} = $file_name; $configuration->save($project); press_enter; TRUE; } # if else { press_enter; FALSE; } # else } # new # ******************************** file_name_ok ****************************** =head2 file_name_ok my $boolean = QtlRead->file_name_ok($file_name, $configuration, $db_manager); Determines if a file name is acceptable. If the file name is ok, then the QTL file is read and the data is stored in MySQL for later use. For internal usage only. =cut # --------------------------------------------------------------------------------- sub file_name_ok { my $file_name = shift; my $configuration = shift; my $db_manager = shift; my $name_ok = TRUE; my $error = FALSE; my $number_lines = 0; my $number_qtls = 0; open(INPUT_FILE, "<", "$file_name") or $error = TRUE; if($error) { message_start; message("Could not read $file_name"); message("$!"); $name_ok = FALSE; } # if else { # Read the QTL input file message("QTL file $file_name opened"); message("Connecting to the database."); $db_manager->connect; message("Deleting previous QTL data in the database."); $db_manager->execute("delete from qtl"); $db_manager->execute("delete from bad_qtl"); $db_manager->execute("delete from qtl2locus"); message("Entering data into the database..."); # read the input file my @input_file = ; close INPUT_FILE; $number_lines = scalar(@input_file); # look at each line in the input file LINE:for(my $line_counter = 10; $line_counter < $number_lines; $line_counter++) { $_ = $input_file[$line_counter]; chomp; chop; if(/^\s*$/){ # ending a record next LINE if($previous_line_blank); if($composite_counter == 0) { # Save the QTL's in the bad_qtl database table my $name_length = length($qtl_name); $longest_qtl_name = $name_length if($name_length > $longest_qtl_name); my $eid = $db_manager->{dbh}->quote(""); my $db_qtl = $db_manager->{dbh}->quote("$qtl_name"); $db_manager->execute("insert into bad_qtl values($eid, $db_qtl)"); $qtl_locationless_counter++; } # if elsif($composite_counter != 2) { print "Composite counter is not 2: $line_counter\n"; } # elsif else { # this QTL has a location, process it # Save the relations in the qtl database table my $name_length = length($qtl_name); $longest_qtl_name = $name_length if($name_length > $longest_qtl_name); my $eid = $db_manager->{dbh}->quote(""); my $db_qtl = $db_manager->{dbh}->quote("$qtl_name"); my $db_mlg = $db_manager->{dbh}->quote("$mlg"); $gene = -1 if($gene eq ""); my $db_gene = $db_manager->{dbh}->quote("$gene"); $db_manager->execute("insert into qtl values($eid, $db_qtl, $db_mlg, $cm_start, $cm_end, $start, $end, $db_gene)"); } # else $qtl_name = ""; $gene = ""; $composite_counter = 0; $previous_line_blank = 1; next LINE; } # if $previous_line_blank = 0; my @fields = split / /; if($fields[0] eq "QTL") { if(!(/^\s*$/, chomp($input_file[$line_counter - 1]))) { print "previous line not empty: $line_counter\n"; } # if if($qtl_name ne "") { print "Has two QTL fields $line_counter\n"; } # if my @name_fields = split /"/; $qtl_name = $name_fields[1]; $qtl_counter++; } # if elsif(/^Putative_candidate_gene/) { # xxx my @name_fields = split /"/; $gene = $name_fields[1]; } # elsif elsif(/^Candidate_gene/) { # xxx my @name_fields = split /"/; $gene = $name_fields[1]; } # elsif elsif(/^Locus/) { my @name_fields = split /"/; $locus_name = $name_fields[1]; $locus_counter++; if($qtl_name eq "") { message("Locus $locus_name has no QTL match."); } # if else { my $eid = $db_manager->{dbh}->quote(""); my $db_qtl = $db_manager->{dbh}->quote("$qtl_name"); my $db_locus = $db_manager->{dbh}->quote("$locus_name"); $db_manager->execute("insert into qtl2locus values($eid, $db_qtl, $db_locus)"); } # else } # elsif elsif(/Composite_2/) { $composite_counter++; if($composite_counter == 1) { $fields[-1] = 0 if($fields[-1] < 0); $cm_start = $fields[-1]; $start = sprintf("%08.0f", $fields[-1] * FACTOR); # get the MLG my @fields = split /"/; # I added the below condition because not all of the entries have quote marks if(!$fields[1]) { @fields = split / /; } # if my $mlg_field = $fields[1]; my @mlg_sub_fields = split /-/, $mlg_field; $mlg = uc $mlg_sub_fields[0]; if(/(A1|A2|B1|B2|C1|C2|D1A|D1B|D2|E|F|G|H|I|J|K|L|M|N|O)/, $mlg) { # (nothing) } # if else { message("MLG $mlg not recognized."); } # else $mlg = "D1AQ" if($mlg eq "D1A"); $mlg = "D1BW" if($mlg eq "D1B"); } # if else { # composite_counter != 1 (it equals 2) $cm_end = $fields[-1]; $end = sprintf("%08.0f", $fields[-1] * FACTOR); $longest_qtl = $end - $start if(($end - $start) > $longest_qtl); if($end - $start == 0) { $zero_length_qtls++; print "zero-length QTL: $qtl_name\n"; } # if else { $shortest_qtl = $end - $start if(($end - $start) < $shortest_qtl); } # else $summate_qtl += $end - $start; $qtl_divisor++; } # else } # elsif } # for # print closing information message("QTL's: $qtl_counter ($qtl_locationless_counter have no locations given)."); message("Longest QTL: $longest_qtl."); message("Shortest QTL: $shortest_qtl."); my $average = $summate_qtl / $qtl_divisor; message("Average QTL: $average."); message("Zero-length QTL's: $zero_length_qtls."); message("Longest QTL name: $longest_qtl_name."); message("QTL/Locus relations: $locus_counter."); } # else message("Disconnecting from the database."); $db_manager->disconnect; close INPUT_FILE; if($name_ok) { TRUE; } # if else { FALSE; } # else } # project_name_ok 1