package FpcRead;

=head1 NAME

FpcRead -- Read an FPC file and save the pertinent information into the database

=head1 SYNOPSIS

 my $boolean = FpcRead->new($configuration, $db_manager, $project);

=head1 DESCRIPTION

This package reads an FPC file and saves the pertinent data into the database.
FpcRead, itself, is not an object and cannot be instantiated.

=head1 VERSION

0.001 (last update: 6/30/04)

=head1 AUTHOR

Chet Langin, clangin@siu.edu
SIU Plant Biotechnology and Genomics Core-facility

=head1 BUGS

None known.

=head1 SEE ALSO

 extropy
 ExtropyConstants
 ExtropyUtils
 Extropy::MenuMain

=head1 COPYRIGHT

Copyright 2004, Chet Langin, All Rights Reserved.
This program is free software.  You may copy or redistribute it under the
same terms as Perl itself.

=head1 METHODS

The remainder of this document describes the methods available to the
programmer.

=cut

# load the pragmas
use warnings;
use strict;

# load other modules
use Time::HiRes;    # for the timer
use ExtropyConstants;
use ExtropyUtils;

# package variables
# These are file-scoped and shared by file_name_ok() and advance_display().
my $line_counter = 0;           # lines processed so far; advanced by advance_display()
my $clone_counter = 0;          # records whose first field has been seen
my $bad_clone_counter = 0;      # clones whose name starts with "!"
my $good_clone_counter = 0;     # all other clones
my $clone_name = "";            # clone name of the record currently being read
my $previous_line_blank = 0;    # true when the preceding line was blank
my $status = "Comments";        # scan phase: "Comments", "Bad Clones" or "Good Clones"
my $field_number = 0;           # position of the current line within its record (1 = Clone line)
my $max_field_number = 0;       # largest field number seen
my $loci_matches = 0;           # Positive_Locus / Positive_Probe lines seen
my $mlg_name = "";              # MLG parsed from the most recent locus line
my $loci_name = "";             # locus name parsed from the most recent locus line
my $contig_matches = 0;         # incremented on the first Map line of each pair
my $first_map = 1;              # boolean: the next Map line is the first of a pair
my $first_contig_name = "";     # contig name taken from the first Map line of a pair
my $contig_start = 0;           # location taken from the first Map line of a pair
my $contig_end = 0;             # location taken from the second Map line of a pair

# subroutines
sub advance_display();

# ******************************** new ******************************

=head2 new()

 my $boolean = FpcRead->new($configuration, $db_manager, $project);

Reads an FPC file and loads the data into the database.

=cut

# ---------------------------------------------------------------------
# new($configuration, $db_manager, $project)
#
# Interactive entry point, called as a class method.  Checks that a project
# is active and that an MLG file has been read, obtains an FPC file name
# (either the one already recorded in the project, when the user asks for a
# refresh, or one typed at the prompt) and hands it to file_name_ok() to be
# loaded.
#
# Parameters:
#   $configuration - object whose save($project) method persists the project
#   $db_manager    - database manager, passed through to file_name_ok()
#   $project       - hash ref; reads {current_project}, {mlg_file} and
#                    {fpc_file}, and writes {fpc_file} on success
#
# Returns TRUE when a file was loaded and the configuration saved, FALSE
# otherwise (prerequisites missing, user entered "Q", user declined to
# replace existing data, or user gave up after a failed file name).
sub new {
    my $self          = shift;    # class name; unused
    my $configuration = shift;
    my $db_manager    = shift;
    my $project       = shift;

    my $file_name     = "";
    my $file_obtained = TRUE;
    my $refresh       = FALSE;

    message_start;

    if($project->{current_project} eq "") {
        message("You must activate a project, first.");
        $file_obtained = FALSE;
    } # if
    elsif($project->{mlg_file} eq "") {
        message("You must read an MLG file, first.");
        $file_obtained = FALSE;
    } # elsif
    else {
        if($project->{fpc_file} ne "") {
            message("Data from FPC file $project->{fpc_file}");
            message("is already in the database.");
            if(yes("Refresh the database from this file?")) {
                $file_name = $project->{fpc_file};
                $refresh   = TRUE;
            } # if
            elsif(!yes("Delete this existing data in the database and continue?")) {
                # The user wants to keep the existing data: leave at once.
                return FALSE;
            } # elsif
        } # if

        my $loop = TRUE;
        while($loop) {
            # In refresh mode the file name is already known; otherwise ask.
            if(!$refresh) {
                message("Give the /path/to/file.fpc.");
                blank_line;
                $file_name = prompt("Enter path and FPC file name");
            } # if

            if($file_name eq "Q") {
                $loop          = FALSE;
                $file_obtained = FALSE;
            } # if
            elsif(file_name_ok($file_name, $configuration, $db_manager, $project)) {
                $loop          = FALSE;
                $file_obtained = TRUE;
            } # elsif
            else {
                $file_obtained = FALSE;

                # The recorded file could not be loaded.  Drop out of refresh
                # mode so that "try another file name" really prompts for a
                # new name instead of retrying the same file forever.
                $refresh = FALSE;

                if(!yes("Try another file name?")) {
                    $loop = FALSE;
                } # if
            } # else
        } # while
    } # else

    if($file_obtained) {
        # save the configuration
        $project->{fpc_file} = $file_name;
        $configuration->save($project);
        press_enter;
        return TRUE;
    } # if

    press_enter;
    return FALSE;
} # new

# ******************************** file_name_ok ******************************

=head2 file_name_ok

 my $boolean = file_name_ok($file_name, $configuration, $db_manager, $project);

Determines if a file name is acceptable.  If the file name is ok, then the
FPC file is read and the data is stored in MySQL for later use.  For internal
usage only.

=cut

# ---------------------------------------------------------------------------------
# _split_locus_name($loci_long_name)
#
# Private helper.  Splits the quoted value of a Positive_Locus/Positive_Probe
# line into a locus name and a normalised MLG name.
#
# The MLG is taken from a recognised suffix at the end of the string
# (optionally followed by question marks); everything before the suffix is
# the locus name.  When no suffix is recognised the whole string is the locus
# name and the MLG is "unknown".
#
# Returns the list ($loci_name, $mlg_name).
sub _split_locus_name {
    my $loci_long_name = shift;

    my $locus = $loci_long_name;
    my $mlg   = "unknown";

    # The "+" in "d1a+q" is a literal plus sign and must be escaped.
    if($loci_long_name =~ /(A1|A2|B|B1|B2|C1|C2|D|D1|D1A|D1a|d1a\+q|Q|D1AQ|D1B|D1BW|D2|E|F|G|H|I|J|K|L|M|N|O|unknown|Y)\?*$/i) {
        my $suffix_start = $-[0];    # offset where the MLG suffix begins
        $mlg   = uc($1);
        $locus = substr($loci_long_name, 0, $suffix_start);

        # strip a dangling "d1a+" left over when only the "Q" was matched
        $locus =~ s/d1a\+$//;
    } # if

    if($locus =~ /^sat/) {
        $locus = ucfirst($locus);
    } # if

    # Ambiguous groups are recorded as "unknown".  The pattern is anchored at
    # both ends so that D1B is not caught by the bare "B" alternative, and the
    # upper-cased literal "UNKNOWN" is folded back to the usual spelling.
    $mlg = "unknown" if($mlg =~ /^(?:B|D|D1|Y|UNKNOWN|\?)$/);
    $mlg = "D1AQ"    if($mlg =~ /D1A|Q/);
    $mlg = "D1BW"    if($mlg =~ /D1B/);

    return ($locus, $mlg);
} # _split_locus_name

# ---------------------------------------------------------------------------------
# file_name_ok($file_name, $configuration, $db_manager, $project)
#
# Opens the FPC file, empties the clones, contigs, clone2locus and
# contig2clone tables, and reloads them from the file.  Scanning stops at the
# first line containing "Markerdata".  Afterwards the per-contig start, end,
# length and clone count are written to the contigs table, the average clone
# length and band factor are stored in $project, and the statements in
# ../working_data/fpc_adjust.txt are optionally executed.
#
# Parameters:
#   $file_name     - path of the FPC file to read
#   $configuration - not used here; accepted for call compatibility
#   $db_manager    - object providing connect, execute, disconnect and {dbh}
#   $project       - hash ref; {locus_names_crosschecked}, {locus_names_updated},
#                    {ave_clone_length} and {band_factor} are written
#
# Returns TRUE when the file was opened and processed, FALSE when it could
# not be opened.  NOTE: a record without a clone name, a first field that is
# not "Clone", or mismatched contig names in a Map pair terminate the whole
# program via exit.
sub file_name_ok {
    my $file_name     = shift;
    my $configuration = shift;
    my $db_manager    = shift;
    my $project       = shift;

    my $input_fh;
    if(!open($input_fh, "<", $file_name)) {
        my $open_error = "$!";    # capture before anything can overwrite it
        message_start;
        message("Could not read $file_name");
        message($open_error);

        # Nothing was opened or connected, so there is nothing to release.
        return FALSE;
    } # if

    # Read the FPC input file
    # Start the timer
    my $start_time = Time::HiRes::time();

    # force unbuffered output
    my $old_fh = select(STDOUT);
    $| = 1;
    select($old_fh);

    # The scanning state lives in file-scoped variables; reset all of it so
    # that a second run in the same process starts from a clean slate.
    $line_counter        = 0;
    $clone_counter       = 0;
    $bad_clone_counter   = 0;
    $good_clone_counter  = 0;
    $clone_name          = "";
    $previous_line_blank = 0;
    $status              = "Comments";
    $field_number        = 0;
    $max_field_number    = 0;
    $loci_matches        = 0;
    $mlg_name            = "";
    $loci_name           = "";
    $contig_matches      = 0;
    $first_map           = 1;
    $first_contig_name   = "";
    $contig_start        = 0;
    $contig_end          = 0;

    message("FPC file $file_name opened");
    message("Connecting to the database.");
    $db_manager->connect;
    message("Deleting any previous FPC data in the database.");
    $db_manager->execute("delete from clones");
    $db_manager->execute("delete from contigs");
    $db_manager->execute("delete from clone2locus");
    $db_manager->execute("delete from contig2clone");
    $project->{locus_names_crosschecked} = FALSE;
    $project->{locus_names_updated}      = FALSE;

    message("Entering data into the database...");
    message("Scanning $status");
    print " ";

    # field names that carry nothing of interest
    my %skipped_field = map { $_ => 1 } qw(
        Gel_number
        Bands
        Creation_date
        Modified_date
        Fp_number
        Approximate_match_to_cosmid
        Exact_match_to_cosmid
        Pseudo_match_to_cosmid
    );

    # check each line of the file
    LINE: while(my $current_line = <$input_fh>) {
        chomp $current_line;

        # stop at the Markerdata line
        last LINE if($current_line =~ /Markerdata/);

        # see if a comment
        if(substr($current_line, 0, 2) eq "//") {
            if($status ne "Comments") {
                blank_line;
                message("Comments in body of file: $line_counter\n");
                blank_line;
            } # if
            advance_display();
            next LINE;
        } # if

        # switch from comments to bad clones; the first non-comment line
        # is consumed here and treated as a record separator
        if($status eq "Comments") {
            $status = "Bad Clones";
            print "\n";
            message("Scanning $status");
            print " ";
            $previous_line_blank = 1;
            advance_display();
            next LINE;
        } # if

        # note blank lines
        if($current_line =~ /^\s*$/) {
            if($previous_line_blank) {
                print "\nMultiple blank lines in succession: $line_counter\n";
                advance_display();
                next LINE;
            } # if
            $previous_line_blank = 1;
            $field_number        = 0;
            advance_display();
            next LINE;
        } # if

        # note the field number of the current record
        if($previous_line_blank) {
            $field_number = 1;
        } # if
        else {
            $field_number++;
            $max_field_number = $field_number if($field_number > $max_field_number);
        } # else

        # get the field name
        $previous_line_blank = 0;
        my @words      = split / /, $current_line;
        my $field_name = $words[0];

        # process Field 1 (the clone field)
        if($field_number == 1) {
            $clone_counter++;
            my @quoted_parts = split /"/, $current_line;
            $clone_name = $quoted_parts[1];

            # see if a clone name is present
            if(!$clone_name) {
                message("No clone name: $line_counter.");
                exit;
            } # if

            # see if the field name is "Clone"
            if($field_name ne "Clone") {
                message("First field name is not clone: $line_counter.");
                exit;
            } # if

            # a leading "!" marks a bad clone
            if(substr($clone_name, 0, 1) eq "!") {
                $bad_clone_counter++;

                # Save the clone name in the clones database table
                my $eid   = $db_manager->{dbh}->quote("");
                my $clone = $db_manager->{dbh}->quote("$clone_name");
                my $type  = $db_manager->{dbh}->quote("bad");
                $db_manager->execute("insert into clones values($eid, $clone, $type, -1)");

                if($status ne "Bad Clones") {
                    message("\nBad Clone out of place: $line_counter.");
                } # if
            } # if
            else {
                $good_clone_counter++;

                # Save the clone name in the clones database table
                my $eid   = $db_manager->{dbh}->quote("");
                my $clone = $db_manager->{dbh}->quote("$clone_name");
                my $type  = $db_manager->{dbh}->quote("alpha");
                $db_manager->execute("insert into clones values($eid, $clone, $type, -1)");

                # the first good clone ends the bad-clone phase
                if($status eq "Bad Clones") {
                    $status = "Good Clones";
                    print "\n";
                    message("Scanning $status.");
                    print " ";
                } # if
            } # else

            advance_display();
            next LINE;
        } # if($field_number == 1)

        # check and skip uninteresting field names
        if($skipped_field{$field_name}) {
            advance_display();
            next LINE;
        } # if

        # check for loci
        elsif($field_name eq "Positive_Locus" || $field_name eq "Positive_Probe") {
            $loci_matches++;
            my @loci_name_array = split /"/, $current_line;
            my $loci_long_name  = $loci_name_array[1];

            # find the MLG's
            ($loci_name, $mlg_name) = _split_locus_name($loci_long_name);

            # Save the relations in the clone2locus database table
            my $eid     = $db_manager->{dbh}->quote("");
            my $clone   = $db_manager->{dbh}->quote("$clone_name");
            my $locus   = $db_manager->{dbh}->quote("$loci_name");
            my $db_mlg  = $db_manager->{dbh}->quote("$mlg_name");
            my $db_good = $db_manager->{dbh}->quote("no");
            $db_manager->execute("insert into clone2locus values($eid, $clone, $locus, $db_mlg)");
            $db_manager->execute("insert into clone_loci values($eid, $locus, $db_good)");

            advance_display();
            next LINE;
        } # elsif

        # check for contigs; Map lines are handled in pairs, the first giving
        # the start location and the second the end location
        elsif($field_name eq "Map") {
            my @map_contig_array   = split /"/, $current_line;
            my $contig_name        = $map_contig_array[1];
            my @map_location_array = split / /, $current_line;
            my $location           = $map_location_array[4];

            if($first_map) {
                $first_map         = 0;
                $contig_matches++;
                $first_contig_name = $contig_name;
                $contig_start      = $location;
            } # if
            else {
                $first_map = 1;
                if($first_contig_name ne $contig_name) {
                    print "Contig names do not match: $line_counter\n";
                    exit;
                } # if
                $contig_end = $location;

                # Save the relations in the contig2clone database table
                my $db_eid    = $db_manager->{dbh}->quote("");
                my $db_contig = $db_manager->{dbh}->quote("$contig_name");
                my $db_clone  = $db_manager->{dbh}->quote("$clone_name");
                my $db_start  = $db_manager->{dbh}->quote("$contig_start");
                my $db_end    = $db_manager->{dbh}->quote("$contig_end");
                my $length    = $contig_end - $contig_start;
                my $db_length = $db_manager->{dbh}->quote("$length");
                my $db_dup    = $db_manager->{dbh}->quote("1");
                $db_manager->execute("insert into contig2clone values($db_eid, $db_contig, $db_clone, $db_start, $db_end, $db_length, $db_dup, 'yes')");

                # Save the contig name in the contigs database table
                my $db_type = $db_manager->{dbh}->quote("beta");
                $db_manager->execute("insert into contigs values($db_eid, $db_contig, $db_type, -1, -1, -1, -1, -1)");
            } # else

            advance_display();
            next LINE;
        } # elsif

        # report any other field names
        else {
            message("Unknown field name: $field_name (Line $line_counter Field $field_number).");
        } # else

        advance_display();
    } # while

    print "\n";
    message("Getting contig starts, ends, and counts...");
    my $sth = $db_manager->execute("select ctg, min(start), max(end), count(*) from contig2clone group by ctg");
    message("Calculating and saving contig lengths...");
    while(my @row = $sth->fetchrow_array()) {
        my ($ctg, $start, $end, $count) = @row;
        my $db_ctg    = $db_manager->{dbh}->quote($ctg);
        my $db_start  = $db_manager->{dbh}->quote($start);
        my $db_end    = $db_manager->{dbh}->quote($end);
        my $length    = $end - $start;
        my $db_length = $db_manager->{dbh}->quote($length);
        my $db_count  = $db_manager->{dbh}->quote($count);
        $db_manager->execute("update contigs set start = $db_start, end = $db_end, length = $db_length, clone_c = $db_count where ctg = $db_ctg");
    } # while

    message("Getting average clone length");
    my $sth2 = $db_manager->execute("select avg(length) from contig2clone");
    while(my @row2 = $sth2->fetchrow_array()) {
        $project->{ave_clone_length} = $row2[0];
    } # while

    # avg() is NULL (undef here) when contig2clone is empty, and a zero
    # average would make the division below fatal, so guard both cases.
    my $ave_clone_length = $project->{ave_clone_length};
    if($ave_clone_length) {
        message("Average clone length is $ave_clone_length bands");
        message("Calculating band factor");
        my $band_factor = AVE_CLONE_LENGTH / $ave_clone_length;    # (bases/bands)
        $project->{band_factor} = $band_factor;
        message("Band factor: $band_factor bases per band");
    } # if
    else {
        message("No average clone length available; band factor not calculated");
    } # else

    # print closing information
    print "\n";
    message("Bad Clones: $bad_clone_counter.");
    message("Good Clones: $good_clone_counter.");
    message("Total Clones: $clone_counter.");
    message("Loci matches: $loci_matches.");
    message("Contig matches: $contig_matches.");
    message("Max Field Number: $max_field_number.");
    blank_line;

    if(yes("Use fpc_adjust.txt file to make special adjustments?")) {
        my $adjust_fh;
        if(!open($adjust_fh, "<", "../working_data/fpc_adjust.txt")) {
            message("Could not open fpc_adjust.txt file");
            message("Special adjustments not made");
        } # if
        else {
            # the file was opened ok; each non-blank line is one statement
            while(my $current_query = <$adjust_fh>) {
                chomp $current_query;
                next if($current_query =~ /^\s*$/);
                message("Executing \"$current_query\"");
                $db_manager->execute("$current_query");
            } # while
            close $adjust_fh;
        } # else
    } # if
    else {
        message("Not making special adjustments");
    } # else

    # stop the timer
    my $run_time = Time::HiRes::time() - $start_time;
    if($run_time < 60) {
        message("FPC file read time: $run_time seconds.");
    } # if
    else {
        my $minutes = int($run_time / 60);
        $run_time %= 60;
        message("FPC file read time: $minutes minutes, $run_time seconds.");
    } # else

    message("Disconnecting from the database.");
    $db_manager->disconnect;
    close $input_fh;

    return TRUE;
} # file_name_ok

# ******************************** advance_display() *************************

=head2 advance_display()

 advance_display()

Counts one processed line and puts dots on the display so that the user can
see that the program is running: one dot per line while the leading comments
are scanned, one dot per 10000 lines afterwards.

=cut

# ---------------------------------------------------------------------------------
# The empty prototype is kept so that this definition matches the forward
# declaration near the top of the file.
sub advance_display() {
    $line_counter++;
    if($status eq "Comments" || $line_counter % 10000 == 0) {
        print ".";
    } # if
} # advance_display()

1;