#!/usr/bin/perl -w
use strict;
use warnings;

# examine_fpc_file.pl, last update: 7/23/03
# examines an fpc file to see if it is faulty
# by Chet Langin, clangin@siu.edu
# SIU Plant Biotechnology and Genome Core-facility
#
# Usage: ./examine_fpc_file.pl input_file clone_output_file contig_output_file
#
# The input is scanned line by line until a line consisting of "Markerdata"
# is reached.  Records are separated by blank lines; the first field of each
# record must be a Clone field.  Two tab-separated files are written:
#   clone output : clone name, locus name, MLG name
#                  (one row per Positive_Locus / Positive_Probe field)
#   contig output: contig name, clone name, start, end
#                  (one row per pair of Map fields)
# Diagnostics and a summary go to STDOUT.  Fatal format problems print a
# message and stop the scan immediately.

# start the timer
use Time::HiRes;
my $start_time = Time::HiRes::time();

# check parameters
if (scalar(@ARGV) != 3) {
    print "Usage: ./examine_fpc_file.pl input_file clone_output_file contig_output_file\n";
    exit;
}
my $input_file_str         = $ARGV[0];
my $clone_output_file_str  = $ARGV[1];
my $contig_output_file_str = $ARGV[2];

# force unbuffered output so the progress dots appear as they are printed
my $old_fh = select(STDOUT);
$| = 1;
select($old_fh);

# open the files
open(my $input_fh, "<", $input_file_str)
    or die "Cannot open input file: $!\n";
open(my $clone_output_fh, ">", $clone_output_file_str)
    or die "Cannot open clone output file: $!\n";
open(my $contig_output_fh, ">", $contig_output_file_str)
    or die "Cannot open contig output file: $!\n";

# global variables
# $line_counter is incremented by advance_display() AFTER a line has been
# handled, so the number shown in a diagnostic is the count of lines handled
# before the offending one.
my $line_counter        = 0;
my $status              = "Comments";    # "Comments" -> "Bad Clones" -> "Good Clones"
my $previous_line_blank = 0;
my $field_number        = 0;
my $bad_clone_counter   = 0;
my $good_clone_counter  = 0;
my $clone_counter       = 0;
my $loci_matches        = 0;
my $contig_matches      = 0;
my $max_field_number    = 0;
my $first_map           = 1;             # boolean: next Map field opens a pair
my $first_contig_name   = "";
my $contig_start        = 0;
my $contig_end          = 0;
my $clone_name          = "";
my $mlg_name            = "";
my $loci_name           = "";

# field names that are recognised but carry nothing this script checks
my %is_skipped_field = map { $_ => 1 } qw(
    Gel_number
    Bands
    Creation_date
    Modified_date
    Fp_number
    Approximate_match_to_cosmid
    Exact_match_to_cosmid
    Pseudo_match_to_cosmid
);

# start the display
print "*********\n";
print "****************************\n";
print "****************************************************\n";
print "Scanning $status";

# check each line of the file
# The continue block runs after every "next LINE" and after falling off the
# end of the body, so each handled line advances the display exactly once.
# "last" and "exit" bypass it.
LINE: while (my $current_line = <$input_fh>) {
    chomp $current_line;

    # stop at the Markerdata line
    last LINE if $current_line eq "Markerdata";

    # see if a comment
    if (substr($current_line, 0, 2) eq "//") {
        print "\nComments in body of file: $line_counter\n"
            if $status ne "Comments";
        next LINE;
    }

    # switch from comments to bad clones; the first non-comment line itself
    # is consumed here and treated as the blank line preceding a record
    if ($status eq "Comments") {
        $status = "Bad Clones";
        print "\nScanning $status";
        $previous_line_blank = 1;
        next LINE;
    }

    # note blank lines
    if ($current_line =~ /^\s*$/) {
        if ($previous_line_blank) {
            print "\nMultiple blank lines in succession: $line_counter\n";
            next LINE;
        }
        $previous_line_blank = 1;
        $field_number        = 0;
        next LINE;
    }

    # note the field number of the current record
    if ($previous_line_blank) {
        $field_number = 1;
    }
    else {
        $field_number++;
        $max_field_number = $field_number
            if $field_number > $max_field_number;
    }

    # get the field name (text before the first space)
    $previous_line_blank = 0;
    my @words      = split / /, $current_line;
    my $field_name = $words[0];

    # process Field 1 (the clone field)
    if ($field_number == 1) {
        $clone_counter++;
        $clone_name = (split /"/, $current_line)[1];

        # see if a clone name is present; test defined/non-empty rather
        # than truth so that a clone literally named "0" is accepted
        if (!defined $clone_name || $clone_name eq "") {
            print "No clone name: $line_counter\n";
            exit;
        }

        # see if the field name is "Clone"
        if ($field_name ne "Clone") {
            print "First field name is not clone: $line_counter\n";
            exit;
        }

        # a name starting with "!" marks a bad clone
        if (substr($clone_name, 0, 1) eq "!") {
            if ($status eq "Bad Clones") {
                $bad_clone_counter++;
            }
            else {
                print "\nBad Clone out of place: $line_counter\n";
            }
        }
        # else is a good clone
        else {
            $good_clone_counter++;
            if ($status eq "Bad Clones") {
                $status = "Good Clones";
                print "\nScanning $status";
            }
        }
        next LINE;
    }

    # check and skip uninteresting field names
    next LINE if $is_skipped_field{$field_name};

    # check for loci
    if ($field_name eq "Positive_Locus" || $field_name eq "Positive_Probe") {
        $loci_matches++;
        my $loci_long_name = (split /"/, $current_line)[1];
        $loci_long_name = "" if !defined $loci_long_name;

        # find the MLG's: branch on the match result itself, because a
        # failed match does not reset $1
        if ($loci_long_name
            =~ /(A1|A2|B1|B2|C1|C2|D1A|D1a|Q|D1AQ|D1B|D1BW|D2|E|F|G|H|I|J|K|L|M|N|O|unknown)$/)
        {
            $mlg_name = $1;
        }
        else {
            print "No MLG match: $loci_long_name $line_counter\n";
            exit;
        }

        # the locus name is the long name with the matched suffix removed;
        # this must use the suffix as matched, before it is normalised below
        $loci_name = substr($loci_long_name, 0,
            length($loci_long_name) - length($mlg_name));

        # fold the alternative spellings into the two combined group names
        $mlg_name = "D1AQ" if $mlg_name =~ /D1A|D1a|Q/;
        $mlg_name = "D1BW" if $mlg_name =~ /D1B/;

        print {$clone_output_fh} "$clone_name\t$loci_name\t$mlg_name\n";
    }

    # check for contigs; Map fields are consumed in pairs, the first giving
    # the start location and the second the end location
    elsif ($field_name eq "Map") {
        my $contig_name = (split /"/, $current_line)[1];
        my $location    = $words[4];    # fifth space-separated token

        if ($first_map) {
            $first_map = 0;
            $contig_matches++;
            $first_contig_name = $contig_name;
            $contig_start      = $location;
        }
        else {
            $first_map = 1;
            if ($first_contig_name ne $contig_name) {
                print "Contig names do not match: $line_counter\n";
                exit;
            }
            $contig_end = $location;
            print {$contig_output_fh}
                "$contig_name\t$clone_name\t$contig_start\t$contig_end\n";
        }
    }

    # report any other field names
    else {
        print "Unknown field name: $field_name (Line $line_counter Field $field_number)\n";
        exit;
    }
}
continue {
    advance_display();
}

# close the files; buffered write errors only surface at close, so the
# output handles are checked
close($input_fh);
close($clone_output_fh)
    or die "Cannot close clone output file: $!\n";
close($contig_output_fh)
    or die "Cannot close contig output file: $!\n";

# print closing information
print "\n";
print "Bad Clones: $bad_clone_counter\n";
print "Good Clones: $good_clone_counter\n";
print "Total Clones: $clone_counter\n";
print "Loci matches: $loci_matches\n";
print "Contig matches: $contig_matches\n";
print "Max Field Number: $max_field_number\n";

# stop the timer
my $run_time = Time::HiRes::time() - $start_time;
print "Run time: $run_time seconds\n";
print "****************************************************\n";
print "****************************\n";
print "*********\n";

# advance_display()
# Counts one handled line and prints a progress dot: one dot per line while
# the leading comments are being scanned, afterwards one dot per 10000 lines.
# Takes no arguments and returns nothing; it updates the file-level
# $line_counter and reads $status.
sub advance_display {
    $line_counter++;
    if ($status eq "Comments" || $line_counter % 10000 == 0) {
        print ".";
    }
    return;
}