#!/usr/bin/perl -w
use strict;
use warnings;

# examine_fpc_file.plx, last update: 9/12/03
# examines an fpc file and extracts its data
# by Chet Langin, clangin@siu.edu
# SIU Plant Biotechnology and Genome Core-facility
#
# Usage:   ./examine_fpc_file.plx fpc_input_file
# Example: ./examine_fpc_file.plx fpc.txt
#
# The script scans the clone section of the input file (everything up to the
# first line containing "Markerdata") and writes three tab-separated files:
#   clone2locus_relations.txt    clone, locus, MLG
#   locus2clone_relations.txt    locus, clone, MLG
#   contig2clone_relations.txt   contig, clone, start, end
# Each file is then sorted with the external "sort" command into a
# "sorted_"-prefixed copy, and summary counters are printed to STDOUT.

# start the timer
use Time::HiRes;
my $start_time = Time::HiRes::time();

# check parameters
if (scalar(@ARGV) != 1) {
    print "Usage: ./examine_fpc_file.plx fpc_input_file\n";
    print " Example: ./examine_fpc_file.plx fpc.txt\n";
    exit;
}

# the literal argument "1" is a shortcut for the default input file
my $input_file_str = $ARGV[0];
if ($ARGV[0] eq "1") {
    $input_file_str = "webdata.fpc";
}

my $clone_output_file_str  = "clone2locus_relations.txt";
my $locus_output_file_str  = "locus2clone_relations.txt";
my $contig_output_file_str = "contig2clone_relations.txt";

# force unbuffered output so the progress dots appear immediately
my $old_fh = select(STDOUT);
$| = 1;
select($old_fh);

# open the files
open(my $input_fh, "<", $input_file_str)
    or die "Cannot open input file: $!\n";
open(my $clone_fh, ">", $clone_output_file_str)
    or die "Cannot open clone output file: $!\n";
open(my $locus_fh, ">", $locus_output_file_str)
    or die "Cannot open locus output file: $!\n";
open(my $contig_fh, ">", $contig_output_file_str)
    or die "Cannot open contig output file: $!\n";

# global variables
my $line_counter        = 0;
my $status              = "Comments";   # "Comments" -> "Bad Clones" -> "Good Clones"
my $previous_line_blank = 0;
my $field_number        = 0;            # position of the current line within its record
my $bad_clone_counter   = 0;
my $good_clone_counter  = 0;
my $clone_counter       = 0;
my $loci_matches        = 0;
my $contig_matches      = 0;
my $max_field_number    = 0;
my $first_map           = 1;            # boolean: next Map line is the first of a pair
my $first_contig_name   = "";
my $contig_start        = 0;
my $contig_end          = 0;
my $clone_name          = "";           # clone of the record currently being read

# field names that are recognised but carry nothing we extract
my %is_skipped_field = map { $_ => 1 } qw(
    Gel_number
    Bands
    Creation_date
    Modified_date
    Fp_number
    Approximate_match_to_cosmid
    Exact_match_to_cosmid
    Pseudo_match_to_cosmid
);

# start the display
print "*********\n";
print "****************************\n";
print "****************************************************\n";
print "Scanning $status";

# check each line of the file
LINE: while (my $current_line = <$input_fh>) {
    chomp $current_line;

    # stop at the Markerdata line
    last LINE if $current_line =~ /Markerdata/;

    # see if a comment
    if (substr($current_line, 0, 2) eq "//") {
        print "\nComments in body of file: $line_counter\n"
            if $status ne "Comments";
        advance_display();
        next LINE;
    }

    # switch from comments to bad clones; the first non-comment line itself
    # is consumed here and treated as if it were a blank separator
    if ($status eq "Comments") {
        $status = "Bad Clones";
        print "\nScanning $status";
        $previous_line_blank = 1;
        advance_display();
        next LINE;
    }

    # note blank lines (they separate records)
    if ($current_line =~ /^\s*$/) {
        if ($previous_line_blank) {
            print "\nMultiple blank lines in succession: $line_counter\n";
            advance_display();
            next LINE;
        }
        $previous_line_blank = 1;
        $field_number        = 0;
        advance_display();
        next LINE;
    }

    # note the field number of the current record
    if ($previous_line_blank) {
        $field_number = 1;
    }
    else {
        $field_number++;
        $max_field_number = $field_number
            if $field_number > $max_field_number;
    }

    # get the field name (first space-separated word) and the first
    # double-quoted value on the line
    $previous_line_blank = 0;
    my @words        = split / /, $current_line;
    my $field_name   = $words[0];
    my $quoted_value = (split /"/, $current_line)[1];

    # process Field 1 (the clone field)
    if ($field_number == 1) {
        $clone_counter++;
        $clone_name = $quoted_value;

        # see if a clone name is present (a clone named "0" is valid)
        if (!defined $clone_name || $clone_name eq "") {
            print "No clone name: $line_counter\n";
            exit;
        }

        # see if the field name is "Clone"
        if ($field_name ne "Clone") {
            print "First field name is not clone: $line_counter\n";
            exit;
        }

        # names starting with "!" are counted as bad clones
        if (substr($clone_name, 0, 1) eq "!") {
            if ($status eq "Bad Clones") {
                $bad_clone_counter++;
            }
            else {
                print "\nBad Clone out of place: $line_counter\n";
            }
        }
        # else is a good clone
        else {
            $good_clone_counter++;
            if ($status eq "Bad Clones") {
                $status = "Good Clones";
                print "\nScanning $status";
            }
        }
        advance_display();
        next LINE;
    }

    # check and skip uninteresting field names
    if ($is_skipped_field{$field_name}) {
        advance_display();
        next LINE;
    }

    # check for loci
    if ($field_name eq "Positive_Locus" || $field_name eq "Positive_Probe") {
        $loci_matches++;
        my ($loci_name, $mlg_name) = split_locus_name($quoted_value);
        print {$clone_fh} "$clone_name\t$loci_name\t$mlg_name\n";
        print {$locus_fh} "$loci_name\t$clone_name\t$mlg_name\n";
        advance_display();
        next LINE;
    }

    # check for contigs; Map lines are consumed in pairs, the first giving
    # the start location and the second the end location
    if ($field_name eq "Map") {
        my $contig_name = $quoted_value;
        $contig_name = "" if !defined $contig_name;
        my $location = $words[4];       # fifth space-separated word
        $location = "" if !defined $location;

        if ($first_map) {
            $first_map = 0;
            $contig_matches++;
            $first_contig_name = $contig_name;
            $contig_start      = $location;
        }
        else {
            $first_map = 1;
            if ($first_contig_name ne $contig_name) {
                print "Contig names do not match: $line_counter\n";
                exit;
            }
            $contig_end = $location;
            print {$contig_fh}
                "$contig_name\t$clone_name\t$contig_start\t$contig_end\n";
        }
        advance_display();
        next LINE;
    }

    # report any other field names
    print "Unknown field name: $field_name (Line $line_counter Field $field_number)\n";
    exit;
} # while

# close the files; buffered write errors only surface at close
close($input_fh);
close($clone_fh)  or die "Cannot close clone output file: $!\n";
close($locus_fh)  or die "Cannot close locus output file: $!\n";
close($contig_fh) or die "Cannot close contig output file: $!\n";

my $sorted_clone_str  = "sorted_clone2locus_relations.txt";
my $sorted_locus_str  = "sorted_locus2clone_relations.txt";
my $sorted_contig_str = "sorted_contig2clone_relations.txt";

print "\nSorting the output files...\n";
my @sort_jobs = (
    [ $clone_output_file_str,  $sorted_clone_str  ],
    [ $locus_output_file_str,  $sorted_locus_str  ],
    [ $contig_output_file_str, $sorted_contig_str ],
);
for my $job (@sort_jobs) {
    my ($unsorted, $sorted) = @{$job};

    # shell form is needed for the redirection; both names are constants
    system("sort $unsorted > $sorted") == 0
        or warn "Sorting $unsorted failed (status $?)\n";
}

# print closing information
print "\n";
print "Bad Clones: $bad_clone_counter\n";
print "Good Clones: $good_clone_counter\n";
print "Total Clones: $clone_counter\n";
print "Loci matches: $loci_matches\n";
print "Contig matches: $contig_matches\n";
print "Max Field Number: $max_field_number\n";

# stop the timer
my $run_time = Time::HiRes::time() - $start_time;
if ($run_time < 60) {
    print "\nRun time: $run_time seconds\n";
}
else {
    my $minutes = int($run_time / 60);
    $run_time %= 60;    # integer modulus: fractional seconds are dropped
    print "\nRun time: $minutes minutes, $run_time seconds\n";
}
print "****************************************************\n";
print "****************************\n";
print "*********\n";

# advance_display()
# Counts one processed input line and prints a progress dot: one dot per
# line while scanning the leading comments, one per 10000 lines afterwards.
sub advance_display {
    $line_counter++;
    if ($status eq "Comments") {
        print ".";
    }
    else {
        print "." if $line_counter % 10000 == 0;
    }
    return;
} # advance_display()

# split_locus_name($long_name)
# Splits a locus name into the bare locus name and its MLG suffix.
# Returns the list ($loci_name, $mlg_name).
#   - The MLG is a recognised suffix (case-insensitive, optionally followed
#     by "?" characters) at the end of the name; it is upper-cased.
#   - Without a recognised suffix the MLG is "unknown" and the locus name is
#     the whole input.
#   - Locus names starting with "sat" get their first letter capitalised.
#   - MLGs B, D, D1 and Y are reported as "unknown"; D1A, D1A+Q, D1AQ and Q
#     as "D1AQ"; D1B and D1BW as "D1BW".
# An undefined input is treated as the empty string.
sub split_locus_name {
    my ($long_name) = @_;
    $long_name = "" if !defined $long_name;

    my $loci_name = $long_name;
    my $mlg_name  = "unknown";

    # Test the match itself rather than $1, which could hold a stale value
    # from an earlier successful match.  The "+" in d1a+q is a literal.
    if ($long_name =~ /(A1|A2|B|B1|B2|C1|C2|D|D1|D1A|d1a\+q|Q|D1AQ|D1B|D1BW|D2|E|F|G|H|I|J|K|L|M|N|O|unknown|Y)\?*$/i) {
        $mlg_name  = uc($1);
        $loci_name = substr($long_name, 0, $-[0]);    # text before the match

        # drop a dangling "d1a+" left in front of the matched suffix
        $loci_name =~ s/d1a\+$//;
    }

    if ($loci_name =~ /^sat/) {
        $loci_name = ucfirst($loci_name);
    }

    # Normalise against the whole value.  An end-only anchor would turn
    # "D1B" (which ends in "B") into "unknown" before the D1BW rule runs.
    # NOTE(review): a literal "unknown" suffix yields "UNKNOWN" (upper-cased),
    # unlike the lower-case default - confirm whether consumers care.
    $mlg_name = "unknown" if $mlg_name =~ /\A(?:B|D|D1|Y)\z/;
    $mlg_name = "D1AQ"    if $mlg_name =~ /\A(?:D1A|D1A\+Q|D1AQ|Q)\z/;
    $mlg_name = "D1BW"    if $mlg_name =~ /\A(?:D1B|D1BW)\z/;

    return ($loci_name, $mlg_name);
} # split_locus_name()