#!/usr/bin/perl -w use strict; # spread_loci.pl, last update: 8/4/03 # spreads the loci depending upon the locations of their matching clones # by Chet Langin, clangin@siu.edu # SIU Plant Biotechnology and Genome Core-facility # start the timer use Time::HiRes; my $start_time = Time::HiRes::time; # global variables my $clone_placements_file_str = ""; my $relations_file_str = ""; my $output_file_str = ""; my %clone_mlg = (); my %clone_midpoint = (); my $previous_locus = ""; my $previous_mlg = ""; my $valid_locus = 1; my $midpoint_summation = 0; my $midpoint_divisor = 1; # check the arguments if(scalar(@ARGV) == 1 && $ARGV[0] == 1) { $clone_placements_file_str = "sorted_clone_placements.txt"; $relations_file_str = "sorted_locus2clone_relations.txt"; $output_file_str = "locus_placements.txt"; } # if elsif (scalar(@ARGV) == 3) { $clone_placements_file_str = $ARGV[0]; $relations_file_str = $ARGV[1]; $output_file_str = $ARGV[2]; } # elsif else { print "Usage: ./spread_loci.pl clone_placement_file relations_file output_file\n"; print " Example: ./spread_loci.pl sorted_clone_placements.txt sorted_locus2clone_relations.txt locus_placements.txt\n"; exit; } # else # get and save the clone locations in a hash open(CLONE_FILE, "<", $clone_placements_file_str) or die "Cannot open clone placements file: $!\n"; while() { chomp; my @fields = split /\t/; my $current_clone = $fields[3]; my $current_mlg = $fields[0]; my $current_start = $fields[1]; my $current_end = $fields[2]; if(exists($clone_mlg{$current_clone})) { print "Clone already exists in hash: $current_clone\n"; } # if else { $clone_mlg{$current_clone} = $current_mlg; $clone_midpoint{$current_clone} = ($current_start + $current_end) / 2; } # else } # while close CLONE_FILE; # determine the location for each locus open(RELATIONS_FILE, "<", $relations_file_str) or die "Cannot open relations file: $!\n"; open(OUTPUT_FILE, ">", $output_file_str) or die "Cannot open output file: $!\n"; while() { chomp; my @fields = split /\t/; my $current_locus = $fields[0]; my $current_clone = $fields[1]; my $current_mlg = $fields[2]; my $clone_mlg = ""; my $clone_midpoint = 0; if(exists($clone_mlg{$current_clone})) { $clone_mlg = $clone_mlg{$current_clone}; $clone_midpoint = $clone_midpoint{$current_clone}; } # if else { next; } # else if($current_locus eq $previous_locus) { $valid_locus = 0 if($clone_mlg ne $previous_mlg); if($valid_locus) { $midpoint_summation += $clone_midpoint; $midpoint_divisor++; } # if } # if else { if($valid_locus && $previous_locus ne "") { my $current_midpoint = $midpoint_summation / $midpoint_divisor; my $start = sprintf("%08.0f", $current_midpoint - 100); my $end = sprintf("%08.0f", $current_midpoint + 100); print OUTPUT_FILE "$previous_mlg\t$start\t$end\t$previous_locus\n"; } # if $previous_locus = $current_locus; $previous_mlg = $clone_mlg; $valid_locus = 1; $midpoint_summation = $clone_midpoint; $midpoint_divisor = 1; } # else } # while if($valid_locus) { my $current_midpoint = $midpoint_summation / $midpoint_divisor; my $start = sprintf("%08.0f", $current_midpoint - 100); my $end = sprintf("%08.0f", $current_midpoint + 100); print OUTPUT_FILE "$previous_mlg\t$start\t$end\t$previous_locus\n"; } # if close RELATIONS_FILE; close OUTPUT_FILE; # display the run time print "Run time: ${\(Time::HiRes::time - $start_time)} seconds\n";