#!/usr/bin/perl -w use strict; # spread_contigs.pl, last update: 8/1/03 # spreads out the contigs so that they do not overlap # by Chet Langin, clangin@siu.edu # SIU Plant Biotechnology and Genome Core-facility # start the timer use Time::HiRes; my $start_time = Time::HiRes::time; # check the arguments if(scalar(@ARGV) != 2) { print "Usage: ./spread_contigs input_file output_file\n"; print " Example: ./spread_contigs sorted_contig_anchors.txt sorted_contig_placements.txt\n"; exit; } # if # subroutines sub process_group(); sub get_most_misplaced(); sub push_left(); use constant OFFSET => 1000; my $input_file_str = $ARGV[0]; my $output_file_str = $ARGV[1]; open(INPUT_FILE, "<", $input_file_str) or die "Cannot open input file: $!\n"; open(OUTPUT_FILE, ">", $output_file_str) or die "Cannot open output file: $!\n"; #print opening information print "*" x 3 . "\n" . "*" x 6 . "\n" . "*" x 9 . "\n"; # global variables my $previous_mlg = ""; my @data_array = (); my $record_number = 0; my $biggest_distance = 0; my $biggest_neg_distance = 0; my $comparative_displacement = 0; while() { chomp; # fields: mlg start end contig_name no_of_clone_matches my @fields = split/\t/; # process one MLG at a time my $current_mlg = $fields[0]; if($current_mlg eq $previous_mlg) { # add this record to current MLG group $record_number++; $data_array[$record_number] = [@fields]; $data_array[$record_number][5] = $fields[1]; } # if else { # process previous MLG group if($previous_mlg ne "") { &process_group; # save to the output file for(my $lcv = 1; $lcv <= $record_number; $lcv++) { my $output_str = join "\t", @{$data_array[$lcv]}; my $difference = $data_array[$lcv][1] - $data_array[$lcv][5]; $output_str .= "\t$difference\n"; print OUTPUT_FILE $output_str; } # for } # if # start a new MLG group (add first record to it) $previous_mlg = $current_mlg; @data_array = (); $record_number = 1; $data_array[$record_number] = [@fields]; $data_array[$record_number][5] = $fields[1]; } # else } # while &process_group; # save to the output file for(my $lcv = 1; $lcv <= $record_number; $lcv++) { my $output_str = join "\t", @{$data_array[$lcv]}; my $difference = $data_array[$lcv][1] - $data_array[$lcv][5]; $output_str .= "\t$difference\n"; print OUTPUT_FILE $output_str; } # for close INPUT_FILE; close OUTPUT_FILE; # print closing information print "Run time: ${\(Time::HiRes::time - $start_time)} seconds\n"; print "*" x 9 . "\n" . "*" x 6 . "\n" . "*" x 3 . "\n"; exit; sub process_group() { my $conflicts = 0; my $consecutive_conflicts = 0; my $mlg = ""; for(my $lcv = 1; $lcv < $record_number; $lcv++) { $mlg = $data_array[$lcv][0]; my $start = $data_array[$lcv][1]; my $end = $data_array[$lcv][2]; my $next_start = $data_array[$lcv + 1][1]; my $next_end = $data_array[$lcv + 1][2]; my $overlap = $end + OFFSET - $next_start; if($overlap > 0) { # a conflict $conflicts++; $consecutive_conflicts++; $data_array[$lcv + 1][1] += $overlap; $data_array[$lcv + 1][2] += $overlap; } # if else { # not a conflict $consecutive_conflicts = 0; } # else } # for print "$mlg conflicts: $conflicts\n"; my $most_misplaced = &get_most_misplaced; # do the contigs one at a time from the right for(my $lcv = $record_number; $lcv > 0; $lcv--) { $comparative_displacement = $data_array[$lcv][1] - $data_array[$lcv][5]; PUSH: while($comparative_displacement > OFFSET) { last PUSH if(!&push_left($lcv)); $comparative_displacement = $data_array[$lcv][1] - $data_array[$lcv][5]; } # if } # for $most_misplaced = &get_most_misplaced; } # process_group() sub get_most_misplaced() { $biggest_distance = 0; $biggest_neg_distance = 0; my $current_winner = 0; for(my $lcv = 1; $lcv < $record_number; $lcv++) { my $distance = $data_array[$lcv][1] - $data_array[$lcv][5]; if($distance > $biggest_distance) { $biggest_distance = $distance; $current_winner = $lcv; } # if $biggest_neg_distance = $distance if($distance < $biggest_neg_distance); } # for print "Biggest distance: $biggest_distance $biggest_neg_distance\n"; $current_winner; } # get_most_misplaced() sub push_left() { my $ok = 0; my $current_contig = $_[0]; if($current_contig == 1 && $data_array[$current_contig][1] > OFFSET) { # it is the leftmost contig and there is room to move left $ok = 1; } # if elsif($current_contig > 1 && $data_array[$current_contig][1] > $data_array[$current_contig - 1][2] + 1000) { # there is room to move without bumping into another contig $ok = 1; } # elsif elsif(&push_left($current_contig - 1)) { # another contig can be bumped to the left $ok = 1; } # elsif my $current_displacement = $data_array[$current_contig][1] - $data_array[$current_contig][5]; if($ok && abs($current_displacement - OFFSET) > $comparative_displacement) { $ok = 0; } # if if($ok) { $data_array[$current_contig][1] = sprintf("%08.0f", $data_array[$current_contig][1] - OFFSET); $data_array[$current_contig][2] = sprintf("%08.0f", $data_array[$current_contig][2] - OFFSET); } # if $ok; } # push_left()