#!/usr/bin/perl -w
use strict;

# spread_clones.pl, last update: 8/1/03
# spreads out the clones based on their locations in the contigs
# by Chet Langin, clangin@siu.edu
# SIU Plant Biotechnology and Genome Core-facility
#
# Usage:
#   ./spread_clones.pl relation_file contig_placement_file output_file
#   ./spread_clones.pl 1        # use the default file names (see below)
#
# Inputs (both tab-separated, one record per line):
#   contig placement file: field 0 = mlg, field 1 = contig start,
#                          field 3 = contig name (field 2 is not used here)
#   relation file:         field 0 = contig, field 1 = clone,
#                          field 2 = start band, field 3 = end band
#
# Outputs:
#   output file: "mlg <TAB> clone_start <TAB> clone_end <TAB> clone <TAB> contig"
#                where clone_start/clone_end are zero-padded to 8 digits
#   spread_clones_errors.txt: one line per contig that has no known placement

# start the timer
use Time::HiRes;
my $start_time = Time::HiRes::time();

# Multiplier that converts a band index into an offset from the contig start.
# NOTE(review): presumably the average band length in bases -- confirm.
use constant BAND_LENGTH => 3881.89350119645;

# check the arguments
my $relation_input_file_str   = "";
my $contig_placement_file_str = "";
my $output_file_str           = "";

if (scalar(@ARGV) == 1 && $ARGV[0] == 1) {
    # A single argument of 1 selects the default file names.
    $relation_input_file_str   = "sorted_contig2clone_relations.txt";
    $contig_placement_file_str = "sorted_contig_placements.txt";
    $output_file_str           = "clone_placements.txt";
} # if
elsif (scalar(@ARGV) != 3) {
    print "Usage: ./spread_clones.pl relation_file contig_placement_file output_file\n";
    print " (Example: ./spread_clones.pl sorted_contig2clone_relations.txt sorted_contig_placements.txt clone_placements.txt)\n";
    exit;
} # elsif
else {
    $relation_input_file_str   = $ARGV[0];
    $contig_placement_file_str = $ARGV[1];
    $output_file_str           = $ARGV[2];
} # else

# start the display
print(("-" x 3) . "\n" . ("-" x 6) . "\n" . ("-" x 12) . "\n");

# open the files (3-arg open with lexical handles throughout)
open(my $relation_fh, "<", $relation_input_file_str)
    or die "Cannot open relation input file: $!\n";
open(my $contig_placement_fh, "<", $contig_placement_file_str)
    or die "Cannot open contig placement file: $!\n";
open(my $output_fh, ">", $output_file_str)
    or die "Cannot open output file: $!\n";
open(my $error_fh, ">", "spread_clones_errors.txt")
    or die "Cannot open error file: $!\n";

# make a hash of the contig locations, keyed by contig name
my %contig_mlg     = ();
my %contig_start   = ();
my $contig_counter = 0;

# Read the contig placement file line by line.  (The loop condition must be
# a readline on the handle; an empty "while()" would loop forever.)
while (my $placement_line = <$contig_placement_fh>) {
    chomp $placement_line;
    my @fields      = split /\t/, $placement_line;
    my $mlg         = $fields[0];
    my $start       = $fields[1];
    my $contig_name = $fields[3];

    $contig_mlg{$contig_name}   = $mlg;
    $contig_start{$contig_name} = $start;
    $contig_counter++;
} # while

print "$contig_counter contigs to process...\n";

# place each clone
my $previous_contig = "";
my $contig_mlg      = "";    # mlg of the current contig; "" means "unknown"
my $contig_start    = 0;

while (my $relation_line = <$relation_fh>) {
    chomp $relation_line;
    my @fields     = split /\t/, $relation_line;
    my $contig     = $fields[0];
    my $clone      = $fields[1];
    my $start_band = $fields[2];
    my $end_band   = $fields[3];

    # Only look the contig up again when it changes from the previous line,
    # so a missing contig is reported once per run of consecutive lines.
    if ($contig ne $previous_contig) {
        if (exists($contig_mlg{$contig})) {
            $contig_mlg   = $contig_mlg{$contig};
            $contig_start = $contig_start{$contig};
        } # if
        else {
            print {$error_fh} "No location known for $contig\n";
            $contig_mlg = "";
        } # else
    } # if

    # Clones of contigs without a known placement are silently skipped.
    if ($contig_mlg ne "") {
        my $clone_start = sprintf("%08.0f", $contig_start + $start_band * BAND_LENGTH);
        my $clone_end   = sprintf("%08.0f", $contig_start + $end_band * BAND_LENGTH);
        print {$output_fh} "$contig_mlg\t$clone_start\t$clone_end\t$clone\t$contig\n";
    } # if

    $previous_contig = $contig;
} # while

close $relation_fh;
close $contig_placement_fh;
# Buffered write errors (e.g. disk full) only surface at close time.
close $output_fh or die "Cannot close output file: $!\n";
close $error_fh;

# print closing information
my $elapsed_seconds = Time::HiRes::time() - $start_time;
print "Run time: $elapsed_seconds seconds\n";
print(("-" x 12) . "\n" . ("-" x 6) . "\n" . ("-" x 3) . "\n");