#!/usr/bin/perl -w
use strict;
use warnings;

# extract_loci.plx, last update: 9/2/03
# extract locus anchor data from a file provided by Iowa State
# by Chet Langin, clangin@siu.edu
# SIU Plant Biotechnology and Genome Core-facility
#
# Reads raw_loci.txt.  For every Locus record that carries a Composite_2
# map position, one tab-separated line is written to locus_anchors.txt:
#
#     MLG <tab> midpoint-100 <tab> midpoint+100 <tab> midpoint <tab> locus name
#
# where midpoint is the cM position multiplied by FACTOR and rounded.
# locus_anchors.txt is then sorted into sorted_locus_anchors.txt with the
# system "sort" command, and the elapsed run time is printed.

# begin the timer
use Time::HiRes;
my $start_time = Time::HiRes::time();

# check the arguments: the script takes none, so any argument just prints
# the preset filenames and exits
if (scalar(@ARGV) > 0) {
    print "The arguments are preset as follows:\n";
    print " Input file: raw_loci.txt\n";
    print " Output file: locus_anchors.txt\n";
    print " Sorted output file: sorted_locus_anchors.txt\n";
    print "Restart without any arguments to continue...\n";
    exit;
} # if

# multiplier used to change a cM position into bases
use constant FACTOR => 441824.931130786;

# set the filenames
my $input_file_str         = "raw_loci.txt";
my $output_file_str        = "locus_anchors.txt";
my $sorted_output_file_str = "sorted_locus_anchors.txt";

# Write the anchor record of one finished locus to $out_fh, or report on
# STDOUT (prefixed with $line_number) that its MLG or cM value is missing.
#   $mlg      - linkage group name, "" when none was found
#   $cm       - cM position, -1 when none was found
#   $midpoint - cM position already converted to bases
#   $label    - locus name
sub emit_anchor {
    my ($out_fh, $line_number, $mlg, $cm, $midpoint, $label) = @_;

    if ($mlg eq "" || $cm == -1) {
        print "$line_number: MLG or cM problem\n";
        return;
    } # if

    # keep the start coordinate (midpoint - 100) at 1 or above
    $midpoint = 101 if ($midpoint < 101);

    printf {$out_fh} "%s\t%08d\t%08d\t%08d\t%s\n",
        $mlg, $midpoint - 100, $midpoint + 100, $midpoint, $label;
    return;
} # emit_anchor

# read the input file
open(my $input_fh, "<", $input_file_str)
    or die "Cannot open input file: $!\n";
my @file_array = <$input_fh>;
close $input_fh;
my $number_lines = scalar(@file_array);

# state of the locus currently being collected
my $stable_label_name = "";    # name of the locus whose Map lines are being read
my $current_mlg       = "";    # "" means no MLG seen yet
my $current_cm        = -1;    # -1 means no cM position seen yet
my $current_midpoint;
my $seen_locus        = 0;     # true once the first Locus line was read

open(my $output_fh, ">", $output_file_str)
    or die "Cannot open output file: $!\n";

# look at each line in the input file (line numbers in messages are 0-based)
for my $current_line (0 .. $#file_array) {
    my $line = $file_array[$current_line];

    if ($line =~ / : /) {
        my @field_array = split /"/, $line;
        my $current_label_name =
            defined $field_array[1] ? $field_array[1] : "";

        if ($field_array[0] =~ /Locus/) {
            # A new locus begins, so whatever was collected belongs to the
            # previous one.  Nothing is reported for the very first Locus
            # line, because there is no previous locus at that point.
            if ($seen_locus || $current_mlg ne "") {
                emit_anchor($output_fh, $current_line, $current_mlg,
                            $current_cm, $current_midpoint,
                            $stable_label_name);
            } # if

            $seen_locus        = 1;
            $stable_label_name = $current_label_name;
            $current_mlg       = "";
            $current_cm        = -1;
        } # if
    } # if
    elsif ($line =~ /^Map.+Composite_2/) {
        # clean up the end of the line
        # NOTE(review): chop drops one more character after the newline;
        # presumably a trailing "\r" or delimiter -- confirm against the
        # input file format.
        chomp $line;
        chop $line;
        $line =~ s/\s*$//;

        if ($current_mlg ne "") {
            print "More than one location: $current_line\n";
        } # if

        # get the MLG: the text between the first quote and the last dash
        if ($line =~ /"(.+)-/) {
            $current_mlg = $1;
        } # if
        else {
            # without this check $1 would be used although nothing matched
            warn "Line $current_line: cannot find the MLG, line skipped\n";
            next;
        } # else
        $current_mlg = "D1AQ" if ($current_mlg eq "D1a");
        $current_mlg = "D1BW" if ($current_mlg eq "D1b");

        # get the cM location (last space-separated field), change it to
        # bases (round it)
        my @field_array = split / /, $line;
        $current_cm       = $field_array[-1];
        $current_midpoint = sprintf("%.0f", $current_cm * FACTOR);
    } # elsif
} # for

# the last locus has no following Locus line to trigger its output,
# so write it here
if ($seen_locus) {
    emit_anchor($output_fh, $number_lines, $current_mlg, $current_cm,
                $current_midpoint, $stable_label_name);
} # if

# close before sorting so that all buffered output is on disk
close($output_fh) or die "Cannot close output file: $!\n";

# the filenames are fixed constants, so the shell form is safe here
system("sort $output_file_str > $sorted_output_file_str") == 0
    or warn "sort failed (status $?)\n";

my $run_time = Time::HiRes::time() - $start_time;
print "Run time: $run_time seconds\n";