#!/usr/bin/perl -w
use strict;
use warnings;

# extract_loci2.plx, last update: 9/14/03
# extract locus anchor data from a USDA spreadsheet
# by Chet Langin, clangin@siu.edu
# SIU Plant Biotechnology and Genome Core-facility
#
# Reads tab-separated rows (name, linkage group, cM position) from usda.txt,
# writes one anchor record per usable row to locus_anchors.txt, then runs the
# external "sort" command to produce sorted_locus_anchors.txt.
# Takes no command-line arguments; if any are given, the preset filenames are
# printed and the script exits without doing any work.

use Scalar::Util qw(looks_like_number);
use Time::HiRes;

# Multiplier applied to the cM column to obtain the anchor midpoint.
# NOTE(review): presumably base pairs per centimorgan -- confirm with author.
use constant FACTOR => 441824.931130786;

# Half-width of the window written around each midpoint.
use constant HALF_WINDOW => 100;

# Smallest midpoint allowed, so that (midpoint - HALF_WINDOW) is never below 1.
use constant MIN_MIDPOINT => 101;

# Linkage-group names that are rewritten before being output.
my %mlg_alias_for = (
    D1a => "D1AQ",
    D1b => "D1BW",
);

# begin the timer
my $start_time = Time::HiRes::time;

# check the arguments
if (scalar(@ARGV) > 0) {
    print "The arguments are preset as follows:\n";
    print " Input file: usda.txt\n";
    print " Output file: locus_anchors.txt\n";
    print " Sorted output file: sorted_locus_anchors.txt\n";
    print "Restart without any arguments to continue...\n";
    exit;
} # if

# set the filenames
my $input_file_str         = "usda.txt";
my $output_file_str        = "locus_anchors.txt";
my $sorted_output_file_str = "sorted_locus_anchors.txt";

# Build one output record from the three input fields.
#   $name - locus name (first input column)
#   $mlg  - linkage group (second input column); aliased via %mlg_alias_for
#   $cm   - numeric position (third input column)
# Returns "mlg<TAB>start<TAB>end<TAB>midpoint<TAB>name\n" with the three
# coordinates zero-padded to 8 digits.
sub format_anchor_record {
    my ($name, $mlg, $cm) = @_;

    $mlg = $mlg_alias_for{$mlg} if exists $mlg_alias_for{$mlg};

    # Round to the nearest whole number, then clamp so the window start
    # cannot go below 1.
    my $midpoint = sprintf("%.0f", $cm * FACTOR);
    $midpoint = MIN_MIDPOINT if $midpoint < MIN_MIDPOINT;

    return sprintf("%s\t%08d\t%08d\t%08d\t%s\n",
        $mlg,
        $midpoint - HALF_WINDOW,
        $midpoint + HALF_WINDOW,
        $midpoint,
        $name);
}

# Open the input first so that a missing input file does not truncate an
# existing output file.
open(my $input_fh, "<", $input_file_str)
    or die "Cannot open input file: $!\n";
open(my $output_fh, ">", $output_file_str)
    or die "Cannot open output file: $!\n";

# look at each line in the input file
LINE:
while (my $line = <$input_fh>) {
    $line =~ s/\r?\n\z//;          # strip LF or CRLF line endings
    next LINE if $line !~ /\S/;    # ignore blank lines

    my ($name, $mlg, $cm) = split /\t/, $line;

    # Rows without a numeric third column (short rows, header rows) cannot
    # yield a meaningful midpoint, so report and skip them.
    if (!defined $cm or !looks_like_number($cm)) {
        warn "Skipping line $. of $input_file_str: "
           . "expected name, linkage group and numeric cM position\n";
        next LINE;
    }

    print {$output_fh} format_anchor_record($name, $mlg, $cm)
        or die "Cannot write to output file: $!\n";
} # while

close($input_fh);

# The output must be flushed and closed before the external sort reads it.
close($output_fh) or die "Cannot close output file: $!\n";

# The filenames are fixed literals set above, so going through the shell for
# the redirection is safe here.
system("sort $output_file_str > $sorted_output_file_str") == 0
    or die "Cannot sort output file: sort failed (status $?)\n";

my $run_time = Time::HiRes::time - $start_time;
print "Run time: $run_time seconds\n";