package Gff;

=head1 NAME

Gff -- An object that keeps track of GFF file information.

=head1 SYNOPSIS

  my $gff = Gff->new;
  $gff->write($db_manager, $project);

  Object hash entries:
    loci, clones, contigs, qtls, ends,
    mtps, ests, seq, rel, con            boolean
    gff_file_written                     boolean

=head1 DESCRIPTION

This object holds the boolean flags that select which feature groups
(loci, clones, contigs, QTLs, MTPs, ESTs, ...) C<write> emits to the GFF
file, together with the C<gff_file_written> flag.  Every flag starts out
as FALSE; the caller switches on the ones it wants before calling
C<write>.

=head1 VERSION

0.001 (last update: 7/16/04)

=head1 AUTHOR

Chet Langin, clangin@siu.edu
SIU Plant Biotechnology and Genomics Core-facility

=head1 BUGS

None known.

=head1 SEE ALSO

extropy
ExtropyConstants
ExtropyUtils
Extropy::MenuMain

=head1 COPYRIGHT

Copyright 2004, Chet Langin, All Rights Reserved.
This program is free software.  You may copy or redistribute it under
the same terms as Perl itself.

=head1 METHODS

The remainder of this document describes the methods available to the
programmer.

=cut

# load the pragmas
use warnings;
use strict;

# load other modules
use ExtropyConstants;
use ExtropyUtils;

# package variables
my $db_manager;
my $project;

# ******************************** new ******************************

=head2 new

  my $gff = Gff->new;

Creates an instance of the Gff object with every flag reset to FALSE.
The object is blessed into the class it was invoked on, so subclasses
inherit this constructor correctly.

=cut

# ---------------------------------------------------------------------

sub new {
    # The invocant may be a class name or an existing instance; fall back
    # to this package so a plain function call (Gff::new()) keeps working.
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant || __PACKAGE__;

    # instantiate the object
    my $self = bless {}, $class;

    # start with every flag cleared
    $self->reset;

    return $self;
} # new()

# ******************************** reset *****************************

=head2 reset

  $gff->reset;

Resets every flag of the object to FALSE.  For internal use, only.

=cut

# ---------------------------------------------------------------------

sub reset {
    my $self = shift;

    # Section-selection flags followed by the "file written" status flag.
    my @flag_names = qw(
        loci clones contigs qtls ends
        mtps ests seq rel con
        gff_file_written
    );

    for my $flag_name (@flag_names) {
        $self->{$flag_name} = FALSE;
    } # for

    # Same value the original returned implicitly from its last assignment.
    return FALSE;
} # reset

# ******************************** write *****************************

=head2 write

  $gff->write($db_manager, $project);

Writes the GFF file.  The file is named after
C<< $db_manager->{database_name} >> with a C<.gff> extension.

=cut # --------------------------------------------------------------------- sub write { my $self = shift; $db_manager = shift; $project = shift; my %mlg_lengths = (); my $gff_filename = "$db_manager->{database_name}.gff"; message_start; message("Opening $gff_filename for output"); my $file_opened = TRUE; open(GFF_FILE, ">", $gff_filename) or $file_opened = FALSE; if($file_opened) { # Write to the GFF file message("$gff_filename opened for output"); message("Connecting to the database"); $db_manager->connect; ########################### ############ MLG's ######## ########################### message("Getting the MLG info"); my $sth1 = $db_manager->execute("select mlg, length from mlg"); message("Saving the MLG's in the GFF file."); my @row1 = (); while(@row1 = $sth1->fetchrow_array()) { my $mlg = shift @row1; my $length = shift @row1; $mlg_lengths{$mlg} = $length; print GFF_FILE "mlg$mlg\tchromosome\tComponent\t1\t$length\t.\t.\t.\tSequence \"mlg$mlg\"\n"; } # while ########################### ############ Loci ######### ########################### if($self->{loci}) { message("Getting the alpha loci info"); my $sth2 = $db_manager->execute("select locus, mlg, anchor, comment, type from loci where type = 'alpha'"); message("Saving the alpha loci in the GFF file."); my @row2 = (); while(@row2 = $sth2->fetchrow_array()) { my $locus = shift @row2; my $mlg = shift @row2; my $anchor = shift @row2; my $comment = shift @row2; my $type = shift @row2; my $track = "aLoci"; $anchor = 101 if ($anchor < 101); my $overflow = ($anchor + 100) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - 100; my $end = $anchor + 100; my $name = $locus; my $note = "$comment, $type. "; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while message("Getting the beta loci info"); my $sth3 = $db_manager->execute("select locus, mlg, anchor, comment, type from loci where type = 'beta'"); message("Saving the beta loci in the GFF file."); my @row3 = (); while(@row3 = $sth3->fetchrow_array()) { my $locus = shift @row3; my $mlg = shift @row3; my $anchor = shift @row3; my $comment = shift @row3; my $type = shift @row3; my $track = "bLoci"; $anchor = 101 if ($anchor < 101); my $overflow = ($anchor + 100) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - 100; my $end = $anchor + 100; my $name = $locus; # Get the matching clones my $clones = 0; my $sth3a = $db_manager->execute("select clone from clone2locus3 where locus = '$locus'"); my @row3a = (); while(@row3a = $sth3a->fetchrow_array()) { my $clone = shift @row3a; $clones++; } # while # Get the matching sequences my $sequence = ""; my $sth3b = $db_manager->execute("select sequence from sequence where hit = '$locus'"); my @row3b = (); while(@row3b = $sth3b->fetchrow_array()) { my $hit = shift @row3b; if($sequence eq "") { $sequence = "$hit"; } # if else { $sequence .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth3c = $db_manager->execute("select related from related where hit = '$locus'"); my @row3c = (); while(@row3c = $sth3c->fetchrow_array()) { my $related = shift @row3c; $relateds++; } # while my $note = ""; if($sequence eq "") { $note = "$comment, $type, $clones clone/s, $relateds related genes. "; } # if else { $note = "$comment, $type, $clones clone/s, $relateds related genes, $sequence. "; } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while message("Getting the gamma loci info"); my $sth4 = $db_manager->execute("select locus, mlg, anchor, comment, type from loci where type = 'gamma'"); message("Saving the gamma loci in the GFF file."); my @row4 = (); while(@row4 = $sth4->fetchrow_array()) { my $locus = shift @row4; my $mlg = shift @row4; my $anchor = shift @row4; my $comment = shift @row4; my $type = shift @row4; my $track = "gLoci"; $anchor = 101 if ($anchor < 101); my $overflow = ($anchor + 100) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - 100; my $end = $anchor + 100; my $name = $locus; # Get the matching clones my $clones = 0; my $sth4a = $db_manager->execute("select clone from clone2locus3 where locus = '$locus'"); my @row4a = (); while(@row4a = $sth4a->fetchrow_array()) { my $clone = shift @row4a; $clones++; } # while # Get the matching contigs my $ctgs = 0; my $sth4b = $db_manager->execute("select contig2clone.ctg from clone2locus3, contig2clone where clone2locus3.locus = '$locus' and clone2locus3.clone = contig2clone.clone"); # valid my @row4b = (); while(@row4b = $sth4b->fetchrow_array()) { my $ctg = shift @row4b; $ctgs++; } # while # Get the matching sequences my $sequence = ""; my $sth4c = $db_manager->execute("select sequence from sequence where hit = '$locus'"); my @row4c = (); while(@row4c = $sth4c->fetchrow_array()) { my $hit = shift @row4c; if($sequence eq "") { $sequence = "$hit"; } # if else { $sequence .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth4d = $db_manager->execute("select related from related where hit = '$locus'"); my @row4d = (); while(@row4d = $sth4d->fetchrow_array()) { my $related = shift @row4d; $relateds++; } # while my $note = ""; if($sequence eq "") { if($relateds > 0) { $note = "$comment, $type, $clones clone/s, $ctgs contig/s, $relateds related genes. 
"; } # if else { $note = "$comment, $type, $clones clone/s, $ctgs contig/s. "; } # else } # if else { if($relateds > 0) { $note = "$comment, $type, $clones clone/s, $ctgs contig/s, $relateds related genes, $sequence. "; } # if else { $note = "$comment, $type, $clones clone/s, $ctgs contig/s, $sequence. "; } # else } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while } # if (loci) else { message("Not getting the loci info"); } # else ########################### ########### Clones ######## ########################### # Note: the contig locations have already been set and spread in UpdateContigAnchors.pm if($self->{clones}) { message("Deleting clone_locations table non-alpha info for new input"); $db_manager->execute("delete from clone_locations where type != 'alpha'"); # The alpha locations were input in UpdateContigAnchors.pm message("Getting the gamma clone info"); my $sth5 = $db_manager->execute("select clone_anchors.clone, clone_anchors.mlg, clone_anchors.anchor, clone_anchors.dup_i, clone_anchors.dup_t, contig2clone.length from clones, clone_anchors, contig2clone where clones.type = 'gamma' and clones.clone = clone_anchors.clone and clones.clone = contig2clone.clone"); message("Saving the gamma clones in the GFF file"); message(" and the clone_locations table"); my @row5 = (); while(@row5 = $sth5->fetchrow_array()) { my $clone = shift @row5; my $mlg = shift @row5; my $anchor = shift @row5; my $dup_i = shift @row5; my $dup_t = shift @row5; my $clone_length = shift @row5; my $track = "gClones"; # Determine the starting and ending locations of the clone $clone_length = int($clone_length * $project->{band_factor}); my $half_length = int($clone_length / 2); $anchor = $half_length + 1 if ($anchor < $half_length + 1); my $overflow = ($anchor + $half_length) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - $half_length; my $end = $start + 
$clone_length; my $name = $clone; my $type = "gamma"; # get the matching loci my $matches = ""; my $sth5a = $db_manager->execute("select locus from clone2locus3 where clone = '$clone'"); my @row5a = (); while(@row5a = $sth5a->fetchrow_array()) { my $locus = shift @row5a; if($matches eq "") { $matches = $locus; } # if else { $matches .= ", $locus"; } # else } # while # get the matching contigs my $sth5b = $db_manager->execute("select ctg from contig2clone where clone = '$clone'"); # valid my @row5b = (); while(@row5b = $sth5b->fetchrow_array()) { my $contig = shift @row5b; if($matches eq "") { $matches = $contig; } # if else { $matches .= ", $contig"; } # else } # while # get the matching MTP my $sth5b1 = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row5b1 = (); while(@row5b1 = $sth5b1->fetchrow_array()) { my $mtp = shift @row5b1; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth5b2 = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row5b2 = (); while(@row5b2 = $sth5b2->fetchrow_array()) { my $est = shift @row5b2; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth5b3 = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row5b3 = (); while(@row5b3 = $sth5b3->fetchrow_array()) { my $hit = shift @row5b3; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth5b4 = $db_manager->execute("select related from related where hit = '$clone'"); my @row5b4 = (); while(@row5b4 = $sth5b4->fetchrow_array()) { my $related = shift @row5b4; $relateds++; } # while my $note = ""; if($dup_t > 1) { if($relateds > 0) { $note = "$dup_i of $dup_t, $type, $matches, $relateds related genes. "; } # if else { $note = "$dup_i of $dup_t, $type, $matches. 
"; } # else } # if else { if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'gamma')"); } # while message("Getting the spread gamma clone info"); my $sth5c = $db_manager->execute("select contig2clone.clone, contig2clone.start, contig2clone.length, contig2clone.ctg, contig_anchors.mlg, contig_anchors.spread_start, clone_anchors.dup_i, clone_anchors.dup_t, contigs.start from contig2clone, clone_anchors, contig_anchors, clones, contigs where contig2clone.clone = clone_anchors.clone and contig2clone.ctg = contig_anchors.ctg and clone_anchors.clone = clones.clone and clones.type = 'gamma' and contig2clone.ctg = contigs.ctg"); # valid message("Saving the spread gamma clones in the GFF file."); my @row5c = (); while(@row5c = $sth5c->fetchrow_array()) { my $clone = shift @row5c; my $band_start = shift @row5c; my $length = shift @row5c; my $ctg = shift @row5c; my $mlg = shift @row5c; my $spread_start = shift @row5c; my $dup_i = shift @row5c; my $dup_t = shift @row5c; my $ctg_band_start = shift @row5c; my $track = "gsClones"; my $displacement = $band_start - $ctg_band_start; my $start = $spread_start + ($displacement * $project->{band_factor}); my $end = $start + int($length * $project->{band_factor}); my $name = $clone; my $type = "gamma"; # get the matching loci my $matches = ""; my $sth5d = $db_manager->execute("select locus from clone2locus3 where clone = '$clone'"); my @row5d = (); while(@row5d = $sth5d->fetchrow_array()) { my $locus = shift @row5d; if($matches eq "") { $matches = $locus; } # if else { $matches .= ", $locus"; } # else } # while # get the matching contigs my $sth5e = $db_manager->execute("select ctg from contig2clone where clone = '$clone'"); # valid my @row5e = (); 
while(@row5e = $sth5e->fetchrow_array()) { my $contig = shift @row5e; if($matches eq "") { $matches = $contig; } # if else { $matches .= ", $contig"; } # else } # while # get the matching MTP my $sth5e1 = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row5e1 = (); while(@row5e1 = $sth5e1->fetchrow_array()) { my $mtp = shift @row5e1; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth5e2 = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row5e2 = (); while(@row5e2 = $sth5e2->fetchrow_array()) { my $est = shift @row5e2; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth5e3 = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row5e3 = (); while(@row5e3 = $sth5e3->fetchrow_array()) { my $hit = shift @row5e3; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth5e4 = $db_manager->execute("select related from related where hit = '$clone'"); my @row5e4 = (); while(@row5e4 = $sth5e4->fetchrow_array()) { my $related = shift @row5e4; $relateds++; } # while my $note = ""; if($dup_t > 1) { if($relateds > 0) { $note = "$dup_i of $dup_t, $type, $matches, $relateds related genes. "; } # if else { $note = "$dup_i of $dup_t, $type, $matches. "; } # else } # if else { if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'spread')"); } # while message("Getting the beta 2 clone info"); my $sth5f = $db_manager->execute("select contig2clone.clone, contig2clone.start, contig2clone.length, contig2clone.ctg, contig_anchors.mlg, contig_anchors.start, contigs.start from contig2clone, contig_anchors, clones, contigs where contig2clone.ctg = contig_anchors.ctg and contig2clone.clone = clones.clone and clones.type = 'beta 2' and contig2clone.ctg = contigs.ctg"); # valid message("Saving the beta 2 clones in the GFF file"); message(" and the clone_locations table"); my @row5f = (); while(@row5f = $sth5f->fetchrow_array()) { my $clone = shift @row5f; my $band_start = shift @row5f; my $length = shift @row5f; my $ctg = shift @row5f; my $mlg = shift @row5f; my $ctg_start = shift @row5f; my $ctg_band_start = shift @row5f; my $track = "b2Clones"; my $displacement = $band_start - $ctg_band_start; my $start = $ctg_start + ($displacement * $project->{band_factor}); my $end = $start + int($length * $project->{band_factor}); my $name = $clone; my $type = "beta 2"; # get the matching contigs my $matches = ""; my $sth5h = $db_manager->execute("select ctg from contig2clone where clone = '$clone'"); # valid my @row5h = (); while(@row5h = $sth5h->fetchrow_array()) { my $contig = shift @row5h; if($matches eq "") { $matches = $contig; } # if else { $matches .= ", $contig"; } # else } # while # get the matching MTP my $sth5h1 = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row5h1 = (); while(@row5h1 = $sth5h1->fetchrow_array()) { my $mtp = shift @row5h1; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth5h2a = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row5h2a = (); while(@row5h2a = $sth5h2a->fetchrow_array()) { my $est = 
shift @row5h2a; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth5h2b = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row5h2b = (); while(@row5h2b = $sth5h2b->fetchrow_array()) { my $hit = shift @row5h2b; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth5h2c = $db_manager->execute("select related from related where hit = '$clone'"); my @row5h2c = (); while(@row5h2c = $sth5h2c->fetchrow_array()) { my $related = shift @row5h2c; $relateds++; } # while my $note = ""; if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'beta 2')"); } # while message("Getting the beta 2 clone info for Queue"); my $sth5f0 = $db_manager->execute("select contig2clone.clone, contig2clone.start, contig2clone.length, contig2clone.ctg, contigs_onQ.start, contigs.start from contig2clone, contigs_onQ, clones, contigs where contig2clone.ctg = contigs_onQ.ctg and contig2clone.clone = clones.clone and clones.type = 'beta 2' and contig2clone.ctg = contigs.ctg"); # valid message("Saving the beta 2 clones for Queue in the GFF file"); message(" and the clone_locations table"); my @row5f0 = (); while(@row5f0 = $sth5f0->fetchrow_array()) { my $clone = shift @row5f0; my $band_start = shift @row5f0; my $length = shift @row5f0; my $ctg = shift @row5f0; my $mlg = "Queue"; my $ctg_start = shift @row5f0; my $ctg_band_start = shift @row5f0; my $track = "b2Clones"; my $displacement = $band_start - $ctg_band_start; my $start = $ctg_start + ($displacement * $project->{band_factor}); my $end = $start + int($length * 
$project->{band_factor}); my $name = $clone; my $type = "beta 2"; # get the matching contigs my $matches = ""; my $sth5h2 = $db_manager->execute("select ctg from contig2clone where clone = '$clone'"); # valid my @row5h2 = (); while(@row5h2 = $sth5h2->fetchrow_array()) { my $contig = shift @row5h2; if($matches eq "") { $matches = $contig; } # if else { $matches .= ", $contig"; } # else } # while # get the matching MTP my $sth5h3 = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row5h3 = (); while(@row5h3 = $sth5h3->fetchrow_array()) { my $mtp = shift @row5h3; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth5h3a = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row5h3a = (); while(@row5h3a = $sth5h3a->fetchrow_array()) { my $est = shift @row5h3a; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth5h3b = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row5h3b = (); while(@row5h3b = $sth5h3b->fetchrow_array()) { my $hit = shift @row5h3b; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth5h3c = $db_manager->execute("select related from related where hit = '$clone'"); my @row5h3c = (); while(@row5h3c = $sth5h3c->fetchrow_array()) { my $related = shift @row5h3c; $relateds++; } # while my $note = ""; if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else # if($name eq "IS009H03") { # print "$name $mlg $start $end second one\n"; # } # if print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'beta 2')"); } # while message("Getting the reversed clone info"); my $sth5f1 = $db_manager->execute("select contig2clone.clone, contig2clone.end, contig2clone.length, contig2clone.ctg, contig_anchors.mlg, contig_anchors.rev_start, contigs.end from contig2clone, contig_anchors, clones, contigs where contig2clone.ctg = contig_anchors.ctg and contig2clone.clone = clones.clone and clones.type = 'beta 2' and contig2clone.ctg = contigs.ctg"); # valid message("Saving the reverse beta 2 clones in the GFF file"); message(" and the clone_locations table"); my @row5f1 = (); while(@row5f1 = $sth5f1->fetchrow_array()) { # xxx my $clone = shift @row5f1; my $band_end = shift @row5f1; my $length = shift @row5f1; my $ctg = shift @row5f1; my $mlg = shift @row5f1; my $rev_start = shift @row5f1; my $ctg_band_end = shift @row5f1; my $track = "RevClones"; my $displacement = $ctg_band_end - $band_end; my $start = $rev_start + ($displacement * $project->{band_factor}); my $end = $start + int($length * $project->{band_factor}); my $name = $clone; my $type = "reverse"; # get the matching contigs my $matches = ""; my $sth5f0 = $db_manager->execute("select ctg from contig2clone where clone = '$clone'"); # valid my @row5f0 = (); while(@row5f0 = $sth5f0->fetchrow_array()) { my $contig = shift @row5f0; if($matches eq "") { $matches = $contig; } # if else { $matches .= ", $contig"; } # else } # while # get the matching MTP my $sth5f01 = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row5f01 = (); while(@row5f01 = $sth5f01->fetchrow_array()) { my $mtp = shift @row5f01; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth5f02 = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row5f02 = (); while(@row5f02 = 
$sth5f02->fetchrow_array()) { my $est = shift @row5f02; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth5f0b = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row5f0b = (); while(@row5f0b = $sth5f0b->fetchrow_array()) { my $hit = shift @row5f0b; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth5f0c = $db_manager->execute("select related from related where hit = '$clone'"); my @row5f0c = (); while(@row5f0c = $sth5f0c->fetchrow_array()) { my $related = shift @row5f0c; $relateds++; } # while my $note = ""; if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'reverse')"); } # while message("Getting the spread beta 2 clone info"); my $sth5f2 = $db_manager->execute("select contig2clone.clone, contig2clone.start, contig2clone.length, contig2clone.ctg, contig_anchors.mlg, contig_anchors.spread_start, contigs.start from contig2clone, contig_anchors, clones, contigs where contig2clone.ctg = contig_anchors.ctg and contig2clone.clone = clones.clone and clones.type = 'beta 2' and contig2clone.ctg = contigs.ctg"); # valid message("Saving the spread beta 2 clones in the GFF file."); my @row5f2 = (); while(@row5f2 = $sth5f2->fetchrow_array()) { my $clone = shift @row5f2; my $band_start = shift @row5f2; my $length = shift @row5f2; my $ctg = shift @row5f2; my $mlg = shift @row5f2; my $spread_start = shift @row5f2; my $ctg_band_start = shift @row5f2; my $track = "b2sClones"; my $displacement = $band_start - $ctg_band_start; my $start = $spread_start + ($displacement * $project->{band_factor}); my 
$end = $start + int($length * $project->{band_factor}); my $name = $clone; my $type = "beta 2"; # get the matching contigs my $matches = ""; my $sth5f3 = $db_manager->execute("select ctg from contig2clone where clone = '$clone'"); # valid my @row5f3 = (); while(@row5f3 = $sth5f3->fetchrow_array()) { my $contig = shift @row5f3; if($matches eq "") { $matches = $contig; } # if else { $matches .= ", $contig"; } # else } # while # get the matching MTP my $sth5f4 = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row5f4 = (); while(@row5f4 = $sth5f4->fetchrow_array()) { my $mtp = shift @row5f4; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth5f4a = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row5f4a = (); while(@row5f4a = $sth5f4a->fetchrow_array()) { my $est = shift @row5f4a; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth5f4b = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row5f4b = (); while(@row5f4b = $sth5f4b->fetchrow_array()) { my $hit = shift @row5f4b; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth5f4c = $db_manager->execute("select related from related where hit = '$clone'"); my @row5f4c = (); while(@row5f4c = $sth5f4c->fetchrow_array()) { my $related = shift @row5f4c; $relateds++; } # while my $note = ""; if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'spread')"); } # while message("Getting the beta 1 clone info"); my $sth6 = $db_manager->execute("select clone_anchors.clone, clone_anchors.mlg, clone_anchors.anchor, clone_anchors.dup_i, clone_anchors.dup_t from clones, clone_anchors where clones.type = 'beta 1' and clones.clone = clone_anchors.clone"); message("Saving the beta 1 clones in the GFF file"); message(" and the clone_locations table"); my @row6 = (); while(@row6 = $sth6->fetchrow_array()) { my $clone = shift @row6; my $mlg = shift @row6; my $anchor = shift @row6; my $dup_i = shift @row6; my $dup_t = shift @row6; my $track = "b1Clones"; my $half_clone_length = int(AVE_CLONE_LENGTH / 2); $anchor = $half_clone_length + 1 if ($anchor < $half_clone_length + 1); my $overflow = ($anchor + $half_clone_length) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - $half_clone_length; my $end = $start + AVE_CLONE_LENGTH; my $name = $clone; my $type = "beta 1"; # get the matching loci my $matches = ""; my $sth6a = $db_manager->execute("select locus from clone2locus3 where clone = '$clone'"); my @row6a = (); while(@row6a = $sth6a->fetchrow_array()) { my $locus = shift @row6a; if($matches eq "") { $matches = $locus; } # if else { $matches .= ", $locus"; } # else } # while # get the matching MTP my $sth6b = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row6b = (); while(@row6b = $sth6b->fetchrow_array()) { my $mtp = shift @row6b; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth6b2 = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row6b2 = (); while(@row6b2 = $sth6b2->fetchrow_array()) { my $est = shift @row6b2; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # 
else } # while # Get the matching sequences my $sth6b3 = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row6b3 = (); while(@row6b3 = $sth6b3->fetchrow_array()) { my $hit = shift @row6b3; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth6b4 = $db_manager->execute("select related from related where hit = '$clone'"); my @row6b4 = (); while(@row6b4 = $sth6b4->fetchrow_array()) { my $related = shift @row6b4; $relateds++; } # while my $note = ""; if($dup_t > 1) { if($relateds > 0) { $note = "$dup_i of $dup_t, $type, $matches, $relateds related genes. "; } # if else { $note = "$dup_i of $dup_t, $type, $matches. "; } #else } # if else { if($relateds > 0) { $note = "$type, $matches, $relateds related genes. "; } # if else { $note = "$type, $matches. "; } # else } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; $db_manager->execute("insert into clone_locations values('', '$name', '$mlg', '$start', '$end', 'beta 1')"); } # while message("Getting the alpha clone info"); my $sth6c = $db_manager->execute("select clone, start, end from clone_locations where type = 'alpha'"); message("Saving the alpha clones in the GFF file"); my @row6c = (); while(@row6c = $sth6c->fetchrow_array()) { my $clone = shift @row6c; my $mlg = "Queue"; my $start = shift @row6c; my $end = shift @row6c; my $track = "aClones"; my $name = $clone; my $type = "alpha"; # get the matching loci my $matches = ""; my $sth6d = $db_manager->execute("select locus from clone2locus3 where clone = '$clone'"); my @row6d = (); while(@row6d = $sth6d->fetchrow_array()) { my $locus = shift @row6d; if($matches eq "") { $matches = $locus; } # if else { $matches .= ", $locus"; } # else } # while # get the matching MTP my $sth6e = $db_manager->execute("select mtp from mtp where clone = '$clone'"); # valid my @row6e = (); 
while(@row6e = $sth6e->fetchrow_array()) { my $mtp = shift @row6e; if($matches eq "") { $matches = $mtp; } # if else { $matches .= ", $mtp"; } # else } # while # get the matching EST my $sth6f = $db_manager->execute("select est from est where clone = '$clone'"); # valid my @row6f = (); while(@row6f = $sth6f->fetchrow_array()) { my $est = shift @row6f; if($matches eq "") { $matches = $est; } # if else { $matches .= ", $est"; } # else } # while # Get the matching sequences my $sth6g = $db_manager->execute("select sequence from sequence where hit = '$clone'"); my @row6g = (); while(@row6g = $sth6g->fetchrow_array()) { my $hit = shift @row6g; if($matches eq "") { $matches = "$hit"; } # if else { $matches .= ", $hit" } # else } # while # Get the matching Related Genes my $relateds = 0; my $sth6h = $db_manager->execute("select related from related where hit = '$clone'"); my @row6h = (); while(@row6h = $sth6h->fetchrow_array()) { my $related = shift @row6h; $relateds++; } # while if($matches ne "") { $type .= ", $matches"; } # if my $note = ""; if($relateds > 0) { $note = "$type, $relateds related genes. "; } # if else { $note = "$type. "; } #else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while } # if (clones) else { message("Not getting the clones info"); } # else ########################### ########## Contigs ######## ########################### if($self->{contigs}) { message("Getting the gamma contigs info"); my $sth7 = $db_manager->execute("select contig_anchors.ctg, contig_anchors.mlg, contig_anchors.start, contig_anchors.end, contig_anchors.rev_start, contig_anchors.rev_end, contig_anchors.spread_start, contig_anchors.spread_end, contig_anchors.dup_i, contig_anchors.dup_t from contigs, contig_anchors where contigs.type = 'gamma' and contigs.ctg = contig_anchors.ctg"); message("Saving the gamma contigs in the GFF file."); my @row7 = (); while(@row7 = $sth7->fetchrow_array()) { my $ctg = shift @row7; my $mlg = shift @row7; my $start = shift @row7; my $end = shift @row7; my $rev_start = shift @row7; my $rev_end = shift @row7; my $spread_start = shift @row7; my $spread_end = shift @row7; my $dup_i = shift @row7; my $dup_t = shift @row7; my $overflow = $end - $mlg_lengths{$mlg}; if($overflow >= 0) { $start -= $overflow + 200; $end -= $overflow + 200; } # if $overflow = $rev_end - $mlg_lengths{$mlg}; if($overflow >= 0) { $rev_start -= $overflow + 200; $rev_end -= $overflow + 200; } # if my $track = "gContigs"; my $name = $ctg; my $type = "gamma"; # get the matching clones my $matches = 0; my $sth7a = $db_manager->execute("select contig2clone.clone from contig2clone, clones where contig2clone.ctg = '$ctg' and contig2clone.clone = clones.clone and contig2clone.valid = 'yes' and clones.type = 'gamma'"); my @row7a = (); while(@row7a = $sth7a->fetchrow_array()) { my $clone = shift @row7a; $matches++; } # while my $note = ""; if($dup_t > 1) { $note = "$dup_i of $dup_t, $type, $matches clone/s. "; } # if else { $note = "$type, $matches clone/s. "; } # else print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; # Do the reverse contigs $track = "RevContigs"; print GFF_FILE "mlg$mlg\tvarious\t$track\t$rev_start\t$rev_end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; # Do the spread $track = "sContigs"; print GFF_FILE "mlg$mlg\tvarious\t$track\t$spread_start\t$spread_end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while message("Getting the beta contigs info"); my $sth7a = $db_manager->execute("select ctg, start, end from contigs_onQ"); message("Saving the beta contigs in the GFF file."); my @row7a = (); while(@row7a = $sth7a->fetchrow_array()) { my $ctg = shift @row7a; my $start = shift @row7a; my $end = shift @row7a; print GFF_FILE "mlgQueue\tvarious\tbContigs\t$start\t$end" . "\t.\t+\t.\tSequence \"$ctg\" ; Note\n"; } # while } # if (contigs) else { message("Not getting the contigs info"); } # else ########################### ############ QTL's ######## ########################### if($self->{qtls}) { message("Getting the QTL info"); my $sth8 = $db_manager->execute("select qtl, mlg, start, end, gene from qtl"); message("Saving the QTL's in the GFF file."); my @row8 = (); while(@row8 = $sth8->fetchrow_array()) { my $qtl = shift @row8; my $mlg = shift @row8; my $start = shift @row8; my $end = shift @row8; my $gene = shift @row8; my $track = "QTL"; my $name = $qtl; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note\n"; # print the QTL Gene, if appropriate if($gene ne "-1") { $track = "QTLGene"; my $note = $name; $name = $gene; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # if } # while } # if (qtls) else { message("Not getting the QTL info"); } # else ########################### ############ MTP's ######## ########################### if($self->{mtps}) { message("Getting the MTP info"); my $sth9 = $db_manager->execute("select clone, mtp from mtp"); message("Saving the MTP's in the GFF file."); my @row9 = (); while(@row9 = $sth9->fetchrow_array()) { my $clone = shift @row9; my $mtp = shift @row9; my $sth9a = $db_manager->execute("select mlg, start, end from clone_locations where clone = '$clone'"); my @row9a = (); while(@row9a = $sth9a->fetchrow_array()) { my $mlg = shift @row9a; my $start = shift @row9a; my $end = shift @row9a; my $track = "MTP"; my $name = $mtp; my $note = $clone; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while } # while } # if (mtps) else { message("Not getting the MTP info"); } # else ########################### ############ EST's ######## ########################### if($self->{ests}) { message("Getting the EST info"); my $sth10 = $db_manager->execute("select clone, est, comment from est"); message("Saving the EST's in the GFF file."); my @row10 = (); while(@row10 = $sth10->fetchrow_array()) { my $clone = shift @row10; my $est = shift @row10; my $comment = shift @row10; my $sth10a = $db_manager->execute("select mlg, start, end from clone_locations where clone = '$clone'"); my @row10a = (); while(@row10a = $sth10a->fetchrow_array()) { my $mlg = shift @row10a; my $start = shift @row10a; my $end = shift @row10a; my $track = "EST"; my $name = $est; my $note = $clone; $note .= ", $comment" if($comment ne ""); print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while } # while } # if (mtps) else { message("Not getting the EST info"); } # else ########################### ######### Sequences ####### ########################### if($self->{seq}) { message("Getting the sequence info"); my $sth11 = $db_manager->execute("select sequence, hit from sequence"); message("Saving the sequences in the GFF file."); my @row11 = (); while(@row11 = $sth11->fetchrow_array()) { my $sequence = shift @row11; my $hit = shift @row11; my $sth11a = $db_manager->execute("select mlg, start, end from clone_locations where clone = '$hit'"); my @row11a = (); while(@row11a = $sth11a->fetchrow_array()) { my $mlg = shift @row11a; my $start = shift @row11a; my $end = shift @row11a; my $track = "Sequence"; my $name = $sequence; my $note = $hit; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while my $sth11b = $db_manager->execute("select mlg, anchor from loci where locus = '$hit'"); my @row11b = (); while(@row11b = $sth11b->fetchrow_array()) { my $mlg = shift @row11b; my $anchor = shift @row11b; $anchor += 101 if($anchor < 101); my $overflow = ($anchor + 100) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - 100; my $end = $anchor + 100; my $track = "Sequence"; my $name = $sequence; my $note = $hit; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while } # while } # if else { message("Not getting the Sequence info"); } # else ########################### ## Related Genes ########## ########################### if($self->{rel}) { message("Creating a hash of the loci name changes"); my $loci_file_opened = TRUE; open(LOCI_FILE, "<", "../working_data/locus_name_changes.txt") or $loci_file_opened = FALSE; my %name_changes = (); if($loci_file_opened) { my @input_array = ; my $num_lines = scalar(@input_array); for(my $lcv = 0; $lcv < $num_lines; $lcv++) { my $current_line = $input_array[$lcv]; chomp $current_line; my @field_array = split /\t/, $current_line; my $old_name = $field_array[0]; my $new_name = $field_array[1]; $name_changes{$old_name} = $new_name; } # for close LOCI_FILE; } # if else { message("$gff_filename could not be opened: $!"); } # else message("Opening output file for bad related locations"); my $bad_relateds_file_opened = TRUE; open(BAD_RELATEDS_FILE, ">", "../working_data/bad_relateds.txt") or $bad_relateds_file_opened = FALSE; if($bad_relateds_file_opened == FALSE) { message("$gff_filename could not be opened: $!"); } # if message("Substituting bad loci names with their clones"); my $sth12z = $db_manager->execute("select clone, locus from bad_clone2locus"); my @row12z = (); while(@row12z = $sth12z->fetchrow_array()) { my $clone = shift @row12z; my $locus = shift @row12z; $name_changes{$locus} = $clone; } # while message("Getting the related genes info"); my $sth12 = $db_manager->execute("select related, hit, mlg, start, end from related"); message("Saving the related genes in the GFF file."); my @row12 = (); my $invalid_counter = 0; my $hit_code = 0; RELATED: while(@row12 = $sth12->fetchrow_array()) { my $related = shift @row12; my $hit = shift @row12; my $mlg = shift @row12; my $start = shift @row12; my $end = shift @row12; my $valid = FALSE; # See if the locus name was changed if(exists($name_changes{$hit})) { $hit = $name_changes{$hit}; 
} # if my $sth12a = $db_manager->execute("select mlg, start, end from clone_locations where clone = '$hit'"); my @row12a = (); while(@row12a = $sth12a->fetchrow_array()) { $valid = TRUE; my $mlg = shift @row12a; my $start = shift @row12a; my $end = shift @row12a; my $track = "RelatedGenes"; if($related =~ /^Glycine/) { $track = "RelatedG"; } # if if($related =~ /^G.max/) { $track = "RelatedG"; } # if my $name = $related; my $note = $hit; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while # precheck the ambiguous sat's if($hit =~ /^Sat\d/) { substr($hit, 3, 0) = "t"; $hit_code = 0; my $sth12a1 = $db_manager->execute("select mlg from loci where locus = '$hit'"); my @row12a1 = (); while(@row12a1 = $sth12a1->fetchrow_array()) { my $mlg = shift @row12a1; $hit_code = 1; } # while substr($hit, 3, 1) = "_"; my $sth12a2 = $db_manager->execute("select mlg from loci where locus = '$hit'"); my @row12a2 = (); while(@row12a2 = $sth12a2->fetchrow_array()) { my $mlg = shift @row12a2; if($hit_code == 1) { $hit_code = 3; } # if else { $hit_code = 2; } # else } # while if($hit_code == 0) { substr($hit, 3, 1) = ""; } # if elsif($hit_code == 1) { substr($hit, 3, 1) = "t"; } # if elsif($hit_code == 3) { substr($hit, 3, 1) = ""; } # elsif } # if my $sth12b = $db_manager->execute("select mlg, anchor from loci where locus = '$hit'"); my @row12b = (); while(@row12b = $sth12b->fetchrow_array()) { $valid = TRUE; my $mlg = shift @row12b; my $anchor = shift @row12b; $anchor += 101 if($anchor < 101); my $overflow = ($anchor + 100) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - 100; my $end = $anchor + 100; my $track = "RelatedGenes"; if($related =~ /^Glycine/) { $track = "RelatedG"; } # if if($related =~ /^G.max/) { $track = "RelatedG"; } # if my $name = $related; my $note = $hit; print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note \"$note\"\n"; } # while if($valid == FALSE) { $invalid_counter++; if($bad_relateds_file_opened == TRUE) { print BAD_RELATEDS_FILE "$hit\t$mlg\t$start\t$end\t$related\n"; } # if } # if } # while if($bad_relateds_file_opened == TRUE) { close BAD_RELATEDS_FILE; } # if message("Related Genes not located: $invalid_counter"); } # if else { message("Not getting the Related Genes info"); } # else ########################### ########## Confirmed ###### ########################### if($self->{con}) { message("Getting the Confirmed info"); my $sth13 = $db_manager->execute("select clone, mlg, cM from confirmed"); message("Saving the Confirmeds in the GFF file."); my @row13 = (); while(@row13 = $sth13->fetchrow_array()) { my $clone = shift @row13; my $mlg = shift @row13; my $cM = shift @row13; next if($cM == -1); next if($mlg =~ /^Y/); my $anchor = int($cM * FACTOR); my $half_length = int(AVE_CLONE_LENGTH / 2); $anchor = $half_length + 1 if ($anchor < $half_length + 1); my $overflow = ($anchor + $half_length) - $mlg_lengths{$mlg}; if($overflow >= 0) { $anchor -= $overflow + 200; } # if my $start = $anchor - $half_length; my $end = $start + AVE_CLONE_LENGTH; # see if this clone matches any existing clones my $matches = FALSE; my $sth13a = $db_manager->execute("select clone from clone_locations where clone = '$clone'"); my @row13a = (); while(@row13a = $sth13a->fetchrow_array()) { my $temp_clone = shift @row13a; $matches = TRUE; } # while my $track = "Confirmed"; my $name = $clone; # my $note = $clone; # $note .= ", $comment" if($comment ne ""); if($matches == TRUE) { print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . "\t.\t+\t.\tSequence \"$name\" ; Note \"Matches\"\n"; } # if else { print GFF_FILE "mlg$mlg\tvarious\t$track\t$start\t$end" . 
"\t.\t+\t.\tSequence \"$name\" ; Note\n"; } # else } # while } # if (mtps) else { message("Not getting the EST info"); } # else message("Disconnecting from the database"); $db_manager->disconnect; close GFF_FILE; } # if($file_opened) else { # The GFF file could not be opened message("$gff_filename could not be opened: $!"); } # else press_enter; } # write 1