A Web interface for querying the data structures would have been nice. However, to keep the example simple, we provide here only the code for a stand-alone program.
You are welcome to modify the code to add a Web interface and to allow additional queries and manipulations of the information.
#!/usr/local/bin/perl
use strict 'vars';
use warnings;
#define file
my $source_file = "results/tyrosine_kinases_dump";
#declare variables for the data structures.
#their names must match the variable names assigned in the dump file:
#the string eval below runs in this lexical scope and fills them in by name
my ($genes, $chrs);
#read data structures from file.
#three-argument open with a lexical handle: the file name can never be
#mistaken for an open mode, and the handle is not a global bareword
open (my $source_fh, '<', $source_file)
    or die "cannot open \"$source_file\": $!";
my $dump_code = do {
    local $/; #read in file all at once; 'local' puts the special variable $/
              #back to normal automatically when this block ends
    <$source_fh>;
};
defined $dump_code
    or die "cannot read \"$source_file\": $!";
close $source_fh;
#NOTE(review): a string eval executes the dump file as Perl code,
#so the dump file must come from a trusted source
eval $dump_code; #recreate the data structures
die "cannot recreate data structures from \"$source_file\"; $@" if $@;
# $@ is a special Perl variable that holds an eval error, if there is one.
#the subroutines below dereference both structures as hash references,
#so stop here with a clear message if the dump file did not define them
die "\"$source_file\" did not define \$genes and \$chrs as hash references"
    unless ref ($genes) eq 'HASH' && ref ($chrs) eq 'HASH';
#retrieve information from the data structures, by activating several subroutines
#1. print all gene symbols, sorted alphabetically
print_all_genes ($genes);
#2. ask the user to type in a gene symbol, and provide gene details.
show_gene_information ($genes);
#3. show list of genes in a given chromosome
show_genes_in_chromosome ($chrs);
###########################################################
# subroutines
###########################################################
#list every gene symbol held in the data structure, in sorted order
#  $genes - reference to a hash whose keys are the gene symbols
sub print_all_genes {
    my ($genes) = @_; #data structure handed over by the main program
    my $heading = "\nAll tyrosine kinase-rlated genes from LocusLink:\n\n";
    #default sort order of the hash keys, joined into one line
    my $symbol_line = join (" ", sort keys %{$genes});
    print $heading;
    print $symbol_line, "\n";
}
###########################################################
#allow the user to query the $genes data structure:
#the user enters a gene symbol and gets the details stored for that gene.
#  $genes - reference to a hash of gene details; the lookup is done with
#           the upper-cased symbol typed by the user
#returns 1 after a lookup (whether or not the symbol was found);
#returns nothing if the input was missing or not a valid symbol
sub show_gene_information {
    my ($genes) = @_; #get data structure from main program
    print "\nTo get details on a given gene,\n",
          "please enter gene symbol (case insensitive): ";
    my $symbol = <STDIN>;
    #<STDIN> returns undef at end of input (e.g. an empty or closed pipe);
    #treat that as an empty answer so it is rejected below without warnings
    $symbol = '' unless defined $symbol;
    chomp ($symbol);
    #validate user input. exit subroutine if input is not valid.
    #\A and \z anchor the whole string, so nothing may follow the symbol
    if ($symbol !~ /\A\w+\z/) {
        print "Sorry, wrong input.\n";
        return;
    }
    #convert user-entered symbol to upper case
    $symbol = uc $symbol;
    #to help you write/understand the code for accessing the genes data structure,
    #look at the tyrosine_kinases_dump file
    my $details = $genes->{$symbol}; #$details is now a data structure -
                                     #it is a reference to the hash that contains
                                     #the details for the given gene
    #if gene symbol is not found - print error message.
    if (!$details) {
        print "Sorry, gene symbol is not found.\n";
        return 1;
    }
    #a record that lacks one of the list fields is shown with an empty list
    #instead of dereferencing an undefined value
    my @pubmed_ids   = @{ $details->{'PubMed IDs'}      || [] };
    my @mrna_accs    = @{ $details->{'mRNA acc nrs'}    || [] };
    my @protein_accs = @{ $details->{'protein acc nrs'} || [] };
    #gene exists in the data structure: print details
    print "\n";
    print "Gene name: $details->{'gene name'}\n",
          "LocusLink ID: $details->{'LocusLink id'}\n",
          "Chromosome: $details->{'chromosome'}\n",
          "PubMed ID(s): ",
          join ( ", ", @pubmed_ids ), "\n",
          "mRNA Acc.: ",
          join ( ", ", @mrna_accs ), "\n",
          "Protein Acc.: ",
          join ( ", ", @protein_accs ), "\n";
    #you can use the gene symbol and the various IDs and accession numbers
    #in the gene details to provide Web links to relevant databases, such as
    #LocusLink itself, GeneCards, GenBank and PubMed. Here is an example
    #for providing links to PubMed:
    print "\nLink(s) to PubMed:\n";
    my $PubMed_basic_URL = "http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Abstract&list_uids=";
    foreach my $pubmed_id (@pubmed_ids) {
        print "${PubMed_basic_URL}$pubmed_id\n";
    }
    return 1;
}
###########################################################
# show list of genes in a given chromosome
#  $chrs - reference to a hash that maps a chromosome name (1 .. 22, X, Y)
#          to a reference to the list of gene symbols on that chromosome
#returns 1 after a valid chromosome was queried;
#returns nothing if the input was missing or not a chromosome name
sub show_genes_in_chromosome {
    my ($chrs) = @_; #get data structure from main program
    #define chromosomes list
    my @chromosomes = (1 .. 22, 'X', 'Y');
    #ask user to enter chromosome name
    print "\nTo get all genes in a given chromosome,\n",
          "please enter chromosome number: ";
    my $chr = <STDIN>;
    #<STDIN> returns undef at end of input (e.g. an empty or closed pipe);
    #treat that as an empty answer so it is rejected below without warnings
    $chr = '' unless defined $chr;
    chomp ($chr);
    #convert chromosome to upper case
    $chr = uc $chr;
    #validate user input. exit subroutine if input is not valid.
    #the input is compared as a plain string: putting it inside a regular
    #expression would let '.' or '1|2' pass as a chromosome, and an unbalanced
    #'(' would stop the program with a regex compilation error
    unless (grep { $_ eq $chr } @chromosomes) { #read about Perl's 'grep' function
                                                #in one of Perl books or manuals
        print "Sorry, wrong input.\n";
        return;
    }
    #print gene symbols in the given chromosome
    print "\nChromosome $chr contains the following gene(s):\n\n";
    #a chromosome that is missing from the data, or has an empty list, has no genes
    my @gene_symbols = @{ $chrs->{$chr} || [] };
    if (@gene_symbols) { #if chromosome contains genes - print them in alphabetical order
        print join (" ", sort @gene_symbols), "\n";
    } else { #if chromosome does not contain genes - send message
        print "Sorry, in the current data set chromosome $chr does not contain genes.\n";
    }
    print "\n";
    #TRY IT YOURSELF -
    #
    #how will you modify the subroutine so that it does not only print
    #gene symbols, but also gives the full name of each gene?
    return 1;
}