XGrid Program computeAminoFreq.pl
--Thiebaut 21:59, 9 November 2008 (UTC)
#! /usr/bin/perl -w
# computeAminoFreq.pl
# D. Thiebaut
# Given a file containing SEQRES lines of a PDB file, computes the
# frequency of occurrence of all the amino acids listed.
# Prints out the frequencies.
#
# Syntax:
# ./computeAminoFreq.pl < tempFile
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
# MAIN:
# 1. reads information from standard input
# 2. assumes lines look like
# SEQRES 1 A 63 MET LYS ARG GLU SER HIS LYS HIS ALA GLU GLN ALA ARG
# SEQRES 2 A 63 ARG ASN ARG LEU ALA VAL ALA LEU HIS GLU LEU ALA SER
# 3. breaks line into amino acids
# 4. counts the number of occurrences of each one
# 5. prints results as
# MET 6
# ASN 7
# LEU 3
# ...
#---------------------------------------------------------------------------
sub main {
    # Tally the residue names found on SEQRES records read from standard
    # input and print one "NAME COUNT" line per distinct residue.
    #
    # Takes no arguments.  All results are written with print to the
    # currently selected output handle (normally STDOUT).
    my %count_of = ();

    #--- parse each line, streaming so the whole input is never held in memory ---
    while ( my $line = <STDIN> ) {

        # split( " ", ... ) discards leading and trailing whitespace,
        # including the line terminator, so no chomp is needed.
        my @words = split( " ", $line );

        # Skip blank lines and any record that is not a SEQRES record
        # (e.g. when a complete PDB file is fed in) rather than counting
        # their fields as amino acids.
        next if !@words || $words[0] ne 'SEQRES';

        # Drop the four header words (in the documented example:
        # "SEQRES", the line number, the chain letter and the residue
        # total).  Everything that remains is a residue name.
        # NOTE: this assumes all four header words are present.
        splice( @words, 0, 4 );

        #--- count amino acids ---
        $count_of{$_}++ for @words;
    }

    #--- display amino acids and # occurrences; sorted by name so the
    #--- output is identical from one run to the next ---
    foreach my $amino ( sort keys %count_of ) {
        print $amino . " " . $count_of{$amino} . "\n";
    }
    return;
}
# Script entry point: process standard input and print the amino acid counts.
main();