XGrid Program computeAminoFreq.pl

From CSclasswiki
Jump to: navigation, search

--Thiebaut 21:59, 9 November 2008 (UTC)


#! /usr/bin/perl -w
# computeAminoFreq.pl
# D. Thiebaut
# Given a file containing SEQRES lines of a PDB file, computes the
# frequency of occurrence of all the amino acids listed.
# Prints out the frequencies.
# 
# Syntax:
#        ./computeAminoFreq.pl  < tempFile
#---------------------------------------------------------------------------


#---------------------------------------------------------------------------
# MAIN: 
# 1. reads information from standard input
# 2. assumes lines look like
# SEQRES   1 A   63  MET LYS ARG GLU SER HIS LYS HIS ALA GLU GLN ALA ARG          
# SEQRES   2 A   63  ARG ASN ARG LEU ALA VAL ALA LEU HIS GLU LEU ALA SER         
# 3. breaks line into amino acids
# 4. counts the number of occurrences of each one
# 5. prints results as 
# MET 6
# ASN 7
# LEU 3
# ...
#---------------------------------------------------------------------------
sub main {
    # Reads SEQRES records from STDIN, tallies every residue name found
    # after the four leading header fields, and prints one "NAME COUNT"
    # line per residue to the currently selected output handle.
    # Lines are printed in alphabetical order of residue name so that the
    # output is reproducible from run to run.
    # Takes no arguments and returns nothing.
    #
    # NOTE(review): fields are split on whitespace, so a record whose
    # chain-ID column is blank would carry only three header fields and
    # its first residue would be discarded as the fourth -- confirm that
    # the input always carries a chain ID.

    my %aminos = ();

    #--- parse each line (streamed: the input is never held in memory) ---
    while ( my $line = <STDIN> ) {

        # Only SEQRES records list residues; blank lines and any other
        # record type must not contribute to the counts.
        next unless $line =~ /^\s*SEQRES\b/;

        # split( " ", ... ) ignores leading and trailing whitespace, the
        # newline included, so no chomp is needed.  The first 4 words
        # (in the sample records: SEQRES, serial number, chain ID and
        # residue count) are thrown away.
        my ( undef, undef, undef, undef, @words ) = split( " ", $line );

        #--- count amino acids ---
        foreach my $amino ( @words ) {
            $aminos{ $amino }++;    # a missing key starts from 0
        }
    }

    #--- display amino acids and # occurrences, sorted by name ---
    foreach my $key ( sort keys %aminos ) {
        print $key . " " . $aminos{ $key } . "\n";
    }

    return;
}

# Script entry point: invoke main() when the file is executed.
main();