XGrid Program stripAmino.pl

From CSclasswiki
Jump to: navigation, search

--Thiebaut 21:54, 9 November 2008 (UTC)


#! /usr/bin/perl -w
# stripAmino.pl
# D. Thiebaut
# Given the name of a pdb file (uncompressed), this program goes to
# URL http://xgridmac.dyndns.org/~thiebaut/pdbFiles/ and gets the 
# given file from there.
# Then, the program scans the file and prints the lines starting
# with the keyword SEQRES to standard output.
#  
# Syntax:
#         ./stripAmino.pl  pdbFile 
#
#---------------------------------------------------------------------------
use  LWP::Simple;
# Base URL of the directory holding the pdb files; readWebFile appends the
# name of the requested file to it.
my $URL = "http://xgridmac.dyndns.org/~thiebaut/pdbFiles/";

#---------------------------------------------------------------------------
# readWebFile: given the name of a pdb file as parameter, fetches it from
# the directory url defined in $URL.
#
# Parameter: $logfile -- name of the pdb file; it is appended as-is to $URL.
# Returns:   the list of lines of the pdb file, without their end-of-line
#            character (the contents are split on "\n").
# Dies:      with "Could not get <url>" when get() returns undef, i.e. when
#            the file could not be fetched.
#---------------------------------------------------------------------------
sub readWebFile {
    my ( $logfile ) = @_;

    my $url = $URL . $logfile;

    # The contents and the lines are lexical (my) so that they stay private
    # to this call instead of living in package globals.
    my $content = get( $url );
    unless ( defined( $content ) ) {
        die "Could not get $url\n";
    }

    my @lines = split( /\n/, $content );
    return @lines;
}

#---------------------------------------------------------------------------
# readFile: reads a file, whether it is compressed (.Z), gzipped (.gz),
#           zipped (.zip), or regular.
#
# Parameter: $logfile -- path of the file to read.  The kind of file is
#            decided by the extension found at the very end of the name.
# Returns:   the list of lines of the (decompressed) file, each line still
#            carrying its end-of-line character.  If the file, or the
#            decompression command, cannot be opened, a warning is issued
#            and the list returned is empty.
#---------------------------------------------------------------------------
sub readFile {
    my ( $logfile ) = @_;

    #--- pick the decompression command (as a list of words), if any ---
    # The patterns are anchored with \z so that only a true extension
    # matches, and not a name that merely contains ".Z", ".gz" or ".zip".
    my @command = ();
    if    ( $logfile =~ /\.Z\z/ )   { @command = ( 'uncompress', '-c' ); }
    elsif ( $logfile =~ /\.gz\z/ )  { @command = ( 'gzip', '-d', '-c' ); }
    elsif ( $logfile =~ /\.zip\z/ ) { @command = ( 'unzip', '-p' ); }

    #--- open the file, or a pipe from the decompression command ---
    my $fh;
    my $opened;
    if ( @command ) {
        # List form of the pipe open: the command is run without a shell,
        # so the file name is passed to it literally.
        $opened = open( $fh, '-|', @command, $logfile );
    }
    else {
        # 3-argument open: the file name cannot be mistaken for a mode.
        $opened = open( $fh, '<', $logfile );
    }

    #--- read all the lines ---
    my @lines = ();
    if ( $opened ) {
        @lines = <$fh>;
        close( $fh );
    }
    else {
        warn "Could not open $logfile: $!\n";
    }
    return @lines;
}

#---------------------------------------------------------------------------
# MAIN: checks syntax
#       gets pdb file name from command line
#       fetches pdb file from web server (dies if it cannot be fetched)
#       processes pdb file and prints only SEQRES lines
#
# Prints the syntax message and exits with status 1 when no file name is
# given on the command line.
#---------------------------------------------------------------------------
sub main {

    #--- check syntax: the pdb file name is required ----
    if ( @ARGV < 1 ) {
        print "Syntax stripAmino.pl filename\n\n";
        exit(1);
    }

    #--- get the lines of the pdb file ---
    my @lines = readWebFile( $ARGV[0] );

    #--- parse each line, print the ones starting with SEQRES ---
    foreach my $line ( @lines ) {
        if ( $line =~ /^SEQRES/ ) {
            chomp $line;    # remove the end-of-line character, if any
            print $line . "\n";
        }
    }
}

# Entry point: run the script.
main();