XGrid MakeBatchMulti.py

From CSclasswiki
Jump to: navigation, search

--Thiebaut 00:38, 10 November 2008 (UTC)


#! /Library/Frameworks/Python.framework/Versions/Current/bin/python
# D. Thiebaut
#
# Syntax:
#        makeBatchMulti.py batchFileName
#
# A simple Python program that generates an XGrid batch file whose
# programs, data files, and commands are entered interactively.
#
# Example:
#  makeBatchMulti.py pipe7.plist
#
#
#  XGrid batch file maker
#  Creates a batch file for the XGrid system from a collection of 
#  data files and programs
#  
#  
#  Please enter your name to identify the batch job: dft
#  
#  Please enter the names of the different programs needed by the batch job.
#  Enter them one per line.  Press Enter twice when done.
#  Program #1 > pipe.pl
#  Program #2 > stripAmino.pl
#  Program #3 > computeAminoFreq.pl
#  Program #4 > 
#  
#  Please enter the names of the different data files needed.
#  No need to list the name of temporary files created by the programs.
#  Enter them one per line.  Press Enter twice when done.
#  Data file #1 > 
#  
#  Enter the commands that the XGrid should run.  Enter each command on one line.
#  For each line, enter the name of the program followed by all the arguments.
#  Do not use redirection or pipes in the command lines.
#  Enter an empty line to stop.
#  Command #1 > pipe.pl pdb100d.ent
#  Command #2 > pipe.pl pdb1a0a.ent
#  Command #3 > 
#  

import os
import sys
import popen2

# When True, main() ignores the command line, writes to "debugPipe.plist",
# and prints everything it collected before saving the batch file.
DEBUG = False

# ----------------------------------------------------------------------------------
# PLIST STRINGS
# ----------------------------------------------------------------------------------
# All templates below are filled in with the % operator.

# One data file entry: ( file name, hex-encoded contents ).
inputFilePlist           = """ "%s" = { fileData = <%s>; }; """
# One program entry, flagged executable: ( file name, hex-encoded contents ).
inputFileExecutablePlist = """  "%s" = { fileData = <%s>; isExecutable = YES; }; """
# One task: ( task index, comma-separated arguments, command ).
taskSpecification        = """%d = { arguments = ( %s ); command = "%s" ;  };"""

# The whole batch file, in order:
#   ( input file entries, job name, scheduler hint, submission identifier,
#     task specifications ).
# The job name is quoted so that a name containing spaces or punctuation
# still yields a well-formed property list; submissionIdentifier, which is
# built from the same name, was already quoted.
batch = """
{
    jobSpecification =     {
        applicationIdentifier = "com.apple.xgrid.cli";
        inputFiles =         {
            %s
        };
        name = "%s";
        schedulerHints =  { 0 = %s; };
        submissionIdentifier = "%s";
        taskSpecifications = {
        %s
        };
    };
}
"""

def getPlistFormat( fileName ):
    """Returns the contents of a file as the hex text used inside a PList
    <...> data element, or None if the file cannot be read.

    The bytes are written as two lowercase hex digits each, in groups of
    four bytes separated by single spaces, with no leading or trailing
    blanks.  This is the layout of
        hexdump -v -e ' "" 4/1 "%02x" " "' fileName
    once its output is stripped.

    The file is read directly instead of spawning hexdump through a shell,
    so file names containing spaces or shell metacharacters work, and no
    pipe or child process is left behind.

    fileName: path of the file to encode.
    """
    import binascii

    try:
        fin = open( fileName, "rb" )
        try:
            rawBytes = fin.read()
        finally:
            fin.close()
    except ( IOError, OSError ):
        # missing or unreadable file (or a directory): nothing to embed
        return None

    hexText = binascii.hexlify( rawBytes )
    if not isinstance( hexText, str ):
        # hexlify may hand back bytes; the templates expect a plain string
        hexText = hexText.decode( "ascii" )

    # 8 hex digits == 4 bytes per group
    groups = [ hexText[ k:k+8 ] for k in range( 0, len( hexText ), 8 ) ]
    return " ".join( groups )

def fileExists( fileName ):
    """ Tells whether fileName names an existing path (True) or not (False) """
    found = os.path.exists( fileName )
    return found

def getProgramFiles():
    """ Prompts the user for the names of the program files.

    Names are read one per line until an entry shorter than 2 characters
    (normally an empty line) is typed.  A name that does not exist on disk
    is rejected and asked for again.

    Returns a 2-element list [ programs, programsPList ]:
      programs      -- the accepted names, in entry order
      programsPList -- one inputFileExecutablePlist entry per program,
                       except programs given by absolute path and programs
                       whose contents could not be read
    """

    #--- get the programs needed
    print( "\nPlease enter the names of the different programs needed by the batch job." )
    print( "Enter them one per line.  Press Enter twice when done." )

    programs = []
    i = 1
    while True:
        while True:
            # strip before validating: the stripped name is what gets
            # stored, so it is also the name that must exist
            prog = raw_input( "Program #%d > " % i ).strip()
            if len( prog ) < 2 or fileExists( prog ): break
            print( "Error: File %s does not exist: please reenter!" % prog )
        i = i+1
        if len( prog ) < 2: break
        programs.append( prog )

    #--- compute the plist version of each file ---
    programsPList = []
    for progName in programs:
        # do not record programs that are specified by absolute paths
        if progName.startswith( '/' ):
            continue
        progPList = getPlistFormat( progName )
        if progPList is not None:
            programsPList.append( inputFileExecutablePlist % ( progName, progPList ) )

    return [programs, programsPList]

def getDataFiles():
    """ Prompts the user for the name of the data files to include.

    Names are read one per line until an entry shorter than 2 characters
    (normally an empty line) is typed.  A name that does not exist on disk
    is rejected and asked for again.

    Returns a 2-element list [ dataFiles, dataPLists ]:
      dataFiles  -- the accepted names, in entry order
      dataPLists -- one inputFilePlist entry per data file whose contents
                    could be read
    """

    #--- get the data files needed
    print( "\nPlease enter the names of the different data files needed." )
    print( "No need to list the name of temporary files created by the programs." )
    print( "Enter them one per line.  Press Enter twice when done." )

    dataFiles = []
    i = 1
    while True:
        while True:
            # strip before validating: the stripped name is what gets
            # stored, so it is also the name that must exist
            data = raw_input( "Data file #%d > " % i ).strip()
            if len( data ) < 2 or fileExists( data ): break
            print( "Error: File %s does not exist: please reenter!" % data )
        i = i+1
        if len( data ) < 2: break
        dataFiles.append( data )

    #--- compute the plist version of each file ---
    dataPLists = []
    for dataName in dataFiles:
        dataPList = getPlistFormat( dataName )
        if dataPList is not None:
            dataPLists.append( inputFilePlist % ( dataName, dataPList ) )

    return [dataFiles, dataPLists ]

def getCommandLine( dataFiles, programFiles ):
    """ Prompts the user for the different commands to execute.

    dataFiles, programFiles: lists of file names entered earlier.  The two
    lists are only used together, to recognize which words of a command
    refer to one of those files.

    Each command is split on whitespace.  Every word (the program name
    included) that matches a known file name and does not start with '/'
    is prefixed with "../working/".  Input stops at the first entry that,
    once surrounding blanks are removed, is shorter than 2 characters.

    Returns a list of taskSpecification strings, numbered from 0.
    """

    print( "\nEnter the commands that the XGrid should run.  Enter each command on one line." )
    print( "For each line, enter the name of the program followed by all the arguments." )
    print( "Do not use redirection or pipes in the command lines." )
    print( "Enter an empty line to stop." )

    # built once: the set of names does not change while commands are typed
    knownFiles = dataFiles + programFiles

    commandPList = []
    i = 0
    while True:
        # strip first so that a line made only of blanks ends the input
        # instead of producing an empty word list further down
        command = raw_input( "Command #%d > " % (i+1) ).strip()
        if len( command ) < 2:
            break

        newArgs = []
        for arg in command.split():
            if not arg.startswith( '/' ) and arg in knownFiles:
                arg = "../working/"+arg
            newArgs.append( arg )

        program = newArgs[ 0 ]
        argumentString = ", ".join( newArgs[ 1: ] )
        commandPList.append( taskSpecification % ( i, argumentString, program ) )
        i = i + 1

    return commandPList
    
                         
def createPList( userName, commandPList, programs,
                 programPLists, dataFiles, dataPLists ):
    """ Fills the batch template and returns the resulting PList text.

    userName is used both as the job name and, followed by " batch job",
    as the submission identifier.  programPLists and dataPLists supply the
    input file entries (programs first), commandPList the task entries.
    programs and dataFiles are accepted but not used here.
    """

    inputFilesText = "\n".join( programPLists + dataPLists )
    tasksText      = "\n".join( commandPList )
    jobIdentifier  = userName+" batch job"
    hint           = "mathgrid5"

    # order of the fields follows the order of the slots in the template
    fields = ( inputFilesText, userName, hint, jobIdentifier, tasksText )
    return batch % fields

def caption():
    """ Prints the banner shown when the program starts """
    bannerLines = ( "\n\nXGrid batch file maker",
                    "Creates a batch file for the XGrid system from a collection of ",
                    "data files and programs\n\n" )
    for line in bannerLines:
        print( line )

def main():
    """ Main program: prompts for information from user, package it into batch file.

    The name of the batch file comes from the first command-line argument;
    without one the syntax is printed and the program exits with status 1.
    When DEBUG is set, the command line is ignored, "debugPipe.plist" is
    used instead, and the collected information is printed before saving.
    """

    #--- display caption ---
    caption()

    #--- check number of arguments ---
    if not DEBUG and len( sys.argv ) < 2:
        print( "Syntax: ./makeBatchMulti.py batchFileName" )
        sys.exit( 1 )

    if not DEBUG:
        batchFileName = sys.argv[ 1 ]
    else:
        batchFileName = "debugPipe.plist"

    #--- get user identifier ---
    userName = raw_input( "Please enter your name to identify the batch job: " )

    #--- get programs ---
    [ programs, programPLists ] = getProgramFiles()

    #--- get data files ---
    [ dataFiles, dataPLists ] = getDataFiles()

    #--- get the commands to be executed ---
    # arguments passed in the order getCommandLine declares them
    commandPList = getCommandLine( dataFiles, programs )

    #--- create batch file ---
    batchFile = createPList( userName, commandPList, programs,
                             programPLists, dataFiles, dataPLists )

    if DEBUG:
        print( "\n\n=----------------------------------------------=" )
        print( "commandPList =  %s" % ( commandPList, ) )

        print( programs )
        print( "\n".join( programPLists ) )

        print( dataFiles )
        print( "\n".join( dataPLists ) )

        print( batchFile )

    #--- store information in file ---
    # close explicitly so the batch file is fully flushed to disk even if
    # the interpreter does not reclaim the file object right away
    out = open( batchFileName, "wt" )
    try:
        out.write( batchFile )
    finally:
        out.close()

main()