CSC111 getUrlInfo.py

From CSclasswiki
Jump to: navigation, search

--Thiebaut 18:38, 20 April 2009 (UTC)


# getUrlInfo.py
# D. Thiebaut
#
# Example program for a class that retrieves a section of text
# from a document located on the Web.
# The main program at the end illustrates how to use the class.


import urllib2
import sys


class getUrlInfo:
    """Fetch a Web document and return the text found between two markers.

    Internal variables:
      url:         a url template containing one %s formatting string;
                   getInfo() substitutes its identifier argument into it
      beginMarker: the text that immediately precedes the section we're
                   interested in
      endMarker:   the text that immediately follows the section we're
                   interested in
    """

    #--- constructor ---
    def __init__( self ):
        """Create an object with no url and no markers set (all None)."""
        self.url         = None
        self.beginMarker = None
        self.endMarker   = None

    #--- mutators: used to set the different private data ---
    def setUrl( self, u ):
        """Store u as the url template used by getInfo()."""
        self.url = u

    def setMarkers( self, begin, end ):
        """Store the texts that delimit the section returned by getInfo()."""
        self.beginMarker = begin
        self.endMarker = end


    #--- getInfo: the main workhorse.  It receives an identifier that is substituted
    #--- in the url when the html text is retrieved.
    def getInfo( self, identifier ):
        """Return the text located between the two markers in the document.

        identifier is substituted for the %s of the url, the resulting
        document is read in full, and the text between the first occurrence
        of beginMarker and the first occurrence of endMarker that follows it
        is returned.

        Returns None if the begin marker is not found, or if no end marker
        is found after it.  Errors raised while opening or reading the url
        are not caught here and propagate to the caller.
        """

        #--- first get the html from the url; always close the response ---
        f = urllib2.urlopen( self.url % identifier )
        try:
            html = f.read()
        finally:
            f.close()

        #--- locate the begin marker ---
        beginIndex = html.find( self.beginMarker )

        #--- some robustness in case we haven't found the marker ---
        if beginIndex == -1:
            return None

        #--- advance to the beginning of the text we want ---
        beginIndex = beginIndex + len( self.beginMarker )

        #--- locate the end marker, looking only *after* the begin marker:
        #--- an earlier occurrence of the end marker elsewhere in the page
        #--- must not be mistaken for the end of our section ---
        endIndex = html.find( self.endMarker, beginIndex )
        if endIndex == -1:
            return None

        #--- get it! ---
        return html[ beginIndex : endIndex ]



# ---------------------------------------------------------
# test area
# ---------------------------------------------------------

if __name__=="__main__":

    #--- create an object of type getUrlInfo ---
    tempSite = getUrlInfo()

    #--- initialize it ---
    tempSite.setUrl( "http://www.weather.com/outlook/"
                     +"travel/businesstraveler/local/%s" )
    # NOTE(review): the end marker below is a non-ASCII character.  Python 2
    # rejects non-ASCII source bytes unless the file declares its encoding,
    # and the marker must match the bytes actually served by the page
    # (possibly the HTML entity for the degree sign) -- confirm.
    tempSite.setMarkers( "\"obsTempTextA\">", "°" )

    #--- get the information from the site ---
    # (named zipCode rather than zip so the builtin zip() is not shadowed)
    zipCode = "01002"
    temp = tempSite.getInfo( zipCode )
    if temp is not None:
        print( "temperature in %s = %s degrees" % ( zipCode, temp ) )
    else:
        print( "Error retrieving temperature" )


    #--- get information for several areas in Massachusetts---
    #--- taken from http://www.mongabay.com/igapo/zip_codes_mass2.htm ---
    massZip = """
        02351 Abington
        02018 Accord
        01720 Acton
        02743 Acushnet
        01220 Adams
        01001 Agawam
        02134 Allston
        01913 Amesbury
        01002 Amherst"""   # just a sample of the whole list

    for line in massZip.split( '\n' ):
        #--- split off the zip code only, so that town names made of
        #--- several words are kept whole instead of being skipped ---
        words = line.strip().split( None, 1 )
        if len( words ) != 2: continue
        zipCode, town = words

        #--- report failures the same way as the single lookup above,
        #--- rather than printing "None" as a temperature ---
        temp = tempSite.getInfo( zipCode )
        if temp is not None:
            print( "temperature in %s is %s" % ( town, temp ) )
        else:
            print( "Error retrieving temperature for %s" % town )