ruk·si

🐍 Python
BeautifulSoup

Updated at 2012-12-29 10:43
import urllib2
from BeautifulSoup import BeautifulSoup

# Scrape the daily temperature for every day of 2010 at airport EFTU from
# wunderground.com and write one "YYYYMMDD,temperature" line per day to
# wunder-data.txt.
#
# The output file is managed by a "with" block so it is closed even when a
# download or a parsing step raises part-way through the year.
with open("wunder-data.txt", "w") as f:

    for m in range(1, 13):
        for d in range(1, 32):

            # Stop once we are past the last day of the month. February is
            # capped at 28 days because the year is fixed to 2010 in the URL
            # and timestamp below, and 2010 is not a leap year.
            if m == 2 and d > 28:
                break
            elif m in [4, 6, 9, 11] and d > 30:
                break

            # Open wunderground.com url.
            # (print with a single parenthesized string behaves the same as
            # the plain Python 2 print statement.)
            print("Getting data for " + str(d) + "." + str(m) + ".2010 ...")
            url = ("http://www.wunderground.com/history/airport/EFTU/2010/"
                   + str(m)
                   + "/"
                   + str(d)
                   + "/DailyHistory.html")
            page = urllib2.urlopen(url)

            # Get temperature from page: the <span> inside the third element
            # carrying class "nobr" is taken as the day's temperature.
            # NOTE(review): this index depends on the page layout -- confirm
            # it still matches if the site markup changes.
            soup = BeautifulSoup(page)
            nobr_tags = soup.findAll(attrs={"class": "nobr"})
            dayTemp = str(nobr_tags[2].span.string)

            # Build timestamp as YYYYMMDD, zero-padding month and day
            timestamp = "2010%02d%02d" % (m, d)
            print("...got " + dayTemp)

            # Write timestamp and temperature to the file
            f.write(timestamp + "," + dayTemp + "\n")

    # Done; leaving the "with" block closes the file
    print("Done! Closing file...")