# Extract URLs from a web page to a CSV file
#
# $ python extract-urls.py http://mysite.com/mypage.html myfile.csv
#
# By Adrian Short
# 6 Sep 2012

import sys
import urllib
import csv
from bs4 import BeautifulSoup

# pop(1) removes and returns each command-line argument in turn
url = sys.argv.pop(1)
out_fn = sys.argv.pop(1)    # output filename for CSV file

infile = urllib.urlopen(url)
html = infile.read()
soup = BeautifulSoup(html)

with open(out_fn, 'wb') as outfile:
    writer = csv.writer(outfile)

    # Calling the soup object directly is shorthand for soup.find_all('a')
    for link in soup('a'):
        writer.writerow([link.string, link.get('href')])
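
# The script above targets Python 2 (urllib.urlopen and a binary-mode CSV file).
# Below is a minimal sketch of the same URL-extraction workflow for Python 3,
# assuming only the standard library plus bs4. The explicit 'html.parser'
# argument and the CSS selector 'a[href]' are choices made here for
# illustration, not part of the original script.

import csv
import sys
import urllib.request

from bs4 import BeautifulSoup

url, out_fn = sys.argv[1], sys.argv[2]

# Fetch the page; urlopen() acts as a context manager in Python 3
with urllib.request.urlopen(url) as response:
    html = response.read()

soup = BeautifulSoup(html, 'html.parser')

# newline='' lets the csv module manage line endings itself
with open(out_fn, 'w', newline='', encoding='utf-8') as outfile:
    writer = csv.writer(outfile)

    # select() takes a real CSS selector; 'a[href]' skips anchors with no href
    for link in soup.select('a[href]'):
        writer.writerow([link.get_text(strip=True), link['href']])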