XKCD Comics Fetcher
Wednesday, July 29, 2009 12:45:21 PM
Do you like XKCD? Do you love Python? Well, here's a way to get your fix of romantic geekiness! I whipped up a Python script that downloads all the XKCD comics straight to the comfort of your desktop. Just make sure you have a Python interpreter installed on your computer, run the script below, and away you go!
Click here to download the script!
Note! More than 600 images will be downloaded in a single run, so just sit back and enjoy the view! The alt text of each comic is saved in the file alt.txt.
Click here to download the script!
Note! More than 600 images will be downloaded in a single run, so just sit back and enjoy the view! The alt text of each comic is saved in the file alt.txt.
import urllib
import sgmllib
import re
import os
# Comic numbers to fetch, given as range(first, last + 1).  Edit the two
# bounds to choose which comics are downloaded; as written this covers
# comics 1 through 614.
comicRange = range(1, 615)
class Parser(sgmllib.SGMLParser):
url = 'http://xkcd.com/'
img_url = 'http://imgs.xkcd.com/comics/'
def __init__(self, verbose=0):
sgmllib.SGMLParser.__init__(self, verbose)
self.id = 0;
self.title = ''
self.alt = ''
self.image = ''
def parse(self, idx):
self.feed(urllib.urlopen(self.url + str(idx)).read())
def start_img(self, attributes):
"Parse the image file and alt text"
for name, value in attributes:
if name == 'src' and value.find(self.img_url) >= 0:
self.image = value[len(self.img_url):]
if name == 'title' and self.image:
self.alt = value
def handle_data(self, data):
"""Parse the title and link id"""
if data.find('404') >= 0:
self.id = -1;
if self.get_starttag_text() == '<h1>' and data:
self.title = data
if self.get_starttag_text() == '<h3>' and data:
if (data.find('Permanent link') >= 0):
m = re.search('\d+', data)
self.id = int(m.group(0))
def download(url, filename=None):
    """Download url and save its contents in the current directory.

    url      -- address of the file to fetch
    filename -- name to save the file under; when omitted, the last path
                component of url is used

    Raises ValueError, before anything is fetched, when no file name is
    given and none can be derived from url (for example when url ends
    with '/').
    """
    if filename is None:
        filename = url.rsplit('/', 1)[-1]
    if not filename:
        raise ValueError('cannot derive a file name from %r' % (url,))
    print('Downloading...\n')
    remote = urllib.urlopen(url)
    try:
        imgfile = remote.read()
    finally:
        remote.close()
    # Binary mode: the payload is image data and must be written untouched.
    with open(filename, 'wb') as outfile:
        outfile.write(imgfile)
if __name__ == '__main__':
    # Fetch every comic in comicRange: parse its page, append its details
    # to alt.txt and save its image in the current directory.
    # xkcd stays a module-level name so that helpers in this file can see it.
    xkcd = Parser()
    for i in comicRange:
        # There is no comic 404 (that address serves an error page), so skip
        # it.  The original "pass" here did nothing and the page was fetched.
        if i == 404:
            continue
        xkcd.parse(i)
        if not xkcd.image:
            # No comic image on the page: nothing to record or download.
            print('Skipping comic ' + str(i) + ': no image found.')
            continue
        alt = ('-------------------------\n' +
               'ID: ' + str(xkcd.id) + '\n' +
               'TITLE: ' + xkcd.title + '\n' +
               'IMAGE: ' + xkcd.image + '\n' +
               'ALT: ' + xkcd.alt + '\n')
        print(alt)
        # Append per comic so an interrupted run keeps what it has so far.
        with open('alt.txt', 'a') as out:
            out.write(alt)
        download(xkcd.img_url + xkcd.image)
    print('\nDownload complete.')
    xkcd.close()






