My Opera is closing 3rd of March

Enveloped Ideas

Random words and thoughts

XKCD Comics Fetcher

, ,

Do you like XKCD? Do you love Python? Well, here's a way to get your romantic, geeky fix! I whipped up a Python script that downloads all the XKCD comics right to the comfort of your desktop. Just make sure you have a Python 2 interpreter installed on your computer (the script relies on the sgmllib module, which Python 3 no longer ships), run this script thingy here, and away you go!

Click here to download the script!

Note! The script downloads 600+ images in a single run, so just sit back and enjoy the view! The alt text of each comic is saved in the file alt.txt.

import urllib
import sgmllib
import re
import os

# Comic numbers to download. range() excludes its upper bound, so
# range(1, 615) covers comics 1 through 614; change the two values to
# fetch a different span.
comicRange = range(1, 615)


class Parser(sgmllib.SGMLParser):
    
    url = 'http://xkcd.com/'
    img_url = 'http://imgs.xkcd.com/comics/'

    def __init__(self, verbose=0):        

        sgmllib.SGMLParser.__init__(self, verbose)
        self.id = 0;
        self.title = ''
        self.alt = ''
        self.image = ''

    def parse(self, idx):
        self.feed(urllib.urlopen(self.url + str(idx)).read())
        
    def start_img(self, attributes):
        "Parse the image file and alt text"

        for name, value in attributes:
            if name == 'src' and value.find(self.img_url) >= 0:
                self.image = value[len(self.img_url):]
                
            if name == 'title' and self.image:
                self.alt = value

    def handle_data(self, data):
        """Parse the title and link id"""

        if data.find('404') >= 0:
            self.id = -1;

        if self.get_starttag_text() == '<h1>' and data:
            self.title = data

        if self.get_starttag_text() == '<h3>' and data:
            if (data.find('Permanent link') >= 0):
                m = re.search('\d+', data)
                self.id = int(m.group(0))


def download(url, filename=None):
    """Fetch url and write the raw bytes to a local file.

    url      -- full address of the file to download.
    filename -- path to write to. When omitted, the last path component
                of url is used, so the file lands in the current
                working directory under its remote name.
    """
    print('Downloading...\n')

    if filename is None:
        # Derive the name from the URL itself so the function does not
        # depend on any module-level parser object being defined.
        filename = url.rsplit('/', 1)[-1]

    response = urllib.urlopen(url)
    try:
        imgfile = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    # `with` closes the file even if the write raises.
    with open(filename, 'wb') as outfile:
        outfile.write(imgfile)


if __name__ == '__main__':

    # Open the alt-text log once for the whole run; `with` closes (and
    # flushes) it even if a download fails part-way through.
    with open('alt.txt', 'a') as out:

        for i in comicRange:

            # The original `pass` here was a no-op, so number 404 was still
            # fetched. `continue` actually skips it (presumably there is no
            # comic 404 -- the parser also treats '404' text as not found).
            if i == 404:
                continue

            # Use a fresh parser for every comic so that fields from the
            # previous page cannot leak into this one.
            xkcd = Parser()
            xkcd.parse(i)
            xkcd.close()

            alt = ('-------------------------\n' +
                   'ID:    ' + str(xkcd.id) + '\n' +
                   'TITLE: ' + xkcd.title + '\n' +
                   'IMAGE: ' + xkcd.image + '\n' +
                   'ALT:   ' + xkcd.alt + '\n')

            print(alt)
            out.write(alt)

            # Without an image name there is nothing to fetch and no file
            # name to save under, so skip the download for this comic.
            if not xkcd.image:
                print('No comic image found for #' + str(i) + ', skipping.\n')
                continue

            download(xkcd.img_url + xkcd.image)

    print('\nDownload complete.')

Doublemint Drive! | What I liked about the iPod classic

Write a comment

New comments have been disabled for this post.