#!/usr/bin/env python

# The following script will automatically download the current week's
# TWIC games.  Be sure to edit the setting of chessdb_database and
# chessdb_spelling, near the top of the script, or pass them on the
# command line:
#
#     twic.py [DATABASE [SPELLING_FILE]]
#
# This version fixes a few bugs, and will use lftp if you have it
# installed, wget if not.  If neither of those work, it will attempt to
# download the URL directly, using Python's urllib module (which
# doesn't do regetting, retrying, etc).
#
# The code sticks to syntax that is valid on both Python 2.6+ and
# Python 3.
#
# By John Wiegley
#
# NOTE: This program comes with absolutely NO WARRANTY.  If anything
# goes wrong, it may delete your database entirely instead of adding
# to it!  I recommend backing up your database, trying it out, and
# then adding a "last week rollback" type of copy command to your
# cronjob, just to make sure.

import glob
import os
import re
import shutil
import string
import subprocess
import sys
import tempfile
import urllib
import zipfile

try:
    from urllib.request import urlopen, urlretrieve  # Python 3
except ImportError:
    from urllib import urlopen, urlretrieve  # Python 2

# Defaults used when the corresponding command-line argument is absent.
chessdb_database = "/home/johnw/chessdb/twic"
chessdb_spelling = "/home/johnw/src/chessdb/spelling.ssp"

TWIC_INDEX_URL = "http://www.chesscenter.com/twic/twic.html"


def _as_text(raw):
    """Return *raw* as a native string so the str regexes can search it.

    On Python 2 the page lines already are ``str`` and pass through
    unchanged; on Python 3 they are ``bytes`` and are decoded as
    Latin-1, which cannot fail on any byte value.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode("latin-1")


def run_command(argv):
    """Run the program *argv* (a list) without a shell.

    Returns the exit status, or 127 if the program could not be started
    at all, so callers can treat "not installed" like any other failure.
    """
    try:
        return subprocess.call(argv)
    except OSError:
        return 127


def fetch_index_lines(url):
    """Download *url* and return its lines as native strings."""
    page = urlopen(url)
    try:
        return [_as_text(raw) for raw in page.readlines()]
    finally:
        page.close()


def find_archive_url(lines):
    """Return the PGN archive URL found in the TWIC page *lines*.

    The page is scanned top to bottom, remembering the most recent
    ``http://`` URL.  The first line that contains ``>PGN<`` and no URL
    of its own ends the scan, and the remembered URL is the answer.
    Returns None if no such line is found, or if no URL preceded it.
    """
    archive = None
    for line in lines:
        match = re.search(r'http://[^"]+', line)
        if match:
            archive = match.group(0)
        elif ">PGN<" in line:
            return archive
    return None


def download_archive(archive, afile):
    """Download the URL *archive* into the local file *afile*.

    lftp is preferred when /usr/bin/lftp exists, since it does all the
    retrying and status display; otherwise wget is tried.  If the chosen
    tool fails, the URL is fetched directly with urlretrieve, which
    raises on failure.
    """
    if os.path.isfile("/usr/bin/lftp"):
        # NOTE(review): no shell is involved any more, but the URL is
        # still interpolated into lftp's own command string, so a URL
        # containing ';' would be read by lftp as a second command.
        command = ["lftp", "-c", "get %s -o %s; quit" % (archive, afile)]
    else:
        command = ["wget", "-O", afile, archive]

    if run_command(command) != 0:
        print("lftp or wget not working, retrying directly...")
        # urlretrieve writes the file itself; its return value is a
        # (filename, headers) tuple, not the downloaded data.
        urlretrieve(archive, afile)


def convert_archive(afile, workdir):
    """Convert every ``.pgn`` member of the zip *afile* with pgnchessdb.

    Each member is extracted into *workdir*, converted, and the
    extracted PGN is deleted again.  Returns the list of database base
    names handed to pgnchessdb.  As before, the exit status of
    pgnchessdb is not checked; a failed conversion surfaces later when
    the merge fails.
    """
    databases = []
    archive_zip = zipfile.ZipFile(afile)
    try:
        for index, member in enumerate(archive_zip.namelist()):
            if not re.search(r"\.pgn$", member):
                continue
            # Names are generated here, never taken from the archive,
            # so a hostile member name cannot escape workdir.
            output = os.path.join(workdir, "games%d.pgn" % index)
            outfd = open(output, "wb")
            try:
                outfd.write(archive_zip.read(member))
            finally:
                outfd.close()
            database = os.path.join(workdir, "games%d" % index)
            run_command(["pgnchessdb", "-f", output, database])
            databases.append(database)
            os.unlink(output)
    finally:
        archive_zip.close()
    return databases


def remove_database(base):
    """Delete every file matching ``<base>.s*``."""
    for path in glob.glob("%s.s*" % base):
        os.unlink(path)


def merge_databases(database, spelling, new_databases):
    """Merge *new_databases* into *database* and spell check the result.

    The merge is first written to ``<database>.new``; only if that
    succeeds is the old database removed and replaced.  The ``.new``
    copy is deleted only after the replacement succeeded, so a failure
    at any step leaves at least one complete copy on disk.

    Returns True on success, False if either scmerge run failed.
    """
    staging = database + ".new"

    print("Merging databases into %s.new..." % database)
    status = run_command(["scmerge", staging, database] + list(new_databases))
    if status != 0:
        print("scmerge failed with status %d; %s was not modified."
              % (status, database))
        return False

    print("Moving new database to %s..." % database)
    remove_database(database)
    status = run_command(["scmerge", database, staging])
    if status != 0:
        print("scmerge failed with status %d; the merged games are still in %s."
              % (status, staging))
        return False
    remove_database(staging)

    print("Spell checking the new database...")
    run_command(["sc_spell", database, spelling])
    return True


def main(argv=None):
    """Download this week's TWIC games and merge them into the database.

    *argv* defaults to ``sys.argv``; ``argv[1]`` overrides
    chessdb_database and ``argv[2]`` overrides chessdb_spelling.
    Returns the process exit status: 0 on success, 1 on failure.
    """
    if argv is None:
        argv = sys.argv
    database = argv[1] if len(argv) > 1 else chessdb_database
    spelling = argv[2] if len(argv) > 2 else chessdb_spelling

    # Make tools installed under /usr/local/bin reachable, e.g. from cron.
    path = os.environ.get("PATH")
    if path:
        os.environ["PATH"] = path + ":/usr/local/bin"
    else:
        os.environ["PATH"] = "/usr/local/bin"

    print("Downloading the Week in Chess main page...")
    archive = find_archive_url(fetch_index_lines(TWIC_INDEX_URL))
    if archive is None:
        print("Could not find PGN zipfile name in twic.html!")
        return 1

    # Everything temporary lives in one private directory that is
    # removed on every exit path, including failures.
    workdir = tempfile.mkdtemp()
    try:
        print("Getting PGN archive \"%s\"..." % archive)
        afile = os.path.join(workdir, "twic.zip")
        download_archive(archive, afile)

        print("Unzipping and converting to chessdb databases...")
        databases = convert_archive(afile, workdir)
        if not databases:
            print("No PGN files found in the archive; nothing to merge.")
            return 0

        if not merge_databases(database, spelling, databases):
            return 1
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())