#!/usr/bin/python2
#
# title: coilget.py
# summary: Get a list of files from the coil library FTP site
# author: Nicholas Fitzkee (nickfitzkee at jhu dot edu)
# date: March 7, 2005
#
# description:
#   By default the coil library does not provide an easy way to get
#   access to the fragment PDB files stored in the database.  This is
#   partially by design: there are a huge number of files, and they
#   can easily be generated from a local copy of the regular PDB without
#   occupying additional hard disk space.  A program that can take a
#   TOR file from the library and reconstruct PDB files given a local
#   copy of the PDB has been made available on the coil library website
#   (see the utilities section).  Additionally, downloading so many PDB
#   files would take quite some time to transfer, even over a high speed
#   connection.  Nevertheless, it may be desirable to do so in some
#   cases, which is the purpose of this program.
#
# note:
#   Every print call below passes exactly one string, so the statements
#   behave identically under the Python 2 print statement and the
#   Python 3 print function.
#

import ftplib
import getopt
import os
import re
import sys
import time

FTP = ftplib.FTP
FTPHOST = 'roselab.jhu.edu'
FILEROOT = '/pub/coil/nonwind'

# Simple "local@domain.tld" shape check used by valid_email().  Matching is
# case-insensitive so that addresses written with capitals are accepted.
_EMAIL_RE = re.compile(
    r'^([_a-z0-9-]+)(\.[_a-z0-9-]+)*@([a-z0-9-]+)(\.[a-z0-9-]+)*'
    r'(\.[a-z]{2,4})$',
    re.IGNORECASE)


def run(argc, argv):
    """Parse the command line and download every file named in the list.

    argc -- number of command-line arguments; accepted for compatibility
            with existing callers but not used.
    argv -- full argument vector, including the program name at index 0.

    The first positional argument names a text file with one remote path
    (relative to FILEROOT) per line; blank lines and lines starting with
    '#' are ignored.  Exits with status 1 when the email address is
    invalid, the file list is missing, or the destination directory
    cannot be created.
    """
    sopt = 'e:d:h'
    lopt = ['email=', 'directory=', 'help']
    try:
        opts, args = getopt.getopt(argv[1:], sopt, lopt)
    except getopt.error:
        usage()  # does not return

    email = 'coilget@roselab.jhu.edu'
    dndir = os.path.realpath(os.curdir)
    for o, v in opts:
        if o in ('-e', '--email'):
            if not valid_email(v):
                print('Invalid email address: %s' % v)
                sys.exit(1)
            email = v
        elif o in ('-d', '--directory'):
            dndir = v
        elif o in ('-h', '--help'):
            help()

    if not args:
        usage()

    # Validate everything local before opening a connection to the server.
    if not os.path.exists(args[0]):
        print('Unable to find file list: %s' % args[0])
        sys.exit(1)

    safe_makedirs(dndir)
    if not os.path.exists(dndir):
        print('Unable to create destination directory: %s' % dndir)
        sys.exit(1)

    targets = _read_targets(args[0])

    ftp = FTP(FTPHOST, 'anonymous', email)
    try:
        print('Retrieving files...')
        for tgt in targets:
            _fetch(ftp, dndir, tgt)
    finally:
        _disconnect(ftp)


def _read_targets(path):
    """Return the non-blank, non-comment lines of the file list at path."""
    with open(path) as listing:
        stripped = [line.strip() for line in listing]
    return [tgt for tgt in stripped if tgt and not tgt.startswith('#')]


def _fetch(ftp, dndir, tgt):
    """Download one remote file (tgt, relative to FILEROOT) below dndir.

    Failures are reported on stdout and the function simply returns, so
    the caller can carry on with the next entry in the list.
    """
    rdir, fname = os.path.split(tgt)
    remdir = '%s/%s' % (FILEROOT, rdir)
    locdir = '%s/%s' % (dndir, rdir)
    lfname = '%s/%s' % (locdir, fname)

    # Change directory first: if the remote directory does not exist there
    # is no point in creating a local directory or an empty local file.
    try:
        ftp.cwd(remdir)
    except ftplib.error_perm as msg:
        print(msg)
        return

    safe_makedirs(locdir)
    try:
        fp = open(lfname, 'wb')
    except IOError as msg:
        print("Can't create %s: %s" % (lfname, str(msg)))
        return

    try:
        t0 = time.time()
        try:
            ftp.retrbinary('RETR %s' % fname, fp.write, 8 * 1024)
        except ftplib.error_perm as msg:
            # The refusal is reported; the statistics line below is still
            # printed for whatever was written to the local file.
            print(msg)
        t1 = time.time()
        size = fp.tell()
    finally:
        fp.close()

    elapsed = t1 - t0
    # A transfer can finish within the clock's resolution; avoid dividing
    # by zero in that case.
    if elapsed > 0:
        rate = size / elapsed
    else:
        rate = 0.0
    tstr = time.strftime('%H:%M:%Ss', time.gmtime(int(round(elapsed, 0))))

    size, sufx = _scaled(size)
    rate, rsuf = _scaled(rate, '/s')
    print('%20s (%s %s, %s, %s %s)' %
          (fname, size, sufx, tstr, rate, rsuf))


def _scaled(value, per=''):
    """Return (text, unit) for a byte count, scaled to MB, kB or bytes.

    per is appended to the unit, e.g. '/s' for a transfer rate.
    """
    if value >= 1024 * 1024:
        return '%.2f' % (value / 1048576.), 'MB' + per
    if value >= 1024:
        return '%.2f' % (value / 1024.), 'kB' + per
    return '%i' % value, 'bytes' + per


def _disconnect(ftp):
    """Close the FTP session politely, falling back to a hard close."""
    try:
        ftp.quit()
    except ftplib.all_errors:
        ftp.close()


def valid_email(tstr):
    """valid_email - check whether tstr looks like an email address

    This is a simple syntactic check (local part, '@', dotted domain and
    a two to four letter top-level domain), not a full RFC 2822
    validation.  Returns 1 when the address matches and 0 otherwise.
    """
    if _EMAIL_RE.match(tstr):
        return 1
    return 0


def safe_makedirs(dir):
    """Create dir and any missing parents without raising on failure.

    Returns 1 if the directories were created and 0 if os.makedirs raised
    OSError (for example because the directory already exists).  Callers
    that need to know whether the directory is usable should check for
    its existence afterwards.
    """
    try:
        os.makedirs(dir)
    except OSError:
        return 0
    return 1


def help():
    """Print the full help text and exit."""
    print("""coilget.py - retrieve files from the COIL library website

By default the coil library does not provide an easy way to get
access to the fragment PDB files stored in the database.  This is
partially by design: there are a huge number of files, and they
can easily be generated from a local copy of the regular PDB without
occupying additional hard disk space.  A program that can take a
TOR file from the library and reconstruct PDB files given a local
copy of the PDB has been made available on the coil library website
(see the utilities section).  Additionally, downloading so many PDB
files would take quite some time to transfer, even over a high speed
connection.  Nevertheless, it may be desirable to do so in some
cases, which is the purpose of this program.

usage: coilget.py [options] [-h] <PDB File List>

coilget.py takes the following options:

    -e | --email      Specify the password for anonymous FTP access.
                      By default, the address is
                      "coilget@roselab.jhu.edu," but we would very much
                      prefer to know who is actually downloading files
                      from our server.

    -d | --directory  Specify the root directory where the file
                      downloads will go.  The default is to use the
                      current directory.

    -h | --help       Display this help message.

The program will download the files listed in the text file given
(PDB File List).  It may take some time, so be prepared to wait.
""")
    sys.exit()


def usage():
    """Print the one-line usage summary and exit."""
    print("""usage: coilget.py [options] [-h] <PDB File List>
""")
    sys.exit()


if __name__ == '__main__':
    run(len(sys.argv), sys.argv)