BioConductor source download script

From Biolanguage.org

Jump to: navigation, search

BioConductor

 

#!/usr/bin/python
"""Download the source tarball of every Bioconductor package of one release.

The script reads the package index page, keeps every relative link as a
package name, opens the release page of each package and hands every
``.tar.gz`` link found there to ``wget``.  Tarballs are stored in a
``SourceBioC-<release>`` folder created in the current directory.

Requires the ``wget`` executable.  The code runs unchanged on Python 2 and
Python 3: ``print`` is always called with a single parenthesised argument
and the URL helpers are imported from whichever module provides them.
"""
import base64
import os
import re
import subprocess

try:  # Python 3
    from urllib.parse import urlsplit, urlunsplit
    from urllib.request import Request, urlopen
except ImportError:  # Python 2
    from urllib2 import Request, urlopen
    from urlparse import urlsplit, urlunsplit

# Bioconductor release whose package sources are downloaded.
BioCv = '2.3'

# Page listing all the packages (one link per package).
PACKAGE_INDEX_URL = ('https://readonly:readonly@hedgehog.fhcrc.org'
                     '/bioconductor/trunk/madman/Rpacks/')
# Release page of one package: filled with (release, package name).
PACKAGE_PAGE_URL = 'http://bioconductor.org/packages/%s/bioc/html/%s.html'
# Location of one source tarball: filled with (release, tarball file name).
TARBALL_URL = 'http://bioconductor.org/packages/%s/bioc/src/contrib/%s'

# Compiled once instead of once per scanned line.  ``[^"]*`` stops at the
# closing quote of the attribute, and the dots of ".tar.gz" are escaped so
# they only match literal dots.
NAME_MOTIF = re.compile(r'href="([^"]*)"')
SOURCE_MOTIF = re.compile(r'href="[^"]*/([^"/]*\.tar\.gz)"')


def split_credentials(url):
    """Separate ``user:password@`` credentials from *url*.

    Returns a ``(url, headers)`` tuple.  When *url* embeds credentials they
    are removed from the returned URL and encoded into a Basic
    ``Authorization`` entry of *headers*; otherwise *url* is returned
    untouched together with an empty dict.
    """
    parts = urlsplit(url)
    if parts.username is None:
        return url, {}
    host = parts.hostname
    if parts.port is not None:
        host = '%s:%s' % (host, parts.port)
    clean_url = urlunsplit(
        (parts.scheme, host, parts.path, parts.query, parts.fragment))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('ascii')
    return clean_url, {'Authorization': 'Basic ' + token}


def fetch_lines(url):
    """Return the body of *url* as a list of text lines.

    Raises IOError (``URLError``/``HTTPError`` are subclasses of it) when
    the page cannot be retrieved.
    """
    # urlopen() does not accept credentials embedded in the URL, so they
    # are sent as a request header instead.
    clean_url, headers = split_credentials(url)
    response = urlopen(Request(clean_url, headers=headers))
    try:
        data = response.read()
    finally:
        response.close()
    if not isinstance(data, str):
        # Python 3 returns bytes; Python 2 already returns str.
        data = data.decode('utf-8', 'replace')
    return data.split('\n')


def extract_package_names(lines):
    """Return the package names linked from the index page *lines*.

    The first ``href`` of each line is used, with every '/' removed.
    Absolute links (starting with ``http``), the parent-directory link
    ``..`` and empty names are skipped.
    """
    names = []
    for line in lines:
        match = NAME_MOTIF.search(line)
        if match is None:
            continue
        name = match.group(1).replace('/', '')
        if name and not name.startswith('http') and name != '..':
            names.append(name)
    return names


def extract_tarball_names(lines):
    """Return the ``.tar.gz`` file names linked from a package page.

    At most one name is taken per line (the first matching link).
    """
    tarballs = []
    for line in lines:
        match = SOURCE_MOTIF.search(line)
        if match is not None:
            tarballs.append(match.group(1))
    return tarballs


def download(url, folder):
    """Fetch *url* into *folder* with wget; return True on success.

    The command is passed as an argument list (no shell involved) because
    *url* is built from text scraped off a web page.  ``cwd`` makes wget
    write into *folder* without changing the directory of this process.
    """
    try:
        return subprocess.call(['wget', '--quiet', url], cwd=folder) == 0
    except OSError:
        # wget is not installed or *folder* is not usable.
        return False


def main():
    """Download every source tarball of release ``BioCv``."""
    # Create the folder where the source will be downloaded
    folder = 'SourceBioC-' + BioCv
    if not os.path.exists(folder):
        os.mkdir(folder)

    # Get the name of all the packages
    names = extract_package_names(fetch_lines(PACKAGE_INDEX_URL))

    print('----------------')
    print("There is %s library found" % len(names))
    print('----------------')

    # For each go to the page of the release and download them
    for i, lib in enumerate(names, 1):
        try:
            page = fetch_lines(PACKAGE_PAGE_URL % (BioCv, lib))
        except IOError:
            # A listed package may have no page in this release: skip it
            # and carry on with the next one.
            page = []
        for tarball in extract_tarball_names(page):
            print("Retrieve package %s on %s" % (i, len(names)))
            print(" " + lib)
            if not download(TARBALL_URL % (BioCv, tarball), folder):
                print("Can not retrieve package: %s" % lib)
        print("----------------\n")


if __name__ == '__main__':
    main()

Toolbox