The following 49 code examples, extracted from open-source Python projects, illustrate how to use urllib.URLopener().
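Before the project examples, here is a minimal sketch of the pattern nearly all of them share: create an opener and call retrieve(url, filename) to save a remote resource to a local path. The URL and output filename below are placeholders, and the snippet targets Python 2, where URLopener lives in the urllib module (in Python 3 the class moved to urllib.request and is deprecated in favour of urllib.request.urlretrieve).

import urllib

# Create the opener; see the later examples for subclassing it to customise headers.
opener = urllib.URLopener()
try:
    # retrieve() downloads the resource and writes it to the given local path.
    # "http://example.com/data.csv" and "data.csv" are placeholder values.
    filename, headers = opener.retrieve("http://example.com/data.csv", "data.csv")
    print "Saved to", filename
except IOError as e:
    # URLopener raises IOError on network or HTTP failures.
    print "Download failed:", e
finally:
    opener.close()
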
def get_biased_photos(tag, min_taken_date, max_taken_date):
    #Change Folder Path
    if os.path.isdir(tag):
        pass
    else:
        os.mkdir(tag)
    os.chdir(tag)
    #Run image download
    for page in range(1,8):
        photos = flickr.photos_search(tags=tag, page=page, per_page=500,
                                      tag_mode='all',
                                      sort="interestingness-desc",
                                      min_taken_date=min_taken_date,
                                      max_taken_date=max_taken_date)
        for photo in photos:
            try:
                url = photo.getURL(size='Original', urlType='source')
                urllist.append(url)
                image = urllib.URLopener()
                image.retrieve(url, os.path.basename(urlparse.urlparse(url).path))
                print 'Downloading...', url
            except flickr.FlickrError:
                print 'Link no longer available (!)'

########################################################################

def load_model_from_url(url):
    # TODO: move this into a class..
    global scoring_model
    url_opener = urllib.URLopener()
    temp_model_path = get_temp_model_path()
    url_opener.retrieve(url, temp_model_path)
    # try to load the model:
    try:
        temp_model = ScoringModel.from_file(temp_model_path)
    except Exception as e:
        print "Failed to load downloaded model: %s" % e
        os.remove(temp_model_path)
        raise RuntimeError("Failed to load downloaded model! error: %s" % e)
    # update model:
    scoring_model = temp_model
    # delete existing model
    if (path.isfile(model_file_path)):
        os.remove(model_file_path)
    os.rename(temp_model_path, model_file_path)

# TODO: move this to an object with an init function...

def download_csv():
    servers_dict = dict()
    global SERVERS_LIST_FILE
    print Colors.OKBLUE + "Downloading Latest 'dnscrypt-resolvers.csv'.." + Colors.ENDC
    try:
        csv_file = urllib.URLopener()
        csv_file.retrieve("https://raw.githubusercontent.com/jedisct1/dnscrypt-proxy/master/dnscrypt-resolvers.csv",
                          "/opt/dnscrypt-resolvers.csv")
    except:
        print Colors.WARNING + "Unable to download 'dnscrypt-resolvers.csv'. Using default /usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv" + Colors.ENDC
        if os.path.exists("/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"):
            SERVERS_LIST_FILE = "/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"
        else:
            print Colors.FAIL + "Default csv file not found. Exiting.." + Colors.ENDC
            exit(2)
    with open(SERVERS_LIST_FILE) as f:
        data = list(csv.reader(f, delimiter=",", quotechar='"', skipinitialspace=True))[1:]
    print "Index".ljust(5, " "), "Name".ljust(25, " "), "Location".ljust(25, " "), "DNSSEC".ljust(8, " "), "No Log".ljust(7, " "), "Resolver Address".ljust(30)
    print "".ljust(100, "-")
    for rows, index in zip(data, enumerate(data)):
        servers_dict.setdefault(index[0], rows[0])
        print str(index[0]).ljust(5, " "), rows[0].ljust(25, " "), rows[3].ljust(25, " "), rows[7].ljust(8, " "), \
            rows[9].ljust(7, " "), rows[10].ljust(30, " ")
    return servers_dict

def try_download(_path, _file, _url, _stale,):
    now = time()
    url = URLopener()
    file_exists = isfile(_path+_file) == True
    if file_exists:
        file_old = (getmtime(_path+_file) + _stale) < now
    if not file_exists or (file_exists and file_old):
        try:
            url.retrieve(_url, _path+_file)
            result = 'ID ALIAS MAPPER: \'{}\' successfully downloaded'.format(_file)
        except IOError:
            result = 'ID ALIAS MAPPER: \'{}\' could not be downloaded'.format(_file)
    else:
        result = 'ID ALIAS MAPPER: \'{}\' is current, not downloaded'.format(_file)
    url.close()
    return result

# LEGACY VERSION - MAKES A SIMPLE {INTEGER ID: 'CALLSIGN'} DICTIONARY

def get_p1p2data(self):
    import urllib, os
    if self.file_exist():
        print "No need to download P1P2 DCB data..."
        return
    print "Start to download P1P2 DCB data..."
    weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
    if not os.path.isfile(self.sourcefn):
        try:
            download = urllib.URLopener()
            download.retrieve(weblink+self.sourcefn, self.sourcefn)
        except IOError:
            weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year_bom)
            download = urllib.URLopener()
            download.retrieve(weblink+self.sourcefn_bom, self.sourcefn)
    os.system("gzip -fd {0}".format(self.sourcefn))

def get_p1c1data(self):
    import urllib, os
    if self.file_exist():
        print "No need to download P1C1 DCB data..."
        return
    print "Start to download P1C1 DCB data..."
    weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
    if not os.path.isfile(self.sourcefn):
        try:
            download = urllib.URLopener()
            download.retrieve(weblink+self.sourcefn, self.sourcefn)
        except IOError:
            weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year_bom)
            download = urllib.URLopener()
            download.retrieve(weblink+self.sourcefn_bom, self.sourcefn)
    os.system("gzip -fd {0}".format(self.sourcefn))

def download(start, end):
    parse_dict = np.load('parse_dict')
    image = urllib.URLopener()
    for k in parse_dict.keys()[start:end]:
        # makedir of k
        log.info('crawling images of class %s' % k)
        data_path = os.path.join('/media/DATA/ImageNet/Extra/', k)
        if not os.path.exists(data_path):
            os.mkdir(data_path)
        cnt = 0
        for link in parse_dict[k][:500]:
            fn = os.path.join(data_path, '%s_%d.jpg' % (k, cnt))
            cnt += 1
            if cnt % 20 == 0:
                log.info('%d images' % cnt)
            # print fn
            try:
                image.retrieve(link, fn)
            except IOError:
                cnt -= 1
        # print len(parse_dict[k])

def __init__(self, *args):
    self.version = "iegrab/0.1"
    self.open = self.iegrab
    apply(urllib.URLopener.__init__, (self,) + args)

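The example above replaces the opener's default version string; URLopener sends that attribute as the User-Agent header of every request. A minimal sketch of the same customisation, with an illustrative class name, version string, and URL rather than anything taken from the project above:

import urllib

class CustomOpener(urllib.URLopener):
    # URLopener uses the `version` attribute as the User-Agent header.
    version = "my-downloader/1.0"

opener = CustomOpener()
# retrieve() returns the local filename and the response headers.
# "http://example.com/index.html" and "index.html" are placeholder values.
filename, headers = opener.retrieve("http://example.com/index.html", "index.html")
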
def getLast(passwd):
    df = "http://10.5.5.9/"  #DEFAULT PARTS
    p1 = "?t="
    p2 = "&p=%"
    par1, par2, opt = photo_mode()  #MOVING TO PHOTO MODE
    urllib2.urlopen(df + par1 + "/" + par2 + p1 + passwd + p2 + opt)
    time.sleep(1)
    print("\n\r[" + extra.colors.yellow + ".." + extra.colors.end + "] Taking a pic")
    par1, par2, opt = shut()  #TAKE A PIC
    urllib2.urlopen(df + par1 + "/" + par2 + p1 + passwd + p2 + opt)
    time.sleep(2)
    url = "http://10.5.5.9:8080/gp/gpMediaList"  #FIND THE PICTURE USING SOME REGEX
    content = urllib2.urlopen(url).read()
    content = str(content)
    content2 = content.split("},")
    last = content2[-1]
    last = re.findall('[A-Z+][0-9+]*', last)
    last = ''.join(last)
    last = re.sub(r'(JP)', r'.JP', last)
    time.sleep(1)
    print("\n\r[" + extra.colors.yellow + ".." + extra.colors.end + "] Downloading the pic")
    dow = "http://10.5.5.9:8080/DCIM/103GOPRO/" + last  #DOWNLOAD THE PIC AND SAVE IT TO output/
    getFoto = urllib.URLopener()
    getFoto.retrieve("http://10.5.5.9:8080/DCIM/103GOPRO/" + last, "outputs/" + last)
    print("\r\n[" + extra.colors.green + "+" + extra.colors.end + "] Picture saved in outputs/" + last + "\r\n")
    try:
        time.sleep(2)
        process = subprocess.Popen("eog -f outputs/" + last, shell=True, stdout=subprocess.PIPE)
    except:
        pass

#TODO : ADD INFO() FUNCTION TO GET ALL INFORMATIONS ABOUT THE GOPRO AND ADD DELALL() THAT DELETE ALL FILES ON GOPRO

def download(self, links, target_folder='./data'):
    """Download images from a list of links"""
    # check links and folder:
    if len(links) < 1:
        print("Error: Empty list, no links provided")
        exit()
    self.images_links = links
    DatasetBuilder.check_folder_existance(target_folder)
    if target_folder[-1] == '/':
        target_folder = target_folder[:-1]

    # start downloading:
    print("Downloading files...")
    progress = 0
    images_nbr = sum([len(self.images_links[key]) for key in self.images_links])
    for keyword, links in self.images_links.items():
        DatasetBuilder.check_folder_existance(target_folder + '/' + keyword, display_msg=False)
        for link in links:
            target_file = target_folder + '/' + keyword + '/' + link.split('/')[-1]
            try:
                f = urllib.URLopener()
                f.retrieve(link, target_file)
            except IOError:
                self.failed_links.append(link)
            progress = progress + 1
            print("\r >> Download progress: ", (progress * 100 / images_nbr), "%...", end="")
            sys.stdout.flush()

    print("\r >> Download progress: ", (progress * 100 / images_nbr), "%")
    print(" >> ", (progress - len(self.failed_links)), " images downloaded")

    # save failed links:
    if len(self.failed_links):
        f2 = open(target_folder + "/failed_list.txt", 'w')
        for link in self.failed_links:
            f2.write(link + "\n")
        print(" >> Failed to download ", len(self.failed_links), " images: access not granted ",
              "(links saved to: '", target_folder, "/failed_list.txt')")

def run(self):
    # with self.output().open('w') as f:
    src = "http://api.bitcoincharts.com/v1/csv/coinbaseUSD.csv.gz"
    testfile = urllib.URLopener()
    testfile.retrieve(src, self.output())

def get_cifar100(save_dir=None, root_path=None):
    '''
    If root_path is None, we download the data set from internet.

    Either save path or root path must not be None and not both.

    Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        print 'Downloading CIFAR100 dataset...'
        tar_path = os.path.join(save_dir, "cifar-100-python.tar.gz")
        url = urllib.URLopener()
        url.retrieve("https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "cifar-100-python") if not root_path else root_path
    Xtr, Ytr = load_cifar100_data(os.path.join(root, 'train'))
    Xte, Yte = load_cifar100_data(os.path.join(root, 'test'))

    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte

def download(self, url, filedir, defaultdownloaddir='/sourceFileDownloads'):
    downloadDirectory = defaultdownloaddir
    if not os.path.exists(downloadDirectory):
        os.makedirs(downloadDirectory)
    fullURL = urlparse.urljoin(url, filedir)
    file = urllib.URLopener()
    fileDownloadPath = downloadDirectory + '/' + fullURL.split('/')[-1]
    file.retrieve(fullURL, fileDownloadPath)
    return fileDownloadPath

def _download_file(file_path, folder='data'):
    print("Downloading {}...".format(file_path))
    test_file = urllib.URLopener()
    file_name = file_path.split('/')[-1]
    test_file.retrieve(file_path, '{}/{}'.format(folder, file_name))

def stop(self):
    self.stop_serving = True
    try:
        # This is to force stop the server loop
        urllib_request.URLopener().open('http://{}:{}'.format(self.host, self.port))
    except IOError:
        pass
    logging.info('Shutting down the webserver')
    self.thread.join()

def download_csv():
    global SERVERS_LIST_FILE
    print Colors.OKBLUE + "Downloading Latest 'dnscrypt-resolvers.csv'.." + Colors.ENDC
    try:
        csv_file = urllib.URLopener()
        csv_file.retrieve("https://raw.githubusercontent.com/jedisct1/dnscrypt-proxy/master/dnscrypt-resolvers.csv",
                          "/opt/dnscrypt-resolvers.csv")
    except:
        print Colors.WARNING + "Unable to download 'dnscrypt-resolvers.csv'. Using default /usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv" + Colors.ENDC
        if os.path.exists("/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"):
            SERVERS_LIST_FILE = "/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"
        else:
            print Colors.FAIL + "Default csv file not found. Exiting.." + Colors.ENDC
            exit(2)

def getCurrentServerConfig():
    newConfigFile = urllib.URLopener()
    newConfigFile.retrieve("YOUR_SERVER_IP/scripts/automation/packages/system/serverconfig.ini",
                           "/home/debian/serverconfig.ini")

# Configure backup server

def getCurrentServerConfig():
    newConfigFile = urllib.URLopener()
    newConfigFile.retrieve("YOUR_SERVER_IP/automation/packages/system/serverconfig.ini",
                           "packages/system/serverconfig.ini")

# Configure different server services

def test_urlopen():
    # urllib
    url = urllib.quote('file:///bin/ls')
    urllib.urlopen(url, 'blah', 32)
    urllib.urlretrieve('file:///bin/ls', '/bin/ls2')

    opener = urllib.URLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    opener = urllib.FancyURLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    # urllib2
    handler = urllib2.HTTPBasicAuthHandler()
    handler.add_password(realm='test', uri='http://mysite.com', user='bob')
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    urllib2.urlopen('file:///bin/ls')
    urllib2.Request('file:///bin/ls')

    # Python 3
    urllib.request.urlopen('file:///bin/ls')
    urllib.request.urlretrieve('file:///bin/ls', '/bin/ls2')

    opener = urllib.request.URLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    opener = urllib.request.FancyURLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    # Six
    six.moves.urllib.request.urlopen('file:///bin/ls')
    six.moves.urllib.request.urlretrieve('file:///bin/ls', '/bin/ls2')

    opener = six.moves.urllib.request.URLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    opener = six.moves.urllib.request.FancyURLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

def datareader(url, opener=urllib.URLopener().open):
    return opener(url).read()

def assure_model_file(model_file):
    model_file_path = os.path.join(CHECKPOINTS_DIR, model_file)
    if not os.path.isfile(model_file_path):
        url_opener = urllib.URLopener()
        print "downloading " + model_file
        url_opener.retrieve(MODEL_URL + "/" + model_file, model_file_path)

def _do(self, op, args):
    op, args = op.lower(), copy.copy(args)
    if op == 'show_url':
        self.show_url(url=args[0])
    elif op in ('get_url', 'post_url'):
        url = args.pop(0)
        base_url = '/'.join(url.split('/')[:3])
        uo = urllib.URLopener()
        for cookie, value in self.config.get('http_cookies', {}).get(base_url, []):
            uo.addheader('Cookie', '%s=%s' % (cookie, value))
        if op == 'post_url':
            (fn, hdrs) = uo.retrieve(url, data=args)
        else:
            (fn, hdrs) = uo.retrieve(url)
        hdrs = unicode(hdrs)
        with open(fn, 'rb') as fd:
            data = fd.read().strip()
        if data.startswith('{') and 'application/json' in hdrs:
            data = json.loads(data)
            if 'message' in data:
                self.notify_user(data['message'])
    elif op == "shell":
        try:
            for arg in args:
                rv = os.system(arg)
                if 0 != rv:
                    raise OSError('Failed with exit code %d: %s' % (rv, arg))
        except:
            traceback.print_exc()
    elif hasattr(self, op):
        getattr(self, op)(**(args or {}))

def download_python():
    """
    Download python for some reason..?
    """
    banner("Downloading Python 2.7.x.msi, please wait...")
    # save the installer next to the script so the msiexec command below can find it
    urllib.URLopener().retrieve("https://www.python.org/ftp/python/2.7.12/python-2.7.12.msi",
                                "python-2.7.12.msi")
    os.system('sudo wine msiexec /i python-2.7.12.msi /L*v log.txt')
    os.system('clear')

def download_python_win_exten():
    """
    Download Windows extension for python without checking the checksum..
    """
    banner("Downloading pywin32-220.win32-py2.7.exe (Windows extension), please wait...")
    # save the installer next to the script so the wine command below can find it
    urllib.URLopener().retrieve("https://ufpr.dl.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win32-py2.7.exe",
                                "pywin32-220.win32-py2.7.exe")
    os.system('sudo wine pywin32-220.win32-py2.7.exe')
    os.system('sudo wine /root/.wine/drive_c/Python27/python.exe /root/.wine/drive_c/Python27/Scripts/pip.exe install pyinstaller')
    os.system('sudo wine /root/.wine/drive_c/Python27/python.exe /root/.wine/drive_c/Python27/Scripts/pip.exe uninstall Crypto')
    os.system('sudo wine /root/.wine/drive_c/Python27/python.exe /root/.wine/drive_c/Python27/Scripts/pip.exe pycrypto')
    os.system('clear')

def download_vc_for_py():
    """
    Download the VC extension for python, this is a little less scary because it's from MS
    """
    banner("Downloading VCForPython27.msi, please wait...")
    urllib.URLopener().retrieve("https://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi")
    os.system('wget https://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi')
    os.system('sudo wine msiexec /i VCForPython27.msi /L*v log2.txt')
    os.system('mkdir .OK')
    os.system('sudo rm -Rf log2.txt')
    os.system('sudo rm -Rf log.txt')

def save_file(url, file_name):
    urlopen = urllib.URLopener()
    fp = urlopen.open(url)
    data = fp.read()
    fp.close()
    file = open(path + file_name, 'wb')
    file.write(data)
    file.close()

def fetch_metadata(url, path, maxage=600):
    """
    :param url: metadata remote location
    :param path: metadata file name
    :param maxage: if max age of existing metadata file (s) is exceeded,
        the file will be fetched from the remote location
    """
    fetch = False
    if not os.path.isfile(path):
        fetch = True
        logger.debug("metadata file %s not found", path)
    elif (os.path.getmtime(path) + maxage) < time.time():
        fetch = True
        logger.debug("metadata file %s from %s is more than %s s old",
                     path,
                     strftime("%Y-%m-%d %H:%M:%S", time.localtime(os.path.getmtime(path))),
                     maxage)
    else:
        logger.debug("metadata file %s is less than %s s old", path, maxage)
    if fetch:
        f = urllib.URLopener()
        try:
            f.retrieve(url, path)
            logger.debug("downloaded metadata from %s into %s", url, path)
        except:
            logger.debug("downloaded metadata from %s failed: %s", url, sys.exc_info()[0])

def get_cafile(self):
    """Download a certificate to authenticate the identity of the AWS IoT platform."""
    authority_location = "https://www.symantec.com/content/en/us/enterprise/verisign/roots/VeriSign-Class%203-Public-Primary-Certification-Authority-G5.pem"
    url = urllib.URLopener()
    cafile = self.get_abs_path(CAFILE)
    url.retrieve(authority_location, cafile)

def csvComment():
    'Module for data to be fetched and parsed into csv'
    print 'started'
    with open(sortdata, 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.cuc.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            testfile.retrieve('http://fisheye.cuc.com/cru/' + line + '/reviewHistory.csv', line + '.csv')
            with open(line + '.csv') as f:
                columns = defaultdict(list)  # each value in each column is appended to a list
                reader = csv.DictReader(f)   # read rows into a dictionary format
                for row in reader:           # read a row as {column1: value1, column2: value2,...}
                    for (k, v) in row.items():  # go over each column name and value
                        columns[k].append(v)    # append the value into the appropriate list
                d = dict(zip(zip(columns['Date'], columns['User'], columns['New value']), columns['Action']))
                print d

##print rkdict
##    for key, value in d.iteritems():
##        if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
##            writer = csv.writer(open('final.csv', 'ab'))
##            for (key, value) in zip(d.items()):
##                writer.writerow([line, key, value])
##        else:
##            print 'No Comments found for ' + line

def dictcsvFinalReview():
    print 'started'
    with open(sortdata, 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.cuc.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            os.chdir(r'C:\Users\radhakrishnanr\Desktop\filescsv')
            testfile.retrieve('http://fisheye.cuc.com/cru/' + line + '/reviewHistory.csv', line + '.csv')
            columns = defaultdict(list)  # each value in each column is appended to a list
            with open(line + '.csv') as f:
                reader = csv.DictReader(f)  # read rows into a dictionary format
                for row in reader:          # read a row as {column1: value1, column2: value2,...}
                    for (k, v) in row.items():  # go over each column name and value
                        columns[k].append(v)    # append the value into the appropriate list based on column name k
            d = dict(zip(zip(columns['Date'], columns['User'], columns['New value']), columns['Action']))
            print d

##            for key, value in d.iteritems():
##                if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
##
##                    writer = csv.writer(open('final.csv', 'ab'))
##                    for (key, value) in zip(d, line):
##                        writer.writerow([line, key])
##                else:
##                    print 'No Comments found for ' + line

def csvComment():
    'Module for data to be fetched and parsed into csv'
    print 'started'
    with open('sorted.txt', 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            testfile.retrieve('http://fisheye.com/cru/' + line + '/reviewHistory.csv', line + '.csv')

def csvFinalReview():
    print 'started'
    with open('sorted.txt', 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            testfile.retrieve('http://fisheye.com/cru/' + line + '/reviewHistory.csv', line + '.csv')
            columns = defaultdict(list)  # each value in each column is appended to a list
            with open(line + '.csv') as f:
                reader = csv.DictReader(f)  # read rows into a dictionary format
                for row in reader:          # read a row as {column1: value1, column2: value2,...}
                    for (k, v) in row.items():  # go over each column name and value
                        columns[k].append(v)    # append the value into the appropriate list based on column name k
            d = dict(zip(zip(columns['Date'], columns['User'], columns['New value']), columns['Action']))
            for key, value in d.iteritems():
                if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
                    print file, key
            try:
                os.remove(line + '.csv')
            except IOError:
                pass

#csvComment()
#csvReview()

def get_gimdata(self):
    import urllib, os
    if self.file_exist():
        print "No need to download GIM data..."
        return
    print "Start to download GIM data..."
    weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
    if not os.path.isfile(self.sourcefn[:-2]):
        if not os.path.isfile(self.sourcefn):
            download = urllib.URLopener()
            download.retrieve(weblink + self.sourcefn, self.sourcefn)
        os.system("gzip -fd {0}".format(self.sourcefn))

def get_navidata(self):
    import urllib, os
    if self.file_exist():
        print "No need to download Navigation data..."
        return
    print "Start to download Navigation data..."
    if self.types in 'igslocal':
        weblink = "ftp://igscb.jpl.nasa.gov/pub/product/"
        if not (os.path.isfile(self.sourcefn_igs1) or os.path.isfile(self.sourcefn_igr1)):
            try:
                download = urllib.URLopener()
                download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks1, self.sourcefn_igs1), self.sourcefn_igs1)
                self.sourcefn1 = self.sourcefn_igs1[:-2]
            except IOError:
                download = urllib.URLopener()
                download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks1, self.sourcefn_igr1), self.sourcefn_igr1)
                self.sourcefn1 = self.sourcefn_igr1[:-2]
        if not (os.path.isfile(self.sourcefn_igs2) or os.path.isfile(self.sourcefn_igr2)):
            try:
                download = urllib.URLopener()
                download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks2, self.sourcefn_igs2), self.sourcefn_igs2)
                self.sourcefn2 = self.sourcefn_igs2[:-2]
            except IOError:
                download = urllib.URLopener()
                download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks2, self.sourcefn_igr2), self.sourcefn_igr2)
                self.sourcefn2 = self.sourcefn_igr2[:-2]
    elif self.types == 'igsrt':
        weblink = "ftp://cddis.gsfc.nasa.gov/pub/gps/products/{0}/".format(self.sourcefn_igu[3:7])
        download = urllib.URLopener()
        download.retrieve(weblink + self.sourcefn_igu, self.sourcefn_igu)
        self.sourcefn = self.sourcefn_igu[:-2]
    os.system("gzip -fd *sp3.Z")

def save_image(self, url, path):
    """
    :param url:
    :param path:
    :return nothing:
    """
    image = urllib.URLopener()
    image.retrieve(url, path)

def download_file(url, local_path):
    dir_path = path.dirname(local_path)
    if not path.exists(dir_path):
        print("Creating the directory '%s' ..." % dir_path)
        os.makedirs(dir_path)
    urllib.URLopener().retrieve(url, local_path)

def DownHTTP(url, fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        fileHTTP.retrieve(url, url.split("/")[len(url.split("/"))-1])
    else:
        fileHTTP.retrieve(url, fileName)

###### setup EDIT

def DownHTTP(url, fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        if os.path.isfile(url.split("/")[len(url.split("/"))-1]) == 1:
            newName = url.split("/")[len(url.split("/"))-1].split(".")[0] + "_." + url.split("/")[len(url.split("/"))-1].split(".")[1]
            fileHTTP.retrieve(url, newName)
            return " saved the file with the original name + \"_\""
        else:
            fileHTTP.retrieve(url, url.split("/")[len(url.split("/"))-1])
            return " saved the file with the original name"
    else:
        fileHTTP.retrieve(url, fileName)
        return " saved the file with the given name"

def DownHTTP(url, fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        fileHTTP.retrieve(url, url.split("/")[len(url.split("/"))-1])
    else:
        fileHTTP.retrieve(url, fileName)

def downloadSource(self):
    download_file = URLopener()
    download_file.retrieve(self.__url, self.__filename)
    self.__sourceAvailable = True

def get_caltech101(save_dir=None, root_path=None):
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        print 'Downloading Caltech101 dataset...'
        tar_path = os.path.join(save_dir, "101_ObjectCategories.tar.gz")
        url = urllib.URLopener(context=ctx)
        url.retrieve("https://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "101_ObjectCategories") if not root_path else root_path

    train_x = []
    train_y = []
    val_x = []
    val_y = []
    label = 0
    for cls_folder in os.listdir(root):
        cls_root = os.path.join(root, cls_folder)
        if not os.path.isdir(cls_root):
            continue
        cls_images = [misc.imread(os.path.join(cls_root, img_name)) for img_name in os.listdir(cls_root)]
        cls_images = [np.repeat(np.expand_dims(img, 2), 3, axis=2) if len(img.shape) == 2 else img for img in cls_images]
        cls_images = np.array([np.reshape(misc.imresize(img, (224, 224, 3)), (3, 224, 224)) for img in cls_images])
        new_index = np.random.permutation(np.arange(cls_images.shape[0]))
        cls_images = cls_images[new_index, :, :, :]
        train_x.append(cls_images[:30])
        train_y.append(np.array([label]*30))
        if len(cls_images) <= 80:
            val_x.append(cls_images[30:])
            val_y.append(np.array([label]*(len(cls_images)-30)))
        else:
            val_x.append(cls_images[30:80])
            val_y.append(np.array([label]*50))
        label += 1

    Xtr = np.concatenate(train_x)
    Ytr = np.concatenate(train_y)
    Xval = np.concatenate(val_x)
    Yval = np.concatenate(val_y)

    print 'Xtr shape ', Xtr.shape
    print 'Ytr shape ', Ytr.shape
    print 'Xval shape ', Xval.shape
    print 'Yval shape ', Yval.shape

    return Xtr, Ytr, Xval, Yval

def get_cifar10(save_dir=None, root_path=None):
    '''
    If root_path is None, we download the data set from internet.

    Either save path or root path must not be None and not both.

    Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        print 'Downloading CIFAR10 dataset...'
        tar_path = os.path.join(save_dir, "cifar-10-python.tar.gz")
        url = urllib.URLopener()
        url.retrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "cifar-10-batches-py") if not root_path else root_path

    # Training Data
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(root, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape

    # Testing data
    Xte, Yte = load_CIFAR_batch(os.path.join(root, 'test_batch'))
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte

def get_svhn(save_dir=None, root_path=None):
    '''
    If root_path is None, we download the data set from internet.

    Either save path or root path must not be None and not both.

    Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        new_save_dir = os.path.join(save_dir, 'og_data')
        if not os.path.isdir(new_save_dir):
            os.mkdir(new_save_dir)
        train_mat = os.path.join(new_save_dir, "train_32x32.mat")
        test_mat = os.path.join(new_save_dir, "test_32x32.mat")
        url = urllib.URLopener()
        print 'Downloading Svhn Train...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/train_32x32.mat", train_mat)
        print 'Downloading Svhn Test...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/test_32x32.mat", test_mat)

    root = new_save_dir if not root_path else root_path

    train = io.loadmat(os.path.join(root, 'train_32x32.mat'))
    Xtr = train['X']
    Ytr = train['y']
    del train

    test = io.loadmat(os.path.join(root, 'test_32x32.mat'))
    Xte = test['X']
    Yte = test['y']
    del test

    Xtr = np.transpose(Xtr, (3, 2, 0, 1))
    Xte = np.transpose(Xte, (3, 2, 0, 1))
    Ytr = Ytr.reshape(Ytr.shape[:1]) - 1
    Yte = Yte.reshape(Yte.shape[:1]) - 1

    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte

def get_svhn_full(save_dir=None, root_path=None):
    '''
    If root_path is None, we download the data set from internet.

    Either save path or root path must not be None and not both.

    Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    Xtr_small, Ytr_small, Xte, Yte = get_svhn(save_dir, root_path)

    if root_path is None:
        new_save_dir = os.path.join(save_dir, 'og_data')
        if not os.path.isdir(new_save_dir):
            os.mkdir(new_save_dir)
        extra_mat = os.path.join(new_save_dir, "extra_32x32.mat")
        url = urllib.URLopener()
        print 'Downloading Svhn Extra...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", extra_mat)

    root = new_save_dir if not root_path else root_path

    extra = io.loadmat(os.path.join(root, 'extra_32x32.mat'))
    Xtr_extra = extra['X']
    Ytr_extra = extra['y']

    Xtr_extra = np.transpose(Xtr_extra, (3, 2, 0, 1))
    Ytr_extra = Ytr_extra.reshape(Ytr_extra.shape[:1]) - 1

    print 'Xextra shape', Xtr_extra.shape
    print 'Yextra shape', Ytr_extra.shape

    val_x = []
    val_y = []
    train_x = []
    train_y = []
    for i in np.unique(Ytr_small):
        # Get 400 images from X_small
        X_small_label = Xtr_small[Ytr_small == i]
        val_x.append(X_small_label[:400])
        val_y.append([i]*400)
        train_x.append(X_small_label[400:])
        train_y.append([i]*(X_small_label.shape[0] - 400))

        # Get 200 images from X_extra
        X_extra_label = Xtr_extra[Ytr_extra == i]
        val_x.append(X_extra_label[:200])
        val_y.append([i]*200)
        train_x.append(X_extra_label[200:])
        train_y.append([i]*(X_extra_label.shape[0] - 200))

    Xtr = np.concatenate(train_x)
    Ytr = np.concatenate(train_y)
    Xval = np.concatenate(val_x)
    Yval = np.concatenate(val_y)

    return Xtr, Ytr, Xval, Yval, Xte, Yte

def download():
    choice = (raw_input('Type "tag" or "album" for corresponding choice. \nDo you want to download images by tag or specific album: '))
    # counter is created in order to label the images when they are downloaded
    counter = 0
    if(choice == 'album'):
        albumID = int(raw_input('Enter the ID of the folder you wish to download: '))
        name = raw_input('Enter the username of the desired users pictures: ')
        # checking if the folder exists, creating a folder and moving into it
        if not os.path.exists(name+'/'+albumID):
            os.makedirs(name+'/'+albumID)
        os.chdir(name+'/'+albumID)
        print('Downloading...')
        # walk_set function loops through the pictures of a specific album
        for photo in flickr.walk_set(albumID):
            # beautiful soup opens up the direct link to the picture using authors id(name) and photo id, specifying sizes/k will
            # result in the highest quality picture available on flickr
            url = 'https://www.flickr.com/photos/' + name + '/' + photo.get('id') + '/sizes/k/'
            webpage = requests.get(url)
            soup = BeautifulSoup(webpage.text, 'html.parser')
            x = soup.findAll('img')
            # we read the html using soup and look for img, after which we look for src link and extract it
            for link in soup.find_all('img'):
                new = (link.get('src'))
                if(new.count(".jpg")) == 1:
                    # the link is downloaded using URLopener() and saved with 'photo + counter'
                    testfile = urllib.URLopener()
                    testfile.retrieve(new, 'photo' + str(counter) + '.jpg')
                    counter = counter + 1
    elif(choice == 'tag'):
        tag = raw_input('Enter the tags(in format:tagName1,tagName2,tagName3 and etc): ')
        # checking if the folder exists, creating a folder and moving into it
        if not os.path.exists(tag):
            os.makedirs(tag)
        os.chdir(tag)
        # checking the total number of available pictures with the specific tag
        total = int(flickr.photos.search(tags=tag).find('photos').attrib['total'])
        print('There are ' + str(total) + ' pictures found \nDownloading...')
        # walk_set function loops through the pictures with the tag for more info go to flickrapi python documentation
        for photo in flickr.walk(tag_mode='all', tags=tag):
            author = photo.get('owner')  # return the owner of the picture
            # beautiful soup opens up the direct link to the picture using authors id and photos id, specifying sizes/k will
            # result in the highest quality picture available on flickr
            url = 'https://www.flickr.com/photos/' + author + '/' + photo.get('id') + '/sizes/k/'
            webpage = requests.get(url)
            soup = BeautifulSoup(webpage.text, 'html.parser')
            x = soup.findAll('img')
            # we read the html using soup and look for img, after which we look for src link and extract it
            for link in soup.find_all('img'):
                new = (link.get('src'))
                if(new.count(".jpg")) == 1:
                    # the link is downloaded using URLopener() and saved with 'photo + counter'
                    testfile = urllib.URLopener()
                    testfile.retrieve(new, 'photo' + str(counter) + '.jpg')
                    counter = counter + 1
    else:
        print('An Error appeared in your input. ')
        download()