The following 50 code examples, extracted from open-source Python projects, illustrate how to use urllib.urlretrieve().
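For orientation, here is a minimal, self-contained sketch of the call itself; the URL, local file name, and reporthook below are illustrative placeholders, not code from any of the projects. urllib.urlretrieve(url, filename) copies the resource at url into a local file and returns a (filename, headers) tuple; the optional reporthook callback receives the block count, block size, and total size as the download progresses. (In Python 3 the equivalent function is urllib.request.urlretrieve.)

# Minimal usage sketch (Python 2); URL and file name are placeholders.
import urllib

def report(block_count, block_size, total_size):
    # Called by urlretrieve after each block; total_size is -1 if unknown.
    if total_size > 0:
        done = min(block_count * block_size, total_size)
        print 'downloaded %d of %d bytes' % (done, total_size)

# Returns the local file name and the response headers (a mimetools.Message).
filename, headers = urllib.urlretrieve(
    'http://example.com/data.csv', 'data.csv', reporthook=report)
print headers.gettype()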
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'COLT2016'
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl
        pbar.log('http://jmlr.org/proceedings/papers/v49/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://jmlr.org/proceedings/papers/v49/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'ICML2015'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl
        pbar.log('http://jmlr.org/proceedings/papers/v37/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://jmlr.org/proceedings/papers/v37/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'ICML2016'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl
        pbar.log('http://jmlr.org/proceedings/papers/v48/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://jmlr.org/proceedings/papers/v48/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2012'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2016'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2013'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2014'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2014'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2016'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2015'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2013'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_url(self, query):
    site1 = urllib.urlopen('http://www.youtube.com/results?search_query=%s' % query)
    html = site1.read()
    soup = BS(html)
    links = soup.findAll('a')
    vidlinks = [link.get('href') for link in links if link.get('href') is not None]
    vlink = [i for i in vidlinks if '/watch?v=' in i][0]
    img_link = soup.findAll('img', {'alt': 'Thumbnail', 'width': '185'})[0].get('src')
    img_url = 'http:%s' % img_link
    imagethread = threading.Thread(
        target=lambda: urllib.urlretrieve(img_url, 'Files\image.jpg'))
    imagethread.start()
    return vlink
def load(batch_size, test_batch_size, n_labelled=None):
    filepath = '/tmp/mnist.pkl.gz'
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

    if not os.path.isfile(filepath):
        print "Couldn't find MNIST dataset in /tmp, downloading..."
        urllib.urlretrieve(url, filepath)

    with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f:
        train_data, dev_data, test_data = pickle.load(f)

    return (
        mnist_generator(train_data, batch_size, n_labelled),
        mnist_generator(dev_data, test_batch_size, n_labelled),
        mnist_generator(test_data, test_batch_size, n_labelled)
    )
def download(self, tarDir=None, imgIds=[]):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print 'Please specify target directory'
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print 'downloaded %d/%d images (t=%.1fs)' % (i, N, time.time() - tic)
def download_from_github(args):
    if os.path.exists('dappled.yml'):
        yml = ruamel.yaml.load(open('dappled.yml').read(), ruamel.yaml.RoundTripLoader)
    else:
        print('dappled.yml not found; please run "dappled init" first')
        sys.exit()

    if not yml.get('github'):
        return False
    github_repo = yml['github']

    url = 'https://github.com/{owner}/{repo}/archive/{sha}.zip'.format(**github_repo)
    filename, headers = urllib.urlretrieve(url)
    with zipfile.ZipFile(filename) as zf:
        prefix = None
        for name in zf.namelist():
            if prefix is None:  # assume first entry is the directory name
                prefix = name
                continue
            path = name.replace(prefix, '')
            if path in ('dappled.yml', yml['filename'], 'environment.yml'):
                continue
            with open(path, 'wb') as f:
                f.write(zf.read(name))
def store_raw_images():
    '''To download images from image-net
    (Change the url for different needs of cascades)
    '''
    neg_images_link = 'http://image-net.org/api/text/imagenet.synset.geturls?wnid=n07942152'
    neg_image_urls = urllib2.urlopen(neg_images_link).read().decode()
    pic_num = 1

    for i in neg_image_urls.split('\n'):
        try:
            print i
            urllib.urlretrieve(i, "neg/" + str(pic_num) + '.jpg')
            img = cv2.imread("neg/" + str(pic_num) + '.jpg', cv2.IMREAD_GRAYSCALE)
            resized_image = cv2.resize(img, (100, 100))
            cv2.imwrite("neg/" + str(pic_num) + '.jpg', resized_image)
            pic_num = pic_num + 1
        except:
            print "error"
def download_pretrained_models(
        models_root_dir='/tmp/sketch_rnn/models',
        pretrained_models_url=PRETRAINED_MODELS_URL):
    """Download pretrained models to a temporary directory."""
    tf.gfile.MakeDirs(models_root_dir)
    zip_path = os.path.join(
        models_root_dir, os.path.basename(pretrained_models_url))
    if os.path.isfile(zip_path):
        tf.logging.info('%s already exists, using cached copy', zip_path)
    else:
        tf.logging.info('Downloading pretrained models from %s...',
                        pretrained_models_url)
        urllib.urlretrieve(pretrained_models_url, zip_path)
        tf.logging.info('Download complete.')
    tf.logging.info('Unzipping %s...', zip_path)
    with zipfile.ZipFile(zip_path) as models_zip:
        models_zip.extractall(models_root_dir)
    tf.logging.info('Unzipping complete.')
def fetch(self, output):
    dl_path = self.get_latest_tarball()
    raw_path, _ = urllib.urlretrieve(dl_path)

    # The downloaded file is a gzip'd tarball.
    extract_path = self._make_temp_directory("rootfetch-apple-extracted")
    sh.tar("-xzv", "-f", raw_path, "-C", extract_path, strip_components=1)

    # We now have a directory with all the apple files. We need to find the
    # roots directory, parse out all the different formats, then generate a
    # single file that has PEMs in it.
    certificates_path = os.path.join(extract_path, "certificates", "roots")
    for f in os.listdir(certificates_path):
        full_path = os.path.join(certificates_path, f)
        if not os.path.isfile(full_path):
            continue
        pem = self.make_pem(full_path)
        output.write("# ")
        output.write(f)
        output.write("\n")
        output.write("\n".join(pem))
        output.write("\n\n")
def main():
    uri, outfile, dataset = get_arguments()
    fd = tempfile.NamedTemporaryFile()
    progress = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA(),
                                    ' ', FileTransferSpeed()])

    def update(count, blockSize, totalSize):
        if progress.maxval is None:
            progress.maxval = totalSize
            progress.start()
        progress.update(min(count * blockSize, totalSize))

    urllib.urlretrieve(uri, fd.name, reporthook=update)

    if dataset == 'zinc12':
        df = pandas.read_csv(fd.name, delimiter='\t')
        df = df.rename(columns={'SMILES': 'structure'})
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
    elif dataset == 'chembl22':
        df = pandas.read_table(fd.name, compression='gzip')
        df = df.rename(columns={'canonical_smiles': 'structure'})
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
        pass
    else:
        df = pandas.read_csv(fd.name, delimiter='\t')
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
def download_chrome_latest_rpm(arch):
    chrome_rpm = 'google-chrome-%s_current_%s.rpm' % (version_string, arch)
    path = 'https://dl.google.com/linux/direct/%s' % chrome_rpm

    if (args.clean):
        remove_file_if_exists(chrome_rpm)

    # Let's make sure we haven't already downloaded it.
    if os.path.isfile("./%s" % chrome_rpm):
        print "%s already exists!" % chrome_rpm
    else:
        print "Downloading %s" % path
        # Perhaps look at using python-progressbar at some point?
        info = urllib.urlretrieve(path, chrome_rpm, reporthook=dlProgress)[1]
        urllib.urlcleanup()
        print ""
        if (info["Content-Type"] != "binary/octet-stream" and
                info["Content-Type"] != "application/x-redhat-package-manager"):
            print 'Chrome %s rpms are not on servers.' % version_string
            remove_file_if_exists(chrome_rpm)
            sys.exit(1)

# This is where the magic happens
def main(wf):
    from PIL import Image
    urllib.urlretrieve("http://radar.weather.gov/Overlays/Topo/Short/" + ws + "_Topo_Short.jpg", 'static/r.gif')  # topographical map
    urllib.urlretrieve("http://radar.weather.gov/Overlays/County/Short/" + ws + "_County_Short.gif", 'static/1.gif')  # Counties map
    urllib.urlretrieve("http://radar.weather.gov/Overlays/Highways/Short/" + ws + "_Highways_Short.gif", 'static/2.gif')  # Highways map
    urllib.urlretrieve("http://radar.weather.gov/Overlays/Cities/Short/" + ws + "_City_Short.gif", 'static/3.gif')  # Cities labels map
    print(ws)
    overlay = Image.open('static/1.gif').convert('RGBA')
    a = Image.open('static/2.gif').convert('RGBA')
    b = Image.open('static/3.gif').convert('RGBA')
    overlay.paste(a, (0, 0), a)
    overlay.paste(b, (0, 0), b)
    info = overlay.info
    info['transparency'] = 0
    overlay.save('static/overlay.gif', **info)
    grayscale = Image.open('static/r.gif').convert('L')
    grayscale = grayscale.point(lambda p: p * 0.25)
    grayscale.save('static/grayscale.gif')
def __init__(self):
    self._target_size = 10
    logging.info("loading mnist data")
    path = os.path.join(tempfile.gettempdir(), "mnist.pkl.gz")
    if not os.path.exists(path):
        logging.info("downloading mnist data")
        urllib.urlretrieve(MNIST_URL, path)
    self._train_set, self._valid_set, self._test_set = cPickle.load(gzip.open(path, 'rb'))
    # Moving validation examples to training set, leaving 1000
    train_set_x = np.vstack((self._train_set[0], self._valid_set[0][:-1000]))
    train_set_y = np.hstack((self._train_set[1], self._valid_set[1][:-1000]))
    valid_set_x = self._valid_set[0][-1000:]
    valid_set_y = self._valid_set[1][-1000:]
    self._train_set = (train_set_x, train_set_y)
    self._valid_set = (valid_set_x, valid_set_y)
    logging.info("[mnist small validation] training data size: %d" % len(self._train_set[0]))
    logging.info("[mnist small validation] valid data size: %d" % len(self._valid_set[0]))
    logging.info("[mnist small validation] test data size: %d" % len(self._test_set[0]))
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, numFeature + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def loadData(src, cimg):
    gzfname, h = urlretrieve(src, './delete.me')
    try:
        with gzip.open(gzfname) as gz:
            n = struct.unpack('I', gz.read(4))
            if n[0] != 0x3080000:
                raise Exception('Invalid file: unexpected magic number.')
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} entries.'.format(cimg))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != 28 or ccol != 28:
                raise Exception('Invalid file: expected 28 rows/cols per image.')
            res = np.fromstring(gz.read(cimg * crow * ccol), dtype=np.uint8)
    finally:
        os.remove(gzfname)
    return res.reshape((cimg, crow * ccol))
def load_or_download_mnist_files(filename, num_samples, local_data_dir):
    if (local_data_dir):
        local_path = os.path.join(local_data_dir, filename)
    else:
        local_path = os.path.join(os.getcwd(), filename)

    if os.path.exists(local_path):
        gzfname = local_path
    else:
        local_data_dir = os.path.dirname(local_path)
        if not os.path.exists(local_data_dir):
            os.makedirs(local_data_dir)
        filename = "http://yann.lecun.com/exdb/mnist/" + filename
        print("Downloading from" + filename, end=" ")
        gzfname, h = urlretrieve(filename, local_path)
        print("[Done]")
    return gzfname
def load_data(self, train_dfn="adult.data", test_dfn="adult.test"):
    '''
    Load data (use files offered in the Tensorflow wide_n_deep_tutorial)
    '''
    if not os.path.exists(train_dfn):
        urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_dfn)
        print("Training data is downloaded to %s" % train_dfn)

    if not os.path.exists(test_dfn):
        urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", test_dfn)
        print("Test data is downloaded to %s" % test_dfn)

    self.train_data = pd.read_csv(train_dfn, names=COLUMNS, skipinitialspace=True)
    self.test_data = pd.read_csv(test_dfn, names=COLUMNS, skipinitialspace=True, skiprows=1)

    self.train_data[self.label_column] = (
        self.train_data["income_bracket"].apply(lambda x: ">50K" in x)).astype(int)
    self.test_data[self.label_column] = (
        self.test_data["income_bracket"].apply(lambda x: ">50K" in x)).astype(int)
def music(m):
    banlist = redis.sismember('banlist', '{}'.format(m.from_user.id))
    if str(banlist) == 'False':
        text = m.text.replace("/song ", "")
        opener = urllib2.build_opener()
        f = opener.open('https://api.spotify.com/v1/search?limit=1&type=track&q={}'.format(text))
        parsed_json = json.loads(f.read())
        Artist = parsed_json['tracks']['items'][0]['artists'][0]['name']
        name = parsed_json['tracks']['items'][0]['name']
        music = parsed_json['tracks']['items'][0]['preview_url']
        urllib.urlretrieve("{}".format(music), "song.ogg")
        image = parsed_json['tracks']['items'][0]['album']['images'][0]['url']
        urllib.urlretrieve("{}".format(image), "song.png")
        bot.send_message(m.chat.id, "*Artist* : ```{}``` \n *Name* : ```{}```".format(Artist, name), parse_mode="Markdown")
        bot.send_sticker(m.chat.id, open('song.png'))
        bot.send_document(m.chat.id, open('song.ogg'), caption=" @OffLiNeTeam")

##################################################################################################
def tostick(m):
    cid = m.chat.id
    if m.reply_to_message:
        if m.reply_to_message.photo:
            token = config.token
            fileid = m.reply_to_message.photo[1].file_id
            path1 = bot.get_file(fileid)
            path = path1.file_path
            link = "https://api.telegram.org/file/bot{}/{}".format(token, path)
            urllib.urlretrieve(link, "stick.png")
            file1 = open('stick.png', 'rb')
            bot.send_sticker(cid, file1)

##################################################################################################
#bot.message_handler(commands=['clac'])
def aparat(m):
    import urllib
    import json
    import os
    text = m.text.split(' ', 1)[1]
    url = urllib.urlopen('http://www.aparat.com/etc/api/videoBySearch/text/' + text)
    data = url.read()
    js = json.loads(data)
    title1 = js['videobysearch'][0]['title']
    poster1 = js['videobysearch'][0]['big_poster']
    uid1 = js['videobysearch'][0]['uid']
    urllib.urlretrieve(poster1, 'poster.png')
    bot.send_photo(m.chat.id, open('poster.png'), caption='Title : ' + title1 + '\nLink : http://www.aparat.com/v/' + uid1)
    os.remove('poster.png')

##################################################################################################
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, numFeature + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, NumFeat + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def download(self, tarDir=None, imgIds=[]):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print('Please specify target directory')
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print('downloaded %d/%d images (t=%.1fs)' % (i, N, time.time() - tic))
def googleStreetView(values, feature, parent):
    """ Returns a path to a local Google Street View image for the feature """
    x, y = feature.geometry().asPoint()
    baseurl = "https://maps.googleapis.com/maps/api/streetview?"
    w = 150
    h = 150
    fov = 90
    heading = 235
    pitch = 10
    # Named placeholders need keyword arguments in str.format()
    params = "size={w}x{h}&".format(w=w, h=h)
    params += "location={y},{x}&".format(y=y, x=x)
    params += "fov={}&heading={}&pitch={}".format(fov, heading, pitch)
    url = baseurl + params
    tmpdir = "/qgis_data/tmp/"
    img = tmpdir + str(feature.id()) + ".jpg"
    if not os.path.isfile(img):
        urllib.urlretrieve(url, img)
    uri = "file://" + img
    return uri
def parse(self, response):
    se = Selector(response)  # build a Selector from the response
    # Only handle wallpaper list pages whose URL matches this pattern
    if (re.match("http://desk.zol.com.cn/fengjing/\d+x\d+/\d+.html", response.url)):
        src = se.xpath("//ul[@class='pic-list2 clearfix']/li")  # all <li> items in the picture list
        for i in range(len(src)):
            imgURLs = se.xpath("//ul[@class='pic-list2 clearfix']/li[%d]/a/img/@src" % i).extract()
            titles = se.xpath("//ul[@class='pic-list2 clearfix']/li[%d]/a/img/@title" % i).extract()
            if imgURLs:
                # swap the thumbnail size token for the full-resolution one
                realUrl = imgURLs[0].replace("t_s208x130c5", "t_s2560x1600c5")
                file_name = u"%s.jpg" % titles[0]
                path = os.path.join("D:\pics", file_name)  # local save path
                type = sys.getfilesystemencoding()
                print file_name.encode(type)
                item = WebcrawlerScrapyItem()  # fill the item with name and URL
                item['name'] = file_name
                item['url'] = realUrl
                print item["name"], item["url"]
                yield item
                urllib.urlretrieve(realUrl, path)  # download the wallpaper to the local path
    all_urls = se.xpath("//a/@href").extract()  # follow further links on the page
    for url in all_urls:
        if url.startswith("/fengjing/1920x1080/"):
            yield Request("http://desk.zol.com.cn" + url, callback=self.parse)
def download_files(self, url, folder_domain):
    filename = url.split('/')[-1]
    full_filename = 'belatiFiles/{}/{}'.format(folder_domain, filename)
    full_filename_location = '{}/belatiFiles/{}/{}'.format(util.get_current_work_dir(), folder_domain, filename)
    meta = MetaExifExtractor()

    if not os.path.exists(os.path.dirname(full_filename)):
        try:
            os.makedirs(os.path.dirname(full_filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
        try:
            urllib.urlretrieve(url, filename=full_filename, reporthook=self.my_hook(t), data=None)
        except:
            pass

    meta_exif_json = meta.extract_json(full_filename_location)
    self.db.insert_public_doc(self.project_id, str(os.path.splitext(filename)[1]), str(url),
                              str(full_filename), str(full_filename_location), str(meta_exif_json))
def get(url):
    # Build and open the URL
    opener = urllib2.build_opener()
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
    response = opener.open(url)

    # HLTV redirects to a .rar or .zip file
    final_url = response.geturl()

    # Gets the filename (everything after the last trailing /)
    filename = final_url.rsplit('/', 1)[-1]

    # Gets the Content-Length from the metadata of final_url
    filesize = (int(urllib.urlopen(final_url).info().getheaders("Content-Length")[0]) / 1024) / 1024

    # Tell user we are downloading filesize
    print "Starting %s: %s MB." % (filename, filesize)

    # Downloads the file to the directory the user enters
    urllib.urlretrieve(final_url, directory + "/" + filename)

    # Tell user the current status and file information
    print "Completed %s: %s MB." % (filename, filesize)

    return filesize
def save_img(img_url, file_name, file_path):
    # Save the image at img_url into the directory file_path (e.g. ~/weiboImgs)
    try:
        if not os.path.exists(file_path):
            print 'directory', file_path, 'does not exist, creating it'
            # os.mkdir(file_path)
            os.makedirs(file_path)
        # keep the original file extension
        file_suffix = os.path.splitext(img_url)[1]
        # build the full local filename
        filename = '{}{}{}{}'.format(file_path, os.sep, file_name, file_suffix)
        # download the image to the local file
        urllib.urlretrieve(img_url, filename=filename)
        print 'saved', filename, 'under', file_path
        return file_suffix
    except IOError as e:
        print 'IO error:', e
    except Exception as e:
        print 'error:', e
def download_providers(self, url):
    """Download providers file from url"""
    path = tempfile.gettempdir()
    filename = os.path.join(path, 'providers.txt')
    print("\n----Downloading providers file----")
    if DEBUG:
        print("providers url = {}".format(url))
    try:
        # context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context = ssl._create_unverified_context()
        urllib.urlretrieve(url, filename, context=context)
        return filename
    except Exception:
        pass

    # fallback to no ssl context
    try:
        urllib.urlretrieve(url, filename)
        return filename
    except Exception, e:
        raise e
def download_picon_file(self, logourl, title, iconpath):
    if logourl:
        if not logourl.startswith('http'):
            logourl = 'http://{}'.format(logourl)
        piconname = self.get_picon_name(title)
        piconfilepath = os.path.join(iconpath, piconname)
        existingpicon = filter(os.path.isfile, glob.glob(piconfilepath + '*'))
        if not existingpicon:
            if DEBUG:
                print("Picon file doesn't exist downloading")
                print('PiconURL: {}'.format(logourl))
            else:
                # Output some kind of progress indicator
                sys.stdout.write('.')
                sys.stdout.flush()
            try:
                urllib.urlretrieve(logourl, piconfilepath)
            except Exception, e:
                if DEBUG:
                    print(e)
            return self.picon_post_processing(piconfilepath)