The following 50 code examples, extracted from open-source Python projects, illustrate how to use urllib.urlretrieve().
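For orientation, here is a minimal, self-contained sketch of the call itself; the URL, local file name, and reporthook below are illustrative placeholders, not code from any of the projects. urllib.urlretrieve(url, filename) copies the resource at url into a local file and returns a (filename, headers) tuple; the optional reporthook callback receives the block count, block size, and total size as the download progresses. (In Python 3 the equivalent function is urllib.request.urlretrieve.)

# Minimal usage sketch (Python 2); URL and file name are placeholders.
import urllib

def report(block_count, block_size, total_size):
    # Called by urlretrieve after each block; total_size is -1 if unknown.
    if total_size > 0:
        done = min(block_count * block_size, total_size)
        print 'downloaded %d of %d bytes' % (done, total_size)

# Returns the local file name and the response headers (a mimetools.Message).
filename, headers = urllib.urlretrieve(
    'http://example.com/data.csv', 'data.csv', reporthook=report)
print headers.gettype()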
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'COLT2016'
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl
        pbar.log('http://jmlr.org/proceedings/papers/v49/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://jmlr.org/proceedings/papers/v49/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'ICML2015'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl
        pbar.log('http://jmlr.org/proceedings/papers/v37/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://jmlr.org/proceedings/papers/v37/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'ICML2016'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl
        pbar.log('http://jmlr.org/proceedings/papers/v48/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://jmlr.org/proceedings/papers/v48/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2012'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2016'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2013'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="/paper/(.+?)"'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'NIPS2014'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        filename = dir_name + '/' + pdfurl + '.pdf'
        pbar.log('http://papers.nips.cc/paper/' + pdfurl + '.pdf')
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://papers.nips.cc/paper/' + pdfurl + '.pdf', filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2014'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2016'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2015'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_pdf(html):
    """ xxx"""
    reg = r'href="(.+?\.pdf)">pdf'
    pdfre = re.compile(reg)
    pdflist = re.findall(pdfre, html)
    dir_name = 'CVPR2013'
    maxrows = len(pdflist)
    pbar = prgbar.ProgressBar(total=maxrows)
    if os.path.exists(dir_name) is False:
        os.mkdir(dir_name)
    for idx, pdfurl in enumerate(pdflist):
        reg2 = r'papers/(.+?\.pdf)'
        pdfre2 = re.compile(reg2)
        filename = dir_name + '/' + re.findall(pdfre2, pdfurl)[0]
        pbar.log('http://www.cv-foundation.org/openaccess/' + pdfurl)
        if os.path.exists(filename) is True:
            pbar.log('Exist')
        else:
            urllib.urlretrieve(
                'http://www.cv-foundation.org/openaccess/' + pdfurl, filename)
        pbar.update(index=(idx + 1))
    pbar.finish()
def get_url(self, query):
    site1 = urllib.urlopen('http://www.youtube.com/results?search_query=%s' % query)
    html = site1.read()
    soup = BS(html)
    links = soup.findAll('a')
    vidlinks = [link.get('href') for link in links if link.get('href') is not None]
    vlink = [i for i in vidlinks if '/watch?v=' in i][0]
    img_link = soup.findAll('img', {'alt': 'Thumbnail', 'width': '185'})[0].get('src')
    img_url = 'http:%s' % img_link
    imagethread = threading.Thread(
        target=lambda: urllib.urlretrieve(img_url, 'Files\image.jpg'))
    imagethread.start()
    return vlink
def load(batch_size, test_batch_size, n_labelled=None):
    filepath = '/tmp/mnist.pkl.gz'
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

    if not os.path.isfile(filepath):
        print "Couldn't find MNIST dataset in /tmp, downloading..."
        urllib.urlretrieve(url, filepath)

    with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f:
        train_data, dev_data, test_data = pickle.load(f)

    return (
        mnist_generator(train_data, batch_size, n_labelled),
        mnist_generator(dev_data, test_batch_size, n_labelled),
        mnist_generator(test_data, test_batch_size, n_labelled)
    )
def download(self, tarDir=None, imgIds=[]):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print 'Please specify target directory'
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print 'downloaded %d/%d images (t=%.1fs)' % (i, N, time.time() - tic)
def download_from_github(args):
    if os.path.exists('dappled.yml'):
        yml = ruamel.yaml.load(open('dappled.yml').read(), ruamel.yaml.RoundTripLoader)
    else:
        print('dappled.yml not found; please run "dappled init" first')
        sys.exit()

    if not yml.get('github'):
        return False
    github_repo = yml['github']

    url = 'https://github.com/{owner}/{repo}/archive/{sha}.zip'.format(**github_repo)
    filename, headers = urllib.urlretrieve(url)
    with zipfile.ZipFile(filename) as zf:
        prefix = None
        for name in zf.namelist():
            if prefix is None:  # assume first entry is the directory name
                prefix = name
                continue
            path = name.replace(prefix, '')
            if path in ('dappled.yml', yml['filename'], 'environment.yml'):
                continue
            with open(path, 'wb') as f:
                f.write(zf.read(name))
def store_raw_images():
    '''To download images from image-net
    (Change the url for different needs of cascades)
    '''
    neg_images_link = 'http://image-net.org/api/text/imagenet.synset.geturls?wnid=n07942152'
    neg_image_urls = urllib2.urlopen(neg_images_link).read().decode()
    pic_num = 1

    for i in neg_image_urls.split('\n'):
        try:
            print i
            urllib.urlretrieve(i, "neg/" + str(pic_num) + '.jpg')
            img = cv2.imread("neg/" + str(pic_num) + '.jpg', cv2.IMREAD_GRAYSCALE)
            resized_image = cv2.resize(img, (100, 100))
            cv2.imwrite("neg/" + str(pic_num) + '.jpg', resized_image)
            pic_num = pic_num + 1
        except:
            print "error"
def download_pretrained_models(
        models_root_dir='/tmp/sketch_rnn/models',
        pretrained_models_url=PRETRAINED_MODELS_URL):
    """Download pretrained models to a temporary directory."""
    tf.gfile.MakeDirs(models_root_dir)
    zip_path = os.path.join(
        models_root_dir, os.path.basename(pretrained_models_url))
    if os.path.isfile(zip_path):
        tf.logging.info('%s already exists, using cached copy', zip_path)
    else:
        tf.logging.info('Downloading pretrained models from %s...',
                        pretrained_models_url)
        urllib.urlretrieve(pretrained_models_url, zip_path)
        tf.logging.info('Download complete.')
    tf.logging.info('Unzipping %s...', zip_path)
    with zipfile.ZipFile(zip_path) as models_zip:
        models_zip.extractall(models_root_dir)
    tf.logging.info('Unzipping complete.')
def fetch(self, output):
    dl_path = self.get_latest_tarball()
    raw_path, _ = urllib.urlretrieve(dl_path)

    # The downloaded file is a gzip'd tarball.
    extract_path = self._make_temp_directory("rootfetch-apple-extracted")
    sh.tar("-xzv", "-f", raw_path, "-C", extract_path, strip_components=1)

    # We now have a directory with all the apple files. We need to find the
    # roots directory, parse out all the different formats, then generate a
    # single file that has PEMs in it.
    certificates_path = os.path.join(extract_path, "certificates", "roots")
    for f in os.listdir(certificates_path):
        full_path = os.path.join(certificates_path, f)
        if not os.path.isfile(full_path):
            continue
        pem = self.make_pem(full_path)
        output.write("# ")
        output.write(f)
        output.write("\n")
        output.write("\n".join(pem))
        output.write("\n\n")
def main():
    uri, outfile, dataset = get_arguments()
    fd = tempfile.NamedTemporaryFile()
    progress = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA(),
                                    ' ', FileTransferSpeed()])

    def update(count, blockSize, totalSize):
        if progress.maxval is None:
            progress.maxval = totalSize
            progress.start()
        progress.update(min(count * blockSize, totalSize))

    urllib.urlretrieve(uri, fd.name, reporthook=update)

    if dataset == 'zinc12':
        df = pandas.read_csv(fd.name, delimiter='\t')
        df = df.rename(columns={'SMILES': 'structure'})
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
    elif dataset == 'chembl22':
        df = pandas.read_table(fd.name, compression='gzip')
        df = df.rename(columns={'canonical_smiles': 'structure'})
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
        pass
    else:
        df = pandas.read_csv(fd.name, delimiter='\t')
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
def download_chrome_latest_rpm(arch):
    chrome_rpm = 'google-chrome-%s_current_%s.rpm' % (version_string, arch)
    path = 'https://dl.google.com/linux/direct/%s' % chrome_rpm

    if (args.clean):
        remove_file_if_exists(chrome_rpm)

    # Let's make sure we haven't already downloaded it.
    if os.path.isfile("./%s" % chrome_rpm):
        print "%s already exists!" % chrome_rpm
    else:
        print "Downloading %s" % path
        # Perhaps look at using python-progressbar at some point?
        info = urllib.urlretrieve(path, chrome_rpm, reporthook=dlProgress)[1]
        urllib.urlcleanup()
        print ""
        if (info["Content-Type"] != "binary/octet-stream" and
                info["Content-Type"] != "application/x-redhat-package-manager"):
            print 'Chrome %s rpms are not on servers.' % version_string
            remove_file_if_exists(chrome_rpm)
            sys.exit(1)

# This is where the magic happens
def main(wf):
    from PIL import Image
    urllib.urlretrieve("http://radar.weather.gov/Overlays/Topo/Short/" + ws + "_Topo_Short.jpg", 'static/r.gif')  # topographical map
    urllib.urlretrieve("http://radar.weather.gov/Overlays/County/Short/" + ws + "_County_Short.gif", 'static/1.gif')  # Counties map
    urllib.urlretrieve("http://radar.weather.gov/Overlays/Highways/Short/" + ws + "_Highways_Short.gif", 'static/2.gif')  # Highways map
    urllib.urlretrieve("http://radar.weather.gov/Overlays/Cities/Short/" + ws + "_City_Short.gif", 'static/3.gif')  # Cities labels map
    print(ws)
    overlay = Image.open('static/1.gif').convert('RGBA')
    a = Image.open('static/2.gif').convert('RGBA')
    b = Image.open('static/3.gif').convert('RGBA')
    overlay.paste(a, (0, 0), a)
    overlay.paste(b, (0, 0), b)
    info = overlay.info
    info['transparency'] = 0
    overlay.save('static/overlay.gif', **info)
    grayscale = Image.open('static/r.gif').convert('L')
    grayscale = grayscale.point(lambda p: p * 0.25)
    grayscale.save('static/grayscale.gif')
def __init__(self):
    self._target_size = 10
    logging.info("loading mnist data")
    path = os.path.join(tempfile.gettempdir(), "mnist.pkl.gz")
    if not os.path.exists(path):
        logging.info("downloading mnist data")
        urllib.urlretrieve(MNIST_URL, path)
    self._train_set, self._valid_set, self._test_set = cPickle.load(gzip.open(path, 'rb'))
    # Moving validation examples to training set, leaving 1000
    train_set_x = np.vstack((self._train_set[0], self._valid_set[0][:-1000]))
    train_set_y = np.hstack((self._train_set[1], self._valid_set[1][:-1000]))
    valid_set_x = self._valid_set[0][-1000:]
    valid_set_y = self._valid_set[1][-1000:]
    self._train_set = (train_set_x, train_set_y)
    self._valid_set = (valid_set_x, valid_set_y)
    logging.info("[mnist small validation] training data size: %d" % len(self._train_set[0]))
    logging.info("[mnist small validation] valid data size: %d" % len(self._valid_set[0]))
    logging.info("[mnist small validation] test data size: %d" % len(self._test_set[0]))
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, numFeature + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def loadData(src, cimg):
    gzfname, h = urlretrieve(src, './delete.me')
    try:
        with gzip.open(gzfname) as gz:
            n = struct.unpack('I', gz.read(4))
            if n[0] != 0x3080000:
                raise Exception('Invalid file: unexpected magic number.')
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} entries.'.format(cimg))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != 28 or ccol != 28:
                raise Exception('Invalid file: expected 28 rows/cols per image.')
            res = np.fromstring(gz.read(cimg * crow * ccol), dtype=np.uint8)
    finally:
        os.remove(gzfname)
    return res.reshape((cimg, crow * ccol))
def load_or_download_mnist_files(filename, num_samples, local_data_dir):
    if (local_data_dir):
        local_path = os.path.join(local_data_dir, filename)
    else:
        local_path = os.path.join(os.getcwd(), filename)

    if os.path.exists(local_path):
        gzfname = local_path
    else:
        local_data_dir = os.path.dirname(local_path)
        if not os.path.exists(local_data_dir):
            os.makedirs(local_data_dir)
        filename = "http://yann.lecun.com/exdb/mnist/" + filename
        print("Downloading from" + filename, end=" ")
        gzfname, h = urlretrieve(filename, local_path)
        print("[Done]")
    return gzfname
def load_data(self, train_dfn="adult.data", test_dfn="adult.test"):
    '''
    Load data (use files offered in the Tensorflow wide_n_deep_tutorial)
    '''
    if not os.path.exists(train_dfn):
        urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_dfn)
        print("Training data is downloaded to %s" % train_dfn)

    if not os.path.exists(test_dfn):
        urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", test_dfn)
        print("Test data is downloaded to %s" % test_dfn)

    self.train_data = pd.read_csv(train_dfn, names=COLUMNS, skipinitialspace=True)
    self.test_data = pd.read_csv(test_dfn, names=COLUMNS, skipinitialspace=True, skiprows=1)

    self.train_data[self.label_column] = (
        self.train_data["income_bracket"].apply(lambda x: ">50K" in x)).astype(int)
    self.test_data[self.label_column] = (
        self.test_data["income_bracket"].apply(lambda x: ">50K" in x)).astype(int)
def music(m):
    banlist = redis.sismember('banlist', '{}'.format(m.from_user.id))
    if str(banlist) == 'False':
        text = m.text.replace("/song ", "")
        opener = urllib2.build_opener()
        f = opener.open('https://api.spotify.com/v1/search?limit=1&type=track&q={}'.format(text))
        parsed_json = json.loads(f.read())
        Artist = parsed_json['tracks']['items'][0]['artists'][0]['name']
        name = parsed_json['tracks']['items'][0]['name']
        music = parsed_json['tracks']['items'][0]['preview_url']
        urllib.urlretrieve("{}".format(music), "song.ogg")
        image = parsed_json['tracks']['items'][0]['album']['images'][0]['url']
        urllib.urlretrieve("{}".format(image), "song.png")
        bot.send_message(m.chat.id, "*Artist* : ```{}``` \n *Name* : ```{}```".format(Artist, name), parse_mode="Markdown")
        bot.send_sticker(m.chat.id, open('song.png'))
        bot.send_document(m.chat.id, open('song.ogg'), caption=" @OffLiNeTeam")

##################################################################################################
def tostick(m):
    cid = m.chat.id
    if m.reply_to_message:
        if m.reply_to_message.photo:
            token = config.token
            fileid = m.reply_to_message.photo[1].file_id
            path1 = bot.get_file(fileid)
            path = path1.file_path
            link = "https://api.telegram.org/file/bot{}/{}".format(token, path)
            urllib.urlretrieve(link, "stick.png")
            file1 = open('stick.png', 'rb')
            bot.send_sticker(cid, file1)

##################################################################################################
#bot.message_handler(commands=['clac'])
def aparat(m):
    import urllib
    import json
    import os
    text = m.text.split(' ', 1)[1]
    url = urllib.urlopen('http://www.aparat.com/etc/api/videoBySearch/text/' + text)
    data = url.read()
    js = json.loads(data)
    title1 = js['videobysearch'][0]['title']
    poster1 = js['videobysearch'][0]['big_poster']
    uid1 = js['videobysearch'][0]['uid']
    urllib.urlretrieve(poster1, 'poster.png')
    bot.send_photo(m.chat.id, open('poster.png'), caption='Title : ' + title1 + '\nLink : http://www.aparat.com/v/' + uid1)
    os.remove('poster.png')

##################################################################################################
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, numFeature + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, NumFeat + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def download(self, tarDir=None, imgIds=[]):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print('Please specify target directory')
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print('downloaded %d/%d images (t=%.1fs)' % (i, N, time.time() - tic))
def googleStreetView(values, feature, parent):
    """ Returns a path to a local Google Street View image for the feature """
    x, y = feature.geometry().asPoint()
    baseurl = "https://maps.googleapis.com/maps/api/streetview?"
    w = 150
    h = 150
    fov = 90
    heading = 235
    pitch = 10
    # Named placeholders need keyword arguments in str.format()
    params = "size={w}x{h}&".format(w=w, h=h)
    params += "location={y},{x}&".format(y=y, x=x)
    params += "fov={}&heading={}&pitch={}".format(fov, heading, pitch)
    url = baseurl + params
    tmpdir = "/qgis_data/tmp/"
    img = tmpdir + str(feature.id()) + ".jpg"
    if not os.path.isfile(img):
        urllib.urlretrieve(url, img)
    uri = "file://" + img
    return uri
def parse(self, response):
    se = Selector(response)  # build a Selector from the response
    # Only handle wallpaper list pages whose URL matches this pattern
    if (re.match("http://desk.zol.com.cn/fengjing/\d+x\d+/\d+.html", response.url)):
        src = se.xpath("//ul[@class='pic-list2 clearfix']/li")  # all <li> items in the picture list
        for i in range(len(src)):
            imgURLs = se.xpath("//ul[@class='pic-list2 clearfix']/li[%d]/a/img/@src" % i).extract()
            titles = se.xpath("//ul[@class='pic-list2 clearfix']/li[%d]/a/img/@title" % i).extract()
            if imgURLs:
                # swap the thumbnail size token for the full-resolution one
                realUrl = imgURLs[0].replace("t_s208x130c5", "t_s2560x1600c5")
                file_name = u"%s.jpg" % titles[0]
                path = os.path.join("D:\pics", file_name)  # local save path
                type = sys.getfilesystemencoding()
                print file_name.encode(type)
                item = WebcrawlerScrapyItem()  # fill the item with name and URL
                item['name'] = file_name
                item['url'] = realUrl
                print item["name"], item["url"]
                yield item
                urllib.urlretrieve(realUrl, path)  # download the wallpaper to the local path
    all_urls = se.xpath("//a/@href").extract()  # follow further links on the page
    for url in all_urls:
        if url.startswith("/fengjing/1920x1080/"):
            yield Request("http://desk.zol.com.cn" + url, callback=self.parse)
def download_files(self, url, folder_domain):
    filename = url.split('/')[-1]
    full_filename = 'belatiFiles/{}/{}'.format(folder_domain, filename)
    full_filename_location = '{}/belatiFiles/{}/{}'.format(util.get_current_work_dir(), folder_domain, filename)
    meta = MetaExifExtractor()

    if not os.path.exists(os.path.dirname(full_filename)):
        try:
            os.makedirs(os.path.dirname(full_filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
        try:
            urllib.urlretrieve(url, filename=full_filename, reporthook=self.my_hook(t), data=None)
        except:
            pass

    meta_exif_json = meta.extract_json(full_filename_location)
    self.db.insert_public_doc(self.project_id, str(os.path.splitext(filename)[1]), str(url),
                              str(full_filename), str(full_filename_location), str(meta_exif_json))
def get(url):
    # Build and open the URL
    opener = urllib2.build_opener()
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
    response = opener.open(url)

    # HLTV redirects to a .rar or .zip file
    final_url = response.geturl()

    # Gets the filename (everything after the last trailing /)
    filename = final_url.rsplit('/', 1)[-1]

    # Gets the Content-Length from the metadata of final_url
    filesize = (int(urllib.urlopen(final_url).info().getheaders("Content-Length")[0]) / 1024) / 1024

    # Tell user we are downloading filesize
    print "Starting %s: %s MB." % (filename, filesize)

    # Downloads the file to the directory the user enters
    urllib.urlretrieve(final_url, directory + "/" + filename)

    # Tell user the current status and file information
    print "Completed %s: %s MB." % (filename, filesize)

    return filesize
def save_img(img_url, file_name, file_path):
    # Save the image at img_url into the directory file_path (e.g. ~/weiboImgs)
    try:
        if not os.path.exists(file_path):
            print 'directory', file_path, 'does not exist, creating it'
            # os.mkdir(file_path)
            os.makedirs(file_path)
        # keep the original file extension
        file_suffix = os.path.splitext(img_url)[1]
        # build the full local filename
        filename = '{}{}{}{}'.format(file_path, os.sep, file_name, file_suffix)
        # download the image to the local file
        urllib.urlretrieve(img_url, filename=filename)
        print 'saved', filename, 'under', file_path
        return file_suffix
    except IOError as e:
        print 'IO error:', e
    except Exception as e:
        print 'error:', e
def download_providers(self, url):
    """Download providers file from url"""
    path = tempfile.gettempdir()
    filename = os.path.join(path, 'providers.txt')
    print("\n----Downloading providers file----")
    if DEBUG:
        print("providers url = {}".format(url))
    try:
        # context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context = ssl._create_unverified_context()
        urllib.urlretrieve(url, filename, context=context)
        return filename
    except Exception:
        pass

    # fallback to no ssl context
    try:
        urllib.urlretrieve(url, filename)
        return filename
    except Exception, e:
        raise e
def download_picon_file(self, logourl, title, iconpath):
    if logourl:
        if not logourl.startswith('http'):
            logourl = 'http://{}'.format(logourl)
        piconname = self.get_picon_name(title)
        piconfilepath = os.path.join(iconpath, piconname)
        existingpicon = filter(os.path.isfile, glob.glob(piconfilepath + '*'))
        if not existingpicon:
            if DEBUG:
                print("Picon file doesn't exist downloading")
                print('PiconURL: {}'.format(logourl))
            else:
                # Output some kind of progress indicator
                sys.stdout.write('.')
                sys.stdout.flush()
            try:
                urllib.urlretrieve(logourl, piconfilepath)
            except Exception, e:
                if DEBUG:
                    print(e)
            return self.picon_post_processing(piconfilepath)