Python urllib module: URLopener() example source code

We extracted the following 49 code examples from open-source Python projects to illustrate how to use urllib.URLopener().

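Most of the snippets below are Python 2 code, where URLopener lives directly in the urllib module (the bandit-ss example also shows the urllib2, urllib.request and six.moves spellings). As a quick orientation before the project excerpts, here is a minimal sketch of the typical pattern; the URL and local filename are placeholders rather than values taken from any of the projects:

# Minimal sketch (Python 2): download a remote file with urllib.URLopener.
# The URL and destination filename below are illustrative placeholders.
import urllib

opener = urllib.URLopener()                     # build the opener
opener.retrieve("http://example.com/data.csv",  # remote resource to fetch
                "data.csv")                     # local path to write to
opener.close()                                  # close the underlying connection
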
Project: FBI-Scraper | Author: GKalliatakis
def get_biased_photos(tag, min_taken_date, max_taken_date):
    #Change Folder Path
    if os.path.isdir(tag):
        pass
    else:
        os.mkdir(tag)
    os.chdir(tag)

    #Run image download
    for page in range(1,8):
        photos = flickr.photos_search(tags=tag, page=page, per_page=500, tag_mode='all',
                                      sort="interestingness-desc",
                                      min_taken_date=min_taken_date,
                                      max_taken_date=max_taken_date)
        for photo in photos:
            try:
                url = photo.getURL(size='Original', urlType='source')
                urllist.append(url)
                image = urllib.URLopener()
                image.retrieve(url, os.path.basename(urlparse.urlparse(url).path))
                print 'Downloading...', url
            except flickr.FlickrError:
                print 'Link no longer available (!)'
########################################################################
Project: corpus-to-graph-ml | Author: CatalystCode
def load_model_from_url(url):
    # TODO: move this into a class..
    global scoring_model
    url_opener = urllib.URLopener()
    temp_model_path =  get_temp_model_path()
    url_opener.retrieve(url, temp_model_path)

    # try to load the model:
    try:
        temp_model = ScoringModel.from_file(temp_model_path)
    except Exception as e:
        print "Failed to load downloaded model: %s"%e
        os.remove(temp_model_path)
        raise RuntimeError("Failed to load downloaded model! error: %s"%e)

    # update model:
    scoring_model = temp_model

    # delete existing model
    if (path.isfile(model_file_path)):
        os.remove(model_file_path)
    os.rename(temp_model_path, model_file_path)


# TODO: move this to an object with an init function...
Project: useless-scripts | Author: veerendra2
def download_csv():
    servers_dict = dict()
    global SERVERS_LIST_FILE
    print Colors.OKBLUE+"Downloading Latest 'dnscrypt-resolvers.csv'.."+Colors.ENDC
    try:
        csv_file = urllib.URLopener()
        csv_file.retrieve("https://raw.githubusercontent.com/jedisct1/dnscrypt-proxy/master/dnscrypt-resolvers.csv", "/opt/dnscrypt-resolvers.csv")
    except:
        print Colors.WARNING+"Unable to download 'dnscrypt-resolvers.csv'. Using default /usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"+Colors.ENDC
        if os.path.exists("/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"):
            SERVERS_LIST_FILE="/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"
        else:
            print Colors.FAIL+"Default csv file not found. Exiting.."+Colors.ENDC
            exit(2)
    with open(SERVERS_LIST_FILE) as f:
        data = list(csv.reader(f, delimiter=",", quotechar='"', skipinitialspace=True))[1:]
        print "Index".ljust(5, " "), "Name".ljust(25, " "), "Location".ljust(25, " "), "DNSSEC".ljust(8,
                                                                                                      " "), "No Log".ljust(
            7, " "), "Resolver Address".ljust(30)
        print "".ljust(100, "-")
        for rows, index in zip(data, enumerate(data)):
            servers_dict.setdefault(index[0], rows[0])
            print str(index[0]).ljust(5, " "), rows[0].ljust(25, " "), rows[3].ljust(25, " "), rows[7].ljust(8, " "), \
            rows[9].ljust(7, " "), rows[10].ljust(30, " ")
    return servers_dict
Project: dmr_utils | Author: n0mjs710
def try_download(_path, _file, _url, _stale,):
    now = time()
    url = URLopener()
    file_exists = isfile(_path+_file) == True
    if file_exists:
        file_old = (getmtime(_path+_file) + _stale) < now
    if not file_exists or (file_exists and file_old):
        try:
            url.retrieve(_url, _path+_file)
            result = 'ID ALIAS MAPPER: \'{}\' successfully downloaded'.format(_file)
        except IOError:
            result = 'ID ALIAS MAPPER: \'{}\' could not be downloaded'.format(_file)
    else:
        result = 'ID ALIAS MAPPER: \'{}\' is current, not downloaded'.format(_file)
    url.close()
    return result

# LEGACY VERSION - MAKES A SIMPLE {INTEGER ID: 'CALLSIGN'} DICTIONARY
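A hypothetical call to try_download above, with placeholder path, filename, URL and staleness values, only re-fetches the file when the local copy is older than the given number of seconds:

# Hypothetical usage of try_download (all arguments are placeholders):
# re-download peer_ids.csv only if the local copy is more than a day old.
print try_download('/tmp/', 'peer_ids.csv',
                   'https://example.com/peer_ids.csv', 86400)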
Project: gps2tec | Author: weihan1107
def get_p1p2data(self):
        import urllib, os
        if self.file_exist():
            print "No need to download P1P2 DCB data..."
            return
        print "Start to download P1P2 DCB data..."
        weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
        if not os.path.isfile(self.sourcefn):
            try:
                download = urllib.URLopener()
                download.retrieve(weblink+self.sourcefn, self.sourcefn)
            except IOError:
                weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year_bom)
                download = urllib.URLopener()
                download.retrieve(weblink+self.sourcefn_bom, self.sourcefn)


        os.system("gzip -fd {0}".format(self.sourcefn))
Project: gps2tec | Author: weihan1107
def get_p1c1data(self):
        import urllib, os
        if self.file_exist():
            print "No need to download P1C1 DCB data..."
            return
        print "Start to download P1C1 DCB data..."
        weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
        if not os.path.isfile(self.sourcefn):
            try:
                download = urllib.URLopener()
                download.retrieve(weblink+self.sourcefn, self.sourcefn)
            except IOError:
                weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year_bom)
                download = urllib.URLopener()
                download.retrieve(weblink+self.sourcefn_bom, self.sourcefn)

        os.system("gzip -fd {0}".format(self.sourcefn))
Project: tf-tutorial | Author: zchen0211
def download(start, end):
  parse_dict = np.load('parse_dict')
  image = urllib.URLopener()
  for k in parse_dict.keys()[start:end]:
    # makedir of k
    log.info('crawling images of class %s' % k)
    data_path = os.path.join('/media/DATA/ImageNet/Extra/', k)
    if not os.path.exists(data_path):
      os.mkdir(data_path)
      cnt = 0
      for link in parse_dict[k][:500]:
        fn = os.path.join(data_path, '%s_%d.jpg' %(k, cnt))
        cnt += 1
        if cnt % 20 == 0: log.info('%d images' % cnt)
        # print fn
        try:
          image.retrieve(link, fn)
        except IOError:
          cnt -= 1
    # print len(parse_dict[k])
Project: code | Author: ActiveState
def __init__(self, *args):
        self.version = "iegrab/0.1"
        self.open = self.iegrab
        apply(urllib.URLopener.__init__, (self,) + args)
Project: PyHero | Author: Splinter0
def getLast(passwd):
    df = "http://10.5.5.9/" #DEFAULT PARTS
    p1 = "?t="
    p2 = "&p=%"

    par1, par2, opt = photo_mode() #MOVING TO PHOTO MODE
    urllib2.urlopen(df + par1 + "/" + par2 + p1 + passwd + p2 + opt)
    time.sleep(1)

    print("\n\r[" + extra.colors.yellow + ".." + extra.colors.end + "] Taking a pic")
    par1, par2, opt = shut()  #TAKE A PIC
    urllib2.urlopen(df + par1 + "/" + par2 + p1 + passwd + p2 + opt)
    time.sleep(2)

    url = "http://10.5.5.9:8080/gp/gpMediaList" #FIND THE PICTURE USING SOME REGEX
    content = urllib2.urlopen(url).read()
    content = str(content)
    content2 = content.split("},")
    last = content2[-1]
    last = re.findall('[A-Z+][0-9+]*', last)
    last = ''.join(last)
    last = re.sub(r'(JP)', r'.JP', last)

    time.sleep(1)
    print("\n\r[" + extra.colors.yellow + ".." + extra.colors.end + "] Downloading the pic")
    dow = "http://10.5.5.9:8080/DCIM/103GOPRO/" + last #DOWNLOAD THE PIC AND SAVE IT TO output/
    getFoto = urllib.URLopener()
    getFoto.retrieve("http://10.5.5.9:8080/DCIM/103GOPRO/" + last, "outputs/" + last)
    print("\r\n[" + extra.colors.green + "+" + extra.colors.end + "] Picture saved in outputs/"+last+"\r\n")
    try :
        time.sleep(2)
        process = subprocess.Popen("eog -f outputs/"+last, shell=True, stdout=subprocess.PIPE)
    except :
        pass

#TODO : ADD INFO() FUNCTION TO GET ALL INFORMATIONS ABOUT THE GOPRO AND ADD DELALL() THAT DELETE ALL FILES ON GOPRO
Project: images-web-crawler | Author: amineHorseman
def download(self, links, target_folder='./data'):
        """Download images from a list of links"""

        # check links and folder:
        if len(links) < 1:
            print("Error: Empty list, no links provided")
            exit()
        self.images_links = links
        DatasetBuilder.check_folder_existance(target_folder)
        if target_folder[-1] == '/':
            target_folder = target_folder[:-1]

        # start downloading:
        print("Downloading files...")
        progress = 0
        images_nbr = sum([len(self.images_links[key]) for key in self.images_links])
        for keyword, links in self.images_links.items():
            DatasetBuilder.check_folder_existance(target_folder + '/' + keyword, display_msg=False)
            for link in links:
                target_file = target_folder + '/' + keyword + '/' + link.split('/')[-1]
                try:
                    f = urllib.URLopener()
                    f.retrieve(link, target_file)
                except IOError:
                    self.failed_links.append(link)
                progress = progress + 1
                print("\r >> Download progress: ", (progress * 100 / images_nbr), "%...", end="")
                sys.stdout.flush()

        print("\r >> Download progress: ", (progress * 100 / images_nbr), "%")
        print(" >> ", (progress - len(self.failed_links)), " images downloaded")

        # save failed links:
        if len(self.failed_links):
            f2 = open(target_folder + "/failed_list.txt", 'w')
            for link in self.failed_links:
                f2.write(link + "\n")
            print(" >> Failed to download ", len(self.failed_links),
                  " images: access not granted ",
                  "(links saved to: '", target_folder, "/failed_list.txt')")
Project: crypto-forcast | Author: 7yl4r
def run(self):
        # with self.output().open('w') as f:
        src = "http://api.bitcoincharts.com/v1/csv/coinbaseUSD.csv.gz"
        testfile = urllib.URLopener()
        testfile.retrieve(src, self.output())
Project: metaqnn | Author: bowenbaker
def get_cifar100(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from the internet.

        Exactly one of save_dir and root_path must be provided (not both).

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        print 'Downloading CIFAR100 dataset...'
        tar_path = os.path.join(save_dir, "cifar-100-python.tar.gz")
        url = urllib.URLopener()
        url.retrieve("https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "cifar-100-python") if not root_path else root_path

    Xtr, Ytr = load_cifar100_data(os.path.join(root, 'train'))
    Xte, Yte = load_cifar100_data(os.path.join(root, 'test'))


    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte
Project: WPEAR | Author: stephenlienharrell
def download(self, url, filedir, defaultdownloaddir='/sourceFileDownloads'):
        downloadDirectory = defaultdownloaddir
        if not os.path.exists(downloadDirectory):
            os.makedirs(downloadDirectory)

        fullURL = urlparse.urljoin(url, filedir)
        file = urllib.URLopener()

        fileDownloadPath = downloadDirectory + '/' + fullURL.split('/')[-1]
        file.retrieve(fullURL, fileDownloadPath)
        return fileDownloadPath
Project: Vulcan | Author: rfratila
def _download_file(file_path, folder='data'):
    print("Downloading {}...".format(file_path))

    test_file = urllib.URLopener()
    file_name = file_path.split('/')[-1]
    test_file.retrieve(file_path, '{}/{}'.format(folder, file_name))
Project: nerodia | Author: watir
def stop(self):
        self.stop_serving = True
        try:
            # This is to force stop the server loop
            urllib_request.URLopener().open('http://{}:{}'.format(self.host, self.port))
        except IOError:
            pass
        logging.info('Shutting down the webserver')
        self.thread.join()
Project: useless-scripts | Author: veerendra2
def download_csv():
    global SERVERS_LIST_FILE
    print Colors.OKBLUE+"Downloading Latest 'dnscrypt-resolvers.csv'.."+Colors.ENDC
    try:
        csv_file = urllib.URLopener()
        csv_file.retrieve("https://raw.githubusercontent.com/jedisct1/dnscrypt-proxy/master/dnscrypt-resolvers.csv", "/opt/dnscrypt-resolvers.csv")
    except:
        print Colors.WARNING+"Unable to download 'dnscrypt-resolvers.csv'. Using default /usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"+Colors.ENDC
        if os.path.exists("/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"):
            SERVERS_LIST_FILE="/usr/share/dnscrypt-proxy/dnscrypt-resolvers.csv"
        else:
            print Colors.FAIL+"Default csv file not found. Exiting.."+Colors.ENDC
            exit(2)
Project: CIDDS | Author: markusring
def getCurrentServerConfig():
    newConfigFile = urllib.URLopener()
    newConfigFile.retrieve("YOUR_SERVER_IP/scripts/automation/packages/system/serverconfig.ini", "/home/debian/serverconfig.ini")

# Configure backup server
Project: CIDDS | Author: markusring
def getCurrentServerConfig():
    newConfigFile = urllib.URLopener()
    newConfigFile.retrieve("YOUR_SERVER_IP/scripts/automation/packages/system/serverconfig.ini", "/home/debian/serverconfig.ini")

# Configure backup server
Project: CIDDS | Author: markusring
def getCurrentServerConfig():
    newConfigFile = urllib.URLopener()
    newConfigFile.retrieve("YOUR_SERVER_IP/scripts/automation/packages/system/serverconfig.ini", "/home/debian/serverconfig.ini")

# Configure backup server
Project: CIDDS | Author: markusring
def getCurrentServerConfig():
    newConfigFile = urllib.URLopener()
    newConfigFile.retrieve("YOUR_SERVER_IP/automation/packages/system/serverconfig.ini", "packages/system/serverconfig.ini")

# Configure different server services
Project: bandit-ss | Author: zeroSteiner
def test_urlopen():
    # urllib
    url = urllib.quote('file:///bin/ls')
    urllib.urlopen(url, 'blah', 32)
    urllib.urlretrieve('file:///bin/ls', '/bin/ls2')
    opener = urllib.URLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')
    opener = urllib.FancyURLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    # urllib2
    handler = urllib2.HTTPBasicAuthHandler()
    handler.add_password(realm='test',
                         uri='http://mysite.com',
                         user='bob')
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    urllib2.urlopen('file:///bin/ls')
    urllib2.Request('file:///bin/ls')

    # Python 3
    urllib.request.urlopen('file:///bin/ls')
    urllib.request.urlretrieve('file:///bin/ls', '/bin/ls2')
    opener = urllib.request.URLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')
    opener = urllib.request.FancyURLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')

    # Six
    six.moves.urllib.request.urlopen('file:///bin/ls')
    six.moves.urllib.request.urlretrieve('file:///bin/ls', '/bin/ls2')
    opener = six.moves.urllib.request.URLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')
    opener = six.moves.urllib.request.FancyURLopener()
    opener.open('file:///bin/ls')
    opener.retrieve('file:///bin/ls')
Project: tichu-tournament | Author: aragos
def datareader(url,opener=urllib.URLopener().open):
        return opener(url).read()
Project: tensorflow-art | Author: nitroventures
def assure_model_file(model_file):
  model_file_path = os.path.join(CHECKPOINTS_DIR, model_file)

  if not os.path.isfile(model_file_path):
    url_opener = urllib.URLopener()
    print "downloading " + model_file
    url_opener.retrieve(MODEL_URL + "/" + model_file, model_file_path)
Project: gui-o-matic | Author: mailpile
def _do(self, op, args):
        op, args = op.lower(), copy.copy(args)

        if op == 'show_url':
            self.show_url(url=args[0])

        elif op in ('get_url', 'post_url'):
            url = args.pop(0)
            base_url = '/'.join(url.split('/')[:3])

            uo = urllib.URLopener()
            for cookie, value in self.config.get('http_cookies', {}
                                                 ).get(base_url, []):
                uo.addheader('Cookie', '%s=%s' % (cookie, value))

            if op == 'post_url':
                (fn, hdrs) = uo.retrieve(url, data=args)
            else:
                (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)

            with open(fn, 'rb') as fd:
                data = fd.read().strip()

            if data.startswith('{') and 'application/json' in hdrs:
                data = json.loads(data)
                if 'message' in data:
                    self.notify_user(data['message'])

        elif op == "shell":
            try:
                for arg in args:
                    rv = os.system(arg)
                    if 0 != rv:
                        raise OSError(
                            'Failed with exit code %d: %s' % (rv, arg))
            except:
                traceback.print_exc()

        elif hasattr(self, op):
            getattr(self, op)(**(args or {}))
Project: Insanity-Framework | Author: Exploit-install
def download_python():
    """ Download python for some reason..? """
    banner("Downloading Python 2.7.x.msi, please wait...")
    urllib.URLopener().retrieve("https://www.python.org/ftp/python/2.7.12/python-2.7.12.msi")
    os.system('sudo wine msiexec /i python-2.7.12.msi /L*v log.txt')
    os.system('clear')
Project: Insanity-Framework | Author: Exploit-install
def download_python_win_exten():
    """ Download the Windows extension for Python without checking the checksum. """
    banner("Downloading pywin32-220.win32-py2.7.exe (Windows extension), please wait...")
    urllib.URLopener().retrieve("https://ufpr.dl.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win32-py2.7.exe")
    os.system('sudo wine pywin32-220.win32-py2.7.exe')
    os.system(
        'sudo wine /root/.wine/drive_c/Python27/python.exe /root/.wine/drive_c/Python27/Scripts/pip.exe install pyinstaller')
    os.system(
        'sudo wine /root/.wine/drive_c/Python27/python.exe /root/.wine/drive_c/Python27/Scripts/pip.exe uninstall Crypto')
    os.system('sudo wine /root/.wine/drive_c/Python27/python.exe /root/.wine/drive_c/Python27/Scripts/pip.exe pycrypto')
    os.system('clear')
Project: Insanity-Framework | Author: Exploit-install
def download_vc_for_py():
    """ Download the VC extension for Python; this is a little less scary because it's from MS """
    banner("Downloading VCForPython27.msi, please wait...")
    urllib.URLopener().retrieve("https://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi")
    os.system('wget https://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi')
    os.system('sudo wine msiexec /i VCForPython27.msi /L*v log2.txt')
    os.system('mkdir .OK')
    os.system('sudo rm -Rf log2.txt')
    os.system('sudo rm -Rf log.txt')
Project: python_learn | Author: jetty-guo
def save_file(url, file_name):
    urlopen = urllib.URLopener()
    fp = urlopen.open(url)
    data = fp.read()
    fp.close()
    file = open(path+file_name,'wb')
    file.write(data)
    file.close()
Project: deb-python-pysaml2 | Author: openstack
def fetch_metadata(url, path, maxage=600):
    """
    :param url:  metadata remote location
    :param path: metadata file name
    :param maxage: if max age of existing metadata file (s) is exceeded,
     the file will be fetched from the remote location
    """
    fetch = False
    if not os.path.isfile(path):
        fetch = True
        logger.debug("metadata file %s not found", path)
    elif (os.path.getmtime(path) + maxage) < time.time():
        fetch = True
        logger.debug("metadata file %s from %s is more than %s s old",
                     path,
                     strftime("%Y-%m-%d %H:%M:%S", time.localtime(os.path.getmtime(path))),
                     maxage)
    else:
        logger.debug("metadata file %s is less than %s s old", path, maxage)
    if fetch:
        f=urllib.URLopener()
        try:
            f.retrieve(url, path)
            logger.debug("downloaded metadata from %s into %s", url, path)
        except:
            logger.debug("downloaded metadata from %s failed: %s",
                         url, sys.exc_info()[0])
Project: snap-to-cloud-examples | Author: synapse-wireless
def get_cafile(self):
        """Download a certificate to authenticate the identity of the AWS IoT platform."""
        authority_location = "https://www.symantec.com/content/en/us/enterprise/verisign/roots/VeriSign-Class%203-Public-Primary-Certification-Authority-G5.pem"

        url = urllib.URLopener()
        cafile = self.get_abs_path(CAFILE)
        url.retrieve(authority_location, cafile)
Project: Random | Author: rkramesh
def csvComment():
     'Module for data to be fetched and parsed into csv'
     print 'started'    
     with open(sortdata, 'r') as f:
         for line in f:
               line = line.strip('\n')
               durl='http://fisheye.cuc.com/cru/'+line+'/reviewHistory.csv'
               print durl
               testfile = urllib.URLopener()
               testfile.retrieve('http://fisheye.cuc.com/cru/'+line+'/reviewHistory.csv', line+'.csv')
               with open(line+'.csv') as f:
                    columns = defaultdict(list) # each value in each column is appended to a list
                    reader = csv.DictReader(f) # read rows into a dictionary format
                    for row in reader: # read a row as {column1: value1, column2: value2,...}
                        for (k,v) in row.items(): # go over each column name and value
                            columns[k].append(v) # append the value into the appropriate list

                    d = dict(zip(zip(columns['Date'],columns['User'],columns['New value']),columns['Action']))
                    print d
                    ##print rkdict
##                    for key, value in d.iteritems():
##                        if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
##                            writer = csv.writer(open('final.csv', 'ab'))
##                            for (key, value)in zip(d.items()):
##                                       writer.writerow([line, key, value ])
##                        else:
##                            print 'No Comments found for '+line
Project: Random | Author: rkramesh
def dictcsvFinalReview():
     print 'started'    
     with open(sortdata, 'r') as f:
         for line in f:
               line = line.strip('\n')
               durl='http://fisheye.cuc.com/cru/'+line+'/reviewHistory.csv'
               print durl
               testfile = urllib.URLopener()
               os.chdir(r'C:\Users\radhakrishnanr\Desktop\filescsv')
               testfile.retrieve('http://fisheye.cuc.com/cru/'+line+'/reviewHistory.csv', line+'.csv')
               columns = defaultdict(list) # each value in each column is appended to a list
               with open(line+'.csv') as f:
                    reader = csv.DictReader(f) # read rows into a dictionary format
                    for row in reader: # read a row as {column1: value1, column2: value2,...}
                        for (k,v) in row.items(): # go over each column name and value
                            columns[k].append(v) # append the value into the appropriate list
                                                 # based on column name k

               d = dict(zip(zip(columns['Date'],columns['User'],columns['New value']),columns['Action']))
               print d

##               for key, value in d.iteritems():
##                    if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
##                        
##                        writer = csv.writer(open('final.csv', 'ab'))
##                        for (key, value) in zip(d,line):
##                            writer.writerow([line, key])
##                    else:
##                         print 'No Comments found for '+line
Project: Random | Author: rkramesh
def csvComment():
     'Module for data to be fetched and parsed into csv'
     print 'started'    
     with open('sorted.txt', 'r') as f:
         for line in f:
               line = line.strip('\n')
               durl='http://fisheye.com/cru/'+line+'/reviewHistory.csv'
               print durl
               testfile = urllib.URLopener()
               testfile.retrieve('http://fisheye.com/cru/'+line+'/reviewHistory.csv', line+'.csv')
Project: Random | Author: rkramesh
def csvFinalReview():
     print 'started'    
     with open('sorted.txt', 'r') as f:
         for line in f:
               line = line.strip('\n')
               durl='http://fisheye.com/cru/'+line+'/reviewHistory.csv'
               print durl
               testfile = urllib.URLopener()
               testfile.retrieve('http://fisheye.com/cru/'+line+'/reviewHistory.csv', line+'.csv')
               columns = defaultdict(list) # each value in each column is appended to a list
               with open(line+'.csv') as f:
                    reader = csv.DictReader(f) # read rows into a dictionary format
                    for row in reader: # read a row as {column1: value1, column2: value2,...}
                        for (k,v) in row.items(): # go over each column name and value
                            columns[k].append(v) # append the value into the appropriate list
                                                 # based on column name k

               d = dict(zip(zip(columns['Date'],columns['User'],columns['New value']),columns['Action']))
               for key, value in d.iteritems():
                    if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
                        print line, key
                        try:
                            os.remove(line+'.csv')
                        except IOError:
                            pass







#csvComment()
#csvReview()
Project: gps2tec | Author: weihan1107
def get_gimdata(self):
        import urllib, os
        if self.file_exist():
            print "No need to download GIM data..."
            return
        print "Start to download GIM data..."
        weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
        if not os.path.isfile(self.sourcefn[:-2]): 
            if not os.path.isfile(self.sourcefn):
                download = urllib.URLopener()
                download.retrieve(weblink+self.sourcefn, self.sourcefn)
            os.system("gzip -fd {0}".format(self.sourcefn))
Project: gps2tec | Author: weihan1107
def get_navidata(self):
        import urllib, os
        if self.file_exist():
            print "No need to download Navigation data..."
            return

        print "Start to download Navigation data..."
        if self.types in 'igslocal':
            weblink = "ftp://igscb.jpl.nasa.gov/pub/product/"
            if not (os.path.isfile(self.sourcefn_igs1) or os.path.isfile(self.sourcefn_igr1)):
                try:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks1, self.sourcefn_igs1), self.sourcefn_igs1)
                    self.sourcefn1 = self.sourcefn_igs1[:-2]
                except IOError:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks1, self.sourcefn_igr1), self.sourcefn_igr1)
                    self.sourcefn1 = self.sourcefn_igr1[:-2]
            if not (os.path.isfile(self.sourcefn_igs2) or os.path.isfile(self.sourcefn_igr2)):
                try:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks2, self.sourcefn_igs2), self.sourcefn_igs2)
                    self.sourcefn2 = self.sourcefn_igs2[:-2]
                except IOError:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks2, self.sourcefn_igr2), self.sourcefn_igr2)
                    self.sourcefn2 = self.sourcefn_igr2[:-2]
        elif self.types=='igsrt':
            weblink = "ftp://cddis.gsfc.nasa.gov/pub/gps/products/{0}/".format(self.sourcefn_igu[3:7])
            download = urllib.URLopener()
            download.retrieve(weblink+self.sourcefn_igu, self.sourcefn_igu)
            self.sourcefn = self.sourcefn_igu[:-2]
        os.system("gzip -fd *sp3.Z")
Project: muzi-scanner | Author: sdslabs
def save_image(self, url, path):
        """
        :param url:
        :param path:
        :return nothing:
        """
        image = urllib.URLopener()
        image.retrieve(url, path)
Project: VanillaML | Author: vinhkhuc
def download_file(url, local_path):
    dir_path = path.dirname(local_path)
    if not path.exists(dir_path):
        print("Creating the directory '%s' ..." % dir_path)
        os.makedirs(dir_path)

    urllib.URLopener().retrieve(url, local_path)
Project: T2B-framework | Author: pielco11
def DownHTTP(url,fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        fileHTTP.retrieve(url,url.split("/")[len(url.split("/"))-1])
    else:
        fileHTTP.retrieve(url,fileName)

###### setup  EDIT
Project: T2B-framework | Author: pielco11
def DownHTTP(url,fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        if os.path.isfile(url.split("/")[len(url.split("/"))-1]) == 1:
            newName = url.split("/")[len(url.split("/"))-1].split(".")[0]+"_."+url.split("/")[len(url.split("/"))-1].split(".")[1]
            fileHTTP.retrieve(url,newName)
            return " saved the file with the original name + \"_\""
        else:
            fileHTTP.retrieve(url,url.split("/")[len(url.split("/"))-1])
            return " saved the file with the original name"
    else:
        fileHTTP.retrieve(url,fileName)
        return " saved the file with the given name"
Project: T2B-framework | Author: pielco11
def DownHTTP(url,fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        fileHTTP.retrieve(url,url.split("/")[len(url.split("/"))-1])
    else:
        fileHTTP.retrieve(url,fileName)
Project: T2B-framework | Author: pielco11
def DownHTTP(url,fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        fileHTTP.retrieve(url,url.split("/")[len(url.split("/"))-1])
    else:
        fileHTTP.retrieve(url,fileName)

###### setup  EDIT
Project: T2B-framework | Author: pielco11
def DownHTTP(url,fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        if os.path.isfile(url.split("/")[len(url.split("/"))-1]) == 1:
            newName = url.split("/")[len(url.split("/"))-1].split(".")[0]+"_."+url.split("/")[len(url.split("/"))-1].split(".")[1]
            fileHTTP.retrieve(url,newName)
            return " saved the file with the original name + \"_\""
        else:
            fileHTTP.retrieve(url,url.split("/")[len(url.split("/"))-1])
            return " saved the file with the original name"
    else:
        fileHTTP.retrieve(url,fileName)
        return " saved the file with the given name"
Project: libSigNetSim | Author: vincent-noel
def downloadSource(self):

        download_file = URLopener()
        download_file.retrieve(self.__url, self.__filename)
        self.__sourceAvailable = True
Project: metaqnn | Author: bowenbaker
def get_caltech101(save_dir=None, root_path=None):
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        print 'Downloading Caltech101 dataset...'
        tar_path = os.path.join(save_dir, "101_ObjectCategories.tar.gz")
        url = urllib.URLopener(context=ctx)
        url.retrieve("https://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "101_ObjectCategories") if not root_path else root_path

    train_x = []
    train_y = []
    val_x = []
    val_y = []

    label = 0
    for cls_folder in os.listdir(root):
        cls_root = os.path.join(root, cls_folder)
        if not os.path.isdir(cls_root):
            continue

        cls_images = [misc.imread(os.path.join(cls_root, img_name)) for img_name in os.listdir(cls_root)]
        cls_images = [np.repeat(np.expand_dims(img, 2), 3, axis=2) if len(img.shape) == 2 else img for img in cls_images]
        cls_images = np.array([np.reshape(misc.imresize(img, (224,224,3)), (3,224,224)) for img in cls_images])
        new_index = np.random.permutation(np.arange(cls_images.shape[0]))
        cls_images = cls_images[new_index, :, :, :]

        train_x.append(cls_images[:30])
        train_y.append(np.array([label]*30))
        if len(cls_images) <= 80:
            val_x.append(cls_images[30:])
            val_y.append(np.array([label]*(len(cls_images)-30)))
        else:
            val_x.append(cls_images[30:80])
            val_y.append(np.array([label]*50))
        label += 1

    Xtr = np.concatenate(train_x)
    Ytr = np.concatenate(train_y)
    Xval= np.concatenate(val_x)
    Yval= np.concatenate(val_y)

    print 'Xtr shape ', Xtr.shape
    print 'Ytr shape ', Ytr.shape
    print 'Xval shape ', Xval.shape
    print 'Yval shape ', Yval.shape

    return Xtr, Ytr, Xval, Yval
Project: metaqnn | Author: bowenbaker
def get_cifar10(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from the internet.

        Exactly one of save_dir and root_path must be provided (not both).

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        print 'Downloading CIFAR10 dataset...'
        tar_path = os.path.join(save_dir, "cifar-10-python.tar.gz")
        url = urllib.URLopener()
        url.retrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "cifar-10-batches-py") if not root_path else root_path


    # Training Data
    xs = []
    ys = []
    for b in range(1,6):
        f = os.path.join(root, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape

    # Testing data
    Xte, Yte = load_CIFAR_batch(os.path.join(root, 'test_batch'))
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte
Project: metaqnn | Author: bowenbaker
def get_svhn(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from the internet.

        Exactly one of save_dir and root_path must be provided (not both).

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        new_save_dir = os.path.join(save_dir, 'og_data')
        if not os.path.isdir(new_save_dir):
            os.mkdir(new_save_dir)
        train_mat = os.path.join(new_save_dir, "train_32x32.mat")
        test_mat =  os.path.join(new_save_dir, "test_32x32.mat")
        url = urllib.URLopener()

        print 'Downloading Svhn Train...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/train_32x32.mat", train_mat)
        print 'Downloading Svhn Test...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/test_32x32.mat", test_mat)


    root = new_save_dir if not root_path else root_path

    train = io.loadmat(os.path.join(root, 'train_32x32.mat'))
    Xtr = train['X']
    Ytr = train['y']
    del train

    test = io.loadmat(os.path.join(root, 'test_32x32.mat'))
    Xte = test['X']
    Yte = test['y']
    del test

    Xtr = np.transpose(Xtr, (3, 2, 0, 1))
    Xte = np.transpose(Xte, (3, 2, 0, 1))
    Ytr = Ytr.reshape(Ytr.shape[:1]) - 1
    Yte = Yte.reshape(Yte.shape[:1]) - 1

    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte
Project: metaqnn | Author: bowenbaker
def get_svhn_full(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from the internet.

        Exactly one of save_dir and root_path must be provided (not both).

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    Xtr_small, Ytr_small, Xte, Yte = get_svhn(save_dir, root_path)

    if root_path is None:
        new_save_dir = os.path.join(save_dir, 'og_data')
        if not os.path.isdir(new_save_dir):
            os.mkdir(new_save_dir)
        extra_mat = os.path.join(new_save_dir, "extra_32x32.mat")
        url = urllib.URLopener()

        print 'Downloading Svhn Extra...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", extra_mat)

    root = new_save_dir if not root_path else root_path
    extra = io.loadmat(os.path.join(root, 'extra_32x32.mat'))
    Xtr_extra = extra['X']
    Ytr_extra = extra['y']

    Xtr_extra = np.transpose(Xtr_extra, (3, 2, 0, 1))
    Ytr_extra = Ytr_extra.reshape(Ytr_extra.shape[:1]) - 1

    print 'Xextra shape', Xtr_extra.shape
    print 'Yextra shape', Ytr_extra.shape


    val_x = []
    val_y = []
    train_x = []
    train_y = []
    for i in np.unique(Ytr_small):
        # Get 400 images from X_small
        X_small_label = Xtr_small[Ytr_small == i]
        val_x.append(X_small_label[:400])
        val_y.append([i]*400)
        train_x.append(X_small_label[400:])
        train_y.append([i]*(X_small_label.shape[0] - 400))
        # Get 200 images from X_extra
        X_extra_label = Xtr_extra[Ytr_extra == i]
        val_x.append(X_extra_label[:200])
        val_y.append([i]*200)
        train_x.append(X_extra_label[200:])
        train_y.append([i]*(X_extra_label.shape[0] - 200))

    Xtr = np.concatenate(train_x)
    Ytr = np.concatenate(train_y)
    Xval = np.concatenate(val_x)
    Yval = np.concatenate(val_y)

    return Xtr, Ytr, Xval, Yval, Xte, Yte
Project: flickr_downloader | Author: Denisolt
def download():
    choice = (raw_input('Type "tag" or "album" for corresponding choice. \nDo you want to download images by tag or specific album: '))
    #counter is created in order to label the images when they are downloaded
    counter = 0

    if(choice == 'album'):
        albumID = int(raw_input('Enter the ID of the folder you wish to download: '))
        name = raw_input('Enter the username of the desired users pictures: ')
        # checking if the folder exists, creating a folder and moving into it
        if not os.path.exists(name+'/'+str(albumID)):
            os.makedirs(name+'/'+str(albumID))
        os.chdir(name+'/'+str(albumID))

        print('Downloading...')
        # walk_set function loops through the pictures of a specific album
        for photo in flickr.walk_set(albumID):
            # beautiful soup opens up the direct link to the picture using authors id(name) and photo id, specifying sizes/k will
            # result in the highest quality picture available on flickr
            url = 'https://www.flickr.com/photos/'+ name+ '/' + photo.get('id') + '/sizes/k/'
            webpage = requests.get(url)
            soup = BeautifulSoup(webpage.text, 'html.parser')
            x = soup.findAll('img')
            # we read the html using soup and look for img, after which we look for src link and extract it
            for link in soup.find_all('img'):
                new = (link.get('src'))
                if(new.count(".jpg")) == 1:
                    #the link is downloaded using URLopener() and saved with 'photo + counter'
                    testfile = urllib.URLopener()
                    testfile.retrieve(new, 'photo' + str(counter) + '.jpg' )
                    counter = counter + 1

    elif(choice == 'tag'):
        tag = raw_input('Enter the tags(in format:tagName1,tagName2,tagName3 and etc): ')
        # checking if the folder exists, creating a folder and moving into it
        if not os.path.exists(tag):
            os.makedirs(tag)
        os.chdir(tag)
        # checking the total number of available pictures with the specific tag
        total = int(flickr.photos.search(tags=tag).find('photos').attrib['total'])          
        print('There are ' + str(total) + ' pictures found \nDownloading...')
        # walk_set function loops through the pictures with the tag for more info go to flickrapi python documentation
        for photo in flickr.walk(tag_mode='all', tags=tag):
            author =  photo.get('owner') # return the owner of the picture
            # beautiful soup opens up the direct link to the picture using authors id and photos id, specifying sizes/k will
            # result in the highest quality picture available on flickr
            url = 'https://www.flickr.com/photos/'+ author+ '/' + photo.get('id') + '/sizes/k/'
            webpage = requests.get(url)
            soup = BeautifulSoup(webpage.text, 'html.parser')
            x = soup.findAll('img')
            # we read the html using soup and look for img, after which we look for src link and extract it
            for link in soup.find_all('img'):
                new = (link.get('src'))
                if(new.count(".jpg")) == 1:
                    #the link is downloaded using URLopener() and saved with 'photo + counter'
                    testfile = urllib.URLopener()
                    testfile.retrieve(new, 'photo' + str(counter) + '.jpg' )
                    counter = counter + 1
    else:
        print('An Error appeared in your input. ')
        download()
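
In Python 3 there is no top-level urllib.URLopener: the legacy class lives on as urllib.request.URLopener (deprecated), and new code normally calls urllib.request.urlretrieve() or urlopen() instead, as the bandit-ss and nerodia examples above already illustrate. A minimal Python 3 sketch of the same download pattern, again with a placeholder URL and filename:

# Minimal sketch (Python 3): one-shot equivalent of URLopener().retrieve(url, filename).
# The URL and filename are illustrative placeholders.
import urllib.request

urllib.request.urlretrieve("http://example.com/data.csv", "data.csv")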