We extracted the following 50 code examples from open-source Python projects to illustrate how to use pickle.load().
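Before the project-specific examples, here is a minimal round-trip sketch of the basic contract all of the snippets below rely on: pickle.dump() writes an object to a file opened in binary mode, and pickle.load() reads it back. The file name data.pkl is only an illustration, not taken from any of the projects.

import pickle

# Minimal round trip: dump an object, then load it back.
# "data.pkl" is an arbitrary example path.
obj = {"numbers": [1, 2, 3], "name": "example"}

with open("data.pkl", "wb") as f:        # pickle files must be opened in binary mode
    pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

with open("data.pkl", "rb") as f:
    restored = pickle.load(f)

assert restored == obj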
def pickle_load(path, compression=False):
    """Unpickle a possibly compressed pickle.

    Parameters
    ----------
    path: str
        path to the output file
    compression: bool
        if true assumes that pickle was compressed when created and
        attempts decompression.

    Returns
    -------
    obj: object
        the unpickled object
    """
    if compression:
        with zipfile.ZipFile(path, "r", compression=zipfile.ZIP_DEFLATED) as myzip:
            with myzip.open("data") as f:
                return pickle.load(f)
    else:
        with open(path, "rb") as f:
            return pickle.load(f)
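The snippet above only covers reading; its compressed branch assumes a ZIP archive containing a single member named "data". A matching writer is not part of the extracted code, but a minimal sketch of one (hypothetical, for illustration only) could look like this:

import pickle
import zipfile

def pickle_dump(obj, path, compression=False):
    """Hypothetical counterpart to pickle_load above: writes the pickle either
    directly to disk or into a ZIP archive under the member name "data"."""
    if compression:
        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as myzip:
            myzip.writestr("data", pickle.dumps(obj, pickle.HIGHEST_PROTOCOL))
    else:
        with open(path, "wb") as f:
            pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)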
def directory_has_smart_contract(location):
    # returns bool if there is a tsol contract in said directory
    # probably makes more sense to put this inside of the tsol package
    code_path = glob.glob(os.path.join(location, '*.tsol'))
    example = glob.glob(os.path.join(location, '*.json'))
    assert len(code_path) > 0 and len(example) > 0, 'Could not find *.tsol and *.json files in provided directory.'

    # pop off the first file name and turn the code into a file object
    code = open(code_path[0])

    # turn the example into a dict
    with open(example[0]) as e:
        example = json.load(e)

    try:
        tsol.compile(code, example)
    except Exception as e:
        print(e)
        return False
    return True
def main():
    '''
    Run code specified by data received over pipe
    '''
    assert is_forking(sys.argv)

    handle = int(sys.argv[-1])
    fd = msvcrt.open_osfhandle(handle, os.O_RDONLY)
    from_parent = os.fdopen(fd, 'rb')

    process.current_process()._inheriting = True
    preparation_data = load(from_parent)
    prepare(preparation_data)
    self = load(from_parent)
    process.current_process()._inheriting = False

    from_parent.close()

    exitcode = self._bootstrap()
    exit(exitcode)
def get_item_history(self, prior_or_train, reconstruct=False, none_idx=49689):
    filepath = self.cache_dir + './item_history_' + prior_or_train + '.pkl'
    if (not reconstruct) and os.path.exists(filepath):
        with open(filepath, 'rb') as f:
            item_history = pickle.load(f)
    else:
        up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending=True)
        item_history = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
        item_history.loc[item_history.order_number == 1, 'product_id'] = item_history.loc[item_history.order_number == 1, 'product_id'] + [none_idx]
        item_history = item_history.sort_values(['user_id', 'order_number'], ascending=True)
        # accumulate
        item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].transform(pd.Series.cumsum)
        # get unique item list
        item_history['product_id'] = item_history['product_id'].apply(set).apply(list)
        item_history = item_history.sort_values(['user_id', 'order_number'], ascending=True)
        # shift each group to make it history
        item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].shift(1)
        for row in item_history.loc[item_history.product_id.isnull(), 'product_id'].index:
            item_history.at[row, 'product_id'] = [none_idx]
        item_history = item_history.sort_values(['user_id', 'order_number'], ascending=True).groupby(['user_id'])['product_id'].apply(list).reset_index()
        item_history.columns = ['user_id', 'history_items']
        with open(filepath, 'wb') as f:
            pickle.dump(item_history, f, pickle.HIGHEST_PROTOCOL)
    return item_history
def get_taxi_stats(data_path=data_path):
    file_name = 'taxi_data_stats.p'
    path = data_path + file_name
    if not os.path.isfile(path):
        download(file_name, data_path=data_path)

    import pickle
    # open the pickle in binary mode
    stats = pickle.load(open(path, 'rb'))
    sum_X = stats['sum_X']
    sum_X2 = stats['sum_X2']
    n = float(stats['n'])
    X_mean = sum_X / n
    X_std = ((sum_X2 - (sum_X ** 2) / n) / (n - 1)) ** 0.5
    X_mean = np.reshape(X_mean, [1, -1])
    X_std = np.reshape(X_std, [1, -1])
    return X_mean, X_std
def __init__(self, counts=None, calledfuncs=None, infile=None,
             callers=None, outfile=None):
    # Note: this example uses Python 2 syntax (print statement, comma-style except).
    self.counts = counts
    if self.counts is None:
        self.counts = {}
    self.counter = self.counts.copy()  # map (filename, lineno) to count
    self.calledfuncs = calledfuncs
    if self.calledfuncs is None:
        self.calledfuncs = {}
    self.calledfuncs = self.calledfuncs.copy()
    self.callers = callers
    if self.callers is None:
        self.callers = {}
    self.callers = self.callers.copy()
    self.infile = infile
    self.outfile = outfile
    if self.infile:
        # Try to merge existing counts file.
        try:
            counts, calledfuncs, callers = \
                pickle.load(open(self.infile, 'rb'))
            self.update(self.__class__(counts, calledfuncs, callers))
        except (IOError, EOFError, ValueError), err:
            print >> sys.stderr, ("Skipping counts file %r: %s"
                                  % (self.infile, err))
def from_snapshot(self, sess, sfile, nfile):
    print('Restoring model snapshots from {:s}'.format(sfile))
    self.saver.restore(sess, sfile)
    print('Restored.')
    # Needs to restore the other hyper-parameters/states for training, (TODO xinlei) I have
    # tried my best to find the random states so that it can be recovered exactly
    # However the Tensorflow state is currently not available
    with open(nfile, 'rb') as fid:
        st0 = pickle.load(fid)
        cur = pickle.load(fid)
        perm = pickle.load(fid)
        cur_val = pickle.load(fid)
        perm_val = pickle.load(fid)
        last_snapshot_iter = pickle.load(fid)

        np.random.set_state(st0)
        self.data_layer._cur = cur
        self.data_layer._perm = perm
        self.data_layer_val._cur = cur_val
        self.data_layer_val._perm = perm_val

    return last_snapshot_iter
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            try:
                roidb = pickle.load(fid)
            except:
                roidb = pickle.load(fid, encoding='bytes')
        print('{} gt roidb loaded from {}'.format(self.name, cache_file))
        return roidb

    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb
def iter_keys_values(self, keys, inds=None, verbose=False):
    for key in keys:
        if key not in self.keys_:
            raise RuntimeError('Key %s not found in dataset. keys: %s' % (key, self.keys_))

    idx, ii = 0, 0
    total_chunks = len(self.meta_file_.chunks)
    inds = np.sort(inds) if inds is not None else None

    for chunk_idx, chunk in enumerate(progressbar(self.meta_file_.chunks, size=total_chunks, verbose=verbose)):
        data = AttrDict.load(self.get_chunk_filename(chunk_idx))

        # if inds is None:
        items = (data[key] for key in keys)
        for item in izip(*items):
            yield item
        # else:
        #     for i, item in enumerate(data[key]):
        #         if inds[ii] == idx + i:
        #             yield item
        #             ii += 1
        #             if ii >= len(inds): break
        #     idx += len(data[key])
def iterchunks(self, key, batch_size=10, verbose=False):
    if key not in self.keys_:
        raise RuntimeError('Key %s not found in dataset. keys: %s' % (key, self.keys_))

    idx, ii = 0, 0
    total_chunks = len(self.meta_file_.chunks)
    batch_chunks = grouper(range(len(self.meta_file_.chunks)), batch_size)

    for chunk_group in progressbar(batch_chunks, size=total_chunks / batch_size, verbose=verbose):
        items = []
        # print key, chunk_group
        for chunk_idx in chunk_group:
            # grouper will fill chunks with default none values
            if chunk_idx is None:
                continue

            # Load chunk
            data = AttrDict.load(self.get_chunk_filename(chunk_idx))
            for item in data[key]:
                items.append(item)

        yield items
def district_hash_map(data_frame):
    district_map_f = "cluster_map.pickle"
    district_map_f_path = os.path.join(DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, district_map_f)
    if not os.path.exists(district_map_f_path):
        create_hash_district_map_dict()

    # load the needed map file
    with open(district_map_f_path, "rb") as f:
        map_rule = pickle.load(f)

    # map the needed cols..
    for i in range(len(data_frame.columns)):
        if "district_hash" in data_frame.columns[i]:
            # map the hash according to the map rule
            district_hash_col = data_frame.columns[i]
            data_frame[district_hash_col] = data_frame[district_hash_col].replace(map_rule)
            # after mapping, delete its hash str
            new_name = re.sub("_hash", "", district_hash_col)
            data_frame.rename(columns={district_hash_col: new_name}, inplace=True)

    return data_frame


## input the dir you want to map the hash
def setup(self, config):
    """
    Load existing data for given worker.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.path = os.path.join(
        config[helper.DATA_ROOT],
        '%s_buckets-%s.pickle' % (self.NAME, config[helper.WORKER_ID]))
    with open(self.path, 'a') as _:
        pass
    with open(self.path, 'rb') as inp:
        try:
            self.buckets = pickle.load(inp)
        except Exception:
            self.buckets = {}

    config_related = config.get(helper.RELATED, {}).get(self.NAME, {})
    self.min_score = config_related.get(helper.MIN_SCORE, 0.4)
    self.min_shared = config_related.get(helper.MIN_SHARED, 5)
    self.max_results = config_related.get(helper.MAX_RESULTS, 100)
def load_all(self, config):
    """
    Load all existing data.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.buckets = {}
    for path in glob.glob(os.path.join(
            config[helper.DATA_ROOT], '%s_buckets-*.pickle' % self.NAME)):
        with open(path, 'rb') as inp:
            try:
                for key, value in pickle.load(inp).items():
                    if key in self.buckets:
                        self.buckets[key]['bins'].update(value['bins'])
                    else:
                        self.buckets[key] = value
            except:
                logging.warning('could not load related_%s data', self.NAME)
def build_data_dict(self, layer_features, k=5):
    """
    This builds dict[id] = {label, spacing, 1={loc, p, layer1_feature, layer2_feature...}, 2={}...}
    :param layer_features: features from layer, e.g. 67, 77
    :param k: number of nodules considered as inputs
    :return: a combined dictionary
    """
    with open(self.pkl_dir + self.data_file_name, 'rb') as data_file:
        data = cPickle.load(data_file)
    with open(self.pkl_dir + self.feature_file_name, 'rb') as feature_file:
        features = cPickle.load(feature_file)

    data_dict = {}
    for d, f in zip(data, features):
        pid = d['id']
        data_dict[pid] = {'label': d['label'], 'spacing': d['spacing']}
        # add the features
        for i in range(k):
            data_dict[pid][i] = {'loc': f['loc_{}'.format(i)], 'p': f['p_{}'.format(i)]}
            for layer in layer_features:
                data_dict[pid][i][layer] = f['out_{}_{}'.format(i, layer)]
    return data_dict
def next_batch(self, batch_size):
    assert self.train_mode or self.validation_mode, "Please set mode, train, validation or test. e.g. DataLoad.train()"
    idx_next_batch = [(self.current_idx + i) % len(self.p_imgs) for i in range(self.batch_size)]
    patient_img_next_batch = [self.p_imgs[idx] for idx in idx_next_batch]
    batch_image = []
    batch_mask = []
    for image in patient_img_next_batch:
        fi = gzip.open(self.data_path + image, 'rb')
        img = pickle.load(fi)
        img = np.expand_dims(img, axis=2)
        batch_image.append(img)
        fi.close()

        fm = gzip.open(self.mask_path + image, 'rb')
        mask = pickle.load(fm)
        fm.close()
        mask_binary_class = np.zeros([mask.shape[0], mask.shape[1], 2])
        mask_binary_class[:, :, 0][mask == 0] = 1
        mask_binary_class[:, :, 1][mask == 1] = 1
        batch_mask.append(mask_binary_class)

    self.current_idx = (self.current_idx + batch_size) % len(self.p_imgs)
    batched_image = np.stack(batch_image)
    batched_mask = np.stack(batch_mask)
    return batched_image, batched_mask
def sent_tokenize(text, lang='english'):
    """
    Punkt sentence tokenizer from NLTK.
    """
    global _nltk_sent_tokenizer
    try:
        _nltk_sent_tokenizer
    except NameError:
        # If the sentence tokenizer wasn't previously initialized.
        available_languages = ['czech', 'danish', 'dutch', 'english',
                               'estonian', 'finnish', 'french', 'german',
                               'greek', 'italian', 'norwegian', 'polish',
                               'portuguese', 'slovene', 'spanish', 'swedish',
                               'turkish']
        assert lang in available_languages, "Punkt Tokenizer for {} not available".format(lang)
        # Checks that the punkt tokenizer model was previously downloaded.
        download('punkt', quiet=True)
        path_to_punkt = _nltk_downloader._download_dir + '/tokenizers/punkt/{}.pickle'.format(lang)
        with open(path_to_punkt, 'rb') as fin:
            _nltk_sent_tokenizer = pickle.load(fin)
    # Actual tokenization using the Punkt Model.
    return _nltk_sent_tokenizer.tokenize(text)
def read_fakelc(fakelcfile):
    '''
    This just reads a pickled fake LC.
    '''
    try:
        with open(fakelcfile, 'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(fakelcfile, 'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict


#######################
## UTILITY FUNCTIONS ##
#######################
def read_pklc(lcfile):
    '''
    This just reads a pickle.
    '''
    try:
        with open(lcfile, 'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(lcfile, 'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict


# LC format -> [default fileglob, function to read LC format]
def read_pklc(lcfile):
    '''
    This just reads a pickle.
    '''
    try:
        with open(lcfile, 'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(lcfile, 'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict


# these translate filter operators given as strings to Python operators
def register(self, name, serializer):
    """Register ``serializer`` object under ``name``.

    Raises :class:`AttributeError` if ``serializer`` is invalid.

    .. note::

        ``name`` will be used as the file extension of the saved files.

    :param name: Name to register ``serializer`` under
    :type name: ``unicode`` or ``str``
    :param serializer: object with ``load()`` and ``dump()`` methods

    """
    # Basic validation
    getattr(serializer, 'load')
    getattr(serializer, 'dump')

    self._serializers[name] = serializer
def get(self, sid):
    if not self.is_valid_key(sid):
        return self.new()
    try:
        f = open(self.get_session_filename(sid), 'rb')
    except IOError:
        if self.renew_missing:
            return self.new()
        data = {}
    else:
        try:
            try:
                data = load(f)
            except Exception:
                data = {}
        finally:
            f.close()
    return self.session_class(data, sid, False)
def find_neighbor():
    h2c = pickle.load(open("comps/mobike/sol_carl/data/h2c.p", "rb"))
    c2h = pickle.load(open("comps/mobike/sol_carl/data/c2h.p", "rb"))
    print(len(h2c), len(c2h))
    lc = [len(c2h[i]) for i in c2h]
    #distribution(lc)
    #point = list(h2c.keys())[0]
    point = "wx4snhx"
    print("hash", point, h2c[point])
    lat, lon = h2c[point]
    #lat,lon = int(lat+0.5),int(lon+0.5)
    points = c2h[(lat, lon)]
    for la in [lat - 0.01, lat, lat + 0.01]:
        for lo in [lon - 0.01, lon, lon + 0.01]:
            coord = (la, lo)
            points = c2h.get(coord, [])
            for p in points:
                d = geo_distance(h2c[p], (lat, lon))
                print(coord, p, d)
def get_per_sample_tf(self, texts, field, silent=0):
    """
    Each sample is a document.
    Input:
        texts: ["train","text"]
    """
    if self.sample_tf is not None:
        return
    self.sample_tf = {}
    self.get_per_sample_words_count(texts, field, 1)
    for text in texts:
        name = "{}/{}_sample_tf_{}.p".format(self.flags.data_path, self.name, text)
        if os.path.exists(name):
            self.sample_tf[text] = pickle.load(open(name, 'rb'))
        else:
            print("gen", name)
            tf_list = tf(self.sample_words_count[text], 0)
            pickle.dump(tf_list, open(name, 'wb'))
            self.sample_tf[text] = tf_list
        if silent == 0:
            print("\n{} sample tf done".format(text))
def mean_target_rate(name, out, idcol, ycol):
    if os.path.exists(out):
        return pickle.load(open(out, 'rb'))
    yc, cc = defaultdict(float), defaultdict(float)
    for c, row in enumerate(csv.DictReader(open(name))):
        y = float(row[ycol])
        for i in row:
            if i in [idcol, ycol]:
                continue
            v = "%s-%s" % (i, row[i])
            yc[v] += y
            cc[v] += 1.0
        if c > 0 and c % 100000 == 0:
            print("rows %d len_cc %d" % (c, len(cc)))

    for i in yc:
        yc[i] = yc[i] / cc[i]

    pickle.dump(yc, open(out, 'wb'))
    return yc
def tutor_fpout():
    pklout = os.path.join(RESDIR, TUTORPKL)
    if os.path.exists(pklout):
        with open(pklout, 'rb') as f:
            fpout = pickle.load(f)
    else:
        print('re-creating fp results ... this could take a few minutes')
        zip_archive = os.path.join(DATADIR, ZIPFILE)
        with zipfile.ZipFile(zip_archive, 'r') as zfile:
            zfile.extractall(DATADIR)
        fpout = tutor_example()
        make_clean_dat()
        os.makedirs(RESDIR, exist_ok=True)
        with open(pklout, 'wb') as f:
            pickle.dump(fpout, f)
    return fpout
def load_egg(filepath):
    """
    Loads pickled egg

    Parameters
    ----------
    filepath : str
        Location of pickled egg

    Returns
    ----------
    egg : Egg data object
        A loaded unpickled egg
    """
    with open(filepath, 'rb') as f:
        egg = pickle.load(f)
    return egg
def unpickle_cookies(args, alias=None):
    """
    Unpickles the cookies file for the given alias and returns the original
    object. If no file exists, then an empty cookies object is returned.
    """
    if alias is None:
        alias = args.alias

    cookie_file = os.path.join(get_working_dir(args), alias + ".cookies")

    try:
        with open(cookie_file, "rb") as cookie_jar:
            cookies = pickle.load(cookie_jar)
    except BaseException:
        cookies = requests.cookies.RequestsCookieJar()

    return cookies
def loadData(self, filename, verbose=True, replace_missing=True):
    ''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'format' not in self.info.keys():
        self.getFormatData(filename)
    if 'feat_num' not in self.info.keys():
        self.getNbrFeatures(filename)

    data_func = {'dense': data_io.data, 'sparse': data_io.data_sparse, 'sparse_binary': data_io.data_binary_sparse}

    data = data_func[self.info['format']](filename, self.info['feat_num'])

    # IMPORTANT: when we replace missing values we double the number of variables
    if self.info['format'] == 'dense' and replace_missing and np.any(map(np.isnan, data)):
        vprint(verbose, "Replace missing values by 0 (slow, sorry)")
        data = data_converter.replace_missing(data)

    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(data)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return data
def loadLabel(self, filename, verbose=True):
    ''' Get the solution/truth values'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'task' not in self.info.keys():
        self.getTypeProblem(filename)

    # IG: Here change to accommodate the new multiclass label format
    if self.info['task'] == 'multilabel.classification':
        label = data_io.data(filename)
    elif self.info['task'] == 'multiclass.classification':
        label = data_converter.convert_to_num(data_io.data(filename))
    else:
        label = np.ravel(data_io.data(filename))  # get a column vector
        #label = np.array([np.ravel(data_io.data(filename))]).transpose()  # get a column vector

    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(label)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return label
def generate(location):
    # cli wizard for creating a new contract from a template
    if directory_has_smart_contract(location):
        example_payload = json.load(open(glob.glob(os.path.join(location, '*.json'))[0]))
        print(example_payload)

        for k, v in example_payload.items():
            value = input(k + ':')
            if value != '':
                example_payload[k] = value
        print(example_payload)

        code_path = glob.glob(os.path.join(location, '*.tsol'))
        tsol.compile(open(code_path[0]), example_payload)
        print('Code compiles with new payload.')

        selection = ''
        while True:
            selection = input('(G)enerate Solidity contract or (E)xport implementation:')
            if selection.lower() == 'g':
                output_name = input('Name your contract file without an extension:')
                code = tsol.generate_code(open(code_path[0]).read(), example_payload)
                open(os.path.join(location, '{}.sol'.format(output_name)), 'w').write(code)
                break

            if selection.lower() == 'e':
                output_name = input('Name your implementation file without an extension:')
                json.dump(example_payload, open(os.path.join(location, '{}.json'.format(output_name)), 'w'))
                break
    else:
        print('Provided directory does not contain a *.tsol and *.json or does not compile.')
def load(cls, file_obj):
    """Load serialized object from open JSON file.

    .. versionadded:: 1.8

    :param file_obj: file handle
    :type file_obj: ``file`` object
    :returns: object loaded from JSON file
    :rtype: object

    """
    return json.load(file_obj)
def load(cls, file_obj):
    """Load serialized object from open pickle file.

    .. versionadded:: 1.8

    :param file_obj: file handle
    :type file_obj: ``file`` object
    :returns: object loaded from pickle file
    :rtype: object

    """
    return cPickle.load(file_obj)
def _load(self):
    """Load cached settings from JSON file `self._filepath`."""
    self._nosave = True
    d = {}
    with open(self._filepath, 'rb') as file_obj:
        for key, value in json.load(file_obj, encoding='utf-8').items():
            d[key] = value
    self.update(d)
    self._original = deepcopy(d)
    self._nosave = False
def cached_data(self, name, data_func=None, max_age=60):
    """Return cached data if younger than ``max_age`` seconds.

    Retrieve data from cache or re-generate and re-cache data if
    stale/non-existant. If ``max_age`` is 0, return cached data no
    matter how old.

    :param name: name of datastore
    :param data_func: function to (re-)generate data.
    :type data_func: ``callable``
    :param max_age: maximum age of cached data in seconds
    :type max_age: ``int``
    :returns: cached data, return value of ``data_func`` or ``None``
        if ``data_func`` is not set

    """
    serializer = manager.serializer(self.cache_serializer)

    cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
    age = self.cached_data_age(name)

    if (age < max_age or max_age == 0) and os.path.exists(cache_path):
        with open(cache_path, 'rb') as file_obj:
            self.logger.debug('Loading cached data from : %s', cache_path)
            return serializer.load(file_obj)

    if not data_func:
        return None

    data = data_func()
    self.cache_data(name, data)

    return data
def read_data(path):
    with open(path, 'rb') as f:
        data = pickle.load(f)
    return data
def unpickle(file):
    fo = open(file, 'rb')
    d = pickle.load(fo, encoding='latin1')
    fo.close()
    return {'x': np.cast[th.config.floatX]((-127.5 + d['data'].reshape((10000, 3, 32, 32))) / 128.),
            'y': np.array(d['labels']).astype(np.uint8)}
def unpickle(file):
    import pickle
    fo = open(file, 'rb')
    dict = pickle.load(fo, encoding='latin1')
    fo.close()
    return dict
def to_categorical(y, nb_classes):
    y = np.asarray(y, dtype='int32')
    if not nb_classes:
        nb_classes = np.max(y) + 1
    Y = np.zeros((len(y), nb_classes))
    for i in range(len(y)):
        Y[i, y[i]] = 1.
    return Y


# load training and testing data
def load_batch(fpath):
    with open(fpath, 'rb') as f:
        d = pickle.load(f, encoding='latin1')
    data = d["data"]
    labels = d["labels"]
    return data, labels
def load_model(self, epoch=None):
    '''
    Loads a saved model. If epoch id is provided, will load the corresponding model.
    Or else, will load the best model.
    '''
    if not epoch:
        self.model = load_model("%s.model" % self.model_name_prefix,
                                custom_objects=self.custom_objects)
    else:
        self.model = load_model("%s_%d.model" % (self.model_name_prefix, epoch),
                                custom_objects=self.custom_objects)
    self.model.summary()
    self.data_processor = pickle.load(open("%s.dataproc" % self.model_name_prefix, "rb"))
def load_model(self, epoch=None):
    self.label_map = pickle.load(open("%s.label_map" % self.model_name_prefix, "rb"))
    super(PPRelationModel, self).load_model(epoch)
def load_model(self, epoch=None):
    '''
    Loads a saved model. If epoch id is provided, will load the corresponding model.
    Or else, will load the best model.
    '''
    if not epoch:
        self.model = load_model("%s.model" % self.model_name_prefix,
                                custom_objects=self.custom_objects)
    else:
        self.model = load_model("%s_%d.model" % (self.model_name_prefix, epoch),
                                custom_objects=self.custom_objects)
    self.data_processor = pickle.load(open("%s.dataproc" % self.model_name_prefix, "rb"))
    self.label_map = pickle.load(open("%s.labelmap" % self.model_name_prefix, "rb"))
def get_users_orders(self, prior_or_train):
    '''
    get users' prior detailed orders
    '''
    if os.path.exists(self.cache_dir + 'users_orders.pkl'):
        with open(self.cache_dir + 'users_orders.pkl', 'rb') as f:
            users_orders = pickle.load(f)
    else:
        orders = self.get_orders()
        order_products_prior = self.get_orders_items(prior_or_train)
        users_orders = pd.merge(order_products_prior,
                                orders[['user_id', 'order_id', 'order_number', 'days_up_to_last']],
                                on=['order_id'], how='left')
        with open(self.cache_dir + 'users_orders.pkl', 'wb') as f:
            pickle.dump(users_orders, f, pickle.HIGHEST_PROTOCOL)
    return users_orders
def get_users_products(self, prior_or_train):
    '''
    get users' all purchased products
    '''
    if os.path.exists(self.cache_dir + 'users_products.pkl'):
        with open(self.cache_dir + 'users_products.pkl', 'rb') as f:
            users_products = pickle.load(f)
    else:
        users_products = self.get_users_orders(prior_or_train)[['user_id', 'product_id']].drop_duplicates()
        users_products['product_id'] = users_products.product_id.astype(int)
        users_products['user_id'] = users_products.user_id.astype(int)
        users_products = users_products.groupby(['user_id'])['product_id'].apply(list).reset_index()
        with open(self.cache_dir + 'users_products.pkl', 'wb') as f:
            pickle.dump(users_products, f, pickle.HIGHEST_PROTOCOL)
    return users_products
def get_baskets(self, prior_or_train, reconstruct=False, reordered=False, none_idx=49689):
    '''
    get users' baskets
    '''
    if reordered:
        filepath = self.cache_dir + './reorder_basket_' + prior_or_train + '.pkl'
    else:
        filepath = self.cache_dir + './basket_' + prior_or_train + '.pkl'

    if (not reconstruct) and os.path.exists(filepath):
        with open(filepath, 'rb') as f:
            up_basket = pickle.load(f)
    else:
        up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending=True)
        uid_oid = up[['user_id', 'order_number']].drop_duplicates()
        up = up[up.reordered == 1][['user_id', 'order_number', 'product_id']] if reordered else up[['user_id', 'order_number', 'product_id']]
        up_basket = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
        up_basket = pd.merge(uid_oid, up_basket, on=['user_id', 'order_number'], how='left')
        for row in up_basket.loc[up_basket.product_id.isnull(), 'product_id'].index:
            up_basket.at[row, 'product_id'] = [none_idx]
        up_basket = up_basket.sort_values(['user_id', 'order_number'], ascending=True).groupby(['user_id'])['product_id'].apply(list).reset_index()
        up_basket.columns = ['user_id', 'reorder_basket'] if reordered else ['user_id', 'basket']
        #pdb.set_trace()
        with open(filepath, 'wb') as f:
            pickle.dump(up_basket, f, pickle.HIGHEST_PROTOCOL)
    return up_basket
def donations(filename='donationdata.pickle'):
    try:
        print("donation data pickled already. Grabbing data from donationdata.picke")
        with open(filename, 'rb') as handle:
            donations = pickle.load(handle)
        return donations
    except EOFError:
        print("donation data not pickled, grabbing directly from FEC and ProPublica APIs")
        donations = donations_helper()

        with open(filename, 'wb') as handle:
            pickle.dump(donations, handle, protocol=pickle.HIGHEST_PROTOCOL)

        return donations
def __init__(self, sh_run):
    self.sh_run = sh_run.split('\n')
    self.push_back = None
    self.index = 0
    self.cache_misses = []
    self.saved_cli_lines = []
    try:
        self.cache = pickle.load(open(CACHE_PATH, 'rb'))
        print('LOADED {} items from the MAP CACHE'.format(len(self.cache)))
    except Exception:
        self.cache = {}
def get_obj_from_file(file_name):
    """
    This method loads an object from file_name and returns it.
    If there are errors, it returns -1.

    :param file_name: file to load the object from
    :return: the object loaded from the file
    """
    # Note: this example uses Python 2 exception/print syntax.
    try:
        file = open(file_name, 'rb')
        object_file = pickle.load(file)
        file.close()
        return object_file
    except Exception, e:
        print e
        return -1