The following 50 code examples, extracted from open-source Python projects, illustrate how to use cPickle.load().
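Before the project examples, here is a minimal sketch of the pattern they all share: import cPickle (falling back to the pure-Python pickle module where cPickle is unavailable, as several of the examples below do), open the file in binary mode, and call load() on the file object. The helper name load_pickle and the path 'data.pkl' are placeholders for illustration, not part of any project below.

try:
    import cPickle as pickle  # Python 2: C implementation of pickle
except ImportError:
    import pickle  # Python 3: the C accelerator is built into pickle

def load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as f:  # pickle files must be opened in binary mode
        return pickle.load(f)

# usage (the path is a placeholder):
# obj = load_pickle('data.pkl')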
def _load_data(self):
    """
    Load data only if the present data is not checkpointed,
    else, just load the checkpointed data
    :return: None
    """
    self.mapper = Mapper()
    self.mapper.generate_vocabulary(self.review_summary_file)
    self.X_fwd, self.X_bwd, self.Y = self.mapper.get_tensor(reverseflag=True)
    # Store all the mapper values in a dict for later recovery
    self.mapper_dict = dict()
    self.mapper_dict['seq_length'] = self.mapper.get_seq_length()
    self.mapper_dict['vocab_size'] = self.mapper.get_vocabulary_size()
    self.mapper_dict['rev_map'] = self.mapper.get_reverse_map()
    # Split into test and train data
    self._split_train_tst()
def _load_data(self):
    """
    Load data only if the present data is not checkpointed,
    else, just load the checkpointed data
    :return: None
    """
    self.mapper = Mapper()
    self.mapper.generate_vocabulary(self.review_summary_file)
    self.X, self.Y = self.mapper.get_tensor()
    # Store all the mapper values in a dict for later recovery
    self.mapper_dict = dict()
    self.mapper_dict['seq_length'] = self.mapper.get_seq_length()
    self.mapper_dict['vocab_size'] = self.mapper.get_vocabulary_size()
    self.mapper_dict['rev_map'] = self.mapper.get_reverse_map()
    # Split into test and train data
    self._split_train_tst()
def load_from_corpus(cls, reader, remake=False, src_or_tgt="src"):
    vocab_fname = reader.fname+".vocab-"+reader.mode+"-"+src_or_tgt
    if not remake and os.path.isfile(vocab_fname):
        return Vocab.load(vocab_fname)
    else:
        v = Vocab()
        count = 0  # count of sentences
        for item in reader:
            toklist = item
            for token in toklist:
                v.add(token)
            count += 1
            if count % 10000 == 0:
                print("...", count, end="")
        print("\nSaving " + src_or_tgt + " vocab of size", v.size)
        v.START_TOK = v[reader.begin] if reader.begin is not None else None
        v.END_TOK = v[reader.end] if reader.end is not None else None
        v.save(vocab_fname)
        return v

#### reader class
def _load_builder(path):
    # lifted straight from /usr/bin/swift-ring-builder
    from swift.common.ring import RingBuilder
    try:
        builder = pickle.load(open(path, 'rb'))
        if not hasattr(builder, 'devs'):
            builder_dict = builder
            builder = RingBuilder(1, 1, 1)
            builder.copy_from(builder_dict)
    except ImportError:  # Happens with really old builder pickles
        builder = RingBuilder(1, 1, 1)
        builder.copy_from(pickle.load(open(path, 'rb')))
    for dev in builder.devs:
        if dev and 'meta' not in dev:
            dev['meta'] = ''
    return builder
def _load_accumulators(self, main_loop):
    """Nasty method, use carefully"""
    for cg_name, model in main_loop.models.iteritems():
        source = numpy.load(self.path_to_accumulators.format(cg_name))
        accums_dict = {name.replace("-", "/"): value
                       for name, value in source.items()}
        source.close()
        algo = main_loop.algorithm.algorithms[cg_name]
        model_params = model.get_params()
        steps = algo.steps.items()
        for pidx in xrange(len(steps)):
            # Get parameter name and its accumulators
            p = steps[pidx][0]
            name = [k for k, v in model_params.iteritems() if v == p][0]
            accums = accums_dict[name]

            # This is num_accums_per_param
            col = len(accums)
            for aidx in xrange(col):
                algo.step_rule_updates[pidx*col+aidx][0].set_value(
                    accums[aidx])
def _load_accumulators(self, main_loop):
    """Load accumulators with some checks."""
    for cg_name, model in main_loop.models.iteritems():

        # Load accumulators
        accum_filename = self.path_to_accumulators.format(cg_name)
        if not os.path.isfile(accum_filename):
            logger.error(" Accumulators file does not exist [{}]"
                         .format(accum_filename))
            continue

        source = numpy.load(accum_filename)
        accums_to_load = {k: v for k, v in source.items()}
        source.close()

        algo = main_loop.algorithm.algorithms[cg_name]
        accums = algo.step_rule_updates

        # Set accumulators
        for acc in accums:
            try:
                acc.set_value(accums_to_load[acc.name])
            except:
                logger.error(" Could not load {}".format(acc.name))
def get_dev_streams(config):
    """Setup development set stream if necessary."""
    dev_streams = {}
    for cg in config['cgs']:
        if 'val_sets' in config and cg in config['val_sets']:
            logger.info('Building development stream for cg:[{}]'.format(cg))
            eid = p_(cg)[0]
            dev_file = config['val_sets'][cg]

            # Get dictionary and fix EOS
            dictionary = cPickle.load(open(config['src_vocabs'][eid]))
            dictionary['<S>'] = 0
            dictionary['<UNK>'] = config['unk_id']
            dictionary['</S>'] = config['src_eos_idxs'][eid]

            # Get as a text file and convert it into a stream
            dev_dataset = TextFile([dev_file], dictionary, None)
            dev_streams[cg] = DataStream(dev_dataset)
    return dev_streams
def save_training_info(values, path):
    """
    Gets a set of values as dictionary and append them to a log file.
    stores in <path>/train_log.pkl
    """
    file_name = os.path.join(path, __train_log_file_name)
    try:
        with open(file_name, "rb") as f:
            log = pickle.load(f)
    except IOError:  # first time
        log = {}
        for k in values.keys():
            log[k] = []
    for k, v in values.items():
        log[k].append(v)
    with open(file_name, "wb") as f:
        pickle.dump(log, f)
def plot_traing_info(x, ylist, path):
    """
    Loads log file and plot x and y values as provided by input.
    Saves as <path>/train_log.png
    """
    file_name = os.path.join(path, __train_log_file_name)
    try:
        with open(file_name, "rb") as f:
            log = pickle.load(f)
    except IOError:  # first time
        warnings.warn("There is no {} file here!!!".format(file_name))
        return
    plt.figure()
    x_vals = log[x]
    for y in ylist:
        y_vals = log[y]
        if len(y_vals) != len(x_vals):
            warnings.warn("One of y's: {} does not have the same length as x: {}".format(y, x))
        plt.plot(x_vals, y_vals, label=y)
        # assert len(y_vals) == len(x_vals), "not the same len"
    plt.xlabel(x)
    plt.legend()
    #plt.show()
    plt.savefig(file_name[:-3]+'png', bbox_inches='tight')
    plt.close('all')
def from_snapshot(self, sess, sfile, nfile):
    print('Restoring model snapshots from {:s}'.format(sfile))
    self.saver.restore(sess, sfile)
    print('Restored.')
    # Needs to restore the other hyper-parameters/states for training, (TODO xinlei) I have
    # tried my best to find the random states so that it can be recovered exactly
    # However the Tensorflow state is currently not available
    with open(nfile, 'rb') as fid:
        st0 = pickle.load(fid)
        cur = pickle.load(fid)
        perm = pickle.load(fid)
        cur_val = pickle.load(fid)
        perm_val = pickle.load(fid)
        last_snapshot_iter = pickle.load(fid)

        np.random.set_state(st0)
        self.data_layer._cur = cur
        self.data_layer._perm = perm
        self.data_layer_val._cur = cur_val
        self.data_layer_val._perm = perm_val

    return last_snapshot_iter
def build_data_dict(self, layer_features, k=5):
    with open(self.pkl_dir + self.data_file_name, 'rb') as data_file:
        data = cPickle.load(data_file)
    with open(self.pkl_dir + self.feature_file_name, 'rb') as feature_file:
        features = cPickle.load(feature_file)
    data_dict = {}
    for d, f in zip(data, features):
        pid = d['id']
        data_dict[pid] = {'label': d['label'], 'spacing': d['spacing']}
        # add the features
        for i in range(k):
            data_dict[pid]['loc_{}'.format(i)] = f['loc_{}'.format(i)]
            data_dict[pid]['p_{}'.format(i)] = f['p_{}'.format(i)]
            for layer in layer_features:
                data_dict[pid]['out_{}_{}'.format(i, layer)] = f['out_{}_{}'.format(i, layer)]
    return data_dict
def build_data_dict(self, layer_features, k=5):
    """
    This builds dict[id] = {label, spacing, 1={loc, p, layer1_feature, layer2_feature...}, 2={}...}
    :param layer_features: features from layer, e.g. 67, 77
    :param k: number of nodules considered as inputs
    :return: a combined dictionary
    """
    with open(self.pkl_dir + self.data_file_name, 'rb') as data_file:
        data = cPickle.load(data_file)
    with open(self.pkl_dir + self.feature_file_name, 'rb') as feature_file:
        features = cPickle.load(feature_file)
    data_dict = {}
    for d, f in zip(data, features):
        pid = d['id']
        data_dict[pid] = {'label': d['label'], 'spacing': d['spacing']}
        # add the features
        for i in range(k):
            data_dict[pid][i] = {'loc': f['loc_{}'.format(i)], 'p': f['p_{}'.format(i)]}
            for layer in layer_features:
                data_dict[pid][i][layer] = f['out_{}_{}'.format(i, layer)]
    return data_dict
def read_pklc(lcfile):
    '''
    This just reads a pickle.
    '''
    try:
        with open(lcfile, 'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(lcfile, 'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')
    return lcdict

# these translate filter operators given as strings to Python operators
def register(self, name, serializer):
    """Register ``serializer`` object under ``name``.

    Raises :class:`AttributeError` if ``serializer`` is invalid.

    .. note::

        ``name`` will be used as the file extension of the saved files.

    :param name: Name to register ``serializer`` under
    :type name: ``unicode`` or ``str``
    :param serializer: object with ``load()`` and ``dump()`` methods

    """
    # Basic validation
    getattr(serializer, 'load')
    getattr(serializer, 'dump')
    self._serializers[name] = serializer
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    # gt_roidb = [self._load_pascal_annotation(index)
    gt_roidb = [self._load_pascal_labels(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)

    return gt_roidb
def selective_search_IJCV_roidb(self):
    """
    Return the database of selective search regions of interest.
    Ground-truth ROIs are also included.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path,
            '{:s}_selective_search_IJCV_top_{:d}_roidb.pkl'.
            format(self.name, self.config['top_k']))

    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} ss roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    gt_roidb = self.gt_roidb()
    ss_roidb = self._load_selective_search_IJCV_roidb(gt_roidb)
    roidb = datasets.imdb.merge_roidbs(gt_roidb, ss_roidb)
    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote ss roidb to {}'.format(cache_file)

    return roidb
def load(batch_size, test_batch_size, n_labelled=None):
    filepath = '/tmp/mnist.pkl.gz'
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

    if not os.path.isfile(filepath):
        print "Couldn't find MNIST dataset in /tmp, downloading..."
        urllib.urlretrieve(url, filepath)

    with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f:
        train_data, dev_data, test_data = pickle.load(f)

    return (
        mnist_generator(train_data, batch_size, n_labelled),
        mnist_generator(dev_data, test_batch_size, n_labelled),
        mnist_generator(test_data, test_batch_size, n_labelled)
    )
def parse_standard_tfmeta(paths):
    meta_list = []
    for path in paths:
        if isstring(path):
            if path.startswith('meta') and path.endswith('.pkl'):
                mpaths = [path]
            else:
                assert os.path.isdir(path)
                mpaths = filter(lambda x: x.startswith('meta') and x.endswith('.pkl'),
                                os.listdir(path))
                mpaths = [os.path.join(path, mp) for mp in mpaths]
        else:
            # in this case, it's a list
            assert isinstance(path, list)
            mpaths = path
        d = {}
        for mpath in mpaths:
            d.update(cPickle.load(open(mpath)))
        meta_list.append(d)
    return meta_list
def loadDepthMap(self, filename):
    """
    Read a depth-map
    :param filename: file name to load
    :return: image data of depth image
    """
    img = Image.open(filename)
    # top 8 bits of depth are packed into green channel and lower 8 bits into blue
    assert len(img.getbands()) == 3
    r, g, b = img.split()
    r = np.asarray(r, np.int32)
    g = np.asarray(g, np.int32)
    b = np.asarray(b, np.int32)
    dpt = np.bitwise_or(np.left_shift(g, 8), b)
    imgdata = np.asarray(dpt, np.float32)

    return imgdata
def read_pickle(self, filename):
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    in_f = open(filename, "rb")

    tabversion = pickle.load(in_f)
    if tabversion != __tabversion__:
        raise VersionError("yacc table file version is out of date")
    self.lr_method = pickle.load(in_f)
    signature = pickle.load(in_f)
    self.lr_action = pickle.load(in_f)
    self.lr_goto = pickle.load(in_f)
    productions = pickle.load(in_f)

    self.lr_productions = []
    for p in productions:
        self.lr_productions.append(MiniProduction(*p))

    in_f.close()
    return signature

# Bind all production function names to callable objects in pdict
def copy_images_for_classification():
    ground_truth_dates = pickle.load(open(data_dir + 'ground_truth_dates.pickle', "rb"))
    ground_truth_dates = sorted(ground_truth_dates, key=lambda x: x[3], reverse=False)
    if not os.path.exists(classify_dir):
        os.mkdir(classify_dir)
    for seed_id, coin_id, result, labeled_date, bad_angle, bad_image in ground_truth_dates:
        if labeled_date < 1900:
            continue
        dir = crop_dir + str(coin_id / 100) + '/'
        new_dir = classify_dir + str(labeled_date) + '/'
        if not os.path.exists(new_dir):
            os.mkdir(new_dir)
        for image_id in range(0, 57):
            filename = str(coin_id).zfill(5) + str(image_id).zfill(2) + '.png'
            old_filename = dir + filename
            new_filename = new_dir + filename
            shutil.copyfile(old_filename, new_filename)
def get_single_lmdb_filedata(seed_id, max_value_cutoff):
    seeds = pickle.load(open(data_dir + 'seed_data.pickle', "rb"))
    filedata = []
    values = seeds[seed_id]

    # this is handy for large groups (heads,tails)
    # values.sort(key=lambda x: x[0], reverse=True)
    # best_results_by_angle_group = {}
    # for max_value, angle, image_id in values:
    #     rounded_angle = int(round(angle / 5) * 5)
    #     if not rounded_angle in best_results_by_angle_group.keys():
    #         best_results_by_angle_group[rounded_angle] = [max_value, angle, image_id]
    #     else:
    #         if max_value > best_results_by_angle_group[rounded_angle][0]:
    #             best_results_by_angle_group[rounded_angle] = [max_value, angle, image_id]
    # values = best_results_by_angle_group.values()

    filedata.append([seed_id, crop_dir + str(seed_id) + '.png', 0])
    for image_id, test_values in values.iteritems():
        max_value, angle = test_values
        if max_value > max_value_cutoff:
            filedata.append([image_id, crop_dir + str(image_id) + '.png', angle])
    return filedata
def create_new_indexes(total_new_seed_imgs, total_new_test_imgs):
    seeds = pickle.load(open(data_dir + 'seed_data.pickle', "rb"))
    seed_image_ids = []
    test_image_ids = []
    count = 0
    for seed_image_id, values in seeds.iteritems():
        values.sort(key=lambda x: x[0], reverse=False)
        # seed_image_ids.append(values[0:total_new_seed_imgs][2])
        # test_image_ids.append(values[total_new_seed_imgs:total_new_seed_imgs+total_new_test_imgs][2])
        for max_value, angle, image_id in values:
            count += 1
            if count < total_new_seed_imgs:
                seed_image_ids.append(image_id)
            else:
                if count < total_new_seed_imgs + total_new_test_imgs:
                    test_image_ids.append(image_id)
        count = 0
    pickle.dump(seed_image_ids, open(data_dir + 'seed_image_ids.pickle', "wb"))
    pickle.dump(test_image_ids, open(data_dir + 'test_image_ids.pickle', "wb"))
def get_ground_truth_dates(total_coin_results):
    #ground_truth_dates = pickle.load(open(data_dir + 'get_ground_truth_dates.pickle', "rb"))
    ground_truth_date_dict = {}
    for seed_id, values in total_coin_results.iteritems():
        for coin_id, result in values.iteritems():
            if coin_id not in ground_truth_date_dict.iterkeys():
                ground_truth_date_dict[coin_id] = [seed_id, 0]
            if result > ground_truth_date_dict[coin_id][1]:
                ground_truth_date_dict[coin_id] = [seed_id, result]

    #it bugs me I am not using a more pythonic way here:
    ground_truth_date_array = []
    for coin_id, values in ground_truth_date_dict.iteritems():
        seed_id = values[0]
        result = values[1]
        ground_truth_date_array.append([seed_id, coin_id, result, 0, False, False])

    ground_truth_date_array = sorted(ground_truth_date_array, key=lambda x: x[2], reverse=True)
    ground_truth_date_array = sorted(ground_truth_date_array, key=lambda x: x[0])
    pickle.dump(ground_truth_date_array, open(data_dir + 'ground_truth_dates.pickle', "wb"))
    return ground_truth_date_array
def save_good_coin_ids(data_dir, seed_image_id, cut_off, remove_image_ids):
    #todo save_good_test_ids is not correct this needs a database:
    good_coin_ids = set()  # a set, so that .add() and .difference_update() below work
    filename = data_dir + 'good_coin_ids.pickle'
    if os.path.exists(filename):
        #good_coin_ids = set(pickle.load(open(filename, "rb")))
        pass

    values = results_dict[seed_image_id].iteritems()
    for test_image_id, test_values in values:
        max_value, angle = test_values
        coin_id = test_image_id / 100
        if max_value > cut_off:
            good_coin_ids.add(test_image_id)

    good_coin_ids.difference_update(remove_image_ids)
    print 'good_test_ids len: ', len(good_coin_ids)
    pickle.dump(good_coin_ids, open(filename, "wb"))
def gt_roidb(self):
    """
    return ground truth image regions database
    :return: imdb[image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    gt_roidb = [self.load_pascal_annotation(index) for index in self.image_set_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)

    return gt_roidb
def selective_search_roidb(self, gt_roidb):
    """
    get selective search roidb and ground truth roidb
    :param gt_roidb: ground truth roidb
    :return: roidb of selective search (ground truth included)
    """
    cache_file = os.path.join(self.cache_path, self.name + '_ss_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} ss roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    if self.image_set != 'test':
        ss_roidb = self.load_selective_search_roidb(gt_roidb)
        roidb = IMDB.merge_roidbs(gt_roidb, ss_roidb)
    else:
        roidb = self.load_selective_search_roidb(None)
    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote ss roidb to {}'.format(cache_file)

    return roidb
def gt_roidb(self):
    """
    return ground truth image regions database
    :return: imdb[image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    gt_roidb = [self.load_annotation(index) for index in self.image_set_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)

    return gt_roidb
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)

    return gt_roidb
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    gt_roidb = [self._load_coco_annotation(index)
                for index in self._image_index]

    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)
    return gt_roidb
def render_plots(pickle_file, output):
    with open(pickle_file, 'rb') as f:
        results = pickle.load(f)

    C.Util.plot(
        method_labels=results['methods'],
        data_bytes=results['comp_bytes'],
        ratios=results['ratios'],
        com_speed=results['total_comp_speed'],
        com_speed_stderr=results['total_comp_speed_std'],
        dcom_speed=results['total_decomp_speed'],
        dcom_speed_stderr=results['total_decomp_speed_std'],
        save=output,
        dpi=300,
        bw=False
    )
def verify_all(folder_paths):
    """
    Calls verify_one on each folder path.
    Also checks to make sure all the answer vocabularies are the same.
    """
    adict_paths = []
    for folder_path in folder_paths:
        paths = verify_one(folder_path)
        adict_paths.append(paths[2])
    adicts = []
    for path in adict_paths:
        with open(path, 'r') as f:
            adict = json.load(f)
            adicts.append(adict)
    if len(adicts) > 1:
        for a2 in adicts[1:]:
            if set(adicts[0].keys()) != set(a2.keys()):
                print set(adicts[0].keys()) - set(a2.keys())
                print set(a2.keys()) - set(adicts[0].keys())
                raise Exception('Answer vocab mismatch')
    return adicts
def verify_one(folder_path):
    """
    Makes sure all the required files exist in the folder.
    If so, returns the paths to all the files.
    """
    model_path = glob.glob(folder_path + '/*.caffemodel')
    assert len(model_path) == 1, 'one .caffemodel per folder, please'
    model_path = model_path[0]
    proto_path = folder_path + '/proto_test.prototxt'
    adict_path = folder_path + '/adict.json'
    vdict_path = folder_path + '/vdict.json'
    aux_path = folder_path + '/aux.json'
    assert os.path.exists(proto_path), 'proto_test.prototxt missing'
    assert os.path.exists(adict_path), 'adict.json missing'
    assert os.path.exists(vdict_path), 'vdict.json missing'
    assert os.path.exists(aux_path), 'aux.json missing'
    with open(aux_path, 'r') as f:
        aux = json.load(f)
    batch_size = int(aux['batch_size'])
    data_shape = tuple(map(int, aux['data_shape']))
    img_feature_prefix = aux['img_feature_prefix']
    spatial_coord = aux['spatial_coord'] if 'spatial_coord' in aux else False
    glove = aux['glove'] if 'glove' in aux else False
    return model_path, proto_path, adict_path, vdict_path, batch_size, data_shape, img_feature_prefix, spatial_coord, glove
def get_qid_valid_answer_dict(ques_file, adict):
    """
    Returns a dictionary mapping question IDs to valid neuron indices.
    """
    print 'Multiple choice mode: making valid answer dictionary...'
    valid_answer_dict = {}
    with open(ques_file, 'r') as f:
        qdata = json.load(f)
    for q in qdata['questions']:
        valid_answer_dict[q['question_id']] = q['multiple_choices']
    for qid in valid_answer_dict:
        answers = valid_answer_dict[qid]
        valid_indices = []
        for answer in answers:
            if answer in adict:
                valid_indices.append(adict[answer])
        if len(valid_indices) == 0:
            print "we won't be able to answer qid", qid
        valid_answer_dict[qid] = valid_indices
    return valid_answer_dict
def funcion_Open(self):
    fileName = QtGui.QFileDialog.getOpenFileName()
    if len(fileName) > 0:
        self.timer.stop()
        self.variableStats = {}
        self.reqStats = {}
        self.limpiatablas()
        self.comboBox.clear()
        self.comboBox.addItem("All")
        self.comboBox_2.clear()
        self.comboBox_2.addItem("All")
        self.comboBox_3.clear()
        self.comboBox_3.addItem("All")
        Proxynet.clearRequests()
        f = file(fileName)
        Proxynet.addRequests(pickl.load(f))
        f.close()
        self.numRequests = 0
        self.timerFunc()
        self.timer.start(500)
def from_command_line(cls, *args, **keys):
    params = list()
    for name, param in cls.params():
        if name not in keys:
            params.append((name, param))

    bot_name = inspect.getmodulename(inspect.stack()[1][1])

    if "ABUSEHELPER_CONF_FROM_STDIN" in os.environ:
        defaults = dict(pickle.load(sys.stdin))
        defaults.setdefault("bot_name", bot_name)
        added = cls._from_dict(params, **defaults)
    else:
        added = cls._from_sys_argv(params, bot_name=bot_name)

    added.update(keys)
    return cls(*args, **added)
def __init__(self, state_file=None):
    self.file = None
    self.sessions = dict()
    self.state = dict()

    if state_file is not None:
        self.file = open_file(state_file)
        try:
            if not lock_file_nonblocking(self.file):
                raise RuntimeError("state file %r already in use" % state_file)
        except:
            self.file.close()
            raise

        try:
            self.state = pickle.load(self.file)
        except EOFError:
            pass

    self.errors = idiokit.consume()
def load_data(self, dataset_path, share=False):
    """Load the data set.
    """
    f = gzip.open(dataset_path, 'rb')
    train_set, valid_set, test_set = pickle.load(f)
    f.close()

    # share the data
    train_set_x, train_set_y = self.shared_dataset(train_set, train=True)
    valid_set_x, valid_set_y = self.shared_dataset(valid_set)
    test_set_x, test_set_y = self.shared_dataset(test_set)

    if share:
        reval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
                 (test_set_x, test_set_y)]
    else:
        reval = [train_set, valid_set, test_set]  # NON-shared data (they didn't share the data in the code Crino!!!!!)

    return reval
def set_params(mo, bparams):
    i = 0
    for la in mo.layers:
        we = bparams[i:i+2]
        print len(we)
        la.set_weights(we)
        i += 2
    return mo

#with open("best_model_keras.pkl", 'r') as f:
#    b_params = pkl.load(f)
#
#model = set_params(model, b_params)
#out = model.predict(xvl, batch_size=xvl.shape[0], verbose=0)
#error = np.mean(np.mean(np.power(out - yvl, 2), axis=1))
#print "Error vl", error
#sys.exit()

#init_p = get_params(model)
#with open("init_keras_param.pkl", 'w') as f:
#    pkl.dump(init_p, f)
def loadData(self, filename, verbose=True, replace_missing=True):
    ''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "r") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'format' not in self.info.keys():
        self.getFormatData(filename)
    if 'feat_num' not in self.info.keys():
        self.getNbrFeatures(filename)
    data_func = {'dense': data_io.data, 'sparse': data_io.data_sparse, 'sparse_binary': data_io.data_binary_sparse}
    data = data_func[self.info['format']](filename, self.info['feat_num'])
    # IMPORTANT: when we replace missing values we double the number of variables
    if self.info['format'] == 'dense' and replace_missing and np.any(map(np.isnan, data)):
        vprint(verbose, "Replace missing values by 0 (slow, sorry)")
        data = data_converter.replace_missing(data)
    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(data)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return data
def loadLabel(self, filename, verbose=True):
    ''' Get the solution/truth values'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "r") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'task' not in self.info.keys():
        self.getTypeProblem(filename)

    # IG: Here change to accommodate the new multiclass label format
    if self.info['task'] == 'multilabel.classification':
        label = data_io.data(filename)
    elif self.info['task'] == 'multiclass.classification':
        label = data_converter.convert_to_num(data_io.data(filename))
    else:
        label = np.ravel(data_io.data(filename))  # get a column vector
        #label = np.array([np.ravel(data_io.data(filename))]).transpose() # get a column vector

    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(label)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return label
def _split_train_tst(self):
    """
    divide the data into training and testing data
    Create the X_trn, X_tst, for both forward and backward, and Y_trn and Y_tst
    Note that only the reviews are changed, and not the summary.
    :return: None
    """
    num_samples = self.Y.shape[0]
    mapper_file = self.checkpointer.get_mapper_file_location()
    if not self.checkpointer.is_mapper_checkpointed():
        print 'No mapper checkpoint found. Fresh loading in progress ...'
        # Now shuffle the data
        sample_id = range(num_samples)
        random.shuffle(sample_id)
        print 'Dumping the mapper shuffle for reuse.'
        Pickle.dump(sample_id, open(mapper_file, 'wb'))
        print 'Dump complete. Moving Forward...'
    else:
        print 'Mapper Checkpoint found... Reading from mapper dump'
        sample_id = Pickle.load(open(mapper_file, 'rb'))
        print 'Mapping unpickling complete.. Moving forward...'

    self.X_fwd = self.X_fwd[sample_id]
    self.X_bwd = self.X_bwd[sample_id]
    self.Y = self.Y[sample_id]
    # Now divide the data into test and train set
    test_fraction = 0.01
    self.test_size = int(test_fraction * num_samples)
    self.train_size = num_samples - self.test_size
    # Forward review
    self.X_trn_fwd = self.X_fwd[0:self.train_size]
    self.X_tst_fwd = self.X_fwd[self.train_size:num_samples]
    # Backward review
    self.X_trn_bwd = self.X_bwd[0:self.train_size]
    self.X_tst_bwd = self.X_bwd[self.train_size:num_samples]
    # Summary
    self.Y_trn = self.Y[0:self.train_size]
    self.Y_tst = self.Y[self.train_size:num_samples]