The following 50 code examples, extracted from open-source Python projects, illustrate how to use dill.load().
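Before the project examples, here is a minimal, self-contained sketch of the round-trip most of them rely on: dill.dump() writes an object to a file opened in binary mode and dill.load() reads it back. Unlike the standard pickle module, dill can also serialize objects such as lambdas and closures. The file name round_trip.dill is only a placeholder for this illustration.

import dill

# dill can serialize objects the standard pickle module rejects, such as lambdas
square = lambda x: x * x

# serialize to a file opened in binary write mode
with open("round_trip.dill", "wb") as f:
    dill.dump(square, f)

# deserialize from a file opened in binary read mode
with open("round_trip.dill", "rb") as f:
    restored = dill.load(f)

print(restored(4))  # prints 16
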
def load(path, num_cpu=16):
    """Load act function that was returned by learn function.

    Parameters
    ----------
    path: str
        path to the act function pickle
    num_cpu: int
        number of cpus to use for executing the policy

    Returns
    -------
    act: ActWrapper
        function that takes a batch of observations and returns actions.
    """
    return ActWrapper.load(path, num_cpu=num_cpu)

def __LaunchTask(self, task, PredCols):
    """"""
    print('\n---- Begin to deal with %s' % (task))
    start = time.time()

    if(task == 'MissingValue'):
        self.TrainData, self.TestData = self._missing.impute((self.TrainData, self.TestData))
    elif(task == 'NewFeature'):
        self.TrainData, self.TestData = self._newfeat.create((self.TrainData, self.TestData), PredCols)
    elif(task == 'FeatureEncoding'):
        with open('%s/featmap.pkl' % self._InputDir, 'rb') as i_file:
            d_feat = pickle.load(i_file)
        i_file.close()
        self.TrainData, self.TestData = self._encoding.ordinal((self.TrainData, self.TestData), d_feat)
    elif(task == 'FeatureSelection'):
        self.TrainData, self.TestData = self._select.select((self.TrainData, self.TestData))

    end = time.time()
    print('---- Task %s done, time consumed %ds' % (task, (end - start)))

def load_session(fname=None):
    if conf.interactive_shell.lower() == "ipython":
        log_interactive.error("There are issues with load_session in ipython. Use python for interactive shell, or use -s parameter to load session")
        return
    import dill as pickle
    if fname is None:
        fname = conf.session
    try:
        s = pickle.load(gzip.open(fname, "rb"))
    except IOError:
        s = pickle.load(open(fname, "rb"))

    scapy_session = builtins.__dict__["scapy_session"]
    scapy_session.clear()
    scapy_session.update(s)

def saga_cv_cache(*args):
    arghash = sha1(repr(args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_cv(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out

def load(path, act_params, num_cpu=16):
    """Load act function that was returned by learn function.

    Parameters
    ----------
    path: str
        path to the act function pickle
    num_cpu: int
        number of cpus to use for executing the policy

    Returns
    -------
    act: ActWrapper
        function that takes a batch of observations and returns actions.
    """
    return ActWrapper.load(path, num_cpu=num_cpu, act_params=act_params)

def update(self):
    if not secure or check_key():
        with open(os.path.join(path, 'preql_queries.pickle')) as pq:
            self.queries = pickle.load(pq)
        with open(os.path.join(path, "synthdb_internal.pickle")) as sdb:
            funcs = pickle.load(sdb)
        node_topo_funcs.update(funcs['node_topo_funcs'])
        link_topo_funcs.update(funcs['link_topo_funcs'])
        graph_format.update(funcs['graph_format'])
        removers.update(funcs['removers'])
        graph_tool_functions.update(funcs['graph_tool_functions'])
        graph_generator_functions.update(funcs['graph_generator_functions'])
        walkers.update(funcs['walkers'])
        topo_formats.update(funcs['topo_formats'])
        return json.dumps("Functions updated.")
    else:
        wrong_key()

def restart_workflow(args, outdir):
    with open(args.inputfile, 'r') as infile:
        runner = dill.load(infile)

    if isinstance(runner.engine, pyccc.Docker) and runner.engine.client is None:
        runner.engine.client = runner.engine.connect_to_docker()

    engine, RunnerClass = get_execution_env(args)
    assert RunnerClass is runner.__class__

    if args.setoutput:
        set_ui_outputs(runner, args)

    print ' ---- RESTARTING WORKFLOW "%s" ----\n' % runner.workflow.name
    run_workflow(runner, outdir)

    if args.dumptasks:
        dump_all_tasks(runner, outdir)

def process_input_file(inputfile):
    """ Figure out whether we're being passed a file, a description of a file,
    or just raw JSON """
    try:
        jsraw = _get_json(inputfile)
    except ValueError:
        pass
    else:
        print jsraw
        inputjson = json.loads(jsraw)
        return inputjson

    ext = inputfile.split('.')[-1]
    if ext in ('js', 'json', 'yml', 'yaml'):
        with open(inputfile, 'r') as infile:
            inputjson = yaml.load(infile)
    else:
        with open(inputfile, 'r') as infile:
            inputjson = {'filename': inputfile,
                         'content': infile.read()}

    return inputjson

def test_create(self, mock):
    value = 1
    function_name = 'test_function'

    @Lambda(name=function_name, bucket='test', key='test', client=self.client)
    def foo():
        return value

    package = DeploymentPackage(foo)
    zfp = zipfile.ZipFile(StringIO(package.zip_bytes(foo.dumped_code)), "r")
    func = dill.load(zfp.open('.lambda.dump'))
    self.assertEqual(func(), value)

    resp_create = foo.create()
    self.assertEqual(resp_create['FunctionName'], function_name)

    # moto doesn't support ZipFile only lambda deployments, while
    # aws doen't allow other arguments when scpesifying ZipFile argument
    #resp_get = foo.get()
    #self.assertEqual(resp_get['Configuration']['FunctionName'], function_name)

def run_sample_type_prediction(tag_to_val, mapped_terms, real_props):
    # Load the dilled vectorizer and model
    vectorizer_f = pr.resource_filename(__name__, join("predict_sample_type", "sample_type_vectorizor.dill"))
    classifier_f = pr.resource_filename(__name__, join("predict_sample_type", "sample_type_classifier.dill"))
    with open(vectorizer_f, "rb") as f:
        vectorizer = dill.load(f)
    with open(classifier_f, "rb") as f:
        model = dill.load(f)

    # Make sample-type prediction
    feat_v = vectorizer.convert_to_features(
        get_ngrams_from_tag_to_val(tag_to_val), mapped_terms)
    predicted, confidence = model.predict(
        feat_v, mapped_terms, real_props)
    return predicted, confidence

def dill_words(num_words, fname="words.dill"):
    fname = os.path.join(os.path.dirname(os.path.realpath(__file__)), fname)
    try:
        if os.path.isfile(fname):
            words = dill.load(open(fname, "rb"))
            if(len(words) < ip_handling.iutils.get_ipv6_word_possibilities()):
                os.remove(fname)
                raise Exception  # go into except block to reload words
            return words
        else:
            words = load_words(num_words)
            if(len(words) < ip_handling.iutils.get_ipv6_word_possibilities()):
                raise Exception  # go into except block to reload words
            dill.dump(words, open(fname, "wb"))
            return words
    except:
        try:
            words = load_words(num_words)
            if(len(words) < ip_handling.iutils.get_ipv6_word_possibilities()):
                raise Exception  # go into except block to reload words
            dill.dump(words, open(fname, "wb"))
            return words
        except:
            return load_words(num_words)

def _load_file(path):
    """
    Use to load a file with dill, used for loading puzzle. file should have \
    been wrote with dill.

    :param str path: path to the file to load
    :return: object (excepted [Puzzle](doc/puzzle.md) but no check is made)
    """
    try:
        with open(path, "rb") as e:
            puzzle.Puzzle.dynamique_type()
            return dill.load(e)
    except Exception as e:
        print "Exception while loading file %s" % path
        print e
        return None

def get_files(folder_name, algorithm, args):
    foldersTmp = os.listdir(folder_name)
    folders = []
    for folder in foldersTmp:
        if folder[0] == '.':
            continue
        folders.append(folder)

    imgs = []
    for folder in folders:
        path = folder_name + folder + '/'
        if not os.path.isdir(path):
            continue
        files = os.listdir(path)
        for file_str in files:
            complete_file_str = str((os.path.join(path, file_str)))
            if os.path.isfile(complete_file_str) and (complete_file_str.endswith('.jpg') or complete_file_str.endswith('.JPG')):
                imgs.append((os.path.join(path, file_str), folder))
    return imgs

# load instances from filename

def main(prepare, use, do, get, params, debug):
    if get is not None:
        do = get
    if prepare is not None and use in ['ht', 'yjb', 'yh', 'gf', 'xq']:
        user = easytrader.use(use, debug)
        user.prepare(prepare)
        with open(ACCOUNT_OBJECT_FILE, 'wb') as f:
            dill.dump(user, f)
    if do is not None:
        with open(ACCOUNT_OBJECT_FILE, 'rb') as f:
            user = dill.load(f)
        if len(params) > 0:
            result = getattr(user, do)(*params)
        else:
            result = getattr(user, do)
        json_result = json.dumps(result, indent=4, ensure_ascii=False, sort_keys=True)
        click.echo(json_result)

def read(self):
    """
    Reads from persisted files.

    Returns
    -------
    sparse matrix
        The train data
    sparse matrix
        The label data
    ThesaurusReader
        Unpickled ThesaurusReader object
    """
    print('Reading persisted features')
    X = self._load_sparse_csr(self._persist_name('X'))
    y = self._load_sparse_csr(self._persist_name('y'))
    with open(self._persist_name('TR'), mode='rb') as f:
        tr = pickle.load(f)
    return X, y, tr

def create_processed_dataframe_from_mongo(dbname):
    '''
    INPUT
         - dbname: this is the name of the mongo database where the
           information will be extracted from
    OUTPUT
         - df

    Returns a dataframe that has everything needed in order to do modelling
    '''
    df = extract_user_information_from_mongo(dbname, 'topictweets')
    # df = pd.read_csv('data/clinton_df.csv')
    # df.id = df.id.apply(str)
    feature_dict = extract_feature_information_from_mongo(dbname, 'timelinetweets')
    # with open('data/clinton_tweets_dict.pkl', 'r') as f:
    #     feature_dict = pickle.load(f)
    df = df.drop_duplicates(subset='id', keep='last')
    users_who_tweeted = set(feature_dict.keys())
    # subset the initial user dataframe to have ONLY the users who tweeted
    dfusers_who_tweeted = df[df.id.isin(users_who_tweeted)]
    df = combine_user_info_with_feature_dict(dfusers_who_tweeted, feature_dict)
    df = process_feature_information_for_modelling(df, feature_dict)
    df = drop_unnecessary_columns_from_test_data(df)
    return df

def restore_state(self, path):
    """Returns loaded state"""
    try:
        with open(path, 'rb') as f:
            if self.encrypt:
                state = pickle.loads(self.decrypt_data(pickle.load(f)))
            else:
                state = pickle.load(f)
        LOG.debug("Restoring state successs")
    except Exception as e:
        LOG.debug("Restoring state from %s failed with %s" % (path, e))
        state = StateMachine(self.bot, state_path=path)
        LOG.debug("Successfully inicialized new state.")
    return state

def restore(file_name="dump.bin"):
    return pickle.load(open(file_name, 'rb'))

# class Encoding:
#     pass

# @extension
# class Math:
# WOOOT? just
# import math as Math
#     def __getattr__(self, attr):
#         import sys
#         import math
#         # ruby method_missing !!!
#         import inspect
#         for name, obj in inspect.getmembers(sys.modules['math']):
#             if name==attr: return obj
#         return False

def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)

def restore(self):
    print('Try load previous plot....')
    try:
        self._since_beginning = dill.load(open(self.LOGDIR + 'log.pkl', "r"))
        self._iter = dill.load(open(self.LOGDIR + 'iteration.pkl', "r"))
        # self._iter += 1
        print('Restore plot from iter: ' + str(self._iter))
        return self._iter
    except Exception, e:
        print('Previous plot unfounded')
        return 0
    print('')

def __MergeData(cls, InputDir, OutputDir, mode):
    """"""
    if(mode == 'train'):
        ActionDataFile = '%s/train_2016_v2.csv' % InputDir
        OutputFile = '%s/train.pkl' % OutputDir
    else:
        ActionDataFile = '%s/sample_submission.csv' % InputDir
        OutputFile = '%s/test.pkl' % OutputDir
    print(OutputFile)
    PropertyDataFile = '%s/properties_2016.csv' % InputDir

    ## load
    ActionData = pd.read_csv(ActionDataFile, parse_dates=['transactiondate'])
    PropertyData = pd.read_csv(PropertyDataFile)

    ## left join
    MergedData = ActionData.merge(PropertyData, how='left', on='parcelid')

    ## output into pkl file
    if (os.path.exists(OutputDir) == False):
        os.makedirs(OutputDir)
    with open(OutputFile, 'wb') as o_file:
        pickle.dump(MergedData, o_file, -1)
    o_file.close()

    return

## split rawcensustractandblock into census, tract and block

def __ParseCTB(cls, InputDir, OutputDir, mode):
    """"""
    if(mode == 'train'):
        InputFile = '%s/train.pkl' % InputDir
        OutputFile = '%s/train.pkl' % OutputDir
    else:
        InputFile = '%s/test.pkl' % InputDir
        OutputFile = '%s/test.pkl' % OutputDir

    ## load
    with open(InputFile, 'rb') as i_file:
        df_data = pickle.load(i_file)
    i_file.close()

    ## extract census, tract and block identifies
    df_data['rawcensustractandblock'] = (df_data['rawcensustractandblock'] * 1000).astype(np.float64).astype(np.int64)
    df_data['fipsid'] = ((df_data['rawcensustractandblock'] / 10000000).astype(np.int64)).astype(str)
    df_data['tractandblock'] = df_data['rawcensustractandblock'] % 10000000
    df_data['tractid'] = ((df_data['tractandblock'] / 10).astype(np.int64)).astype(str)
    df_data['blockid'] = ((df_data['tractandblock'] % 10).astype(np.int64)).astype(str)
    df_data.drop(['fips', 'rawcensustractandblock', 'tractandblock'], axis=1, inplace=True)

    ## output into pkl file
    if (os.path.exists(OutputDir) == False):
        os.makedirs(OutputDir)
    with open(OutputFile, 'wb') as o_file:
        pickle.dump(df_data, o_file, -1)
    o_file.close()

    return

def __SplitData(cls, InputDir, OutputDir, mode):
    """"""
    if(mode == 'train'):
        InputFileData = '%s/train.pkl' % InputDir
    else:
        InputFileData = '%s/test.pkl' % InputDir
    InputFileFeatMap = '%s/featmap.pkl' % InputDir

    ## load
    with open(InputFileData, 'rb') as i_file:
        df_data = pickle.load(i_file)
    i_file.close()
    with open(InputFileFeatMap, 'rb') as i_file:
        d_feat = pickle.load(i_file)
    i_file.close()

    if (os.path.exists(OutputDir) == False):
        os.makedirs(OutputDir)
    with open('%s/featmap.pkl' % OutputDir, 'wb') as o_file:
        pickle.dump(d_feat, o_file, -1)
    o_file.close()

    ## output into individual pkl files
    for i in range(12):
        month = i + 1
        df_MonthData = df_data[(df_data['transactiondate'].dt.month == month)]
        with open('%s/%s.pkl' % (OutputDir, month), 'wb') as o_file:
            pickle.dump(df_MonthData, o_file, -1)
        o_file.close()

    return

## launch single task

def LoadFromHdfFile(InputDir, mode = 'train'):
    if(mode == 'train'):
        data = pd.read_hdf(path_or_buf= '%s/train.hdf' % InputDir, key='train')
    elif(mode == 'valid'):
        data = pd.read_hdf(path_or_buf= '%s/valid.hdf' % InputDir, key='valid')
    else:
        data = pd.read_hdf(path_or_buf= '%s/test.hdf' % InputDir, key='test')

    return data

## class method, load data with pkl format

def LoadFromPklFile(InputDir):
    with open('%s/train.pkl' % InputDir, 'rb') as i_file:
        TrainData = pickle.load(i_file)
    i_file.close()

    with open('%s/test.pkl' % InputDir, 'rb') as i_file:
        TestData = pickle.load(i_file)
    i_file.close()

    return TrainData, TestData

## class method, load data with text format

def download_and_unzip_result(url, job_hash):
    r = requests.get(url, stream=True)
    status_check(r)

    total_size = int(r.headers.get('content-length', 0))
    with open('download.zip', 'wb') as f:
        pbar = tqdm(total=total_size, unit='B', unit_scale=True)
        chunk_size = 1024 * 32  # 32kb
        for data in r.iter_content(chunk_size):
            f.write(data)
            pbar.update(chunk_size)  # again there might be a pbar issue here
        pbar.close()

    zip_content = open("download.zip", "rb").read()
    z = ZipFile(io.BytesIO(zip_content))
    z.extractall()
    remove('download.zip')

    result = None  # output of the script
    new_files = None  # names of new files created by the script

    pickle_path = path.abspath(path.join(job_hash, job_hash + '.pkl'))
    if path.isfile(pickle_path):
        with open(pickle_path, 'rb') as f:
            # Hack: a workaround for dill's pickling problem
            # import_all()
            result = dill.load(f)
            # unimport_all()
        remove(pickle_path)

    if path.isdir(job_hash):
        new_files = listdir(job_hash)
        for name in new_files:
            rename(path.join(job_hash, name), name)
        rmtree(job_hash)

    return result, new_files

def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)

def test_verify_features_does_not_work_by_default():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)
    os.remove(file_name)
    try:
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except:
        pass

    with warnings.catch_warnings(record=True) as w:
        results = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)
        print('Here are the caught warnings:')
        print(w)

        assert len(w) == 1
        assert results == None

def test_verify_features_finds_no_missing_features_when_none_are_missing():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
        , 'sex': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train, verify_features=True)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)
    os.remove(file_name)

    missing_features = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)
    print('missing_features')
    print(missing_features)

    print("len(missing_features['prediction_not_training'])")
    print(len(missing_features['prediction_not_training']))
    print("len(missing_features['training_not_prediction'])")
    print(len(missing_features['training_not_prediction']))

    assert len(missing_features['prediction_not_training']) == 0
    assert len(missing_features['training_not_prediction']) == 0

def load_ml_model(file_name):
    with open(file_name, 'rb') as read_file:
        base_pipeline = dill.load(read_file)

    if isinstance(base_pipeline, utils_categorical_ensembling.CategoricalEnsembler):
        for step in base_pipeline.transformation_pipeline.named_steps:
            pipeline_step = base_pipeline.transformation_pipeline.named_steps[step]
            try:
                if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
                    pipeline_step.model = insert_deep_learning_model(pipeline_step, file_name)
            except AttributeError:
                pass

        for step in base_pipeline.trained_models:
            pipeline_step = base_pipeline.trained_models[step]
            try:
                if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
                    pipeline_step.model = insert_deep_learning_model(pipeline_step, file_name)
            except AttributeError:
                pass
    else:
        for step in base_pipeline.named_steps:
            pipeline_step = base_pipeline.named_steps[step]
            try:
                if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
                    pipeline_step.model = insert_deep_learning_model(pipeline_step, file_name)
            except AttributeError:
                pass

    return base_pipeline

# Keeping this here for legacy support

def Load(file):
    """
    Loads a model from specified file
    """
    with open(file, 'rb') as file:
        model = dill.load(file)
    return model

def update_session(fname=None):
    import dill as pickle
    if fname is None:
        fname = conf.session
    try:
        s = pickle.load(gzip.open(fname, "rb"))
    except IOError:
        s = pickle.load(open(fname, "rb"))
    scapy_session = builtins.__dict__["scapy_session"]
    scapy_session.update(s)

################
##### Main #####
################

def load_object(fname):
    import dill as pickle
    return pickle.load(gzip.open(fname, "rb"))

def load_csr(f, return_y=False):
    npz = np.load(f)
    X = csr_matrix((npz['data'], npz['indices'], npz['indptr']),
                   shape=npz['shape'])

    if return_y:
        return X, npz['y']
    else:
        return X

def load_results(key, args):
    fn = cache_fname(key, args)
    with open(fn, "rb") as f:
        return dill.load(f)

def saga_score_struct_cache(*args):
    arghash = sha1(repr(("score_struct",) + args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_score_struct(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out

def linear_cv_score(dataset, alpha, l1_ratio, constraints):
    fn = cache_fname("linear_cv_score", (dataset, alpha, l1_ratio, constraints))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    scores = []
    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        Y_marg, bl = saga_decision_function(dataset, k, alpha, alpha, l1_ratio)

        val_docs = list(load(ids[val]))
        Y_true = [doc.label for doc in val_docs]
        Y_pred = bl.fast_decode(Y_marg, val_docs, constraints)

        scores.append(bl._score(Y_true, Y_pred))

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump(scores, f)

    return scores

def svmstruct_cv_score(dataset, C, class_weight, constraints,
                       compat_features, second_order_features):
    fn = cache_fname("svmstruct_cv_score", (dataset, C, class_weight,
                                            constraints, compat_features,
                                            second_order_features))

    if os.path.exists(fn):
        logging.info("Cached file already exists.")
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    # below are boolean logical ops
    grandparents = second_order_features and dataset == 'ukp'
    coparents = second_order_features
    siblings = second_order_features and dataset == 'cdcp'

    scores = []
    all_Y_pred = []

    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        train_docs = list(load(ids[tr]))
        val_docs = list(load(ids[val]))

        clf, Y_val, Y_pred = fit_predict(train_docs, val_docs, dataset, C,
                                         class_weight, constraints,
                                         compat_features, second_order_features,
                                         grandparents, coparents, siblings)
        all_Y_pred.extend(Y_pred)
        scores.append(clf.model._score(Y_val, Y_pred))

    with open(fn, "wb") as f:
        dill.dump((scores, all_Y_pred), f)

    return scores, all_Y_pred

def read_dill(file_):
    """
    Deserialize a computation from a file or file-like object

    :param file_: If string, reads from a file at that path
    :type file_: File-like object, or string
    """
    if isinstance(file_, six.string_types):
        with open(file_, 'rb') as f:
            return dill.load(f)
    else:
        return dill.load(file_)

def load(self, pkl, filepath):
    entity_type = pkl.get_entity_type(self._entity_type_id)
    self.__dict__ = entity_type.__dict__

    # initialize index
    self._ann_obj = AnnoyIndex(pkl.get_nfactor(), entity_type._metric)

    # mmap the file
    self._ann_obj.load(filepath)

def load_entities(self, entities, file_getter):
    """Load underlying entities."""
    for k in entities:
        annoy_filepath = file_getter.get_file_path('{}.ann'.format(k))
        try:
            self._annoy_objects[k].load(self, annoy_filepath)
        except IOError as e:
            raise IOError(
                "Error: cannot load file {0}, which was built "
                "with the model. '{1}'".format(annoy_filepath, e)
            )