The following 12 code examples, extracted from open-source Python projects, illustrate how to use utils.prepare_data().
def getCorrelation(model, words, f):
    """Score sentence pairs from file *f* and correlate with gold scores.

    Each line of *f* is tab-separated: gold_score, phrase1, phrase2.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: vocabulary mapping used by ``getSeqs``/``getSeqs2``.
        f: path to the evaluation file.

    Returns:
        Tuple ``(pearson_r, spearman_rho)`` between predictions and gold.
    """
    # Use a context manager so the file handle is always closed
    # (the original rebound the parameter `f` and leaked the handle).
    with open(f, 'r') as fin:
        lines = fin.readlines()
    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        parts = line.split("\t")
        p1 = parts[1]
        p2 = parts[2]
        score = float(parts[0])
        # Tokens of the form word_TAG indicate a tagged corpus; use the
        # variant that also returns the supplemental sequences.
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1, p2, words)
        else:
            X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
def getCorrelation(model, words, f):
    """Score sentence pairs from file *f* and correlate with gold scores.

    Each line of *f* is tab-separated: phrase1, phrase2, gold_score
    (note the column order differs from the tagged-corpus variant).

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: vocabulary mapping used by ``getSeqs``.
        f: path to the evaluation file.

    Returns:
        Tuple ``(pearson_r, spearman_rho)`` between predictions and gold.
    """
    # Context manager instead of a bare open() so the handle is closed
    # and the parameter `f` is not shadowed by the file object.
    with open(f, 'r') as fin:
        lines = fin.readlines()
    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        parts = line.split("\t")
        p1 = parts[0]
        p2 = parts[1]
        score = float(parts[2])
        X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
def getAcc(model, words, f):
    """Predict scores for labeled pairs in *f* and return accuracy.

    Each line is tab-separated: label, phrase1, phrase2. Predictions are
    computed in batches of 100 to bound memory use.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: vocabulary mapping used by ``getSeqs``/``getSeqs2``.
        f: path to the evaluation file.

    Returns:
        Result of ``acc(preds, golds)``.
    """
    # Context manager closes the file; the original leaked the handle.
    with open(f, 'r') as fin:
        lines = fin.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    ct = 0
    for line in lines:
        parts = line.split("\t")
        p1 = parts[1]
        p2 = parts[2]
        score = parts[0]
        # word_TAG tokens indicate a tagged corpus; use the tagged variant.
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1, p2, words)
        else:
            X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        ct += 1
        # Flush a full batch of 100 through the model, then reset buffers.
        if ct % 100 == 0:
            x1, m1 = utils.prepare_data(seq1)
            x2, m2 = utils.prepare_data(seq2)
            scores = model.scoring_function(x1, x2, m1, m2)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
            seq2 = []
        golds.append(score)
    # Flush the final partial batch, if any.
    if len(seq1) > 0:
        x1, m1 = utils.prepare_data(seq1)
        x2, m2 = utils.prepare_data(seq2)
        scores = model.scoring_function(x1, x2, m1, m2)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return acc(preds, golds)
def getAcc_para(model, words, f):
    """Predict scores for labeled pairs in *f* and return paraphrase accuracy.

    Identical to ``getAcc`` except the final metric is ``acc_para``.
    Each line is tab-separated: label, phrase1, phrase2; predictions are
    computed in batches of 100.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: vocabulary mapping used by ``getSeqs``/``getSeqs2``.
        f: path to the evaluation file.

    Returns:
        Result of ``acc_para(preds, golds)``.
    """
    # Context manager closes the file; the original leaked the handle.
    with open(f, 'r') as fin:
        lines = fin.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    ct = 0
    for line in lines:
        parts = line.split("\t")
        p1 = parts[1]
        p2 = parts[2]
        score = parts[0]
        # word_TAG tokens indicate a tagged corpus; use the tagged variant.
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1, p2, words)
        else:
            X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        ct += 1
        # Flush a full batch of 100 through the model, then reset buffers.
        if ct % 100 == 0:
            x1, m1 = utils.prepare_data(seq1)
            x2, m2 = utils.prepare_data(seq2)
            scores = model.scoring_function(x1, x2, m1, m2)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
            seq2 = []
        golds.append(score)
    # Flush the final partial batch, if any.
    if len(seq1) > 0:
        x1, m1 = utils.prepare_data(seq1)
        x2, m2 = utils.prepare_data(seq2)
        scores = model.scoring_function(x1, x2, m1, m2)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return acc_para(preds, golds)
def getAccSentiment(model, words, f):
    """Predict sentiment scores for sentences in *f* and return accuracy.

    Each line is tab-separated: sentence, label. Predictions are computed
    in batches of 100 to bound memory use.

    Args:
        model: object exposing ``scoring_function(x1, m1)``.
        words: vocabulary mapping used by ``getSeq``.
        f: path to the evaluation file.

    Returns:
        Result of ``accSentiment(preds, golds)``.
    """
    # Context manager closes the file; the original leaked the handle.
    with open(f, 'r') as fin:
        lines = fin.readlines()
    preds = []
    golds = []
    seq1 = []
    ct = 0
    for line in lines:
        parts = line.split("\t")
        p1 = parts[0]
        score = parts[1]
        X1 = getSeq(p1, words)
        seq1.append(X1)
        ct += 1
        # Flush a full batch of 100 through the model, then reset the buffer.
        if ct % 100 == 0:
            x1, m1 = utils.prepare_data(seq1)
            scores = model.scoring_function(x1, m1)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
        golds.append(score)
    # Flush the final partial batch, if any.
    if len(seq1) > 0:
        x1, m1 = utils.prepare_data(seq1)
        scores = model.scoring_function(x1, m1)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return accSentiment(preds, golds)
def getpairs(model, batch, params):
    """Embed both sides of every example in *batch* and mine training pairs.

    Feeds each example's two embedding sequences through the model, stores
    the resulting representations on the examples, then mines pairs via
    ``getPairsFast`` using ``params.type``.

    Returns:
        Tuple ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)`` of
        padded index matrices and masks for the batch and the mined pairs.
    """
    # Collect the two embedding streams with comprehensions.
    left_embs = [example[0].embeddings for example in batch]
    right_embs = [example[1].embeddings for example in batch]
    g1x, g1mask = utils.prepare_data(left_embs)
    g2x, g2mask = utils.prepare_data(right_embs)
    embg1 = model.feedforward_function(g1x, g1mask)
    embg2 = model.feedforward_function(g2x, g2mask)
    # Attach each row of the embedding matrices to its source example.
    for row, example in enumerate(batch):
        example[0].representation = embg1[row, :]
        example[1].representation = embg2[row, :]
    pairs = getPairsFast(batch, params.type)
    p1x, p1mask = utils.prepare_data([pair[0].embeddings for pair in pairs])
    p2x, p2mask = utils.prepare_data([pair[1].embeddings for pair in pairs])
    return (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)
def evaluate(self, wopt, int_opt, X_test, base_model=None):
    """Score extracted coefficients against the oracle's labels.

    Args:
        wopt: optimized weight coefficients.
        int_opt: optimized intercept(s).
        X_test: test feature matrix; re-encoded if its width does not
            match ``self.num_features()``.
        base_model: optional baseline model to score alongside.

    Returns:
        ``accuracy_score(y_true, y_pred)``, or a tuple of that plus the
        baseline's accuracy when *base_model* is given.
    """
    # Ground-truth class labels come from querying the target model.
    y_true = self.query(X_test)
    # One-hot/encode raw features when the test matrix is still un-encoded.
    if X_test.shape[1] != self.num_features():
        X_test = self.encode(X_test)
    # Predict classes using the optimized coefficients.
    y_pred = predict_classes(X_test, wopt, int_opt, self.classes)
    # (A commented-out Python-2 debug dump of mispredicted rows was
    # removed here; it was dead code.)
    if base_model is not None:
        y_pred_base = base_model.predict(X_test)
        return accuracy_score(y_true, y_pred), \
            accuracy_score(y_true, y_pred_base)
    return accuracy_score(y_true, y_pred)
def gen_data(p, data, batch_size=1):
    """Slice one batch starting at offset *p* and prepare it for the model.

    In training data 'y' is a matrix (batch_size, seq_length); in test
    data 'y' is an array. An optional 't' stream is sliced when present.

    Args:
        p: starting index of the batch.
        data: dict with 'x', 'y' and optionally 't' sequences.
        batch_size: number of examples per batch.

    Returns:
        Whatever ``utils.prepare_data`` returns for the batch.
    """
    x = data['x'][p:p + batch_size]
    y = data['y'][p:p + batch_size]
    batch_data = {'x': x, 'y': y}
    # `dict.has_key()` was removed in Python 3; the `in` operator is
    # equivalent and works on Python 2 as well.
    if 't' in data:
        batch_data['t'] = data['t'][p:p + batch_size]
    return utils.prepare_data(batch_data, VOCAB_SIZE,
                              one_hot=ONE_HOT, sigmoid_on=SIGMOID_ON)
def getAcc(model, words, f):
    """Predict scores for labeled pairs in *f* and return accuracy.

    Each line is tab-separated: phrase1, phrase2, label (note the column
    order differs from the tagged-corpus variant). Predictions are
    computed in batches of 100 to bound memory use.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: vocabulary mapping used by ``getSeqs``.
        f: path to the evaluation file.

    Returns:
        Result of ``acc(preds, golds)``.
    """
    # Context manager closes the file; the original leaked the handle.
    with open(f, 'r') as fin:
        lines = fin.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    ct = 0
    for line in lines:
        parts = line.split("\t")
        p1 = parts[0]
        p2 = parts[1]
        score = parts[2]
        X1, X2 = getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        ct += 1
        # Flush a full batch of 100 through the model, then reset buffers.
        if ct % 100 == 0:
            x1, m1 = utils.prepare_data(seq1)
            x2, m2 = utils.prepare_data(seq2)
            scores = model.scoring_function(x1, x2, m1, m2)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
            seq2 = []
        golds.append(score)
    # Flush the final partial batch, if any.
    if len(seq1) > 0:
        x1, m1 = utils.prepare_data(seq1)
        x2, m2 = utils.prepare_data(seq2)
        scores = model.scoring_function(x1, x2, m1, m2)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return acc(preds, golds)
def main():
    """Run an AWS regression-extraction experiment on a named dataset.

    Parses CLI arguments, loads and reorders the features so categorical
    columns come first, then launches the extractor's passive run on the
    first 500 rows.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data', type=str, help='a dataset')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--incomplete', dest='incomplete',
                        action='store_true', help='allow incomplete queries')
    args = parser.parse_args()
    dataset = args.data
    seed = args.seed
    incomplete = args.incomplete
    verbose = args.verbose

    if verbose:
        # Mirror INFO-level log records to stderr.
        level = logging.INFO
        logger = logging.getLogger()
        logger.setLevel(level)
        ch = logging.StreamHandler(sys.stderr)
        ch.setLevel(level)
        ch.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(ch)

    np.random.seed(seed)
    _, _, X, _, _ = utils.prepare_data(dataset, onehot=False,
                                       labelEncode=False)

    # `basestring` exists only on Python 2; fall back to `str` on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Categorical columns are those whose first value is a string; the
    # continuous columns are the complement. (The original built cont_idx
    # as `range(...)` and called .remove() on it, which is both O(n^2) and
    # broken on Python 3, where range is not a list.)
    cat_idx = [i for i in range(len(X.columns))
               if isinstance(X.iloc[0][i], string_types)]
    cat_set = set(cat_idx)
    cont_idx = [i for i in range(X.shape[1]) if i not in cat_set]
    X = X[cat_idx + cont_idx].values

    ext = AWSRegressionExtractor(dataset, X.copy(), cat_idx,
                                 incomplete=incomplete)
    try:
        X_test = X[0:500]
        if ext.binning:
            # Round numeric features to the precision implied by ext.eps.
            r = -decimal.Decimal(str(ext.eps)).as_tuple().exponent
            for i, t in enumerate(ext.feature_types):
                if t == "NUMERIC":
                    # builtin `float` instead of `np.float`: the alias was
                    # removed in NumPy 1.24 and was always just `float`.
                    X_test[:, i] = np.round(X_test[:, i].astype(float), r)
    except ValueError:
        # Features could not be coerced to float; run without a test set.
        X_test = None

    ext.run(args.data, X_test, 500, random_seed=seed, alphas=[1],
            methods=['passive'], baseline=False)