Python utils 模块,prepare_data() 实例源码

我们从Python开源项目中,提取了以下12个代码示例,用于说明如何使用utils.prepare_data()

项目:Learning-sentence-representation-with-guidance-of-human-attention    作者:wangshaonan    | 项目源码 | 文件源码
def getCorrelation(model,words,f):
    """Score every sentence pair in a tab-separated file and correlate with gold.

    Each line is split on tabs: field 0 is parsed as the float gold score and
    fields 1 and 2 are the two sentences.  When the first whitespace-separated
    token of the first sentence splits into exactly two parts on ``'_'`` the
    pair is converted with ``getSeqs2`` (only its first two return values are
    used); otherwise ``getSeqs`` is used.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: passed through unchanged to ``getSeqs`` / ``getSeqs2``
            (presumably the vocabulary lookup -- confirm against callers).
        f: path of the file to read.

    Returns:
        Tuple ``(pearson, spearman)``: element 0 of ``pearsonr`` and of
        ``spearmanr`` computed between the squeezed model scores and the
        gold scores.
    """
    golds = []
    seq1 = []
    seq2 = []
    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[1]; p2 = fields[2]; score = float(fields[0])
            # The branch must run once per line, inside the loop.
            if len(p1.split()[0].split('_')) == 2:
                X1, X2, _, _ = getSeqs2(p1, p2, words)
            else:
                X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
项目:iclr2016    作者:jwieting    | 项目源码 | 文件源码
def getCorrelation(model,words,f):
    """Score every sentence pair in a tab-separated file and correlate with gold.

    Each line is split on tabs: fields 0 and 1 are the two sentences and
    field 2 is parsed as the float gold score.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: passed through unchanged to ``getSeqs`` (presumably the
            vocabulary lookup -- confirm against callers).
        f: path of the file to read.

    Returns:
        Tuple ``(pearson, spearman)``: element 0 of ``pearsonr`` and of
        ``spearmanr`` computed between the squeezed model scores and the
        gold scores.
    """
    golds = []
    seq1 = []
    seq2 = []
    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[0]; p2 = fields[1]; score = float(fields[2])
            X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
项目:Learning-sentence-representation-with-guidance-of-human-attention    作者:wangshaonan    | 项目源码 | 文件源码
def getAcc(model,words,f):
    """Score the sentence pairs of a tab-separated file in batches and call ``acc``.

    Each line is split on tabs: field 0 is the gold label (kept as the raw
    string) and fields 1 and 2 are the two sentences.  When the first
    whitespace-separated token of the first sentence splits into exactly two
    parts on ``'_'`` the pair is converted with ``getSeqs2`` (only its first
    two return values are used); otherwise ``getSeqs`` is used.  Pairs are
    scored 100 at a time, plus one final partial batch.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: passed through unchanged to ``getSeqs`` / ``getSeqs2``.
        f: path of the file to read.

    Returns:
        Whatever ``acc(preds, golds)`` returns for the collected predictions
        and gold labels.
    """
    batch_size = 100
    preds = []
    golds = []
    seq1 = []
    seq2 = []

    def score_batch(left, right):
        # Convert both sides with utils.prepare_data and score them together.
        x1, m1 = utils.prepare_data(left)
        x2, m2 = utils.prepare_data(right)
        scores = model.scoring_function(x1, x2, m1, m2)
        # np.squeeze collapses a one-item batch to a 0-d array, whose
        # tolist() is a bare scalar that list.extend rejects; atleast_1d
        # keeps that case a one-element list and leaves others untouched.
        return np.atleast_1d(np.squeeze(scores)).tolist()

    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[1]; p2 = fields[2]; score = fields[0]
            # The branch must run once per line, inside the loop.
            if len(p1.split()[0].split('_')) == 2:
                X1, X2, _, _ = getSeqs2(p1, p2, words)
            else:
                X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
            if len(seq1) == batch_size:
                preds.extend(score_batch(seq1, seq2))
                seq1 = []
                seq2 = []
    # Score whatever is left over after the last full batch.
    if seq1:
        preds.extend(score_batch(seq1, seq2))
    return acc(preds, golds)
项目:Learning-sentence-representation-with-guidance-of-human-attention    作者:wangshaonan    | 项目源码 | 文件源码
def getAcc_para(model,words,f):
    """Score the sentence pairs of a tab-separated file in batches and call ``acc_para``.

    Each line is split on tabs: field 0 is the gold label (kept as the raw
    string) and fields 1 and 2 are the two sentences.  When the first
    whitespace-separated token of the first sentence splits into exactly two
    parts on ``'_'`` the pair is converted with ``getSeqs2`` (only its first
    two return values are used); otherwise ``getSeqs`` is used.  Pairs are
    scored 100 at a time, plus one final partial batch.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: passed through unchanged to ``getSeqs`` / ``getSeqs2``.
        f: path of the file to read.

    Returns:
        Whatever ``acc_para(preds, golds)`` returns for the collected
        predictions and gold labels.
    """
    batch_size = 100
    preds = []
    golds = []
    seq1 = []
    seq2 = []

    def score_batch(left, right):
        # Convert both sides with utils.prepare_data and score them together.
        x1, m1 = utils.prepare_data(left)
        x2, m2 = utils.prepare_data(right)
        scores = model.scoring_function(x1, x2, m1, m2)
        # np.squeeze collapses a one-item batch to a 0-d array, whose
        # tolist() is a bare scalar that list.extend rejects; atleast_1d
        # keeps that case a one-element list and leaves others untouched.
        return np.atleast_1d(np.squeeze(scores)).tolist()

    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[1]; p2 = fields[2]; score = fields[0]
            # The branch must run once per line, inside the loop.
            if len(p1.split()[0].split('_')) == 2:
                X1, X2, _, _ = getSeqs2(p1, p2, words)
            else:
                X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
            if len(seq1) == batch_size:
                preds.extend(score_batch(seq1, seq2))
                seq1 = []
                seq2 = []
    # Score whatever is left over after the last full batch.
    if seq1:
        preds.extend(score_batch(seq1, seq2))
    return acc_para(preds, golds)
项目:Learning-sentence-representation-with-guidance-of-human-attention    作者:wangshaonan    | 项目源码 | 文件源码
def getAccSentiment(model,words,f):
    """Score the sentences of a tab-separated file in batches and call ``accSentiment``.

    Each line is split on tabs: field 0 is the sentence and field 1 is the
    gold label (kept as the raw string, including any trailing newline).
    Sentences are converted with ``getSeq`` and scored 100 at a time, plus
    one final partial batch.

    Args:
        model: object exposing ``scoring_function(x, mask)``.
        words: passed through unchanged to ``getSeq``.
        f: path of the file to read.

    Returns:
        Whatever ``accSentiment(preds, golds)`` returns for the collected
        predictions and gold labels.
    """
    batch_size = 100
    preds = []
    golds = []
    seq1 = []

    def score_batch(seqs):
        # Convert the buffered sequences with utils.prepare_data and score them.
        x1, m1 = utils.prepare_data(seqs)
        scores = model.scoring_function(x1, m1)
        # np.squeeze collapses a one-item batch to a 0-d array, whose
        # tolist() is a bare scalar that list.extend rejects; atleast_1d
        # keeps that case a one-element list and leaves others untouched.
        return np.atleast_1d(np.squeeze(scores)).tolist()

    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[0]; score = fields[1]
            seq1.append(getSeq(p1, words))
            golds.append(score)
            if len(seq1) == batch_size:
                preds.extend(score_batch(seq1))
                seq1 = []
    # Score whatever is left over after the last full batch.
    if seq1:
        preds.extend(score_batch(seq1))
    return accSentiment(preds, golds)
项目:Learning-sentence-representation-with-guidance-of-human-attention    作者:wangshaonan    | 项目源码 | 文件源码
def getpairs(model, batch, params):
    """Prepare model inputs for a batch and for the pairs derived from it.

    For every entry of ``batch`` the ``embeddings`` of its items 0 and 1 are
    collected and converted with ``utils.prepare_data``.  Both sides are run
    through ``model.feedforward_function`` and row ``k`` of each result is
    stored as ``representation`` on the matching item of entry ``k``.
    ``getPairsFast(batch, params.type)`` then supplies the pairs whose
    ``embeddings`` are converted the same way.

    Args:
        model: object exposing ``feedforward_function(x, mask)``.
        batch: re-iterable collection of indexable entries whose items 0 and
            1 carry ``embeddings`` and accept a ``representation`` attribute.
        params: object whose ``type`` attribute is forwarded to
            ``getPairsFast``.

    Returns:
        The 8-tuple ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)``.
    """
    first_seqs = [entry[0].embeddings for entry in batch]
    second_seqs = [entry[1].embeddings for entry in batch]

    g1x, g1mask = utils.prepare_data(first_seqs)
    g2x, g2mask = utils.prepare_data(second_seqs)

    reps_first = model.feedforward_function(g1x, g1mask)
    reps_second = model.feedforward_function(g2x, g2mask)

    # Attach the representations before getPairsFast is called on the batch.
    for row, entry in enumerate(batch):
        entry[0].representation = reps_first[row, :]
        entry[1].representation = reps_second[row, :]

    # Single pass over the pairs, in case getPairsFast returns a one-shot iterable.
    pair_first, pair_second = [], []
    for pair in getPairsFast(batch, params.type):
        pair_first.append(pair[0].embeddings)
        pair_second.append(pair[1].embeddings)

    p1x, p1mask = utils.prepare_data(pair_first)
    p2x, p2mask = utils.prepare_data(pair_second)

    return (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def evaluate(self, wopt, int_opt, X_test, base_model=None):
        """Measure the accuracy of the optimized coefficients on ``X_test``.

        The reference labels come from ``self.query(X_test)`` on the input as
        given.  If the input's column count differs from
        ``self.num_features()`` it is passed through ``self.encode`` before
        prediction.

        Args:
            wopt: forwarded to ``predict_classes`` (optimized coefficients).
            int_opt: forwarded to ``predict_classes`` (presumably the
                optimized intercept -- confirm against ``predict_classes``).
            X_test: 2-D test data exposing ``shape``.
            base_model: optional object with ``predict``; when given, its
                accuracy on the same data is reported as well.

        Returns:
            ``accuracy_score(y_true, y_pred)`` alone, or the tuple
            ``(accuracy, base_model_accuracy)`` when ``base_model`` is given.
        """
        # Reference labels are queried before any re-encoding of X_test.
        expected = self.query(X_test)

        needs_encoding = X_test.shape[1] != self.num_features()
        if needs_encoding:
            X_test = self.encode(X_test)

        predicted = predict_classes(X_test, wopt, int_opt, self.classes)

        # Disabled debugging aid kept from the original (prints mismatches):
        #   _, _, X, _, _ = utils.prepare_data(self.model_id, onehot=False)
        #   X = X.values
        #   for i in range(len(y_true)):
        #       if y_true[i] != y_pred[i]:
        #           print y_true[i], y_pred[i], X[i]

        if base_model is None:
            return accuracy_score(expected, predicted)

        baseline = base_model.predict(X_test)
        return (accuracy_score(expected, predicted),
                accuracy_score(expected, baseline))
项目:time_lstm    作者:DarryO    | 项目源码 | 文件源码
def gen_data(p, data, batch_size = 1):
    """Slice one batch out of ``data`` and convert it with ``utils.prepare_data``.

    Per the original author's notes, ``y`` in train data is a matrix
    ``(batch_size, seq_length)`` and ``y`` in test data is an array.

    Args:
        p: start offset of the batch within ``data['x']`` / ``data['y']``.
        data: mapping with keys ``'x'`` and ``'y'`` and, optionally, ``'t'``;
            each value is sliced as ``[p:p + batch_size]``.
        batch_size: number of items to take from each sequence.

    Returns:
        The result of ``utils.prepare_data`` on the sliced batch, called with
        the module-level ``VOCAB_SIZE``, ``ONE_HOT`` and ``SIGMOID_ON``.
    """
    stop = p + batch_size
    batch_data = {'x': data['x'][p:stop], 'y': data['y'][p:stop]}
    # `in` replaces dict.has_key, which no longer exists on Python 3.
    if 't' in data:
        batch_data['t'] = data['t'][p:stop]

    return utils.prepare_data(batch_data, VOCAB_SIZE, one_hot=ONE_HOT, sigmoid_on=SIGMOID_ON)
项目:iclr2016    作者:jwieting    | 项目源码 | 文件源码
def getAcc(model,words,f):
    """Score the sentence pairs of a tab-separated file in batches and call ``acc``.

    Each line is split on tabs: fields 0 and 1 are the two sentences and
    field 2 is the gold label (kept as the raw string, including any trailing
    newline).  Pairs are converted with ``getSeqs`` and scored 100 at a time,
    plus one final partial batch.

    Args:
        model: object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: passed through unchanged to ``getSeqs``.
        f: path of the file to read.

    Returns:
        Whatever ``acc(preds, golds)`` returns for the collected predictions
        and gold labels.
    """
    batch_size = 100
    preds = []
    golds = []
    seq1 = []
    seq2 = []

    def score_batch(left, right):
        # Convert both sides with utils.prepare_data and score them together.
        x1, m1 = utils.prepare_data(left)
        x2, m2 = utils.prepare_data(right)
        scores = model.scoring_function(x1, x2, m1, m2)
        # np.squeeze collapses a one-item batch to a 0-d array, whose
        # tolist() is a bare scalar that list.extend rejects; atleast_1d
        # keeps that case a one-element list and leaves others untouched.
        return np.atleast_1d(np.squeeze(scores)).tolist()

    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[0]; p2 = fields[1]; score = fields[2]
            X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
            if len(seq1) == batch_size:
                preds.extend(score_batch(seq1, seq2))
                seq1 = []
                seq2 = []
    # Score whatever is left over after the last full batch.
    if seq1:
        preds.extend(score_batch(seq1, seq2))
    return acc(preds, golds)
项目:iclr2016    作者:jwieting    | 项目源码 | 文件源码
def getAccSentiment(model,words,f):
    """Score the sentences of a tab-separated file in batches and call ``accSentiment``.

    Each line is split on tabs: field 0 is the sentence and field 1 is the
    gold label (kept as the raw string, including any trailing newline).
    Sentences are converted with ``getSeq`` and scored 100 at a time, plus
    one final partial batch.

    Args:
        model: object exposing ``scoring_function(x, mask)``.
        words: passed through unchanged to ``getSeq``.
        f: path of the file to read.

    Returns:
        Whatever ``accSentiment(preds, golds)`` returns for the collected
        predictions and gold labels.
    """
    batch_size = 100
    preds = []
    golds = []
    pending = []

    def score_batch(seqs):
        # Convert the buffered sequences with utils.prepare_data and score them.
        x1, m1 = utils.prepare_data(seqs)
        scores = model.scoring_function(x1, m1)
        # np.squeeze collapses a one-item batch to a 0-d array, whose
        # tolist() is a bare scalar that list.extend rejects; atleast_1d
        # keeps that case a one-element list and leaves others untouched.
        return np.atleast_1d(np.squeeze(scores)).tolist()

    # Context manager so the handle is closed even if a line fails to parse.
    with open(f, 'r') as fh:
        for line in fh:
            fields = line.split("\t")
            p1 = fields[0]; score = fields[1]
            pending.append(getSeq(p1, words))
            golds.append(score)
            if len(pending) == batch_size:
                preds.extend(score_batch(pending))
                pending = []
    # Score whatever is left over after the last full batch.
    if pending:
        preds.extend(score_batch(pending))
    return accSentiment(preds, golds)
项目:iclr2016    作者:jwieting    | 项目源码 | 文件源码
def getpairs(model, batch, params):
    """Prepare model inputs for a batch and for the pairs derived from it.

    The ``embeddings`` of items 0 and 1 of every batch entry are converted
    with ``utils.prepare_data`` and fed to ``model.feedforward_function``;
    row ``k`` of each output is stored as ``representation`` on the matching
    item of entry ``k``.  ``getPairsFast(batch, params.type)`` is then called
    and the ``embeddings`` of the returned pairs are converted the same way.

    Args:
        model: object exposing ``feedforward_function(x, mask)``.
        batch: re-iterable collection of indexable entries whose items 0 and
            1 carry ``embeddings`` and accept a ``representation`` attribute.
        params: object whose ``type`` attribute is forwarded to
            ``getPairsFast``.

    Returns:
        The 8-tuple ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)``.
    """
    left_inputs = [item[0].embeddings for item in batch]
    right_inputs = [item[1].embeddings for item in batch]

    g1x, g1mask = utils.prepare_data(left_inputs)
    g2x, g2mask = utils.prepare_data(right_inputs)

    left_out = model.feedforward_function(g1x, g1mask)
    right_out = model.feedforward_function(g2x, g2mask)

    # Representations must be attached before getPairsFast sees the batch.
    for position, item in enumerate(batch):
        item[0].representation = left_out[position, :]
        item[1].representation = right_out[position, :]

    # One pass only, in case getPairsFast hands back a one-shot iterable.
    left_pairs = []
    right_pairs = []
    for candidate in getPairsFast(batch, params.type):
        left_pairs.append(candidate[0].embeddings)
        right_pairs.append(candidate[1].embeddings)

    p1x, p1mask = utils.prepare_data(left_pairs)
    p2x, p2mask = utils.prepare_data(right_pairs)

    return (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def main():
    """Command-line entry point: run ``AWSRegressionExtractor`` on a dataset.

    Parses the positional ``data`` argument plus ``--seed``, ``--verbose``
    and ``--incomplete``; optionally routes INFO logging to stderr; seeds
    NumPy; loads the dataset through ``utils.prepare_data``; reorders the
    columns so that those holding strings in the first row come first; and
    runs the extractor with the first 500 rows as test data.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data', type=str, help='a dataset')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--incomplete', dest='incomplete',
                        action='store_true', help='allow incomplete queries')
    args = parser.parse_args()

    dataset = args.data
    seed = args.seed
    incomplete = args.incomplete
    verbose = args.verbose

    if verbose:
        level = logging.INFO

        logger = logging.getLogger()
        logger.setLevel(level)
        ch = logging.StreamHandler(sys.stderr)
        ch.setLevel(level)
        formatter = logging.Formatter('%(message)s')
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    np.random.seed(seed)

    _, _, X, _, _ = utils.prepare_data(dataset, onehot=False, labelEncode=False)

    # basestring only exists on Python 2; fall back to str on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # A column counts as categorical when its first-row value is a string.
    first_row = X.iloc[0]
    cat_idx = [i for i in range(len(X.columns))
               if isinstance(first_row[i], string_types)]
    # Remaining columns in their original order, built as a list so this
    # also works where range() is not a mutable list.
    cat_set = set(cat_idx)
    cont_idx = [i for i in range(X.shape[1]) if i not in cat_set]
    X = X[cat_idx + cont_idx].values

    ext = AWSRegressionExtractor(dataset, X.copy(), cat_idx,
                                 incomplete=incomplete)

    try:
        X_test = X[0:500]

        if ext.binning:
            # Number of decimal places implied by ext.eps (e.g. 0.01 -> 2).
            r = -decimal.Decimal(str(ext.eps)).as_tuple().exponent
            for i, t in enumerate(ext.feature_types):
                if t == "NUMERIC":
                    # Builtin float is what the removed np.float alias meant.
                    X_test[:, i] = np.round(X_test[:, i].astype(float), r)
    except ValueError:
        # Best effort, as in the original: run without test data if the
        # numeric conversion fails.
        X_test = None

    ext.run(args.data, X_test, 500, random_seed=seed,
            alphas=[1], methods=['passive'], baseline=False)