Python scipy.spatial.distance module: hamming() example source code

The following 11 code examples, extracted from open-source Python projects, illustrate how to use scipy.spatial.distance.hamming().
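hamming() returns a normalized distance: the fraction of positions at which two equal-length vectors differ. Several of the examples below multiply this fraction by the number of test samples to recover a raw error count, as in this minimal illustration:

from scipy.spatial.distance import hamming

# hamming() reports the fraction of disagreeing positions, not a count,
# so multiply by the vector length to recover the number of mismatches.
u = [1, 0, 1, 1]
v = [1, 1, 1, 0]
print(hamming(u, v))           # 0.5 (2 of 4 positions differ)
print(hamming(u, v) * len(u))  # 2.0 (raw mismatch count)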

Project: brainiak    Author: brainiak
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portions, the test data is
    # already inside the classifier, so score() is called with X=None
    print(clf.score(None, test_labels))
Project: brainiak    Author: brainiak
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    # correlate two data components; no kernel-matrix aggregation here
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(list(zip(raw_data[0:num_training_samples], raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when correlating two components, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # here the test data is passed to score() explicitly as X
    print(clf.score(X, test_labels))
Project: brainiak    Author: brainiak
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
                                                                 num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(list(zip(raw_data, raw_data2)), labels,
            num_training_samples=num_training_samples)
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when correlating two components and aggregating the similarity matrix, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portions, the test data is
    # already inside the classifier, so score() is called with X=None
    print(clf.score(None, test_labels))

# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
Project: taar    Author: mozilla
def compute_clients_dist(self, client_data):
    client_categorical_feats = [client_data.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
    client_continuous_feats = [client_data.get(specified_key) for specified_key in CONTINUOUS_FEATURES]

    # Compute the distances between the user and the cached continuous
    # and categorical features.
    cont_features = distance.cdist(self.continuous_features,
                                   np.array([client_continuous_feats]),
                                   'canberra')
    # The lambda trick is needed to prevent |cdist| from force-casting the
    # string features to double.
    cat_features = distance.cdist(self.categorical_features,
                                  np.array([client_categorical_feats]),
                                  lambda x, y: distance.hamming(x, y))

    # Take the product of the two distances for a univariate score. Adding
    # 0.001 to the continuous distance keeps a zero there from wiping out
    # the categorical distance, giving categorical features precedence.
    return (cont_features + 0.001) * cat_features
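The method above combines a Canberra distance over continuous features with a per-row Hamming distance over categorical features. A minimal standalone sketch of the same pattern, with hypothetical feature values; it assumes a SciPy version that, as the comment above notes, passes rows through to a callable metric without casting them to double:

import numpy as np
from scipy.spatial import distance

# Hypothetical cached features for three clients (illustrative values only).
continuous_features = np.array([[1.0, 20.0], [3.0, 18.0], [0.5, 25.0]])
categorical_features = np.array([["en-US", "Linux"],
                                 ["de", "Windows"],
                                 ["en-US", "Darwin"]])

client_cont = np.array([[2.0, 19.0]])
client_cat = np.array([["en-US", "Linux"]])

cont = distance.cdist(continuous_features, client_cont, 'canberra')
cat = distance.cdist(categorical_features, client_cat,
                     lambda x, y: distance.hamming(x, y))
print((cont + 0.001) * cat)  # one combined score per cached client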
Project: Parallel-SGD    Author: angadgill
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
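The final assertion relies on sklearn's hamming_loss and SciPy's hamming agreeing on 1-D label vectors. A minimal standalone check of that relationship, using a plain assert in place of assert_equal:

import numpy as np
from scipy.spatial.distance import hamming as sp_hamming
from sklearn.metrics import hamming_loss

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 0, 1, 1])

# Both report the fraction of mismatched positions: 2 / 4 = 0.5.
assert hamming_loss(y_true, y_pred) == sp_hamming(y_true, y_pred)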
Project: brainiak    Author: brainiak
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f' %
            (i, num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
             (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
        )
        print(clf.score(list(zip(test_data, test_data)), test_labels))
Project: python_mozetl    Author: mozilla
def similarity_function(x, y):
    """ Similarity function for comparing user features.

    This really should be implemented in taar.similarity_recommender
    and then imported here for consistency.
    """

    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, get the
        # default value.
        return row[field] if row[field] is not None else default_value

    # Extract the values for the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [safe_get(k, x, 0) for k in CONTINUOUS_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    y_continuous_features = [safe_get(k, y, 0) for k in CONTINUOUS_FEATURES]

    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = (distance.hamming(x_categorical_features, y_categorical_features))
    j_c = (distance.canberra(x_continuous_features, y_continuous_features))

    # Take the product of similarities to attain a univariate similarity score.
    # Add a small constant to the continuous distance so a zero there does
    # not wipe out the categorical distance in the product.
    # Note: since both distance functions return a NumPy type, we need to
    # call the |item| function to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item()
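A hypothetical invocation, using plain dicts in place of Spark Rows (both support row[field] indexing) and illustrative feature lists; the real CATEGORICAL_FEATURES and CONTINUOUS_FEATURES are defined elsewhere in the module:

# Hypothetical feature lists, for illustration only.
CATEGORICAL_FEATURES = ["locale", "os"]
CONTINUOUS_FEATURES = ["total_uri", "active_hours"]

user_a = {"locale": "en-US", "os": "Linux", "total_uri": 120.0, "active_hours": 3.5}
user_b = {"locale": "en-US", "os": "Darwin", "total_uri": 80.0, "active_hours": 2.0}

print(similarity_function(user_a, user_b))  # combined score (lower = closer match)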
Project: teneto    Author: wiheto
def check_distance_funciton_input(distance_func_name, netinfo):
    """
    Returns the distance_func_name appropriate for the given netinfo.
    """

    if distance_func_name == 'default' and netinfo['nettype'][0] == 'b':
        print('Default distance function specified. As network is binary, using Hamming')
        distance_func_name = 'hamming'
    elif distance_func_name == 'default' and netinfo['nettype'][0] == 'w':
        distance_func_name = 'euclidean'
        print(
            'Default distance function specified. '
            'As network is weighted, using Euclidean')

    return distance_func_name
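A brief usage sketch with a hypothetical netinfo dict; teneto encodes network types as strings such as 'bu' (binary undirected) or 'wd' (weighted directed), so nettype[0] distinguishes binary from weighted:

print(check_distance_funciton_input('default', {'nettype': 'bu'}))  # 'hamming'
print(check_distance_funciton_input('default', {'nettype': 'wd'}))  # 'euclidean'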
Project: kvae    Author: simonkamronn
def plot_trajectory_uncertainty(true, gen, filter, smooth, filename):
    sequences, timesteps, h, w = true.shape

    errors = dict(Generated=list(), Filtered=list(), Smoothed=list())
    for label, var in zip(('Generated', 'Filtered', 'Smoothed'), (gen, filter, smooth)):
        for step in range(timesteps):
            errors[label].append(hamming(true[:, step].ravel() > 0.5, var[:, step].ravel() > 0.5))

        plt.plot(np.linspace(1, timesteps, num=timesteps).astype(int), errors[label], linewidth=3, ms=20, label=label)
    plt.xlabel('Steps', fontsize=20)
    plt.ylabel('Hamming distance', fontsize=20)
    plt.legend(fontsize=20)
    plt.savefig(filename)
    plt.close()
Project: mitre    Author: gerberlab
def sample(config, model=None):
    """ Create sampler and sample per options in configuration file.

    If there is a configuration option 'load_model_from_pickle' in
    section 'sampling' the function tries to load that model, ignoring
    the data argument (the option value should be the path to a pickle
    file containing a single LogisticRuleModel object.) If the model
    argument is None, and that configuration option is not present, an
    exception results.

    """
    if config.has_option('sampling','load_model_from_pickle'):
        with open(config.get('sampling','load_model_from_pickle')) as f:
            model = pickle.load(f)

    if model is None:
        raise ValueError('Model must be passed as argument if not specified in config file.')

    l = [hamming(model.data.y,t) for t in model.rule_population.flat_truth]
    arbitrary_rl = rules.RuleList(
        [[model.rule_population.flat_rules[np.argmin(l)]]]
    )
    sampler = logit_rules.LogisticRuleSampler(model,
                                              arbitrary_rl)

    if config.has_option('sampling','sampling_time'):
        sampling_time = config.getfloat('sampling','sampling_time')
        logger.info('Starting sampling: will continue for %.1f seconds' %
                    sampling_time)
        sampler.sample_for(sampling_time)
    elif config.has_option('sampling','total_samples'):
        total_samples = config.getint('sampling','total_samples')
        logger.info('Starting to draw %d samples' % total_samples)
        sampler.sample(total_samples)
    else:
        raise ValueError('Either number of samples or sampling time must be specified.')


    if config.has_option('sampling', 'pickle_sampler'):
        prefix = config.get('description','tag')
        if config.getboolean('sampling','pickle_sampler'):
            filename = prefix + '_sampler_object.pickle'
            with open(filename, 'w') as f:
                pickle.dump(sampler,f)
            logger.info('Sampler written to %s' % filename)

    return sampler
Project: teneto    Author: wiheto
def getDistanceFunction(requested_metric):
    """

    This function returns a specified distance function.


    **PARAMETERS**

    :'requested_metric': can be 'hamming', 'euclidean' or any of the functions in https://docs.scipy.org/doc/scipy/reference/spatial.distance.html which only require u and v as input.

    **OUTPUT**

    returns distance function (as function)

    **HISTORY**

    :Created: Dec 2016, WHT
    :Updated (v0.2.1): Aug 2017, WHT. Changed from distance functions being in misc to using scipy. 

    """

    distance_options = {
        'braycurtis': distance.braycurtis,
        'canberra': distance.canberra,
        'chebyshev': distance.chebyshev,
        'cityblock': distance.cityblock,
        'correlation': distance.correlation,
        'cosine': distance.cosine,
        'euclidean': distance.euclidean,
        'sqeuclidean': distance.sqeuclidean,
        'dice': distance.dice,
        'hamming': distance.hamming,
        'jaccard': distance.jaccard,
        'kulsinski': distance.kulsinski,
        'matching': distance.matching,
        'rogerstanimoto': distance.rogerstanimoto,
        'russellrao': distance.russellrao,
        'sokalmichener': distance.sokalmichener,
        'sokalsneath': distance.sokalsneath,
        'yule': distance.yule,
    }

    if requested_metric in distance_options: 
        return distance_options[requested_metric]
    else:
        raise ValueError('Distance function cannot be found.')
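A brief usage sketch: request a metric by name and apply the returned SciPy function to two binary vectors.

dfun = getDistanceFunction('hamming')
print(dfun([0, 1, 1, 0], [0, 1, 0, 0]))  # 0.25 (1 of 4 positions differs)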