The following 11 code examples, extracted from open source Python projects, illustrate how to use scipy.spatial.distance.hamming().
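Before the project examples, a minimal orientation sketch (not from any of the projects below): `hamming()` takes two 1-D arrays and returns the fraction of positions at which they disagree, not the raw count of mismatches.

```python
from scipy.spatial.distance import hamming

# hamming() returns the *fraction* of mismatching positions:
# here 2 of 4 entries differ, so the result is 0.5.
print(hamming([1, 0, 1, 1], [1, 1, 1, 0]))  # 0.5
```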
```python
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(clf.score(None, test_labels))
```
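The `hamming(...) * num_epochs_per_subj` idiom above converts the returned mismatch fraction back into a count of misclassified epochs. A standalone sketch of that conversion, with made-up labels:

```python
import numpy as np
from scipy.spatial.distance import hamming

predict = np.array([0, 1, 1, 0])
test_labels = np.array([0, 1, 0, 0])

# hamming() returns the fraction of mismatching positions; scaling by the
# number of test samples recovers the number of incorrect predictions.
incorrect = hamming(predict, test_labels) * len(test_labels)
print(incorrect)  # 1.0 (one of four predictions is wrong)
```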
```python
def example_of_correlating_two_components(raw_data, raw_data2, labels,
                                          num_subjects, num_epochs_per_subj):
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)
    clf.fit(list(zip(raw_data[0:num_training_samples],
                     raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when correlating two components, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    print(clf.score(X, test_labels))
```
```python
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
                                                                 num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)
    clf.fit(list(zip(raw_data, raw_data2)), labels,
            num_training_samples=num_training_samples)
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when correlating two components and aggregating the similarity matrix '
        'to save memory, the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(clf.score(None, test_labels))

# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
```
```python
def compute_clients_dist(self, client_data):
    client_categorical_feats = [client_data.get(specified_key)
                                for specified_key in CATEGORICAL_FEATURES]
    client_continuous_feats = [client_data.get(specified_key)
                               for specified_key in CONTINUOUS_FEATURES]

    # Compute the distances between the user and the cached continuous
    # and categorical features.
    cont_features = distance.cdist(self.continuous_features,
                                   np.array([client_continuous_feats]),
                                   'canberra')
    # The lambda trick is needed to prevent |cdist| from force-casting the
    # string features to double.
    cat_features = distance.cdist(self.categorical_features,
                                  np.array([client_categorical_feats]),
                                  lambda x, y: distance.hamming(x, y))

    # Take the product of similarities to attain a univariate similarity score.
    # Addition of 0.001 to the continuous features avoids a zero value from the
    # categorical variables, allowing categorical features precedence.
    return (cont_features + 0.001) * cat_features
```
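`hamming()` only tests element-wise (in)equality, which is what the lambda above relies on: it works on string-valued categorical vectors, not just numeric ones. A minimal sketch with made-up feature values:

```python
from scipy.spatial import distance

# String-valued categorical features compare fine, since hamming()
# just counts unequal positions: one of three fields differs here.
x = ['en-US', 'release', 'Darwin']
y = ['en-US', 'beta', 'Darwin']
print(distance.hamming(x, y))  # 0.333...
```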
```python
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
```
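The last assertion above is the key relationship for this page: on 1-D binary labels, scikit-learn's `hamming_loss` and `scipy.spatial.distance.hamming` report the same mismatch fraction. A standalone sketch:

```python
import numpy as np
from scipy.spatial.distance import hamming as sp_hamming
from sklearn.metrics import hamming_loss

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 0, 1, 1])

# Both report the fraction of disagreeing positions: 2 of 4 here.
assert hamming_loss(y_true, y_pred) == sp_hamming(y_true, y_pred) == 0.5
```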
```python
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f' %
            (i, num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
             (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
        )
        print(clf.score(list(zip(test_data, test_data)), test_labels))
```
```python
def similarity_function(x, y):
    """ Similarity function for comparing user features.

    This actually really should be implemented in taar.similarity_recommender
    and then imported here for consistency.
    """
    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, get the
        # default value.
        return row[field] if row[field] is not None else default_value

    # Extract the values for the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [safe_get(k, x, 0) for k in CONTINUOUS_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    y_continuous_features = [safe_get(k, y, 0) for k in CONTINUOUS_FEATURES]

    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = (distance.hamming(x_categorical_features, y_categorical_features))
    j_c = (distance.canberra(x_continuous_features, y_continuous_features))

    # Take the product of similarities to attain a univariate similarity score.
    # Add a minimal constant to prevent zero values from categorical features.
    # Note: since both distance functions return a Numpy type, we need to
    # call the |item| function to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item()
```
```python
def check_distance_funciton_input(distance_func_name, netinfo):
    """
    Function returns distance_func_name given netinfo.
    """
    if distance_func_name == 'default' and netinfo['nettype'][0] == 'b':
        print('Default distance function specified. As network is binary, using Hamming')
        distance_func_name = 'hamming'
    elif distance_func_name == 'default' and netinfo['nettype'][0] == 'w':
        distance_func_name = 'euclidean'
        print(
            'Default distance function specified. '
            'As network is weighted, using Euclidean')

    return distance_func_name
```
```python
def plot_trajectory_uncertainty(true, gen, filter, smooth, filename):
    sequences, timesteps, h, w = true.shape
    errors = dict(Generated=list(), Filtered=list(), Smoothed=list())
    for label, var in zip(('Generated', 'Filtered', 'Smoothed'), (gen, filter, smooth)):
        for step in range(timesteps):
            errors[label].append(hamming(true[:, step].ravel() > 0.5,
                                         var[:, step].ravel() > 0.5))
        plt.plot(np.linspace(1, timesteps, num=timesteps).astype(int),
                 errors[label], linewidth=3, ms=20, label=label)

    plt.xlabel('Steps', fontsize=20)
    plt.ylabel('Hamming distance', fontsize=20)
    plt.legend(fontsize=20)
    plt.savefig(filename)
    plt.close()
```
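The thresholding idiom above (`> 0.5` before `hamming`) turns real-valued frames into bit vectors, so the distance reads as a per-step bit-error rate. A minimal standalone sketch with random frames in place of model output:

```python
import numpy as np
from scipy.spatial.distance import hamming

rng = np.random.default_rng(0)
true_frames = rng.random((8, 16, 16))   # (sequences, h, w) at one time step
recon_frames = rng.random((8, 16, 16))

# Binarize at 0.5 and flatten, so hamming() reports the fraction of
# pixels whose on/off state disagrees across all sequences.
err = hamming(true_frames.ravel() > 0.5, recon_frames.ravel() > 0.5)
print(err)
```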
```python
def sample(config, model=None):
    """
    Create sampler and sample per options in configuration file.

    If there is a configuration option 'load_model_from_pickle' in section
    'sampling', the function tries to load that model, ignoring the data
    argument (the option value should be the path to a pickle file containing
    a single LogisticRuleModel object.)

    If the model argument is None, and that configuration option is not
    present, an exception results.
    """
    if config.has_option('sampling', 'load_model_from_pickle'):
        with open(config.get('sampling', 'load_model_from_pickle'), 'rb') as f:
            model = pickle.load(f)
    if model is None:
        raise ValueError('Model must be passed as argument if not specified in config file.')

    # Seed the sampler with the single rule whose truth vector is closest
    # (in Hamming distance) to the observed labels.
    l = [hamming(model.data.y, t) for t in model.rule_population.flat_truth]
    arbitrary_rl = rules.RuleList([[model.rule_population.flat_rules[np.argmin(l)]]])
    sampler = logit_rules.LogisticRuleSampler(model, arbitrary_rl)

    if config.has_option('sampling', 'sampling_time'):
        sampling_time = config.getfloat('sampling', 'sampling_time')
        logger.info('Starting sampling: will continue for %.1f seconds' % sampling_time)
        sampler.sample_for(sampling_time)
    elif config.has_option('sampling', 'total_samples'):
        total_samples = config.getint('sampling', 'total_samples')
        logger.info('Starting to draw %d samples' % total_samples)
        sampler.sample(total_samples)
    else:
        raise ValueError('Either number of samples or sampling time must be specified.')

    if config.has_option('sampling', 'pickle_sampler'):
        prefix = config.get('description', 'tag')
        if config.getboolean('sampling', 'pickle_sampler'):
            filename = prefix + '_sampler_object.pickle'
            with open(filename, 'wb') as f:
                pickle.dump(sampler, f)
            logger.info('Sampler written to %s' % filename)

    return sampler
```
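The `np.argmin` line above picks the rule whose truth vector best matches the observed labels `model.data.y`. A minimal sketch of that selection pattern, with made-up vectors:

```python
import numpy as np
from scipy.spatial.distance import hamming

y = np.array([1, 0, 1, 1])
candidates = [np.array([0, 0, 1, 1]),
              np.array([1, 0, 1, 0]),
              np.array([1, 0, 1, 1])]

# Pick the candidate with the smallest fraction of disagreeing positions.
dists = [hamming(y, c) for c in candidates]
best = int(np.argmin(dists))
print(best, dists[best])  # 2 0.0
```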
```python
def getDistanceFunction(requested_metric):
    """
    This function returns a specified distance function.

    **PARAMETERS**

    :'requested_metric': can be 'hamming', 'euclidean' or any of the functions
     in https://docs.scipy.org/doc/scipy/reference/spatial.distance.html which
     only require u and v as input.

    **OUTPUT**

    returns distance function (as function)

    **HISTORY**

    :Created: Dec 2016, WHT
    :Updated (v0.2.1): Aug 2017, WHT. Changed from distance functions being
     in misc to using scipy.
    """
    distance_options = {
        'braycurtis': distance.braycurtis,
        'canberra': distance.canberra,
        'chebyshev': distance.chebyshev,
        'cityblock': distance.cityblock,
        'correlation': distance.correlation,
        'cosine': distance.cosine,
        'euclidean': distance.euclidean,
        'sqeuclidean': distance.sqeuclidean,
        'dice': distance.dice,
        'hamming': distance.hamming,
        'jaccard': distance.jaccard,
        'kulsinski': distance.kulsinski,
        'matching': distance.matching,
        'rogerstanimoto': distance.rogerstanimoto,
        'russellrao': distance.russellrao,
        'sokalmichener': distance.sokalmichener,
        'sokalsneath': distance.sokalsneath,
        'yule': distance.yule,
    }

    if requested_metric in distance_options:
        return distance_options[requested_metric]
    else:
        raise ValueError('Distance function cannot be found.')
```
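A minimal usage sketch of the lookup above, assuming `getDistanceFunction` is in scope:

```python
# Look up the metric by name, then call it like any scipy distance function.
hamming_func = getDistanceFunction('hamming')
print(hamming_func([0, 1, 1, 0], [0, 0, 1, 1]))  # 0.5
```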