The following 4 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.mutual_info_score().
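Before the project examples, a minimal sketch of a direct call: mutual_info_score measures the mutual information between two label assignments and reports it in nats. The toy label arrays below are purely illustrative.

from sklearn.metrics import mutual_info_score

labels_a = [0, 0, 1, 1, 2, 2]   # e.g. a ground-truth partition (toy data)
labels_b = [0, 0, 1, 2, 2, 2]   # e.g. a predicted clustering (toy data)

mi = mutual_info_score(labels_a, labels_b)
print(mi)   # mutual information in nats; 0.0 would mean the two assignments are independent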
import numpy as np
from sklearn import metrics


def calc_mutual_information(x, y, bins):
    # doane_bin and sturges_bin are bin-count helpers defined elsewhere in the source project
    try:
        if bins == -1:
            bins = doane_bin(x)
        if bins == np.inf:
            bins = sturges_bin(x)
    except ValueError:
        bins = 10.0
    # print "bins", bins
    try:
        # joint histogram of x and y used as a contingency table
        c_xy = np.histogram2d(x, y, bins)[0]
        mi = metrics.mutual_info_score(None, None, contingency=c_xy)
        # print "success"
    except Exception as e:
        print("error with mi calc", str(e))
        mi = 0
    return mi
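The example above relies on the fact that mutual_info_score accepts a precomputed contingency table through the contingency keyword, in which case the label arguments may be None. A minimal sketch of the same idea on illustrative continuous data:

import numpy as np
from sklearn import metrics

rng = np.random.RandomState(0)
x = rng.normal(size=1000)
y = x + rng.normal(scale=0.5, size=1000)   # correlated with x (toy data)

c_xy = np.histogram2d(x, y, bins=10)[0]    # joint histogram as contingency table
print(metrics.mutual_info_score(None, None, contingency=c_xy))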
from sklearn.metrics import normalized_mutual_info_score


def calc(gr_truth, predicted):
    # precision, recall, fscore, _ = score(gr_truth, predicted, average='micro')
    # print('precision: {}'.format(precision))
    # print('recall: {}'.format(recall))
    # print('fscore: {}'.format(fscore))
    # print('jaccard: {}'.format(jaccard_similarity_score(gr_truth, predicted, normalize=True)))
    # print('mutual: {}'.format(mutual_info_score(gr_truth, predicted)))
    # print('mutual adj: {}'.format(adjusted_mutual_info_score(gr_truth, predicted)))
    # print('mutual norm: {}'.format(normalized_mutual_info_score(gr_truth, predicted)))
    return normalized_mutual_info_score(gr_truth, predicted)
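normalized_mutual_info_score rescales the mutual information into [0, 1], so the value is comparable across label sets of different sizes and is invariant to permutations of the label names. A small illustrative call with toy labels:

from sklearn.metrics import normalized_mutual_info_score

gr_truth = [0, 0, 1, 1, 2, 2]    # toy ground-truth labels
predicted = [1, 1, 0, 0, 2, 2]   # same partition with the label names permuted

print(normalized_mutual_info_score(gr_truth, predicted))   # 1.0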
from sklearn.metrics import mutual_info_score


def mutual_info_rank_features(feature_vecs, binary_labels):
    """
    Given a set of feature vectors and binary labels, return the list of indices
    of the features ranked by mutual information with the binary labels.

    Args:
        feature_vecs: list of feature vectors
        binary_labels: list of binary labels
    """
    # Binarize the features: any positive value becomes 1, everything else 0
    bin_feature_vecs = []
    for feature_v in feature_vecs:
        nfv = []
        for elem in feature_v:
            if elem > 0:
                nfv.append(1)
            else:
                nfv.append(0)
        bin_feature_vecs.append(nfv)

    # Score each binarized feature column against the labels
    mutual_infos = []
    num_features = len(bin_feature_vecs[0])
    for i in range(num_features):
        row_i = [x[i] for x in bin_feature_vecs]
        mi = mutual_info_score(row_i, binary_labels)
        mutual_infos.append(mi)

    # Feature indices sorted by ascending mutual information with the labels
    ranked_indices = [index for (mi, index) in
                      sorted(zip(mutual_infos, range(num_features)))]
    return ranked_indices
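A hypothetical call to the function above on a tiny toy feature matrix; here feature 1 follows the labels exactly while feature 0 is independent of them:

feature_vecs = [
    [0.0, 2.5],
    [1.2, 3.1],
    [0.0, 0.0],
    [0.7, 0.0],
]
binary_labels = [1, 1, 0, 0]

print(mutual_info_rank_features(feature_vecs, binary_labels))
# [0, 1]: index 0 has the lowest mutual information with the labels, index 1 the highest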
import numpy as np
from sklearn.metrics import mutual_info_score


def __init__(self, X, Y, method="ICAP"):
    """
    This class provides easy access to mutual information based filter feature selection.
    The default mutual information estimation algorithm used is the histogram binning method.
    If a more sophisticated approach is required, use the change_MI_estimator function to
    apply your own method.

    :param X: (n_samples, n_features) numpy array containing the training data
    :param Y: (n_samples) numpy array containing target labels
    :param method: filter criterion that will be applied to select the features.
                   Available criteria are (as string): "CIFE" [Lin1996], "ICAP" [Jakulin2005],
                   "CMIM" [Fleuret2004], "JMI" [Yang1999]
    """
    if X.shape[0] != len(Y):
        raise ValueError("X must have as many samples as there are labels in Y")

    self._n_features = X.shape[1]

    def normalize_data_for_MI(X):
        # Scale each feature, shift it to be non-negative and floor it to integers
        # so that the histogram-based MI estimator can treat it as discrete
        for i in range(X.shape[1]):
            std = X[:, i].std()
            if std != 0.:
                X[:, i] /= std
                X[:, i] -= X[:, i].min()
        return np.floor(X).astype("int")

    self._X = normalize_data_for_MI(np.asarray(X))
    self._Y = np.asarray(Y)

    self._method_str = method
    self._methods = {
        "CIFE": self.__J_CIFE,
        "ICAP": self.__J_ICAP,
        "CMIM": self.__J_CMIM,
        "JMI": self.__J_JMI,
        "mRMR": self.__J_mRMR,
        "MIFS": self.__J_MIFS
    }
    self._filter_criterion_kwargs = {}
    self.change_method(method)
    self._method = self._methods[method]
    # mutual_info_score returns nats; dividing by log(2) converts to bits
    self._mutual_information_estimator = lambda X1, X2: mutual_info_score(X1, X2) / np.log(2.0)
    self._redundancy = np.zeros((self._n_features, self._n_features)) - 1.
    self._relevancy = np.zeros((self._n_features)) - 1
    self._class_cond_red = np.zeros((self._n_features, self._n_features)) - 1
    self._class_cond_mi_method = self._calculate_class_conditional_MI
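The estimator lambda above divides by np.log(2.0) because sklearn's mutual_info_score reports mutual information in nats (natural logarithm); dividing by log 2 converts it to bits. A quick check of that conversion on toy labels:

import numpy as np
from sklearn.metrics import mutual_info_score

a = [0, 0, 1, 1]
b = [0, 0, 1, 1]   # identical balanced binary partitions (toy data)

mi_nats = mutual_info_score(a, b)
print(mi_nats, mi_nats / np.log(2.0))   # ~0.693 nats, i.e. exactly 1 bit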