我们从Python开源项目中,提取了以下47个代码示例,用于说明如何使用scipy.maximum()。
def bac_metric (solution, prediction, task='binary.classification'): ''' Compute the normalized balanced accuracy. The binarization and the normalization differ for the multi-label and multi-class case. ''' label_num = solution.shape[1] score = np.zeros(label_num) bin_prediction = binarize_predictions(prediction, task) [tn,fp,tp,fn] = acc_stat(solution, bin_prediction) # Bounding to avoid division by 0 eps = 1e-15 tp = sp.maximum (eps, tp) pos_num = sp.maximum (eps, tp+fn) tpr = tp / pos_num # true positive rate (sensitivity) if (task != 'multiclass.classification') or (label_num==1): tn = sp.maximum (eps, tn) neg_num = sp.maximum (eps, tn+fp) tnr = tn / neg_num # true negative rate (specificity) bac = 0.5*(tpr + tnr) base_bac = 0.5 # random predictions for binary case else: bac = tpr base_bac = 1./label_num # random predictions for multiclass case bac = mvmean(bac) # average over all classes # Normalize: 0 for random, 1 for perfect score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac)) return score
def prior_log_loss(frac_pos, task = 'binary.classification'): ''' Baseline log loss. For multiplr classes ot labels return the volues for each column''' eps = 1e-15 frac_pos_ = sp.maximum (eps, frac_pos) if (task != 'multiclass.classification'): # binary case frac_neg = 1-frac_pos frac_neg_ = sp.maximum (eps, frac_neg) pos_class_log_loss_ = - frac_pos * np.log(frac_pos_) neg_class_log_loss_ = - frac_neg * np.log(frac_neg_) base_log_loss = pos_class_log_loss_ + neg_class_log_loss_ # base_log_loss = mvmean(base_log_loss) # print('binary {}'.format(base_log_loss)) # In the multilabel case, the right thing i to AVERAGE not sum # We return all the scores so we can normalize correctly later on else: # multiclass case fp = frac_pos_ / sum(frac_pos_) # Need to renormalize the lines in multiclass case # Only ONE label is 1 in the multiclass case active for each line pos_class_log_loss_ = - frac_pos * np.log(fp) base_log_loss = np.sum(pos_class_log_loss_) return base_log_loss # sklearn implementations for comparison
def pac_metric (solution, prediction, task='binary.classification'): ''' Probabilistic Accuracy based on log_loss metric. We assume the solution is in {0, 1} and prediction in [0, 1]. Otherwise, run normalize_array.''' debug_flag=False [sample_num, label_num] = solution.shape if label_num==1: task='binary.classification' eps = 1e-15 the_log_loss = log_loss(solution, prediction, task) # Compute the base log loss (using the prior probabilities) pos_num = 1.* sum(solution) # float conversion! frac_pos = pos_num / sample_num # prior proba of positive class the_base_log_loss = prior_log_loss(frac_pos, task) # Alternative computation of the same thing (slower) # Should always return the same thing except in the multi-label case # For which the analytic solution makes more sense if debug_flag: base_prediction = np.empty(prediction.shape) for k in range(sample_num): base_prediction[k,:] = frac_pos base_log_loss = log_loss(solution, base_prediction, task) diff = np.array(abs(the_base_log_loss-base_log_loss)) if len(diff.shape)>0: diff=max(diff) if(diff)>1e-10: print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss)) # Exponentiate to turn into an accuracy-like score. # In the multi-label case, we need to average AFTER taking the exp # because it is an NL operation pac = mvmean(np.exp(-the_log_loss)) base_pac = mvmean(np.exp(-the_base_log_loss)) # Normalize: 0 for random, 1 for perfect score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac)) return score
def log_loss(solution, prediction, task = 'binary.classification'): ''' Log loss for binary and multiclass. ''' [sample_num, label_num] = solution.shape eps = 1e-15 pred = np.copy(prediction) # beware: changes in prediction occur through this sol = np.copy(solution) if (task == 'multiclass.classification') and (label_num>1): # Make sure the lines add up to one for multi-class classification norma = np.sum(prediction, axis=1) for k in range(sample_num): pred[k,:] /= sp.maximum (norma[k], eps) # Make sure there is a single label active per line for multi-class classification sol = binarize_predictions(solution, task='multiclass.classification') # For the base prediction, this solution is ridiculous in the multi-label case # Bounding of predictions to avoid log(0),1/0,... pred = sp.minimum (1-eps, sp.maximum (eps, pred)) # Compute the log loss pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0) if (task != 'multiclass.classification') or (label_num==1): # The multi-label case is a bunch of binary problems. # The second class is the negative class for each column. neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0) log_loss = pos_class_log_loss + neg_class_log_loss # Each column is an independent problem, so we average. # The probabilities in one line do not add up to one. # log_loss = mvmean(log_loss) # print('binary {}'.format(log_loss)) # In the multilabel case, the right thing i to AVERAGE not sum # We return all the scores so we can normalize correctly later on else: # For the multiclass case the probabilities in one line add up one. log_loss = pos_class_log_loss # We sum the contributions of the columns. log_loss = np.sum(log_loss) #print('multiclass {}'.format(log_loss)) return log_loss
def binary_logloss(p, y): epsilon = 1e-15 p = sp.maximum(epsilon, p) p = sp.minimum(1-epsilon, p) res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p))) res *= -1.0/len(y) return res
def open_file(maindir): """ Creates the digital RF reading object. Args: maindir (:obj:'str'): The directory where the data is located. Returns: drfObj (obj:"DigitalRFReader"): Digital RF Reader object. chandict (obj:"dict"): Dictionary that holds info for the channels. start_indx (obj:'long'): Start index in samples. end_indx (obj:'long'): End index in samples. """ mainpath = os.path.expanduser(maindir) drfObj = drf.DigitalRFReader(mainpath) chans = drfObj.get_channels() chandict={} start_indx, end_indx=[0, sp.inf] # Get channel info for ichan in chans: curdict = {} curdict['sind'], curdict['eind'] = drfObj.get_bounds(ichan) # determine the read boundrys assuming the sampling is the same. start_indx = sp.maximum(curdict['sind'], start_indx) end_indx = sp.minimum(curdict['eind'], end_indx) dmetadict = drfObj.read_metadata(start_indx, end_indx, ichan) dmetakeys = dmetadict.keys() curdict['sps'] = dmetadict[dmetakeys[0]]['samples_per_second'] curdict['fo'] = dmetadict[dmetakeys[0]]['center_frequencies'].ravel()[0] chandict[ichan] = curdict return (drfObj, chandict, start_indx, end_indx)
def loglossl(act, pred): epsilon = 1e-15 pred = sp.maximum(epsilon, pred) pred = sp.minimum(1-epsilon, pred) ll = sum(act*sp.log(pred) + sp.subtract(1,act)*sp.log(sp.subtract(1,pred))) ll = ll * -1.0/len(act) return ll
def my_logloss(act, pred): epsilon = 1e-15 pred = K.maximum(epsilon, pred) pred = K.minimum(1 - epsilon, pred) ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred)) ll = ll * -1.0 / K.shape(act)[0] return ll
def logloss(act, pred): ''' ???????? :param act: :param pred: :return: ''' epsilon = 1e-15 pred = sp.maximum(epsilon, pred) pred = sp.minimum(1 - epsilon, pred) ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred))) ll = ll * -1.0 / len(act) return ll
def acc_metric(solution, prediction, task=BINARY_CLASSIFICATION): """ Compute the accuracy. Get the accuracy stats acc = (tpr + fpr) / (tn + fp + tp + fn) Normalize, so 1 is the best and zero mean random... :param solution: :param prediction: :param task: :return: """ label_num = solution.shape[1] bin_predictions = binarize_predictions(prediction, task) tn, fp, tp, fn = acc_stat(solution, bin_predictions) # Bounding to avoid division by 0 eps = np.float(1e-15) tp = np.sum(tp) fp = np.sum(fp) tn = np.sum(tn) fn = np.sum(fn) if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1): accuracy = (np.sum(tp) + np.sum(tn)) / ( np.sum(tp) + np.sum(fp) + np.sum(tn) + np.sum(fn) ) else: accuracy = np.sum(tp) / (np.sum(tp) + np.sum(fp)) if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1): base_accuracy = 0.5 # random predictions for binary case else: base_accuracy = 1. / label_num # Normalize: 0 for random, 1 for perfect score = (accuracy - base_accuracy) / sp.maximum(eps, (1 - base_accuracy)) return score
def bac_metric(solution, prediction, task=BINARY_CLASSIFICATION): """ Compute the normalized balanced accuracy. The binarization and the normalization differ for the multi-label and multi-class case. :param solution: :param prediction: :param task: :return: """ label_num = solution.shape[1] score = np.zeros(label_num) bin_prediction = binarize_predictions(prediction, task) [tn, fp, tp, fn] = acc_stat(solution, bin_prediction) # Bounding to avoid division by 0 eps = 1e-15 tp = sp.maximum(eps, tp) pos_num = sp.maximum(eps, tp + fn) tpr = tp / pos_num # true positive rate (sensitivity) if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1): tn = sp.maximum(eps, tn) neg_num = sp.maximum(eps, tn + fp) tnr = tn / neg_num # true negative rate (specificity) bac = 0.5 * (tpr + tnr) base_bac = 0.5 # random predictions for binary case else: bac = tpr base_bac = 1. / label_num # random predictions for multiclass case bac = np.mean(bac) # average over all classes # Normalize: 0 for random, 1 for perfect score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac)) return score
def log_loss(solution, prediction, task=BINARY_CLASSIFICATION): """Log loss for binary and multiclass.""" [sample_num, label_num] = solution.shape eps = 1e-15 pred = np.copy(prediction) # beware: changes in prediction occur through this sol = np.copy(solution) if (task == MULTICLASS_CLASSIFICATION) and (label_num > 1): # Make sure the lines add up to one for multi-class classification norma = np.sum(prediction, axis=1) for k in range(sample_num): pred[k, :] /= sp.maximum(norma[k], eps) # Make sure there is a single label active per line for multi-class # classification sol = binarize_predictions(solution, task=MULTICLASS_CLASSIFICATION) # For the base prediction, this solution is ridiculous in the # multi-label case # Bounding of predictions to avoid log(0),1/0,... pred = sp.minimum(1 - eps, sp.maximum(eps, pred)) # Compute the log loss pos_class_log_loss = -np.mean(sol * np.log(pred), axis=0) if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1): # The multi-label case is a bunch of binary problems. # The second class is the negative class for each column. neg_class_log_loss = -np.mean((1 - sol) * np.log(1 - pred), axis=0) log_loss = pos_class_log_loss + neg_class_log_loss # Each column is an independent problem, so we average. # The probabilities in one line do not add up to one. # log_loss = mvmean(log_loss) # print('binary {}'.format(log_loss)) # In the multilabel case, the right thing i to AVERAGE not sum # We return all the scores so we can normalize correctly later on else: # For the multiclass case the probabilities in one line add up one. log_loss = pos_class_log_loss # We sum the contributions of the columns. log_loss = np.sum(log_loss) # print('multiclass {}'.format(log_loss)) return log_loss
def logloss(act, pred): epsilon = 1e-15 pred = sp.maximum(epsilon, pred) pred = sp.minimum(1-epsilon, pred) ll = sum(act*sp.log(pred) + sp.subtract(1,act)*sp.log(sp.subtract(1,pred))) ll = ll * -1.0/len(act) return ll
def binary_logloss(p, y): epsilon = 1e-15 p = sp.maximum(epsilon, p) p = sp.minimum(1-epsilon, p) res = sum(y*sp.log(p) + sp.subtract(1,y)*sp.log(sp.subtract(1,p))) res *= -1.0/len(y) return res
def logloss(act, preds): epsilon = 1e-15 preds = sp.maximum(epsilon, preds) preds = sp.minimum(1 - epsilon, preds) ll = sum(act * sp.log(preds) + sp.subtract(1, act) * sp.log(sp.subtract(1, preds))) ll = ll * -1.0 / len(act) return ll
def nms(dets,proba, T): dets = dets.astype("float") if len(dets) == 0: return [] x1 = dets[:, 0] y1 = dets[:, 1] x2 = dets[:, 2] y2 = dets[:, 3] scores = proba areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] keep = [] while order.size > 0: i = order[0] keep.append(i) xx1 = sp.maximum(x1[i], x1[order[1:]]) yy1 = sp.maximum(y1[i], y1[order[1:]]) xx2 = sp.minimum(x2[i], x2[order[1:]]) yy2 = sp.minimum(y2[i], y2[order[1:]]) w = sp.maximum(0.0, xx2 - xx1 + 1) h = sp.maximum(0.0, yy2 - yy1 + 1) inter = w * h ovr = inter / (areas[i] + areas[order[1:]] - inter) inds = sp.where(ovr <= T)[0] order = order[inds + 1] return keep
def self_eval(pred,train_data): ''' :pred :train_data ?? labels ''' try: labels=train_data.get_label() except: labels=train_data epsilon = 1e-15 pred = np.maximum(epsilon, pred) pred = np.minimum(1-epsilon,pred) ll = sum(labels*np.log(pred) + (1 - labels)*np.log(1 - pred)) ll = ll * (-1.0)/len(labels) return 'log loss', ll, False
def f1_metric (solution, prediction, task='binary.classification'): ''' Compute the normalized f1 measure. The binarization differs for the multi-label and multi-class case. A non-weighted average over classes is taken. The score is normalized.''' label_num = solution.shape[1] score = np.zeros(label_num) bin_prediction = binarize_predictions(prediction, task) [tn,fp,tp,fn] = acc_stat(solution, bin_prediction) # Bounding to avoid division by 0 eps = 1e-15 true_pos_num = sp.maximum (eps, tp+fn) found_pos_num = sp.maximum (eps, tp+fp) tp = sp.maximum (eps, tp) tpr = tp / true_pos_num # true positive rate (recall) ppv = tp / found_pos_num # positive predictive value (precision) arithmetic_mean = 0.5 * sp.maximum (eps, tpr+ppv) # Harmonic mean: f1 = tpr*ppv/arithmetic_mean # Average over all classes f1 = mvmean(f1) # Normalize: 0 for random, 1 for perfect if (task != 'multiclass.classification') or (label_num==1): # How to choose the "base_f1"? # For the binary/multilabel classification case, one may want to predict all 1. # In that case tpr = 1 and ppv = frac_pos. f1 = 2 * frac_pos / (1+frac_pos) # frac_pos = mvmean(solution.ravel()) # base_f1 = 2 * frac_pos / (1+frac_pos) # or predict random values with probability 0.5, in which case # base_f1 = 0.5 # the first solution is better only if frac_pos > 1/3. # The solution in which we predict according to the class prior frac_pos gives # f1 = tpr = ppv = frac_pos, which is worse than 0.5 if frac_pos<0.5 # So, because the f1 score is used if frac_pos is small (typically <0.1) # the best is to assume that base_f1=0.5 base_f1 = 0.5 # For the multiclass case, this is not possible (though it does not make much sense to # use f1 for multiclass problems), so the best would be to assign values at random to get # tpr=ppv=frac_pos, where frac_pos=1/label_num else: base_f1=1./label_num score = (f1 - base_f1) / sp.maximum(eps, (1 - base_f1)) return score
def outlier_removed_fit(m, w = None, n_iter=10, polyord=7): """ Remove outliers using fited data. Args: m (:obj:`numpy array`): Phase curve. n_iter (:obj:'int'): Number of iteration outlier removal polyorder (:obj:'int'): Order of polynomial used. Returns: fit (:obj:'numpy array'): Curve with outliers removed """ if w is None: w = sp.ones_like(m) W = sp.diag(sp.sqrt(w)) m2 = sp.copy(m) tv = sp.linspace(-1, 1, num=len(m)) A = sp.zeros([len(m), polyord]) for j in range(polyord): A[:, j] = tv**(float(j)) A2 = sp.dot(W,A) m2w = sp.dot(m2,W) fit = None for i in range(n_iter): xhat = sp.linalg.lstsq(A2, m2w)[0] fit = sp.dot(A, xhat) # use gradient for central finite differences which keeps order resid = sp.gradient(fit - m2) std = sp.std(resid) bidx = sp.where(sp.absolute(resid) > 2.0*std)[0] for bi in bidx: A2[bi,:]=0.0 m2[bi]=0.0 m2w[bi]=0.0 if debug_plot: plt.plot(m2,label="outlier removed") plt.plot(m,label="original") plt.plot(fit,label="fit") plt.legend() plt.ylim([sp.minimum(fit)-std*3.0,sp.maximum(fit)+std*3.0]) plt.show() return(fit)
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION): """ Probabilistic Accuracy based on log_loss metric. We assume the solution is in {0, 1} and prediction in [0, 1]. Otherwise, run normalize_array. :param solution: :param prediction: :param task: :return: """ debug_flag = False sample_num, label_num = solution.shape if label_num == 1: task = BINARY_CLASSIFICATION eps = 1e-15 the_log_loss = log_loss(solution, prediction, task) # Compute the base log loss (using the prior probabilities) pos_num = 1. * sum(solution) # float conversion! frac_pos = pos_num / sample_num # prior proba of positive class the_base_log_loss = prior_log_loss(frac_pos, task) # Alternative computation of the same thing (slower) # Should always return the same thing except in the multi-label case # For which the analytic solution makes more sense if debug_flag: base_prediction = np.empty(prediction.shape) for k in range(sample_num): base_prediction[k, :] = frac_pos base_log_loss = log_loss(solution, base_prediction, task) diff = np.array(abs(the_base_log_loss - base_log_loss)) if len(diff.shape) > 0: diff = max(diff) if (diff) > 1e-10: print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss)) # Exponentiate to turn into an accuracy-like score. # In the multi-label case, we need to average AFTER taking the exp # because it is an NL operation pac = np.mean(np.exp(-the_log_loss)) base_pac = np.mean(np.exp(-the_base_log_loss)) # Normalize: 0 for random, 1 for perfect score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac)) return score
def f1_metric(solution, prediction, task=BINARY_CLASSIFICATION): """ Compute the normalized f1 measure. The binarization differs for the multi-label and multi-class case. A non-weighted average over classes is taken. The score is normalized. :param solution: :param prediction: :param task: :return: """ label_num = solution.shape[1] score = np.zeros(label_num) bin_prediction = binarize_predictions(prediction, task) [tn, fp, tp, fn] = acc_stat(solution, bin_prediction) # Bounding to avoid division by 0 eps = 1e-15 true_pos_num = sp.maximum(eps, tp + fn) found_pos_num = sp.maximum(eps, tp + fp) tp = sp.maximum(eps, tp) tpr = tp / true_pos_num # true positive rate (recall) ppv = tp / found_pos_num # positive predictive value (precision) arithmetic_mean = 0.5 * sp.maximum(eps, tpr + ppv) # Harmonic mean: f1 = tpr * ppv / arithmetic_mean # Average over all classes f1 = np.mean(f1) # Normalize: 0 for random, 1 for perfect if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1): # How to choose the "base_f1"? # For the binary/multilabel classification case, one may want to predict all 1. # In that case tpr = 1 and ppv = frac_pos. f1 = 2 * frac_pos / (1+frac_pos) # frac_pos = mvmean(solution.ravel()) # base_f1 = 2 * frac_pos / (1+frac_pos) # or predict random values with probability 0.5, in which case # base_f1 = 0.5 # the first solution is better only if frac_pos > 1/3. # The solution in which we predict according to the class prior frac_pos gives # f1 = tpr = ppv = frac_pos, which is worse than 0.5 if frac_pos<0.5 # So, because the f1 score is used if frac_pos is small (typically <0.1) # the best is to assume that base_f1=0.5 base_f1 = 0.5 # For the multiclass case, this is not possible (though it does not make much sense to # use f1 for multiclass problems), so the best would be to assign values at random to get # tpr=ppv=frac_pos, where frac_pos=1/label_num else: base_f1 = 1. / label_num score = (f1 - base_f1) / sp.maximum(eps, (1 - base_f1)) return score