我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.nanmean()。
def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'):
    # Segmentation evaluation pass (Python 2 print syntax): accumulate a
    # class-confusion histogram over `dataset` via compute_hist (defined
    # elsewhere in this file), then print loss / accuracy / IU summaries
    # and return the histogram.
    n_cl = net.blobs[layer].channels  # number of classes; not used below
    if save_format:
        save_format = save_format.format(iter)
    hist, loss = compute_hist(net, save_format, dataset, layer, gt)
    # mean loss
    print '>>>', datetime.now(), 'Iteration', iter, 'loss', loss
    # overall accuracy: correctly classified pixels over all pixels
    acc = np.diag(hist).sum() / hist.sum()
    print '>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', acc
    # per-class accuracy; nanmean skips classes absent from the ground truth
    acc = np.diag(hist) / hist.sum(1)
    print '>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc)
    # per-class IU (intersection over union)
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print '>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu)
    freq = hist.sum(1) / hist.sum()
    # frequency-weighted average accuracy
    print '>>>', datetime.now(), 'Iteration', iter, 'fwavacc', \
            (freq[freq > 0] * iu[freq > 0]).sum()
    return hist
def get_scores(self):
    """Compute accuracy metrics from the accumulated confusion matrix.

    Returns a tuple ``(score_dict, cls_iu)``:
      - ``score_dict`` maps display labels to overall accuracy, mean
        per-class accuracy, frequency-weighted accuracy and mean IoU;
      - ``cls_iu`` maps each class index to its IoU.
    """
    cm = self.confusion_matrix
    diagonal = np.diag(cm)
    overall_acc = diagonal.sum() / cm.sum()
    # Per-class accuracy, averaged while ignoring classes with no samples.
    mean_acc = np.nanmean(diagonal / cm.sum(axis=1))
    # IoU per class: intersection / union of prediction and ground truth.
    union = cm.sum(axis=1) + cm.sum(axis=0) - diagonal
    iou = diagonal / union
    mean_iou = np.nanmean(iou)
    # Frequency-weighted accuracy over classes actually present.
    class_freq = cm.sum(axis=1) / cm.sum()
    present = class_freq > 0
    freq_weighted_acc = (class_freq[present] * iou[present]).sum()
    per_class_iou = dict(zip(range(self.n_classes), iou))
    scores = {'Overall Acc: \t': overall_acc,
              'Mean Acc : \t': mean_acc,
              'FreqW Acc : \t': freq_weighted_acc,
              'Mean IoU : \t': mean_iou,}
    return scores, per_class_iou
def getOverallResults(self):
    """Return the latest (acc, auc, f1, precision, recall) results.

    In multilabel mode each metric is the NaN-ignoring mean of the most
    recent value recorded for every label in ``self.optimize_labels``;
    otherwise the most recent scalar metrics are returned directly.
    """
    metric_names = ('acc', 'auc', 'f1', 'precision', 'recall')
    if self.multilabel:
        averaged = tuple(
            np.nanmean([self.training_val_results[name][label][-1]
                        for label in self.optimize_labels])
            for name in metric_names)
        return averaged
    else:
        return tuple(self.training_val_results[name][-1] for name in metric_names)
def getOverallResults(self, average_over_tasks=False):
    """Return the latest (acc, auc, f1, precision, recall) results.

    When ``average_over_tasks`` is True, each metric is the NaN-ignoring
    mean over the per-task results for the labels in
    ``self.optimize_labels``; otherwise the latest aggregate metrics are
    returned as-is.
    """
    names = ('acc', 'auc', 'f1', 'precision', 'recall')
    if average_over_tasks:
        return tuple(
            np.nanmean([self.training_val_results_per_task[name][label][-1]
                        for label in self.optimize_labels])
            for name in names)
    else:
        return tuple(self.training_val_results[name][-1] for name in names)
def getValidationResults(self, results_dict):
    """Train the clustering classifier to convergence and record validation
    metrics into ``results_dict``.

    With ``users_as_tasks`` the accuracy/AUC are computed jointly over all
    validation tasks; otherwise each task is scored individually and only
    tasks listed in ``self.optimize_labels`` contribute to the averaged
    'val_acc' / 'val_auc' entries.
    """
    self.classifier.trainUntilConverged()
    results_dict['num_clusters'] = self.classifier.K
    if self.users_as_tasks:
        val_acc, val_auc = self.getAccuracyAucOnAllTasks(self.val_tasks)
        results_dict['val_acc'] = val_acc
        results_dict['val_auc'] = val_auc
        return results_dict
    kept_accs = []
    kept_aucs = []
    for idx in range(self.n_tasks):
        acc, auc = self.getAccuracyAucOnOneTask(self.val_tasks, idx)
        task_name = self.val_tasks[idx]['Name']
        results_dict['TaskAcc-' + helper.getFriendlyLabelName(task_name)] = acc
        results_dict['TaskAuc-' + helper.getFriendlyLabelName(task_name)] = auc
        # Only the optimized labels feed the aggregate validation score.
        if task_name in self.optimize_labels:
            kept_accs.append(acc)
            kept_aucs.append(auc)
    results_dict['val_acc'] = np.nanmean(kept_accs)
    results_dict['val_auc'] = np.nanmean(kept_aucs)
    return results_dict
def CaseInterpreter(overlap, NumSNPs, topHits, probScore):
    """Classify a sample-identification result into a diagnostic case code.

    Returns ``(case, note)``. Relies on a module-level ``prob_thres``
    defined elsewhere in the file; ``NumSNPs`` is accepted but unused.
    """
    overlap_thres = 0.5
    num_lines = len(probScore)  # retained from original; not used afterwards
    case = 10  # sentinel; collapsed to the generic ambiguous case below
    if len(topHits) == 1:
        case, note = 0, "Unique hit"
    elif np.nanmean(probScore[topHits]) > prob_thres:
        case, note = 2, "Ambiguous sample: Accessions in top hits can be really close"
    elif overlap > overlap_thres:
        case, note = 3, "Ambiguous sample: Sample might contain mixture of DNA or contamination"
    elif overlap < overlap_thres:
        case, note = 4, "Ambiguous sample: Overlap of SNPs is very low, sample may not be in database"
    # Anything not matched above (e.g. overlap exactly at the threshold)
    # collapses to the generic ambiguous verdict.
    if case > 4:
        case, note = 1, "Ambiguous sample"
    return (case, note)
def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
    # For each latent factor j, save a heatmap of the topk variables with the
    # highest mutual information with that factor, z-scored per variable
    # (NaN-aware), with samples ordered by the factor's continuous value.
    cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
    m, nv = mis.shape  # m latent factors, nv variables
    for j in range(m):
        # indices of the topk variables most informative about factor j
        inds = np.argsort(- mis[j, :])[:topk]
        if len(inds) >= 2:
            plt.clf()
            order = np.argsort(cont[:,j])  # sort samples by factor value
            subdata = data[:, inds][order].T
            # z-score each variable row, ignoring NaNs
            subdata -= np.nanmean(subdata, axis=1, keepdims=True)
            subdata /= np.nanstd(subdata, axis=1, keepdims=True)
            columns = [column_label[i] for i in inds]
            sns.heatmap(subdata, vmin=-3, vmax=3, cmap=cmap, yticklabels=columns, xticklabels=False, mask=np.isnan(subdata))
            filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.title("Latent factor {}".format(j))
            plt.yticks(rotation=0)
            plt.savefig(filename, bbox_inches='tight')
            plt.close('all')
            #plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=cont[:, j],
            #          outfile=prefix + '/relationships/group_num=' + str(j), latent=labels[:, j], alpha=0.1)
def all_pairs_normalized_distances_reference(X):
    """
    Reference implementation of normalized all-pairs distance, used
    for testing the more efficient implementation above for equivalence.

    Entry (i, j) is the mean of the squared per-column differences between
    samples i and j, computed over the columns where the difference is not
    NaN; pairs with no comparable columns stay +inf.
    """
    n_samples, n_cols = X.shape
    D = np.ones((n_samples, n_samples), dtype="float32") * np.inf
    for i in range(n_samples):
        diffs = X - X[i, :].reshape((1, n_cols))
        nan_mask = np.isnan(diffs)
        # Rows with at least one non-NaN difference get a finite distance.
        rows_with_data = nan_mask.sum(axis=1) < n_cols
        D[i, rows_with_data] = np.nanmean(diffs[rows_with_data, :] ** 2, axis=1)
    return D
def label_accuracy_score(label_trues, label_preds, n_class):
    """Returns accuracy score evaluation result.

      - overall accuracy
      - mean accuracy
      - mean IU
      - fwavacc
    """
    # Accumulate the class-confusion histogram over all image pairs
    # (_fast_hist is defined elsewhere in this file).
    hist = np.zeros((n_class, n_class))
    for true_lbl, pred_lbl in zip(label_trues, label_preds):
        hist += _fast_hist(true_lbl.flatten(), pred_lbl.flatten(), n_class)
    diagonal = np.diag(hist)
    acc = diagonal.sum() / hist.sum()
    # Per-class accuracy averaged while skipping absent classes (NaNs).
    acc_cls = np.nanmean(diagonal / hist.sum(axis=1))
    # Per-class intersection-over-union.
    iu = diagonal / (hist.sum(axis=1) + hist.sum(axis=0) - diagonal)
    mean_iu = np.nanmean(iu)
    freq = hist.sum(axis=1) / hist.sum()
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, fwavacc


# -----------------------------------------------------------------------------
# Visualization
# -----------------------------------------------------------------------------
def plot(self, data, size, newdata=None):
    # Moving-range (MR) control chart for individuals data (size must be 1).
    # Returns the moving ranges, their mean, and the lower/upper control
    # limits scaled by the D3/D4 chart constants (defined elsewhere).
    assert size == 1
    newvalues = None
    # Moving range: absolute difference of consecutive points, NaN-padded
    # so R aligns index-wise with data.
    R = np.array([np.nan] + [abs(data[i] - data[i + 1]) for i in range(len(data) - 1)])
    if newdata:
        # NOTE(review): `data[-1:] + newdata` assumes list concatenation; with
        # a numpy array this would add element-wise — confirm callers pass lists.
        newdata = data[-1:] + newdata
        n = len(newdata)
        newvalues = [abs(newdata[i] - newdata[i + 1]) for i in range(n - 1)]
    Rbar = np.nanmean(R)  # average moving range (NaN-aware)
    lclr = D3[2] * Rbar  # lower control limit
    uclr = D4[2] * Rbar  # upper control limit
    # NOTE(review): `newvalues` is computed but never returned — confirm intended.
    return (R, Rbar, lclr, uclr, self._title)
def plot(self, data, size, newdata=None):
    # Individuals (X) control chart for size-1 subgroups: center line is the
    # data mean; control limits are Xbar +/- 3 * Rbar / d2 (d2 constants
    # defined elsewhere).
    assert size == 1
    newvalues = None
    # Moving range of consecutive observations, NaN-padded for alignment.
    R = np.array([np.nan] + [abs(data[i] - data[i + 1]) for i in range(len(data) - 1)])
    if newdata:
        # NOTE(review): `newvalues` is assigned but never used or returned —
        # confirm this is intended.
        newvalues = newdata
    Rbar = np.nanmean(R)  # average moving range (NaN-aware)
    Xbar = np.mean(data)
    lclx = Xbar - 3 * (Rbar / d2[2])
    uclx = Xbar + 3 * (Rbar / d2[2])
    return (data, Xbar, lclx, uclx, self._title)
def hourly_wind_speed(wind_speeds, times):
    """Average wind speed for each hour of the day.

    Arguments:
    wind_speeds -- a np array of all wind speeds
    times -- a np array of all times with indexes corresponding to wind_speeds

    Returns a pair ``(hours, means)`` where ``hours`` is 0..23 and
    ``means[h]`` is the NaN-ignoring mean of all speeds recorded during
    hour ``h`` (NaN for hours with no samples).
    """
    hour_of_sample = np.array([t.hour for t in times])
    hours = list(range(24))
    means = [np.nanmean(wind_speeds[hour_of_sample == h]) for h in hours]
    return np.array(hours), np.array(means)


#Gets average wind dir for each hour of the day (returns 24h averaged over multiple days)
def extract_data(self):
    # Load the raw EDF/GDF recording and repair invalid samples: on every
    # non-stimulus channel, samples equal to the channel minimum are set to
    # NaN and then replaced by the channel's NaN-ignoring mean.
    # Assumes the channel minimum is a sentinel for invalid samples —
    # TODO confirm against the recording format.
    raw_edf = mne.io.read_raw_edf(self.filename, stim_channel='auto')
    raw_edf.load_data()
    # correct nan values
    data = raw_edf.get_data()

    # do not correct stimulus channel
    assert raw_edf.ch_names[-1] == 'STI 014'

    for i_chan in range(data.shape[0] - 1):
        # first set to nan, than replace nans by nanmean.
        this_chan = data[i_chan]
        data[i_chan] = np.where(this_chan == np.min(this_chan), np.nan, this_chan)
        mask = np.isnan(data[i_chan])
        chan_mean = np.nanmean(data[i_chan])
        data[i_chan, mask] = chan_mean

    gdf_events = raw_edf.find_edf_events()
    # Rebuild the Raw object from the corrected data.
    raw_edf = mne.io.RawArray(data, raw_edf.info, verbose='WARNING')
    # remember gdf events
    raw_edf.info['gdf_events'] = gdf_events
    return raw_edf
def load_dataset():
    """Load the facial-keypoints training CSV and return ``(image, label)``.

    ``image`` is an int32 array of flattened 96x96 pixel rows; ``label`` is
    a float32 array of keypoint coordinates with NaNs replaced by the
    per-column mean. Raises if the dataset file is missing.
    """
    if not os.path.exists("./dataset/training.csv"):
        print("dataset does not exist")
        raise Exception

    # load dataset
    labeled_image = pd.read_csv("./dataset/training.csv")

    # Each 'Image' cell is a space-separated pixel string; split and cast.
    image = np.array(labeled_image["Image"].values).reshape(-1, 1)
    image = np.apply_along_axis(lambda img: (img[0].split()), 1, image)
    image = image.astype(np.int32)  # pixel strings -> integers
    image = image.reshape(-1, 96 * 96)  # flatten to 96*96 per sample

    # All remaining columns are keypoint labels.
    label = labeled_image.values[:, :-1]
    label = label.astype(np.float32)

    # Impute NaN labels with their column means.
    col_mean = np.nanmean(label, axis=0)
    nan_idx = np.where(np.isnan(label))
    label[nan_idx] = np.take(col_mean, nan_idx[1])

    return image, label
def on_episode_end(self, episode, logs):
    """Record duration, averaged step metrics and log entries for a finished
    episode, periodically saving, then drop the episode's bookkeeping."""
    duration = timeit.default_timer() - self.starts[episode]

    episode_metrics = self.metrics[episode]
    if np.isnan(episode_metrics).all():
        # No usable metric values this episode: store one NaN per metric.
        mean_metrics = np.array([np.nan for _ in self.metrics_names])
    else:
        mean_metrics = np.nanmean(episode_metrics, axis=0)
    assert len(mean_metrics) == len(self.metrics_names)

    entries = list(zip(self.metrics_names, mean_metrics))
    entries += list(logs.items())
    entries += [('episode', episode), ('duration', duration)]
    for key, value in entries:
        self.data.setdefault(key, []).append(value)

    if self.interval is not None and episode % self.interval == 0:
        self.save_data()

    # Clean up per-episode state.
    del self.metrics[episode]
    del self.starts[episode]
def timeline_aggregate_plot(padded, title='', cmap="jet", plot=True):
    # 2x2 figure: top row is the per-episode timeline heatmap (drawn by
    # timeline_plot, defined elsewhere); bottom row plots the per-timestep
    # NaN-ignoring mean, first with the original (right-padded) alignment
    # and then left-padded so episodes align at their ends.
    fig, ax = plt.subplots(ncols=2, nrows=2, sharex=True, sharey=False,
                           figsize=(12, 8))
    fig, ax[0] = timeline_plot(
        padded, title, cmap=cmap, plot=False, fig=fig, ax=ax[0])
    ax[1, 0].plot(np.nanmean(padded, axis=0), lw=0.5, c='black',
                  drawstyle='steps-post')
    ax[1, 0].set_title('mean/timestep')
    # flip padding side (project helper `tr`) so episode ends line up
    padded = tr.right_pad_to_left_pad(padded)
    ax[1, 1].plot(np.nanmean(padded, axis=0), lw=0.5, c='black',
                  drawstyle='steps-post')
    ax[1, 1].set_title('mean/timestep')
    fig.suptitle(title, fontsize=14)
    if plot:
        fig.show()
        return None, None
    else:
        return fig, ax
def test_run_roi_stats_via_API():
    "Tests whether roi stats can be computed (not their accuracy) and the return values match in size."

    # Named scipy/numpy reducers plus partially-applied callables, all of
    # which graynet.roiwise_stats_indiv must accept.
    summary_methods = ['median', 'mean', 'std', 'variation', 'entropy', 'skew', 'kurtosis']
    # 'mode' returns more than one value; 'gmean' requires only positive values,
    # 'hmean' can not always be computed
    from scipy.stats import trim_mean, kstat
    from functools import partial
    trimmed_mean = partial(trim_mean, proportiontocut=0.05)
    third_kstat = partial(kstat, n=3)

    summary_methods.extend([trimmed_mean, third_kstat])

    # checking support for nan-handling callables
    summary_methods.extend([np.nanmedian, np.nanmean])

    for summary_method in summary_methods:
        # subject_id_list, fs_dir, base_feature, atlas, fwhm, out_dir and
        # num_roi_wholebrain are module-level fixtures defined elsewhere
        # in this test file.
        roi_medians = graynet.roiwise_stats_indiv(subject_id_list, fs_dir,
                                                  base_feature=base_feature,
                                                  chosen_roi_stats=summary_method,
                                                  atlas=atlas, smoothing_param=fwhm,
                                                  out_dir=out_dir, return_results=True)
        for sub in subject_id_list:
            if roi_medians[sub].size != num_roi_wholebrain:
                raise ValueError('invalid summary stats - #nodes do not match.')
def add_MACD(data, Ns=None):
    '''
    Append MACD and Signal columns to a stock-price DataFrame.

    :param data: DataFrame containing stock price info in the second column
    :param Ns: List of [short-term EMA span, long-term EMA span, signal
               look-back window]; defaults to the classic [12, 26, 9]
    :return: the input DataFrame with 'MACD' and 'Signal' columns added
    '''
    # BUG FIX: avoid the mutable default argument, and replace the
    # DataFrame.ix indexer (removed from pandas) with .loc — this also
    # makes the function consistent with the other add_MACD in this file.
    if Ns is None:
        Ns = [12, 26, 9]
    symbol = data.columns.values[1]  # assuming stock price is in the second column in data
    # MACD line: short-span EMA minus long-span EMA of the price series.
    MACD = cal_EMA(data.loc[:, symbol], N=Ns[0]) - cal_EMA(data.loc[:, symbol], N=Ns[1])
    data['MACD'] = MACD
    # Signal line: EMA of the MACD, skipping the long-EMA warm-up period.
    signal = cal_EMA(data.MACD[Ns[1]:], N=Ns[2])
    # # normalized them
    # MACD = (MACD - np.nanmean(MACD))/(2*np.nanstd(MACD))
    # signal = (signal - np.nanmean(signal))/(2*np.nanstd(signal))
    data['Signal'] = 'NaN'
    data.loc[Ns[1]:, 'Signal'] = signal
    return data
def add_MACD(data, Ns=None):
    '''
    :param data: DataFrame containing stock price info in the second column
    :param Ns: List of short term long term EMA to use and look-back window of MACD's EMA
    :return:
    '''
    # Default to the classic MACD spans without a mutable default argument.
    Ns = [12, 26, 9] if Ns is None else Ns
    symbol = data.columns.values[1]  # assuming stock price is in the second column in data
    # MACD line = fast EMA - slow EMA of the price column.
    fast = cal_EMA(data.loc[:, symbol], N=Ns[0])
    slow = cal_EMA(data.loc[:, symbol], N=Ns[1])
    data['MACD'] = fast - slow
    # Signal line = EMA of the MACD beyond the slow-EMA warm-up window.
    signal = cal_EMA(data.MACD[Ns[1]:], N=Ns[2])
    # # normalized them
    # MACD = (MACD - np.nanmean(MACD))/(2*np.nanstd(MACD))
    # signal = (signal - np.nanmean(signal))/(2*np.nanstd(signal))
    #
    data['Signal'] = 'NaN'
    data.loc[Ns[1]:, 'Signal'] = signal
    return data
def mean_spectra(region, line, file_extension, restFreq, spec_param):
    '''
    Sum spectra over entire mapped region
    Cubes are missing BUNIT header parameter. Fix.
    '''
    # BUG FIX: the original format string mixed automatic '{}' and explicit
    # '{0}' placeholders ('{0}/0{}_{1}_{2}_trim.fits'), which raises
    # ValueError in str.format. Use explicit indices throughout.
    # (Filename pattern assumed to be <region>/<line>_<extension>_trim.fits
    # — confirm against the data layout.)
    filein = '{0}/{1}_{2}_trim.fits'.format(region, line, file_extension)
    #add_fits_units(filein,'K')
    cube = SpectralCube.read(filein)
    #trim_edge_cube(cube)
    slice_unmasked = cube.unmasked_data[:, :, :]
    if line == 'NH3_33':
        # Zero out the channel range flagged for the NH3 (3,3) line.
        slice_unmasked[spec_param['mask33_chans'][0]:spec_param['mask33_chans'][1], :, :] = 0.
    # NaN-aware mean over the two spatial axes -> mean spectrum.
    summed_spectrum = np.nanmean(slice_unmasked, axis=(1, 2))
    cube2 = cube.with_spectral_unit(u.km/u.s, velocity_convention='radio',
                                    rest_value=restFreq*u.GHz)
    return summed_spectrum, cube2.spectral_axis
def meaniou(self, predictor, predict_dir, image_size):
    """Compute the mean IoU of ``predictor`` over every .jpg in
    ``predict_dir``, using the matching '*_final_mask.png' files as
    ground truth."""
    segparams = util.SegParams()
    classes = segparams.feature_classes().values()
    num_classes = len(classes) + 1  # +1 for the background class
    hist = np.zeros((num_classes, num_classes))

    jpg_names = [fname.strip() for fname in os.listdir(predict_dir)
                 if fname.endswith('.jpg')]
    for jpg in jpg_names:
        # Predicted label map, reordered to match the ground-truth layout.
        prediction = predictor.predict(os.path.join(predict_dir, jpg))
        prediction = prediction.transpose(0, 2, 1).squeeze()
        # Ground-truth mask shares the image basename.
        gt_name = os.path.join(predict_dir, jpg[:-4] + '_final_mask' + '.png')
        gt = np.asarray(convert(gt_name, image_size))
        gt = convert_labels(gt, image_size, image_size)
        hist += compute_hist(gt, prediction, num_classes=num_classes)

    # Per-class IoU from the confusion histogram; nanmean skips absent classes.
    iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    return np.nanmean(iou)
def get_arg_best(self):
    """Return the index of the trial with the smallest result.

    Uses the scalar 'result' when it is finite, otherwise the NaN-ignoring
    mean of 'instance_results'; trials with neither are skipped.

    Raises:
        ValueError: if no trial has a finite value.
    """
    best_idx = -1
    # BUG FIX: sys.maxint does not exist on Python 3; +inf is a portable
    # "worse than anything" starting value (works on Python 2 as well).
    best_value = float('inf')
    for i, trial in enumerate(self.trials):
        tmp_res = np.NaN
        if np.isfinite(trial['result']):
            tmp_res = trial['result']
        elif np.isfinite(trial['instance_results']).any():
            tmp_res = wrapping_util.nan_mean(trial['instance_results'])
            # np.nanmean is not available in older numpy versions
            # tmp_res = scipy.nanmean(trial['instance_results'])
        else:
            continue
        if tmp_res < best_value:
            best_idx = i
            best_value = tmp_res
    if best_idx == -1:
        raise ValueError("No best value found.")
    return best_idx

# Get the best value so far, for more documentation see get_arg_best
def do_seg_tests(net, iter, save_format, n_dataset, layer='score', gt='label'):
    # Segmentation evaluation pass (Python 2 print syntax): accumulate a
    # class-confusion histogram over the dataset via compute_hist (defined
    # elsewhere), then print loss / accuracy / IU summaries.
    print 'do seg tests'
    print '........................'
    n_cl = net.blobs[layer].channels  # number of classes; not used below
    if save_format:
        save_format = save_format.format(iter)
    hist, loss = compute_hist(net, save_format, n_dataset, layer, gt)
    # mean loss
    print '>>>', datetime.now(), 'Iteration', iter, 'loss', loss
    # overall accuracy
    acc = np.diag(hist).sum() / hist.sum()
    print '>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', acc
    # per-class accuracy; nanmean skips classes absent from the ground truth
    acc = np.diag(hist) / hist.sum(1)
    print '>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc)
    # per-class IU (intersection over union)
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print '>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu)
    freq = hist.sum(1) / hist.sum()
    # frequency-weighted average accuracy
    print '>>>', datetime.now(), 'Iteration', iter, 'fwavacc', \
            (freq[freq > 0] * iu[freq > 0]).sum()
    return hist
def lnlike(self, pars):
    # Log-likelihood of the binned rate model: theta holds the per-bin
    # rates; the likelihood is -integral + sum over observations of the
    # log of the MCMC-sample-averaged detection-weighted rate.
    # Pull theta out of pars
    theta = pars[:self.Nbins]

    # Generate the inner summation
    gamma = np.ones_like(self.bin_idx) * np.nan
    good = (self.bin_idx < self.Nbins) & (self.bin_idx >= 0)  # nans in q get put in nonexistent bins
    # weight each valid sample by the censoring (detection) probability
    # and the rate of the bin it falls in
    gamma[good] = self.Nobs * self.censoring_fcn(self.mcmc_samples[good]) * theta[self.bin_idx[good]]
    # average over MCMC samples, ignoring entries outside valid bins
    summation = np.nanmean(gamma, axis=1)

    # Calculate the integral
    I = self._integral_fcn(theta)

    # Generate the log-likelihood
    ll = -I + np.nansum(np.log(summation))
    return ll
def summarize_sensitivity(sens_df):
    """
    Summarize the sensitivity analysis by finding the detection rate and
    average significance as a function of teff and vsini

    Parameters:
    ===========
    - sens_df:    pandas DataFrame
                  The DataFrame such as generated by read_hdf5

    Returns:
    ========
    A pandas dataframe with the summary
    """
    group_cols = ['star', 'date', '[Fe/H]', 'logg', 'addmode', 'temperature', 'vsini']
    # Fraction of rows per group exceeding the 5-sigma significance cut.
    detrate = sens_df.groupby(group_cols).apply(
        lambda g: (g.significance > 5).sum() / float(len(g)))
    detrate = detrate.reset_index().rename(columns={0: 'detrate'})
    # NaN-ignoring mean significance per group, attached as a column.
    mean_sig = sens_df.groupby(group_cols).apply(
        lambda g: np.nanmean(g.significance))
    mean_sig = mean_sig.reset_index().rename(columns={0: 'significance'})
    detrate['significance'] = mean_sig['significance']
    return detrate
def _print_train_val(self):
    """ Print training and validation information """
    # Python 2 syntax (print statements, xrange). Extends the base-class
    # report with per-class validation error from the 'error' blob.
    ClassificationSW._print_train_val(self)
    cur_iter = self._cur_iter
    cur_round = self._cur_round

    # display training errors
    if cur_iter % cfg.TRAIN.TRAIN_FREQ == 0:
        err_train = self._err_mean
        print 'Round {}, Iteration {}: training error = {}'.format(cur_round, cur_iter, err_train.mean())
        # if self._model_params.model is not None:
        #     print 'err_corr: {}'.format(self._err_corr)

    # display validation errors
    if cur_iter % cfg.TRAIN.VAL_FREQ == 0:
        # perform validation: one forward pass per validation sample,
        # thresholding the per-class 'error' blob at 0.5
        err_val = np.zeros((cfg.TRAIN.VAL_SIZE * cfg.TRAIN.IMS_PER_BATCH, self._num_classes))
        for i in xrange(cfg.TRAIN.VAL_SIZE * cfg.TRAIN.IMS_PER_BATCH):
            self._solver.test_nets[0].forward()
            err_val[i,:] = (self._solver.test_nets[0].blobs['error'].data > 0.5)
        # NaN-aware mean per class, then overall mean for display
        err_val = np.nanmean(err_val, axis=0)
        print 'Round {}, Iteration {}: validation error = {}'.format(cur_round, cur_iter, np.nanmean(err_val))
def _print_train_val(self): """ Print training and validation information """ # evaluate training performance ClassificationSW._print_train_val(self) cur_iter = self._cur_iter cur_round = self._cur_round # display training errors if cur_iter % cfg.TRAIN.TRAIN_FREQ == 0: err_train = self._err_mean print 'Round {}, Iteration {}: training error = {}'.format(cur_round, cur_iter, err_train.mean()) # display validation errors if cur_iter % cfg.TRAIN.VAL_FREQ == 0: # perform validation err_val = np.zeros((cfg.TRAIN.VAL_SIZE * cfg.TRAIN.IMS_PER_BATCH, )) for i in xrange(cfg.TRAIN.VAL_SIZE * cfg.TRAIN.IMS_PER_BATCH): self._solver.test_nets[0].forward() err_val[i,:] = 1.0 - self._solver.test_nets[0].blobs['acc'].data err_val = np.nanmean(err_val, axis=0) print 'Round {}, Iteration {}: validation error = {}'.format(cur_round, cur_iter, np.nanmean(err_val))
def on_step_begin(self, step, logs):
    # Periodic progress report: every `interval` steps, print the averaged
    # metrics/infos and an episode-reward summary for the elapsed window,
    # reset the window accumulators, and announce the new interval.
    if self.step % self.interval == 0:
        if len(self.episode_rewards) > 0:
            metrics = np.array(self.metrics)
            assert metrics.shape == (self.interval, len(self.metrics_names))
            formatted_metrics = ''
            if not np.isnan(metrics).all():  # not all values are means
                means = np.nanmean(self.metrics, axis=0)
                assert means.shape == (len(self.metrics_names),)
                for name, mean in zip(self.metrics_names, means):
                    formatted_metrics += ' - {}: {:.3f}'.format(name, mean)
            formatted_infos = ''
            if len(self.infos) > 0:
                infos = np.array(self.infos)
                if not np.isnan(infos).all():  # not all values are means
                    means = np.nanmean(self.infos, axis=0)
                    assert means.shape == (len(self.info_names),)
                    for name, mean in zip(self.info_names, means):
                        formatted_infos += ' - {}: {:.3f}'.format(name, mean)
            # window summary: mean reward with min/max, then metric/info means
            print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(len(self.episode_rewards), np.mean(self.episode_rewards), np.min(self.episode_rewards), np.max(self.episode_rewards), formatted_metrics, formatted_infos))
            print('')
            self.reset()
        print('Interval {} ({} steps performed)'.format(self.step // self.interval + 1, self.step))
def corr(data):
    # Average pairwise correlation-like measure over all sample pairs.
    # data: 2-D array, samples (rows) x timepoints (columns).
    ns = data.shape[0];
    nt = data.shape[1];
    pairs = make_pairs(ns);  # all sample index pairs (defined elsewhere)
    npp = len(pairs);
    mean = np.nanmean(data, axis = 0);  # per-timepoint mean over samples
    var = np.nanvar(data - mean, axis = 0);  # per-timepoint variance
    c = np.zeros(nt);
    for p in pairs:
        # NOTE(review): data[p[0]] is 1-D here, so nanmean(..., axis=0)
        # reduces the product over time to a scalar before dividing by the
        # per-timepoint variance vector — confirm this is intended rather
        # than an element-wise (no-reduction) covariance per timepoint.
        c += np.nanmean( (data[p[0]] - mean) * (data[p[1]] - mean), axis = 0) / var;
    c /= npp;  # average over the number of pairs
    return c;
def load_stage_binned(strain = 'n2', wid = all, dtype = 'speed', nbins_per_stage = 10, function = np.nanmean, nout = 1):
    """Load data and bin it within developmental stages.

    A scalar ``wid`` is promoted to a one-element list and that single
    worm's binned data is returned directly; otherwise a list with one
    entry per worm is returned. ``load``, ``stage_bins`` and ``bin_data``
    are defined elsewhere in this file.
    """
    single_worm = isinstance(wid, int)
    wids = [wid] if single_worm else wid

    data = load(strain=strain, wid=wids, dtype=dtype)
    sbins = stage_bins(strain=strain, wid=wids, nbins_per_stage=nbins_per_stage)
    binned = bin_data(data, sbins, function=function, nout=nout)

    return binned[0] if single_worm else binned

############################################################################
### Accessing aligned data
############################################################################
def binned_average(x, bin_size = 10, function = np.nanmean):
    """Binned average of a signal.

    Splits ``x`` into consecutive bins of ``bin_size`` samples (the last
    bin is NaN-padded when ``len(x)`` is not a multiple of ``bin_size``)
    and applies ``function`` (default: NaN-ignoring mean) within each bin.

    Returns an array of ``ceil(len(x) / bin_size)`` binned values.
    """
    n = len(x)
    remainder = n % bin_size
    if remainder != 0:
        # Pad the tail with NaNs so the signal divides evenly into bins;
        # nan-aware reducers ignore the padding.
        xp = np.pad(x, (0, bin_size - remainder), mode='constant',
                    constant_values=np.nan)
    else:
        xp = x
    # BUG FIX: use floor division (true division yields a float on Python 3,
    # which is invalid as a shape) and reshape to a view instead of mutating
    # xp.shape in place, which also altered the caller's array when no
    # padding was needed.
    n_bins = len(xp) // bin_size
    return function(xp.reshape(n_bins, bin_size), axis=1)

############################################################################
### Aligning Data
############################################################################
def plotTimeVsLvls(ax, runs, *args, **kwargs):
    """Plots Time vs TOL of @runs, as returned by MIMCDatabase.readRunData()
    ax is in instance of matplotlib.axes
    """
    ax.set_xlabel(r'$\ell$')
    ax.set_ylabel('Time (s)')
    ax.set_yscale('log')
    fnNorm = kwargs.pop("fnNorm")
    # allow callers to pass precomputed moments to avoid recomputation
    if "__calc_moments" in kwargs:
        _, _, Tl, M, _ = kwargs.pop("__calc_moments")
    else:
        _, _, Tl, M, _ = __calc_moments(runs,
                                        seed=kwargs.pop('seed', None),
                                        direction=kwargs.pop('direction', None),
                                        fnNorm=fnNorm)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    # central value with a 5-95 percentile band of per-level times
    min_tl = np.nanpercentile(Tl, 5, axis=1)
    med = np.nanmean(Tl, axis=1)  # NOTE(review): named like a median but is a mean
    max_tl = np.nanpercentile(Tl, 95, axis=1)
    line = ax.errorbar(np.arange(0, len(Tl)), med,
                       yerr=[med-min_tl, max_tl-med],
                       *args, **kwargs)
    return line[0].get_xydata(), [line]
def r(self):
    """
    Pearson correlation between the experimental and the fitted
    theoretical variogram.

    :return: Pearson's r
    """
    # get the experimental and theoretical variogram and calculate means
    experimental, model = self.__model_deviations()
    mx = np.nanmean(experimental)
    my = np.nanmean(model)

    # BUG FIX: np.float was a deprecated alias of the builtin float and was
    # removed in NumPy 1.24; use float directly (identical dtype).
    # calculate the single pearson correlation terms
    term1 = np.nansum(np.fromiter(map(lambda x, y: (x-mx) * (y-my),
                                      experimental, model), float))

    t2x = np.nansum(np.fromiter(map(lambda x: (x-mx)**2, experimental), float))
    t2y = np.nansum(np.fromiter(map(lambda y: (y-my)**2, model), float))

    return term1 / (np.sqrt(t2x * t2y))
def shifts_from_picked_coordinate(self, locs, coordinate):
    '''
    Calculates the shift from each channel to each other along a given
    coordinate.
    '''
    n_channels = len(locs)
    # Center of mass of every pick, per channel.
    coms = []
    for channel_locs in locs:
        channel_coms = [np.mean(getattr(group_locs, coordinate))
                        for group_locs in channel_locs]
        coms.append(channel_coms)
    # Pairwise shift: NaN-ignoring mean per-pick difference between channel
    # pairs. Only the upper triangle is filled; diagonal and lower triangle
    # stay zero.
    d = np.zeros((n_channels, n_channels))
    for i in range(n_channels - 1):
        for j in range(i + 1, n_channels):
            d[i, j] = np.nanmean([cj - ci for ci, cj in zip(coms[i], coms[j])])
    return d
def per_class_acc(predictions, label_tensor):
    """Print overall accuracy, mean IU and per-class accuracy for a batch
    of class-score predictions against the label tensor.

    ``fast_hist`` is defined elsewhere in this file.
    """
    labels = label_tensor
    batch_size = predictions.shape[0]
    num_class = predictions.shape[3]
    # Accumulate the confusion histogram over the batch.
    hist = np.zeros((num_class, num_class))
    for idx in range(batch_size):
        hist += fast_hist(labels[idx].flatten(),
                          predictions[idx].argmax(2).flatten(),
                          num_class)
    acc_total = np.diag(hist).sum() / hist.sum()
    print ('accuracy = %f'%np.nanmean(acc_total))
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print ('mean IU = %f'%np.nanmean(iu))
    for ii in range(num_class):
        # Guard against classes absent from the ground truth.
        denom = float(hist.sum(1)[ii])
        acc = 0.0 if denom == 0 else np.diag(hist)[ii] / denom
        print(" class # %d accuracy = %f "%(ii,acc))
def htmt(self):
    """Heterotrait-monotrait (HTMT) ratio of correlations.

    Returns a lower-triangular DataFrame (latent x latent) of HTMT values
    computed from the manifest-variable correlation matrix.
    """
    # Correlations between all manifest variables.
    htmt_ = pd.DataFrame(pd.DataFrame.corr(self.data_),
                         index=self.manifests, columns=self.manifests)
    mean = []
    allBlocks = []
    for i in range(self.lenlatent):
        block_ = self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]
        allBlocks.append(list(block_.values))
        # BUG FIX: DataFrame.ix was removed from pandas; .loc performs the
        # same label-based selection here.
        block = htmt_.loc[block_, block_]
        # Mean within-block (monotrait) correlation, excluding the diagonal.
        mean_ = (block - np.diag(np.diag(block))).values
        mean_[mean_ == 0] = np.nan
        mean.append(np.nanmean(mean_))

    comb = [[k, j] for k in range(self.lenlatent)
            for j in range(self.lenlatent)]

    # Geometric mean of the two blocks' monotrait correlations.
    comb_ = [(np.sqrt(mean[comb[i][1]] * mean[comb[i][0]]))
             for i in range(self.lenlatent ** 2)]

    comb__ = []
    for i in range(self.lenlatent ** 2):
        # Mean between-block (heterotrait) correlation.
        block = (htmt_.loc[allBlocks[comb[i][1]],
                           allBlocks[comb[i][0]]]).values
        # block[block == 1] = np.nan
        comb__.append(np.nanmean(block))

    htmt__ = np.divide(comb__, comb_)
    where_are_NaNs = np.isnan(htmt__)
    htmt__[where_are_NaNs] = 0

    # Keep only the strict lower triangle of the latent x latent matrix.
    htmt = pd.DataFrame(np.tril(htmt__.reshape(
        (self.lenlatent, self.lenlatent)), k=-1),
        index=self.latent, columns=self.latent)

    return htmt
def imputeSNPs(X):
    """Impute missing genotype values in place.

    Every NaN entry of the 2-D matrix ``X`` is replaced by the NaN-ignoring
    mean of its column; ``X`` is modified in place and returned.
    """
    col_means = np.nanmean(X, axis=0)
    nan_rows, nan_cols = np.where(np.isnan(X))
    # Vectorized equivalent of a per-column fill loop.
    X[nan_rows, nan_cols] = col_means[nan_cols]
    return X
def test_ignore_nan(self):
    """ Test that NaNs are handled correctly """
    # Five random frames, each with one NaN pixel injected at a random spot.
    stream = [np.random.random(size = (16,12)) for _ in range(5)]
    for s in stream:
        s[randint(0, 15), randint(0,11)] = np.nan

    with catch_warnings():
        simplefilter('ignore')  # silence empty-slice / all-NaN warnings
        from_iaverage = last(iaverage(stream, ignore_nan = True))
    # Reference result: numpy's nanmean over the depth-stacked frames.
    from_numpy = np.nanmean(np.dstack(stream), axis = 2)
    self.assertTrue(np.allclose(from_iaverage, from_numpy))
def test_against_numpy_nanmean(self):
    """ Test results against numpy.nanmean """
    # Ten random volumes, each with one NaN injected at a random position.
    source = [np.random.random((16, 12, 5)) for _ in range(10)]
    for arr in source:
        arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan
    stack = np.stack(source, axis = -1)
    # Compare imean against numpy's nanmean along every axis, including
    # the full reduction (axis=None).
    for axis in (0, 1, 2, None):
        with self.subTest('axis = {}'.format(axis)):
            from_numpy = np.nanmean(stack, axis = axis)
            out = last(imean(source, axis = axis, ignore_nan = True))
            self.assertSequenceEqual(from_numpy.shape, out.shape)
            self.assertTrue(np.allclose(out, from_numpy))
def trainAndCrossValidate(self):
    # K-fold cross validation (Python 2 print syntax): each fold serves once
    # as the validation set while the remaining folds are concatenated for
    # training; metrics are averaged over folds ignoring NaNs.
    num_folds = min(self.num_cross_folds, len(self.crossVal_X))
    accs = []
    aucs = []
    f1s = []
    precisions = []
    recalls = []
    for f in range(num_folds):
        val_X = self.crossVal_X[f]
        val_Y = self.crossVal_y[f]
        train_folds_X = [self.crossVal_X[x] for x in range(num_folds) if x != f]
        train_folds_Y = [self.crossVal_y[x] for x in range(num_folds) if x != f]
        # concatenate the remaining folds into a single training set
        train_X = train_folds_X[0]
        train_Y = train_folds_Y[0]
        for i in range(1,len(train_folds_X)):
            train_X = np.concatenate((train_X,train_folds_X[i]))
            train_Y = np.concatenate((train_Y,train_folds_Y[i]))
        # stash the fold split on self; trainAndValidate() reads these
        self.train_X = train_X
        self.train_y = train_Y
        self.val_X = val_X
        self.val_y = val_Y
        acc, auc, f1, precision, recall = self.trainAndValidate()
        accs.append(acc)
        aucs.append(auc)
        f1s.append(f1)
        precisions.append(precision)
        recalls.append(recall)
    if PRINT_CROSS_VAL_FOLDS:
        print "\t\tPer-fold cross-validation accuracy: ", accs
    return np.nanmean(accs), np.nanmean(aucs), np.nanmean(f1s), np.nanmean(precisions), np.nanmean(recalls)