The following 50 code examples, extracted from open source Python projects, illustrate how to use numpy.setdiff1d().
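Before the project examples, here is a minimal standalone sketch (mirroring the example in the function's docstring further below) of what numpy.setdiff1d() computes; the array contents are illustrative only:

import numpy as np

a = np.array([1, 2, 3, 2, 4, 1])
b = np.array([3, 4, 5, 6])
# setdiff1d returns the sorted, unique values of `a` that are not in `b`
print(np.setdiff1d(a, b))   # array([1, 2])
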
def confirmContent_NoOrder(self):
    '''
    Confirms that all information is the same except for the maxRange field. Order is ignored
    '''
    sf = StationFinder()
    stations = sf.findStation(unitCode = self.unitCode, distance = self.distance,
        climateParameters = self.climateParameters, sdate = self.sdate, edate = self.edate)
    test_data = numpy.array(stations._dumpMetaToList())
    ref_data = []
    with open(Test_StationFinder.rootFolder + self.refFile, 'r') as refFile:
        r = csv.reader(refFile)
        for line in r:
            ref_data.append(line)
    ref_data = numpy.array(ref_data)
    refFile.close()
    self.results = list(numpy.setdiff1d(ref_data[:, Test_StationFinder.testColumns],
                                        test_data[:, Test_StationFinder.testColumns]))

def confirmContent(self):
    '''
    Confirms that all information is the same, ignoring record order
    '''
    dr = StationDataRequestor()
    wxData = dr.getDailyWxObservations(climateStations = self.climateStations,
        climateParameters = self.climateParameters,
        sdate = self.sdate, edate = self.edate)
    wxData.export('temp.csv')
    infile = open('temp.csv', 'r')
    testData = infile.read()
    refDataFile = open(Test_StationDataRequestor_getDailyWxObs.rootFolder + self.refDataFile, 'r')
    refData = refDataFile.read()
    infile.close()
    refDataFile.close()
    os.remove('temp.csv')
    # compare line by line so that record order does not matter
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))

def confirmContent(self):
    '''
    Confirms that all information is the same, ignoring record order
    '''
    dr = StationDataRequestor()
    wxData = dr.getMonthlyWxSummaryByYear(climateStations = self.climateStations,
        climateParameters = self.climateParameters, reduceCodes = self.reduceCodes,
        sdate = self.sdate, edate = self.edate, maxMissing = self.maxMissing,
        includeNormals = self.includeNormals,
        includeNormalDepartures = self.includeNormalDepartures)
    wxData.export('temp.csv')
    infile = open('temp.csv', 'r')
    testData = infile.read()
    refDataFile = open(Test_StationDataRequestor_getMonthlyWxSummaryByYear.rootFolder + self.refDataFile, 'r')
    refData = refDataFile.read()
    infile.close()
    refDataFile.close()
    os.remove('temp.csv')
    # compare line by line so that record order does not matter
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))

def confirmContent(self):
    '''
    Confirms that all information is the same, ignoring record order
    '''
    dr = StationDataRequestor()
    wxData = dr.getYearlyWxSummary(climateStations = self.climateStations,
        climateParameters = self.climateParameters, reduceCodes = self.reduceCodes,
        sdate = self.sdate, edate = self.edate, maxMissing = self.maxMissing,
        includeNormals = self.includeNormals,
        includeNormalDepartures = self.includeNormalDepartures)
    wxData.export('temp.csv')
    infile = open('temp.csv', 'r')
    testData = infile.read()
    refDataFile = open(Test_StationDataRequestor_getYearlyWxSummary.rootFolder + self.refDataFile, 'r')
    refData = refDataFile.read()
    infile.close()
    refDataFile.close()
    os.remove('temp.csv')
    # compare line by line so that record order does not matter
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))

def check_multiplication_dims(dims, N, M, vidx=False, without=False):
    dims = array(dims, ndmin=1)
    if len(dims) == 0:
        dims = arange(N)
    if without:
        dims = setdiff1d(range(N), dims)
    if not np.in1d(dims, arange(N)).all():
        raise ValueError('Invalid dimensions')
    P = len(dims)
    sidx = np.argsort(dims)
    sdims = dims[sidx]
    if vidx:
        if M > N:
            raise ValueError('More multiplicants than dimensions')
        if M != N and M != P:
            raise ValueError('Invalid number of multiplicants')
        if P == M:
            vidx = sidx
        else:
            vidx = sdims
        return sdims, vidx
    else:
        return sdims

def _annotate_epochs(self, threshes, epochs):
    """Get essential annotations for epochs given thresholds."""
    ch_type = _get_ch_type_from_picks(self.picks, epochs.info)[0]
    drop_log, bad_sensor_counts = self._vote_bad_epochs(epochs)
    interp_channels, fix_log = self._get_epochs_interpolation(
        epochs, drop_log=drop_log, ch_type=ch_type)
    (bad_epochs_idx, sorted_epoch_idx,
     n_epochs_drop) = self._get_bad_epochs(bad_sensor_counts, ch_type=ch_type)
    bad_epochs_idx = np.sort(bad_epochs_idx)
    good_epochs_idx = np.setdiff1d(np.arange(len(epochs)), bad_epochs_idx)
    return (drop_log, bad_sensor_counts, interp_channels, fix_log,
            bad_epochs_idx, good_epochs_idx)

def has_approx_support(m, m_hat, prob=0.01):
    """Returns 1 if model selection error is less than or equal to prob rate,
    0 else.

    NOTE: why does np.nonzero/np.flatnonzero create so much problems?
    """
    m_nz = np.flatnonzero(np.triu(m, 1))
    m_hat_nz = np.flatnonzero(np.triu(m_hat, 1))

    upper_diagonal_mask = np.flatnonzero(np.triu(np.ones(m.shape), 1))
    not_m_nz = np.setdiff1d(upper_diagonal_mask, m_nz)

    intersection = np.in1d(m_hat_nz, m_nz)  # true positives
    not_intersection = np.in1d(m_hat_nz, not_m_nz)  # false positives

    true_positive_rate = 0.0
    if len(m_nz):
        true_positive_rate = 1. * np.sum(intersection) / len(m_nz)
        true_negative_rate = 1. - true_positive_rate

    false_positive_rate = 0.0
    if len(not_m_nz):
        false_positive_rate = 1. * np.sum(not_intersection) / len(not_m_nz)

    return int(np.less_equal(true_negative_rate + false_positive_rate, prob))

def transform(self, y): """Transform labels to normalized encoding. Parameters ---------- y : array-like of shape [n_samples] Target values. Returns ------- y : array-like of shape [n_samples] """ y = column_or_1d(y, warn=True) classes = np.unique(y) if len(np.intersect1d(classes, self.classes_)) < len(classes): diff = np.setdiff1d(classes, self.classes_) self.classes_ = np.hstack((self.classes_, diff)) return np.searchsorted(self.classes_, y)[0]
def initialize(self, coordinates):
    # not run until after first selection!
    # set up mines
    # randomly place mines anywhere *except* first selected location AND surrounding cells
    # so that first selection is always a 0
    # weird, yes, but that's how the original minesweeper worked
    availableCells = range(self.totalCells)
    selected = coordinates[0]*self.dim2 + coordinates[1]
    offLimits = np.array([selected-self.dim2-1, selected-self.dim2, selected-self.dim2+1,
                          selected-1, selected, selected+1,
                          selected+self.dim2-1, selected+self.dim2, selected+self.dim2+1])  # out of bounds is ok
    availableCells = np.setdiff1d(availableCells, offLimits)
    self.nMines = np.minimum(self.nMines, len(availableCells))  # in case there are fewer remaining cells than mines to place
    minesFlattened = np.zeros([self.totalCells])
    minesFlattened[np.random.choice(availableCells, self.nMines, replace=False)] = 1
    self.mines = minesFlattened.reshape([self.dim1, self.dim2])
    # set up neighbors
    for i in range(self.dim1):
        for j in range(self.dim2):
            nNeighbors = 0
            for k in range(-1, 2):
                if i + k >= 0 and i + k < self.dim1:
                    for l in range(-1, 2):
                        if j + l >= 0 and j + l < self.dim2 and (k != 0 or l != 0):
                            nNeighbors += self.mines[i + k, j + l]
            self.neighbors[i, j] = nNeighbors
    # done
    self.initialized = True

def find_neighbours(self, idx, features):
    """
    Finds the neighbours of the given point which are at a maximum distance
    of self.eps from it.

    :param idx: Index of the current point
    :param features: Dataset, array-like object of shape (nb_samples, nb_features)
    :returns: List containing the indexes of the neighbours
    """
    data = features[np.setdiff1d(np.arange(features.shape[0]), idx)]
    distances = self.get_distances(features[idx], data)
    same_cluster = [idx]
    for i, dist in enumerate(distances.tolist()[0]):
        real_index = i if i < idx else i + 1
        if dist <= self.eps:
            same_cluster.append(real_index)
    return same_cluster

def load_co2_data(prop=0.8):
    from sklearn.datasets import fetch_mldata
    from sklearn import cross_validation
    data = fetch_mldata('mauna-loa-atmospheric-co2').data
    X = data[:, [1]]
    y = data[:, 0]
    y = y[:, None]
    X = X.astype(np.float64)
    ntrain = y.shape[0]
    train_inds = npr.choice(range(ntrain), int(prop*ntrain), replace=False)
    valid_inds = np.setdiff1d(range(ntrain), train_inds)
    X_train, y_train = X[train_inds].copy(), y[train_inds].copy()
    X_valid, y_valid = X[valid_inds].copy(), y[valid_inds].copy()
    return X_train, y_train, X_valid, y_valid


############################ Training & Visualizing ############################

def transform(self, y): """Transform labels to normalized encoding. Parameters ---------- y : array-like of shape [n_samples] Target values. Returns ------- y : array-like of shape [n_samples] """ check_is_fitted(self, 'classes_') y = column_or_1d(y.ravel(), warn=True) classes = np.unique(y) if isinstance(classes[0], np.float64): classes = classes[np.isfinite(classes)] _check_numpy_unicode_bug(classes) if len(np.intersect1d(classes, self.classes_)) < len(classes): diff = np.setdiff1d(classes, self.classes_) print(self.classes_) raise ValueError("y contains new labels: %s" % str(diff)) return np.searchsorted(self.classes_, y).reshape(-1, 1)
def inverse_transform(self, y):
    """Transform labels back to original encoding.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Target values.

    Returns
    -------
    y : numpy array of shape [n_samples]
    """
    check_is_fitted(self, 'classes_')

    diff = np.setdiff1d(y, np.arange(len(self.classes_)))
    if len(diff):
        raise ValueError("y contains new labels: %s" % str(diff))
    y = np.asarray(y)
    return self.classes_[y]

def sortclusters(CoPaM, Mc, minGenesinClust = 11):
    Mcloc = np.array(Mc)
    [Np, K] = Mcloc.shape
    largerThanMax = np.max(Mcloc) + 1
    Cf = np.zeros(K, dtype=int) - 1
    for i in range(Np-1, -1, -1):
        C = np.argsort(Mcloc[i])[::-1]
        M = Mcloc[i, C]
        Cf[np.all([M >= minGenesinClust, Cf == 0], axis=0)] = \
            C[np.all([M >= minGenesinClust, Cf == 0], axis=0)]
        if i > 0:
            Mcloc[i-1, Cf[Cf != 0]] = largerThanMax
    Cf[Cf == -1] = np.setdiff1d(np.arange(K), Cf)
    return np.array(CoPaM)[:, Cf]


# Clustering helping function for parallel loop

def test_fix_types():
    """Test fixing of channel types
    """
    for fname, change in ((hp_fif_fname, True), (test_fif_fname, False),
                          (ctf_fname, False)):
        raw = Raw(fname)
        mag_picks = pick_types(raw.info, meg='mag')
        other_picks = np.setdiff1d(np.arange(len(raw.ch_names)), mag_picks)
        # we don't actually have any files suffering from this problem, so
        # fake it
        if change:
            for ii in mag_picks:
                raw.info['chs'][ii]['coil_type'] = FIFF.FIFFV_COIL_VV_MAG_T2
        orig_types = np.array([ch['coil_type'] for ch in raw.info['chs']])
        raw.fix_mag_coil_types()
        new_types = np.array([ch['coil_type'] for ch in raw.info['chs']])
        if not change:
            assert_array_equal(orig_types, new_types)
        else:
            assert_array_equal(orig_types[other_picks], new_types[other_picks])
            assert_true((orig_types[mag_picks] != new_types[mag_picks]).all())
            assert_true((new_types[mag_picks] ==
                         FIFF.FIFFV_COIL_VV_MAG_T3).all())

def get_finished_jobs(job_ids):
    """Get a list of finished job ids for the given list of jobs

    Keyword arguments:
    job_ids -- list of jobs that shall be checked
    """
    data = get_qstat_as_df()
    finished_jobs = []
    if len(data) == 0:
        return job_ids
    ids_in_data = data[data["JOBID"].isin(job_ids)]
    finished_jobs = np.setdiff1d(job_ids, ids_in_data["JOBID"])
    return np.array(finished_jobs)

def fix_predictions(self, X, predictions, bias):
    idxs_users_missing, idxs_items_missing = self.indices_missing

    # Set average when neither the user nor the item exist
    g_avg = bias['globalAvg']
    common_indices = np.intersect1d(idxs_users_missing, idxs_items_missing)
    predictions[common_indices] = g_avg

    # Only users exist (return average + {dUser})
    if 'dUsers' in bias:
        missing_users = np.setdiff1d(idxs_users_missing, common_indices)
        if len(missing_users) > 0:
            user_idxs = X[missing_users, self.order[0]]
            predictions[missing_users] = g_avg + bias['dUsers'][user_idxs]

    # Only items exist (return average + {dItem})
    if 'dItems' in bias:
        missing_items = np.setdiff1d(idxs_items_missing, common_indices)
        if len(missing_items) > 0:
            item_idxs = X[missing_items, self.order[1]]
            predictions[missing_items] = g_avg + bias['dItems'][item_idxs]

    return predictions

def measure_background(image, Fibers, width=30, niter=3, order=3):
    t = []
    a, b = image.shape
    ygrid, xgrid = np.indices(image.shape)
    ygrid = 1. * ygrid.ravel() / a
    xgrid = 1. * xgrid.ravel() / b
    image = image.ravel()
    s = np.arange(a*b)
    for fiber in Fibers:
        t.append(fiber.D*fiber.yind + fiber.xind)
    t = np.hstack(t)
    t = np.array(t, dtype=int)
    ind = np.setdiff1d(s, t)
    mask = np.zeros((a*b))
    mask[ind] = 1.
    mask[ind] = 1. - is_outlier(image[ind])
    sel = np.where(mask == 1.)[0]
    for i in xrange(niter):
        V = polyvander2d(xgrid[sel], ygrid[sel], [order, order])
        sol = np.linalg.lstsq(V, image[sel])[0]
        vals = np.dot(V, sol) - image[sel]
        sel = sel[~is_outlier(vals)]
    V = polyvander2d(xgrid, ygrid, [order, order])
    back = np.dot(V, sol).reshape(a, b)
    return back

def _parallel_predict_log_proba(estimators, estimators_features, X, n_classes):
    """Private function used to compute log probabilities within a job."""
    n_samples = X.shape[0]
    log_proba = np.empty((n_samples, n_classes))
    log_proba.fill(-np.inf)
    all_classes = np.arange(n_classes, dtype=np.int)

    for estimator, features in zip(estimators, estimators_features):
        log_proba_estimator = estimator.predict_log_proba(X[:, features])

        if n_classes == len(estimator.classes_):
            log_proba = np.logaddexp(log_proba, log_proba_estimator)

        else:
            log_proba[:, estimator.classes_] = np.logaddexp(
                log_proba[:, estimator.classes_],
                log_proba_estimator[:, range(len(estimator.classes_))])

            missing = np.setdiff1d(all_classes, estimator.classes_)
            log_proba[:, missing] = np.logaddexp(log_proba[:, missing],
                                                 -np.inf)

    return log_proba

def transform(self, y): """Transform labels to normalized encoding. Parameters ---------- y : array-like of shape [n_samples] Target values. Returns ------- y : array-like of shape [n_samples] """ check_is_fitted(self, 'classes_') y = column_or_1d(y, warn=True) classes = np.unique(y) _check_numpy_unicode_bug(classes) if len(np.intersect1d(classes, self.classes_)) < len(classes): diff = np.setdiff1d(classes, self.classes_) raise ValueError("y contains new labels: %s" % str(diff)) return np.searchsorted(self.classes_, y)
def _match_info(self):
    """
    Helper function to create match info
    """
    assert self.matches is not None, 'No matches yet!'
    self.matches = {
        'match_pairs': self.matches,
        'treated': np.unique(list(self.matches.keys())),
        'control': np.unique(list(self.matches.values()))
    }
    self.matches['dropped'] = np.setdiff1d(
        list(range(self.nobs)),
        np.append(self.matches['treated'], self.matches['control']))

def confirmAsciiGrid(self):
    gr = GridRequestor()
    data = gr.getGrids(sdate = self.sdate, edate = self.edate,
        unitCode = self.unitCode, distance = self.distance,
        climateParameters = self.climateParameters, duration = self.duration)
    testDataFile = data.export()[0]
    testFile = open(testDataFile, 'r')
    testData = testFile.read()
    testFile.close()
    os.remove(testDataFile)
    os.remove(testDataFile[:-3] + 'prj')
    refDataFile = open(Test_GridRequestor.rootFolder + self.refDataFile, 'r')
    refData = refDataFile.read()
    refDataFile.close()
    # compare line by line so that record order does not matter
    self.result = list(numpy.setdiff1d(refData.split('\n'), testData.split('\n')))

def prepare(data_valid):
    print(data_valid.shape)
    batch = data_valid.shape[0]
    N = data_valid.shape[1]
    data_invalid = np.random.randint(0, 2, (batch, N), dtype=np.int8)
    print(data_valid.shape, data_invalid.shape)
    ai = data_invalid.view([('', data_invalid.dtype)] * N)
    av = data_valid.view([('', data_valid.dtype)] * N)
    data_invalid = np.setdiff1d(ai, av).view(data_valid.dtype).reshape((-1, N))
    return prepare_binary_classification_data(data_valid, data_invalid)

# default values

def prepare(data):
    num = len(data)
    dim = data.shape[1]//2
    print(data.shape, num, dim)
    pre, suc = data[:, :dim], data[:, dim:]

    suc_invalid = np.copy(suc)
    random.shuffle(suc_invalid)
    data_invalid = np.concatenate((pre, suc_invalid), axis=1)

    ai = data_invalid.view([('', data_invalid.dtype)] * 2*dim)
    av = data.view([('', data.dtype)] * 2*dim)
    data_invalid = np.setdiff1d(ai, av).view(data_invalid.dtype).reshape((-1, 2*dim))

    inputs = np.concatenate((data, data_invalid), axis=0)
    outputs = np.concatenate((np.ones((num, 1)), np.zeros((len(data_invalid), 1))), axis=0)
    print(inputs.shape, outputs.shape)
    io = np.concatenate((inputs, outputs), axis=1)
    random.shuffle(io)

    train_n = int(2*num*0.9)
    train, test = io[:train_n], io[train_n:]
    train_in, train_out = train[:, :dim*2], train[:, dim*2:]
    test_in, test_out = test[:, :dim*2], test[:, dim*2:]

    return train_in, train_out, test_in, test_out

# default values

def set_difference(a, b):
    assert a.shape[1:] == b.shape[1:]
    a = a.copy()
    b = b.copy()
    a_v = a.view([('', a.dtype)] * a.shape[1])
    b_v = b.view([('', b.dtype)] * b.shape[1])
    return np.setdiff1d(a_v, b_v).view(a.dtype).reshape((-1, a.shape[1]))

def test_indices(self, test_indices):
    if test_indices is None:
        self._train_indices = np.arange(0, len(self.y))
    else:
        self._test_indices = test_indices
        self._train_indices = np.setdiff1d(np.arange(0, len(self.y)), self.test_indices)

def setdiff1d(ar1, ar2, assume_unique=False):
    """
    Find the set difference of two arrays.

    Return the sorted, unique values in `ar1` that are not in `ar2`.

    Parameters
    ----------
    ar1 : array_like
        Input array.
    ar2 : array_like
        Input comparison array.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.

    Returns
    -------
    setdiff1d : ndarray
        Sorted 1D array of values in `ar1` that are not in `ar2`.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4, 1])
    >>> b = np.array([3, 4, 5, 6])
    >>> np.setdiff1d(a, b)
    array([1, 2])

    """
    if assume_unique:
        ar1 = np.asarray(ar1).ravel()
    else:
        ar1 = unique(ar1)
        ar2 = unique(ar2)
    return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]

def stabilize(self, prior_columns, percent):
    """
    This activates prior columns to force active in order to maintain
    the given percent of column overlap between time steps.  Always call
    this between compute and learn!
    """
    # num_active = (len(self.columns) + len(prior_columns)) / 2
    num_active = len(self.columns)
    overlap = self.columns.overlap(prior_columns)
    stabile_columns = int(round(num_active * overlap))
    target_columns = int(round(num_active * percent))
    add_columns = target_columns - stabile_columns
    if add_columns <= 0:
        return

    eligable_columns = np.setdiff1d(prior_columns.flat_index, self.columns.flat_index)
    eligable_excite = self.raw_excitment[eligable_columns]
    selected_col_nums = np.argpartition(-eligable_excite, add_columns-1)[:add_columns]
    selected_columns = eligable_columns[selected_col_nums]
    selected_index = np.unravel_index(selected_columns, self.columns.dimensions)

    # Learn.  Note: selected columns will learn twice.  The previously
    # active segments learn now, the current most excited segments in the
    # method SP.learn().
    # Or learn not at all if theres a bug in my code...
    # if self.multisegment:
    #     if hasattr(self, 'prior_segment_excitement'):
    #         segment_excitement = self.prior_segment_excitement[selected_index]
    #         seg_idx = np.argmax(segment_excitement, axis=-1)
    #         self.proximal.learn_outputs(input_sdr=input_sdr,
    #                                     output_sdr=selected_index + (seg_idx,))
    #     self.prev_segment_excitement = self.segment_excitement
    # else:
    #     1/0
    self.columns.flat_index = np.concatenate([self.columns.flat_index, selected_columns])

def sample_weights(self, idxs, scores):
    N = len(scores)
    S1 = scores[np.setdiff1d(np.arange(N), idxs)].sum()
    return np.tile([float(N), float(S1)], (len(idxs), 1))

def _generate_pairs(ids):
    id_i, id_j = np.meshgrid(ids, ids, indexing='ij')  # Grouping the input object rois
    id_i = id_i.reshape(-1)
    id_j = id_j.reshape(-1)
    # remove the diagonal items
    id_num = len(ids)
    diagonal_items = np.array(range(id_num))
    diagonal_items = diagonal_items * id_num + diagonal_items
    all_id = range(len(id_i))
    selected_id = np.setdiff1d(all_id, diagonal_items)
    id_i = id_i[selected_id]
    id_j = id_j[selected_id]

    return id_i, id_j

def ttv(self, v, modes=[], without=False):
    """
    Tensor times vector product

    Parameters
    ----------
    v : 1-d array or tuple of 1-d arrays
        Vector to be multiplied with tensor.
    modes : array_like of integers, optional
        Modes in which the vectors should be multiplied.
    without : boolean, optional
        If True, vectors are multiplied in all modes **except** the
        modes specified in ``modes``.
    """
    if not isinstance(v, tuple):
        v = (v, )
    dims, vidx = check_multiplication_dims(modes, self.ndim, len(v), vidx=True, without=without)
    for i in range(len(dims)):
        if not len(v[vidx[i]]) == self.shape[dims[i]]:
            raise ValueError('Multiplicant is wrong size')
    remdims = np.setdiff1d(range(self.ndim), dims)
    return self._ttv_compute(v, dims, vidx, remdims)

#@abstractmethod
#def ttt(self, other, modes=None):
#    pass

def test_boolean_spheres_overlap():
    r"""Test to make sure that boolean objects (spheres, overlap)
    behave the way we expect.

    Test overlapping spheres.
    """
    ds = fake_amr_ds()
    sp1 = ds.sphere([0.45, 0.45, 0.45], 0.15)
    sp2 = ds.sphere([0.55, 0.55, 0.55], 0.15)
    # Get indices of both.
    i1 = sp1["index", "morton_index"]
    i2 = sp2["index", "morton_index"]
    # Make some booleans
    bo1 = sp1 & sp2
    bo2 = sp1 - sp2
    bo3 = sp1 | sp2
    bo4 = ds.union([sp1, sp2])
    bo5 = ds.intersection([sp1, sp2])
    # Now make sure the indices also behave as we expect.
    lens = np.intersect1d(i1, i2)
    apple = np.setdiff1d(i1, i2)
    both = np.union1d(i1, i2)
    b1 = bo1["index", "morton_index"]
    b1.sort()
    b2 = bo2["index", "morton_index"]
    b2.sort()
    b3 = bo3["index", "morton_index"]
    b3.sort()
    assert_array_equal(b1, lens)
    assert_array_equal(b2, apple)
    assert_array_equal(b3, both)
    b4 = bo4["index", "morton_index"]
    b4.sort()
    b5 = bo5["index", "morton_index"]
    b5.sort()
    assert_array_equal(b3, b4)
    assert_array_equal(b1, b5)
    bo6 = sp1 ^ sp2
    b6 = bo6["index", "morton_index"]
    b6.sort()
    assert_array_equal(b6, np.setxor1d(i1, i2))

def test_boolean_regions_overlap():
    r"""Test to make sure that boolean objects (regions, overlap)
    behave the way we expect.

    Test overlapping regions.
    """
    ds = fake_amr_ds()
    re1 = ds.region([0.55]*3, [0.5]*3, [0.6]*3)
    re2 = ds.region([0.6]*3, [0.55]*3, [0.65]*3)
    # Get indices of both.
    i1 = re1["index", "morton_index"]
    i2 = re2["index", "morton_index"]
    # Make some booleans
    bo1 = re1 & re2
    bo2 = re1 - re2
    bo3 = re1 | re2
    bo4 = ds.union([re1, re2])
    bo5 = ds.intersection([re1, re2])
    # Now make sure the indices also behave as we expect.
    cube = np.intersect1d(i1, i2)
    bite_cube = np.setdiff1d(i1, i2)
    both = np.union1d(i1, i2)
    b1 = bo1["index", "morton_index"]
    b1.sort()
    b2 = bo2["index", "morton_index"]
    b2.sort()
    b3 = bo3["index", "morton_index"]
    b3.sort()
    assert_array_equal(b1, cube)
    assert_array_equal(b2, bite_cube)
    assert_array_equal(b3, both)
    b4 = bo4["index", "morton_index"]
    b4.sort()
    b5 = bo5["index", "morton_index"]
    b5.sort()
    assert_array_equal(b3, b4)
    assert_array_equal(b1, b5)
    bo6 = re1 ^ re2
    b6 = bo6["index", "morton_index"]
    b6.sort()
    assert_array_equal(b6, np.setxor1d(i1, i2))

def test_boolean_ellipsoids_overlap():
    r"""Test to make sure that boolean objects (ellipsoids, overlap)
    behave the way we expect.

    Test overlapping ellipsoids.
    """
    ds = fake_amr_ds()
    ell1 = ds.ellipsoid([0.45]*3, 0.05, 0.05, 0.05, np.array([0.1]*3), 0.1)
    ell2 = ds.ellipsoid([0.55]*3, 0.05, 0.05, 0.05, np.array([0.1]*3), 0.1)
    # Get indices of both.
    i1 = ell1["index", "morton_index"]
    i2 = ell2["index", "morton_index"]
    # Make some booleans
    bo1 = ell1 & ell2
    bo2 = ell1 - ell2
    bo3 = ell1 | ell2
    bo4 = ds.union([ell1, ell2])
    bo5 = ds.intersection([ell1, ell2])
    # Now make sure the indices also behave as we expect.
    overlap = np.intersect1d(i1, i2)
    diff = np.setdiff1d(i1, i2)
    both = np.union1d(i1, i2)
    b1 = bo1["index", "morton_index"]
    b1.sort()
    b2 = bo2["index", "morton_index"]
    b2.sort()
    b3 = bo3["index", "morton_index"]
    b3.sort()
    assert_array_equal(b1, overlap)
    assert_array_equal(b2, diff)
    assert_array_equal(b3, both)
    b4 = bo4["index", "morton_index"]
    b4.sort()
    b5 = bo5["index", "morton_index"]
    b5.sort()
    assert_array_equal(b3, b4)
    assert_array_equal(b1, b5)
    bo6 = ell1 ^ ell2
    b6 = bo6["index", "morton_index"]
    b6.sort()
    assert_array_equal(b6, np.setxor1d(i1, i2))

def test_boolean_slices_overlap():
    r"""Test to make sure that boolean objects (slices, overlap)
    behave the way we expect.

    Test overlapping slices.
    """
    ds = fake_amr_ds()
    sl1 = ds.r[:, :, 0.25]
    sl2 = ds.r[:, 0.75, :]
    # Get indices of both.
    i1 = sl1["index", "morton_index"]
    i2 = sl2["index", "morton_index"]
    # Make some booleans
    bo1 = sl1 & sl2
    bo2 = sl1 - sl2
    bo3 = sl1 | sl2
    bo4 = ds.union([sl1, sl2])
    bo5 = ds.intersection([sl1, sl2])
    # Now make sure the indices also behave as we expect.
    line = np.intersect1d(i1, i2)
    orig = np.setdiff1d(i1, i2)
    both = np.union1d(i1, i2)
    b1 = bo1["index", "morton_index"]
    b1.sort()
    b2 = bo2["index", "morton_index"]
    b2.sort()
    b3 = bo3["index", "morton_index"]
    b3.sort()
    assert_array_equal(b1, line)
    assert_array_equal(b2, orig)
    assert_array_equal(b3, both)
    b4 = bo4["index", "morton_index"]
    b4.sort()
    b5 = bo5["index", "morton_index"]
    b5.sort()
    assert_array_equal(b3, b4)
    assert_array_equal(b1, b5)
    bo6 = sl1 ^ sl2
    b6 = bo6["index", "morton_index"]
    b6.sort()
    assert_array_equal(b6, np.setxor1d(i1, i2))

def analyze_false(validData, validDataNumbers, validLabels, model):
    'Calculating precision and recall for best model...'
    predictions = np.squeeze((model.predict(validDataNumbers) > 0.5).astype('int32'))
    c1_inds = np.where(validLabels == 1)[0]
    pos_inds = np.where((predictions+validLabels) == 2)[0]  # np.squeeze(predictions) == validLabels
    neg_inds = np.setdiff1d(c1_inds, pos_inds)
    seq_lengths = np.zeros((validData.shape[0]))
    for ind, row in np.ndenumerate(validData):
        seq_lengths[ind] = len(wordpunct_tokenize(row.lower().strip()))

    mean_true_length = np.mean(seq_lengths[pos_inds])
    mean_false_length = np.mean(seq_lengths[neg_inds])

    return mean_false_length, mean_true_length

def _get_epochs_interpolation(self, epochs, drop_log, ch_type,
                              verbose='progressbar'):
    """Interpolate the bad epochs."""
    # 1: bad segment, # 2: interpolated
    fix_log = drop_log.copy()
    ch_names = epochs.ch_names
    non_picks = np.setdiff1d(range(epochs.info['nchan']), self.picks)
    interp_channels = list()
    n_interpolate = self.n_interpolate[ch_type]
    for epoch_idx in range(len(epochs)):
        n_bads = drop_log[epoch_idx, self.picks].sum()
        if n_bads == 0:
            continue
        else:
            if n_bads <= n_interpolate:
                interp_chs_mask = drop_log[epoch_idx] == 1
            else:
                # get peak-to-peak for channels in that epoch
                data = epochs[epoch_idx].get_data()[0]
                peaks = np.ptp(data, axis=-1)
                peaks[non_picks] = -np.inf
                # find channels which are bad by rejection threshold
                interp_chs_mask = drop_log[epoch_idx] == 1
                # ignore good channels
                peaks[~interp_chs_mask] = -np.inf
                # find the ordering of channels amongst the bad channels
                sorted_ch_idx_picks = np.argsort(peaks)[::-1]
                # then select only the worst n_interpolate channels
                interp_chs_mask[
                    sorted_ch_idx_picks[n_interpolate:]] = False

        fix_log[epoch_idx][interp_chs_mask] = 2
        interp_chs = np.where(interp_chs_mask)[0]
        interp_chs = [ch_name for idx, ch_name in enumerate(ch_names)
                      if idx in interp_chs]
        interp_channels.append(interp_chs)
    return interp_channels, fix_log

def comprz_dB(xx, fr=0.05):
    """ Compress signal in such a way that is logarithmic but also avoids negative values """
    x = numpy.copy(xx)
    sh = xx.shape
    x = x.reshape(-1)
    x = comprz(x)
    x = numpy.setdiff1d(x, numpy.array([0.0]))
    xs = numpy.sort(x)
    mini = xs[int(fr*len(x))]
    mn = numpy.ones_like(xx)*mini
    xx = numpy.where(xx > mini, xx, mn)
    xx = xx.reshape(sh)
    return(10.0*numpy.log10(xx))

def random_balanced_partitions(data, first_size, labels, random=np.random):
    """Split data into a balanced random partition and the rest

    Partition the `data` array into two random partitions, using
    the `labels` array (of equal size) to guide the choice of
    elements of the first returned array.

    Example:
        random_balanced_partition(['a', 'b', 'c'], 2, [3, 5, 5])
        # Both labels 3 and 5 need to be presented once, so
        # the result can be either (['a', 'b'], ['c']) or
        # (['a', 'c'], ['b']) but not (['b', 'c'], ['a']).

    Args:
        data (ndarray): data to be split
        first_size (int): size of the first partition
        balance (ndarray): according to which balancing is done
        random (RandomState): source of randomness

    Return:
        tuple of two ndarrays
    """
    assert len(data) == len(labels)

    classes, class_counts = np.unique(labels, return_counts=True)
    assert len(classes) <= 10000, "surprisingly many classes: {}".format(len(classes))
    assert first_size % len(classes) == 0, "not divisible: {}/{}".format(first_size, len(classes))
    assert np.all(class_counts >= first_size // len(classes)), "not enough examples of some class"

    idxs_per_class = [np.nonzero(labels == klass)[0] for klass in classes]
    chosen_idxs_per_class = [
        random.choice(idxs, first_size // len(classes), replace=False)
        for idxs in idxs_per_class
    ]
    first_idxs = np.concatenate(chosen_idxs_per_class)
    second_idxs = np.setdiff1d(np.arange(len(labels)), first_idxs)

    assert first_idxs.shape == (first_size,)
    assert second_idxs.shape == (len(data) - first_size,)
    return data[first_idxs], data[second_idxs]

def least_squares_multipliers(self, x):
    """Compute least-squares multipliers estimates."""
    al_model = self.model
    slack_model = self.model.model

    m = slack_model.m
    n = slack_model.n
    lim = max(2 * m, 2 * n)
    J = slack_model.jop(x)

    # Determine which bounds are active to remove appropriate columns of J
    on_bound = self.get_active_bounds(x, slack_model.Lvar, slack_model.Uvar)
    free_vars = np.setdiff1d(np.arange(n, dtype=np.int), on_bound)
    Jred = ReducedJacobian(J, np.arange(m, dtype=np.int), free_vars)

    g = slack_model.grad(x) - J.T * al_model.pi

    lsqr = LSQRSolver(Jred.T)
    lsqr.solve(g[free_vars], itnlim=lim)
    if lsqr.optimal:
        al_model.pi += lsqr.x.copy()
    else:
        self.log.debug("lsqr failed to converge")
    return

def _open_file(self):
    # only apply _skip property at the beginning of the trajectory
    skip = self._data_source._skip[self._itraj] + self.skip if self._t == 0 else 0
    nt = self._data_source._skip[self._itraj] + self._data_source._lengths[self._itraj]

    # calculate an index set, which rows to skip (includes stride)
    skip_rows = np.empty(0)

    if skip > 0:
        skip_rows = np.zeros(nt)
        skip_rows[:skip] = np.arange(skip)

    if not self.uniform_stride:
        all_frames = np.arange(nt)
        skip_rows = np.setdiff1d(all_frames, self.ra_indices_for_traj(self._itraj),
                                 assume_unique=True)
    elif self.stride > 1:
        all_frames = np.arange(nt)
        if skip_rows is not None:
            wanted_frames = np.arange(skip, nt, self.stride)
        else:
            wanted_frames = np.arange(0, nt, self.stride)
        skip_rows = np.setdiff1d(
            all_frames, wanted_frames, assume_unique=True)

    self._skip_rows = skip_rows

    try:
        fh = open(self._data_source.filenames[self._itraj],
                  mode=self._data_source.DEFAULT_OPEN_MODE)
        self._file_handle = fh
    except EnvironmentError:
        self._logger.exception()
        raise

def load_boston_data(prop=400/506):
    from sklearn import datasets
    boston = datasets.load_boston()
    X, y = boston.data, boston.target
    y = y[:, None]
    ntrain = y.shape[0]
    train_inds = npr.choice(range(ntrain), int(prop*ntrain), replace=False)
    valid_inds = np.setdiff1d(range(ntrain), train_inds)
    X_train, y_train = X[train_inds].copy(), y[train_inds].copy()
    X_valid, y_valid = X[valid_inds].copy(), y[valid_inds].copy()
    return X_train, y_train, X_valid, y_valid


############################ Training Phase ############################

def _load_cv_data(self, list_files): """Load training and cross-validation sets.""" # Split files for training and validation sets val_files = np.array_split(list_files, self.n_folds) train_files = np.setdiff1d(list_files, val_files[self.fold_idx]) # Load a npz file print "Load training set:" data_train, label_train = self._load_npz_list_files(train_files) print " " print "Load validation set:" data_val, label_val = self._load_npz_list_files(val_files[self.fold_idx]) print " " # Reshape the data to match the input of the model - conv2d data_train = np.squeeze(data_train) data_val = np.squeeze(data_val) data_train = data_train[:, :, np.newaxis, np.newaxis] data_val = data_val[:, :, np.newaxis, np.newaxis] # Casting data_train = data_train.astype(np.float32) label_train = label_train.astype(np.int32) data_val = data_val.astype(np.float32) label_val = label_val.astype(np.int32) return data_train, label_train, data_val, label_val
def _load_cv_data(self, list_files): """Load sequence training and cross-validation sets.""" # Split files for training and validation sets val_files = np.array_split(list_files, self.n_folds) train_files = np.setdiff1d(list_files, val_files[self.fold_idx]) # Load a npz file print "Load training set:" data_train, label_train = self._load_npz_list_files(train_files) print " " print "Load validation set:" data_val, label_val = self._load_npz_list_files(val_files[self.fold_idx]) print " " return data_train, label_train, data_val, label_val