The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.unique().
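Before the project examples, here is a minimal sketch (not taken from any of the projects below; the array values are made up for illustration) of what numpy.unique() returns and what its optional flags do:

import numpy as np

a = np.array([3, 1, 2, 3, 1])
# unique values are always returned sorted
vals = np.unique(a)                                   # array([1, 2, 3])
# return_index  -> first occurrence of each unique value
# return_inverse -> indices that reconstruct the original array from vals
# return_counts -> how often each unique value appears
vals, first_idx, inverse, counts = np.unique(a, return_index=True,
                                             return_inverse=True,
                                             return_counts=True)
assert np.array_equal(vals[inverse], a)               # original array recovered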
def __init__(self, filename, target_map, classifier='svm'):
    self.seed_ = 0
    self.filename_ = filename
    self.target_map_ = target_map
    self.target_ids_ = (np.unique(target_map.keys())).astype(np.int32)
    self.epoch_no_ = 0
    self.st_time_ = time.time()

    # Setup classifier
    print('-------------------------------')
    print('====> Building Classifier, setting class weights')
    if classifier == 'svm':
        self.clf_hyparams_ = {'C': [0.01, 0.1, 1.0, 10.0, 100.0], 'class_weight': ['balanced']}
        self.clf_base_ = LinearSVC(random_state=self.seed_)
    elif classifier == 'sgd':
        self.clf_hyparams_ = {'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0], 'class_weight': ['auto']}  # 'loss':['hinge'],
        self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False, random_state=self.seed_,
                                  warm_start=True, n_jobs=-1, n_iter=1, verbose=4)
    else:
        raise Exception('Unknown classifier type %s. Choose from [sgd, svm, gradient-boosting, extra-trees]' % classifier)
def silhouette_score(series, clusters):
    distances = np.zeros((series.shape[0], series.shape[0]))
    for idx_a, metric_a in enumerate(series):
        for idx_b, metric_b in enumerate(series):
            distances[idx_a, idx_b] = _sbd(metric_a, metric_b)[0]
    labels = np.zeros(series.shape[0])
    for i, (cluster, indicies) in enumerate(clusters):
        for index in indicies:
            labels[index] = i
    # silhouette is only defined, if we have 2 clusters with assignments at
    # minimum
    if len(np.unique(labels)) == 1 or (len(np.unique(labels)) >= distances.shape[0]):
        #if len(np.unique(labels)) == 1:
        return labels, -1
    else:
        return labels, _silhouette_score(distances, labels, metric='precomputed')
def transform(self, img, lbl):
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= self.mean
    img = m.imresize(img, (self.img_size[0], self.img_size[1]))
    # Resize scales images from 0 to 255, thus we need
    # to divide by 255.0
    img = img.astype(float) / 255.0
    # NHWC -> NCWH
    img = img.transpose(2, 0, 1)

    lbl = self.encode_segmap(lbl)
    classes = np.unique(lbl)
    lbl = lbl.astype(float)
    lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
    lbl = lbl.astype(int)
    assert(np.all(classes == np.unique(lbl)))

    img = torch.from_numpy(img).float()
    lbl = torch.from_numpy(lbl).long()
    return img, lbl
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
    mat_disp = (mat_var - mat_mean) / np.square(mat_mean)

    quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
    quantiles = np.append(quantiles, mat_mean.max())

    # merge bins with no difference in value
    quantiles = np.unique(quantiles)

    if len(quantiles) <= 1:
        # pathological case: the means are all identical. just return raw dispersion.
        return mat_disp

    # calc median dispersion per bin
    (disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)

    # calc median absolute deviation of dispersion per bin
    disp_meds_arr = disp_meds[disp_bins-1]  # 0th bin is empty since our quantiles start from 0
    disp_abs_dev = abs(mat_disp - disp_meds_arr)
    (disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)

    # calculate normalized dispersion
    disp_mads_arr = disp_mads[disp_bins-1]
    disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
    return disp_norm
def gl_init(self, array_table):
    self.gl_hide = False

    self.gl_vertex_array = gl.VertexArray()
    glBindVertexArray(self.gl_vertex_array)

    self.gl_vertex_buffer = gl.Buffer()
    glBindBuffer(GL_ARRAY_BUFFER, self.gl_vertex_buffer)

    self.gl_element_count = 3*gl_count_triangles(self)
    self.gl_element_buffer = gl.Buffer()
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, self.gl_element_buffer)

    vertex_type = numpy.dtype([array_table[attribute].field() for attribute in self.attributes])
    vertex_count = sum(len(primitive.vertices) for primitive in self.primitives)
    vertex_array = numpy.empty(vertex_count, vertex_type)

    for attribute in self.attributes:
        array_table[attribute].load(self, vertex_array)

    vertex_array, element_map = numpy.unique(vertex_array, return_inverse=True)
    element_array = gl_create_element_array(self, element_map, self.gl_element_count)

    glBufferData(GL_ARRAY_BUFFER, vertex_array.nbytes, vertex_array, GL_STATIC_DRAW)
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, element_array.nbytes, element_array, GL_STATIC_DRAW)
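The example above uses return_inverse to deduplicate vertex records: the unique records fill the vertex buffer, and the inverse indices become the element (index) buffer. A minimal sketch of the same pattern on a plain array (the data is illustrative, not from the project above):

import numpy as np

vertices = np.array([0.0, 1.0, 0.0, 2.0, 1.0])          # stand-in for vertex records
unique_vertices, element_map = np.unique(vertices, return_inverse=True)
# unique_vertices -> array([0., 1., 2.]); element_map -> array([0, 1, 0, 2, 1])
assert np.array_equal(unique_vertices[element_map], vertices)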
def get_best_split(X, y, criterion):
    """ Obtain the best splitting point and resulting children for the data set X, y
    Args:
        X, y (numpy.ndarray, data set)
        criterion (gini or entropy)
    Returns:
        dict {index: index of the feature, value: feature value, children: left and right children}
    """
    best_index, best_value, best_score, children = None, None, 1, None
    for index in range(len(X[0])):
        for value in np.sort(np.unique(X[:, index])):
            groups = split_node(X, y, index, value)
            impurity = weighted_impurity([groups[0][1], groups[1][1]], criterion)
            if impurity < best_score:
                best_index, best_value, best_score, children = index, value, impurity, groups
    return {'index': best_index, 'value': best_value, 'children': children}
def consideronlylabels(self, list2consider, verbose=False):
    """
    Add labels to the ignoredlabels list (set) and update the self._labels cache.
    """
    if isinstance(list2consider, int):
        list2consider = [list2consider]
    toignore = set(np.unique(self.image)) - set(list2consider)
    integers = np.vectorize(lambda x: int(x))
    toignore = integers(list(toignore)).tolist()
    if verbose:
        print 'Adding labels', toignore, 'to the list of labels to ignore...'
    self._ignoredlabels.update(toignore)
    if verbose:
        print 'Updating labels list...'
    self._labels = self.__labels()
def main(max_iter):
    # prepare
    npdl.utils.random.set_seed(1234)

    # data
    digits = load_digits()
    X_train = digits.data
    X_train /= np.max(X_train)
    Y_train = digits.target
    n_classes = np.unique(Y_train).size

    # model
    model = npdl.model.Model()
    model.add(npdl.layers.Dense(n_out=500, n_in=64, activation=npdl.activations.ReLU()))
    model.add(npdl.layers.Dense(n_out=n_classes, activation=npdl.activations.Softmax()))
    model.compile(loss=npdl.objectives.SCCE(), optimizer=npdl.optimizers.SGD(lr=0.005))

    # train
    model.fit(X_train, npdl.utils.data.one_hot(Y_train), max_iter=max_iter, validation_split=0.1)
def get_weighted_mask(self, image_shape, mask_shape, ROI_mask=None, labels_mask=None):
    if labels_mask is None:
        raise ValueError('SamplingScheme error: please specify a labels_mask for this sampling scheme')
    print(np.unique(labels_mask))
    mask_boundaries = self.get_mask_boundaries(image_shape, mask_shape, ROI_mask)
    final_mask = np.zeros((self.n_categories,) + labels_mask.shape, dtype="int16")

    for index_cat in range(self.n_categories):
        final_mask[index_cat] = (labels_mask == index_cat,) * mask_boundaries

    final_mask = 1.0 * final_mask / np.reshape(np.sum(np.reshape(final_mask, (self.n_categories, -1)), axis=1),
                                               (self.n_categories,) + (1,)*len(image_shape))
    print(np.sum(np.reshape(final_mask, (self.n_categories, -1)), axis=1))

    return final_mask
def get_channel_id_by_file_name(self, filename):
    """
    Checking parameters of NCS, NSE and NTT Files for given filename and return channel_id if result is consistent
    :param filename:
    :return:
    """
    channel_ids = []
    channel_ids += [k for k in self.parameters_ncs if self.parameters_ncs[k]['filename'] == filename]
    channel_ids += [k for k in self.parameters_nse if self.parameters_nse[k]['filename'] == filename]
    channel_ids += [k for k in self.parameters_ntt if self.parameters_ntt[k]['filename'] == filename]
    if len(np.unique(np.asarray(channel_ids))) == 1:
        return channel_ids[0]
    elif len(channel_ids) > 1:
        raise ValueError(
            'Ambiguous channel ids detected. Filename %s is associated'
            ' to different channels of NCS and NSE and NTT %s'
            '' % (filename, channel_ids))
    else:  # if filename was not detected
        return None
def __read_unit(self, unit_id, channel_idx):
    """
    Creates unit with unit id for given channel id.
    """
    # define a name for spiketrain
    # (unique identifier: 1000 * elid + unit_nb)
    name = "Unit {0}".format(1000 * channel_idx + unit_id)
    # define description for spiketrain
    desc = 'Unit from channel: {0}, id: {1}'.format(
        channel_idx, self.__get_unit_classification(unit_id))

    un = Unit(
        name=name,
        description=desc,
        file_origin='.'.join([self._filenames['nev'], 'nev']))

    # add additional annotations
    un.annotate(ch_idx=int(channel_idx))
    un.annotate(unit_id=int(unit_id))

    return un
def __draw_pk2(self):
    self.__cleanPk2()
    if self.units is not None:
        unique_units = np.unique(self.units)
        unique_units = unique_units.tolist()
        pca_1, pca_2 = self.PCAusedList.currentText().split("-")
        pca_1 = np.int(pca_1) - 1
        pca_2 = np.int(pca_2) - 1
        if self.wavePCAs[0].shape[0] > 2:
            xs = self.wavePCAs[:, pca_1]
            ys = self.wavePCAs[:, pca_2]
            self.PcaScatterItem = []
            seg_num = 5000
            for i, ite_unit in enumerate(unique_units):
                mask = self.units == ite_unit
                temp_xs = xs[mask]
                temp_ys = ys[mask]
                segs = int(ceil(temp_xs.shape[0] / float(seg_num)))
                for j in range(segs):
                    temp_xs_j = temp_xs[j*seg_num:(j+1)*seg_num]
                    temp_ys_j = temp_ys[j*seg_num:(j+1)*seg_num]
                    self.PcaScatterItem.append(pg.ScatterPlotItem(temp_xs_j, temp_ys_j,
                                                                  pen=self.colors[ite_unit],
                                                                  brush=self.colors[ite_unit],
                                                                  size=3, symbol="o"))
            for i in range(len(self.PcaScatterItem)):
                self.pk2.addItem(self.PcaScatterItem[i])
def cal_event_count(timestamps):
    """Calculate event count based on timestamps.

    Parameters
    ----------
    timestamps : numpy.ndarray
        timestamps array in 1D array

    Returns
    -------
    event_arr : numpy.ndarray
        array has 2 rows, first row contains timestamps,
        second row consists of corresponding event count at particular timestep
    """
    event_ts, event_count = np.unique(timestamps, return_counts=True)

    return np.asarray((event_ts, event_count))
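As in cal_event_count above, return_counts=True is a compact way to build a histogram over arbitrary values. A small sketch with made-up timestamps:

import numpy as np

timestamps = np.array([10, 10, 11, 13, 13, 13])
ts, count = np.unique(timestamps, return_counts=True)
# ts -> array([10, 11, 13]); count -> array([2, 1, 3])
event_arr = np.asarray((ts, count))                     # 2 x n_unique array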
def recode_groups(groups, propensity):
    # Code groups as 0 and 1
    groups = (groups == groups.unique()[0])
    N = len(groups)
    N1 = groups[groups == 1].index
    N2 = groups[groups == 0].index
    g1 = propensity[groups == 1]
    g2 = propensity[groups == 0]
    # Check if treatment groups got flipped - the smaller should correspond to N1/g1
    if len(N1) > len(N2):
        N1, N2, g1, g2 = N2, N1, g2, g1
    return groups, N1, N2, g1, g2

################################################################################
############################# Base Matching Class ##############################
################################################################################
def minScalErr(stec, el, z, thisBias):
    """
    this determines the slope of the vTEC vs. Elevation line, which
    should be minimized in the minimum scalloping technique for
    receiver bias removal
    inputs:
        stec - time indexed Series of slant TEC values
        el - corresponding elevation values, also Series
        z - mapping function values to convert to vTEC from entire file, may contain nans, Series
        thisBias - the bias to be tested and minimized
    """

    intel = np.asarray(el[stec.index], int)  # bin the elevation values into int
    sTEC = np.asarray(stec, float)
    zmap = z[stec.index]
    c = np.array([(i, np.average((sTEC[intel == i] - thisBias)
                                 / zmap[intel == i])) for i in np.unique(intel) if i > 30])

    return np.polyfit(c[:, 0], c[:, 1], 1)[0]
def filter_sort_unique(self, max_objval=float('Inf')):
    # filter
    if max_objval < float('inf'):
        good_idx = self.objvals <= max_objval
        self.objvals = self.objvals[good_idx]
        self.solutions = self.solutions[good_idx]

    if len(self.objvals) > 0:
        sort_idx = np.argsort(self.objvals)
        self.objvals = self.objvals[sort_idx]
        self.solutions = self.solutions[sort_idx]

        # unique
        b = np.ascontiguousarray(self.solutions).view(
            np.dtype((np.void, self.solutions.dtype.itemsize * self.P)))
        _, unique_idx = np.unique(b, return_index=True)
        self.objvals = self.objvals[unique_idx]
        self.solutions = self.solutions[unique_idx]
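The void-dtype view in the example above is a classic trick for making np.unique operate on whole rows rather than individual elements. On NumPy 1.13 and later the same effect is available directly through the axis keyword; a small sketch with illustrative data:

import numpy as np

solutions = np.array([[1, 0, 1],
                      [1, 0, 1],
                      [0, 1, 1]])
# row-wise unique; return_index gives the first occurrence of each distinct row
unique_rows, unique_idx = np.unique(solutions, axis=0, return_index=True)
# unique_rows -> [[0, 1, 1], [1, 0, 1]]; unique_idx -> [2, 0]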
def reset(self):
    """ Resets the state of the generator"""
    self.step = 0
    Y = np.argmax(self.Y, 1)
    labels = np.unique(Y)
    idx = []
    smallest = len(Y)
    for i, label in enumerate(labels):
        where = np.where(Y == label)[0]
        if smallest > len(where):
            self.slabel = i
            smallest = len(where)
        idx.append(where)
    self.idx = idx
    self.labels = labels
    self.n_per_class = int(self.batch_size // len(labels))
    self.n_batches = int(np.ceil((smallest//self.n_per_class))) + 1
    self.update_probabilities()
def __init__(self, X, Y, batch_size, cropsize=0, truncate=False, sequential=False,
             random=True, val=False, class_weights=None):
    assert len(X) == len(Y), 'X and Y must be the same length {}!={}'.format(len(X), len(Y))
    if sequential:
        print('Using sequential mode')
    print('starting normal generator')
    self.X = X
    self.Y = Y
    self.rnd_idx = np.arange(len(Y))
    self.Y_last_epoch = []
    self.val = val
    self.step = 0
    self.i = 0
    self.cropsize = cropsize
    self.truncate = truncate
    self.random = False if sequential or val else random
    self.batch_size = int(batch_size)
    self.sequential = sequential
    self.c_weights = class_weights if class_weights else dict(zip(np.unique(np.argmax(Y, 1)),
                                                                  np.ones(len(np.argmax(Y, 1)))))
    assert set(np.argmax(Y, 1)) == set([int(x) for x in self.c_weights.keys()]), 'not all labels in class weights'
    self.n_batches = int(len(X)//batch_size if truncate else np.ceil(len(X)/batch_size))
    if self.random:
        self.randomize()
def next_normal(self):
    x_batch = self.X[self.step*self.batch_size:(self.step+1)*self.batch_size]
    y_batch = self.Y[self.step*self.batch_size:(self.step+1)*self.batch_size]
    diff = len(x_batch[0]) - self.cropsize
    if self.cropsize != 0 and not self.val:
        start = np.random.choice(np.arange(0, diff+5, 5), len(x_batch))
        x_batch = [x[start[i]:start[i]+self.cropsize, :] for i, x in enumerate(x_batch)]
    elif self.cropsize != 0 and self.val:
        x_batch = [x[diff//2:diff//2+self.cropsize] for i, x in enumerate(x_batch)]
    x_batch = np.array(x_batch, dtype=np.float32)
    y_batch = np.array(y_batch, dtype=np.int32)
    self.step += 1
    if self.val:
        self.Y_last_epoch.extend(y_batch)
        return x_batch  # for validation generator, save the new y_labels
    else:
        weights = np.ones(len(y_batch))
        for t in np.unique(np.argmax(y_batch, 1)):
            weights[np.argmax(y_batch, 1) == t] = self.c_weights[t]
        return (x_batch, y_batch)
def get_preds_true_for_task(self, train_tasks, test_tasks, param_dict):
    t = param_dict['task_num']
    X = train_tasks[t]['X']
    y = train_tasks[t]['Y']
    test_X = test_tasks[t]['X']
    true_y = list(test_tasks[t]['Y'].flatten())

    if len(y) == 0 or len(X) == 0 or len(test_X) == 0 or len(true_y) == 0:
        return None, None

    if self.cant_train_with_one_class and len(np.unique(y)) == 1:
        preds = list(np.unique(y)[0] * np.ones(len(true_y)))
    else:
        preds = self.train_and_predict_task(t, X, y, test_X, param_dict)

    return preds, true_y
def getClasses(labels):
    """
    Get unique values from a column of labels.

    Parameters
    ----------
    labels: array-like of shape = [number_samples] or [number_samples, number_outputs]
        The target values (class labels in classification).

    Return
    ----------
    classes: ndarray
        The sorted unique labels
    ids: ndarray
        For each element of `labels`, the index of its value in `classes`
        (the inverse mapping returned by return_inverse).
    """
    uniques, ids = numpy.unique(labels, return_inverse=True)
    return uniques, ids
def grid_spacing(self):
    interval = [1, 10]
    p1 = Parameter('A', 'integer', lower=interval[0], upper=interval[1])
    p2 = Parameter('B', 'continuous', lower=interval[0], upper=interval[1])
    p3 = Parameter('C', 'categorical', possible_values=['Bla1', 'Bla2'])
    p4 = Parameter('D', 'boolean')
    grid_sizes = {'A': 5, 'B': 6}
    grid_search = GridSearchOptimizer(model, [p1, p2, p3, p4], clf_score, grid_sizes)
    grid = grid_search.grid
    for params in grid:
        self.assertIn(params['A'], range(*interval))
        self.assertTrue(params['B'] >= interval[0])
        self.assertTrue(params['B'] <= interval[1])
        self.assertIn(params['C'], ['Bla1', 'Bla2'])
        self.assertIn(params['D'], ['True', 'False'])
    lenA = len(np.unique([params['A'] for params in grid]))
    lenB = len(np.unique([params['B'] for params in grid]))
    lenC = len(np.unique([params['C'] for params in grid]))
    lenD = len(np.unique([params['D'] for params in grid]))
    self.assertTrue((lenA == grid_sizes['A']) or (lenA == grid_sizes['A']+1))
    self.assertTrue((lenB == grid_sizes['B']) or (lenB == grid_sizes['B']+1))
    self.assertTrue((lenC == grid_sizes['C']) or (lenC == grid_sizes['C']+1))
    self.assertTrue((lenD == grid_sizes['D']) or (lenD == grid_sizes['D']+1))
def logscale_spec(spec, sr=44100, factor=20.):
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    scale = np.unique(np.round(scale)).astype(int)  # cast to int so the values can be used as slice indices

    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            newspec[:, i] = np.sum(spec[:, scale[i]:], axis=1)
        else:
            newspec[:, i] = np.sum(spec[:, scale[i]:scale[i+1]], axis=1)

    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]

    return newspec, freqs
def free_parameters(self, data):
    """ Compute free parameters for the model fit using K-Means """
    K = np.unique(self.labels_).shape[0]  # number of clusters
    n, d = data.shape
    r = (K - 1) + (K * d)
    if self.metric == 'euclidean':
        r += 1  # one parameter for variance
    elif self.metric == 'mahalanobis':
        if self.covar_type == 'full' and self.covar_tied:
            r += (d * (d + 1) * 0.5)  # half of the elements (including diagonal) in the matrix
        if self.covar_type == 'full' and not self.covar_tied:
            r += (d * (d + 1) * 0.5 * K)  # half of the elements (including diagonal) in the matrix
        if self.covar_type == 'diag' and self.covar_tied:
            r += d  # diagonal elements of the matrix
        if self.covar_type == 'diag' and not self.covar_tied:
            r += (d * K)  # diagonal elements of the matrix
        if self.covar_type == 'spher' and self.covar_tied:
            r += 1  # all diagonal elements are equal
        if self.covar_type == 'spher' and not self.covar_tied:
            r += K  # all diagonal elements are equal
    return r
def sim_target_supervised(target_data, target_labels, sigma, idx, target_params):
    cur_labels = target_labels[idx]
    N = cur_labels.shape[0]
    N_labels = len(np.unique(cur_labels))

    Gt, mask = np.zeros((N, N)), np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            if cur_labels[i] == cur_labels[j]:
                Gt[i, j] = 0.8
                mask[i, j] = 1
            else:
                Gt[i, j] = 0.1
                mask[i, j] = 0.8 / (N_labels - 1)

    return np.float32(Gt), np.float32(mask)
def get_Surface_Potentials(mtrue, survey, src, field_obj):

    phi = field_obj['phi']
    CCLoc = mesh.gridCC
    XLoc = np.unique(mesh.gridCC[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]
    phiScale = 0.

    if(survey == "Pole-Dipole" or survey == "Pole-Pole"):
        refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')
        # refPoint = CCLoc[refInd]
        # refSurfaceInd = np.where(xSurface == refPoint[0])
        # phiScale = np.median(phiSurface)
        phiScale = phi[refInd]
        phiSurface = phiSurface - phiScale

    return XLoc, phiSurface, phiScale
def Plot_ChargesDensity(XYZ, sig0, sig1, R, E0, ax):

    xr, yr, zr = np.unique(XYZ[:, 0]), np.unique(XYZ[:, 1]), np.unique(XYZ[:, 2])
    xcirc = xr[np.abs(xr) <= R]

    Et, Ep, Es = get_ElectricField(XYZ, sig0, sig1, R, E0)
    rho = get_ChargesDensity(XYZ, sig0, sig1, R, Et, Ep)

    ax.set_xlim([xr.min(), xr.max()])
    ax.set_ylim([yr.min(), yr.max()])
    ax.set_aspect('equal')
    Cplot = ax.pcolor(xr, yr, rho.reshape(xr.size, yr.size))
    cb1 = plt.colorbar(Cplot, ax=ax)
    cb1.set_label(label='Charge Density ($C/m^2$)', size=ftsize_label)  # weight='bold')
    cb1.ax.tick_params(labelsize=ftsize_axis)
    ax.plot(xcirc, np.sqrt(R**2-xcirc**2), '--k', xcirc, -np.sqrt(R**2-xcirc**2), '--k')
    ax.set_ylabel('Y coordinate ($m$)', fontsize=ftsize_label)
    ax.set_xlabel('X coordinate ($m$)', fontsize=ftsize_label)
    ax.tick_params(labelsize=ftsize_axis)
    ax.set_title('Charges Density', fontsize=ftsize_title)

    return ax
def unique(eq):
    eq = eqsize(eq)
    # hash each element so mixed-type entries can be compared
    c1 = [None] * eq.size
    for i in range(0, eq.size):
        c1[i] = hash(eq[i])
    c1 = np.asarray(c1)

    if c1.ndim == 1:
        _, ia, ic = np.unique(c1, return_index=True, return_inverse=True)
        ia = (ia[:, ]).conj().T
        ic = (ic[:, ]).conj().T
        u = eq[ia]
    else:
        a = c1
        b = np.ascontiguousarray(a).view(
            np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
        _, ia, ic = np.unique(b, return_index=True, return_inverse=True)
        u = eq[ia]

    return u, ia, ic
def getTypeProblem(self, solution_filename):
    ''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
    if 'task' not in self.info.keys():
        solution = np.array(data_converter.file_to_array(solution_filename))
        target_num = solution.shape[1]
        self.info['target_num'] = target_num
        if target_num == 1:  # if we have only one column
            solution = np.ravel(solution)  # flatten
            nbr_unique_values = len(np.unique(solution))
            if nbr_unique_values < len(solution)/8:
                # Classification
                self.info['label_num'] = nbr_unique_values
                if nbr_unique_values == 2:
                    self.info['task'] = 'binary.classification'
                    self.info['target_type'] = 'Binary'
                else:
                    self.info['task'] = 'multiclass.classification'
                    self.info['target_type'] = 'Categorical'
            else:
                # Regression
                self.info['label_num'] = 0
                self.info['task'] = 'regression'
                self.info['target_type'] = 'Numerical'
        else:
            # Multilabel or multiclass
            self.info['label_num'] = target_num
            self.info['target_type'] = 'Binary'
            if any(item > 1 for item in map(np.sum, solution.astype(int))):
                self.info['task'] = 'multilabel.classification'
            else:
                self.info['task'] = 'multiclass.classification'
    return self.info['task']
def tiedrank(a):
    ''' Return the ranks (with base 1) of a list resolving ties by averaging.
        This works for numpy arrays.'''
    m = len(a)
    # Sort a in ascending order (sa=sorted vals, i=indices)
    i = a.argsort()
    sa = a[i]
    # Find unique values
    uval = np.unique(a)
    # Test whether there are ties
    R = np.arange(m, dtype=float) + 1  # Ranks with base 1
    if len(uval) != m:
        # Average the ranks for the ties
        oldval = sa[0]
        newval = sa[0]
        k0 = 0
        for k in range(1, m):
            newval = sa[k]
            if newval == oldval:
                # moving average
                R[k0:k+1] = R[k-1]*(k-k0)/(k-k0+1) + R[k]/(k-k0+1)
            else:
                k0 = k
                oldval = newval
    # Invert the index
    S = np.empty(m)
    S[i] = R
    return S
def binarization(array):
    ''' Takes a binary-class datafile and turn the max value (positive class) into 1 and the min into 0'''
    array = np.array(array, dtype=float)  # conversion needed to use np.inf after
    if len(np.unique(array)) > 2:
        raise ValueError("The argument must be a binary-class datafile. "
                         "{} classes detected".format(len(np.unique(array))))

    # manipulation which aims at avoid error in data with for example classes '1' and '2'.
    array[array == np.amax(array)] = np.inf
    array[array == np.amin(array)] = 0
    array[array == np.inf] = 1

    return np.array(array, dtype=int)
def __init__(self, images, labels, fake_data=False):
    if fake_data:
        self._num_examples = 10000
    else:
        assert images.shape[0] == labels.shape[0], (
            "images.shape: %s labels.shape: %s" % (images.shape, labels.shape))
        self._num_examples = images.shape[0]

        # Convert shape from [num examples, rows, columns, depth]
        # to [num examples, rows*columns] (assuming depth == 1)
        self.imageShape = images.shape[1:]
        self.imageChannels = self.imageShape[2]

        images = images.reshape(images.shape[0],
                                images.shape[1] * images.shape[2] * images.shape[3])
        # Convert from [0, 255] -> [0.0, 1.0].
        images = images.astype(numpy.float32)
        images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
    try:
        if len(numpy.shape(self._labels)) == 1:
            self._labels = dense_to_one_hot(self._labels, len(numpy.unique(self._labels)))
    except:
        traceback.print_exc()
    self._epochs_completed = 0
    self._index_in_epoch = 0
def cluster_service(path, service, cluster_size, prev_metadata=None):
    filename = os.path.join(path, service["preprocessed_filename"])
    df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)
    initial_idx = None
    if prev_metadata:
        initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)

    # adjust cluster_size if an initial assignment has been found
    if initial_idx is not None:
        cluster_size = len(np.unique(initial_idx))

    prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
    if os.path.exists(prefix + "_1.png"):
        print("skip " + prefix)
        return (None, None)

    cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
    if cluster_size < 2:
        # no silhouette_score for cluster size 1
        return (None, None)
    print("silhouette_score: %f" % score)
    # protect the write access to the metadata file
    metadata_lock.acquire()
    with metadata.update(path) as data:
        for srv in data["services"]:
            if srv["name"] == service["name"]:
                if "clusters" not in srv:
                    srv["clusters"] = {}
                d = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
                srv["clusters"][cluster_size] = d
    metadata_lock.release()
    return (service["name"], cluster_size)
def view_waveforms_clusters(data, halo, threshold, templates, amps_lim, n_curves=200, save=False):

    nb_templates = templates.shape[1]
    n_panels = numpy.ceil(numpy.sqrt(nb_templates))
    mask = numpy.where(halo > -1)[0]
    clust_idx = numpy.unique(halo[mask])
    fig = pylab.figure()
    square = True
    center = len(data[0] - 1)//2
    for count, i in enumerate(xrange(nb_templates)):
        if square:
            pylab.subplot(n_panels, n_panels, count + 1)
            if (numpy.mod(count, n_panels) != 0):
                pylab.setp(pylab.gca(), yticks=[])
            if (count < n_panels*(n_panels - 1)):
                pylab.setp(pylab.gca(), xticks=[])

        subcurves = numpy.where(halo == clust_idx[count])[0]
        for k in numpy.random.permutation(subcurves)[:n_curves]:
            pylab.plot(data[k], '0.5')

        pylab.plot(templates[:, count], 'r')
        pylab.plot(amps_lim[count][0]*templates[:, count], 'b', alpha=0.5)
        pylab.plot(amps_lim[count][1]*templates[:, count], 'b', alpha=0.5)

        xmin, xmax = pylab.xlim()
        pylab.plot([xmin, xmax], [-threshold, -threshold], 'k--')
        pylab.plot([xmin, xmax], [threshold, threshold], 'k--')
        #pylab.ylim(-1.5*threshold, 1.5*threshold)
        ymin, ymax = pylab.ylim()
        pylab.plot([center, center], [ymin, ymax], 'k--')
        pylab.title('Cluster %d' % i)

    if nb_templates > 0:
        pylab.tight_layout()
    if save:
        pylab.savefig(os.path.join(save[0], 'waveforms_%s' % save[1]))
        pylab.close()
    else:
        pylab.show()
    del fig
def check_consistent_length(*arrays):
    """Check that all arrays have consistent first dimensions.

    Checks whether all objects in arrays have the same shape or length.

    Parameters
    ----------
    *arrays : list or tuple of input objects.
        Objects that will be checked for consistent length.
    """

    uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
    if len(uniques) > 1:
        raise ValueError("Found arrays with inconsistent numbers of samples: "
                         "%s" % str(uniques))
def transform(self, img, lbl):
    """transform

    :param img:
    :param lbl:
    """
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= self.mean
    img = m.imresize(img, (self.img_size[0], self.img_size[1]))
    # Resize scales images from 0 to 255, thus we need
    # to divide by 255.0
    img = img.astype(float) / 255.0
    # NHWC -> NCWH
    img = img.transpose(2, 0, 1)

    classes = np.unique(lbl)
    lbl = lbl.astype(float)
    lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
    lbl = lbl.astype(int)

    if not np.all(classes == np.unique(lbl)):
        print("WARN: resizing labels yielded fewer classes")

    if not np.all(np.unique(lbl) < self.n_classes):
        raise ValueError("Segmentation map contained invalid class values")

    img = torch.from_numpy(img).float()
    lbl = torch.from_numpy(lbl).long()

    return img, lbl
def fit(self, X, C, y, regions, kernelType, reml=True, maxiter=100):

    # construct a list of kernel names (one for each region)
    if (kernelType == 'adapt'):
        kernelNames = self.buildKernelAdapt(X, C, y, regions, reml, maxiter)
    else:
        kernelNames = [kernelType] * len(regions)

    # perform optimization
    kernelObj, hyp_kernels, sig2e, fixedEffects = self.optimize(X, C, y, kernelNames, regions, reml, maxiter)

    # compute posterior distribution
    Ktraintrain = kernelObj.getTrainKernel(hyp_kernels)
    post = self.infExact_scipy_post(Ktraintrain, C, y, sig2e, fixedEffects)

    # fix intercept if phenotype is binary
    if (len(np.unique(y)) == 2):
        controls = (y < y.mean())
        cases = ~controls
        meanVec = C.dot(fixedEffects)
        mu, var = self.getPosteriorMeanAndVar(np.diag(Ktraintrain), Ktraintrain, post, meanVec)
        fixedEffects[0] -= optimize.minimize_scalar(
            self.getNegLL, args=(mu, np.sqrt(sig2e+var), controls, cases), method='brent').x

    # construct trainObj
    trainObj = dict([])
    trainObj['sig2e'] = sig2e
    trainObj['hyp_kernels'] = hyp_kernels
    trainObj['fixedEffects'] = fixedEffects
    trainObj['kernelNames'] = kernelNames

    return trainObj
def load_scan(path):
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    #slices.sort(key = lambda x: int(x.InstanceNumber))
    acquisitions = [x.AcquisitionNumber for x in slices]

    vals, counts = np.unique(acquisitions, return_counts=True)
    vals = vals[::-1]    # reverse order so the later acquisitions are first (np.unique returns values in ascending order)
    counts = counts[::-1]

    ## take the acquisition that has more entries; if the counts are identical take the later entry
    acq_val_sel = vals[np.argmax(counts)]

    ##acquisitions = sorted(np.unique(acquisitions), reverse=True)

    if len(vals) > 1:
        print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
    slices2 = [x for x in slices if x.AcquisitionNumber == acq_val_sel]
    slices = slices2
    ## ONE path includes 2 acquisitions (2 sets); take the latter acquisition only, which typically is better than the first/previous ones.
    ## example of the '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'

    #slices.sort(key = lambda x: int(x.ImagePositionPatient[2]))   # from v 8, BUG should be float
    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))  # from v 9

    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except:
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)

    for s in slices:
        s.SliceThickness = slice_thickness

    return slices
def largest_label_volume(im, bg=-1):
    vals, counts = np.unique(im, return_counts=True)

    counts = counts[vals != bg]
    vals = vals[vals != bg]

    if len(counts) > 0:
        return vals[np.argmax(counts)]
    else:
        return None

#image=sample_image
def get_chunks_by_gem_group(self):
    """ Return exactly one chunk per gem group."""
    gem_group_arr = self.get_column('gem_group')
    # verify gem groups are sorted
    assert np.all(np.diff(gem_group_arr) >= 0)
    unique_ggs = np.unique(gem_group_arr)
    gg_key = lambda i: gem_group_arr[i]
    chunk_iter = self.get_chunks_from_partition(unique_ggs, gg_key)
    for (gg, chunk) in zip(unique_ggs, chunk_iter):
        yield (gg, chunk[0], chunk[1])
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space '''

    if len(np.unique(reads)) < 2:
        print 'Skipping RPPU threshold calculation.'
        return 1

    print 'RPPU subsample rate: %0.4f' % subsample_rate

    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print 'New N50: %d:' % new_n50

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1, 1)))
    kmeans.predict(log_reads.reshape((-1, 1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print 'RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_))))
    print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold
def append_data_column(ds, column):

    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))

    levels = get_levels(ds)

    if levels is not None:
        # update levels if we have new unique values
        if type(column.values) == p.Categorical:
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique() for an empty series
            added_levels = set([])
        else:
            added_levels = set(column.unique()) - set(levels)

        new_levels = list(levels)
        new_levels.extend(added_levels)

        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)

        new_levels = np.array(new_levels, dtype=np.object)
        new_data = make_index_array(new_levels, column.values, ds.dtype)

        clear_levels(ds)
        create_levels(ds, new_levels)
    else:
        new_data = column

    # Append new data
    ds[existing_count:(existing_count + new_count)] = new_data
def _label2rgb_avg(label_field, image, bg_label=0, bg_color=(0, 0, 0)):
    """Visualise each segment in `label_field` with its mean color in `image`.

    Parameters
    ----------
    label_field : array of int
        A segmentation of an image.
    image : array, shape ``label_field.shape + (3,)``
        A color image of the same spatial shape as `label_field`.
    bg_label : int, optional
        A value in `label_field` to be treated as background.
    bg_color : 3-tuple of int, optional
        The color for the background label

    Returns
    -------
    out : array, same shape and type as `image`
        The output visualization.
    """
    out = np.zeros_like(image)
    labels = np.unique(label_field)
    bg = (labels == bg_label)
    if bg.any():
        labels = labels[labels != bg_label]
        out[bg] = bg_color
    for label in labels:
        mask = (label_field == label).nonzero()
        color = image[mask].mean(axis=0)
        out[mask] = color
    return out
def stan_map(vector):
    """
    Create a map of vector items : id.
    """
    unique_items = np.unique(vector)
    return {item: id_ for id_, item in enumerate(unique_items, start=1)}