The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.flatnonzero().
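As a quick orientation before the project examples, here is a minimal sketch (not drawn from any of the projects below) of what numpy.flatnonzero does: it returns the indices of the non-zero elements of the flattened input, equivalent to np.nonzero(np.ravel(a))[0].

import numpy as np

a = np.array([[0, 2, 0],
              [3, 0, 4]])
# indices of non-zero entries in the flattened array [0, 2, 0, 3, 0, 4]
print(np.flatnonzero(a))      # -> [1 3 5]
# boolean conditions are the most common use, as in the project examples below
print(np.flatnonzero(a > 2))  # -> [3 5]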
def numpy_groupby(values, keys):
    """ Group a collection of numpy arrays by key arrays.
        Yields (key_tuple, view_tuple) where key_tuple is the key grouped on and
        view_tuple is a tuple of views into the value arrays.
          values: tuple of arrays to group
          keys: tuple of sorted, numeric arrays to group by """
    if len(values) == 0:
        return
    if len(values[0]) == 0:
        return

    for key_array in keys:
        assert len(key_array) == len(keys[0])
    for value_array in values:
        assert len(value_array) == len(keys[0])

    # The indices where any of the keys differ from the previous key become group boundaries
    key_change_indices = np.logical_or.reduce(tuple(np.concatenate(([1], np.diff(key))) != 0 for key in keys))
    group_starts = np.flatnonzero(key_change_indices)
    group_ends = np.roll(group_starts, -1)
    group_ends[-1] = len(keys[0])

    for group_start, group_end in itertools.izip(group_starts, group_ends):
        yield tuple(key[group_start] for key in keys), tuple(value[group_start:group_end] for value in values)
def compute_clustering_accuracy(label1, label2):
    """ From clustering_on_transcript_compatibility_counts, see github for MIT license """
    uniq1, uniq2 = np.unique(label1), np.unique(label2)
    # Create two dictionaries. Each will store the indices of each label
    entries1, entries2 = {}, {}
    for label in uniq1:
        entries1[label] = set(np.flatnonzero((label1 == label)))
    for label in uniq2:
        entries2[label] = set(np.flatnonzero((label2 == label)))
    # Create an intersection matrix which counts the number of entries that overlap for each label combination
    W = np.zeros((len(uniq1), len(uniq2)))
    for i, j in itertools.product(range(len(uniq1)), range(len(uniq2))):
        W[i, j] = len(entries1[uniq1[i]].intersection(entries2[uniq2[j]]))
    # find the max weight matching
    match_val = get_max_wt_matching(uniq1, uniq2, W)
    # return the error rate
    return (1 - match_val / float(len(label1))) * 100
def visualize(X, Y, classes, samples_per_class=10):
    nb_classes = len(classes)
    for y, cls in enumerate(classes):
        idxs = np.flatnonzero(Y == y)
        idxs = np.random.choice(idxs, samples_per_class, replace=False)
        for i, idx in enumerate(idxs):
            plt_idx = i * nb_classes + y + 1
            plt.subplot(samples_per_class, nb_classes, plt_idx)
            plt.imshow(X[idx], cmap='gray')
            plt.axis('off')
            if i == 0:
                plt.title(cls)
    # plt.show()
    plt.savefig('img/data.png')
    plt.clf()
def brute_radius_search(self, v, radius2=None, batchsize=1024):
    v = v.ravel()
    norm2 = self.get_dataset('norm2')
    v_norm2 = np.sum(v * v)
    candidates = []
    ids = self.ids[:]
    for i in xrange(0, self.num_ids, batchsize):
        blockdata = self.data[i:i+batchsize, :]
        dists = norm2[i:i+batchsize] + v_norm2 - 2 * np.dot(blockdata, v)
        assert dists.ndim == 1
        if radius2:
            for j in np.flatnonzero(dists < radius2):
                candidates.append((dists[j], ids[i+j]))
        else:
            for j in xrange(dists.shape[0]):
                candidates.append((dists[j], ids[i+j]))
    candidates.sort()
    return candidates
def points_in_front(self, points, inverted=False, ret_indices=False):
    '''
    Given an array of points, return the points which lie either on the
    plane or in the half-space in front of it (i.e. in the direction of
    the plane normal).

    points: An array of points.
    inverted: When `True`, invert the logic. Return the points that lie
      behind the plane instead.
    ret_indices: When `True`, return the indices instead of the points
      themselves.
    '''
    sign = self.sign(points)

    if inverted:
        mask = np.less_equal(sign, 0)
    else:
        mask = np.greater_equal(sign, 0)

    indices = np.flatnonzero(mask)

    return indices if ret_indices else points[indices]
def has_approx_support(m, m_hat, prob=0.01):
    """Returns 1 if model selection error is less than or equal to prob rate,
    0 else.

    NOTE: why does np.nonzero/np.flatnonzero create so much problems?
    """
    m_nz = np.flatnonzero(np.triu(m, 1))
    m_hat_nz = np.flatnonzero(np.triu(m_hat, 1))

    upper_diagonal_mask = np.flatnonzero(np.triu(np.ones(m.shape), 1))
    not_m_nz = np.setdiff1d(upper_diagonal_mask, m_nz)

    intersection = np.in1d(m_hat_nz, m_nz)  # true positives
    not_intersection = np.in1d(m_hat_nz, not_m_nz)  # false positives

    true_positive_rate = 0.0
    if len(m_nz):
        true_positive_rate = 1. * np.sum(intersection) / len(m_nz)
        true_negative_rate = 1. - true_positive_rate

    false_positive_rate = 0.0
    if len(not_m_nz):
        false_positive_rate = 1. * np.sum(not_intersection) / len(not_m_nz)

    return int(np.less_equal(true_negative_rate + false_positive_rate, prob))
def get_clusters(C):
    nonassigned = list(range(len(C)))
    clusters = []
    while nonassigned:
        queue = set([nonassigned[0]])
        clusters.append([])
        while queue:
            node = queue.pop()
            clusters[-1].append(node)
            nonassigned.remove(node)
            queue.update(n for n in np.flatnonzero(C[node]) if n in nonassigned)
    C = np.zeros_like(C)
    for cluster in clusters:
        for i in cluster:
            C[i, cluster] = True
    return clusters, C
def __init__(self, geom, allowed=None):
    self._coords = []
    geom = geom.supercell()
    dist = geom.dist(geom)
    radii = np.array([get_property(sp, 'covalent_radius') for sp in geom.species])
    bondmatrix = dist < 1.3*(radii[None, :]+radii[:, None])
    self.fragments, C = get_clusters(bondmatrix)
    radii = np.array([get_property(sp, 'vdw_radius') for sp in geom.species])
    shift = 0.
    C_total = C.copy()
    while not C_total.all():
        bondmatrix |= ~C_total & (dist < radii[None, :]+radii[:, None]+shift)
        C_total = get_clusters(bondmatrix)[1]
        shift += 1.
    for i, j in combinations(range(len(geom)), 2):
        if bondmatrix[i, j]:
            bond = Bond(i, j, C=C)
            self.append(bond)
    for j in range(len(geom)):
        for i, k in combinations(np.flatnonzero(bondmatrix[j, :]), 2):
            ang = Angle(i, j, k, C=C)
            if ang.eval(geom.coords) > pi/4:
                self.append(ang)
    for bond in self.bonds:
        self.extend(get_dihedrals([bond.i, bond.j], geom.coords, bondmatrix, C))
def pinv(A, log=lambda _: None):
    U, D, V = np.linalg.svd(A)
    thre = 1e3
    thre_log = 1e8
    gaps = D[:-1]/D[1:]
    try:
        n = np.flatnonzero(gaps > thre)[0]
    except IndexError:
        n = len(gaps)
    else:
        gap = gaps[n]
        if gap < thre_log:
            log('Pseudoinverse gap of only: {:.1e}'.format(gap))
    D[n+1:] = 0
    D[:n+1] = 1/D[:n+1]
    return U.dot(np.diag(D)).dot(V)
def inverse(self, encoded, duration=None):
    '''Inverse static tag transformation'''

    ann = jams.Annotation(namespace=self.namespace, duration=duration)

    if np.isrealobj(encoded):
        detected = (encoded >= 0.5)
    else:
        detected = encoded

    for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]:
        vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd)))
        ann.append(time=0, duration=duration, value=vd, confidence=encoded[vid])

    return ann
def quant2ind(U):
    """
    Use quantization matrix to return slices of non-zero elements.

    :param U: quantization matrix

    :return: list of `slice`s for non-zero elements of U

    .. note:: This function assumes that the pulsar TOAs were sorted by time.
    """
    inds = []
    for cc, col in enumerate(U.T):
        epinds = np.flatnonzero(col)
        if epinds[-1] - epinds[0] + 1 != len(epinds):
            raise ValueError('ERROR: TOAs not sorted properly!')
        inds.append(slice(epinds[0], epinds[-1]+1))
    return inds
def find(a, n=None, d=None, nargout=1):
    if d:
        raise NotImplementedError

    # there is no promise that nonzero or flatnonzero
    # use or will use indexing of the argument without
    # converting it to array first. So we use asarray
    # instead of asanyarray
    if nargout == 1:
        i = np.flatnonzero(np.asarray(a)).reshape(1, -1) + 1
        if n is not None:
            i = i.take(n)
        return matlabarray(i)
    if nargout == 2:
        i, j = np.nonzero(np.asarray(a))
        if n is not None:
            i = i.take(n)
            j = j.take(n)
        return (matlabarray((i+1).reshape(-1, 1)),
                matlabarray((j+1).reshape(-1, 1)))
    raise NotImplementedError
def error_contour(self, phi, epsilon=2.0, delta_phi=None):
    """Error between worm shape and contour from image given implicitly by phi

    Arguments:
      phi (array): implicit worm contour, if none calculated from actual shape
      delta_phi (number): range of phi values to consider for error estimate
      epsilon (number): regularization parameter for the step functions
      curvature (bool): error due to curvature on phi
    """
    phi2 = self.phi(size=phi.shape)
    if delta_phi is None:
        error = np.sum((np.tanh(phi / epsilon) - np.tanh(phi2 / epsilon))**2)
    else:
        idx = np.flatnonzero(np.logical_and(phi2 <= delta_phi, phi2 >= -delta_phi))
        error = np.sum((np.tanh(phi[idx] / epsilon) - np.tanh(phi2[idx] / epsilon))**2)
    return error
def compute_EER(Pfa, Pmiss):
    """ computes the equal error rate (EER) given
        Pmiss or False Negative Rate
        and Pfa or False Positive Rate
        calculated for a range of operating points on the DET curve

        @Author: "Timothee Kheyrkhah, Omid Sadjadi"
    """
    fpr = Pfa
    fnr = Pmiss
    diff_pm_fa = fnr - fpr
    x1 = np.flatnonzero(diff_pm_fa >= 0)[0]
    x2 = np.flatnonzero(diff_pm_fa < 0)[-1]
    a = (fnr[x1] - fpr[x1]) / (fpr[x2] - fpr[x1] - (fnr[x2] - fnr[x1]))
    return fnr[x1] + a * (fnr[x2] - fnr[x1])
def visual_data(self):
    print "visualized some cifar-10 picture..."
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    num_classes = len(classes)
    samples_per_class = 7
    for y, cl in enumerate(classes):
        idxs = np.flatnonzero(self.y_train == y)
        idxs = np.random.choice(idxs, samples_per_class, replace=False)
        for i, idx in enumerate(idxs):
            plt_idx = i * num_classes + y + 1
            plt.subplot(samples_per_class, num_classes, plt_idx)
            plt.imshow(self.X_train[idx].astype('uint8'))
            plt.axis('off')
            if i == 0:
                plt.title(cl)
    plt.show()
    print "visualized data done...\n---------\n"
def imcrop_tosquare(img):
    """Make any image a square image.

    Parameters
    ----------
    img : np.ndarray
        Input image to crop, assumed at least 2d.

    Returns
    -------
    crop : np.ndarray
        Cropped image.
    """
    size = np.min(img.shape[:2])
    extra = img.shape[:2] - size
    crop = img
    for i in np.flatnonzero(extra):
        crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i)
    return crop
def boundary_nodes(fs):
    """Find the list of boundary nodes in fs. This is a
    unit-square-specific solution. A more elegant solution would employ
    the mesh topology and numbering.
    """
    eps = 1.e-10

    f = Function(fs)

    def on_boundary(x):
        """Return 1 if on the boundary, 0. otherwise."""
        if x[0] < eps or x[0] > 1 - eps or x[1] < eps or x[1] > 1 - eps:
            return 1.
        else:
            return 0.

    f.interpolate(on_boundary)

    return np.flatnonzero(f.values)
def computeEndToEnd(endtoendDistances, polWeights):
    # Initiate variables
    weightedEndtoendSq = np.zeros(c.nBeads)
    weightedEndtoendSqStd = np.zeros(c.nBeads)

    # Loop over all possible polymer lengths
    for z in range(c.nBeads):
        # Get data
        t1 = np.squeeze(np.asarray(endtoendDistances)[:, z])
        t2 = np.squeeze(np.asarray(polWeights)[:, z])
        # Count non-zero
        dataLength = len(np.flatnonzero(t2 != 0))
        # Calculate mean and standard deviation of mean
        weightedEndtoendSq[z] = np.average(t1, weights=t2)
        weightedEndtoendSqStd[z] = ((np.average((t1 - weightedEndtoendSq[z])**2, weights=t2)) / dataLength)**(1/2)

    return weightedEndtoendSq, weightedEndtoendSqStd


# Calculate gyradius and errors
def computeGyradiusStd(gyradiusSq, polWeights):
    # Calc weighted gyradius and standard deviation
    # Initiate variables
    weightedGyradiusSq = np.zeros(c.nBeads)
    weightedGyradiusSqStd = np.zeros(c.nBeads)

    # Loop over all possible polymer lengths
    for z in range(c.nBeads):
        # Get weight for bead number z in all polymers
        w = np.squeeze(np.asarray(polWeights)[:, z])
        # Count nonzero
        dataLength = len(np.flatnonzero(w != 0))
        # Calculate mean and standard deviation of mean
        weightedGyradiusSq[z] = np.average(gyradiusSq[:, z], weights=w)
        weightedGyradiusSqStd[z] = ((np.average((gyradiusSq[:, z] - weightedGyradiusSq[z])**2, weights=w)) / dataLength)**(1/2)

    return weightedGyradiusSq, weightedGyradiusSqStd
def set_dropout(self):
    n_nodes = self.n_nodes
    dropout = np.random.binomial([np.ones(sum(n_nodes))], self.DORATE)[0].astype(np.bool)
    dropout[0:n_nodes[0]] = False    # no dropout units in first layer
    dropout[-n_nodes[-1]:] = False   # no dropout units in last layer
    self.dropout = dropout

    # ths_d: mask of the weights attached to dropped-out units
    self.ths_d = np.zeros(len(self.ths)).astype(np.bool)
    for l in range(len(self.ths_l)):
        if (l < len(self.ths_l)-1):
            # mark weights whose input node is dropped out
            # (the +1 offset skips the bias weight)
            for i in np.flatnonzero(self.__get_dropout(l)):
                self.ths_d[self.ths_l[l] +
                           np.arange(n_nodes[l+1])*(n_nodes[l]+1) + (i+1)] = True
            # mark weights whose output node is dropped out
            for i in np.flatnonzero(self.__get_dropout(l+1)):
                self.ths_d[self.ths_l[l]+i*(n_nodes[l]+1):
                           self.ths_l[l]+(i+1)*(n_nodes[l]+1)] = True
def mapToNewPos(curposIDs, bigtosmall):
    ''' Convert list of old ids to new positions after bigtosmall reordering.

    Example
    -------
    >>> curposIDs = [0, 2, 4]
    >>> N = [11, 9, 3, 1, 5]
    >>> bigtosmall = argsort_bigtosmall_stable(N)
    >>> print bigtosmall
    [0 1 4 2 3]
    >>> newposIDs = mapToNewPos(curposIDs, bigtosmall)
    >>> print newposIDs
    [0, 3, 2]
    '''
    newposIDs = np.zeros_like(curposIDs)
    for posID in range(len(curposIDs)):
        newposIDs[posID] = np.flatnonzero(bigtosmall == curposIDs[posID])[0]
    return newposIDs.tolist()
def findMergePairByUID(self, uidA, uidB):
    ''' Find which currently tracked merge pair contains desired uids.

    Returns
    -------
    rowID : int
        index of tracked merge quantities related to specific uid pair.
    '''
    assert hasattr(self, 'mUIDPairs')
    rowID = np.flatnonzero(
        np.logical_and(uidA == self.mUIDPairs[:, 0],
                       uidB == self.mUIDPairs[:, 1]))
    if not rowID.size == 1:
        raise ValueError(
            'Bad search for correct merge UID pair.\n' + str(rowID))
    rowID = rowID[0]
    return rowID
def _discardAnyTrackedPairsThatOverlapWithAorB(self, uidA, uidB):
    ''' Update to discard remaining pairs that overlap uidA/uidB.

    Post Condition
    --------------
    Attributes mUIDPairs and _MergeTerms dont have any more info about
    other pairs (uidj, uidk) where uidA or uidB are involved.
    '''
    if hasattr(self, 'mUIDPairs'):
        mUIDPairs = self.mUIDPairs
        # Remove any other pairs associated with kA or kB
        keepRowIDs = ((mUIDPairs[:, 0] != uidA) *
                      (mUIDPairs[:, 1] != uidA) *
                      (mUIDPairs[:, 0] != uidB) *
                      (mUIDPairs[:, 1] != uidB))
        keepRowIDs = np.flatnonzero(keepRowIDs)
        self.setMergeUIDPairs(mUIDPairs[keepRowIDs])

        # Remove any other pairs related to kA, kB
        if self.hasMergeTerms():
            for key, dims in self._MergeTerms._FieldDims.items():
                mArr = getattr(self._MergeTerms, key)
                if dims[0] == 'M':
                    mArr = mArr[keepRowIDs]
                    self._MergeTerms.setField(key, mArr, dims=dims)
def makePlanForEmptyComps(curSS, dtargetMinCount=0.01, **kwargs):
    """ Create a Plan dict for any empty states.

    Returns
    -------
    Plan : dict with either no fields, or two fields named
    * candidateIDs
    * candidateUIDs

    Any "empty" Plan dict indicates that no empty comps exist.
    """
    Nvec = curSS.getCountVec()
    emptyIDs = np.flatnonzero(Nvec < dtargetMinCount)
    if len(emptyIDs) == 0:
        return dict()
    Plan = dict(candidateIDs=emptyIDs.tolist(),
                candidateUIDs=curSS.uIDs[emptyIDs].tolist(),
                )
    return Plan
def argsortBigToSmallByTiers(tierAScores, tierBScores):
    ''' Perform argsort, prioritizing first arr then second arr.

    Example
    -------
    >>> AScores = [1, 1, 1, 0, 0]
    >>> BScores = [6, 5, 4, 8, 7]
    >>> print argsortBigToSmallByTiers(AScores, BScores)
    [0 1 2 3 4]
    >>> AScores = [1, 1, 1, 0, 0, -1, -1, -1]
    >>> BScores = [6, 5, 4, 8, 7, 11, 1.5, -1.5]
    >>> print argsortBigToSmallByTiers(AScores, BScores)
    [0 1 2 3 4 5 6 7]
    >>> print argsortBigToSmallByTiers(BScores, AScores)
    [5 3 4 0 1 2 6 7]
    '''
    tierAScores = np.asarray(tierAScores, dtype=np.float64)
    sortids = argsort_bigtosmall_stable(tierAScores)
    limits = np.flatnonzero(np.diff(tierAScores[sortids]) != 0) + 1
    sortids = sortids[
        argsort_bigtosmall_stable(tierBScores, limits=limits)]
    return sortids
def plot_sequence(seqID, Data, dimID=0, maxT=200):
    Xseq = Data.X[Data.doc_range[seqID]:Data.doc_range[seqID + 1]]
    Zseq = Data.TrueParams['Z'][
        Data.doc_range[seqID]:Data.doc_range[seqID + 1]]

    Xseq = Xseq[:maxT, dimID]  # Xseq is 1D after this statement!
    Zseq = Zseq[:maxT]

    # Plot X, colored by segments Z
    changePts = np.flatnonzero(np.abs(np.diff(Zseq)))
    changePts = np.hstack([0, changePts + 1])
    for ii, loc in enumerate(changePts[:-1]):
        nextloc = changePts[ii + 1]
        ts = np.arange(loc, nextloc)
        xseg = Xseq[loc:nextloc]
        kseg = int(Zseq[loc])
        color = GaussViz.Colors[kseg % len(GaussViz.Colors)]
        pylab.plot(ts, xseg, '.-', color=color, markersize=8)
        pylab.plot(
            [nextloc - 1, nextloc],
            [Xseq[nextloc - 1], Xseq[nextloc]], 'k:')
    pylab.ylim([-2, 14])
def calc_underprediction_scores_per_word(model, Data, LP=None, **kwargs):
    ''' Find scalar score for each vocab word. Larger => worse prediction.
    '''
    if LP is None:
        LP = model.calc_local_params(Data)
    DocWordFreq_emp = calcWordFreqPerDoc_empirical(Data)
    DocWordFreq_model = calcWordFreqPerDoc_model(model, LP)
    uError = np.maximum(DocWordFreq_emp - DocWordFreq_model, 0)

    # For each word, identify set of relevant documents
    DocWordMat = Data.to_sparse_docword_matrix().toarray()
    score = np.zeros(Data.vocab_size)
    # TODO: only consider words with many docs overall
    for vID in xrange(Data.vocab_size):
        countPerDoc = DocWordMat[:, vID]
        typicalWordCount = np.median(countPerDoc[countPerDoc > 0])
        candidateDocs = np.flatnonzero(countPerDoc > typicalWordCount)
        if len(candidateDocs) < 10:
            continue
        score[vID] = np.mean(uError[candidateDocs, vID])
    # Only give positive probability to words with above average score
    score = score - np.mean(score)
    score = np.maximum(score, 0)
    score = score * score  # make more peaked!
    score /= score.sum()
    return score
def _sample_target_GroupXData(Data, model, LP, **kwargs):
    ''' Draw sample subset of provided GroupXData dataset
    '''
    randstate = kwargs['randstate']
    if not hasValidKey('targetCompID', kwargs):
        raise NotImplementedError('TODO')
    ktarget = kwargs['targetCompID']
    targetProbThr = kwargs['targetCompFrac']
    mask = LP['resp'][:, ktarget] > targetProbThr
    objIDs = np.flatnonzero(mask)
    if len(objIDs) < 2:
        return None, dict()
    randstate.shuffle(objIDs)
    targetObjIDs = objIDs[:kwargs['targetMaxSize']]
    TargetData = Data.select_subset_by_mask(atomMask=targetObjIDs,
                                            doTrackFullSize=False)
    TargetInfo = dict(ktarget=ktarget)
    return TargetData, TargetInfo
def removeEmptyComps_SSandLP(self, SS, LP):
    ''' Remove all parameters related to empty components from SS and LP

    Returns
    --------
    SS : bnpy SuffStatBag
    LP : dict for local params
    '''
    badks = np.flatnonzero(SS.N[:-1] < 1)
    # Remove in order, from largest index to smallest
    for k in badks[::-1]:
        SS.removeComp(k)
        mask = LP['Z'] > k
        LP['Z'][mask] -= 1
    if 'resp' in LP:
        del LP['resp']
    return SS, LP
def get_data_by_id(self, ids):
    """ Helper for getting current data values from stored identifiers
    :param float|list ids: ids for which data are requested
    :return: the stored ids
    :rtype: np.ndarray
    """
    if self.ids is None:
        raise ValueError("IDs not stored in node {}".format(self.name))
    if self.data is None:
        raise ValueError("No data in node {}".format(self.name))
    ids = np.array(ids, ndmin=1, copy=False)
    found_items = np.in1d(ids, self.ids)
    if not np.all(found_items):
        raise ValueError("Cannot find {} among {}".format(
            ids[np.logical_not(found_items)], self.name))
    idx = np.empty(len(ids), dtype='int')
    for k, this_id in enumerate(ids):
        if self.ids.ndim > 1:
            idx[k] = np.flatnonzero(np.all(self.ids == this_id, axis=1))[0]
        else:
            idx[k] = np.flatnonzero(self.ids == this_id)[0]
    return np.array(self.data, ndmin=1)[idx]
def subset_test(lin_op):
    """ Test that subsetting a linear operator produces the correct outputs.
    :param LinearOperator lin_op: the linear operator
    """
    sub_idx = np.random.rand(lin_op.shape[0], 1) > 0.5
    # make sure at least one element included
    sub_idx[np.random.randint(0, len(sub_idx))] = True
    sub_idx = np.flatnonzero(sub_idx)
    sub_lin_op = undertest.get_subset_lin_op(lin_op, sub_idx)

    # test projection to subset of indices
    x = np.random.randn(lin_op.shape[1], np.random.randint(1, 3))
    np.testing.assert_array_almost_equal(sub_lin_op * x, (lin_op * x)[sub_idx, :])

    # test back projection from subset of indices
    y = np.random.randn(len(sub_idx), np.random.randint(1, 3))
    z = np.zeros((lin_op.shape[0], y.shape[1]))
    z[sub_idx] = y
    np.testing.assert_array_almost_equal(sub_lin_op.rmatvec(y), lin_op.rmatvec(z))
def test_get_data_by_id(self):
    dim, data, cpd, ids = self.gen_data()
    node = undertest.Node(name='test node', data=data, cpd=cpd, ids=ids)
    # test setting of ids
    np.testing.assert_array_equal(node.ids, ids)
    # test for one id
    idx = np.random.randint(0, dim)
    np.testing.assert_array_equal(node.get_data_by_id(ids[idx]).ravel(), node.data[idx])
    # test for a random set of ids
    ids_subset = np.random.choice(ids, dim, replace=True)
    np.testing.assert_array_equal(node.get_data_by_id(ids_subset),
                                  [node.data[np.flatnonzero(ids == x)[0]] for x in ids_subset])
    # test for all ids
    self.assertEqual(node.get_all_data_and_ids(),
                     {x: node.get_data_by_id(x) for x in ids})
    # test when data are singleton
    dim, _, cpd, ids = self.gen_data(dim=1)
    node = undertest.Node(name='test node', data=1, cpd=cpd, ids=ids)
    self.assertEqual(node.get_all_data_and_ids(),
                     {x: node.get_data_by_id(x) for x in ids})
def migrate_settings(cls, settings_, version):
    if version < 2:
        # delete the saved attr_value to prevent crashes
        try:
            del settings_["context_settings"][0].values["attr_value"]
        except:
            pass

    # migrate selection
    if version <= 2:
        try:
            current_context = settings_["context_settings"][0]
            selection = getattr(current_context, "selection", None)
            if selection is not None:
                selection = [(i, 1) for i in np.flatnonzero(np.array(selection))]
                settings_.setdefault("imageplot", {})["selection_group_saved"] = selection
        except:
            pass
def redraw_integral(self):
    dis = []
    if np.any(self.curveplot.selection_group) and self.curveplot.data:
        # select data
        ind = np.flatnonzero(self.curveplot.selection_group)[0]
        show = self.curveplot.data[ind:ind+1]

        previews = self.flow_view.preview_n()
        for i in range(self.preprocessormodel.rowCount()):
            if i in previews:
                item = self.preprocessormodel.item(i)
                desc = item.data(DescriptionRole)
                params = item.data(ParametersRole)
                if not isinstance(params, dict):
                    params = {}
                preproc = desc.viewclass.createinstance(params)
                preproc.metas = False
                datai = preproc(show)
                di = datai.domain.attributes[0].compute_value.draw_info(show)
                color = self.flow_view.preview_color(i)
                dis.append({"draw": di, "color": color})
    refresh_integral_markings(dis, self.markings_list, self.curveplot)
def run_differential_expression(matrix, clusters, sseq_params=None):
    """ Compute differential expression for each cluster vs all other cells
        Args:
          matrix      - GeneBCMatrix  : gene expression data
          clusters    - np.array(int) : 1-based cluster labels
          sseq_params - dict          : params from compute_sseq_params """
    n_clusters = np.max(clusters)

    if sseq_params is None:
        print "Computing params..."
        sys.stdout.flush()
        sseq_params = compute_sseq_params(matrix.m)

    # Create a numpy array with 3*K columns;
    # each group of 3 columns is mean, log2, pvalue for cluster i
    all_de_results = np.zeros((matrix.genes_dim, 3*n_clusters))

    for cluster in xrange(1, 1+n_clusters):
        in_cluster = clusters == cluster
        group_a = np.flatnonzero(in_cluster)
        group_b = np.flatnonzero(np.logical_not(in_cluster))
        print 'Computing DE for cluster %d...' % cluster
        sys.stdout.flush()

        de_result = sseq_differential_expression(
            matrix.m, group_a, group_b, sseq_params)
        all_de_results[:, 0+3*(cluster-1)] = de_result['norm_mean_a']
        all_de_results[:, 1+3*(cluster-1)] = de_result['log2_fold_change']
        all_de_results[:, 2+3*(cluster-1)] = de_result['adjusted_p_value']

    return DIFFERENTIAL_EXPRESSION(all_de_results)
def select_nonzero_axes(self):
    new_mat = GeneBCMatrix(list(self.genes), list(self.bcs))
    new_mat.m = self.m

    nonzero_bcs = np.flatnonzero(new_mat.get_reads_per_bc())
    if new_mat.bcs_dim > len(nonzero_bcs):
        new_mat = new_mat.select_barcodes(nonzero_bcs)

    nonzero_genes = np.flatnonzero(new_mat.get_reads_per_gene())
    if new_mat.genes_dim > len(nonzero_genes):
        new_mat = new_mat.select_genes(nonzero_genes)

    return new_mat, nonzero_bcs, nonzero_genes
def gridsearch_report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            LOGINFO("Model with rank: {0}".format(i))
            LOGINFO("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            LOGINFO("Parameters: {0}".format(results['params'][candidate]))


###################################
## NON-PERIODIC VAR FEATURE LIST ##
###################################
def computeObjvals(self, getObjval):
    new = self.copy()
    compute_ind = np.flatnonzero(np.isnan(new.objvals))
    new.objvals[compute_ind] = map(getObjval, new.solutions[compute_ind])
    return new
def _iter_indices(self):
    for y_train, y_test in super(ShuffleLabelsOut, self)._iter_indices():
        # these are the indices of classes in the partition
        # invert them into data indices
        train = np.flatnonzero(np.in1d(self.y_indices, y_train))
        test = np.flatnonzero(np.in1d(self.y_indices, y_test))
        yield train, test
def get_chrom(abs_pos, chr_info=None, c=None):
    if chr_info is None:
        try:
            chr_info = get_chrom_names_cumul_len(c)
        except:
            return None
    try:
        chr_id = np.flatnonzero(chr_info[2] > abs_pos)[0] - 1
    except IndexError:
        return None
    return chr_info[0][chr_id]
def abs_coord_2_bin(c, pos, chr_info):
    try:
        chr_id = np.flatnonzero(chr_info[2] > pos)[0] - 1
    except IndexError:
        return c.info['nbins']
    chrom = chr_info[0][chr_id]
    relPos = pos - chr_info[2][chr_id]
    return c.offset((chrom, relPos, chr_info[1][chrom]))
def prepare(self, isrc, tbudget, preallocate_aggressively, prediscretize, stderr=None):
    # preallocate_aggressively = {-1: minimize dynamic memory usage, 0: minimize initialization latency, 1: maximize speed}
    network = self.network
    if stderr is not None: tprev = timeit.default_timer(); print_("Computing optimal update order...", end=' ', file=stderr)
    ibudget = int(discretize_up(numpy.asarray([tbudget], float), self.discretization))
    (stack, _, visited_iedges, end_itimes) = dijkstra(network, False, isrc, self.timins, True, ibudget, self.min_itimes_to_dest)
    eused = numpy.flatnonzero((0 <= visited_iedges) & (visited_iedges <= ibudget)).tolist()
    self.end_itimes = end_itimes.tolist()
    if stderr is not None: print_(int((timeit.default_timer() - tprev) * 1000), "ms", file=stderr); del tprev
    if prediscretize:
        if stderr is not None: tprev = timeit.default_timer(); print_("Discretizing edges...", end=' ', file=stderr)
        for eiused, tidist in zip(eused, network.discretize_edges(
                list(map(network.edges.hmm.__getitem__, eused)),
                list(map(network.edges.tmin.__getitem__, eused)),
                self.discretization,
                suppress_calculation=self.suppress_calculation)):
            self.cached_edges_tidist[eiused] = tidist
        if stderr is not None: print_(int((timeit.default_timer() - tprev) * 1000), "ms", file=stderr); del tprev
    if preallocate_aggressively >= 0:
        # uv[i][t] should be the probability of reaching the destination from node i in <= t steps (so for T = 0 we get uv[idst] == [1.0])
        # Rationale: uv[i] should be the convolution of edge[i,j] with uv[j], with no elements missing.
        for i in xrange(len(self.min_itimes_to_dest)):
            m = max(self.end_itimes[i] - self.min_itimes_to_dest[i], 0)
            self.uv[i].ensure_size(m, preallocate_aggressively > 0)
        for eij in eused:
            m = max(self.end_itimes[network.edges.begin[eij]] - (self.timins[eij] + self.min_itimes_to_dest[network.edges.end[eij]]), 0)
            self.ue[eij].ensure_size(m, preallocate_aggressively > 0)
            self.we[eij].ensure_size(m, preallocate_aggressively > 0)
    return (stack, eused, ibudget)
def _compute_trial_pred_labels_from_cnt_y(self, dataset, all_preds):
    # Todo: please test this
    # we only want the preds that are for the same labels as the last label in y
    # (there might be parts of other class-data at start, for trialwise misclass we assume
    # they are contained in other trials at the end...)
    preds_per_trial = compute_preds_per_trial_for_set(
        all_preds, self.input_time_length, dataset)
    trial_labels = []
    trial_pred_labels = []
    for trial_pred, trial_y in zip(preds_per_trial, dataset.y):
        # first cut to the part actually having predictions
        trial_y = trial_y[-trial_pred.shape[1]:]
        wanted_class = trial_y[-1]
        trial_labels.append(wanted_class)
        # extract the first marker different from the wanted class
        # by starting from the back of the trial
        i_last_sample = np.flatnonzero(trial_y[::-1] != wanted_class)
        if len(i_last_sample) > 0:
            i_last_sample = i_last_sample[0]
            # remember last sample is now from back
            trial_pred = trial_pred[:, -i_last_sample:]
        trial_pred_label = np.argmax(np.mean(trial_pred, axis=1))
        trial_pred_labels.append(trial_pred_label)
    trial_labels = np.array(trial_labels)
    trial_pred_labels = np.array(trial_pred_labels)
    return trial_labels, trial_pred_labels
def grid_report(results, n_top=3):
    r"""Report the top grid search scores.

    Parameters
    ----------
    results : dict of numpy arrays
        Mean test scores for each grid search iteration.
    n_top : int, optional
        The number of grid search results to report.

    Returns
    -------
    None : None

    """
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            logger.info("Model with rank: {0}".format(i))
            logger.info("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            logger.info("Parameters: {0}".format(results['params'][candidate]))


#
# Function hyper_grid_search
#
def peak_finder(spectrum, energy):
    '''
    PEAK_FINDER will search for peaks within a certain range determined by
    the Energy given. It takes a spectrum object and an Energy value as
    input. The energy range to look in is given by the Full-Width-Half-Maximum
    (FWHM). If more than one peak is found in the given range, the peak with
    the highest amount of counts will be used.
    '''
    e0 = spectrum.energy_cal[0]
    eslope = spectrum.energy_cal[1]
    energy_axis = e0 + eslope*spectrum.channel
    peak_energy = []
    # rough estimate of fwhm.
    fwhm = 0.05*energy**0.5
    fwhm_range = 1

    # peak search area
    start_region = np.flatnonzero(energy_axis > energy - fwhm_range * fwhm)[0]
    end_region = np.flatnonzero(energy_axis > energy + fwhm_range * fwhm)[0]
    y = spectrum.data[start_region:end_region]
    indexes = peakutils.indexes(y, thres=0.5, min_dist=4)
    tallest_peak = []
    if indexes.size == 0:
        peak_energy.append(int((end_region - start_region) / 2) + start_region)
    else:
        for i in range(indexes.size):
            spot = spectrum.data[indexes[i]+start_region]
            tallest_peak.append(spot)
        indexes = indexes[np.argmax(tallest_peak)]
        peak_energy.append(int(indexes+start_region))
    peak_energy = float(energy_axis[peak_energy])
    return(peak_energy)
def peak_finder(spectrum, energy):
    '''
    PEAK_FINDER will search for peaks within a certain range determined by
    the Energy given. It takes a Spectra file and an Energy value as
    input. The energy range to look in is given by the Full-Width-Half-Maximum
    (FWHM). If more than one peak is found in the given range, the peak with
    the highest amount of counts will be used.
    '''
    e0 = spectrum.energy_cal[0]
    eslope = spectrum.energy_cal[1]
    energy_axis = e0 + eslope*spectrum.channel
    peak_energy = []
    # rough estimate of fwhm.
    fwhm = 0.05*energy**0.5
    fwhm_range = 1

    # peak search area
    start_region = np.flatnonzero(energy_axis > energy - fwhm_range * fwhm)[0]
    end_region = np.flatnonzero(energy_axis > energy + fwhm_range * fwhm)[0]
    y = spectrum.data[start_region:end_region]
    indexes = peakutils.indexes(y, thres=0.5, min_dist=4)
    tallest_peak = []
    if indexes.size == 0:
        peak_energy.append(int((end_region - start_region) / 2) + start_region)
    else:
        for i in range(indexes.size):
            spot = spectrum.data[indexes[i]+start_region]
            tallest_peak.append(spot)
        indexes = indexes[np.argmax(tallest_peak)]
        peak_energy.append(int(indexes+start_region))
    peak_energy = float(energy_axis[peak_energy])
    return(peak_energy)
def _transform2quat(self):
    """Construct quaternion from the transform/rotation matrix

    :returns: quaternion formed from transform matrix
    :rtype: numpy array
    """
    # Code was copied from perl PDL code that uses backwards index ordering
    T = self.transform.transpose()
    den = np.array([1.0 + T[0, 0] - T[1, 1] - T[2, 2],
                    1.0 - T[0, 0] + T[1, 1] - T[2, 2],
                    1.0 - T[0, 0] - T[1, 1] + T[2, 2],
                    1.0 + T[0, 0] + T[1, 1] + T[2, 2]])

    max_idx = np.flatnonzero(den == max(den))[0]

    q = np.zeros(4)
    q[max_idx] = 0.5 * sqrt(max(den))
    denom = 4.0 * q[max_idx]
    if (max_idx == 0):
        q[1] = (T[1, 0] + T[0, 1]) / denom
        q[2] = (T[2, 0] + T[0, 2]) / denom
        q[3] = -(T[2, 1] - T[1, 2]) / denom
    if (max_idx == 1):
        q[0] = (T[1, 0] + T[0, 1]) / denom
        q[2] = (T[2, 1] + T[1, 2]) / denom
        q[3] = -(T[0, 2] - T[2, 0]) / denom
    if (max_idx == 2):
        q[0] = (T[2, 0] + T[0, 2]) / denom
        q[1] = (T[2, 1] + T[1, 2]) / denom
        q[3] = -(T[1, 0] - T[0, 1]) / denom
    if (max_idx == 3):
        q[0] = -(T[2, 1] - T[1, 2]) / denom
        q[1] = -(T[0, 2] - T[2, 0]) / denom
        q[2] = -(T[1, 0] - T[0, 1]) / denom

    return q
def check_blank_slices(volume, slice_dim='z'):
    idx_dim = 'zyx'.find(slice_dim)
    axes_other = tuple(i for i in range(3) if (i != idx_dim))
    assert idx_dim >= 0
    means = np.mean(volume, axis=axes_other)
    assert means.ndim == 1
    threshold = 10
    median_of_means = np.median(means)
    mask_bads = np.logical_or(means < threshold,
                              means < 0.5*median_of_means)
    if np.count_nonzero(mask_bads):
        idx_bads = np.flatnonzero(mask_bads)
        msg = 'bad {:s}: {:s}'.format(slice_dim, str(tuple(idx_bads)))
        return False, msg
    return True, 'okay'