The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.count_nonzero().
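Before the project examples, here is a minimal self-contained sketch of the basic call patterns. The array values are purely illustrative and are not taken from any of the listed projects; note that the axis argument requires NumPy 1.12 or newer.

import numpy as np

a = np.array([[0, 1, 7, 0],
              [3, 0, 2, 19]])

# Count every non-zero element in the array.
print(np.count_nonzero(a))          # 5

# Count per column (axis support was added in NumPy 1.12).
print(np.count_nonzero(a, axis=0))  # [1 1 2 1]

# Counting elements that satisfy a condition is the most common idiom in the
# examples below: the True values of the boolean mask are the non-zeros.
print(np.count_nonzero(a > 2))      # 3
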
def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics

def action_label_counts(directory, data_loader, n_actions=18, n=None):
    episode_paths = frame.episode_paths(directory)
    label_counts = [0, 0]
    action_label_counts = [[0, 0] for i in range(n_actions)]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except:
            traceback.print_exc()
        else:
            for label in range(len(label_counts)):
                label_counts[label] += np.count_nonzero(labels == label)
                for action in range(n_actions):
                    actions = np.reshape(np.array(features["action"]), [-1])
                    action_label_counts[action][label] += np.count_nonzero(
                        np.logical_and(labels == label, actions == action))
    return label_counts, action_label_counts

def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  ## From softmax pair to prob of catastrophe
    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics

def _set_seq_qual_metrics(self, seq, qual, seq_type, cache):
    cache.seq_types.add(seq_type)

    qvs = tk_fasta.get_qvs(qual)
    num_bases_q30 = np.count_nonzero(qvs >= 30)

    # Don't count no-calls towards Q30 denominator.
    # Assume no-calls get Q <= 2
    num_bases_called = np.count_nonzero(qvs > 2)

    num_bases = len(seq)
    num_bases_n = seq.count('N')

    cache.total_bases[seq_type] += num_bases
    cache.called_bases[seq_type] += num_bases_called
    cache.q30_bases[seq_type] += num_bases_q30
    cache.n_bases[seq_type] += num_bases_n

def test_fill_missing():
    info = CloudVolume.create_new_info(
        num_channels=1,  # Increase this number when we add more tests for RGB
        layer_type='image',
        data_type='uint8',
        encoding='raw',
        resolution=[1, 1, 1],
        voxel_offset=[0, 0, 0],
        volume_size=[128, 128, 64],
        mesh='mesh',
        chunk_size=[64, 64, 64],
    )

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, info=info)
    vol.commit_info()

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, fill_missing=True)
    assert np.count_nonzero(vol[:]) == 0

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, fill_missing=True, cache=True)
    assert np.count_nonzero(vol[:]) == 0
    assert np.count_nonzero(vol[:]) == 0

    vol.flush_cache()
    delete_layer('/tmp/cloudvolume/empty_volume')

def _constrained_sum_sample_pos(n, total):
    # in this setting, there will be no empty groups generated by this function
    n = int(n)
    total = int(total)
    normalized_list = [int(total) + 1]
    while sum(normalized_list) > total and np.greater_equal(normalized_list, np.zeros(n)).all():
        indicator = True
        while indicator:
            normalized_list = list(map(round, map(lambda x: x * total,
                                                  np.random.dirichlet(np.ones(n), 1).tolist()[0])))
            normalized_list = list(map(int, normalized_list))
            indicator = len(normalized_list) - np.count_nonzero(normalized_list) != 0

        sum_ = 0
        for ind, q in enumerate(normalized_list):
            if ind < len(normalized_list) - 1:
                sum_ += q

        # TODO: there is a bug here; sometimes it assigns -1 to the end of the array,
        # but passes the while condition
        normalized_list[len(normalized_list) - 1] = abs(total - sum_)

    assert sum(normalized_list) == total, "ERROR: the constrainedSumSamplePos-sampled list does not sum to #edges."

    return map(str, normalized_list)

def calculateCoM(self, dpt):
    """
    Calculate the center of mass
    :param dpt: depth image
    :return: (x,y,z) center of mass
    """

    dc = dpt.copy()
    dc[dc < self.minDepth] = 0
    dc[dc > self.maxDepth] = 0
    cc = ndimage.measurements.center_of_mass(dc > 0)
    num = numpy.count_nonzero(dc)
    com = numpy.array((cc[1]*num, cc[0]*num, dc.sum()), numpy.float)

    if num == 0:
        return numpy.array((0, 0, 0), numpy.float)
    else:
        return com/num

def compute_test_accuracy(X_test, Y_test, model, prediction_type, cellgroup_map_array):
    prediction = model.predict(X_test)
    auc = []

    if prediction_type == "cellgroup":
        prediction = np.dot(prediction, cellgroup_map_array)
        Y_test = np.dot(Y_test, cellgroup_map_array)

    mask = ~np.logical_or(Y_test.sum(1) == 0, Y_test.sum(1) == Y_test.shape[1])

    for y, pred in zip(Y_test.T, prediction.T):
        pos = np.logical_and(mask, y == 1)
        neg = np.logical_and(mask, y == 0)
        try:
            U = stats.mannwhitneyu(pred[pos], pred[neg])[0]
            auc.append(1. - U / (np.count_nonzero(pos) * np.count_nonzero(neg)))
        except ValueError:
            auc.append(0.5)

    return auc

def aePredict(self, graph):
    self.initCG()
    graph = graph.cleaned()
    carriers = self.getLSTMFeatures(graph.nodes)

    beamconf = AEBeamConfiguration(len(graph.nodes), 1, np.array(graph.heads),
                                   self.stack_features, self.buffer_features)
    beamconf.initconf(0, self.root_first)

    while not beamconf.isComplete(0):
        valid = beamconf.validTransitions(0)
        if np.count_nonzero(valid) < 1:
            break
        scores, exprs = self._aeEvaluate(beamconf.extractFeatures(0), carriers)
        best, bestscore = max(((i, s) for i, s in enumerate(scores) if valid[i]), key=itemgetter(1))
        beamconf.makeTransition(0, best)

    graph.heads = [i if i > 0 else 0 for i in list(beamconf.getHeads(0))]

    return graph

def test_nonzero_twodim(self):
    x = np.array([[0, 1, 0], [2, 0, 3]])
    assert_equal(np.count_nonzero(x), 3)
    assert_equal(np.nonzero(x), ([0, 1, 1], [1, 0, 2]))

    x = np.eye(3)
    assert_equal(np.count_nonzero(x), 3)
    assert_equal(np.nonzero(x), ([0, 1, 2], [0, 1, 2]))

    x = np.array([[(0, 1), (0, 0), (1, 11)],
                  [(1, 1), (1, 0), (0, 0)],
                  [(0, 0), (1, 5), (0, 1)]], dtype=[('a', 'f4'), ('b', 'u1')])
    assert_equal(np.count_nonzero(x['a']), 4)
    assert_equal(np.count_nonzero(x['b']), 5)
    assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1]))
    assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2]))

    assert_(not x['a'].T.flags.aligned)
    assert_equal(np.count_nonzero(x['a'].T), 4)
    assert_equal(np.count_nonzero(x['b'].T), 5)
    assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0]))
    assert_equal(np.nonzero(x['b'].T), ([0, 0, 1, 2, 2], [0, 1, 2, 0, 2]))

def test_zeros(self):
    types = np.typecodes['AllInteger'] + np.typecodes['AllFloat']
    for dt in types:
        d = np.zeros((13,), dtype=dt)
        assert_equal(np.count_nonzero(d), 0)
        # true for ieee floats
        assert_equal(d.sum(), 0)
        assert_(not d.any())

    d = np.zeros(2, dtype='(2,4)i4')
    assert_equal(np.count_nonzero(d), 0)
    assert_equal(d.sum(), 0)
    assert_(not d.any())

    d = np.zeros(2, dtype='4i4')
    assert_equal(np.count_nonzero(d), 0)
    assert_equal(d.sum(), 0)
    assert_(not d.any())

    d = np.zeros(2, dtype='(2,4)i4, (2,4)i4')
    assert_equal(np.count_nonzero(d), 0)

def decode(self, vec, pretty=False, strict=True):
    # TODO: Whether we should use 'strict' mode depends on whether the model
    # we got this vector from does softmax sampling of visibles. Anywhere this
    # is called on fantasy samples, we should use the model to set this param.
    if issparse(vec):
        vec = vec.toarray().reshape(-1)
    assert vec.shape == (self.nchars * self.maxlen,)
    chars = []
    for position_index in range(self.maxlen):
        # Hack - insert a tab between name parts in binomial mode
        if isinstance(self, BinomialShortTextCodec) and pretty and position_index == self.maxlen/2:
            chars.append('\t')
        subarr = vec[position_index * self.nchars:(position_index + 1) * self.nchars]
        if np.count_nonzero(subarr) != 1 and strict:
            char = self.MYSTERY
        else:
            char_index = np.argmax(subarr)
            char = self.alphabet[char_index]
            if pretty and char == self.FILLER:
                # Hack
                char = ' ' if isinstance(self, BinomialShortTextCodec) else ''
        chars.append(char)
    return ''.join(chars)

def count_per_month(career_months_array):
    '''Month_Form
    Returns number of employees remaining for each month (not retired).
    Cumulative sum of career_months_array input (np array) that are
    greater or equal to each incremental loop month number.

    Note: alternate method to this function is value count of mnums:
    df_actives_each_month = pd.DataFrame(df_idx.mnum.value_counts())
    df_actives_each_month.columns = ['count']

    input
        career_months_array
            output of career_months function.  This input is an array
            containing the number of months each employee will work
            until retirement.
    '''
    max_career = career_months_array.max() + 1
    emp_count_array = np.zeros(max_career)

    for i in range(0, max_career):
        emp_count_array[i] = np.count_nonzero(career_months_array >= i)

    return emp_count_array.astype(int)


# GENERATE MONTH SKELETON

def describe_numeric_1d(series, **kwargs):
    stats = {'mean': series.mean(), 'std': series.std(), 'variance': series.var(),
             'min': series.min(), 'max': series.max()}
    stats['range'] = stats['max'] - stats['min']

    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        # The dropna() is a workaround for https://github.com/pydata/pandas/issues/13098
        stats[pretty_name(x)] = series.dropna().quantile(x)
    stats['iqr'] = stats['75%'] - stats['25%']
    stats['kurtosis'] = series.kurt()
    stats['skewness'] = series.skew()
    stats['sum'] = series.sum()
    stats['mad'] = series.mad()
    stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.NaN
    stats['type'] = "NUM"
    stats['n_zeros'] = (len(series) - np.count_nonzero(series))
    stats['p_zeros'] = stats['n_zeros'] / len(series)
    # Histograms
    stats['histogram'] = histogram(series, **kwargs)
    stats['mini_histogram'] = mini_histogram(series, **kwargs)
    return pd.Series(stats, name=series.name)

def get_symmetry_code_tri(pts):
    if len(pts) == 1:
        return '_s3()'
    elif len(pts) == 3:
        # Symmetry group [[a, a, b], [a, b, a], [b, a, a]].
        # Find the equal value `a`.
        tol = 1.0e-12
        beta = pts[0] - pts[0][0]
        ct = numpy.count_nonzero(abs(beta) < tol)
        assert ct in [1, 2], beta
        val = pts[0][0] if ct == 2 else pts[0][1]
        return '_s21({:.15e})'.format(val)

    # Symmetry group [[a, b, c], [c, a, b], ...].
    assert len(pts) == 6
    # Take the two largest values from a, b, c.
    pt0 = numpy.sort(pts[0])
    return '_s111({:.15e}, {:.15e})'.format(pt0[2], pt0[1])

def score(self):
    'Return score from B perspective. If W is winning, score is negative.'
    working_board = np.copy(self.board)
    while EMPTY in working_board:
        unassigned_spaces = np.where(working_board == EMPTY)
        c = unassigned_spaces[0][0], unassigned_spaces[1][0]
        territory, borders = find_reached(working_board, c)
        border_colors = set(working_board[b] for b in borders)
        X_border = BLACK in border_colors
        O_border = WHITE in border_colors
        if X_border and not O_border:
            territory_color = BLACK
        elif O_border and not X_border:
            territory_color = WHITE
        else:
            territory_color = UNKNOWN  # dame, or seki
        place_stones(working_board, territory_color, territory)

    return np.count_nonzero(working_board == BLACK) - np.count_nonzero(working_board == WHITE) - self.komi

def step(self):
    """
    Half of the step of k-means
    """
    if self.step_completed:
        d = self.data.X
        points = [d[self.clusters == i] for i in range(len(self.centroids))]
        for i in range(len(self.centroids)):
            c_points = points[i]
            self.centroids[i, :] = (np.average(c_points, axis=0)
                                    if len(c_points) > 0 else np.nan)
        # reinitialize empty centroids
        nan_c = np.isnan(self.centroids).any(axis=1)
        if np.count_nonzero(nan_c) > 0:
            self.centroids[nan_c] = self.random_positioning(
                np.count_nonzero(nan_c))
        self.centroids_moved = True
    else:
        self.clusters = self.find_clusters(self.centroids)
        self.centroids_moved = False
    self.step_no += 1
    self.centroids_history = self.set_list(
        self.centroids_history, self.step_no, np.copy(self.centroids))

def rmse(self, tid_counts):
    error = np.zeros(shape=[self.cls_nb])
    err_nb = 0

    self._progress('\ntid \t true_count \t obs_count \t difference', end='\n', verbosity=VERBOSITY.VERBOSE)
    for tid in tid_counts:
        true_counts = self.tid_counts[tid]
        obs_counts = tid_counts[tid]
        diff = np.asarray(true_counts) - np.asarray(obs_counts)
        err_nb += np.count_nonzero(diff)
        error += diff*diff

        if diff.any():
            self._progress('{} \t{} \t{} \t{}'.format(tid, true_counts, obs_counts, diff),
                           end='\n', verbosity=VERBOSITY.VERBOSE)

    error /= len(tid_counts)
    rmse = np.sqrt(error).sum() / self.cls_nb
    error_fraction = err_nb / (len(tid_counts) * self.cls_nb)

    return rmse, error_fraction

def test_rank_archimedean_spiral():
    def archimedean_spiral(n_steps=100, max_radius=1.0, turns=4.0):
        r = np.linspace(0.0, max_radius, n_steps)
        angle = r * 2.0 * np.pi * turns / max_radius
        x = r * np.cos(angle)
        y = r * np.sin(angle)
        return np.hstack((x[:, np.newaxis], y[:, np.newaxis])), r

    X_train, r_train = archimedean_spiral(n_steps=100)
    X_test, r_test = archimedean_spiral(n_steps=1000, max_radius=1.1)

    rsvm = RankingSVM(random_state=0)
    rsvm.fit(X_train)
    y_train = rsvm.predict(X_train)
    y_test = rsvm.predict(X_test)

    assert_true(np.all(y_train[1:] < y_train[:-1]))
    assert_greater(np.count_nonzero(y_test[1:] < y_test[:-1]), 970)

def ser(x, y):
    """Measure symbol error rate between symbols in x and y.

    :param x: symbol array #1
    :param y: symbol array #2
    :returns: symbol error rate

    >>> import arlpy
    >>> arlpy.comms.ser([0,1,2,3], [0,1,2,2])
    0.25
    """
    x = _np.asarray(x, dtype=_np.int)
    y = _np.asarray(y, dtype=_np.int)
    n = _np.product(_np.shape(x))
    e = _np.count_nonzero(x^y)
    return float(e)/n

def tokenize(self, file_name):
    """Tokenizes the file and produces a dataset."""
    lines = read_lines(file_name)
    random.shuffle(lines)

    unk = self.word_dict.get_idx('<unk>')
    dataset, total, unks = [], 0, 0
    for line in lines:
        tokens = line.split()
        input_idxs = self.context_dict.w2i(get_tag(tokens, 'input'))
        word_idxs = self.word_dict.w2i(get_tag(tokens, 'dialogue'))
        item_idxs = self.item_dict.w2i(get_tag(tokens, 'output'))
        dataset.append((input_idxs, word_idxs, item_idxs))
        # compute statistics
        total += len(input_idxs) + len(word_idxs) + len(item_idxs)
        unks += np.count_nonzero([idx == unk for idx in word_idxs])

    if self.verbose:
        print('dataset %s, total %d, unks %s, ratio %0.2f%%' % (
            file_name, total, unks, 100. * unks / total))
    return dataset

def polyfit_baseline(bands, intensities, poly_order=5, num_stdv=3.,
                     max_iter=200):
    '''Iteratively fits a polynomial, discarding far away points as peaks.
    Similar in spirit to ALS and related methods.
    Automated method for subtraction of fluorescence from biological Raman spectra
    Lieber & Mahadevan-Jansen 2003
    '''
    fit_pts = intensities.copy()
    # precalculate [x^p, x^p-1, ..., x^1, x^0]
    poly_terms = bands[:, None] ** np.arange(poly_order, -1, -1)
    for _ in range(max_iter):
        coefs = np.polyfit(bands, fit_pts.T, poly_order)
        baseline = poly_terms.dot(coefs).T
        diff = fit_pts - baseline
        thresh = diff.std(axis=-1) * num_stdv
        mask = diff > np.array(thresh, copy=False)[..., None]
        unfitted = np.count_nonzero(mask)
        if unfitted == 0:
            break
        fit_pts[mask] = baseline[mask]  # these points are peaks, discard
    else:
        print("Warning: polyfit_baseline didn't converge in %d iters" % max_iter)
    return baseline

def restore_shape(arry, step, r):
    '''Reduces and adjusts the shape and content of `arry` according to r.

    Args:
      arry: A 2d array with shape of [T, C]
      step: An int. Overlapping span.
      r: Reduction factor

    Returns:
      A 2d array with shape of [-1, C*r]
    '''
    T, C = arry.shape
    sliced = np.split(arry, list(range(step, T, step)), axis=0)

    started = False
    for s in sliced:
        if not started:
            restored = np.vstack(np.split(s, r, axis=1))
            started = True
        else:
            restored = np.vstack((restored, np.vstack(np.split(s, r, axis=1))))

    # Trim zero paddings
    restored = restored[:np.count_nonzero(restored.sum(axis=1))]
    return restored

def get_index_first_non_zero_slice(self, dimension):
    """Get the index of the first non zero slice in this map.

    Args:
        dimension (int): the dimension to search in

    Returns:
        int: the slice index with the first non zero values.
    """
    slice_index = [slice(None)] * (self.max_dimension() + 1)

    if dimension > len(slice_index) - 1:
        raise ValueError('The given dimension {} is not supported.'.format(dimension))

    for index in range(self.shape[dimension]):
        slice_index[dimension] = index
        if np.count_nonzero(self.data[slice_index]) > 0:
            return index
    return 0

def test_get_mask():
    chunk = test_get_chunks(n_chunks=1)[0]
    distance = 3
    n_side = 32

    mask = get_mask(distance, chunk.shape, dims=(2, 1, 0))
    n_side_shell = n_side - 2*distance
    count_exp = 2*n_side_shell**2 + (n_side_shell - 1)*4*(n_side_shell - 2)
    count_got = np.count_nonzero(mask)
    print('DEBUG: non-zeros exp: {} | got: {}'.format(count_exp, count_got))
    assert count_exp == count_got

    distance = 5
    n_side_shell = n_side - 2*distance
    mask = get_mask(distance, chunk.shape, dims=(2, 1))
    count_exp = (n_side_shell - 1)*4*n_side
    count_got = np.count_nonzero(mask)
    print('DEBUG: non-zeros exp: {} | got: {}'.format(count_exp, count_got))
    assert count_exp == count_got

def get_padded_seq_lengths(padded):
    """Returns the number of (seq_len) non-nan elements per sequence.

    :param padded: 2d or 3d tensor with dim 2 the time dimension
    """
    if len(padded.shape) == 2:
        # (n_seqs, n_timesteps)
        seq_lengths = np.count_nonzero(~np.isnan(padded), axis=1)
    elif len(padded.shape) == 3:
        # (n_seqs, n_timesteps, n_features, ...)
        seq_lengths = np.count_nonzero(~np.isnan(padded[:, :, 0]), axis=1)
    else:
        print('not yet implemented')  # TODO
    return seq_lengths

def import_data(data_csvs_in, types_csv_in, values_csv_in, groups_csv_in, dataset_out,
                encoding='utf-8'):
    """Import a comma-delimited list of csv files into internal treecat format.

    Common encodings include: utf-8, cp1252.
    """
    schema = load_schema(types_csv_in, values_csv_in, groups_csv_in, encoding)
    data = np.concatenate([
        load_data(schema, data_csv_in, encoding)
        for data_csv_in in data_csvs_in.split(',')
    ])
    data.flags.writeable = False
    print('Imported data shape: [{}, {}]'.format(data.shape[0], data.shape[1]))
    ragged_index = schema['ragged_index']
    for v, name in enumerate(schema['feature_names']):
        beg, end = ragged_index[v:v + 2]
        count = np.count_nonzero(data[:, beg:end].max(1))
        if count == 0:
            print('WARNING: No values found for feature {}'.format(name))
    feature_types = [TY_MULTINOMIAL] * len(schema['feature_names'])
    table = Table(feature_types, ragged_index, data)
    dataset = {
        'schema': schema,
        'table': table,
    }
    pickle_dump(dataset, dataset_out)

def build_feature_files(base_directory, new_directory, data_loader, n=None,
                        negative_example_keep_prob=1.0):
    os.makedirs(new_directory, exist_ok=False)
    episode_paths = frame.episode_paths(base_directory)
    label_counts = [0, 0]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except:
            traceback.print_exc()
        else:
            keep = np.logical_or(labels, (np.less(
                np.random.rand(len(labels)), negative_example_keep_prob)))
            labels = labels[keep]
            for i in range(len(label_counts)):
                label_counts[i] += np.count_nonzero(labels == i)
            features = {k: v[keep] for k, v in features.items()}
            new_path = path_relative_to_new_directory(base_directory, new_directory,
                                                      episode_path, ".features")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            with open(new_path, 'wb') as f:
                pickle.dump((features, labels), f)
    return label_counts

def threshold_from_data(self, X, y):
    y_bool = y == 1.  ## true if x is a catast
    y_pred = self.predict_proba(X)
    if np.count_nonzero(y) == 0:
        return np.max(y_pred)
    return np.min(y_pred[y_bool][:, 1])  # TODO CHANGED FROM WILL CODE

def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")
    """
    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])
    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]
    so_far = [0, 0]
    j = 0
    for i in reversed(range(len(y_sorted))):
        so_far[y_sorted[i]] += 1
        if so_far[1] >= int(np.floor(recall * n_positive)):
            j = i
            break
    so_far = [0, 0]
    if false_positive_margin == 0:
        return y_pred_sorted[j]
    k = 0
    for i in reversed(range(j)):
        so_far[y_sorted[i]] += 1
        if so_far[0] >= false_positive_margin * n_negative:
            k = i
            break
    return y_pred_sorted[k]

def predict_proba(self, features):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(features))
    return np.count_nonzero(predictions)

def predict_proba_raw(self, obs=None, action=None):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(obs, action))
    return np.count_nonzero(predictions)

def predict_raw(self, obs=None, action=None):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(obs, action))
    return self.apply_threshold(np.count_nonzero(predictions))

def predict_raw_with_score(self, obs=None, action=None):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict_raw(obs, action))
    return self.apply_threshold(np.count_nonzero(predictions)), np.count_nonzero(predictions)

def predict(self, features):
    predictions = []
    for classifier in self.classifiers:
        predictions.append(classifier.predict(features))
    return self.apply_threshold(np.count_nonzero(predictions))