The following 48 code examples, extracted from open-source Python projects, illustrate how to use numpy.argpartition().
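Before the project code, here is a minimal sketch of the pattern most of these examples share: np.argpartition selects the k largest (or smallest) entries without fully sorting the array, and a follow-up np.argsort orders only those k indices. The array and k below are made up purely for illustration.

import numpy as np

scores = np.array([0.1, 0.9, 0.4, 0.7, 0.3])
k = 2

# Indices of the k largest values, in no particular order (partial partition, no full sort).
top_k = np.argpartition(scores, -k)[-k:]

# Sort only those k indices so the result is ordered by descending score.
top_k = top_k[np.argsort(-scores[top_k])]

print(top_k)          # [1 3]
print(scores[top_k])  # [0.9 0.7]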
def test_partition_cdtype(self):
    d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                  ('Lancelot', 1.9, 38)],
                 dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

    tgt = np.sort(d, order=['age', 'height'])
    assert_array_equal(np.partition(d, range(d.size),
                                    order=['age', 'height']),
                       tgt)
    assert_array_equal(d[np.argpartition(d, range(d.size),
                                         order=['age', 'height'])],
                       tgt)
    for k in range(d.size):
        assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                     tgt[k])
        assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                     tgt[k])

    d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
    tgt = np.sort(d)
    assert_array_equal(np.partition(d, range(d.size)), tgt)
    for k in range(d.size):
        assert_equal(np.partition(d, k)[k], tgt[k])
        assert_equal(d[np.argpartition(d, k)][k], tgt[k])
def format_lines(video_ids, predictions, labels, top_k):
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        n_recall = max(int(numpy.sum(labels[video_index])), 1)
        # labels
        label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
        label_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in label_indices]
        label_predictions = sorted(label_predictions, key=lambda p: -p[1])
        label_str = "\t".join(["%d\t%f" % (x, y) for x, y in label_predictions])

        # predictions
        top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
        top_k_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in top_k_indices]
        top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
        top_k_str = "\t".join(["%d\t%f" % (x, y) for x, y in top_k_predictions])

        # compute PERR
        top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
        positives = [labels[video_index][class_index] for class_index in top_n_indices]
        perr = sum(positives) / float(n_recall)

        # URL
        url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')

        yield url + "\t" + str(1 - perr) + "\t" + top_k_str + "\t" + label_str + "\n"
def argpartition(a, kth, axis=-1):
    """Returns the indices that would partially sort an array.

    Args:
        a (cupy.ndarray): Array to be sorted.
        kth (int or sequence of ints): Element index to partition by. If
            supplied with a sequence of k-th it will partition all elements
            indexed by k-th of them into their sorted position at once.
        axis (int or None): Axis along which to sort. Default is -1, which
            means sort along the last axis. If None is supplied, the array
            is flattened before sorting.

    Returns:
        cupy.ndarray: Array of the same type and shape as ``a``.

    .. note::
        For its implementation reason, `cupy.argpartition` fully sorts the
        given array as `cupy.argsort` does. It also does not support ``kind``
        and ``order`` parameters that ``numpy.argpartition`` supports.

    .. seealso:: :func:`numpy.argpartition`

    """
    return a.argpartition(kth, axis=axis)
def CSMToBinary(D, Kappa):
    """
    Turn a cross-similarity matrix into a binary cross-similarity matrix.
    If Kappa = 0, take all neighbors.
    If Kappa < 1, it is the fraction of mutual neighbors to consider.
    Otherwise, Kappa is the number of mutual neighbors to consider.
    """
    N = D.shape[0]
    M = D.shape[1]
    if Kappa == 0:
        return np.ones((N, M))
    elif Kappa < 1:
        NNeighbs = int(np.round(Kappa * M))
    else:
        NNeighbs = Kappa
    J = np.argpartition(D, NNeighbs, 1)[:, 0:NNeighbs]
    I = np.tile(np.arange(N)[:, None], (1, NNeighbs))
    V = np.ones(I.size)
    [I, J] = [I.flatten(), J.flatten()]
    ret = sparse.coo_matrix((V, (I, J)), shape=(N, M))
    return ret.toarray()
def closest_docs(self, query, k=1):
    """Closest docs by dot product between query and documents
    in tfidf weighted word vector space.
    """
    spvec = self.text2spvec(query)
    res = spvec * self.doc_mat

    if len(res.data) <= k:
        o_sort = np.argsort(-res.data)
    else:
        o = np.argpartition(-res.data, k)[0:k]
        o_sort = o[np.argsort(-res.data[o])]

    doc_scores = res.data[o_sort]
    doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
    return doc_ids, doc_scores
def bottom_top_k_along_row(arr, k, ordered=True):
    """
    bottom and top k of a 2d np.array, along the rows
    http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array/18691983
    """
    assert k > 0, "bottom_top_k_along_row/column() requires k>0."
    rows = arr.shape[0]
    if ordered:
        tmp = np.argsort(arr, axis=1)
        idx_bot = tmp[:, :k]
        idx_top = tmp[:, -k:]
    else:
        idx_bot = np.argpartition(arr, k, axis=1)[:, :k]
        idx_top = np.argpartition(arr, -k, axis=1)[:, -k:]
    indices = np.concatenate((idx_bot, idx_top), axis=1)
    vals = arr[np.repeat(np.arange(rows), 2 * k), indices.ravel()].reshape(rows, 2 * k)
    return vals, indices
def top_k_recommendations(self, sequence, k=10, exclude=None, **kwargs):
    if exclude is None:
        exclude = []

    last_item = int(sequence[-1][0])
    if last_item not in self.previous_recommendations:
        self.get_all_recommendations(last_item)

    all_recommendations = deepcopy(self.previous_recommendations[last_item])
    for s in sequence:
        all_recommendations[int(s[0])] = 0
    for i in exclude:
        all_recommendations[i] = 0

    ranking = np.zeros(self.n_items)
    for i, x in enumerate(all_recommendations.most_common(k)):
        ranking[x[0]] = k - i

    return np.argpartition(-ranking, range(k))[:k]
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
    '''
    if exclude is None:
        exclude = []

    last_item = sequence[-1][0]
    output = np.dot(self.V_user_item[user_id, :], self.V_item_user.T) \
        + np.dot(self.V_prev_next[last_item, :], self.V_next_prev.T)

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    return list(np.argpartition(-output, range(k))[:k])
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
    '''
    if exclude is None:
        exclude = []

    user_items = [i[0] for i in sequence]
    output = self.item_score(user_id, user_items)

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    return list(np.argpartition(-output, range(k))[:k])
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
    '''
    if exclude is None:
        exclude = []

    last_item = sequence[-1][0]
    output = self.bias + np.dot(self.V[user_id, :], self.H.T)

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    return list(np.argpartition(-output, range(k))[:k])
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None, **kwargs):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
    '''
    if exclude is None:
        exclude = []

    # Compile network if needed
    if not hasattr(self, 'predict_function'):
        self._compile_predict_function()

    # Prepare RNN input
    X = np.zeros((1, self._input_size()))  # input of the RNN
    X[0, :] = self._one_hot_encoding([i[0] for i in sequence])

    # Run RNN
    output = self.predict_function(X.astype(theano.config.floatX))[0]

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    return list(np.argpartition(-output, range(k))[:k])
def _compile_test_function(self):
    ''' Differs from base test function because of the added softmax operation
    '''
    print("Compiling test...")
    deterministic_output = T.nnet.softmax(lasagne.layers.get_output(self.l_out, deterministic=True))
    if self.interactions_are_unique:
        deterministic_output *= (1 - self.exclude)

    theano_test_function = theano.function(self.theano_inputs, deterministic_output,
                                           allow_input_downcast=True, name="Test_function",
                                           on_unused_input='ignore')

    def precision_test_function(theano_inputs, k=10):
        output = theano_test_function(*theano_inputs)
        ids = np.argpartition(-output, range(k), axis=-1)[0, :k]
        return ids

    self.test_function = precision_test_function
    print("Compilation done.")
def smallest_k(matrix: np.ndarray, k: int,
               only_first_row: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], np.ndarray]:
    """
    Find the smallest elements in a numpy matrix.

    :param matrix: Any matrix.
    :param k: The number of smallest elements to return.
    :param only_first_row: If true the search is constrained to the first row of the matrix.
    :return: The row indices, column indices and values of the k smallest items in matrix.
    """
    if only_first_row:
        flatten = matrix[:1, :].flatten()
    else:
        flatten = matrix.flatten()

    # args are the indices in flatten of the k smallest elements
    args = np.argpartition(flatten, k)[:k]
    # args are the indices in flatten of the sorted k smallest elements
    args = args[np.argsort(flatten[args])]
    # flatten[args] are the values for args
    return np.unravel_index(args, matrix.shape), flatten[args]
def probs(self, x):
    dists = np.hstack([self.distFunc(x, cls) for cls in self.trainData])
    indices = np.argpartition(dists, self.k, axis=1)[:, :self.k]

    #start = 0
    #votes = list()
    #for cls in self.trainData:
    #    end = start + cls.shape[0]
    #    votes.append(np.sum(np.logical_and(start <= indices, indices < end), axis=1))
    #    start = end

    ends = np.cumsum([len(cls) for cls in self.trainData])
    starts = ends - np.array([len(cls) for cls in self.trainData])
    votes = [np.sum(np.logical_and(start <= indices, indices < end), axis=1)
             for start, end in zip(starts, ends)]
    votes = np.vstack(votes).T

    #probs = np.zeros((x.shape[0], self.nCls))
    #probs[np.arange(probs.shape[0]), np.argmax(votes, axis=1)] = 1.0
    ##probs = util.softmax(votes / float(self.k))

    probs = votes / float(self.k)
    return probs
def argmaxk_rows_opt1(arr, k=10, sort=False):
    """
    Optimized implementation. When sort=False it is equal to argmaxk_rows_basic.
    When sort=True and k << arr.shape[1], it should be faster, because we argsort
    only the subarray of k max elements from each row of arr (arr.shape[0] x k)
    instead of the whole array arr (arr.shape[0] x arr.shape[1]).
    """
    best_inds = np.argpartition(arr, kth=-k, axis=1)[:, -k:]  # column indices of k max elements in each row (m x k)
    if not sort:
        return best_inds
    # generate row indices corresponding to best_inds (just current row id in each row) (m x k)
    rows = np.arange(best_inds.shape[0], dtype=np.intp)[:, np.newaxis].repeat(best_inds.shape[1], axis=1)
    best_elems = arr[rows, best_inds]  # select k max elements from each row using advanced indexing (m x k)
    # indices which sort each row of best_elems in descending order (m x k)
    best_elems_inds = np.argsort(best_elems, axis=1)[:, ::-1]
    # reorder best_inds so that arr[i, sorted_best_inds[i, :]] will be sorted in descending order
    sorted_best_inds = best_inds[rows, best_elems_inds]
    return sorted_best_inds
def generateCosineNeighborGraph(hin, kNeighbors=10,
                                tf_param={'word': True, 'entity': False, 'we_weight': 1}):
    X, newIds, entIds = GraphGenerator.getTFVectorX(hin, param=tf_param)
    cosX = cosine_similarity(X)
    #return sparse.csc_matrix(X.dot(X.transpose())),newIds
    n = cosX.shape[0]
    graph = np.zeros((n, n))
    tic = time.time()
    for i in range(n):
        for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
            if j == i:
                continue
            #graph[i, j] += cosX[i, j]
            #graph[j, i] += cosX[i, j]
            graph[i, j] += 1
            graph[j, i] += 1
    toc = time.time() - tic
    return sparse.csc_matrix(graph), newIds
def generateCosineNeighborGraphfromX(X, kNeighbors=10):
    cosX = cosine_similarity(X)
    # return sparse.csc_matrix(X.dot(X.transpose())),newIds
    #print cosX.shape
    n = cosX.shape[0]
    graph = np.zeros((n, n))
    tic = time.time()
    for i in range(n):
        for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
            if j == i:
                continue
            # graph[i, j] += cosX[i, j]
            # graph[j, i] += cosX[i, j]
            graph[i, j] += 1
            graph[j, i] += 1
    toc = time.time() - tic
    #print 'graph generation done in %f seconds.' % toc
    return sparse.csc_matrix(graph)
def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    for i in range(n):
        for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
            if j == i:
                continue
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]

    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph

    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    laplacian_score = score
    return laplacian_score
def compute_nearest_neighbors(self, num_neighbors):
    result_list = []
    for key, value in self.im2index.items():
        neighbor_list = [key]
        similarity_scores = self.similarity_mat[value]
        # drop the best match, since it is the key image itself
        ind = np.argpartition(similarity_scores, -(num_neighbors + 1))[-(num_neighbors + 1):-1]
        ind = ind[np.argsort(similarity_scores[ind])]
        neighbors = [self.index2im[x] for x in ind]
        neighbor_list.extend(neighbors)
        result_list.append(neighbor_list)

    # compute neighbor statistics
    NearestNeighbour.compute_neighbor_stats(result_list, num_neighbors)

    # plot the TSNE plot
    self.plot_tsne()
    return result_list
def _calculate_topk_ndces(self, k):
    """ Calculate the indices of the k specialists with highest b-value,
    including the base classifier regardless of its b-value.

    Args:
        k: int >= 0, approximately specifying the number of derived specialists
            to select. Precisely, the best k (by Wilson error bound) are taken,
            along with the base classifier if it is not already one of the best k.

    Returns:
        A list containing the indices of the top k classifiers. The list always
        at least contains the base classifier's index (i.e. 0). Therefore, the
        list is of length k if the base classifier is one of the top k, and
        length k+1 otherwise. If k is greater than the total number of derived
        specialists, returns all of them.
    """
    assert self.label_corrs is not None, "Label correlations must be calculated before top k indices."
    if k < len(self.label_corrs):
        topk_ndces = set(np.argpartition(-self.label_corrs, k)[:k])  # Only does a partial sort of b!
    else:
        topk_ndces = set(range(len(self.label_corrs)))
    topk_ndces.add(0)
    return list(topk_ndces & set(self._relevant_ndces))
def argsort(x, topn=None, reverse=False):
    """
    Return indices of the `topn` smallest elements in array `x`, in ascending order.

    If reverse is True, return the greatest elements instead, in descending order.
    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
def _select_target_neighbors(self):
    """Find the target neighbors of each sample, that stay fixed during training.

    Returns
    -------
    array_like
        An array of neighbors indices for each sample with shape (n_samples, n_neighbors).
    """
    self.logger.info('Finding target neighbors...')
    target_neighbors = np.empty((self.X_.shape[0], self.n_neighbors_), dtype=int)
    for class_ in self.classes_:
        class_ind, = np.where(np.equal(self.y_, class_))
        dist = euclidean_distances(self.X_[class_ind], squared=True)
        np.fill_diagonal(dist, np.inf)
        neigh_ind = np.argpartition(dist, self.n_neighbors_ - 1, axis=1)
        neigh_ind = neigh_ind[:, :self.n_neighbors_]
        # argpartition doesn't guarantee sorted order, so we sort again but only the k neighbors
        row_ind = np.arange(len(class_ind))[:, None]
        neigh_ind = neigh_ind[row_ind, np.argsort(dist[row_ind, neigh_ind])]
        target_neighbors[class_ind] = class_ind[neigh_ind]
    return target_neighbors
def select_next_words(self, next_costs, next_probs, step_num, how_many):
    # Pick only on the first line (for the beginning of sampling)
    # This will avoid duplicate <q> token.
    if step_num == 0:
        flat_next_costs = next_costs[:1, :].flatten()
    else:
        # Set the next cost to infinite for finished utterances (they will be
        # replaced by other utterances in the beam)
        flat_next_costs = next_costs.flatten()

    voc_size = next_costs.shape[1]

    args = numpy.argpartition(flat_next_costs, how_many)[:how_many]
    args = args[numpy.argsort(flat_next_costs[args])]

    return numpy.unravel_index(args, next_costs.shape), flat_next_costs[args]
def find_nbest(score, n, threshold=None):
    num_vars = score.shape[1]

    score = score.flatten()
    nbest = np.argpartition(score, n)[:n]

    beam_indices = nbest // num_vars
    var_indices = nbest % num_vars
    nbest_score = score[nbest]

    if threshold:
        best = np.max(nbest_score)
        cond = nbest_score > best + threshold
        nbest_score = nbest_score[cond]
        beam_indices = beam_indices[cond]
        var_indices = var_indices[cond]

    return nbest_score, beam_indices, var_indices
def tfidf_retrieval(tfidf_vec, train_contexts_txt, train_responses_txt, output_file):
    print(type(tfidf_vec))
    tfidf_vec = tfidf_vec.toarray()
    print(tfidf_vec.shape)
    prod_mat = np.dot(tfidf_vec, tfidf_vec.T)
    print(prod_mat.shape)
    prod_mat = prod_mat / mat_vector_2norm_squared(tfidf_vec)
    print(prod_mat.shape)

    response_list = []
    for i in range(len(prod_mat)):
        row = prod_mat[i]
        # Take the indices of the two largest similarities; after sorting them,
        # index 0 is the second-best match (the best match is the context itself).
        # See: stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
        ind = np.argpartition(row, -2)[-2:]
        ind = ind[np.argsort(row[ind])][0]
        response_list.append(train_responses_txt[ind])
        print(train_contexts_txt[i])
        print(response_list[i])

    with open(output_file, 'w') as f1:
        for response in response_list:
            f1.write(response)
def visualize_frequent_words(vectors_2d: np.ndarray, dataset: DataSet, k: int,
                             ax: plt.Axes = None) -> None:
    word_ids, counts = np.unique(dataset.data, return_counts=True)
    indices = np.argpartition(-counts, k)[:k]
    frequent_word_ids = word_ids[indices]

    if ax is None:
        fig, ax = plt.subplots(figsize=(13, 13))
    else:
        fig = None

    vectors_2d = vectors_2d[frequent_word_ids]
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1], s=2, alpha=0.25)

    for i, id in enumerate(frequent_word_ids):
        ax.annotate(dataset.vocabulary.to_word(id), (vectors_2d[i, 0], vectors_2d[i, 1]))

    if fig is not None:
        fig.tight_layout()
        fig.show()
def GetFeatures(self, data):
    closestPrototypesIndxs = []
    D = self.layers[0] - (np.array(data) * self.stateScale + self.bias)
    D = np.sqrt(sum(D.T**2))  # a bottleneck for sure
    indexes = np.argpartition(D, self.c[0], axis=0)[:self.c[0]]
    for i in range(1, len(self.layers)):
        D = np.sum(np.setxor1d(self.layers[i], indexes, True), axis=1)
        # phi = np.zeros(self.prototypeList[i])
        # phi[indexes] = 1
        # D = np.sum(np.logical_xor(self.layers[i], phi), axis=1)
        indexes = np.argpartition(D, self.c[i], axis=0)[:self.c[i]]
    return indexes
def process_frame_for_game_play(frame):
    """Assumes a grayscale frame"""
    histogram = skimage.exposure.histogram(frame[40:])
    if np.unique(histogram[0]).size < 3:
        return None

    max_indices = np.argpartition(histogram[0], -3)[-3:]
    for index in sorted(max_indices)[:2]:
        frame[frame == index] = 0

    threshold = skimage.filters.threshold_otsu(frame[40:])
    bw_frame = frame > threshold

    return bw_frame
def format_lines(video_ids, predictions, top_k):
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
        line = [(class_index, predictions[video_index][class_index])
                for class_index in top_indices]
        # print("Type - Test :")
        # print(type(video_ids[video_index]))
        # print(video_ids[video_index].decode('utf-8'))
        line = sorted(line, key=lambda p: -p[1])
        yield (video_ids[video_index].decode('utf-8') + "," +
               " ".join("%i %f" % pair for pair in line) + "\n")
def calculate_precision_at_equal_recall_rate(predictions, actuals):
    """Performs a local (numpy) calculation of the PERR.

    Args:
        predictions: Matrix containing the outputs of the model.
            Dimensions are 'batch' x 'num_classes'.
        actuals: Matrix containing the ground truth labels.
            Dimensions are 'batch' x 'num_classes'.

    Returns:
        float: The average precision at equal recall rate across the entire batch.
    """
    aggregated_precision = 0.0
    num_videos = actuals.shape[0]
    for row in numpy.arange(num_videos):
        num_labels = int(numpy.sum(actuals[row]))
        top_indices = numpy.argpartition(predictions[row], -num_labels)[-num_labels:]
        item_precision = 0.0
        for label_index in top_indices:
            if predictions[row][label_index] > 0:
                item_precision += actuals[row][label_index]
        item_precision /= top_indices.size
        aggregated_precision += item_precision
    aggregated_precision /= num_videos
    return aggregated_precision
def top_k_triplets(predictions, labels, k=20):
    """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
    (prediction, class) format"""
    m = len(predictions)
    k = min(k, m)
    indices = numpy.argpartition(predictions, -k)[-k:]
    return [(index, predictions[index], labels[index]) for index in indices]
def format_lines(video_ids, predictions, top_k):
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
        line = [(class_index, predictions[video_index][class_index])
                for class_index in top_indices]
        line = sorted(line, key=lambda p: -p[1])
        yield (video_ids[video_index].decode('utf-8') + "," +
               " ".join("%i %f" % pair for pair in line) + "\n")
def __call__(self, words, weights, vocabulary_max):
    if len(words) < vocabulary_max * self.trigger_ratio:
        return words, weights

    if not isinstance(words, numpy.ndarray):
        words = numpy.array(words)

    # Tail optimization does not help with very large vocabularies
    if len(words) > vocabulary_max * 2:
        indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
        indices = indices[-vocabulary_max:]
        words = words[indices]
        weights = weights[indices]
        return words, weights

    # Vocabulary typically consists of these three parts:
    # 1) the core - we found its border - `core_end` - 15%
    # 2) the body - 70%
    # 3) the minor tail - 15%
    # (1) and (3) are roughly the same size
    # (3) can be safely discarded, (2) can be discarded with care,
    # (1) shall never be discarded.
    sorter = numpy.argsort(weights)[::-1]
    weights = weights[sorter]
    trend_start = int(len(weights) * 0.2)
    trend_finish = int(len(weights) * 0.8)
    z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                      numpy.log(weights[trend_start:trend_finish]),
                      1)
    exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
    avg_error = numpy.abs(weights[trend_start:trend_finish] -
                          exp_z[trend_start:trend_finish]).mean()
    tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
    weights = weights[:-tail_size][:vocabulary_max]
    words = words[sorter[:-tail_size]][:vocabulary_max]
    return words, weights