The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.array().
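Before the project extracts, here is a minimal self-contained sketch of the call itself (toy values, not taken from any of the projects below): numpy.array() turns Python sequences into ndarrays and accepts an explicit dtype.

import numpy as np

# Build arrays from Python lists; the dtype is inferred unless given explicitly.
a = np.array([1, 2, 3])                    # shape (3,)
b = np.array([[1.0, 2.0], [3.0, 4.0]])     # shape (2, 2), dtype float64
c = np.array([1, 2, 3], dtype=np.float32)  # force a specific dtype
print(a.shape, b.shape, c.dtype)           # (3,) (2, 2) float32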
def min_side(_, pos):
    """
    Given an object pixels' positions, return the minimum side length of its
    bounding box
    :param _: pixel values (unused)
    :param pos: pixel position (1-D)
    :return: minimum bounding box side length
    """
    xs = np.array([i / SSIZE for i in pos])
    ys = np.array([i % SSIZE for i in pos])
    minx = np.amin(xs)
    miny = np.amin(ys)
    maxx = np.amax(xs)
    maxy = np.amax(ys)
    ct1 = compute_line(np.array([minx, miny]), np.array([minx, maxy]))
    ct2 = compute_line(np.array([minx, miny]), np.array([maxx, miny]))
    return min(ct1, ct2)

def _cascade_evaluation(self, X_test, y_test):
    """ Evaluate the accuracy of the cascade using X and y.

    :param X_test: np.array
        Array containing the test input samples.
        Must be of the same shape as training data.
    :param y_test: np.array
        Test target values.
    :return: float
        The cascade accuracy.
    """
    casc_pred_prob = np.mean(self.cascade_forest(X_test), axis=0)
    casc_pred = np.argmax(casc_pred_prob, axis=1)
    casc_accuracy = accuracy_score(y_true=y_test, y_pred=casc_pred)
    print('Layer validation accuracy = {}'.format(casc_accuracy))
    return casc_accuracy

def _create_feat_arr(self, X, prf_crf_pred):
    """ Concatenate the original feature vector with the prediction probabilities
    of a cascade layer.

    :param X: np.array
        Array containing the input samples.
        Must be of shape [n_samples, data] where data is a 1D array.
    :param prf_crf_pred: list
        Prediction probabilities by a cascade layer for X.
    :return: np.array
        Concatenation of X and the predicted probabilities.
        To be used for the next layer in a cascade forest.
    """
    swap_pred = np.swapaxes(prf_crf_pred, 0, 1)
    add_feat = swap_pred.reshape([np.shape(X)[0], -1])
    feat_arr = np.concatenate([add_feat, X], axis=1)
    return feat_arr

def shuffleBlock(self, cells, d, tlx, tly, cols, rows, width, height):
    if tlx + cols < width and tly + rows < height:
        temp = []
        for row in range(rows):
            for col in range(cols):
                temp.append(d[cells[tlx+col][tly+row]])
        temp = np.array(temp)
        oldState = temp.copy()
        np.random.shuffle(temp)
        i = 0
        for row in range(rows):
            for col in range(cols):
                d[cells[tlx+col][tly+row]] = temp[i]
                i += 1
        return oldState
    else:
        return []

def train(self, dataset, train_split=0.8, dense_size=32, learning_rate=0.001,
          batch_size=32, epochs=50, activation='relu'):
    self.__load_dataset(dataset, train_split)
    train_x = np.array(self.__train_data[:, 0].tolist())
    train_y = to_categorical(self.__train_data[:, 1], 2)
    test_x = np.array(self.__test_data[:, 0].tolist())
    test_y = to_categorical(self.__test_data[:, 1], 2)
    print(train_x.shape)
    self.__model = Sequential()
    self.__model.add(Dense(dense_size, input_dim=train_x.shape[1],
                           activation=activation, init='glorot_uniform'))
    self.__model.add(Dense(train_y.shape[1], activation='softmax',
                           init='glorot_uniform'))
    self.__model.compile(optimizer=Adam(lr=0.001),
                         loss='categorical_crossentropy',
                         metrics=['categorical_accuracy'])
    self.__model.fit(train_x, train_y, batch_size=batch_size, nb_epoch=epochs,
                     validation_data=(test_x, test_y), verbose=2)

def normalize_array(solution, prediction):
    ''' Use min and max of solution as scaling factors to normalize prediction,
    then threshold it to [0, 1]. Binarize solution to {0, 1}.
    This allows applying classification scores to all cases.
    In principle, this should not do anything to properly formatted
    classification inputs and outputs.'''
    # Binarize solution
    sol = np.ravel(solution)  # convert to 1-d array
    maxi = np.nanmax((filter(lambda x: x != float('inf'), sol)))   # Max except NaN and Inf
    mini = np.nanmin((filter(lambda x: x != float('-inf'), sol)))  # Mini except NaN and Inf
    if maxi == mini:
        print('Warning, cannot normalize')
        return [solution, prediction]
    diff = maxi - mini
    mid = (maxi + mini) / 2.
    new_solution = np.copy(solution)
    new_solution[solution >= mid] = 1
    new_solution[solution < mid] = 0
    # Normalize and threshold predictions (takes effect only if solution not in {0, 1})
    new_prediction = (np.copy(prediction) - float(mini)) / float(diff)
    new_prediction[new_prediction > 1] = 1  # and if predictions exceed the bounds [0, 1]
    new_prediction[new_prediction < 0] = 0
    # Make probabilities smoother
    # new_prediction = np.power(new_prediction, (1./10))
    return [new_solution, new_prediction]

def mvmean(R, axis=0):
    ''' Moving average to avoid rounding errors. A bit slow, but...
    Computes the mean along the given axis, except if this is a vector,
    in which case the mean is returned. Does NOT flatten.'''
    if len(R.shape) == 0:
        return R
    average = lambda x: reduce(lambda i, j: (0, (j[0]/(j[0]+1.))*i[1] + (1./(j[0]+1))*j[1]), enumerate(x))[1]
    R = np.array(R)
    if len(R.shape) == 1:
        return average(R)
    if axis == 1:
        return np.array(map(average, R))
    else:
        return np.array(map(average, R.transpose()))

# ======= All metrics used for scoring in the challenge ========
### REGRESSION METRICS (work on raw solution and prediction)
# These can be computed on all solutions and predictions (classification included)

def data_binary_sparse(filename, nbr_features):
    ''' This function takes as an argument a file representing a binary sparse matrix:
    binary_sparse_matrix[i][j] = a means matrix[i][j] = 1.
    It converts it into a numpy array and returns this array. '''
    data = data_converter.file_to_array(filename)
    nbr_samples = len(data)
    dok_sparse = dok_matrix((nbr_samples, nbr_features))  # the construction is easier w/ dok_sparse
    print("Converting {} to dok sparse matrix".format(filename))
    for row in range(nbr_samples):
        for feature in data[row]:
            dok_sparse[row, int(feature) - 1] = 1
    print("Converting {} to csr sparse matrix".format(filename))
    return dok_sparse.tocsr()

# ================ Copy results from input to output ==========================

def generate_one_summary(self, review):
    """
    Create summary for one review using Encoder Decoder Seq2Seq model

    :param review: The input review
    :return: Output Summary of the model
    """
    review = review.T
    review = [np.array([int(x)]) for x in review]
    feed_dict_rev = {self.enc_inp[t]: review[t] for t in range(self.seq_length)}
    feed_dict_rev.update({self.labels[t]: review[t] for t in range(self.seq_length)})
    summary = self.sess.run(self.dec_outputs_tst, feed_dict_rev)
    summary = [logits_t.argmax(axis=1) for logits_t in summary]
    summary = [x[0] for x in summary]
    return summary

def __crawl_review(self):
    """
    Crawl review

    :return: review [numpy array]
    """
    review_list = []
    print 'Crawling Reviews....'
    num_lines = 0
    with open(self.raw_data_file) as infile:
        for line in infile:
            if line.startswith('review/text'):
                if num_lines >= self.num_reviews:
                    break
                num_lines += 1
                _, review = line.split('/text: ')
                review_list.append(review)
    return np.array(review_list)

def __crawl_summary(self):
    """
    Crawl summary

    :return: summary [numpy array]
    """
    summary_list = []
    print 'Crawling Summary....'
    num_lines = 0
    with open(self.raw_data_file) as infile:
        for line in infile:
            if line.startswith('review/summary'):
                if num_lines >= self.num_reviews:
                    break
                num_lines += 1
                _, summary = line.split('/summary: ')
                summary_list.append(summary)
    return np.array(summary_list)

def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned.
    Note that the old array dimensions have to be multiples of the new array dimensions.

    --- INPUT ---
    array        Array to reshape (combine pixels)
    newsize      New size of array
    pixcombine   The method to combine the pixels with. Choices are sum, mean and median
    """
    sh = newsize[0], array.shape[0] // newsize[0], newsize[1], array.shape[1] // newsize[1]

    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarray has no .median() method, so use np.median over the binned axes
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)

    return reshapedarray

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

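As a quick check of the reshape-and-reduce trick used above, here is a minimal self-contained sketch (toy 4x4 array and an assumed 2x2 binning, not part of the original project) that sums 2x2 pixel blocks the same way:

import numpy as np

array = np.arange(16, dtype=float).reshape(4, 4)
newsize = (2, 2)
sh = (newsize[0], array.shape[0] // newsize[0],
      newsize[1], array.shape[1] // newsize[1])
binned = array.reshape(sh).sum(-1).sum(1)  # sum each 2x2 block
print(binned)  # [[10. 18.] [42. 50.]]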
def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name, maxlen):
    db = read_data(path_test)
    X = create_sequences(db[:-maxlen], win_size=maxlen, step=maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], input_size))

    # build the model: 1 layer LSTM
    print('Build model...')
    model = Sequential()
    model.add(LSTM(hidden_size, return_sequences=False, input_shape=(maxlen, input_size)))
    model.add(Dense(maxlen))

    model.load_weights(save_dir + model_name)
    model.compile(loss='mse', optimizer='adam')

    prediction = model.predict(X, batch_size, verbose=1)
    prediction = prediction.flatten()
    # prediction_container = np.array(prediction).flatten()
    Y = db[maxlen:]
    plt.plot(prediction, label='prediction')
    plt.plot(Y, label='true')
    plt.legend()
    plt.show()

def word_list_to_embedding(words, embeddings, embedding_dimension=50):
    '''
    :param words: an n x (2*window_size + 1) matrix from data_to_mat
    :param embeddings: an embedding dictionary where keys are strings and values
        are embeddings; the output from embeddings_to_dict
    :param embedding_dimension: the dimension of the values in embeddings; in this
        assignment, embedding_dimension=50
    :return: an n x ((2*window_size + 1)*embedding_dimension) matrix where each entry of the
        words matrix is replaced with its embedding
    '''
    m, n = words.shape
    words = words.reshape((-1))
    return np.array([embeddings[w] for w in words], dtype=np.float32).reshape(m, n * embedding_dimension)

#
# End Twitter Helper Functions
#

def __init__(self, N, L, comm, precision, communication="Alltoall", padsize=1.5,
             threads=1, planner_effort=defaultdict(lambda: "FFTW_MEASURE")):
    R2C.__init__(self, N, L, comm, precision, communication=communication,
                 padsize=padsize, threads=threads, planner_effort=planner_effort)
    # Reuse all shapes from the r2c transform R2C simply by resizing the final complex z-dimension:
    self.Nf = N[2]
    self.Nfp = int(self.padsize*self.N[2])  # Independent complex wavenumbers in z-direction for padded array

    # Rename since there's no real space
    self.original_shape_padded = self.real_shape_padded
    self.original_shape = self.real_shape
    self.transformed_shape = self.complex_shape
    self.original_local_slice = self.real_local_slice
    self.transformed_local_slice = self.complex_local_slice
    self.ks = (fftfreq(N[2])*N[2]).astype(int)

def bag_of_tokens(config, labels, label_lengths):
    if config.train_output_embeddings:
        with tf.variable_scope('embed', reuse=True):
            output_embeddings = tf.get_variable('output_embedding')
    else:
        output_embeddings = tf.constant(config.output_embedding_matrix)

    #everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
    #everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)

    labels = tf.constant(np.array(labels))
    embedded_output = tf.gather(output_embeddings, labels)
    print('embedded_output before', embedded_output)
    #mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
    # note: this multiplication will broadcast the mask along all elements of the depth dimension
    # (which is why we run the expand_dims to choose how to broadcast)
    #embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
    #print('embedded_output after', embedded_output)
    return tf.reduce_sum(embedded_output, axis=1)

def _compute_process_and_covariance_matrices(self, dt):
    """Computes the transition and covariance matrix of the process model and
    measurement model.

    Args:
        dt (float): Timestep of the discrete transition.

    Returns:
        F (numpy.ndarray): Transition matrix.
        Q (numpy.ndarray): Process covariance matrix.
        R (numpy.ndarray): Measurement covariance matrix.
    """
    F = np.array(np.bmat([[np.eye(3), dt * np.eye(3)],
                          [np.zeros((3, 3)), np.eye(3)]]))
    self.process_matrix = F

    q_p = self.process_covariance_position
    q_v = self.process_covariance_velocity
    Q = np.diag([q_p, q_p, q_p, q_v, q_v, q_v]) ** 2 * dt

    r = self.measurement_covariance
    R = r * np.eye(4)

    self.process_covariance = Q
    self.measurement_covariance = R

    return F, Q, R

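For comparison, a minimal standalone sketch (arbitrary dt, not from the project above) of the same block layout built with np.block, which returns an ndarray directly rather than the matrix type produced by np.bmat:

import numpy as np

dt = 0.1  # hypothetical timestep
F = np.block([[np.eye(3), dt * np.eye(3)],
              [np.zeros((3, 3)), np.eye(3)]])
print(F.shape)  # (6, 6)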
def sample(self, sample_size=20, text=None):
    """Sample the documents."""
    p = 1

    if text != None:
        try:
            x, word_idxs = self.reader.get(text)
        except Exception as e:
            print(e)
            return
    else:
        x, word_idxs = self.reader.random()
    print(" [*] Text: %s" % " ".join([self.reader.idx2word[word_idx] for word_idx in word_idxs]))

    cur_ps = self.sess.run(self.p_x_i, feed_dict={self.x: x})
    word_idxs = np.array(cur_ps).argsort()[-sample_size:][::-1]
    ps = cur_ps[word_idxs]

    for idx, (cur_p, word_idx) in enumerate(zip(ps, word_idxs)):
        print(" [%d] %-20s: %.8f" % (idx+1, self.reader.idx2word[word_idx], cur_p))
        p *= cur_p

    print(" [*] perp : %8.f" % -np.log(p))

def plot_nucleotide_diversity(ax, fqlists, invert=False):
    '''
    Create a FastQC-like "Per base sequence content" plot
    Plot fraction of nucleotides per position
    zip will stop when shortest read is exhausted
    '''
    if invert:
        fqlists = [list(reversed(read)) for read in fqlists]
    numreads = len(fqlists)
    sns.set_style("darkgrid")
    l_A, = ax.plot(
        np.array([pos.count('A') / numreads for pos in zip(*fqlists)]), 'green', label='A')
    l_T, = ax.plot(
        np.array([pos.count('T') / numreads for pos in zip(*fqlists)]), 'red', label='T')
    l_G, = ax.plot(
        np.array([pos.count('G') / numreads for pos in zip(*fqlists)]), 'black', label='G')
    l_C, = ax.plot(
        np.array([pos.count('C') / numreads for pos in zip(*fqlists)]), 'blue', label='C')
    if invert:
        ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
    return [l_A, l_T, l_G, l_C]

def plot_qual(ax, quallist, invert=False):
    '''
    Create a FastQC-like "Per base sequence quality" plot
    Plot average quality per position
    zip will stop when shortest read is exhausted
    '''
    sns.set_style("darkgrid")
    if invert:
        l_Q, = ax.plot(np.array([np.mean(position) for position in zip(
            *[list(reversed(read)) for read in quallist])]), 'orange', label="Quality")
        ax.set_xlabel('Position in read from end')
        ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
    else:
        l_Q, = ax.plot(np.array([np.mean(position) for position in zip(*quallist)]),
                       'orange', label="Quality")
        ax.set_xlabel('Position in read from start')
    return l_Q

def d_x2(self, factors=None):
    """Creates a sparse matrix for computing the second derivative with respect to x
    multiplied by factors given for every point. Uses central difference quotient.

    Args:
        factors: Factor for each point to be applied after derivation.

    Returns:
        Sparse matrix to calculate second derivatives of field components.
    """
    # use ones as factors if none are specified
    if factors is None:
        factors = np.array(1).repeat(self.num_points)

    return sp.dia_matrix((np.array([factors, -2*factors, factors]), [-1, 0, 1]),
                         shape=(self.num_points, self.num_points))

def plot_region(self, region):
    """Shows the given region in the field plot.

    Args:
        region: Region to be plotted.
    """
    if type(region) == reg.PointRegion:
        self.axes.plot(np.ones(2) * region.point_coordinates / self._x_axis_factor,
                       np.array([-1, 1]) * self.scale, color='black')
    elif type(region) == reg.LineRegion:
        self.axes.plot(np.ones(2) * region.line_coordinates[0] / self._x_axis_factor,
                       np.array([-1, 1]) * self.scale, color='black')
        self.axes.plot(np.ones(2) * region.line_coordinates[1] / self._x_axis_factor,
                       np.array([-1, 1]) * self.scale, color='black')
    else:
        raise TypeError('Unknown type in region list: {}'.format(type(region)))

def test_accuracy_full_batch(tokens, features, mini_batch_size, word_attn, sent_attn, th=0.5):
    p = []
    l = []
    cnt = 0
    g = gen_minibatch1(tokens, features, mini_batch_size, False)
    for token, feature in g:
        if cnt % 100 == 0:
            print(cnt)
        cnt += 1
        # print token.size()
        # y_pred = get_predictions(token, word_attn, sent_attn)
        # print y_pred
        y_pred = get_predictions(token, feature, word_attn, sent_attn)
        # print y_pred
        # _, y_pred = torch.max(y_pred, 1)
        # y_pred = y_pred[:, 1]
        # print y_pred
        p.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
    p = [item for sublist in p for item in sublist]
    p = np.array(p)
    return p

def test_accuracy_full_batch(tokens, features, mini_batch_size, word_attn, sent_attn, th=0.5):
    p = []
    l = []
    cnt = 0
    g = gen_minibatch1(tokens, features, mini_batch_size, False)
    for token, feature in g:
        if cnt % 100 == 0:
            print cnt
        cnt += 1
        # print token.size()
        # y_pred = get_predictions(token, word_attn, sent_attn)
        # print y_pred
        y_pred = get_predictions(token, feature, word_attn, sent_attn)
        # print y_pred
        # _, y_pred = torch.max(y_pred, 1)
        # y_pred = y_pred[:, 1]
        # print y_pred
        p.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
    p = [item for sublist in p for item in sublist]
    p = np.array(p)
    return p

def _ncc_c(x, y):
    """
    >>> _ncc_c([1,2,3,4], [1,2,3,4])
    array([ 0.13333333,  0.36666667,  0.66666667,  1.        ,  0.66666667,
            0.36666667,  0.13333333])
    >>> _ncc_c([1,1,1], [1,1,1])
    array([ 0.33333333,  0.66666667,  1.        ,  0.66666667,  0.33333333])
    >>> _ncc_c([1,2,3], [-1,-1,-1])
    array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
    """
    den = np.array(norm(x) * norm(y))
    den[den == 0] = np.Inf

    x_len = len(x)
    fft_size = 1 << (2*x_len-1).bit_length()
    cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size)))
    cc = np.concatenate((cc[-(x_len-1):], cc[:x_len]))
    return np.real(cc) / den

def layout_tree(correlation):
    """Layout tree for visualization with e.g. matplotlib.

    Args:
        correlation: A [V, V]-shaped numpy array of latent correlations.

    Returns:
        A [V, 3]-shaped numpy array of spectral positions of vertices.
    """
    assert len(correlation.shape) == 2
    assert correlation.shape[0] == correlation.shape[1]
    assert correlation.dtype == np.float32

    laplacian = -correlation
    np.fill_diagonal(laplacian, 0)
    np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
    evals, evects = scipy.linalg.eigh(laplacian, eigvals=[1, 2, 3])
    assert np.all(evals > 0)
    assert evects.shape[1] == 3
    return evects

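A minimal standalone check (toy 4x4 correlation matrix with made-up values) of the Laplacian construction used above: off-diagonal entries are negated correlations and the diagonal is filled so that every column sums to zero.

import numpy as np

corr = np.array([[1.0, 0.5, 0.2, 0.1],
                 [0.5, 1.0, 0.3, 0.2],
                 [0.2, 0.3, 1.0, 0.4],
                 [0.1, 0.2, 0.4, 1.0]], dtype=np.float32)
laplacian = -corr
np.fill_diagonal(laplacian, 0)
np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
print(laplacian.sum(axis=0))  # all (close to) zero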
def __init__(self, N, V, tree_prior, config):
    """Initialize a model with an empty subsample.

    Args:
        N (int): Number of rows in the dataset.
        V (int): Number of columns (features) in the dataset.
        tree_prior: A [K]-shaped numpy array of prior edge log odds, where
            K is the number of edges in the complete graph on V vertices.
        config: A global config dict.
    """
    assert isinstance(N, int)
    assert isinstance(V, int)
    assert isinstance(tree_prior, np.ndarray)
    assert isinstance(config, dict)
    K = V * (V - 1) // 2  # Number of edges in complete graph.
    assert V <= 32768, 'Invalid # features > 32768: {}'.format(V)
    assert tree_prior.shape == (K, )
    assert tree_prior.dtype == np.float32
    self._config = config.copy()
    self._num_rows = N
    self._tree_prior = tree_prior
    self._tree = TreeStructure(V)
    assert self._tree.num_vertices == V
    self._program = make_propagation_program(self._tree.tree_grid)
    self._added_rows = set()

def sample_tree(self):
    """Samples a random tree.

    Returns:
        A pair (edges, edge_logits), where:
            edges: A list of (vertex, vertex) pairs.
            edge_logits: A [K]-shaped numpy array of edge logits.
    """
    logger.info('TreeCatTrainer.sample_tree given %d rows', len(self._added_rows))
    SERIES.sample_tree_num_rows.append(len(self._added_rows))
    complete_grid = self._tree.complete_grid
    edge_logits = self.compute_edge_logits()
    assert edge_logits.shape[0] == complete_grid.shape[1]
    assert edge_logits.dtype == np.float32
    edges = self.get_edges()
    edges = sample_tree(complete_grid, edge_logits, edges)
    return edges, edge_logits

def compute_edge_logits(self):
    """Compute non-normalized logprob of all V(V-1)/2 candidate edges.

    This is used for sampling and estimating the latent tree.
    """
    V, E, K, M = self._VEKM
    vert_logits = logprob_dc(self._vert_ss, self._vert_prior, axis=1)
    if len(self._added_rows) == V:
        assignments = self._assignments
    else:
        assignments = self._assignments[sorted(self._added_rows), :]
    assignments = np.array(assignments, order='F')
    parallel = self._config['learning_parallel']
    result = treecat_compute_edge_logits(M, self._tree.complete_grid,
                                         self._gammaln_table, assignments,
                                         vert_logits, parallel)
    result += self._tree_prior
    return result

def train(self):
    """Train a TreeCat model using subsample-annealed MCMC.

    Returns:
        A trained model as a dictionary with keys:
            config: A global config dict.
            tree: A TreeStructure instance with the learned latent structure.
            edge_logits: A [K]-shaped array of all edge logits.
            suffstats: Sufficient statistics of features, vertices, and
                edges and a ragged_index for the features array.
            assignments: An [N, V]-shaped numpy array of latent cluster ids
                for each cell in the dataset, where N is the number of data
                rows and V is the number of features.
    """
    model = TreeTrainer.train(self)
    model['assignments'] = self._assignments
    model['suffstats'] = {
        'ragged_index': self._table.ragged_index,
        'vert_ss': self._vert_ss,
        'edge_ss': self._edge_ss,
        'feat_ss': self._feat_ss,
        'meas_ss': self._meas_ss,
    }
    return model

def __init__(self, data, tree_prior, config):
    """Initialize a model with an empty subsample.

    Args:
        data: An [N, V]-shaped numpy array of real-valued data.
        tree_prior: A [K]-shaped numpy array of prior edge log odds, where
            K is the number of edges in the complete graph on V vertices.
        config: A global config dict.
    """
    assert isinstance(data, np.ndarray)
    data = np.asarray(data, np.float32)
    assert len(data.shape) == 2
    N, V = data.shape
    D = config['model_latent_dim']
    E = V - 1  # Number of edges in the tree.
    TreeTrainer.__init__(self, N, V, tree_prior, config)
    self._data = data
    self._latent = np.zeros([N, V, D], np.float32)

    # This is symmetric positive definite.
    self._vert_ss = np.zeros([V, D, D], np.float32)
    # This is arbitrary (not necessarily symmetric).
    self._edge_ss = np.zeros([E, D, D], np.float32)
    # This represents (count, mean, covariance).
    self._feat_ss = np.zeros([V, D, 1 + 1 + D], np.float32)

def train(self):
    """Train a TreeGauss model using subsample-annealed MCMC.

    Returns:
        A trained model as a dictionary with keys:
            config: A global config dict.
            tree: A TreeStructure instance with the learned latent structure.
            edge_logits: A [K]-shaped array of all edge logits.
            suffstats: Sufficient statistics of features and vertices.
            latent: An [N, V, M]-shaped numpy array of latent states, where
                N is the number of data rows, V is the number of features,
                and M is the dimension of each latent variable.
    """
    model = TreeTrainer.train(self)
    model['latent'] = self._latent
    model['suffstats'] = {
        'vert_ss': self._vert_ss,
        'edge_ss': self._edge_ss,
        'feat_ss': self._feat_ss,
    }
    return model

def train_ensemble(table, tree_prior, config):
    """Train a TreeCat ensemble model using subsample-annealed MCMC.

    The ensemble size is controlled by config['model_ensemble_size'].
    Let N be the number of data rows and V be the number of features.

    Args:
        table: A Table instance holding N rows of V features of data.
        tree_prior: A [K]-shaped numpy array of prior edge log odds, where
            K is the number of edges in the complete graph on V vertices.
        config: A global config dict.

    Returns:
        A trained model as a dictionary with keys:
            tree: A TreeStructure instance with the learned latent structure.
            suffstats: Sufficient statistics of features, vertices, and edges.
            assignments: An [N, V] numpy array of latent cluster ids for each
                cell in the dataset.
    """
    tasks = []
    for sub_seed in range(config['model_ensemble_size']):
        sub_config = config.copy()
        sub_config['seed'] += sub_seed
        tasks.append((table, tree_prior, sub_config))
    return parallel_map(_train_model, tasks)

def test_server_logprob_normalized(N, V, C, M):
    model = generate_fake_model(N, V, C, M)
    config = TINY_CONFIG.copy()
    config['model_num_clusters'] = M
    model['config'] = config
    server = TreeCatServer(model)

    # The total probability of all categorical rows should be 1.
    ragged_index = model['suffstats']['ragged_index']
    factors = []
    for v in range(V):
        C = ragged_index[v + 1] - ragged_index[v]
        factors.append([one_hot(c, C) for c in range(C)])
    data = np.array(
        [np.concatenate(columns) for columns in itertools.product(*factors)],
        dtype=np.int8)
    logprobs = server.logprob(data)
    logtotal = np.logaddexp.reduce(logprobs)
    assert logtotal == pytest.approx(0.0, abs=1e-5)

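The final normalization assertion relies on np.logaddexp.reduce; a minimal sketch of that identity with a toy, already-normalized distribution:

import numpy as np

probs = np.array([0.2, 0.3, 0.5])          # sums to 1
logprobs = np.log(probs)
logtotal = np.logaddexp.reduce(logprobs)   # log of the summed probabilities
print(np.isclose(logtotal, 0.0))           # True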
def observed_perplexity(self, counts):
    """Compute perplexity = exp(entropy) of observed variables.

    Perplexity is an information theoretic measure of the number of
    clusters or latent classes. Perplexity is a real number in the range
    [1, M], where M is model_num_clusters.

    Args:
        counts: A [V]-shaped array of multinomial counts.

    Returns:
        A [V]-shaped numpy array of perplexity.
    """
    V, E, M, R = self._VEMR
    if counts is not None:
        counts = np.ones(V, dtype=np.int8)
    assert counts.shape == (V, )
    assert counts.dtype == np.int8
    assert np.all(counts > 0)
    observed_entropy = np.empty(V, dtype=np.float32)
    for v in range(V):
        beg, end = self._ragged_index[v:v + 2]
        probs = np.dot(self._feat_cond[beg:end, :], self._vert_probs[v, :])
        observed_entropy[v] = multinomial_entropy(probs, counts[v])
    return np.exp(observed_entropy)

def observed_perplexity(self, counts):
    """Compute perplexity = exp(entropy) of observed variables.

    Perplexity is an information theoretic measure of the number of
    clusters or observed classes. Perplexity is a real number in the range
    [1, dim[v]], where dim[v] is the number of categories in an observed
    categorical variable or 2 for an ordinal variable.

    Args:
        counts: A [V]-shaped array of multinomial counts.

    Returns:
        A [V]-shaped numpy array of perplexity.
    """
    result = self._ensemble[0].observed_perplexity(counts)
    for server in self._ensemble[1:]:
        result += server.observed_perplexity(counts)
    result /= len(self._ensemble)
    return result

def latent_correlation(self):
    """Compute correlation matrix among latent features.

    This computes the generalization of Pearson's correlation to discrete data.
    Let I(X;Y) be the mutual information. Then define correlation as

        rho(X,Y) = sqrt(1 - exp(-2 I(X;Y)))

    Returns:
        A [V, V]-shaped numpy array of feature-feature correlations.
    """
    result = self._ensemble[0].latent_correlation()
    for server in self._ensemble[1:]:
        result += server.latent_correlation()
    result /= len(self._ensemble)
    return result

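The correlation formula quoted in that docstring is easy to sanity-check numerically; a minimal sketch with arbitrary mutual-information values:

import numpy as np

# rho(X, Y) = sqrt(1 - exp(-2 * I(X; Y))): zero mutual information gives zero
# correlation, and rho approaches 1 as the mutual information grows.
for mi in [0.0, 0.1, 1.0, 5.0]:
    rho = np.sqrt(1.0 - np.exp(-2.0 * mi))
    print(mi, float(rho))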
def logprob(self, rows, evidence=None):
    """Compute non-normalized log probabilities of many rows of data.

    If evidence is specified, compute conditional log probability;
    otherwise compute unconditional log probability.

    Args:
        data: A list of rows of data, where each row is a sparse dict
            mapping feature name to feature value.
        evidence: An optional row of conditioning data, as a sparse dict
            mapping feature name to feature value.

    Returns:
        An [len(rows)]-shaped numpy array of log probabilities.
    """
    data = import_rows(self._schema, rows)
    if evidence is None:
        return self._server.logprob(data)
    else:
        ragged_evidence = import_rows(self._schema, [evidence])
        return (self._server.logprob(data + ragged_evidence) -
                self._server.logprob(data + evidence))

def sample(self, N, evidence=None):
    """Draw N samples from the posterior distribution.

    Args:
        N: The number of samples to draw.
        evidence: An optional single row of conditioning data, as a sparse
            dict mapping feature name to feature value.

    Returns:
        An [N, R]-shaped numpy array of sampled multinomial data.
    """
    if evidence is None:
        data = None
    else:
        data = import_rows(self._schema, [evidence])[0]
    ragged_samples = self._server.sample(N, self._counts, data)
    return export_rows(self._schema, ragged_samples)

def fit(self, X, y):
    """ Training the gcForest on input data X and associated target y.

    :param X: np.array
        Array containing the input samples.
        Must be of shape [n_samples, data] where data is a 1D array.
    :param y: np.array
        1D array containing the target values.
        Must be of shape [n_samples]
    """
    if np.shape(X)[0] != len(y):
        raise ValueError('Sizes of y and X do not match.')

    mgs_X = self.mg_scanning(X, y)
    _ = self.cascade_forest(mgs_X, y)

def contest(self, b, g, r):
    """ Search for biased BGR values
    Finds closest neuron (min dist) and updates self.freq
    finds best neuron (min dist-self.bias) and returns position
    for frequently chosen neurons, self.freq[i] is high and self.bias[i] is negative
    self.bias[i] = self.GAMMA*((1/self.NETSIZE)-self.freq[i])"""
    i, j = self.SPECIALS, self.NETSIZE
    dists = abs(self.network[i:j] - np.array([b, g, r])).sum(1)
    bestpos = i + np.argmin(dists)
    biasdists = dists - self.bias[i:j]
    bestbiaspos = i + np.argmin(biasdists)
    self.freq[i:j] *= (1 - self.BETA)
    self.bias[i:j] += self.BETAGAMMA * self.freq[i:j]
    self.freq[bestpos] += self.BETA
    self.bias[bestpos] -= self.BETAGAMMA
    return bestbiaspos

def rasterMaskToGrid(rasterMask):
    grid = []
    mask = rasterMask['mask']
    for y in range(rasterMask['height']):
        for x in range(rasterMask['width']):
            if mask[y, x] == 0:
                grid.append([x, y])

    grid = np.array(grid, dtype=np.float)

    if not (rasterMask is None) and rasterMask['hex'] is True:
        f = math.sqrt(3.0) / 2.0
        offset = -0.5
        if np.argmin(rasterMask['mask'][0]) > np.argmin(rasterMask['mask'][1]):
            offset = 0.5
        for i in range(len(grid)):
            if (grid[i][1] % 2.0 == 0.0):
                grid[i][0] -= offset
            grid[i][1] *= f
    return grid

def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.

    Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows
    (one for red, one for blue) per match. Each column represents a single team, ordered by team number.
    If a team participated on a certain alliance, the value at that row and column would be 1, otherwise,
    it would be 0. For example, an event with teams 1-7 that featured a match that pitted teams 1, 3, and 5
    against 2, 4, and 6 would have a match matrix that looks like this (sans labels):

                  #1  #2  #3  #4  #5  #6  #7
        qm1_red    1   0   1   0   1   0   0
        qm1_blue   0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)

    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]

def get_img(data_path):
    # Getting image array from path:
    img = imread(data_path)
    img = imresize(img, (64, 64))
    return img

def get_dataset(dataset_path='Data/Train_Data'):
    # Getting all data from data path:
    try:
        X = np.load('Data/npy_train_data/X.npy')
        Y = np.load('Data/npy_train_data/Y.npy')
    except:
        inputs_path = dataset_path + '/input'
        images = listdir(inputs_path)  # Getting images
        X = []
        Y = []
        for img in images:
            img_path = inputs_path + '/' + img
            x_img = get_img(img_path).astype('float32').reshape(64, 64, 3)
            x_img /= 255.
            y_img = get_img(img_path.replace('input/', 'mask/mask_')).astype('float32').reshape(64, 64, 1)
            y_img /= 255.
            X.append(x_img)
            Y.append(y_img)
        X = np.array(X)
        Y = np.array(Y)
        # Create dataset:
        if not os.path.exists('Data/npy_train_data/'):
            os.makedirs('Data/npy_train_data/')
        np.save('Data/npy_train_data/X.npy', X)
        np.save('Data/npy_train_data/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
    return X, X_test, Y, Y_test

def read_groundtruth():
    ret = []
    with open(os.path.join(os.path.abspath(os.path.dirname(__file__)),
                           'groundtruth.txt'), 'rb') as lines:
        for line in lines:
            ret.append(line[:-2])
    return np.array(ret)

def extract_digits(self, image):
    """
    Extract digits from a binary image representing a sudoku
    :param image: binary image/sudoku
    :return: array of digits and their probabilities
    """
    prob = np.zeros(4, dtype=np.float32)
    digits = np.zeros((4, 9, 9), dtype=object)
    for i in range(4):
        labeled, features = label(image, structure=CROSS)
        objs = find_objects(labeled)
        for obj in objs:
            roi = image[obj]
            # center of bounding box
            cy = (obj[0].stop + obj[0].start) / 2
            cx = (obj[1].stop + obj[1].start) / 2
            dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
            pos = np.argmin(dists)
            cy, cx = pos % 9, pos / 9
            # 28x28 image, center relative to sudoku
            prediction = self.classifier.classify(morph(roi))
            if digits[i, cy, cx] is 0:
                # Newly found digit
                digits[i, cy, cx] = prediction
                prob[i] += prediction[0, 0]
            elif prediction[0, 0] > digits[i, cy, cx][0, 0]:
                # Overlapping! (noise), choose the most probable prediction
                prob[i] -= digits[i, cy, cx][0, 0]
                digits[i, cy, cx] = prediction
                prob[i] += prediction[0, 0]
        image = np.rot90(image)
    logging.info(prob)
    return digits[np.argmax(prob)]

def diagonal(_, pos):
    """
    Given an object pixels' positions, return the diagonal length of its
    bounding box
    :param _: pixel values (unused)
    :param pos: pixel position (1-D)
    :return: diagonal of bounding box
    """
    xs = np.array([i / SSIZE for i in pos])
    ys = np.array([i % SSIZE for i in pos])
    minx = np.amin(xs)
    miny = np.amin(ys)
    maxx = np.amax(xs)
    maxy = np.amax(ys)
    return compute_line(np.array([minx, miny]), np.array([maxx, maxy]))