我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.delete()。
def get(self, X):
    """Return a copy of ``X`` whose NaN entries are replaced by predictions.

    ``X`` is converted to an array and a pre-imputed copy is obtained from
    ``self.meanImput``.  When more than one estimator is available, the
    estimator at position ``i`` predicts the missing values of column ``i``
    from the remaining columns of the pre-imputed matrix, one row at a time.
    With zero or one estimator the converted array is returned unchanged.
    """
    X = np.array(X)
    missing = np.isnan(X)
    filled = self.meanImput(X.copy())
    if len(self.estimators_) <= 1:
        return X
    for col, model in enumerate(self.estimators_):
        col_missing = missing[:, col]
        # Predictor rows: every column except the one being reconstructed.
        unknown_rows = np.delete(filled, col, 1)[col_missing]
        if len(unknown_rows) > 0:
            X[col_missing, col] = [model.predict(row) for row in unknown_rows]
    return X
def loadLogoSet(path, rows,cols,test_data_rate=0.15):
    """Load grayscale logo images and split them into train and test sets.

    Image ids come from ``readItems('data.txt')`` and labels from
    ``modelDict(path)``.  Each image is read from
    ``path + 'logo/' + <id> + '.jpg'`` as a single-channel image and resized
    to ``(rows, cols)``.  Images that cannot be read, or that contain fewer
    than 1000 pixels, are dropped together with their label.

    :param path: directory prefix (must end with a path separator)
    :param rows: target image height
    :param cols: target image width
    :param test_data_rate: fraction of the usable images held out for testing
    :return: ``(x_train, y_train), (x_test, y_test)``
    """
    random.seed(612)  # fixed seed so the split is reproducible
    _, imgID = readItems('data.txt')
    y, _ = modelDict(path)
    nPics = len(y)
    faceassset = np.zeros((nPics, rows, cols), dtype=np.uint8)  # gray images
    noImg = []
    for i in range(nPics):
        temp = cv2.imread(path + 'logo/' + imgID[i] + '.jpg', 0)
        # ``temp == None`` would be an elementwise comparison on an ndarray;
        # identity is the only reliable "could not read" check.
        if temp is None or temp.size < 1000:
            noImg.append(i)
        else:
            faceassset[i, :, :] = cv2.resize(
                temp, (cols, rows), interpolation=cv2.INTER_CUBIC)
    y = np.delete(y, noImg, 0)
    faceassset = np.delete(faceassset, noImg, 0)
    nPics = len(y)
    # random.sample needs a real sequence (an ndarray is rejected on
    # Python 3.11+); range() yields the same indices.
    index = random.sample(range(nPics), int(nPics * test_data_rate))
    x_test = faceassset[index, :, :]
    x_train = np.delete(faceassset, index, 0)
    y_test = y[index]
    y_train = np.delete(y, index, 0)
    return (x_train, y_train), (x_test, y_test)
def repeat(tensor: tf.Tensor, repeats: int, axis: int) -> tf.Tensor:
    """
    Repeat elements of the input tensor in the specified axis ``repeats``-times.

    .. note::
        Chaining of this op may produce TF warnings although the performance seems to be unaffected.

    :param tensor: TF tensor to be repeated
    :param repeats: number of repeats
    :param axis: axis to repeat
    :return: tensor with repeated elements
    """
    static_shape = tensor.get_shape().as_list()
    all_axes = np.arange(len(tensor.shape))
    # Bring ``axis`` to the front so that tf.gather (which indexes the
    # leading axis) can duplicate entries along it.
    to_front = np.hstack(([axis], np.delete(all_axes, axis)))
    # Inverse of ``to_front``: puts the leading axis back at position ``axis``.
    to_original = np.hstack((all_axes[1:axis+1], [0], all_axes[axis+1:]))
    positions = tf.range(0, static_shape[axis]*repeats)
    # Output position p reads source element floor(p / repeats).
    source_index = tf.cast(tf.floor(positions/tf.constant(repeats)), 'int32')
    moved = tf.transpose(tensor, to_front)
    duplicated = tf.gather(moved, source_index)
    return tf.transpose(duplicated, to_original)
def main():
    """Fit a decision tree on iris minus three held-out rows and print its
    predictions for those rows."""
    iris = load_iris()
    held_out = [0, 50, 100]

    # testing data: the held-out rows
    test_target = iris.target[held_out]
    test_data = iris.data[held_out]

    # training data: everything except the held-out rows
    train_data = np.delete(iris.data, held_out, axis=0)
    train_target = np.delete(iris.target, held_out)

    # Train classifier
    classifier = tree.DecisionTreeClassifier()
    classifier = classifier.fit(train_data, train_target)

    print(classifier.predict(test_data))
def _calc_B_for_tetra3d11(nodes,volume): A = np.ones((4,4)) belta = np.zeros(4) gama = np.zeros(4) delta = np.zeros(4) for i,nd in enumerate(nodes): A[i,1:] = nd.coord for i in range(4): belta[i] = (-1)**(i+1)*np.linalg.det(np.delete(np.delete(A,i,0),1,1)) gama[i] = (-1)**(i+2)*np.linalg.det(np.delete(np.delete(A,i,0),2,1)) delta[i] = (-1)**(i+1)*np.linalg.det(np.delete(np.delete(A,i,0),3,1)) B = 1./(6.*volume)*np.array([[belta[0],0.,0.,belta[1],0.,0.,belta[2],0.,0.,belta[3],0.,0.], [0.,gama[0],0.,0.,gama[1],0.,0.,gama[2],0.,0.,gama[3],0.], [0.,0.,delta[0],0.,0.,delta[1],0.,0.,delta[2],0.,0.,delta[3]], [gama[0],belta[0],0.,gama[1],belta[1],0.,gama[2],belta[2],0,gama[3],belta[3],0.], [0.,delta[0],gama[0],0.,delta[1],gama[1],0.,delta[2],gama[2],0.,delta[3],gama[3]], [delta[0],0.,belta[0],delta[1],0.,belta[1],delta[2],0.,belta[2],delta[3],0,belta[3]]]) return B
def append_neg_and_retrain(self, feat=None, force=False):
    """Add negative examples to the cache and retrain when enough accumulated.

    ``feat`` (if given) is stacked onto ``self.neg``.  Retraining is triggered
    when more than ``self.retrain_limit`` negatives were added since the last
    retrain, or when ``force`` is true.  After retraining, only negatives whose
    score is at least ``self.evict_thresh`` are kept (unless that would leave
    none), and cache statistics are printed.

    Returns the value produced by ``self.train()`` as its first element when a
    retrain happened, otherwise ``None``.
    """
    if feat is not None:
        added = feat.shape[0]
        self.neg = np.vstack((self.neg, feat))
        self.num_neg_added += added

    if not (self.num_neg_added > self.retrain_limit or force):
        return None

    self.num_neg_added = 0
    new_w_b, pos_scores, neg_scores = self.train()
    keep = np.where(neg_scores >= self.evict_thresh)[0]
    # Never prune down to an empty negative set.
    if len(keep) > 0:
        self.neg = self.neg[keep, :]
    print(' Pruning easy negatives')
    print(' Cache holds {} pos examples and {} neg examples'.
          format(self.pos.shape[0], self.neg.shape[0]))
    print(' {} pos support vectors'.format((pos_scores <= 1).sum()))
    print(' {} neg support vectors'.format((neg_scores >= -1).sum()))
    return new_w_b
def tune_tal(mono_phi_score, tal_list):
    """Pick the ``tal`` value with the lowest leave-one-out squared error.

    For every candidate in ``tal_list`` each row of ``mono_phi_score`` is held
    out in turn: ``train_predict_regression`` is fitted on the remaining rows
    and asked to predict the held-out row's last entry from its other entries.
    The mean squared error over all rows is printed and the candidate with the
    smallest error is returned.

    :param mono_phi_score: 2-D array-like; the last column is the target
    :param tal_list: candidate ``tal`` values
    :return: the element of ``tal_list`` with minimal error
    """
    errs = []
    tals = []
    for tal in tal_list:
        err = []
        for i, held_out in enumerate(mono_phi_score):
            mono_1 = numpy.delete(mono_phi_score, i, axis=0)
            value_h, _ = train_predict_regression(mono_1, held_out[:-1], tal)
            err.append((value_h - held_out[-1])**2)
        err = numpy.mean(err)
        errs.append(err)
        tals.append(tal)
        # A single pre-formatted string prints identically on Python 2 and 3.
        print('regression tal: {} err {}'.format(tal, err))
    idx = numpy.argmin(errs)
    return tals[idx]
def eliminate_overlapping_locations(f, separation):
    """Thin out positions until no two lie within ``separation`` of each other.

    Positions are rescaled by ``separation`` so that "too close" means a
    distance below 1.  For every close pair found, the second member of the
    pair is removed; the search is repeated until no close pair is left.
    """
    separation = validate_tuple(separation, f.shape[1])
    assert np.greater(separation, 0).all()
    # Rescale positions, so that pairs are identified below a distance of 1.
    f = f / separation
    close_pairs = cKDTree(f, 30).query_pairs(1)
    while len(close_pairs) != 0:
        f = np.delete(f, [second for _, second in close_pairs], 0)
        close_pairs = cKDTree(f, 30).query_pairs(1)
    return f * separation
def setdiff(eq1, eq2):
    """Return the elements of ``eq1`` that do not occur in ``eq2``.

    Both inputs are first passed through ``eqsize``.  Elements are compared by
    their ``hash``.
    NOTE(review): assumes ``eqsize`` returns two 1-D arrays of hashable
    elements -- confirm against its definition.

    :return: ``(p, ia)`` where ``ia`` holds the indices into ``eq1`` of the
        surviving elements (in original order) and ``p = eq1[ia]``
    """
    eq1, eq2 = eqsize(eq1, eq2)
    # The original pre-sized lists with ``[None] * shape`` and assigned through
    # ``c1.append[i]`` (both TypeErrors), and hashed eq2 for both sides.
    c1 = [hash(eq1[i]) for i in range(eq1.size)]
    c2 = {hash(eq2[i]) for i in range(eq2.size)}
    # The hashes are unsorted, so np.searchsorted cannot locate matches;
    # use a plain membership test instead.
    ia = np.array([i for i, h in enumerate(c1) if h not in c2], dtype=int)
    p = eq1[ia]
    return p, ia
def McCormack(x_nods_quantity, grid, transfer_velocity, time_step, x_step):
    """Advance ``grid`` by one time step and return the updated grid.

    Nodes ``2 .. x_nods_quantity - 2`` are updated; all other nodes are copied
    unchanged.  The sign of ``transfer_velocity[0]`` selects the stencil:
    positive uses nodes ``m-1`` and ``m-2``, otherwise ``m+1`` and ``m+2``.

    The update is computed from the *input* values only.  The original code
    aliased ``new_grid`` to ``grid``, so the ``m-1``/``m-2`` stencil read nodes
    that had already been overwritten and the caller's array was modified.

    :param x_nods_quantity: number of nodes considered by the update loop
    :param grid: node values at the current time level (not modified)
    :param transfer_velocity: per-node transfer velocity
    :param time_step: time step
    :param x_step: spatial step
    :return: a new grid holding the next time level
    """
    backward = transfer_velocity[0] > 0
    new_grid = grid.copy()
    for m in range(2, x_nods_quantity - 1):
        sigma = transfer_velocity[m] * time_step / x_step
        if backward:
            first = grid[m] - grid[m-1]
            second = grid[m] - grid[m-2]
        else:
            first = grid[m+1] - grid[m]
            second = grid[m+2] - grid[m]
        new_grid[m] = grid[m] - np.dot(sigma, first) + np.dot(sigma**2, second)
    return new_grid
def test_silence_frame_removal_given_hts_labels():
    """Linguistic features with silence frames removed must equal the stored
    reference in ``nn_no_silence_lab_425`` (per the original comment, the
    reference is meant to reproduce Merlin's results)."""
    question_path = join(DATA_DIR, "questions-radio_dnn_416.hed")
    binary_dict, continuous_dict = hts.load_question_set(question_path)

    labels = hts.load(join(DATA_DIR, "label_state_align", "arctic_a0001.lab"))
    features = fe.linguistic_features(labels,
                                      binary_dict,
                                      continuous_dict,
                                      add_frame_features=True,
                                      subphone_features="full"
                                      )

    # Remove silence frames
    silence = labels.silence_frame_indices()
    features = np.delete(features, silence, axis=0)

    reference_path = join(DATA_DIR, "nn_no_silence_lab_425", "arctic_a0001.lab")
    y = np.fromfile(reference_path, dtype=np.float32).reshape(-1, features.shape[-1])
    assert features.shape == y.shape
    assert np.allclose(features, y)
def validate(self):
    """Drop entries whose wav file is missing from ``<data_root>/<subset>/wav``.

    Raises ``RuntimeError`` when the wav directory does not exist.  When some
    ``<name>.wav`` files are absent, a warning is issued and the matching
    entries are removed from both ``self.names`` and ``self.transcriptions``.
    """
    wav_dir = join(self.data_root, self.subset, "wav")
    if not isdir(wav_dir):
        raise RuntimeError("{} doesn't exist.".format(wav_dir))
    miss_indices = [
        idx for idx, name in enumerate(self.names)
        if not exists(join(wav_dir, name + ".wav"))
    ]
    if len(miss_indices) > 0:
        warn("{}/{} wav files were missing in subset {}.".format(
            len(miss_indices), len(self.names), self.subset))
        self.names = np.delete(self.names, miss_indices)
        self.transcriptions = np.delete(self.transcriptions, miss_indices)
def data_split(arr):
    """Split the module-level DataFrame ``df`` by row label.

    :param arr: iterable of row labels
    :return: ``(df2, df3)`` where ``df2`` is ``df`` without the rows in ``arr``
        and ``df3`` is ``df`` without the labels ``0 .. len(df) - 1`` that are
        *not* in ``arr``
    """
    selected = set(arr)  # O(1) membership instead of scanning arr per label
    df2 = df.drop(list(arr))
    # ``range`` replaces the Python-2-only ``xrange``.
    df3 = df.drop([i for i in range(len(df)) if i not in selected])
    return (df2, df3)
def FileReader(file_list,param_list):
    """Stack the requested columns of several FITS tables into one 2-D array.

    For every path in ``file_list`` the data of HDU 1 is read and the columns
    named in ``param_list`` are placed side by side; the per-file tables are
    then stacked vertically.  Each path is printed as it is processed.

    :param file_list: iterable of FITS file paths
    :param param_list: column names to extract, in output order
    :return: array of shape ``(total_rows, len(param_list))``
    """
    # Zero-height / zero-width float seeds reproduce the dtype promotion and
    # empty-input shape of the original, which started from zero-filled dummy
    # rows/columns and deleted them at the end.
    tables = [np.zeros(shape=(0, len(param_list)))]
    for path in file_list:
        # The context manager guarantees the file handle is released.
        with fits.open(path, memmap=True) as hdulist:
            data_in = hdulist[1].data
            print(path)
            columns = [np.zeros(shape=(len(data_in), 0))]
            for param in param_list:
                columns.append(
                    np.reshape(data_in[param], (len(data_in[param]), 1)))
            # Concatenating copies the data out of the memory map before the
            # file is closed.
            tables.append(np.concatenate(columns, axis=1))
    return np.concatenate(tables, axis=0)
def create_vertex_groups(groups=['common', 'not_used'], weights=[0.0, 0.0], ob=None):
    '''Creates vertex groups and sets weights.

    "groups" is a list of strings for the names of the groups and "weights"
    is a list of weights corresponding to those names.  Every vertex gets a
    weight in every group so that later weight lookups never hit an unassigned
    vertex.  Groups that already exist keep their previous weights; to reset
    them, delete the created groups first.  When "ob" is None the current
    context object is used.'''
    if ob is None:
        ob = bpy.context.object
    vg = ob.vertex_groups
    for g, name in enumerate(groups):
        every_vertex = range(0, len(ob.data.vertices))
        if name in vg.keys():
            # Adding 0 leaves the weights of an existing group untouched.
            vg[name].add(every_vertex, 0, 'ADD')
        else:
            vg.new(name)
            vg[name].add(every_vertex, weights[g], 'REPLACE')
def linregress(self):
    """Get the linear regression of the mean values in this plot.

    Entries listed in ``self.bad_indices.means`` are removed before fitting.
    Returns a ``self.LinRegress`` tuple holding the best-fit line evaluated on
    this plotter's ``t_axis``, the drift coefficient (slope divided by
    ``SEC_PER[self.t_units]``), and the result object of
    ``scipy.stats.linregress``.  When no data remains after cleaning, the
    tuple is ``(0, 0, None)``."""
    bad = self.bad_indices.means
    cleandata = np.delete(self.plot_vars.means, bad)
    cleantimes = np.delete(self.t_axis, bad)
    if len(cleandata) == 0:
        return self.LinRegress(bestfit=0, driftcoeff=0, linregress=None)
    fit = scipy.stats.linregress(cleantimes, cleandata)
    return self.LinRegress(bestfit=fit.slope * self.t_axis + fit.intercept,
                           driftcoeff=fit.slope / SEC_PER[self.t_units],
                           linregress=fit)
def trend(self):
    """Return the trend selected by ``self.plot_properties['detrend']``.

    ``'mean'`` gives the mean of the plotted means with bad indices removed
    (0 when nothing is left), ``'none'`` gives 0, ``'linear'`` gives the
    best-fit component of ``self.linregress``, and any other value is returned
    as-is so that a custom number can be specified."""
    mode = self.plot_properties['detrend']
    if mode == 'mean':
        # Bad indices are dropped first because they can skew the mean.
        cleandata = np.delete(self.plot_vars.means, self.bad_indices.means)
        return cleandata.mean() if len(cleandata) != 0 else 0
    if mode == 'none':
        return 0
    if mode == 'linear':
        bestfit, _driftcoeff, _fit = self.linregress
        return bestfit
    return self.plot_properties['detrend']
def plot_timeseries(self, ax, **kwargs):
    """Plot detrended means (with std. dev. error bars), minima and maxima.

    Values are divided by ``SEC_PER['ns']`` (the original note says plots are
    in ns rather than seconds), and indices listed in ``self.bad_indices`` are
    removed from each series before plotting."""
    ns = SEC_PER['ns']
    bad_means = self.bad_indices.means
    bad_mins = self.bad_indices.mins
    bad_maxs = self.bad_indices.maxs

    # means with their standard deviations
    t = np.delete(self.t_axis, bad_means)
    y = np.delete(self.plot_vars.means - self.trend, bad_means) / ns
    yerr = np.delete(self.plot_vars.stds, bad_means) / ns
    # minima
    mint = np.delete(self.t_axis, bad_mins)
    miny = np.delete(self.plot_vars.mins - self.trend, bad_mins) / ns
    # maxima
    maxt = np.delete(self.t_axis, bad_maxs)
    maxy = np.delete(self.plot_vars.maxs - self.trend, bad_maxs) / ns

    # plot everything, but only if the plotted data has nonzero length
    # in order to avoid an annoying matplotlib bug when adding legends.
    if len(t) != 0:
        ax.errorbar(t, y, marker="o", color="green", linestyle='none',
                    yerr=yerr, label="Means +/- Std. Dev.")
    if len(mint) != 0:
        ax.scatter(mint, miny, marker="^", color="blue", label="Minima")
    if len(maxt) != 0:
        ax.scatter(maxt, maxy, marker="v", color="red", label="Maxima")
def plot_timeseries(self, ax, **kwargs):
    """Plot detrended means (with std. dev. error bars) and absolute extrema.

    Values are divided by ``SEC_PER['ns']`` (the original note says plots are
    in ns rather than seconds), and indices listed in ``self.bad_indices`` are
    removed from each series before plotting."""
    ns = SEC_PER['ns']
    bad_means = self.bad_indices.means
    bad_mins = self.bad_indices.absmins
    bad_maxs = self.bad_indices.absmaxs

    # means with their standard deviations
    t = np.delete(self.t_axis, bad_means)
    y = np.delete(self.plot_vars.means - self.trend, bad_means) / ns
    yerr = np.delete(self.plot_vars.stds, bad_means) / ns
    # absolute minima
    mint = np.delete(self.t_axis, bad_mins)
    miny = np.delete(self.plot_vars.absmins - self.trend, bad_mins) / ns
    # absolute maxima
    maxt = np.delete(self.t_axis, bad_maxs)
    maxy = np.delete(self.plot_vars.absmaxs - self.trend, bad_maxs) / ns

    # plot everything, but only if the plotted data has nonzero length
    # in order to avoid an annoying matplotlib bug when adding legends.
    if len(t) != 0:
        ax.errorbar(t, y, marker="o", color="green", linestyle='none',
                    yerr=yerr, label="Means +/- Std. Dev.")
    if len(mint) != 0:
        ax.scatter(mint, miny, marker="^", color="blue", label="Abs. Minima")
    if len(maxt) != 0:
        ax.scatter(maxt, maxy, marker="v", color="red", label="Abs. Maxima")
def plot_timeseries(self, ax, **kwargs):
    """Plot the detrended signal and mark the segment start/end.

    The means (bad indices removed, divided by ``SEC_PER['ns']``) are drawn as
    a line.  The start and end of ``self.dq_segment``, expressed relative to
    ``self.start`` in ``self.t_units``, are drawn as vertical markers when
    they fall inside ``self.t_lim``."""
    bad = self.bad_indices.means
    times = np.delete(self.t_axis, bad)
    signal = np.delete(self.plot_vars.means - self.trend, bad) / SEC_PER['ns']
    ax.plot(times, signal, marker="o", color="green", label="Recorded Signal")

    # put the start and/or end time in the plot as a vertical line
    unitfactor = SEC_PER[self.t_units]
    dq_start = (self.dq_segment.start.gpsSeconds - self.start) / unitfactor
    dq_end = (self.dq_segment.end.gpsSeconds - self.start) / unitfactor
    zorder = self.plot_properties['start_end_zorder']
    lower, upper = self.t_lim[0], self.t_lim[1]
    if lower <= dq_start:
        plot_vertical_marker(ax, [dq_start], zorder=zorder,
                             label="Start of Segment",
                             color='#FF1493')  # deep pink
    if dq_end <= upper:
        plot_vertical_marker(ax, [dq_end], zorder=zorder,
                             label="End of Segment",
                             color='#191970')  # midnight blue
def remove_indexes(self, rm_idx_list, rearranged_props):
    """
    The k-points with velocity < 1 cm/s (either in valence or conduction band)
    are taken out, as they are troublesome later with extreme values
    (e.g. too high elastic scattering rates).

    For each carrier type and included band the index list is de-duplicated
    and sorted in descending order (``rm_idx_list`` is updated in place), and
    those indexes are then deleted from every listed property of
    ``self.kgrid``.

    :param rm_idx_list ([int]): the kpoint indexes that need to be removed for each property
    :param rearranged_props ([str]): list of properties for which some indexes need to be removed
    :return:
    """
    for tp in ("n", "p"):
        n_bands = self.cbm_vbm[tp]["included"]
        for ib in range(n_bands):
            unique_desc = sorted(set(rm_idx_list[tp][ib]), reverse=True)
            rm_idx_list[tp][ib] = unique_desc
            logging.debug("# of {}-type kpoints indexes with low velocity or off-energy: {}".format(tp,len(unique_desc)))
        for prop in rearranged_props:
            pruned = [np.delete(self.kgrid[tp][prop][ib], rm_idx_list[tp][ib], axis=0)
                      for ib in range(n_bands)]
            self.kgrid[tp][prop] = np.array(pruned)
def transform(self, X):
    """Return a copy of ``X`` with its NaN entries imputed.

    ``X`` is validated (NaNs allowed) and must have as many columns as
    ``self.statistics_``.  A first pass fills the gaps with
    ``self.initial_imputer``.  With several estimators, estimator ``i``
    predicts the missing entries of column ``i`` from the other columns of the
    pre-imputed matrix; with a single estimator the missing entries are taken
    from its ``inverse_transform(transform(...))`` reconstruction.

    Raises ``ValueError`` on a feature-count mismatch.
    """
    check_is_fitted(self, ['statistics_', 'estimators_', 'gamma_'])
    X = check_array(X, copy=True, dtype=np.float64, force_all_finite=False)
    expected = self.statistics_.shape[1]
    if X.shape[1] != expected:
        raise ValueError("X has %d features per sample, expected %d"
                         % (X.shape[1], self.statistics_.shape[1]))

    missing = np.isnan(X)
    imputed = self.initial_imputer.fit_transform(X)

    if len(self.estimators_) > 1:
        for col, model in enumerate(self.estimators_):
            col_missing = missing[:, col]
            unknown_rows = np.delete(imputed, col, 1)[col_missing]
            if len(unknown_rows) > 0:
                X[col_missing, col] = model.predict(unknown_rows)
    else:
        model = self.estimators_[0]
        reconstructed = model.inverse_transform(model.transform(imputed))
        X[missing] = reconstructed[missing]
    return X
def _run_TR_from_scan_onsets(self, n_T, scan_onsets=None): if scan_onsets is None: # assume that all data are acquired within the same scan. n_run = 1 run_TRs = np.array([n_T], dtype=int) else: # Each value in the scan_onsets tells the index at which # a new scan starts. For example, if n_T = 500, and # scan_onsets = [0,100,200,400], this means that the time points # of 0-99 are from the first scan, 100-199 are from the second, # 200-399 are from the third and 400-499 are from the fourth run_TRs = np.int32(np.diff(np.append(scan_onsets, n_T))) run_TRs = np.delete(run_TRs, np.where(run_TRs == 0)) n_run = run_TRs.size # delete run length of 0 in case of duplication in scan_onsets. logger.info('I infer that the number of volumes' ' in each scan are: {}'.format(run_TRs)) return run_TRs, n_run
def chooseErrorData(self, game, lesson=None):
    '''
    Choose saved error function data by lesson and game name in
    history database.

    After selecting ``game`` on ``self.history`` and calling ``self.load()``,
    row 0 of ``self.data`` is treated as training errors and row 1 as test
    errors; the value -1 acts as a separator that opens a new lesson.

    :param game: game name passed to ``self.history.setGame``
    :param lesson: index of the separator-delimited chunk to select; when
        ``None`` all values except the separators are kept
    '''
    self.history.setGame(game)
    self.load()
    training_row = self.data[0, :]
    test_row = self.data[1, :]
    if lesson is not None:
        # np.split needs a flat sequence of integer split points; the (k, 1)
        # array produced by np.argwhere is rejected by current NumPy.
        # ``[1:]`` drops the separator that starts the chunk.
        self.error_data_training = np.split(
            training_row, np.flatnonzero(training_row == -1))[lesson][1:]
        self.error_data_test = np.split(
            test_row, np.flatnonzero(test_row == -1))[lesson][1:]
    else:
        self.error_data_training = training_row[training_row != -1]
        self.error_data_test = test_row[test_row != -1]
def add_state(self, state):
    """Push ``state`` onto the stack of recent observations in ``self.queue``.

    ``None`` clears the queue.  The first real state fills the queue with
    ``self.stacked_num`` copies of itself along a new trailing axis; every
    later state drops the oldest slice and is appended as the newest one.
    """
    if state is None:
        self.queue = None
        return

    state = np.asarray(state)
    stack_axis = state.ndim
    # extra trailing dimension for the observation
    observation = state[..., np.newaxis]

    if self.queue is None:
        self.queue = np.repeat(observation, self.stacked_num, axis=stack_axis)
        return

    # discard the oldest observation (front) and append the latest (back)
    self.queue = np.concatenate((self.queue[..., 1:], observation), axis=stack_axis)
def margins(doc_scores):
    """Compute per-column winning and losing margins against the other columns.

    For column ``j`` the reference taken from the remaining columns depends on
    the module-level ``FROM`` setting:

    * ``'second'`` -- win against the row-wise max, lose against the min
    * ``'other'``  -- win against the row-wise min, lose against the max
    * ``'median'`` -- win and lose against the row-wise median

    Margins are clipped at 0.  Any other ``FROM`` value leaves both outputs
    at zero.

    :return: ``(margin_win, margin_lose)``, both shaped like ``doc_scores``
    """
    margin_win = np.zeros_like(doc_scores)
    margin_lose = np.zeros_like(doc_scores)
    for j in range(doc_scores.shape[1]):
        mine = doc_scores[:, j]
        rest = np.delete(doc_scores, j, axis=1)
        if FROM == 'second':
            margin_win[:, j] = np.maximum(mine - rest.max(axis=1), 0)
            margin_lose[:, j] = np.maximum(rest.min(axis=1) - mine, 0)
        elif FROM == 'other':
            margin_win[:, j] = np.maximum(mine - rest.min(axis=1), 0)
            margin_lose[:, j] = np.maximum(rest.max(axis=1) - mine, 0)
        elif FROM == 'median':
            margin_win[:, j] = np.maximum(mine - np.median(rest, axis=1), 0)
            margin_lose[:, j] = np.maximum(np.median(rest, axis=1) - mine, 0)
    return margin_win, margin_lose
def filter_annotations(self, image_group, annotations_group, group):
    """Remove boxes that are degenerate or lie outside their image.

    Columns 0-3 of each annotation array are read as ``x1, y1, x2, y2``.  A
    box is invalid when ``x2 <= x1``, ``y2 <= y1``, ``x1 < 0``, ``y1 < 0``,
    ``x2 > image width`` or ``y2 > image height``.  Invalid boxes trigger a
    warning and are deleted from ``annotations_group`` in place.

    :return: ``(image_group, annotations_group)``
    """
    for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        assert(isinstance(annotations, np.ndarray)), '\'load_annotations\' should return a list of numpy arrays, received: {}'.format(type(annotations))

        x1, y1, x2, y2 = (annotations[:, c] for c in range(4))
        height, width = image.shape[0], image.shape[1]
        invalid = (
            (x2 <= x1) |
            (y2 <= y1) |
            (x1 < 0) |
            (y1 < 0) |
            (x2 > width) |
            (y2 > height)
        )
        invalid_indices = np.where(invalid)[0]

        if not len(invalid_indices):
            continue
        warnings.warn('Image with id {} (shape {}) contains the following invalid boxes: {}.'.format(
            group[index],
            image.shape,
            [annotations[invalid_index, :] for invalid_index in invalid_indices]
        ))
        annotations_group[index] = np.delete(annotations, invalid_indices, axis=0)

    return image_group, annotations_group
def cellslice(UC, P_UC, slicing):
    """Split the elements of a unit cell into ``slicing`` equal pieces.

    Each of the first ``P_UC`` columns of the 6-row array ``UC`` is one
    element; row 0 holds its kind and row 1 the quantity that is divided by
    ``slicing`` and accumulated into ``s``.  Elements of kind 2, 5 or 7
    (edges, rotators, diagnostics according to the original note) are copied
    once, every other element is repeated ``slicing`` times.  With
    ``slicing == 1`` the input is used as-is.

    :return: ``(s, UCS, P_UCS)`` -- cumulative sum of row 1 with a leading 0,
        the sliced cell, and its number of columns
    """
    if slicing == 1:
        UCS, P_UCS = UC, P_UC
    else:
        unsliced_kinds = (2, 5, 7)  # noslicing edges, rotators, diagnostics
        # A zero-width float block keeps the result float-typed, also when
        # there are no elements at all.
        pieces = [zeros([6, 0])]
        P_UCS = 0  # points in sliced unit cell
        for i in range(P_UC):
            copies = 1 if UC[0, i] in unsliced_kinds else slicing
            pieces.append(UC[:, i].reshape(6, 1).repeat(copies, 1))
            P_UCS += copies
        UCS = hstack(pieces)
        UCS[1, :] = UCS[1, :]/slicing
    s = hstack((0, cumsum(UCS[1, :])))
    return s, UCS, P_UCS
def test_fit_to_less_width(self):
    """Fit a tensor to a smaller width (i.e. trimming).

    Given a 3D tensor of shape [batch, length, width], apply the
    `ops.fit()` operator to it with a smaller `width` as the target one
    and check that the trailing entries of the last axis have been deleted.
    """
    batch, length, width = 2, 5, 4
    fit_width = 3
    delta = width - fit_width

    input_ = tf.placeholder(dtype=tf.float32, shape=[None, None, None])
    output = ops.fit(input_, fit_width)

    input_actual = np.random.rand(batch, length, width)  # pylint: disable=I0011,E1101
    # The last ``delta`` positions of the width axis are expected to vanish.
    delete_idx = [width - (i + 1) for i in range(delta)]
    output_expected = np.delete(input_actual, delete_idx, axis=2)  # pylint: disable=I0011,E1101

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        output_actual = sess.run(output, {input_: input_actual})
    self.assertAllClose(output_expected, output_actual)
def prepare_data(img_folder):
    """Load, standardise and split the dataset found in ``img_folder``.

    Features are standardised per column (zero mean, unit variance, with a
    small epsilon guarding against division by zero).  Up to 1000 randomly
    chosen samples form the test set; the rest is the training set.
    NOTE(review): with 1000 samples or fewer, every sample ends up in the
    test set and the training arrays are empty.

    :return: ``(X, Y, test_X, test_Y)``
    """
    X, Y, captcha_text = vecmp.load_dataset(folder=img_folder)

    # standardisation: per-column mean and standard deviation over all rows
    X = (X - X.mean(axis=0)) / (X.std(axis=0) + 0.00001)

    n_samples = X.shape[0]
    test_size = min(1000, n_samples)
    held_out = np.random.choice(n_samples, test_size, replace=False)

    test_X, test_Y = X[held_out, :], Y[held_out, :]
    train_X = np.delete(X, held_out, axis=0)
    train_Y = np.delete(Y, held_out, axis=0)
    return (train_X, train_Y, test_X, test_Y)
def __init__(self, table,reg=False,lamda=0): """Initializes Class for Linear Regression Parameters ---------- table : ndarray(n-rows,m-features + 1) Numerical training data, last column as training values reg : Boolean Set True to enable regularization, false by default """ #regularization parameters self.reg = reg self.lamda = lamda self.num_training = np.shape(table)[0] # remove the last column from training data to extract features data self.X = np.delete(table, -1, 1) # add a column of ones in front of the training data self.X = np.insert(self.X, 0, np.ones(self.num_training), axis=1) self.num_features = np.shape(self.X)[1] # extract the values of the training set from the provided data self.y = table[:, self.num_features - 1] # create parameters and initialize to 1 self.theta = np.ones(self.num_features)
def compute_cost(self):
    """Computes cost based on the current values of the parameters

    The cost is the mean cross-entropy between ``self.y`` and the sigmoid of
    ``X . theta``.  When ``self.reg`` is true, the regularization term
    ``lamda / (2 * num_training) * sum(theta[1:] ** 2)`` is added; the
    intercept ``theta[0]`` is not penalised.

    Returns
    -------
    cost : float
        Cost of the selection of current set of parameters
    """
    hypothesis = LogisticRegression.sigmoid(np.dot(self.X, self.theta))
    cost = -(np.sum(self.y * np.log(hypothesis)
                    + (1 - self.y) * (np.log(1 - hypothesis)))) / self.num_training
    if not self.reg:
        return cost
    # Slice (rather than modify) theta so the intercept stays untouched.
    penalised = self.theta[1:]
    # The original ``lamda/2*num_training`` evaluated as (lamda/2)*m; the
    # factor must be lamda/(2*m).  The float literal also avoids integer
    # division on Python 2.
    reg = (self.lamda / (2.0 * self.num_training)) * np.sum(np.power(penalised, 2))
    return cost + reg
def unpad(matrix):
    '''
    Strip off the trailing column of ones from an nx4 matrix.

    Transform from:
        array([[1., 2., 3., 1.],
               [2., 3., 4., 1.],
               [5., 6., 7., 1.]])
    to:
        array([[1., 2., 3.],
               [2., 3., 4.],
               [5., 6., 7.]])

    Raises ValueError when the input is not nx4 or when its last column
    contains anything other than ones.
    '''
    is_nx4 = matrix.ndim == 2 and matrix.shape[1] == 4
    if not is_nx4:
        raise ValueError("Invalid shape %s: unpad expects nx4" % (matrix.shape,))
    last_column = matrix[:, 3]
    if not all(last_column == 1.):
        raise ValueError('Expected a column of ones')
    return np.delete(matrix, 3, axis=1)
def BFS(self, start, fs=None):
    '''
    Returns the BFS tree for the graph starting from start.

    ``fs`` maps a node to the nodes reachable from it and defaults to
    ``self.FSs``.  The result is a 1-D object array of
    ``(parent, child)`` tuples in discovery order.
    '''
    # Local import: the rest of the file is not visible from this block.
    from collections import deque

    if fs is None:
        fs = self.FSs
    # ``np.int`` no longer exists in NumPy, and array pop/insert/append
    # copied the whole array on every step; a deque and a set do the same
    # bookkeeping in O(1).
    to_be_processed = deque([int(start)])
    known = set()
    edges = []
    while to_be_processed:
        # FIFO: take the node that has been waiting longest
        current_node = to_be_processed.popleft()
        for node in fs[current_node]:
            if node not in known:
                known.add(node)
                edges.append((current_node, node))
                to_be_processed.append(int(node))
    # Fill element by element so each tuple stays a single object entry.
    tree = np.empty(len(edges), dtype=object)
    for position, edge in enumerate(edges):
        tree[position] = edge
    return tree
def DFS(self, start, fs=None):
    '''
    Returns the DFS tree for the graph starting from start.

    ``fs`` maps a node to the nodes reachable from it and defaults to
    ``self.FSs``.  The result is a 1-D object array of
    ``(parent, child)`` tuples in discovery order.  As in the original, a
    node counts as discovered when it is pushed, not when it is expanded.
    '''
    # Local import: the rest of the file is not visible from this block.
    from collections import deque

    if fs is None:
        fs = self.FSs
    # ``np.int`` no longer exists in NumPy, and array pop/insert/append
    # copied the whole array on every step; a deque and a set do the same
    # bookkeeping in O(1).
    to_be_processed = deque([int(start)])
    known = set()
    edges = []
    while to_be_processed:
        # LIFO: take the most recently pushed node
        current_node = to_be_processed.popleft()
        for node in fs[current_node]:
            if node not in known:
                known.add(node)
                edges.append((current_node, node))
                to_be_processed.appendleft(int(node))
    # Fill element by element so each tuple stays a single object entry.
    tree = np.empty(len(edges), dtype=object)
    for position, edge in enumerate(edges):
        tree[position] = edge
    return tree
def topological_sort(self):
    '''
    Returns a list of topologically sorted nodes.

    Starts from node 0 and repeatedly emits a node whose incoming edges have
    all been consumed.  ``self.FSs`` gives the successors of a node and
    ``self.BSs`` its predecessors; ``self.BSs`` itself is not modified.
    Prints a message and returns ``None`` when the graph is cyclic.
    '''
    if self.is_cyclic(self.FSs):
        # print() with a single string behaves the same on Python 2 and 3.
        print('cannot apply labels, graph contains cycles')
        return
    big_l = []  # sorted elements, in output order
    big_s = {0}  # nodes with no remaining incoming edges
    bs_copy = self.BSs.copy()
    while big_s:
        n = big_s.pop()
        big_l.append(n)
        for m in self.FSs[n]:
            # consume the edge n -> m
            bs_copy[m] = np.delete(bs_copy[m], np.where(bs_copy[m] == n))
            if len(bs_copy[m]) == 0:
                big_s.add(int(m))
    return big_l
def _mask_clip(self, row_or_col): ''' Cuts out items from matrix that do not contain at least k values on axis=0 ''' mat = self.mat k = self.k lil = mat.tolil() to_remove = [] for idx, i in enumerate(lil.rows): if len(i) < k: to_remove.append(idx) lil.rows = np.delete(lil.rows, to_remove) lil.data = np.delete(lil.data, to_remove) if row_or_col == 'row': self.row_idx = np.delete(range(lil.shape[0]), to_remove) elif row_or_col == 'col': self.col_idx = np.delete(range(lil.shape[0]), to_remove) remaining = lil.shape[0] - len(to_remove) lil = lil[:remaining] self.mat = lil return self
def __call__(self, index_list, padded_value=-1): """ Args: index_list (np.ndarray): list of word indices. Batch size 1 is expected. padded_value (int): the value used for padding Returns: word_list (list): list of words """ # Remove padded values assert type(index_list) == np.ndarray, 'index_list should be np.ndarray.' index_list = np.delete(index_list, np.where(index_list == -1), axis=0) # Convert from indices to the corresponding words word_list = list(map(lambda x: self.map_dict[x], index_list)) return word_list
def __call__(self, index_list, padded_value=-1): """ Args: index_list (list): phone indices padded_value (int): the value used for padding Returns: str_phone (string): a sequence of phones """ # Remove padded values assert type(index_list) == np.ndarray, 'index_list should be np.ndarray.' index_list = np.delete(index_list, np.where(index_list == -1), axis=0) # Convert from indices to the corresponding phones phone_list = list(map(lambda x: self.map_dict[x], index_list)) str_phone = ' '.join(phone_list) return str_phone
def buildTree(self, data, features):
    """Recursively build a decision tree as nested dictionaries.

    The last column of ``data`` is the class.  Recursion stops when all rows
    share one class (that class is returned) or when only the class column is
    left (``self.majorityCnt`` decides).  Otherwise the feature chosen by
    ``InformationGain.chooseBestFeatureToSplit`` becomes the node, with one
    sub-tree per distinct value of that feature.

    :param data: 2-D array of samples, class in the last column
    :param features: labels of the feature columns of ``data``
    :return: a class value, or ``{feature_label: {value: subtree, ...}}``
    """
    classification = data[:, -1]
    if len(set(classification)) == 1:
        return classification[0]
    if len(data[0]) == 1:
        return self.majorityCnt(classification)

    gain = InformationGain()
    best = gain.chooseBestFeatureToSplit(data)
    label = features[best]
    remaining_features = np.delete(features, best, axis=0)

    branches = {}
    for value in set(data[:, best]):
        subset = gain.splitData(data, best, value)
        branches[value] = self.buildTree(subset, remaining_features)
    return {label: branches}