The following 10 code examples, extracted from open-source Python projects, illustrate how to use scipy.cluster.vq.whiten().
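Before the examples, a minimal sketch of what whiten() does (array values are made up for illustration): it normalizes each feature, i.e. each column of the observation matrix, to unit variance by dividing it by its standard deviation across all observations, so that no single feature dominates the Euclidean distances used by k-means.

import numpy as np
from scipy.cluster.vq import whiten

# Rows are observations, columns are features.
features = np.array([[1.9, 2.3, 1.7],
                     [1.5, 2.5, 2.2],
                     [0.8, 0.6, 1.7]])

whitened = whiten(features)
# Equivalent to features / features.std(axis=0); each column of
# `whitened` now has unit variance.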
def k_means_cluster_Predict(data_list, info):
    # assumes: import numpy as np; from sklearn import cluster;
    # from scipy.cluster.vq import whiten, kmeans, vq;
    # compute_bic() is defined elsewhere in the project
    array_diagnal = np.array([[data_list[0][x], data_list[1][x]] for x in range(len(data_list[0]))])
    ks = list(range(1, len(info)))
    KMeans = [cluster.KMeans(n_clusters=i, init="k-means++").fit(array_diagnal) for i in ks]
    BIC = [compute_bic(kmeansi, array_diagnal) for kmeansi in KMeans]
    ks_picked = ks[BIC.index(max(BIC))]
    if ks_picked == 1:
        return [data_list]
    else:
        out = []
        # np.std replaces scipy.std, which has been removed from SciPy
        std_rec = [np.std(data_list[0]), np.std(data_list[1])]
        whitened = whiten(array_diagnal)
        centroids, distortion = kmeans(whitened, ks_picked)
        idx, _ = vq(whitened, centroids)
        for x in range(ks_picked):
            group1 = [[int(i) for i in array_diagnal[idx == x, 0]],
                      [int(i) for i in array_diagnal[idx == x, 1]]]
            out.append(group1)
        return out
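One subtlety worth noting after this example: kmeans() runs in whitened space, so the centroids it returns are in scaled units, while the output groups are built from the original array_diagnal values. A hedged sketch of mapping centroids back to the original units (the data here is hypothetical):

import numpy as np
from scipy.cluster.vq import whiten, kmeans

data = np.random.rand(100, 2) * [10.0, 1000.0]  # hypothetical 2-D points on very different scales
std = data.std(axis=0)                          # the per-column scale whiten() divides by
centroids, distortion = kmeans(whiten(data), 3)
centroids_original = centroids * std            # centroids expressed in the original units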
def kmeans_numpy(d, headers, K, whiten=True):
    # assign to A the result of getting the data from your Data object
    A = d.get_data(headers)
    # assign to W the result of calling vq.whiten on A
    W = vq.whiten(A)
    # assign to codebook, bookerror the result of calling vq.kmeans with W and K
    codebook, bookerror = vq.kmeans(W, K)
    # assign to codes, error the result of calling vq.vq with W and the codebook
    codes, error = vq.vq(W, codebook)
    # return codebook, codes, and error
    return codebook, codes, error

# prep the k-means clustering algorithm by getting initial cluster means
def vector_quantize(data_dict, vs, bins):
    # assumes: import numpy as np, pickle; sp apparently refers to scipy.cluster.vq
    codebooks = {}
    vq_data = {}
    for size in vs.keys():
        all_size_data = []
        for disease in vs[size]:
            all_size_data.extend(data_dict[disease])
        #whitened = sp.whiten(all_size_data)
        #codebooks[size] = sp.kmeans(whitened, bins)[0]
        codebooks[size] = sp.kmeans(np.asarray(all_size_data), bins)[0]
    pickle.dump(codebooks, open("all_codebooks.pkl", "wb"))
    for dis in data_dict.keys():
        n = len(data_dict[dis])
        m = len(data_dict[dis][0])
        # list() makes this eager under Python 3, where map() returns a lazy iterator
        vq_data[dis] = list(map(str, sp.vq(np.reshape(data_dict[dis], (n, m)),
                                           codebooks[len(data_dict[dis][0])])[0]))
    return vq_data
def k_means_cluster(data_list):
    # assumes the same imports as k_means_cluster_Predict above
    if max(data_list[0]) - min(data_list[0]) > 10 and max(data_list[1]) - min(data_list[1]) > 10:
        array_diagnal = np.array([[data_list[0][x], data_list[1][x]] for x in range(len(data_list[0]))])
        ks = list(range(1, min([5, len(data_list[0]) + 1])))
        KMeans = [cluster.KMeans(n_clusters=i, init="k-means++").fit(array_diagnal) for i in ks]
        KMeans_predict = [cluster.KMeans(n_clusters=i, init="k-means++").fit_predict(array_diagnal) for i in ks]
        BIC = []
        BIC_rec = []
        for x in ks:
            if KMeans_predict[x - 1].max() < x - 1:
                continue
            else:
                BIC_i = compute_bic(KMeans[x - 1], array_diagnal)
                if abs(BIC_i) < 10**8:
                    BIC.append(BIC_i)
                    BIC_rec.append(x)
        #BIC = [compute_bic(kmeansi,array_diagnal) for kmeansi in KMeans]
        #ks_picked=ks[BIC.index(max(BIC))]
        ks_picked = BIC_rec[BIC.index(max(BIC))]
        if ks_picked == 1:
            return [data_list]
        else:
            out = []
            # np.std replaces scipy.std, which has been removed from SciPy
            std_rec = [np.std(data_list[0]), np.std(data_list[1])]
            whitened = whiten(array_diagnal)
            centroids, distortion = kmeans(whitened, ks_picked)
            idx, _ = vq(whitened, centroids)
            for x in range(ks_picked):
                group1 = [[int(i) for i in array_diagnal[idx == x, 0]],
                          [int(i) for i in array_diagnal[idx == x, 1]]]
                out.append(group1)
            return out
    else:
        return [data_list]
def get_mfcc_feat(self):
    # assumes: import glob; import numpy as np; scipy.io.wavfile as wav;
    # whiten/kmeans2 from scipy.cluster.vq; mfcc() from a speech-features library
    # creating codebook with all models
    mfcc_feats = None
    for filename in glob.iglob('../data/voices/*.wav'):
        print(filename)  # the original used the Python 2 print statement
        (rate, sig) = wav.read(filename)
        # MFCC Features. Each row corresponds to MFCC for a frame
        mfcc_person = mfcc(sig.astype(np.float64), rate)
        if mfcc_feats is None:
            mfcc_feats = mfcc_person
        else:
            mfcc_feats = np.concatenate((mfcc_feats, mfcc_person), axis=0)
    # Normalize the features
    whitened = whiten(mfcc_feats)
    self.codebook, labeled_obs = kmeans2(data=whitened, k=3)
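Once a codebook has been built this way, new recordings can be quantized against it with vq(). A hedged, self-contained sketch with made-up MFCC-like data (strictly, the new frames should be scaled with the training set's standard deviations rather than their own):

import numpy as np
from scipy.cluster.vq import whiten, vq, kmeans2

frames = np.random.rand(200, 13)     # hypothetical MFCC frames, 13 coefficients per frame
codebook, _ = kmeans2(data=whiten(frames), k=3)

new_frames = np.random.rand(50, 13)  # hypothetical frames from a new recording
labels, distances = vq(whiten(new_frames), codebook)
# labels[i] is the index of the nearest codebook centroid for frame i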
def argparser():
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse

    # assumes module-level DEFAULT_METRIC and metrics (a dict of metric
    # functions) are defined elsewhere in the project
    ap = argparse.ArgumentParser()
    ap.add_argument('vectors', nargs=1, metavar='FILE', help='word vectors')
    ap.add_argument('-a', '--approximate', default=False, action='store_true',
                    help='filter by approximate similarity (with -t)')
    ap.add_argument('-i', '--min-index', default=0, type=int,
                    help='index of first word (default 0)')
    ap.add_argument('-M', '--metric', default=DEFAULT_METRIC,
                    choices=sorted(metrics.keys()),
                    help='distance metric to apply')
    ap.add_argument('-n', '--normalize', default=False, action='store_true',
                    help='normalize vectors to unit length')
    ap.add_argument('-r', '--max-rank', metavar='INT', default=None, type=int,
                    help='only consider r most frequent words')
    ap.add_argument('-t', '--threshold', metavar='FLOAT', default=None, type=float,
                    help='only output distances <= t')
    ap.add_argument('-T', '--tolerance', metavar='FLOAT', default=0.1, type=float,
                    help='approximation tolerance (with -a)')
    ap.add_argument('-w', '--whiten', default=False, action='store_true',
                    help='normalize features to unit variance')
    ap.add_argument('-W', '--words', default=False, action='store_true',
                    help='output words instead of indices')
    return ap
def process_options(args):
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        # whitening should be implemented in wvlib to support use together
        # with approximate similarity
        if options.approximate:
            raise NotImplementedError()  # fixed: original read "NotImplemenedError", which would raise NameError
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options
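For context, a hedged usage sketch of the two functions above; 'vectors.bin' is a made-up file name, and wvlib must be importable for this to run:

# process_options() consumes an argv-style list; the flags are the ones
# defined in argparser() above.
words, vectors, wv, options = process_options(['vectors.bin', '--whiten', '--normalize'])
# vectors is now the whitened (unit-variance) matrix; combining --whiten
# with --approximate would raise NotImplementedError, as coded above.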
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters
    to create. Computes and returns the codebook, codes and representation
    errors. If given an Nx1 matrix of categories, it uses the category
    labels to calculate the initial cluster means.
    '''
    # assign to A the result of getting the data given the headers;
    # fall back to treating d itself as the data array
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d

    if whiten:
        W = vq.whiten(A)
    else:
        W = A

    codebook = kmeans_init(W, K, categories)
    # assign to codebook, codes, errors, the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)
    # return the codebook, codes, and representation error
    return codebook, codes, errors

# test function
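Because of the AttributeError fallback above, this kmeans() accepts either the project's Data object or a plain NumPy array. A hedged sketch with an array; kmeans_init() and kmeans_algorithm() are the project's own helpers (not SciPy's), and the 'euclidean' metric name is an assumption:

import numpy as np

A = np.random.rand(50, 3)  # hypothetical data: 50 observations, 3 features
# headers is unused when d is already an array (the AttributeError path)
codebook, codes, errors = kmeans(A, headers=None, K=4, metric='euclidean')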
def cluster(matrix):
    whitened = whiten(matrix.todense())
    # for x in range(25, 40):
    #     means, distortion = kmeans(whitened, x)
    #     print(distortion)
    means, distortion = kmeans(whitened, 30)
    # pickle.dump(means, open('30means-' + sys.argv[1] + '.pkl', 'wb'))
    return means, distortion
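The .todense() call is there because whiten() expects a dense array, not a sparse matrix. A hedged sketch of calling cluster() with SciPy sparse input (the data is made up; note that kmeans() needs at least as many observations as clusters, here 40 >= 30):

import numpy as np
from scipy.sparse import csr_matrix

m = csr_matrix(np.random.rand(40, 5))  # hypothetical sparse feature matrix
means, distortion = cluster(m)         # densified inside cluster() before whiten()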
def kmeans_classify(features, shape, label=True, fill=False):
    """Run the k-means algorithm."""
    print("Starting kmeans")
    whitened = whiten(features)
    # seed the three clusters at the per-feature minimum, mean, and maximum
    init = np.array((whitened.min(0), whitened.mean(0), whitened.max(0)))
    codebook, _ = kmeans(whitened, init)
    classified, _ = vq(whitened, codebook)
    print("Finished kmeans")
    return classified
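A hedged usage sketch: the shape parameter suggests the features are flattened image pixels, so the returned labels can be folded back into image layout (the dimensions and data are made up, and label and fill are unused in the excerpt shown):

import numpy as np

H, W, B = 64, 64, 4                  # hypothetical image: 64x64 pixels, 4 bands
pixels = np.random.rand(H * W, B)    # one row per pixel
labels = kmeans_classify(pixels, (H, W))
label_image = labels.reshape(H, W)   # one cluster index (0, 1, or 2) per pixel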