我们从Python开源项目中,提取了以下14个代码示例,用于说明如何使用scipy.cluster.vq.kmeans2()。
def get_mfcc_feat(self):
    """Build a 3-entry codebook from the MFCC features of all voice samples.

    Every file matching ``../data/voices/*.wav`` is read with ``wav.read``,
    converted to MFCC features, and the per-file feature matrices are stacked
    row-wise. The stacked features are whitened and clustered with
    ``kmeans2`` (``k=3``); the resulting centroids are stored on
    ``self.codebook``.

    Raises:
        ValueError: If no ``.wav`` file matches the glob pattern, since there
            is nothing to build a codebook from.
    """
    per_file_feats = []
    for filename in glob.iglob('../data/voices/*.wav'):
        print(filename)
        rate, sig = wav.read(filename)
        # MFCC features: each row corresponds to the MFCC of one frame.
        per_file_feats.append(mfcc(sig.astype(np.float64), rate))

    if not per_file_feats:
        raise ValueError("no .wav files found matching '../data/voices/*.wav'")

    # Stack once at the end instead of re-concatenating after every file.
    mfcc_feats = np.concatenate(per_file_feats, axis=0)

    # Normalize the features before clustering.
    whitened = whiten(mfcc_feats)
    # Only the centroids are kept; the per-frame labels are not needed here.
    self.codebook, _ = kmeans2(data=whitened, k=3)
def _calculate_gap(self, X: Union[pd.DataFrame, np.ndarray], n_refs: int, n_clusters: int) -> Tuple[float, int]:
    """
    Compute the gap value for ``X`` at a given number of clusters.

    ``n_refs`` reference datasets are drawn with ``np.random.random_sample``
    using the shape of ``X``; each one is clustered with ``kmeans2`` and its
    dispersion recorded. The gap is the log of the mean reference dispersion
    minus the log of the dispersion obtained by clustering ``X`` itself.

    Returns the gap value together with ``n_clusters`` cast to ``int``.
    """
    def _clustered_dispersion(data):
        # Cluster `data` and score the resulting partition.
        centers, assignments = kmeans2(data=data, k=n_clusters, iter=10, minit='points')
        return self._calculate_dispersion(X=data, labels=assignments, centroids=centers)

    # One dispersion per random reference set.
    reference_dispersions = np.zeros(n_refs)
    for ref_idx in range(n_refs):
        reference_sample = np.random.random_sample(size=X.shape)
        reference_dispersions[ref_idx] = _clustered_dispersion(reference_sample)

    # Dispersion of the actual data, clustered the same way.
    observed_dispersion = _clustered_dispersion(X)

    expected_log_dispersion = np.log(np.mean(reference_dispersions))
    gap_value = expected_log_dispersion - np.log(observed_dispersion)
    return gap_value, int(n_clusters)
def kmeans(xs, k):
    """Cluster the rows of ``xs`` into ``k`` groups and return the row labels.

    scikit-learn's ``k_means`` is used when it can be imported; otherwise the
    function falls back to ``scipy.cluster.vq.kmeans2`` with
    ``missing='raise'``.

    Args:
        xs: 2-D array with an ``astype`` method (one observation per row).
        k: Number of clusters, passed straight to the backend.

    Returns:
        The label array produced by whichever backend was used.
    """
    assert xs.ndim == 2
    # Cast once so both backends receive the same float64 data; previously
    # only the scikit-learn path was cast.
    data = xs.astype("float64")
    try:
        # Keep the try body to the import alone so an ImportError raised
        # while clustering is not mistaken for a missing dependency.
        from sklearn.cluster import k_means
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(data, k, missing='raise')
    else:
        _, labels, _ = k_means(data, k)
    return labels
def kmeans(xs, k):
    """Return a cluster label for every row of the 2-D array ``xs``.

    Prefers ``sklearn.cluster.k_means``; when scikit-learn is not importable
    the work is done by ``scipy.cluster.vq.kmeans2`` (``missing='raise'``).

    Args:
        xs: 2-D array with an ``astype`` method (one observation per row).
        k: Number of clusters requested from the backend.

    Returns:
        The label array returned by the backend that ran.
    """
    assert xs.ndim == 2
    # Both backends get the same float64 view of the data; the original only
    # converted it on the scikit-learn path.
    points = xs.astype('float64')

    try:
        # Only the import is guarded, so errors from the clustering call
        # itself are never silently rerouted to the fallback.
        from sklearn.cluster import k_means
    except ImportError:
        k_means = None

    if k_means is not None:
        _, labels, _ = k_means(points, k)
        return labels

    from scipy.cluster.vq import kmeans2
    _, labels = kmeans2(points, k, missing='raise')
    return labels
def init_hypers(self, y_train):
    """Build initial hyperparameters and inducing inputs from ``self.x_train``.

    Reads ``self.N``, ``self.M``, ``self.Din`` and ``self.x_train``.

    * ``zu``: when ``N < 10000`` the ``M`` centroids found by ``kmeans2``
      (``minit='points'``); otherwise ``M`` randomly chosen rows of
      ``x_train``.
    * ``ls``: length-``Din`` array whose entries all equal
      ``2 * log(median + 1e-16)``, where ``median`` is taken over the upper
      triangle (diagonal included) of the pairwise euclidean distances of
      ``x_train`` -- or of 5000 randomly chosen rows when ``N >= 10000``.
    * ``sf``: ``log(0.5)`` as a one-element array.
    * ``sn``: ``log(0.01)``.

    Args:
        y_train: Not used by this method.

    Returns:
        dict: Parameters keyed by ``'sf'``, ``'ls'``, ``'zu'`` and ``'sn'``.
    """
    N = self.N
    M = self.M
    Din = self.Din
    x_train = self.x_train

    if N < 10000:
        zu, _ = kmeans2(x_train, M, minit='points')
        X1 = np.copy(x_train)
    else:
        randind = np.random.permutation(N)
        zu = x_train[randind[0:M], :]
        # Subsample so the pairwise distance matrix stays manageable.
        # (The original indexed an undefined name `X` here.)
        randind = np.random.permutation(N)
        X1 = x_train[randind[:5000], :]

    x_dist = cdist(X1, X1, 'euclidean')
    # Index with the size of the matrix actually built: it has 5000 rows,
    # not N, whenever the data was subsampled.
    triu_ind = np.triu_indices(X1.shape[0])
    d2imed = np.median(x_dist[triu_ind])
    # Same lengthscale for every input dimension; 1e-16 guards log(0).
    ls = np.full((Din, ), 2 * np.log(d2imed + 1e-16))
    sf = np.log(np.array([0.5]))

    params = dict()
    params['sf'] = sf
    params['ls'] = ls
    params['zu'] = zu
    params['sn'] = np.log(0.01)
    return params
def Kmeans(data, num_K):
    """Cluster ``data`` into ``num_K`` groups with ``kmeans2``.

    Runs 100 iterations, seeds the centroids from the data points
    (``minit='points'``) and only warns on empty clusters
    (``missing='warn'``).

    Returns:
        tuple: ``(centroid, label)`` exactly as produced by ``kmeans2``.
    """
    centers, assignments = kmeans2(
        data, num_K, iter=100, minit='points', missing='warn'
    )
    return centers, assignments
def init_hypers(self, x_train=None, key_suffix=''):
    """Build initial hyperparameters and inducing inputs.

    Reads ``self.N``, ``self.M`` and ``self.Din``.

    Without ``x_train``:

    * ``ls`` is ``log(1 + 0.1 * u)`` with ``u`` drawn from ``np.random.rand``,
    * ``sf`` is ``log(1)``,
    * ``zu`` is ``M`` evenly spaced values in ``[-1, 1]`` repeated across the
      ``Din`` columns.

    With ``x_train``:

    * ``zu`` holds the ``M`` centroids from ``kmeans2`` (``minit='points'``)
      when ``N < 10000``, otherwise ``M`` randomly chosen rows of ``x_train``,
    * every entry of ``ls`` equals ``log(median + 1e-16)``, where ``median``
      is taken over the upper triangle (diagonal included) of the pairwise
      euclidean distances of ``x_train`` -- or of 5000 randomly chosen rows
      when ``N >= 10000``,
    * ``sf`` is ``log(0.5)``.

    Args:
        x_train (optional): 2-D training inputs; ``None`` selects the
            data-free initialisation.
        key_suffix (str, optional): Appended to every key of the result.

    Returns:
        dict: Values keyed by ``'sf'``, ``'ls'`` and ``'zu'``, each followed
        by ``key_suffix``.
    """
    N = self.N
    M = self.M
    Din = self.Din

    if x_train is None:
        ls = np.log(np.ones((Din, )) + 0.1 * np.random.rand(Din, ))
        sf = np.log(np.array([1]))
        zu = np.tile(np.linspace(-1, 1, M).reshape((M, 1)), (1, Din))
    else:
        if N < 10000:
            zu, _ = kmeans2(x_train, M, minit='points')
            X1 = np.copy(x_train)
        else:
            randind = np.random.permutation(N)
            zu = x_train[randind[0:M], :]
            # Subsample so the pairwise distance matrix stays manageable.
            # (The original indexed an undefined name `X` here.)
            randind = np.random.permutation(N)
            X1 = x_train[randind[:5000], :]

        x_dist = cdist(X1, X1, 'euclidean')
        # Index with the size of the matrix actually built: it has 5000
        # rows, not N, whenever the data was subsampled.
        triu_ind = np.triu_indices(X1.shape[0])
        d2imed = np.median(x_dist[triu_ind])
        # Same lengthscale for every input dimension; 1e-16 guards log(0).
        ls = np.full((Din, ), np.log(d2imed + 1e-16))
        sf = np.log(np.array([0.5]))

    params = dict()
    params['sf' + key_suffix] = sf
    params['ls' + key_suffix] = ls
    params['zu' + key_suffix] = zu
    return params
def bounding_ellipsoids(points, pointvol=0., vol_dec=0.5, vol_check=2.):
    """
    Bound a collection of points with a set of ellipsoids.

    A single bounding ellipsoid is computed first and then handed to
    `_bounding_ellipsoids`, which splits it further; the resulting
    ellipsoids are wrapped in a :class:`MultiEllipsoid`.

    Parameters
    ----------
    points : `~numpy.ndarray` with shape (npoints, ndim)
        A set of coordinates.

    pointvol : float, optional
        Volume represented by a single point. When provided, used to set a
        minimum bound on the ellipsoid volume as `npoints * pointvol`.
        Default is `0.`.

    vol_dec : float, optional
        The required fractional reduction in volume after splitting an
        ellipsoid in order to accept the split. Default is `0.5`.

    vol_check : float, optional
        The factor used when checking whether the volume of the original
        bounding ellipsoid is large enough to warrant more trial splits via
        `ell.vol > vol_check * npoints * pointvol`. Default is `2.0`.

    Returns
    -------
    mell : :class:`MultiEllipsoid` object
        The :class:`MultiEllipsoid` object used to bound the collection of
        points.

    Raises
    ------
    ValueError
        If `HAVE_KMEANS` is false.
    """
    if not HAVE_KMEANS:
        raise ValueError("scipy.cluster.vq.kmeans2 is required to compute "
                         "ellipsoid decompositions.")  # pragma: no cover

    # Start from one ellipsoid around all points, possibly enlarged to the
    # minimum volume implied by `pointvol`.
    enclosing = bounding_ellipsoid(points, pointvol=pointvol)

    # Keep splitting until a split no longer shrinks the volume by a factor
    # of `vol_dec`.
    split_options = dict(pointvol=pointvol, vol_dec=vol_dec,
                         vol_check=vol_check)
    pieces = _bounding_ellipsoids(points, enclosing, **split_options)

    return MultiEllipsoid(ells=pieces)