Python scipy.cluster.vq module: vq() example source code

We extracted the following 37 code examples from open-source Python projects to illustrate how to use scipy.cluster.vq.vq().
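Before the project examples, here is a minimal, self-contained sketch of the usual whiten → kmeans → vq pipeline (the data and variable names below are illustrative, not taken from any of the projects):

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 2))             # 100 observations, 2 features
whitened = whiten(data)                      # rescale each feature to unit variance
centroids, distortion = kmeans(whitened, 3)  # fit 3 centroids
codes, dists = vq(whitened, centroids)       # nearest-centroid index and distance per observation
# codes[i] is the index of the centroid closest to whitened[i]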

Project: vapor    Author: mills-lab    | Project source | File source
def k_means_cluster_Predict(data_list,info):
    array_diagnal=np.array([[data_list[0][x],data_list[1][x]] for x in range(len(data_list[0]))])
    ks = list(range(1,len(info)))
    KMeans = [cluster.KMeans(n_clusters = i, init="k-means++").fit(array_diagnal) for i in ks]
    BIC = [compute_bic(kmeansi,array_diagnal) for kmeansi in KMeans]
    ks_picked=ks[BIC.index(max(BIC))]
    if ks_picked==1:
        return [data_list]
    else:
        out=[]
        std_rec=[scipy.std(data_list[0]),scipy.std(data_list[1])]
        whitened = whiten(array_diagnal)
        centroids, distortion=kmeans(whitened,ks_picked)
        idx,_= vq(whitened,centroids)
        for x in range(ks_picked):
            group1=[[int(i) for i in array_diagnal[idx==x,0]],[int(i) for i in array_diagnal[idx==x,1]]]
            out.append(group1)
        return out
Project: data-analysis    Author: ymohanty    | Project source | File source
def kmeans_numpy(d, headers, K, whiten=True):
    # assign to A the result of getting the data from your Data object
    A = d.get_data(headers)

    # assign to W the result of calling vq.whiten on A
    W = vq.whiten(A)

    # assign to codebook, bookerror the result of calling vq.kmeans with W and K
    codebook, bookerror = vq.kmeans(W, K)

    # assign to codes, error the result of calling vq.vq with W and the codebook
    codes, error = vq.vq(W, codebook)

    # return codebook, codes, and error
    return codebook, codes, error


# prep the k-means clustering algorithm by getting initial cluster means
Project: GenefyHMM    Author: adit-39    | Project source | File source
def vector_quantize(data_dict, vs, bins):
    codebooks = {}
    vq_data = {}
    for size in vs.keys():
        all_size_data = []
        for disease in vs[size]:
            all_size_data.extend(data_dict[disease])
        #whitened = sp.whiten(all_size_data)
        #codebooks[size] = sp.kmeans(whitened, bins)[0]
        codebooks[size] = sp.kmeans(np.asarray(all_size_data), bins)[0]
    pickle.dump(codebooks,open("all_codebooks.pkl","wb"))
    for dis in data_dict.keys():
        n = len(data_dict[dis])
        m = len(data_dict[dis][0])
        vq_data[dis] = map(str,sp.vq(np.reshape(data_dict[dis],(n,m)), codebooks[len(data_dict[dis][0])])[0])
    return vq_data
Project: pybot    Author: spillai    | Project source | File source
def get_histogram(self, data): 
        """
        Project the descriptors onto the codebook/vocabulary,
        returning the histogram of words:
        [N x 1] => [1 x K] histogram
        """
        if self.method == 'vq' or self.method == 'bow': 
            code = self.get_code(data)
            code_hist = self.bow(data, code, self.K)
        elif self.method == 'vlad': 
            code = self.get_code(data)
            code_hist = self.vlad(data, code)
        elif self.method == 'fisher': 
            code = self.get_code(data)
            code_hist = self.fisher(data, code)
        else: 
            raise NotImplementedError('''Histogram method %s not implemented. '''
                                      '''Use vq/bow or vlad or fisher!''' % self.method)            
        return code_hist
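The bow() call above is a project-specific helper; as a rough, hypothetical sketch, an equivalent vq-based bag-of-words histogram can be built with numpy alone:

import numpy as np
from scipy.cluster.vq import vq

def bow_histogram_sketch(data, codebook):
    # data: [N x D] descriptors, codebook: [K x D] visual vocabulary
    code, _ = vq(data, codebook)           # nearest visual word per descriptor
    hist = np.bincount(code, minlength=codebook.shape[0])
    return hist / max(hist.sum(), 1)       # L1-normalized [1 x K] histogram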
Project: vapor    Author: mills-lab    | Project source | File source
def k_means_cluster(data_list):
    if max(data_list[0])-min(data_list[0])>10 and max(data_list[1])-min(data_list[1])>10:
        array_diagnal=np.array([[data_list[0][x],data_list[1][x]] for x in range(len(data_list[0]))])
        ks = list(range(1,min([5,len(data_list[0])+1])))
        KMeans = [cluster.KMeans(n_clusters = i, init="k-means++").fit(array_diagnal) for i in ks]
        KMeans_predict=[cluster.KMeans(n_clusters = i, init="k-means++").fit_predict(array_diagnal) for i in ks]
        BIC=[]
        BIC_rec=[]
        for x in ks:
            if KMeans_predict[x-1].max()<x-1: continue
            else:
                BIC_i=compute_bic(KMeans[x-1],array_diagnal)
                if abs(BIC_i)<10**8:
                    BIC.append(BIC_i)
                    BIC_rec.append(x)
        #BIC = [compute_bic(kmeansi,array_diagnal) for kmeansi in KMeans]
        #ks_picked=ks[BIC.index(max(BIC))]
        ks_picked=BIC_rec[BIC.index(max(BIC))]
        if ks_picked==1:
            return [data_list]
        else:
            out=[]
            std_rec=[scipy.std(data_list[0]),scipy.std(data_list[1])]
            whitened = whiten(array_diagnal)
            centroids, distortion=kmeans(whitened,ks_picked)
            idx,_= vq(whitened,centroids)
            for x in range(ks_picked):
                group1=[[int(i) for i in array_diagnal[idx==x,0]],[int(i) for i in array_diagnal[idx==x,1]]]
                out.append(group1)
            return out
    else:
        return [data_list]
Project: Caffe-Python-Tutorial    Author: tostq    | Project source | File source
def quantize_net(net, codebook):
    layers = codebook.keys()
    codes_W = {}
    print "================Perform quantization=============="
    for layer in layers:
        print "Quantize layer:", layer
        W = net.params[layer][0].data
        codes, _ = scv.vq(W.flatten(), codebook[layer]) # vector quantization: index of the nearest codebook entry for each weight
        # codes = stochasitc_quantize2(W.flatten(), codebook[layer]) # alternative: stochastic quantization
        codes = np.reshape(codes, W.shape)
        codes_W[layer] = np.array(codes, dtype=np.uint32)
        # rebuild the quantized weights from the codebook and copy them back into the net
        W_q = np.reshape(codebook[layer][codes], W.shape)
        np.copyto(net.params[layer][0].data, W_q)

    return codes_W
Project: Caffe-Python-Tutorial    Author: tostq    | Project source | File source
def recover_all(net, dir_t, idx=0):
    layers = net.params.keys()
    net.copy_from(dir_t + 'caffemodel%d' % idx)
    codebook = pickle.load(open(dir_t + 'codebook%d' % idx))
    maskCode = {}
    codeDict = {}
    for layer in layers:
        W = net.params[layer][0].data
        # quantize the recovered weights against the stored codebook
        codes, _ = scv.vq(W.flatten(), codebook[layer])
        # per-layer mask of codebook indices, same shape as W
        maskCode[layer] = np.reshape(codes, W.shape)
        codeBookSize = len(codebook[layer])
        a = maskCode[layer].flatten()
        b = xrange(len(a))

        codeDict[layer] = {}
        for i in xrange(len(a)):
            # codeDict maps each codebook index to the flat positions in maskCode where it occurs
            codeDict[layer].setdefault(a[i], []).append(b[i])

    return codebook, maskCode, codeDict
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def voxel_cell_vertex_extraction(img,**kwargs):

    shape = np.array(img.shape)
    neighborhood_img = []
    for x in np.arange(-1,2):
        for y in np.arange(-1,2):
            for z in np.arange(-1,2):
                neighborhood_img.append(img[1+x:shape[0]-1+x,1+y:shape[1]-1+y,1+z:shape[2]-1+z])
    neighborhood_img = np.sort(np.transpose(neighborhood_img,(1,2,3,0))).reshape((shape-2).prod(),27)
    neighborhoods = np.array(map(np.unique,neighborhood_img))
    neighborhood_size = np.array(map(len,neighborhoods)).reshape(shape[0]-2,shape[1]-2,shape[2]-2)
    neighborhoods = np.array(neighborhoods).reshape(shape[0]-2,shape[1]-2,shape[2]-2)

    vertex_coords = np.where(neighborhood_size==4)
    vertex_points = np.transpose(vertex_coords)+1
    vertex_cells = np.array([p for p in neighborhoods[vertex_coords]],int)

    unique_cell_vertices = array_unique(vertex_cells)
    vertices_matching = vq(vertex_cells,unique_cell_vertices)[0]
    unique_cell_vertex_points = np.array([np.mean(vertex_points[vertices_matching == v],axis=0) for v in xrange(len(unique_cell_vertices))])

    cell_vertex_dict = dict(zip([tuple(c) for c in unique_cell_vertices],list(unique_cell_vertex_points)))

    return cell_vertex_dict
Project: pybot    Author: spillai    | Project source | File source
def bow_histogram(data, codebook, pts=None, shape=None): 
    code, dist = vq(data, codebook)
    code_hist = bow(data, code, codebook.shape[0])
    return code_hist
Project: pybot    Author: spillai    | Project source | File source
def get_code(self, data): 
        """
        Transform the [N x D] data to [N x 1] where n_i \in {1, ... , K}
        returns the cluster indices
        """
        if self.quantizer == 'vq': 
            code, dist = vq(data, self.codebook)
        elif self.quantizer == 'kdtree': 
            dist, code = self.index.query(data, k=1)
        else: 
            raise NotImplementedError('Quantizer %s not implemented. Use vq or kdtree!' % self.quantizer)

        return code
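The two branches should agree: querying a k-d tree built on the codebook returns the same nearest-centroid assignments as vq. A small sketch of that equivalence (random data, our own names):

import numpy as np
from scipy.cluster.vq import vq
from scipy.spatial import cKDTree

rng = np.random.default_rng(1)
data = rng.normal(size=(50, 8))
codebook = rng.normal(size=(10, 8))

code_vq, dist_vq = vq(data, codebook)
dist_kd, code_kd = cKDTree(codebook).query(data, k=1)  # same nearest-neighbor search
assert np.array_equal(code_vq, code_kd)
assert np.allclose(dist_vq, dist_kd)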
Project: Caffe-Python-Tutorial    Author: tostq    | Project source | File source
def quantize_net_with_dict(net, layers, codebook, use_stochastic=False, timing=False):
    start_time = time.time()
    codeDict = {} # maps each codebook index to the flat weight positions that use it
    maskCode = {} # per-layer array of codebook indices, same shape as the weights
    for layer in layers:
        print "Quantize layer:", layer
        W = net.params[layer][0].data
        if use_stochastic:
            codes = stochasitc_quantize2(W.flatten(), codebook[layer])
        else:
            codes, _ = scv.vq(W.flatten(), codebook[layer])
        W_q = np.reshape(codebook[layer][codes], W.shape)
        net.params[layer][0].data[...] = W_q

        maskCode[layer] = np.reshape(codes, W.shape)
        codeBookSize = len(codebook[layer])
        a = maskCode[layer].flatten()
        b = xrange(len(a))

        codeDict[layer] = {}
        for i in xrange(len(a)):
            codeDict[layer].setdefault(a[i], []).append(b[i])

    if timing:
        print "Update codebook time:%f" % (time.time() - start_time)

    return codeDict, maskCode
Project: noteshrink    Author: mzucker    | Project source | File source
def apply_palette(img, palette, options):

    '''Apply the palette to the given image. The first step is to set all
background pixels to the background color; then, nearest-neighbor
matching is used to map each foreground color to the closest one in
the palette.

    '''

    if not options.quiet:
        print('  applying palette...')

    bg_color = palette[0]

    fg_mask = get_fg_mask(bg_color, img, options)

    orig_shape = img.shape

    pixels = img.reshape((-1, 3))
    fg_mask = fg_mask.flatten()

    num_pixels = pixels.shape[0]

    labels = np.zeros(num_pixels, dtype=np.uint8)

    labels[fg_mask], _ = vq(pixels[fg_mask], palette)

    return labels.reshape(orig_shape[:-1])

######################################################################
Project: mmfeat    Author: douwekiela    | Project source | File source
def quantize(self):
        clusters = range(self.centroids.shape[0] + 1)
        histograms = {}
        for fname in sorted(self.data.keys()):
            if self.data[fname] is None: continue
            idx,_ = vq(self.data[fname], self.centroids)
            histograms[fname], _ = np.histogram(idx, bins=clusters, normed=self.normalize)
        return histograms
Project: mmfeat    Author: douwekiela    | Project source | File source
def sequences(self):
        sequences = {}
        for fname in sorted(self.data.keys()):
            if self.data[fname] is None: continue
            idx,_ = vq(self.data[fname], self.centroids)
            sequences[fname] = idx
        return sequences
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def triangle_topomesh(triangles, positions, **kwargs):

    triangles = np.array(triangles)
    positions = array_dict(positions)

    edges = array_unique(np.sort(np.concatenate(triangles[:,triangle_edge_list],axis=0)))

    triangle_edges = np.sort(np.concatenate(triangles[:,triangle_edge_list]))

    start_time = time()
    print "--> Generating triangle topomesh"

    triangle_edge_matching = vq(triangle_edges,edges)[0]

    triangle_topomesh = PropertyTopomesh(3)
    for c in np.unique(triangles):
        triangle_topomesh.add_wisp(0,c)
    for e in edges:
        eid = triangle_topomesh.add_wisp(1)
        for pid in e:
            triangle_topomesh.link(1,eid,pid)
    for t in triangles:
        fid = triangle_topomesh.add_wisp(2)
        for eid in triangle_edge_matching[3*fid:3*fid+3]:
            triangle_topomesh.link(2,fid,eid)
    triangle_topomesh.add_wisp(3,0)
    for fid in triangle_topomesh.wisps(2):
        triangle_topomesh.link(3,0,fid)
    triangle_topomesh.update_wisp_property('barycenter',0,positions.values(np.unique(triangles)),keys=np.unique(triangles))    

    end_time = time()
    print "<-- Generating triangle topomesh [",end_time-start_time,"s]"

    return triangle_topomesh
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def quad_topomesh(quads, positions, faces_as_cells=False, **kwargs):
    quads = np.array(quads)
    positions = array_dict(positions)

    edges = array_unique(np.sort(np.concatenate(quads[:,quad_edge_list],axis=0)))

    quad_edges = np.sort(np.concatenate(quads[:,quad_edge_list]))

    start_time = time()
    print "--> Generating quad topomesh"

    quad_edge_matching = vq(quad_edges,edges)[0]

    quad_topomesh = PropertyTopomesh(3)
    for c in np.unique(quads):
        quad_topomesh.add_wisp(0,c)
    for e in edges:
        eid = quad_topomesh.add_wisp(1)
        for pid in e:
            quad_topomesh.link(1,eid,pid)
    for q in quads:
        fid = quad_topomesh.add_wisp(2)
        for eid in quad_edge_matching[4*fid:4*fid+4]:
            quad_topomesh.link(2,fid,eid)
    if not faces_as_cells:
        quad_topomesh.add_wisp(3,0)
        for fid in quad_topomesh.wisps(2):
            quad_topomesh.link(3,0,fid)
    else:
        for fid in quad_topomesh.wisps(2):
            quad_topomesh.add_wisp(3,fid)
            quad_topomesh.link(3,fid,fid)

    quad_topomesh.update_wisp_property('barycenter',0,positions.values(np.unique(quads)),keys=np.unique(quads))    

    end_time = time()
    print "<-- Generating quad topomesh [",end_time-start_time,"s]"

    return quad_topomesh
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def implicit_surface(density_field,size,resolution,iso=0.5):
    import numpy as np
    from scipy.cluster.vq                       import kmeans, vq
    from openalea.container import array_dict

    from skimage.measure import marching_cubes
    surface_points, surface_triangles = marching_cubes(density_field,iso)

    surface_points = (np.array(surface_points))*(size*resolution/np.array(density_field.shape)) - size*resolution/2.

    points_ids = np.arange(len(surface_points))
    points_to_delete = []
    for p,point in enumerate(surface_points):
        matching_points = np.sort(np.where(vq(surface_points,np.array([point]))[1] == 0)[0])
        if len(matching_points) > 1:
            points_to_fuse = matching_points[1:]
            for m_p in points_to_fuse:
                surface_triangles[np.where(surface_triangles==m_p)] = matching_points[0]
                points_to_delete.append(m_p)

    points_to_delete = np.unique(points_to_delete)
    print len(points_to_delete),"points deleted"
    surface_points = np.delete(surface_points,points_to_delete,0)
    points_ids = np.delete(points_ids,points_to_delete,0)
    surface_triangles = array_dict(np.arange(len(surface_points)),points_ids).values(surface_triangles)

    for p,point in enumerate(surface_points):
        matching_points = np.where(vq(surface_points,np.array([point]))[1] == 0)[0]
        if len(matching_points) > 1:
            print p,point
            raw_input()

    triangles_to_delete = []
    for t,triangle in enumerate(surface_triangles):
        if len(np.unique(triangle)) < 3:
            triangles_to_delete.append(t)
        # elif triangle.max() >= len(surface_points):
        #     triangles_to_delete.append(t)
    surface_triangles = np.delete(surface_triangles,triangles_to_delete,0)

    return surface_points, surface_triangles
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def performance_measure(reference_set,experimental_set,measure='jaccard_index'):
    VP = (vq(experimental_set,reference_set)[1]==0).sum()
    FP = (vq(experimental_set,reference_set)[1]>0).sum()
    FN = (vq(reference_set,experimental_set)[1]>0).sum()

    if measure == 'true_positive':
        return VP
    elif measure == 'precision':
        return VP/float(VP+FP) 
    elif measure == 'recall':
        return VP/float(VP+FN) 
    elif measure == 'dice_index':
        return 2*VP / float(2*VP+FP+FN)
    elif measure == 'jaccard_index':
        return VP/float(VP+FP+FN)
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def jaccard_index(reference_set,experimental_set):
    VP = (vq(experimental_set,reference_set)[1]==0).sum()
    FP = (vq(experimental_set,reference_set)[1]>0).sum()
    FN = (vq(reference_set,experimental_set)[1]>0).sum()
    return VP/float(VP+FP+FN)
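A quick toy check of this vq-based Jaccard index (the values are our own, not from the project):

import numpy as np

reference_set = np.array([[0., 0.], [1., 0.], [0., 1.]])
experimental_set = np.array([[0., 0.], [1., 0.], [5., 5.]])
# 2 exact matches (VP), 1 unmatched experimental point (FP), 1 unmatched reference point (FN)
print(jaccard_index(reference_set, experimental_set))  # 2 / (2 + 1 + 1) = 0.5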
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def density_plot(figure,X,Y,color,xlabel="",ylabel="",n_points=10,linewidth=1,marker_size=40.,alpha=1.0,label=""):
    font = fm.FontProperties(family = 'Trebuchet', weight ='light')
    #font = fm.FontProperties(family = 'CenturyGothic',fname = '/Library/Fonts/Microsoft/Century Gothic', weight ='light')
    figure.patch.set_facecolor('white')
    axes = figure.add_subplot(111)
    # axes.plot(X,Y,linewidth=1,color=tuple(color2),alpha=0.2)
    # ratios = (Y-Y.min())/(Y.max()-Y.min())
    # X_min = X.mean()-3*X.std()
    # X_max = X.mean()+3*X.std()
    X_min = np.percentile(X,100/n_points)
    X_max = np.percentile(X,100 - 100/n_points)
    Y_min = np.percentile(Y,100/n_points)
    # Y_min = Y.mean()-3*Y.std()
    Y_max = np.percentile(Y,100 - 100/n_points)

    X_grid = np.linspace(X_min,X_max,n_points)
    Y_grid = np.linspace(Y_min,Y_max,n_points)

    X_sampled = X_grid[vq(X,X_grid)[0]]
    Y_sampled = Y_grid[vq(Y,Y_grid)[0]]

    point_density = {}
    for x in np.unique(X_sampled):
        point_count = nd.sum(np.ones_like(np.where(X_sampled==x)),Y_sampled[np.where(X_sampled==x)],index=np.unique(Y_sampled))
        for i,y in enumerate(np.unique(Y_sampled)):
            point_density[(x,y)] = point_count[i]/len(Y)

    point_area = np.array([np.pi*10.0*marker_size*point_density[(x,y)]/np.array(point_density.values()).max() for x,y in zip(X_sampled,Y_sampled)])
    #colors = np.random.rand(len(X))
    colors = np.array([point_density[(x,y)]/np.array(point_density.values()).max() * color for x,y in zip(X_sampled,Y_sampled)])
    colors += np.array([(1-point_density[(x,y)]/np.array(point_density.values()).max()) * np.ones(3) for x,y in zip(X_sampled,Y_sampled)])

    axes.scatter(X_sampled,Y_sampled,s=point_area,c=colors,linewidth=linewidth,alpha=alpha,label=label)
    axes.set_xlim(X_min,X_max)
    axes.set_xlabel(xlabel,fontproperties=font, size=10, style='italic')
    axes.set_xticklabels(axes.get_xticks(),fontproperties=font, size=12)
    axes.set_ylim(Y_min,Y_max)
    axes.set_ylabel(ylabel, fontproperties=font, size=10, style='italic')
    axes.set_yticklabels(axes.get_yticks(),fontproperties=font, size=12)
Project: simple-linear-regression    Author: williamd4112    | Project source | File source
def kmeans(x, k):
    centroids, dist = _kmeans(x, k)
    idx, _ = vq(x,centroids)
    return idx, centroids, dist
Project: data-analysis    Author: ymohanty    | Project source | File source
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''

    # assign to A the result of getting the data given the headers
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d

    if whiten:
        W = vq.whiten(A)
    else:
        W = A

    codebook = kmeans_init(W, K, categories)

    # assign to codebook, codes, errors, the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)

    # return the codebook, codes, and representation error
    return codebook, codes, errors


# test function
Project: speaker_recognizer_robot    Author: shrutiyer    | Project source | File source
def process_audio(self, isTraining, sound_file):
        """ Takes in a wav file and outputs labeled observations of the audio
            isTraining: bool that is true if the model is being trained
        """
        (rate, sig) = wav.read(sound_file)
        sig = sig.astype(np.float64)
        # MFCC Features. Each row corresponds to MFCC for a frame
        mfcc_feat = mfcc(sig, rate)

        labeled_obs = vq(mfcc_feat, self.codebook)[0]

        self.voice_obs = labeled_obs
Project: clitorisvulgaris    Author: fhoehl    | Project source | File source
def find_dominant_colors(image):
    """Cluster the colors of the image in CLUSTER_NUMBER of clusters. Returns
    an array of dominant colors reverse sorted by cluster size.
    """

    array = img_as_float(fromimage(image))

    # Reshape from (M, N, C) to a (M*N, C) array of pixels
    array = array.reshape(scipy.product(array.shape[:2]), array.shape[2])

    # Remove transparent pixels if any (channel 4 is alpha)
    if array.shape[-1] > 3:
        array = array[array[:, 3] == 1]

    # Finding centroids (centroids are colors)
    centroids, _ = kmeans(array, CLUSTER_NUMBER)

    # Allocate pixel to a centroid cluster
    observations, _ = vq(array, centroids)

    # Calculate the number of pixels in a cluster
    histogram, _ = scipy.histogram(observations, len(centroids))

    # Sort centroids by number of pixels in their cluster
    sorted_centroids = sorted(zip(centroids, histogram),
                              key=lambda x: x[1],
                              reverse=True)

    sorted_colors = tuple((couple[0] for couple in sorted_centroids))

    return sorted_colors
Project: mht    Author: jonatanolofsson    | Project source | File source
def kmeans_classify(features, shape, label=True, fill=False):
    """Run the k-means algorithm."""
    print("Starting kmeans")
    whitened = whiten(features)
    init = np.array((whitened.min(0), whitened.mean(0), whitened.max(0)))
    codebook, _ = kmeans(whitened, init)
    classified, _ = vq(whitened, codebook)
    print("Finished kmeans")
    return classified
Project: GenefyHMM    Author: adit-39    | Project source | File source
def testHMMs(inp_file="test_input.txt"):
    inp_vals = map(float, open(inp_file, "r").read().strip().split(','))
    HMMs = pickle.load(open("trained_HMMs_saved.pkl","rb"))
    codebooks = pickle.load(open("all_codebooks.pkl","rb"))
    vs = pickle.load(open("size_mapping.pkl","rb"))

    results = {}

    for size in vs.keys():
        # organize input data into vectors of particular size
        c = 0
        vecs = []
        for i in range(len(inp_vals)/int(size)):
            vecs.append(inp_vals[c:c+size])
            c += size
        #print vecs
        # Vector Quantizing
        n = len(vecs)
        vq_seq = map(str,sp.vq(np.reshape(vecs,(n,size)), codebooks[size])[0])
        if len(vq_seq) > 0:
            diseases = vs[size]
            for disease in diseases:
                HMM_obj = HMM("initial.json")
                HMM_obj.A = HMMs[disease]["A"]
                HMM_obj.B = HMMs[disease]["B"]
                HMM_obj.pi = HMMs[disease]["pi"]
                prob = HMM_obj.forward_scaled(vq_seq)
                results[disease] = math.exp(prob)

    for i in HMMs.keys():
        if not results.has_key(i):
            results[i] = "Not Enough Data to Predict"

    print "RESULTS:"        
    for dis in results.keys():
        print dis.capitalize()+": "+str(results[dis])
Project: QScode    Author: PierreHao    | Project source | File source
def cal_vlad(self, descriptors, centers):
        if self.flag is False:
            self.load()
        if self.stdSlr is not None:
            descriptors = self.stdSlr.transform(descriptors)
        dimensions = descriptors[0].shape[0]
        vlad_vector = np.zeros((len(centers),dimensions), dtype=np.float32)
        center_idx, distance = vq(descriptors, centers)
        for i,idx in enumerate(center_idx):
            vlad_vector[idx] += (descriptors[i] - centers[idx])
        vlad_vector = cv2.normalize(vlad_vector)
        vlad_vector = vlad_vector.flatten()
        return vlad_vector
Project: QScode    Author: PierreHao    | Project source | File source
def flann(cls, feature, words):
        return vq(feature, words)
Project: QScode    Author: PierreHao    | Project source | File source
def flann(cls, feature, words):
        return vq(feature, words)
Project: PyFusionGUI    Author: SyntaxVoid    | Project source | File source
def k_means_clustering(instance_array, n_clusters=9, sin_cos = 1, number_of_starts = 30, seed=None, use_scikit=1,**kwargs):
    '''
    This runs the k-means clustering algorithm as implemented in scipy - change to scikit-learn?

    SH: 7May2013
    '''
    from sklearn.cluster import KMeans
    print 'starting kmeans algorithm, k=%d, retries : %d, sin_cos = %d'%(n_clusters,number_of_starts,sin_cos)
    if sin_cos==1:
        print '  using sine and cosine of the phases'
        sin_cos_instances = np.zeros((instance_array.shape[0],instance_array.shape[1]*2),dtype=float)
        sin_cos_instances[:,::2]=np.cos(instance_array)
        sin_cos_instances[:,1::2]=np.sin(instance_array)
        input_array = sin_cos_instances
        #code_book,distortion = vq.kmeans(sin_cos_instances, n_clusters,iter=number_of_starts)
        #cluster_assignments, point_distances = vq.vq(sin_cos_instances, code_book)
    else:
        print '  using raw phases'
        input_array = instance_array
        #code_book,distortion = vq.kmeans(instance_array, n_clusters,iter=number_of_starts)
        #cluster_assignments, point_distances = vq.vq(instance_array, code_book)
    #pickle.dump(multiple_run_results,file(k_means_output_filename,'w'))
    if use_scikit:
        print 'using scikit learn'
        tmp = KMeans(init='k-means++', n_clusters=n_clusters, n_init = number_of_starts, n_jobs=1, random_state = seed)
        cluster_assignments = tmp.fit_predict(input_array)
        code_book = tmp.cluster_centers_
    else:
        print 'using vq from scipy'
        code_book,distortion = vq.kmeans(input_array, n_clusters,iter=number_of_starts)
        cluster_assignments, point_distances = vq.vq(input_array, code_book)
    if sin_cos:
        cluster_details = {'k_means_centroids_sc':code_book}
    else:
        cluster_details = {'k_means_centroids':code_book}
    return cluster_assignments, cluster_details
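The sin/cos embedding above exists because phases are periodic: two nearly identical angles can look maximally far apart as raw numbers, which would mislead Euclidean k-means. A toy illustration (our own values):

import numpy as np

phases = np.array([0.1, 2 * np.pi - 0.1])          # almost the same angle on the circle
print(np.abs(phases[0] - phases[1]))               # ~6.08: raw phases look far apart
embedded = np.column_stack([np.cos(phases), np.sin(phases)])
print(np.linalg.norm(embedded[0] - embedded[1]))   # ~0.20: the embedding keeps them close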

##################################################################################
#############################k-means periodic algorithm##############################
Project: tissue_analysis    Author: VirtualPlants    | Project source | File source
def compute_image_property(self, property_name, min_contact_area=None, sub_factor=8.):
        """
        """
        computable_properties = ['barycenter','volume','neighborhood_size','layer','mean_curvature','gaussian_curvature']
        try:
            assert property_name in computable_properties
        except:
            print "Property \""+property_name+"\" can not be computed on image"
            print "Try with one of the following :"
            for p in computable_properties:
                print "  * "+p
        else:
            if self._image is not None:
                if property_name in ['barycenter','volume']:
                    graph = graph_from_image(self._image,background=self.background,spatio_temporal_properties=[property_name],ignore_cells_at_stack_margins=self.ignore_cells_at_stack_margins)
                    property_dict = graph.vertex_property(property_name)
                elif property_name == 'neighborhood_size':
                    neighbors = [self.image_graph.neighbors(l) for l in self.labels]
                    property_dict = dict(zip(self.labels,map(len,neighbors)))
                elif property_name == 'layer':
                    if min_contact_area is None:
                        min_contact_area = self.min_contact_area
                    graph = graph_from_image(self._image,background=self.background,spatio_temporal_properties=['L1'],ignore_cells_at_stack_margins=self.ignore_cells_at_stack_margins, min_contact_area=min_contact_area)
                    first_layer = graph.vertex_property('L1')
                    second_layer_cells = [v for v in graph.vertices() if np.any([first_layer[n] for n in graph.neighbors(v)]) and not first_layer[v]]
                    second_layer = dict(zip(list(graph.vertices()),[v in second_layer_cells for v in graph.vertices()]))
                    property_dict = dict(zip(self.labels,[1 if first_layer[l] else 2 if second_layer[l] else 3 for l in self.labels]))
                elif property_name in ['mean_curvature','gaussian_curvature']:
                    if not self.has_image_property('barycenter'):
                        self.compute_image_property('barycenter')
                    if not self.has_image_property('layer'):
                        print "--> Computing layer property"
                        self.compute_image_property('layer')

                    cell_centers = self.image_property('barycenter')
                    L1_cells = self.labels[self.image_property('layer').values()==1]

                    from openalea.cellcomplex.property_topomesh.utils.implicit_surfaces import implicit_surface_topomesh
                    from openalea.cellcomplex.property_topomesh.property_topomesh_analysis import compute_topomesh_property, compute_topomesh_vertex_property_from_faces
                    from openalea.cellcomplex.property_topomesh.property_topomesh_optimization import property_topomesh_vertices_deformation, topomesh_triangle_split

                    sub_binary_image = (self._image!=self.background).astype(float)[::sub_factor,::sub_factor,::sub_factor]
                    surface_topomesh = implicit_surface_topomesh(sub_binary_image,np.array(sub_binary_image.shape),sub_factor*np.array(self._image.voxelsize),center=False)
                    property_topomesh_vertices_deformation(surface_topomesh,iterations=10)

                    compute_topomesh_property(surface_topomesh,'barycenter',2)
                    compute_topomesh_property(surface_topomesh,'normal',2,normal_method='orientation')

                    compute_topomesh_vertex_property_from_faces(surface_topomesh,'normal',adjacency_sigma=2,neighborhood=5)
                    compute_topomesh_property(surface_topomesh,'mean_curvature',2)
                    compute_topomesh_vertex_property_from_faces(surface_topomesh,property_name,adjacency_sigma=2,neighborhood=5)

                    surface_cells = L1_cells[vq(surface_topomesh.wisp_property('barycenter',0).values(),cell_centers.values(L1_cells))[0]]
                    surface_topomesh.update_wisp_property('label',0,array_dict(surface_cells,list(surface_topomesh.wisps(0))))

                    L1_cell_property = nd.sum(surface_topomesh.wisp_property(property_name,0).values(),surface_cells,index=L1_cells)/nd.sum(np.ones_like(surface_cells),surface_cells,index=L1_cells)
                    L1_cell_property = array_dict(L1_cell_property,L1_cells)    
                    property_dict = array_dict([L1_cell_property[l] if (l in L1_cells) and (not np.isnan(L1_cell_property[l])) else 0 for l in self.labels],self.labels)

                property_data = [property_dict[l] for l in self.labels]
                self.update_image_property(property_name,property_data)
Project: attributeBasedClustering    Author: silenteddie    | Project source | File source
def kmeans_clustering (vectorLayer, attributesList, normalize, clusterNumber, outputFieldName):
    from scipy.cluster.vq import kmeans,vq
    from numpy import array
    fullObjectsList = []
    features = vectorLayer.getFeatures()

    for feature in features:
        fullObjectsList.append([])
        for attribute in attributesList:
            if feature[attribute[0]]:
                fullObjectsList[len(fullObjectsList)-1].append(feature[attribute[0]])
            else:
                fullObjectsList[len(fullObjectsList)-1].append(0)

    #NORMALIZING
    if normalize:
        i = 0
        maxValues = []
        while i < len(attributesList):

            maxValues.append(max(abs(item[i]) for item in fullObjectsList))
            i += 1

        j = 0
        while j < len(fullObjectsList):
            i = 0
            while i < len(fullObjectsList[j]):
                fullObjectsList[j][i] = (fullObjectsList[j][i] * 1.0) / (maxValues[i] * 1.0)
                i += 1
            j += 1

    data = array(fullObjectsList)

    centroids,_ = kmeans(data, clusterNumber, 25)
    idx,_ = vq(data,centroids)
    idx = idx.tolist()
    vectorLayerDataProvider = vectorLayer.dataProvider()

    # Create the field if it does not exist
    if vectorLayer.fieldNameIndex(outputFieldName) == -1:
        vectorLayerDataProvider.addAttributes([QgsField(outputFieldName, QVariant.Int)])

    vectorLayer.updateFields()
    vectorLayer.startEditing()
    attrIdx = vectorLayer.fieldNameIndex(outputFieldName)
    features = vectorLayer.getFeatures()

    i = 0
    for feature in features:
        vectorLayer.changeAttributeValue(feature.id(), attrIdx, int(idx[i]))
        i += 1

    vectorLayer.updateFields()
    vectorLayer.commitChanges()
Project: tools    Author: kastnerkyle    | Project source | File source
def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))
Project: tools    Author: kastnerkyle    | Project source | File source
def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def tetrahedra_topomesh(tetrahedra, positions, **kwargs):

    tetrahedra = np.array(tetrahedra)
    positions = array_dict(positions)

    tetrahedra_triangles = array_unique(np.concatenate(np.sort(tetrahedra[:,tetra_triangle_list])))

    tetrahedra_triangle_edges = tetrahedra_triangles[:,triangle_edge_list]
    tetrahedra_triangle_vectors = positions.values(tetrahedra_triangle_edges[...,1]) - positions.values(tetrahedra_triangle_edges[...,0])
    tetrahedra_triangle_lengths = np.linalg.norm(tetrahedra_triangle_vectors,axis=2)
    tetrahedra_triangle_perimeters = tetrahedra_triangle_lengths.sum(axis=1)

    tetrahedra_edges = array_unique(np.concatenate(tetrahedra_triangles[:,triangle_edge_list],axis=0))

    start_time = time()
    print "--> Generating tetrahedra topomesh"
    triangle_edges = np.concatenate(tetrahedra_triangles[:,triangle_edge_list],axis=0)
    triangle_edge_matching = vq(triangle_edges,tetrahedra_edges)[0]

    tetrahedra_faces = np.concatenate(np.sort(tetrahedra[:,tetra_triangle_list]))
    tetrahedra_triangle_matching = vq(tetrahedra_faces,tetrahedra_triangles)[0]

    tetrahedra_topomesh = PropertyTopomesh(3)
    for c in np.unique(tetrahedra_triangles):
        tetrahedra_topomesh.add_wisp(0,c)
    for e in tetrahedra_edges:
        eid = tetrahedra_topomesh.add_wisp(1)
        for pid in e:
            tetrahedra_topomesh.link(1,eid,pid)
    for t in tetrahedra_triangles:
        fid = tetrahedra_topomesh.add_wisp(2)
        for eid in triangle_edge_matching[3*fid:3*fid+3]:
            tetrahedra_topomesh.link(2,fid,eid)
    for t in tetrahedra:
        cid = tetrahedra_topomesh.add_wisp(3)
        for fid in tetrahedra_triangle_matching[4*cid:4*cid+4]:
            tetrahedra_topomesh.link(3,cid,fid)
    tetrahedra_topomesh.update_wisp_property('barycenter',0,positions.values(np.unique(tetrahedra_triangles)),keys=np.unique(tetrahedra_triangles))        

    end_time = time()
    print "<-- Generating tetrahedra topomesh [",end_time-start_time,"s]"

    return tetrahedra_topomesh
Project: cellcomplex    Author: VirtualPlants    | Project source | File source
def poly_topomesh(polys, positions, faces_as_cells=False, **kwargs):
    polys = np.array(polys)
    positions = array_dict(positions)

    poly_lengths = np.array(map(len,polys))
    poly_edge_list = [np.transpose([np.arange(l),(np.arange(l)+1)%l]) for l in poly_lengths]

    edges = array_unique(np.sort(np.concatenate([np.array(p)[l] for p,l in zip(polys,poly_edge_list)],axis=0)))

    poly_edges = np.sort(np.concatenate([np.array(p)[l] for p,l in zip(polys,poly_edge_list)],axis=0))

    start_time = time()
    print "--> Generating poly topomesh"

    poly_edge_matching = vq(poly_edges,edges)[0]

    poly_topomesh = PropertyTopomesh(3)
    for c in np.unique(polys):
        poly_topomesh.add_wisp(0,c)
    for e in edges:
        eid = poly_topomesh.add_wisp(1)
        for pid in e:
            poly_topomesh.link(1,eid,pid)
    total_poly_length = 0
    for q,l in zip(polys,poly_lengths):
        fid = poly_topomesh.add_wisp(2)
        for eid in poly_edge_matching[total_poly_length:total_poly_length+l]:
            poly_topomesh.link(2,fid,eid)
        total_poly_length += l
    if not faces_as_cells:
        poly_topomesh.add_wisp(3,0)
        for fid in poly_topomesh.wisps(2):
            poly_topomesh.link(3,0,fid)
    else:
        for fid in poly_topomesh.wisps(2):
            poly_topomesh.add_wisp(3,fid)
            poly_topomesh.link(3,fid,fid)
    poly_topomesh.update_wisp_property('barycenter',0,positions.values(np.unique(polys)),keys=np.unique(polys))    

    end_time = time()
    print "<-- Generating poly topomesh [",end_time-start_time,"s]"

    return poly_topomesh
Project: crikey    Author: kastnerkyle    | Project source | File source
def minibatch_kmedians(X, M=None, n_components=10, n_iter=100,
                       minibatch_size=100, random_state=None):
    n_clusters = n_components
    if M is not None:
        assert M.shape[0] == n_components
        assert M.shape[1] == X.shape[1]
    if random_state is None:
        random_state = np.random.RandomState(random_state)
    elif not hasattr(random_state, 'shuffle'):
        # Assume integer passed
        random_state = np.random.RandomState(int(random_state))
    if M is None:
        ind = np.arange(len(X)).astype('int32')
        random_state.shuffle(ind)
        M = X[ind[:n_clusters]]

    center_counts = np.zeros(n_clusters)
    pts = list(np.arange(0, len(X), minibatch_size)) + [len(X)]  # minibatch boundaries
    if len(pts) == 1:
        # minibatch size > dataset size case
        pts = [0, None]
    minibatch_indices = zip(pts[:-1], pts[1:])
    for i in range(n_iter):
        for mb_s, mb_e in minibatch_indices:
            Xi = X[mb_s:mb_e]
            # Broadcasted Manhattan distance
            # Could be made faster with einsum perhaps
            centers = np.abs(Xi[:, None, :] - M[None]).sum(
                axis=-1).argmin(axis=1)

            def count_update(c):
                center_counts[c] += 1
            [count_update(c) for c in centers]
            scaled_lr = 1. / center_counts[centers]
            Mi = M[centers]
            scaled_lr = scaled_lr[:, None]
            # Gradient of abs
            Mi = Mi - scaled_lr * ((Xi - Mi) / np.sqrt((Xi - Mi) ** 2 + 1E-9))
            M[centers] = Mi
    # Reassign centers to nearest datapoint
    mem, _ = vq(M, X)
    M = X[mem]
    return M
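Note the inverted roles in the final vq(M, X) call: the dataset X acts as the codebook, so each learned center is snapped to its nearest actual datapoint, giving medoid-like centers. A standalone sketch of that pattern (names and data are our own):

import numpy as np
from scipy.cluster.vq import vq

rng = np.random.default_rng(2)
X = rng.normal(size=(200, 4))   # dataset
M = rng.normal(size=(5, 4))     # learned centers, e.g. from k-medians
nearest, _ = vq(M, X)           # for each center, the index of its closest datapoint
M_snapped = X[nearest]          # centers constrained to actual observations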