Python sklearn.manifold 模块,MDS 实例源码

我们从Python开源项目中,提取了以下25个代码示例,用于说明如何使用sklearn.manifold.MDS

项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def plot_MDS(*data):
    '''
    Embed the samples in two dimensions with MDS and show a scatter plot
    that uses one colour per distinct label.

    :param data: exactly two positional values: the sample matrix followed
                 by the label vector
    :return: None
    '''
    samples, labels = data
    embedded = manifold.MDS(n_components=2).fit_transform(samples)

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    palette = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1),
        (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
        (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )
    # zip() stops at the shorter input, so at most len(palette) labels are drawn.
    for label, color in zip(np.unique(labels), palette):
        mask = labels == label
        axes.scatter(
            embedded[mask, 0],
            embedded[mask, 1],
            label="target= {0}".format(label),
            color=color,
        )

    axes.set_xlabel("X[0]")
    axes.set_ylabel("X[1]")
    axes.legend(loc="best")
    axes.set_title("MDS")
    plt.show()
项目:sakmapper    作者:szairis    | 项目源码 | 文件源码
def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    Project a data set through a lens function.

    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function

    lens selects the projection: 'pca', 'mds' or 'neighbor' (spectral
    embedding). dist is the pairwise metric used to build the distance
    matrix for the 'mds' and 'neighbor' lenses. Extra keyword arguments are
    forwarded to the underlying estimator's constructor.

    Raises ValueError when n_dim is not 2, when dist or lens is not one of
    the supported values, or when PCA is requested with a non-euclidean
    metric.
    """
    # Raising a plain string is itself a TypeError on Python 3, so the
    # validation failures are reported as proper ValueError instances.
    if n_dim != 2:
        raise ValueError('error: image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('error: only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('error: PCA requires the use of euclidean distance metric')

    if lens == 'pca':
        df_lens = pd.DataFrame(decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df), df.index)
    elif lens == 'mds':
        # NOTE(review): D is a distance matrix but the estimator is not told it
        # is precomputed unless the caller passes that through kwargs - confirm intent.
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    elif lens == 'neighbor':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    else:
        raise ValueError('error: only PCA, MDS, neighborhood lenses are supported')

    return df_lens
项目:miniMDS    作者:seqcode    | 项目源码 | 文件源码
def infer_clusters(contactMat, clusters, offsets, alpha, classical=False):
    """Infer 3D coordinates for several clusters that share one contact matrix.

    The contact matrix is symmetrised in place, converted to a distance
    matrix using ``alpha`` and embedded in three dimensions, either with
    classical MDS (``classical=True``) or with metric MDS. Each cluster's
    points then receive the coordinate rows starting at that cluster's offset.
    """
    total_points = sum(len(group.getPointNums()) for group in clusters)
    assert total_points == len(contactMat)

    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(row) for row in contactMat])
    # No row of the contact matrix may sum to zero.
    assert np.count_nonzero(row_totals == 0) == 0

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    if not classical:
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=-1,
        )
        coords = embedder.fit_transform(distances)
    else:
        # classical MDS
        coords = st.cmds(distances)

    for start, group in zip(offsets, clusters):
        for local_index, point in enumerate(group.getPoints()):
            point.pos = coords[local_index + start]
项目:miniMDS    作者:seqcode    | 项目源码 | 文件源码
def infer_cluster(contactMat, cluster, alpha, classical=False):
    """Infer 3D coordinates for a single cluster.

    The contact matrix is symmetrised in place, converted to a distance
    matrix using ``alpha`` and embedded in three dimensions, either with
    classical MDS (``classical=True``) or with metric MDS. Row ``i`` of the
    embedding becomes the position of the cluster's ``i``-th point.
    """
    assert len(cluster.getPointNums()) == len(contactMat)

    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(row) for row in contactMat])
    # No row of the contact matrix may sum to zero.
    assert np.count_nonzero(row_totals == 0) == 0

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    if not classical:
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=-1,
        )
        coords = embedder.fit_transform(distances)
    else:
        # classical MDS
        coords = st.cmds(distances)

    for index, point in enumerate(cluster.getPoints()):
        point.pos = coords[index]
项目:miniMDS    作者:seqcode    | 项目源码 | 文件源码
def fullMDS(path, classical, alpha):
    """Run MDS on the whole BED file as one cluster (no partitioning).

    Builds a cluster and its contact matrix from ``path``, infers the
    cluster's coordinates and returns the cluster.
    """
    whole = dt.clusterFromBed(path, None, None)
    contacts = dt.matFromBed(path, whole)
    infer_cluster(contacts, whole, alpha, classical)
    return whole
项目:BotBoosted    作者:brityboy    | 项目源码 | 文件源码
def visualize_tweets(W, topic_number, color):
    '''
    INPUT
         - W matrix of observations
         - topic_number - this is the number of the topic to be checked
         - color - this is the color to be used in creating the scatterplot
    OUTPUT
         - a scatter plot of the rows of W whose largest entry is in column
         topic_number, flattened to two dimensions with PCA
    Returns none

    The figure is only drawn, not shown; the caller decides when to call
    plt.show().
    '''
    # Each row is assigned to the column holding its maximum value.
    topic_list = np.apply_along_axis(np.argmax, 1, W)
    Wsubset = W[topic_list == topic_number]
    # A single PCA instance is enough (it used to be constructed twice).
    pca = PCA(n_components=2)
    hflat = pca.fit_transform(Wsubset)
    plt.scatter(hflat[:, 0], hflat[:, 1], color=color, alpha=.1)
    plt.title('these are the {} tweets in topic # {}'.format(Wsubset.shape[0],
                                                             topic_number+1))
项目:neoSBM    作者:piratepeel    | 项目源码 | 文件源码
def embedding(vi_mat,LL,n_neighbors=10):
    """Embed a precomputed dissimilarity matrix in 2-D with MDS and plot it.

    Two 3-D scatter figures are created: the MDS coordinates on the x/y axes
    and, on the z axis (and as colour), column 0 and column 1 of ``LL``
    respectively.

    :param vi_mat: square dissimilarity matrix passed to MDS as precomputed
    :param LL: 2-D array with at least two columns, one row per embedded point
    :param n_neighbors: unused; kept so existing callers keep working
    :return: the 2-D MDS coordinates
    """
    Y = manifold.MDS(n_components=2, dissimilarity='precomputed').fit_transform(vi_mat)

    # One figure per LL column; the column is both the height and the colour.
    for column in (0, 1):
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(Y[:, 0], Y[:, 1], LL[:, column], c=LL[:, column], marker='o')

    return Y

#################################################################
#load known partitions
项目:Dragonfly    作者:duaneloh    | 项目源码 | 文件源码
def do_embedding(self, event=None):
        """Fit the embedder selected in the method selector and refresh the plots.

        If the parent has no converted frames yet they are loaded from
        ``converted.npy`` in the parent's output folder. The fitted embedding
        is stored in ``self.embed`` and ``self.embed_plot``.
        """
        if self.parent.converted is None:
            #self.conversion.convert_frames()
            self.parent.converted = np.load(self.parent.output_folder+'/converted.npy') #FIXME For debugging
        frames = self.parent.converted

        method_ind = self.method.currentIndex()
        print('Doing %s' % self.method.currentText())

        # Constructors are wrapped in lambdas so only the chosen one is built.
        builders = (
            lambda: manifold.SpectralEmbedding(n_components=4, n_jobs=-1),
            lambda: manifold.Isomap(n_components=4, n_jobs=-1),
            lambda: manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='modified'),
            lambda: manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='hessian', eigen_solver='dense'),
            lambda: manifold.MDS(n_components=4, n_jobs=-1),
            lambda: manifold.TSNE(n_components=3, init='pca'),
        )
        # Any other index leaves self.embedder untouched.
        if 0 <= method_ind < len(builders):
            self.embedder = builders[method_ind]()

        self.embedder.fit(frames)
        self.embed = self.embedder.embedding_
        self.embed_plot = self.embed

        self.gen_hist()
        self.plot_embedding()
        if not self.embedded:
            self.add_classes_frame()
        self.embedded = True
项目:texta    作者:texta-tk    | 项目源码 | 文件源码
def index(request):
    """Render the conceptualiser page for the current user.

    Redirects to the site root when the session has no 'model' entry.
    Otherwise each of the user's lexicons gets a ``size`` attribute holding
    its word count, and the template receives the lexicons together with
    the available dimensionality-reduction method names.
    """
    if 'model' not in request.session:
        return HttpResponseRedirect(URL_PREFIX + '/')
    template = loader.get_template('conceptualiser.html')

    user_lexicons = []
    for entry in Lexicon.objects.all().filter(author=request.user):
        entry.size = Word.objects.all().filter(lexicon=entry.id).count()
        user_lexicons.append(entry)

    context = {
        'STATIC_URL': STATIC_URL,
        'lexicons': user_lexicons,
        'methods': ["PCA", "TSNE", "MDS"],
    }
    return HttpResponse(template.render(context, request))
项目:dyfunconn    作者:makism    | 项目源码 | 文件源码
def encode(self, data, metric='euclidean'):
        """ Encode ``data`` with the codebook using a nearest-neighbor rule.

        The prototypes are first ordered by their position in a
        one-dimensional MDS embedding; every sample is then mapped to its
        nearest prototype in that ordered codebook.

        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.

        metric : string
            One of the following valid options as defined for function http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances.html.

            Valid options include:

             - euclidean
             - cityblock
             - l1
             - cosine

        Returns
        -------
        encoded_data : real array-like
            ``data``, as represented by the prototypes in codebook.
        ts_symbols : array-like
            A discrete symbolic time series: for each sample, the index of
            its nearest prototype in the ordered codebook.
        """
        # Perform a proposed data mining procedure as described in [Laskaris2004].
        projector = MDS(1, random_state=self.rng)
        flat_positions = projector.fit_transform(self.protos).ravel()
        ordering = np.argsort(flat_positions)

        ordered_protos = self.protos[ordering]

        matcher = NearestNeighbors(n_neighbors=1, algorithm='auto', metric=metric)
        matcher = matcher.fit(ordered_protos)
        _, self.__symbols = matcher.kneighbors(data)
        self.__encoding = ordered_protos[self.__symbols]

        return (self.__encoding, self.__symbols)
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition    作者:PacktPublishing    | 项目源码 | 文件源码
def plot_demo_1():
    """Plot 3-D and 2-D MDS embeddings of a tiny three-sample data set.

    Three constant 5-dimensional samples are embedded with MDS into three
    and then two dimensions; both embeddings are drawn side by side and the
    figure is saved as ``mds_demo_1.png`` under ``CHART_DIR``.
    """
    def _white_background(axes):
        # Axes.set_axis_bgcolor was removed from matplotlib; prefer its
        # replacement set_facecolor and fall back only on old releases.
        if hasattr(axes, 'set_facecolor'):
            axes.set_facecolor('white')
        else:
            axes.set_axis_bgcolor('white')

    X = np.c_[np.ones(5), 2 * np.ones(5), 10 * np.ones(5)].T
    y = np.array([0, 1, 2])

    fig = pylab.figure(figsize=(10, 4))

    # Left panel: 3-D embedding.
    ax = fig.add_subplot(121, projection='3d')
    _white_background(ax)

    mds = manifold.MDS(n_components=3)
    Xtrans = mds.fit_transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 3 dimensions")
    ax.view_init(10, -15)

    # Right panel: 2-D embedding.
    mds = manifold.MDS(n_components=2)
    Xtrans = mds.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 2 dimensions")

    filename = "mds_demo_1.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
项目:miniMDS    作者:seqcode    | 项目源码 | 文件源码
def main():
    """Command-line entry point: reconstruct 3D coordinates from a Hi-C BED file.

    Without ``-l`` the whole file is embedded as one cluster (full MDS).
    With ``-l`` the partitioned algorithm is used after the numeric
    parameters have been validated. The resulting cluster is written to the
    ``-o`` path when one is given.
    """
    parser = argparse.ArgumentParser(description="Reconstruct 3D coordinates from normalized intrachromosomal Hi-C BED files.")
    parser.add_argument("path", help="path to intrachromosomal Hi-C BED file")
    parser.add_argument("--classical", action="store_true", help="use classical MDS (default: metric MDS)")
    parser.add_argument("-l", help="path to low-resolution intrachromosomal Hi-C BED file")
    parser.add_argument("-p", type=float, default=0.1, help="domain size parameter: larger value means fewer clusters created (for partitioned MDS only)")
    parser.add_argument("-m", type=float, default=0.05, help="minimum domain size parameter: prevents clusters from being too small (for partitioned MDS only)")
    parser.add_argument("-o", help="path to output file")
    # Without an explicit type, values given on the command line arrive as
    # strings and cannot be compared with the numeric bounds below.
    parser.add_argument("-r", type=int, default=32000000, help="maximum RAM to use (in kb)")
    parser.add_argument("-n", type=int, default=3, help="number of threads")
    parser.add_argument("-a", type=float, default=4, help="alpha factor for converting contact frequencies to physical distances")
    args = parser.parse_args()

    if args.l is None:  #not partitioned
        cluster = fullMDS(args.path, args.classical, args.a)

    else:   #partitioned
        params = (args.p, args.m, args.r, args.n, args.a)
        names = ("Domain size parameter", "Minimum domain size", "Maximum memory", "Number of threads", "Alpha")
        intervals = ((0,1), (0,1), (0, None), (0, None), (1, None))
        if not tools.args_are_valid(params, names, intervals):
            sys.exit(0)

        cluster = partitionedMDS(args.path, args.l, params)

    if args.o is not None:
        cluster.write(args.o)
项目:AppsOfDataAnalysis    作者:nhanloukiala    | 项目源码 | 文件源码
def mds(dataset, labels, attNames, **kwargs):
    """Embed ``dataset`` in 2-D with MDS and hand the result to ``plot``.

    ``labels``, ``attNames`` and any extra keyword arguments are forwarded
    to ``plot`` unchanged.
    """
    reducer = manifold.MDS(n_components=2, max_iter=300)
    plot(reducer.fit_transform(dataset), labels, attNames, **kwargs)

# Assignment 2
项目:Waskom_PNAS_2017    作者:WagnerLabPapers    | 项目源码 | 文件源码
def mds_variance_explained(corrmat, mds_coords):
    """Return the squared Pearson correlation between original and MDS distances.

    The original distances are ``1 - corrmat`` taken over the strict upper
    triangle; the MDS distances are the pairwise distances between the rows
    of ``mds_coords``.
    """
    upper = np.triu_indices_from(corrmat, 1)
    original = (1 - corrmat)[upper]
    embedded = distance.pdist(mds_coords)
    correlation = stats.pearsonr(original, embedded)[0]
    return correlation ** 2
项目:hmm-reps    作者:rug-compling    | 项目源码 | 文件源码
def get_twodim_reps(reps, seed, distance=euclidean_distances):
    """Return a 2-D MDS embedding of ``reps``.

    ``reps`` is cast to float64, turned into a pairwise matrix with
    ``distance`` and embedded by MDS as precomputed dissimilarities, with
    ``seed`` as the random state.
    """
    as_float = reps.astype(np.float64)
    pairwise = distance(as_float)
    model = MDS(n_components=2, dissimilarity="precomputed", random_state=seed)
    model.fit(pairwise)
    return model.embedding_
项目:BotBoosted    作者:brityboy    | 项目源码 | 文件源码
def visualize_topics(H):
    '''
    INPUT
         - H matrix of topics
    OUTPUT
         - a scatter plot of the relative location of the different topics
         from each other in a flattened space using multidimensional scaling
    Returns a tuple (color_list, mds): the color given to each topic, in
    row order, to be reused in later visualizations of the tweets, and the
    fitted MDS object
    '''
    mds = MDS(n_jobs=-1)
    flattened = mds.fit_transform(H)
    # The palette repeats once there are more topics than colors.
    palette = cycle(["r", "b", "g", "c", "m", "y", "k", "w"])
    color_list = []
    for topic_index, (point, color) in enumerate(zip(flattened, palette)):
        plt.scatter(point[0], point[1],
                    label='topic number {}'.format(topic_index+1), color=color)
        color_list.append(color)
    plt.legend(loc='best')
    plt.show()
    return color_list, mds
项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def test_MDS(*data):
    '''
    Fit MDS with 4, 3, 2 and 1 components and print the stress of each fit.

    :param data: exactly two positional values: the sample matrix followed
                 by the label vector (the labels are not used)
    :return: None
    '''
    samples, _ = data
    for n_components in (4, 3, 2, 1):
        model = manifold.MDS(n_components=n_components)
        model.fit(samples)
        print('stress(n_components={0}) : {1}'.format (n_components, str(model.stress_)))
项目:POT    作者:rflamary    | 项目源码 | 文件源码
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
    """
    Returns an interpolated point cloud following the dissimilarity matrix C
    using SMACOF multidimensional scaling (MDS) in a target space of the
    requested dimension

    Parameters
    ----------
    C : ndarray, shape (ns, ns)
        dissimilarity matrix
    dim : int
          dimension of the targeted space
    max_iter :  int
        Maximum number of iterations of the SMACOF algorithm for a single run
    eps : float
        relative tolerance w.r.t stress to declare convergence

    Returns
    -------
    npos : ndarray, shape (R, dim)
           Embedded coordinates of the interpolated point cloud (defined up
           to an isometry)
    """

    rng = np.random.RandomState(seed=3)

    # First pass: an initial embedding of C.
    mds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,  # honour the caller's tolerance instead of a hard-coded 1e-9
        dissimilarity='precomputed',
        n_init=1)
    pos = mds.fit(C).embedding_

    # Second pass: refine starting from the first embedding. It uses ``dim``
    # (not a literal 2) so it matches the dimensionality of ``pos``.
    nmds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity="precomputed",
        random_state=rng,
        n_init=1)
    npos = nmds.fit_transform(C, init=pos)

    return npos


##############################################################################
# Data preparation
# ----------------
#
# The four distributions are constructed from 4 simple images
项目:POT    作者:rflamary    | 项目源码 | 文件源码
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
    """
    Returns an interpolated point cloud following the dissimilarity matrix C
    using SMACOF multidimensional scaling (MDS) in a target space of the
    requested dimension

    Parameters
    ----------
    C : ndarray, shape (ns, ns)
        dissimilarity matrix
    dim : int
          dimension of the targeted space
    max_iter :  int
        Maximum number of iterations of the SMACOF algorithm for a single run
    eps : float
        relative tolerance w.r.t stress to declare convergence

    Returns
    -------
    npos : ndarray, shape (R, dim)
           Embedded coordinates of the interpolated point cloud (defined up
           to an isometry)
    """

    rng = np.random.RandomState(seed=3)

    # First pass: an initial embedding of C.
    mds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,  # honour the caller's tolerance instead of a hard-coded 1e-9
        dissimilarity='precomputed',
        n_init=1)
    pos = mds.fit(C).embedding_

    # Second pass: refine starting from the first embedding. It uses ``dim``
    # (not a literal 2) so it matches the dimensionality of ``pos``.
    nmds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity="precomputed",
        random_state=rng,
        n_init=1)
    npos = nmds.fit_transform(C, init=pos)

    return npos


##############################################################################
# Data preparation
# ----------------
#
# The four distributions are constructed from 4 simple images
项目:texta    作者:texta-tk    | 项目源码 | 文件源码
def load_terms(request):
    """Return 2-D coordinates for the words of the requested lexicons as JSON.

    Words that are present in the session model's vocabulary are projected
    to two dimensions with the method named in ``request.POST['method']``
    ('TSNE', 'MDS', anything else means PCA). Words that belong to one of
    the user's concepts are grouped under that concept; the rest are listed
    as plain terms. Redirects to the site root when the model lookup raises
    LookupError.
    """
    lexicon_ids = json.loads(request.POST['lids'])

    try:
        model = model_manager.get_model(request.session['model']).model
    except LookupError as e:
        return HttpResponseRedirect(URL_PREFIX + '/')

    if model.wv.syn0norm is None:
        model.init_sims()

    words = [
        candidate
        for candidate in Word.objects.filter(lexicon__id__in = lexicon_ids)
        if candidate.wrd.encode('utf-8') in model.wv.vocab
    ]
    feature_vectors = [
        model.wv.syn0norm[model.wv.vocab[known.wrd.encode('utf-8')].index]
        for known in words
    ]

    output = {'terms':[],'concepts':[]}

    if len(feature_vectors):
        X = np.array(feature_vectors)

        method = request.POST['method']
        if method == 'TSNE':
            transformer = TSNE(n_components=2, random_state=0,metric='cosine',learning_rate=50)
        elif method == 'MDS':
            transformer = MDS(n_components=2, max_iter=600,dissimilarity="precomputed", n_jobs=1)
            # MDS is fed cosine distances instead of the raw vectors.
            X = pairwise_distances(X,metric='cosine',n_jobs=1)
        else:
            transformer = PCA(n_components=2)

        transformed_feature_vectors = transformer.fit_transform(X).tolist()

        terms = []
        concepts = {}

        for i, word in enumerate(words):
            # With a single vector the coordinates are reported as the origin.
            if len(feature_vectors) > 1:
                x_coord = transformed_feature_vectors[i][0]
                y_coord = transformed_feature_vectors[i][1]
            else:
                x_coord = 0
                y_coord = 0
            term = {
                'id': word.id,
                'term': word.wrd,
                'count': model.wv.vocab[word.wrd.encode('utf-8')].count,
                'x': x_coord,
                'y': y_coord,
            }

            term_concepts = TermConcept.objects.filter(term__term = word.wrd).filter(concept__author = request.user)
            if not term_concepts:
                terms.append(term)
                continue

            concept_id = term_concepts[0].concept.id
            descriptive_term = term_concepts[0].concept.descriptive_term.term
            descriptive_term_id = term_concepts[0].concept.descriptive_term.id
            if concept_id not in concepts:
                concepts[concept_id] = {
                    'id': concept_id,
                    'terms': [],
                    'descriptive_term': descriptive_term,
                    'descriptive_term_id': Word.objects.filter(wrd=descriptive_term)[0].id,
                }
            concepts[concept_id]['terms'].append(term)

        output['terms'].extend(terms)
        output['concepts'].extend([concepts[concept_id] for concept_id in concepts])

        logging.getLogger(INFO_LOGGER).info(json.dumps({'process':'CREATE CONCEPTS','event':'terms_loaded','args':{'user_name':request.user.username,'lexicon_ids':lexicon_ids,'dim_red_method':request.POST['method']}}))
    else:
        logging.getLogger(INFO_LOGGER).warning(json.dumps({'process':'CREATE CONCEPTS','event':'term_loading_failed','args':{'user_name':request.user.username,'lexicon_ids':lexicon_ids,'dim_red_method':request.POST['method']},'reason':'No terms to load.'}))

    return HttpResponse(json.dumps(output), content_type='application/json')
项目:Building-Machine-Learning-Systems-With-Python-Second-Edition    作者:PacktPublishing    | 项目源码 | 文件源码
def plot_iris_mds():
    """Plot MDS and PCA embeddings of the Iris data set.

    Two figures are produced, each with a 3-D panel and a 2-D panel: one
    for MDS (saved as ``mds_demo_iris.png``) and one for PCA (saved as
    ``pca_demo_iris.png``), both under ``CHART_DIR``.
    """
    def _white_background(axes):
        # Axes.set_axis_bgcolor was removed from matplotlib; prefer its
        # replacement set_facecolor and fall back only on old releases.
        if hasattr(axes, 'set_facecolor'):
            axes.set_facecolor('white')
        else:
            axes.set_axis_bgcolor('white')

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # MDS

    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    _white_background(ax)

    mds = manifold.MDS(n_components=3)
    Xtrans = mds.fit_transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on Iris data set in 3 dimensions")
    ax.view_init(10, -15)

    mds = manifold.MDS(n_components=2)
    Xtrans = mds.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on Iris data set in 2 dimensions")

    filename = "mds_demo_iris.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")

    # PCA

    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    _white_background(ax)

    pca = decomposition.PCA(n_components=3)
    Xtrans = pca.fit(X).transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("PCA on Iris data set in 3 dimensions")
    ax.view_init(50, -35)

    pca = decomposition.PCA(n_components=2)
    Xtrans = pca.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("PCA on Iris data set in 2 dimensions")

    filename = "pca_demo_iris.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
项目:motion-classification    作者:matthiasplappert    | 项目源码 | 文件源码
def validate(classifier, train, test, args, report_base_name):
    """Train ``classifier`` and report its behaviour on the train and test sets.

    The classifier is fitted once on ``train``. Then, for every method in
    ``args.loglikelihood_methods``: distances are optionally reported
    (``args.calculate_distances``), loglikelihoods of the training set are
    reported, and each decision maker returned by
    ``get_decision_makers(args)`` is fitted (when it has a callable ``fit``)
    and evaluated on both sets. Test-set loglikelihoods are computed once
    per method and reused across decision makers.
    """
    print('\nTraining classifier on %d samples ...' % len(train.X))
    start = timeit.default_timer()
    classifier.fit(train.X, train.y)
    stop = timeit.default_timer()
    print('Classifier trained, took %f seconds' % (stop - start))

    for method in args.loglikelihood_methods:
        report_name = report_base_name + '_' + method

        if args.calculate_distances:
            print('\nCalculating distances ...')
            start = timeit.default_timer()
            distances = classifier.distances(loglikelihood_method=method, n_samples=500)
            print('Distances calculated, took %f seconds' % (timeit.default_timer() - start))

            report = _generate_distance_reports(distances, target_names=train.target_names)
            _handle_report(report, report_name + '_distances', args)

            # Calculate proto symbol space
            #mds = MDS(n_components=5, dissimilarity='precomputed')
            #coordinates = mds.fit_transform(distances)
            #_plot_proto_symbol_space(coordinates, train.target_names, report_name + '_scatter', args)

        # Get loglikelihoods for train set
        print('\nValidating classifier on training set with %d samples ...' % len(train.X))
        loglikelihoods_train = _calculate_loglikelihoods(classifier, train.X, method)
        report = _generate_loglikelihood_reports(loglikelihoods_train, train.y, target_names=train.target_names)
        _handle_report(report, report_name + '_train_loglikelihoods', args)

        # Fit decision makers
        loglikelihoods_test = None
        for idx, decision_maker in enumerate(get_decision_makers(args)):
            if decision_maker is not None:
                name = args.decision_makers[idx]
                if hasattr(decision_maker, 'fit') and callable(getattr(decision_maker, 'fit')):
                    print('\nTraining decision maker %s on %d loglikelihoods ...' % (name, len(loglikelihoods_train)))
                    # Time this fit itself; the message used to print a stale
                    # duration left over from an earlier measurement.
                    start = timeit.default_timer()
                    decision_maker.fit(loglikelihoods_train, train.y)
                    stop = timeit.default_timer()
                    print('Decision maker trained, took %f seconds' % (stop - start))
                else:
                    print('\nUsing decision maker %s ...' % name)
                y_pred = _calculate_predictions(decision_maker, loglikelihoods_train)
                report = _generate_classification_reports(train.y, y_pred, target_names=train.target_names)
                _handle_report(report, report_name + '_train_classification_' + name, args)

            # Validate on test set
            print('\nValidating classifier on test set with %d samples ...' % len(test.X))
            if loglikelihoods_test is None:
                # Computed once per method and shared by all decision makers.
                loglikelihoods_test = _calculate_loglikelihoods(classifier, test.X, method)
                report = _generate_loglikelihood_reports(loglikelihoods_test, test.y, target_names=test.target_names)
                _handle_report(report, report_name + '_test_loglikelihoods', args)
            if decision_maker is not None:
                y_pred = _calculate_predictions(decision_maker, loglikelihoods_test)
                report = _generate_classification_reports(test.y, y_pred, target_names=test.target_names)
                _handle_report(report, report_name + '_test_classification_' + name, args)
项目:text-analytics-with-python    作者:dipanjanS    | 项目源码 | 文件源码
def plot_clusters(num_clusters, feature_matrix,
                  cluster_data, movie_data,
                  plot_size=(16,8)):
    """Plot the movies in 2-D, coloured and marked by cluster.

    The cosine distances between the rows of ``feature_matrix`` are embedded
    in two dimensions with MDS. Every cluster gets a random colour, a marker
    and a legend label built from its first five key features, and every
    point is annotated with its movie title.

    ``cluster_data`` maps a cluster number to a dict with a 'key_features'
    list; ``movie_data`` provides 'Cluster' and 'Title' columns, in the same
    row order as ``feature_matrix``. ``num_clusters`` is not used.
    """
    # generate random color for clusters
    def generate_random_color():
        color = '#%06x' % random.randint(0, 0xFFFFFF)
        return color
    # define markers for clusters
    markers = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd']
    # build cosine distance matrix
    cosine_distance = 1 - cosine_similarity(feature_matrix)
    # dimensionality reduction using MDS
    mds = MDS(n_components=2, dissimilarity="precomputed",
              random_state=1)
    # get coordinates of clusters in new low-dimensional space
    plot_positions = mds.fit_transform(cosine_distance)
    x_pos, y_pos = plot_positions[:, 0], plot_positions[:, 1]
    # build cluster plotting data
    cluster_color_map = {}
    cluster_name_map = {}
    for cluster_num, cluster_details in cluster_data.items():
        # assign cluster features to unique label
        cluster_color_map[cluster_num] = generate_random_color()
        cluster_name_map[cluster_num] = ', '.join(cluster_details['key_features'][:5]).strip()
    # map each unique cluster label with its coordinates and movies
    cluster_plot_frame = pd.DataFrame({'x': x_pos,
                                       'y': y_pos,
                                       'label': movie_data['Cluster'].values.tolist(),
                                       'title': movie_data['Title'].values.tolist()
                                        })
    grouped_plot_frame = cluster_plot_frame.groupby('label')
    # set plot figure size and axes
    fig, ax = plt.subplots(figsize=plot_size)
    ax.margins(0.05)
    # Axis styling does not depend on the cluster, so it is applied once.
    # Booleans replace the legacy 'off' strings that matplotlib no longer
    # interprets as False.
    ax.set_aspect('auto')
    ax.tick_params(axis='x', which='both', bottom=False, top=False,
                   labelbottom=False)
    ax.tick_params(axis='y', which='both', left=False, top=False,
                   labelleft=False)
    # plot each cluster using co-ordinates and movie titles
    for cluster_num, cluster_frame in grouped_plot_frame:
        # clusters beyond the marker list get a randomly chosen marker
        marker = markers[cluster_num] if cluster_num < len(markers) \
                 else np.random.choice(markers, size=1)[0]
        ax.plot(cluster_frame['x'], cluster_frame['y'],
                marker=marker, linestyle='', ms=12,
                label=cluster_name_map[cluster_num],
                color=cluster_color_map[cluster_num], mec='none')
    fontP = FontProperties()
    fontP.set_size('small')
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.01), fancybox=True,
              shadow=True, ncol=5, numpoints=1, prop=fontP)
    # add labels as the film titles; DataFrame.ix no longer exists in
    # pandas, so the rows are iterated directly
    for row in cluster_plot_frame.itertuples(index=False):
        ax.text(row.x, row.y, row.title, size=8)
    # show the plot
    plt.show()
项目:TensorFlow_DCIGN    作者:yselivonchyk    | 项目源码 | 文件源码
def visualize_encodings(encodings, file_name=None,
                        grid=None, skip_every=999, fast=False, fig=None, interactive=False):
  """Project the encodings with several manifold methods and plot them on a grid.

  After ``manual_pca``, encodings with at most three columns are handed
  straight to ``print_data_only``. Otherwise the first 720 rows are
  projected to 2-D and 3-D with LLE, MDS and t-SNE variants, one subplot
  per projection. A projection that fails is reported on stdout and
  skipped. The figure is saved to ``file_name`` unless ``interactive`` is
  set. ``fast`` is not used.
  """
  encodings = manual_pca(encodings)
  if encodings.shape[1] <= 3:
    return print_data_only(encodings, file_name, fig=fig, interactive=interactive)

  encodings = encodings[0:720]
  # Precomputed pairwise distance matrices for the MDS projections.
  hessian_euc = dist.squareform(dist.pdist(encodings, 'euclidean'))
  hessian_cos = dist.squareform(dist.pdist(encodings, 'cosine'))
  grid = (3, 4) if grid is None else grid
  project_ops = []

  n = 2
  project_ops.append(("LLE ltsa       N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
  project_ops.append(("LLE modified   N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
  project_ops.append(('MDS euclidean  N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
  project_ops.append(("TSNE 30/2000   N:%d" % n, TSNE(perplexity=30, n_components=n, init='pca', n_iter=2000)))
  n = 3
  project_ops.append(("LLE ltsa       N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
  project_ops.append(("LLE modified   N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
  project_ops.append(('MDS euclidean  N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
  project_ops.append(('MDS cosine     N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))

  # Subplot slots; the numbering leaves a gap after every (skip_every - 1) plots.
  plot_places = []
  for i in range(12):
    u, v = int(i / (skip_every - 1)), i % (skip_every - 1)
    j = v + u * skip_every + 1
    plot_places.append(j)

  fig = get_figure(fig)
  fig.set_size_inches(fig.get_size_inches()[0] * grid[0] / 1.,
                      fig.get_size_inches()[1] * grid[1] / 2.0)

  for i, (name, projector) in enumerate(project_ops):
    # The plot title doubles as the flag for 3-D output and for the metric.
    is3d = 'N:3' in name

    try:
      if is3d:
        subplot = plt.subplot(grid[0], grid[1], plot_places[i], projection='3d')
      else:
        subplot = plt.subplot(grid[0], grid[1], plot_places[i])

      data_source = encodings if not _needs_hessian(projector) else \
        (hessian_cos if 'cosine' in name else hessian_euc)
      projections = projector.fit_transform(data_source)
      scatter(subplot, projections, is3d, _build_radial_colors(len(data_source)))
      subplot.set_title(name)
    except Exception as err:
      # Best effort: report the failed projection and continue with the rest.
      # KeyboardInterrupt and SystemExit are no longer swallowed.
      print(name, "Unexpected error: ", type(err), err)

  visualize_data_same(encodings, grid=grid, places=plot_places[-4:])
  if not interactive:
    save_fig(file_name, fig)
  ut.print_time('visualization finished')
项目:visartm    作者:bigartm    | 项目源码 | 文件源码
def get_arrangement_permutation(
        dist,
        mode,
        model=None,
        clusters=None,
        init_perm=None):
    """Compute an ordering of the items described by a distance matrix.

    ``mode`` selects the strategy: "none" (identity order), "hamilton",
    "hamilton_annealing", "tsne", "mds" or "dendro". Any other value raises
    ValueError. When ``model`` is given (and the mode is not "none"), the
    quality metrics and the elapsed time are logged on it and ``model.NDS``
    is set. ``clusters`` and ``init_perm`` are not used.
    """
    started = time.time()

    if mode == "none":
        return list(range(dist.shape[0]))

    if mode in ("hamilton", "hamilton_annealing"):
        from .hamilton_path import HamiltonPath
        solver = HamiltonPath(dist, caller=model)
        if mode == "hamilton":
            solver.solve()
        else:
            solver.solve_annealing()
        perm = solver.path
    elif mode == "tsne":
        from sklearn.manifold import TSNE
        projector = TSNE(n_components=1, random_state=0, metric="precomputed")
        # Items are ordered by their position on the 1-D embedding.
        positions = projector.fit_transform(dist).reshape(-1)
        perm = np.argsort(positions)
    elif mode == "mds":
        from sklearn.manifold import MDS
        projector = MDS(
            n_components=1,
            max_iter=3000,
            eps=1e-9,
            random_state=0,
            dissimilarity="precomputed",
            n_jobs=4)
        positions = projector.fit_transform(dist).reshape(-1)
        perm = np.argsort(positions)
    elif mode == "dendro":
        from algo.arranging.dendro_arranger import DendroArranger
        perm = DendroArranger(dist).arrange()
    else:
        raise ValueError("Unknown mode: %s" % mode)

    if model:
        from .quality import NDS, MNR
        model.NDS = NDS(dist, perm)
        model.log("NDS=%f" % model.NDS)
        model.log("MNR=%f" % MNR(dist, perm))
        model.log("Time=%f" % (time.time() - started))

    return perm