Python sklearn.manifold module: TSNE example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.manifold.TSNE.
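
Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: fit_transform maps an (n_samples, n_features) array to a low-dimensional embedding that can be plotted with matplotlib. The data and parameter values below are placeholders, not taken from any of the projects.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Placeholder data: 200 samples with 50 features each.
X = np.random.RandomState(0).rand(200, 50)

# n_components=2 projects to a plane; perplexity and random_state are the
# parameters most often tuned or fixed in the examples below.
tsne = TSNE(n_components=2, perplexity=30, random_state=0)
X_2d = tsne.fit_transform(X)

plt.scatter(X_2d[:, 0], X_2d[:, 1], s=10)
plt.show()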

Project: Flavor-Network    Author: lingcheng99    | project source | file source
def tsne_cluster_cuisine(df,sublist):
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    print(df_X.shape, lenlist)

    dist = squareform(pdist(df_X, metric='cosine'))
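    # metric='precomputed' means TSNE takes this square pairwise-distance matrix directly instead of raw features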
    tsne = TSNE(metric='precomputed').fit_transform(dist)

    palette = sns.color_palette("hls", len(sublist))
    plt.figure(figsize=(10,10))
    for i,cuisine in enumerate(sublist):
        plt.scatter(tsne[lenlist[i]:lenlist[i+1],0],\
        tsne[lenlist[i]:lenlist[i+1],1],c=palette[i],label=sublist[i])
    plt.legend()

# interactive plot with bokeh; set up for four categories, with color palette; pass in df for either ingredient or flavor
Project: KATE    Author: hugochan    | project source | file source
def word_cloud(word_embedding_matrix, vocab, s, save_file='scatter.png'):
    words = [(i, vocab[i]) for i in s]
    model = TSNE(n_components=2, random_state=0)
    #Note that the following line might use a good chunk of RAM
    tsne_embedding = model.fit_transform(word_embedding_matrix)
    words_vectors = tsne_embedding[np.array([item[1] for item in words])]

    plt.subplots_adjust(bottom = 0.1)
    plt.scatter(
        words_vectors[:, 0], words_vectors[:, 1], marker='o', cmap=plt.get_cmap('Spectral'))

    for label, x, y in zip(s, words_vectors[:, 0], words_vectors[:, 1]):
        plt.annotate(
            label,
            xy=(x, y), xytext=(-20, 20),
            textcoords='offset points', ha='right', va='bottom',
            fontsize=20,
            # bbox=dict(boxstyle='round,pad=1.', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle = '<-', connectionstyle='arc3,rad=0')
            )
    plt.show()
    # plt.savefig(save_file)
Project: pyro    Author: uber    | project source | file source
def plot_tsne(z_mu, classes, name):
    import numpy as np
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from sklearn.manifold import TSNE
    model_tsne = TSNE(n_components=2, random_state=0)
    z_states = z_mu.data.cpu().numpy()
    z_embed = model_tsne.fit_transform(z_states)
    classes = classes.data.cpu().numpy()
    fig666 = plt.figure()
    for ic in range(10):
        ind_vec = np.zeros_like(classes)
        ind_vec[:, ic] = 1
        ind_class = classes[:, ic] == 1
        color = plt.cm.Set1(ic)
        plt.scatter(z_embed[ind_class, 0], z_embed[ind_class, 1], s=10, color=color)
        plt.title("Latent Variable T-SNE per Class")
        fig666.savefig('./vae_results/'+str(name)+'_embedding_'+str(ic)+'.png')
    fig666.savefig('./vae_results/'+str(name)+'_embedding.png')
Project: singlecell-dash    Author: czbiohub    | project source | file source
def compute_bulk_smushing(self):
        """Get average signal from each plate ('bulk') and find 2d embedding"""

        grouped = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])

        if os.path.exists(self.bulk_smushed_cache_file):
            smushed = pd.read_csv(self.bulk_smushed_cache_file, names=[0, 1],
                                  header=0, index_col=0)
            # if the set of plates hasn't changed, return the cached version
            if set(grouped.groups) == set(smushed.index):
                return smushed

        # if the cache was missing or invalid, compute a new projection
        medians = grouped.median()
        smusher = TSNE(random_state=0, perplexity=10, metric='cosine')
        smushed = pd.DataFrame(smusher.fit_transform(medians),
                               index=medians.index)

        smushed.to_csv(self.bulk_smushed_cache_file)

        return smushed
Project: singlecell-dash    Author: czbiohub    | project source | file source
def compute_cell_smushing(self):
        """Within each plate, find a 2d embedding of all cells"""
        grouped = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])

        if os.path.exists(self.cell_smushed_cache_file):
            smusheds = pd.read_pickle(self.cell_smushed_cache_file)
            # if nothing is missing, return the cached version
            if not set(grouped.groups) - set(smusheds):
                return smusheds
        else:
            smusheds = {}

        for plate_name, genes_subset in grouped:
            if plate_name not in smusheds:
                cell_smusher = TSNE(metric='cosine', random_state=0)
                cell_smushed = pd.DataFrame(
                    cell_smusher.fit_transform(genes_subset),
                    index=genes_subset.index)
                smusheds[plate_name] = cell_smushed

        pd.to_pickle(smusheds, self.cell_smushed_cache_file)

        return smusheds
Project: tensorflow_tutorial    Author: lpty    | project source | file source
def gen(self):
        embedding, _ = self.embedding()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, tf.train.latest_checkpoint('.'))
            embedding = sess.run(embedding)
        # take the embeddings of the words to visualize
        data = embedding[:self.viz_words, :]
        # reduce the embeddings to 2D with t-SNE
        tsne = TSNE(n_components=2, init='pca', random_state=0)
        embed_tsne = tsne.fit_transform(data)
        # plot the embedded words
        plt.subplots(figsize=(10, 10))
        for idx in range(self.viz_words):
            plt.scatter(*embed_tsne[idx, :], color='steelblue')
            plt.annotate(self.train_text.int_to_vocab[idx], (embed_tsne[idx, 0], embed_tsne[idx, 1]), alpha=0.7)
        plt.show()
Project: Msc_Multi_label_ZeroShot    Author: thomasSve    | project source | file source
def main():

    args = parse_args()

    print('Called with args:')
    print(args)
    lang_db = get_language_model(args.lang_name)
    imdb = get_imdb(args.imdb_name)

    # Get words in space
    vocabulary = imdb.get_labels(args.space)

    # Get features for words
    wv = [lang_db.word_vector(w) for w in vocabulary]
    from sklearn.metrics.pairwise import cosine_similarity
    from scipy import spatial
    #spatial.distance.cosine(dataSetI, dataSetII)
    tsne = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(wv)

    plt.scatter(Y[:, 0], Y[:, 1])
    for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
        plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    plt.show()
Project: lazyprogrammer    Author: inhwane    | project source | file source
def main(we_file='glove_model_50.npz', w2i_file='glove_word2idx_50.json'):
    words = ['japan', 'japanese', 'england', 'english', 'australia', 'australian', 'china', 'chinese', 'italy', 'italian', 'french', 'france', 'spain', 'spanish']

    with open(w2i_file) as f:
        word2idx = json.load(f)

    npz = np.load(we_file)
    W = npz['arr_0']
    V = npz['arr_1']
    We = (W + V.T) / 2

    idx = [word2idx[w] for w in words]
    # We = We[idx]

    tsne = TSNE()
    Z = tsne.fit_transform(We)
    Z = Z[idx]
    plt.scatter(Z[:,0], Z[:,1])
    for i in range(len(words)):
        plt.annotate(words[i], xy=(Z[i,0], Z[i,1]))
    plt.show()
Project: auDeep    Author: auDeep    | project source | file source
def take_action(self, parsed_args):
        if not parsed_args.input.exists():
            raise IOError("failed to open data set at {}".format(parsed_args.input))

        data_set = load(parsed_args.input)

        features = np.reshape(data_set.features, [data_set.num_instances, -1])

        if features.shape[1] > 50:
            self.log.info("applying PCA")

            pca = PCA(n_components=200)
            pca.fit(features)
            features = pca.transform(features)

        self.log.info("computing T-SNE embedding")
        tsne = TSNE(perplexity=parsed_args.perplexity,
                    learning_rate=parsed_args.learning_rate,
                    verbose=self.app_args.verbose_level)

        embedding = tsne.fit_transform(features)

        self.log.info("plotting embedding")
        self.plot_with_labels(data_set, embedding)
Project: motion-classification    Author: matthiasplappert    | project source | file source
def _plot_proto_symbol_space(coordinates, target_names, name, args):
    # Reduce to 2D so that we can plot it.
    coordinates_2d = TSNE().fit_transform(coordinates)

    n_samples = coordinates_2d.shape[0]
    x = coordinates_2d[:, 0]
    y = coordinates_2d[:, 1]
    colors = cm.rainbow(np.linspace(0, 1, n_samples))

    fig = plt.figure(1)
    plt.clf()
    ax = fig.add_subplot(111)
    dots = []
    for idx in range(n_samples):
        dots.append(ax.plot(x[idx], y[idx], "o", c=colors[idx], markersize=15)[0])
        ax.annotate(target_names[idx],  xy=(x[idx], y[idx]))
    lgd = ax.legend(dots, target_names, ncol=4, numpoints=1, loc='upper center', bbox_to_anchor=(0.5,-0.1))
    ax.grid('on')

    if args.output_dir is not None:
        path = os.path.join(args.output_dir, name + '.pdf')
        print('Saved plot to file "%s"' % path)
        fig.savefig(path, bbox_extra_artists=(lgd,), bbox_inches='tight')
    else:
        plt.show()
Project: gan-error-avoidance    Author: aleju    | project source | file source
def embed_or_load_cache(codes, gen, r_idx, batch_size, save_path):
    cache_fp = os.path.join(save_path, 'tsne_plots', 'embedded_points_r%02d.csv' % (r_idx,))
    if os.path.isfile(cache_fp):
        lines = open(cache_fp).readlines()
        lines = [line.strip().split(",") for line in lines[1:]]
        vals = [(float(x), float(y)) for (x, y) in lines]
        return np.array(vals, dtype=np.float32)
    else:
        codes_r = generate_codes_by_r(gen, codes, r_idx, batch_size)

        print(codes_r.shape)
        print("Embedding %s via TSNE..." % (str(codes_r.shape),))
        tsne = TSNE(perplexity=40, n_iter=10000, learning_rate=4000, verbose=True)
        #tsne = TSNE(perplexity=40, n_iter=10000, n_jobs=4, verbose=True)
        #tsne = PCA(n_components=2)
        codes_r_2d = tsne.fit_transform(codes_r.astype(np.float64))
        print("shape after embedding: %s" % (str(codes_r_2d.shape),))

        with open(cache_fp, "w") as f:
            f.write("#x,y\n")
            for i in range(codes_r.shape[0]):
                f.write("%.6f,%.6f\n" % (codes_r_2d[i, 0], codes_r_2d[i, 1]))
        return codes_r_2d
Project: cancer_nn    Author: tanmoyopenroot    | project source | file source
def plotInputData(X, Y, title, data_len):

    time_start = time.time()   
    X = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300).fit_transform(X)
    print("After Reduction Data Shape : {0}".format(X.shape))    
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start))

    # Main scatter plot and plot annotation
    f, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(X[:data_len // 2, 0] * 10, X[:data_len // 2, 1] * 10, marker='o', color='green', s=30, alpha=0.5)
    ax.scatter(X[data_len // 2:, 0] * 10, X[data_len // 2:, 1] * 10, marker='^', color='blue', s=30, alpha=0.5)
    plt.legend(["Melanoma", "Benign"], loc='upper right') 
    plt.title(title)
    plt.ylabel('Y')
    plt.xlabel('X')

    # plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
    # plt.xlabel('X')
    # plt.ylabel('Y')
    # plt.title('SVC Data Plot')
    plt.show()
Project: DLDisambiguation    Author: Labyrinth108    | project source | file source
def main():
    # model_file = "../data/word2vec/character.model"
    model_file = "../data/word2vec_new/word.model"
    checkSimilarity(model_file, "?")

    # character_wv_file = '../data/word2vec/character_model.txt'
    # word_wv_file = '../data/word2vec/word_model.txt'
    #
    # embeddings_file = word_wv_file
    # wv, vocabulary = load_embeddings(embeddings_file)
    #
    # tsne = TSNE(n_components=2, random_state=0)
    # np.set_printoptions(suppress=True)
    # Y = tsne.fit_transform(wv[:1000, :])
    #
    # plt.scatter(Y[:, 0], Y[:, 1])
    # for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
    #     plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    # plt.show()
Project: yellowbrick    Author: DistrictDataLabs    | project source | file source
def __init__(self, ax=None, decompose='svd', decompose_by=50, classes=None,
               colors=None, colormap=None, **kwargs):
        """
        Initialize the TSNE visualizer with visual hyperparameters.
        """
        super(TSNEVisualizer, self).__init__(ax=ax, **kwargs)

        # Visualizer parameters
        self.classes_ = classes
        self.n_instances_ = 0

        # Visual Parameters
        # TODO: Only colors currently works to select the colors of classes.
        self.colors = colors
        self.colormap = colormap

        # TSNE Parameters
        self.transformer_ = self.make_transformer(decompose, decompose_by, kwargs)
Project: yellowbrick    Author: DistrictDataLabs    | project source | file source
def finalize(self, **kwargs):
        """
        Finalize the drawing by adding a title and legend, and removing the
        axes objects that do not convey information about TSNE.
        """

        # Add a title
        self.set_title(
            "TSNE Projection of {} Documents".format(self.n_instances_)
        )

        # Remove the ticks
        self.ax.set_yticks([])
        self.ax.set_xticks([])

        # Add the legend outside of the figure box.
        if self.classes_:
            box = self.ax.get_position()
            self.ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            self.ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
Project: anime_recs    Author: Cpierse    | project source | file source
def tSNE_model(Vt,aid_dict):
    tsne_model = TSNE(n_components=2, verbose=1, random_state=0)
    tsne_V = tsne_model.fit_transform(np.transpose(Vt))
    # Put data in a pandas dataframe:
    tsne_df = pd.DataFrame(tsne_V, columns=['x', 'y'])
    # Save it:
    tsne_df.to_csv('results\\tsne_svd.csv')
    # Get anime names:
    con = sqlite3.connect('user_anime_data.db')
    cur = con.cursor()
    anime_data = cur.execute('SELECT Anime, Name, Score FROM animeData').fetchall()
    anime_data=dict([(x[0],(x[1],x[2])) for x in anime_data])
    anime_names = [anime_data[aid_dict[x]][0] for x in range(Vt.shape[1])]
    anime_scores = [anime_data[aid_dict[x]][1] for x in range(Vt.shape[1])]
    anime_ids = [aid_dict[x] for x in range(Vt.shape[1])]
    tsne_df['anime_name'] = anime_names
    tsne_df['anime_id'] = anime_ids
    tsne_df['rating'] = anime_scores
    return tsne_df

# Plotting the data:
Project: VariationalAutoEncoder    Author: despoisj    | project source | file source
def computeTSNEProjectionOfLatentSpace(X, encoder, display=True):
    # Compute latent space representation
    print("Computing latent space projection...")
    X_encoded = encoder.predict(X)

    # Compute t-SNE embedding of latent space
    print("Computing t-SNE embedding...")
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_tsne = tsne.fit_transform(X_encoded)

    # Plot images according to t-sne embedding
    if display:
        print("Plotting t-SNE visualization...")
        fig, ax = plt.subplots()
        imscatter(X_tsne[:, 0], X_tsne[:, 1], imageData=X, ax=ax, zoom=0.15)
        plt.show()
    else:
        return X_tsne

# Show dataset images with T-sne projection of pixel space
Project: VariationalAutoEncoder    Author: despoisj    | project source | file source
def computeTSNEProjectionOfPixelSpace(X, display=True):
    # Compute t-SNE embedding of latent space
    print("Computing t-SNE embedding...")
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_tsne = tsne.fit_transform(X.reshape([-1,imageSize*imageSize*3]))

    # Plot images according to t-sne embedding
    if display:
        print("Plotting t-SNE visualization...")
        fig, ax = plt.subplots()
        imscatter(X_tsne[:, 0], X_tsne[:, 1], imageData=X, ax=ax, zoom=0.15)
        plt.show()
    else:
        return X_tsne

# Reconstructions for samples in dataset
Project: artificio    Author: ankonzoid    | project source | file source
def plot_tsne(images, X, filename):

    def imscatter(x, y, images, ax=None, zoom=1.0):
        if ax is None:
            ax = plt.gca()
        x, y = np.atleast_1d(x, y)
        artists = []
        for x0, y0, img0 in zip(x, y, images):
            im = OffsetImage(img0, zoom=zoom)
            ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False)
            artists.append(ax.add_artist(ab))
        ax.update_datalim(np.column_stack([x, y]))
        ax.autoscale()
        return artists

    def plot_embedding(X, imgs, title=None):
        x_min, x_max = np.min(X, 0), np.max(X, 0)
        X = (X - x_min) / (x_max - x_min)

        plt.figure()
        ax = plt.subplot(111)
        for i in range(X.shape[0]):
            plt.text(X[i, 0], X[i, 1], ".", fontdict={'weight': 'bold', 'size': 9})
        if hasattr(offsetbox, 'AnnotationBbox'):
            imscatter(X[:,0], X[:,1], imgs, zoom=0.1, ax=ax)

        plt.xticks([]), plt.yticks([])
        if title is not None:
            plt.title(title)

    print("Computing t-SNE embedding")
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_tsne = tsne.fit_transform(X)
    plot_embedding(X_tsne, images, "t-SNE embedding of images")
    plt.savefig(filename, bbox_inches='tight')

# Driver
Project: NNLM    Author: kanoh-k    | project source | file source
def plot(self, filename="./corpus/model/blog.png"):
        tsne = TSNE(perplexity=30, n_components=2, init="pca", n_iter=5000)
        plot_only=500
        low_dim_embeddings = tsne.fit_transform(self.final_embeddings[:plot_only, :])
        reversed_dictionary = dict(zip(self.dictionary.values(), self.dictionary.keys()))
        labels = [reversed_dictionary[i] for i in range(plot_only)]

        plt.figure(figsize=(18, 18))
        for i, label in enumerate(labels):
            x, y = low_dim_embeddings[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                        xy=(x, y),
                        xytext=(5, 2),
                        textcoords="offset points",
                        ha="right",
                        va="bottom")
        plt.savefig(filename)
        print("Scatter plot was saved to", filename)
Project: Tensorflow-Turitors    Author: Xls1994    | project source | file source
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
        assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
        plt.figure(figsize=(18, 18))  # in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.savefig(filename)

# TSNE visualization
Project: GEM    Author: palash1992    | project source | file source
def plot_embedding2D(node_pos, node_colors=None, di_graph=None):
    node_num, embedding_dimension = node_pos.shape
    if(embedding_dimension > 2):
        print("Embedding dimension greater than 2, use tSNE to reduce it to 2")
        model = TSNE(n_components=2)
        node_pos = model.fit_transform(node_pos)

    if di_graph is None:
        # plot using plt scatter
        plt.scatter(node_pos[:, 0], node_pos[:, 1], c=node_colors)
    else:
        # plot using networkx with edge structure
        pos = {}
        for i in range(node_num):
            pos[i] = node_pos[i, :]
        if node_colors:
            nx.draw_networkx_nodes(di_graph, pos,
                                   node_color=node_colors,
                                   width=0.1, node_size=100,
                                   arrows=False, alpha=0.8,
                                   font_size=5)
        else:
            nx.draw_networkx(di_graph, pos, node_color=node_colors,
                             width=0.1, node_size=300, arrows=False,
                             alpha=0.8, font_size=12)
Project: QScode    Author: PierreHao    | project source | file source
def main():
    f = open('label.txt','w')
    #target_names = np.array(args.names)
    X, target_names, y = getXY(args.image_dir)
    X = np.asfarray(X,dtype='float')
    colors = cm.gnuplot2(np.linspace(0, 1, len(target_names)))

    #X_pca = PCA(n_components=128).fit_transform(X)
    X_pca = X
    tsne = TSNE(n_components=2, init='random', random_state=0)
    X_r = tsne.fit_transform(X_pca)

    for c, i, target_name in zip(colors,
                             list(range(0, len(target_names))),
                             target_names):
        plt.scatter(X_r[y[i], 0], X_r[y[i], 1],
                c=c, label=str(i+1))
        f.write(target_name+'\n')
    plt.legend()
    plt.savefig("{}/10crop1.png".format('./'))
    f.close()
Project: Ransome-killer    Author: gau820827    | project source | file source
def twoDB(Xtrain, Ytrain):


    pca = PCA()
    reduced = pca.fit_transform(Xtrain)


    tsne = TSNE()
    Z = tsne_divide(tsne, reduced, len(Ytrain))


    # Save the PCA vectors
    with open("packet_tsneBinfo"+str(reduce_number), "wb") as f:
        pickle.dump(Z, f)
        pickle.dump(Xtrain, f)
        pickle.dump(Ytrain, f)

    return
Project: Ransome-killer    Author: gau820827    | project source | file source
def threeDB(Xtrain, Ytrain):


    pca = PCA()
    reduced = pca.fit_transform(Xtrain)

    tsne = TSNE(n_components=3)
    Z = tsne_divide(tsne, reduced, len(Ytrain))


    # Save the PCA vectors
    with open("packet_3DtsneBinfo"+str(reduce_number), "wb") as f:
        pickle.dump(Z, f)
        pickle.dump(Xtrain, f)
        pickle.dump(Ytrain, f)

    return
Project: photinia    Author: XoriieInpottn    | project source | file source
def main(flags):
    with open(flags.emb_file, 'rb') as f:
        emb_dict = pickle.load(f)
    final_embeddings = []
    words = []
    for k, v in emb_dict.items():
        words.append(k)
        final_embeddings.append(v)
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    low_dim_embs = tsne.fit_transform(final_embeddings[:flags.plot_num])
    labels = words[:flags.plot_num]
    plot_with_labels(low_dim_embs, labels)
    return 0
Project: Flavor-Network    Author: lingcheng99    | project source | file source
def plot_bokeh(df,sublist,filename):
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    print(df_X.shape, lenlist)

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)
    #cannot use seaborn palette for bokeh
    palette =['red','green','blue','yellow']
    colors =[]
    for i in range(len(sublist)):
        for j in range(lenlist[i+1]-lenlist[i]):
            colors.append(palette[i])
    # plot with bokeh
    output_file(filename)
    source = ColumnDataSource(
            data=dict(x=tsne[:,0],y=tsne[:,1],
                cuisine = df_sub['cuisine'],
                recipe = df_sub['recipeName']))

    hover = HoverTool(tooltips=[
                ("cuisine", "@cuisine"),
                ("recipe", "@recipe")])

    p = figure(plot_width=1000, plot_height=1000, tools=[hover],
               title="flavor clustering")

    p.circle('x', 'y', size=10, source=source,fill_color=colors)

    show(p)
Project: KATE    Author: hugochan    | project source | file source
def plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    # markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
    plt.rc('legend',**{'fontsize':30})
    classes_to_visual = list(set(classes_to_visual))
    C = len(classes_to_visual)
    while True:
        if C <= len(markers):
            break
        markers += markers

    class_ids = dict(zip(classes_to_visual, range(C)))

    if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
        codes, labels = zip(*[(code, doc_labels[doc]) for doc, code in doc_codes.items() if doc_labels[doc] in classes_to_visual])
    else:
        codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = tsne.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')

    for c in classes_to_visual:
        idx = np.array(labels) == c
        # idx = get_indices(labels, c)
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=1, marker=markers[class_ids[c]],
                        markersize=10, label=c)
    legend = plt.legend(loc='upper right', shadow=True)
    # plt.title("tsne")
    # plt.savefig(save_file)
    plt.savefig(save_file, format='eps', dpi=2000)
    plt.show()
Project: KATE    Author: hugochan    | project source | file source
def plot_tsne_3d(doc_codes, doc_labels, classes_to_visual, save_file, maker_size=None, opaque=None):
    markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
    plt.rc('legend',**{'fontsize':20})
    colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k']
    C = len(classes_to_visual)
    while True:
        if C <= len(markers):
            break
        markers += markers
    while True:
        if C <= len(colors):
            break
        colors += colors

    class_ids = dict(zip(classes_to_visual, range(C)))

    if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
        codes, labels = zip(*[(code, doc_labels[doc]) for doc, code in doc_codes.items() if doc_labels[doc] in classes_to_visual])
    else:
        codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    tsne = TSNE(perplexity=30, n_components=3, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = tsne.fit_transform(X)

    fig = plt.figure(figsize=(10, 10), facecolor='white')
    ax = fig.add_subplot(111, projection='3d')

    # The problem is that the legend function doesn't support the artist type returned by a 3D scatter,
    # so you have to create a "dummy plot" (proxy artist) with the same characteristics and put those in the legend.
    scatter_proxy = []
    for i in range(C):
        cls = classes_to_visual[i]
        idx = np.array(labels) == cls
        ax.scatter(X[idx, 0], X[idx, 1], X[idx, 2], c=colors[i], alpha=opaque[i] if opaque else 1, s=maker_size[i] if maker_size else 20, marker=markers[i], label=cls)
        scatter_proxy.append(mpl.lines.Line2D([0],[0], linestyle="none", c=colors[i], marker=markers[i], label=cls))
    ax.legend(scatter_proxy, classes_to_visual, numpoints=1)
    plt.savefig(save_file)
    plt.show()
Project: KATE    Author: hugochan    | project source | file source
def DBN_plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]

    C = len(classes_to_visual)
    while True:
        if C <= len(markers):
            break
        markers += markers

    class_ids = dict(zip(classes_to_visual.keys(), range(C)))

    codes, labels = doc_codes, doc_labels

    X = np.r_[list(codes)]
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = tsne.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')

    for c in classes_to_visual.keys():
        idx = np.array(labels) == c
        # idx = get_indices(labels, c)
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6, marker=markers[class_ids[c]],
                        markersize=6, label=classes_to_visual[c])
    legend = plt.legend(loc='upper center', shadow=True)
    plt.title("tsne")
    plt.savefig(save_file)
    plt.show()
Project: KATE    Author: hugochan    | project source | file source
def reuters_visualize_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
    """
        Visualize the input document codes on a 2D t-SNE plot.
        @param doc_codes: document code vectors to embed
        @param doc_labels: label for each document
        @param classes_to_visual: class names to include in the plot
        @param save_file: path where the figure is saved
    """

    # markers = ["p", "s", "h", "H", "+", "x", "D"]
    markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]

    C = len(classes_to_visual)
    while True:
        if C <= len(markers):
            break
        markers += markers

    class_names = classes_to_visual.keys()
    class_ids = dict(zip(class_names, range(C)))
    class_names = set(class_names)
    codes, labels = zip(*[(code, doc_labels[doc]) for doc, code in doc_codes.items() if class_names.intersection(set(doc_labels[doc]))])

    X = np.r_[list(codes)]
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    np.set_printoptions(suppress=True)
    X = tsne.fit_transform(X)

    plt.figure(figsize=(10, 10), facecolor='white')

    for c in classes_to_visual.keys():
        idx = get_indices(labels, c)
        plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6, marker=markers[class_ids[c]],
                        markersize=6, label=classes_to_visual[c])
    legend = plt.legend(loc='upper center', shadow=True)
    plt.title("tsne")
    plt.savefig(save_file)
    plt.show()
Project: histwords    Author: williamleif    | project source | file source
def fit_tsne(values):
    if not values:
        return

    start = time.time()
    mat = np.array(values)
    model = TSNE(n_components=2, random_state=0, learning_rate=150, init='pca')
    fitted = model.fit_transform(mat)
    print "FIT TSNE TOOK %s" % (time.time() - start)

    return fitted
Project: agent-trainer    Author: lopespm    | project source | file source
def save_visualization_to_image(self, inputs, outputs, folder_path_for_result_image):
        print("Computing t-SNE embedding")
        x = np.array([state.reshape(-1, ) for state in inputs])
        y = outputs
        tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
        x_tsne = tsne.fit_transform(x)
        self._tsne_plot_embedding(x=x_tsne,
                                  y=y,
                                  inputs=inputs,
                                  path_result_image=os.path.join(folder_path_for_result_image, "t-SNE.png"))
Project: keras-molecules    Author: maxhodak    | project source | file source
def visualize_latent_rep(args, model, x_latent):
    print("pca_on=%r pca_comp=%d tsne_comp=%d tsne_perplexity=%f tsne_lr=%f" % (
        args.use_pca,
        args.pca_components,
        args.tsne_components,
        args.tsne_perplexity,
        args.tsne_lr
    ))

    if args.use_pca:
        pca = PCA(n_components = args.pca_components)
        x_latent = pca.fit_transform(x_latent)

    figure(figsize=(6, 6))
    scatter(x_latent[:, 0], x_latent[:, 1], marker='.')
    show()

    tsne = TSNE(n_components = args.tsne_components,
                perplexity = args.tsne_perplexity,
                learning_rate = args.tsne_lr,
                n_iter = args.tsne_iterations,
                verbose = 4)
    x_latent_proj = tsne.fit_transform(x_latent)
    del x_latent

    figure(figsize=(6, 6))
    scatter(x_latent_proj[:, 0], x_latent_proj[:, 1], marker='.')
    show()
Project: dcss_single_cell    Author: srmcc    | project source | file source
def tSNE_pairwise(D):
    """
    From clustering_on_transcript_compatibility_counts, see github for MIT license
    """
    tsne = manifold.TSNE(n_components=2, random_state=0, metric='precomputed', n_iter=2000, verbose=1);
    X_tsne = tsne.fit_transform(D);
    return X_tsne

# Plot function with Zeisel's colors corresponding to labels
Project: Dragonfly    Author: duaneloh    | project source | file source
def do_embedding(self, event=None):
        converted = self.parent.converted
        if converted is None:
            #self.conversion.convert_frames()
            self.parent.converted = np.load(self.parent.output_folder+'/converted.npy') #FIXME For debugging
            converted = self.parent.converted

        method_ind = self.method.currentIndex()
        print('Doing %s' % self.method.currentText())
        if method_ind == 0:
            self.embedder = manifold.SpectralEmbedding(n_components=4, n_jobs=-1)
        elif method_ind == 1:
            self.embedder = manifold.Isomap(n_components=4, n_jobs=-1)
        elif method_ind == 2:
            self.embedder = manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='modified')
        elif method_ind == 3:
            self.embedder = manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='hessian', eigen_solver='dense')
        elif method_ind == 4:
            self.embedder = manifold.MDS(n_components=4, n_jobs=-1)
        elif method_ind == 5:
            self.embedder = manifold.TSNE(n_components=3, init='pca')
        self.embedder.fit(converted)
        self.embed = self.embedder.embedding_
        self.embed_plot = self.embed

        self.gen_hist()
        self.plot_embedding()
        if not self.embedded:
            self.add_classes_frame()
        self.embedded = True
Project: smiles-neural-network    Author: PMitura    | project source | file source
def visualize2D(model, layerID, inputData, labels, withTime = False):
    print("\n  Generating output distribution for layer {}".format(layerID))
    vLayer = K.function([model.layers[0].input], [model.layers[layerID].output])
    result = vLayer([inputData])

    values = []
    for instance in result:
        for line in instance:
            array = []
            for val in line:
                if withTime:
                    for deepVal in val:
                        array.append(deepVal)
                else:
                    array.append(val)
            values.append(array)
    npvalues = np.array(values)

    model = TSNE(n_components = 2, random_state = 0)
    # model = PCA(n_components = 2)
    scatterValues = model.fit_transform(npvalues)
    labels2D = np.zeros((len(labels), 1))
    for i in range(len(labels)):
        labels2D[i][0] = labels[i]
    scatterValues = np.hstack((scatterValues, labels2D))

    dFrame = pd.DataFrame(scatterValues, columns = ('a', 'b', 'c'))
    plot = dFrame.plot.scatter(x = 'a', y = 'b', c = 'c', cmap = 'plasma')
    fig = plot.get_figure()
    fig.savefig('{}/{}'.format(cc.cfg['plots']['dir'],SCATTER_NAME))

    print("  ...done")
Project: texta    Author: texta-tk    | project source | file source
def index(request):
    if 'model' not in request.session:
        return HttpResponseRedirect(URL_PREFIX + '/')
    template = loader.get_template('conceptualiser.html')

    lexicons = []

    for lexicon in Lexicon.objects.all().filter(author=request.user):
        setattr(lexicon,'size',Word.objects.all().filter(lexicon=lexicon.id).count())
        lexicons.append(lexicon)

    methods = ["PCA","TSNE","MDS"]

    return HttpResponse(template.render({'STATIC_URL':STATIC_URL,'lexicons':lexicons,'methods':methods},request))
Project: FYP-AutoTextSum    Author: MrRexZ    | project source | file source
def generate_tsne(self, path="glove/model/model", size=(100, 100), word_count=1000, embeddings=None):
        if embeddings is None:
            embeddings = self.embeddings
        from sklearn.manifold import TSNE
        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
        low_dim_embs = tsne.fit_transform(numpy.asarray(list(embeddings.values())))
        labels = self.words[:word_count]
        return _plot_with_labels(low_dim_embs, labels, path, size)
Project: Vulcan    Author: rfratila    | project source | file source
def display_tsne(train_x, train_y, label_map=None):
    """
    t-distributed Stochastic Neighbor Embedding (t-SNE) visualization [1].

    [1]: Maaten, L., Hinton, G. (2008). Visualizing Data using t-SNE.
            JMLR 9(Nov):2579--2605.

    Args:
        train_x: 2d numpy array (batch, features) of samples
        train_y: 2d numpy array (batch, labels) for samples
        label_map: a dict of labelled (str(int), string) key, value pairs
    """
    tsne = TSNE(n_components=2, random_state=0)
    x_transform = tsne.fit_transform(train_x)
    y_unique = np.unique(train_y)
    if label_map is None:
        label_map = {str(i): str(i) for i in y_unique}
    elif not isinstance(label_map, dict):
        raise ValueError('label_map must be a dict of a key'
                         ' mapping to its true label')
    colours = plt.cm.rainbow(np.linspace(0, 1, len(y_unique)))
    plt.figure()
    for index, cl in enumerate(y_unique):
        plt.scatter(x=x_transform[train_y == cl, 0],
                    y=x_transform[train_y == cl, 1],
                    s=100,
                    c=colours[index],
                    marker='o',
                    edgecolors='none',
                    label=label_map[str(cl)])
    plt.xlabel('X in t-SNE')
    plt.ylabel('Y in t-SNE')
    plt.legend(loc='upper right')
    plt.title('t-SNE visualization')
    plt.show(False)
Project: 100knock2016    Author: tmu-nlp    | project source | file source
def main():
    model = TSNE(n_components=2)
    countries = dictdata(getCountrydict())
    result = model.fit_transform(countries.getData())
    hidden, graph = plt.subplots()
    graph.scatter(result[:, 0], result[:, 1], s=1)
    for i, country in enumerate(countries.getName()):
        graph.annotate(country, xy=(result[i, 0], result[i, 1]), size=10)
    plt.show()
Project: pandora    Author: mikekestemont    | project source | file source
def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
        # collect embeddings for mfi:
        X = np.asarray([self.w2v_model[w] for w in self.mfi \
                            if w in self.w2v_model], dtype='float32')
        # dimension reduction:
        tsne = TSNE(n_components=2)
        coor = tsne.fit_transform(X) # unsparsify

        plt.clf()
        sns.set_style('dark')
        plt.rcParams['axes.linewidth'] = 0.4
        fig, ax1 = plt.subplots()

        labels = self.mfi
        # first plot slices:
        x1, x2 = coor[:,0], coor[:,1]
        ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')
        # clustering on top (add some colouring):
        clustering = AgglomerativeClustering(linkage='ward',
                            affinity='euclidean', n_clusters=nb_clusters)
        clustering.fit(coor)
        # add names:
        for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
            ax1.text(x, y, name, ha='center', va="center",
                     color=plt.cm.spectral(cluster_label / 10.),
                     fontdict={'family': 'Arial', 'size': 8})
        # control aesthetics:
        ax1.set_xlabel('')
        ax1.set_ylabel('')
        ax1.set_xticklabels([])
        ax1.set_xticks([])
        ax1.set_yticklabels([])
        ax1.set_yticks([])
        plt.savefig(outputfile, bbox_inches=0)
Project: wtfrnn    Author: juliakreutzer    | project source | file source
def main(_):
  """Train a word2vec model."""
  if not FLAGS.train_data or not FLAGS.save_path:
    print("--train_data and --save_path must be specified.")
    sys.exit(1)
  opts = Options()
  with tf.Graph().as_default(), tf.Session() as session:
    model = Word2Vec(opts, session)
    for _ in range(opts.epochs_to_train):
      model.train()  # Process one epoch
    # Perform a final save.
    model.saver.save(session,
                     os.path.join(opts.save_path, opts.name+".model.base.ckpt"),
                     global_step=model.global_step)
    model.nearby(['Switzerland'])

    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    plot_only = len(model._id2word)
    final_embeddings = model._emb.eval(session)
    print(final_embeddings)
    pkl.dump(final_embeddings,open("embeddings/"+opts.name+".emb.base.pkl","wb"))
    pkl.dump(model._word2id, open("dicts/"+opts.name+".w2i.base.pkl","wb"))
    pkl.dump(model._id2word, open("dicts/"+opts.name+".i2w.base.pkl","wb"))
    low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
    print(low_dim_embs)
   # print(zip(model._id2word.iteritems(),low_dim_embs))
    labels = [model._id2word[i] for i in range(plot_only)]
    plot_with_labels(low_dim_embs, labels,"plots/"+opts.name+".tsne.base.png")

    if FLAGS.interactive:
      # E.g.,
      # [0]: model.analogy('france', 'paris', 'russia')
      # [1]: model.nearby(['proton', 'elephant', 'maxwell'])
      _start_shell(locals())
Project: visualize-tsne    Author: YontiLevin    | project source | file source
def calculate_tsne(self):
        self._perform_svd()
        if self.method == SKLEARN:
            tsne_vectors = TSNE(n_components=2, perplexity=40, verbose=2).fit_transform(self.data_vectors)
        else:
            tsne_vectors = MATTENS_TSNE(self.data_vectors, no_dims=2, initial_dims=self.data_vectors.shape[1],
                                        perplexity=40.0)
        self.tsne_vectors = tsne_vectors
Project: lazyprogrammer    Author: inhwane    | project source | file source
def main():
    tsne = TSNE(perplexity=40)
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1])
    for i in range(D):
        plt.annotate(index_word_map[i], xy=(Z[i,0], Z[i,1]))
    plt.show()
Project: lazyprogrammer    Author: inhwane    | project source | file source
def main():
    X, Y = get_donut_data()

    plt.scatter(X[:,0], X[:,1], s=100, c=Y, alpha=0.5)
    plt.show()

    tsne = TSNE(perplexity=40)
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1], s=100, c=Y, alpha=0.5)
    plt.show()
Project: lazyprogrammer    Author: inhwane    | project source | file source
def main():
    Xtrain, Ytrain, _, _ = getKaggleMNIST()

    sample_size = 1000
    X = Xtrain[:sample_size]
    Y = Ytrain[:sample_size]

    tsne = TSNE()
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1], s=100, c=Y, alpha=0.5)
    plt.show()
Project: lazyprogrammer    Author: inhwane    | project source | file source
def main():
    Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()
    dbn = DBN([1000, 750, 500], UnsupervisedModel=AutoEncoder)
    # dbn = DBN([1000, 750, 500, 10])
    output = dbn.fit(Xtrain, pretrain_epochs=2)
    print "output.shape", output.shape

    # sample before using t-SNE because it requires lots of RAM
    sample_size = 600
    tsne = TSNE()
    reduced = tsne.fit_transform(output[:sample_size])
    plt.scatter(reduced[:,0], reduced[:,1], s=100, c=Ytrain[:sample_size], alpha=0.5)
    plt.title("t-SNE visualization")
    plt.show()

    # t-SNE on raw data
    reduced = tsne.fit_transform(Xtrain[:sample_size])
    plt.scatter(reduced[:,0], reduced[:,1], s=100, c=Ytrain[:sample_size], alpha=0.5)
    plt.title("t-SNE visualization")
    plt.show()

    pca = PCA()
    reduced = pca.fit_transform(output)
    plt.scatter(reduced[:,0], reduced[:,1], s=100, c=Ytrain, alpha=0.5)
    plt.title("PCA visualization")
    plt.show()
Project: lazyprogrammer    Author: inhwane    | project source | file source
def main():
    X, Y = get_xor_data()

    plt.scatter(X[:,0], X[:,1], s=100, c=Y, alpha=0.5)
    plt.show()

    tsne = TSNE(perplexity=40)
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:,0], Z[:,1], s=100, c=Y, alpha=0.5)
    plt.show()
Project: laughter    Author: ganesh-srinivas    | project source | file source
def main():
    audio_embeddings_dict = cPickle.load(open(AUDIO_EMBEDDINGS_DICT, 'rb'))
    audio_label_indices_dict = cPickle.load(open(AUDIO_LABEL_INDICES_DICT, 'rb'))

    X = []
    ids = []
    for k in audio_embeddings_dict.keys()[:EXAMPLES_SIZE_LIMIT]:
        for embedding in audio_embeddings_dict[k]:
            X.append(embedding)
            ids.append(audio_label_indices_dict[k])

    # Apply t-SNE
    tsne = TSNE(n_components=N_COMPONENTS, perplexity=PERPLEXITY, \
                learning_rate=LEARNING_RATE, n_iter=N_ITER)
    Xtransformed = tsne.fit_transform(X)

    # save the embeddings along with the list of class IDs associated with
    # the clip from which it was taken.

    # Header for output file
    if N_COMPONENTS == 2:
        output_lines = ["dim1,dim2,labels"]
    elif N_COMPONENTS == 3:
        output_lines = ["dim1,dim2,dim3,labels"]

    for i in range(len(Xtransformed)):
        output_lines.append(",".join([str(j) for j in Xtransformed[i]])+ \
                            "," + ",".join([str(k) for k in ids[i]]))

    output_file_contents = "\n".join(output_lines) 
    with open(OUTPUT_FILENAME, 'w') as fh:
        fh.write(output_file_contents)