我们从Python开源项目中,提取了以下25个代码示例,用于说明如何使用sklearn.manifold.MDS。
def plot_MDS(*data):
    '''
    Scatter-plot the samples after reducing them to two dimensions with MDS.

    :param data: train_data, train_value
    :return: None
    '''
    samples, targets = data
    embedded = manifold.MDS(n_components=2).fit_transform(samples)

    # One RGB colour per class. zip() stops at the shorter sequence, so
    # classes beyond the tenth are not drawn.
    palette = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    for cls, rgb in zip(np.unique(targets), palette):
        members = targets == cls
        axes.scatter(embedded[members, 0], embedded[members, 1],
                     label="target= {0}".format(cls), color=rgb)

    axes.set_xlabel("X[0]")
    axes.set_ylabel("X[1]")
    axes.legend(loc="best")
    axes.set_title("MDS")
    plt.show()
def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    Project a data set to two dimensions through a "lens" function.

    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function

    lens is one of 'pca', 'mds' or 'neighbor'; dist is 'euclidean' or
    'correlation'. Extra keyword arguments are forwarded to the underlying
    estimator (PCA, MDS or SpectralEmbedding).

    Raises ValueError for an unsupported n_dim, dist, lens, or for the
    combination of PCA with a non-euclidean distance.
    """
    # Raising a plain string is itself a TypeError in Python 3, which hid the
    # real message; raise a proper exception carrying the same text instead.
    if n_dim != 2:
        raise ValueError('error: image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('error: only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('error: PCA requires the use of euclidean distance metric')

    if lens == 'pca':
        df_lens = pd.DataFrame(decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df), df.index)
    elif lens == 'mds':
        # The pairwise distance matrix (not the raw observations) is handed to MDS.
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    elif lens == 'neighbor':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    else:
        raise ValueError('error: only PCA, MDS, neighborhood lenses are supported')

    return df_lens
def infer_clusters(contactMat, clusters, offsets, alpha, classical=False):
    """Infers 3D coordinates for multiple clusters with same contact matrix"""
    n_points = sum(len(cluster.getPointNums()) for cluster in clusters)
    assert n_points == len(contactMat)

    at.makeSymmetric(contactMat)
    # Every row must carry at least some contact signal.
    rowsums = np.array([sum(row) for row in contactMat])
    assert np.count_nonzero(rowsums == 0) == 0

    distMat = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distMat)

    if not classical:
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=-1,
        )
        coords = embedder.fit_transform(distMat)
    else:
        # classical MDS
        coords = st.cmds(distMat)

    # Each cluster's points occupy a contiguous run of rows starting at its offset.
    for offset, cluster in zip(offsets, clusters):
        for local_index, point in enumerate(cluster.getPoints()):
            point.pos = coords[local_index + offset]
def infer_cluster(contactMat, cluster, alpha, classical=False):
    """Infers 3D coordinates for one cluster"""
    assert len(contactMat) == len(cluster.getPointNums())

    at.makeSymmetric(contactMat)
    # Every row must carry at least some contact signal.
    rowsums = np.array([sum(row) for row in contactMat])
    assert np.count_nonzero(rowsums == 0) == 0

    distMat = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distMat)

    if not classical:
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=-1,
        )
        coords = embedder.fit_transform(distMat)
    else:
        # classical MDS
        coords = st.cmds(distMat)

    # Row i of the embedding belongs to the i-th point of the cluster.
    for row, point in enumerate(cluster.getPoints()):
        point.pos = coords[row]
def fullMDS(path, classical, alpha):
    """MDS without partitioning"""
    # Build a single cluster from the BED file, then embed it in place.
    whole = dt.clusterFromBed(path, None, None)
    contacts = dt.matFromBed(path, whole)
    infer_cluster(contacts, whole, alpha, classical)
    return whole
def visualize_tweets(W, topic_number, color):
    '''
    Scatter-plot the tweets assigned to one topic in a two-dimensional
    PCA projection.

    INPUT
    - W matrix of observations; each row is assigned to the topic (column)
      holding its largest value
    - topic_number - this is the number (zero-based column index) of the
      topic to be checked
    - color - this is the color to be used in creating the scatterplot
    OUTPUT
    - a scatter plot of the selected rows of W flattened to two dimensions
      with PCA, drawn on the current matplotlib figure
    Returns none
    '''
    topic_list = np.apply_along_axis(np.argmax, 1, W)
    Wsubset = W[topic_list == topic_number]

    # The projection was previously constructed twice in a row; once is enough.
    hflat = PCA(n_components=2).fit_transform(Wsubset)

    plt.scatter(hflat[:, 0], hflat[:, 1], color=color, alpha=.1)
    # The title reports the topic with a one-based number.
    plt.title('these are the {} tweets in topic # {}'.format(Wsubset.shape[0],
                                                              topic_number+1))
    # NOTE: plt.show() is deliberately not called here, so the figure is left
    # open for the caller.
def embedding(vi_mat, LL, n_neighbors=10):
    """
    Embed a precomputed dissimilarity matrix in 2-D with MDS and plot it.

    vi_mat is passed to MDS as a precomputed dissimilarity matrix. Two 3-D
    scatter figures are created: the MDS coordinates on the x/y axes and,
    on the z axis and as colour, column 0 and column 1 of LL respectively.

    n_neighbors is accepted for interface compatibility but is not used.

    Returns the (n_samples, 2) array of MDS coordinates.
    """
    n_components = 2
    Y = manifold.MDS(n_components, dissimilarity='precomputed').fit_transform(vi_mat)

    # One figure per column of LL.
    for column in (0, 1):
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(Y[:, 0], Y[:, 1], LL[:, column], c=LL[:, column], marker='o')

    return Y

#################################################################
#load known partitions
def do_embedding(self, event=None):
    """
    Fit the embedding method selected in ``self.method`` on the converted
    frames, store the result and refresh the histogram and embedding plots.

    ``event`` is accepted but not used.
    """
    if self.parent.converted is None:
        #self.conversion.convert_frames()
        self.parent.converted = np.load(self.parent.output_folder+'/converted.npy') #FIXME For debugging
    converted = self.parent.converted

    method_ind = self.method.currentIndex()
    print('Doing %s' % self.method.currentText())

    # Lazily-built estimators keyed by the selected index; an index outside
    # the table leaves self.embedder untouched.
    factories = {
        0: lambda: manifold.SpectralEmbedding(n_components=4, n_jobs=-1),
        1: lambda: manifold.Isomap(n_components=4, n_jobs=-1),
        2: lambda: manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='modified'),
        3: lambda: manifold.LocallyLinearEmbedding(n_components=4, n_jobs=-1, n_neighbors=20, method='hessian', eigen_solver='dense'),
        4: lambda: manifold.MDS(n_components=4, n_jobs=-1),
        5: lambda: manifold.TSNE(n_components=3, init='pca'),
    }
    make_embedder = factories.get(method_ind)
    if make_embedder is not None:
        self.embedder = make_embedder()

    self.embedder.fit(converted)
    self.embed = self.embedder.embedding_
    self.embed_plot = self.embed

    self.gen_hist()
    self.plot_embedding()
    # The classes frame is added only the first time an embedding is made.
    if not self.embedded:
        self.add_classes_frame()
    self.embedded = True
def index(request):
    """
    Render the conceptualiser page with the requesting user's lexicons.

    Redirects to the site root when no model has been selected in the session.
    """
    if 'model' not in request.session:
        return HttpResponseRedirect(URL_PREFIX + '/')

    template = loader.get_template('conceptualiser.html')

    # Annotate every lexicon of this user with its word count.
    lexicons = []
    for lexicon in Lexicon.objects.all().filter(author=request.user):
        lexicon.size = Word.objects.all().filter(lexicon=lexicon.id).count()
        lexicons.append(lexicon)

    context = {
        'STATIC_URL': STATIC_URL,
        'lexicons': lexicons,
        'methods': ["PCA", "TSNE", "MDS"],
    }
    return HttpResponse(template.render(context, request))
def encode(self, data, metric='euclidean'):
    """
    Encode ``data`` with the codebook using a nearest-neighbor rule.

    The prototypes are first ordered along a one-dimensional MDS embedding,
    so that the returned symbols index the prototypes in that order.

    Parameters
    ----------
    data : real array-like, shape(n_samples, n_features)
        Data matrix, each row represents a sample.

    metric : string
        One of the following valid options as defined for function
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances.html.

        Valid options include:

         - euclidean
         - cityblock
         - l1
         - cosine

    Returns
    -------
    encoded_data : real array-like
        ``data``, as represented by the prototypes in codebook.
    ts_symbols : array-like, shape(n_samples, 1)
        A discrete symbolic time series
    """
    # Perform a proposed data mining procedure as described in [Laskaris2004].
    embedder = MDS(1, random_state=self.rng)
    positions_1d = embedder.fit_transform(self.protos).ravel()
    ordering = np.argsort(positions_1d)
    ordered_protos = self.protos[ordering]

    searcher = NearestNeighbors(n_neighbors=1, algorithm='auto', metric=metric)
    searcher.fit(ordered_protos)
    # Only the neighbor indices are kept; the distances are discarded.
    _, self.__symbols = searcher.kneighbors(data)
    self.__encoding = ordered_protos[self.__symbols]
    return (self.__encoding, self.__symbols)
def plot_demo_1():
    """
    Plot 3-D and 2-D MDS embeddings of a three-sample toy data set.

    The two panels are drawn side by side and the figure is saved as
    ``mds_demo_1.png`` inside ``CHART_DIR``. Class colours and markers come
    from the module-level ``colors`` and ``markers`` sequences.
    """
    # Three samples of five identical features each: all 1s, all 2s, all 10s.
    X = np.c_[np.ones(5), 2 * np.ones(5), 10 * np.ones(5)].T
    y = np.array([0, 1, 2])

    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    # Axes.set_axis_bgcolor was removed from matplotlib; use its replacement
    # set_facecolor and fall back to the old name only where it is missing.
    set_background = getattr(ax, 'set_facecolor', None) or ax.set_axis_bgcolor
    set_background('white')

    mds = manifold.MDS(n_components=3)
    Xtrans = mds.fit_transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 3 dimensions")
    ax.view_init(10, -15)

    mds = manifold.MDS(n_components=2)
    Xtrans = mds.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 2 dimensions")

    filename = "mds_demo_1.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
def main():
    """
    Command-line entry point: reconstruct 3D coordinates from a normalized
    intrachromosomal Hi-C BED file.

    Without ``-l`` the whole file is embedded at once (full MDS); with ``-l``
    the partitioned variant is used and its numeric parameters are validated
    first. The resulting cluster is written to ``-o`` when given.
    """
    parser = argparse.ArgumentParser(description="Reconstruct 3D coordinates from normalized intrachromosomal Hi-C BED files.")
    parser.add_argument("path", help="path to intrachromosomal Hi-C BED file")
    parser.add_argument("--classical", action="store_true", help="use classical MDS (default: metric MDS)")
    parser.add_argument("-l", help="path to low-resolution intrachromosomal Hi-C BED file")
    parser.add_argument("-p", type=float, default=0.1, help="domain size parameter: larger value means fewer clusters created (for partitioned MDS only)")
    parser.add_argument("-m", type=float, default=0.05, help="minimum domain size parameter: prevents clusters from being too small (for partitioned MDS only)")
    parser.add_argument("-o", help="path to output file")
    # -r and -n previously had no type, so values given on the command line
    # arrived as strings and were compared against numeric bounds below.
    parser.add_argument("-r", type=int, default=32000000, help="maximum RAM to use (in kb)")
    parser.add_argument("-n", type=int, default=3, help="number of threads")
    parser.add_argument("-a", type=float, default=4, help="alpha factor for converting contact frequencies to physical distances")
    args = parser.parse_args()

    if args.l is None:  # not partitioned
        cluster = fullMDS(args.path, args.classical, args.a)
    else:  # partitioned
        params = (args.p, args.m, args.r, args.n, args.a)
        names = ("Domain size parameter", "Minimum domain size", "Maximum memory", "Number of threads", "Alpha")
        # (lower, upper) bound per parameter, in the same order as params.
        intervals = ((0, 1), (0, 1), (0, None), (0, None), (1, None))
        if not tools.args_are_valid(params, names, intervals):
            sys.exit(0)
        cluster = partitionedMDS(args.path, args.l, params)

    if args.o is not None:
        cluster.write(args.o)
def mds(dataset, labels, attNames, **kwargs):
    """Reduce ``dataset`` to two dimensions with MDS and hand it to ``plot``."""
    reducer = manifold.MDS(n_components=2, max_iter=300)
    plot(reducer.fit_transform(dataset), labels, attNames, **kwargs)


# Assignment 2
def mds_variance_explained(corrmat, mds_coords):
    """Determine how much variance is explained by projection onto MDS coords."""
    # Strict upper triangle (diagonal excluded), so each pair counts once and
    # lines up with the condensed pairwise distances of the embedding.
    upper = np.triu_indices_from(corrmat, 1)
    original = (1 - corrmat)[upper]
    embedded = distance.pdist(mds_coords)
    correlation = stats.pearsonr(original, embedded)[0]
    return correlation ** 2
def get_twodim_reps(reps, seed, distance=euclidean_distances):
    """
    Embed ``reps`` in two dimensions with MDS on a precomputed distance matrix.

    ``distance`` is called on the float64 copy of ``reps`` to build the
    dissimilarity matrix; ``seed`` is used as the MDS random state.
    Returns the fitted embedding.
    """
    dissimilarities = distance(reps.astype(np.float64))
    embedder = MDS(n_components=2, dissimilarity="precomputed", random_state=seed)
    embedder.fit(dissimilarities)
    return embedder.embedding_
def visualize_topics(H):
    '''
    INPUT
    - H matrix of topics
    OUTPUT
    - a scatter plot of the relative location of the different topics from
      each other in a flattened space produced by MDS (shown immediately)
    Returns a tuple (color_list, mds):
    - color_list - the colors used, one per row of the embedding
    - mds - the fitted MDS estimator
    '''
    mds = MDS(n_jobs=-1)
    flattened = mds.fit_transform(H)

    # The palette repeats once there are more topics than colours.
    palette = cycle(["r", "b", "g", "c", "m", "y", "k", "w"])
    color_list = []
    for topic_no, (point, shade) in enumerate(zip(flattened, palette), 1):
        plt.scatter(point[0], point[1],
                    label='topic number {}'.format(topic_no), color=shade)
        color_list.append(shade)

    plt.legend(loc='best')
    plt.show()
    return color_list, mds
def test_MDS(*data):
    '''
    Fit MDS with 4, 3, 2 and 1 components and print the stress of each fit.

    :param data: train_data, train_value
    :return: None
    '''
    samples, _targets = data
    for n_dims in (4, 3, 2, 1):
        fitted = manifold.MDS(n_components=n_dims).fit(samples)
        print('stress(n_components={0}) : {1}'.format(n_dims, str(fitted.stress_)))
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
    """
    Returns an interpolated point cloud following the dissimilarity matrix C
    using SMACOF multidimensional scaling (MDS) in specific dimensionned
    target space

    Parameters
    ----------
    C : ndarray, shape (ns, ns)
        dissimilarity matrix
    dim : int
          dimension of the targeted space
    max_iter :  int
        Maximum number of iterations of the SMACOF algorithm for a single run
    eps : float
        relative tolerance w.r.t stress to declare converge

    Returns
    -------
    npos : ndarray, shape (R, dim)
           Embedded coordinates of the interpolated point cloud (defined with
           one isometry)
    """
    rng = np.random.RandomState(seed=3)

    # First pass: find an embedding from a random start.
    # NOTE(review): this pass is not seeded with ``rng``, so results can vary
    # between calls - confirm whether that is intended.
    mds = manifold.MDS(
        n_components=dim,
        max_iter=max_iter,
        eps=eps,  # previously hard-coded to 1e-9, ignoring the argument
        dissimilarity='precomputed',
        n_init=1)
    pos = mds.fit(C).embedding_

    # Second pass: refine, starting from the first embedding. It must use the
    # same number of components as ``pos`` (it used to be fixed at 2, which
    # only matched for dim == 2).
    nmds = manifold.MDS(
        n_components=dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity="precomputed",
        random_state=rng,
        n_init=1)
    npos = nmds.fit_transform(C, init=pos)

    return npos


##############################################################################
# Data preparation
# ----------------
#
# The four distributions are constructed from 4 simple images
def load_terms(request):
    """
    Return, as JSON, the 2-D positions of the words of the requested lexicons.

    The word vectors are reduced to two dimensions with the method named in
    POST['method'] (TSNE, MDS, otherwise PCA). Words that already belong to
    one of the user's concepts are grouped under that concept; the rest are
    listed as free terms. Redirects to the site root when the session's
    model cannot be looked up.
    """
    lexicon_ids = json.loads(request.POST['lids'])

    try:
        model = model_manager.get_model(request.session['model']).model
    except LookupError:
        return HttpResponseRedirect(URL_PREFIX + '/')

    if model.wv.syn0norm is None:
        model.init_sims()

    def vocab_key(word):
        # The vocabulary is looked up by the UTF-8 encoded word.
        return word.wrd.encode('utf-8')

    # Only words known to the model can be positioned.
    words = [word for word in Word.objects.filter(lexicon__id__in = lexicon_ids)
             if vocab_key(word) in model.wv.vocab]
    feature_vectors = [model.wv.syn0norm[model.wv.vocab[vocab_key(word)].index]
                       for word in words]

    output = {'terms':[],'concepts':[]}

    if not feature_vectors:
        logging.getLogger(INFO_LOGGER).warning(json.dumps({'process':'CREATE CONCEPTS','event':'term_loading_failed','args':{'user_name':request.user.username,'lexicon_ids':lexicon_ids,'dim_red_method':request.POST['method']},'reason':'No terms to load.'}))
        return HttpResponse(json.dumps(output), content_type='application/json')

    X = np.array(feature_vectors)

    method = request.POST['method']
    if method == 'TSNE':
        transformer = TSNE(n_components=2, random_state=0,metric='cosine',learning_rate=50)
    elif method == 'MDS':
        transformer = MDS(n_components=2, max_iter=600,dissimilarity="precomputed", n_jobs=1)
        # MDS is fed the cosine distance matrix rather than the raw vectors.
        X = pairwise_distances(X,metric='cosine',n_jobs=1)
    else:
        transformer = PCA(n_components=2)

    transformed_feature_vectors = transformer.fit_transform(X).tolist()

    # With a single word the coordinates are not read; it is placed at the origin.
    has_layout = len(feature_vectors) > 1

    terms = []
    concepts = {}

    for word, coords in zip(words, transformed_feature_vectors):
        term = {'id':word.id,'term':word.wrd,'count':model.wv.vocab[vocab_key(word)].count,'x':coords[0] if has_layout else 0,'y':coords[1] if has_layout else 0}

        term_concepts = TermConcept.objects.filter(term__term = word.wrd).filter(concept__author = request.user)
        if not term_concepts:
            terms.append(term)
            continue

        # Only the first matching concept of this user is considered.
        concept = term_concepts[0].concept
        concept_id = concept.id
        descriptive_term = concept.descriptive_term.term
        # NOTE(review): computed but not used below - the id placed in the
        # output is looked up through Word instead.
        descriptive_term_id = concept.descriptive_term.id
        if concept_id not in concepts:
            concepts[concept_id] = {'id':concept_id,'terms':[],'descriptive_term':descriptive_term,'descriptive_term_id':Word.objects.filter(wrd=descriptive_term)[0].id}
        concepts[concept_id]['terms'].append(term)

    output['terms'].extend(terms)
    output['concepts'].extend(concepts.values())

    logging.getLogger(INFO_LOGGER).info(json.dumps({'process':'CREATE CONCEPTS','event':'terms_loaded','args':{'user_name':request.user.username,'lexicon_ids':lexicon_ids,'dim_red_method':request.POST['method']}}))

    return HttpResponse(json.dumps(output), content_type='application/json')
def plot_iris_mds():
    """
    Plot the Iris data set reduced to 3 and 2 dimensions, with MDS and PCA.

    Two figures are produced, each with a 3-D panel on the left and a 2-D
    panel on the right, and saved in ``CHART_DIR`` as ``mds_demo_iris.png``
    and ``pca_demo_iris.png``. Class colours and markers come from the
    module-level ``colors`` and ``markers`` sequences.
    """
    def white_background(axes):
        # Axes.set_axis_bgcolor was removed from matplotlib; use its
        # replacement set_facecolor and fall back only where it is missing.
        setter = getattr(axes, 'set_facecolor', None) or axes.set_axis_bgcolor
        setter('white')

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # MDS
    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    white_background(ax)

    mds = manifold.MDS(n_components=3)
    Xtrans = mds.fit_transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on Iris data set in 3 dimensions")
    ax.view_init(10, -15)

    mds = manifold.MDS(n_components=2)
    Xtrans = mds.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on Iris data set in 2 dimensions")

    filename = "mds_demo_iris.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")

    # PCA
    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    white_background(ax)

    pca = decomposition.PCA(n_components=3)
    Xtrans = pca.fit(X).transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("PCA on Iris data set in 3 dimensions")
    ax.view_init(50, -35)

    pca = decomposition.PCA(n_components=2)
    Xtrans = pca.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("PCA on Iris data set in 2 dimensions")

    filename = "pca_demo_iris.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
def validate(classifier, train, test, args, report_base_name):
    """
    Train ``classifier`` on ``train`` and report its performance.

    For every method in ``args.loglikelihood_methods`` this optionally
    reports pairwise distances (``args.calculate_distances``), reports the
    loglikelihoods of the training set, then, for every decision maker
    returned by ``get_decision_makers(args)``, fits it where it has a
    callable ``fit`` and reports classification results on the training and
    the test set. Test-set loglikelihoods are computed and reported once per
    method. Reports are passed to ``_handle_report`` under names derived
    from ``report_base_name``.
    """
    print('\nTraining classifier on %d samples ...' % len(train.X))
    start = timeit.default_timer()
    classifier.fit(train.X, train.y)
    stop = timeit.default_timer()
    print('Classifier trained, took %f seconds' % (stop - start))

    for method in args.loglikelihood_methods:
        report_name = report_base_name + '_' + method

        if args.calculate_distances:
            print('\nCalculating distances ...')
            start = timeit.default_timer()
            distances = classifier.distances(loglikelihood_method=method, n_samples=500)
            print('Distances calculated, took %f seconds' % (timeit.default_timer() - start))
            report = _generate_distance_reports(distances, target_names=train.target_names)
            _handle_report(report, report_name + '_distances', args)

        # Get loglikelihoods for train set
        print('\nValidating classifier on training set with %d samples ...' % len(train.X))
        loglikelihoods_train = _calculate_loglikelihoods(classifier, train.X, method)
        report = _generate_loglikelihood_reports(loglikelihoods_train, train.y, target_names=train.target_names)
        _handle_report(report, report_name + '_train_loglikelihoods', args)

        # Fit decision makers
        loglikelihoods_test = None  # computed lazily, once per method
        for idx, decision_maker in enumerate(get_decision_makers(args)):
            if decision_maker is not None:
                name = args.decision_makers[idx]
                if hasattr(decision_maker, 'fit') and callable(getattr(decision_maker, 'fit')):
                    print('\nTraining decision maker %s on %d loglikelihoods ...' % (name, len(loglikelihoods_train)))
                    # Time this fit itself; the message used to print the
                    # stale duration of an earlier measurement.
                    start = timeit.default_timer()
                    decision_maker.fit(loglikelihoods_train, train.y)
                    stop = timeit.default_timer()
                    print('Decision maker trained, took %f seconds' % (stop - start))
                else:
                    print('\nUsing decision maker %s ...' % name)
                y_pred = _calculate_predictions(decision_maker, loglikelihoods_train)
                report = _generate_classification_reports(train.y, y_pred, target_names=train.target_names)
                _handle_report(report, report_name + '_train_classification_' + name, args)

            # Validate on test set
            print('\nValidating classifier on test set with %d samples ...' % len(test.X))
            if loglikelihoods_test is None:
                loglikelihoods_test = _calculate_loglikelihoods(classifier, test.X, method)
                report = _generate_loglikelihood_reports(loglikelihoods_test, test.y, target_names=test.target_names)
                _handle_report(report, report_name + '_test_loglikelihoods', args)
            if decision_maker is not None:
                y_pred = _calculate_predictions(decision_maker, loglikelihoods_test)
                report = _generate_classification_reports(test.y, y_pred, target_names=test.target_names)
                _handle_report(report, report_name + '_test_classification_' + name, args)
def plot_clusters(num_clusters, feature_matrix, cluster_data, movie_data, plot_size=(16,8)):
    """
    Plot movies in 2-D, coloured and marked by cluster, labelled by title.

    The positions come from MDS applied to the cosine distance matrix of
    ``feature_matrix``. ``cluster_data`` maps each cluster number to a dict
    whose 'key_features' list supplies the legend label (first five
    entries); ``movie_data`` supplies the 'Cluster' and 'Title' columns.
    ``num_clusters`` is accepted but not used. The plot is shown
    immediately; nothing is returned.
    """
    # generate random color for clusters
    def generate_random_color():
        color = '#%06x' % random.randint(0, 0xFFFFFF)
        return color

    # define markers for clusters
    markers = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd']

    # build cosine distance matrix
    cosine_distance = 1 - cosine_similarity(feature_matrix)
    # dimensionality reduction using MDS
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    # get coordinates of clusters in new low-dimensional space
    plot_positions = mds.fit_transform(cosine_distance)
    x_pos, y_pos = plot_positions[:, 0], plot_positions[:, 1]

    # build cluster plotting data
    cluster_color_map = {}
    cluster_name_map = {}
    for cluster_num, cluster_details in cluster_data.items():
        # assign cluster features to unique label
        cluster_color_map[cluster_num] = generate_random_color()
        cluster_name_map[cluster_num] = ', '.join(cluster_details['key_features'][:5]).strip()

    # map each unique cluster label with its coordinates and movies
    cluster_plot_frame = pd.DataFrame({'x': x_pos,
                                       'y': y_pos,
                                       'label': movie_data['Cluster'].values.tolist(),
                                       'title': movie_data['Title'].values.tolist()
                                       })
    grouped_plot_frame = cluster_plot_frame.groupby('label')

    # set plot figure size and axes
    fig, ax = plt.subplots(figsize=plot_size)
    ax.margins(0.05)

    # plot each cluster using co-ordinates and movie titles
    for cluster_num, cluster_frame in grouped_plot_frame:
        # Clusters beyond the marker list get a randomly chosen marker.
        marker = markers[cluster_num] if cluster_num < len(markers) \
            else np.random.choice(markers, size=1)[0]
        ax.plot(cluster_frame['x'], cluster_frame['y'],
                marker=marker, linestyle='', ms=12,
                label=cluster_name_map[cluster_num],
                color=cluster_color_map[cluster_num], mec='none')
        ax.set_aspect('auto')
        # Booleans rather than 'off' strings: current matplotlib treats any
        # non-empty string as true, which would leave the ticks switched on.
        ax.tick_params(axis='x', which='both', bottom=False, top=False,
                       labelbottom=False)
        ax.tick_params(axis='y', which='both', left=False, top=False,
                       labelleft=False)

    fontP = FontProperties()
    fontP.set_size('small')
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.01), fancybox=True,
              shadow=True, ncol=5, numpoints=1, prop=fontP)

    # add labels as the film titles
    # (DataFrame.ix no longer exists in pandas; walking the columns gives the
    # same row-by-row values.)
    for x, y, title in zip(cluster_plot_frame['x'],
                           cluster_plot_frame['y'],
                           cluster_plot_frame['title']):
        ax.text(x, y, title, size=8)

    # show the plot
    plt.show()
def visualize_encodings(encodings, file_name=None, grid=None, skip_every=999, fast=False, fig=None, interactive=False):
    """
    Draw several 2-D and 3-D manifold projections of ``encodings`` on a grid.

    The encodings first go through ``manual_pca``; if at most three columns
    remain they are plotted directly via ``print_data_only`` and its result
    is returned. Otherwise the first 720 rows are projected with each
    configured method (LLE, MDS, TSNE) into its own subplot. A projection
    that fails is reported on stdout and skipped. The figure is saved to
    ``file_name`` unless ``interactive`` is set.

    ``grid`` defaults to (3, 4). ``fast`` is accepted but not used.
    """
    encodings = manual_pca(encodings)
    if encodings.shape[1] <= 3:
        return print_data_only(encodings, file_name, fig=fig, interactive=interactive)

    encodings = encodings[0:720]
    # Precomputed square distance matrices for the methods that need them.
    hessian_euc = dist.squareform(dist.pdist(encodings[0:720], 'euclidean'))
    hessian_cos = dist.squareform(dist.pdist(encodings[0:720], 'cosine'))
    grid = (3, 4) if grid is None else grid
    project_ops = []

    n = 2
    project_ops.append(("LLE ltsa       N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
    project_ops.append(("LLE modified   N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
    project_ops.append(('MDS euclidean  N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
    project_ops.append(("TSNE 30/2000   N:%d" % n, TSNE(perplexity=30, n_components=n, init='pca', n_iter=2000)))
    n = 3
    project_ops.append(("LLE ltsa       N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
    project_ops.append(("LLE modified   N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
    project_ops.append(('MDS euclidean  N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
    project_ops.append(('MDS cosine     N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))

    # 1-based subplot slots for 12 plots, leaving out every slot whose number
    # is a multiple of skip_every.
    plot_places = []
    for i in range(12):
        u, v = int(i / (skip_every - 1)), i % (skip_every - 1)
        j = v + u * skip_every + 1
        plot_places.append(j)

    fig = get_figure(fig)
    fig.set_size_inches(fig.get_size_inches()[0] * grid[0] / 1.,
                        fig.get_size_inches()[1] * grid[1] / 2.0)

    for i, (name, manifold) in enumerate(project_ops):
        # The dimensionality and the metric are both encoded in the name.
        is3d = 'N:3' in name
        try:
            if is3d:
                subplot = plt.subplot(grid[0], grid[1], plot_places[i], projection='3d')
            else:
                subplot = plt.subplot(grid[0], grid[1], plot_places[i])
            data_source = encodings if not _needs_hessian(manifold) else \
                (hessian_cos if 'cosine' in name else hessian_euc)
            projections = manifold.fit_transform(data_source)
            scatter(subplot, projections, is3d, _build_radial_colors(len(data_source)))
            subplot.set_title(name)
        except Exception:
            # Best effort: one failing projection must not abort the others.
            # Catching Exception rather than everything lets Ctrl-C and
            # SystemExit still stop the run.
            print(name, "Unexpected error: ", sys.exc_info()[0], sys.exc_info()[1] if len(sys.exc_info()) > 1 else '')

    visualize_data_same(encodings, grid=grid, places=plot_places[-4:])
    if not interactive:
        save_fig(file_name, fig)
    ut.print_time('visualization finished')
def get_arrangement_permutation(
        dist, mode, model=None, clusters=None, init_perm=None):
    """
    Compute an ordering of the items described by the distance matrix ``dist``.

    ``mode`` selects the strategy: "none" (identity order), "hamilton",
    "hamilton_annealing", "tsne", "mds" or "dendro"; any other value raises
    ValueError. When ``model`` is truthy, quality figures and the elapsed
    time are logged on it and ``model.NDS`` is set. ``clusters`` and
    ``init_perm`` are accepted but not used.
    """
    started = time.time()

    if mode == "none":
        return list(range(dist.shape[0]))

    if mode in ("hamilton", "hamilton_annealing"):
        from .hamilton_path import HamiltonPath
        solver = HamiltonPath(dist, caller=model)
        if mode == "hamilton":
            solver.solve()
        else:
            solver.solve_annealing()
        perm = solver.path
    elif mode == "tsne":
        from sklearn.manifold import TSNE
        projector = TSNE(n_components=1, random_state=0, metric="precomputed")
        # Items are ordered by their position on the single embedded axis.
        perm = np.argsort(projector.fit_transform(dist).reshape(-1))
    elif mode == "mds":
        from sklearn.manifold import MDS
        projector = MDS(
            n_components=1,
            max_iter=3000,
            eps=1e-9,
            random_state=0,
            dissimilarity="precomputed",
            n_jobs=4)
        perm = np.argsort(projector.fit_transform(dist).reshape(-1))
    elif mode == "dendro":
        from algo.arranging.dendro_arranger import DendroArranger
        perm = DendroArranger(dist).arrange()
    else:
        raise ValueError("Unknown mode: %s" % mode)

    if model:
        from .quality import NDS, MNR
        model.NDS = NDS(dist, perm)
        model.log("NDS=%f" % model.NDS)
        model.log("MNR=%f" % MNR(dist, perm))
        model.log("Time=%f" % (time.time() - started))

    return perm