The following code examples, extracted from open-source Python projects, illustrate how to use matplotlib.pyplot.annotate().
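For orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the parameters these examples lean on most: xy is the data point being annotated, xytext is where the text goes, textcoords='offset points' interprets xytext as an offset in points from the data point, and arrowprops draws an arrow from the text back to the point.

    import matplotlib.pyplot as plt

    plt.plot([0, 1, 2, 3], [0, 1, 4, 9], marker='o')
    # Label the point (2, 4); the text sits 15 points up and to the right,
    # and an arrow points back at the data point.
    plt.annotate('sample point',
                 xy=(2, 4),
                 xytext=(15, 15),
                 textcoords='offset points',
                 arrowprops=dict(arrowstyle='->'))
    plt.show()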
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(18, 18))  # in inches
    x = low_dim_embs[:, 0]
    y = low_dim_embs[:, 1]
    plt.scatter(x, y)
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
    plt.show()
    # plt.savefig(filename)
def plot_confusion_matrix(cm, col, title, cmap=plt.cm.viridis):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    for i in range(cm.shape[0]):
        plt.annotate("%.2f" % cm[i][i], xy=(i, i),
                     horizontalalignment='center',
                     verticalalignment='center')
    plt.title(title, fontsize=18)
    plt.colorbar(fraction=0.046, pad=0.04)
    tick_marks = np.arange(len(col.unique()))
    plt.xticks(tick_marks, sorted(col.unique()), rotation=90)
    plt.yticks(tick_marks, sorted(col.unique()))
    plt.tight_layout()
    plt.ylabel('True label', fontsize=18)
    plt.xlabel('Predicted label', fontsize=18)

# using flavor network to project recipes from ingredient matrix to flavor matrix
def word_cloud(word_embedding_matrix, vocab, s, save_file='scatter.png'):
    words = [(i, vocab[i]) for i in s]
    model = TSNE(n_components=2, random_state=0)
    # Note that the following line might use a good chunk of RAM
    tsne_embedding = model.fit_transform(word_embedding_matrix)
    words_vectors = tsne_embedding[np.array([item[1] for item in words])]

    plt.subplots_adjust(bottom=0.1)
    plt.scatter(words_vectors[:, 0], words_vectors[:, 1],
                marker='o', cmap=plt.get_cmap('Spectral'))

    for label, x, y in zip(s, words_vectors[:, 0], words_vectors[:, 1]):
        plt.annotate(
            label,
            xy=(x, y), xytext=(-20, 20),
            textcoords='offset points', ha='right', va='bottom',
            fontsize=20,
            # bbox=dict(boxstyle='round,pad=1.', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle='<-', connectionstyle='arc3,rad=0')
        )
    plt.show()
    # plt.savefig(save_file)
def drawComplex(origData, ripsComplex, axes=[-6, 8, -6, 6]):
    plt.clf()
    plt.axis(axes)
    plt.scatter(origData[:, 0], origData[:, 1])  # plotting just for clarity
    for i, txt in enumerate(origData):
        plt.annotate(i, (origData[i][0] + 0.05, origData[i][1]))  # add labels

    # add lines for edges
    for edge in [e for e in ripsComplex if len(e) == 2]:
        # print(edge)
        pt1, pt2 = [origData[pt] for pt in [n for n in edge]]
        # plt.gca().add_line(plt.Line2D(pt1, pt2))
        line = plt.Polygon([pt1, pt2], closed=None, fill=None, edgecolor='r')
        plt.gca().add_line(line)

    # add triangles
    for triangle in [t for t in ripsComplex if len(t) == 3]:
        pt1, pt2, pt3 = [origData[pt] for pt in [n for n in triangle]]
        line = plt.Polygon([pt1, pt2, pt3], closed=False,
                           color="blue", alpha=0.3, fill=True, edgecolor=None)
        plt.gca().add_line(line)
    plt.show()
def drawComplex(data, ph, axes=[-6, 8, -6, 6]):
    plt.clf()
    plt.axis(axes)  # axes = [x1, x2, y1, y2]
    plt.scatter(data[:, 0], data[:, 1])  # plotting just for clarity
    for i, txt in enumerate(data):
        plt.annotate(i, (data[i][0] + 0.05, data[i][1]))  # add labels

    # add lines for edges
    for edge in [e for e in ph.ripsComplex if len(e) == 2]:
        # print(edge)
        pt1, pt2 = [data[pt] for pt in [n for n in edge]]
        # plt.gca().add_line(plt.Line2D(pt1, pt2))
        line = plt.Polygon([pt1, pt2], closed=None, fill=None, edgecolor='r')
        plt.gca().add_line(line)

    # add triangles
    for triangle in [t for t in ph.ripsComplex if len(t) == 3]:
        pt1, pt2, pt3 = [data[pt] for pt in [n for n in triangle]]
        line = plt.Polygon([pt1, pt2, pt3], closed=False,
                           color="blue", alpha=0.3, fill=True, edgecolor=None)
        plt.gca().add_line(line)
    plt.show()
def plot_venn_diagram(subsets=(73014, 11928, 2929, 15670, 2370, 1422, 851),
                      set_labels=('EMS', 'Mental Health', 'JIMS')):
    '''
    Plot a 3-circle venn diagram.
    :params tuple subsets: (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    :return: None
    :rtype: None
    '''
    fig = plt.figure(figsize=(8, 8))
    v = venn3(subsets=subsets, set_labels=set_labels)
    # format each subset count with thousands separators
    for i in range(len(v.subset_labels)):
        v.subset_labels[i].set_text('{:,}'.format(subsets[i]))
    plt.title("Venn Diagram")
    # plt.annotate("Unknown Set", xy=v.get_label_by_id('100').get_position() - np.array([0, 0.05]),
    #              xytext=(-70, -70), ha='center', textcoords='offset points',
    #              bbox=dict(boxstyle='round,pad=0.5', fc='gray', alpha=0.1),
    #              arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.5', color='gray'))
    fig.savefig("venn.png")
def projection(embeddings, token_list):
    for k in range(6):
        embeddings = np.concatenate((embeddings, embeddings), axis=0)
    proj = PCA(embeddings)
    PCA_proj = proj.Y
    print(PCA_proj.shape)
    # plotting words within the 2D space of the two principal components:
    tokens = token_list[0]
    for n in range(maxlen):
        plt.plot(PCA_proj[n][0] + 1, PCA_proj[n][1], 'w.')
        plt.annotate(tokens[n],
                     xy=(PCA_proj[n][0], PCA_proj[n][1]),
                     xytext=(PCA_proj[n][0], PCA_proj[n][1]))
    plt.show()
    return
def gen(self):
    embedding, _ = self.embedding()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint('.'))
        embedding = sess.run(embedding)
        # keep only the vectors of the words to visualize
        data = embedding[:self.viz_words, :]
        # project to two dimensions with t-SNE
        tsne = TSNE(n_components=2, init='pca', random_state=0)
        embed_tsne = tsne.fit_transform(data)
        # plot each word at its projected position
        plt.subplots(figsize=(10, 10))
        for idx in range(self.viz_words):
            plt.scatter(*embed_tsne[idx, :], color='steelblue')
            plt.annotate(self.train_text.int_to_vocab[idx],
                         (embed_tsne[idx, 0], embed_tsne[idx, 1]), alpha=0.7)
        plt.show()
def main():
    args = parse_args()
    print('Called with args:')
    print(args)
    lang_db = get_language_model(args.lang_name)
    imdb = get_imdb(args.imdb_name)

    # Get words in space
    vocabulary = imdb.get_labels(args.space)
    # Get features for words
    wv = [lang_db.word_vector(w) for w in vocabulary]

    from sklearn.metrics.pairwise import cosine_similarity
    from scipy import spatial
    # spatial.distance.cosine(dataSetI, dataSetII)

    tsne = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    Y = tsne.fit_transform(wv)
    plt.scatter(Y[:, 0], Y[:, 1])
    for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
        plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    plt.show()
def plotFronts(fronts, x0, x1, **kwargs):
    fig = plt.figure()
    ax = plt.gca()
    if 'size' in kwargs:
        ax.scatter(x0, x1, c='k', s=kwargs['size'])
    else:
        ax.plot(x0, x1, 'ok')
    for l0 in fronts:
        tmp0 = x0[l0]
        tmp1 = x1[l0]
        ax.plot(tmp0, tmp1, '-')
    if 'annotate' in kwargs and kwargs['annotate']:
        for label, x, y in zip(range(0, len(x0)), x0, x1):
            plt.annotate(
                label,
                xy=(x, y), xytext=(-10, 10),
                textcoords='offset points', ha='right', va='bottom',
                arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=-0.2'))
    return fig
def main(we_file='glove_model_50.npz', w2i_file='glove_word2idx_50.json'):
    words = ['japan', 'japanese', 'england', 'english', 'australia', 'australian',
             'china', 'chinese', 'italy', 'italian', 'french', 'france',
             'spain', 'spanish']

    with open(w2i_file) as f:
        word2idx = json.load(f)

    npz = np.load(we_file)
    W = npz['arr_0']
    V = npz['arr_1']
    We = (W + V.T) / 2

    idx = [word2idx[w] for w in words]
    # We = We[idx]

    tsne = TSNE()
    Z = tsne.fit_transform(We)
    Z = Z[idx]
    plt.scatter(Z[:, 0], Z[:, 1])
    for i in range(len(words)):
        plt.annotate(words[i], xy=(Z[i, 0], Z[i, 1]))
    plt.show()
def plot(self, node=None):
    x = [p.x for p in self.map]
    y = [p.y for p in self.map]
    plt.scatter(x, y)
    for p in self.map:
        plt.annotate(p.index, xy=(p.x, p.y), xytext=(-5, -5),
                     textcoords='offset points', ha='right', va='bottom')
        if p.pred is not None:
            plt.plot([p.x, p.pred.x], [p.y, p.pred.y], c='g', linewidth=4)
    for i in range(len(node.state[2])):
        plt.plot([node.state[0][i], node.state[0][node.state[2][i]]],
                 [node.state[1][i], node.state[1][node.state[2][i]]], c='k')
    plt.show()
def visualize_2D_trip(self, trip):
    plt.figure(figsize=(30, 30))
    rcParams.update({'font.size': 22})

    # Plot cities
    plt.scatter(trip[:, 0], trip[:, 1], s=200)

    # Plot tour
    tour = np.array(list(range(len(trip))) + [0])
    X = trip[tour, 0]
    Y = trip[tour, 1]
    plt.plot(X, Y, "--", markersize=100)

    # Annotate cities with order
    labels = range(len(trip))
    for i, (x, y) in zip(labels, zip(X, Y)):
        plt.annotate(i, xy=(x, y))

    plt.xlim(0, 100)
    plt.ylim(0, 100)
    plt.show()

# Heatmap of permutations (x=cities; y=steps)
def visualize_2D_trip(self, trip, tw_open, tw_close):
    plt.figure(figsize=(30, 30))
    rcParams.update({'font.size': 22})

    # Plot cities
    colors = ['red']  # Depot is first city
    for i in range(len(tw_open) - 1):
        colors.append('blue')
    plt.scatter(trip[:, 0], trip[:, 1], color=colors, s=200)

    # Plot tour
    tour = np.array(list(range(len(trip))) + [0])
    X = trip[tour, 0]
    Y = trip[tour, 1]
    plt.plot(X, Y, "--", markersize=100)

    # Annotate cities with TW
    tw_open = np.rint(tw_open)
    tw_close = np.rint(tw_close)
    time_window = np.concatenate((tw_open, tw_close), axis=1)
    for tw, (x, y) in zip(time_window, zip(X, Y)):
        plt.annotate(tw, xy=(x, y))

    plt.xlim(0, 60)
    plt.ylim(0, 60)
    plt.show()

# Heatmap of permutations (x=cities; y=steps)
def plot(self, length=5):
    if self.populates is None or self.depopulates is None:
        raise TypeError('Transition not linked to a populated and depopulated state')
    else:
        # pull the state information
        start_state_x = 2 * length * self.depopulates.band + 2 * length
        start_state_y = self.depopulates.energy
        end_state_x = 2 * length * self.populates.band + 2 * length
        end_state_y = self.populates.energy

        # draw the arrow from the middle of the state
        start_trans_xy = (start_state_x + length / 2, start_state_y)
        end_trans_xy = (end_state_x + length / 2, end_state_y)
        props = dict(facecolor='red', width=0.05, headwidth=10)
        plt.annotate("", xy=end_trans_xy, xytext=start_trans_xy, arrowprops=props)

        # label the transition
        av_x = (start_state_x + end_state_x + length) / 2
        av_y = (start_state_y + end_state_y + length) / 2
        plt.annotate(str(self.energy), xy=(av_x, av_y))
def main():
    # model_file = "../data/word2vec/character.model"
    model_file = "../data/word2vec_new/word.model"
    checkSimilarity(model_file, "?")

    # character_wv_file = '../data/word2vec/character_model.txt'
    # word_wv_file = '../data/word2vec/word_model.txt'
    #
    # embeddings_file = word_wv_file
    # wv, vocabulary = load_embeddings(embeddings_file)
    #
    # tsne = TSNE(n_components=2, random_state=0)
    # np.set_printoptions(suppress=True)
    # Y = tsne.fit_transform(wv[:1000, :])
    #
    # plt.scatter(Y[:, 0], Y[:, 1])
    # for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
    #     plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
    # plt.show()
def plot_with_para_labels(low_dim_embs, para_labels, filename='tsne-para.png'):
    assert low_dim_embs.shape[0] == len(para_labels), "label number must equal embedding number"
    plt.clf()
    plt.figure(figsize=(200, 36))
    axes = plt.axes([0.5, 0.1, 0.4, 0.8])
    axes.scatter(low_dim_embs[:, 0], low_dim_embs[:, 1],
                 marker='o', cmap=plt.cm.seismic, s=80)
    for label, x, y in zip(para_labels, low_dim_embs[:, 0], low_dim_embs[:, 1]):
        plt.annotate(
            label,
            xy=(x, y), xytext=(-8, -3),
            textcoords='offset points', ha='right', va='bottom',
            bbox=None, arrowprops=None)
    plt.savefig(filename)
def showFigure(y):
    plot.xlabel('# objects')
    plot.ylabel(y)
    plot.xticks(xs, ['1'] + [''] * 10 + ['12'] + [''] * 11 + ['24'] + [''] * 10 + ['36'])
    ys = range(-1, int(MAXIMUMY + 1))
    print(MAXIMUMY)
    plot.ylim(ymin=-1)
    # plot.axvline(x = 12, ymin = 0, ymax = 24, color = 'k')
    # plot.annotate("Within-sample generalization",
    #               rotation = 90,
    #               xytext = (11.5, 10),
    #               xy = (7, 10),
    #               arrowprops = dict(facecolor = 'black', shrink = 0.05))
    # plot.annotate("out-of-sample generalization",
    #               rotation = 90,
    #               xytext = (12.5, 10),
    #               xy = (12 + 5, 10),
    #               arrowprops = dict(facecolor = 'black', shrink = 0.05))
    plot.legend(['SMC+NN (100 particles)', 'NN (10 particles)',
                 'SMC (1000 particles)', 'LSTM (1000 particles)'],
                loc=0, fontsize=9)
    plot.show()
def plot(self, filename="./corpus/model/blog.png"): tsne = TSNE(perplexity=30, n_components=2, init="pca", n_iter=5000) plot_only=500 low_dim_embeddings = tsne.fit_transform(self.final_embeddings[:plot_only, :]) reversed_dictionary = dict(zip(self.dictionary.values(), self.dictionary.keys())) labels = [reversed_dictionary[i] for i in range(plot_only)] plt.figure(figsize=(18, 18)) for i, label in enumerate(labels): x, y = low_dim_embeddings[i, :] plt.scatter(x, y) plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords="offset points", ha="right", va="bottom") plt.savefig(filename) print("Scatter plot was saved to", filename)
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(18, 18))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    plt.savefig(filename)

# t-SNE visualization
def plot_time_vs_s(time, norm, point_labels, title):
    plt.figure()
    colors = ['g', 'b', 'y']
    for i, l in enumerate(sorted(norm.keys())):
        if l != "fbpca":
            plt.plot(time[l], norm[l], label=l, marker='o', c=colors.pop())
        else:
            plt.plot(time[l], norm[l], label=l, marker='^', c='red')
        for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
            plt.annotate(label, xy=(x, y), xytext=(0, -20),
                         textcoords='offset points', ha='right', va='bottom')
    plt.legend(loc="upper right")
    plt.suptitle(title)
    plt.ylabel("norm discrepancy")
    plt.xlabel("running time [s]")
def scatter_time_vs_s(time, norm, point_labels, title):
    plt.figure()
    size = 100
    for i, l in enumerate(sorted(norm.keys())):
        if l != "fbpca":
            plt.scatter(time[l], norm[l], label=l, marker='o', c='b', s=size)
            for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
                plt.annotate(label, xy=(x, y), xytext=(0, -80),
                             textcoords='offset points', ha='right',
                             arrowprops=dict(arrowstyle="->", connectionstyle="arc3"),
                             va='bottom', size=11, rotation=90)
        else:
            plt.scatter(time[l], norm[l], label=l, marker='^', c='red', s=size)
            for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
                plt.annotate(label, xy=(x, y), xytext=(0, 30),
                             textcoords='offset points', ha='right',
                             arrowprops=dict(arrowstyle="->", connectionstyle="arc3"),
                             va='bottom', size=11, rotation=90)
    plt.legend(loc="best")
    plt.suptitle(title)
    plt.ylabel("norm discrepancy")
    plt.xlabel("running time [s]")
def plot_annotations(annotations):
    # draw the movement between the word through the decades as a series of
    # annotations on the graph
    annotations.sort(key=lambda w: w[1], reverse=True)
    prev = annotations[0][-1]
    for ww, decade, ann in annotations[1:]:
        plt.annotate('', xy=prev, xytext=ann,
                     arrowprops=dict(facecolor='blue', shrink=0.1, alpha=0.3,
                                     width=2, headwidth=15))
        print(prev, ann)
        prev = ann
def plot_with_labels(embeds, labels, filename="output.png"):
    plt.figure(figsize=(18, 18))
    pca = decomposition.PCA(n_components=2)
    pca.fit(embeds)
    Y = pca.transform(embeds)
    for i, label in enumerate(labels):
        x, y = Y[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    plt.savefig(filename)
def pocket_composition(self, mp, sefiles, cycle, massbot, masstop, isotopes,
                       label, legend, color, title):  # hdf5out
    '''
    mass_range - required to plot data in a certain mass range.
                 Needed for read_iso_abund_marco.
    cycle - which cycle from the h5 file? Needed for read_iso_abund_marco.
    stable - logic if want to plot only stable or not.
    i_decay - if = 1 plot not decayed, if = 2 plot decayed.
              Makes sense only if stable is true.
    '''
    mass_range = [massbot, masstop]
    sefiles.average_iso_abund_marco(mass_range, cycle, stable=False, i_decay=1)
    mass = []
    plotiso = []
    startyields = []
    plotiso_massfrac = []
    for i in range(len(isotopes)):
        startyields.append(sefiles.get(sefiles.se.cycles[0], isotopes[i])[0])
    for j in range(len(sefiles.se.isotopes)):
        if sefiles.se.isotopes[j] in isotopes:
            plotiso.append(mp.average_mass_frac[j])
            mass.append(sefiles.se.A[j])
    for i in range(len(isotopes)):
        plotiso_massfrac.append(plotiso[i] / startyields[i])
    plt.plot(mass, plotiso_massfrac, marker='*', markersize=8,
             mfc=color, linestyle='None', label=legend)
    # plt.plot(mass, plotiso_massfrac, marker='*')
    if label == True:
        for j in range(len(isotopes)):
            plt.annotate(isotopes[j], xytext=(0, 10),
                         textcoords='offset points',
                         xy=(mass[j], plotiso_massfrac[j]))
    mass = np.array(mass)
    plt.xlim(mass.min() - 4, mass.max() + 4)
    plt.legend()
    plt.title(title)
    plt.xlabel("mass number")
    plt.ylabel("Isotopic production factors")
def plot(self, two_d_embeddings, labels):
    plt.figure()
    for i, label in enumerate(labels):
        x, y = two_d_embeddings[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y))
    plt.show()
def plot_scores(scores, names, save=None):
    scores = asarray(scores)
    # roc_auc and f1 plot
    plot.figure()
    plot.scatter(scores[:, 0], scores[:, 1])
    for i, txt in enumerate(names):
        plot.annotate(txt, (scores[i, 0], scores[i, 1]))
    plot.xlim(xmax=1.0)
    plot.ylim(ymax=1.0)
    plot.ylabel('ROC AUC Score')
    plot.xlabel('F1 score')
    if save:
        plot.savefig(save)
    else:
        plot.show()
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(18, 18))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    plt.savefig(filename)
def _plot_with_labels(low_dim_embs, labels, path, size):
    import matplotlib.pyplot as plt
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    figure = plt.figure(figsize=size)  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    if path is not None:
        figure.savefig(path)
    plt.close(figure)
def plot_with_labels(low_dim_embs, labels, filename='tsne_c5s5r5.png'):
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    colors = cm.rainbow(np.linspace(0, 1, len(np.unique(np.array(labels)))))
    plt.figure(figsize=(18, 18))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y, color=colors[label])
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    plt.savefig(filename)
def imvort(self, kt, cax=[-1, 1], scaled=None, maxi=None):
    t, z2d = self.read_crop('vorticity', kt)
    t, psi = self.read_crop('psi', kt)
    z2d[z2d == 0] = nan
    cm = redblue(nstripes=10)
    cm.set_bad((0.3, 0.3, 0.3, 1))
    if scaled is None:
        im = plt.imshow(flipud(z2d), vmin=cax[0], vmax=cax[1],
                        cmap=cm, extent=self.domain)
    else:
        idx = where(~isnan(z2d))
        print('\n', shape(idx))
        # wc = median(abs(z2d[idx[0], idx[1]]))
        wc = median(abs(z2d[idx]))
        print('wc=%g' % wc)
        z2d = flipud(z2d)
        im = plt.imshow(sign(z2d) * log(1 + (z2d / wc) ** 2),
                        vmin=-12, vmax=12, cmap=cm, extent=self.domain)
    plt.colorbar(im)
    if maxi is None:
        maxi = roundlog(max(abs(psi)))
    # print(linspace(-maxi, maxi, 21))
    ci = maxi / 10
    if scaled is None:
        plt.annotate('CI=%2g' % ci, (0.95, 0.05), xycoords='axes fraction',
                     color='g', fontsize=16, horizontalalignment='right')
    plt.contour(self.x[self.xidx], self.y[self.yidx], psi,
                linspace(-maxi, maxi, 21), colors='g', linewidths=2)  # 'ci=%2g' % ci
    # plt.contour(psi, [0], colors='k', linewidths=2)
    if scaled is None:
        plt.title('N = %i / t = %4.2f' % (self.nx, t), fontsize=14)
        plt.xlabel('x')
        plt.ylabel('y')
    else:
        plt.title(r'$t_v=%4.0f$' % t, fontsize=16)
        plt.xlabel(r'$x$', fontsize=16)
        plt.ylabel(r'$y$', fontsize=16)
def roc(y_label, y_score, name):
    fpr = dict()
    tpr = dict()
    thresholds = dict()
    roc_auc = dict()
    for i in range(2):
        fpr[i], tpr[i], thresholds[i] = roc_curve(y_label[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    ind_max = np.argmax(1 - fpr[1] + tpr[1])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_label.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Plot of a ROC curve for a specific class
    plt.figure(num=None, figsize=(8, 6), dpi=150, facecolor='w', edgecolor='w')
    plt.plot(fpr[1], tpr[1], label='ROC Curve (Area = %0.2f)' % roc_auc[1], color="g")
    plt.plot([fpr[1][ind_max], fpr[1][ind_max]],
             [fpr[1][ind_max], tpr[1][ind_max]], 'k:')
    plt.annotate(r'$\bf J$',
                 xy=(fpr[1][ind_max] - 0.04, (fpr[1][ind_max] + tpr[1][ind_max]) / 2),
                 color='black', fontsize=20)
    plt.plot(fpr[1][ind_max], tpr[1][ind_max], marker='v', markersize=10,
             linestyle='None', color='brown',
             label="Decision Threshold (DT),\nMax. Youden's J Statistic")
    plt.annotate('DT: %0.2f\nTPR: %0.2f\nFPR: %0.2f'
                 % (thresholds[1][ind_max], tpr[1][ind_max], fpr[1][ind_max]),
                 xy=(fpr[1][ind_max] + 0.015, tpr[1][ind_max] - 0.175),
                 color='black', fontsize=20)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.tick_params(axis='both', which='major', labelsize=15)
    plt.tick_params(axis='both', which='minor', labelsize=15)
    plt.xlabel('False Positive Rate (1 - Specificity)', fontsize=20, labelpad=15)
    plt.ylabel('True Positive Rate (Sensitivity)', fontsize=20, labelpad=15)
    plt.legend(loc="lower right", fontsize=20, numpoints=1)
    plt.savefig(name, bbox_inches='tight')
    plt.close()
def plot_ga_result(statistics):
    import matplotlib.pyplot as plt

    gen = []
    bests = []
    worsts = []
    avgs = []
    avg_time_per_gen = 0
    for key, value in statistics.items():
        if key != 'avg_time_per_gen':
            gen.append(key)
            bests.append(value['best'])
            worsts.append(value['worst'])
            avgs.append(value['avg'])
        elif key == 'avg_time_per_gen':
            avg_time_per_gen = value

    arrow_idx = int(len(gen) * 0.7)
    arrow_x = gen[arrow_idx]
    arrow_y = bests[arrow_idx]
    plt.plot(gen, bests, 'g-')
    plt.annotate('best', xy=(arrow_x, arrow_y))

    arrow_y = worsts[arrow_idx]
    plt.plot(gen, worsts, 'r-')
    plt.annotate('worst', xy=(arrow_x, arrow_y))

    arrow_y = avgs[arrow_idx]
    plt.plot(gen, avgs, 'b-')
    plt.annotate('avg', xy=(arrow_x, arrow_y))

    plt.ylabel('Fitness')
    plt.xlabel('Generation')

    xmin, xmax, ymin, ymax = plt.axis()
    textX = abs(xmax - xmin) * 0.1
    textY = abs(ymax) * 0.95
    plt.text(textX, textY, 'avg time per gen: %f (sec.)' % avg_time_per_gen)
    plt.grid(True)
    plt.show()
def wv_visualize(self, model_path, word=["??", "??"]):
    """
    Visualize word vectors: find the words most similar to the query words,
    project their vectors to 2-D with PCA, and plot them.

    Args:
        model_path: path to the trained Word2Vec model
    """
    # load the model
    model = word2vec.Word2Vec.load(model_path)

    # find the most similar words
    words = [wp[0] for wp in model.most_similar(word, topn=20)]

    # collect the vectors of those words
    wordsInVector = [model[word] for word in words]

    # reduce to two dimensions with PCA
    pca = PCA(n_components=2)
    pca.fit(wordsInVector)
    X = pca.transform(wordsInVector)

    # plot the projection
    xs = X[:, 0]
    ys = X[:, 1]
    plt.figure(figsize=(12, 8))
    plt.scatter(xs, ys, marker='o')

    # annotate each point with its word
    for i, w in enumerate(words):
        plt.annotate(
            w,
            xy=(xs[i], ys[i]), xytext=(6, 6),
            textcoords='offset points', ha='left', va='top',
            **dict(fontsize=10))
    plt.show()
def save(img):
    # Convert from 8-bit integers to floats for plt.imshow to work in the range of [0, 1]
    img = np.array(img, dtype=np.float64) / 255
    plt.imshow(img)
    plt.annotate("Number of colors: " + str(n_colors) +
                 "\nApprox file size: " + str(round(calc_file_size(img) / 1024, 1)) + " kB",
                 xy=(.25, .25), xytext=(40, 100), fontsize=10,
                 bbox={'facecolor': 'white', 'alpha': 0.85, 'pad': 5})
    plt.axis('off')
    plt.savefig(str(n_colors) + "colors.png", bbox_inches='tight')
    plt.close()
def plot_with_labels(low_dim_embs, labels, filename="plots/.tsne.png"): assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" plt.figure(figsize=(18, 18)) #in inches for i, label in enumerate(labels): x, y = low_dim_embs[i,:] plt.scatter(x, y) plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom') plt.savefig(filename)
def cluster_scatter_plot(similarity_file):
    def get_cmap(N):
        '''Returns a function that maps each index in 0, 1, ..., N-1 to a
        distinct RGB color.'''
        color_norm = colors.Normalize(vmin=0, vmax=N - 1)
        scalar_map = cmx.ScalarMappable(norm=color_norm, cmap='hsv')

        def map_index_to_rgb_color(index):
            return scalar_map.to_rgba(index)
        return map_index_to_rgb_color

    with open(similarity_file, 'r', encoding='utf-8') as f:
        similarity_data = json.load(f)

    labels = []
    point_colors = []
    num_clusters = len(similarity_data['cluster2doc'].keys())
    cmap = get_cmap(num_clusters)
    for model_name in similarity_data['model_names']:
        model_name = os.path.splitext(os.path.basename(model_name))[0]
        cluster_label = similarity_data['doc2cluster'][model_name]
        point_colors.append(cmap(cluster_label))
        labels.append(re.compile(r"\s\([0-9]*\)-iter.*", re.IGNORECASE).split(model_name, 1)[0])

    embeddings = SpectralEmbedding(affinity='precomputed').fit_transform(
        np.array(similarity_data['similarity_matrix']))
    fig, ax = plt.subplots()
    x = embeddings[:, 0]
    y = embeddings[:, 1]
    annotes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] * 10
    N = 100
    scatter = ax.scatter(x, y, c=point_colors[:], s=100 * np.ones(shape=N))
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
    mpld3.plugins.connect(fig, tooltip)
    mpld3.show()
    # plt.scatter(tsne_embeddings[20:40, 0], tsne_embeddings[20:40, 1], c='b')
    # for label, x, y in zip(labels, tsne_embeddings[:, 0], tsne_embeddings[:, 1]):
    #     plt.annotate(
    #         label,
    #         xy=(x, y),
    #         textcoords='offset points',
    #         bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5))
    # plt.show()
def main():
    tsne = TSNE(perplexity=40)
    Z = tsne.fit_transform(X)
    plt.scatter(Z[:, 0], Z[:, 1])
    for i in range(D):
        plt.annotate(index_word_map[i], xy=(Z[i, 0], Z[i, 1]))
    plt.show()
def main(we_file='word_embeddings.npy', w2i_file='wikipedia_word2idx.json', Model=PCA):
    We = np.load(we_file)
    V, D = We.shape
    with open(w2i_file) as f:
        word2idx = json.load(f)
    idx2word = {v: k for k, v in word2idx.items()}

    model = Model()
    Z = model.fit_transform(We)
    plt.scatter(Z[:, 0], Z[:, 1])
    for i in range(V):
        plt.annotate(idx2word[i], xy=(Z[i, 0], Z[i, 1]))
    plt.show()
def main():
    svd = TruncatedSVD()
    Z = svd.fit_transform(X)
    plt.scatter(Z[:, 0], Z[:, 1])
    for i in range(D):
        plt.annotate(index_word_map[i], xy=(Z[i, 0], Z[i, 1]))
    plt.show()
def plot_accuracy_by_freq_compare(freqs1, accuracies1, freqs2, accuracies2,
                                  label1, label2, title, filename=None,
                                  scale_acc=1.00, yscale_base=10.0, alpha=0.8,
                                  tags=None):
    plt.plot(freqs1, accuracies1, marker='o', color='r', label=label1,
             linestyle='None', fillstyle='none', alpha=alpha)
    plt.plot(freqs2, accuracies2, marker='+', color='c', label=label2,
             linestyle='None', fillstyle='none', alpha=alpha)
    if tags:
        print('tags:', tags, 'len:', len(tags))
        print('len(freqs1):', len(freqs1), 'len(freqs2):', len(freqs2))
        print('len(accuracies1):', len(accuracies1), 'len(accuracies2):', len(accuracies2))
        if len(tags) == len(freqs1) and len(tags) == len(freqs2):
            print('annotating tags')
            for i, tag in enumerate(tags):
                plt.annotate(tag, (freqs1[i], accuracies1[i]))
    plt.xscale('symlog')
    # plt.yscale('log', basey=yscale_base)
    plt.legend(loc='lower right', prop={'size': 14})
    plt.xlabel('Frequency', size='large', fontweight='demibold')
    plt.ylabel('Accuracy', size='large', fontweight='demibold')
    plt.ylim(ymax=1.01 * scale_acc)
    plt.title(title, fontweight='demibold')
    plt.tight_layout()
    if filename:
        print('saving plot to:', filename)
        plt.savefig(filename)
def plot_accuracy_by_tag_compare(accuracies1, accuracies2, tags, tag_freq_dict,
                                 label1, label2, title, filename=None,
                                 scale_acc=1.00, yscale_base=10.0, alpha=0.5):
    # from adjustText import adjust_text
    tag_freqs = [tag_freq_dict[tag] for tag in tags]
    # plt.plot(tag_freqs, accuracies1, marker='o', color='r', label=label1,
    #          linestyle='None', fillstyle='none', alpha=alpha)
    # plt.plot(tag_freqs, accuracies2, marker='+', color='y', label=label2,
    #          linestyle='None', fillstyle='none', alpha=alpha)
    # plt.plot(tag_freqs, accuracies2 - accuracies1, marker='o', color='c', label=label2,
    #          linestyle='None', fillstyle='none', alpha=alpha)
    plt.scatter(tag_freqs, accuracies2 - accuracies1,
                s=np.pi * (0.5 * (accuracies2 - accuracies1) + 10) ** 2,
                c=np.random.rand(len(tag_freqs)), alpha=0.5)
    print('annotating tags')
    texts = []
    for i, tag in enumerate(tags):
        # plt.annotate(tag, (tag_freqs[i], accuracies1[i]), xytext=(-10, 10),
        #              textcoords='offset points', ha='right', va='bottom',
        #              arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
        # plt.annotate(tag, (tag_freqs[i], accuracies1[i]))
        # plt.annotate(tag, (tag_freqs[i], accuracies2[i]))
        plt.annotate(tag, (tag_freqs[i], accuracies2[i] - accuracies1[i]),
                     horizontalalignment='center', verticalalignment='center',
                     size=10 + 0.05 * (accuracies2[i] - accuracies1[i]))
        # texts.append(plt.text(tag_freqs[i], accuracies1[i], tag))
    # adjust_text(texts, force_text=0.05,
    #             arrowprops=dict(arrowstyle="-|>", color='r', alpha=0.5))
    plt.xscale('symlog')
    # plt.yscale('log', basey=yscale_base)
    # plt.legend(loc='lower right', prop={'size': 14})
    plt.xlabel('Frequency', size='large', fontweight='demibold')
    plt.ylabel('Increase in Accuracy', size='large', fontweight='demibold')
    # plt.ylim(ymax=1.05 * scale_acc)
    plt.ylim(ymax=1.15 * max(accuracies2 - accuracies1))
    plt.xlim(min(tag_freqs) / 2, max(tag_freqs) * 5)
    plt.title(title, fontweight='demibold')
    plt.tight_layout()
    if filename:
        print('saving plot to:', filename)
        plt.savefig(filename)
def draw_clusters_plot(self, X, predict_result, short_filenames):
    plt.subplot(111)

    colors = np.array([x for x in 'bgrcmykbgrcmykbgrcmykbgrcmyk'])
    colors = np.hstack([colors] * 20)

    plt.scatter(X[:, 0], X[:, 1], color=colors[predict_result].tolist(), s=50)

    for label, x, y in zip(short_filenames, X[:, 0], X[:, 1]):
        plt.annotate(
            label,
            xy=(x, y), xytext=(-20, 20),
            textcoords='offset points', ha='right', va='bottom',
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    plt.xticks(())
    plt.yticks(())
    plt.grid()
def viewLSAGraphics2D(self, plt, xs, ys, filenames):
    plt.figure(1)
    plt.clf()
    plt.subplot(1, 2, 1)
    plt.plot(xs, ys, 'go')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Documents weights')
    plt.grid(True)
    for i in range(len(filenames)):
        plt.annotate(filenames[i], xy=(xs[i], ys[i]), textcoords='data')

    plt.subplot(1, 2, 2)
    ax = plt.gca()
    ax.quiver(0, 0, xs, ys, angles='xy', scale_units='xy', scale=1, linewidth=.01)
    ax.set_xlim([-1, 1])
    ax.set_ylim([-1, 1])
    for i in range(len(filenames)):
        plt.annotate(filenames[i], xy=(xs[i], ys[i]), textcoords='data')
    plt.xlabel('X-component')
    plt.ylabel('Y-component')
    plt.title('Documents vectors')
    self.addfig(plt.gcf())
def draw_annotate(fig, ax, x, y, textx, texty, text,
                  color=[0, 0, 0, 1], arrowcolor=[0, 0, 0, 0.3]):
    plt.annotate(
        text,
        xy=(x, y), xytext=(textx, texty),
        arrowprops=dict(color=arrowcolor, shrink=0, width=0.5, headwidth=8),
        fontsize=14, color=color,
        xycoords="data", textcoords="data",
        horizontalalignment='center', verticalalignment='center')
def stock():
    # Pull historical quotes for a few stocks and compare their daily returns.
    stock_list = {"zsyh": "600036", "jsyh": "601939", "szzs": "000001",
                  "pfyh": "600000", "msyh": "600061"}
    for stock, code in stock_list.items():
        globals()[stock] = tsh.get_hist_data(code, start="2015-01-01", end="2016-04-16")
    stock_list2 = stock_list.keys()
    # print(stock_list2)
    sl = [globals()[st]["close"] for st in stock_list2]
    df_close = pd.concat(sl, axis=1, join='inner')
    df_close.columns = stock_list2
    # print(df_close)
    df_close.sort_index(ascending=True, inplace=True)  # sort by date, oldest first
    pc_ret = df_close.pct_change()  # daily percentage change of the closing price
    print(pc_ret)
    make_end_line()
    print(pc_ret.mean())
    make_end_line()
    # joint plots of pairwise returns; a correlation of 1 is perfectly positive,
    # 0 is uncorrelated, -1 is perfectly negative
    plt.show(sns.jointplot("zsyh", "jsyh", pc_ret, kind="hex"))
    plt.show(sns.jointplot("zsyh", "jsyh", pc_ret, kind="scatter"))
    plt.show(sns.jointplot("zsyh", "szzs", pc_ret, kind="scatter"))
    plt.show(sns.pairplot(pc_ret[["jsyh", "zsyh", "pfyh", "msyh"]].dropna()))
    # the standard deviation of the returns measures each stock's risk
    print(pc_ret.std())
    make_end_line()
    rets = pc_ret.dropna()
    print(rets.mean())
    make_end_line()
    area = np.pi * 20  # marker area
    plt.scatter(rets.mean(), rets.std())  # expected return (x) vs. risk (y)
    plt.xlabel("Expected Return")
    plt.ylabel("Risk")
    for label, x, y in zip(rets.columns, rets.mean(), rets.std()):
        plt.annotate(
            label,
            xy=(x, y), xytext=(50, 50),
            textcoords="offset points", ha="right", va="bottom",
            arrowprops=dict(arrowstyle="-", connectionstyle="arc3,rad=-0.3"))
    plt.show()