The following code examples, extracted from open-source Python projects, illustrate how to use matplotlib.pylab.scatter().
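For orientation, here is a minimal, self-contained sketch of the basic call. The data and variable names are illustrative, not taken from any project below; note also that the extracted snippets are excerpts and assume their projects' usual imports (e.g. numpy as np and matplotlib.pylab bound to pl, plt, or pylab).

import numpy as np
import matplotlib.pylab as pl

# illustrative data for this sketch only
x = np.random.randn(100)
y = 2.0 * x + np.random.randn(100)

# scatter() draws one marker per (x, y) pair;
# c sets marker color, s marker size, alpha transparency
pl.scatter(x, y, c='blue', s=20, alpha=0.7)
pl.xlabel('x')
pl.ylabel('y')
pl.show()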
def plot_volcano(logFC, p_val, sample_name, saveName, logFC_thresh):
    fig = pl.figure()  # to plot and save
    pl.scatter(logFC[(p_val > 0.05) | (abs(logFC) < logFC_thresh)],
               -np.log10(p_val[(p_val > 0.05) | (abs(logFC) < logFC_thresh)]),
               color='blue', alpha=0.5)
    pl.scatter(logFC[(p_val < 0.05) & (abs(logFC) > logFC_thresh)],
               -np.log10(p_val[(p_val < 0.05) & (abs(logFC) > logFC_thresh)]),
               color='red')
    pl.hlines(-np.log10(0.05), min(logFC), max(logFC))
    pl.vlines(-logFC_thresh, min(-np.log10(p_val)), max(-np.log10(p_val)))
    pl.vlines(logFC_thresh, min(-np.log10(p_val)), max(-np.log10(p_val)))
    pl.xlim(-3, 3)
    pl.xlabel('Log Fold Change')
    pl.ylabel('-log10(p-value)')
    pl.savefig(saveName)
    pl.close(fig)

# def plot_histograms(df_peaks, pntr_list):
#     for pntr in pntr_list:
#         colName = pntr[2] + '_Intragenic_position'
#         pl.hist(df_peaks[colName])
#         pl.xlabel(colName)
#         pl.ylabel()
#         pl.show()

def test_plot_error_ellipse(self):
    # Generate random data
    x = np.random.normal(0, 1, 300)
    s = np.array([2.0, 2.0])
    y1 = np.random.normal(s[0] * x)
    y2 = np.random.normal(s[1] * x)
    data = np.array([y1, y2])

    # Calculate covariance and plot error ellipse
    cov = np.cov(data)
    plot_error_ellipse([0.0, 0.0], cov)

    debug = False
    if debug:
        plt.scatter(data[0, :], data[1, :])
        plt.xlim([-8, 8])
        plt.ylim([-8, 8])
        plt.show()
        plt.clf()

def gp_partd(Xtrain, ytrain, Xtest, ytest):
    gp = gaussian_process(Xtrain[:, 3], ytrain, Xtrain[:, 3], ytrain)
    gp.init_kernel_matrices(b=5, var=2)
    gp.predict_test()

    x = np.asarray(Xtrain[:, 3]).flatten()
    xsortind = np.argsort(x)
    y1 = np.asarray(ytrain).flatten()
    y2 = np.asarray(gp.test_predictions).flatten()

    plt.figure()
    plt.scatter(x[xsortind], y1[xsortind])
    plt.plot(x[xsortind], y2[xsortind], 'b-')
    plt.xlabel('Car Weight (Dimension 4)')
    plt.ylabel('Outcome')
    plt.title('Visualizing model through single dimension')
    plt.savefig('hw3_gaussian_dim4_viz')
    plt.show()

def plot(embeddings, labels):
    assert embeddings.shape[0] >= len(labels), 'More labels than embeddings'
    pylab.figure(figsize=(15, 15))  # in inches
    for i, label in enumerate(labels):
        x, y = embeddings[i, :]
        pylab.scatter(x, y)
        pylab.annotate(label, xy=(x, y), xytext=(5, 2),
                       textcoords='offset points', ha='right', va='bottom')
    pylab.show()

def draw(m, name, extra=None):
    FIG.clf()
    matrix = m
    orig_shape = np.shape(matrix)
    # drop the trailing channel dimension of orig_shape
    new_shape = orig_shape[:-1]
    matrix = np.reshape(matrix, new_shape)
    ax = FIG.add_subplot(1, 1, 1)
    ax.set_aspect('equal')
    plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.gray)
    # plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.ocean)
    plt.colorbar()
    if extra is not None:
        greens, reds = extra
        grn_x, grn_y = greens
        red_x, red_y = reds
        plt.scatter(x=grn_x, y=grn_y, c='g', s=40)
        plt.scatter(x=red_x, y=red_y, c='r', s=40)
    # # put a blue dot at (10, 20)
    # plt.scatter([10], [20])
    # # put a red dot, size 40, at 2 locations:
    # plt.scatter(x=[3, 4], y=[5, 6], c='r', s=40)
    plt.savefig(name)

def draw_annotate(x_cords, y_cords, anns, name):
    FIG.clf()
    y = x_cords
    z = y_cords
    n = anns
    fig = FIG
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlim([0, L])
    ax.set_ylim([0, L])
    ax.set_ylim(ax.get_ylim()[::-1])  # flip the y-axis
    ax.scatter(z, y)
    for i, txt in enumerate(n):
        ax.annotate(txt, (z[i], y[i]))
    fig.savefig(name)

def run_cluster_MM(nat_param=True):
    import GPy
    # create dataset
    print("creating dataset...")
    N = 100
    k1 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 10, 10, 0.1, 0.1]), ARD=True)
    k2 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 0.1, 10, 0.1, 10]), ARD=True)
    k3 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[0.1, 0.1, 10, 10, 10]), ARD=True)
    X = np.random.normal(0, 1, (N, 5))
    A = np.random.multivariate_normal(np.zeros(N), k1.K(X), 10).T
    B = np.random.multivariate_normal(np.zeros(N), k2.K(X), 10).T
    C = np.random.multivariate_normal(np.zeros(N), k3.K(X), 10).T
    Y = np.vstack((A, B, C))
    labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0]) * 2))

    # inference
    np.random.seed(42)
    print("inference ...")
    M = 30
    D = 5
    lvm = vfe.SGPLVM(Y, D, M, lik='Gaussian', nat_param=nat_param)
    lvm.optimise(method='L-BFGS-B', maxiter=20)
    # lvm.optimise(method='adam', adam_lr=0.05, maxiter=2000)
    ls = np.exp(lvm.sgp_layer.ls)
    print(ls)
    inds = np.argsort(ls)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    plt.scatter(mx[:, inds[0]], mx[:, inds[1]], c=labels)
    zu = lvm.sgp_layer.zu
    plt.plot(zu[:, inds[0]], zu[:, inds[1]], 'ko')
    # plt.show()
    plt.savefig('/tmp/gplvm_cluster_MM.pdf')

def run_cluster_MC():
    import GPy
    # create dataset
    print("creating dataset...")
    N = 100
    k1 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 10, 10, 0.1, 0.1]), ARD=True)
    k2 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 0.1, 10, 0.1, 10]), ARD=True)
    k3 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[0.1, 0.1, 10, 10, 10]), ARD=True)
    X = np.random.normal(0, 1, (N, 5))
    A = np.random.multivariate_normal(np.zeros(N), k1.K(X), 10).T
    B = np.random.multivariate_normal(np.zeros(N), k2.K(X), 10).T
    C = np.random.multivariate_normal(np.zeros(N), k3.K(X), 10).T
    Y = np.vstack((A, B, C))
    labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0]) * 2))

    # inference
    np.random.seed(42)
    print("inference ...")
    M = 30
    D = 5
    alpha = 0.5
    lvm = vfe.SGPLVM(Y, D, M, lik='Gaussian')
    lvm.optimise(method='adam', adam_lr=0.05, maxiter=2000, prop_mode=config.PROP_MC)
    ls = np.exp(lvm.sgp_layer.ls)
    print(ls)
    inds = np.argsort(ls)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    plt.scatter(mx[:, inds[0]], mx[:, inds[1]], c=labels)
    zu = lvm.sgp_layer.zu
    plt.plot(zu[:, inds[0]], zu[:, inds[1]], 'ko')
    # plt.show()
    plt.savefig('/tmp/gplvm_cluster_MC.pdf')

def run_frey():
    # import dataset
    data = pods.datasets.brendan_faces()
    # Y = data['Y'][:50, :]
    Y = data['Y']
    Yn = Y - np.mean(Y, axis=0)
    Yn /= np.std(Y, axis=0)
    Y = Yn

    # inference
    print("inference ...")
    M = 30
    D = 20
    lvm = vfe.SGPLVM(Y, D, M, lik='Gaussian')
    lvm.optimise(method='L-BFGS-B', maxiter=10)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1])
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    sx = 28
    sy = 20
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, x_var = lvm.predict_f(z_mu)
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = x_mean.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    Xi, Yi = np.meshgrid(x_values, y_values)
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()

def run_cluster_MC():
    import GPy
    # create dataset
    print("creating dataset...")
    N = 100
    k1 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 10, 10, 0.1, 0.1]), ARD=True)
    k2 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 0.1, 10, 0.1, 10]), ARD=True)
    k3 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[0.1, 0.1, 10, 10, 10]), ARD=True)
    X = np.random.normal(0, 1, (N, 5))
    A = np.random.multivariate_normal(np.zeros(N), k1.K(X), 10).T
    B = np.random.multivariate_normal(np.zeros(N), k2.K(X), 10).T
    C = np.random.multivariate_normal(np.zeros(N), k3.K(X), 10).T
    Y = np.vstack((A, B, C))
    labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0]) * 2))

    # inference
    print("inference ...")
    M = 30
    D = 5
    alpha = 0.5
    lvm = aep.SGPLVM(Y, D, M, lik='Gaussian')
    lvm.optimise(method='adam', adam_lr=0.05, maxiter=2000,
                 alpha=alpha, prop_mode=config.PROP_MC)
    ls = np.exp(lvm.sgp_layer.ls)
    print(ls)
    inds = np.argsort(ls)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    plt.scatter(mx[:, inds[0]], mx[:, inds[1]], c=labels)
    zu = lvm.sgp_layer.zu
    # plt.plot(zu[:, inds[0]], zu[:, inds[1]], 'ko')
    # plt.show()
    plt.savefig('/tmp/gplvm_cluster.pdf')

def run_frey():
    # import dataset
    data = pods.datasets.brendan_faces()
    # Y = data['Y'][:50, :]
    Y = data['Y']
    Yn = Y - np.mean(Y, axis=0)
    Yn /= np.std(Y, axis=0)
    Y = Yn

    # inference
    print("inference ...")
    M = 30
    D = 20
    lvm = aep.SGPLVM(Y, D, M, lik='Gaussian')
    # lvm.train(alpha=0.5, no_epochs=10, n_per_mb=100, lrate=0.1, fixed_params=['sn'])
    lvm.optimise(method='L-BFGS-B', alpha=0.1, maxiter=10)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1])
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    sx = 28
    sy = 20
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, x_var = lvm.predict_f(z_mu)
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = x_mean.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    Xi, Yi = np.meshgrid(x_values, y_values)
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()

def plot_trajectory(state_true, state_estimated):
    plt.plot(state_true[:, 0], state_true[:, 2], color="red")
    plt.scatter(state_estimated[:, 0].tolist()[::10],
                state_estimated[:, 2].tolist()[::10],
                marker="o", color="blue")

def test_compute_inliers(self):
    sample = self.ransac.sample(self.data)
    dist = self.ransac.compute_distance(sample, self.data)
    self.ransac.compute_inliers(dist)

# def test_optimize(self):
#     debug = False
#
#     for i in range(10):
#         m_pred, c_pred, mask = self.ransac.optimize(self.data)
#         if debug:
#             print("m_true: ", self.m_true)
#             print("m_pred: ", m_pred)
#             print("c_true: ", self.c_true)
#             print("c_pred: ", c_pred)
#
#         self.assertTrue(abs(m_pred - self.m_true) < 0.5)
#         self.assertTrue(abs(c_pred - self.c_true) < 0.5)
#
#     # Plot RANSAC optimized result
#     debug = False
#     if debug:
#         x = np.linspace(0.0, 10.0, num=100)
#         y = m_pred * x + c_pred
#         plt.scatter(self.data[0, :], self.data[1, :])
#         plt.plot(x, y)
#         plt.show()

def _plot_mi_func(x, y):
    mi = mutual_info(x, y)
    title = "NI($X_1$, $X_2$) = %.3f" % mi
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

def _plot_correlation_func(x, y):
    r, p = pearsonr(x, y)
    title = "Cor($X_1$, $X_2$) = %.3f" % r
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")
    f1 = scipy.poly1d(scipy.polyfit(x, y, 1))
    pylab.plot(x, f1(x), "r--", linewidth=2)
    # pylab.xticks([w * 7 * 24 for w in [0, 1, 2, 3, 4]],
    #              ['week %i' % (w + 1) for w in [0, 1, 2, 3, 4]])

def draw(name, feature4096):
    plt.figure(name)
    feature4096 = feature4096.reshape(64, 64)
    for i, x in enumerate(feature4096):
        for j, y in enumerate(x):
            # if y <= 0:
            #     print ' ',
            # else:
            #     print '%3.1f' % y,
            plt.scatter([j], [i], s=[y * 1000])
        # print
    plt.axis([-1, 65, -1, 65])
    plt.show()

def vis_data(data, classes):
    X_embedded = TSNE(n_components=2, perplexity=40, verbose=2).fit_transform(data)
    plt.figure()
    colors = cm.rainbow(np.linspace(0, 1, 17))
    for i in range(17):
        ind = np.where(classes == i)
        plt.scatter(X_embedded[ind, 0], X_embedded[ind, 1],
                    color=colors[i], marker='x', label=i)
    plt.legend()

# Raw data

def plotBestFit(weights):
    import matplotlib.pylab as plt
    import seaborn as sns
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    # fig = plt.figure()
    plt.scatter(xcord1, ycord1, s=30, c="red", marker="s", label="X1")
    plt.scatter(xcord2, ycord2, s=30, c="green", label="X2")
    x = arange(-3.0, 3.0, 0.1)
    y = (-float(weights[0]) - float(weights[1]) * x) / float(weights[2])
    plt.plot(x, y, c="purple", label="fitted line")
    plt.legend()
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.show()

def draw_pic(data_list_set):
    for i in range(len(data_list_set)):
        parse_x = []
        parse_y = []
        for j in range(len(data_list_set[i])):
            parse_x.append(data_list_set[i][j].x)
            parse_y.append(data_list_set[i][j].y)
        plt.scatter(parse_x, parse_y, c=numpy.random.rand(3, 1), alpha=0.65,
                    label="Team:" + str(i), s=40)
    plt.legend()
    plt.title("The Result From The Cluster")
    plt.show()

def run_mnist():
    np.random.seed(42)

    # import dataset
    f = gzip.open('./tmp/data/mnist.pkl.gz', 'rb')
    (x_train, t_train), (x_valid, t_valid), (x_test, t_test) = cPickle.load(f)
    f.close()

    Y = x_train[:100, :]
    labels = t_train[:100]
    Y[Y < 0.5] = -1
    Y[Y > 0.5] = 1

    # inference
    print("inference ...")
    M = 30
    D = 2
    # lvm = vfe.SGPLVM(Y, D, M, lik='Gaussian')
    lvm = vfe.SGPLVM(Y, D, M, lik='Probit')
    # lvm.train(alpha=0.5, no_epochs=10, n_per_mb=100, lrate=0.1, fixed_params=['sn'])
    lvm.optimise(method='L-BFGS-B')

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1], c=labels)
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    sx = 28
    sy = 28
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, x_var = lvm.predict_f(z_mu)
            t = x_mean / np.sqrt(1 + x_var)
            Z = 0.5 * (1 + special.erf(t / np.sqrt(2)))
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = Z.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    Xi, Yi = np.meshgrid(x_values, y_values)
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()

def run_cluster():
    import GPy
    # create dataset
    print("creating dataset...")
    N = 50
    k1 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 10, 10, 0.1, 0.1]), ARD=True)
    k2 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[10, 0.1, 10, 0.1, 10]), ARD=True)
    k3 = GPy.kern.RBF(5, variance=1, lengthscale=1. / np.random.dirichlet(np.r_[0.1, 0.1, 10, 10, 10]), ARD=True)
    X = np.random.normal(0, 1, (N, 5))
    A = np.random.multivariate_normal(np.zeros(N), k1.K(X), 10).T
    B = np.random.multivariate_normal(np.zeros(N), k2.K(X), 10).T
    C = np.random.multivariate_normal(np.zeros(N), k3.K(X), 10).T
    Y = np.vstack((A, B, C))
    labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0]) * 2))

    # inference
    print("inference ...")
    M = 20
    D = 5
    lvm_aep = aep.SGPLVM(Y, D, M, lik='Gaussian')
    lvm_aep.optimise(method='L-BFGS-B', alpha=0.1, maxiter=2000)
    lvm = ep.SGPLVM(Y, D, M, lik='Gaussian')
    lvm.update_hypers(lvm_aep.get_hypers())
    # # a quick hack to initialise the factors
    # lvm.sgp_layer.t1 = np.tile(lvm_aep.sgp_layer.theta_2[np.newaxis, :, :] / lvm.N, [lvm.N, 1, 1])
    # lvm.sgp_layer.t2 = np.tile(lvm_aep.sgp_layer.theta_1[np.newaxis, :, :, :] / lvm.N, [lvm.N, 1, 1, 1])
    # lvm.sgp_layer.update_posterior()
    # lvm.tx1 = lvm_aep.factor_x1
    # lvm.tx2 = lvm_aep.factor_x2
    lvm.inference(alpha=0.1, no_epochs=10, parallel=True, decay=0.5)

    ls = np.exp(lvm.sgp_layer.ls)
    print(ls)
    inds = np.argsort(ls)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    plt.scatter(mx[:, inds[0]], mx[:, inds[1]], c=labels)
    zu = lvm.sgp_layer.zu
    plt.plot(zu[:, inds[0]], zu[:, inds[1]], 'ko')
    plt.show()

def run_mnist():
    np.random.seed(42)

    # import dataset
    f = gzip.open('./tmp/data/mnist.pkl.gz', 'rb')
    (x_train, t_train), (x_valid, t_valid), (x_test, t_test) = cPickle.load(f)
    f.close()

    Y = x_train[:100, :]
    labels = t_train[:100]
    Y[Y < 0.5] = -1
    Y[Y > 0.5] = 1

    # inference
    print("inference ...")
    M = 30
    D = 2
    # lvm = aep.SGPLVM(Y, D, M, lik='Gaussian')
    lvm = aep.SGPLVM(Y, D, M, lik='Probit')
    # lvm.train(alpha=0.5, no_epochs=10, n_per_mb=100, lrate=0.1, fixed_params=['sn'])
    lvm.optimise(method='L-BFGS-B', alpha=0.1)

    plt.figure()
    mx, vx = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1], c=labels)
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    sx = 28
    sy = 28
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, x_var = lvm.predict_f(z_mu)
            t = x_mean / np.sqrt(1 + x_var)
            Z = 0.5 * (1 + special.erf(t / np.sqrt(2)))
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = Z.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    Xi, Yi = np.meshgrid(x_values, y_values)
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()

def run_xor():
    from operator import xor
    from scipy import special

    # create dataset
    print("generating dataset...")
    n = 25
    Y = np.zeros((0, 3))
    for i in [0, 1]:
        for j in [0, 1]:
            a = i * np.ones((n, 1))
            b = j * np.ones((n, 1))
            c = xor(bool(i), bool(j)) * np.ones((n, 1))
            Y_ij = np.hstack((a, b, c))
            Y = np.vstack((Y, Y_ij))
    Y = 2 * Y - 1

    # inference
    print("inference ...")
    M = 10
    D = 2
    lvm = aep.SGPLVM(Y, D, M, lik='Probit')
    lvm.optimise(method='L-BFGS-B', alpha=0.1, maxiter=200)

    # predict given inputs
    mx, vx = lvm.get_posterior_x()
    lims = [-1.5, 1.5]
    x = np.linspace(*lims, num=101)
    y = np.linspace(*lims, num=101)
    X, Y = np.meshgrid(x, y)
    X_ravel = X.ravel()
    Y_ravel = Y.ravel()
    inputs = np.vstack((X_ravel, Y_ravel)).T
    my, vy = lvm.predict_f(inputs)
    t = my / np.sqrt(1 + vy)
    Z = 0.5 * (1 + special.erf(t / np.sqrt(2)))
    for d in range(3):
        plt.figure()
        plt.scatter(mx[:, 0], mx[:, 1])
        zu = lvm.sgp_layer.zu
        plt.plot(zu[:, 0], zu[:, 1], 'ko')
        plt.contour(X, Y, np.log(Z[:, d] + 1e-16).reshape(X.shape))
        plt.xlim(*lims)
        plt.ylim(*lims)

    # Y_test = np.array([[1, -1, 1], [-1, 1, 1], [-1, -1, -1], [1, 1, -1]])
    # # impute missing data
    # for k in range(3):
    #     Y_test_k = Y_test
    #     missing_mask = np.ones_like(Y_test_k)
    #     missing_mask[:, k] = 0
    #     my_pred, vy_pred = lvm.impute_missing(
    #         Y_test_k, missing_mask,
    #         alpha=0.1, no_iters=100, add_noise=False)
    #     print(k, my_pred, vy_pred, Y_test_k)

    plt.show()

def plot_simple_demo_1():
    pylab.clf()
    fig = pylab.figure(num=None, figsize=(10, 4))
    pylab.subplot(121)

    title = "Original feature space"
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    x1 = np.arange(0, 10, .2)
    x2 = x1 + np.random.normal(scale=1, size=len(x1))

    good = (x1 > 5) | (x2 > 5)
    bad = ~good

    x1g = x1[good]
    x2g = x2[good]
    pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")

    x1b = x1[bad]
    x2b = x2[bad]
    pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")

    pylab.grid(True)
    pylab.subplot(122)

    X = np.c_[(x1, x2)]
    pca = decomposition.PCA(n_components=1)
    Xtrans = pca.fit_transform(X)

    Xg = Xtrans[good]
    Xb = Xtrans[bad]

    pylab.scatter(Xg[:, 0], np.zeros(len(Xg)), edgecolor="blue", facecolor="blue")
    pylab.scatter(Xb[:, 0], np.zeros(len(Xb)), edgecolor="red", facecolor="white")
    title = "Transformed feature space"
    pylab.title(title)
    pylab.xlabel("$X'$")
    fig.axes[1].get_yaxis().set_visible(False)

    print(pca.explained_variance_ratio_)

    pylab.grid(True)
    pylab.autoscale(tight=True)
    filename = "pca_demo_1.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")

def plot_simple_demo_2():
    pylab.clf()
    fig = pylab.figure(num=None, figsize=(10, 4))
    pylab.subplot(121)

    title = "Original feature space"
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    x1 = np.arange(0, 10, .2)
    x2 = x1 + np.random.normal(scale=1, size=len(x1))

    good = x1 > x2
    bad = ~good

    x1g = x1[good]
    x2g = x2[good]
    pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")

    x1b = x1[bad]
    x2b = x2[bad]
    pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")

    pylab.grid(True)
    pylab.subplot(122)

    X = np.c_[(x1, x2)]
    pca = decomposition.PCA(n_components=1)
    Xtrans = pca.fit_transform(X)

    Xg = Xtrans[good]
    Xb = Xtrans[bad]

    pylab.scatter(Xg[:, 0], np.zeros(len(Xg)), edgecolor="blue", facecolor="blue")
    pylab.scatter(Xb[:, 0], np.zeros(len(Xb)), edgecolor="red", facecolor="white")
    title = "Transformed feature space"
    pylab.title(title)
    pylab.xlabel("$X'$")
    fig.axes[1].get_yaxis().set_visible(False)

    print(pca.explained_variance_ratio_)

    pylab.grid(True)
    pylab.autoscale(tight=True)
    filename = "pca_demo_2.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")

def plot_simple_demo_lda():
    pylab.clf()
    fig = pylab.figure(num=None, figsize=(10, 4))
    pylab.subplot(121)

    title = "Original feature space"
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    # x1, x2 are not defined in this excerpt; assumed to be generated
    # the same way as in the PCA demos above
    x1 = np.arange(0, 10, .2)
    x2 = x1 + np.random.normal(scale=1, size=len(x1))

    good = x1 > x2
    bad = ~good

    x1g = x1[good]
    x2g = x2[good]
    pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")

    x1b = x1[bad]
    x2b = x2[bad]
    pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")

    pylab.grid(True)
    pylab.subplot(122)

    X = np.c_[(x1, x2)]
    lda_inst = lda.LDA(n_components=1)
    Xtrans = lda_inst.fit_transform(X, good)

    Xg = Xtrans[good]
    Xb = Xtrans[bad]

    pylab.scatter(Xg[:, 0], np.zeros(len(Xg)), edgecolor="blue", facecolor="blue")
    pylab.scatter(Xb[:, 0], np.zeros(len(Xb)), edgecolor="red", facecolor="white")
    title = "Transformed feature space"
    pylab.title(title)
    pylab.xlabel("$X'$")
    fig.axes[1].get_yaxis().set_visible(False)

    pylab.grid(True)
    pylab.autoscale(tight=True)
    filename = "lda_demo.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")

def plotScatter(self, xList, yList, saveFigPath):
    '''
    Scatter-plot the feature vectors in xList, colored by the labels in
    yList, and save the figure to saveFigPath.
    :param xList: list of feature vectors
    :param yList: list of labels
    :param saveFigPath: path for saving the figure
    :return:
    '''
    # use the first two feature dimensions when available;
    # otherwise reuse the first dimension for both axes
    if len(xList[0]) >= 2:
        x1List = [x[0] for x in xList]
        x2List = [x[1] for x in xList]
    else:
        x1List = x2List = [x[0] for x in xList]

    # create the figure
    scatterFig = plt.figure(saveFigPath)
    # color for each label
    colorDict = {-1: 'm', 1: 'r', 2: 'b', 3: 'pink', 4: 'orange'}
    # plot each point
    for idx in range(len(x1List)):
        plt.scatter(x1List[idx], x2List[idx], marker='o',
                    color=colorDict[yList[idx]], label=yList[idx])
    # legend (disabled):
    # ySet = set(yList)
    # map(lambda y: plt.legend(str(y), loc='best'), ySet)

    # title, axis labels, grid; then save and show
    plt.title(saveFigPath)
    plt.xlabel(r'$x^1$')
    plt.ylabel(r'$x^2$')
    plt.grid(True)
    plt.savefig(saveFigPath)
    plt.show()

def kmeans():
    T = 20
    n_train = 500
    mix_weights = (0.2, 0.5, 0.3)

    # create a class object:
    km = kmeans_gaussian(n_train, mix_weights)

    # set k-values
    kvalues = range(2, 6)
    colors = ['blue', 'green', 'red', 'black', 'yellow']

    # store the cluster assignments:
    k_backup = [3, 5]
    cluster_assgn35 = []

    plt.figure()
    for i in range(len(kvalues)):
        km.set_k(kvalues[i])
        km.initialize_cluster_centers()
        km.train(T)
        plt.plot(range(1, T + 1), km.objective, colors[i])
        # store cluster assignments for k = 3, 5
        if kvalues[i] in k_backup:
            cluster_assgn35.append(km.cluster_assgn[:, 0])

    plt.xticks(range(1, T + 1))
    plt.xlabel('Iterations')
    plt.ylabel('Objective')
    plt.title('Objective vs Iteration for K = [2,3,4,5]')
    plt.legend(['K = %d' % i for i in kvalues])
    # plt.savefig('hw4_1a_kmean_obj')
    # plt.show()

    # plot part b:
    for i in range(2):
        plt.figure()
        colors_arr = [colors[int(x)] for x in cluster_assgn35[i]]
        plt.scatter(km.data[:, 0], km.data[:, 1], c=colors_arr)
        plt.xlabel('Dimension 1')
        plt.ylabel('Dimension 2')
        plt.title('Scatter plot with cluster assignment for K=%d' % k_backup[i])
        plt.savefig('hw4_2_k%d.png' % k_backup[i])
        plt.show()

#############################
#### PART B - MATRIX FACT
#############################