The following 50 code examples, extracted from open-source Python projects, illustrate how to use matplotlib.pyplot.hist().
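Before the extracted examples, here is a minimal, self-contained sketch of the call pattern they all share. The sample data and styling choices below are arbitrary illustrations, not taken from any of the projects:

import numpy as np
import matplotlib.pyplot as plt

# arbitrary sample data: 1000 draws from a standard normal distribution
data = np.random.randn(1000)

# plt.hist returns the bin counts, the bin edges, and the drawn patches;
# density=True normalises the counts to a probability density
counts, bin_edges, patches = plt.hist(data, bins=30, density=True, alpha=0.7)
plt.xlabel('Value')
plt.ylabel('Density')
plt.title('Basic plt.hist() usage')
plt.show()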
def sample():
    '''
    Draw a sample from the distribution of polar angle of the angular
    momentum vector, :math:`\\theta`, computed using the Monte Carlo
    technique discussed in the paper.

    .. plot::
       :align: center

       from planetplanet.photo import theta
       import matplotlib.pyplot as pl
       x = [theta.sample() for i in range(10000)]
       pl.hist(x, bins = 50)
       pl.xlabel(r'$\\theta$ [deg]', fontweight = 'bold')
       pl.ylabel('Probability', fontweight = 'bold')
       pl.show()

    '''
    y = np.random.random()
    f = lambda x: CDF(x) - y
    while np.sign(f(0)) == np.sign(f(1)):
        y = np.random.random()
        f = lambda x: CDF(x) - y
    return brentq(f, 0, 1)
def test_evidence(self):
    # 2 sigma tolerance
    tolerance = 2.0*np.sqrt(self.work.NS.state.info/self.work.NS.Nlive)
    print('2-sigma statistic error in logZ: {0:0.3f}'.format(tolerance))
    print('Analytic logZ {0}'.format(self.model.analytic_log_Z))
    print('Estimated logZ {0}'.format(self.work.NS.logZ))
    pos = self.work.posterior_samples['x']
    #t,pval = stats.kstest(pos, self.model.distr.cdf)
    stat, pval = stats.normaltest(pos.T)
    print('Normal test p-value {0}'.format(str(pval)))
    plt.figure()
    # density=True replaces the 'normed' argument removed in matplotlib 3.1
    plt.hist(pos.ravel(), density=True)
    x = np.linspace(self.model.bounds[0][0], self.model.bounds[0][1], 100)
    plt.plot(x, self.model.distr.pdf(x))
    plt.title('NormalTest pval = {0}'.format(pval))
    plt.savefig('posterior.png')
    plt.figure()
    plt.plot(pos.ravel(), ',')
    plt.title('chain')
    plt.savefig('chain.png')
    self.assertTrue(np.abs(self.work.NS.logZ - GaussianModel.analytic_log_Z) < tolerance,
                    'Incorrect evidence for normalised distribution: {0:.3f} instead of {1:.3f}'.format(
                        self.work.NS.logZ, GaussianModel.analytic_log_Z))
    self.assertTrue(pval > 0.01, 'Normaltest test failed: KS stat = {0}'.format(pval))
def plot_histogram(x, n_bins, title, x_label, y_label):
    """
    Plots a histogram from a list of data.

    Args:
        x: A list of floats representing the data.
        n_bins: An int representing the number of bins to plot.
        title: A string representing the title of the graph.
        x_label: A string representing the label for the x-axis.
        y_label: A string representing the label for the y-axis.
    """
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.hist(x, bins=n_bins)
    plt.show()

# probability
def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename):  # times in ms
    bins = np.linspace(0, 5000, 50)
    data = np.vstack([parseTimes, hashTimes, compileTimes]).T
    fig, ax = plt.subplots()
    plt.hist(data, bins, alpha=0.7, label=['parsing', 'hashing', 'compiling'],
             color=[parseColor, hashColor, compileColor])
    plt.legend(loc='upper right')
    plt.xlabel('time [ms]')
    plt.ylabel('#files')
    fig.savefig(filename)

    fig, ax = plt.subplots()
    boxplot_data = [[i/1000 for i in parseTimes], [i/1000 for i in hashTimes],
                    [i/1000 for i in compileTimes]]  # times to s
    plt.boxplot(boxplot_data, 0, 'rs', 0, [5, 95])
    plt.xlabel('time [s]')
    plt.yticks([1, 2, 3], ['parsing', 'hashing', 'compiling'])
    #lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend on the right
    fig.savefig(filename[:-4] + '_boxplots' + GRAPH_EXTENSION)
def disp_gap_bydate(self):
    gaps_mean = self.gapdf.groupby('time_date')['gap'].mean()
    gaps_mean.plot(kind='bar')
    plt.ylabel('Mean of gap')
    plt.title('Date/Gap Correlation')
    # for i in gaps_mean.index:
    #     plt.plot([i, i], [0, gaps_mean[i]], 'k-')
    plt.show()
    return

# def drawGapDistribution(self):
#     self.gapdf[self.gapdf['gapdf'] < 10]['gapdf'].hist(bins=50)
#     # sns.distplot(self.gapdf['gapdf']);
#     # sns.distplot(self.gapdf['gapdf'], hist=True, kde=False, rug=False)
#     # plt.hist(self.gapdf['gapdf'])
#     plt.show()
#     return

# def drawGapCorrelation(self):
#     _, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
#     res = self.gapdf.groupby('start_district_id')['gapdf'].sum()
#     ax1.bar(res.index, res.values)
#     res = self.gapdf.groupby('time_slotid')['gapdf'].sum()
#     ax2.bar(res.index.map(lambda x: x[11:]), res.values)
#     plt.show()
#     return
def scoreHists(scoresFN, outFN, numBins, geneNames, scoreType):
    '''Read through a scores file, and separate into all pairwise
    comparisons. Then plot hist of each.'''
    # currently, this seems to require a display for interactive
    # plots. would be nice to make it run without that...
    pairD = readScorePairs(scoresFN, geneNames, scoreType)
    pyplot.ioff()  # turn off interactive mode
    with PdfPages(outFN) as pdf:
        for key in pairD:
            fig = pyplot.figure()
            pyplot.hist(pairD[key], bins=numBins)
            pyplot.title('-'.join(key))
            pdf.savefig()
            pyplot.close()
def gimpMarkup(self, hints=gimpContours, image="2x2-red-1.jpg", feature="top-left-monitor"):
    r = Rectangle(*hints[image][feature])
    contour = r.asContour()
    cv2.drawContours(self.img, [contour], -1, (0, 255, 0), 5)
    title = self.tgen.next(feature)
    if self.show:
        ImageViewer(self.img).show(window=title, destroy=self.destroy,
                                   info=self.info, thumbnailfn=title)
    roi = r.getRoi(self.img)
    self.rois[feature] = roi
    # Histogram the ROI to get the spread of intensities, in each channel and grayscale
    title = '%s-roi.jpg' % feature
    if self.show:
        ImageViewer(roi).show(window=title, destroy=self.destroy,
                              info=self.info, thumbnailfn=title)
    colors = ('b', 'g', 'r')
    for i, col in enumerate(colors):
        hist = cv2.calcHist([roi], [i], None, [256], [0, 256])
        plt.plot(hist, color=col)
        plt.xlim([0, 256])
    #plt.hist(roi.ravel(), 256, [0, 256])
    plt.show()
    cmap = ColorMapper(roi)
    cmap.mapit(1)
    title = self.tgen.next('colourMapping')
    if self.show:
        ImageViewer(self.img).show(window=title, destroy=self.destroy,
                                   info=self.info, thumbnailfn=title)
    cv2.waitKey()
def info(self, burn=1000, plot=False):
    """
    Print the summary statistics and optionally plot the results
    """
    rows = len(self.varnames)
    cols = 2
    chain = np.array(self.chain[burn:])
    nsize = chain.shape[0]
    # print(rows, cols)
    print('%4s %16s %12s %12s [%12s, %12s, %12s]' %
          ('no', 'name', 'mean', 'stddev', '16%', '50%', '84%'))
    for i, name in enumerate(self.varnames):
        temp = np.percentile(chain[:, i], [16.0, 84.0, 50.0])
        print('%4i %16s %12g %12g [%12g, %12g, %12g]' %
              (i, name, np.mean(chain[:, i]), (temp[1]-temp[0])/2.0, temp[0], temp[2], temp[1]))
        if plot:
            ax = plt.subplot(rows, cols, 2*i+1)
            # plt.text(0.05, 0.9, r'$\tau$=' + '%5.1f' % (acor.acor(chain[:, i])[0]), transform=ax.transAxes)
            plt.plot(chain[:, i])
            plt.ylabel(self.model.descr[name][3])
            plt.xlabel('Iteration')
            ax = plt.subplot(rows, cols, 2*i+2)
            plt.hist(chain[:, i], bins=100, histtype='step')
            plt.text(0.05, 0.9, sround(np.mean(chain[:, i]), temp[0], temp[1]), transform=ax.transAxes)
            plt.xlabel(self.model.descr[name][3])
            # plt.text(0.05, 0.9, '%6g %3g (%4g-%4g)' % (np.mean(chain[:, i]), (temp[1]-temp[0])/2.0, temp[0], temp[1]), transform=ax.transAxes)
def make_artifact_plots(data, outname, pos_arts, neg_arts, stds):
    colors = [cm.Dark2(x) for x in np.linspace(0, 1, len(stds))]
    f, (ax1, ax2, ax3) = plt.subplots(3, 1)
    if len(pos_arts) == 0 and len(neg_arts) == 0:
        # nothing to do
        plt.savefig(outname + ".png")
        return
    for c, (poss, negs) in enumerate(zip(pos_arts, neg_arts)):
        extrema = np.array(poss + negs, dtype=int)
        for i in extrema:
            ax1.plot(data[i - 10:i + 10, c] / stds[c], linewidth=0.5, color=colors[c])
        plt.sca(ax2)
        plt.hist(data[extrema, c] / stds[c], bins=20, fill=None, edgecolor=colors[c])
        ax3.vlines(extrema, 0, 1, color=colors[c])
    ax1.set_ylabel("standard deviation")
    ax1.set_title("artifacts")
    ax2.set_title("amplitude distribution")
    ax3.set_title("artifact locations")
    plt.savefig(outname + ".png")
def hist_test():
    mu, sigma = 100, 15
    x = mu + sigma * np.random.randn(10000)  # generate normally distributed sample data
    # density=True replaces the 'normed' argument removed in matplotlib 3.1
    n, bins, patches = plt.hist(x, 50, density=True, facecolor='g', alpha=0.75)
    plt.xlabel('Smarts')
    plt.ylabel('Probability')  # axis labels
    plt.title('Histogram of IQ')  # figure title
    plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
    plt.axis([40, 160, 0, 0.03])
    plt.grid(True)
    plt.show()
def draw(self):
    if not os.path.isdir(self.txt_folder):
        print("Folder does not exist!")
        return False
    txt_file_list = os.listdir(self.txt_folder)
    for i in range(len(txt_file_list)):
        log_data_list = self.logparse(self.txt_folder + txt_file_list[i])
        for j in range(len(log_data_list)):
            plt.figure()
            input_data = log_data_list[j]["data"]
            plt.hist(input_data)
            plt.grid()
            title_str = "epoch:%d, block:%d, layer:%d" % (log_data_list[j]["epoch"],
                                                          log_data_list[j]["block"],
                                                          log_data_list[j]["layer"])
            plt.title(title_str)
            save_path = self.fig_folder + "epoch_%d_block_%d_layer_%d.png" % (
                log_data_list[j]["epoch"], log_data_list[j]["block"], log_data_list[j]["layer"])
            plt.savefig(save_path, format="png")
            plt.close()
def fit_koff(nmax=523, NN=4e8, **params):
    tbind = params.pop("tbind")
    params["kd"] = 1e9/tbind
    dx = params.pop("dx")
    rw = randomwalk.get_rw(NAME, params, setup=setup_rw, calc=True)
    rw.domains[1].dx = dx
    times = draw_empirically(rw, N=NN, nmax=nmax, success=False)
    bins = np.logspace(np.log10(min(times)), np.log10(max(times)), 35)
    #bins = np.logspace(-3., 2., 35)
    hist, _ = np.histogram(times, bins=bins)
    cfd = np.cumsum(hist)/float(np.sum(hist))
    t = 0.5*(bins[:-1] + bins[1:])
    tmean = times.mean()
    toff = NLS(t, cfd, t0=tmean)
    koff = 1./toff
    return dict(t=t, cfd=cfd, toff=toff, tmean=tmean, koff=koff)

##### run rw in collect mode and draw bindings from empirical distributions
def exponential_hist(times, a, b, **params):
    cutoff = 0.03  # cutoff frequency in ms
    if len(times) == 0:
        return
    bins = np.logspace(a, b, 100)
    hist = plt.hist(times, bins=bins, alpha=0.5, **params)
    plt.xscale("log")
    params.pop("label")
    color = params.pop("color")
    total = integrate_hist(hist, cutoff)
    if sum(times > cutoff) == 0:
        return
    tmean = times[times > cutoff].mean()
    T = np.logspace(a-3, b, 1000)
    fT = np.exp(-T/tmean)*T/tmean
    fT *= total/integrate_values(T, fT, cutoff)
    plt.plot(T, fT, label="exp. fit, mean = %.2f ms" % (tmean,),
             color="dark" + color, **params)
    plt.xlim(10**a, 10**b)
def plot_weight_histogram(model, outfile, lower=-0.25, upper=0.25):
    n = len(model.params)
    plt.clf()
    for (i, theano_shared_params) in enumerate(model.params):
        weights = theano_shared_params.get_value()
        values = weights.flatten()
        plt.subplot(n, 1, i+1)
        frame = plt.gca()
        frame.axes.get_yaxis().set_ticks([])
        if i != n-1:  ## only keep bottom one
            frame.axes.get_xaxis().set_ticks([])
        plt.hist(values, 100)
        plt.xlim(lower, upper)
        print(' param no. %s' % (i))
        print(get_stats(theano_shared_params))
    plt.savefig(outfile)
    print('Made plot %s' % (outfile))
def read_length_histogram(raw_sequences_file, path, raw_sequences_filetype='FASTQ'):
    # Creates a histogram of read lengths
    if raw_sequences_filetype == "FASTQ":
        iter_seq = util.iter_fsq
    else:
        iter_seq = util.iter_fst
    x = []
    counter = 0
    for record in iter_seq(raw_sequences_file):
        [sid, seq] = record[:2]
        counter = counter + 1
        if (counter > 100000):
            break
        x.append(len(seq))
    x = np.array(x)
    plt.figure()
    plt.hist(x, 50)
    plt.title('Distribution of amplicon read lengths')
    plt.xlabel('Read length')
    plt.ylabel('Freq')
    plt.savefig(os.path.join(path, 'read_lengths_distribution.png'))
def plot(irl, sim):
    plt.figure(1)
    plt.hist(irl, bins='auto')
    plt.axis([0, 16, 0, 100])
    plt.title('Final Four Seeding Distribution (Real Life)')
    plt.figure(2)
    plt.hist(sim, bins='auto')
    plt.axis([0, 16, 0, 100])
    plt.title('Final Four Seeding Distribution (Simulation)')
    plt.show()
    return

# Returns statistics on the similarity of the distributions of the historical
# Final 4 seeding and the seeds of the simulated Final 4 teams.
def equalize(df):
    df = df.fillna(0)
    #df[df['probability'] < 0.4] = 0
    relevant = df[df['probability'] >= 0.4]
    #print(relevant)
    #print(relevant['probability'].values)
    #relevant = histeq(relevant['probability'].values, len(relevant['probability'].values))
    #print(relevant['probability'].values.mean())
    plt.hist(df[df['probability'] > 0.9]['probability'], 60)
    plt.show()
    return df

# !!!!!!!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# This code is made for submission files, which have an index column.
# These columns are removed in this code.
# If your submission file doesn't have an index column,
# comment out both line 20 and line 26.

# This function merges submission files. If files contain the same
# seriesuid, coordX, coordY, coordZ combination, the probabilities are
# ensembled by taking the average. The order of the nodules in the
# submission files doesn't matter either, provided the files contain the
# same nodules. All the submission files have to be added to a folder
# called submission; all the files in this folder are ensembled.
def _freedman_diaconis_bins(self, *arrays: t.Tuple[t.List[Number]]) -> int:
    """
    Calculate the number of hist bins using the Freedman-Diaconis rule.
    If more than one array is passed, the maximum number of bins
    calculated for each array is used.

    Adapted from seaborn's source code (adapted originally from
    http://stats.stackexchange.com/questions/798/).
    """
    import seaborn as sns

    def freedman_diaconis(array: np.ndarray):
        array = [a for a in array if not math.isnan(a)]
        h = 2 * sns.utils.iqr(array) / (len(array) ** (1 / 3))
        # fall back to sqrt(a) bins if iqr is 0
        if h == 0:
            return int(np.sqrt(len(array)))
        else:
            return int(np.ceil((max(array) - min(array)) / h))

    return max(map(freedman_diaconis, arrays))
def plot_hist(runornot):
    while runornot:
        plt.figure(1)
        timelist = ['year', 'month', 'day', 'hour', 'dayofweek']
        layoutlist = [231, 232, 234, 235, 236]
        for timeiterm in timelist:
            plt.subplot(layoutlist[timelist.index(timeiterm)])
            plt.hist(blog[timeiterm].values,
                     bins=len(set(blog[timeiterm].values)),
                     facecolor='blue', alpha=0.5)
            #plt.xlabel()
            plt.ylabel('freq')
            plt.title(timeiterm)
            #plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
            #plt.axis([40, 160, 0, 0.03])
            #plt.grid(True)
        plt.show()
        #.title('Histogram')
    plt.show()
def plot_atoms_as_histograms(dictionary):
    """
    It plots the atoms composing the dictionary as histograms.

    Parameters
    ----------
    dictionary: array_like, shape=(n_atoms, n_features)
    """
    for i in range(0, dictionary.shape[0]):
        fig = plt.figure()
        fig.canvas.set_window_title(str(i+1) + " atom")
        length = len(dictionary[i, :])
        x = np.asarray(range(0, length))
        w = dictionary[i, :]
        plt.hist(x, bins=length, weights=w)
        plt.xlim((0, dictionary.shape[1]))
        plt.show()
def histograms_plot():
    """ histograms plot """
    # generate normally distributed sample data
    mu, sigma = 100, 15
    x = mu + sigma * np.random.randn(10000)

    # number of histogram bins
    num_bins = 50
    # plot the histogram, normalised to a probability density
    # (density=True replaces the 'normed' argument removed in matplotlib 3.1)
    n, bins, patches = plt.hist(x, bins=num_bins, density=True,
                                color="green", alpha=0.6, label="hist")

    # overlay the corresponding normal probability density
    # (scipy.stats.norm.pdf replaces mlab.normpdf, removed in matplotlib 3.1)
    from scipy.stats import norm
    y = norm.pdf(bins, mu, sigma)
    plt.plot(bins, y, "r--", label="line")

    # add a legend
    plt.legend(loc="upper left", shadow=True)

    # show the figure
    plt.show()
    return

# histograms_plot()
def compare_keypress_delays(self, *args):
    all_delays = []
    for count, file_index in enumerate(args):
        instance = self.read_data_file(file_index)
        delay = instance.keypress_delay_average()
        plt.hist(int(delay), [0, 1])
        all_delays.append(delay)
    # plt.axis([0, count, 0, int(max(all_delays))])
    plt.show()
    return all_delays
def plot_scattermatrix(df, **kwargs):
    """plot a scattermatrix from dataframe
    """
    if df is None:
        logger.log(loglevel_debug, "plot_scattermatrix: no data passed")
        return

    # df = pd.DataFrame(X, columns=['x1_t', 'x2_t', 'x1_tptau', 'x2_tptau', 'u_t'])
    # scatter_data_raw = np.hstack((np.array(Xs), np.array(Ys)))
    # scatter_data_raw = np.hstack((Xs, Ys))
    # logger.log(loglevel_debug, "scatter_data_raw", scatter_data_raw.shape)

    plt.ioff()
    # df = pd.DataFrame(scatter_data_raw, columns=["x_%d" % i for i in range(scatter_data_raw.shape[1])])
    sm = scatter_matrix(df, ax=kwargs['ax'], alpha=0.2, figsize=(10, 10), diagonal='hist')
    print(type(sm), sm.shape, sm[0, 0])
    # fig = sm[0, 0].get_figure()
    # if SAVEPLOTS:
    #     fig.savefig("fig_%03d_scattermatrix.pdf" % (fig.number), dpi=300)
    # fig.show()
    # plt.show()
def compare_fits(x):
    shape, scale = fit(x)
    app_shape, app_scale = x.mean() / x.std(), x.mean()
    # _, np_shape, _, np_scale = exponweib.fit(x, floc=0)

    # # Plot
    # def weib(x, n, a):  # a == shape
    #     return (a / n) * (x / n)**(a - 1) * np.exp(-(x / n)**a)
    #
    # count, _, _ = plt.hist(x, 100)
    # xx = np.linspace(x.min(), x.max(), 10000)
    # yy = weib(xx, scale, shape)
    # yy_app = weib(xx, app_scale, app_shape)
    # yy_np = weib(xx, np_scale, np_shape)
    # plt.plot(xx, yy*(count.max() / yy.max()), label='MLE')
    # plt.plot(xx, yy_app*(count.max() / yy_app.max()), label='App')
    # plt.plot(xx, yy_np*(count.max() / yy_np.max()), label='Scipy')
    # plt.legend()
    # plt.show()

    return (shape, scale), (app_shape, app_scale)
def limite_central2():
    N = 5000
    k = 1.99999999
    r = evalua(k, N)
    np.random.shuffle(r)
    epsilon = .1
    x1 = np.zeros(N)
    mu = 0
    for i in range(N):
        np.random.shuffle(r)
        x1[i] = sum(r[:i]) / (i+1)
    # density=True replaces the 'normed' argument removed in matplotlib 3.1
    plt.hist(x1, bins=1000, range=(mu - epsilon, mu + epsilon), density=True)
    formatter = FuncFormatter(to_percent)
    plt.gca().yaxis.set_major_formatter(formatter)
def plot_all_times_to_correct_decision(self, thr=0.5, stay_above=True, unit="spikes",
                                       spikemeasure="growing_spikecount", do_title=True):
    times = np.array([self.time_to_correct_decision(e, thr, stay_above, unit, spikemeasure)
                      for e in self.experiments]).flatten()
    # times[30:50] = np.inf
    maximum = int(np.ceil(max(times[times != np.inf])))
    plt_inf = maximum + 2
    # for unsuccessful trials (time=inf), set time to some value distinct from any actual decision time.
    times[times == np.inf] = plt_inf
    fig = plt.figure(figsize=(hcPlotting.fig_width, hcPlotting.fig_height/3))
    bins = np.hstack([np.arange(0.25, maximum+1, 0.5), [plt_inf, plt_inf+1]])
    n, _, _ = plt.hist(times, bins, color='k', edgecolor='w')
    ax = plt.gca()
    ax.set_xlim((0, plt_inf+1))
    ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1]+1)
    plt.plot((plt_inf, plt_inf), (0, ax.get_ylim()[1]), 'r')
    # list() is needed on Python 3, where range() is not a list
    ax.set_xticks(list(range(maximum+1)) + [plt_inf+0.5])
    ax.set_xticklabels([str(i) for i in range(maximum+1)] + [r'$\infty$'])
    ax.set_ylabel("nr. of trials")
    ax.set_xlabel("spikes observed before classification")
    if do_title:
        plt.title("thr = " + str(thr) + ", stay_above = " + str(stay_above) +
                  ", classes: " + " vs. ".join(self.classes))
def create_size_histogram(db, args):
    rows = db.execute('SELECT maxsize, ordertype FROM orderbook;').fetchall()
    rows = [o for o in rows if o["ordertype"] in filtered_offername_list]
    ordersizes = sorted([r['maxsize'] / 1e8 for r in rows])

    fig = plt.figure()
    scale = args.get("scale")
    if (scale is not None) and (scale[0] == "log"):
        ratio = ordersizes[-1] / ordersizes[0]
        step = ratio ** 0.0333  # 1/30
        bins = [ordersizes[0] * (step ** i) for i in range(30)]
    else:
        bins = 30
    plt.hist(ordersizes, bins, histtype='bar', rwidth=0.8)
    if bins != 30:  # 'is not' compared identity rather than value
        fig.axes[0].set_xscale('log')
    plt.grid()
    plt.xlabel('Order sizes / btc')
    plt.ylabel('Frequency')
    return get_graph_html(fig) + ("<br/><a href='?scale=log'>log scale</a>" if bins == 30
                                  else "<br/><a href='?'>linear</a>")
def plot_retx(self):
    """
    Plot the distribution of the retransmission counter for all packets
    :return:
    """
    retx = []
    for pkt in self.packets:
        for hop in pkt.hop_info:
            if hop['retx'] != 0:
                retx.append(hop['retx'])
            else:
                raise RuntimeError
    plt.figure()
    plt.hist(retx)
def calculate_histogram(self):
    slice = self.pick_slice.value()
    ax = self.figure.add_subplot(111)
    # note: ax.hold() was removed in matplotlib 3.0; axes hold by default
    plt.cla()
    n_channels = len(self.zcoord)
    hues = np.arange(0, 1, 1 / n_channels)
    self.colors = [colorsys.hsv_to_rgb(_, 1, 1) for _ in hues]
    self.bins = np.arange(np.amin(np.hstack(self.zcoord)),
                          np.amax(np.hstack(self.zcoord)), slice)
    self.patches = []
    for i in range(len(self.zcoord)):
        # density=True replaces the 'normed' argument removed in matplotlib 3.1
        n, bins, patches = plt.hist(self.zcoord[i], self.bins, density=True,
                                    facecolor=self.colors[i], alpha=0.5)
        self.patches.append(patches)
    plt.xlabel('Z-Coordinate [nm]')
    plt.ylabel('Counts')
    plt.title(r'$\mathrm{Histogram\ of\ Z:}$')
    # refresh canvas
    self.canvas.draw()
    self.sl.setMaximum(len(self.bins)-2)
    #self.sl.setValue(np.ceil((len(self.bins)-2)/2))
def plot_hist(item, figure_id=1):
    pt.figure(figure_id)
    kurtosis = -np.ones(8)
    for i in range(item.shape[1]):
        # the original subplot(240+i) is off by one: subplot indices start at 1
        pt.subplot(2, 4, i+1)
        tmp = item[item[:, i] != -1, i]
        tmp = tmp + np.random.rand(len(tmp)) - 0.5
        # density=True replaces the 'normed' argument removed in matplotlib 3.1
        pt.hist(tmp, bins=6, density=True, range=(0.9, 6.1), alpha=0.8, color=colorc[i])
        pt.title(name[i])
        density = kde.gaussian_kde(tmp)
        xgrid = np.linspace(0, 6, 100)
        pt.plot(xgrid, density(xgrid), 'r-')
        avg = np.mean(tmp)
        sd = np.std(tmp)
        # scipy.stats.norm.pdf replaces the removed matplotlib normpdf helper
        pt.plot(xgrid, sps.norm.pdf(xgrid, avg, sd))
        pt.show()
        kurtosis[i] = sps.kurtosis(item[item[:, i] != -1, i])
    return kurtosis
def runobsplot(self):
    """
    A quick histogram to see the intrinsic variance compared to the initial estimate
    """
    tdmin = self.iniest.td - 3.0*self.iniest.tderr
    tdmax = self.iniest.td + 3.0*self.iniest.tderr

    fig = plt.figure(figsize=(6, 3))
    fig.subplots_adjust(top=0.95, bottom=0.2)
    if len(self.obsmesdelays) != 0:
        plt.hist(self.obsmesdelays, range=(tdmin, tdmax), bins=200, color="green", lw=0)
    plt.xlim(tdmin, tdmax)
    plt.xlabel("Delay [day]")
    plt.ylabel("Counts")
    #ax = plt.gca()
    plt.figtext(0.15, 0.8, "Intrinsic/initial error ratio: %.2f" % self.intrinsicratio)
    plt.axvline(self.iniest.td - self.iniest.tderr, color="gray", linestyle="-", zorder=20)
    plt.axvline(self.iniest.td + self.iniest.tderr, color="gray", linestyle="-", zorder=20)
    plt.axvline(self.outest.td, color="red", linestyle="-", zorder=20)
    plt.savefig(os.path.join(self.plotdir, "intrinsic_variance.png"))
    plt.close()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("answer_files", nargs="+")
    args = parser.parse_args()

    dfs = {}
    for x in args.answer_files:
        name = basename(x)
        name = name[:name.rfind(".")]
        dfs[name] = pd.read_csv(x)

    for k, df in dfs.items():
        df = df[df["n_answers"] > 0]
        plt.hist(df["predicted_score"] - df["predicted_score"].mean(), 50,
                 label=k, alpha=0.5)

    plt.legend()
    plt.show()
def std_error(y, predicted_y):
    std_error_list = []
    for index in range(0, len(y)):
        std_error_list.append((np.absolute(y[index] - predicted_y[index]) / y[index])[0])
    std_error_list = np.asarray(std_error_list)
    # print(std_error_list.max())

    count = 0
    threshold = 0.20
    for item in std_error_list:
        if item <= threshold:
            count += 1

    # the original print statements were mis-encoded Chinese; reconstructed in English
    print('total samples:', len(std_error_list))
    print('samples with relative error <=', threshold, ':', count)
    print('fraction with relative error <=', threshold, ':', count / float(len(std_error_list)))
    print('mean relative error:', std_error_list.mean())
    print('std of relative error:', std_error_list.std())

    plt.hist(std_error_list, rwidth=0.7)
    plt.show()
def DrawHist(pl, shs):
    """Draw a histogram of the values; shs: input array."""
    shs = np.array(shs, dtype=float)
    #print("mean: %.2f" % shs.mean())
    shs = shs[np.isnan(shs) == False]
    if len(shs) > 0:
        pl.figure()
        pl.hist(shs)

    def ShowHitCount(shs):
        # number of trades actually taken
        go_count = len(shs) - len(shs[np.isnan(shs)])
        # trade ratio
        if len(shs) != 0:
            v = float(go_count) / float(len(shs))
            #print("trade ratio: %.2f%%" % (v*100))
        # win ratio
        if go_count > 0:
            v = float(len(shs[shs > 0])) / float(go_count)
            #print("win ratio: %.2f%%" % (v*100))

    pl.show()
    #ShowHitCount(shs)
def plotEnergyHistogram(sortedEnergy):
    # Energy distribution histogram
    lowerRange = sortedEnergy[0]
    # Disregard the highest-energy polymers, because they have much higher
    # energy than the rest and ruin the histogram.
    upperRange = sortedEnergy[int(c.histFraction*len(sortedEnergy))]

    # Freedman-Diaconis method for determining optimal bin size
    q1 = sortedEnergy[int(0.25*c.histFraction*len(sortedEnergy))]
    q3 = sortedEnergy[int(0.75*c.histFraction*len(sortedEnergy))]
    IQR = q3 - q1
    h = 2*IQR*(c.histFraction*len(sortedEnergy))**(-1/3)
    b = (upperRange-lowerRange)/h

    plt.figure(4)
    n, bins, patches = plt.hist(sortedEnergy, int(b), range=(lowerRange, upperRange),
                                facecolor='green')
    plt.ylim([0, 1.5*np.max(n)])
    plt.xlabel('Potential')
    plt.ylabel('Number of polymers')
    plt.title('Total energy distribution')
def newsgroups_class_distrib():
    from sklearn.datasets import fetch_20newsgroups
    ngroup_test = fetch_20newsgroups(subset='test',
                                     remove=('headers', 'footers', 'quotes'),
                                     categories=None)
    ngroup_train = fetch_20newsgroups(subset='train',
                                      remove=('headers', 'footers', 'quotes'),
                                      categories=None)
    test_data = ngroup_test.data
    train_data = ngroup_train.data
    test_groups = ngroup_test.target
    train_groups = ngroup_train.target

    n = 2000
    train_groups = train_groups[:n]
    test_groups = test_groups[:n]

    plt.figure()
    # density=True replaces the 'normed' argument removed in matplotlib 3.1
    plt.hist(train_groups, 20, density=True, range=(0, 19))
    plt.title("train groups")
    plt.figure()
    plt.hist(test_groups, 20, density=True, range=(0, 19))
    plt.title("test groups")
    plt.show()
def factorization_error(U, Vt, val, batch_size=1024):
    # Get validation data and identify unique users
    val_nz = scipy.sparse.find(val)
    unique_users = np.unique(val_nz[0])

    # Go through and calculate the errors in batches
    all_errors = np.empty(0)
    increments = len(unique_users)//batch_size + 1
    print('Starting error calculation')
    for i in range(increments):
        rows = unique_users[i*batch_size:min((i+1)*batch_size, len(unique_users))]
        val_data = val[rows, :].toarray()
        user_data = np.dot(U[rows, :], Vt)
        user_data[user_data > 10] = 10
        user_data[user_data < 0] = 0
        error = np.abs(np.multiply(user_data - val_data, val_data != 0))
        error = error[np.nonzero(error)]
        all_errors = np.append(all_errors, error)
        mean_error, per_comp = np.mean(all_errors), i*1.0/increments
        print('Mean Error: ' + str(mean_error) + ', Percent complete: ' + str(per_comp))
    error = [np.mean(all_errors), np.std(all_errors),
             np.sqrt(np.mean(np.power(all_errors, 2)))]
    plt.hist(all_errors)
    plt.title('Ratings errors')
    plt.show()
    return error
def draw_histogram(latencies_ms, bins, cutoff_time_ms, draw_xlabel=True, draw_ylabel=True):
    """
    Draw one individual histogram.
    """
    n, bins, patches = plt.hist(latencies_ms, bins, color='white', hatch='/')
    if draw_xlabel:
        plt.xlabel("Packet latency (ms)")
    if draw_ylabel:
        plt.ylabel("Frequency")
    plt.gca().set_xscale("log")
    plt.gca().xaxis.set_major_formatter(ScalarFormatter())
    plt.xlim([min(bins), max(bins)])
    plt.xticks([1, cutoff_time_ms, 100])
def Plot_HIST_Fn(label, distance, save_path, num_bins=50):
    dissimilarity = distance[:]
    gen_dissimilarity_original = []
    imp_dissimilarity_original = []
    for i in range(len(label)):
        if label[i] == 1:
            gen_dissimilarity_original.append(dissimilarity[i])
        else:
            imp_dissimilarity_original.append(dissimilarity[i])

    bins = np.linspace(np.amin(distance), np.amax(distance), num_bins)
    fig = plt.figure()
    # the original passed normed=False, which was the default and was removed in matplotlib 3.1
    plt.hist(gen_dissimilarity_original, bins, alpha=0.5, facecolor='blue',
             label='gen_dist_original')
    plt.hist(imp_dissimilarity_original, bins, alpha=0.5, facecolor='red',
             label='imp_dist_original')
    plt.legend(loc='upper right')
    plt.title('OriginalFeatures_Histogram.jpg')
    plt.show()
    fig.savefig(save_path)
def plot_his(inputs, inputs_norm):
    # plot histogram for the inputs of every layer
    for j, all_inputs in enumerate([inputs, inputs_norm]):
        for i, input in enumerate(all_inputs):
            plt.subplot(2, len(all_inputs), j*len(all_inputs)+(i+1))
            plt.cla()
            if i == 0:
                the_range = (-7, 10)
            else:
                the_range = (-1, 1)
            plt.hist(input.ravel(), bins=15, range=the_range, color='#FF5733')
            plt.yticks(())
            if j == 1:
                plt.xticks(the_range)
            else:
                plt.xticks(())
            ax = plt.gca()
            ax.spines['right'].set_color('none')
            ax.spines['top'].set_color('none')
        plt.title("%s normalizing" % ("Without" if j == 0 else "With"))
    plt.draw()
    plt.pause(0.01)
def length_histogram(fqin, name):
    '''
    Create a histogram, and return the bin edges of the bin containing the most reads
    '''
    logging.info("Creating length histogram to find bin with most reads.")
    lengths = get_lengths(fqin)
    plt.hist(lengths, bins='auto')
    plt.savefig(name, format='png', dpi=100)
    plt.close("all")
    hist, bin_edges = np.histogram(lengths, bins='auto')
    maxindex = np.argmax(hist)
    return (bin_edges[maxindex], bin_edges[maxindex + 1])
def plotTimeHistogram(times, filename):  # times in ms
    #TODO: understand params and vars
    hist, bins = np.histogram([i/1000 for i in times], bins=50)  # times to s
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    fig, ax = plt.subplots()
    plt.xlabel('time [s]')
    plt.ylabel('#files')
    ax.bar(center, hist, align='center', width=width)
    fig.savefig(filename)
def histogram_commit_periods(self):
    plt.hist(self.tickets.CommitPeriod, bins=20, color=self.color_map[1])
def histogram_changed_lines(self):
    plt.hist(self.changed_lines, bins=30, color=self.color_map[3])
def main():
    greyhounds = 500
    labs = 500
    grey_height = 28 + 4 * np.random.randn(greyhounds)
    lab_height = 24 + 4 * np.random.randn(labs)
    plt.hist([grey_height, lab_height], stacked=True, color=['r', 'b'])
    plt.show()

# Run main