我们从Python开源项目中,提取了以下42个代码示例,用于说明如何使用matplotlib.ticker.FuncFormatter()。
def plot(self, df, database_name, test_name, y_label):
    """Draw the smoothed curves for one test and save them as an SVG file.

    The frame is smoothed with a 70-row rolling mean before plotting. Both
    axes start at zero and extend 5% beyond the largest smoothed value. The
    figure is written to ``<results_path>/<database_name> - <test_name>.svg``.

    :param df: pandas DataFrame holding the measurements to plot.
    :param database_name: used in the output file name.
    :param test_name: plot title; also used in the output file name.
    :param y_label: y-axis label; when it is a key of
        ``self.ticks_formatters`` the matching formatter is applied to the
        y axis.
    """
    smoothed = df.rolling(70).mean()
    x_limits = (0, smoothed.index.max() * 1.05)
    y_limits = (0, smoothed.max().max() * 1.05)
    ax = smoothed.plot(
        title=test_name, alpha=0.8, xlim=x_limits, ylim=y_limits)
    ax.set(xlabel='Amount of objects in table', ylabel=y_label)

    def _x_tick_label(v, pos):
        return prefix_unit(v, '', -3)

    ax.xaxis.set_major_formatter(FuncFormatter(_x_tick_label))
    if y_label in self.ticks_formatters:
        ax.yaxis.set_major_formatter(self.ticks_formatters[y_label])

    # The legend is anchored in figure coordinates, below the axes.
    legend = ax.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, 0.0),
        bbox_transform=plt.gcf().transFigure,
        fancybox=True,
        shadow=True,
        ncol=3,
    )
    file_name = '%s - %s.svg' % (database_name, test_name)
    target = os.path.join(self.results_path, file_name)
    # Passing the legend as an extra artist keeps it inside the tight bbox.
    plt.savefig(target, bbox_extra_artists=(legend,), bbox_inches='tight')
def read_data_for_battery_plot(self):
    """Redraw the battery plot from the charge history.

    Each history entry supplies the x value (a timestamp, rendered as local
    ``HH:MM``) in position 0 and the y value in position 1.

    :return: always ``True``.
    """
    def format_date(x, pos=None):
        # Tick values are seconds since the epoch; show local wall-clock time.
        return time.strftime('%H:%M', time.localtime(x))

    history = BatteryDriver().get_history_charge()
    samples = [(entry[0], entry[1]) for entry in history]
    timestamps = [sample[0] for sample in samples]
    levels = [sample[1] for sample in samples]

    axes = self.ax
    axes.cla()
    axes.set_xlim(min(timestamps), max(timestamps))
    axes.set_ylim(-10, 110)
    axes.grid(True)
    axes.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
    self.fig.autofmt_xdate()
    axes.plot(timestamps, levels)
    self.fig.canvas.draw()
    return True
def __init__(self, infile, outfile, analysis_type, plot_format,
             plot_title, src_reverse, debug):
    """Store the run options and build the unit-scaling tick formatters.

    The formatters print a tick value with ``'{0:g}'`` after dividing it by
    1e-3 (milli), 1e+3 (kilo) or 1e+6 (mega), or after multiplying it by
    100 (percent).
    """
    def _divided_by(unit):
        # One formatter per unit; the tick value is divided by ``unit``.
        return ticker.FuncFormatter(
            lambda y, pos: '{0:g}'.format(y / unit))

    self._infile = infile
    self._outfile = outfile
    self._analysis_type = analysis_type
    self._plot_format = plot_format
    self._plot_title = plot_title
    self._src_reverse = src_reverse
    self._debug = debug

    self._format_milli = _divided_by(1e-3)
    self._format_kilo = _divided_by(1e+3)
    self._format_mega = _divided_by(1e+6)

    percent_factor = 100
    self._format_percent = ticker.FuncFormatter(
        lambda y, pos: '{0:g}'.format(y * percent_factor))
def limite_central2():
    """Plot a histogram of running means of a reshuffled sample.

    A sample of ``N`` values is obtained from ``evalua(k, N)``. For each
    ``i`` the sample is reshuffled and the mean of its first ``i + 1``
    values is stored. The histogram of those means is drawn on the current
    axes within ``mu +/- epsilon``, with y tick labels rendered by
    ``to_percent``.
    """
    N = 5000
    k = 1.99999999
    r = evalua(k, N)
    epsilon = .1
    mu = 0
    x1 = zeros(N)
    for i in range(N):
        np.random.shuffle(r)
        # Average exactly i + 1 values: the slice must match the divisor
        # (the previous r[:i] summed one value fewer than it divided by).
        x1[i] = sum(r[:i + 1]) / (i + 1)
    # ``density`` replaces the ``normed`` keyword, which current matplotlib
    # no longer accepts.
    plt.hist(x1, bins=1000, range=(mu - epsilon, mu + epsilon), density=True)
    formatter = FuncFormatter(to_percent)
    plt.gca().yaxis.set_major_formatter(formatter)
def plot(self, title='Rating Curve', log=True):
    """Show the rating curve: measured points plus the fitted model line.

    :param title: figure title.
    :param log: when true, both axes use a log scale and the y axis is
        limited to [0.01, 100].
    """
    def _plain_decimal(value, pos):
        # Print as many decimals as the magnitude of the tick needs
        # (none for values >= 1), instead of scientific notation.
        decimals = int(np.maximum(-np.log10(value), 0))
        template = '{{:.{:1d}f}}'.format(decimals)
        return template.format(value)

    fig = plt.figure()
    ax1 = fig.add_subplot(111, facecolor=[.95, .95, .95])
    plt.grid(True, which='both', color='w', ls='-', zorder=0)
    ax1.scatter(self.stage, self.discharge, color='k', s=10)
    ax1.set_ylabel(r'Discharge, cfs')
    ax1.set_xlabel(r'Stage, ft')

    if log:
        ax1.set_ylim(0.01, 100)
        # log scale x and y
        ax1.set_yscale('log')
        ax1.set_xscale('log')
        # Each axis gets its own formatter instance.
        ax1.yaxis.set_major_formatter(FuncFormatter(_plain_decimal))
        ax1.xaxis.set_major_formatter(FuncFormatter(_plain_decimal))

    plt.title(title)
    ax1.set_axisbelow(True)  # keep the grid underneath the data

    # Write the fitted equation inside the plot.
    coefficient, exponent = self.popt[0], self.popt[1]
    ax1.text(0.05, 0.7, f'y = {coefficient:.3f}x^{exponent:.3f}',
             fontsize=15, transform=ax1.transAxes)

    # Draw the model line across the observed stage range.
    stages = np.linspace(min(self.stage), max(self.stage), 100)
    ax1.plot(stages, exp_curve(stages, self.popt[0], self.popt[1]), color='k')
    plt.show()
def create_example_s_curve_plot(self):
    """Show an example S-shaped response curve for radio spend.

    The curve is ``self.logistic_function`` evaluated on 0..20000 in steps
    of 100 with ``L=10000``, ``k=0.0007`` and ``x_0=10000``. Both axes use
    thousands separators on their tick labels.
    """
    def _with_thousands_sep(x, p):
        return format(int(x), ',')

    # Initialize plot
    fig, ax = plt.subplots(figsize=(8, 4))

    # Plot example S-response curve
    spend = np.arange(0, 20100, 100)
    sales = self.logistic_function(spend, L=10000, k=0.0007, x_0=10000)
    ax.plot(spend, sales, '-', label="Radio")

    # Set plot options and show plot
    ax.legend(loc='right')
    plt.xlim([0, 20000])
    plt.xlabel('Radio spend in euros')
    plt.ylabel('Additional sales')
    plt.title('Example of S-shaped response curve')
    plt.tight_layout()
    plt.grid()
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_major_formatter(tkr.FuncFormatter(_with_thousands_sep))
    plt.show()
def _finalizeFigure(fig, ax, outFile=None, yFormat=None, sideLabel=False, labelColor=None, transparent=False, openFile=False, closeFig=True): if yFormat: func = (lambda x, p: format(int(x), ',')) if yFormat == ',' else (lambda x, p: yFormat % x) formatter = FuncFormatter(func) ax.get_yaxis().set_major_formatter(formatter) if sideLabel: labelColor = labelColor or 'lightgrey' # add the filename down the right side of the plot fig.text(1, 0.5, sideLabel, color=labelColor, weight='ultralight', fontsize=7, va='center', ha='right', rotation=270) if outFile: fig.savefig(outFile, bbox_inches='tight', transparent=transparent) if closeFig: plt.close(fig) if openFile: systemOpenFile(outFile)
def plot_pore_yield_hist():
    """Save a histogram of the total sequence length per (channel, mux).

    The histogram is drawn as a density; the y tick formatter converts each
    density value back into a count per bin and renders it in
    human-friendly units. The image is written into ``PLOTS_DIR``.
    """
    # Close any previous plots
    plt.close('all')
    num_bins = 50
    new_yield_data = ALL_READS.groupby(["channel", "mux"])['seq_length'].sum()
    fig, ax = plt.subplots(1)
    # ``density`` replaces the ``normed`` keyword, which current matplotlib
    # no longer accepts.
    (n, bins, patches) = ax.hist(new_yield_data, num_bins, weights=None,
                                 density=True, facecolor='blue', alpha=0.76)
    ax.xaxis.set_major_formatter(FuncFormatter(x_hist_to_human_readable))

    def y_muxhist_to_human_readable(y, position):
        # density * bin width * number of entries = entries in the bin
        s = humanfriendly.format_size(
            (bins[1] - bins[0]) * y * new_yield_data.count(), binary=False)
        return reformat_human_friendly(s)

    ax.yaxis.set_major_formatter(FuncFormatter(y_muxhist_to_human_readable))

    # Set the titles and axis labels
    ax.set_title(f"Yield by pore {SAMPLE_NAME}")
    ax.grid(color='black', linestyle=':', linewidth=0.5)
    ax.set_xlabel("Yield in single pore")
    ax.set_ylabel("Pores per bin")

    # Ensure labels are not missed.
    fig.tight_layout()
    savefig(os.path.join(
        PLOTS_DIR, f"{SAMPLE_NAME.replace(' ', '_')}_hist_yield_by_pore.png"))
def create_slice(self, context):
    """Configure the slice axes from the model and show the model data.

    Sets title, axis labels, tick formatters/locators and font sizes, resets
    the zoom, adds the two indicator patches, then applies the colormap and
    view limits taken from ``context``.

    :type context: dict
    """
    model = self._model
    axes = self._image.axes  # matplotlib.axes.Axes

    axes.set_title(model.title, fontsize=12)
    axes.tick_params(axis='both')
    axes.set_ylabel(model.y_axis_name, fontsize=9)
    axes.set_xlabel(model.x_axis_name, fontsize=9)

    axis_setup = (
        (axes.get_xaxis(), model.x_axis_formatter),
        (axes.get_yaxis(), model.y_axis_formatter),
    )
    for axis, label_func in axis_setup:
        axis.set_major_formatter(FuncFormatter(label_func))
        axis.set_major_locator(AutoLocator())

    tick_labels = axes.get_xticklabels() + axes.get_yticklabels()
    for tick_label in tick_labels:
        tick_label.set_fontsize(9)

    self._reset_zoom()

    for indicator in (self._vertical_indicator, self._horizontal_indicator):
        axes.add_patch(indicator)
    self._update_indicators(context)

    self._image.set_cmap(cmap=context['colormap'])

    direction = self._model.index_direction['name']
    self._view_limits = context["view_limits"][direction]

    if model.data is not None:
        self._image.set_data(model.data)
def pct_format():
    """Return a tick formatter that renders chart tick labels via ``to_percent``."""
    formatter = ticker.FuncFormatter(to_percent)
    return formatter
def plotYearly(dictframe, ax, uncertainty, color='#0072B2'):
    """Plot the ``yearly`` column of ``dictframe`` against its ``ds`` dates.

    :param dictframe: frame with ``ds`` and ``yearly`` columns (plus
        ``yearly_lower``/``yearly_upper`` when ``uncertainty`` is set).
    :param ax: axes to draw on; a new 10x6 figure is created when ``None``.
    :param uncertainty: shade the band between the lower and upper columns.
    :param color: line and band color.
    :return: the figure that owns the axes.
    """
    if ax is not None:
        figY = ax.get_figure()
    else:
        figY = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figY.add_subplot(111)

    # For every calendar month find the largest index label carrying it ...
    month_of_row = dictframe.ds.dt.month
    last_index_per_month = [
        max(month_of_row[month_of_row == month].index.tolist())
        for month in range(1, 13)
    ]
    # ... and plot from the smallest of those on (this will almost
    # certainly result in only one year being plotted).
    first = min(last_index_per_month)

    ax.plot(dictframe['ds'][first:], dictframe['yearly'][first:],
            ls='-', c=color)
    if uncertainty:
        ax.fill_between(dictframe['ds'].values[first:],
                        dictframe['yearly_lower'][first:],
                        dictframe['yearly_upper'][first:],
                        color=color, alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)

    def _month_and_day(x, pos=None):
        return '{dt:%B} {dt.day}'.format(dt=num2date(x))

    month_locator = MonthLocator(range(1, 13), bymonthday=1, interval=2)
    ax.xaxis.set_major_formatter(FuncFormatter(_month_and_day))
    ax.xaxis.set_major_locator(month_locator)
    ax.set_xlabel('Day of year')
    ax.set_ylabel('yearly')
    figY.tight_layout()
    return figY
def __init__(self, dark):
    """Create the figure, its axes and the canvas.

    X ticks are placed by ``MultipleLocatorWithMargin(600, 0, 0.03)`` and
    labelled as whole minutes (tick value divided by 60).

    :param dark: when true, the axes background is painted black.
    """
    def _minutes_label(x, pos):
        return "{}m".format(int(x/60))

    figure = Figure(figsize=(0, 1000), dpi=75, facecolor='w', edgecolor='k')
    self.figure = figure
    axes = figure.add_axes([0.12, 0.08, 0.75, 0.90])
    self.axes = axes

    figure.patch.set_alpha(0)
    axes.margins(0, 0.05)
    axes.ticklabel_format(useOffset=False)

    x_axis = axes.xaxis
    x_axis.set_major_locator(MultipleLocatorWithMargin(600, 0, 0.03))
    x_axis.set_major_formatter(ticker.FuncFormatter(_minutes_label))

    if dark:
        axes.patch.set_facecolor('black')

    FigureCanvas.__init__(self, figure)
    self.set_size_request(400, 300)
    self.lines = {}
    self.texts = {}
def plot_ohlcv(self, df):
    """Show a candlestick chart of ``df`` with index values as x labels.

    :param df: frame with ``open``, ``high``, ``low`` and ``close`` columns;
        its index supplies the x tick labels.
    """
    fig, ax = plt.subplots()

    # Plot the candlestick
    candlestick2_ohlc(ax, df['open'], df['high'], df['low'], df['close'],
                      width=1, colorup='g', colordown='r', alpha=0.5)

    # shift y-limits of the candlestick plot so that there is space
    # at the bottom for the volume bar chart
    pad = 0.25
    yl = ax.get_ylim()
    ax.set_ylim(yl[0] - (yl[1] - yl[0]) * pad, yl[1])

    # Add a second axis for the volume overlay
    ax2 = ax.twinx()
    ax2.set_position(
        matplotlib.transforms.Bbox([[0.125, 0.1], [0.9, 0.26]]))

    # NOTE: the volume overlay itself is currently not drawn.

    ax.xaxis.set_major_locator(ticker.MaxNLocator(6))

    def mydate(x, pos):
        # Candles are drawn at positions 0..len(df)-1. Ticks outside that
        # range get no label; a negative position must not wrap around to
        # the end of the index, which plain indexing would do.
        position = int(x)
        if 0 <= position < len(df.index):
            return df.index[position]
        return ''

    ax.xaxis.set_major_formatter(ticker.FuncFormatter(mydate))
    plt.margins(0)
    plt.show()
def plot_index_and_sentiment(tick_seq, shindex_seq, sentiment_seq, date):
    """Plot the index and the sentiment series and save the figure as PNG.

    The three sequences must have the same length; otherwise an error is
    printed and nothing is plotted. The index line is red when the last
    value is above the first one and green otherwise. The image is saved to
    ``./Pic/<subdir>/<date>.png``.

    :param tick_seq: x tick labels, one per sample.
    :param shindex_seq: index values (SCI).
    :param sentiment_seq: sentiment values (ISI).
    :param date: used as the file name of the saved image.
    """
    n_ticks = len(tick_seq)
    if not (n_ticks == len(shindex_seq) and n_ticks == len(sentiment_seq)):
        print('error(plot) : three sequence length is not same')
        return

    positions = range(len(shindex_seq))

    def format_fn(tick_val, tick_pos):
        # Label only ticks that fall on an actual sample position.
        index = int(tick_val)
        return tick_seq[index] if index in positions else ''

    fig = plt.figure(figsize=(12, 8))
    p1 = fig.add_subplot(111)
    p1.xaxis.set_major_formatter(FuncFormatter(format_fn))
    p1.xaxis.set_major_locator(MaxNLocator(integer=True, nbins=12))

    delta = shindex_seq[len(shindex_seq) - 1] - shindex_seq[0]
    index_color = "red" if delta > 0 else "green"
    p1.plot(positions, shindex_seq, label="$SCI$", color=index_color,
            linewidth=1)
    p1.plot(positions, sentiment_seq, 'b--', label="$ISI$", color="blue",
            linewidth=1)

    plt.title("Shanghai Composite Index(SCI) & Investor Sentiment Index(ISI)")
    plt.xlabel("Time(5min)")
    plt.ylabel("Index Value")
    plt.legend()
    # plt.show()
    global subdir
    filepath = './Pic/' + subdir + '/' + date + '.png'
    plt.savefig(filepath)
def limite_central():
    """Plot a histogram of means of ``i + 1`` random draws, i = 0..N-1.

    The histogram is drawn on the current axes within ``mu +/- epsilon`` and
    its y tick labels are rendered by ``to_percent``.
    """
    N = 5000
    epsilon = 5e-2
    mu = 0.5
    x1 = zeros(N)
    for i in range(N):
        # Mean of a fresh sample of i + 1 values returned by ``rand``.
        x1[i] = sum(rand(i + 1)) / (i + 1)
    # ``density`` replaces the ``normed`` keyword, which current matplotlib
    # no longer accepts.
    plt.hist(x1, bins=100, range=(mu - epsilon, mu + epsilon), density=True)
    formatter = FuncFormatter(to_percent)
    plt.gca().yaxis.set_major_formatter(formatter)

#limite_central()
def _set_integer_tick_labels(axis, labels):
    """Label integer ticks on ``axis`` from the ``labels`` mapping.

    Tick values missing from ``labels`` get an empty label.
    """
    def _label_for(value, _pos):
        return labels.get(value, '')

    axis.set_major_formatter(FuncFormatter(_label_for))
    axis.set_major_locator(MaxNLocator(integer=True))
def plot_yearly(self, ax=None, uncertainty=True, yearly_start=0):
    """Plot the yearly component of the forecast.

    Parameters
    ----------
    ax: Optional matplotlib Axes to plot on. One will be created if this
        is not provided.
    uncertainty: Optional boolean to plot uncertainty intervals.
    yearly_start: Optional int specifying the start day of the yearly
        seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts
        by 1 day to Jan 2, and so on.

    Returns
    -------
    a list of matplotlib artists
    """
    line_color = '#0072B2'
    if not ax:
        fig = plt.figure(facecolor='w', figsize=(10, 6))
        ax = fig.add_subplot(111)

    # Compute yearly seasonality for a Jan 1 - Dec 31 sequence of dates.
    one_year = pd.date_range(start='2017-01-01', periods=365)
    days = one_year + pd.Timedelta(days=yearly_start)
    df_y = self.seasonality_plot_df(days)
    seas = self.predict_seasonal_components(df_y)

    artists = []
    artists += ax.plot(
        df_y['ds'].dt.to_pydatetime(), seas['yearly'], ls='-', c=line_color)
    if uncertainty:
        band = ax.fill_between(
            df_y['ds'].dt.to_pydatetime(),
            seas['yearly_lower'], seas['yearly_upper'],
            color=line_color, alpha=0.2)
        artists += [band]
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)

    def _month_and_day(x, pos=None):
        return '{dt:%B} {dt.day}'.format(dt=num2date(x))

    month_locator = MonthLocator(range(1, 13), bymonthday=1, interval=2)
    ax.xaxis.set_major_formatter(FuncFormatter(_month_and_day))
    ax.xaxis.set_major_locator(month_locator)
    ax.set_xlabel('Day of year')
    ax.set_ylabel('yearly')
    return artists
def plotDatePrice(productID, productTitle, data):
    """Plot a product's price history and save it as ``<productID>.png``.

    :param productID: used as the base name of the image file.
    :param productTitle: shown in the second line of the plot title.
    :param data: iterable of ``'YYYY-MM-DD|price'`` strings.
    :return: the name of the saved image file.
    """
    # Data setup
    dates, prices = [], []
    for record in data:
        fields = record.split('|')
        day = fields[0]
        amount = float(fields[1])
        dates.append(dt.datetime.strptime(day, '%Y-%m-%d'))
        prices.append(amount)
    date_numbers = matplotlib.dates.date2num(dates)
    x_np, y_np = np.array(date_numbers), np.array(prices)

    # Plot setup
    ax = plt.figure(figsize=(6, 3)).add_subplot(111)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.plot(x_np, y_np, color='lightblue', lw=2)
    ax.margins(0.05)

    def _dollars(value, pos):
        return '$%i' % (value)

    ax.yaxis.set_major_formatter(FuncFormatter(_dollars))
    ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
    plt.yticks(fontsize=8)
    # Leave 30% headroom below the lowest and above the highest price.
    plt.ylim(ymin=min(prices)*0.7, ymax=max(prices)*1.3)
    plt.title('Recent Price History\n'+productTitle, weight='light',
              fontsize=12, y=1.08)
    plt.xticks(rotation=40, fontsize=7)
    plt.tight_layout()

    image_name = productID+'.png'
    plt.savefig(image_name)
    return image_name


# ----- Email Configuration ----------------------------------------------------
def ticklabels_to_percent(ax, axis='y'):
    """Format the major tick labels of one axis as percentages (2 decimals).

    :param ax: the axes to modify.
    :param axis: prefix of the axis attribute, e.g. ``'x'`` or ``'y'``.
    :return: ``ax``, to allow chaining.
    """
    def _as_percent(s, position):
        return '{:0.2%}'.format(s)

    target_axis = getattr(ax, '{}axis'.format(axis))
    target_axis.set_major_formatter(mticker.FuncFormatter(_as_percent))
    return ax
def ticklabels_to_thousands_sep(ax, axis='y'):
    """Format the major tick labels of one axis as integers with commas.

    :param ax: the axes to modify.
    :param axis: prefix of the axis attribute, e.g. ``'x'`` or ``'y'``.
    :return: ``ax``, to allow chaining.
    """
    def _with_separator(s, position):
        return '{:,}'.format(int(s))

    target_axis = getattr(ax, '{}axis'.format(axis))
    target_axis.set_major_formatter(mticker.FuncFormatter(_with_separator))
    return ax
def plot(self, addseries=None, log=True, title='Discharge'):
    """Quick plot of the discharge series, with rain data when available.

    :param addseries: optional flat list ``[time, Q, time, Q, ...]`` of
        additional series to compare. They are drawn in the order given and
        labelled ``Series2``, ``Series3``, ...
    :param log: use a log scale for the y axis.
    :param title: figure title.
    :raises ValueError: if ``addseries`` does not hold complete
        ``(time, Q)`` pairs.
    """
    # A None default avoids sharing one mutable list between calls.
    if addseries is None:
        addseries = []
    if len(addseries) % 2:
        raise ValueError(
            'addseries must contain (time, Q) pairs; got %d items'
            % len(addseries))

    def _plain_decimal(y, pos):
        # Print as many decimals as the magnitude of the tick needs
        # (none for values >= 1), instead of scientific notation.
        decimals = int(np.maximum(-np.log10(y), 0))
        return '{{:.{:1d}f}}'.format(decimals).format(y)

    fig = plt.figure()
    ax1 = fig.add_subplot(111, facecolor=[.95, .95, .95])
    plt.grid(True, which='both', color='w', ls='-', zorder=0)
    ax1.plot(self.time, self.Q, label='Series1')

    if len(self.rain) != 0:
        # Rain goes on a twin axis with an inverted y range (1 down to 0).
        ax2 = ax1.twinx()
        ax2.plot(self.time, self.rain, alpha=.5, c='b', lw=1, label='Rain')
        ax2.set_ylim(1, 0)
        ax2.set_ylabel(r'Rain, in')

    ax1.set_ylabel('Discharge, cfs')
    # NOTE(review): the x data is self.time but the label says stage --
    # confirm the intended label.
    ax1.set_xlabel('Stage, ft')

    # log scale for y axis
    if log:
        ax1.set_yscale('log')
        ax1.yaxis.set_major_formatter(FuncFormatter(_plain_decimal))

    # Additional series are plotted in the order given, so that the first
    # extra pair is Series2 (the old loop walked the list backwards and
    # gave the last pair that label).
    pair_starts = range(0, len(addseries), 2)
    for number, start in enumerate(pair_starts, start=2):
        ax1.plot(addseries[start], addseries[start + 1],
                 label=f'Series{number}')

    ax1.legend(loc='best')
    plt.title(title)
    plt.show()
def plot(outfn, a, genomeSize, base2chr, _windowSize, dpi=300, ext="svg"):
    """Save contact plot

    :param outfn: output file name without extension.
    :param a: contact matrix; drawn as ``a + 1`` on a log color scale.
    :param genomeSize: extent of both image axes.
    :param base2chr: mapping of genome position -> chromosome name, used to
        label the Y axis when it has fewer than 50 entries.
    :param _windowSize: window size; shown in the title divided by 1000.
    :param dpi: resolution passed to ``savefig``.
    :param ext: output file extension / format.
    """
    # invert base2chr (positions are mirrored against the genome size)
    # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
    base2chr = {genomeSize - b: c for b, c in base2chr.items()}

    def format_fn(tick_val, tick_pos):
        """Mark axis ticks with chromosome names"""
        position = int(tick_val)
        if position in base2chr:
            return base2chr[position]
        sys.stderr.write("[WARNING] %s not in ticks!\n" % tick_val)
        return ''

    # start figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title("Contact intensity plot [%sk]" % (_windowSize/1000,))

    # label Y axis with chromosome names
    if len(base2chr) < 50:
        ax.yaxis.set_major_formatter(FuncFormatter(format_fn))
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        # A real list is needed here; a Python 3 dict view is not a sequence.
        plt.yticks(list(base2chr))
        ax.set_ylabel("Chromosomes")
    else:
        ax.set_ylabel("Genome position")

    # label axes
    ax.set_xlabel("Genome position")
    plt.imshow(a + 1, cmap=cm.hot, norm=LogNorm(),
               extent=(0, genomeSize, 0, genomeSize))
    plt.colorbar()

    # save
    # NOTE(review): ``papertype`` appears to be deprecated in recent
    # matplotlib releases -- confirm against the version in use.
    fig.savefig("%s.%s" % (outfn, ext), dpi=dpi, papertype="a4")
def pimp_axis(x_or_y_ax):
    """Install ``ticks_formatter`` as the major tick formatter of the axis.

    The original note for this helper: remove trailing zeros.
    """
    formatter = ticker.FuncFormatter(ticks_formatter)
    x_or_y_ax.set_major_formatter(formatter)
def plot_yield_general():
    """Save the cumulative yield over run duration as a PNG in ``PLOTS_DIR``."""
    # Close any previous plots
    plt.close('all')

    # Set subplots.
    fig, ax = plt.subplots(1)

    durations = YIELD_DATA['duration_float']

    # Seven evenly spaced ticks across the run, including both end points.
    num_points = 7
    ax.set_xticks(np.linspace(durations.min(), durations.max(), num_points))

    # Define axis formatters
    ax.yaxis.set_major_formatter(FuncFormatter(y_yield_to_human_readable))
    ax.xaxis.set_major_formatter(FuncFormatter(x_yield_to_human_readable))

    # Set x and y labels and title
    ax.set_xlabel("Duration (HH:MM)")
    ax.set_ylabel("Yield")
    ax.set_title(f"Yield for {SAMPLE_NAME} over time")

    # Produce plot
    ax.plot(YIELD_DATA['duration_float'], YIELD_DATA['cumsum_bp'],
            linestyle="solid", markevery=[])

    # Limits must be set after the plot is created
    ax.set_xlim(YIELD_DATA['duration_float'].min(),
                YIELD_DATA['duration_float'].max())
    ax.set_ylim(ymin=0)

    # Ensure labels are not missed.
    fig.tight_layout()
    file_name = f"{SAMPLE_NAME.replace(' ', '_')}_yield_plot.png"
    savefig(os.path.join(PLOTS_DIR, file_name))
def plot_read_length_hist():
    """Save a read-length histogram weighted by read length.

    The histogram is drawn as a density; the y tick formatter scales each
    density value by the total of all read lengths and renders it in
    human-friendly units. The image is written into ``PLOTS_DIR``.
    """
    # Close any previous plots
    plt.close('all')
    num_bins = 50
    seq_df = ALL_READS["seq_length"]
    if CLIP:
        # Drop the longest reads: everything at or above the 99.95th
        # percentile.
        seq_df = seq_df[seq_df < seq_df.quantile(0.9995)]

    def y_hist_to_human_readable_seq(y, position):
        # Convert distribution to base pairs
        if y == 0:
            return 0
        s = humanfriendly.format_size(seq_df.sum() * y, binary=False)
        return reformat_human_friendly(s)

    # Define how many plots we want (1)
    fig, ax = plt.subplots(1)

    # Set the axis formatters
    ax.yaxis.set_major_formatter(FuncFormatter(y_hist_to_human_readable_seq))
    ax.xaxis.set_major_formatter(FuncFormatter(x_hist_to_human_readable))

    # Plot the histogram. ``density`` replaces the ``normed`` keyword,
    # which current matplotlib no longer accepts.
    h, w, p = ax.hist(seq_df, num_bins, weights=seq_df, density=True,
                      facecolor='blue', alpha=0.76)
    bin_width = reformat_human_friendly(
        humanfriendly.format_size(w[1] - w[0], binary=False))

    # Set the titles and axis labels
    ax.set_title(f"Read Distribution Graph for {SAMPLE_NAME}")
    ax.grid(color='black', linestyle=':', linewidth=0.5)
    ax.set_xlabel(f"Read length: Bin Widths={bin_width}")
    ax.set_ylabel("Bases per bin")

    # Ensure labels are not missed.
    fig.tight_layout()
    savefig(os.path.join(
        PLOTS_DIR,
        f"{SAMPLE_NAME.replace(' ', '_')}_hist_read_length_by_basepair.png"))
def plot_macd(df):
    """Show the MACD signal and histogram lines computed by ``macd_data``.

    :param df: frame passed to ``macd_data``; its ``date`` column is used
        for the x-axis label.
    """
    _macd, signal_line, histogram = macd_data(df)

    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    positions = np.arange(len(df))
    ax.grid(True)
    ax.plot(positions, signal_line, 'r-', label=u'macdsignal')
    ax.plot(positions, histogram, 'k-', label=u'macdhist')
    ax.legend(loc='best')
    # NOTE(review): this reads ``.year`` from the whole date column --
    # confirm that the column type provides that attribute.
    year_label = str(df[u'date'][:].year)
    ax.set_xlabel(year_label)
    plt.show()
def make_probes_ba_traj_fig(models1, models2=None, palette=None):  # TODO ylim
    """
    Returns fig showing trajectory of probes balanced accuracy

    For each model group the 'probes_ba' trajectories are averaged across
    models and drawn with a shaded band of +/- one standard error.

    :param models1: first group of models.
    :param models2: optional second group of models.
    :param palette: optional iterator of colors, one per group; black is
        used when omitted.
    """
    start = time.time()
    sns.set_style('white')
    # load data
    xys = []
    model_groups = [models1] if models2 is None else [models1, models2]
    for models in model_groups:
        model_probes_ba_trajs = [model.get_traj('probes_ba')
                                 for model in models]
        x = models[0].get_data_step_axis()
        # all trajs are truncated to the length of the step axis
        traj_mat = np.asarray([traj[:len(x)]
                               for traj in model_probes_ba_trajs])
        y = np.mean(traj_mat, axis=0)
        sem = [stats.sem(model_probes_bas) for model_probes_bas in traj_mat.T]
        xys.append((x, y, sem))
    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ax.set_ylim([50, 75])
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.set_ylabel('Probes Balanced Accuracy',
                  fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # Booleans, not 'off': current matplotlib treats any non-empty string
    # as true, which would switch these ticks on instead of off.
    ax.tick_params(axis='both', which='both', top=False, right=False)
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)
    # plot
    for (x, y, sem) in xys:
        color = next(palette) if palette is not None else 'black'
        ax.plot(x, y, '-', linewidth=FigsConfigs.LINEWIDTH, color=color)
        ax.fill_between(x, np.add(y, sem), np.subtract(y, sem),
                        alpha=FigsConfigs.FILL_ALPHA, color='grey')
    plt.tight_layout()
    print('{} completed in {:.1f} secs'.format(
        sys._getframe().f_code.co_name, time.time() - start))
    return fig
def make_probes_pp_traj_fig(models1, models2=None, palette=None):
    """
    Returns fig showing trajectory of Probes Perplexity

    For each model group the weighted ('probes_pp') and unweighted
    ('probes_pp_uw') trajectories are averaged across models and drawn as a
    solid and a dashed line respectively.

    :param models1: first group of models.
    :param models2: optional second group of models.
    :param palette: optional iterator of colors, one per group; black is
        used when omitted.
    """
    start = time.time()
    sns.set_style('white')
    # load data
    xys = []
    model_groups = [models1] if models2 is None else [models1, models2]
    for models in model_groups:
        probes_pp_trajs_w = []
        probes_pp_trajs_uw = []
        for model in models:
            probes_pp_trajs_w.append(model.get_traj('probes_pp'))
            probes_pp_trajs_uw.append(model.get_traj('probes_pp_uw'))
        x = models[0].get_data_step_axis()
        traj_mat1 = np.asarray([traj[:len(x)] for traj in probes_pp_trajs_w])
        traj_mat2 = np.asarray([traj[:len(x)] for traj in probes_pp_trajs_uw])
        y1 = np.mean(traj_mat1, axis=0)
        y2 = np.mean(traj_mat2, axis=0)
        xys.append((x, y1, y2))
    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ylabel = 'Probes Perplexity'
    ax.set_ylabel(ylabel, fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # Booleans, not 'off': current matplotlib treats any non-empty string
    # as true, which would switch these ticks on instead of off.
    ax.tick_params(axis='both', which='both', top=False, right=False)
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)
    # plot
    for (x, y1, y2) in xys:
        color = next(palette) if palette is not None else 'black'
        # The line style is given once, by keyword; the former extra '-'
        # format string was overridden by the keyword anyway.
        ax.plot(x, y1, linewidth=FigsConfigs.LINEWIDTH, color=color,
                linestyle='-', label='weighted')
        ax.plot(x, y2, linewidth=FigsConfigs.LINEWIDTH, color=color,
                linestyle='--', label='unweighted')
    plt.legend(loc='best')
    plt.tight_layout()
    print('{} completed in {:.1f} secs'.format(
        sys._getframe().f_code.co_name, time.time() - start))
    return fig
def make_avg_traj_figs(model):
    """Return one trajectory figure per name in ``AppConfigs.EVAL_NAMES``."""

    def make_avg_traj_fig(traj_name):
        """
        Returns fig showing the trajectory named ``traj_name``, with y
        limits taken from ``model.eval_name_range_dict``
        """
        start = time.time()
        sns.set_style('white')
        ylims = model.eval_name_range_dict[traj_name]
        # load data
        x = model.get_data_step_axis()
        y = model.get_traj(traj_name)
        # fig
        fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3),
                               dpi=FigsConfigs.DPI)
        ax.set_ylim(ylims)
        ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.set_ylabel(traj_name, fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        # Booleans, not 'off': current matplotlib treats any non-empty
        # string as true, which would switch these ticks on instead of off.
        ax.tick_params(axis='both', which='both', top=False, right=False)
        ax.xaxis.set_major_formatter(FuncFormatter(human_format))
        ax.yaxis.grid(True)
        # plot
        ax.plot(x, y, '-', linewidth=FigsConfigs.LINEWIDTH, color='black')
        plt.tight_layout()
        print('{} completed in {:.1f} secs'.format(
            sys._getframe().f_code.co_name, time.time() - start))
        return fig

    figs = [make_avg_traj_fig(traj_name)
            for traj_name in AppConfigs.EVAL_NAMES]
    return figs
def make_syn_task_stats_figs(model):
    """Return the list of synonym-task figures for ``model``."""

    def make_syn_task_acc_trajs_fig():  # TODO test
        """
        Returns fig showing synonym task accuracy trajectories for each fold

        Train and test accuracy are each the mean of the 'yes' and 'no'
        accuracy trajectories of the fold.
        """
        start = time.time()
        sns.set_style('white')
        # load data
        xys = []
        for test_fold_id in range(GlobalConfigs.NUM_TEST_FOLDS):
            y_train = np.squeeze(
                np.add(*[model.get_trajs_mat([test_fold_id], traj)
                         for traj in ['syn_task_train_yes_acc',
                                      'syn_task_train_no_acc']]) / 2.0)
            y_test = np.squeeze(
                np.add(*[model.get_trajs_mat([test_fold_id], traj)
                         for traj in ['syn_task_test_yes_acc',
                                      'syn_task_test_no_acc']]) / 2.0)
            print(y_train.shape, y_test.shape)
            x = model.get_data_step_axis()
            xys.append((test_fold_id, x, y_train, y_test))
        # fig
        fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3),
                               dpi=FigsConfigs.DPI)
        ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.set_ylabel('Synonym Task Accuracy',
                      fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        # Booleans, not 'off': current matplotlib treats any non-empty
        # string as true, which would switch these ticks on instead of off.
        ax.tick_params(axis='both', which='both', top=False, right=False)
        ax.xaxis.set_major_formatter(FuncFormatter(human_format))
        ax.yaxis.grid(True)
        ax.set_ylim([-10, 110])
        # Reference line at 50.
        ax.axhline(y=50, linestyle='--', c='grey',
                   linewidth=FigsConfigs.LINEWIDTH / 2.0)
        # plot
        for (test_fold_id, x, y_train, y_test) in xys:
            ax.plot(x, y_train, '-', linewidth=FigsConfigs.LINEWIDTH,
                    label='fold {} train'.format(test_fold_id))
            ax.plot(x, y_test, '-', linewidth=FigsConfigs.LINEWIDTH,
                    label='fold {} test'.format(test_fold_id))
        plt.legend(loc='best')
        plt.tight_layout()
        print('{} completed in {:.1f} secs'.format(
            sys._getframe().f_code.co_name, time.time() - start))
        return fig

    figs = [make_syn_task_acc_trajs_fig()]
    return figs
def make_probe_cat_corr_figs(model, field_input):
    """Return correlation-trajectory figures for the probe ``field_input[0]``.

    Categories are handled five at a time, one figure per slice.
    """

    def make_probe_cat_corr_traj_fig(probe, slice_id, num_slices):
        """
        Returns fig showing correlation between probe prototype activation
        and category activations.
        """
        start = time.time()
        sns.set_style('white')
        sliced_cats = model.probe_store.cat_set[slice_id:slice_id + num_slices]
        num_sliced_cats = len(sliced_cats)
        # load data
        traj_mat = np.zeros((num_sliced_cats, len(model.saved_mb_names)))
        for n, mb_name in enumerate(model.saved_mb_names):
            # Note: this replaces the activations stored on the model.
            model.acts_df = reload_acts_df(model.model_name, mb_name)
            probe_act = np.mean(
                model.get_single_probe_acts_df(probe).values, axis=0)
            probe_act_repeated = [probe_act] * num_sliced_cats
            cat_acts_mat = model.get_multi_cat_acts_df().values[
                slice_id:slice_id + num_slices]
            traj_mat[:, n] = [np.corrcoef(act1, act2)[1, 0]
                              for act1, act2 in zip(probe_act_repeated,
                                                    cat_acts_mat)]
        x = model.get_data_step_axis()
        # fig
        fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 4),
                               dpi=FigsConfigs.DPI)
        ax.set_ylabel('Correlation', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        # Booleans, not 'off': current matplotlib treats any non-empty
        # string as true, which would switch these ticks on instead of off.
        ax.tick_params(axis='both', which='both', top=False, right=False)
        ax.xaxis.set_major_formatter(FuncFormatter(human_format))
        ax.set_ylim([0, 1])
        # plot
        for traj, cat in zip(traj_mat, sliced_cats):
            ax.plot(x, traj, '-', linewidth=FigsConfigs.LINEWIDTH, label=cat)
        # The probe name is shown in a rounded box inside the axes.
        props = dict(boxstyle='round', facecolor='white',
                     alpha=FigsConfigs.FILL_ALPHA)
        ax.text(0.05, 0.9, probe, transform=ax.transAxes,
                fontsize=FigsConfigs.LEG_FONTSIZE,
                verticalalignment='bottom', bbox=props)
        plt.legend(loc='best')
        fig.tight_layout()
        print('{} completed in {:.1f} secs'.format(
            sys._getframe().f_code.co_name, time.time() - start))
        return fig

    figs = [make_probe_cat_corr_traj_fig(field_input[0], slice_id, 5)
            for slice_id in range(0, model.probe_store.num_cats, 5)]
    return figs
def plot(self, filename):
    r"""Save an image file of the transfer function.

    This function loads up matplotlib, plots all of the constituent
    transfer functions and saves.

    Parameters
    ----------
    filename : string
        The file to save out the plot as.

    Examples
    --------

    >>> tf = ColorTransferFunction( (-10.0, -5.0) )
    >>> tf.add_layers(8)
    >>> tf.plot("sample.png")
    """
    from matplotlib import pyplot
    from matplotlib.ticker import FuncFormatter
    pyplot.clf()
    ax = pyplot.axes()

    # Build an RGB image in which every row repeats the three color
    # channels (funcs[0..2]).
    n_bins = self.alpha.x.size
    i_data = np.zeros((n_bins, self.funcs[0].y.size, 3))
    for channel in range(3):
        i_data[:, :, channel] = np.outer(np.ones(n_bins),
                                         self.funcs[channel].y)
    ax.imshow(i_data, origin='lower')

    # Paint everything above the alpha curve white.
    ax.fill_between(np.arange(self.alpha.y.size),
                    n_bins * self.alpha.y, y2=n_bins, color='white')
    ax.set_xlim(0, n_bins)

    # Place ticks at whole-number field values, converted to bin positions.
    xticks = np.arange(np.ceil(self.alpha.x[0]),
                       np.floor(self.alpha.x[-1]) + 1, 1) - self.alpha.x[0]
    xticks *= (n_bins - 1) / (self.alpha.x[-1] - self.alpha.x[0])
    ax.xaxis.set_ticks(xticks)

    def x_format(x, pos):
        # Convert a bin position back to the field value it represents.
        return "%.1f" % (x * (self.alpha.x[-1] - self.alpha.x[0])
                         / (n_bins - 1) + self.alpha.x[0])
    ax.xaxis.set_major_formatter(FuncFormatter(x_format))

    yticks = np.linspace(0, 1, 5) * self.alpha.y.size
    ax.yaxis.set_ticks(yticks)

    def y_format(y, pos):
        # A formatter callback must return text, not a number.
        return str(y / self.alpha.y.size)
    ax.yaxis.set_major_formatter(FuncFormatter(y_format))

    ax.set_ylabel("Transmission")
    ax.set_xlabel("Value")
    pyplot.savefig(filename)
def show(self, ax=None):
    r"""Display an image of the transfer function

    This function loads up matplotlib and displays the current transfer
    function.

    Parameters
    ----------
    ax : optional
        Currently ignored: the current figure is cleared and a fresh axes
        is always created.

    Examples
    --------

    >>> tf = TransferFunction( (-10.0, -5.0) )
    >>> tf.add_gaussian(-9.0, 0.01, 1.0)
    >>> tf.show()
    """
    from matplotlib import pyplot
    from matplotlib.ticker import FuncFormatter
    pyplot.clf()
    ax = pyplot.axes()

    # Build an RGB image in which every row repeats the three color
    # channels (funcs[0..2]).
    n_bins = self.alpha.x.size
    i_data = np.zeros((n_bins, self.funcs[0].y.size, 3))
    for channel in range(3):
        i_data[:, :, channel] = np.outer(np.ones(n_bins),
                                         self.funcs[channel].y)
    ax.imshow(i_data, origin='lower')

    # Paint everything above the alpha curve white.
    ax.fill_between(np.arange(self.alpha.y.size),
                    n_bins * self.alpha.y, y2=n_bins, color='white')
    ax.set_xlim(0, n_bins)

    # Place ticks at whole-number field values, converted to bin positions.
    xticks = np.arange(np.ceil(self.alpha.x[0]),
                       np.floor(self.alpha.x[-1]) + 1, 1) - self.alpha.x[0]
    xticks *= (n_bins - 1) / (self.alpha.x[-1] - self.alpha.x[0])
    if len(xticks) > 5:
        # Thin the ticks out. The slice step must be an integer: plain
        # ``/`` yields a float on Python 3 and raises TypeError.
        xticks = xticks[::len(xticks) // 5]
    ax.xaxis.set_ticks(xticks)

    def x_format(x, pos):
        # Convert a bin position back to the field value it represents.
        return "%.1f" % (x * (self.alpha.x[-1] - self.alpha.x[0])
                         / (n_bins - 1) + self.alpha.x[0])
    ax.xaxis.set_major_formatter(FuncFormatter(x_format))

    yticks = np.linspace(0, 1, 5) * self.alpha.y.size
    ax.yaxis.set_ticks(yticks)

    def y_format(y, pos):
        # NOTE(review): this labels the raw tick position; it is not
        # divided by alpha.y.size -- confirm that this is intended for an
        # axis labelled "Opacity".
        s = '%0.2f' % (y)
        return s
    ax.yaxis.set_major_formatter(FuncFormatter(y_format))

    ax.set_ylabel("Opacity")
    ax.set_xlabel("Value")
def multi_bull_eyes(multi_data, cbar=None, cmaps=None, normalisations=None,
                    global_title=None, canvas_title='title', titles=None,
                    units=None, raidal_subdivisions=(2, 8, 8, 11),
                    centered=(True, False, False, True),
                    add_nomenclatures=(True, True, True, True),
                    pfi_where_to_save=None, show=True):
    """Draw one bull's-eye plot per entry of ``multi_data``, side by side.

    :param multi_data: sequence of data sets, one per plot.
    :param cbar: per-plot flags enabling a horizontal colorbar (default:
        all enabled).
    :param cmaps: per-plot colormaps (default: viridis).
    :param normalisations: per-plot color normalisations (default: min/max
        of each data set).
    :param global_title: optional figure-level title.
    :param canvas_title: window title of the figure.
    :param titles: per-plot titles (default: ``'Title <i>'``).
    :param units: optional per-plot colorbar labels.
    :param raidal_subdivisions: forwarded to ``bulls_eye``.
    :param centered: forwarded to ``bulls_eye``.
    :param add_nomenclatures: indexed per plot and forwarded to
        ``bulls_eye``.
    :param pfi_where_to_save: when given, the figure is saved there as PDF.
    :param show: call ``plt.show()`` at the end.
    """
    plt.clf()
    n_fig = len(multi_data)
    indices = range(n_fig)

    # Fill in per-plot defaults.
    if cbar is None:
        cbar = [True] * n_fig
    if cmaps is None:
        cmaps = [mpl.cm.viridis] * n_fig
    if normalisations is None:
        normalisations = [
            mpl.colors.Normalize(vmin=np.min(multi_data[i]),
                                 vmax=np.max(multi_data[i]))
            for i in indices
        ]
    if titles is None:
        titles = ['Title {}'.format(i) for i in indices]

    # Layout in figure coordinates.
    h_space = 0.15 / n_fig
    h_dim_fig = .8
    w_dim_fig = .8 / n_fig

    def fmt(x, pos):
        # Colorbar tick label: four significant digits in math mode.
        return r"${:.4g}$".format(x)

    # Make a figure and axes with dimensions as desired.
    fig = plt.figure(figsize=(3 * n_fig, 4))
    fig.canvas.set_window_title(canvas_title)
    if global_title is not None:
        plt.suptitle(global_title)

    for n in indices:
        left = h_space * (n + 1) + w_dim_fig * n
        ax = fig.add_axes([left, 0.15, w_dim_fig, h_dim_fig], polar=True)
        bulls_eye(ax, multi_data[n], cmap=cmaps[n], norm=normalisations[n],
                  raidal_subdivisions=raidal_subdivisions,
                  centered=centered,
                  add_nomenclatures=add_nomenclatures[n])
        ax.set_title(titles[n], size=10)

        if not cbar[n]:
            continue
        # The colorbar sits in a thin strip at the bottom of the plot area.
        axl = fig.add_axes([left, .15, w_dim_fig, .05])
        cb1 = mpl.colorbar.ColorbarBase(
            axl, cmap=cmaps[n], norm=normalisations[n],
            orientation='horizontal', format=ticker.FuncFormatter(fmt))
        cb1.ax.tick_params(labelsize=8)
        if units is not None:
            cb1.set_label(units[n])

    if pfi_where_to_save is not None:
        plt.savefig(pfi_where_to_save, format='pdf', dpi=330)
    if show:
        plt.show()
def plot_seasonality(self, name, ax=None, uncertainty=True):
    """Plot a custom seasonal component.

    Parameters
    ----------
    name: Name of the seasonality; key into ``self.seasonalities``.
    ax: Optional matplotlib Axes to plot on. One will be created if
        this is not provided.
    uncertainty: Optional boolean to plot uncertainty intervals.

    Returns
    -------
    a list of matplotlib artists
    """
    line_color = '#0072B2'
    if not ax:
        fig = plt.figure(facecolor='w', figsize=(10, 6))
        ax = fig.add_subplot(111)

    # Compute seasonality from Jan 1 through a single period.
    start = pd.to_datetime('2017-01-01 0000')
    period = self.seasonalities[name]['period']
    end = start + pd.Timedelta(days=period)
    plot_points = 200
    days = pd.to_datetime(np.linspace(start.value, end.value, plot_points))
    df_y = self.seasonality_plot_df(days)
    seas = self.predict_seasonal_components(df_y)

    artists = []
    artists += ax.plot(df_y['ds'].dt.to_pydatetime(), seas[name],
                       ls='-', c=line_color)
    if uncertainty:
        band = ax.fill_between(
            df_y['ds'].dt.to_pydatetime(),
            seas[name + '_lower'], seas[name + '_upper'],
            color=line_color, alpha=0.2)
        artists += [band]
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)

    # Seven evenly spaced ticks across the period.
    tick_times = pd.to_datetime(np.linspace(start.value, end.value, 7))
    ax.set_xticks(tick_times.to_pydatetime())

    # Pick the label detail from the period length (in days).
    if period > 2:
        if period < 14:
            fmt_str = '{dt:%m}/{dt:%d} {dt:%R}'
        else:
            fmt_str = '{dt:%m}/{dt:%d}'
    else:
        fmt_str = '{dt:%T}'

    def _tick_label(x, pos=None):
        return fmt_str.format(dt=num2date(x))

    ax.xaxis.set_major_formatter(FuncFormatter(_tick_label))
    ax.set_xlabel('ds')
    ax.set_ylabel(name)
    return artists
def rating_distributions():
    """Plot exploratory distributions of user ratings and save them as a PNG.

    Reads every positive score from the ``UserData`` table of the SQLite file
    named by the module-level ``database`` and draws a 2x2 grid:

    * top-left: histogram of the number of ratings per user,
    * bottom-left: average rating as a function of how many ratings a user
      has given (users grouped in bins of 50 ratings, up to 1000),
    * top-right: histogram of all individual ratings,
    * bottom-right: histogram of each user's average rating.

    The figure is written to the hard-coded results path and the mean and
    standard deviation of all ratings are printed.
    """
    con = sqlite3.connect(database)
    try:
        cur = con.cursor()
        user_summary = cur.execute('Select Uid, Count(Uid),Sum(Score) FROM UserData \
            WHERE Score>0 GROUP BY Uid ').fetchall()
        all_ratings = [x[0] for x in cur.execute('Select Score FROM \
            UserData WHERE Score>0 ').fetchall()]
    finally:
        # Release the database handle even when a query fails; nothing below
        # needs the connection.
        con.close()

    # Columns: user id, number of ratings, sum of scores, average score.
    user_summary = np.array(
        [[x[0], x[1], x[2], float(x[2]) / x[1]] for x in user_summary])

    # Determine whether those who rate more anime have a different average
    # rating.
    bin_size, avg_ratings = 50, []
    num_ratings = np.arange(bin_size / 2, 1000, bin_size)
    ratings_per_user = user_summary[:, 1]
    for mid_bin in num_ratings:
        in_bin = ((ratings_per_user >= mid_bin - bin_size / 2)
                  & (ratings_per_user < mid_bin + bin_size / 2))
        rel_data = user_summary[in_bin, 1:3]
        total_count = float(sum(rel_data[:, 0]))
        if total_count:
            avg_ratings.append(float(sum(rel_data[:, 1])) / total_count)
        else:
            # No user falls in this bin: leave a gap in the line instead of
            # dividing zero by zero.
            avg_ratings.append(float('nan'))

    # Plot these exploratory figures:
    f, axarr = plt.subplots(2, 2)

    axarr[0, 0].set_xlabel('Number of ratings (per user)', size=10)
    axarr[0, 0].set_ylabel('Number of users', size=10)
    axarr[0, 0].hist(user_summary[:, 1],
                     bins=np.arange(0, np.max(user_summary[:, 1]) + 50, 50))
    axarr[0, 0].set_xlim([0, 1000])
    axarr[0, 0].yaxis.set_major_formatter(FuncFormatter(shorten_numbers))

    axarr[1, 0].set_xlabel('Number of ratings (per user)', size=10)
    axarr[1, 0].set_ylabel('Average rating', size=10)
    axarr[1, 0].plot(num_ratings, avg_ratings)
    axarr[1, 0].set_xlim([0, 1000])
    axarr[1, 0].set_yticks(range(0, 11))
    axarr[1, 0].set_yticklabels(
        ['0', '', '2', '', '4', '', '6', '', '8', '', '10'])
    axarr[1, 0].set_ylim([1, 10])

    axarr[0, 1].set_xlabel('Ratings', size=10)
    axarr[0, 1].set_ylabel('Number of ratings ', size=10)
    axarr[0, 1].hist(all_ratings, bins=np.arange(0.5, 11.5))
    axarr[0, 1].set_xlim([0.5, 10.5])
    axarr[0, 1].yaxis.set_major_formatter(FuncFormatter(shorten_numbers))
    plt.sca(axarr[0, 1])
    plt.xticks(range(1, 11))

    axarr[1, 1].set_xlabel('Average ratings (per user)', size=10)
    axarr[1, 1].set_ylabel('Number of users', size=10)
    axarr[1, 1].hist(user_summary[:, 3], bins=np.arange(0.5, 11.5))
    axarr[1, 1].set_xlim([0.5, 10.5])
    axarr[1, 1].yaxis.set_major_formatter(FuncFormatter(shorten_numbers))
    plt.sca(axarr[1, 1])
    plt.xticks(range(1, 11))

    f.tight_layout()
    for (i, j) in ((0, 0), (0, 1), (1, 0), (1, 1)):
        axarr[i, j].xaxis.set_ticks_position('none')
        axarr[i, j].yaxis.set_ticks_position('none')

    # NOTE(review): the path uses a Windows separator; on other platforms this
    # writes a file whose name contains a backslash. Confirm the intended
    # target before changing it.
    plt.savefig('results\\User_ratings.png', dpi=300, format='png')
    print([np.mean(all_ratings), np.std(all_ratings)])
def plot_yield_by_quality():
    """Plot cumulative yield over time as a stack split by read quality.

    Reads ``time``, ``seq_length`` and ``av_qual`` from the module-level
    ``ALL_READS`` frame, bins reads by ``QUALITY_BINS`` into the labels in
    ``QUALITY_DESCRIPTIONS``, sums bases per minute per bin, aligns each bin
    to the timestamps in ``YIELD_DATA.time`` and draws the cumulative sums as
    a stack plot coloured with ``QUALITY_COLOURS``. The figure is saved under
    ``PLOTS_DIR`` with a name derived from ``SAMPLE_NAME``.
    """
    # Close any previous plots
    plt.close('all')

    # Work on an explicit copy: columns are added, dropped and re-indexed
    # below, which pandas flags as chained assignment on a bare selection.
    new_yield_data = ALL_READS[['time', "seq_length", "av_qual"]].copy()

    # Bin edges: 0, the configured thresholds, then the best quality seen.
    qual_bins = [0] + QUALITY_BINS + [new_yield_data["av_qual"].max()]

    # Cut yield data into quality bins. The labels are applied in reverse
    # order of QUALITY_DESCRIPTIONS.
    new_yield_data["descriptive_quality"] = pd.cut(
        new_yield_data["av_qual"], qual_bins,
        labels=list(reversed(QUALITY_DESCRIPTIONS)))

    # Time as index and drop av_qual column
    new_yield_data.set_index(pd.DatetimeIndex(new_yield_data['time']),
                             inplace=True)
    new_yield_data.drop('av_qual', axis=1, inplace=True)

    # Obtain the per-minute sum of bases in each quality bin. "1min" is the
    # non-deprecated spelling of the former "1T" alias.
    yield_data_grouped = new_yield_data.groupby("descriptive_quality").apply(
        lambda d: d.resample("1min").sum().fillna(0))['seq_length']

    # Create a dict of dataframes based on groups.
    yield_data_by_quality = {
        description: yield_data_grouped[description].to_frame().reset_index()
        for description in QUALITY_DESCRIPTIONS
    }

    for description, yield_df in yield_data_by_quality.items():
        yield_df.reset_index(inplace=True)
        yield_df.set_index("time", inplace=True)
        # Align to the shared time axis; minutes without reads count as zero.
        yield_df = yield_df.reindex(index=YIELD_DATA.time, fill_value=0)
        yield_df.reset_index(inplace=True)

        # Generate a cumulative sum of sequence data
        yield_df['cumsum_bp'] = yield_df['seq_length'].cumsum()

        # Convert time to timedelta format and then to float format, in hours.
        # The start time is computed once rather than once per row.
        start_time = yield_df['time'].min()
        yield_df['duration_tdelta'] = yield_df['time'].apply(
            lambda t: t - start_time)
        yield_df['duration_float'] = yield_df['duration_tdelta'].apply(
            lambda t: t.total_seconds() / 3600)

        # Re-assigning an existing key is safe while iterating over items().
        yield_data_by_quality[description] = yield_df

    # Set subplots.
    fig, ax = plt.subplots(1)

    # Create ticks using numpy linspace: seven evenly spaced ticks, including
    # the zero point, across the run duration.
    num_points = 7
    x_ticks = np.linspace(YIELD_DATA['duration_float'].min(),
                          YIELD_DATA['duration_float'].max(), num_points)
    ax.set_xticks(x_ticks)

    # Define axis formatters
    ax.yaxis.set_major_formatter(FuncFormatter(y_yield_to_human_readable))
    ax.xaxis.set_major_formatter(FuncFormatter(x_yield_to_human_readable))

    # Set x and y labels and title.
    ax.set_xlabel("Duration (HH:MM)")
    ax.set_ylabel("Yield")
    ax.set_title(f"Yield for {SAMPLE_NAME} over time by quality")

    ax.stackplot(YIELD_DATA['duration_float'],
                 [yield_data_by_quality[description]['cumsum_bp']
                  for description in QUALITY_DESCRIPTIONS],
                 colors=QUALITY_COLOURS)

    # Limits must be set after the plot is created
    ax.set_xlim(YIELD_DATA['duration_float'].min(),
                YIELD_DATA['duration_float'].max())
    ax.set_ylim(ymin=0)

    # Add legend to plot.
    ax.legend([mpatches.Patch(color=colour) for colour in QUALITY_COLOURS],
              QUALITY_DESCRIPTIONS, loc=2)

    # Ensure labels are not missed.
    fig.tight_layout()
    savefig(os.path.join(
        PLOTS_DIR,
        f"{SAMPLE_NAME.replace(' ', '_')}_yield_plot_by_quality.png"))
def plot_poremap():
    """Draw a heatmap of total bases per channel in the MinKNOW grid layout.

    Sums ``seq_length`` per ``channel`` from the module-level ``ALL_READS``
    frame, places each total at that channel's position in a 32x16 grid that
    mirrors the MinKNOW display, and saves the heatmap under ``PLOTS_DIR``
    with a name derived from ``SAMPLE_NAME``.

    Raises
    ------
    IndexError
        If ``ALL_READS`` contains a channel number that is not on the grid.
    """
    def minknow_column_order(i):
        # Left half of a row counts up from i + 33; the right half counts
        # down from i + 8 to i + 1.
        return chain(range(i + 33, i + 41), reversed(range(i + 1, i + 9)))

    # Channels are not in order, 121 is in the topleft, 89 in the top right.
    # The bottom left is 33 while the bottom right is channel 1.
    # The following lines create a top-down, left-right 2D array of MinKNOW.
    # Split into chunks of 64 (four rows each), listed top to bottom.
    chunks = [1, 2, 3, 4, 5, 6, 7, 0]
    # Within a chunk, each row has the following multiplication factor.
    row_factors = [3, 2, 1, 0]
    # Create the values that make up the numbers on the far-right column of
    # the grid.
    rh_values = [64 * chunk + 8 * row_factor
                 for chunk in chunks for row_factor in row_factors]
    # Use minknow_column_order, which references the far-right column of a
    # row, to fill in the rest of the values for each row.
    channels_by_order_array = np.array(
        [list(minknow_column_order(i)) for i in rh_values])

    # Create an array of the same dimensions but filled with zeroes.
    channels_by_yield_array = np.zeros(channels_by_order_array.shape)

    # Sum the values for each channel.
    channels_by_yield_df = pd.DataFrame(
        ALL_READS.groupby("channel")["seq_length"].sum())
    # Reset the index and have channel as a column instead of the index.
    channels_by_yield_df.reset_index(level=0, inplace=True)

    # Map each channel number to its (row, column) once, instead of scanning
    # the whole grid for every channel in the data.
    position_by_channel = {
        int(channel): (ix, iy)
        for ix, row in enumerate(channels_by_order_array)
        for iy, channel in enumerate(row)
    }

    # Iterate through each row of the yield by channel dataframe.
    for yield_row in channels_by_yield_df.itertuples():
        try:
            channel_index = position_by_channel[int(yield_row.channel)]
        except KeyError:
            raise IndexError(
                f"channel {yield_row.channel} is not part of the MinKNOW "
                f"channel layout") from None
        # Assign channel yield to position in MinKNOW
        channels_by_yield_array[channel_index] = yield_row.seq_length

    # Close any previous plots
    plt.close('all')
    fig, ax = plt.subplots()
    fig.set_size_inches(15, 7)

    # Use the formatter we used for the yield plots.
    formatter_y = FuncFormatter(y_yield_to_human_readable)
    sns.heatmap(channels_by_yield_array,
                # Remove labels from side, they're not useful in this context.
                xticklabels=False,
                yticklabels=False,
                ax=ax,
                # Prevent extreme values from over-scaling the sidebar.
                robust=True,
                # Use the greens scale but in reverse, similar to MinKNOW.
                cmap="Greens_r",
                # Format keyword args for the side bar.
                cbar_kws={"format": formatter_y,
                          "label": "Bases per channel"})

    # Create line down the middle as shown in MinKNOW. axvline takes a single
    # x position, so pass a scalar rather than a one-element list.
    ax.axvline(8, color='white', lw=15)

    # Nice big title!
    ax.set_title("Map of Yield by Channel", fontsize=25)

    # Ensure labels are not missed.
    fig.tight_layout()
    savefig(os.path.join(
        PLOTS_DIR,
        f"{SAMPLE_NAME.replace(' ', '_')}_yield_map_by_pore.png"))
def make_test_and_train_pp_traj_fig(models1,
                                    models2=None,
                                    palette=None, ):
    """
    Returns fig showing trajectory of test and train perplexity

    For each model group (``models1`` and, when given, ``models2``) the
    'test_pp' and 'train_pp' trajectories of all models are averaged and drawn
    as a solid (test) and a dashed (train) line, each with a grey band of one
    standard error of the mean on either side.

    models1, models2: sequences of models exposing ``get_traj`` and
        ``get_data_step_axis``; the x axis is taken from the first model of
        each group and trajectories are truncated to its length.
    palette: optional iterator of colours, advanced once per group with
        ``next``; every line is black when it is None.
    """
    start = time.time()
    sns.set_style('white')
    # load data
    xys = []
    model_groups = [models1] if models2 is None else [models1, models2]
    for models in model_groups:
        model_test_pp_trajs = [model.get_traj('test_pp') for model in models]
        model_train_pp_trajs = [model.get_traj('train_pp') for model in models]
        x = models[0].get_data_step_axis()
        traj_mat1 = np.asarray([traj[:len(x)] for traj in model_test_pp_trajs])
        traj_mat2 = np.asarray([traj[:len(x)] for traj in model_train_pp_trajs])
        y1 = np.mean(traj_mat1, axis=0)
        y2 = np.mean(traj_mat2, axis=0)
        # Standard error across models, one value per time step.
        sem1 = [stats.sem(row) for row in traj_mat1.T]
        sem2 = [stats.sem(row) for row in traj_mat2.T]
        xys.append((x, y1, y2, sem1, sem2))
    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ax.set_ylim([0, models1[0].terms.num_set_])
    ax.set_ylabel('Perplexity', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # Booleans, not the strings 'off': current Matplotlib treats any
    # non-empty string as true and would leave these ticks switched on.
    ax.tick_params(axis='both', which='both', top=False, right=False)
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)
    # plot
    for (x, y1, y2, sem1, sem2) in xys:
        color = next(palette) if palette is not None else 'black'
        # The line style is given only by keyword; combining it with a '-'
        # format string makes Matplotlib warn about a redundant definition.
        ax.plot(x, y1, linewidth=FigsConfigs.LINEWIDTH, color=color,
                linestyle='-', label='Test')
        ax.plot(x, y2, linewidth=FigsConfigs.LINEWIDTH, color=color,
                linestyle='--', label='Train')
        ax.fill_between(x, np.add(y1, sem1), np.subtract(y1, sem1),
                        alpha=FigsConfigs.FILL_ALPHA, color='grey')
        ax.fill_between(x, np.add(y2, sem2), np.subtract(y2, sem2),
                        alpha=FigsConfigs.FILL_ALPHA, color='grey')
    plt.tight_layout()
    plt.legend(loc='best')
    print('{} completed in {:.1f} secs'.format(
        sys._getframe().f_code.co_name, time.time() - start))
    return fig
def make_pp_trajs_figs(model):
    """
    Returns a list of figures, one per entry of
    ``model.probe_store.cat_probe_list_dict``, each showing the
    'avg_probe_pp' trajectories of that entry's probes.
    """
    def make_avg_probe_pp_trajs_fig(probes):
        """
        Returns fig showing avg_probe_pp trajectories for "probes".
        """
        start = time.time()
        sns.set_style('white')
        palette = iter(sns.color_palette("hls", len(probes)))
        # load data
        avg_probe_pp_trajs_mat = model.get_trajs_mat(probes, 'avg_probe_pp')
        xys = []
        for n, probe in enumerate(probes):
            x = model.get_data_step_axis()
            y = avg_probe_pp_trajs_mat[n, :]
            # len() rather than truthiness: a multi-element NumPy array
            # raises when evaluated as a boolean.
            if len(x):
                last_y, last_x = y[-1], x[-1]
            else:
                last_y, last_x = 0, 0  # in case x is empty
            xys.append((x, y, last_x, last_y, probe))
        # Only trajectories ending above half of the highest end point are
        # annotated with their probe name.
        y_thr = np.max([xy[3] for xy in xys]) / 2
        # fig
        fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 6),
                               dpi=FigsConfigs.DPI)
        ax.set_title(model.probe_store.probe_cat_dict[probes[0]])
        ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.set_ylabel('Avg Probe Perplexity',
                      fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        # Booleans, not the strings 'off': current Matplotlib treats any
        # non-empty string as true and would leave these ticks switched on.
        ax.tick_params(axis='both', which='both', top=False, right=False)
        ax.xaxis.set_major_formatter(FuncFormatter(human_format))
        # plot
        for (x, y, last_x, last_y, probe) in xys:
            ax.plot(x, y, '-', linewidth=1.0, c=next(palette))
            if last_y > y_thr:
                plt.annotate(probe, xy=(last_x, last_y), xytext=(0, 0),
                             textcoords='offset points', va='center',
                             fontsize=8, bbox=dict(boxstyle='round', fc='w'))
        ax.legend(fontsize=FigsConfigs.LEG_FONTSIZE, loc='upper left')
        plt.tight_layout()
        print('{} completed in {:.1f} secs'.format(
            sys._getframe().f_code.co_name, time.time() - start))
        return fig

    figs = [make_avg_probe_pp_trajs_fig(cat_probes)
            for cat_probes in model.probe_store.cat_probe_list_dict.values()]
    return figs
def make_cum_freq_trajs_figs(model):
    """
    Returns a list of figures, one per entry of
    ``model.probe_store.cat_probe_list_dict``, each showing the cumulative
    frequency of that entry's probes across ``model.num_docs`` documents.
    """
    def make_cfreq_traj_fig(probes):
        """
        Returns fig showing cumulative frequency trajectories of "probes"
        """
        start = time.time()
        sns.set_style('white')
        palette = iter(sns.color_palette("hls", len(probes)))
        # load data
        xys = []
        for probe in probes:
            x = range(model.num_docs)
            y = np.cumsum(model.term_doc_freq_dict[probe])
            if x:
                last_y, last_x = y[-1], x[-1]
            else:
                last_y, last_x = 0, 0  # in case x is empty
            xys.append((x, y, last_x, last_y, probe))
        # Threshold is a tenth of the highest final count; only probes ending
        # above it are annotated with their name.
        y_thr = np.max([xy[3] for xy in xys]) / 10
        # fig
        fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3),
                               dpi=FigsConfigs.DPI)
        ax.set_title(model.probe_store.probe_cat_dict[probes[0]])
        ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.set_ylabel('Cumulative Frequency',
                      fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        # Booleans, not the strings 'off': current Matplotlib treats any
        # non-empty string as true and would leave these ticks switched on.
        ax.tick_params(axis='both', which='both', top=False, right=False)
        ax.xaxis.set_major_formatter(FuncFormatter(human_format))
        # plot
        for (x, y, last_x, last_y, probe) in xys:
            ax.plot(x, y, '-', linewidth=1.0, c=next(palette))
            if last_y > y_thr:
                plt.annotate(probe, xy=(last_x, last_y), xytext=(0, 0),
                             textcoords='offset points', va='center',
                             fontsize=FigsConfigs.LEG_FONTSIZE,
                             bbox=dict(boxstyle='round', fc='w'))
        ax.legend(fontsize=FigsConfigs.LEG_FONTSIZE, loc='upper left')
        plt.tight_layout()
        print('{} completed in {:.1f} secs'.format(
            sys._getframe().f_code.co_name, time.time() - start))
        return fig

    figs = [make_cfreq_traj_fig(cat_probes)
            for cat_probes in model.probe_store.cat_probe_list_dict.values()]
    return figs