The following 50 code examples, extracted from open-source Python projects, illustrate how to use statistics.stdev().
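Before the project examples, here is a minimal stand-alone sketch of the basic call, assuming nothing beyond the standard library; the sample values are invented purely for illustration:

import statistics

data = [2.5, 3.1, 2.8, 3.4, 2.9]  # invented sample values, not taken from any project below

# stdev() returns the sample standard deviation (n - 1 in the denominator)
# and raises statistics.StatisticsError if fewer than two values are given.
print(statistics.stdev(data))

# The optional xbar argument accepts a precomputed mean so it is not recomputed internally.
xbar = statistics.mean(data)
print(statistics.stdev(data, xbar))

Several of the examples below guard the call in the same spirit, either by checking that at least two values are present or by catching statistics.StatisticsError.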
def get_average_problems_solved_per_user(eligible=True, scoring=True, user_breakdown=None):
    if user_breakdown is None:
        user_breakdown = get_team_member_solve_stats(eligible)
    solves = []
    for tid, breakdown in user_breakdown.items():
        for uid, ubreakdown in breakdown.items():
            if ubreakdown is None:
                solved = 0
            else:
                if 'correct' in ubreakdown:
                    solved = ubreakdown['correct']
                else:
                    solved = 0
            if solved > 0 or not scoring:
                solves += [solved]
    return (statistics.mean(solves), statistics.stdev(solves))
def summary(self, verbose=False):
    times = set()
    for r in self.results:
        if not r.finish:
            r.capture()
        if verbose:
            print(' {}'.format(r.str(self.dp)), file=self.file)
        times.add(r.elapsed())
    if times:
        print(_SUMMARY_TEMPLATE.format(
            count=len(times),
            mean=mean(times),
            stddev=stdev(times) if len(times) > 1 else 0,
            min=min(times),
            max=max(times),
            dp=self.dp,
        ), file=self.file, flush=True)
    else:
        raise RuntimeError('timer not started')
    return times
def read_data_1(stats_dir, filen):
    stats_dir = utils.abs_path_dir(stats_dir)
    filen = utils.abs_path_file(filen)
    data = []
    names = []
    with open(stats_dir + filen, "r") as filep:
        for line in filep:
            # Read file with lines like this:
            # GA,0.578947368421,0.631578947368,0.710526315789,0.722222222222
            # SVMBFF,0.631578947368,0.684210526316,0.815789473684,0.66666666
            # VQMM,0.736842105263,0.842105263158,0.842105263158,0.75,0.61111
            row = line[:-1].split(",")
            tmp = []
            for index in range(1, len(row)):
                names.append(row[0])
                tmp.append(float(row[index]))
            data.append(tmp)
            print(filen.split(".")[0].split("_")[1].title() + " for " + row[0] +
                  " \t= " + str("{0:.3f}".format(sum(tmp)/len(tmp))) +
                  " ± " + str("{0:.3f}".format(stdev(tmp))))
def STDEV(df, n, price='Close', xbar=None):
    """
    Sample standard deviation of data
    """
    stdev_list = []
    i = 0
    while i < len(df[price]):
        if i + 1 < n:
            stdev = float('NaN')
        else:
            start = i + 1 - n
            end = i + 1
            stdev = statistics.stdev(df[price][start:end], xbar)
        stdev_list.append(stdev)
        i += 1
    return stdev_list
def calc_stats(scoredata):  # calculate stats, dm if you want more
    scoredata[7] = len([vote for vote in scoredata[2] if vote >= 0])
    # print(scoredata[2])
    votes = list(abs(vote) for vote in scoredata[2])
    scoredata.append(votes)
    try:
        scoredata[2] = sum(votes) / scoredata[3]
        '''
        if scoredata[0].startswith('hanss314'):
            scoredata[2]=1000
        '''
    except:
        print('\"{}\" by {} was not voted for'.format(scoredata[1], scoredata[0]))
        scoredata[2] = 0
    scoredata[5] = scoredata[2] + scoredata[4]
    try:
        scoredata[6] = statistics.stdev(scoredata[9])
    except:
        scoredata[6] = 0
    return scoredata
def printWinSizeSummary(neighborTL):
    '''Given a list where index is genes and the values are neighbor genes,
    calculate the size of this window in bp for each gene. Return the mean
    and standard deviation.'''
    winL = []
    for neighborT in neighborTL:
        winL.append(calcWinSize(neighborT, geneNames, geneInfoD))
    median = statistics.median(winL)
    mean = statistics.mean(winL)
    stdev = statistics.stdev(winL)
    print(" median", round(median))
    print(" mean", round(mean))
    print(" stdev", round(stdev))

## mods for core stuff (requires changing functions, so we move them here)
def calc_stats(scoredata):  # calculate stats, dm if you want more
    scoredata[7] = len([vote for vote in scoredata[2] if vote >= 0])
    # print(scoredata[2])
    votes = list(abs(vote) for vote in scoredata[2])
    try:
        scoredata[2] = sum(votes) / scoredata[3]
        '''
        if scoredata[0].startswith('hanss314'):
            scoredata[2]=1000
        '''
    except:
        print('\"{}\" by {} was not voted for'.format(scoredata[1], scoredata[0]))
        scoredata[2] = 0
    scoredata[5] = scoredata[2] + scoredata[4]
    try:
        scoredata[6] = statistics.stdev(votes)
    except:
        scoredata[6] = 0
    return scoredata
def evaluate_and_update_max_score(self, t, episodes):
    eval_stats = eval_performance(
        self.env, self.agent, self.n_runs,
        max_episode_len=self.max_episode_len, explorer=self.explorer,
        logger=self.logger)
    elapsed = time.time() - self.start_time
    custom_values = tuple(tup[1] for tup in self.agent.get_statistics())
    mean = eval_stats['mean']
    values = (t, episodes, elapsed, mean,
              eval_stats['median'], eval_stats['stdev'],
              eval_stats['max'], eval_stats['min']) + custom_values
    record_stats(self.outdir, values)
    if mean > self.max_score:
        update_best_model(self.agent, self.outdir, t, self.max_score, mean,
                          logger=self.logger)
        self.max_score = mean
    return mean
def evaluate_and_update_max_score(self, t, episodes, env, agent):
    eval_stats = eval_performance(
        env, agent, self.n_runs,
        max_episode_len=self.max_episode_len, explorer=self.explorer,
        logger=self.logger)
    elapsed = time.time() - self.start_time
    custom_values = tuple(tup[1] for tup in agent.get_statistics())
    mean = eval_stats['mean']
    values = (t, episodes, elapsed, mean,
              eval_stats['median'], eval_stats['stdev'],
              eval_stats['max'], eval_stats['min']) + custom_values
    record_stats(self.outdir, values)
    with self._max_score.get_lock():
        if mean > self._max_score.value:
            update_best_model(
                agent, self.outdir, t, self._max_score.value, mean,
                logger=self.logger)
            self._max_score.value = mean
    return mean
def calculate_IDL(self, data_lst, Concentration, debug_on):
    DegreesOfFreedom = len(data_lst) - 1
    if DegreesOfFreedom < 1:
        return 'PoorSensitivity'
    Ta = self.T_Table_99Confidence.get(DegreesOfFreedom, "TooMany")
    if debug_on == True:
        print('DegreesOfFreedom: ', DegreesOfFreedom)
        print('Concentration,: ', Concentration)
        print('data_lst: ', data_lst)
    if Ta == "TooMany":
        raise Exception('There are more than 21 data values for the IDL calculation and therefore not enough degrees of freedom in T_Table_99Confidence dictionary.')
    Averge = statistics.mean(data_lst)
    StandardDeviation = statistics.stdev(data_lst)
    RSD = (StandardDeviation/Averge) * 100
    return round(((Ta * RSD * Concentration)/100), 2)
def runPutTest(testDataPath, testDataRangeStart, testDataRangeEnd, f):
    log.debug('running put tests...')
    timeStart = time.perf_counter()
    times = [time.perf_counter()]
    for i in range(testDataRangeStart, testDataRangeEnd):
        print(i)
        thisPath = '%s/%i' % (testDataPath, i)
        o = loadTestData(thisPath)
        f.putObject(o, str(i))
        times.append(time.perf_counter())
    timeEnd = time.perf_counter()
    log.warning('RESULT (PUT): total test runtime: %s seconds, mean per object: %s' % (
        timeEnd - timeStart, ((timeEnd - timeStart) / testDataRangeEnd)))
    log.critical('RESULT (PUT): median result: %s ' % statistics.median(calculateTimeDeltas(times)))
    log.critical('RESULT (PUT): standard deviation result: %s ' % statistics.stdev(calculateTimeDeltas(times)))
    log.critical('RESULT (PUT): mean result: %s ' % statistics.mean(calculateTimeDeltas(times)))
    # log.critical('RESULT (PUT): individual times: %s ' % (calculateTimeDeltas(times)))
def runGetTest(testDataPath, testDataRangeStart, testDataRangeEnd, f):
    log.debug('running get tests...')
    timeStart = time.perf_counter()
    times = [time.perf_counter()]
    for i in range(testDataRangeStart, testDataRangeEnd):
        thisPath = '%s/%i' % (testDataPath, i)
        o = f.getObject(str(i))
        saveTestData(o, thisPath)
        times.append(time.perf_counter())
    timeEnd = time.perf_counter()
    log.critical('RESULT (GET): total test runtime: %s seconds, mean per object: %s' % (
        timeEnd - timeStart, ((timeEnd - timeStart) / testDataRangeEnd)))
    log.critical('RESULT (GET): median result: %s ' % statistics.median(calculateTimeDeltas(times)))
    log.critical('RESULT (GET): standard deviation result: %s ' % statistics.stdev(calculateTimeDeltas(times)))
    log.critical('RESULT (GET): mean result: %s ' % statistics.mean(calculateTimeDeltas(times)))
    # log.critical('RESULT (GET): individual times: %s ' % (calculateTimeDeltas(times)))
def runDeleteTest(testDataRangeStart, testDataRangeEnd, f):
    log.debug('running delete tests...')
    timeStart = time.perf_counter()
    times = [time.perf_counter()]
    for i in range(testDataRangeStart, testDataRangeEnd):
        f.deleteObject(str(i))
        times.append(time.perf_counter())
    timeEnd = time.perf_counter()
    log.critical('RESULT (DELETE): total test runtime: %s seconds, mean per object: %s' % (
        timeEnd - timeStart, ((timeEnd - timeStart) / testDataRangeEnd)))
    log.critical('RESULT (DELETE): median result: %s ' % statistics.median(calculateTimeDeltas(times)))
    log.critical('RESULT (DELETE): standard deviation result: %s ' % statistics.stdev(calculateTimeDeltas(times)))
    log.critical('RESULT (DELETE): mean result: %s ' % statistics.mean(calculateTimeDeltas(times)))
    # log.critical('RESULT (DELETE): individual times: %s ' % (calculateTimeDeltas(times)))

###############################################################################
###############################################################################
def eval_performance(rom, p_func, n_runs):
    assert n_runs > 1, 'Computing stdev requires at least two runs'
    scores = []
    for i in range(n_runs):
        env = ale.ALE(rom, treat_life_lost_as_terminal=False)
        test_r = 0
        while not env.is_terminal:
            s = chainer.Variable(np.expand_dims(dqn_phi(env.state), 0))
            pout = p_func(s)
            a = pout.action_indices[0]
            test_r += env.receive_action(a)
        scores.append(test_r)
        print('test_{}:'.format(i), test_r)
    mean = statistics.mean(scores)
    median = statistics.median(scores)
    stdev = statistics.stdev(scores)
    return mean, median, stdev
def eval_performance(process_idx, make_env, model, phi, n_runs):
    assert n_runs > 1, 'Computing stdev requires at least two runs'
    scores = []
    for i in range(n_runs):
        model.reset_state()
        env = make_env(process_idx, test=True)
        obs = env.reset()
        done = False
        test_r = 0
        while not done:
            s = chainer.Variable(np.expand_dims(phi(obs), 0))
            pout, _ = model.pi_and_v(s)
            a = pout.action_indices[0]
            obs, r, done, info = env.step(a)
            test_r += r
        scores.append(test_r)
        print('test_{}:'.format(i), test_r)
    mean = statistics.mean(scores)
    median = statistics.median(scores)
    stdev = statistics.stdev(scores)
    return mean, median, stdev
def ejecutar(función):
    print(función)
    cronometrajes = []
    stdout = sys.stdout
    for i in range(100):
        sys.stdout = None
        horaInicio = time.time()
        función()
        segundos = time.time() - horaInicio
        sys.stdout = stdout
        cronometrajes.append(segundos)
        promedio = statistics.mean(cronometrajes)
        if i < 10 or i % 10 == 9:
            print("{} {:3.2f} {:3.2f}".format(
                1 + i, promedio,
                statistics.stdev(cronometrajes, promedio) if i > 1 else 0))
def math_stats_calculations(point_map):
    point_array = []
    for team in team_array:
        point_array.append(point_map[team])
    # Calculates mean
    mean_val = str(round(statistics.mean(point_array), 2))
    # Calculates median
    median_val = str(round(statistics.median(point_array), 2))
    # Calculates standard deviation
    stdev_val = str(round(statistics.stdev(point_array), 2))
    # Calculates variance
    var_val = str(round(statistics.variance(point_array), 2))
    return (mean_val, median_val, stdev_val, var_val)

# Calls my function
def encode_benchmark(self, bench):
    data = {}
    data['environment'] = self.conf.environment
    data['project'] = self.conf.project
    data['branch'] = self.branch
    data['benchmark'] = bench.get_name()
    # Other benchmark metadata:
    # - description
    # - units="seconds", units_title="Time", lessisbetter=True
    data['commitid'] = self.revision
    data['revision_date'] = self.commit_date.isoformat()
    data['executable'] = self.conf.executable
    data['result_value'] = bench.mean()
    # Other result metadata: result_date
    if bench.get_nvalue() == 1:
        data['std_dev'] = 0
    else:
        data['std_dev'] = bench.stdev()
    values = bench.get_values()
    data['min'] = min(values)
    data['max'] = max(values)
    # Other stats metadata: q1, q3
    return data
def run(args):
    # Setup parser
    p = parser.VCFParser(io.StringIO(HEADER), '<builtin>')
    # Parse header
    p.parse_header()
    # Parse line several times
    times = []
    for r in range(args.repetitions):
        begin = time.clock()
        for _ in range(args.line_count):
            r = p._record_parser.parse_line(LINE)  # noqa
            if args.debug:
                print(r, file=sys.stderr)
        times.append(time.clock() - begin)
    print('Took {:.3} seconds (stdev {:.3})'.format(
        statistics.mean(times), statistics.stdev(times)), file=sys.stderr)
def temp_stat(temps):
    """ prints the average, median, std dev, and variance of temps """
    import statistics
    print(temps)
    print("Mean: ", statistics.mean(temps))
    print("Median: ", statistics.median(temps))
    print("Standard deviation: ", statistics.stdev(temps))
    print("Variance: ", statistics.variance(temps))

#%%
def temp_stat(temps):
    """ computes the average, median, std dev, and variance of temps """
    import statistics
    print(temps)
    print("Mean: ", statistics.mean(temps))
    print("Median: ", statistics.median(temps))
    print("Standard deviation: ", statistics.stdev(temps))
    print("Variance: ", statistics.variance(temps))
    try:
        print("Mode: ", statistics.mode(temps))
    except statistics.StatisticsError as e:
        print("Mode error: ", e)

#%%
def plot_kde(data):
    bw = 1.06 * st.stdev(data) / (len(data) ** .2)
    kde = KernelDensity(kernel='gaussian', bandwidth=bw).fit(
        np.array(data).reshape(-1, 1))
    s = np.linspace(0, 1)
    e = kde.score_samples(s.reshape(-1, 1))
    plt.plot(s, e)
    mi, ma = argrelextrema(e, np.less)[0], argrelextrema(e, np.greater)[0]
    logger.info("Minima: %s" % s[mi])
    logger.info("Maxima: %s" % s[ma])
    plt.plot(s[:mi[0] + 1], e[:mi[0] + 1], 'r',
             s[mi[0]:mi[1] + 1], e[mi[0]:mi[1] + 1], 'g',
             s[mi[1]:], e[mi[1]:], 'b',
             s[ma], e[ma], 'go',
             s[mi], e[mi], 'ro')
    plt.xlabel('Probability')
def sigma_scaled(population: List[Genome], **kwargs) -> Iterator[PAIR_T]:
    try:
        assert len(population) > 1
    except AssertionError:
        raise TooFewIndividuals
    fitnesses = tuple(x.fitness for x in population)
    try:
        assert any(f > 0.0 for f in fitnesses)
    except AssertionError:
        return random_choice(population)
    sigma = stdev(fitnesses)
    average_fitness = mean(fitnesses)
    expected_value_func = lambda x: 1 if sigma == 0 else 1 + ((x - average_fitness) / (2 * sigma))
    sigma_sum = sum(expected_value_func(x) for x in fitnesses)
    scaling_func = lambda x: expected_value_func(x) / sigma_sum
    return roulette(population=population, scaling_func=scaling_func, **kwargs)
def calc_disagreement(evaluations):
    """Return the disagreement level for evaluations, or None if no evaluations.

    Calculated as the max disagreement of (1) N/A and non-N/A responses and
    (2) non-N/A evaluations.

    :param evaluations: an iterable of Eval
    """
    if evaluations:
        na_it, rated_it = partition(lambda x: x is not Eval.not_applicable, evaluations)
        na_votes = list(na_it)
        rated_votes = list(rated_it)

        # Here we use the sample standard deviation because we consider the
        # evaluations are a sample of all the evaluations that could be given.
        # Not clear the best way to make the N/A disagreement comparable to the
        # evaluation disagreement calculation
        na_disagreement = (
            statistics.stdev(([0] * len(na_votes)) + ([1] * len(rated_votes)))
            if len(na_votes) + len(rated_votes) > 1
            else 0.0)
        rated_disagreement = (
            statistics.stdev([v.value for v in rated_votes])
            if len(rated_votes) > 1
            else 0.0)
        return max(na_disagreement, rated_disagreement)
    else:
        return None
def outlier_detection(window_data: list) -> list:
    """
    removes outliers from a list

    This algorithm is a modified version of Chauvenet's criterion
    (https://en.wikipedia.org/wiki/Chauvenet's_criterion)

    :param window_data:
    :return:
    """
    if not window_data:
        raise ValueError("List is empty.")
    vals = []
    for dp in window_data:
        vals.append(float(dp.sample))
    median = stat.median(vals)
    standard_deviation = stat.stdev(vals)
    normal_values = list()
    for val in window_data:
        if (abs(float(val.sample)) - median) < standard_deviation:
            normal_values.append(float(val.sample))
    return normal_values
def probable_languages(self, text):
    """List of most probable programming languages, the list is ordered
    from the most probable to the less probable.

    :param str text: source code.
    :return: languages list
    :rtype: list
    """
    values = extract(text)
    input_fn = _to_func([[values], []])
    proba = next(self._classifier.predict_proba(input_fn=input_fn))
    proba = proba.tolist()
    threshold = max(proba) - _K_STDEV * stdev(proba)
    items = sorted(enumerate(proba), key=itemgetter(1), reverse=True)
    LOGGER.debug("Threshold: %f, probabilities: %s", threshold, items)
    positions = [pos for pos, value in items if value > threshold]
    LOGGER.debug("Predicted languages positions %s", positions)
    names = sorted(self.languages)
    return [names[pos] for pos in positions]
def getStat(aodvalues):
    aods = []
    aodpercent = []
    nan = 0
    zerovals = 0
    for e in aodvalues:
        if isfloat(e.aod_12):
            aods.append(float(e.aod_12))
            if float(e.aod_12) == 0:
                zerovals += 1
            if float(e.aod_030) > 0:
                aodpercent.append(float(e.aod_12)/float(e.aod_030))
        if e.aod_12 == 'NaN':
            nan += 1
    m = mean(aods)
    s = stdev(aods)
    mp = mean(aodpercent)
    sp = stdev(aodpercent)
    return m, s, mp, sp, nan, zerovals
def eval_performance(rom, p_func, n_runs):
    assert n_runs > 1, 'Computing stdev requires at least two runs'
    scores = []
    for i in range(n_runs):
        env = ale.ALE(rom, treat_life_lost_as_terminal=False)
        test_r = 0
        while not env.is_terminal:
            s = util.dqn_phi(env.state)
            pout = p_func(s)
            a = util.categorical_sample(pout)
            test_r += env.receive_action(a)
        scores.append(test_r)
        print('test_{}:'.format(i), test_r)
    mean = statistics.mean(scores)
    median = statistics.median(scores)
    stdev = statistics.stdev(scores)
    return mean, median, stdev
def _print(self):
    """Print statistics and other informational text."""
    mean = statistics.mean(self.prices)
    median = statistics.median(self.prices)
    stdev = statistics.stdev(self.prices)
    high = mean + stdev
    low = mean - stdev

    print(dedent('''\
        Sourced %d prices in %.3f seconds
        Mean:\t$%.2f
        Median:\t$%.2f
        Hi/Lo:\t$%.2f/$%.2f
        StDev:\t%.2f
    ''' % (len(self.prices), self.duration, mean, median, high, low, stdev)))
def get_stats(self, metrics, lang=UNSPECIFIED_TRANSLATION, limit=100):
    stats = super(NumField, self).get_stats(metrics, lang, limit)
    stats.update({
        'median': '*',
        'mean': '*',
        'mode': '*',
        'stdev': '*'
    })
    try:
        # require a non empty dataset
        stats['mean'] = statistics.mean(self.flatten_dataset(metrics))
        stats['median'] = statistics.median(self.flatten_dataset(metrics))
        # requires at least 2 values in the dataset
        stats['stdev'] = statistics.stdev(self.flatten_dataset(metrics),
                                          xbar=stats['mean'])
        # requires a non empty dataset and a unique mode
        stats['mode'] = statistics.mode(self.flatten_dataset(metrics))
    except statistics.StatisticsError:
        pass
    return stats
def async_update(self):
    """Get the latest data and updates the states."""
    if not self.is_binary:
        try:
            self.mean = round(statistics.mean(self.states), 2)
            self.median = round(statistics.median(self.states), 2)
            self.stdev = round(statistics.stdev(self.states), 2)
            self.variance = round(statistics.variance(self.states), 2)
        except statistics.StatisticsError as err:
            _LOGGER.warning(err)
            self.mean = self.median = STATE_UNKNOWN
            self.stdev = self.variance = STATE_UNKNOWN
        if self.states:
            self.total = round(sum(self.states), 2)
            self.min = min(self.states)
            self.max = max(self.states)
        else:
            self.min = self.max = self.total = STATE_UNKNOWN
def get_average_eligible_score():
    return (statistics.mean([x['score'] for x in get_all_team_scores()]),
            statistics.stdev([x['score'] for x in get_all_team_scores()]))
def get_average_problems_solved(eligible=True, scoring=True):
    teams = api.team.get_all_teams(show_ineligible=(not eligible))
    values = [len(api.problem.get_solved_pids(tid=t['tid'])) for t in teams
              if not scoring or len(api.problem.get_solved_pids(tid=t['tid'])) > 0]
    return statistics.mean(values), statistics.stdev(values)
def get_average_achievement_number():
    earned_achievements = api.achievement.get_earned_achievement_instances()
    frequency = defaultdict(int)
    for achievement in earned_achievements:
        frequency[achievement['uid']] += 1
    extra = len(api.team.get_all_teams(show_ineligible=False)) - len(frequency.keys())
    values = [0] * extra
    for val in frequency.values():
        values.append(val)
    return statistics.mean(values), statistics.stdev(values)
def stdev(self):
    return statistics.stdev(self.price)
def print_stat(msg, times_taken):
    print('{}: mean {:.2f} secs, median {:.2f} secs, stdev {:.2f}'.format(
        msg, mean(times_taken), median(times_taken), stdev(times_taken)
    ))
def client_pool(func, entries_count, workers, additional_args=[]):
    pool = Pool(workers)
    start_time = timer()
    worker_args = [[entries_count // workers] + additional_args]
    finish_times = pool.starmap(func, worker_args * workers)
    return (statistics.stdev(finish_times),
            statistics.mean(finish_times) - start_time)
def parse_args():
    global should_draw
    parser = argparse.ArgumentParser()
    parser.add_argument('input')
    parser.add_argument("-e", "--perc_elim", nargs='?', type=int, const=-1, default=20,
                        help='Percentage of contestants eliminated, set to negative number to specify number of contestants')
    parser.add_argument("-t", "--num_gold", nargs='?', type=int, const=5, default=1,
                        help='Number of contestants to place in gold highlighting')
    parser.add_argument('-i', '--omit_image', action='store_false',
                        help='Use this flag to not draw image')
    args = parser.parse_args()
    path = args.input
    votes = convert(path)
    prompt = open('./twows/{}/prompt.txt'.format(path), 'r').read().split('\n')[0]
    scores = []
    twowers = set()
    with open('./twows/{}/responses.csv'.format(path), 'r', encoding=encoding) as csvfile:  # read responses
        reader = csv.reader(csvfile)
        for row in reader:
            # scoredata format [twower, response, votes/mean, count, boost, final, stdev, votegraph]
            name = simplify(row[0])
            twowers.add(name)
            try:
                scores.append([name, row[1], [], 0, int(row[2]), 0, 0, 0, [0 for i in range(10)], []])
            except:
                scores.append([name, row[1], [], 0, 0, 0, 0, 0, [0 for i in range(10)], []])
    twowers = list(twowers)
    twower_count = len(twowers)
    should_draw = args.omit_image
    top_number = args.num_gold
    # chart coloring ranges
    elim_number = 0
    if int(args.perc_elim) < 0:
        elim_number = -args.perc_elim
    else:
        elim_number = round(args.perc_elim*len(twowers)/100)
    return (path, prompt, scores, votes, twowers, twower_count, top_number, elim_number)
def getAabrhRawScoreSummmaryD(strainNamesL, aabrhL, scoresO, geneNames):
    '''Given raw scores and a directory with blast output, finds the sets of
    all around best reciprocal hits. Then for each pair of species, calculates
    the mean and standard deviation of scores and stores in a dictionary.'''

    # now loop through these, sorting scores into a dict keyed by species pair.
    # create dictionary, (representing an upper triangular matrix)
    spScoreD = {}
    for i in range(len(strainNamesL)-1):
        strain1 = strainNamesL[i]
        for j in range(i+1, len(strainNamesL)):
            strain2 = strainNamesL[j]
            spScoreD[(strain1, strain2)] = []

    # loop through aabrhL and populate
    for orthoT in aabrhL:
        spScoreD = addPairwiseScores(spScoreD, orthoT, scoresO, geneNames)

    # get mean and standard deviation
    summaryD = {}
    for sp1, sp2 in spScoreD:
        mean = statistics.mean(spScoreD[(sp1, sp2)])
        std = statistics.stdev(spScoreD[(sp1, sp2)])
        summaryD[(sp1, sp2)] = (mean, std)
        summaryD[(sp2, sp1)] = (mean, std)
    return summaryD
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('input')
    parser.add_argument("-e", "--perc_elim", nargs='?', const=5, default=5)
    parser.add_argument("-t", "--num_gold", nargs='?', const=5, default=5)
    args = parser.parse_args()
    path = args.input
    votes = convert(path)
    prompt = open('./twows/{}/prompt.txt'.format(path), 'r').read().split('\n')[0]
    scores = []
    twowers = set()
    with open('./twows/{}/responses.csv'.format(path), 'r') as csvfile:  # read responses
        reader = csv.reader(csvfile)
        for row in reader:
            # scoredata format [twower, response, votes/mean, count, boost, final, stdev, votegraph]
            name = simplify(row[0])
            twowers.add(name)
            try:
                scores.append([name, row[1], [], 0, int(row[2]), 0, 0, 0, [0 for i in range(10)]])
            except:
                scores.append([name, row[1], [], 0, 0, 0, 0, 0, [0 for i in range(10)]])
    twowers = list(twowers)
    twower_count = len(twowers)
    top_number = int(args.num_gold)
    # chart coloring ranges
    elim_number = 0
    if int(args.perc_elim) < 0:
        elim_number = -int(args.perc_elim)
    else:
        elim_number = round(int(args.perc_elim)*len(twowers)/100)
    return (path, prompt, scores, votes, twowers, twower_count, top_number, elim_number)
def compute_stats(self):
    result = {}
    for func_name, data_points in self.data.items():
        result[self._prefix + func_name] = {
            'avg': stats.mean(data_points),
            'min': min(data_points),
            'max': max(data_points),
            'num': len(data_points)
        }
        if len(data_points) >= 2:
            # index the same prefixed key created above; indexing the
            # un-prefixed name would fail whenever _prefix is non-empty
            result[self._prefix + func_name]['std'] = stats.stdev(data_points)
    return result
def eval_performance(env, agent, n_runs, max_episode_len=None, explorer=None,
                     logger=None):
    """Run multiple evaluation episodes and return statistics.

    Args:
        env (Environment): Environment used for evaluation
        agent (Agent): Agent to evaluate.
        n_runs (int): Number of evaluation runs.
        max_episode_len (int or None): If specified, episodes longer than this
            value will be truncated.
        explorer (Explorer): If specified, the given Explorer will be used for
            selecting actions.
        logger (Logger or None): If specified, the given Logger object will be
            used for logging results. If not specified, the default logger of
            this module will be used.

    Returns:
        Dict of statistics.
    """
    scores = run_evaluation_episodes(
        env, agent, n_runs,
        max_episode_len=max_episode_len,
        explorer=explorer,
        logger=logger)
    stats = dict(
        mean=statistics.mean(scores),
        median=statistics.median(scores),
        stdev=statistics.stdev(scores) if n_runs >= 2 else 0.0,
        max=np.max(scores),
        min=np.min(scores))
    return stats