We extracted the following 50 code examples from open-source Python projects to illustrate how to use math.log().
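Before the project examples, a minimal sketch of the basics (the variable names below are illustrative only, not taken from any project): math.log(x) returns the natural logarithm of x, and an optional second argument selects another base.

import math

value = 8.0
natural = math.log(value)        # natural logarithm, ln(8) ≈ 2.079
base_two = math.log(value, 2)    # log with an explicit base -> 3.0
base_ten = math.log10(value)     # dedicated base-10 helper
print(natural, base_two, base_ten)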
def decode(self, input_vectors, output):
    tgt_toks = [self.tgt_vocab[tok] for tok in output]

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(dynet.concatenate([
        input_vectors[-1],
        dynet.vecInput(self.args.hidden_dim)
    ]))
    loss = []
    for tok in tgt_toks:
        out_vector = dynet.affine_transform([b, w, s.output()])
        probs = dynet.softmax(out_vector)
        loss.append(-dynet.log(dynet.pick(probs, tok.i)))
        embed_vector = self.tgt_lookup[tok.i]
        attn_vector = self.attend(input_vectors, s)
        inp = dynet.concatenate([embed_vector, attn_vector])
        s = s.add_input(inp)
    loss = dynet.esum(loss)
    return loss
def _factor_target_indices(self, Y_inds, vocab_size=None, base=2):
    if vocab_size is None:
        vocab_size = len(self.dp.word_index)
    print >>sys.stderr, "Factoring targets of vocabulary size: %d" % (vocab_size)
    num_vecs = int(math.ceil(math.log(vocab_size) / math.log(base))) + 1
    base_inds = []
    div_Y_inds = Y_inds
    print >>sys.stderr, "Number of factors: %d" % num_vecs
    for i in range(num_vecs):
        new_inds = div_Y_inds % base
        if i == num_vecs - 1:
            if new_inds.sum() == 0:
                # Most significant "digit" is a zero. Omit it.
                break
        base_inds.append(new_inds)
        div_Y_inds = numpy.copy(div_Y_inds / base)
    base_vecs = [self._make_one_hot(base_inds_i, base) for base_inds_i in base_inds]
    return base_vecs
def normalvariate(self, mu, sigma):
    """Normal distribution.

    mu is the mean, and sigma is the standard deviation.

    """
    # mu = mean, sigma = standard deviation

    # Uses Kinderman and Monahan method. Reference: Kinderman,
    # A.J. and Monahan, J.F., "Computer generation of random
    # variables using the ratio of uniform deviates", ACM Trans
    # Math Software, 3, (1977), pp257-260.

    random = self.random
    while 1:
        u1 = random()
        u2 = 1.0 - random()
        z = NV_MAGICCONST*(u1-0.5)/u2
        zz = z*z/4.0
        if zz <= -_log(u2):
            break
    return mu + z*sigma

## -------------------- lognormal distribution --------------------
def expovariate(self, lambd):
    """Exponential distribution.

    lambd is 1.0 divided by the desired mean.  It should be
    nonzero.  (The parameter would be called "lambda", but that is
    a reserved word in Python.)  Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.

    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)

    # we use 1-random() instead of random() to preclude the
    # possibility of taking the log of zero.
    return -_log(1.0 - self.random())/lambd

## -------------------- von Mises distribution --------------------
def test_simple_scaling():
    Quantity.set_prefs(spacer=None, show_label=None, label_fmt=None, label_fmt_full=None)

    q = Quantity('1kg')
    assert q.render() == '1 kg'
    assert q.render(scale=0.001, show_units=False) == '1'
    with pytest.raises(KeyError, message="Unable to convert between 'fuzz' and 'g'."):
        q.render(scale='fuzz')

    q = Quantity('1', units='g', scale=1000)
    assert q.render() == '1 kg'
    assert q.render(scale=(0.0022046, 'lbs')) == '2.2046 lbs'

    q = Quantity('1', scale=(1000, 'g'))
    assert q.render() == '1 kg'
    assert q.render(scale=lambda v, u: (0.0022046*v, 'lbs')) == '2.2046 lbs'

    def dB(v, u):
        return 20*math.log(v, 10), 'dB' + u

    def adB(v, u):
        return pow(10, v/20), u[2:] if u.startswith('dB') else u

    q = Quantity('-40 dBV', scale=adB)
    assert q.render() == '10 mV'
    assert q.render(scale=dB) == '-40 dBV'
def score_samples(self, X):
    """Return the log-likelihood of each sample

    See. "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X: array, shape(n_samples, n_features)
        The data.

    Returns
    -------
    ll: array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')

    X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    precision = self.get_precision()
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi) -
                      fast_logdet(precision))
    return log_like
def compute_possibility(self, start_position, seg_index, oov_pattern, oov_dct, oov_ctx):
    weight, j = 0, start_position
    test_word = []
    for tag in oov_pattern:
        word_content = self.words_graph.get_word(seg_index[j][0], seg_index[j][1]).content
        oov_freq = oov_dct.get_frequence(word_content, self.oov_tag_encode(tag))
        #print('tag:{} word:{} freq:{} start_prob:{}'.format(
        #    tag, word_content, oov_freq,
        #    oov_ctx.prob_to_frequence(oov_ctx.start_prob[self.oov_tag_encode(tag)])))
        test_word.append(self.words_graph.get_word(seg_index[j][0], seg_index[j][1]).content)

        # Original C++: dPOSPoss=log((double)(m_context.GetFrequency(0,m_nBestTag[i])+1))-log((double)(nFreq+1));
        poss = math.log(float(oov_ctx.prob_to_frequence(oov_ctx.start_prob[self.oov_tag_encode(tag)]))) - math.log(float(oov_freq + 1))
        weight += poss
        j += 1
    #print('compute_possibility() {} {} = {}'.format(oov_pattern, ''.join(test_word), weight))
    return weight
def __init__(self, get_params_function, try_params_function):
    self.get_params = get_params_function
    self.try_params = try_params_function

    self.max_iter = 81  # maximum iterations per configuration
    self.eta = 3        # defines configuration downsampling rate (default = 3)

    self.logeta = lambda x: log(x) / log(self.eta)
    self.s_max = int(self.logeta(self.max_iter))
    self.B = (self.s_max + 1) * self.max_iter

    self.results = []  # list of dicts
    self.counter = 0
    self.best_loss = np.inf
    self.best_counter = -1

# can be called multiple times
def calc_mean_lp_scores(log_prob_scores: List[float],
                        lengths: List[int]) -> List[Union[None, float]]:
    r"""
    .. math:
        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \text{length}\left(\xi\right)
        }

    >>> '{:.3f}'.format(calc_mean_lp_scores([-14.7579], [4])[0])
    '-3.689'
    """
    mean_lp_scores = []
    for score, length in zip(log_prob_scores, lengths):
        x = None \
            if score is None or length == 0 \
            else float(score) / float(length)
        mean_lp_scores.append(x)
    return mean_lp_scores
def calc_norm_lp_div_scores(
        log_prob_scores: List[float],
        unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""
    .. math:
        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \log P_\text{unigram}\left(\xi\right)
        }

    >>> '{:.3f}'.format(calc_norm_lp_div_scores([-14.7579], [-35.6325])[0])
    '-0.414'
    """
    results = []
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = (-1.0) * float(log_prob) / float(unigram_score)
        results.append(x)
    return results
def calc_norm_lp_sub_scores(
        log_prob_scores: List[float],
        unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""
    .. math:
        \log P_\text{model}\left(\xi\right)
        - \log P_\text{unigram}\left(\xi\right)

    >>> '{:.3f}'.format(calc_norm_lp_sub_scores([-14.7579], [-35.6325])[0])
    '20.875'
    """
    results = []
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = float(log_prob) - float(unigram_score)
        results.append(x)
    return results
def expovariate(self, lambd):
    """Exponential distribution.

    lambd is 1.0 divided by the desired mean.  It should be
    nonzero.  (The parameter would be called "lambda", but that is
    a reserved word in Python.)  Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.

    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)

    random = self.random
    u = random()
    while u <= 1e-7:
        u = random()
    return -_log(u)/lambd

## -------------------- von Mises distribution --------------------
def randint(minvalue, maxvalue):
    """Returns a random integer x with minvalue <= x <= maxvalue"""

    # Safety - get a lot of random data even if the range is fairly
    # small
    min_nbits = 32

    # The range of the random numbers we need to generate
    range = maxvalue - minvalue

    # Which is this number of bytes
    rangebytes = ceil(math.log(range, 2) / 8.)

    # Convert to bits, but make sure it's always at least min_nbits*2
    rangebits = max(rangebytes * 8, min_nbits * 2)

    # Take a random number of bits between min_nbits and rangebits
    nbits = random.randint(min_nbits, rangebits)

    return (read_random_int(nbits) % range) + minvalue
def randomized_primality_testing(n, k):
    """Calculates whether n is composite (which is always correct) or
    prime (which is incorrect with error probability 2**-k)

    Returns False if the number is composite, and True if it's
    probably prime.
    """

    q = 0.5     # Property of the jacobi_witness function

    # t = int(math.ceil(k / math.log(1/q, 2)))
    t = ceil(k / math.log(1/q, 2))

    for i in range(t+1):
        x = randint(1, n-1)
        if jacobi_witness(x, n):
            return False

    return True
def idf(tf_dic_list, global_idf_dic, silent=1):
    """
    Input:
        global_idf_dic = {}  # word -> idf, which may be updated in place
    """
    if silent == 0:
        print("idf ...")
    doc_len = len(tf_dic_list)
    idf_dic_list = []  # [{word:idf} for each sample]
    for c, tf_dic in enumerate(tf_dic_list):
        idf_dic = {}
        for word in tf_dic:
            if word not in global_idf_dic:
                n_containing = sum([word in tf_dic for tf_dic in tf_dic_list])
                global_idf_dic[word] = log(doc_len/(1.0+n_containing))
            idf_dic[word] = global_idf_dic[word]
        idf_dic_list.append(idf_dic)
        if silent == 0 and c > 0 and c % 100 == 0:
            print("{} documents done, total {}, word {}, idf {}".format(
                c, len(tf_dic_list), word, global_idf_dic[word]))
    return idf_dic_list
def _ndcg_at(k, label_col):
    def ndcg_at_k(predicted, actual):
        # TODO: Taking in rn and then re-sorting might not be necessary, but i can't
        # find any real guarantee that they would come in order after a groupBy + collect_list,
        # since they were only ordered within the window function.
        predicted = [row[label_col] for row in sorted(predicted, key=lambda r: r.rn)]
        actual = [row[label_col] for row in sorted(actual, key=lambda r: r.rn)]
        dcg = 0.
        for i, label in enumerate(predicted):
            # This form is used to match EvalNDCG in xgboost
            dcg += ((1 << label) - 1) / math.log(i + 2.0, 2)
        idcg = 0.
        for i, label in enumerate(actual):
            idcg += ((1 << label) - 1) / math.log(i + 2.0, 2)
        if idcg == 0:
            return 0
        else:
            return dcg / idcg
    return F.udf(ndcg_at_k, pyspark.sql.types.DoubleType())
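For reference, the discounted-gain term in the UDF above reduces to plain math.log calls with an explicit base of 2. A minimal standalone sketch of the same idea (the relevance labels below are made-up values, not data from the project above):

import math

predicted = [3, 2, 3, 0, 1]              # made-up relevance labels in predicted order
ideal = sorted(predicted, reverse=True)  # best possible ordering

def dcg(labels):
    # ((2**label) - 1) / log2(rank + 2), matching the xgboost-style gain above
    return sum(((1 << label) - 1) / math.log(i + 2.0, 2) for i, label in enumerate(labels))

print(dcg(predicted) / dcg(ideal))       # NDCG, a value in [0, 1]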
def launch():
    opts, h5_files, motifs_fn = __parseArgs()

    __initLog(opts)

    motifs = np.loadtxt(motifs_fn, dtype="str", ndmin=1)

    motifs, not_found = find_motifs_in_control(opts, motifs)
    if len(not_found) > 0:
        logging.warning("")
        logging.warning("  ******************** Important *********************")
        logging.warning("  Did not find %s motifs in %s:" % (len(not_found), opts.control_pkl_name))
        for nf in not_found:
            logging.warning("      %s" % nf)
        logging.warning("  These motif(s) will be removed from further analysis.")
        logging.warning("  These %s motifs will be kept:" % len(motifs))
        for m in motifs:
            logging.warning("      %s" % m)
        logging.warning("  ****************************************************")
        logging.warning("")
    else:
        logging.info("Found entries for all %s motifs in %s" % (len(motifs), opts.control_pkl_name))

    build_profiles(opts, h5_files, motifs, motifs_fn)

    print >> sys.stderr, "mBin methylation profiling has finished running. See log for details."
def get_differentially_private_std(sensitivity, epsilon, delta,
                                   tol=DEFAULT_SIGMA_TOLERANCE):
    '''
    Determine smallest standard deviation for a normal distribution such that
    the probability of a value violating epsilon-differential privacy is at
    most delta.
    '''
    # std upper bound determined by improving result in literature,
    # Hardt and Roth, "Beating Randomized Response on Incoherent Matrices"
    # Thm. 2.6 (and the Lemma in App. A) can be improved to provide the
    # following upper bound
    std_upper_bound = (float(sensitivity)/epsilon) * (4.0/3.0) *\
        (2 * math.log(1.0/delta))**(0.5)
    std_lower_bound = tol  # use small but non-zero value for std lower-bound

    if (satisfies_dp(sensitivity, epsilon, delta, std_lower_bound) is True):
        raise ValueError('Could not find lower bound for std interval.')

    std = interval_boolean_binary_search(\
        lambda x: satisfies_dp(sensitivity, epsilon, delta, x),
        std_lower_bound, std_upper_bound, tol, return_true=True)

    return std
def _encode_ratio(inval, outval):
    '''
    Calculate the log ratio between inbound and outbound traffic.
    Positive when outval > inval, and negative when inval > outval.
    Returns a non-infinite floating point value:
    - zero when inval and outval are zero,
    - a large negative number (< -100) when outval is zero, and
    - a large positive number (> 100) when inval is zero, and
    - log(base 2)(outval/inval) otherwise.
    '''
    inval = float(inval)
    outval = float(outval)
    if inval == 0.0 and outval == 0.0:
        return 0.0
    elif inval == 0.0:
        return sys.float_info.max_exp
    elif outval == 0.0:
        return sys.float_info.min_exp
    else:
        return math.log(outval/inval, 2)
def calculate_oobatake_dS(seq, temp):
    """Get dS using Oobatake method in units cal/mol.

    Args:
        seq (str, Seq, SeqRecord): Amino acid sequence
        temp (float): Temperature in degrees C

    Returns:
        float: dS in units cal/mol

    """
    seq = ssbio.protein.sequence.utils.cast_to_str(seq)

    dS = 0
    temp += 273.15
    T0 = 298.15
    dCp_sum = _sum_of_dCp(seq)

    for aa in seq:
        S0 = oobatake_dictionary[aa]['dS']
        dS += S0

    return dS + dCp_sum * math.log(temp / T0)
def calculate_dill_dG(seq_len, temp):
    """Get free energy of unfolding (dG) using Dill method in units J/mol.

    Args:
        seq_len (int): Length of amino acid sequence
        temp (float): Temperature in degrees C

    Returns:
        float: Free energy of unfolding dG (J/mol)

    """
    Th = 373.5  # This quantity affects the up-and-down of the dG vs temperature curve (dG values)
    Ts = 385    # This quantity affects the left-and-right
    temp += 273.15

    dH = (4.0 * seq_len + 143) * 1000
    dS = 13.27 * seq_len + 448
    dCp = (0.049 * seq_len + 0.85) * 1000

    dG = dH + dCp * (temp - Th) - temp * dS - temp * dCp * math.log(float(temp) / Ts)

    return dG
def Rstr(self):
    array2 = []
    prixe = math.log(0.03637 / float(252) + 1)
    ret = self.sharedf
    ret['change'] = ret['change'] - prixe
    rstr = []
    print 1
    if len(ret) > 525:
        for z in range(0, 504):
            array2.append(math.pow(math.pow(float(1) / 2, float(1 / float(126))), (503 - z)))
        for h in range(0, 525):
            rstr.append(numpy.NaN)
        for c in range(525, len(ret)):
            rett = 0
            for f in range(0, len(duan) - 21):
                rett = rett + duan.iloc[f, 16] * array2[f]
            rstr.append(rett)
        print rstr
        ret['rstr'] = rstr
    return ret[['date', 'rstr']]
def Cmra(self):
    df = self.sharedf
    cc = []
    cmra = []
    prixe = math.log(0.03637/float(12)+1)
    df = df.set_index('date')
    df1 = df['change']
    for x in range(20, len(df1.index)+1):
        cc.append(df1[x-20:x].sum()-prixe)
    dd = []
    for x in range(12, len(cc)+1):
        dd.append(sum(cc[x-12:x]))
    for x in range(252, len(dd)+1):
        cmra.append(max(cc[x-252:x])-min(cc[x-252:x]))
    df = df[281:]
    df['cmra'] = cmra
    df['date'] = df.index
    df = pandas.DataFrame(df.reset_index(drop=True))
    return df[['date', 'cmra']]
def blackcox_pd(equity, extasset, sigma):
    """Compute the probability of default for external assets following a
    Geometric Brownian Motion and the Black and Cox model.

    Parameters:
        equity (float): equity
        extasset (float): external assets
        sigma (float): volatility of the Geometric Brownian Motion

    Returns:
        probability of default
    """
    if equity <= 0.0:
        return 1.0
    if equity >= extasset:
        return 0.0
    else:
        #return 1 + (- 1/2 * (1 + math.erf((-math.log(1 - equity/extasset) - sigma**2/2) /
        #                                  (math.sqrt(2) * sigma)) )
        #            + (extasset/equity)/2 * (1 + math.erf((math.log(1 - equity/extasset) - sigma**2/2) /
        #                                                  (math.sqrt(2) * sigma)) ) )
        return (1/2 * (1 + math.erf((math.log(1 - equity/extasset) + sigma**2/2) /
                                    (math.sqrt(2) * sigma)) )
                + (extasset/(extasset - equity))/2 * (1 + math.erf((math.log(1 - equity/extasset) - sigma**2/2) /
                                                                   (math.sqrt(2) * sigma)) ) )
def ndcg(self, y_true, y_pred, k=20):
    s = 0.
    c = self.zipped(y_true, y_pred)
    c_g = sorted(c, key=lambda x: x[0], reverse=True)
    c_p = sorted(c, key=lambda x: x[1], reverse=True)
    #idcg = [0. for i in range(k)]
    idcg = np.zeros([k], dtype=np.float32)
    dcg = np.zeros([k], dtype=np.float32)
    #dcg = [0. for i in range(k)]
    for i, (g, p) in enumerate(c_g):
        if g > self.rel_threshold:
            idcg[i:] += (math.pow(2., g) - 1.) / math.log(2. + i)
        if i >= k:
            break
    for i, (g, p) in enumerate(c_p):
        if g > self.rel_threshold:
            dcg[i:] += (math.pow(2., g) - 1.) / math.log(2. + i)
        if i >= k:
            break
    for idx, v in enumerate(idcg):
        if v == 0.:
            dcg[idx] = 0.
        else:
            dcg[idx] /= v
    return dcg
def _log_likelihood(Y, gamma, sigma):
    """
    Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)

    Parameters
    ----------
    Y : numpy.array
        observations
    gamma : float
        GPD index parameter
    sigma : float
        GPD scale parameter (>0)

    Returns
    ----------
    float
        log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0)
    """
    n = Y.size
    if gamma != 0:
        tau = gamma/sigma
        L = -n * log(sigma) - (1 + (1/gamma)) * (np.log(1+tau*Y)).sum()
    else:
        L = n * (1 + log(Y.mean()))
    return L
def _quantile(self, gamma, sigma):
    """
    Compute the quantile at level 1-q

    Parameters
    ----------
    gamma : float
        GPD parameter
    sigma : float
        GPD parameter

    Returns
    ----------
    float
        quantile at level 1-q for the GPD(γ,σ,μ=0)
    """
    r = self.n * self.proba / self.Nt
    if gamma != 0:
        return self.init_threshold + (sigma/gamma)*(pow(r, -gamma)-1)
    else:
        return self.init_threshold - sigma*log(r)
def get_results(self):
    result = {}
    fn = self.rec_fn()
    txt = open(fn).read()
    params_txt = open(self.par_fn()).read().splitlines()
    columns = ['parameter', 'value', 'scale', 'offset']
    param_vals = [dict(zip(columns, line.strip().split())) for line in params_txt[1:]]
    params = pd.DataFrame(param_vals)
    params = params.set_index('parameter')
    for col in columns[1:]:
        params[col] = params[col].astype('f')
    result['results_file'] = fn
    result['text'] = txt
    result['parameters'] = params
    if self.detailed_log:
        result['log'] = self.read_logs()
    return result
def get_similarity(word_list1, word_list2):
    """Compute the similarity between two sentences.

    Keyword arguments:
    word_list1, word_list2  --  the two sentences, each given as a list of words
    """
    words = list(set(word_list1 + word_list2))
    vector1 = [float(word_list1.count(word)) for word in words]
    vector2 = [float(word_list2.count(word)) for word in words]

    vector3 = [vector1[x]*vector2[x] for x in xrange(len(vector1))]
    vector4 = [1 for num in vector3 if num > 0.]
    co_occur_num = sum(vector4)

    if abs(co_occur_num) <= 1e-12:
        return 0.

    denominator = math.log(float(len(word_list1))) + math.log(float(len(word_list2)))  # denominator

    if abs(denominator) < 1e-12:
        return 0.

    return co_occur_num / denominator
def lonlat_to_pixel(self, lonlat, zoom):
    "Converts a longitude, latitude coordinate pair for the given zoom level."
    # Setting up, unpacking the longitude, latitude values and getting the
    # number of pixels for the given zoom level.
    lon, lat = self.get_lon_lat(lonlat)
    npix = self._npix[zoom]

    # Calculating the pixel x coordinate by multiplying the longitude value
    # with the number of degrees/pixel at the given zoom level.
    px_x = round(npix + (lon * self._degpp[zoom]))

    # Creating the factor, and ensuring that 1 or -1 is not passed in as the
    # base to the logarithm.  Here's why:
    #  if fac = -1, we'll get log(0) which is undefined;
    #  if fac = 1, our logarithm base will be divided by 0, also undefined.
    fac = min(max(sin(DTOR * lat), -0.9999), 0.9999)

    # Calculating the pixel y coordinate.
    px_y = round(npix + (0.5 * log((1 + fac) / (1 - fac)) * (-1.0 * self._radpp[zoom])))

    # Returning the pixel x, y to the caller of the function.
    return (px_x, px_y)
def getPSD(df, dw=0.05, roverlap=0.5, window='hanning', detrend='constant'):
    """ Compute the power spectral density """

    if type(df) == pd.Series:
        df = pd.DataFrame(df)

    nfft = int((2*pi / dw) / dx(df))
    nperseg = 2**int(log(nfft)/log(2))
    noverlap = nperseg * roverlap

    """ Return the PSD of a time signal """
    try:
        from scipy.signal import welch
    except:
        raise Exception("Welch function not found, please install scipy > 0.12")

    data = []
    for iSig in range(df.shape[1]):
        test = welch(df.values[:, iSig], fs=1. / dx(df), window=window, nperseg=nperseg,
                     noverlap=noverlap, nfft=nfft, detrend=detrend,
                     return_onesided=True, scaling='density')
        data.append(test[1] / (2*pi))

    xAxis = test[0][:] * 2*pi
    return pd.DataFrame(data=np.transpose(data), index=xAxis,
                        columns=["psd(" + str(x) + ")" for x in df.columns])
def find_top_two_peaks(sdata):
    samples = len(sdata)
    fft_size = 2**int(floor(log(samples)/log(2.0)))
    freq = fft(sdata[0:fft_size])
    pdata = numpy.zeros(fft_size)
    for i in xrange(fft_size):
        pdata[i] = abs(freq[i])
    peak = 0
    peak1 = 0
    peak2 = 0
    peak1_index = 0
    peak2_index = 0
    for i in xrange(fft_size/2):
        if (pdata[i] > peak1):
            peak1 = pdata[i]
            peak1_index = i
    for i in xrange(fft_size/2):
        if (pdata[i] > peak2) and (abs(i - peak1_index) > 4):
            peak2 = pdata[i]
            peak2_index = i
    return (peak1, peak1_index, peak2, peak2_index)

# REMOVAL CASES
def save_fft(fil, audio_in):
    samples = len(audio_in)
    fft_size = 2**int(floor(log(samples)/log(2.0)))
    freq = fft(audio_in[0:fft_size])
    s_data = numpy.zeros(fft_size/2)
    x_data = numpy.zeros(fft_size/2)
    peak = 0;
    for j in xrange(fft_size/2):
        if (abs(freq[j]) > peak):
            peak = abs(freq[j])
    for j in xrange(fft_size/2):
        x_data[j] = log(2.0*(j+1.0)/fft_size);
        if (x_data[j] < -10):
            x_data[j] = -10
        s_data[j] = 10.0*log(abs(freq[j])/peak)/log(10.0)

    plt.ylim([-50, 0])
    plt.plot(x_data, s_data)
    plt.title('fft log power')
    plt.grid()

    fields = fil.split('.')
    plt.savefig(fields[0]+'_fft.png', bbox_inches="tight")
    plt.clf()
    plt.close()
def get_test_probs(cmd_args, ngrams_test, corpus_files, model):
    """ Get sum of probabilities for ngrams of test data. """
    # Initialize probs
    sumprobs = {}
    for lang in corpus_files:
        sumprobs[lang] = 0.0

    for ngram in ngrams_test:
        for lang in corpus_files:
            sumprobs[lang] += ngrams_test[ngram] * probability.LaplaceProbDist.logprob(model.smoothed[lang], ngram)

    # The population prior is mostly useful for really small test snippets
    if not cmd_args.no_prior:
        for lang in corpus_files:
            # Strip trailing .txt, and check if it's in the population statistics dict
            lang_prefix = lang[:-4]
            if lang_prefix in model.stats:
                # Normalize population counts by approximate total number of people on earth
                sumprobs[lang] += math.log(model.stats[lang_prefix] / 8e9)
            else:
                # If language isn't in the language population statistics,
                # assume median value of all langs, which is about 500K
                sumprobs[lang] += math.log(500000 / 8e9)

    return sumprobs
def construct_pssm(cds, length=90, out_path="", prob=None):
    """ Construct Position Specific Scoring Matrices with log-likelihood values
        length: size of analyzed region from start, in bp
                (sequences that are not this size are discarded)
        prob  : a dict of bases with a priori expected probabilities
    """
    cds = cds[0]
    if not prob:
        prob = {"a": 0.25, "t": 0.25, "g": 0.25, "c": 0.25}
    m = {"a": [0]*length, "t": [0]*length, "g": [0]*length, "c": [0]*length}
    tot_gene = 0.0
    for gene in cds:
        if len(cds[gene]) >= length:
            tot_gene += 1
            for i in range(length):
                m[cds[gene][i]][i] += 1
    for k in m:
        m[k] = [log((v/tot_gene)/prob[k]) for v in m[k]]
    if out_path:
        h = open(out_path, "w")
        h.write("," + ",".join([str(i) for i in range(1, length+1)]) + "\n")
        for b in ["a", "t", "g", "c"]:
            h.write(b + "," + ",".join(["%.2f" % v for v in m[b]]) + "\n")
        h.close()
    return m
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator
    Perform MMSE estimation E[x|r]
    for x ~ BernoulliGaussian(lambda,xvar1)
        r|x ~ Normal(x,rvar)

    The parameters theta[0],theta[1] represent
        The variance of non-zero x[i]
            xvar1 = abs(theta[0])
        The probability of nonzero x[i]
            lambda = 1/(exp(theta[1])+1)
    """
    xvar1 = abs(theta[..., 0])
    loglam = theta[..., 1]  # log(1/lambda - 1)
    beta = 1/(1+rvar/xvar1)
    r2scale = r*r*beta/rvar
    rho = tf.exp(loglam - .5*r2scale) * tf.sqrt(1 + xvar1/rvar)
    rho1 = rho+1
    xhat = beta*r/rho1
    dxdr = beta*((1+rho*(1+r2scale)) / tf.square(rho1))
    dxdr = tf.reduce_mean(dxdr, 0)
    return (xhat, dxdr)
def load_trackball_action(self, action):
    cbTracballOutput = self.builder.get_object("cbTracballOutput")
    cbAxisOutput = self.builder.get_object("cbAxisOutput")
    sclFriction = self.builder.get_object("sclFriction")
    self._recursing = True
    if isinstance(action.action, MouseAction):
        self.set_cb(cbTracballOutput, "mouse", 1)
        self.set_cb(cbAxisOutput, "trackball", 2)
    elif isinstance(action.action, XYAction):
        if isinstance(action.action.x, AxisAction):
            if action.action.x.parameters[0] == Axes.ABS_X:
                self.set_cb(cbTracballOutput, "left", 1)
            else:
                self.set_cb(cbTracballOutput, "right", 1)
            self.set_cb(cbAxisOutput, "trackball", 2)
        elif isinstance(action.action.x, MouseAction):
            if self.editor.get_id() in STICKS:
                self.set_cb(cbAxisOutput, "wheel_stick", 2)
            else:
                self.set_cb(cbAxisOutput, "wheel_pad", 2)
    if action.friction <= 0:
        sclFriction.set_value(0)
    else:
        sclFriction.set_value(math.log(action.friction * 1000.0, 10))
    self._recursing = False
def decode(self, encoding, input, output):
    """
    Single training example decoding function
    :param encoding: last hidden state from encoder
    :param input: source sentence
    :param output: target sentence
    :return: loss value
    """
    src_toks = [self.src_vocab[tok] for tok in input]
    tgt_toks = [self.tgt_vocab[tok] for tok in output]

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state().add_input(encoding)

    loss = []
    sent = []
    for tok in tgt_toks:
        out_vector = dynet.affine_transform([b, w, s.output()])
        probs = dynet.softmax(out_vector)
        cross_ent_loss = - dynet.log(dynet.pick(probs, tok.i))
        loss.append(cross_ent_loss)

        embed_vector = self.tgt_lookup[tok.i]
        s = s.add_input(embed_vector)

    loss = dynet.esum(loss)
    return loss
def beam_search_generate(self, src_seq, beam_n=5):
    dynet.renew_cg()

    embedded = self.embed_seq(src_seq)
    input_vectors = self.encode_seq(embedded)

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(input_vectors[-1])
    beams = [{"state": s, "out": [], "err": 0}]
    completed_beams = []
    while len(completed_beams) < beam_n:
        potential_beams = []
        for beam in beams:
            if len(beam["out"]) > 0:
                embed_vector = self.tgt_lookup[beam["out"][-1].i]
                s = beam["state"].add_input(embed_vector)

            out_vector = dynet.affine_transform([b, w, s.output()])
            probs = dynet.softmax(out_vector)
            probs = probs.vec_value()

            for potential_next_i in range(len(probs)):
                potential_beams.append({"state": s,
                                        "out": beam["out"] + [self.tgt_vocab[potential_next_i]],
                                        "err": beam["err"] - math.log(probs[potential_next_i])})
        potential_beams.sort(key=lambda x: x["err"])
        beams = potential_beams[:beam_n - len(completed_beams)]
        completed_beams = completed_beams + [beam for beam in beams
                                             if beam["out"][-1] == self.tgt_vocab.END_TOK
                                             or len(beam["out"]) > 5 * len(src_seq)]
        beams = [beam for beam in beams
                 if beam["out"][-1] != self.tgt_vocab.END_TOK
                 and len(beam["out"]) <= 5 * len(src_seq)]
    completed_beams.sort(key=lambda x: x["err"])
    return [beam["out"] for beam in completed_beams]
def fEntropy(countByte, countTotal):
    x = float(countByte) / countTotal
    if x > 0:
        return - x * math.log(x, 2)
    else:
        return 0.0
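The helper above returns a single term of a Shannon-entropy sum. As a hedged illustration (the byte-counting loop and the variable names below are assumptions for demonstration, not part of the original project), the per-byte terms would typically be accumulated like this, assuming fEntropy from the example above is in scope:

from collections import Counter

# Illustrative sketch only: estimate the Shannon entropy (bits per byte) of a
# buffer by summing the per-byte terms computed by fEntropy (defined above).
data = b'example buffer with some repeated bytes'
counts = Counter(data)
entropy = sum(fEntropy(count, len(data)) for count in counts.values())
print('%.3f bits per byte' % entropy)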
def Print(lines, options):
    print(lines)
    filename = None
    if options.scan:
        filename = 'PDFiD.log'
    if options.output != '':
        filename = options.output
    if filename:
        logfile = open(filename, 'a')
        logfile.write(lines + '\n')
        logfile.close()
def Main():
    moredesc = '''

Arguments:
pdf-file and zip-file can be a single file, several files, and/or @file
@file: run PDFiD on each file listed in the text file specified
wildcards are supported

Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''

    oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc, version='%prog ' + __version__)
    oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
    oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
    oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
    oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
    oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
    oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')
    oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins')
    oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
    oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise catched exceptions)')
    oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
    oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
    (options, args) = oParser.parse_args()

    if len(args) == 0:
        if options.disarm:
            print('Option disarm not supported with stdin')
            options.disarm = False
        if options.scan:
            print('Option scan not supported with stdin')
            options.scan = False
        filenames = ['']
    else:
        try:
            filenames = ExpandFilenameArguments(args)
        except Exception as e:
            print(e)
            return
    PDFiDMain(filenames, options)