The following 50 code examples, extracted from open source Python projects, illustrate how to use numpy.log2().
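Before the project examples, here is a minimal, self-contained sketch of numpy.log2 itself; the array values are purely illustrative:

import numpy as np

x = np.array([1.0, 2.0, 8.0, 1024.0])
print(np.log2(x))        # [ 0.  1.  3. 10.]
print(np.log2(np.e))     # ~1.4427, i.e. 1/ln(2)
print(np.log2(0.0))      # -inf (emits a RuntimeWarning)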
def fftfilt(b, x, *n):
    N_x = len(x)
    N_b = len(b)
    N = 2**np.arange(np.ceil(np.log2(N_b)), np.floor(np.log2(N_x)))
    cost = np.ceil(N_x / (N - N_b + 1)) * N * (np.log2(N) + 1)
    N_fft = int(N[np.argmin(cost)])
    N_fft = int(N_fft)

    # Compute the block length:
    L = int(N_fft - N_b + 1)

    # Compute the transform of the filter:
    H = np.fft.fft(b, N_fft)

    y = np.zeros(N_x, x.dtype)
    i = 0
    while i <= N_x:
        il = np.min([i+L, N_x])
        k = np.min([i+N_fft, N_x])
        yt = np.fft.ifft(np.fft.fft(x[i:il], N_fft)*H, N_fft)  # Overlap..
        y[i:k] = y[i:k] + yt[:k-i]                              # and add
        i += L
    return y
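A quick way to sanity-check the overlap-add implementation above is to compare it against direct convolution truncated to the input length. This is a hedged sketch, assuming b is a short FIR filter and x a longer real-valued signal:

import numpy as np

rng = np.random.default_rng(0)
b = rng.standard_normal(32)            # FIR filter taps
x = rng.standard_normal(4096)          # input signal
y_ref = np.convolve(b, x)[:len(x)]     # direct convolution, truncated to len(x)
print(np.allclose(fftfilt(b, x), y_ref))   # expected: True, up to FFT round-off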
def normalize_and_transpose(matrix):
    matrix.tocsc()
    m = normalize_by_umi(matrix)
    # Use log counts
    m.data = np.log2(1 + m.data)
    # Transpose
    m = m.T
    # compute centering (mean) and scaling (stdev)
    (c, v) = summarize_columns(m)
    s = np.sqrt(v)
    return (m, c, s)
def hz2erbs(hz):
    """
    Convert values in Hertz to values in equivalent rectangular bandwidths (ERBs)

    Args:
        hz (float): real number in Hz

    Returns:
        erbs (float): real number in ERBs

    References:
        [1] Camacho, A., & Harris, J. G. (2008). A sawtooth waveform inspired
            pitch estimator for speech and music. The Journal of the Acoustical
            Society of America, 124(3), 1638–1652.
            https://doi.org/10.1121/1.2951592
    """
    erbs = 6.44 * (np.log2(229 + hz) - 7.84)
    return erbs
def __nearest_pow_2(self, x):
    """
    Find power of two nearest to x

    >>> _nearest_pow_2(3)
    2.0
    >>> _nearest_pow_2(15)
    16.0

    :type x: float
    :param x: Number
    :rtype: Int
    :return: Nearest power of 2 to x
    """
    a = math.pow(2, math.ceil(np.log2(x)))
    b = math.pow(2, math.floor(np.log2(x)))
    if abs(a - x) < abs(b - x):
        return a
    else:
        return b

# calculate spectrogram of signals
def _nearest_pow_2(x):
    """
    Find power of two nearest to x

    >>> _nearest_pow_2(3)
    2.0
    >>> _nearest_pow_2(15)
    16.0

    :type x: float
    :param x: Number
    :rtype: Int
    :return: Nearest power of 2 to x
    """
    a = M.pow(2, M.ceil(np.log2(x)))
    b = M.pow(2, M.floor(np.log2(x)))
    if abs(a - x) < abs(b - x):
        return a
    else:
        return b
def build_bins(self, questions):
    """
    returns a dictionary
        key: document length (rounded to the powers of two)
        value: indexes of questions with document length equal to key
    """
    # round the input to the nearest power of two
    round_to_power = lambda x: 2**(int(np.log2(x-1))+1)
    doc_len = map(lambda x: round_to_power(len(x[0])), questions)
    bins = {}
    for i, l in enumerate(doc_len):
        if l not in bins:
            bins[l] = []
        bins[l].append(i)
    return bins
def __init__(self, h, x0=None, **kwargs):
    assert type(h) is list, 'h must be a list'
    assert len(h) in [2, 3], "TreeMesh is only in 2D or 3D."

    if '_levels' in kwargs.keys():
        self._levels = kwargs.pop('_levels')

    BaseTensorMesh.__init__(self, h, x0, **kwargs)

    if self._levels is None:
        self._levels = int(np.log2(len(self.h[0])))
    # self._levels = levels
    self._levelBits = int(np.ceil(np.sqrt(self._levels)))+1

    self.__dirty__ = True  #: The numbering is dirty!

    if '_cells' in kwargs.keys():
        self._cells = kwargs.pop('_cells')
    else:
        self._cells.add(0)
def volcano(data):
    if len(data.index.levels[1]) != 2:
        raise Exception('Volcano requires secondary index with two values')
    indexA, indexB = data.index.levels[1]
    dataA = data.xs(indexA, level=1)
    dataB = data.xs(indexB, level=1)
    meanA = dataA.mean(axis=0)
    meanB = dataB.mean(axis=0)
    change = meanB.div(meanA)
    statistic, pvalues = ttest_ind(dataA, dataB)
    pvalues = pd.DataFrame(
        [statistic, pvalues, -np.log10(pvalues), change, np.log2(change)],
        columns=data.columns,
        index=['t', 'p', '-log10(p)', 'foldchange', 'log2(foldchange)']).transpose()
    return pvalues
def test_branch_cuts_complex64(self):
    # check branch cuts and continuity on them
    yield _check_branch_cut, np.log,   -0.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.log2,  -0.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True, np.complex64
    yield _check_branch_cut, np.sqrt,  -0.5, 1j, 1, -1, True, np.complex64

    yield _check_branch_cut, np.arcsin, [ -2, 2],   [1j, 1j], 1, -1, True, np.complex64
    yield _check_branch_cut, np.arccos, [ -2, 2],   [1j, 1j], 1, -1, True, np.complex64
    yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1],  -1,  1, True, np.complex64

    yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1],   -1,  1, True, np.complex64
    yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j],  1, -1, True, np.complex64
    yield _check_branch_cut, np.arctanh, [ -2, 2],   [1j, 1j],  1, -1, True, np.complex64

    # check against bogus branch cuts: assert continuity between quadrants
    yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1,  1], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1,  1], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arctan, [ -2, 2],   [1j, 1j], 1, 1, False, np.complex64

    yield _check_branch_cut, np.arcsinh, [ -2, 2, 0],   [1j, 1j, 1], 1, 1, False, np.complex64
    yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j],  1, 1, False, np.complex64
    yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j],  1, 1, False, np.complex64
def _hist_bin_sturges(x):
    """
    Sturges histogram bin estimator.

    A very simplistic estimator based on the assumption of normality of
    the data. This estimator has poor performance for non-normal data,
    which becomes especially obvious for large data sets. The estimate
    depends only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / (np.log2(x.size) + 1.0)
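As a quick worked example of the formula above: a sample of 1024 points spanning a range of 10 gets an estimated bin width of 10 / (log2(1024) + 1) = 10 / 11 ≈ 0.91.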
def cal_entropy(self, l=3):
    '''calculate entropy for each sequence'''
    for (id, seq) in self.seqs.items():
        entropy = 0
        dna_chars_uniq = FrameKmer.all_possible_kmer(l)
        dna_len = len(seq)
        for c in dna_chars_uniq:
            if 'N' in c:
                continue
            prop = seq.count(c)/(1.0*dna_len)
            if prop == 0:
                continue
            information = numpy.log2(1.0/prop)
            entropy += prop * information
        yield(id, entropy)
def determine_statistical(self, data, j, k):
    """
    NAME:
       determine_statistical
    PURPOSE:
       Determine the subsample that is part of the statistical sample
       described by this selection function object
    INPUT:
       data - a TGAS subsample (e.g., F stars)
       j - J magnitudes for data
       k - K_s magnitudes for data
    OUTPUT:
       index array into data that has True for members of the
       statistical sample
    HISTORY:
       2017-01-18 - Written - Bovy (UofT/CCA)
    """
    # Sky cut
    data_sid = (data['source_id']
                /2**(35.+2*(12.-numpy.log2(_BASE_NSIDE)))).astype('int')
    skyindx = True-self._exclude_mask_skyonly[data_sid]
    # Color, magnitude cuts
    cmagindx = (j >= self._jmin)*(j <= self._jmax)\
        *(j-k >= self._jkmin)*(j-k <= self._jkmax)
    return skyindx*cmagindx
def statistics(self):
    stats = 'SP '
    stats += self.proximal.statistics()

    if self.args.boosting_alpha is not None:
        stats += 'Columns ' + self.boosting.statistics()
        af = self.boosting.activation_frequency
        boost_min  = np.log2(np.min(af))  / np.log2(self.args.sparsity)
        boost_mean = np.log2(np.mean(af)) / np.log2(self.args.sparsity)
        boost_max  = np.log2(np.max(af))  / np.log2(self.args.sparsity)
        stats += '\tLogarithmic Boosting Multiplier min/mean/max {:-.04g}% / {:-.04g}% / {:-.04g}%\n'.format(
            boost_min * 100,
            boost_mean * 100,
            boost_max * 100,)

    # TODO: Stability, if enabled.
    pass

    # TODO: Noise robustness, if enabled.
    pass

    return stats
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a,
                              p_YgX, unique_inverse_x, unique_inverse_y, calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized,
                                                     unique_inverse_x, unique_inverse_y, unique_array)
    return local_IXT, local_ITY
def get_data(name):
    """Load data from the given name"""
    gen_data = {}
    # new version
    if os.path.isfile(name + 'data.pickle'):
        curent_f = open(name + 'data.pickle', 'rb')
        d2 = cPickle.load(curent_f)
    # Old version
    else:
        curent_f = open(name, 'rb')
        d1 = cPickle.load(curent_f)
        data1 = d1[0]
        data = np.array([data1[:, :, :, :, :, 0], data1[:, :, :, :, :, 1]])
        # Convert log e to log2
        normalization_factor = 1 / np.log2(2.718281)
        epochsInds = np.arange(0, data.shape[4])
        d2 = {}
        d2['epochsInds'] = epochsInds
        d2['information'] = data / normalization_factor
    return d2
def query(self, i: int, j: int) -> int:
    if j <= i:
        return -1
    if j - i == 1:
        return i
    j = min(j, len(self.array))
    k = numpy.zeros((2,), dtype=numpy.int32)
    values = numpy.zeros((2,))
    l = int(numpy.log2(j - i))
    col = max(0, l-1)
    k[0] = self.table[i, col]
    k[1] = self.table[j - 2**l, col]
    values[0] = self.array[k[0]]
    values[1] = self.array[k[1]]
    return k[self.func(values)]
def hz_to_cents(freq_hz, ref_hz=32.0):
    '''Convert frequency values from Hz to cents

    Parameters
    ----------
    freq_hz : np.array
        Array of contour frequencies in Hz
    ref_hz : float
        Reference frequency in Hz

    Returns
    -------
    freq_cents : np.array
        Array of contour frequencies in cents
    '''
    freq_cents = 1200.0 * np.log2(freq_hz / ref_hz)
    return freq_cents
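As a sanity check of the formula (numbers worked by hand, not taken from the source project): with the default ref_hz=32.0, a 440 Hz contour value maps to 1200 * log2(440 / 32) ≈ 4537.6 cents, and doubling any frequency always adds exactly 1200 cents, i.e. one octave.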
def ndcg(rel_docs, retr_docs):
    rel_docs = set(rel_docs)
    n_retr_docs = len(retr_docs)
    n_rel_docs = len(rel_docs)
    dcg = np.zeros((1, 1000))
    ndcg = np.zeros((1, 3))
    if n_retr_docs == 0:
        return ndcg
    for i in range(n_retr_docs):
        pos = i + 1
        if retr_docs[i] in rel_docs:
            dcg[0, i] = 1/np.log2(pos + 1)
    idcg = np.zeros((1, 1000))
    for i in range(n_rel_docs):
        pos = i + 1
        idcg[0, i] = 1/np.log2(pos + 1)
    ndcg[0, 0] = np.sum(dcg[0, :20])/np.sum(idcg[0, :20])
    ndcg[0, 1] = np.sum(dcg[0, :100])/np.sum(idcg[0, :100])
    ndcg[0, 2] = np.sum(dcg[0, :1000])/np.sum(idcg[0, :1000])
    return ndcg
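A small worked example using the function above; the document IDs are made up for illustration:

import numpy as np

relevant  = [3, 7, 9]           # ground-truth relevant doc IDs
retrieved = [7, 1, 9, 4, 3]     # ranked retrieval results
scores = ndcg(relevant, retrieved)
# DCG  = 1/log2(2) + 1/log2(4) + 1/log2(6) ≈ 1.887
# IDCG = 1/log2(2) + 1/log2(3) + 1/log2(4) ≈ 2.131
print(scores[0, 0])             # NDCG@20 ≈ 0.885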
def max_shannon_entropy(n):
    """Returns max possible entropy given "n" mutations.

    The maximum possible entropy is the entropy of the uniform distribution.
    The uniform distribution has entropy equal to log(n) (I will use base 2).

    Parameters
    ----------
    n : int
        total mutation counts

    Returns
    -------
    max possible shannon entropy in bits
    """
    if n <= 0:
        return 0.
    return float(np.log2(n))
def kl_divergence(p, q):
    """Compute the Kullback-Leibler (KL) divergence for discrete distributions.

    Parameters
    ----------
    p : np.array
        "Ideal"/"true" Probability distribution
    q : np.array
        Approximation of probability distribution p

    Returns
    -------
    kl : float
        KL divergence of approximating p with the distribution q
    """
    # make sure numpy arrays are floats
    p = p.astype(float)
    q = q.astype(float)

    # compute kl divergence
    kl = np.sum(np.where(p!=0, p*np.log2(p/q), 0))
    return kl
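A minimal usage sketch; the two distributions below are invented for illustration:

import numpy as np

p = np.array([0.5, 0.25, 0.25])      # "true" distribution
q = np.array([1/3, 1/3, 1/3])        # uniform approximation
print(kl_divergence(p, q))
# = 0.5*log2(1.5) + 2 * 0.25*log2(0.75) ≈ 0.085 bits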
def mean_log_fold_change(data, genes):
    """Mean log fold change function

    Parameters
    ----------
    data : pd.Series
        a series of p-values

    Returns
    -------
    mlfc : float
        mean log fold change.
    """
    tmp = data.copy()
    tmp = tmp[~genes.isin(mlfc_remove_genes)]
    tmp.sort_values(ascending=True, inplace=True)
    tmp[tmp==0] = tmp[tmp>0].min()  # avoid infinity in log by avoiding zero pvals
    dist_quant = np.arange(1, len(tmp)+1)/float(len(tmp))
    mlfc = np.mean(np.abs(np.log2(tmp/dist_quant)))
    return mlfc
def compute_log2_ratios(array_1d_0, array_1d_1):
    """
    Compute log2 ratios.

    array_1d_0 (array): (n)
    array_1d_1 (array): (n)

    Returns:
        array: (n); log2 fold ratios
    """
    array_1d_0_non_0_min = array_1d_0[array_1d_0 != 0].min()
    array_1d_1_non_0_min = array_1d_1[array_1d_1 != 0].min()
    print('array_1d_0_non_0_min = {}'.format(array_1d_0_non_0_min))
    print('array_1d_1_non_0_min = {}'.format(array_1d_1_non_0_min))

    array_1d_0 = where(array_1d_0 == 0, array_1d_0_non_0_min, array_1d_0)
    array_1d_1 = where(array_1d_1 == 0, array_1d_1_non_0_min, array_1d_1)

    normalization_factor = array_1d_0.sum() / array_1d_1.sum()
    print('normalization_factor = {}'.format(normalization_factor))

    return log2(array_1d_1 / array_1d_0 * normalization_factor)
def run_test(const, ebn0=range(21), syms=1000000):
    m = len(const)
    n = const.shape[1] if const.ndim == 2 else 1
    ber = []
    for s in ebn0:
        # eb is divided across n samples, and each symbol has m bits
        snr_per_sample = s + pow2db(np.log2(m)/n)
        d1 = comms.random_data(syms, m)
        x = comms.modulate(d1, const)
        if pb:
            x = comms.upconvert(x, fs_by_fd, fc, fs, pulse)
            # 3 dB extra SNR per sample needed to account for conjugate noise spectrum
            x = comms.awgn(x, snr_per_sample+3, complex=False)
            x = comms.downconvert(x, fs_by_fd, fc, fs, pulse)
            x = x[2*pulse_delay:-2*pulse_delay]
        else:
            x = comms.awgn(x, snr_per_sample, complex=True)
        d2 = comms.demodulate(x, const)
        ber.append(comms.ber(d1, d2, m))
    return ebn0, ber
def bi2sym(x, m):
    """Convert bits to symbols.

    :param x: bit array
    :param m: symbol alphabet size (must be a power of 2)
    :returns: symbol array

    >>> import arlpy
    >>> arlpy.comms.bi2sym([0, 0, 1, 0, 1, 0, 1, 1, 1], 8)
    array([1, 2, 7])
    """
    n = int(_np.log2(m))
    if 2**n != m:
        raise ValueError('m must be a power of 2')
    x = _np.asarray(x, dtype=_np.int)
    if _np.any(x < 0) or _np.any(x > 1):
        raise ValueError('Invalid data bits')
    nsym = len(x)/n
    x = _np.reshape(x, (nsym, n))
    y = _np.zeros(nsym, dtype=_np.int)
    for i in range(n):
        y <<= 1
        y |= x[:, i]
    return y
def sym2bi(x, m):
    """Convert symbols to bits.

    :param x: symbol array
    :param m: symbol alphabet size (must be a power of 2)
    :returns: bit array

    >>> import arlpy
    >>> arlpy.comms.sym2bi([1, 2, 7], 8)
    array([0, 0, 1, 0, 1, 0, 1, 1, 1])
    """
    n = int(_np.log2(m))
    if 2**n != m:
        raise ValueError('m must be a power of 2')
    x = _np.asarray(x, dtype=_np.int)
    if _np.any(x < 0) or _np.any(x >= m):
        raise ValueError('Invalid data for specified m')
    y = _np.zeros((len(x), n), dtype=_np.int)
    for i in range(n):
        y[:, n-i-1] = (x >> i) & 1
    return _np.ravel(y)
def ber(x, y, m=2):
    """Measure bit error rate between symbols in x and y.

    :param x: symbol array #1
    :param y: symbol array #2
    :param m: symbol alphabet size (maximum 64)
    :returns: bit error rate

    >>> import arlpy
    >>> arlpy.comms.ber([0,1,2,3], [0,1,2,2], m=4)
    0.125
    """
    x = _np.asarray(x, dtype=_np.int)
    y = _np.asarray(y, dtype=_np.int)
    if _np.any(x >= m) or _np.any(y >= m) or _np.any(x < 0) or _np.any(y < 0):
        raise ValueError('Invalid data for specified m')
    if m == 2:
        return ser(x, y)
    if m > _MAX_M:
        raise ValueError('m > %d not supported' % (_MAX_M))
    n = _np.product(_np.shape(x))*_np.log2(m)
    e = x^y
    e = e[_np.nonzero(e)]
    e = _np.sum(_popcount[e])
    return float(e)/n
def evaluate_mc(data_path, dataset, load_model, mc_steps, seed):
    """Evaluate the model on the given data using MC averaging."""
    ex.commands['print_config']()
    print("MC Evaluation of model:", load_model)
    assert mc_steps > 0
    reader, (train_data, valid_data, test_data, _) = get_data(data_path, dataset)

    config = get_config()
    val_config = deepcopy(config)
    test_config = deepcopy(config)
    test_config.batch_size = test_config.num_steps = 1

    with tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            _ = Model(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            _ = Model(is_training=False, config=val_config)
            mtest = Model(is_training=False, config=test_config)

        tf.initialize_all_variables()
        saver = tf.train.Saver()
        saver.restore(session, load_model)

        print("Testing on non-batched Test ...")
        test_perplexity = run_mc_epoch(seed, session, mtest, test_data, tf.no_op(),
                                       test_config, mc_steps, verbose=True)
        print("Full Test Perplexity: %.3f, Bits: %.3f" % (test_perplexity, np.log2(test_perplexity)))
def average_ndcg(self):
    ndcg = 0.
    for goal, prediction in self.instances:
        if len(prediction) > 0:
            dcg = 0.
            max_dcg = 0.
            for i, p in enumerate(prediction[:min(len(prediction), self.k)]):
                if i < len(goal):
                    max_dcg += 1. / np.log2(2 + i)
                if p in goal:
                    dcg += 1. / np.log2(2 + i)
            ndcg += dcg/max_dcg
    return ndcg / len(self.instances)
def process_single_image(filename, image_format, scale_metadata_path,
                         threshold_radius, smooth_radius,
                         brightness_offset, crop_radius, smooth_method):
    image = imageio.imread(filename, format=image_format)
    scale = _get_scale(image, scale_metadata_path)
    if crop_radius > 0:
        c = crop_radius
        image = image[c:-c, c:-c]
    pixel_threshold_radius = int(np.ceil(threshold_radius / scale))

    pixel_smoothing_radius = smooth_radius * pixel_threshold_radius
    thresholded = pre.threshold(image, sigma=pixel_smoothing_radius,
                                radius=pixel_threshold_radius,
                                offset=brightness_offset,
                                smooth_method=smooth_method)
    quality = shape_index(image, sigma=pixel_smoothing_radius, mode='reflect')
    skeleton = morphology.skeletonize(thresholded) * quality
    framedata = csr.summarise(skeleton, spacing=scale)
    framedata['squiggle'] = np.log2(framedata['branch-distance'] /
                                    framedata['euclidean-distance'])
    framedata['scale'] = scale
    framedata.rename(columns={'mean pixel value': 'mean shape index'},
                     inplace=True)
    framedata['filename'] = filename
    return image, thresholded, skeleton, framedata
def cheaptrick(x, fs, temporal_positions, f0_sequence, vuv, fftlen="auto", q1=-0.15):
    f0_sequence = f0_sequence.copy()
    f0_low_limit = 71
    default_f0 = 500
    if fftlen == "auto":
        fftlen = int(2 ** np.ceil(np.log2(3. * float(fs) / f0_low_limit + 1)))
    #raise ValueError("Only fftlen auto currently supported")
    fft_size = fftlen

    f0_low_limit = fs * 3.0 / (fft_size - 3.0)
    f0_sequence[vuv == 0] = default_f0
    spectrogram = np.zeros((int(fft_size / 2.) + 1, len(f0_sequence)))
    for i in range(len(f0_sequence)):
        if f0_sequence[i] < f0_low_limit:
            f0_sequence[i] = default_f0
        spectrogram[:, i] = cheaptrick_estimate_one_slice(x, fs, f0_sequence[i],
                                                          temporal_positions[i], fft_size, q1)
    return temporal_positions, spectrogram.T, fs
def d4c_love_train(x, fs, current_f0, current_position, threshold):
    vuv = 0
    if current_f0 == 0:
        return vuv
    lowest_f0 = 40
    current_f0 = max([current_f0, lowest_f0])
    fft_size = int(2 ** np.ceil(np.log2(3. * fs / lowest_f0 + 1)))
    boundary0 = int(np.ceil(100 / (float(fs) / fft_size)))
    boundary1 = int(np.ceil(4000 / (float(fs) / fft_size)))
    boundary2 = int(np.ceil(7900 / (float(fs) / fft_size)))

    waveform = d4c_get_windowed_waveform(x, fs, current_f0, current_position, 1.5, 2)
    power_spectrum = np.abs(np.fft.fft(waveform, int(fft_size)) ** 2)
    power_spectrum[0:boundary0 + 1] = 0.
    cumulative_spectrum = np.cumsum(power_spectrum)
    if (cumulative_spectrum[boundary1] / cumulative_spectrum[boundary2]) > threshold:
        vuv = 1
    return vuv
def IntToBinVec(x, v=None):
    # If no vector is passed create a new one
    if (v is None):
        dim = int(np.log2(x)) + 1
        v = np.zeros([dim], dtype=np.int)
    # v will contain the binary vector
    c = 0
    while (x > 0):
        # If the vector has been filled; return truncating the rest
        if c >= len(v):
            break
        # Test if the LSB is set
        if (x & 1 == 1):
            # Set the bits in right-to-left order
            v[c] = 1
        # Onto the next column and bit
        c += 1
        x >>= 1
    return v

# Plot the model R learning the data set A, Y
# R: A regression model
# A: The data samples
# Y: The target vectors
def plot_state(rho, method='city'):
    """Plot the quantum state."""
    num = int(np.log2(len(rho)))
    # Need updating to check its a matrix
    if method == 'city':
        plot_state_city(rho)
    elif method == "paulivec":
        plot_state_paulivec(rho)
    elif method == "qsphere":
        plot_state_qsphere(rho)
    elif method == "bloch":
        for i in range(num):
            bloch_state = list(map(lambda x: np.real(np.trace(
                np.dot(x.to_matrix(), rho))), pauli_singles(i, num)))
            plot_bloch_vector(bloch_state, "qubit " + str(i))
    elif method == "wigner":
        plot_wigner_function(rho)


###############################################################
# Plotting Wigner functions
###############################################################
def test_branch_cuts(self):
    # check branch cuts and continuity on them
    yield _check_branch_cut, np.log,   -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log2,  -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True
    yield _check_branch_cut, np.sqrt,  -0.5, 1j, 1, -1, True

    yield _check_branch_cut, np.arcsin, [ -2, 2],   [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arccos, [ -2, 2],   [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1],  -1,  1, True

    yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1],   -1,  1, True
    yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j],  1, -1, True
    yield _check_branch_cut, np.arctanh, [ -2, 2],   [1j, 1j],  1, -1, True

    # check against bogus branch cuts: assert continuity between quadrants
    yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1,  1], 1, 1
    yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1,  1], 1, 1
    yield _check_branch_cut, np.arctan, [ -2, 2],   [1j, 1j], 1, 1

    yield _check_branch_cut, np.arcsinh, [ -2, 2, 0],   [1j, 1j, 1], 1, 1
    yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j],  1, 1
    yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j],  1, 1
def _test_calculate_on_random_set(self, alpha, random_str):
    """
    Test utils.entropy on random set generated from alphanumerics.
    Note, this method presumes a uniform distribution from the
    stringer.random_string method.
    """
    STRING_LENGTH = len(random_str)
    ALPHA_LEN = len(alpha)
    ent_calc = self.ent.calculate(random_str)
    p = 1/ALPHA_LEN
    # this is the expected Shannon entropy for a uniform distribution
    exp_ent = STRING_LENGTH * p * log2(p)
    accepted_err = 10
    # why the hell is this failing?
    # assert_true(exp_ent-accepted_err <=
    #             ent_calc <=
    #             exp_ent+accepted_err)
def est_entro_MLE(samp):
    """MLE estimate of Shannon entropy (in bits) of input sample

    This function returns a scalar MLE estimate of the entropy of samp when
    samp is a vector, or returns a row vector containing the MLE estimate of
    each column of samp when samp is a matrix.

    Input:
    -----
    samp: a vector or matrix which can only contain integers. The input
          data type can be any integer types such as uint8/int8/uint16/int16/
          uint32/int32/uint64/int64, or floating-point such as single/double.

    Output:
    -----
    est: the entropy (in bits) of the input vector or that of each column
         of the input matrix. The output data type is double.
    """
    samp = formalize_sample(samp)
    [n, wid] = samp.shape
    n = float(n)
    f = fingerprint(samp)
    prob = np.arange(1, f.shape[0] + 1) / n
    prob_mat = - prob * np.log2(prob)
    return prob_mat.dot(f)
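The same plug-in (MLE) estimate can be written directly from empirical frequencies. The sketch below is an equivalent formulation that only assumes numpy (it uses np.unique instead of the fingerprint helper above), so treat it as illustrative rather than a drop-in replacement:

import numpy as np

def entropy_mle_bits(samp):
    """Plug-in Shannon entropy estimate, -sum(p * log2(p)), of a 1-D integer sample."""
    samp = np.asarray(samp).ravel()
    _, counts = np.unique(samp, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))

# A fair-coin sample should come out close to 1 bit:
rng = np.random.default_rng(0)
print(entropy_mle_bits(rng.integers(0, 2, size=10000)))   # ~1.0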
def updateTrace(self, event=None):
    if self.collecting:
        # make sure we're not passing through the current strans filter
        self.flt.stransFilter = None
        self.flt.trainCap = self.getCap()

    if self.rawView:
        data = self.flt.trainCap.data
        chanNames = self.flt.trainCap.getChanNames()
    else:
        data = self.flt.filteredTrain
        chanNames = self.flt.getOutChans()

    data = data - data.mean(axis=0)

    #dFactor = int(np.log2(2*data.size/float(256*8*10))) + 1
    dFactor = (data.size // (256*8*10)) + 1
    if dFactor > 1:
        data = sig.decimate(data, factor=dFactor)  #, lowpassFrac=0.75, order=4)

    t = self.flt.trainCap.getNSec()
    scale = np.max(2*data.std(axis=0))

    self.tracePlot.draw(data, t=t, chanNames=chanNames, scale=scale)
def get_zoom(cls, latitudes, longitudes, size, scale):
    """Compute level of zoom needed to display all points in a single tile.

    :param pandas.Series latitudes: set of latitudes
    :param pandas.Series longitudes: set of longitudes
    :param int size: size of the tile
    :param int scale: 1 or 2 (free plan), see Google Static Maps API docs
    :return: zoom level
    :rtype: int
    """
    # Extreme pixels
    min_pixel = cls.to_pixel(latitudes.min(), longitudes.min())
    max_pixel = cls.to_pixel(latitudes.max(), longitudes.max())
    # Longitude spans from -180 to +180, latitudes only from -90 to +90
    amplitudes = (max_pixel - min_pixel).abs() * pd.Series([2., 1.], index=['x_pixel', 'y_pixel'])
    return int(np.log2(2 * size / amplitudes.max()))
def pwdist_kld(self, seq1idx, seq2idx):
    """Kullback-Leibler discrepancy (KL) between two vectors.

    The KL discrepancy between sequences X and Y is computed from their
    L-tuple (word) frequencies.

    References:
        1. Wu, Hsieh, Li (2001) Biometrics 57: 441-448.
           doi: 10.1111/j.0006-341X.2001.00441.x

    Notes:
        1. KL discrepancy must be computed based on relative frequencies
           (those that sum to 1).
        2. To avoid having an infinite dKL(X, Y) when freqs2 = 0, the
           authors suggest modifying the original formulation by adding
           a unit to both terms of the frequency ratio.
    """
    freqs1 = self[seq1idx] + 1
    freqs2 = self[seq2idx] + 1
    values = freqs1 * np.log2(freqs1 / freqs2)
    value = np.sum(values)
    return value
def merge_TFIDF(N, vocab_path, TF_DF_prefix, total_doc):
    t0 = time.time()
    TF = {}
    DF = {}
    total_TF = 1e-10
    for i in range(N):
        new_TF = pk.load(open(TF_DF_prefix+str(i)+'TF.pkl'))
        new_DF = pk.load(open(TF_DF_prefix+str(i)+'DF.pkl'))
        total_TF += merge_dict(TF, new_TF)
        merge_dict(DF, new_DF)
        t = time.time() - t0
        print '%d / %d merged. time %fs' %(i+1, N, t)
    pk.dump(TF, open(TF_DF_prefix+'TF.pkl', 'w'))
    pk.dump(DF, open(TF_DF_prefix+'DF.pkl', 'w'))
    TFIDF = TF.copy()
    #for word, value in TFIDF.iteritems():
    #    TFIDF[word] = TF[word] * 1.0 / total_TF * np.log2(total_doc*1.0/DF[word])
    save_vocab_txt(TFIDF, vocab_path+'_tfidf.txt')
def compute_word2vec(docs, DF, nDoc, model, vecDim=300):
    N = len(docs)
    nonExist_vocab = {}
    feat = np.zeros((N, 300), dtype=np.float32)
    for idx, doc in enumerate(docs):
        nonExist_list = []
        TF = {}
        spt = doc.split(' ')
        nWord = len(spt)
        update_vocab(TF, spt)
        vec = np.zeros(vecDim, dtype=np.float32)
        for word, tf in TF.items():
            try:
                tfidf = 1.0 * tf / nWord * np.log2(1.0 * nDoc / DF[word])
                vec += tfidf * word2vec(model, word)
            except:
                nonExist_list.append(word)
                pass
        feat[idx, :] = vec
        update_vocab(nonExist_vocab, nonExist_list)
        if np.mod(idx, 10000) == 0:
            print '# %d' %idx
            print 'nonExist: %d' %len(nonExist_vocab.keys())
    return feat, nonExist_vocab
def save_histogram_with_otsu(name, histograms, otsu):
    plt.clf()
    figure, axarr = plt.subplots(3)
    figure.tight_layout()

    for x, otsu_value in zip(range(3), otsu):
        axarr[x].bar(np.arange(0, histograms[x].size),
                     np.log2(np.where(histograms[x] >= 1, histograms[x], 1)), 1.0)
        axarr[x].grid(True)
        axarr[x].set_ylabel("log2")
        for val in otsu_value:
            axarr[x].axvline(x=val, color="r")
        axarr[x].set_xlim(0, histograms[x].size)

    axarr[0].set_title('Hue')
    axarr[1].set_title('Saturation')
    axarr[2].set_title('Value')

    return plt
def build_classifier(labeled, unlabeled):
    err = np.zeros(self.num_classifiers)
    err_prime = np.zeros(self.num_classifiers)
    s_prime = np.zeros(self.num_classifiers)
    inbags = [None] * self.num_classifiers

    np.random.seed(self.m_seed)
    num_original_labeled_insts = labeled.shape[0]

    # set up the random tree options
    self.num_kvalue = self.num_features
    if self.num_kvalue < 1:
        self.num_kvalue = int(np.log2(labeled.shape[1])) + 1
    self.estimator = self.estimator.set_params(**{"max_features": self.m_kvalue})
    pass
def get_grid(self, max_size=2048):
    import itertools

    possible_combos = self.get_combinations_size()
    #print "possible combos:",possible_combos
    assert possible_combos > 0

    num_columns = int(np.log2(possible_combos))

    if possible_combos > max_size:
        # Grid too large so sample instead
        combo_grid = np.random.binomial(1, 0.5, (max_size, num_columns))
    else:
        # Get entire grid
        combo_grid = list(itertools.product([0, 1], repeat=num_columns))
        assert len(combo_grid) == possible_combos
        combo_grid = np.array(combo_grid)

    # Scale the grid
    cat_mask = ~self.get_numerical_mask()
    X_scaler_cat = copy.deepcopy(self.scaler_)
    X_scaler_cat.mean_ = X_scaler_cat.mean_[cat_mask]
    X_scaler_cat.scale_ = X_scaler_cat.scale_[cat_mask]
    X_scaler_cat.var_ = X_scaler_cat.var_[cat_mask]

    combo_grid = X_scaler_cat.transform(combo_grid)
    return combo_grid