The following code examples, extracted from open-source Python projects, illustrate how to use numpy.histogram().
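Before the project examples, a minimal self-contained sketch of the basic call (plain NumPy, nothing project-specific): np.histogram returns a pair of arrays, the per-bin counts and the bin edges, with one more edge than counts.

import numpy as np

# 1000 normally distributed samples, histogrammed into 10 equal-width bins
data = np.random.default_rng(42).normal(size=1000)
counts, edges = np.histogram(data, bins=10, range=(-4, 4))

print(counts.sum())             # samples that fell inside the range
print(len(counts), len(edges))  # 10 11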
def mypsd(Rates, time_range, bin_w=5., nmax=4000):
    bins = np.arange(0, len(time_range), 1)
    # print(bins)
    a, b = np.histogram(Rates, bins)
    ff = (1. / len(bins)) * abs(np.fft.fft(Rates - np.mean(Rates))) ** 2
    Fs = 1. / (1 * 0.001)
    freq2 = np.fft.fftfreq(len(bins))[0:len(bins) // 2 + 1]  # d= dt
    freq = np.fft.fftfreq(len(bins))[:len(ff) // 2 + 1]
    px = ff[0:len(ff) // 2 + 1]
    max_px = np.max(px[1:])
    idx = px == max_px
    # np.flatnonzero replaces the removed matplotlib pylab.find
    corr_freq = freq[np.flatnonzero(idx)]
    new_px = px
    max_pow = new_px[np.flatnonzero(idx)]
    return new_px, freq, corr_freq[0], freq2, max_pow
def get_histogram(self, data):
    """
    Project the descriptions on to the codebook/vocabulary,
    returning the histogram of words
    [N x 1] => [1 x K] histogram
    """
    if self.method == 'vq' or self.method == 'bow':
        code = self.get_code(data)
        code_hist = self.bow(data, code, self.K)
    elif self.method == 'vlad':
        code = self.get_code(data)
        code_hist = self.vlad(data, code)
    elif self.method == 'fisher':
        code = self.get_code(data)
        code_hist = self.fisher(data, code)
    else:
        raise NotImplementedError(
            'Histogram method %s not implemented. Use vq/bow or vlad or fisher!' % self.method)
    return code_hist
def histogram(name, values, bins, collections=None):  # pylint: disable=line-too-long
    """Outputs a `Summary` protocol buffer with a histogram.

    The generated
    [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
    has one summary value containing a histogram for `values`.

    This op reports an `InvalidArgument` error if any value is not finite.

    Args:
      name: A name for the generated node. Will also serve as a series name in
        TensorBoard.
      values: A real numeric `Tensor`. Any shape. Values to use to build the
        histogram.
      bins: Number of histogram bins.
      collections: Optional list of graph collections keys. The new summary op is
        added to these collections. Defaults to `[GraphKeys.SUMMARIES]`.

    Returns:
      A scalar `Tensor` of type `string`. The serialized `Summary` protocol
      buffer.
    """
    name = _clean_tag(name)
    values = makenp(values)
    hist = make_histogram(values.astype(float), bins)
    return Summary(value=[Summary.Value(tag=name, histo=hist)])
def modeFilter(data, window=500, step=None, bins=None):
    """Filter based on histogram-based mode function"""
    d1 = data.view(np.ndarray)
    vals = []
    l2 = int(window / 2.)
    if step is None:
        step = l2
    i = 0
    while True:
        if i > len(data) - step:
            break
        vals.append(mode(d1[i:i + window], bins))
        i += step

    chunks = [np.linspace(vals[0], vals[0], l2)]
    for i in range(len(vals) - 1):
        chunks.append(np.linspace(vals[i], vals[i + 1], step))
    remain = len(data) - step * (len(vals) - 1) - l2
    chunks.append(np.linspace(vals[-1], vals[-1], remain))
    d2 = np.hstack(chunks)

    if (hasattr(data, 'implements') and data.implements('MetaArray')):
        return MetaArray(d2, info=data.infoCopy())
    return d2
def makedists(pdata, binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds = pdata.shape
    nlat = pds[1]
    nlon = pds[0]
    nd = pds[2]
    bins = np.append(0, binl)
    n = np.empty((nlon, nlat, len(binl)))
    binno = np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn, thisbin = np.histogram(pdata[ilon, ilat, :], bins)
            n[ilon, ilat, :] = thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon, ilat, :] = np.digitize(pdata[ilon, ilat, :], bins)
    #### Calculate the number of days with non-missing data, for normalization
    ndmat = np.tile(np.expand_dims(np.nansum(n, axis=2), axis=2), (1, 1, len(bins) - 1))
    thisppdfmap = n / ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap = np.empty(thisppdfmap.shape)
    for ibin in range(len(bins) - 1):
        testpamtmap[:, :, ibin] = (pdata * (ibin == binno)).sum(axis=2)
    thispamtmap = testpamtmap / ndmat
    return thisppdfmap, thispamtmap
def add_column(self, table):
    """Add a single column DataFrame to the histogram object.

    If multiple columns share the same name, a (n) will be appended to the
    name, where n is the next available number.

    Args:
        :table: (:obj:`dataframe`) A PySpark DataFrame with a single column
    """
    if len(table.columns) > 1:
        raise ValueError('More than one column is being added, use add_data() to add multi-column DataFrames')
    column_name = table.columns[0]
    if not isinstance(table.schema.fields[0].dataType, NumericType):
        raise ValueError('Column %s has a non-numeric type (%s), only numeric types are supported'
                         % (column_name, str(table.schema.fields[0].dataType)))
    self.col_list.append((table, column_name))
def to_pandas(self, kind='hist'):
    """Returns a pandas DataFrame from the Histogram object.

    This triggers the histogram calculation in Spark if it has not run yet.

    Args:
        :kind: (:obj:`str`, optional): 'hist' or 'density'. When using 'hist'
            this returns the histogram object as a pandas DataFrame. When
            using 'density' the index contains the bin centers, and the
            values in the DataFrame are the scaled values. Defaults to 'hist'.

    Returns:
        A pandas DataFrame from the Histogram object.
    """
    self.build()
    if kind == 'hist':
        return pd.DataFrame(self.hist_dict).set_index([self._get_col_names()])
    elif kind == 'density':
        result = pd.DataFrame(self.hist_dict).set_index([self._get_bin_centers()])
        return result.apply(lambda x: x / x.max(), axis=0)
def add_data(self, data):
    """Adds 1 or more columns to a histogram.

    Multiple options are available:
    * Add a single column DataFrame
    * Add a list of single column DataFrames
    * Add a DataFrame with multiple columns

    Args:
        :data: A single column Spark DataFrame, a list of single column Spark
            DataFrames, or a multi column Spark DataFrame.
    """
    if isinstance(data, list):
        for df_column in data:
            self.add_column(df_column)
    elif len(data.columns) > 1:
        for col_name in data.columns:
            self.add_column(data.select(col_name))
    else:
        self.add_column(data)
def calculate_plane_histogram(plane, doseplane, dosegridpoints,
                              maxdose, dd, id, structure, hist):
    """Calculate the DVH for the given plane in the structure."""
    contours = [[x[0:2] for x in c['data']] for c in plane]

    # If there is no dose for the current plane, go to the next plane
    if not len(doseplane):
        return (np.arange(0, maxdose), 0)

    # Create a zero valued bool grid
    grid = np.zeros((dd['rows'], dd['columns']), dtype=np.uint8)

    # Calculate the histogram for each contour in the plane
    # and boolean xor to remove holes
    for i, contour in enumerate(contours):
        m = get_contour_mask(dd, id, dosegridpoints, contour)
        # the np.bool alias was removed in NumPy 1.24; use the builtin bool
        grid = np.logical_xor(m.astype(np.uint8), grid).astype(bool)

    hist, vol = calculate_contour_dvh(grid, doseplane, maxdose, dd, id, structure)
    return (hist, vol)
def calculate_contour_dvh(mask, doseplane, maxdose, dd, id, structure):
    """Calculate the differential DVH for the given contour and dose plane."""
    # Multiply the structure mask by the dose plane to get the dose mask
    mask = ma.array(doseplane * dd['dosegridscaling'] * 100, mask=~mask)
    # Calculate the differential dvh
    hist, edges = np.histogram(mask.compressed(), bins=maxdose, range=(0, maxdose))

    # Calculate the volume for the contour for the given dose plane
    vol = sum(hist) * ((id['pixelspacing'][0]) *
                       (id['pixelspacing'][1]) *
                       (structure['thickness']))
    return hist, vol

# ========================== Test DVH Calculation =========================== #
def rdf(coords, bins=100, r_max=None):
    """
    Radial distribution function

    Parameters
    ----------
    coords : list of coordinate arrays
    bins : int or numpy array
        distance bins
    r_max : positive float or None
        maximum distance
    """
    if np.ndim(coords) == 2:
        coords = [coords]

    # wrap map() in list() so np.concatenate gets a sequence under Python 3
    d = np.sqrt(np.concatenate(list(map(calc_distances, coords)), 0))
    if r_max is not None:
        d = d[d < r_max]

    g, bins = np.histogram(d, bins=bins)
    r = 0.5 * (bins[1:] + bins[:-1])

    return r, g / r**2
def get_hist_val(self, var_value):
    """Get bin count for bin by value of histogram variable

    :param var_value: a specific value to find corresponding bin.
    :returns: bin counter value
    :rtype: int
    """
    try:
        bin_label = self.value_to_bin_label(var_value)
    except Exception as exc:
        # exc.message is Python 2 only; str(exc) works on both
        self.log().error('bin label for variable value "%s" not found (%s)',
                         str(var_value), str(exc))
        return 0
    return self.get_bin_count(bin_label)
def to_normalized(self, **kwargs):
    """Return a normalized copy of this histogram

    :param str new_var_name: assign new variable name
    :param list variable_range: variable range used for finding the right bins to get values from.
    :param bool combine_values: if bin_specs is not set, combine existing bin labels with variable range.
    """
    # convert to normalized histogram
    new_var_name = str(kwargs.pop('variable', self.variable))
    bin_vals = self.get_bin_vals(**kwargs)
    values = np.float64(bin_vals[0]) / bin_vals[0].sum()
    # When values is a numpy array of 1 element np.float64() returns a
    # 0-dimensional array. See https://github.com/numpy/numpy/issues/3161.
    # The following if-statement is a workaround for this issue.
    if not values.shape:
        values = values.reshape((1,))
    return Histogram(counts=(values, bin_vals[1]), variable=new_var_name)
def _from_numpy(self, counts, bin_edges):
    """Create Histogram from NumPy-style histogram

    :param array counts: numpy histogram counts array
    :param array bin_edges: bin edges
    """
    # initialize from NumPy-style histogram
    _check_num_vals(counts)
    if len(counts) == len(bin_edges) - 1:
        # interpret specified variable values as bin edges
        del self._bin_specs
        self.bin_specs = {'bin_edges': list(bin_edges)}
        bin_edges = list(range(len(counts)))
    elif len(counts) != len(bin_edges):
        # cannot interpret specified variable values as bin values
        self.log().critical('numbers of specified variable values (%d) and value counts (%d) do not match',
                            len(bin_edges), len(counts))
        raise AssertionError('specified variable values and value counts do not match')
    self._val_counts = ValueCounts((self.variable,), (self.variable,),
                                   dict(((v,), c) for c, v in zip(counts, bin_edges)))
def to_root_hist(histogram, **kwargs):
    """Convert Eskapade histogram to ROOT histogram

    The input Eskapade histogram is first converted to a numpy histogram,
    which is then converted to a ROOT histogram. All kwargs besides the
    input histogram are passed on to histogram.get_bin_vals(), which makes
    the numpy histogram.

    :param histogram: input Eskapade histogram
    :returns: ROOT histogram
    :rtype: ROOT.TH1
    """
    if not isinstance(histogram, Histogram):
        raise TypeError('histogram not of type %s' % Histogram)

    # convert to ROOT histogram
    new_var_name = str(kwargs.pop('variable', histogram.variable))
    return bin_vals_to_hist(histogram.get_bin_vals(**kwargs), var_name=new_var_name)
def hist_to_bin_vals(hist):
    """Convert ROOT histogram to numpy-style bin_vals

    Create bin_counts and bin_edges lists, similar to the np.histogram() function.

    :param ROOT.TH1 hist: input ROOT histogram, assumed to be 1-dimensional.
    :returns: two comma-separated arrays: bin_entries, bin_edges
    """
    # check input type
    assert isinstance(hist, ROOT.TH1), 'root hist needs to be 1-dimensional'

    # create bin_counts and bin_edges lists, similar to np.histogram() function
    bin_entries = []
    bin_edges = []
    n_bins = hist.GetNbinsX()
    for i in range(n_bins):
        bin_entries.append(hist.GetBinContent(i + 1))
        bin_edges.append(hist.GetBinLowEdge(i + 1))
    bin_edges.append(hist.GetBinLowEdge(n_bins + 1))
    return bin_entries, bin_edges
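For context, a minimal round-trip sketch for the converter above, assuming a working PyROOT installation and that hist_to_bin_vals is in scope:

import ROOT
import numpy as np

# Build and fill a 1-dimensional ROOT histogram with random values
h = ROOT.TH1F('h', 'demo', 20, 0.0, 1.0)
for v in np.random.rand(1000):
    h.Fill(float(v))

# Convert to np.histogram-style lists: 20 counts and 21 bin edges
bin_entries, bin_edges = hist_to_bin_vals(h)
assert len(bin_entries) == 20 and len(bin_edges) == 21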
def plot_entropy_distribution():
    fig = plt.figure()
    ax = fig.add_subplot(111)
    entropy = read_pickle('output/normalized_entropy.obj')
    hist, bin_edges = np.histogram(entropy, bins=10000)
    print(hist, bin_edges)
    #ax.set_yscale('log')
    #ax.set_xscale('log')
    ax.plot(bin_edges[:-1], hist, marker='o', markersize=3,
            markeredgecolor='none', color='#D65F5F')
    #ax.set_ylim([10**0, 10**6])
    #ax.set_xlim([10**0, 10**6])
    ax.set_xlabel('Entropy')
    ax.set_ylabel('Frequency')
    fig.tight_layout()
    fig.savefig('output/normalized_entropy_distribution.pdf', bbox_inches='tight')
def test_outliers(self):
    # Check that outliers are not tallied
    a = np.arange(10) + .5

    # Lower outliers
    h, b = histogram(a, range=[0, 9])
    assert_equal(h.sum(), 9)

    # Upper outliers
    h, b = histogram(a, range=[1, 10])
    assert_equal(h.sum(), 9)

    # Normalization
    h, b = histogram(a, range=[1, 9], normed=True)
    assert_almost_equal((h * diff(b)).sum(), 1, decimal=15)

    # Weights
    w = np.arange(10) + .5
    h, b = histogram(a, range=[1, 9], weights=w, normed=True)
    assert_equal((h * diff(b)).sum(), 1)

    h, b = histogram(a, bins=8, range=[1, 9], weights=w)
    assert_equal(h, w[1:-1])
def test_simple(self):
    """
    Straightforward testing with a mixture of linspace data (for
    consistency). All test values have been precomputed and the values
    shouldn't change
    """
    # Some basic sanity checking, with some fixed data.
    # Checking for the correct number of bins
    basic_test = {50:   {'fd': 4,  'scott': 4,  'rice': 8,  'sturges': 7,
                         'doane': 8,  'sqrt': 8,  'auto': 7},
                  500:  {'fd': 8,  'scott': 8,  'rice': 16, 'sturges': 10,
                         'doane': 12, 'sqrt': 23, 'auto': 10},
                  5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
                         'doane': 17, 'sqrt': 71, 'auto': 17}}

    for testlen, expectedResults in basic_test.items():
        # Create some sort of non uniform data to test with
        # (2 peak uniform mixture)
        x1 = np.linspace(-10, -1, testlen // 5 * 2)
        x2 = np.linspace(1, 10, testlen // 5 * 3)
        x = np.concatenate((x1, x2))
        for estimator, numbins in expectedResults.items():
            a, b = np.histogram(x, estimator)
            assert_equal(len(a), numbins, err_msg="For the {0} estimator "
                         "with datasize of {1}".format(estimator, testlen))
def test_small(self):
    """
    Smaller datasets have the potential to cause issues with the data
    adaptive methods, especially the FD method. All bin numbers have been
    precalculated.
    """
    small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
                     'doane': 1, 'sqrt': 1},
                 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
                     'doane': 1, 'sqrt': 2},
                 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
                     'doane': 3, 'sqrt': 2}}

    for testlen, expectedResults in small_dat.items():
        testdat = np.arange(testlen)
        for estimator, expbins in expectedResults.items():
            a, b = np.histogram(testdat, estimator)
            assert_equal(len(a), expbins, err_msg="For the {0} estimator "
                         "with datasize of {1}".format(estimator, testlen))
def test_outlier(self):
    """
    Check the FD, Scott and Doane estimators with outliers.

    The FD estimator yields a smaller binwidth since it's less affected by
    outliers. Since the range is so (artificially) large, this means more
    bins, most of which will be empty, but the data of interest usually is
    unaffected. The Scott estimator is more affected and returns fewer bins,
    despite most of the variance being in one area of the data. The Doane
    estimator lies somewhere between the other two.
    """
    xcenter = np.linspace(-10, 10, 50)
    outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))

    outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}

    for estimator, numbins in outlier_resultdict.items():
        a, b = np.histogram(outlier_dataset, estimator)
        assert_equal(len(a), numbins)
def _hist_bin_sqrt(x):
    """
    Square root histogram bin estimator.

    Bin width is inversely proportional to the square root of the data
    size. Used by many programs for its simplicity.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / np.sqrt(x.size)
def _hist_bin_sturges(x):
    """
    Sturges histogram bin estimator.

    A very simplistic estimator based on the assumption of normality of
    the data. This estimator has poor performance for non-normal data,
    which becomes especially obvious for large data sets. The estimate
    depends only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / (np.log2(x.size) + 1.0)
def _hist_bin_rice(x):
    """
    Rice histogram bin estimator.

    Another simple estimator with no normality assumption. It has better
    performance for large data than Sturges, but tends to overestimate
    the number of bins. The number of bins is proportional to the cube
    root of data size (asymptotically optimal). The estimate depends only
    on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / (2.0 * x.size ** (1.0 / 3))
def _hist_bin_scott(x):
    """
    Scott histogram bin estimator.

    The binwidth is proportional to the standard deviation of the data
    and inversely proportional to the cube root of data size
    (asymptotically optimal).

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x)
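These width estimators back the string options that np.histogram accepts for bins; a quick sketch comparing the bin counts each one produces on the same data (standard NumPy API, no project code involved):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=5000)

# Each string selects the corresponding bin-width estimator above
for estimator in ('sqrt', 'sturges', 'rice', 'scott', 'fd', 'auto'):
    counts, edges = np.histogram(x, bins=estimator)
    print(estimator, len(counts))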
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a,
                              p_YgX, unique_inverse_x, unique_inverse_y,
                              calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0, 1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a)
             for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(
        PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y, unique_array)
    return local_IXT, local_ITY
def fit_koff(nmax=523, NN=4e8, **params):
    tbind = params.pop("tbind")
    params["kd"] = 1e9 / tbind
    dx = params.pop("dx")
    rw = randomwalk.get_rw(NAME, params, setup=setup_rw, calc=True)
    rw.domains[1].dx = dx
    times = draw_empirically(rw, N=NN, nmax=nmax, success=False)
    bins = np.logspace(np.log10(min(times)), np.log10(max(times)), 35)
    #bins = np.logspace(-3., 2., 35)
    hist, _ = np.histogram(times, bins=bins)
    cfd = np.cumsum(hist) / float(np.sum(hist))
    t = 0.5 * (bins[:-1] + bins[1:])
    tmean = times.mean()
    toff = NLS(t, cfd, t0=tmean)
    koff = 1. / toff
    return dict(t=t, cfd=cfd, toff=toff, tmean=tmean, koff=koff)

##### run rw in collect mode and draw bindings from empirical distributions
def compute_normal_histograms(normal_cloud):
    norm_x_vals = []
    norm_y_vals = []
    norm_z_vals = []
    numBins = 64

    for norm_component in pc2.read_points(normal_cloud,
                                          field_names=('normal_x', 'normal_y', 'normal_z'),
                                          skip_nans=True):
        norm_x_vals.append(norm_component[0])
        norm_y_vals.append(norm_component[1])
        norm_z_vals.append(norm_component[2])

    # Compute histograms for the normals in the point cloud; normal vector
    # components lie in [-1, 1], so bin over that range (the original used
    # range=(0, 256), which only captures values in the first bin)
    norm1_hist = np.histogram(norm_x_vals, bins=numBins, range=(-1, 1))
    norm2_hist = np.histogram(norm_y_vals, bins=numBins, range=(-1, 1))
    norm3_hist = np.histogram(norm_z_vals, bins=numBins, range=(-1, 1))

    # Concatenate and normalize the histograms
    norm_hist_features = np.concatenate(
        (norm1_hist[0], norm2_hist[0], norm3_hist[0])).astype(np.float64)
    norm_features = norm_hist_features / np.sum(norm_hist_features)

    return norm_features
def build_histogram(feature_id, bins=50):
    feature = Feature.objects.get(pk=feature_id)
    if feature.is_categorical:
        bins = len(feature.categories)

    # Only read column with that name
    dataframe = _get_dataframe(feature.dataset.id)

    bin_set = []
    # renamed from `bins` to `counts` to avoid shadowing the bins parameter
    counts, bin_edges = np.histogram(dataframe[feature.name], bins=bins)
    for bin_index, bin_value in enumerate(counts):
        from_value = bin_edges[bin_index]
        to_value = bin_edges[bin_index + 1]
        bin = Bin(
            feature=feature,
            from_value=from_value,
            to_value=to_value,
            count=bin_value
        )
        bin_set.append(bin)
    Bin.objects.bulk_create(bin_set)

    del counts, bin_edges, bin_set
def from_data(cls, data, binsize=1):
    """Initialization for a DVH from raw data.

    Parameters
    ----------
    data : iterable or numpy array
        An iterable of dose data that is used to create the histogram
    binsize : int, optional
        Bin width size (in cGy used to create the histogram)
    """
    data = np.array(data)
    bins = np.arange(0, data.max() + 1, binsize)
    if bins.size == 1:
        bins = np.array([0, data.max()])
    if data.max() not in bins:
        bins = np.append(bins, data.max())
    counts, bins = np.histogram(data, bins)
    return cls(counts, bins)
def density(x, nbins, normalize=True):
    """
    Histogram of univariate input data: basically calls numpy's histogram
    method and does a proper normalization.

    @param x: input numpy array
    @param nbins: number of bins
    @type nbins: integer
    @param normalize: if true, histogram will be normalized
    """
    from numpy import histogram

    hy, hx = histogram(x, nbins)
    hx = 0.5 * (hx[1:] + hx[:-1])
    hy = hy.astype('d')
    if normalize:
        hy /= (hx[1] - hx[0]) * hy.sum()

    return hx, hy
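A short usage sketch for this helper, assuming the density function above is in scope: with uniform bins the normalized heights integrate to one, much like np.histogram(..., density=True).

import numpy as np

x = np.random.default_rng(1).normal(size=10000)
centers, heights = density(x, nbins=50)

# With uniform bins, heights * bin_width sums to ~1
bin_width = centers[1] - centers[0]
print(np.isclose((heights * bin_width).sum(), 1.0))  # True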
def log_histogram(self, name, value, step=None):
    """Log a histogram for given name on given step.

    Args:
        name (str): name of the variable (it will be converted to a valid
            tensorflow summary name).
        value (tuple or list): either list of numbers to be summarized as a
            histogram, or a tuple of bin_edges and bincounts that directly
            define a histogram.
        step (int): non-negative integer used for visualization
    """
    if isinstance(value, six.string_types):
        raise TypeError('"value" should be a number, got {}'.format(type(value)))
    self._check_step(step)
    tf_name = self._ensure_tf_name(name)
    summary = self._histogram_summary(tf_name, value, step=step)
    self._log_summary(tf_name, summary, value, step=step)
def estimate_basket_length(baskets):
    basket_lengths = list()
    basket_ids = baskets['data']
    for basket_id in basket_ids:
        basket = baskets['data'][basket_id]['basket']
        basket_len = len(basket)
        basket_lengths.append(basket_len)

    if len(basket_lengths) <= 10:
        return int(np.round(np.median(basket_lengths)))

    # np.histogram requires an integer bin count, not a float
    nbr_bins = int(np.round(estimate_nbr_bins(basket_lengths)))
    val, bins = np.histogram(basket_lengths, bins=nbr_bins)
    ebl = int(np.round(bins[np.argmax(val)]))
    ebl = ebl + 1 if ebl == 1 else ebl
    return ebl
def estimate_month_basket_length(baskets):
    month_basket_length = [[] for x in range(12)]
    basket_ids = baskets['data']
    for basket_id in basket_ids:
        date_object = datetime.datetime.strptime(basket_id[0:10], '%Y_%m_%d')
        basket = baskets['data'][basket_id]['basket']
        month_id = date_object.month - 1
        basket_len = len(basket)
        month_basket_length[month_id].append(basket_len)

    month_ebl = list()
    for month_id in range(12):
        # np.histogram requires an integer bin count, not a float
        nbr_bins = int(np.round(estimate_nbr_bins(month_basket_length[month_id])))
        val, bins = np.histogram(month_basket_length[month_id], bins=nbr_bins)
        mebl = int(np.round(bins[np.argmax(val)]))
        mebl = mebl + 1 if mebl == 1 else mebl
        month_ebl.append(mebl)

    return month_ebl
def generate_data(sample_size=200, pd=[[0.4, 0.4], [0.1, 0.1]]):
    pd = np.array(pd)
    pd /= pd.sum()
    offset = 50
    bins = np.r_[np.zeros((1,)), np.cumsum(pd)]
    bin_counts = np.histogram(np.random.rand(sample_size), bins)[0]
    data = np.empty((0, 2))
    targets = []
    for ((i, j), p), count in zip(np.ndenumerate(pd), bin_counts):
        xs = np.random.uniform(low=0.0, high=50.0, size=count) + j * offset
        ys = np.random.uniform(low=0.0, high=50.0, size=count) + -i * offset
        data = np.vstack((data, np.c_[xs, ys]))
        if i == j:
            targets.extend([1] * count)
        else:
            targets.extend([-1] * count)
    return np.c_[data, targets]
def get_mode_pth_from_array(posterior, tuningcurve=None):
    """If tuningcurve is provided, then we map it back to the external
    coordinates / units. Otherwise, we stay in the bin space."""
    n_xbins = posterior.shape[0]

    if tuningcurve is None:
        xmin = 0
        xmax = n_xbins
    else:
        # TODO: this only works for TuningCurve1D currently
        if isinstance(tuningcurve, auxiliary.TuningCurve1D):
            xmin = tuningcurve.bins[0]
            xmax = tuningcurve.bins[-1]
        else:
            raise TypeError("tuningcurve type not yet supported!")

    _, bins = np.histogram([], bins=n_xbins, range=(xmin, xmax))
    xbins = (bins + xmax / n_xbins)[:-1]

    mode_pth = np.argmax(posterior, axis=0) * xmax / n_xbins
    mode_pth = np.where(np.isnan(posterior.sum(axis=0)), np.nan, mode_pth)

    return mode_pth
def get_mean_pth_from_array(posterior, tuningcurve=None):
    """If tuningcurve is provided, then we map it back to the external
    coordinates / units. Otherwise, we stay in the bin space."""
    n_xbins = posterior.shape[0]

    if tuningcurve is None:
        xmin = 0
        xmax = 1
    else:
        # TODO: this only works for TuningCurve1D currently
        if isinstance(tuningcurve, auxiliary.TuningCurve1D):
            xmin = tuningcurve.bins[0]
            xmax = tuningcurve.bins[-1]
        else:
            raise TypeError("tuningcurve type not yet supported!")

    _, bins = np.histogram([], bins=n_xbins, range=(xmin, xmax))
    xbins = (bins + xmax / n_xbins)[:-1]

    mean_pth = (xbins * posterior.T).sum(axis=1)

    return mean_pth
def generateHistogram(self):
    # 10 equal-width bins computed on all the data
    if not self.has_true_labels:
        hist, bin_edges = np.histogram(self.plot_datasets['all'].values,
                                       bins=10, density=False)
    else:
        hist, bin_edges = np.histogram(self.plot_datasets['malicious'].values,
                                       bins=10, density=False)
    x_labels = [str(bin_edges[e]) + ' - ' + str(bin_edges[e + 1])
                for e in range(len(bin_edges) - 1)]
    barplot = BarPlot(x_labels)
    # dict.iteritems() is Python 2 only; use items()
    for label, dataset in self.plot_datasets.items():
        hist, bin_edges = np.histogram(dataset.values, bins=bin_edges, density=False)
        hist_dataset = PlotDataset(hist, dataset.label)
        hist_dataset.setColor(dataset.color)
        barplot.addDataset(hist_dataset)
    output_filename = self.output_directory + 'histogram.json'
    with open(output_filename, 'w') as f:
        barplot.exportJson(f)
def reorganize_histogram_data(self, data):
    data_x, data_y = data
    try:
        data_x = float(data_x)
    except Exception:
        logger.error("Channel X Must be Scalar Data")
    try:
        data_y = data_y.flatten()
        bins = 10  # default bins
        if "BINS" in self.params:
            bins = self.params['BINS']
        data_y = np.histogram(data_y, bins)
    except Exception:
        logger.error("Channel Y Must be Numpy Array")
    return (data_x, data_y)
def plot(data=None, x=None, y=None, hue=None, kind='line', offset=0.75,
         cmap='Dark2', smooth=1, order=None, bins=10, weights=None,
         figsize=None):
    '''
    Create 'Joy Plot':

    data (pd.DataFrame): DataFrame holding all data
    x (str): DataFrame column to use as x value
    y (str): DataFrame column to use as y values
    hue (str): DataFrame column to use to group data
    kind (str): specify plot type; line or hist
    offset (int/float): vertical separation between plots
    cmap (str/list): name of matplotlib cmap, or list of colors to be used for plots
    smooth (int): smoothing window, if smoothing is to be applied
    order (list): order of categories - top to bottom
    bins (int/list): bins if using hist. int for all hists to have the same bins,
        else list of bin numbers for each hist
    weights (boolean/list): should the histogram be weighted?
    '''
    plotter = _pyjoyplotter(data=data, x=x, y=y, hue=hue, offset=offset,
                            cmap=cmap, smooth=smooth, kind=kind, order=order,
                            bins=bins, weights=weights, figsize=figsize)
    return plotter._plot()
def classify(self, image):
    """
    Given a 28x28 image, returns an array representing the two most
    probable predictions

    :param image:
    :return: array of 2 highest prob-digit tuples
    """
    if cv2.__version__[0] == '2':
        res = self.model.find_nearest(np.array([self.feature(image)]), k=11)
    else:
        res = self.model.findNearest(np.array([self.feature(image)]), k=11)

    # the `normed` keyword was removed in NumPy 1.24; use density instead
    hist = np.histogram(res[2], bins=9, range=(1, 10), density=True)[0]
    zipped = sorted(zip(hist, np.arange(1, 10)), reverse=True)
    return np.array(zipped[:2])
def length_histogram(fqin, name):
    '''
    Create a histogram, and return the bin edges of the bin containing the
    most reads
    '''
    logging.info("Creating length histogram to find bin with most reads.")
    lengths = get_lengths(fqin)
    plt.hist(lengths, bins='auto')
    plt.savefig(name, format='png', dpi=100)
    plt.close("all")
    hist, bin_edges = np.histogram(lengths, bins='auto')
    maxindex = np.argmax(hist)
    return (bin_edges[maxindex], bin_edges[maxindex + 1])
def test_against_numpy(self):
    source = [np.random.random((16, 12, 5)) for _ in range(10)]
    stack = np.stack(source, axis=-1)
    bins = np.linspace(0, 1, num=10)

    from_numpy = np.histogram(stack, bins=bins)[0]
    from_ihistogram = last(ihistogram(source, bins=bins))

    # Since histogram output is int, cannot use allclose
    self.assertTrue(np.all(np.equal(from_numpy, from_ihistogram)))
def ihistogram(arrays, bins):
    """
    Streaming histogram calculation.

    Parameters
    ----------
    arrays : iterable of ndarrays
        Arrays to be combined. This iterable can also be a generator. Arrays
        in this stream can be of any shape; the histogram is computed over
        the flattened array.
    bins : iterable
        Bin edges, including the rightmost edge, allowing for non-uniform
        bin widths.

    Yields
    ------
    hist : `~numpy.ndarray`
        Streamed histogram.

    See Also
    --------
    numpy.histogram : 1D histogram of dense arrays.
    """
    # TODO: weights
    bins = np.asarray(bins)

    # np.histogram also returns the bin edges, which we ignore
    hist_func = lambda arr: np.histogram(arr, bins=bins)[0]

    # ensure next() works even when a plain sequence (e.g. a list) is passed
    arrays = iter(arrays)
    hist = hist_func(next(arrays))
    yield hist

    for arr in arrays:
        hist += hist_func(arr)
        yield hist
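A short usage sketch for the streaming histogram above, assuming ihistogram is in scope: each yielded array is the running histogram over all chunks consumed so far, and the last one matches np.histogram over the concatenated data.

import numpy as np

chunks = (np.random.random(1000) for _ in range(5))  # any iterable of arrays
bins = np.linspace(0, 1, num=11)

# Running totals grow by 1000 samples per chunk: 1000, 2000, ..., 5000
for running_hist in ihistogram(chunks, bins=bins):
    print(running_hist.sum())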
def gradient_histogram(flow_img, binsize=12):
    """ calculate histogram """
    assert len(flow_img.shape) == 3, "Wrong flow image."
    # NOTE the frame is in RGB, while cv2 is in BGR, so do REMEMBER to reverse it.
    img_mag, img_v, img_u = np.split(flow_img, 3, 2)

    # NOTE the role reversal: the "y-coordinate" is the first function parameter,
    # the "x-coordinate" is the second.
    # NOTE that we use the same axis configuration as image axes (x is larger to
    # the right, y is larger to the bottom), so add a minus sign before img_v,
    # to make the two axes align.
    orientation = np.arctan2(-img_v, img_u)

    # Original result not applicable; directly use the full 360 degrees
    new_orient = orientation

    # Prune zero motion
    _mag_greater_zero = img_mag > 0.0
    pruned_orient = new_orient[_mag_greater_zero]

    # Histogram of optical flow
    hofbins = np.arange(-math.pi, math.pi + 1e-6, 2 * math.pi / binsize)
    hist, bin_edges = np.histogram(pruned_orient.flatten(), bins=hofbins)  # , density=True)

    # Normalize
    hist = hist.astype(np.float32) / (np.sum(_mag_greater_zero) + 1e-6)
    return hist, bin_edges