Python numpy module: nansum() example source code

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use numpy.nansum().
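
numpy.nansum() returns the sum of array elements, treating NaNs as zero. A minimal standalone sketch of the basic behavior (not taken from any of the projects below):

import numpy as np

a = np.array([1.0, np.nan, 2.0])
print(np.sum(a))     # nan: a single NaN poisons the ordinary sum
print(np.nansum(a))  # 3.0: the NaN is treated as zero
print(np.nansum([np.nan, np.nan]))  # 0.0: an all-NaN input sums to 0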

Project: rain-metrics-python    Author: apendergrass
def makedists(pdata,binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    nlon, nlat, nd = pdata.shape
    bins=np.append(0,binl)
    n=np.empty((nlon,nlat,len(binl)))
    binno=np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn,thisbin=np.histogram(pdata[ilon,ilat,:],bins) 
            n[ilon,ilat,:]=thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon,ilat,:]=np.digitize(pdata[ilon,ilat,:],bins) 
    #### Calculate the number of days with non-missing data, for normalization
    ndmat=np.tile(np.expand_dims(np.nansum(n,axis=2),axis=2),(1,1,len(bins)-1))
    thisppdfmap=n/ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap=np.empty(thisppdfmap.shape)
    for ibin in range(len(bins)-1):
        testpamtmap[:,:,ibin]=(pdata*(ibin==binno)).sum(axis=2)
    thispamtmap=testpamtmap/ndmat
    return thisppdfmap,thispamtmap
Project: POWER    Author: pennelise
def plot_power_rose(wind_directions,power,num_wd_bins):
    """Plot a power rose. Kind of a hacked wind rose. 

    Arguments:
    wind_directions -- a np array of wind directions filtered for icing
    power -- a np array of percent power production corresponding to wind_directions
    num_wd_bins -- the number of wind direction bins to include on the rose.
    """
    dir_bins = np.array(np.linspace(0.0,360.0 - 360.0 / num_wd_bins,num_wd_bins))
    #Find the total amount of power produced in each sector.
    dir_power = np.array([np.nansum(filter_obstacles(power,wind_directions,(wd + 180.0) % 360.0, 360 - 360/float(num_wd_bins))) for wd in dir_bins])
    dir_power = np.round(dir_power * 100.0 / np.nansum(dir_power), decimals=0)   #Normalize it and round to nearest int. 

    proportional_wd = np.array([])
    for i in range(len(dir_power)):
        for n in range(int(dir_power[i])): #Loop as many times as the percent of power produced in this sector.
                proportional_wd = np.append(proportional_wd,dir_bins[i]) #i.e., if 50% of power comes from the south, append 50 instances of 180.0 degrees.
    ones = np.ones(len(proportional_wd))

    ax = new_axes()
    ax.bar(proportional_wd, ones,normed=False, opening=0.8, edgecolor='white', bins = [0.0,100.], cmap=cm.RdGy)
    set_legend(ax)
Project: hydrus    Author: mark-r-g
def ests_ll_quad(self, params):
        """
        Calculate the loglikelihood given model parameters `params`.

        This method uses Gaussian quadrature, and thus returns an *approximate*
        integral.
        """
        mu0, gamma0, err0 = np.split(params, 3)
        x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNT x nhosp x nmeas)
        loc = mu0 + np.outer(QC1, gamma0)
        loc = np.tile(loc, (self.n, 1, 1))
        loc = np.transpose(loc, (1, 0, 2))
        scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
        zs = lpdf_3d(x=x, loc=loc, scale=scale)

        w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
        wted = np.nansum(w2 * zs, axis=2).T  # (nhosp X QCOUNT)
        qh = np.tile(QC1, (self.n, 1))  # (nhosp X QCOUNT)
        combined = wted + norm.logpdf(qh)  # (nhosp X QCOUNT)

        return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp)
Project: phoebe2    Author: phoebe-project
def chi2(b, dataset, model1='phoebe1model', model2='phoebe2model'):

    ds = b.get_dataset(dataset) - b.get_dataset(dataset, method='*dep')
    if ds.method=='lc':
        depvar = 'fluxes'
    elif ds.method=='rv':
        depvar = 'rvs'
    else:
        raise NotImplementedError("chi2 doesn't support dataset method: '{}'".format(ds.method))

    chi2 = 0.0
    for comp in ds.components if len(ds.components) else [None]:
        if comp=='_default':
            continue
        # phoebe gives nans for RVs when a star is completely eclipsed, whereas
        # phoebe1 will give a value.  So let's use nansum to just ignore those
        # regions of the RV curve
        print "***", depvar, dataset, model1, model2, comp
        chi2 += np.nansum((b.get_value(qualifier=depvar, dataset=dataset, model=model1, component=comp, context='model')\
            -b.get_value(qualifier=depvar, dataset=dataset, model=model2, component=comp, context='model'))**2)

    return chi2
Project: diffacto    Author: statisticalbiotechnology
def weighted_average(weights, pep_abd, group_ix):
    '''
    Calculate weighted geometric means for sample groups
    Inputs:
        weights:    weights of peptides after filtering by loading threshold
        pep_abd:    peptide abundances after filtering by loading threshold
        group_ix:   array indexes of sample groups
    '''
    global nGroups
    abd_w = pep_abd * weights[..., None]
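    # abd_w / abd_w is 1.0 where an abundance is present and NaN where it is
    # missing (or zero), so one_w carries each peptide's weight only at
    # observed entries; the nansum calls below then skip missing data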
    one_w = abd_w / abd_w * weights[..., None]
    a_sums = np.nansum(abd_w, axis=0)
    w_sums = np.nansum(one_w, axis=0)
    expr = np.empty(nGroups)
    for i in range(expr.shape[0]):
        expr[i] = a_sums[group_ix[i]].sum() / w_sums[group_ix[i]].sum()
    return expr
Project: alfpy    Author: aziele
def pwdist_canberra(self, seq1idx, seq2idx):
        """Compute the Canberra distance between two vectors.

        References:
            1. http://scipy.org/

        Notes:
            When `u[i]` and `v[i]` are 0 for given i, then
            the fraction 0/0 = 0 is used in the calculation.
        """
        u = self[seq1idx]
        v = self[seq2idx]
        olderr = np.seterr(invalid='ignore')
        try:
            d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
        finally:
            np.seterr(**olderr)
        return d
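
The Notes above rely on numpy's handling of 0/0: the invalid division yields NaN, and np.nansum() then drops that term, which implements the 0/0 = 0 convention. A standalone sketch of just that behavior (plain numpy, independent of the alfpy class):

import numpy as np

u = np.array([0.0, 1.0, 2.0])
v = np.array([0.0, 3.0, 2.0])
with np.errstate(invalid='ignore'):
    terms = np.abs(u - v) / (np.abs(u) + np.abs(v))  # first term is 0/0 -> NaN
print(np.nansum(terms))  # 0.5: the NaN term contributes nothing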
Project: CAAPR    Author: Stargrazer82301
def normalize(self, to=1.0):

        """
        This function ...
        :param to:
        :return:
        """

        # Calculate the sum of all the pixels
        sum = np.nansum(self)

        # Calculate the conversion factor
        factor = to / sum

        # Multiply the frame with the conversion factor
        self.__imul__(factor)

    # -----------------------------------------------------------------
Project: FLASH    Author: yuyuz
def calculate_optimizer_time(trials):
    optimizer_time = []
    time_idx = 0

    optimizer_time.append(trials.cv_starttime[0] - trials.starttime[time_idx])

    for i in range(len(trials.cv_starttime[1:])):
        if trials.cv_starttime[i + 1] > trials.endtime[time_idx]:
            optimizer_time.append(trials.endtime[time_idx] -
                                  trials.cv_endtime[i])
            time_idx += 1
            optimizer_time.append(trials.cv_starttime[i + 1] -
                                  trials.starttime[time_idx])
        else:
            optimizer_time.append(trials.cv_starttime[i + 1] -
                                  trials.cv_endtime[i])

    optimizer_time.append(trials.endtime[time_idx] - trials.cv_endtime[-1])
    trials.optimizer_time = optimizer_time

    # We need to import numpy again
    import numpy as np
    return np.nansum(optimizer_time)
Project: gullikson-scripts    Author: kgullikson88
def lnlike(self, pars):
        # Pull theta out of pars
        theta = pars[:self.Nbins]

        # Generate the inner summation
        gamma = np.ones_like(self.bin_idx) * np.nan
        good = (self.bin_idx < self.Nbins) & (self.bin_idx >= 0)  # nans in q get put in nonexistent bins
        gamma[good] = self.Nobs * self.censoring_fcn(self.mcmc_samples[good]) * theta[self.bin_idx[good]]
        summation = np.nanmean(gamma, axis=1)

        # Calculate the integral
        I = self._integral_fcn(theta)

        # Generate the log-likelihood
        ll = -I + np.nansum(np.log(summation))
        return ll
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_sum_inf(self):
        import pandas.core.nanops as nanops

        s = Series(np.random.randn(10))
        s2 = s.copy()

        s[5:8] = np.inf
        s2[5:8] = np.nan

        self.assertTrue(np.isinf(s.sum()))

        arr = np.random.randn(100, 100).astype('f4')
        arr[:, 2] = np.inf

        with cf.option_context("mode.use_inf_as_null", True):
            assert_almost_equal(s.sum(), s2.sum())

        res = nanops.nansum(arr, axis=1)
        self.assertTrue(np.isinf(res).all())
Project: scikit-gstat    Author: mmaelicke
def r(self):
        """
        Pearson correlation of the fitted Variogram

        :return:
        """
        # get the experimental and theoretical variogram and calculate means
        experimental, model = self.__model_deviations()
        mx = np.nanmean(experimental)
        my = np.nanmean(model)

        # calculate the single Pearson correlation terms
        term1 = np.nansum(np.fromiter(map(lambda x, y: (x-mx) * (y-my), experimental, model), float))

        t2x = np.nansum(np.fromiter(map(lambda x: (x-mx)**2, experimental), float))
        t2y = np.nansum(np.fromiter(map(lambda y: (y-my)**2, model), float))

        return term1 / (np.sqrt(t2x * t2y))
Project: CVProject    Author: hieuxinhe94
def entropy(v, axis=0):
  """
  Optimized implementation of entropy. This version is faster than that in 
  scipy.stats.distributions, particularly over long vectors.
  """
  v = numpy.array(v, dtype='float')
  s = numpy.sum(v, axis=axis)
  with numpy.errstate(divide='ignore', invalid='ignore'):
    rhs = numpy.nansum(v * numpy.log(v), axis=axis) / s
    r = numpy.log(s) - rhs
  # Where dealing with binarized events, it is possible that an event always
  # occurs and thus has 0 information. In this case, the negative class
  # will have frequency 0, resulting in log(0) being computed as nan.
  # We replace these nans with 0
  nan_index = numpy.isnan(rhs)
  if nan_index.any():
    r[nan_index] = 0
  return r
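
The comment block above is the crux of why nansum is used here: when an event has zero frequency, v * log(v) evaluates to 0 * -inf = NaN, and nansum counts that term as the conventional zero contribution. A standalone sketch (plain numpy, not part of the project code):

import numpy

v = numpy.array([0.0, 0.5, 0.5])
with numpy.errstate(divide='ignore', invalid='ignore'):
    terms = v * numpy.log(v)   # first entry is 0 * -inf -> NaN
print(numpy.nansum(terms))     # -0.693...: the zero-probability term adds nothing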
Project: ramp-workflow    Author: paris-saclay-cds
def __call__(self, y_true_proba, y_proba):
        """
        See Murphy (1973) A vector partition of the probability score
        """
        np.seterr(divide="ignore")
        pos_obs_freq = np.histogram(
            y_proba[y_true_proba == 1], bins=self.bins)[0]
        fore_freq = np.histogram(y_proba, bins=self.bins)[0]
        climo = y_true_proba.mean()
        unc = climo * (1 - climo)
        pos_obs_rel_freq = np.zeros(pos_obs_freq.size)
        for p in range(pos_obs_rel_freq.size):
            if fore_freq[p] > 0:
                pos_obs_rel_freq[p] = pos_obs_freq[p] / fore_freq[p]
            else:
                pos_obs_rel_freq[p] = np.nan
        score = np.nansum(fore_freq * (pos_obs_rel_freq - climo) ** 2)
        score /= float(y_proba.size)
        return score / unc
Project: FunnyPyML    Author: MrPig
def cluster_f_measure(ytrue, pred):
    # higher is better
    assert len(ytrue) == len(pred), 'inputs length must be equal.'
    label2ix = {label: i for i, label in enumerate(np.unique(ytrue))}
    _ytrue = np.array([label2ix[v] for v in ytrue])
    nSize = len(_ytrue)
    nClassTrue = len(np.unique(ytrue))
    nClassPred = len(np.unique(pred))
    f = np.zeros((nClassTrue, nClassPred)).astype(dtype=np.float64)
    for i in range(nClassTrue):
        freq_i = len(_ytrue[_ytrue == i])
        for j in range(nClassPred):
            freq_j = len(pred[pred == j])
            freq_i_j = float(np.sum(pred[_ytrue == i] == j))
            precision = freq_i_j / freq_j if freq_j != 0 else 0
            recall = freq_i_j / freq_i if freq_i != 0 else 0
            if precision == 0 or recall == 0:
                f[i, j] = 0.
            else:
                f[i, j] = 2. * (precision * recall) / (precision + recall)
    return np.nansum([f[i][j] * len(_ytrue[_ytrue == i])
                      for i in range(nClassTrue)
                      for j in range(nClassPred)]) / nSize
Project: OptiCalcRead    Author: MrLeylo
def ponderateByConcentration():
    print('Loading feature concentration..........')
    sdFile = open('varStandarDevs.txt','rb')
    standevs=pickle.load(sdFile)
    sdFile.close()
    totDevs={}
    for feature in standevs:
        totDevs[feature]=sum([abs(standevs[feature][si]) for si in range(len(standevs[feature]))])/len(standevs[feature])
    localF=['turningAngle','turningAngleDifference','Coord','LP']
    globalF=['accAngle','coG','relStrokeLength','liS','quadraticError']
    totalF=['turningAngle','turningAngleDifference','Coord','LP','Style','accAngle','coG','relStrokeLength','liS','quadraticError']
    print('Ponderating features..........')
    weights={}
    norm=np.nansum([1/float(math.sqrt(totDevs[feature])) for feature in totalF])
    for feature in totalF:
        weights[feature]=(1/float(math.sqrt(totDevs[feature])))/float(norm)
    print('Features weighted as')
    print(weights)
    return weights
Project: mglex    Author: fungs
def nandot(a, b):  # TODO: speed up, avoid copying data
    "A numpy.dot() replacement which treats (0*-Inf)==0 and works around BLAS NaN bugs in matrices."
    # important note: a contains zeros and b contains inf/-inf/nan, not the other way around

    # workaround for zero*-inf=nan in dot product (must be 0 according to 0^0=1 with probabilities)
    # 1) calculate dot product
    # 2) select nan entries
    # 3) re-calculate matrix entries where 0*inf = 0 using np.nansum()
    tmp = np.dot(a, b)
    indices = np.where(np.isnan(tmp))
    ri, ci = indices
    with np.errstate(invalid='ignore'):
        values = np.nansum(a[ri, :] * b[:, ci].T, axis=1)
    values[np.isnan(values)] = 0.0
    tmp[indices] = values
    return tmp
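
A quick standalone check of the case nandot() guards against (illustrative values, not from the project):

import numpy as np

a = np.array([[0.0, 1.0]])
b = np.array([[-np.inf], [2.0]])
print(np.dot(a, b))  # [[nan]]: 0 * -inf poisons the plain dot product
print(nandot(a, b))  # [[2.]]: the 0 * -inf term is treated as 0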
Project: agdc_statistics    Author: GeoscienceAustralia
def argnanmedoid(x, axis=1):
    """
    Return the indices of the medoid

    :param x: input array
    :param axis: axis to medoid along
    :return: indices of the medoid
    """
    if axis == 0:
        x = x.T

    invalid = anynan(x, axis=0)
    band, time = x.shape
    diff = x.reshape(band, time, 1) - x.reshape(band, 1, time)
    dist = np.sqrt(np.sum(diff * diff, axis=0))  # dist = np.linalg.norm(diff, axis=0) is slower somehow...
    dist_sum = nansum(dist, axis=0)
    dist_sum[invalid] = np.inf
    i = np.argmin(dist_sum)

    return i
Project: agdc_statistics    Author: GeoscienceAustralia
def medoid_indices(arr, invalid=None):
    """
    The indices of the medoid.

    :arg arr: input array
    :arg invalid: mask for invalid data containing NaNs
    """
    # vectorized version of `argnanmedoid`
    bands, times, ys, xs = arr.shape

    diff = (arr.reshape(bands, times, 1, ys, xs) -
            arr.reshape(bands, 1, times, ys, xs))

    dist = np.linalg.norm(diff, axis=0)
    dist_sum = nansum(dist, axis=0)

    if invalid is None:
        # compute it in case it's not already available
        invalid = anynan(arr, axis=0)

    dist_sum[invalid] = np.inf
    return np.argmin(dist_sum, axis=0)
Project: zipline-chinese    Author: zhanghan1990
def frame_to_series(self, field, frame, columns=None):
        """
        Convert a frame with a DatetimeIndex and sid columns into a series with
        a sid index, using the aggregator defined by the given field.
        """
        if isinstance(frame, pd.DataFrame):
            columns = frame.columns
            frame = frame.values

        if not len(frame):
            return pd.Series(
                data=(0 if field == 'volume' else np.nan),
                index=columns,
            ).values

        if field in ['price', 'close']:
            # shortcircuit for full last row
            vals = frame[-1]
            if np.all(~np.isnan(vals)):
                return vals
            return ffill(frame)[-1]
        elif field == 'open':
            return bfill(frame)[0]
        elif field == 'volume':
            return np.nansum(frame, axis=0)
        elif field == 'high':
            return np.nanmax(frame, axis=0)
        elif field == 'low':
            return np.nanmin(frame, axis=0)
        else:
            raise ValueError("Unknown field {}".format(field))
Project: deep-prior    Author: moberweger
def getJointNumFramesVisible(self, jointID):
        """
        Get number of frames in which joint is visible
        :param jointID: joint ID
        :return: number of frames
        """

        return numpy.nansum(self.gt[:, jointID, :]) / self.gt.shape[2]  # 3D
Project: mpnum    Author: dseuss
def est_pmf_from_mpps(self, other, samples, eps=1e-10):
        """Estimate probability mass function from MPPovmList samples

        :param MPPovmList other: An :class:`MPPovmList` instance
        :param samples: Iterable of samples (e.g. from
            :func:`MPPovmList.samples()`)

        :returns: `(p_est, n_samples_used)`, both are shape
            `self.nsoutdims` ndarrays. `p_est` provides estimated
            probabilities and `n_samples_used` provides the effective
            number of samples used for each probability.

        """
        assert len(other.mpps) == len(samples)
        pmf_ests = np.zeros((len(other.mpps),) + self.nsoutdims, float)
        n_samples = np.zeros(len(other.mpps), int)
        for pos, other_mpp, other_samples in zip(it.count(), other.mpps, samples):
            pmf_ests[pos, ...], n_samples[pos] = self.est_pmf_from(
                other_mpp, other_samples, eps)
        n_out = np.prod(self.nsoutdims)
        pmf_ests = pmf_ests.reshape((len(other.mpps), n_out))
        given = ~np.isnan(pmf_ests)
        n_samples_used = (given * n_samples[:, None]).sum(0)
        # Weighted average over available estimates according to the
        # number of samples underlying each estimate. Probabilities
        # without any estimates produce 0.0 / 0 = nan in `pmf_est`.
        pmf_est = np.nansum(pmf_ests * n_samples[:, None], 0) / n_samples_used
        return (pmf_est.reshape(self.nsoutdims),
                n_samples_used.reshape(self.nsoutdims))
Project: radar    Author: amoose136
def test_nansum_with_boolean(self):
        # gh-2978
        a = np.zeros(2, dtype=bool)
        try:
            np.nansum(a)
        except Exception:
            raise AssertionError()
Project: radar    Author: amoose136
def test_nansum(self):
        tgt = np.sum(self.mat)
        for mat in self.integer_arrays():
            assert_equal(np.nansum(mat), tgt)
Project: radar    Author: amoose136
def test_allnans(self):
        # Check for FutureWarning
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            res = np.nansum([np.nan]*3, axis=None)
            assert_(res == 0, 'result is not 0')
            assert_(len(w) == 0, 'warning raised')
            # Check scalar
            res = np.nansum(np.nan)
            assert_(res == 0, 'result is not 0')
            assert_(len(w) == 0, 'warning raised')
            # Check there is no warning for not all-nan
            np.nansum([0]*3, axis=None)
            assert_(len(w) == 0, 'unwanted warning raised')
Project: radar    Author: amoose136
def test_empty(self):
        for f, tgt_value in zip([np.nansum, np.nanprod], [0, 1]):
            mat = np.zeros((0, 3))
            tgt = [tgt_value]*3
            res = f(mat, axis=0)
            assert_equal(res, tgt)
            tgt = []
            res = f(mat, axis=1)
            assert_equal(res, tgt)
            tgt = tgt_value
            res = f(mat, axis=None)
            assert_equal(res, tgt)
Project: introspective    Author: numeristical
def compact_logit(x, eps=.00001):
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="divide by zero encountered in true_divide")
        warnings.filterwarnings("ignore", message="divide by zero encountered in log")
        warnings.filterwarnings("ignore", message="invalid value encountered in multiply")
        return np.nansum(
            ((x <= eps) * x,
             (x >= (1 - eps)) * x,
             ((x > eps) & (x < (1 - eps)))
             * ((1 - 2*eps) * np.log(x / (1 - x)) / (2 * np.log((1 - eps) / eps)) + .5)),
            axis=0)
Project: expan    Author: zalando
def _get_weights(self, data, kpi, variant):
        if kpi not in self.reference_kpis:
            return 1.0
        reference_kpi  = self.reference_kpis[kpi]
        x              = self.get_kpi_by_name_and_variant(data, reference_kpi, variant)
        zeros_and_nans = sum(x == 0) + np.isnan(x).sum()
        non_zeros      = len(x) - zeros_and_nans
        return non_zeros/np.nansum(x) * x
Project: IDNNs    Author: ravidziv
def KL(a, b):
    """Calculate the Kullback Leibler divergence between a and b """
    D_KL = np.nansum(np.multiply(a, np.log(np.divide(a, b+np.spacing(1)))), axis=1)
    return D_KL
Project: IDNNs    Author: ravidziv
def calc_information(probTgivenXs, PYgivenTs, PXs, PYs):
    """Calculate the MI - I(X;T) and I(Y;T)"""
    PTs = np.nansum(probTgivenXs*PXs, axis=1)
    Ht = np.nansum(-np.dot(PTs, np.log2(PTs)))
    Htx = - np.nansum((np.dot(np.multiply(probTgivenXs, np.log2(probTgivenXs)), PXs)))
    Hyt = - np.nansum(np.dot(PYgivenTs*np.log2(PYgivenTs+np.spacing(1)), PTs))
    Hy = np.nansum(-PYs * np.log2(PYs+np.spacing(1)))
    IYT = Hy - Hyt
    ITX = Ht - Htx
    return ITX, IYT
Project: IDNNs    Author: ravidziv
def calc_information_1(probTgivenXs, PYgivenTs, PXs, PYs, PTs):
    """Calculate the MI - I(X;T) and I(Y;T)"""
    #PTs = np.nansum(probTgivenXs*PXs, axis=1)
    Ht = np.nansum(-np.dot(PTs, np.log2(PTs+np.spacing(1))))
    Htx = - np.nansum((np.dot(np.multiply(probTgivenXs, np.log2(probTgivenXs+np.spacing(1))), PXs)))
    Hyt = - np.nansum(np.dot(PYgivenTs*np.log2(PYgivenTs+np.spacing(1)), PTs))
    Hy = np.nansum(-PYs * np.log2(PYs+np.spacing(1)))
    IYT = Hy - Hyt
    ITX = Ht - Htx
    return ITX, IYT
Project: IDNNs    Author: ravidziv
def calc_information(probTgivenXs, PYgivenTs, PXs, PYs, PTs):
    """Calculate the MI - I(X;T) and I(Y;T)"""
    #PTs = np.nansum(probTgivenXs*PXs, axis=1)
    t_indeces = np.nonzero(PTs)
    Ht = np.nansum(-np.dot(PTs, np.log2(PTs+np.spacing(1))))
    Htx = - np.nansum((np.dot(np.multiply(probTgivenXs, np.log2(probTgivenXs)), PXs)))
    Hyt = - np.nansum(np.dot(PYgivenTs*np.log2(PYgivenTs+np.spacing(1)), PTs))
    Hy = np.nansum(-PYs * np.log2(PYs+np.spacing(1)))

    IYT = Hy - Hyt
    ITX = Ht - Htx

    return ITX, IYT
Project: IDNNs    Author: ravidziv
def t_calc_information(p_x_given_t, PYgivenTs, PXs, PYs, PTs):  # PTs is needed by the Hyt term below
    """Calculate the MI - I(X;T) and I(Y;T)"""
    Hx = np.nansum(-np.dot(PXs, np.log2(PXs)))
    Hxt = - np.nansum((np.dot(np.multiply(p_x_given_t, np.log2(p_x_given_t)), PXs)))
    Hyt = - np.nansum(np.dot(PYgivenTs*np.log2(PYgivenTs+np.spacing(1)), PTs))
    Hy = np.nansum(-PYs * np.log2(PYs+np.spacing(1)))
    IYT = Hy - Hyt
    ITX = Hx - Hxt
    return ITX, IYT
Project: eemeter    Author: openeemeter
def _fit_cdd_only(df, weighted=False):

    bps = [i[4:] for i in df.columns if i[:3] == 'CDD']
    best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
    best_formula, cdd_qualified = None, False

    try:  # TODO: fix big try block anti-pattern
        for bp in bps:
            candidate_cdd_formula = 'upd ~ CDD_' + bp
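            # np.nansum over a boolean array counts its True entries, so this
            # skips breakpoints with fewer than 10 positive-CDD days or less
            # than 20 total CDD (the HDD fit below uses the same pattern)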
            if (np.nansum(df['CDD_' + bp] > 0) < 10) or \
               (np.nansum(df['CDD_' + bp]) < 20):
                continue
            if weighted:
                candidate_cdd_mod = smf.wls(formula=candidate_cdd_formula, data=df,
                                            weights=df['ndays'])
            else:
                candidate_cdd_mod = smf.ols(formula=candidate_cdd_formula, data=df)
            candidate_cdd_res = candidate_cdd_mod.fit()
            candidate_cdd_rsquared = candidate_cdd_res.rsquared_adj
            if (candidate_cdd_rsquared > best_rsquared and
                    candidate_cdd_res.params['Intercept'] >= 0 and
                    candidate_cdd_res.params['CDD_' + bp] >= 0 and
                    candidate_cdd_res.pvalues['CDD_' + bp] < 0.1):
                best_bp, best_rsquared = int(bp), candidate_cdd_rsquared
                best_mod, best_res = candidate_cdd_mod, candidate_cdd_res
                cdd_qualified = True
                best_formula = 'upd ~ CDD_' + bp
    except:  # TODO: catch specific error
        best_rsquared, cdd_qualified = 0, False
        best_formula, best_mod, best_res = None, None, None
        best_bp = None

    return best_formula, best_mod, best_res, best_rsquared, cdd_qualified, best_bp
Project: eemeter    Author: openeemeter
def _fit_hdd_only(df, weighted=False):

    bps = [i[4:] for i in df.columns if i[:3] == 'HDD']
    best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
    best_formula, hdd_qualified = None, False

    try:  # TODO: fix big try block anti-pattern
        for bp in bps:
            candidate_hdd_formula = 'upd ~ HDD_' + bp
            if (np.nansum(df['HDD_' + bp] > 0) < 10) or \
               (np.nansum(df['HDD_' + bp]) < 20):
                continue
            if weighted:
                candidate_hdd_mod = smf.wls(formula=candidate_hdd_formula, data=df,
                                            weights=df['ndays'])
            else:
                candidate_hdd_mod = smf.ols(formula=candidate_hdd_formula, data=df)
            candidate_hdd_res = candidate_hdd_mod.fit()
            candidate_hdd_rsquared = candidate_hdd_res.rsquared_adj
            if (candidate_hdd_rsquared > best_rsquared and
                    candidate_hdd_res.params['Intercept'] >= 0 and
                    candidate_hdd_res.params['HDD_' + bp] >= 0 and
                    candidate_hdd_res.pvalues['HDD_' + bp] < 0.1):
                best_bp, best_rsquared = int(bp), candidate_hdd_rsquared
                best_mod, best_res = candidate_hdd_mod, candidate_hdd_res
                hdd_qualified = True
                best_formula = 'upd ~ HDD_' + bp
    except:  # TODO: catch specific error
        best_rsquared, hdd_qualified = 0, False
        best_formula, best_mod, best_res = None, None, None
        best_bp = None

    return best_formula, best_mod, best_res, best_rsquared, hdd_qualified, best_bp
Project: eemeter    Author: openeemeter
def calc_gross(self):
        return np.nansum(self.input_data.energy)
Project: polara    Author: Evfro
def get_relevance_scores(matched_predictions, positive_feedback, not_rated_penalty):
    users_num = matched_predictions.shape[0]
    reldata = get_relevance_data(matched_predictions, positive_feedback, not_rated_penalty)
    true_pos, false_pos = reldata.tp, reldata.fp
    true_neg, false_neg = reldata.tn, reldata.fn

    with np.errstate(invalid='ignore'):
        # precision (positive predictive value)
        precision = true_pos / (true_pos + false_pos)
        # sensitivity
        recall = true_pos / (true_pos + false_neg)
        # false positive rate
        fallout = false_pos / (false_pos + true_neg)
        # true negative rate
        specifity = true_neg / (false_pos + true_neg)
        # false negative rate
        miss_rate = false_neg / (false_neg + true_pos)

    #average over all users
    precision = unmask(np.nansum(precision) / users_num)
    recall = unmask(np.nansum(recall) / users_num)
    fallout = unmask(np.nansum(fallout) / users_num)
    specifity = unmask(np.nansum(specifity) / users_num)
    miss_rate = unmask(np.nansum(miss_rate) / users_num)

    scores = namedtuple('Relevance', ['precision', 'recall', 'fallout', 'specifity', 'miss_rate'])
    scores = scores._make([precision, recall, fallout, specifity, miss_rate])
    return scores
Project: polara    Author: Evfro
def get_ranking_scores(matched_predictions, feedback_data, switch_positive, alternative=True):
    users_num, topk, holdout = matched_predictions.shape
    ideal_scores_idx = np.argsort(feedback_data, axis=1)[:, ::-1] #returns column index only
    ideal_scores_idx = np.ravel_multi_index((np.arange(feedback_data.shape[0])[:, None], ideal_scores_idx), dims=feedback_data.shape)

    where = np.ma.where if np.ma.is_masked(feedback_data) else np.where
    is_positive = feedback_data >= switch_positive
    positive_feedback = where(is_positive, feedback_data, 0)
    negative_feedback = where(~is_positive, -feedback_data, 0)

    relevance_scores_pos = (matched_predictions * positive_feedback[:, None, :]).sum(axis=2)
    relevance_scores_neg = (matched_predictions * negative_feedback[:, None, :]).sum(axis=2)
    ideal_scores_pos = positive_feedback.ravel()[ideal_scores_idx]
    ideal_scores_neg = negative_feedback.ravel()[ideal_scores_idx]

    discount_num = max(holdout, topk)
    if alternative:
        discount = np.log2(np.arange(2, discount_num+2))
        relevance_scores_pos = 2**relevance_scores_pos - 1
        relevance_scores_neg = 2**relevance_scores_neg - 1
        ideal_scores_pos = 2**ideal_scores_pos - 1
        ideal_scores_neg = 2**ideal_scores_neg - 1
    else:
        discount = np.hstack([1, np.log(np.arange(2, discount_num+1))])

    dcg = (relevance_scores_pos / discount[:topk]).sum(axis=1)
    dcl = (relevance_scores_neg / -discount[:topk]).sum(axis=1)
    idcg = (ideal_scores_pos / discount[:holdout]).sum(axis=1)
    idcl = (ideal_scores_neg / -discount[:holdout]).sum(axis=1)

    with np.errstate(invalid='ignore'):
        ndcg = unmask(np.nansum(dcg / idcg) / users_num)
        ndcl = unmask(np.nansum(dcl / idcl) / users_num)

    ranking_score = namedtuple('Ranking', ['nDCG', 'nDCL'])._make([ndcg, ndcl])
    return ranking_score
Project: catalyst    Author: enigmampc
def vwap(df):
    """
    Volume-weighted average price (VWAP) is a ratio generally used by
    institutional investors and mutual funds to make buys and sells so as not
    to disturb the market prices with large orders. It is the average share
    price of a stock weighted against its trading volume within a particular
    time frame, generally one day.

    Read more: Volume Weighted Average Price - VWAP
    https://www.investopedia.com/terms/v/vwap.asp#ixzz4xt922daE

    Parameters
    ----------
    df: pd.DataFrame

    Returns
    -------
    float
        The volume-weighted average price, or NaN if total volume is zero.
    """
    if 'close' not in df.columns or 'volume' not in df.columns:
        raise ValueError('price data must include `volume` and `close`')

    vol_sum = np.nansum(df['volume'].values)

    # dividing a numpy float by zero yields inf/nan instead of raising
    # ZeroDivisionError, so guard against zero total volume explicitly
    if vol_sum == 0:
        return np.nan

    return np.nansum(df['close'].values * df['volume'].values) / vol_sum
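
A quick sanity check of this helper on toy data (illustrative numbers only):

import numpy as np
import pandas as pd

df = pd.DataFrame({'close': [10.0, 11.0, np.nan],
                   'volume': [100.0, 300.0, 50.0]})
# close * volume nansums to 10*100 + 11*300 = 4300; volume nansums to 450
print(vwap(df))  # 4300 / 450 = 9.56 (approximately)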
Project: AutoML-Challenge    Author: postech-mlg-exbrain
def _calculate(self, X, y, categorical, metafeatures, helpers):
        res = np.nansum(helpers.get_value("NumSymbols"))
        return res if np.isfinite(res) else 0


################################################################################
# Statistical meta features
# Only use third and fourth statistical moment because it is common to
# standardize for the other two
# see Engels & Theusinger, 1998 - Using a Data Metric for Preprocessing Advice for Data Mining Applications.
Project: nelpy    Author: nelpy
def trajectory_score_array(posterior, slope=None, intercept=None, w=None, weights=None, normalize=False):
    """Docstring goes here

    This is the score that Davidson et al. maximize in order to get a linear trajectory,
    but here we assume that we have the trajectory already, and then just score it.

    w is the number of bin rows to include in the score, in each direction. That is, w=0 uses only the modes,
    and w=1 is a band of width 3: the modes, plus 1 bin above and 1 bin below the mode.

    The score is NOT averaged!"""

    rows, cols = posterior.shape

    if w is None:
        w = 0
    if not float(w).is_integer():
        raise ValueError("w has to be an integer!")
    if slope is None or intercept is None:
        slope, intercept, _ = linregress_array(posterior=posterior)

    x = np.arange(cols)
    line_y = np.round((slope*x + intercept)) # in position bin #s

    # idea: cycle each column so that the top w rows are the band surrounding the regression line

    if np.isnan(slope): # this will happen if we have 0 or only 1 decoded bins
        return np.nan
    else:
        temp = column_cycle_array(posterior, -line_y+w)

    if normalize:
        num_non_nan_bins = round(np.nansum(posterior))
    else:
        num_non_nan_bins = 1

    return np.nansum(temp[:2*w+1,:])/num_non_nan_bins
Project: hydrus    Author: mark-r-g
def test_nsum(x):
    assume(np.max(x[np.isfinite(x)]) < 1e4)
    assume(np.min(x[np.isfinite(x)]) > -1e4)
    aae(nsum(x), np.nansum(x))
Project: hydrus    Author: mark-r-g
def test_nsum_row(x):
    assume(np.max(x[np.isfinite(x)]) < 1e4)
    assume(np.min(x[np.isfinite(x)]) > -1e4)
    aae(nsum_row(x), np.nansum(x, axis=1))
Project: hydrus    Author: mark-r-g
def test_preds_ll(alpha, mu, gamma, err, num, w):
    current_impl = Lvm.preds_ll(alpha, mu, gamma, err, num, w)
    simple_impl = np.nansum(w * norm.logpdf(num, mu+gamma*alpha, err))
    simple_impl += np.sum(norm.logpdf(alpha))
    assert_approx_equal(current_impl, simple_impl)
Project: hydrus    Author: mark-r-g
def ests_obj(self, params):
        """The objective function to minimize for the model parameters."""
        # return -nsum(self.ests_ll(params))
        return -np.nansum(self.ests_ll(params))
Project: hydrus    Author: mark-r-g
def nsum_row(a):
    return nansum(a, axis=1)
Project: semi-auto-anno    Author: moberweger
def getJointNumFramesVisible(self, jointID):
        """
        Get number of frames in which joint is visible
        :param jointID: joint ID
        :return: number of frames
        """

        return numpy.nansum(self.gt[:, jointID, :]) / self.gt.shape[2]  # 3D
Project: crick    Author: jcrist
def test_basic_stats(x):
    s = SummaryStats()
    s.update(x)

    assert s.count() == np.count_nonzero(~np.isnan(x))
    np.testing.assert_allclose(s.sum(), np.nansum(x), rtol=RTOL, atol=ATOL)
    np.testing.assert_equal(s.min(), np.nanmin(x) if len(x) else np.nan)
    np.testing.assert_equal(s.max(), np.nanmax(x) if len(x) else np.nan)
    np.testing.assert_allclose(s.mean(), np.nanmean(x) if len(x) else np.nan,
                               rtol=RTOL, atol=ATOL)
    np.testing.assert_allclose(s.var(), np.nanvar(x) if len(x) else np.nan,
                               rtol=RTOL, atol=ATOL)
    np.testing.assert_allclose(s.std(), np.nanstd(x) if len(x) else np.nan,
                               rtol=RTOL, atol=ATOL)
Project: spykes    Author: KordingLab
def log_likelihood(y, yhat):
    '''Helper function to compute the log likelihood.'''
    eps = np.spacing(1)
    return np.nansum(y * np.log(eps + yhat) - yhat)