Python statsmodels.api 模块,GLM 实例源码

我们从Python开源项目中,提取了以下7个代码示例,用于说明如何使用statsmodels.api.GLM

项目:bambi    作者:bambinos    | 项目源码 | 文件源码
def _get_intercept_stats(self, add_slopes=True):
        # start with mean and variance of Y on the link scale
        mod = sm.GLM(endog=self.model.y.data,
                     exog=np.repeat(1, len(self.model.y.data)),
                     family=self.model.family.smfamily(),
                     missing='drop' if self.model.dropna else 'none').fit()
        mu = mod.params
        # multiply SE by sqrt(N) to turn it into (approx.) SD(Y) on link scale
        sd = (mod.cov_params()[0] * len(mod.mu))**.5

        # modify mu and sd based on means and SDs of slope priors.
        if len(self.model.fixed_terms) > 1 and add_slopes:
            means = np.array([x['mu'] for x in self.priors.values()])
            sds = np.array([x['sd'] for x in self.priors.values()])
            # add to intercept prior
            index = list(self.priors.keys())
            mu -= np.dot(means, self.stats['mean_x'][index])
            sd = (sd**2 + np.dot(sds**2, self.stats['mean_x'][index]**2))**.5

        return mu, sd
项目:bambi    作者:bambinos    | 项目源码 | 文件源码
def __init__(self, model, taylor):
        self.model = model
        self.stats = model.dm_statistics if hasattr(model, 'dm_statistics') \
            else None
        self.dm = pd.DataFrame({lev: t.data[:, i]
                                for t in model.fixed_terms.values()
                                for i, lev in enumerate(t.levels)})
        self.priors = {}
        missing = 'drop' if self.model.dropna else 'none'
        self.mle = sm.GLM(endog=self.model.y.data, exog=self.dm,
                          family=self.model.family.smfamily(),
                          missing=missing).fit()
        self.taylor = taylor
        with open(join(dirname(__file__), 'config', 'derivs.txt'), 'r') as file:
            self.deriv = [next(file).strip('\n') for x in range(taylor+1)]
项目:DriverPower    作者:smshuai    | 项目源码 | 文件源码
def run_glm(X, y, model_name):
    """ Train the binomial/negative binomial GLM

    Args:
        X (np.array): scaled X. 
        y (pd.df): four columns response table.

    Returns:
        sm.model: trained GLM models.

    """
    # Add const manually. sm.add_constant cannot add 1 for shape (1, n)
    X = np.c_[X, np.ones(X.shape[0])]
    if model_name == 'Binomial':
        logger.info('Building binomial GLM')
        # make two columns response (# success, # failure)
        y_binom = np.zeros((y.shape[0], 2), dtype=np.int_)
        y_binom[:, 0] = y.nMut
        y_binom[:, 1] = y.length * y.N - y.nMut
        glm = sm.GLM(y_binom, X, family=sm.families.Binomial())
    elif model_name == 'NegativeBinomial':
        logger.info('Building negative binomial GLM')
        # use nMut as response and length*N as exposure
        glm = sm.GLM(y.nMut.values, X, family=sm.families.NegativeBinomial(), exposure=y.length.values*y.N.values+1)
    else:
        sys.stderr.write('Unknown GLM name {}. Must be Binomial or NegativeBinomial'.format(model_name))
        sys.exit(1)
    model = glm.fit()
    return model
项目:plotnine    作者:has2k1    | 项目源码 | 文件源码
def glm(data, xseq, **params):
    """
    Fit GLM
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)
    results = sm.GLM(data['y'], X).fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        # TODO: Depends on statsmodel > 0.7
        # https://github.com/statsmodels/statsmodels/pull/2151
        # https://github.com/statsmodels/statsmodels/pull/3406
        # Remove the try/except when a compatible version is released
        try:
            prediction = results.get_prediction(Xseq)
            ci = prediction.conf_int(1 - params['level'])
            data['ymin'] = ci[:, 0]
            data['ymax'] = ci[:, 1]
        except (AttributeError, TypeError):
            warnings.warn(
                "Cannot compute confidence intervals."
                "Install latest/development version of statmodels.")

    return data
项目:neurotools    作者:michaelerule    | 项目源码 | 文件源码
def glmfit(X,Y):
        '''
        Wrapper for statsmodels glmfit that prepares a constant 
        parameter and configuration options for poisson-GLM fitting.
        Please see the documentation for glmfit in statsmodels for
        more details. 

        This method will automatically add a constant colum to the feature
        matrix Y

        Parameters
        ----------
        X : array-like
            A nobs x k array where `nobs` is the number of observations and `k`
            is the number of regressors. An intercept is not included by default
            and should be added by the user (models specified using a formula
            include an intercept by default). See `statsmodels.tools.add_constant`.
        Y : array-like
            1d array of poisson counts.  This array can be 1d or 2d.
        '''
        # check for and maybe add constant value to X
        if not all(X[:,0]==X[0,0]):
            X = hstack([ ones((shape(X)[0],1),dtype=X.dtype), X])

        poisson_model   = sm.GLM(Y,X,family=sm.families.Poisson())
        poisson_results = poisson_model.fit()
        M = poisson_results.params
        return M
项目:bambi    作者:bambinos    | 项目源码 | 文件源码
def _scale_random(self, term):

        # these default priors are only defined for HalfNormal priors
        if term.prior.args['sd'].name != 'HalfNormal':
            return

        sd_corr = term.prior.scale

        # recreate the corresponding fixed effect data
        fix_data = term.data.sum(axis=1)

        # handle intercepts and cell means
        if term.constant:
            mu, sd = self._get_intercept_stats()
            sd *= sd_corr
        # handle slopes
        else:
            exists = [x for x in self.dm.columns
                      if np.array_equal(fix_data, self.dm[x].values)]
            # handle case where there IS a corresponding fixed effect
            if exists and exists[0] in self.priors.keys():
                sd = self.priors[exists[0]]['sd']
            # handle case where there IS NOT a corresponding fixed effect
            else:
                # the usual case: add the random effect data as a fixed effect
                # in the design matrix
                if not exists:
                    fix_dataframe = pd.DataFrame(fix_data)
                    # things break if column names are integers (the default)
                    fix_dataframe.rename(
                        columns={c: '_'+str(c) for c in fix_dataframe.columns},
                        inplace=True)
                    exog = self.dm.join(fix_dataframe)
                # this handles the corner case where there technically is the
                # corresponding fixed effect, but the parameterization differs
                # between the fixed- and random-effect specification. usually
                # this means the fixed effects use cell-means coding but the
                # random effects use k-1 coding
                else:
                    group = term.name.split('|')[1]
                    exog = self.model.random_terms.values()
                    exog = [v.data.sum(1) for v in exog
                            if v.name.split('|')[-1] == group]
                    index = ['_'+str(i) for i in range(len(exog))]
                    exog = pd.DataFrame(exog, index=index).T
                # this will replace self.mle (which is missing predictors)
                missing = 'drop' if self.model.dropna else 'none'
                full_mod = sm.GLM(endog=self.model.y.data, exog=exog,
                                  family=self.model.family.smfamily(),
                                  missing=missing).fit()
                sd = self._get_slope_stats(exog=exog, predictor=fix_data,
                                           full_mod=full_mod, sd_corr=sd_corr)

        # set the prior SD.
        term.prior.args['sd'].update(sd=np.squeeze(np.atleast_1d(sd)))
项目:genetest    作者:pgxcentre    | 项目源码 | 文件源码
def fit(self, y, X):
        """Fit the model.

        Args:
            y (pandas.DataFrame): The vector of endogenous variable.
            X (pandas.DataFrame): The matrix of exogenous variables.

        """
        # Creating the GLM model from StatsModels and fitting it
        model = sm.GLM(y, X, family=sm.families.Binomial())
        try:
            fitted = model.fit()

        except PerfectSeparationError as e:
            raise StatsError(str(e))

        out = {}
        parameters = fitted.params.index

        # Results about the model fit.
        out = {
            "MODEL": {
                "log_likelihood": fitted.llf,
                "nobs": fitted.nobs
            },
        }

        # Getting the confidence intervals
        conf_ints = fitted.conf_int()

        for param in parameters:
            # If GWAS, check that inference could be done on the SNP.
            if param == "SNPs" and np.isnan(fitted.pvalues[param]):
                raise StatsError("Inference did not converge.")

            out[param] = {
                "coef": fitted.params[param],
                "std_err": fitted.bse[param],
                "lower_ci": conf_ints.loc[param, 0],
                "upper_ci": conf_ints.loc[param, 1],
                "t_value": fitted.tvalues[param],
                "p_value": fitted.pvalues[param],
            }

        return out