Python statsmodels.api 模块,Logit() 实例源码

我们从Python开源项目中,提取了以下7个代码示例,用于说明如何使用statsmodels.api.Logit()

项目:Uber-DS-Challenge    作者:bjherger    | 项目源码 | 文件源码
def run_statsmodels_models(train, test, model_description):
    """
    Fit a logistic regression on ``train`` and score it on ``test``.

    The design matrices for both frames are built with ``dmatrices`` from the
    same formula, the model is fitted with BFGS (at most 100 iterations), and
    the ROC AUC of the predictions on ``test`` is printed and returned.

    :param train: Training observations; must contain the columns referenced by
        ``model_description`` as well as ``city_name`` and ``signup_channel``
        (their value counts are printed).
    :type train: pd.DataFrame
    :param test: Holdout observations, containing the columns referenced by
        ``model_description``.
    :type test: pd.DataFrame
    :param model_description: Formula string handed to ``dmatrices``.
    :type model_description: str
    :return: AUC for model generated
    :rtype: float
    """
    # Let the logging module do the interpolation; this also avoids the
    # ``%`` operator misbehaving if the description is ever a tuple.
    logging.info('Running model w/ description: %s', model_description)
    logging.debug('Train df: \n%s', train.describe())
    logging.debug('Test df: \n%s', test.describe())

    # Use dmatrices to format data; rows with missing values are dropped.
    y_train, X_train = dmatrices(model_description, data=train, return_type='dataframe', NA_action='drop')
    y_test, X_test = dmatrices(model_description, data=test, return_type='dataframe', NA_action='drop')

    # Create, fit model
    mod = sm.Logit(endog=y_train, exog=X_train)
    res = mod.fit(method='bfgs', maxiter=100)

    # Output model summary. Single-argument ``print(...)`` emits the same text
    # on Python 2 and Python 3.
    print(train['city_name'].value_counts())
    print(train['signup_channel'].value_counts())
    print(res.summary())

    # Create, output AUC
    predicted = res.predict(X_test)
    auc = roc_auc_score(y_true=y_test, y_score=predicted)
    print('AUC for 20%% holdout: %s' % auc)

    # Return AUC for model generated
    return auc



# Main section
项目:pscore_match    作者:kellieotto    | 项目源码 | 文件源码
def compute(self, method='logistic'):
        """
        Estimate propensity scores from the stored covariates.

        A constant column is appended after the covariates, the chosen binary
        model is fitted to ``self.treatment`` and its in-sample predictions
        are returned.

        Parameters
        ----------
        method : str
            Propensity score estimation method. Either 'logistic' or 'probit'

        Returns
        -------
        The result of ``predict()`` on the fitted model.

        Raises
        ------
        ValueError
            If ``method`` is neither 'logistic' nor 'probit'.
        """
        design = sm.add_constant(self.covariates, prepend=False)

        # Pick the estimator class first so the fit call is written only once.
        if method == 'logistic':
            estimator = sm.Logit
        elif method == 'probit':
            estimator = sm.Probit
        else:
            raise ValueError('Unrecognized method')

        fitted = estimator(self.treatment, design).fit(disp=False, warn_convergence=True)
        return fitted.predict()
项目:themarketingtechnologist    作者:thomhopmans    | 项目源码 | 文件源码
def run_logistic_regression(df):
    """Fit a logit of ``is_conversion`` on ``pageviews_cumsum`` plus a constant.

    The fit summary is printed and the fitted results object is returned.
    """
    # Single regressor with a constant column added by statsmodels.
    design = sm.add_constant(df['pageviews_cumsum'])
    model = sm.Logit(df['is_conversion'], design)
    fit_result = model.fit()
    print(fit_result.summary())
    return fit_result
项目:themarketingtechnologist    作者:thomhopmans    | 项目源码 | 文件源码
def run_logistic_regression(self):
        """Fit a logit of ``is_conversion`` on ``pageviews_cumsum`` plus a constant.

        Reads both columns from ``self.df``, stores the fitted results on
        ``self.logistic_regression_results`` and prints the fit summary.
        """
        # Logistic regression
        X = self.df['pageviews_cumsum']
        X = sm.add_constant(X)
        y = self.df['is_conversion']
        logit = sm.Logit(y, X)
        self.logistic_regression_results = logit.fit()
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(self.logistic_regression_results.summary())
项目:contextual-advertising-deploy    作者:andreicnica    | 项目源码 | 文件源码
def fit(self):
        """Fit a statsmodels logit of ``self.y`` on ``self.X`` plus an intercept.

        The fitted results are stored on ``self.model`` and the fit summary is
        printed.
        """
        # Disabled scikit-learn variant:
        # self.model = linear_model.LogisticRegression(C=1e3)
        # self.model.fit(self.X, self.y)
        # self.model.score(self.X, self.y)

        # Work on a copy so the intercept column is not added to self.X.
        X = self.X.copy()
        X['intercept'] = 1

        logit = sm.Logit(self.y, X)
        self.model = logit.fit()
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(self.model.summary())
项目:aq_weather    作者:eliucidate    | 项目源码 | 文件源码
def sm_logit(self, Xtrain, ytrain, Xtest, ytest):
            """Fit a regularized logit on the training data and report on both splits.

            The model is fitted with ``fit_regularized(alpha=10)``. For the
            train split and then the test split, the predictions are passed to
            ``self.calculate_accuracy`` and the result is printed as a
            percentage, followed by a confusion matrix (label order
            ``[1.0, 0.0]``) computed from the predictions thresholded at 0.5.
            Nothing is returned.
            """
            sm_results = sm.Logit(ytrain, Xtrain).fit_regularized(alpha=10, disp=False)
            print(sm_results.summary())

            # The train and test reports are identical apart from their inputs.
            for split_name, features, targets in (('train', Xtrain, ytrain),
                                                  ('test', Xtest, ytest)):
                probabilities = sm_results.predict(features)
                # calculate_accuracy receives the raw (unrounded) predictions.
                accuracy = self.calculate_accuracy(probabilities, targets)
                # The double space reproduces Python 2's
                # ``print "... accuracy: ", value`` output.
                print("%s accuracy:  %s" % (split_name, accuracy * 100))

                # Build the hard labels by iterating over the values instead of
                # assigning through ``predictions[i]``: on a pandas Series that
                # subscript is label-based and breaks when the index is not
                # 0..n-1. Thresholding at 0.5 matches round-half-up for
                # probabilities regardless of the interpreter's ``round``.
                hard_labels = [1.0 if p >= 0.5 else 0.0 for p in probabilities]
                conf_matrix = confusion_matrix(targets, hard_labels, labels=[1.0, 0.0])
                print("%s confusion matrix: %s" % (split_name, conf_matrix))
项目:python_utils    作者:Jayhello    | 项目源码 | 文件源码
def get_data(f_path="../dataset/logistic_regression/UCLA_dataset.csv"):
    """Explore the admissions CSV and compare two classifiers on a holdout split.

    The file is loaded with pandas and a few summaries are printed (head,
    describe, standard deviations, and an ``admit`` x ``rank`` crosstab). The
    data is then split 80/20; every column after the first is used as a
    feature and ``admit`` as the target. A ``LogisticRegression`` and a
    ``RandomForestClassifier`` are fitted on the training part and their
    holdout metrics are printed. Nothing is returned.

    :param f_path: Path of the CSV file to load. Defaults to the location the
        function previously had hard-coded.
    """
    df = pd.read_csv(f_path)
    print(df.head())
    print(df.describe())
    print(df.std())
    print(pd.crosstab(df['admit'], df['rank'], rownames=['admit']))

    # Disabled exploratory code (histograms, rank dummies, statsmodels fit):
    # df.hist()
    # pl.show()
    #
    # dummy_ranks = pd.get_dummies(df['rank'], prefix='rank')
    # print(dummy_ranks.head())
    #
    # train_cols = df.columns[1:]
    # lr = sm.Logit(df['admit'], df[train_cols])
    # ret = lr.fit()
    # print(ret.summary())

    train, test = train_test_split(df, test_size=0.2)
    train_x, train_y = train[train.columns[1:]], train['admit']
    test_x, test_y = test[test.columns[1:]], test['admit']

    # Logistic regression baseline.
    lr = LogisticRegression()
    lr.fit(train_x, train_y)
    lr_pred = lr.predict(test_x)
    print(accuracy_score(test_y, lr_pred))

    # Random forest on the same split.
    rf = RandomForestClassifier(n_jobs=4)
    rf.fit(train_x, train_y)
    rf_pred = rf.predict(test_x)
    print(confusion_matrix(test_y, rf_pred))

    # accuracy_score / recall_score return fractions in [0, 1]; scale by 100
    # so the value agrees with the '%' sign printed after it.
    accuracy = accuracy_score(test_y, rf_pred)
    print("accuracy is: %s%%" % (accuracy * 100,))
    recall = recall_score(test_y, rf_pred)
    print("recall is: %s%%" % (recall * 100,))