我们从 Python 开源项目中提取了以下 7 个代码示例，用于说明如何使用 statsmodels.api.Logit()。
def run_statsmodels_models(train, test, model_description):
    """
    Run logistic regression model to predict whether a signed up driver ever
    actually drove.

    The model is fit on ``train`` and evaluated on ``test``. Value counts of
    the ``city_name`` and ``signup_channel`` columns of ``train`` and the
    fitted model summary are printed along the way.

    :param train: Data frame used to fit the model; must contain the columns
        referenced by ``model_description`` as well as ``city_name`` and
        ``signup_channel``
    :type train: pd.DataFrame
    :param test: Hold-out data frame used to compute the AUC
    :type test: pd.DataFrame
    :param model_description: patsy formula-like description handed to
        ``dmatrices``
    :return: AUC for model generated
    :rtype: float
    """
    # Pass values as logging arguments so the message is only formatted when
    # the record is actually emitted.
    logging.info('Running model w/ description: %s', model_description)
    logging.debug('Train df: \n%s', train.describe())
    logging.debug('Test df: \n%s', test.describe())

    # Use dmatrices to format data; NA_action='drop' discards rows that
    # contain missing values.
    y_train, X_train = dmatrices(model_description, data=train,
                                 return_type='dataframe', NA_action='drop')
    y_test, X_test = dmatrices(model_description, data=test,
                               return_type='dataframe', NA_action='drop')

    # Create, fit model
    mod = sm.Logit(endog=y_train, exog=X_train)
    res = mod.fit(method='bfgs', maxiter=100)

    # Output model summary. Single-argument print(...) calls behave the same
    # under Python 2's print statement and Python 3's print function.
    print(train['city_name'].value_counts())
    print(train['signup_channel'].value_counts())
    print(res.summary())

    # Create, output AUC. NOTE(review): the "20%" in the message is a fixed
    # label; the actual hold-out share depends on how the caller split the data.
    predicted = res.predict(X_test)
    auc = roc_auc_score(y_true=y_test, y_score=predicted)
    print('AUC for 20%% holdout: %s' % auc)

    # Return AUC for model generated
    return auc


# Main section
def compute(self, method='logistic'):
    """
    Estimate propensity scores with a binary-response regression.

    ``self.treatment`` is regressed on ``self.covariates`` plus a constant
    column, and the fitted model's predictions are returned.

    Parameters
    ----------
    method : str
        Propensity score estimation method. Either 'logistic' or 'probit'

    Returns
    -------
    The result of calling ``predict()`` with no arguments on the fitted
    model, i.e. the predictions for the data the model was fit on.

    Raises
    ------
    ValueError
        If ``method`` is neither 'logistic' nor 'probit'.
    """
    # Fail fast: reject an unsupported method before building the design
    # matrix or touching the data.
    if method not in ('logistic', 'probit'):
        raise ValueError('Unrecognized method')

    # prepend=False places the constant column after the covariates.
    predictors = sm.add_constant(self.covariates, prepend=False)
    estimator = sm.Logit if method == 'logistic' else sm.Probit
    model = estimator(self.treatment, predictors).fit(disp=False,
                                                      warn_convergence=True)
    return model.predict()
def run_logistic_regression(df):
    """
    Fit a logit model of ``is_conversion`` on ``pageviews_cumsum``.

    A constant term is added to the single regressor, the model is fit with
    the statsmodels defaults, its summary is printed, and the fitted results
    object is returned.
    """
    # Design matrix: the cumulative page-view column plus a constant.
    design = sm.add_constant(df['pageviews_cumsum'])
    outcome = df['is_conversion']

    fitted = sm.Logit(outcome, design).fit()
    print(fitted.summary())
    return fitted
def run_logistic_regression(self):
    """
    Fit a logit model of ``is_conversion`` on ``pageviews_cumsum``.

    Both columns are read from ``self.df``. A constant term is added to the
    regressor, the fitted results are stored on
    ``self.logistic_regression_results``, and their summary is printed.
    """
    # Logistic regression
    X = self.df['pageviews_cumsum']
    X = sm.add_constant(X)
    y = self.df['is_conversion']
    logit = sm.Logit(y, X)
    self.logistic_regression_results = logit.fit()
    # print(...) with one argument works on both Python 2 and Python 3.
    print(self.logistic_regression_results.summary())
def fit(self):
    """
    Fit a statsmodels logit model of ``self.y`` on ``self.X`` plus an intercept.

    The intercept column is added to a copy of ``self.X`` so the stored
    features are left untouched. The fitted results are stored on
    ``self.model`` and their summary is printed.
    """
    X = self.X.copy()
    # Logit does not add a constant on its own, so supply one explicitly.
    X['intercept'] = 1
    logit = sm.Logit(self.y, X)
    self.model = logit.fit()
    # print(...) with one argument works on both Python 2 and Python 3.
    print(self.model.summary())
def sm_logit(self, Xtrain, ytrain, Xtest, ytest):
    """
    Fit a regularized logit model and print train/test diagnostics.

    The model is fit on ``(Xtrain, ytrain)`` with ``fit_regularized`` and
    ``alpha=10``. For the train split and then the test split, the method
    prints the accuracy returned by ``self.calculate_accuracy`` (scaled by
    100) and a confusion matrix with label order ``[1.0, 0.0]``.

    ``self.calculate_accuracy`` receives the raw predictions, not the
    thresholded labels.
    """
    sm_results = sm.Logit(ytrain, Xtrain).fit_regularized(alpha=10, disp=False)
    print(sm_results.summary())

    def report(split, X, y):
        # Predict, score and print the diagnostics for one data split.
        predictions = sm_results.predict(X)
        accuracy = self.calculate_accuracy(predictions, y)
        # Each message is built as one string so the output is identical
        # under Python 2's print statement and Python 3's print function
        # (the original two-item print emitted two spaces after the colon).
        print("%s accuracy:  %s" % (split, accuracy * 100))

        # Threshold at 0.5 by iterating over the values. This avoids
        # integer-index assignment (label-based on a pandas Series) and the
        # Python 2 / Python 3 difference in round() at exactly 0.5.
        labels = [1.0 if p >= 0.5 else 0.0 for p in predictions]
        matrix = confusion_matrix(y, labels, labels=[1.0, 0.0])
        print("%s confusion matrix: %s" % (split, matrix))

    report("train", Xtrain, ytrain)
    report("test", Xtest, ytest)
def get_data(f_path="../dataset/logistic_regression/UCLA_dataset.csv"):
    """
    Load the admissions CSV, print summaries, and evaluate two classifiers.

    The data is split 80/20 into train and test sets. A ``LogisticRegression``
    and a ``RandomForestClassifier`` are fit on the training part; the
    logistic model's test accuracy is printed, followed by the random
    forest's confusion matrix, accuracy and recall.

    :param f_path: Path of the CSV file to read; defaults to the location
        that was previously hard-coded
    :return: None; all results are printed
    """
    df = pd.read_csv(f_path)
    print(df.head())
    print(df.describe())
    print(df.std())
    print(pd.crosstab(df['admit'], df['rank'], rownames=['admit']))

    train, test = train_test_split(df, test_size=0.2)
    # Every column except the first is used as a feature.
    # NOTE(review): this assumes 'admit' is the first column - confirm
    # against the CSV layout.
    train_x, train_y = train[train.columns[1:]], train['admit']
    test_x, test_y = test[test.columns[1:]], test['admit']

    lr = LogisticRegression()
    lr.fit(train_x, train_y)
    lr_pred = lr.predict(test_x)
    print(accuracy_score(test_y, lr_pred))

    rf = RandomForestClassifier(n_jobs=4)
    rf.fit(train_x, train_y)
    rf_pred = rf.predict(test_x)
    cnf_matrix = confusion_matrix(test_y, rf_pred)
    print(cnf_matrix)

    # accuracy_score / recall_score return fractions in [0, 1]; scale by 100
    # so the value matches the '%' suffix in the message.
    accuracy_percent = accuracy_score(test_y, rf_pred) * 100
    print("accuracy is: %s%s" % (accuracy_percent, '%'))
    recall_percent = recall_score(test_y, rf_pred) * 100
    print("recall is: %s%s" % (recall_percent, '%'))