我们从Python开源项目中,提取了以下4个代码示例,用于说明如何使用statsmodels.api.qqplot()。
def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
    """Draw a five-panel diagnostic figure for a one-dimensional series.

    Layout (3 rows x 2 columns): the series itself spans the top row, the
    ACF and PACF plots share the middle row, and a QQ plot sits next to a
    probability plot on the bottom row.

    Args:
        y: Data to plot; anything that is not already a ``pd.Series`` is
            wrapped in one.
        lags: Forwarded as ``lags`` to both the ACF and the PACF plot.
        figsize: Size passed to ``plt.figure``.
        style: Matplotlib style name, active only while the figure is drawn.

    Returns:
        None. The figure is created through ``plt.figure`` and not returned.
    """
    series = y if isinstance(y, pd.Series) else pd.Series(y)

    with plt.style.context(style):
        plt.figure(figsize=figsize)
        # mpl.rcParams['font.family'] = 'Ubuntu Mono'
        grid = (3, 2)

        # Top row: one axis spanning both columns.
        series_ax = plt.subplot2grid(grid, (0, 0), colspan=2)
        # Remaining rows, created left-to-right, top-to-bottom.
        acf_ax, pacf_ax, qq_ax, pp_ax = [
            plt.subplot2grid(grid, (row, col))
            for row in (1, 2)
            for col in (0, 1)
        ]

        series.plot(ax=series_ax)
        series_ax.set_title('Time Series Analysis Plots')

        smt.graphics.plot_acf(series, lags=lags, ax=acf_ax, alpha=0.5)
        smt.graphics.plot_pacf(series, lags=lags, ax=pacf_ax, alpha=0.5)

        sm.qqplot(series, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot')

        # Probability plot parameterised by the sample mean and std.
        sample_params = (series.mean(), series.std())
        scs.probplot(series, sparams=sample_params, plot=pp_ax)

        plt.tight_layout()
def plot_resid(model, x):
    '''
    Draw residual diagnostics for a trained StatsModel linear regression.

    The left panel scatters the studentized residuals (the 'student_resid'
    column of model.outlier_test()) against the model's predictions for x,
    with a dashed horizontal line at zero. The right panel is a qqplot of
    the same residuals.

    model: a trained StatsModel linear regression model.
    x: the input data which was used to train the model.

    returns: the figure upon which the residuals were drawn
    '''
    figure, (scatter_ax, qq_ax) = plt.subplots(1, 2)

    predictions = model.predict(x)
    student_resid = model.outlier_test()['student_resid']

    # Left panel: residuals vs. predictions with a zero reference line.
    scatter_ax.scatter(predictions, student_resid, alpha=.2)
    scatter_ax.axhline(0, linestyle='--')

    # Right panel: qqplot of the residuals.
    sm.qqplot(student_resid, line='s', ax=qq_ax)

    figure.tight_layout()
    return figure
def cross_section_qqplot(data, factor_name, date):
    '''
    Draw a QQ plot of one factor's values for a single date and show it.

    --------------------------------
    data: DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name: str, the column to plot
    date: str, the value of the first index level to select

    returns: the matplotlib axes the QQ plot was drawn on
    '''
    ax = plt.gca()
    # ``DataFrame.ix`` was removed in pandas 1.0. ``.loc`` with a one-element
    # tuple does the label-based partial selection on the first index level.
    plot_data = data.loc[(date,), factor_name].values
    sm.qqplot(plot_data, line='45', fit=True, ax=ax)
    plt.show()
    return ax


# NOTE(review): two section-header comments followed this function but their
# text was lost to an encoding error (they read "??4" and "ic ???").
# Restore them from the original source if available.
def mult_regression(wine_set):
    """Fit an OLS model of wine quality and show its residual diagnostics.

    Every column except ``quality`` is mean-centered, an OLS model is fitted
    with a predictor set that depends on the number of rows, the summary is
    printed, and three figures are shown in turn: a q-q plot of the
    residuals, a scatter of the Pearson residuals by observation, and an
    influence (leverage) plot.

    Args:
        wine_set: DataFrame holding ``quality`` plus the predictor columns
            named in the formulas below. The caller's frame is not modified;
            centering produces a new DataFrame.

    Returns:
        None. Output is the printed summary and the displayed figures.
    """
    # center quantitative IVs for regression analysis
    quality = wine_set['quality']
    wine_set = wine_set - wine_set.mean()
    wine_set['quality'] = quality  # the response stays un-centered

    print("OLS multivariate regression model")
    # The first run used all columns; the most significant ones were then
    # chosen for each wine set and the model was rerun with those.
    if len(wine_set) < 2000:
        # for red
        formula = "quality ~ volatile_acidity + chlorides + pH + sulphates + alcohol"
    else:
        # for white
        formula = "quality ~ volatile_acidity + density + pH + sulphates + alcohol"
    model1 = smf.ols(formula=formula, data=wine_set)
    results1 = model1.fit()
    print(results1.summary())

    # q-q plot for normality
    sm.qqplot(results1.resid, line='r')
    plt.show()

    # plot of residuals
    stdres = pd.DataFrame(results1.resid_pearson)
    plt.plot(stdres, 'o', ls='None')
    plt.axhline(y=0, color='r')
    plt.ylabel('Standardized residual')
    plt.xlabel('Observation number')
    plt.show()

    # # diagnostic plots
    # figure1 = plt.figure(figsize=(12, 8))
    # figure1 = sm.graphics.plot_regress_exog(results1, "alcohol", fig = figure1)
    # plt.show()
    #
    # figure1 = plt.figure(figsize=(12, 8))
    # figure1 = sm.graphics.plot_regress_exog(results1, "sulphates", fig = figure1)
    # plt.show()

    # leverage plot
    sm.graphics.influence_plot(results1, size=8)
    plt.show()


# call(mult_regression)

# ____________________________ Logistic Regression _____________________