Python sklearn.tree module: DecisionTreeRegressor() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use sklearn.tree.DecisionTreeRegressor().

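Before the project snippets, here is a minimal self-contained sketch of the basic fit/predict workflow. The synthetic data and the max_depth value are illustrative choices, not taken from any of the projects below:

import numpy as np
from sklearn.tree import DecisionTreeRegressor

# synthetic 1-D regression target: a noisy sine wave
rng = np.random.RandomState(0)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel() + 0.1 * rng.randn(80)

# a shallow tree; max_depth limits how finely the tree partitions X
reg = DecisionTreeRegressor(max_depth=3, random_state=0)
reg.fit(X, y)

# predict on a dense grid to see the piecewise-constant fit
X_grid = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_pred = reg.predict(X_grid)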
Project: DSI-personal-reference-kit    Author: teb311
def cross_validate_best_known():
    '''
        Import and clean the tractor data, then run cross validation on each of the three models we are
        training here: a RandomForest, a GradientBoost, and an AdaBoost backed by a DecisionTree. Print
        the scores.

        The parameters we're using here are the "best" that we've found so far using a grid search.
    '''
    tractor_data = pd.read_csv('data/train.csv')
    tractor_data = cln.clean_all(tractor_data)
    X = tractor_data
    y = tractor_data.pop('SalePrice')

    rf = RandomForestRegressor(max_features=2, min_samples_split=4, n_estimators=50, min_samples_leaf=2)
    gb = GradientBoostingRegressor(loss='quantile', learning_rate=0.0001, n_estimators=50, max_features='log2', min_samples_split=2, max_depth=1)
    ada_tree_backing = DecisionTreeRegressor(max_features='sqrt', splitter='random', min_samples_split=4, max_depth=3)
    ab = AdaBoostRegressor(ada_tree_backing, learning_rate=0.1, loss='square', n_estimators=1000)

    validate.cross_v_scores([rf, gb, ab], X, y)
    # RandomForestRegressor -- RMLSE: -0.596797712098, R2: 0.0272065373946
    # GradientBoostingRegressor -- RMLSE: -0.996134592541, R2: -2.37202164829
    # AdaBoostRegressor -- RMLSE: -0.706385708459, R2: -0.103966980393
Project: Parallel-SGD    Author: angadgill
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Project: zhihu-machine-learning-challenge-2017    Author: HouJP
def load(file_path):
        with open(file_path + '.params', 'r') as params_file:
            params = json.load(params_file)

        weak_learners = list()
        for wl_id in range(params['n_round']):
            # wl = DecisionTreeRegressor(max_depth=params['max_depth'],
            #                            max_features=params['max_features'],
            #                            min_samples_leaf=params['min_samples_leaf'])
            wl = joblib.load(file_path + '.wl%d' % wl_id)
            weak_learners.append(wl)

        rankgbm = RankGBM(params['vote_k'],
                          n_round=params['n_round'],
                          max_depth=params['max_depth'],
                          max_features=params['max_features'],
                          min_samples_leaf=params['min_samples_leaf'],
                          learn_rate=params['learn_rate'])
        rankgbm.weak_learners = weak_learners

        return rankgbm
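Assuming load is exposed as a @staticmethod on RankGBM (the signature takes no self or cls), restoring a trained ranker reduces to rankgbm = RankGBM.load(file_path), where file_path is a prefix: the method expects <file_path>.params (JSON hyperparameters) plus one joblib-pickled <file_path>.wl<i> weak learner per boosting round.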
Project: strategy    Author: kanghua309
def model_cross_valid(X,Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, random_state=seed)
    def build_model(model_name):
        model = model_name()
        return model
    scoring = 'neg_mean_squared_error'
    # TODO: + random forest, boost, lstm, gbdt

    for model_name in [LinearRegression,ElasticNet]:
    #for model_name in [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
Project: AirTicketPredicting    Author: junlulocky
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                             'max_depth': range(2,1000),
                             }
                            ]


        reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_true, y_pred)
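Note that this snippet targets an older scikit-learn (Python 2 print statements, scoring='mean_squared_error', reg.grid_scores_). On scikit-learn 0.18 and later the scorer is named 'neg_mean_squared_error' and grid_scores_ was replaced by the cv_results_ dict; a rough modern equivalent of the reporting loop (a sketch, not code from the project) is:

reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5,
                   scoring='neg_mean_squared_error')
reg.fit(self.X_train, self.y_train)
print(reg.best_params_)
for mean, std, params in zip(reg.cv_results_['mean_test_score'],
                             reg.cv_results_['std_test_score'],
                             reg.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))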
Project: coremltools    Author: apple
def convert(model, feature_names, target):
    """Convert a decision tree model to protobuf format.

    Parameters
    ----------
    model : DecisionTreeRegressor
        A trained scikit-learn tree model.

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model
    """
    if not(_HAS_SKLEARN):
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    _sklearn_util.check_expected_type(model, _tree.DecisionTreeRegressor)
    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'tree_') and model.tree_ is not None)
    return _MLModel(_convert_tree_ensemble(model, feature_names, target))
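For context, converters like this are normally reached through the public coremltools API rather than called directly. A hedged usage sketch (variable names are illustrative; assumes a fitted model and a coremltools build with scikit-learn support):

import coremltools
from sklearn.tree import DecisionTreeRegressor

model = DecisionTreeRegressor(max_depth=4).fit(X_train, y_train)  # X_train, y_train assumed defined
mlmodel = coremltools.converters.sklearn.convert(model, feature_names, 'target')
mlmodel.save('decision_tree.mlmodel')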
Project: coremltools    Author: apple
def test_tree_regressor(self):
        for dtype in self.number_data_type.keys():
            scikit_model = DecisionTreeRegressor(random_state=1)
            data = self.scikit_data['data'].astype(dtype)
            target = self.scikit_data['target'].astype(dtype)
            scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
            test_data = data[0].reshape(1, -1)
            self._check_tree_model(spec, 'multiArrayType', 'doubleType', 1)
            coreml_model = create_model(spec)
            try:
                self.assertEqual(scikit_model.predict(test_data)[0].dtype,
                                 type(coreml_model.predict({'data': test_data})['target']))
                self.assertEqual(scikit_model.predict(test_data)[0],
                                 coreml_model.predict({'data': test_data})['target'],
                                 msg="{} != {} for Dtype: {}".format(
                                     scikit_model.predict(test_data)[0],
                                     coreml_model.predict({'data': test_data})['target'],
                                     dtype
                                 )
                                 )
            except RuntimeError:
                print("{} not supported. ".format(dtype))
Project: menrva    Author: amirziai
def decision_tree(X, y, regression, max_depth=3):
    from sklearn.tree import export_graphviz
    from sklearn.externals.six import StringIO  
    from IPython.core.pylabtools import figsize
    from IPython.display import Image
    figsize(12.5, 6)
    import pydot

    if regression:
        clf = DecisionTreeRegressor(max_depth=max_depth)
    else:
        clf = DecisionTreeClassifier(max_depth=max_depth)

    clf.fit(X, y)
    dot_data = StringIO()  
    export_graphviz(clf, out_file=dot_data, feature_names=list(X.columns),
                    filled=True, rounded=True,)
    graph = pydot.graph_from_dot_data(dot_data.getvalue())  
    return Image(graph.create_png())
Project: ML-note    Author: JasonK93
def test_DecisionTreeRegressor(*data):
    '''
    test DT regression
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("Training score:{0}".format(regr.score(X_train,y_train)))
    print("Testing score:{0}".format(regr.score(X_test,y_test)))
    ##graph
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train sample",c='g')
    ax.scatter(X_test, y_test, label="test sample",c='r')
    ax.plot(X, Y, label="predict_value", linewidth=2,alpha=0.5)
    ax.set_xlabel("data")
    ax.set_ylabel("target")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()
Project: Parallel-SGD    Author: angadgill
def bench_scikit_tree_regressor(X, Y):
    """Benchmark with scikit-learn decision tree regressor"""

    from sklearn.tree import DecisionTreeRegressor

    gc.collect()

    # start time
    tstart = datetime.now()
    clf = DecisionTreeRegressor()
    clf.fit(X, Y).predict(X)
    delta = (datetime.now() - tstart)
    # stop time

    scikit_regressor_results.append(
        delta.seconds + delta.microseconds / mu_second)
Project: Parallel-SGD    Author: angadgill
def test_importances_gini_equal_mse():
    # Check that gini is equivalent to mse for binary output variable

    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occurs at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0).fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0).fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)
    assert_array_equal(clf.tree_.feature, reg.tree_.feature)
    assert_array_equal(clf.tree_.children_left, reg.tree_.children_left)
    assert_array_equal(clf.tree_.children_right, reg.tree_.children_right)
    assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples)
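The equivalence asserted above is not accidental: for a binary 0/1 target, a node whose fraction of positive samples is p has Gini impurity 2p(1-p) and label variance (the MSE criterion) p(1-p). The two impurities differ only by a constant factor of 2, so every candidate split is ranked identically and both trees grow the same structure.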
Project: Parallel-SGD    Author: angadgill
def test_friedman_mse_in_graphviz():
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    for finding in finditer("\[.*?samples.*?\]", dot_data.getvalue()):
        assert_in("friedman_mse", finding.group())
Project: Parallel-SGD    Author: angadgill
def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Project: Parallel-SGD    Author: angadgill
def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Project: Parallel-SGD    Author: angadgill
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
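The base_estimator__max_depth keys use scikit-learn's double-underscore convention: GridSearchCV routes param__subparam settings to the nested estimator via set_params, which is what lets the search tune the depth of the trees inside the AdaBoost ensemble.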
Project: dstk    Author: jotterbach
def _get_shape_for_attribute(attribute_data, labels, class_weights, feature_name, criterion, splitter,
                             max_depth, min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
                             max_features, random_state, max_leaf_nodes, presort):

    dtr = DecisionTreeRegressor(criterion=criterion,
                                splitter=splitter,
                                max_depth=max_depth,
                                min_samples_split=min_samples_split,
                                min_samples_leaf=min_samples_leaf,
                                min_weight_fraction_leaf=min_weight_fraction_leaf,
                                max_features=max_features,
                                random_state=random_state,
                                max_leaf_nodes=max_leaf_nodes,
                                presort=presort)

    dtr.fit(attribute_data.reshape(-1, 1), labels)
    return feature_name, _get_sum_of_gamma_correction(dtr.tree_, attribute_data, labels, class_weights, feature_name)
Project: HousePricePredictionKaggle    Author: Nuwantha
def create_model(list_of_features):

    n_estimators=10000 
    n_jobs=4 
    x_train=data_frame[list_of_features] 
    y_train=data_frame.iloc[:,-1]
    x_test=data_frame_test[list_of_features] 
    random_state=0

    forest=BaggingRegressor(base_estimator=DecisionTreeRegressor(),n_estimators=n_estimators,random_state=random_state, n_jobs=n_jobs)
    forest.fit(x_train[list_of_features],y_train)
    Y_pred=forest.predict(data_frame_test[list_of_features].as_matrix()) 

    i=0
    file=open('submission.csv','w')
    header="Id,SalePrice"
    header=header+'\n'
    file.write(header)
    for id in (data_frame_test['Id']):
        str="{},{}".format(id,Y_pred[i])
        str=str+'\n'
        file.write(str)
        i+=1
    file.close()  # flush and close the submission file
Project: Machine-Learning    Author: Jegathis
def RunTestor():
    # Create Volume test
    vXs, vYs = generateBaseVector(volume_filename, "volume")
    vXs_more = createVolumeVector(vXs, weather_filename)
    vXs_fin = generateProcessedVolumeVector(vXs_more)

    vX_train, vX_test, vy_train, vy_test = train_test_split(vXs_fin, vYs, test_size=0.1)
    volume_reg = DecisionTreeRegressor()
    volume_reg.fit(vX_train, vy_train)
    vResult = volume_reg.predict(vX_test)

    v_mape = VolumeMAPE(vX_test, vResult, vy_test)

    # Create Travel Time test
    tXs, tYs = generateBaseVector(travel_filename, "travel_time")
    tXs_more = createTravelTimeVector(tXs, weather_filename)
    tXs_fin = generateProcessedTravelTimeVector(tXs_more)

    tX_train, tX_test, ty_train, ty_test = train_test_split(tXs_fin, tYs, test_size=0.1)
    travelTime_reg = DecisionTreeRegressor()
    travelTime_reg.fit(tX_train, ty_train)
    tResult = travelTime_reg.predict(tX_test)

    t_mape = TravelTimeMAPE(tX_test, tResult, ty_test)

    print("MAPE of Volume Prediction: " + str(v_mape) + "\n")
    print("MAPE of Travel Prediction: " + str(t_mape) + "\n")
    return True
Project: Supply-demand-forecasting    Author: LevinJ
def setClf(self):
        min_samples_split = 10
        self.clf = DecisionTreeRegressor(random_state=0, min_samples_split= min_samples_split)
        return
Project: skboost    Author: hbldh
def test_logitboost_musk_fitting():
    c = LogitBoostClassifier(
            base_estimator=DecisionTreeRegressor(max_depth=1),
            n_estimators=30,
            learning_rate=1.0
    )
    data = MUSK1()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.6)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.05
Project: skboost    Author: hbldh
def test_logitboost_hastie_fitting():
    c = LogitBoostClassifier(
            base_estimator=DecisionTreeRegressor(max_depth=1),
            n_estimators=30,
            learning_rate=1.0
    )
    data = Hastie_10_2()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.5)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.2
Project: skboost    Author: hbldh
def test_gentleboost_musk_fitting():
    c = GentleBoostClassifier(
        base_estimator=DecisionTreeRegressor(max_depth=1),
        n_estimators=30,
        learning_rate=1.0
    )
    data = MUSK1()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.5)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.1
Project: skboost    Author: hbldh
def test_gentleboost_hastie_fitting():
    c = GentleBoostClassifier(
        base_estimator=DecisionTreeRegressor(max_depth=1),
        n_estimators=30,
        learning_rate=1.0
    )
    data = Hastie_10_2()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.5)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.2
Project: coremltools    Author: gsabran
def convert(model, feature_names, target):
    """Convert a decision tree model to protobuf format.

    Parameters
    ----------
    model : DecisionTreeRegressor
        A trained scikit-learn tree model.

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model
    """
    if not(_HAS_SKLEARN):
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    _sklearn_util.check_expected_type(model, _tree.DecisionTreeRegressor)
    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'tree_') and model.tree_ is not None)
    return _MLModel(_convert_tree_ensemble(model, feature_names, target))
Project: AutoML-Challenge    Author: postech-mlg-exbrain
def fit(self, X, y, sample_weight=None):
        from sklearn.tree import DecisionTreeRegressor

        self.max_features = float(self.max_features)
        if self.max_depth == "None":
            max_depth = None  # no depth limit; bind the local used below
        else:
            num_features = X.shape[1]
            max_depth = max(1, int(np.round(self.max_depth * num_features, 0)))
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        if self.max_leaf_nodes == "None":
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        self.estimator = DecisionTreeRegressor(
            criterion=self.criterion,
            max_depth=max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            random_state=self.random_state)
        self.estimator.fit(X, y, sample_weight=sample_weight)
        return self
Project: dexml    Author: DexGroves
def test_cart_d1_agrees_with_scikit():
    d_cart = GaussCART(X, y, 1)
    d_pred = d_cart.predict(X)

    sk_cart = tree.DecisionTreeRegressor(max_depth=1)
    sk_cart = sk_cart.fit(X, y)
    sk_pred = sk_cart.predict(X)

    d_error = np.round(sose(y, d_pred), 6)
    sk_error = np.round(sose(y, sk_pred), 6)

    assert d_error == sk_error
Project: dexml    Author: DexGroves
def test_cart_d3_agrees_with_scikit():
    d_cart = GaussCART(X, y, 3)
    d_pred = d_cart.predict(X)

    sk_cart = tree.DecisionTreeRegressor(max_depth=3)
    sk_cart = sk_cart.fit(X, y)
    sk_pred = sk_cart.predict(X)

    d_error = np.round(sose(y, d_pred), 6)
    sk_error = np.round(sose(y, sk_pred), 6)

    assert d_error == sk_error
Project: strategy    Author: kanghua309
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def build_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print ("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
Project: AirTicketPredicting    Author: junlulocky
def __init__(self, isTrain):
        super(RegressionAdaBoost, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create AdaBoost regression object
        decisionReg = DecisionTreeRegressor(max_depth=10)
        rng = np.random.RandomState(1)
        self.adaReg = AdaBoostRegressor(decisionReg,
                          n_estimators=400,
                          random_state=rng)
Project: AirTicketPredicting    Author: junlulocky
def __init__(self, isTrain):
        super(RegressionDecisionTree, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create decision tree regression object
        self.model = DecisionTreeRegressor(max_depth=7, max_features=None)
Project: AirTicketPredicting    Author: junlulocky
def drawValidationCurve(self):
        """
        To draw the validation curve
        :return:NA
        """
        X, y = self.X_train, self.y_train.ravel()
        indices = np.arange(y.shape[0])
        #np.random.shuffle(indices)
        X, y = X[indices], y[indices]

        max_depths = range(2,60)
        train_scores, valid_scores = validation_curve(DecisionTreeRegressor(max_features=None), X, y, "max_depth",
                                              max_depths, cv=5, scoring='mean_squared_error')
        train_scores = -1.0/5 *train_scores
        valid_scores = -1.0/5 *valid_scores

        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        valid_scores_mean = np.mean(valid_scores, axis=1)
        valid_scores_std = np.std(valid_scores, axis=1)

        plt.fill_between(max_depths, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
        plt.fill_between(max_depths, valid_scores_mean - valid_scores_std,
                         valid_scores_mean + valid_scores_std, alpha=0.1, color="g")
        plt.plot(max_depths, train_scores_mean, 'o-', color="r",
                 label="Training MSE")
        plt.plot(max_depths, valid_scores_mean, '*-', color="g",
                 label="Cross-validation MSE")

        plt.legend(loc="best")

        plt.xlabel('Max Depth')
        plt.ylabel('MSE')
        plt.title('Validation Curve with Decision \nTree Regression on the parameter of Max Depth')
        plt.grid(True)
        plt.show()
Project: dask-searchcv    Author: dask
def test_search_cv_results_none_param():
    X, y = [[1], [2], [3], [4], [5]], [0, 0, 0, 0, 1]
    estimators = (DecisionTreeRegressor(), DecisionTreeClassifier())
    est_parameters = {"random_state": [0, None]}
    cv = KFold(random_state=0)

    for est in estimators:
        grid_search = dcv.GridSearchCV(est, est_parameters, cv=cv).fit(X, y)
        assert_array_equal(grid_search.cv_results_['param_random_state'],
                           [0, None])
Project: enhancement    Author: lwzswufe
def regress(y, x, test_x=[]):
    if len(test_x) == 0:
        test_x = x
    clf = DecisionTreeRegressor()
    clf.fit(x, y)
    y_p = clf.predict(test_x)
    plt.scatter(y, y_p)
Project: DSI-personal-reference-kit    Author: teb311
def ada_boost_tree_grid_search():
    ada_boost_tree_grid = {
        'base_estimator__max_features': ['sqrt'],
        'base_estimator__splitter': ['best', 'random'],
        'base_estimator__min_samples_split': [2, 4],
        'base_estimator__max_depth': [1, 3],
        'n_estimators': [50, 100, 1000],
        'learning_rate': [.001, .01, .1],
        'loss': ['linear', 'square', 'exponential']
    }
    abr = AdaBoostRegressor(DecisionTreeRegressor())

    return ada_boost_tree_grid, abr
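A hedged sketch of how the returned grid and estimator would be consumed downstream (the GridSearchCV call, cv, and scoring choices are assumptions, not shown in the project):

from sklearn.model_selection import GridSearchCV

param_grid, abr = ada_boost_tree_grid_search()
search = GridSearchCV(abr, param_grid, cv=5, scoring='neg_mean_squared_error')
search.fit(X_train, y_train)  # X_train, y_train assumed defined
print(search.best_params_)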
Project: coremltools    Author: apple
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeRegressor

        scikit_data = load_boston()
        scikit_model = DecisionTreeRegressor(random_state = 1)
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
Project: coremltools    Author: apple
def test_conversion_bad_inputs(self):

        # Error on converting an untrained model
        with self.assertRaises(Exception):
            model = DecisionTreeRegressor()
            spec = skl_converter.convert(model, 'data', 'out')

        # Check the expected class during conversion.
        from sklearn.preprocessing import OneHotEncoder
        with self.assertRaises(Exception):
            model = OneHotEncoder()
            spec = skl_converter.convert(model, 'data', 'out')
Project: coremltools    Author: apple
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeRegressor

        # Load data and train model
        scikit_data = load_boston()
        self.scikit_data = scikit_data
        self.X = scikit_data['data']
        self.target = scikit_data['target']
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
Project: eezzy    Author: 3Blades
def spot_check(X, y, type='regression'):  # type: 'regression' or 'classification'
    if type == 'regression':
        models = [
        (LinearRegression(), 'Ordinary Least Squares'),
        (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
        (Ridge(), 'Ridge (alpha 1.0)'),
        (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
        (Lasso(), 'Lasso (alpha 1.0)'),
        (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
        (ElasticNet(), 'ElasticNet (alpha 1.0)'),
        (DecisionTreeRegressor(), 'Decision Tree'),
        (KNeighborsRegressor(), 'K-Nearest Neighbors'),

#         (RandomForestRegressor(), 'Random Forest Regressor'),
#         (BaggingRegressor(), 'Bagging Regressor'),
#         (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
#         (SVR(), 'Support Vector Regression')
    ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)

    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
Project: poormining    Author: bowenpay
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        # rng = np.random.RandomState(1)
        clf = AdaBoostRegressor(DecisionTreeRegressor())
        clf.fit(X, Y)
        return clf
Project: poormining    Author: bowenpay
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        clf = DecisionTreeRegressor()
        clf.fit(X, Y)
        return clf
Project: poormining    Author: bowenpay
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        clf = DecisionTreeRegressor(max_depth=4)
        clf.fit(X, Y)
        return clf
Project: pines    Author: dmitru
def test_boston(self):
        from sklearn.tree import DecisionTreeRegressor as DecisionTreeRegressorSklearn
        model = DecisionTreeRegressor(max_n_splits=3)
        model_sklearn = DecisionTreeRegressorSklearn()

        dataset = load_boston()
        mse = []
        mse_sklearn = []

        for fold in range(5):
            X_train, X_test, y_train, y_test = train_test_split(
                dataset.data, dataset.target, test_size=0.33)

            model.fit(X_train, y_train)
            y = model.predict(X_test)
            mse.append(mean_squared_error(y, y_test))

            model_sklearn.fit(X_train, y_train)
            y = model_sklearn.predict(X_test)
            mse_sklearn.append(mean_squared_error(y, y_test))

        mean_mse = np.mean(mse)
        mean_mse_sklearn = np.mean(mse_sklearn)
        print(mean_mse, mean_mse_sklearn)
        # Check that our model differs in MSE no worse than 20%
        self.assertTrue(np.abs(mean_mse - mean_mse_sklearn) / mean_mse_sklearn < 0.2)
Project: pines    Author: dmitru
def test_boston(self):
        from sklearn.tree import DecisionTreeRegressor as DecisionTreeRegressorSklearn
        model = DecisionTreeRegressor(tree_type='oblivious', max_n_splits=3)
        model_sklearn = DecisionTreeRegressorSklearn()

        dataset = load_boston()
        mse = []
        mse_sklearn = []

        for fold in range(5):
            X_train, X_test, y_train, y_test = train_test_split(
                dataset.data, dataset.target, test_size=0.33)

            model.fit(X_train, y_train)
            y = model.predict(X_test)
            mse.append(mean_squared_error(y, y_test))

            model_sklearn.fit(X_train, y_train)
            y = model_sklearn.predict(X_test)
            mse_sklearn.append(mean_squared_error(y, y_test))

        mean_mse = np.mean(mse)
        mean_mse_sklearn = np.mean(mse_sklearn)
        print(mean_mse, mean_mse_sklearn)
        # Check that our model differs in MSE no worse than 50%
        self.assertTrue(np.abs(mean_mse - mean_mse_sklearn) / mean_mse_sklearn < 0.5)


    # def test_check_estimators(self):
    #     """
    #     Tests that models adhere to scikit-learn Estimator interface.
    #     """
    #     check_estimator(DecisionTreeClassifier)
Project: FeatureHub    Author: HDI-Project
def __init__(self, problem_type):
        self.problem_type = problem_type

        if self._is_classification():
            self.model = DecisionTreeClassifier(random_state=RANDOM_STATE+1)
        elif self._is_regression():
            self.model = DecisionTreeRegressor(random_state=RANDOM_STATE+2)
        else:
            raise NotImplementedError
Project: python-machine-learning-book    Author: jeremyn
def evaluate_decision_tree_regression(X, y):
    tree = DecisionTreeRegressor(max_depth=3)
    tree.fit(X, y)

    sort_index = X.flatten().argsort()

    lin_regplot(X[sort_index], y[sort_index], tree)
    plt.xlabel('% lower status of the population [LSTAT]')
    plt.ylabel("Price in $1000's [MEDV]")

    plt.show()
Project: Kaggle_HomeDepot    Author: ChenglongChen
def __init__(self, base_estimator=None, n_estimators=50, max_features=1.0,
                max_depth=6, learning_rate=1.0, loss='linear', random_state=None):
        if base_estimator and base_estimator == 'etr':
            base_estimator = ExtraTreeRegressor(max_depth=max_depth,
                                        max_features=max_features)
        else:
            base_estimator = DecisionTreeRegressor(max_depth=max_depth,
                                        max_features=max_features)

        self.model = sklearn.ensemble.AdaBoostRegressor(
                                    base_estimator=base_estimator,
                                    n_estimators=n_estimators,
                                    learning_rate=learning_rate,
                                    random_state=random_state,
                                    loss=loss)
Project: ML-note    Author: JasonK93
def test_DecisionTreeRegressor_splitter(*data):
    '''
    test the performance with different splitters
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    splitters=['best','random']
    for splitter in splitters:
        regr = DecisionTreeRegressor(splitter=splitter)
        regr.fit(X_train, y_train)
        print("Splitter {0}".format(splitter))
        print("Training score:{0}".format(regr.score(X_train,y_train)))
        print("Testing score:{0}".format(regr.score(X_test,y_test)))
Project: ML-note    Author: JasonK93
def test_DecisionTreeRegressor_depth(*data,maxdepth):
    '''
    test the score with different max_depth
    :param data: train_data, test_data, train_value, test_value
    :param maxdepth: an integer
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    depths=np.arange(1,maxdepth)
    training_scores=[]
    testing_scores=[]
    for depth in depths:
        regr = DecisionTreeRegressor(max_depth=depth)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train,y_train))
        testing_scores.append(regr.score(X_test,y_test))

    ## graph
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(depths,training_scores,label="traing score")
    ax.plot(depths,testing_scores,label="testing score")
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()
Project: Parallel-SGD    Author: angadgill
def test_presort_sparse():
    ests = (DecisionTreeClassifier(presort=True),
            DecisionTreeRegressor(presort=True))
    sparse_matrices = (csr_matrix, csc_matrix, coo_matrix)

    y, X = datasets.make_multilabel_classification(random_state=0,
                                                   n_samples=50,
                                                   n_features=1,
                                                   n_classes=20)
    y = y[:, 0]

    for est, sparse_matrix in product(ests, sparse_matrices):
        yield check_presort_sparse, est, sparse_matrix(X), y
Project: Parallel-SGD    Author: angadgill
def test_oob_score_regression():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                           n_estimators=50,
                           bootstrap=True,
                           oob_score=True,
                           random_state=rng).fit(X_train, y_train)

    test_score = clf.score(X_test, y_test)

    assert_less(abs(test_score - clf.oob_score_), 0.1)

    # Test with few estimators
    assert_warns(UserWarning,
                 BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                  n_estimators=1,
                                  bootstrap=True,
                                  oob_score=True,
                                  random_state=rng).fit,
                 X_train,
                 y_train)
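As background for the assertion: with bootstrap=True each base tree is fit on a resample that omits roughly 37% (about 1/e) of the training rows, and oob_score_ scores every training sample using only the trees that never saw it. That out-of-bag estimate should therefore track the held-out test score, which is exactly what the abs(test_score - clf.oob_score_) < 0.1 check verifies.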