我们从Python开源项目中提取了以下8个代码示例，用于说明如何使用sklearn.model_selection模块。
def _cv_len(cv, X, y):
    """Return the number of splits produced by a cross-validation object.

    Works for both sklearn-0.17 and sklearn-0.18 style objects; the
    module-level ``SK18`` flag selects which API is used.

    Parameters
    ----------
    cv : `sklearn.cross_validation._PartitionIterator` or
        `sklearn.model_selection.BaseCrossValidator`
        The cross-validation object whose length is wanted. When ``SK18``
        is falsy the length is ``len(cv)``; otherwise it is
        ``cv.get_n_splits(X, y)``.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The data being fit in the grid search. Only forwarded to
        ``get_n_splits``.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search. Only forwarded to
        ``get_n_splits``.

    Returns
    -------
    int
    """
    if SK18:
        # New-style validators do not define __len__; they need the data.
        return cv.get_n_splits(X, y)
    return len(cv)
def _set_cv(cv, X, y, classifier):
    """Validate ``cv`` and return a cross-validator object.

    Delegates to ``check_cv``, choosing the call signature according to
    the module-level ``SK18`` flag (sklearn-0.17 vs. sklearn-0.18 API).

    Parameters
    ----------
    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. An int is converted by
        ``check_cv`` into the appropriate class of cross-validator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The data being fit in the grid search. Only passed to
        ``check_cv`` when ``SK18`` is falsy.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier.

    Returns
    -------
    `_PartitionIterator` or `BaseCrossValidator`
    """
    if SK18:
        # The newer check_cv signature takes no X argument.
        return check_cv(cv, y, classifier)
    return check_cv(cv, X, y, classifier)
def _cross_val(data, est, cv, n_jobs):
    """Return the mean cross-validated score of ``est`` on ``data``.

    Scoring is done with ``_gaussian_loglik_scorer``; ``cv`` and ``n_jobs``
    are forwarded unchanged to ``cross_val_score``.
    """
    try:
        from sklearn.model_selection import cross_val_score
    except ImportError:
        # XXX support sklearn < 0.18
        from sklearn.cross_validation import cross_val_score

    fold_scores = cross_val_score(
        est,
        data,
        cv=cv,
        n_jobs=n_jobs,
        scoring=_gaussian_loglik_scorer,
    )
    return np.mean(fold_scores)
def __init__(self, clf=None, le=None):
    # type: (sklearn.model_selection.GridSearchCV, sklearn.preprocessing.LabelEncoder) -> None
    """Construct a new intent classifier using the sklearn framework.

    :param clf: classifier to store on the instance (may be None)
    :param le: label encoder to use; a fresh ``LabelEncoder`` is created
               when none is given
    """
    from sklearn.preprocessing import LabelEncoder

    self.le = LabelEncoder() if le is None else le
    self.clf = clf
def train(self, training_data, config, **kwargs):
    # type: (TrainingData, RasaNLUConfig, **Any) -> None
    """Train the intent classifier on a data set.

    Runs a grid search over an SVC and stores the fitted search object in
    ``self.clf``. If the training data contains fewer than two distinct
    intents, a warning is logged and ``self.clf`` is left untouched.

    :param training_data: object whose ``intent_examples`` provide an
        "intent" label and a "text_features" vector per example
    :param config: configuration; ``config["num_threads"]`` sets the number
        of parallel jobs, and the "intent_classifier_sklearn" entry may
        supply "C" (list of candidate values) and "kernel"
    :param kwargs: accepted but not used by this method
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC
    import numpy as np

    labels = [e.get("intent") for e in training_data.intent_examples]

    if len(set(labels)) < 2:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning("Can not train an intent classifier. Need at least 2 different classes. "
                       "Skipping training of intent classifier.")
        return

    y = self.transform_labels_str2num(labels)
    X = np.stack([example.get("text_features") for example in training_data.intent_examples])

    sklearn_config = config.get("intent_classifier_sklearn")
    C = sklearn_config.get("C", [1, 2, 5, 10, 20, 100])
    kernel = sklearn_config.get("kernel", "linear")
    # dirty str fix because sklearn is expecting str not instance of basestr...
    tuned_parameters = [{"C": C, "kernel": [str(kernel)]}]

    # aim for 5 examples in each fold, but never fewer than 2 folds and
    # never more than MAX_CV_FOLDS
    cv_splits = max(2, min(MAX_CV_FOLDS, np.min(np.bincount(y)) // 5))

    self.clf = GridSearchCV(SVC(C=1, probability=True, class_weight='balanced'),
                            param_grid=tuned_parameters,
                            n_jobs=config["num_threads"],
                            cv=cv_splits,
                            scoring='f1_weighted',
                            verbose=1)
    self.clf.fit(X, y)
def _set_cv(cv, estimator=None, X=None, y=None):
    """Set the default CV depending on whether clf is classifier/regressor.

    Parameters
    ----------
    cv : int | str | object
        If an integer (Python or NumPy), the number of folds of a
        ``StratifiedKFold`` (classifier) or ``KFold`` (otherwise). If a
        str, the name of a cross-validation class looked up in the sklearn
        cross-validation module. Anything else is handed to ``check_cv``
        as is.
    estimator : 'classifier' | 'regressor' | object
        Either one of the two literal strings, or an object that is passed
        to ``is_classifier`` to decide between classification and
        regression.
    X : array-like | None
        Only forwarded to ``check_cv`` on sklearn < 0.18.
    y : array-like | None
        The targets; used to build and split the folds.
        NOTE(review): despite the ``None`` default, ``y`` appears to be
        required by the fold extraction below -- confirm with callers.

    Returns
    -------
    cv : object
        The cross-validation object returned by ``check_cv``.
    cv_splits : list of tuple
        The ``(train, test)`` pairs generated by ``cv``.

    Raises
    ------
    ValueError
        If ``cv`` is a str that names no attribute of the sklearn module,
        or if any fold has an empty training set.
    NotImplementedError
        On sklearn < 0.18, if ``cv`` is a str naming a class other than
        ``KFold`` or ``LeaveOneOut``.
    """
    # Detect whether classification or regression
    if estimator in ['classifier', 'regressor']:
        est_is_classifier = estimator == 'classifier'
    else:
        est_is_classifier = is_classifier(estimator)

    # ``np.int`` was a plain alias of the builtin ``int`` and no longer
    # exists in recent NumPy releases; ``np.integer`` additionally accepts
    # NumPy integer scalars.
    int_types = (int, np.integer)

    # Setup CV
    if check_version('sklearn', '0.18'):
        from sklearn import model_selection as models
        from sklearn.model_selection import (check_cv, StratifiedKFold, KFold)
        if isinstance(cv, int_types):
            XFold = StratifiedKFold if est_is_classifier else KFold
            cv = XFold(n_splits=cv)
        elif isinstance(cv, str):
            if not hasattr(models, cv):
                raise ValueError('Unknown cross-validation')
            cv = getattr(models, cv)
            cv = cv()
        cv = check_cv(cv=cv, y=y, classifier=est_is_classifier)
    else:
        from sklearn import cross_validation as models
        from sklearn.cross_validation import (check_cv, StratifiedKFold, KFold)
        if isinstance(cv, int_types):
            if est_is_classifier:
                cv = StratifiedKFold(y=y, n_folds=cv)
            else:
                cv = KFold(n=len(y), n_folds=cv)
        elif isinstance(cv, str):
            if not hasattr(models, cv):
                raise ValueError('Unknown cross-validation')
            cv = getattr(models, cv)
            # Only these two classes can be built from the sample count alone.
            if cv.__name__ not in ['KFold', 'LeaveOneOut']:
                raise NotImplementedError('CV cannot be defined with str'
                                          ' for sklearn < .017.')
            cv = cv(len(y))
        cv = check_cv(cv=cv, X=X, y=y, classifier=est_is_classifier)

    # Extract train and test set to retrieve them at predict time
    if hasattr(cv, 'split'):
        cv_splits = [(train, test) for train, test in
                     cv.split(X=np.zeros_like(y), y=y)]
    else:
        # XXX support sklearn.cross_validation cv
        cv_splits = [(train, test) for train, test in cv]

    if not np.all([len(train) for train, _ in cv_splits]):
        raise ValueError('Some folds do not have any train epochs.')

    return cv, cv_splits