def __init__(self, datasets, pipelines):
    """Validate and store the datasets and pipelines.

    ``datasets`` may be a single ``BaseDataset`` (it is wrapped in a list)
    or a list of ``BaseDataset`` instances. ``pipelines`` must be a dict
    whose values are all ``BaseEstimator`` instances.

    Raises ``ValueError`` when either argument has the wrong type or
    contains an element of the wrong type.
    """
    # check dataset
    if not isinstance(datasets, list):
        if not isinstance(datasets, BaseDataset):
            raise ValueError("datasets must be a list or a dataset instance")
        # a lone dataset is accepted and wrapped
        datasets = [datasets]

    if not all(isinstance(entry, BaseDataset) for entry in datasets):
        raise ValueError("datasets must only contains dataset instance")

    self.datasets = datasets

    # check pipelines
    if not isinstance(pipelines, dict):
        raise ValueError("pipelines must be a dict or a Pipeline instance")

    if any(not isinstance(candidate, BaseEstimator)
           for candidate in pipelines.values()):
        raise ValueError("pipelines must only contains Pipelines instance")

    self.pipelines = pipelines
def is_estimator(model):
    """
    Report whether ``model`` is an estimator.

    A class is tested with ``issubclass`` and any other object with
    ``isinstance``; both tests are made against ``BaseEstimator``.

    Parameters
    ----------
    model : class or instance
        The object to test.

    Returns
    -------
    bool
        True if ``model`` is a ``BaseEstimator`` subclass or instance.
    """
    # Pick the membership test that matches what we were handed.
    membership_test = issubclass if inspect.isclass(model) else isinstance
    return membership_test(model, BaseEstimator)


# Alias for closer name to isinstance and issubclass
def test_subclass(self):
    """
    Check that a FeatureVisualizer is an instance of each expected base
    """
    visualizer = FeatureVisualizer()
    for expected_base in (TransformerMixin, BaseEstimator, Visualizer):
        self.assertIsInstance(visualizer, expected_base)

# def test_interface(self):
#     """
#     Test the feature visualizer interface
#     """
#
#     visualizer = FeatureVisualizer()
#     with self.assertRaises(NotImplementedError):
#         visualizer.poof()
def test_subclass(self):
    """
    Check that a TextVisualizer is an instance of each expected base
    """
    visualizer = TextVisualizer()
    for expected_base in (TransformerMixin, BaseEstimator, Visualizer):
        self.assertIsInstance(visualizer, expected_base)

# def test_interface(self):
#     """
#     Test the feature visualizer interface
#     """
#
#     visualizer = TextVisualizer()
#     with self.assertRaises(NotImplementedError):
#         visualizer.poof()
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor must accept a base estimator without sample_weight.

    The base estimator's ``fit`` below takes no ``sample_weight``; the
    random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):

        def fit(self, X, y):
            """Do nothing; note the absence of a sample_weight argument."""

        def predict(self, X):
            n_samples = X.shape[0]
            return np.zeros(n_samples)

    regressor = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    regressor.fit(X, y_regr)

    n_weights = len(regressor.estimator_weights_)
    n_errors = len(regressor.estimator_errors_)
    assert_equal(n_weights, n_errors)
def __init__(
    self,
    task: Task,
    models: List[BaseEstimator],
):
    """Initialise the parent class with ``task`` and keep ``models``.

    Args:
        task: Forwarded unchanged to the parent initialiser.
        models: Estimators stored on the instance as ``self.models``.
    """
    super().__init__(task)
    self.models = models
def __init__(
    self,
    model: BaseEstimator,
    task: Task,
    space: Space,
    scorer: Scorer,
    opt_logger: OptimizationLogger,
):
    """Store the collaborators on the instance.

    Each argument is kept under an attribute of the same name. ``best``
    starts out as ``None``.
    """
    self.model = model
    self.task = task
    self.space = space
    self.scorer = scorer
    self.opt_logger = opt_logger

    # Nothing has been recorded yet.
    self.best = None
def _check_sklearn_model(model):
    """Raise ``RuntimeError`` unless ``model`` is a regression estimator.

    ``model`` passes only if it is an instance of both ``BaseEstimator``
    and ``RegressorMixin``.
    """
    required_bases = (BaseEstimator, RegressorMixin)
    if all(isinstance(model, required) for required in required_bases):
        return

    raise RuntimeError('Needs to supply an instance of a scikit-learn '
                       'compatible regression class.')
def normalize_estimator(est):
    """Normalize an estimator.

    Returns a pair: the estimator's class name and the result of
    ``normalize_token`` applied to ``est.get_params()``.

    Note: Since scikit-learn requires duck-typing, but not sub-typing from
    ``BaseEstimator``, we sometimes need to call this function directly."""
    class_name = type(est).__name__
    params_token = normalize_token(est.get_params())
    return class_name, params_token
def predict_regression(x_test, trained_estimator):
    """
    Validate the estimator, then return its ``predict`` output for ``x_test``.

    Args:
        x_test: feature data passed straight to ``predict``
        trained_estimator (sklearn.base.BaseEstimator): a trained scikit-learn estimator

    Returns:
        whatever ``trained_estimator.predict(x_test)`` returns
    """
    # Raises if the object is not an estimator; the return value is not needed.
    validate_estimator(trained_estimator)
    return trained_estimator.predict(x_test)
def predict_classification(x_test, trained_estimator):
    """
    Validate the estimator, then return column 1 of its ``predict_proba`` output.

    Args:
        x_test: feature data passed straight to ``predict_proba``
        trained_estimator (sklearn.base.BaseEstimator): a trained scikit-learn estimator

    Returns:
        column 1 of the ``predict_proba`` result, passed through ``np.squeeze``
    """
    # Raises if the object is not an estimator; the return value is not needed.
    validate_estimator(trained_estimator)

    probabilities = trained_estimator.predict_proba(x_test)
    second_column = probabilities[:, 1]
    return np.squeeze(second_column)
def validate_estimator(possible_estimator):
    """
    Check that the type of an object is a subclass of scikit-learn's BaseEstimator

    Args:
        possible_estimator (object): Object of any type.

    Returns:
        True when the check passes - the True is used only for testing

    Raises:
        HealthcareAIError: when the object's type is not a BaseEstimator subclass
    """
    estimator_type = type(possible_estimator)
    if issubclass(estimator_type, BaseEstimator):
        return True

    message = 'Predictions require an estimator. You passed in {}, which is of type: {}'.format(
        possible_estimator, estimator_type)
    raise HealthcareAIError(message)
def default(self, obj):
    """Convert ``obj`` into something simpler, trying each known type in turn.

    The checks run in a fixed order: numpy integer, dtype, floating, bool
    and ndarray; then ``BaseEstimator``, ``Configuration``, ``Configurable``
    and ``set``. Anything else is handed to the parent ``default``; if
    that raises ``TypeError`` the object's ``str()`` is returned.

    Raises:
        ValueError: if a ``Configuration``'s params contain a "version"
            or "name" key, which would clash with the keys written here.
    """
    # numpy scalars and arrays -> plain Python equivalents
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.dtype):
        return str(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()

    if isinstance(obj, BaseEstimator):
        # handle sklearn estimators
        estimator_name = obj.__class__.__name__
        return Configuration(estimator_name, 0, obj.get_params())

    if isinstance(obj, Configuration):
        reserved_keys = ("version", "name")
        if any(key in obj.params for key in reserved_keys):
            raise ValueError()
        out = OrderedDict([("name", obj.name)])
        # version 0 is left out of the output
        if obj.version != 0:
            out["version"] = obj.version
        out.update(obj.params)
        return out

    if isinstance(obj, Configurable):
        return obj.get_config()

    if isinstance(obj, set):
        # Ensure deterministic order
        return sorted(obj)

    try:
        return super().default(obj)
    except TypeError:
        return str(obj)
def setclassifier(self, estimator=None):
    """Assign classifier for which decision boundary should be plotted.

    Parameters
    ----------
    estimator : BaseEstimator instance, optional
        (default=None, meaning a new KNeighborsClassifier(n_neighbors=10)).
        Classifier for which the decision boundary should be plotted.
        Must have probability estimates enabled (i.e.
        estimator.predict_proba must work). Make sure it is possible for
        probability estimates to get close to 0.5 (more specifically, as
        close as specified by acceptance_threshold).
    """
    if estimator is None:
        # Build the default per call. A default written in the signature
        # is created once at definition time, so every object relying on
        # it would share (and mutate) the same classifier instance.
        estimator = KNeighborsClassifier(n_neighbors=10)
    self.classifier = estimator
def _generate_bases_test(est, pd_est):
    """Build a test method comparing the base classes of ``est`` and ``pd_est``.

    The returned function takes the test case as ``self`` and asserts that:

    * ``pd_est`` is a ``FrameMixin`` and a ``base.BaseEstimator``;
    * ``est`` is not a ``FrameMixin``;
    * for every mixin checked, ``pd_est`` and ``est`` agree on whether
      they are an instance of it.
    """
    def test(self):
        self.assertIsInstance(pd_est, FrameMixin, pd_est)
        self.assertNotIsInstance(est, FrameMixin)
        self.assertIsInstance(pd_est, base.BaseEstimator)

        mixins = [
            base.ClassifierMixin,
            base.ClusterMixin,
            base.BiclusterMixin,
            base.TransformerMixin,
            base.MetaEstimatorMixin,
            base.RegressorMixin,
        ]
        # DensityMixin may be absent from ``base``; that is tolerated only
        # when _sklearn_ver is 17 or lower. Only AttributeError is caught,
        # so unrelated failures are never swallowed.
        try:
            mixins.append(base.DensityMixin)
        except AttributeError:
            if _sklearn_ver > 17:
                raise

        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test
def test_check_estimator():
    """Check that check_estimator rejects bad estimators and accepts real ones."""
    # tests that the estimator actually fails on "bad" estimators.
    # not a complete test of all checks, which are very extensive.
    failing_cases = [
        # check that we have a get_params and can clone
        (TypeError, "it does not implement a 'get_params' methods", object),
        # check that we have a fit method
        (AttributeError, "object has no attribute 'fit'", BaseEstimator),
        # check that fit does input validation
        (AssertionError, "TypeError not raised by fit", BaseBadClassifier),
        # check that predict does input validation (doesn't accept dicts in input)
        (AssertionError, "Estimator doesn't check for NaN and inf in predict",
         NoCheckinPredict),
    ]
    for expected_error, msg, bad_estimator in failing_cases:
        assert_raises_regex(expected_error, msg, check_estimator, bad_estimator)

    # check for sparse matrix input handling
    name = NoSparseClassifier.__name__
    msg = "Estimator " + name + " doesn't seem to fail gracefully on sparse data"
    # the check for sparse input handling prints to the stdout,
    # instead of raising an error, so as not to remove the original traceback.
    # that means we need to jump through some hoops to catch it.
    old_stdout = sys.stdout
    string_buffer = StringIO()
    sys.stdout = string_buffer
    try:
        check_estimator(NoSparseClassifier)
    except Exception:
        # Deliberately ignored: only the printed message matters here.
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass
    finally:
        sys.stdout = old_stdout
    assert_true(msg in string_buffer.getvalue())

    # doesn't error on actual estimator
    check_estimator(AdaBoostClassifier)
    check_estimator(MultiTaskElasticNet)
def get_attributes(obj):
    """Extract the state of ``obj`` in a form that depends on its type.

    * ``TfidfVectorizer``: delegated to ``get_tfidf_attributes``.
    * ``XGBClassifier``: the pickled object (bytes).
    * other ``BaseEstimator``: a dict of every attribute from ``dir(obj)``
      that does not start with ``_``, ends with ``_`` and is not listed in
      ``skip_attributes``.
    * ``None``: returns ``None``.

    Raises ``TypeError`` for any other object.
    """
    if isinstance(obj, TfidfVectorizer):
        return get_tfidf_attributes(obj)

    if isinstance(obj, XGBClassifier):
        return pickle.dumps(obj)

    if isinstance(obj, BaseEstimator):
        collected = {}
        for attr in dir(obj):
            if (attr.startswith('_')
                    or not attr.endswith('_')
                    or attr in skip_attributes):
                continue
            collected[attr] = getattr(obj, attr)
        return collected

    if obj is not None:
        raise TypeError(type(obj))

    return None
def set_attributes(parent, field, attributes):
    """Restore ``attributes`` onto the object stored at ``parent.<field>``.

    * ``TfidfVectorizer``: delegated to ``set_ifidf_attributes``.
    * ``XGBClassifier``: ``attributes`` is unpickled and the result
      replaces ``parent.<field>``.
    * other ``BaseEstimator``: each key/value pair is set on the object.
    * ``None``: nothing is done.

    Raises ``AttributeError`` if an attribute cannot be set and
    ``TypeError`` for any other object type.
    """
    obj = getattr(parent, field)

    if isinstance(obj, TfidfVectorizer):
        set_ifidf_attributes(obj, attributes)
        return

    if isinstance(obj, XGBClassifier):
        # NOTE: pickle.loads executes whatever the payload contains, so
        # ``attributes`` must come from a trusted source.
        restored = pickle.loads(attributes)
        setattr(parent, field, restored)
        return

    if isinstance(obj, BaseEstimator):
        for attr_name, attr_value in attributes.items():
            try:
                setattr(obj, attr_name, attr_value)
            except AttributeError:
                raise AttributeError(
                    "can't set attribute {} on {}".format(attr_name, obj))
        return

    if obj is not None:
        raise TypeError(type(obj))
def wrap(func):
    """Return ``func`` wrapped in a ``FuncWrapper``."""
    wrapped = FuncWrapper(func)
    return wrapped


# BaseEstimator figures out what our params are based on the signature
# of init, so we have to list them all here (though in this case it's
# just the function we're wrapping)
def fit(self, X, y=None, **params):
    """Load the configured datasets into ``self.tsList_`` and return ``self``.

    ``X``, ``y`` and ``params`` are accepted but not used here; everything
    passed to ``loadDatasets`` is read from attributes on the instance.
    """
    # have to load dataset here, not in init, to
    # work with BaseEstimator cloning
    dataset_name = self.datasetName
    load_options = {
        'seed': self.seed,
        'whichExamples': self.whichExamples,
        'instancesPerTs': self.instancesPerTs,
        'minNumInstances': self.minNumInstances,
        'maxNumInstances': self.maxNumInstances,
        'cropDataLength': self.cropDataLength,
    }
    self.tsList_ = loadDatasets(dataset_name, **load_options)
    return self
def run(self):
    """Generate the API ``.rst`` pages and build the documentation.

    Steps, in order:

    1. For every public name in ``sklearn.__all__`` that imports as
       ``sklearn.<name>``, find the attributes that are subclasses of
       ``sklearn.base.BaseEstimator`` and render one page per class from
       ``docs/source/api_class.rst.jinja2``.
    2. Render ``docs/source/api.rst`` from ``docs/source/api.rst.jinja2``
       with the collected module -> class-names mapping.
    3. Run ``make text`` in ``docs`` (its exit status is ignored), then
       ``make html`` (plus ``spelling lint linkcheck`` unless
       ``self.reduced_checks`` is set).

    Raises:
        subprocess.CalledProcessError: if the ``make html ...`` step fails.
    """
    import sklearn
    from sklearn import base
    from jinja2 import Template

    with open('docs/source/api_class.rst.jinja2') as template_file:
        class_template = Template(template_file.read())

    sklearn_modules = {}
    for mod_name in sklearn.__all__:
        if mod_name.startswith('_'):
            continue
        try:
            orig = __import__('sklearn.%s' % mod_name, fromlist=[''])
        except Exception:
            # Report the module that failed. ``orig`` cannot be used here:
            # it is unbound on the first failure and stale afterwards.
            for _ in range(20):
                print('failed to import %s' % mod_name)  # Tmp Ami
            continue

        sklearn_modules[mod_name] = []
        for name in dir(orig):
            c = getattr(orig, name)
            try:
                if not issubclass(c, base.BaseEstimator):
                    continue
            except TypeError:
                # issubclass rejects anything that is not a class
                continue

            full_class_name = 'ibex.sklearn.%s.%s' % (mod_name, name)
            sklearn_modules[mod_name].append(full_class_name)
            content = class_template.render(
                class_name=name,
                full_class_name=full_class_name)
            f_name = 'docs/source/api_ibex_sklearn_%s_%s.rst' % (
                mod_name, name.lower())
            with open(f_name, 'w') as out_file:
                out_file.write(content)

    with open('docs/source/api.rst.jinja2') as template_file:
        api_template = Template(template_file.read())
    content = api_template.render(sklearn_modules=sklearn_modules)
    with open('docs/source/api.rst', 'w') as out_file:
        out_file.write(content)

    # A failure of the text build is tolerated; the html build is not.
    subprocess.call(['make', 'text'], cwd='docs')

    html_command = ['make', 'html']
    if not self.reduced_checks:
        html_command += ['spelling', 'lint', 'linkcheck']
    subprocess.check_call(html_command, cwd='docs')