The following 15 code examples, extracted from open-source Python projects, illustrate how to use sklearn.svm.NuSVC().
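Before the project-specific examples, here is a minimal, self-contained sketch of the basic NuSVC workflow. The iris data and the nu/kernel values are illustrative choices for this sketch, not taken from any of the projects below:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import NuSVC

# Illustrative data: the iris dataset bundled with scikit-learn.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# nu (0 < nu <= 1) upper-bounds the fraction of margin errors and
# lower-bounds the fraction of support vectors.
clf = NuSVC(nu=0.5, kernel='rbf', gamma='scale')
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

Unlike SVC's unbounded C, nu is confined to (0, 1], which can make it the easier hyperparameter to search over. The examples that follow show NuSVC used directly, inside pipelines, in converter code, and in test suites.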
def convert(model, feature_names, target):
    """Convert a Nu-Support Vector Classification (NuSVC) model to the protobuf spec.

    Parameters
    ----------
    model: NuSVC
        A trained NuSVC model.

    feature_names: [str], optional (default=None)
        Name of the input columns.

    target: str, optional (default=None)
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model
    """
    if not _HAS_SKLEARN:
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
    _sklearn_util.check_expected_type(model, _NuSVC)
    return _SVC.convert(model, feature_names, target)
def __init__(self, path, etype, **kwargs):
    super(EnsembleModel, self).__init__(path, etype=etype, **kwargs)
    self.basedir = "models/ensemble/"
    self.goldstd = kwargs.get("goldstd")
    self.data = {}
    self.offsets = []
    self.pipeline = Pipeline(
        [
            #('clf', SGDClassifier(loss='hinge', penalty='l1', alpha=0.0001, n_iter=5, random_state=42)),
            #('clf', SGDClassifier())
            #('clf', svm.NuSVC(nu=0.01))
            ('clf', RandomForestClassifier(class_weight={False: 1, True: 1}, n_jobs=-1,
                                           criterion="entropy", warm_start=True))
            #('clf', tree.DecisionTreeClassifier(criterion="entropy")),
            #('clf', MultinomialNB())
            #('clf', GaussianNB())
            #('clf', svm.SVC(kernel="rbf", degree=2, C=1)),
            #('clf', svm.SVC(kernel="linear", C=2))
            #('clf', DummyClassifier(strategy="constant", constant=True))
        ])
def __init__(self, corpus, relationtype, modelname="scikit_classifier"):
    super(ScikitRE, self).__init__()
    self.modelname = relationtype + "_" + modelname
    self.relationtype = relationtype
    self.pairtype = relationtype
    self.corpus = corpus
    self.pairs = []
    self.features = []
    self.labels = []
    self.pred = []
    self.clusters = word2vec.load_clusters("corpora/Thaliana/documents-processed-clusters.txt")
    self.posfmeasure = make_scorer(f1_score, average='binary', pos_label=True)
    self.generate_data(corpus, modelname, relationtype)
    self.text_clf = Pipeline([
        ('vect', CountVectorizer(analyzer='char_wb', ngram_range=(3, 20), min_df=0.0, max_df=0.7)),
        #('vect', CountVectorizer(ngram_range=(1,3), binary=False, max_features=None)),
        #('tfidf', TfidfTransformer(use_idf=True, norm="l2")),
        #('clf', SGDClassifier(loss='hinge', penalty='l1', alpha=0.0001, n_iter=5, random_state=42)),
        #('clf', SGDClassifier())
        #('clf', svm.NuSVC(nu=0.01))
        #('clf', RandomForestClassifier(class_weight={False: 1, True: 2}, n_jobs=-1)),
        ('clf', MultinomialNB(alpha=0.01, fit_prior=False))
        #('clf', DummyClassifier(strategy="constant", constant=True))
    ])
def _train(self, X_matrix, y, **kwargs):
    """Train a NuSVC model on the given data.

    Parameters:
        X_matrix (numpy.array): - feature matrix used for training
        y (numpy.array): - target labels

    Returns:
        sklearn.model: - the fitted sklearn model
    """
    from sklearn.svm import NuSVC
    model = NuSVC(**kwargs)
    model.fit(X_matrix, y)
    return model
def test_probability():
    # Predict probabilities using SVC
    # This uses cross validation, so we use a slightly bigger testing set.

    for clf in (svm.SVC(probability=True, random_state=0, C=1.0),
                svm.NuSVC(probability=True, random_state=0)):
        clf.fit(iris.data, iris.target)

        prob_predict = clf.predict_proba(iris.data)
        assert_array_almost_equal(
            np.sum(prob_predict, 1), np.ones(iris.data.shape[0]))
        assert_true(np.mean(np.argmax(prob_predict, 1)
                            == clf.predict(iris.data)) > 0.9)

        assert_almost_equal(clf.predict_proba(iris.data),
                            np.exp(clf.predict_log_proba(iris.data)), 8)
def test_conversion_bad_inputs(self):
    from sklearn.preprocessing import OneHotEncoder

    # Error on converting an untrained model
    with self.assertRaises(TypeError):
        model = NuSVC()
        spec = scikit_converter.convert(model, 'data', 'out')

    # Check the expected class during conversion
    with self.assertRaises(TypeError):
        model = OneHotEncoder()
        spec = scikit_converter.convert(model, 'data', 'out')
def generate_base_classification():
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

    models = [
        #(LinearSVC, params('C', 'loss')),
        #(NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),

        # we do in-place modification of what the method params return in order to add
        # more loss functions that weren't defined in the method
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),

        # dict.update() returns None, so merge the extra option into a new dict explicitly
        (KNeighborsClassifier, dict(params('n_neighbors', 'leaf_size', 'p'),
                                    algorithm=['auto', 'brute', 'kd_tree', 'ball_tree'])),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]

    return models
def __init__(self, classifier):
    assert isinstance(classifier, (SVC, LinearSVC, NuSVC)), \
        "Classifier must be an sklearn SVM classifier (SVC, LinearSVC, NuSVC)."
    self.clf = classifier
    self.model = None
def test_bad_input():
    # Test that it gives proper exception on deficient input
    # impossible value of C
    assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)

    # impossible value of nu
    clf = svm.NuSVC(nu=0.0)
    assert_raises(ValueError, clf.fit, X, Y)

    Y2 = Y[:-1]  # wrong dimensions for labels
    assert_raises(ValueError, clf.fit, X, Y2)

    # Test with arrays that are non-contiguous.
    for clf in (svm.SVC(), svm.LinearSVC(random_state=0)):
        Xf = np.asfortranarray(X)
        assert_false(Xf.flags['C_CONTIGUOUS'])
        yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T)
        yf = yf[:, -1]
        assert_false(yf.flags['F_CONTIGUOUS'])
        assert_false(yf.flags['C_CONTIGUOUS'])
        clf.fit(Xf, yf)
        assert_array_equal(clf.predict(T), true_result)

    # error for precomputed kernels
    clf = svm.SVC(kernel='precomputed')
    assert_raises(ValueError, clf.fit, X, Y)

    # sample_weight bad dimensions
    clf = svm.SVC()
    assert_raises(ValueError, clf.fit, X, Y,
                  sample_weight=range(len(X) - 1))

    # predict with sparse input when trained with dense
    clf = svm.SVC().fit(X, Y)
    assert_raises(ValueError, clf.predict, sparse.lil_matrix(X))

    Xt = np.array(X).T
    clf.fit(np.dot(X, Xt), Y)
    assert_raises(ValueError, clf.predict, X)

    clf = svm.SVC()
    clf.fit(X, Y)
    assert_raises(ValueError, clf.predict, Xt)
def test_immutable_coef_property():
    # Check that primal coef modifications are not silently ignored
    svms = [
        svm.SVC(kernel='linear').fit(iris.data, iris.target),
        svm.NuSVC(kernel='linear').fit(iris.data, iris.target),
        svm.SVR(kernel='linear').fit(iris.data, iris.target),
        svm.NuSVR(kernel='linear').fit(iris.data, iris.target),
        svm.OneClassSVM(kernel='linear').fit(iris.data),
    ]
    for clf in svms:
        assert_raises(AttributeError, clf.__setattr__, 'coef_', np.arange(3))
        assert_raises((RuntimeError, ValueError),
                      clf.coef_.__setitem__, (0, 0), 0)
def test_decision_function_shape_two_class():
    for n_classes in [2, 3]:
        X, y = make_blobs(centers=n_classes, random_state=0)
        for estimator in [svm.SVC, svm.NuSVC]:
            clf = OneVsRestClassifier(
                estimator(decision_function_shape="ovr")).fit(X, y)
            assert_equal(len(clf.predict(X)), len(y))
def test_error():
    # Test that it gives proper exception on deficient input
    # impossible value of C
    assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)

    # impossible value of nu
    clf = svm.NuSVC(nu=0.0)
    assert_raises(ValueError, clf.fit, X_sp, Y)

    Y2 = Y[:-1]  # wrong dimensions for labels
    assert_raises(ValueError, clf.fit, X_sp, Y2)

    clf = svm.SVC()
    clf.fit(X_sp, Y)
    assert_array_equal(clf.predict(T), true_result)
def _evaluation_test_helper(self, class_labels, use_probability_estimates,
                            allow_slow, allowed_prob_delta=0.00001):
    # Parameters to test
    kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2},
                         {'kernel': 'linear'},
                         {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2},
                         {'kernel': 'poly', 'gamma': 0.75},
                         {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
                         {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3},
                         {'kernel': 'sigmoid', 'coef0': 0.8},
                         {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5}
                         ]
    non_kernel_parameters = [{}, {'nu': 0.75}, {'nu': 0.25, 'shrinking': True},
                             {'shrinking': False}]

    # Generate some random data
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([random.gauss(200, 30), random.gauss(-100, 22),
                  random.gauss(100, 42)])
        y.append(random.choice(class_labels))
    column_names = ['x1', 'x2', 'x3']
    # make sure first label is seen first, second is seen second, and so on.
    for i, val in enumerate(class_labels):
        y[i] = val
    df = pd.DataFrame(x, columns=column_names)

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)
            cur_params['probability'] = use_probability_estimates
            cur_params['max_iter'] = 10  # Don't want test to take too long
            # print("cur_params=" + str(cur_params))

            cur_model = NuSVC(**cur_params)
            cur_model.fit(x, y)

            spec = scikit_converter.convert(cur_model, column_names, 'target')

            if use_probability_estimates:
                probability_lists = cur_model.predict_proba(x)
                df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals))
                                          for cur_vals in probability_lists]
                metrics = evaluate_classifier_with_probabilities(
                    spec, df, probabilities='classProbability')
                self.assertEqual(metrics['num_key_mismatch'], 0)
                self.assertLess(metrics['max_probability_error'], allowed_prob_delta)
            else:
                df['prediction'] = cur_model.predict(x)
                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(metrics['num_errors'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def trainModel(xtrain, xtest, ytrain, ytest):
    classifiers = [
        # KNeighborsClassifier(3),
        # SVC(kernel="linear", probability=True),
        # NuSVC(probability=True),
        # DecisionTreeClassifier(),
        RandomForestClassifier(),
        # AdaBoostClassifier(),
        # GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=200,
        #                            subsample=1.0, criterion='friedman_mse', min_samples_split=2,
        #                            min_samples_leaf=1, min_weight_fraction_leaf=0.,
        #                            max_depth=5),
        # GradientBoostingClassifier(),
        # GaussianNB(),
        # LinearDiscriminantAnalysis(),
        # QuadraticDiscriminantAnalysis()
    ]

    log_cols = ["Classifier", "Accuracy", "Log Loss"]
    log = pd.DataFrame(columns=log_cols)

    for clf in classifiers:
        clf.fit(xtrain, ytrain)
        name = clf.__class__.__name__

        print("=" * 30)
        print(name)
        print('****Results****')

        train_predictions = clf.predict(xtest)
        # acc = accuracy_score(ytest, train_predictions)
        # print("Accuracy: {:.4%}".format(acc))

        train_prob_predictions = clf.predict_proba(xtest)
        ll = log_loss(ytest, train_prob_predictions)
        print("Log Loss: {}".format(ll))

        # printResult(ytest, train_predictions)
        # result.printMultiResult(ytest, train_predictions)
        save_path = "doc/result.txt"
        desc = "sentiment by tfidf "
        result_str = result.printMultiResult(ytest, train_predictions)
        result.saveResult(save_path, desc, result_str)

        # log_entry = pd.DataFrame([[name, acc * 100, ll]], columns=log_cols)
        # log = log.append(log_entry)

    print("=" * 30)