The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.naive_bayes.GaussianNB().
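Before the project-specific examples, here is a minimal, self-contained sketch of the core GaussianNB API (fit, predict, predict_proba, score). The synthetic dataset and split below are illustrative assumptions, not taken from any of the projects that follow.

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# illustrative synthetic two-class problem (not from any project below)
X, y = make_classification(n_samples=200, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GaussianNB()
clf.fit(X_train, y_train)               # estimates per-class feature means and variances
print(clf.predict(X_test[:5]))          # hard class labels
print(clf.predict_proba(X_test[:5]))    # per-class probabilities
print(clf.score(X_test, y_test))        # mean accuracy on the held-out split
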
def get_feature_importance(self, clf, model_name):
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}
    if clfs[model_name] == 'feature_importances':
        return list(clf.feature_importances_)
    elif clfs[model_name] == 'coef':
        return list(clf.coef_.tolist())
    else:
        return None

def get_classifier_class(class_name):
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }
    if class_name not in name_table:
        raise ValueError('No such classifier')
    return name_table[class_name]

def __create_classifiers(self):
    classifiers = list()
    classifiers.append({"func": linear_model.SGDClassifier(loss="log"), "name": "sgd"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(1, weights='distance'), "name": "knn1"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(3, weights='distance'), "name": "knn3"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(5, weights='distance'), "name": "knn5"})
    classifiers.append({"func": GaussianNB(), "name": "naive_bayes"})
    # classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
    # classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
    # classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
    return classifiers

def define_model(self, model, parameters, n_cores=0):
    clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
            'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
            'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
            'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
            'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
            'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
            'GaussianNB': GaussianNB(),
            'DecisionTreeClassifier': DecisionTreeClassifier(),
            'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
            'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
            'linear.SVC': svm.LinearSVC()}
    if model not in clfs:
        raise ConfigError("Unsupported model {}".format(model))
    clf = clfs[model]
    clf.set_params(**parameters)
    return clf

def build_models_DOC(train_pos_vec, train_neg_vec):
    """
    Returns a GaussianNB and LogisticRegression model that are fit to the training data.
    """
    Y = ["pos"] * len(train_pos_vec) + ["neg"] * len(train_neg_vec)
    # Use sklearn's GaussianNB and LogisticRegression functions to fit two models to the training data.
    # For LogisticRegression, pass no parameters.
    train_vec = []
    train_vec.extend(train_pos_vec)
    train_vec.extend(train_neg_vec)
    nb_model = GaussianNB()
    nb_model.fit(train_vec, Y)
    lr_model = LogisticRegression()
    lr_model.fit(train_vec, Y)
    return nb_model, lr_model

def learns(tests, trains, indep=lambda x: x[:-1], dep=lambda x: x[-1],
           rf=Abcd(), lg=Abcd(), dt=Abcd(), nb=Abcd()):
    x1, y1, x2, y2 = trainTest(tests, trains, indep, dep)
    forest = RandomForestClassifier(n_estimators=50)
    forest = forest.fit(x1, y1)
    for n, got in enumerate(forest.predict(x2)):
        rf(predicted=got, actual=y2[n])
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(x1, y1)
    for n, got in enumerate(logreg.predict(x2)):
        lg(predicted=got, actual=y2[n])
    bayes = GaussianNB()
    bayes.fit(x1, y1)
    for n, got in enumerate(bayes.predict(x2)):
        nb(predicted=got, actual=y2[n])
    dectree = DecisionTreeClassifier(criterion="entropy", random_state=1)
    dectree.fit(x1, y1)
    for n, got in enumerate(dectree.predict(x2)):
        dt(predicted=got, actual=y2[n])

def main():
    args = get_args()
    # load and split data
    dataset, target = load_dataset(args.file)
    train_x, train_y, test_x, actual = split_dataset(dataset, target, args.split)
    print("Training set size: %d, Testing set size: %d" % (len(train_x), len(test_x)))
    # prepare model
    summaries = summarize_by_class(train_x, train_y)
    # test model
    predictions = get_predictions(summaries, test_x)
    display(actual, predictions)
    # using scikit
    gnb = GaussianNB()
    y_pred = gnb.fit(train_x, train_y).predict(test_x)
    display(actual, y_pred)

def classification_gaussian_nb(self):
    self.signals.PrintInfo.emit("Gaussian NB")
    output_dir = self.output_dir + 'gaussian_nb_out/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    vectorizer = HashingVectorizer()
    fdata = vectorizer.fit_transform(self.fdata)
    trainingSet = fdata[:self.split]
    testSet = fdata[self.split:]
    classificator = GaussianNB()
    classificator.fit(trainingSet.toarray(), self.trainingClass)
    results = classificator.predict(testSet.toarray())
    proba = classificator.predict_proba(testSet.toarray())
    self.write_results_to_file(output_dir + 'results.csv', results, proba, classificator.classes_, self.test_filenames)
    out_text = self.compile_result_string(results, proba, classificator.classes_, self.test_filenames)
    self.signals.PrintInfo.emit(out_text)

def train():
    with open('./bin/train.bin', 'rb') as f:
        ds = pickle.load(f)
        XTrain, yTrain = ds['X'], ds['y']
        del ds
    with open('./bin/validation.bin', 'rb') as f:
        ds = pickle.load(f)
        XValidation, yValidation = ds['X'], ds['y']
        del ds
    clf = GaussianNB()
    clf.fit(XTrain, yTrain)
    print("Training Set Length:", XTrain.shape)
    print("Test Set Length:", XValidation.shape)
    print("Test Scores:", clf.score(XValidation, yValidation))
    with open('./bin/gnbClf.bin', 'wb') as f:
        pickle.dump(clf, f)
    print("[SUCCESS] Saved classifier as `gnbClf.bin`")

def Fit(self, bags, bagData):
    self.Bayes, self.GBayes = [], []
    for i in range(10):
        bnb = BernoulliNB()
        gnb = GaussianNB()
        x, y, xg = [], [], []
        for j in range(10):
            if i != j:
                for vv in range(len(bagData[j][0])):
                    x.append(self.Convert(bagData[j][0][vv]))
                    xg.append(self.ConvertGauss(bagData[j][0][vv]))
                y.extend(bagData[j][1])
        bnb.fit(x, y)
        gnb.fit(xg, y)
        self.Bayes.append(bnb)
        self.GBayes.append(gnb)

def test_GaussianNB(*data):
    '''
    Test Gaussian NB

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.GaussianNB()
    cls.fit(X_train, y_train)
    print('Training Score: {0}'.format(cls.score(X_train, y_train)))
    print('Testing Score: {0}'.format(cls.score(X_test, y_test)))

def GaussianNBPredictModel(localTrainLabel, config):
    train = pd.read_csv('../feature/trainQlist.csv', header=0, sep=",")
    test = pd.read_csv('../feature/testQlist.csv', header=0, sep=",")
    print("Train tf-idf vector Model...")
    encode = TfidfVectorizer(decode_error='ignore', norm="l2", binary=False, sublinear_tf=True, min_df=50)
    localTrainFeature = encode.fit_transform(train['qlist'].values)
    # transform the test questions, not the training ones
    localTestFeature = encode.transform(test['qlist'].values)
    print(localTrainFeature.shape, localTestFeature.shape)
    print('train...')
    model = GaussianNB()
    model.fit(X=localTrainFeature.toarray(), y=localTrainLabel)
    print('predict...')
    if config['prob'] == False:
        return model.predict(localTestFeature.toarray()), test['uid'].values
    else:
        return model.predict_log_proba(localTestFeature.toarray()), test['uid'].values

#-- Multinomial Naive Bayes cross-validation model frame

def test_discretenb_pickle():
    # Test picklability of discrete naive Bayes classifiers
    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        clf = cls().fit(X2, y2)
        y_pred = clf.predict(X2)
        store = BytesIO()
        pickle.dump(clf, store)
        clf = pickle.load(BytesIO(store.getvalue()))
        assert_array_equal(y_pred, clf.predict(X2))
        if cls is not GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB
            # Test pickling of estimator trained with partial_fit
            clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
            clf2.partial_fit(X2[3:], y2[3:])
            store = BytesIO()
            pickle.dump(clf2, store)
            clf2 = pickle.load(BytesIO(store.getvalue()))
            assert_array_equal(y_pred, clf2.predict(X2))

def train_classifier_listing(self):
    self.clfListing = GaussianNB()
    files = self.b2s.ls('data/training')
    X = np.zeros((len(files), self.numFeat))
    Y = np.zeros(len(files))
    for i, file in enumerate(files):
        f = file['fileName']
        # read json into feature vector
        if not f.endswith('.json'):
            continue
        textJson = self.b2s.download(f)
        listing = json.loads(textJson)
        X[i] = self.bundle_json_obj(listing)
        Y[i] = max(int(listing['price'] / 50), 10)
    self.clfListing.fit(X, Y)
    temp = tempfile.NamedTemporaryFile()
    joblib.dump(self.clfListing, temp.name)
    self.b2s.upload('classifiers/nb_listing.pkl', temp.read(), 'application/octet-stream')
    return self.clfListing.score(X, Y)

# train a classifier on description

def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Compare multiple classifiers and return the predictions of the chosen one."""
    utils.print_success("Comparison of different classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors": KNeighborsClassifier(3),
        # "GaussianProcess": GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree": DecisionTreeClassifier(max_depth=5),
        # "MLP": MLPClassifier(),
        # "AdaBoost": AdaBoostClassifier(),
        # "GaussianNB": GaussianNB(),
        # "QDA": QuadraticDiscriminantAnalysis(),
        # "SVM": SVC(kernel="linear", C=0.025),
        # "GradientBoosting": GradientBoostingClassifier(),
        # "ExtraTrees": ExtraTreesClassifier(),
        # "LogisticRegression": LogisticRegression(),
        # "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions

def getEstimator(es):
    estimator = None
    algo = es.ml_algorithm.upper()
    if algo == 'NAIVEBAYESGAUSSIAN':
        estimator = naive_bayes.GaussianNB()
    elif algo == 'SVM':
        estimator = svm.SVC(kernel=es.svmKernel, degree=3, C=0.1, random_state=es.random_seed)
    elif algo == 'RF':
        estimator = RandomForestClassifier(n_estimators=100, random_state=es.random_seed)
    elif algo == 'DECISIONTREE':
        estimator = DecisionTreeClassifier(random_state=es.random_seed)
    elif algo == 'RANDOM':
        estimator = DummyClassifier(random_state=es.random_seed)
    else:
        print("Please enter correct estimator (NaiveBayesGaussian/SVM/RF/DecisionTree)")
        # TODO: add regression?
    return estimator

def test_smoke():
    a = nb.GaussianNB()
    b = nb_.GaussianNB()
    a.fit(X, y)
    b.fit(X.compute(), y.compute())
    assert_eq(a.class_prior_.compute(), b.class_prior_)
    assert_eq(a.class_count_.compute(), b.class_count_)
    assert_eq(a.theta_.compute(), b.theta_)
    assert_eq(a.sigma_.compute(), b.sigma_)
    assert_eq(a.predict_proba(X).compute(), b.predict_proba(X_))
    assert_eq(a.predict(X).compute(), b.predict(X_))
    assert_eq(a.predict_log_proba(X).compute(), b.predict_log_proba(X_))

def build_naive_bayes_model(x_train, y_train):
    nb_model = GaussianNB()
    nb_model.fit(x_train, y_train.ravel())
    return nb_model

def createPipeline(self):
    self.pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', naive_bayes.GaussianNB())])

def naive_bayes_predict(training_samples, training_labels, test_samples, test_lables):
    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    t0 = time()
    clf.fit(training_samples, training_labels)
    training_time = round(time() - t0, 3)
    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time() - t0, 3)
    from sklearn.metrics import accuracy_score
    acc = accuracy_score(pred, test_lables)
    no_features = np.array(training_samples).shape[1]
    training_samples = np.array(training_samples).shape[0]
    test_samples = np.array(test_samples).shape[0]
    with open("Temp\\results.txt", "w") as outfile:
        outfile.write("Algorithm : {}\n".format("Naive Bayes"))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(training_samples))
        outfile.write("No of test samples : {}\n".format(test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))
    with open("Temp\\result_labels.csv", "wb") as outfile:
        np.savetxt(outfile, pred)

def main():
    # before release
    movie_info_before_release = load_movie_info_before_release()
    print('***Before release***')
    X = create_input(movie_info_before_release)
    Y = create_output_before_release(movie_info_before_release)
    clf = linear_model.SGDClassifier(loss='log')
    test_classifier(clf, X, Y, 'before_release')
    clf = GaussianNB()
    test_classifier(clf, X, Y, 'before_release')
    clf = RandomForestClassifier(n_estimators=10, max_depth=10)
    test_classifier(clf, X, Y, 'before_release')

    # after release
    movie_info = load_movie_info()
    print('***After release***')
    X = create_input(movie_info)
    Y = create_output(movie_info)
    clf = linear_model.SGDClassifier(loss='log')
    test_classifier(clf, X, Y, 'after_release')
    clf = GaussianNB()
    test_classifier(clf, X, Y, 'after_release')
    clf = RandomForestClassifier(n_estimators=10, max_depth=10)
    test_classifier(clf, X, Y, 'after_release')

def nb_xyat_weight1(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"]
        df_new["accuracy"] = df["accuracy"].apply(np.log10)
        return df_new
    logging.info("train nb_xyat_weight1 model")
    clf = GaussianNB()
    # the third positional argument of GaussianNB.fit is sample_weight
    clf.fit(prepare_feats(df_cell_train_feats), y_train, df_cell_train_feats["time"] ** 2)
    y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
    return y_test_pred

def run_cat(filename, modelname, fileout, embeddings, new_run=True, run_parse=True,
            model_type='logreg', C=10.0, alpha=1.0, cutoff=0.50, n_iter=1):
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if len(df.columns) == 2:
        # make sure columns have the right names
        df.columns = ['raw', 'amount']
    if new_run:
        # initialize the model
        if model_type == 'logreg':
            model = linear_model.SGDClassifier(loss='log', warm_start=True, n_iter=n_iter, alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C, warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else:
        # load a saved, pre-trained model
        modelFileLoad = open(modelname, 'rb')
        model = pickle.load(modelFileLoad)
    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)
    df = cat_df(df, model, us_cities, embeddings, new_run, run_parse, cutoff=cutoff, model_type=model_type)
    df.to_csv(fileout, index=False)
    # Saving logistic regression model from training set 1
    modelFileSave = open(modelname, 'wb')
    pickle.dump(model, modelFileSave)
    modelFileSave.close()

# ------ testing functions

def solve():
    # load the training and test data
    training_arr = numpy.loadtxt('adult.txt', dtype=bytes, comments='#', delimiter=',')
    test_data = numpy.loadtxt('adult_test.txt', dtype=bytes, comments='#', delimiter=',')
    # split into feature vectors x and labels y
    # x_list = numpy.ndarray(len(training_arr))
    # y_list = numpy.ndarray(len(training_arr))
    # extract the label column y
    y_list = [int(element[12]) for element in training_arr]
    # transform the remaining columns into the feature vector x
    x_list = [[transform(x) for x in element[0:12]] for element in training_arr]
    # transform the test data the same way
    test_data = [[transform(x) for x in element] for element in test_data]
    assert isinstance(y_list[0], int)  # labels y should be int, 0 or 1
    assert len(x_list[0]) == 12        # each x has 12 features
    assert len(test_data[0]) == 12
    # fit and predict
    clf = GaussianNB()
    clf.partial_fit(x_list, y_list, numpy.unique(y_list))
    # clf.fit(x_list, y_list) would work as well
    res_arr = clf.predict(test_data)
    partial_fit_result = "".join([str(x) for x in res_arr])
    print("[*] prediction result: {}".format(partial_fit_result))

def train(self, pd):
    model = naive_bayes.GaussianNB()
    model.fit(pd.data, pd.target)
    print(model)
    return model

def nb_experiment(scope_name, X, y):
    for lp in lp_cand:
        results = []
        for r in range(50):
            with open('data/local/split/' + scope_name + '/lb' + str(lp).zfill(3) + '_' + str(r).zfill(3) + '_train') as f:
                trainLabel = pk.load(f)
            with open('data/local/split/' + scope_name + '/lb' + str(lp).zfill(3) + '_' + str(r).zfill(3) + '_test') as f:
                testLabel = pk.load(f)
            XTrain = X[trainLabel.keys()]
            XTest = X[testLabel.keys()]
            if not isinstance(XTrain, np.ndarray):
                XTrain = XTrain.toarray()
                XTest = XTest.toarray()
            yTrain = y[trainLabel.keys()]
            yTest = y[testLabel.keys()]
            # train
            # clf = MultinomialNB()
            clf = GaussianNB()
            # clf = BernoulliNB()
            clf.fit(XTrain, yTrain)
            # test
            pred = clf.predict(XTest)
            results.append(sum(pred == yTest) / float(yTest.shape[0]))
        return np.mean(results)

def MakeClassification(index, instancesData, classesData, instancesTest, type="proba", classifiersType="normal"):
    classifiers = [
        OneVsRestClassifier(sklearn.svm.SVC(probability=1), 4),
        DecisionTreeClassifier(random_state=0),
        KNeighborsClassifier(n_jobs=4),
        MLPClassifier(),
        sklearn.svm.SVC(probability=1, decision_function_shape="ovo"),
        OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0)
    ]
    if classifiersType == "ova":
        classifiers = [
            OneVsRestClassifier(sklearn.svm.SVC(probability=1), 4),
            OneVsRestClassifier(DecisionTreeClassifier(random_state=0), 4),
            OneVsRestClassifier(KNeighborsClassifier(), 4),
            OneVsRestClassifier(MLPClassifier(), 4),
            OneVsRestClassifier(GaussianNB(), 4)
        ]
    if index >= len(classifiers):
        print("ERROR. The index is not valid.")
        return None
    else:
        # print("Performing classification")
        if type == "proba":
            return classifiers[index].fit(instancesData, classesData).predict_proba(instancesTest)
        else:
            return classifiers[index].fit(instancesData, classesData).predict(instancesTest)

def __init__(self, training_data, training_target):
    self.training_data = training_data
    self.training_target = training_target
    self.clf = GaussianNB()

def bayes_train(train_data, train_target):
    model = GaussianNB()
    model.fit(train_data, train_target)
    expected = train_target
    predicted = model.predict(train_data)
    # summarize the fit of the model
    print(metrics.classification_report(expected, predicted))
    print(metrics.confusion_matrix(expected, predicted))

def NB(train_x, train_y, test_x, test_y):
    """Naive Bayes classifier."""
    classifier = GaussianNB()
    classifier.fit(train_x, train_y)
    pred = classifier.predict_proba(test_x)
    predict_pro = []
    for pro in pred:
        predict_pro.append(pro[1])
    predict_y = classifier.predict(test_x)
    auc = evaluate_auc(predict_pro, test_y)
    evaluate(predict_y, test_y)
    return auc

def sub_NB(train_x, train_y, test_x, test_y):
    """Naive Bayes sub-model: returns the positive-class probabilities."""
    classifier = GaussianNB()
    classifier.fit(train_x, train_y)
    pred = classifier.predict_proba(test_x)
    predict_pro = []
    for pro in pred:
        predict_pro.append(pro[1])
    return np.array(predict_pro)

def _init_model(self):
    return GaussianNB()

def generate_base_classification():
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

    models = [
        #(LinearSVC, params('C', 'loss')),
        #(NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # we do in-place modification of what the method params return in order to add
        # more loss functions that weren't defined in the method
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        # note: dict.update() returns None, so merge the extra key instead
        (KNeighborsClassifier, {**params('n_neighbors', 'leaf_size', 'p'),
                                'algorithm': ['auto', 'brute', 'kd_tree', 'ball_tree']}),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]
    return models

def __init__(self):
    SingleClassifier.SingleClassifier.__init__(self)
    # weak classifier
    self.clf = GaussianNB()

def __init__(self):
    self.learner = GaussianNB()

def get_naive_bayes(self):
    """get naive bayes algorithm"""
    return GaussianNB()

def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
    }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1, 10, 100, 1000], 'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100],
               'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'LR': {'penalty': ['l1', 'l2'], 'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'random_state': [1]},
        'SGD': {'loss': ['log'], 'penalty': ['l2', 'l1', 'elasticnet'], 'random_state': [1]},
        'ET': {'n_estimators': [1, 10, 100, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1, 3, 5, 10, 15],
               'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1, 10, 100, 1000], 'random_state': [1]},
        'GB': {'n_estimators': [1, 10, 100, 1000], 'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
               'subsample': [0.1, 0.5, 1.0], 'max_depth': [1, 3, 5, 10, 20, 50, 100], 'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15, 20, 30, 40, 50],
               'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear'], 'random_state': [1]},
        'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100], 'weights': ['uniform', 'distance'],
                'algorithm': ['auto', 'ball_tree', 'kd_tree']}
    }

def runner(i): sem.acquire() print("learn begin %s" % i) clf = ensemble.BaggingClassifier(naive_bayes.GaussianNB()) clf = clf.fit(traindata, trainlabel[i]) svms.append((i, clf)) result[i] = clf.predict_proba(testdata) dbresult[i] = clf.predict_proba(dbdata) #print("label %s done\n%s" # % (i, metrics.classification_report(testlabel[i], result[i]))) #print metrics.confusion_matrix(testlabel[i], result) sem.release()
def runner(i): sem.acquire() print("learn begin %s" % i) clf = ensemble.AdaBoostClassifier(naive_bayes.GaussianNB()) clf = clf.fit(traindata, trainlabel[i]) svms.append((i, clf)) result[i] = clf.predict_proba(testdata) dbresult[i] = clf.predict_proba(dbdata) #print("label %s done\n%s" # % (i, metrics.classification_report(testlabel[i], result[i]))) #print metrics.confusion_matrix(testlabel[i], result) sem.release()
def runner(i): sem.acquire() print("learn begin %s" % i) clf = naive_bayes.GaussianNB() clf = clf.fit(traindata, trainlabel[i]) svms.append((i, clf)) result[i] = clf.predict(testdata) dbresult[i] = clf.predict(dbdata) print("label %s done\n%s" % (i, metrics.classification_report(testlabel[i], result[i]))) #print metrics.confusion_matrix(testlabel[i], result) sem.release()
def test_gaussiannb():
    iris = load_iris()
    clf = GaussianNB()
    clf.fit(iris.data, iris.target)
    y_pred = clf.predict(iris.data)
    print(y_pred)
    clf_ = SKGaussianNB()
    clf_.fit(iris.data, iris.target)
    print(clf_.predict(iris.data))
    print(iris.target)

def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
    self.logger = get_logger('classifier')
    self.display_name = name
    self.genres = genres
    self.m_genres = {genre: i for i, genre in enumerate(genres)}
    self.randstate = np.random.RandomState()
    self.scaler = StandardScaler()
    clf_kwargs = {} if not clf_kwargs else clf_kwargs
    if type in ['svm', 'mlp']:
        clf_kwargs['random_state'] = self.randstate
    if type == 'knn':
        self.proto_clf = KNeighborsClassifier(**clf_kwargs)
    elif type == 'svm':
        self.proto_clf = SVC(**clf_kwargs)
    elif type == 'dtree':
        self.proto_clf = DecisionTreeClassifier(**clf_kwargs)
    elif type == 'gnb':
        self.proto_clf = GaussianNB(**clf_kwargs)
    elif type == 'perc':
        self.proto_clf = Perceptron(**clf_kwargs)
    elif type == 'mlp':
        self.proto_clf = MLPClassifier(**clf_kwargs)
    elif type == 'ada':
        self.proto_clf = AdaBoostClassifier(**clf_kwargs)
    else:
        raise LookupError('Classifier type "{}" is invalid'.format(type))
    self._convert_data(data)
    self.logger.info('Classifier: {} (params={})'.format(
        self.proto_clf.__class__.__name__, clf_kwargs))

def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Ridge")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result

def test_AdaBoostClassifier_base_classifier(*data):
    '''
    Test AdaBoost classifiers built on different base estimators
    (the default decision tree vs. Gaussian naive Bayes)

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    from sklearn.naive_bayes import GaussianNB
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    clf = ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train, y_train)
    ## graph
    estimators_num = len(clf.estimators_)
    X = range(1, estimators_num + 1)
    ax.plot(list(X), list(clf.staged_score(X_train, y_train)), label="Training score")
    ax.plot(list(X), list(clf.staged_score(X_test, y_test)), label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1)
    ax.set_title("AdaBoostClassifier with Decision Tree")
    ax = fig.add_subplot(2, 1, 2)
    clf = ensemble.AdaBoostClassifier(learning_rate=0.1, base_estimator=GaussianNB())
    clf.fit(X_train, y_train)
    ## graph
    estimators_num = len(clf.estimators_)
    X = range(1, estimators_num + 1)
    ax.plot(list(X), list(clf.staged_score(X_train, y_train)), label="Training score")
    ax.plot(list(X), list(clf.staged_score(X_test, y_test)), label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1)
    ax.set_title("AdaBoostClassifier with Gaussian Naive Bayes")
    plt.show()

def script_run():
    # build the keyword list
    kw_list = build_key_word("train.txt")
    # save the keywords
    fp = open("new_word.txt", encoding="utf-8", mode="w")
    for word in kw_list:
        fp.write(word + "\n")
    fp.close()
    # kw_list = load_key_words("word.txt")
    feature, label = get_feature("train.txt", kw_list)
    gnb = GaussianNB()
    gnb = gnb.fit(feature, label)
    joblib.dump(gnb, 'model/gnb.model')
    print("training finished")
    # print(feature, label)

def GaussianNBLocalModel(localTrainFeature, localTestFeature, localTrainLabel, config):
    print('train...')
    model = GaussianNB()
    model.fit(X=localTrainFeature.toarray(), y=localTrainLabel)
    print('predict...')
    if config['prob'] == False:
        return model.predict(localTestFeature.toarray())
    else:
        return model.predict_log_proba(localTestFeature.toarray())

#-- Gaussian Naive Bayes online predict model frame

def sk_demo_1():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    test_item = np.array([[-0.8, -1]])
    print(clf.predict(test_item))  # [1]
    print(clf.get_params())

def sk_nb_diabetes():
    x_train, x_test, y_train, y_test = load_diabetes_data()
    clf = GaussianNB()
    # fit on the training split and report held-out accuracy
    clf.fit(x_train, y_train)
    print(clf.score(x_test, y_test))

def test_majority_label_iris():
    """Check classification by majority label on dataset iris."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='hard')
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.95, decimal=2)

def test_weights_iris():
    """Check classification by average probabilities on dataset iris."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft', weights=[1, 2, 10])
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.93, decimal=2)