Python sklearn.naive_bayes 模块,GaussianNB() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.naive_bayes.GaussianNB()

项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def get_feature_importance(self, clf, model_name):
    """Return the feature-importance values of a fitted classifier.

    :param clf: a fitted sklearn classifier instance.
    :param model_name: key identifying the classifier type; must be one
        of the names in the lookup table below.
    :return: list of importances for tree ensembles, list of coefficients
        for linear models, or None when the model type exposes neither
        (e.g. GaussianNB, KNeighborsClassifier).
    :raises KeyError: if model_name is not a known classifier name.
    """
    # Maps each supported model name to the attribute family that
    # carries its notion of feature importance.
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}

    if clfs[model_name] == 'feature_importances':
        return list(clf.feature_importances_)
    elif clfs[model_name] == 'coef':
        # coef_.tolist() already yields a plain list; the extra list()
        # wrapper in the original was redundant.
        return clf.coef_.tolist()
    else:
        return None
项目:rltk    作者:usc-isi-i2    | 项目源码 | 文件源码
def get_classifier_class(class_name):
    """Look up a scikit-learn classifier class by its short name.

    :param class_name: one of the keys of the table below.
    :return: the classifier class (not an instance).
    :raises ValueError: when the name is not recognised.
    """
    table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis,
    }

    klass = table.get(class_name)
    if klass is None:
        raise ValueError('No such classifier')
    return klass
项目:oss-github-analysis-project    作者:itu-oss-project-team    | 项目源码 | 文件源码
def __create_classifiers(self):
    """Instantiate the set of classifiers used by this analysis.

    Returns a list of {"func": estimator, "name": short_name} dicts.
    kNN is run with k = 1, 3, 5, all using distance-weighted votes.
    """
    configs = [
        (linear_model.SGDClassifier(loss="log"), "sgd"),
        (neighbors.KNeighborsClassifier(1, weights='distance'), "knn1"),
        (neighbors.KNeighborsClassifier(3, weights='distance'), "knn3"),
        (neighbors.KNeighborsClassifier(5, weights='distance'), "knn5"),
        (GaussianNB(), "naive_bayes"),
    ]
    classifiers = [{"func": func, "name": name} for func, name in configs]

    # classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
    # classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
    # classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
    return classifiers
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def define_model(self, model, parameters, n_cores = 0):
    """Instantiate the named classifier and apply the given parameters.

    :param model: classifier name; must be a key of the table below.
    :param parameters: dict forwarded to set_params().
    :param n_cores: unused; kept for interface compatibility.
    :return: the configured estimator.
    :raises ConfigError: for an unsupported model name.
    """
    clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
            'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
            'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
            'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
            'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
            'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
            'GaussianNB': GaussianNB(),
            'DecisionTreeClassifier': DecisionTreeClassifier(),
            'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
            'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
            'linear.SVC': svm.LinearSVC()}

    clf = clfs.get(model)
    if clf is None:
        raise ConfigError("Unsupported model {}".format(model))

    clf.set_params(**parameters)
    return clf
项目:Twitter-and-IMDB-Sentimental-Analytics    作者:abhinandanramesh    | 项目源码 | 文件源码
def build_models_DOC(train_pos_vec, train_neg_vec):
    """
    Returns a GaussianNB and LogisticRegression model that are fit to the training data.

    :param train_pos_vec: feature vectors of the positive examples.
    :param train_neg_vec: feature vectors of the negative examples.
    :return: (nb_model, lr_model) fitted on the concatenated vectors.
    """
    # Labels: all positives first, then all negatives, matching the
    # order in which the feature vectors are concatenated below.
    Y = ["pos"]*len(train_pos_vec) + ["neg"]*len(train_neg_vec)

    # Use sklearn's GaussianNB and LogisticRegression functions to fit two models to the training data.
    # For LogisticRegression, pass no parameters
    train_vec = []
    train_vec.extend(train_pos_vec)
    train_vec.extend(train_neg_vec)

    nb_model = GaussianNB()
    nb_model.fit(train_vec, Y)

    lr_model = LogisticRegression()
    lr_model.fit(train_vec, Y)

    return nb_model, lr_model
项目:XTREE    作者:ai-se    | 项目源码 | 文件源码
def learns(tests, trains, indep=lambda x: x[:-1],
                    dep=lambda x: x[-1],
                    rf=None,
                    lg=None,
                    dt=None,
                    nb=None):
  """Train RandomForest, LogisticRegression, GaussianNB and a decision
  tree on `trains`, then feed each prediction on `tests` into the
  corresponding Abcd statistics collector (rf/lg/dt/nb).

  BUG FIX: the original used `rf=Abcd()` (etc.) as default arguments,
  which are evaluated ONCE at definition time, so repeated calls
  silently accumulated statistics into the same shared Abcd instances.
  Defaults are now None and fresh collectors are created per call;
  callers that pass their own collectors are unaffected.
  """
  rf = Abcd() if rf is None else rf
  lg = Abcd() if lg is None else lg
  dt = Abcd() if dt is None else dt
  nb = Abcd() if nb is None else nb
  x1, y1, x2, y2 = trainTest(tests, trains, indep, dep)
  forest = RandomForestClassifier(n_estimators = 50)
  forest = forest.fit(x1, y1)
  for n, got in enumerate(forest.predict(x2)):
    rf(predicted = got, actual = y2[n])
  logreg = linear_model.LogisticRegression(C=1e5)
  logreg.fit(x1, y1)
  for n, got in enumerate(logreg.predict(x2)):
    lg(predicted = got, actual = y2[n])
  bayes = GaussianNB()
  bayes.fit(x1, y1)
  for n, got in enumerate(bayes.predict(x2)):
    nb(predicted = got, actual = y2[n])
  dectree = DecisionTreeClassifier(criterion="entropy",
                         random_state=1)
  dectree.fit(x1, y1)
  for n, got in enumerate(dectree.predict(x2)):
    dt(predicted = got, actual = y2[n])
项目:pyML    作者:tekrei    | 项目源码 | 文件源码
def main():
    """Load a dataset, run the hand-rolled NB model, and compare it
    against scikit-learn's GaussianNB on the same train/test split."""
    args = get_args()

    # Load and split the data.
    dataset, target = load_dataset(args.file)
    train_x, train_y, test_x, actual = split_dataset(
        dataset, target, args.split)
    print("Training set size: %d, Testing set size: %d" %
          (len(train_x), len(test_x)))

    # Hand-rolled model: per-class summaries, then prediction.
    summaries = summarize_by_class(train_x, train_y)
    predictions = get_predictions(summaries, test_x)
    display(actual, predictions)

    # Reference run with scikit-learn's GaussianNB.
    model = GaussianNB()
    model.fit(train_x, train_y)
    display(actual, model.predict(test_x))
项目:TextStageProcessor    作者:mhyhre    | 项目源码 | 文件源码
def classification_gaussian_nb(self):
    """Run Gaussian naive Bayes over hashed text features and write the
    prediction results/probabilities to the output directory."""
    self.signals.PrintInfo.emit("Gaussian NB")

    # Ensure the per-method output directory exists.
    output_dir = self.output_dir + 'gaussian_nb_out/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Hash the raw documents into a sparse matrix, then split into
    # train/test portions at the configured boundary index.
    features = HashingVectorizer().fit_transform(self.fdata)
    train_part = features[:self.split]
    test_part = features[self.split:]

    # GaussianNB requires dense input, hence the toarray() calls.
    model = GaussianNB()
    model.fit(train_part.toarray(), self.trainingClass)
    results = model.predict(test_part.toarray())
    proba = model.predict_proba(test_part.toarray())

    self.write_results_to_file(output_dir + 'results.csv', results, proba, model.classes_, self.test_filenames)
    out_text = self.compile_result_string(results, proba, model.classes_, self.test_filenames)
    self.signals.PrintInfo.emit(out_text)
项目:Machine-Learning-Projects    作者:poke19962008    | 项目源码 | 文件源码
def train():
    """Train GaussianNB on the pickled training split, print its score
    on the validation split, and pickle the fitted model to disk.
    (Python 2 snippet: statement-form print.)"""
    with open('./bin/train.bin', 'rb') as f:
        ds = pickle.load(f)
        XTrain, yTrain = ds['X'], ds['y']
        del ds  # free the loaded dict early; only the arrays are kept

    with open('./bin/validation.bin', 'rb') as f:
        ds = pickle.load(f)
        XValidation, yValidation = ds['X'], ds['y']
        del ds

    clf = GaussianNB()
    clf.fit(XTrain, yTrain)

    print "Training Set Length:", XTrain.shape
    print "Test Set Length:", XValidation.shape
    print "Test Scores:", clf.score(XValidation, yValidation)

    # Persist the fitted classifier for later reuse.
    with open('./bin/gnbClf.bin', 'wb') as f:
        pickle.dump(clf, f)
        print "[SUCCESS] Saved classifier as `gnbClf.bin`"
项目:Sberbank    作者:dimaquick    | 项目源码 | 文件源码
def Fit(self, bags, bagData):
        """Fit 10-fold bagged Bernoulli and Gaussian NB models.

        For each fold i, trains one BernoulliNB and one GaussianNB on the
        other nine folds of bagData and appends them to self.Bayes /
        self.GBayes.  (Python 2 snippet: uses xrange.)

        NOTE(review): the `bags` argument is unused in this method.
        """
        self.Bayes, self.GBayes = [], []
        for i in xrange(10):
            bnb = BernoulliNB()
            gnb = GaussianNB()
            # x / xg: feature rows produced by self.Convert /
            # self.ConvertGauss (presumably discrete vs. continuous
            # encodings -- TODO confirm); y: matching labels.
            x, y, xg = [], [], []
            for j in xrange(10):
                if i != j:
                    for vv in xrange(len(bagData[j][0])):
                        x.append(self.Convert(bagData[j][0][vv]))
                        xg.append(self.ConvertGauss(bagData[j][0][vv]))
                    y.extend(bagData[j][1])
            bnb.fit(x, y)
            gnb.fit(xg, y)
            self.Bayes.append(bnb)
            self.GBayes.append(gnb)
项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def test_GaussianNB(*data):
    '''
    Test Gaussian NB
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    model = naive_bayes.GaussianNB()
    model.fit(X_train, y_train)
    # Report accuracy on both splits.
    for label, X, y in (('Training', X_train, y_train),
                        ('Testing', X_test, y_test)):
        print('{0} Score: {1}'.format(label, model.score(X, y)))
项目:2016CCF-SouGou    作者:AbnerYang    | 项目源码 | 文件源码
def GaussianNBPredictModel(localTrainLabel, config):
    train = pd.read_csv('../feature/trainQlist.csv', header = 0, sep = ",")
    test = pd.read_csv('../feature/testQlist.csv', header = 0, sep = ",")
    print "Train tf-idf vector Model..."
    encode = TfidfVectorizer(decode_error = 'ignore', norm = "l2", binary = False, sublinear_tf = True, min_df = 50)
    localTrainFeature = encode.fit_transform(train['qlist'].values)
    localTestFeature = encode.transform(train['qlist'].values)

    print localTrainFeature.shape, localTestFeature.shape

    print 'train...'
    model = GaussianNB()
    model.fit(X = localTrainFeature.toarray(), y = localTrainLabel)
    print 'predict...'
    if config['prob'] == False:
        return model.predict(localTestFeature.toarray()), test['uid'].values
    else:
        return model.predict_log_proba(localTestFeature.toarray()), test['uid'].values

#-- Multinomial Naive Bayes cross validation model frame
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_discretenb_pickle():
    # Test picklability of discrete naive Bayes classifiers

    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        clf = cls().fit(X2, y2)
        y_pred = clf.predict(X2)

        # Round-trip the fitted estimator through pickle and check the
        # restored copy predicts identically.
        store = BytesIO()
        pickle.dump(clf, store)
        clf = pickle.load(BytesIO(store.getvalue()))

        assert_array_equal(y_pred, clf.predict(X2))

        if cls is not GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB

            # Test pickling of estimator trained with partial_fit
            # (two incremental batches must reproduce the full-fit
            # predictions after a pickle round-trip).
            clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
            clf2.partial_fit(X2[3:], y2[3:])
            store = BytesIO()
            pickle.dump(clf2, store)
            clf2 = pickle.load(BytesIO(store.getvalue()))
            assert_array_equal(y_pred, clf2.predict(X2))
项目:cs4300sp2017-finalproject    作者:AN313    | 项目源码 | 文件源码
def train_classifier_listing(self):
        """Train the listing-price GaussianNB classifier from JSON files
        under data/training, upload the pickled model to storage, and
        return its accuracy on the training set itself."""
        self.clfListing = GaussianNB()
        files = self.b2s.ls('data/training')
        X = np.zeros((len(files), self.numFeat))
        Y = np.zeros(len(files))
        for i, file in enumerate(files):
            f = file['fileName']
            # read json into feature vector
            # Non-JSON entries keep their all-zero row in X and Y.
            if not f.endswith('.json'):
                continue
            textJson = self.b2s.download(f)
            listing = json.loads(textJson)
            X[i] = self.bundle_json_obj(listing)
            # NOTE(review): max(..., 10) FLOORS the $50 price bucket at
            # 10 (prices below $500 all map to 10); a cap would be
            # min(..., 10) -- confirm which was intended.
            Y[i] = max(int(listing['price'] / 50), 10)
        self.clfListing.fit(X, Y)
        temp = tempfile.NamedTemporaryFile()
        # joblib writes through temp.name; temp's own read position is
        # still 0, so temp.read() returns the dumped bytes.
        joblib.dump(self.clfListing, temp.name)
        self.b2s.upload('classifiers/nb_listing.pkl',
                        temp.read(), 'application/octet-stream')
        return self.clfListing.score(X, Y)

    # train a classifier on description
项目:ISM2017    作者:ybayle    | 项目源码 | 文件源码
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one

    Either pass pre-split features/groundtruths via `data`, or file
    paths via `train`/`test` (parsed with read_file()).  Returns the
    predictions of the last classifier iterated (only RandomForest is
    currently enabled).  `disp` and `outfilename` are unused here.
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        # Caller supplied an already-split dataset.
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        # Load features and labels from the given file paths.
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    # Only RandomForest is active; the alternatives are kept commented
    # for quick experimentation.
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    # NOTE(review): `predictions` is overwritten on each loop pass, so
    # enabling several classifiers returns only the last one's output.
    return predictions
项目:rdocChallenge    作者:Elyne    | 项目源码 | 文件源码
def getEstimator(es):
    """Map the experiment settings' ml_algorithm string onto a
    scikit-learn estimator instance (None when unrecognised)."""
    algo = es.ml_algorithm.upper()

    # Lazily-constructed factories: only the chosen estimator is built.
    factories = {
        'NAIVEBAYESGAUSSIAN': lambda: naive_bayes.GaussianNB(),
        'SVM': lambda: svm.SVC(kernel=es.svmKernel, degree = 3, C = 0.1, random_state=es.random_seed),
        'RF': lambda: RandomForestClassifier(n_estimators=100, random_state=es.random_seed),
        'DECISIONTREE': lambda: DecisionTreeClassifier(random_state=es.random_seed),
        'RANDOM': lambda: DummyClassifier(random_state=es.random_seed),
    }

    estimator = None
    if algo in factories:
        estimator = factories[algo]()
    else:
        print("Please enter correct estimator (NaiveBayesGaussian/SVM/RF/DecisionTree)")

    #TODO: add regression?
    return estimator
项目:dask-ml    作者:dask    | 项目源码 | 文件源码
def test_smoke():
    """Smoke test: the dask-backed GaussianNB (a) must match sklearn's
    GaussianNB (b); dask results are .compute()d before comparison."""
    a = nb.GaussianNB()
    b = nb_.GaussianNB()
    a.fit(X, y)
    b.fit(X.compute(), y.compute())

    # Fitted parameters must agree.
    assert_eq(a.class_prior_.compute(), b.class_prior_)
    assert_eq(a.class_count_.compute(), b.class_count_)
    assert_eq(a.theta_.compute(), b.theta_)
    assert_eq(a.sigma_.compute(), b.sigma_)

    # Predictions and probabilities must agree.
    assert_eq(a.predict_proba(X).compute(), b.predict_proba(X_))
    assert_eq(a.predict(X).compute(), b.predict(X_))
    assert_eq(a.predict_log_proba(X).compute(), b.predict_log_proba(X_))
项目:bguFinalProject    作者:liranfar    | 项目源码 | 文件源码
def build_naive_bayes_model(x_train, y_train):
    """Fit and return a Gaussian naive Bayes classifier.

    y_train is flattened with ravel() so column vectors are accepted.
    """
    # GaussianNB.fit returns the estimator itself.
    return GaussianNB().fit(x_train, y_train.ravel())
项目:SecuML    作者:ANSSI-FR    | 项目源码 | 文件源码
def createPipeline(self):
        """Build the feature-scaling + GaussianNB pipeline for this model."""
        steps = [('scaler', StandardScaler()),
                 ('model', naive_bayes.GaussianNB())]
        self.pipeline = Pipeline(steps)
项目:Audio-classification-using-Bag-of-Frames-approach    作者:amogh3892    | 项目源码 | 文件源码
def naive_bayes_predict(training_samples, training_labels, test_samples, test_lables):
    """Train GaussianNB, time fit/predict, and write an accuracy report
    plus the predicted labels under the Temp directory.

    :param training_samples: training feature rows
    :param training_labels: labels aligned with training_samples
    :param test_samples: test feature rows
    :param test_lables: labels aligned with test_samples
    """
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()

    t0 = time()
    clf.fit(training_samples, training_labels)
    training_time = round(time()-t0, 3)

    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time()-t0, 3)

    from sklearn.metrics import accuracy_score

    # accuracy_score takes (y_true, y_pred); the result is the same
    # either way, but keep the documented order.
    acc = accuracy_score(test_lables, pred)

    # Renamed locals: the original rebound the parameter names to these
    # counts, shadowing the input data.
    no_features = np.array(training_samples).shape[1]
    n_train_samples = np.array(training_samples).shape[0]
    n_test_samples = np.array(test_samples).shape[0]

    with open("Temp\\results.txt","w") as outfile:
        # BUG FIX: report header typo "Alogirthm" -> "Algorithm".
        outfile.write("Algorithm : {}\n".format("Naive Bayes"))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(n_train_samples))
        outfile.write("No of test samples : {}\n".format(n_test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))

    with open("Temp\\result_labels.csv","wb") as outfile:
        np.savetxt(outfile,pred)
项目:Movie-Success-Predictor    作者:Blueteak    | 项目源码 | 文件源码
def main():
    """Evaluate SGD, GaussianNB and RandomForest classifiers on movie
    data, first with pre-release features only, then with post-release
    features.  (Python 2 snippet: statement-form print.)"""
    #before_release
    movie_info_before_release = load_movie_info_before_release()
    print '***Before release***'

    X = create_input(movie_info_before_release)
    Y = create_output_before_release(movie_info_before_release)

    # Logistic-loss SGD baseline.
    clf = linear_model.SGDClassifier(loss='log')
    test_classifier(clf, X, Y, 'before_release')

    clf = GaussianNB()
    test_classifier(clf, X, Y, 'before_release')

    clf = RandomForestClassifier(n_estimators=10, max_depth=10)
    test_classifier(clf, X, Y, 'before_release')

    #After release
    movie_info = load_movie_info()
    print '***After release***' 

    X = create_input(movie_info)
    Y = create_output(movie_info)

    clf = linear_model.SGDClassifier(loss='log')
    test_classifier(clf, X, Y, 'after_release')

    clf = GaussianNB()
    test_classifier(clf, X, Y, 'after_release')

    clf = RandomForestClassifier(n_estimators=10, max_depth=10)
    test_classifier(clf, X, Y, 'after_release')
项目:5th_place_solution_facebook_check_ins    作者:aikinogard    | 项目源码 | 文件源码
def nb_xyat_weight1(df_cell_train_feats, y_train, df_cell_test_feats):
    """GaussianNB over (x, y, hour, weekday, log10(accuracy)) features,
    fit with per-sample weights time**2; returns class probabilities
    for the test cells."""
    def prepare_feats(df):
        # Select the raw columns and derive log-accuracy, preserving
        # the original column order.
        feats = pd.DataFrame()
        for col in ("x", "y", "hour", "weekday"):
            feats[col] = df[col]
        feats["accuracy"] = df["accuracy"].apply(np.log10)
        return feats
    logging.info("train nb_xyat_weight1 model")
    model = GaussianNB()
    model.fit(prepare_feats(df_cell_train_feats), y_train, df_cell_train_feats["time"] ** 2)
    return model.predict_proba(prepare_feats(df_cell_test_feats))
项目:banking-class    作者:eli-goodfriend    | 项目源码 | 文件源码
def run_cat(filename,modelname,fileout,embeddings,new_run=True,run_parse=True,
            model_type='logreg',C=10.0,
            alpha=1.0, cutoff=0.50, n_iter=1):
    # pull relevant data and run parsing and classification
    #
    # filename:   CSV of transactions; if it has exactly two columns they
    #             are renamed to (raw, amount).
    # modelname:  pickle path for the model -- loaded when new_run=False,
    #             always saved back at the end.
    # fileout:    CSV path for the categorized output.
    # model_type: 'logreg' | 'passive-aggressive' | 'naive-bayes'.
    df = pd.read_csv(filename) 
    if (len(df.columns)==2): # make sure columns have the right names
        df.columns = ['raw','amount']

    if new_run: # initialize the model;
        if model_type=='logreg':
            model = linear_model.SGDClassifier(loss='log',warm_start=True,
                                           n_iter=n_iter,alpha=alpha)
        elif model_type=='passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C,warm_start=True)
        elif model_type=='naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else: # load a saved, pre-trained model
        modelFileLoad = open(modelname, 'rb')
        model = pickle.load(modelFileLoad)

    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)

    # Parse/categorize the dataframe with the chosen model.
    df = cat_df(df,model,us_cities,embeddings,new_run,run_parse,cutoff=cutoff,
                model_type=model_type)

    df.to_csv(fileout,index=False)

    # Save the (possibly updated) model back to disk -- note this runs
    # for every model_type, not only logistic regression.
    modelFileSave = open(modelname, 'wb')
    pickle.dump(model, modelFileSave)
    modelFileSave.close()


# ------ testing functions
项目:qlcoder    作者:L1nwatch    | 项目源码 | 文件源码
def solve():
    """Train GaussianNB on adult.txt and predict adult_test.txt,
    printing the predicted labels as one concatenated 0/1 string."""
    # Load training and test data: '#' lines are comments, fields are
    # comma-separated, raw values kept as bytes.
    training_arr = numpy.loadtxt('adult.txt', dtype=bytes, comments='#', delimiter=',')
    test_data = numpy.loadtxt('adult_test.txt', dtype=bytes, comments='#', delimiter=',')

    # (Earlier pre-allocation attempts, kept for reference.)
    # x_list = numpy.ndarray(len(training_arr))
    # y_list = numpy.ndarray(len(training_arr))

    # Column 12 holds the integer target label.
    y_list = [int(element[12]) for element in training_arr]
    # The first 12 columns are features, converted via transform().
    x_list = [[transform(x) for x in element[0:12]] for element in training_arr]
    # Apply the same conversion to every test row.
    test_data = [[transform(x) for x in element] for element in test_data]

    assert isinstance(y_list[0], int)  # labels are ints (0 or 1)
    assert len(x_list[0]) == 12  # each sample has 12 feature columns
    assert len(test_data[0]) == 12  # test rows match the feature width

    clf = GaussianNB()
    # One batch with the full class list, so partial_fit behaves the
    # same as a plain fit() here.
    clf.partial_fit(x_list, y_list, numpy.unique(y_list))  # clf.fit(x_list, y_list) works identically
    res_arr = clf.predict(test_data)
    partial_fit_result = "".join([str(x) for x in res_arr])

    print("[*] ??????: {}".format(partial_fit_result))
项目:tidml    作者:tidchile    | 项目源码 | 文件源码
def train(self, pd):
        """Fit and return a GaussianNB model on the provided dataset.

        `pd` is expected to expose sklearn-style .data and .target
        attributes (the name shadows the common pandas alias).
        (Python 2 snippet: statement-form print.)
        """
        model = naive_bayes.GaussianNB()
        model.fit(pd.data, pd.target)
        print model
        return model
项目:semihin    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def nb_experiment(scope_name, X, y):
    """Run GaussianNB over 50 pre-pickled train/test label splits and
    return the mean test accuracy.

    NOTE(review): the `return` sits INSIDE the `for lp` loop, so only
    the first element of lp_cand is ever evaluated -- confirm whether
    results should instead be collected per lp value.
    """
    for lp in lp_cand:
        results = []
        for r in range(50):
            # Pickled {row index: label} dicts for this split.
            with open('data/local/split/' + scope_name + '/lb' + str(lp).zfill(3) + '_' + str(r).zfill(
                    3) + '_train') as f:
                trainLabel = pk.load(f)
            with open('data/local/split/' + scope_name + '/lb' + str(lp).zfill(3) + '_' + str(r).zfill(
                    3) + '_test') as f:
                testLabel = pk.load(f)

            # Index the feature matrix by the split's row keys;
            # GaussianNB needs dense input, hence toarray() for sparse X.
            XTrain = X[trainLabel.keys()]
            XTest = X[testLabel.keys()]
            if not isinstance(XTrain, np.ndarray):
                XTrain = XTrain.toarray()
                XTest = XTest.toarray()
            yTrain = y[trainLabel.keys()]
            yTest = y[testLabel.keys()]

            # train
            #clf = MultinomialNB()
            clf = GaussianNB()
            #clf = BernoulliNB()
            clf.fit(XTrain, yTrain)

            # test
            pred = clf.predict(XTest)
            results.append(sum(pred == yTest) / float(yTest.shape[0]))
        return np.mean(results)
项目:DistributedClassifier    作者:rsboos    | 项目源码 | 文件源码
def MakeClassification(index,instancesData,classesData,instancesTest,type="proba",classifiersType="normal"):
    """Fit the index-th classifier on (instancesData, classesData) and
    return its predictions for instancesTest.

    type="proba" returns class probabilities, anything else hard labels.
    classifiersType="ova" swaps in one-vs-rest wrapped classifiers.
    Returns None (after printing an error) for an out-of-range index.
    (Python 2 snippet: statement-form print.)
    """
    classifiers = [
    OneVsRestClassifier(sklearn.svm.SVC(probability=1),4),
    DecisionTreeClassifier(random_state=0),
    KNeighborsClassifier(n_jobs=4),
    MLPClassifier(),
    sklearn.svm.SVC(probability=1,decision_function_shape="ovo"),
    OutputCodeClassifier(LinearSVC(random_state=0),code_size=2, random_state=0)
    ]
    if (classifiersType == "ova"):
        classifiers = [
            OneVsRestClassifier(sklearn.svm.SVC(probability=1),4),
            OneVsRestClassifier(DecisionTreeClassifier(random_state=0),4),
            OneVsRestClassifier(KNeighborsClassifier(),4),
            OneVsRestClassifier(MLPClassifier(),4),
            OneVsRestClassifier(GaussianNB(),4)
        ]
    if (index >= len(classifiers)):
        print "ERROR. The index is not valid."
        return None
    else:
        #print "Performing classification"
        if type == "proba":
            return classifiers[index].fit(instancesData,classesData).predict_proba(instancesTest)
        else:
            return classifiers[index].fit(instancesData,classesData).predict(instancesTest)
项目:Spam-Message-Classifier-sklearn    作者:ZPdesu    | 项目源码 | 文件源码
def __init__(self, training_data, training_target):
        """Keep the training set and create the GaussianNB estimator."""
        self.clf = GaussianNB()
        self.training_data = training_data
        self.training_target = training_target
项目:Spam-Message-Classifier-sklearn    作者:ZPdesu    | 项目源码 | 文件源码
def bayes_train(train_data, train_target):
    """Fit GaussianNB and print its classification report and confusion
    matrix evaluated on the SAME training data (fit diagnostics, not a
    held-out evaluation).  (Python 2 snippet: statement-form print.)"""

    model = GaussianNB()
    model.fit(train_data, train_target)
    expected = train_target
    predicted = model.predict(train_data)
    # summarize the fit of the model
    print metrics.classification_report(expected, predicted)
    print metrics.confusion_matrix(expected, predicted)
项目:CreditScoring    作者:cqw5    | 项目源码 | 文件源码
def NB(train_x, train_y, test_x, test_y):
    """Fit GaussianNB, evaluate on the test set, and return the AUC."""
    model = GaussianNB()
    model.fit(train_x, train_y)
    # Positive-class probability for every test row.
    predict_pro = [row[1] for row in model.predict_proba(test_x)]
    predict_y = model.predict(test_x)
    auc = evaluate_auc(predict_pro, test_y)
    evaluate(predict_y, test_y)
    return auc
项目:CreditScoring    作者:cqw5    | 项目源码 | 文件源码
def sub_NB(train_x, train_y, test_x, test_y):
    """Fit GaussianNB and return the positive-class probabilities for
    the test set as a numpy array (test_y is unused; kept so the
    signature matches the other model helpers)."""
    model = GaussianNB()
    model.fit(train_x, train_y)
    probs = model.predict_proba(test_x)
    return np.array([row[1] for row in probs])
项目:motion-classification    作者:matthiasplappert    | 项目源码 | 文件源码
def _init_model(self):
        """Factory hook: this classifier's estimator is GaussianNB."""
        return GaussianNB()
项目:eezzy    作者:3Blades    | 项目源码 | 文件源码
def generate_base_classification():
    """Return (estimator class, hyper-parameter grid) pairs for the base
    classification models.  Commented-out entries were excluded from the
    search but are kept for reference."""
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

    # BUG FIX: dict.update() returns None, so the original inline
    # `params(...).update({...})` paired KNeighborsClassifier with None
    # instead of its grid.  Build the dict first, then add the key.
    knn_params = params('n_neighbors', 'leaf_size', 'p')
    knn_params['algorithm'] = ['auto', 'brute', 'kd_tree', 'ball_tree']

    models = [
        #(LinearSVC, params('C', 'loss')),
#         (NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # NOTE: list.extend() also returns None, so the commented-out
        # SGDClassifier line below has the same in-place-mutation pitfall.
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        (KNeighborsClassifier, knn_params),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]

    return models
项目:intelligentCampus    作者:Jackal007    | 项目源码 | 文件源码
def __init__(self):
        SingleClassifier.SingleClassifier.__init__(self)
        # Weak/base learner used by this classifier: Gaussian naive Bayes.
        #weak classifier
        self.clf=GaussianNB()
项目:ml-traffic    作者:Zepheus    | 项目源码 | 文件源码
def __init__(self):
        # The underlying scikit-learn estimator for this learner.
        self.learner = GaussianNB()
项目:jenova    作者:dungba88    | 项目源码 | 文件源码
def get_naive_bayes(self):
        """Return a fresh Gaussian naive Bayes estimator instance."""
        return GaussianNB()
项目:fake-news-detection    作者:aldengolab    | 项目源码 | 文件源码
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classifier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            # BUG FIX: an estimator instance needs a single int for
            # max_depth; the original passed the grid [1, 5, 10, 15],
            # which sklearn rejects at fit time.  Depth grids belong in
            # self.params below.
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             }
项目:Image_Retrieval    作者:ddlricardo    | 项目源码 | 文件源码
def runner(i):
    """Worker: train a bagged GaussianNB for label column i and store
    probability predictions for the test and db sets.

    Relies on module-level globals: sem (concurrency-limiting
    semaphore), traindata/trainlabel, testdata, dbdata, svms (fitted
    models), result and dbresult (per-label prediction stores).
    """
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.BaggingClassifier(naive_bayes.GaussianNB())
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    #print("label %s done\n%s"
    # % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
项目:Image_Retrieval    作者:ddlricardo    | 项目源码 | 文件源码
def runner(i):
    """Worker: train an AdaBoost-boosted GaussianNB for label column i
    and store probability predictions for the test and db sets.

    Relies on module-level globals: sem, traindata/trainlabel,
    testdata, dbdata, svms, result, dbresult.
    """
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.AdaBoostClassifier(naive_bayes.GaussianNB())
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    #print("label %s done\n%s"
    # % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
项目:Image_Retrieval    作者:ddlricardo    | 项目源码 | 文件源码
def runner(i):
    """Worker: train a plain GaussianNB for label column i, store hard
    predictions for the test and db sets, and print a classification
    report for this label.

    Relies on module-level globals: sem, traindata/trainlabel,
    testdata, testlabel, dbdata, svms, result, dbresult.
    """
    sem.acquire()
    print("learn begin %s" % i)
    clf = naive_bayes.GaussianNB()
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict(testdata)
    dbresult[i] = clf.predict(dbdata)
    print("label %s done\n%s"
     % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
项目:plume    作者:WiseDoge    | 项目源码 | 文件源码
def test_gaussiannb():
    """Compare this project's GaussianNB against scikit-learn's
    (SKGaussianNB) on iris by printing both prediction vectors and the
    true targets for visual inspection."""
    iris = load_iris()

    ours = GaussianNB()
    ours.fit(iris.data, iris.target)
    print(ours.predict(iris.data))

    reference = SKGaussianNB()
    reference.fit(iris.data, iris.target)
    print(reference.predict(iris.data))


    print(iris.target)
项目:genrec    作者:kkanellis    | 项目源码 | 文件源码
def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
        """Construct a prototype classifier of the requested *type*.

        genres     -- iterable of genre labels; also mapped to integer ids.
        data       -- raw dataset, converted via self._convert_data().
        type       -- one of 'knn', 'svm', 'dtree', 'gnb', 'perc', 'mlp', 'ada'.
        name       -- human-readable display name for this classifier.
        clf_kwargs -- keyword arguments forwarded to the underlying estimator.

        Raises LookupError when *type* is not a recognised classifier key.
        """
        self.logger = get_logger('classifier')
        self.display_name = name

        self.genres = genres
        self.m_genres = dict((genre, i) for i, genre in enumerate(genres))
        self.randstate = np.random.RandomState()
        self.scaler = StandardScaler()

        clf_kwargs = {} if not clf_kwargs else clf_kwargs
        # Stochastic estimators get a shared RandomState for reproducibility.
        if type in ('svm', 'mlp'):
            clf_kwargs['random_state'] = self.randstate

        # Dispatch table: classifier key -> estimator class.
        estimator_classes = {
            'knn': KNeighborsClassifier,
            'svm': SVC,
            'dtree': DecisionTreeClassifier,
            'gnb': GaussianNB,
            'perc': Perceptron,
            'mlp': MLPClassifier,
            'ada': AdaBoostClassifier,
        }
        try:
            estimator_cls = estimator_classes[type]
        except KeyError:
            raise LookupError('Classifier type "{}" is invalid'.format(type))
        self.proto_clf = estimator_cls(**clf_kwargs)

        self._convert_data(data)

        self.logger.info('Classifier: {} (params={})'.format(
            self.proto_clf.__class__.__name__,
            clf_kwargs
        ))
项目:sia-cog    作者:deepakkumar1984    | 项目源码 | 文件源码
def getModels():
    """Return the list of supported model names, in registration order.

    NOTE(review): "Rigid" looks like a typo for sklearn's "Ridge", but the
    string is preserved as-is since callers may match on it.
    """
    return [
        "LinearRegression",
        "BayesianRidge",
        "ARDRegression",
        "ElasticNet",
        "HuberRegressor",
        "Lasso",
        "LassoLars",
        "Rigid",
        "SGDRegressor",
        "SVR",
        "MLPClassifier",
        "KNeighborsClassifier",
        "SVC",
        "GaussianProcessClassifier",
        "DecisionTreeClassifier",
        "RandomForestClassifier",
        "AdaBoostClassifier",
        "GaussianNB",
        "LogisticRegression",
        "QuadraticDiscriminantAnalysis",
    ]
项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def test_AdaBoostClassifier_base_classifier(*data):
    '''
    Test AdaBoost with two base estimators (default decision tree vs.
    GaussianNB) and plot staged train/test scores against ensemble size.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    from sklearn.naive_bayes import GaussianNB
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()
    ax=fig.add_subplot(2,1,1)

    # Top panel: default base estimator (decision tree stump).
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train,y_train)
    ## graph
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    # Fixed legend typo: "Traing score" -> "Training score".
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1)
    ax.set_title("AdaBoostClassifier with Decision Tree")

    # Bottom panel: GaussianNB as the base estimator.
    ax=fig.add_subplot(2,1,2)
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1,base_estimator=GaussianNB())
    clf.fit(X_train,y_train)
    ## graph
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1)
    ax.set_title("AdaBoostClassifier with Gaussian Naive Bayes")
    plt.show()
项目:dudulu    作者:MashiMaroLjc    | 项目源码 | 文件源码
def script_run():
    """Build keyword features from train.txt, fit a GaussianNB model and
    persist it with joblib.

    Original comments were mojibake ("??") in the scrape; translated to
    English where the intent is evident from the code.
    """
    # Build the keyword vocabulary from the training corpus.
    kw_list = build_key_word("train.txt")
    # Persist the vocabulary, one word per line; context manager guarantees
    # the file is closed even if a write fails (was manual open/close).
    with open("new_word.txt", encoding="utf-8", mode="w") as fp:
        for word in kw_list:
            fp.write(word + "\n")
   # kw_list = load_key_words("word.txt")
    feature, label = get_feature("train.txt", kw_list)
    gnb = GaussianNB()
    gnb = gnb.fit(feature, label)
    joblib.dump(gnb, 'model/gnb.model')
    print("????")  # NOTE(review): message was already mojibake in the source
    # print(feature, label)
项目:2016CCF-SouGou    作者:AbnerYang    | 项目源码 | 文件源码
def GaussianNBLocalModel(localTrainFeature, localTestFeature, localTrainLabel, config):
    # Train a Gaussian Naive Bayes model on the local train split and predict
    # on the local test split (Python 2 code: print statements).
    # Features are sparse matrices, densified via .toarray() because
    # GaussianNB requires dense input.
    print 'train...'
    model = GaussianNB()
    model.fit(X = localTrainFeature.toarray(), y = localTrainLabel)
    print 'predict...'
    if config['prob'] == False:
        # Hard label predictions when probabilities are not requested.
        return model.predict(localTestFeature.toarray())
    else:
        # NOTE(review): returns *log* probabilities despite the 'prob' flag —
        # confirm callers expect predict_log_proba rather than predict_proba.
        return model.predict_log_proba(localTestFeature.toarray())

#-- Gaussian Naive Bayes online predict model frame
项目:python_utils    作者:Jayhello    | 项目源码 | 文件源码
def sk_demo_1():
    # Minimal GaussianNB demo (Python 2 code: print statements):
    # two point clusters on opposite sides of the origin, labelled 1 and 2.
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    # A point near the class-1 cluster should be classified as 1.
    test_item = np.array([[-0.8, -1]])
    print clf.predict(test_item)
    # [1]
    print clf.get_params()
def sk_nb_diabetes():
    # Set up a GaussianNB classifier on the diabetes train/test split.
    # NOTE(review): snippet appears truncated by the scrape — the classifier
    # is created but never fitted or evaluated in the visible code.
    x_train, x_test, y_train, y_test = load_diabetes_data()
    clf = GaussianNB()
def test_majority_label_iris():
    """Check classification by majority label on dataset iris."""
    estimators = [
        ('lr', LogisticRegression(random_state=123)),
        ('rf', RandomForestClassifier(random_state=123)),
        ('gnb', GaussianNB()),
    ]
    eclf = VotingClassifier(estimators=estimators, voting='hard')
    # Hard voting should reach ~95% mean CV accuracy on iris.
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.95, decimal=2)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_weights_iris():
    """Check classification by average probabilities on dataset iris."""
    estimators = [
        ('lr', LogisticRegression(random_state=123)),
        ('rf', RandomForestClassifier(random_state=123)),
        ('gnb', GaussianNB()),
    ]
    # Soft voting with unequal weights — GaussianNB dominates (weight 10).
    eclf = VotingClassifier(estimators=estimators,
                            voting='soft',
                            weights=[1, 2, 10])
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.93, decimal=2)