The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.neural_network.MLPClassifier().
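Before the project examples, here is a minimal, self-contained sketch of the basic MLPClassifier workflow. The synthetic data and all parameter values are illustrative only and are not taken from any of the projects below:

# Minimal MLPClassifier workflow: scale, fit, score, predict_proba.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# MLPs are sensitive to feature scale, so standardize first.
scaler = StandardScaler().fit(X_train)
clf = MLPClassifier(hidden_layer_sizes=(50,), solver='adam',
                    max_iter=500, random_state=0)
clf.fit(scaler.transform(X_train), y_train)

print("test accuracy:", clf.score(scaler.transform(X_test), y_test))
print("class probabilities of first test sample:",
      clf.predict_proba(scaler.transform(X_test[:1])))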
def get_classifier_class(class_name):
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis,
    }
    if class_name not in name_table:
        raise ValueError('No such classifier')
    return name_table[class_name]
def __create_classifiers(self):
    classifiers = list()
    classifiers.append({"func": linear_model.SGDClassifier(loss="log"), "name": "sgd"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(1, weights='distance'), "name": "knn1"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(3, weights='distance'), "name": "knn3"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(5, weights='distance'), "name": "knn5"})
    classifiers.append({"func": GaussianNB(), "name": "naive_bayes"})
    # classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
    # classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
    # classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
    return classifiers
def __init__(self, filepath="files", is_delta_mode=False, verbose=False):
    self.verbose = verbose
    self.message = ""
    self.filepath = filepath
    self.is_delta = is_delta_mode
    # Load files
    try:
        self.NN = pickle.load(open(self.filepath + '/model.pkl', 'rb'))
        # Load user names
        userList = open(self.filepath + "/metadata.txt", "r")
        self.users = userList.read().split('\n')
        userList.close()
    except FileNotFoundError:
        print("Model and metadata.txt not found.")
        self.mlp = MLPClassifier(hidden_layer_sizes=(50, 50, 50), activation='logistic')
    if self.verbose:
        print("Delta Mode enable = ", is_delta_mode)

# Train the network and generate model.pkl file and csv file
def rede_neural(X, y):
    print("Iniciando treinamento da Rede Neural")  # "Starting neural network training"
    X2 = normalize(X)
    # Note: the pre-0.18 'algorithm' parameter is named 'solver' in released scikit-learn.
    clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='tanh', solver='adam',
                        alpha=1e-5, learning_rate='constant', tol=1e-8,
                        learning_rate_init=0.0002, early_stopping=True,
                        validation_fraction=0.2)
    # modern sklearn.model_selection API; the original used the removed cross_validation.KFold
    kf = KFold(n_splits=3)
    i = 0
    for train, test in kf.split(X2):
        start = time.time()
        i = i + 1
        print("Treinamento", i)  # "Training"
        # split dataset into train and test
        # X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=1)
        X_train, X_test, y_train, y_test = X2[train], X2[test], y[train], y[test]
        # fit
        clf.fit(X_train, y_train)
        print("score:", clf.score(X_test, y_test), "(", (time.time() - start) / 60.0, "minutos )")
    return clf
def NN_model(X, target):
    '''A perceptron classifier for classifying whether a route should be
    made one-way or not for a particular time-period.

    Parameters
    ==========
    X : int
        An integer column matrix

    The default solver 'adam' works well on relatively large datasets
    (thousands of training samples or more) in terms of both training time
    and validation score. For small datasets, however, 'lbfgs' can converge
    faster and perform better.
    activation: 'logistic', the logistic sigmoid function,
        returns f(x) = 1 / (1 + exp(-x)).
    alpha: 0.0001 default
    learning_rate: 'constant'
    max_iter : int, optional, default 200
    tol : float, optional, default 1e-4
    '''
    y = [0, 1]
    # hidden_layer_sizes must contain positive integers; the original (0, 0)
    # would raise a ValueError, so a small hidden layer is used instead.
    clf = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic',
                        solver='lbfgs', early_stopping=True)
    clf.fit(X, y)
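The docstring's solver guidance is easy to check empirically. A minimal sketch (synthetic data and sizes are illustrative assumptions, not from the project above) contrasting 'lbfgs' and 'adam' on a small problem:

# Compare 'lbfgs' and 'adam' on a small synthetic dataset.
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=100, n_features=10, random_state=0)
for solver in ('lbfgs', 'adam'):
    clf = MLPClassifier(hidden_layer_sizes=(10,), solver=solver,
                        max_iter=1000, random_state=0)
    clf.fit(X, y)
    print(solver, "training accuracy:", clf.score(X, y))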
def get_classifier(self):
    algo = self.algo
    if algo == "GBT":
        return GradientBoostingClassifier()
    elif algo == "RF":
        return RandomForestClassifier()
    elif algo == "ADB":
        return AdaBoostClassifier()
    elif algo == "DT":
        return DecisionTreeClassifier()
    elif algo == "NB":
        return BernoulliNB()
    elif algo == "SGD":
        return SGDClassifier()
    elif algo == "SVC":
        return LinearSVC()
    elif algo == "MLPC":
        return MLPClassifier(activation='logistic', batch_size='auto',
                             early_stopping=True, hidden_layer_sizes=(100,),
                             learning_rate='adaptive', learning_rate_init=0.1,
                             max_iter=5000, random_state=1, solver='lbfgs',
                             tol=0.0001, validation_fraction=0.1,
                             verbose=False, warm_start=False)
    return 0
def test():
    from sklearn.neural_network import MLPClassifier
    records = np.random.randint(0, 2, (10, 6))
    results = np.random.randint(0, 2, (10, 3))
    # records = np.eye(6)
    # results = records
    nn = mynn()
    nn2 = MLPClassifier()
    nn._fit(records, results)
    nn2.fit(records, results)
    print(results)
    print(nn._predict(records))
    print(nn2.predict(records))
    # print nn.ww
    # print results
    # print nn.predict(records)
def neural_network(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("NEURAL NETWORK.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)
    train_features_scaled, test_features_scaled = util.scale_features(train_features, test_features)

    classifier_nn = MLPClassifier(hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set],),
                                  alpha=const.PAR_NN_ALPHA[sensors_set],
                                  max_iter=const.PAR_NN_MAX_ITER,
                                  tol=const.PAR_NN_TOL)
    classifier_nn.fit(train_features_scaled, train_classes)
    test_prediction = classifier_nn.predict(test_features_scaled)
    acc = accuracy_score(test_classes, test_prediction)
    print("ACCURACY : " + str(acc))
    print("END NEURAL NETWORK")

    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    file_content = "acc\n" + str(acc)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_NEURAL_NETWORK_RESULTS, 'w') as f:
        f.write(file_content)

# support vector machine algorithm training on all train set and test on all test set
def para_ann(dataframe):
    ### Training and Testing Set
    random.seed(0)
    sample_index = random.sample(list(dataframe.index), int(1 * len(dataframe.index)))
    para_index = random.sample(sample_index, int(0.5 * len(sample_index)))
    op_df_train = dataframe.loc[para_index]  # .ix was removed in modern pandas
    op_df_holdout = dataframe.drop(para_index)
    columns = ['SMA_10', 'Momentum', 'stoch_K', 'WMA_10', 'MACD', 'A/D', 'Volume']
    X = op_df_train[columns].to_numpy()      # as_matrix() was removed in modern pandas
    Y = op_df_train['Adj Close'].to_numpy()

    ### ANN model
    lbfgs_ann = MLPClassifier(solver='lbfgs', alpha=1e-5,
                              hidden_layer_sizes=(5, 2), random_state=1).fit(X, Y)
    X_holdout = op_df_holdout[columns].to_numpy()
    Y_holdout = op_df_holdout['Adj Close'].to_numpy()

    Z = pd.DataFrame(np.zeros((1, 1)), columns=['ANN with backpropagation'])
    Y_result = Y_holdout
    pred = lbfgs_ann.predict(X_holdout)
    Y_result = np.vstack((Y_result, np.array(pred)))
    Z.iloc[0, 0] = sum(pred == Y_holdout) / len(pred)
    Y_result = Y_result.T
    return Z, Y_result
def MLPClassifier(X_train, y_train):
    # Note: this function shadows the sklearn class of the same name and
    # relies on X_test being defined in the enclosing scope.
    from sklearn.neural_network import MLPClassifier
    now = datetime.datetime.now()
    print("MLPClassifier start in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    MLPC = MLPClassifier()
    MLPC.fit(X_train, y_train)
    now = datetime.datetime.now()
    print("MLPClassifier train done in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    y_pred_MLPC = MLPC.predict_proba(X_test)
    y_pred_MLPC = pd.DataFrame(y_pred_MLPC[:, 1:2], columns=['MLPC_predictions'])
    y_pred_MLPC.to_csv('MLPC_result.csv', index=False)
    now = datetime.datetime.now()
    print("MLPClassifier predict done in " + now.strftime('%Y-%m-%d %H:%M:%S'))

# SVC model
def MLPClassifier(X_train, y_train, X_test):
    from sklearn.neural_network import MLPClassifier
    now = datetime.datetime.now()
    print("MLPClassifier start in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    MLPC = MLPClassifier(activation='relu', hidden_layer_sizes=100)
    MLPC.fit(X_train, y_train)
    now = datetime.datetime.now()
    print("MLPClassifier train done in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    y_pred_MLPC = MLPC.predict_proba(X_test)
    y_pred_MLPC = pd.DataFrame(y_pred_MLPC[:, 1:2], columns=['MLPC_predictions'])
    y_pred_MLPC.to_csv('MLPC_result_all.csv', index=False)
    now = datetime.datetime.now()
    print("MLPClassifier predict done in " + now.strftime('%Y-%m-%d %H:%M:%S'))

# SVC model
def _train(self, X_matrix, y, **kwargs):
    """Train the model.

    Parameters:
        X_matrix (numpy.array): feature matrix of the training data
        y (numpy.array): labels of the training data

    Returns:
        sklearn.model: the fitted sklearn estimator
    """
    from sklearn.neural_network import MLPClassifier
    model = MLPClassifier(**kwargs)
    model.fit(X_matrix, y)
    return model
def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot learning for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400
    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0, max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)
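The snippet above assumes module-level labels, params, and plot_args, pairing each training strategy with a label and a line style, as in scikit-learn's training-curves example that it appears to come from. A shortened sketch of plausible definitions (the exact values are assumptions):

# One entry per training strategy; each `param` dict feeds MLPClassifier(**param).
params = [
    {'solver': 'sgd', 'learning_rate': 'constant', 'learning_rate_init': 0.2,
     'momentum': 0},
    {'solver': 'sgd', 'learning_rate': 'constant', 'learning_rate_init': 0.2,
     'momentum': 0.9, 'nesterovs_momentum': True},
    {'solver': 'adam', 'learning_rate_init': 0.01},
]
labels = ["constant learning-rate",
          "constant with Nesterov's momentum",
          "adam"]
plot_args = [{'c': 'red', 'linestyle': '-'},
             {'c': 'blue', 'linestyle': '-'},
             {'c': 'black', 'linestyle': '-'}]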
def test_alpha():
    # Test that larger alpha yields weights closer to zero
    X = X_digits_binary[:100]
    y = y_digits_binary[:100]

    alpha_vectors = []
    alpha_values = np.arange(2)
    absolute_sum = lambda x: np.sum(np.abs(x))

    for alpha in alpha_values:
        mlp = MLPClassifier(hidden_layer_sizes=10, alpha=alpha, random_state=1)
        mlp.fit(X, y)
        alpha_vectors.append(np.array([absolute_sum(mlp.coefs_[0]),
                                       absolute_sum(mlp.coefs_[1])]))

    for i in range(len(alpha_values) - 1):
        assert (alpha_vectors[i] > alpha_vectors[i + 1]).all()
def test_lbfgs_classification():
    # Test lbfgs on classification.
    # It should achieve a score higher than 0.95 for the binary and multi-class
    # versions of the digits dataset.
    # Note: the pre-0.18 'algorithm' parameter is named 'solver' in released
    # scikit-learn, and 'l-bfgs' is spelled 'lbfgs'.
    for X, y in classification_datasets:
        X_train = X[:150]
        y_train = y[:150]
        X_test = X[150:]
        expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
                                max_iter=150, shuffle=True, random_state=1,
                                activation=activation)
            mlp.fit(X_train, y_train)
            y_predict = mlp.predict(X_test)
            assert_greater(mlp.score(X_train, y_train), 0.95)
            assert_equal((y_predict.shape[0], y_predict.dtype.kind),
                         expected_shape_dtype)
def test_learning_rate_warmstart():
    # Tests that warm_start reuses the past solution.
    X = [[3, 2], [1, 6], [5, 6], [-2, -4]]
    y = [1, 1, 1, 0]
    for learning_rate in ["invscaling", "constant"]:
        mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=4,
                            learning_rate=learning_rate, max_iter=1,
                            power_t=0.25, warm_start=True)
        mlp.fit(X, y)
        prev_eta = mlp._optimizer.learning_rate
        mlp.fit(X, y)
        post_eta = mlp._optimizer.learning_rate

        if learning_rate == 'constant':
            assert_equal(prev_eta, post_eta)
        elif learning_rate == 'invscaling':
            assert_equal(mlp.learning_rate_init / pow(8 + 1, mlp.power_t),
                         post_eta)
def test_partial_fit_classification():
    # Test partial_fit on classification.
    # `partial_fit` should yield the same results as `fit` for binary and
    # multi-class classification.
    for X, y in classification_datasets:
        mlp = MLPClassifier(solver='sgd', max_iter=100, random_state=1,
                            tol=0, alpha=1e-5, learning_rate_init=0.2)
        mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPClassifier(solver='sgd', random_state=1, alpha=1e-5,
                            learning_rate_init=0.2)
        for i in range(100):
            mlp.partial_fit(X, y, classes=np.unique(y))
        pred2 = mlp.predict(X)
        assert_array_equal(pred1, pred2)
        assert_greater(mlp.score(X, y), 0.95)
def test_predict_proba_binary():
    # Test that predict_proba works as expected for binary class.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]

    clf = MLPClassifier(hidden_layer_sizes=5)
    clf.fit(X, y)
    y_proba = clf.predict_proba(X)
    y_log_proba = clf.predict_log_proba(X)

    (n_samples, n_classes) = y.shape[0], 2
    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    assert_equal(y_proba.shape, (n_samples, n_classes))
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))
    assert_equal(roc_auc_score(y, y_proba[:, 1]), 1.0)
def __init__(self, path):
    '''
    Constructor
    '''
    self.path = path
    self.model = MLPClassifier(solver='lbfgs', max_iter=600,
                               hidden_layer_sizes=(300,), random_state=1)
    # self.model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(200, 7), random_state=1)
    # self.model = MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto',
    #                            beta_1=0.9, beta_2=0.999, early_stopping=False,
    #                            epsilon=1e-08, hidden_layer_sizes=(200,), learning_rate='constant',
    #                            learning_rate_init=0.001, max_iter=300, momentum=0.9,
    #                            nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
    #                            solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
    #                            warm_start=False)
    self.model_name = 'mlp'
    self.scaler_mlp = 'scaler'
    self.scaler = StandardScaler()
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Compare multiple classifiers and display the best one."""
    utils.print_success("Comparison of different classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors": KNeighborsClassifier(3),
        # "GaussianProcess": GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree": DecisionTreeClassifier(max_depth=5),
        # "MLP": MLPClassifier(),
        # "AdaBoost": AdaBoostClassifier(),
        # "GaussianNB": GaussianNB(),
        # "QDA": QuadraticDiscriminantAnalysis(),
        # "SVM": SVC(kernel="linear", C=0.025),
        # "GradientBoosting": GradientBoostingClassifier(),
        # "ExtraTrees": ExtraTreesClassifier(),
        # "LogisticRegression": LogisticRegression(),
        # "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
def test_basic(self, single_chunk_classification):
    X, y = single_chunk_classification
    a = nn.ParitalMLPClassifier(classes=[0, 1], random_state=0)
    b = nn_.MLPClassifier(random_state=0)
    a.fit(X, y)
    b.partial_fit(X, y, classes=[0, 1])
    assert_estimator_equal(a, b)
def remake_mlp(self, event=None):
    sizes = tuple([int(s.strip()) for s in str(self.layer_sizes.text()).split(',')])
    alpha = float(self.alpha_var.text())
    self.mlp = neural_network.MLPClassifier(hidden_layer_sizes=sizes, alpha=alpha)
def function_approx():
    # init
    clf = MLPClassifier(solver='sgd', alpha=1e-5, activation='relu',
                        hidden_layer_sizes=(10,), learning_rate='constant',
                        learning_rate_init=0.001, random_state=1,
                        early_stopping=False, verbose=True)

    def fn(x, y):
        return round(x + y)

    # train
    _MAX = 3
    X = []
    y = []
    for i in range(1000):
        _x, _y = random.randint(0, _MAX), random.randint(0, _MAX)
        # _xnoise, _ynoise = random.random(), random.random()
        _xnoise, _ynoise = 0, 0
        X.append([_x / _MAX + _xnoise, _y / _MAX + _ynoise])
        y.append(fn(_x, _y))
    print(X)
    print(y)
    clf.fit(X, y)
    print("weights:", clf.coefs_)
    print("biases: ", clf.intercepts_)

    # classify
    for i in range(10):
        _x, _y = random.uniform(0, _MAX), random.uniform(0, _MAX)
        classification = clf.predict([[_x / _MAX, _y / _MAX]])
        print("Classified {} as {} (should be {})".format(
            [_x, _y], classification, fn(_x, _y)))
def mnist():
    # digits = datasets.load_digits()  # subsampled version
    # fetch_mldata was removed in scikit-learn 0.22; fetch_openml is the
    # modern way to load the same data set.
    mnist = datasets.fetch_openml("mnist_784", version=1, as_frame=False)
    print("Got the data.")

    X, y = mnist.data / 255., mnist.target
    X_train, X_test = X[:60000], X[60000:]
    y_train, y_test = y[:60000], y[60000:]

    # images_and_labels = list(zip(digits.images, digits.target))
    # for index, (image, label) in enumerate(images_and_labels[:4]):
    #     plt.subplot(2, 4, index + 1)
    #     plt.axis('off')
    #     plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    #     plt.title('Training: %i' % label)

    classifiers = [
        # ("SVM", svm.SVC(gamma=0.001)),  # TODO doesn't finish; needs downsampled version?
        ("NN", MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                             solver='sgd', verbose=10, tol=1e-4, random_state=1,
                             learning_rate_init=.1)),
    ]
    for name, classifier in classifiers:
        print(name)
        classifier.fit(X_train, y_train)
        predicted = classifier.predict(X_test)
        print("Classification report for classifier %s:\n%s\n"
              % (classifier, metrics.classification_report(y_test, predicted)))
        print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, predicted))

    # images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))
    # for index, (image, prediction) in enumerate(images_and_predictions[:4]):
    #     plt.subplot(2, 4, index + 5)
    #     plt.axis('off')
    #     plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    #     plt.title('Prediction: %i' % prediction)
    # plt.show()
def MakeClassification(index, instancesData, classesData, instancesTest, type="proba", classifiersType="normal"):
    classifiers = [
        OneVsRestClassifier(sklearn.svm.SVC(probability=1), 4),
        DecisionTreeClassifier(random_state=0),
        KNeighborsClassifier(n_jobs=4),
        MLPClassifier(),
        sklearn.svm.SVC(probability=1, decision_function_shape="ovo"),
        OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0)
    ]
    if (classifiersType == "ova"):
        classifiers = [
            OneVsRestClassifier(sklearn.svm.SVC(probability=1), 4),
            OneVsRestClassifier(DecisionTreeClassifier(random_state=0), 4),
            OneVsRestClassifier(KNeighborsClassifier(), 4),
            OneVsRestClassifier(MLPClassifier(), 4),
            OneVsRestClassifier(GaussianNB(), 4)
        ]
    if (index >= len(classifiers)):
        print("ERROR. The index is not valid.")
        return None
    else:
        # print("Performing classification")
        if type == "proba":
            return classifiers[index].fit(instancesData, classesData).predict_proba(instancesTest)
        else:
            return classifiers[index].fit(instancesData, classesData).predict(instancesTest)
def __init__(self):
    SingleClassifier.SingleClassifier.__init__(self)
    # weak classifier
    self.clf = MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto',
                             beta_1=0.9, beta_2=0.999, early_stopping=False,
                             epsilon=1e-08, hidden_layer_sizes=(5, 2),
                             learning_rate='constant', learning_rate_init=0.001,
                             max_iter=200, momentum=0.9, nesterovs_momentum=True,
                             power_t=0.5, random_state=1, shuffle=True,
                             solver='lbfgs', tol=0.0001, validation_fraction=0.1,
                             verbose=False, warm_start=False)
def learn(x, y, test_x):
    (temp_x, temp_y) = tools.simple_negative_sample(x, y, variables.select_rate_nn)
    clf = MLPClassifier(hidden_layer_sizes=(variables.unit_num_nn,),
                        random_state=2017, max_iter=2000,
                        alpha=variables.alpha_nn,
                        learning_rate_init=variables.learning_rate_init_nn,
                        solver="adam", activation="relu").fit(temp_x, temp_y)
    prediction_list = clf.predict(test_x)
    prediction_list_prob = clf.predict_proba(test_x)
    return prediction_list, prediction_list_prob
def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
    self.logger = get_logger('classifier')
    self.display_name = name

    self.genres = genres
    self.m_genres = {genre: i for i, genre in enumerate(genres)}
    self.randstate = np.random.RandomState()
    self.scaler = StandardScaler()

    clf_kwargs = {} if not clf_kwargs else clf_kwargs
    if type in ['svm', 'mlp']:
        clf_kwargs['random_state'] = self.randstate

    if type == 'knn':
        self.proto_clf = KNeighborsClassifier(**clf_kwargs)
    elif type == 'svm':
        self.proto_clf = SVC(**clf_kwargs)
    elif type == 'dtree':
        self.proto_clf = DecisionTreeClassifier(**clf_kwargs)
    elif type == 'gnb':
        self.proto_clf = GaussianNB(**clf_kwargs)
    elif type == 'perc':
        self.proto_clf = Perceptron(**clf_kwargs)
    elif type == 'mlp':
        self.proto_clf = MLPClassifier(**clf_kwargs)
    elif type == 'ada':
        self.proto_clf = AdaBoostClassifier(**clf_kwargs)
    else:
        raise LookupError('Classifier type "{}" is invalid'.format(type))

    self._convert_data(data)

    self.logger.info('Classifier: {} (params={})'.format(
        self.proto_clf.__class__.__name__, clf_kwargs))
def __init__(self):
    params = dict(clf__hidden_layer_sizes=[(50,), (70,), (100,)],
                  clf__solver=['lbfgs', 'adam', 'sgd'])
    super().__init__(neural_network.MLPClassifier(), params, 'MLPClassifier')
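The clf__ prefixes in the parameter grid imply the estimator is searched inside a Pipeline whose final step is named 'clf'; the wrapping base class is not shown here. A minimal sketch of that assumed setup with GridSearchCV (data set and step names are illustrative):

# Hypothetical reconstruction: a pipeline step named 'clf' is what makes
# parameter names like clf__hidden_layer_sizes resolve during the search.
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_digits(return_X_y=True)
pipe = Pipeline([('scale', StandardScaler()),
                 ('clf', MLPClassifier(max_iter=500, random_state=0))])
params = dict(clf__hidden_layer_sizes=[(50,), (70,), (100,)],
              clf__solver=['lbfgs', 'adam', 'sgd'])
search = GridSearchCV(pipe, params, cv=3)
search.fit(X, y)
print(search.best_params_)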
def train(self, training_set, training_target):
    clf = MLPClassifier(solver='adam', alpha=1e-5,
                        hidden_layer_sizes=(10, 6), random_state=1)
    clf.fit(training_set, training_target)
    joblib.dump(clf, 'output/MLP.pkl')
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
def test_partial_fit_classes_error():
    # Tests that passing different classes to partial_fit raises an error
    X = [[3, 2]]
    y = [0]
    clf = MLPClassifier(solver='sgd')
    clf.partial_fit(X, y, classes=[0, 1])
    assert_raises(ValueError, clf.partial_fit, X, y, classes=[1, 2])
def test_partial_fit_errors():
    # Test partial_fit error handling.
    X = [[3, 2], [1, 6]]
    y = [1, 0]

    # no classes passed
    assert_raises(ValueError,
                  MLPClassifier(solver='sgd').partial_fit, X, y, classes=[2])

    # lbfgs doesn't support partial_fit
    assert_false(hasattr(MLPClassifier(solver='lbfgs'), 'partial_fit'))
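For contrast with the error cases above, a minimal sketch of the correct partial_fit protocol: the full label set must be declared on the first call, and the solver must be a stochastic one ('sgd' or 'adam'). Data and parameters here are illustrative:

# Incremental training: pass the full set of classes on the first call only.
import numpy as np
from sklearn.neural_network import MLPClassifier

X = np.array([[3., 2.], [1., 6.], [5., 6.], [-2., -4.]])
y = np.array([1, 1, 1, 0])

clf = MLPClassifier(solver='sgd', learning_rate_init=0.2, random_state=1)
clf.partial_fit(X, y, classes=np.unique(y))  # first call: declare all classes
for _ in range(50):                          # later calls: classes omitted
    clf.partial_fit(X, y)
print(clf.predict(X))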
def test_params_errors():
    # Test that invalid parameters raise a ValueError
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier

    assert_raises(ValueError, clf(hidden_layer_sizes=-1).fit, X, y)
    assert_raises(ValueError, clf(max_iter=-1).fit, X, y)
    assert_raises(ValueError, clf(shuffle='true').fit, X, y)
    assert_raises(ValueError, clf(alpha=-1).fit, X, y)
    assert_raises(ValueError, clf(learning_rate_init=-1).fit, X, y)
    assert_raises(ValueError, clf(solver='hadoken').fit, X, y)
    assert_raises(ValueError, clf(learning_rate='converge').fit, X, y)
    assert_raises(ValueError, clf(activation='cloak').fit, X, y)
def test_sparse_matrices():
    # Test that sparse and dense input matrices output the same results.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]
    X_sparse = csr_matrix(X)
    mlp = MLPClassifier(random_state=1, hidden_layer_sizes=15)
    mlp.fit(X, y)
    # Note: decision_function existed only in the pre-release MLPClassifier
    # API; released versions expose predict_proba instead.
    pred1 = mlp.decision_function(X)
    mlp.fit(X_sparse, y)
    pred2 = mlp.decision_function(X_sparse)
    assert_almost_equal(pred1, pred2)
    pred1 = mlp.predict(X)
    pred2 = mlp.predict(X_sparse)
    assert_array_equal(pred1, pred2)
def test_tolerance():
    # Test tolerance.
    # It should force the solver to exit the loop when it converges.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd', verbose=10)
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
def test_verbose_sgd():
    # Test verbose.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(solver='sgd', max_iter=2, verbose=10,
                        hidden_layer_sizes=2)
    old_stdout = sys.stdout
    sys.stdout = output = StringIO()

    clf.fit(X, y)
    clf.partial_fit(X, y)

    sys.stdout = old_stdout
    assert 'Iteration' in output.getvalue()
def test_early_stopping():
    X = X_digits_binary[:100]
    y = y_digits_binary[:100]
    tol = 0.2
    clf = MLPClassifier(tol=tol, max_iter=3000, solver='sgd',
                        early_stopping=True)
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)

    valid_scores = clf.validation_scores_
    best_valid_score = clf.best_validation_score_
    assert_equal(max(valid_scores), best_valid_score)
    assert_greater(best_valid_score + tol, valid_scores[-2])
    assert_greater(best_valid_score + tol, valid_scores[-1])
def test_adaptive_learning_rate():
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd',
                        learning_rate='adaptive', verbose=10)
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
    assert_greater(1e-6, clf._optimizer.learning_rate)
def indexThreeMLP():
    x = 10 ** 7  # scaling factor for the volume feature

    cur1 = conn.cursor()
    cur1.execute('select * from szzs_black_swan limit 2,9999999999999999;')
    result1 = cur1.fetchall()
    fv = []  # feature vectors
    for res in result1:
        a = []
        a.append(float(list(res)[3]))
        a.append(float(list(res)[5]) / x)  # scale the volume column down
        fv.append(a)

    cur2 = conn.cursor()
    cur2.execute('select rise_fall_next from szzs_black_swan limit 2,9999999999999999;')
    result2 = cur2.fetchall()
    cla = []  # class labels
    for res in result2:
        cla.append(int(list(res)[0]))

    cur3 = conn.cursor()
    cur3.execute('select * from szzs_black_swan order by date desc;')
    result3 = cur3.fetchmany(1)
    test = []  # test sample (most recent row)
    for res in result3:
        test.append(float(list(res)[3]))
        test.append(float(list(res)[5]) / x)

    fv = np.array(fv)
    cla = np.array(cla)
    test = np.array(test)

    mlp = MLPClassifier(solver='lbfgs', alpha=0.0001,
                        hidden_layer_sizes=(1000, 200), random_state=1,
                        batch_size='auto')
    mlp.fit(fv, cla)  # train the model
    test = [test]     # predict expects a 2-D array
    prediction = mlp.predict(test)
    return test, prediction
def worker10(fv1, cla1, test1):
    func_name = sys._getframe().f_code.co_name
    mlp = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(1000, 200), random_state=1,
                        batch_size='auto')
    mlp.fit(fv1, cla1)  # train the model
    prediction = mlp.predict(test1)
    q.put((prediction, func_name))  # hand the result back through the queue
def worker11(fv2, cla2, test2):
    func_name = sys._getframe().f_code.co_name
    mlp = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(1000, 200), random_state=1,
                        batch_size='auto')
    mlp.fit(fv2, cla2)  # train the model
    prediction = mlp.predict(test2)
    q.put((prediction, func_name))  # hand the result back through the queue
def train_test(train, test, res_dir="res/", disp=True, outfilename=None):
    """Compare multiple classifiers and display the best one."""
    utils.print_success("Comparison of different classifiers")
    if train is not None and test is not None:
        train_features = []
        test_features = []
        train_groundtruths = []
        test_groundtruths = []
        for elem in train:
            train_groundtruths.append(elem)
            train_features.append(train[elem])
        for elem in test:
            test_groundtruths.append(elem)
            test_features.append(test[elem])
    else:
        utils.print_error("No valid data provided.")
    res_dir = utils.create_dir(res_dir)
    classifiers = {
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        "KNeighbors": KNeighborsClassifier(1),
        # "GaussianProcess": GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree": DecisionTreeClassifier(max_depth=5),
        # "MLP": MLPClassifier(),
        # "AdaBoost": AdaBoostClassifier(),
        # "GaussianNB": GaussianNB(),
        # "QDA": QuadraticDiscriminantAnalysis(),
        # "SVM": SVC(kernel="linear", C=0.025),
        # "GradientBoosting": GradientBoostingClassifier(),
        # "ExtraTrees": ExtraTreesClassifier(),
        # "LogisticRegression": LogisticRegression(),
        # "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
        print("Precision weighted\t" + str(precision_score(test_groundtruths, predictions, average='weighted')))
        print("Recall weighted\t" + str(recall_score(test_groundtruths, predictions, average='weighted')))
        print("F1 weighted\t" + str(f1_score(test_groundtruths, predictions, average='weighted')))
        # print("Precision weighted\t" + str(precision_score(test_groundtruths, predictions, average=None)))
        # print("Recall weighted\t" + str(recall_score(test_groundtruths, predictions, average=None)))
        # print("f1 weighted\t" + str(f1_score(test_groundtruths, predictions, average=None)))
def main():
    # load and split data
    start = time()
    # check if the data file exists
    data_file = "data/creditcard.csv"
    if not isfile(data_file):
        try:
            # download the data set
            # Note: it is around 180MB
            data_url = "https://github.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/raw/master/creditcard.csv"
            urlretrieve(data_url, data_file)
            print("download data file to %s" % data_file)
        except Exception:  # the original `except Error:` referenced an undefined name
            print("can't access or download the data set")
            print("please try to download it manually and put into data/creditcard.csv")
            sys.exit()
    dataset, target = load_dataset(data_file)
    print("Loaded data in %.4f seconds" % (time() - start))

    start = time()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset, target, test_size=.2, random_state=42)
    print("Training set size: %d, Testing set size: %d" % (len(x_train), len(x_test)))
    print("Prepared data for models in %.4f seconds" % (time() - start))

    scores = []
    models = {"GNB": GaussianNB(),
              "DT": DecisionTreeClassifier(max_depth=5),
              "MLP": MLPClassifier(alpha=1.0),
              # "LSVC": SVC(kernel="linear", C=0.025),  # very slow as there is too much data
              "NN": KNeighborsClassifier(),
              "RF": RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
              "ABC": AdaBoostClassifier(),
              "SGD": SGDClassifier(),
              }
    names = []
    for k, model in models.items():
        print("Running %s" % k)
        start = time()
        fitted_model = model.fit(x_train, y_train)
        print("Training time: %.4f seconds" % (time() - start))
        start = time()
        y_predicted = fitted_model.predict(x_test)
        print("Testing time: %.4f seconds" % (time() - start))
        scores.append(display(y_test, y_predicted, save="figures/" + k + ".png"))
        names.append(k)
    # scatter plot scores of all the models
    plot_scores(scores, names, save="figures/scores.png")
def classes_combination(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    class_combination = list(itertools.combinations(self.classes, 2))
    train = self.dataset.get_train.copy()
    test = self.dataset.get_test.copy()
    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_TWO_CLASSES_COMBINATION, 'w') as f:
        f.write("combination, algorithm, accuracy\n")
        for combination in class_combination:
            cc_train = train.loc[(train['target'] == combination[0]) | (train['target'] == combination[1])]
            cc_test = test.loc[(test['target'] == combination[0]) | (test['target'] == combination[1])]
            train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
                cc_train, cc_test, features)

            # build all classifiers
            classifier_tree = tree.DecisionTreeClassifier()
            classifier_forest = RandomForestClassifier(n_estimators=const.PAR_RF_ESTIMATOR)
            classifier_nn = MLPClassifier(hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set],),
                                          alpha=const.PAR_NN_ALPHA[sensors_set],
                                          max_iter=const.PAR_NN_MAX_ITER,
                                          tol=const.PAR_NN_TOL)
            classifier_svm = SVC(C=const.PAR_SVM_C[sensors_set],
                                 gamma=const.PAR_SVM_GAMMA[sensors_set],
                                 verbose=False)

            # train all classifiers
            classifier_tree.fit(train_features, train_classes)
            classifier_forest.fit(train_features, train_classes)
            classifier_nn.fit(train_features, train_classes)
            classifier_svm.fit(train_features, train_classes)

            # use the classifiers on the test set
            test_prediction_tree = classifier_tree.predict(test_features)
            test_prediction_forest = classifier_forest.predict(test_features)
            test_prediction_nn = classifier_nn.predict(test_features)
            test_prediction_svm = classifier_svm.predict(test_features)

            # evaluate the classifiers
            acc_tree = accuracy_score(test_classes, test_prediction_tree)
            acc_forest = accuracy_score(test_classes, test_prediction_forest)
            acc_nn = accuracy_score(test_classes, test_prediction_nn)
            acc_svm = accuracy_score(test_classes, test_prediction_svm)

            # print results
            print(str(combination))
            print("DECISION TREE : ", str(acc_tree))
            f.write(str(combination) + ", DT ," + str(acc_tree) + "\n")
            print("RANDOM FOREST : ", str(acc_forest))
            f.write(str(combination) + ", RF ," + str(acc_forest) + "\n")
            print("NEURAL NETWORK : ", str(acc_nn))
            f.write(str(combination) + ", NN ," + str(acc_nn) + "\n")
            print("SUPPORT VECTOR MACHINE : ", str(acc_svm))
            f.write(str(combination) + ", SVM ," + str(acc_svm) + "\n")

# use different algorithms leaving one subject out from training and testing only on this subject -
# considering all classes in dataset and only user classes
def genderclassify(sentence):
    """
    genderclassify tags with the help of a multilayer perceptron classifier
    trained over word vectors created with gensim's word2vec

    :param sentence: string to be tokenized and tagged
    :type sentence: string
    :return: Returns a list of tuples of the form
             [(token1, genderTag), (token2, genderTag)...]
    :rtype: list of tuples
    """
    sentences = sent.drawlist()
    sentences2 = sents.drawlist()
    sentences2.append(sentence)
    sentences = sentences + sentences2
    sentences = [tok.wordtokenize(i) for i in sentences]
    sentence = tok.wordtokenize(sentence)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    # `size` was renamed `vector_size` in gensim 4
    model = gensim.models.Word2Vec(sentences, size=10, min_count=1)
    pred = []
    for word in sentence:
        pred.append(model.wv[word].tolist())
    genders = gndr.drawlist()
    vector = [i[0] for i in genders]
    tags = [i[1] for i in genders]
    print(tags)
    X = vector
    y = tags
    clf = MLPClassifier(solver='sgd', alpha=1e-5,
                        hidden_layer_sizes=(5, 2), random_state=1)
    clf.fit(X, y)
    predictions = clf.predict_proba(pred).tolist()
    predictions = [genderdecode(i) for i in predictions]
    print(predictions)
    for index, item in enumerate(sentence):
        sentence[index] = (sentence[index], predictions[index])
    return sentence
def crossValidate(document_term_matrix, labels, classifier="SVM", nfold=2):
    clf = None
    precision = []
    recall = []
    fscore = []
    a_score = []  # accuracy scores; never initialised in the original code
    if classifier == "NN":
        clf = MLPClassifier(hidden_layer_sizes=(50,), activation='relu',
                            solver='sgd', alpha=1e-2, random_state=None)
    elif classifier == "LR":
        clf = linear_model.LogisticRegression(C=1e3)
        # clf = tree.DecisionTreeClassifier()
    elif classifier == "RF":
        clf = RandomForestClassifier()
    elif classifier == "NB":
        clf = GaussianNB()
    elif classifier == "SVM":
        clf = LinearSVC()
    elif classifier == "KNN":
        clf = NearestCentroid()
    skf = StratifiedKFold(n_splits=nfold, shuffle=True)
    y_test_total = []
    y_pred_total = []
    for train_index, test_index in skf.split(document_term_matrix, labels):
        X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_test_total.extend(y_test.tolist())
        model = clf.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_total.extend(y_pred.tolist())
        p, r, f, s = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        print(accuracy_score(y_test, y_pred))
        a_score.append(accuracy_score(y_test, y_pred))
        precision.append(p)
        recall.append(r)
        fscore.append(f)
    plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels,
                        ylim=None, cv=skf, n_jobs=1,
                        train_sizes=np.linspace(.1, 1.0, 5))
    plt.savefig('lc.png')
    return (pd.Series(y_test_total), pd.Series(y_pred_total),
            np.mean(precision), np.mean(recall), np.mean(fscore), np.mean(a_score))
def getSKLearnModel(modelName):
    if modelName == 'LinearRegression':
        model = linear_model.LinearRegression()
    elif modelName == 'BayesianRidge':
        model = linear_model.BayesianRidge()
    elif modelName == 'ARDRegression':
        model = linear_model.ARDRegression()
    elif modelName == 'ElasticNet':
        model = linear_model.ElasticNet()
    elif modelName == 'HuberRegressor':
        model = linear_model.HuberRegressor()
    elif modelName == 'Lasso':
        model = linear_model.Lasso()
    elif modelName == 'LassoLars':
        model = linear_model.LassoLars()
    elif modelName == 'Rigid':  # sic: this code base spells the Ridge key 'Rigid'
        model = linear_model.Ridge()
    elif modelName == 'SGDRegressor':
        model = linear_model.SGDRegressor()
    elif modelName == 'SVR':
        model = SVR()
    elif modelName == 'MLPClassifier':
        model = MLPClassifier()
    elif modelName == 'KNeighborsClassifier':
        model = KNeighborsClassifier()
    elif modelName == 'SVC':
        model = SVC()
    elif modelName == 'GaussianProcessClassifier':
        model = GaussianProcessClassifier()
    elif modelName == 'DecisionTreeClassifier':
        model = DecisionTreeClassifier()
    elif modelName == 'RandomForestClassifier':
        model = RandomForestClassifier()
    elif modelName == 'AdaBoostClassifier':
        model = AdaBoostClassifier()
    elif modelName == 'GaussianNB':
        model = GaussianNB()
    elif modelName == 'LogisticRegression':
        model = linear_model.LogisticRegression()
    elif modelName == 'QuadraticDiscriminantAnalysis':
        model = QuadraticDiscriminantAnalysis()
    return model
def test_gradient():
    # Test gradient.
    # This makes sure that the activation functions and their derivatives
    # are correct. The numerical and analytical computations of the gradient
    # should be close.
    for n_labels in [2, 3]:
        n_samples = 5
        n_features = 10
        X = np.random.random((n_samples, n_features))
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                solver='lbfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)

            theta = np.hstack([l.ravel() for l in mlp.coefs_ + mlp.intercepts_])

            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])

            activations = []
            deltas = []
            coef_grads = []
            intercept_grads = []

            activations.append(X)
            for i in range(mlp.n_layers_ - 1):
                activations.append(np.empty((X.shape[0], layer_units[i + 1])))
                deltas.append(np.empty((X.shape[0], layer_units[i + 1])))
                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            [value, grad] = loss_grad_fun(theta)
            numgrad = np.zeros(np.size(theta))
            n = np.size(theta, 0)
            E = np.eye(n)
            epsilon = 1e-5

            # numerically compute the gradients
            for i in range(n):
                dtheta = E[:, i] * epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                               loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)