The following 12 code examples, extracted from open-source Python projects, illustrate how to use the sklearn.svm module.
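Before the project examples, a minimal self-contained sketch of the basic API (toy data and parameter values are illustrative only, not taken from any of the projects below):

from sklearn.svm import SVC

# Toy training data: four 2-D points with binary labels.
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 1, 1, 0]

# Fit an RBF-kernel support vector classifier and predict on a new point.
clf = SVC(kernel='rbf', C=1.0)
clf.fit(X, y)
print(clf.predict([[0.9, 0.8]]))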
Example 1:

import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
import sklearn.svm


def get_model_class(method):
    """
    Returns the class associated with a method string.

    :param method: A string describing the method to use.
    :return: A class corresponding to the method.
    """
    if method == 'logistic':
        return sklearn.linear_model.LogisticRegression
    elif method == 'svm':
        return sklearn.svm.SVC
    elif method == 'mirowski-svm':
        return sklearn.svm.SVC
    elif method == 'sgd':
        return sklearn.linear_model.SGDClassifier
    elif method == 'random-forest':
        return sklearn.ensemble.RandomForestClassifier
    elif method == 'nearest-centroid':
        return sklearn.neighbors.NearestCentroid
    elif method == 'knn':
        return sklearn.neighbors.KNeighborsClassifier
    elif method == 'bagging':
        return sklearn.ensemble.BaggingClassifier
    else:
        raise NotImplementedError("Method {} is not supported".format(method))
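A hypothetical call, for illustration (the parameter values are not from the original project):

# Look up the classifier class for the 'svm' method string and instantiate it.
ModelClass = get_model_class('svm')      # -> sklearn.svm.SVC
clf = ModelClass(kernel='linear', C=1.0)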
Example 2:

def main():
    baskets.time_me.set_default_mode('print')
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument('tags', nargs='+')
    parser.add_argument('-f', '--train-fold', default='train')
    parser.add_argument('--validation-fold', help='Fold for validation (default: None)')
    parser.add_argument('--no-metafeats', action='store_true')
    parser.add_argument('--svm', action='store_true')
    args = parser.parse_args()
    with time_me("Loaded metavectors"):
        meta_df = pd.read_pickle(METAVECTORS_PICKLEPATH)
    with time_me("Made training vectors"):
        X, y = vectorize_fold(args.train_fold, args.tags, meta_df,
                              use_metafeats=not args.no_metafeats)
    # This sucks.
    if args.svm:
        # slooow :( (sklearn docs say hard to scale to dataset w more than like 20k examples)
        # model = sklearn.svm.SVC(verbose=True, probability=True, C=1.0)
        model = sklearn.svm.LinearSVC(penalty='l2', loss='hinge', C=.001, verbose=1)
    else:
        # TODO: C
        model = LogisticRegression(verbose=1)
    with time_me('Trained model', mode='print'):
        model.fit(X, y)
    model_fname = 'model.pkl'
    joblib.dump(model, model_fname)
    return model
    # TODO: report acc on validation set
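The comment above reflects real scaling behavior: SVC's kernelized solver is at least quadratic in the number of samples, while LinearSVC (backed by liblinear) handles much larger linear problems. A minimal sketch of the trade-off, assuming synthetic data (the sizes and C value are illustrative):

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

# Synthetic binary problem; at this size LinearSVC trains quickly,
# while a kernelized SVC would already be noticeably slower.
X, y = make_classification(n_samples=20000, n_features=50, random_state=0)

linear_clf = LinearSVC(C=0.001, loss='hinge', dual=True)  # liblinear: scales well
linear_clf.fit(X, y)
# kernel_clf = sklearn.svm.SVC(C=1.0)  # kernelized: slow at this sample count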
Example 3:

def train_svm(X, y, k):
    if k == 'linear':
        svm = SVC(C=1.0, kernel='linear')
    elif k == 'rbf':
        svm = SVC(C=1.0, kernel='rbf')
    else:
        # without this branch, an unknown kernel string would raise a NameError below
        raise ValueError("Unsupported kernel: {}".format(k))
    svm.fit(X, y)
    return svm
Example 4:

def __init__(self, n_features=100, transform=True, classifier='lsvm',
             kernel='rbf', n_neighbors=5):
    self.n_features = n_features
    self.transform = transform
    self.clf_type = classifier
    if classifier == 'lsvm':
        self.clf = LinearSVC()
    elif classifier == 'svm':
        self.clf = SVC(kernel=kernel, probability=True)
    elif classifier == 'knn':
        self.clf = KNeighborsClassifier(n_neighbors=n_neighbors,
                                        algorithm='brute', metric='cosine')
Example 5:

def predict_proba(self, x, y, normalize=False, flip=False):
    X = deepcopy(x)
    if self.transform:
        X = decompose(X, self.n_features, normalize, flip)
    if self.clf_type == 'svm':
        return self.clf.predict_proba(X)
    elif self.clf_type in ['lsvm', 'nbsvm']:
        return platt_scale(X, y, self.clf)
    elif self.clf_type == 'knn':
        return self.clf.predict_proba(X)
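LinearSVC has no predict_proba, which is why the 'lsvm' branch goes through a platt_scale helper (project-specific, not shown). A minimal sketch of the same idea using scikit-learn's built-in calibration, with illustrative synthetic data (CalibratedClassifierCV with method='sigmoid' fits a logistic curve on the decision function, i.e. Platt scaling):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)

# Wrap LinearSVC so probability estimates become available.
calibrated = CalibratedClassifierCV(LinearSVC(), method='sigmoid', cv=5)
calibrated.fit(X, y)
probs = calibrated.predict_proba(X[:5])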
Example 6:

def trainSVMTK(docs, pairs, dditype, model="svm_tk_classifier.model", excludesentences=[]):
    if os.path.isfile("ddi_models/" + model):
        os.remove("ddi_models/" + model)
    if os.path.isfile("ddi_models/" + model + ".txt"):
        os.remove("ddi_models/" + model + ".txt")
    # docs = use_external_data(docs, excludesentences, dditype)
    xerrors = 0
    with open("ddi_models/" + model + ".txt", 'w') as train:
        for p in pairs:
            if dditype != "all" and pairs[p][relations.PAIR_DDI] and \
               pairs[p][relations.PAIR_TYPE] != dditype:
                continue
            sid = relations.getSentenceID(p)
            if sid not in excludesentences:
                tree = pairs[p][relations.PAIR_DEP_TREE][:]
                # if len(docs[sid][ddi.SENTENCE_ENTITIES]) > 20:
                #     line = "1 |BT| (ROOT (NP (NN candidatedrug) (, ,) (NN candidatedrug))) |ET|"
                #     xerrors += 1
                # else:
                line = get_svm_train_line(tree, pairs[p], sid,
                                          docs[sid][relations.SENTENCE_PAIRS][p])
                if not pairs[p][relations.PAIR_DDI]:
                    line = '-' + line
                elif pairs[p][relations.PAIR_TYPE] != dditype and dditype != "all":
                    line = '-' + line
                train.write(line)
    # print("tree errors:", xerrors)
    svmlightcall = Popen(["./svm-light-TK-1.2/svm-light-TK-1.2.1/svm_learn",
                          "-t", "5", "-L", "0.4", "-T", "2", "-S", "2",
                          "-g", "10", "-D", "0", "-C", "T",
                          basedir + model + ".txt", basedir + model],
                         stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile("ddi_models/" + model):
        print("failed training model " + basedir + model)
        print(res)
        sys.exit()
Example 7:

def train(method='svm', savePath=None, choice='basic'):
    if not savePath:
        savePath = os.path.abspath('./support/clf/' + method + '.pk')
    else:
        savePath = os.path.abspath(savePath)
    trainer = trainerFactory(method, choice, savePath)
    if not trainer:
        print('No such method to train for now')
        return
    trainer.train()
    trainer.save()
    print('Train Process Complete')
Example 8:

def train(self, training_data, config, **kwargs):
    # type: (TrainingData, RasaNLUConfig, **Any) -> None
    """Train the intent classifier on a data set.

    :param num_threads: number of threads used during training time"""
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC
    import numpy as np

    labels = [e.get("intent") for e in training_data.intent_examples]
    if len(set(labels)) < 2:
        logger.warning("Can not train an intent classifier. Need at least 2 different classes. "
                       "Skipping training of intent classifier.")
    else:
        y = self.transform_labels_str2num(labels)
        X = np.stack([example.get("text_features")
                      for example in training_data.intent_examples])
        sklearn_config = config.get("intent_classifier_sklearn")
        C = sklearn_config.get("C", [1, 2, 5, 10, 20, 100])
        kernel = sklearn_config.get("kernel", "linear")
        # dirty str fix because sklearn is expecting str not instance of basestr...
        tuned_parameters = [{"C": C, "kernel": [str(kernel)]}]
        # aim for 5 examples in each fold
        cv_splits = max(2, min(MAX_CV_FOLDS, np.min(np.bincount(y)) // 5))
        self.clf = GridSearchCV(SVC(C=1, probability=True, class_weight='balanced'),
                                param_grid=tuned_parameters,
                                n_jobs=config["num_threads"],
                                cv=cv_splits,
                                scoring='f1_weighted',
                                verbose=1)
        self.clf.fit(X, y)
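The cv_splits expression caps the number of folds by the size of the rarest class, so stratified cross-validation always has enough examples per fold. A worked illustration (the MAX_CV_FOLDS value is assumed here; the label array is made up):

import numpy as np

MAX_CV_FOLDS = 5  # assumed cap, mirroring the constant used above

y = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# rarest class has 7 examples -> 7 // 5 = 1 -> clamped up to the minimum of 2 folds
cv_splits = max(2, min(MAX_CV_FOLDS, np.min(np.bincount(y)) // 5))
print(cv_splits)  # 2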
Example 9:

def testSVMTK(sentence, pairs, pairs_list, model="svm_tk_classifier.model", tag=""):
    # clean up data/output files from any previous run (all are created under temp_dir)
    if os.path.isfile(temp_dir + tag + "svm_test_data.txt"):
        os.remove(temp_dir + tag + "svm_test_data.txt")
    if os.path.isfile(temp_dir + tag + "svm_test_output.txt"):
        os.remove(temp_dir + tag + "svm_test_output.txt")
    # docs = use_external_data(docs, excludesentences, dditype)
    total = 0
    with open(temp_dir + tag + "svm_test_data.txt", 'w') as test:
        for pid in pairs:
            sid = pairs[pid].sid
            tree = sentence.parsetree
            # if len(docs[sid][ddi.SENTENCE_ENTITIES]) > 30:
            #     line = "1 |BT| (ROOT (NP (NN candidatedrug) (, ,) (NN candidatedrug))) |ET|\n"
            # else:
            line = get_svm_train_line(tree, pairs[pid], sid)
            line = '-' + line
            test.write(line)
            total += 1
    svmtklightargs = ["./bin/svm-light-TK-1.2/svm-light-TK-1.2.1/svm_classify",
                      temp_dir + tag + "svm_test_data.txt", basedir + model,
                      temp_dir + tag + "svm_test_output.txt"]
    svmlightcall = Popen(svmtklightargs, stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile(temp_dir + tag + "svm_test_output.txt"):
        print("something went wrong with SVM-light-TK")
        print(res)
        sys.exit()
    with open(temp_dir + tag + "svm_test_output.txt", 'r') as out:
        lines = out.readlines()
    if len(lines) != len(pairs_list):
        print("check " + tag + "svm_test_output.txt! something is wrong")
        print(res)
        sys.exit()
    for p, pid in enumerate(pairs):
        score = float(lines[p])
        if score < 0:
            pairs[pid].recognized_by[relations.SST_PRED] = -1
        else:
            pairs[pid].recognized_by[relations.SST_PRED] = 1
        # log both entities of the pair with the SST score
        logging.info("{} - {} SST: {}".format(pairs[pid].entities[0],
                                              pairs[pid].entities[1], score))
    return pairs
Example 10:

def main():
    parser = optparse.OptionParser("[!] usage: python cross_validate_SVM.py -F <data file>")
    parser.add_option("-F", dest="dataFile", type="string",
                      help="specify data file to analyse")
    (options, args) = parser.parse_args()
    dataFile = options.dataFile

    # data_path is needed below to locate the cached classifiers
    cfg = config.Config()
    data_path = cfg.paths['data']

    # TODO: remove, only for testing
    if False:
        dataFile = data_path + cfg.paths['data_file_standard']

    if dataFile is None:
        print(parser.usage)
        exit(0)

    data = sio.loadmat(dataFile)
    X = data["X"]
    m, n = np.shape(X)
    y = np.squeeze(data["y"])

    kernel_grid = ["rbf"]
    C_grid = [5]
    gamma_grid = [1]

    kf = KFold(m, n_folds=5)
    for kernel in kernel_grid:
        for C in C_grid:
            for gamma in gamma_grid:
                fold = 1
                FoMs = []
                for train, test in kf:
                    print("[*]", fold, kernel, C, gamma)
                    file = data_path + "classifiers/cv/SVM_kernel" + str(kernel) + \
                           "_C" + str(C) + "_gamma" + str(gamma) + "_" + \
                           dataFile.split("/")[-1].split(".")[0] + \
                           "_fold" + str(fold) + ".pkl"
                    try:
                        svm = pickle.load(open(file, "rb"))
                    except IOError:
                        train_x, train_y = X[train], y[train]
                        svm = train_SVM(train_x, train_y, kernel, C, gamma)
                        outputFile = open(file, "wb")
                        pickle.dump(svm, outputFile)
                    FoM, threshold = measure_FoM(X[test], y[test], svm, False)
                    fold += 1
                    FoMs.append(FoM)
                print("[+] mean FoM: %.3lf" % np.mean(np.array(FoMs)))
                print()
Example 11 (an earlier variant of the same script, without the config lookup):

def main():
    parser = optparse.OptionParser("[!] usage: python cross_validate_SVM.py -F <data file>")
    parser.add_option("-F", dest="dataFile", type="string",
                      help="specify data file to analyse")
    (options, args) = parser.parse_args()
    dataFile = options.dataFile

    if dataFile is None:
        print(parser.usage)
        exit(0)

    data = sio.loadmat(dataFile)
    X = data["X"]
    m, n = np.shape(X)
    y = np.squeeze(data["y"])

    kernel_grid = ["rbf"]
    C_grid = [5]
    gamma_grid = [1]

    kf = KFold(m, n_folds=5)
    for kernel in kernel_grid:
        for C in C_grid:
            for gamma in gamma_grid:
                fold = 1
                FoMs = []
                for train, test in kf:
                    print("[*]", fold, kernel, C, gamma)
                    file = "cv/SVM_kernel" + str(kernel) + "_C" + str(C) + \
                           "_gamma" + str(gamma) + "_" + \
                           dataFile.split("/")[-1].split(".")[0] + \
                           "_fold" + str(fold) + ".pkl"
                    try:
                        svm = pickle.load(open(file, "rb"))
                    except IOError:
                        train_x, train_y = X[train], y[train]
                        svm = train_SVM(train_x, train_y, kernel, C, gamma)
                        outputFile = open(file, "wb")
                        pickle.dump(svm, outputFile)
                    FoM, threshold = measure_FoM(X[test], y[test], svm, False)
                    fold += 1
                    FoMs.append(FoM)
                print("[+] mean FoM: %.3lf" % np.mean(np.array(FoMs)))
                print()
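Both versions above use the old KFold signature, KFold(n, n_folds=...), which was removed along with sklearn.cross_validation. The modern equivalent yields index arrays from split(); a minimal sketch with made-up data:

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)
y = np.arange(10) % 2

# KFold now takes n_splits and produces index pairs via split(),
# just like the old iterator yielded (train, test) index arrays.
kf = KFold(n_splits=5)
for train, test in kf.split(X):
    print(train, test)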
Example 12:

# Note: sklearn.cross_validation and sklearn.grid_search are the old module
# names, removed in scikit-learn 0.20; this example targets an older release.
def train_svm_classifer(features, labels, model_output_path):
    """
    train_svm_classifer will train a SVM, save the trained SVM model and
    report the classification performance

    features: 2D array of each input feature for each sample
    labels: array of string labels classifying each sample
    model_output_path: path for storing the trained svm model
    """
    # hold out 20% of the data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        features, labels, test_size=0.2)

    param = [
        {
            "kernel": ["linear"],
            "C": [1, 10, 100, 1000]
        },
        {
            "kernel": ["rbf"],
            "C": [1, 10, 100, 1000],
            "gamma": [1e-2, 1e-3, 1e-4, 1e-5]
        }
    ]

    # request probability estimation
    svm = SVC(probability=True)

    # 10-fold cross validation; n_jobs=20 lets folds and parameter settings
    # be trained in parallel
    clf = grid_search.GridSearchCV(svm, param, cv=10, n_jobs=20, verbose=3)

    clf.fit(X_train, y_train)

    # the target directory must exist for joblib.dump to succeed
    if os.path.exists(os.path.dirname(model_output_path)):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained svm model to {0}.".format(model_output_path))

    print("\nBest parameters set:")
    print(clf.best_params_)

    y_predict = clf.predict(X_test)

    labels = sorted(list(set(labels)))
    print("\nConfusion matrix:")
    print("Labels: {0}\n".format(",".join(labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))

    print("\nClassification report:")
    print(classification_report(y_test, y_predict))
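A minimal modern equivalent of the same grid search, using sklearn.model_selection instead of the removed modules (synthetic data; the grid is trimmed for speed and is illustrative only):

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

X, y = make_classification(n_samples=300, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

param = [
    {"kernel": ["linear"], "C": [1, 10, 100]},
    {"kernel": ["rbf"], "C": [1, 10, 100], "gamma": [1e-2, 1e-3]},
]

# n_jobs=-1 uses all cores; cv=5 keeps the example fast
clf = GridSearchCV(SVC(probability=True), param, cv=5, n_jobs=-1)
clf.fit(X_train, y_train)
print(clf.best_params_, clf.score(X_test, y_test))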