我们从Python开源项目中，提取了以下1个代码示例，用于说明如何使用sklearn.feature_extraction模块。
def rand_forest_train(self):
    """Train three tree-based classifiers and label the rows of ``values.csv``.

    Reads ``names.csv``, takes the ``similarity``, ``platform``,
    ``reputation`` and ``entropy`` columns as features and
    ``human_or_machine`` as the target, and holds out 25% of the rows
    (``random_state=33``) for evaluation.  A decision tree, a random forest
    and a gradient-boosting classifier are fitted on the remaining rows; for
    each one the held-out accuracy and a classification report are printed.
    Finally the random forest's predictions for ``values.csv`` are printed.

    Side effects:
        Sets ``self.dtc``, ``self.rfc`` and ``self.gbc`` to the fitted
        models.  Returns ``None``.
    """
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.metrics import classification_report
    from sklearn.tree import DecisionTreeClassifier

    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer
    # its replacement and only fall back on releases that predate it.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    feature_columns = ['similarity', 'platform', 'reputation', 'entropy']

    labelled = pd.read_csv('names.csv')
    X = labelled[feature_columns]
    y = labelled['human_or_machine']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=33)

    # Turn each row into a feature dict and vectorise it.  The vectoriser is
    # fitted on the training rows only and reused for every later transform
    # so that all matrices share the same column layout.
    # ``orient='records'`` is spelled out in full: the abbreviated
    # ``'record'`` is rejected by current pandas releases.
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='records'))
    X_test = vec.transform(X_test.to_dict(orient='records'))

    dtc = DecisionTreeClassifier()
    rfc = RandomForestClassifier()
    gbc = GradientBoostingClassifier()

    # NOTE(review): the label strings below consist of '?' characters in the
    # file as received (this looks like an encoding loss).  They are kept
    # byte-for-byte; restore the intended wording if it is known.
    labelled_models = [
        ("??????????", dtc),
        ("????????????", rfc),
        ("????????????", gbc),
    ]

    # Fit in the original order (tree, forest, boosting) and keep each
    # model's held-out predictions for the report below.
    held_out_predictions = []
    for _, model in labelled_models:
        model.fit(X_train, y_train)
        held_out_predictions.append(model.predict(X_test))

    for (label, model), y_pred in zip(labelled_models, held_out_predictions):
        print(label, model.score(X_test, y_test))
        # classification_report expects (y_true, y_pred); passing them the
        # other way round swaps the reported precision and recall.
        print(classification_report(y_test, y_pred))

    # Predict labels for the second file with the random forest.
    unlabelled = pd.read_csv('values.csv')
    X_new = vec.transform(
        unlabelled[feature_columns].to_dict(orient='records'))
    print(rfc.predict(X_new))

    self.dtc = dtc
    self.rfc = rfc
    self.gbc = gbc