The following 39 code examples, extracted from open-source Python projects, illustrate how to use nltk.classify().
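Before the extracted examples, here is a minimal, self-contained sketch of the basic train/classify workflow that most of the snippets below build on (the toy feature dicts and labels are invented for illustration):

import nltk.classify
from nltk.classify import NaiveBayesClassifier

# Each training item is a (feature_dict, label) pair.
train_features = [
    ({'contains(great)': True}, 'pos'),
    ({'contains(awful)': True}, 'neg'),
]

classifier = NaiveBayesClassifier.train(train_features)

# Classify a single feature set...
print(classifier.classify({'contains(great)': True}))   # -> 'pos'

# ...or score a labeled feature set with nltk.classify.accuracy().
print(nltk.classify.accuracy(classifier, train_features))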
def demo_sent_subjectivity(text):
    """
    Classify a single sentence as subjective or objective using a stored
    SentimentAnalyzer.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.data import load
    from nltk.tokenize import regexp

    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        sentim_analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        # demo_subjectivity() is defined alongside this function in the same module.
        sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    print(sentim_analyzer.classify(tokenized_text))
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    """
    try:
        self.classifier = self.nltk_class.train(
            self.train_features, *args, **kwargs)
        return self.classifier
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
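The docstring and error message suggest this is TextBlob's NLTKClassifier base class. Assuming so, a typical usage sketch looks like this (training sentences invented for illustration); note that train() is called implicitly on the first classify():

from textblob.classifiers import NaiveBayesClassifier

train = [('I love this sandwich.', 'pos'),
         ('This is an amazing place!', 'pos'),
         ('I do not like this restaurant.', 'neg')]

cl = NaiveBayesClassifier(train)                    # stores train_features
print(cl.classify('This is an amazing library!'))   # triggers train() implicitly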
def ment(text):
    try:
        doc = pickle.load(open("pickle/doc.pickle", "rb"))
    except FileNotFoundError:
        print("Pickles missing!")
        print("Program will now construct pickles; this may take some time.")
        trainClassifier().train()
        doc = pickle.load(open("pickle/doc.pickle", "rb"))
    wordFeat = pickle.load(open("pickle/wordFeat.pickle", "rb"))
    featSet = pickle.load(open("pickle/featSet.pickle", "rb"))
    ONB = pickle.load(open("pickle/ONB.pickle", "rb"))
    MNB = pickle.load(open("pickle/MNB.pickle", "rb"))
    BNB = pickle.load(open("pickle/BNB.pickle", "rb"))
    LR = pickle.load(open("pickle/LR.pickle", "rb"))
    LSVC = pickle.load(open("pickle/LSVC.pickle", "rb"))
    SGDC = pickle.load(open("pickle/SGDC.pickle", "rb"))
    vote = sent(ONB, MNB, BNB, LR, LSVC, SGDC)
    feats = sent().featureFind(text, wordFeat)
    out = vote.conf(feats) * 100
    # out = str(out) + "%"
    return vote.classify(feats), out
def classify(self, features, threshold=0.8):
    business_keywords = ["business", "wi", "fi", "wifi", "internet", "wireless"]
    clean_keywords = ["clean"]
    prob_dist = self.classifier.prob_classify(features)
    most_prob_label = prob_dist.max()
    if prob_dist.prob(most_prob_label) >= threshold:
        return most_prob_label
    elif any(bkeyword in features for bkeyword in business_keywords):
        # features contain a keyword for business
        return common.AspectBusiness
    elif any(ckeyword in features for ckeyword in clean_keywords):
        return common.AspectClean
    else:
        return common.AspectUnknown
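The prob_classify() call above returns a probability distribution rather than a single label, which is what makes the threshold check possible. A minimal sketch of that API on a toy NaiveBayesClassifier (training data invented for illustration):

from nltk.classify import NaiveBayesClassifier

train = [({'fever': True, 'cough': True}, 'flu'),
         ({'sneezing': True}, 'cold')]
clf = NaiveBayesClassifier.train(train)

dist = clf.prob_classify({'fever': True})
print(dist.max())                      # the single most probable label
for label in dist.samples():           # all labels with their probabilities
    print(label, round(dist.prob(label), 3))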
def classify(self, features):
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    return mode(votes)
def confidence(self, features):
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    choice_votes = votes.count(mode(votes))
    conf = choice_votes / len(votes)
    return conf
def sentiment(text):
    feats = find_features(text)
    return voted_classifier.classify(feats), voted_classifier.confidence(feats)
def __init__(self, *classifiers):
    self._classifiers = classifiers

# Creating our own classify method.
# After iterating we return mode(votes), which just returns the most popular vote.
def classify(self, features):
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    return mode(votes)

# Defining another parameter, confidence.
# Since we have algorithms voting, we can tally the votes for and against
# the winning vote, and call this "confidence".
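The __init__ and classify methods above, together with the confidence method shown earlier, form a small voting ensemble. A self-contained sketch of the complete pattern (toy training data; both members are NaiveBayes here purely for brevity):

from statistics import mode
from nltk.classify import NaiveBayesClassifier

class VoteClassifier:
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        # The most popular label among the member classifiers wins.
        votes = [c.classify(features) for c in self._classifiers]
        return mode(votes)

    def confidence(self, features):
        # Fraction of classifiers that agree with the winning label.
        votes = [c.classify(features) for c in self._classifiers]
        return votes.count(mode(votes)) / len(votes)

train = [({'great': True}, 'pos'), ({'awful': True}, 'neg')]
voted = VoteClassifier(NaiveBayesClassifier.train(train),
                       NaiveBayesClassifier.train(train))
feats = {'great': True}
print(voted.classify(feats), voted.confidence(feats))   # -> pos 1.0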
def is_positive(sentence):
    sentence_features = find_features(sentence, all_features)
    return 1 if classifier.classify(sentence_features) == "pos" else 0
def classify(self, text):
    """Classifies a string of text."""
    raise NotImplementedError('Must implement a "classify" method.')
def classify(self, text):
    """Classifies the text.

    :param str text: A string of text.
    """
    text_features = self.extract_features(text)
    return self.classifier.classify(text_features)
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a filename, the file format, e.g.
        ``"csv"`` or ``"json"``. If ``None``, will attempt to detect the
        file format.
    """
    if is_filelike(test_set):
        test_data = self._read_data(test_set)
    else:  # test_set is a list of tuples
        test_data = test_set
    test_features = [(self.extract_features(d), c) for d, c in test_data]
    return nltk.classify.accuracy(self.classifier, test_features)
def train(self, *args, **kwargs):
    """Train the classifier with labeled and unlabeled feature sets and
    return the classifier. Takes the same arguments as the wrapped NLTK
    class. This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    :rtype: A classifier
    """
    self.classifier = self.nltk_class.train(self.positive_features,
                                            self.unlabeled_features,
                                            self.positive_prob_prior)
    return self.classifier
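The three-argument train() call above matches NLTK's PositiveNaiveBayesClassifier, which learns from positive and unlabeled examples only. A minimal direct-usage sketch (sentences invented for illustration):

from nltk.classify import PositiveNaiveBayesClassifier

def features(sentence):
    # Simple bag-of-words feature extractor.
    return {word.lower(): True for word in sentence.split()}

positive = [features(s) for s in ['The team dominated the game',
                                  'They lost the ball',
                                  'The game was intense']]
unlabeled = [features(s) for s in ['My kitchen is very small',
                                   'The party was amazing']]

classifier = PositiveNaiveBayesClassifier.train(positive, unlabeled,
                                                positive_prob_prior=0.5)
# True means the sentence resembles the positive (sports) examples.
print(classifier.classify(features('The team played the game')))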
def classify(self, features):
    votes = []
    for classifier in self._classifiers:
        votes.append(classifier.classify(features))
    return mode(votes)

# find the confidence of the result
# must be handed:
#   * featured words
def conf(self, features):
    votes = []
    for classifier in self._classifiers:
        votes.append(classifier.classify(features))
    choice_votes = votes.count(mode(votes))
    # Use a local variable here: assigning the result to self.conf would
    # shadow this method on the instance and break subsequent calls.
    return choice_votes / len(votes)

# find the features of a document
# must be handed:
#   * document to find features of
#   * word features
def DoClassify(CurClassifier, topicResultsTxt, topicTweetsLDATxt):
    counter = 0
    topicSentiments = dict()
    topicResult = open(topicResultsTxt, 'w')
    with open(topicTweetsLDATxt) as topicFile:
        for line in topicFile:
            if counter != 100:
                tSentiment = CurClassifier.classify(extract_features(line.split()))
                if tSentiment in topicSentiments.keys():
                    topicSentiments[tSentiment] += 1
                else:
                    topicSentiments[tSentiment] = 1
                counter += 1
            else:
                majorSentiment = 'Dummy'
                topicSentiments[majorSentiment] = 1
                for sentiKey in topicSentiments.keys():
                    if topicSentiments[majorSentiment] < topicSentiments[sentiKey]:
                        majorSentiment = sentiKey
                topicResult.write(majorSentiment + '\n')
                topicSentiments.clear()
                counter = 0
    topicResult.close()

# Extract the features of the tweet, without term frequencies, in the format
# the classifier needs.
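The majority-vote bookkeeping above (seeding a 'Dummy' key, then scanning for the maximum) can be expressed more directly with collections.Counter; a brief sketch of the equivalent logic (function name hypothetical):

from collections import Counter

def majority_sentiment(labels):
    # Return the most common label among per-tweet classifications.
    return Counter(labels).most_common(1)[0][0]

print(majority_sentiment(['pos', 'neg', 'pos']))   # -> pos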
def classify(query, engine=engine, threshold=.85, limit=5):
    """Spell out the most probable diseases and their respective percentages."""
    words = preprocess(' '.join(query))
    print('understanding {}...'.format(words))
    objects = engine.prob_classify(words)
    keys = list(objects.samples())
    samples = [(key, objects.prob(key)) for key in keys]
    return feed_conversation(samples, limit, threshold)