Python nltk.stem 模块,LancasterStemmer() 实例源码

我们从 Python 开源项目中提取了以下 4 个代码示例,用于说明如何使用 nltk.stem.LancasterStemmer()。

项目:100knock2017    作者:tmu-nlp    | 项目源码 | 文件源码
def preprocessor_data(data, ids, test=0):
    """Turn labelled text lines into stemmed token lists plus integer labels.

    Each line is lowercased and split on whitespace. The first token is
    parsed as the integer label; the remaining tokens are the words.
    English stopwords are dropped and every other word is stemmed with the
    Lancaster stemmer.

    Args:
        data: Iterable of strings, each formatted as "<label> <word> ...".
        ids: Mapping that is indexed with every kept stem when ``test == 0``.
            NOTE(review): presumably a defaultdict that assigns a feature id
            on first access -- confirm against the caller. With a plain dict
            an unseen stem raises KeyError.
        test: When 0 (the default), ``ids`` is indexed with each kept stem;
            any other value leaves ``ids`` untouched.

    Returns:
        A ``(data_in_preprocessed, labels)`` tuple: a list with one list of
        stems per input line, and the parallel list of integer labels.

    Raises:
        IndexError: If a line contains no tokens at all.
        ValueError: If the first token of a line is not an integer.
    """
    stopwords_set = set(stopwords.words('english'))
    stemmer = stem.LancasterStemmer()

    data_in_preprocessed = []
    labels = []

    for line in data:
        # str.lower() returns a new string, so its result must be kept;
        # otherwise capitalised stopwords ("The", "And") would slip past the
        # lowercase stopword set below.
        tokens = line.lower().split()
        label, words = tokens[0], tokens[1:]
        labels.append(int(label))

        words_preprocessed = []
        for word in words:
            if word in stopwords_set:
                continue
            lemmatized = stemmer.stem(word)
            if test == 0:
                # Bare lookup kept for its side effect on ``ids``.
                ids[lemmatized]
            words_preprocessed.append(lemmatized)
        data_in_preprocessed.append(words_preprocessed)

    return data_in_preprocessed, labels
项目:100knock2016    作者:tmu-nlp    | 项目源码 | 文件源码
def getFeature(word_list):
    """Count how often each Lancaster stem occurs among the non-stopwords.

    Args:
        word_list: Iterable of word strings.

    Returns:
        A plain dict mapping each stem to its occurrence count. Words for
        which ``isStopWords`` returns a truthy value are not counted.
    """
    lancaster = stem.LancasterStemmer()
    counts = {}
    for token in word_list:
        if isStopWords(token):
            continue
        key = lancaster.stem(token)
        counts[key] = counts.get(key, 0) + 1
    return counts
项目:fake_news    作者:bmassman    | 项目源码 | 文件源码
def stem_text(text):
    """Return ``text`` with every token replaced by its Lancaster stem.

    The text is tokenized with ``tokenize_text``, each token is stemmed, and
    the stems are joined back together with single spaces.

    Args:
        text: The input passed to ``tokenize_text``.

    Returns:
        A single string of space-separated stems.
    """
    from nltk.stem import LancasterStemmer

    stemmer = LancasterStemmer()
    stems = []
    for token in tokenize_text(text):
        stems.append(stemmer.stem(token))
    return ' '.join(stems)
项目:100knock2017    作者:tmu-nlp    | 项目源码 | 文件源码
def preprocessor_words(words):
    """Drop English stopwords from ``words`` and stem what remains.

    Args:
        words: Iterable of word strings.

    Returns:
        A list of Lancaster stems, in input order, for every word that is
        not in NLTK's English stopword list.
    """
    english_stopwords = set(stopwords.words('english'))
    lancaster = stem.LancasterStemmer()

    return [
        lancaster.stem(token)
        for token in words
        if token not in english_stopwords
    ]