/**
 * Gets the stemmer applied to the words.
 *
 * @return the stemming algorithm currently held by this object
 */
@OptionMetadata(
    displayName = "stemmer",
    description = "The stemming algorithm to use on the words. Default: no stemming.",
    commandLineParamName = "stemmer",
    commandLineParamSynopsis = "-stemmer <string>",
    displayOrder = 4)
public Stemmer getStemmer() {
  return this.m_stemmer;
}
/**
 * Gets the stemmer applied to the words coming from the lexicon.
 *
 * @return the stemming algorithm currently held by this object
 */
@OptionMetadata(
    displayName = "stemmer",
    description =
        "The stemming algorithm to use on the words from the lexicon. It is recommended to use the same stemmer used with the main filter."
            + " Default: no stemming.",
    commandLineParamName = "lex-stemmer",
    commandLineParamSynopsis = "-lex-stemmer <string>",
    displayOrder = 4)
public Stemmer getStemmer() {
  return this.m_stemmer;
}
/** * Tokenizes a String * @param content the content * @param toLowerCase true for lowercasing the content * @param standarizeUrlsUsers true for standarizing urls and users * @param reduceRepeatedLetters true for reduing repeated letters * @param tokenizer the tokenizer * @param stemmer the stemmer * @param stop the stopwords handler * @return a list of tokens */ static public List<String> tokenize(String content, boolean toLowerCase, boolean standarizeUrlsUsers, boolean reduceRepeatedLetters, Tokenizer tokenizer, Stemmer stemmer, StopwordsHandler stop) { if (toLowerCase) content = content.toLowerCase(); // if a letters appears two or more times it is replaced by only two // occurrences of it if (reduceRepeatedLetters) content = content.replaceAll("([a-z])\\1+", "$1$1"); List<String> tokens = new ArrayList<String>(); tokenizer.tokenize(content); for(;tokenizer.hasMoreElements();){ String token=tokenizer.nextElement(); if(!stop.isStopword(token)){ if (standarizeUrlsUsers) { // Replace URLs to a generic URL if (token.matches("http.*|ww\\..*|www\\..*")) { token="http://www.url.com"; } // Replaces user mentions to a generic user else if (token.matches("@.*")) { token="@user"; } } tokens.add(stemmer.stem(token)); } } return tokens; }
/**
 * Gets the Weka stemmer held by this object.
 *
 * @return the stemmer
 */
@OptionMetadata(
    displayName = "stemmer",
    description = "The Weka stemmer to use.",
    commandLineParamName = "stemmer",
    commandLineParamSynopsis = "-stemmer <String>",
    displayOrder = 0)
public Stemmer getStemmer() {
  return this.stemmer;
}
/**
 * Sets the stemmer held by this object.
 *
 * @param m_stemmer the stemmer to store
 */
public void setStemmer(Stemmer m_stemmer) {
  this.m_stemmer = m_stemmer;
}
/**
 * Sets the stemmer held by this object.
 *
 * @param stemmer the stemmer to store
 */
public void setStemmer(Stemmer stemmer) {
  this.stemmer = stemmer;
}
/**
 * Gets the stemming algorithm currently in use.
 *
 * @return the stemmer in use, or null when none has been set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
/**
 * Gets the stemming algorithm currently in use.
 *
 * @return the stemmer in use, or null when none has been set
 */
public Stemmer getStemmer() {
  return this.m_Stemmer;
}