Java 类weka.core.stemmers.Stemmer 实例源码

项目:AffectiveTweets    文件:TweetToFeatureVector.java   
/**
 * Gets the stemmer stored in this object. The accessor is published as the
 * "stemmer" option through its {@code OptionMetadata} annotation.
 *
 * @return the current value of the {@code m_stemmer} field
 */
@OptionMetadata(
        displayName = "stemmer",
        description = "The stemming algorithm to use on the words. Default: no stemming.",
        commandLineParamName = "stemmer",
        commandLineParamSynopsis = "-stemmer <string>",
        displayOrder = 4)
public Stemmer getStemmer() {
    return this.m_stemmer;
}
项目:AffectiveTweets    文件:ArffLexiconWordLabeller.java   
/**
 * Gets the stemmer stored in this object. The accessor is published as the
 * "lex-stemmer" option through its {@code OptionMetadata} annotation.
 *
 * @return the current value of the {@code m_stemmer} field
 */
@OptionMetadata(
        displayName = "stemmer",
        description = "The stemming algorithm to use on the words from the lexicon. It is recommended to use the same stemmer used with the main filter."
                + " Default: no stemming.",
        commandLineParamName = "lex-stemmer",
        commandLineParamSynopsis = "-lex-stemmer <string>",
        displayOrder = 4)
public Stemmer getStemmer() {
    return this.m_stemmer;
}
项目:AffectiveTweets    文件:ArffLexiconEvaluator.java   
/**
 * Provides the stemmer held in the {@code m_stemmer} field. The accessor is
 * published as the "lex-stemmer" option through its {@code OptionMetadata}
 * annotation.
 *
 * @return the stemmer currently stored in this object
 */
@OptionMetadata(
        displayName = "stemmer",
        description = "The stemming algorithm to use on the words from the lexicon. It is recommended to use the same stemmer used with the main filter."
                + " Default: no stemming.",
        commandLineParamName = "lex-stemmer",
        commandLineParamSynopsis = "-lex-stemmer <string>",
        displayOrder = 4)
public Stemmer getStemmer() {
    return this.m_stemmer;
}
项目:AffectiveTweets    文件:Utils.java   
/** Full-match pattern for tokens that are replaced by the generic URL; compiled once. */
private static final java.util.regex.Pattern URL_TOKEN_PATTERN =
        java.util.regex.Pattern.compile("http.*|ww\\..*|www\\..*");

/** Full-match pattern for tokens that are replaced by the generic user mention; compiled once. */
private static final java.util.regex.Pattern USER_MENTION_PATTERN =
        java.util.regex.Pattern.compile("@.*");

/**
 * Tokenizes a String and returns the tokens that survive stopword removal,
 * after optional normalization and stemming.
 *
 * <p>Processing order: the whole content is optionally lowercased, then runs
 * of a repeated lowercase ASCII letter are optionally collapsed to two
 * occurrences, then the content is handed to the tokenizer. Every token that
 * is not a stopword is optionally replaced by a generic URL or user
 * placeholder and is finally passed through the stemmer.
 *
 * @param content the content
 * @param toLowerCase true for lowercasing the content
 * @param standarizeUrlsUsers true for standardizing URLs and user mentions
 * @param reduceRepeatedLetters true for reducing repeated letters
 * @param tokenizer the tokenizer
 * @param stemmer the stemmer; when null the tokens are added unstemmed
 * @param stop the stopwords handler; when null no token is discarded
 * @return a list of tokens
 */
static public List<String> tokenize(String content, boolean toLowerCase, boolean standarizeUrlsUsers, boolean reduceRepeatedLetters, Tokenizer tokenizer, Stemmer stemmer, StopwordsHandler stop) {

    // Locale.ROOT keeps the result independent of the JVM default locale
    // (e.g. a Turkish locale would otherwise map "I" to a dotless "i",
    // which the [a-z] pattern below no longer matches).
    if (toLowerCase) {
        content = content.toLowerCase(java.util.Locale.ROOT);
    }

    // if a letter appears two or more times in a row it is replaced by
    // exactly two occurrences of it
    if (reduceRepeatedLetters) {
        content = content.replaceAll("([a-z])\\1+", "$1$1");
    }

    List<String> tokens = new ArrayList<String>();

    tokenizer.tokenize(content);
    while (tokenizer.hasMoreElements()) {
        String token = tokenizer.nextElement();

        // the stopword test runs on the raw token, before any placeholder replacement
        if (stop != null && stop.isStopword(token)) {
            continue;
        }

        if (standarizeUrlsUsers) {
            if (URL_TOKEN_PATTERN.matcher(token).matches()) {
                // Replace URLs with a generic URL
                token = "http://www.url.com";
            } else if (USER_MENTION_PATTERN.matcher(token).matches()) {
                // Replace user mentions with a generic user
                token = "@user";
            }
        }

        tokens.add(stemmer == null ? token : stemmer.stem(token));
    }

    return tokens;

}
项目:wekaDeeplearning4j    文件:StemmingPreprocessor.java   
/**
 * Gets the stemmer stored in this object. The accessor is published as the
 * "stemmer" option through its {@code OptionMetadata} annotation.
 *
 * @return the current value of the {@code stemmer} field
 */
@OptionMetadata(
  displayName = "stemmer",
  description = "The Weka stemmer to use.",
  commandLineParamName = "stemmer",
  commandLineParamSynopsis = "-stemmer <String>",
  displayOrder = 0
)
public Stemmer getStemmer() {
  return this.stemmer;
}
项目:AffectiveTweets    文件:TweetToFeatureVector.java   
/**
 * Stores the given stemmer in this object.
 *
 * @param m_stemmer the stemmer to keep; it shadows the field of the same
 *                  name, hence the explicit {@code this.} on the assignment
 */
public void setStemmer(Stemmer m_stemmer) {
    this.m_stemmer = m_stemmer;
}
项目:AffectiveTweets    文件:ArffLexiconWordLabeller.java   
/**
 * Replaces the stemmer held by this object with the given one.
 *
 * @param m_stemmer the new stemmer; the parameter shadows the field of the
 *                  same name, so the field is addressed through {@code this}
 */
public void setStemmer(Stemmer m_stemmer) {
    this.m_stemmer = m_stemmer;
}
项目:AffectiveTweets    文件:ArffLexiconEvaluator.java   
/**
 * Assigns the stemmer field of this object.
 *
 * @param m_stemmer the stemmer to assign; it has the same name as the field,
 *                  which is why the assignment target is qualified with {@code this}
 */
public void setStemmer(Stemmer m_stemmer) {
    this.m_stemmer = m_stemmer;
}
项目:wekaDeeplearning4j    文件:StemmingPreprocessor.java   
/**
 * Stores the given stemmer in this object.
 *
 * @param stemmer the stemmer to assign to the {@code stemmer} field
 */
public void setStemmer(Stemmer stemmer) {
  this.stemmer = stemmer;
}
项目:repo.kmeanspp.silhouette_score    文件:NaiveBayesMultinomialText.java   
/**
 * Gets the stemming algorithm currently in use.
 *
 * @return the stemmer held in {@code m_stemmer}, or null if none has been set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
项目:repo.kmeanspp.silhouette_score    文件:SGDText.java   
/**
 * Gets the stemming algorithm currently in use.
 *
 * @return the stemmer held in {@code m_stemmer}, or null if none has been set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
项目:repo.kmeanspp.silhouette_score    文件:StringToWordVector.java   
/**
 * Gets the stemming algorithm currently in use.
 *
 * @return the stemmer held in {@code m_Stemmer}, or null if none has been set
 */
public Stemmer getStemmer() {
  return this.m_Stemmer;
}
项目:autoweka    文件:NaiveBayesMultinomialText.java   
/**
 * Provides access to the stemming algorithm that is currently configured.
 *
 * @return the configured stemmer, or null when no stemmer is set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
项目:autoweka    文件:SGDText.java   
/**
 * Provides access to the stemming algorithm that is currently configured.
 *
 * @return the configured stemmer, or null when no stemmer is set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
项目:autoweka    文件:StringToWordVector.java   
/**
 * Provides access to the stemming algorithm that is currently configured.
 *
 * @return the configured stemmer, or null when no stemmer is set
 */
public Stemmer getStemmer() {
  return this.m_Stemmer;
}
项目:umple    文件:NaiveBayesMultinomialText.java   
/**
 * Reports which stemming algorithm this object currently uses.
 *
 * @return the stemmer in use, or null if no stemmer has been set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
项目:umple    文件:SGDText.java   
/**
 * Reports which stemming algorithm this object currently uses.
 *
 * @return the stemmer in use, or null if no stemmer has been set
 */
public Stemmer getStemmer() {
  return this.m_stemmer;
}
项目:umple    文件:StringToWordVector.java   
/**
 * Reports which stemming algorithm this object currently uses.
 *
 * @return the stemmer in use, or null if no stemmer has been set
 */
public Stemmer getStemmer() {
  return this.m_Stemmer;
}
项目:jbossBA    文件:StringToWordVector.java   
/**
 * Gets the stemming algorithm currently in use.
 *
 * @return the stemmer held in {@code m_Stemmer}, or null if none has been set
 */
public Stemmer getStemmer() {
  return this.m_Stemmer;
}