/**
 * Loads a stopword list from a classpath resource, one entry per line.
 *
 * <p>The resource is resolved relative to {@code CatalanAnalyzer.class}. Lines that
 * start with {@code #} are treated as comments and skipped; every other line
 * (including blank ones) is added verbatim, in file order.
 *
 * @param path resource path passed to {@code Class.getResourceAsStream}
 * @return a new mutable list holding the non-comment lines of the resource
 * @throws NullPointerException if no resource exists at {@code path}
 * @throws IllegalStateException if the resource cannot be read
 */
private static List<String> getStopList(String path) {
    InputStream in = CatalanAnalyzer.class.getResourceAsStream(path);
    // getResourceAsStream signals "not found" with null; fail with a message that names
    // the resource instead of an anonymous NPE from the reader constructor.
    java.util.Objects.requireNonNull(in, "Stopword resource not found: " + path);

    // try-with-resources closes the reader (and the wrapped stream) on every exit path.
    // The charset is pinned so accented entries decode the same on every platform.
    // NOTE(review): assumes the stopword resource is UTF-8 encoded - confirm.
    try (BufferedReader input = new BufferedReader(
            new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
        List<String> stopwords = new ArrayList<String>();
        String line;
        while ((line = input.readLine()) != null) {
            if (line.startsWith("#")) {
                continue;
            }
            stopwords.add(line);
        }
        return stopwords;
    } catch (IOException e) {
        // Surface the failure to the caller rather than terminating the whole JVM.
        throw new IllegalStateException("Failed to read stopword resource: " + path, e);
    }
}
public CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new CatalanAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CatalanAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET) ); analyzer.setVersion(version); }
@Inject public CatalanAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettingsService.getSettings(), name, settings); analyzer = new CatalanAnalyzer(Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); analyzer.setVersion(version); }
/**
 * Returns the analyzer held by this provider.
 *
 * @return the {@link CatalanAnalyzer} stored in the {@code analyzer} field
 */
@Override
public CatalanAnalyzer get() {
    return analyzer;
}
/**
 * Returns the default stopwords set used by the Lucene language analyzer for the specified language.
 *
 * @param language The language for which the stopwords are. The supported languages are English, French, Spanish,
 *                 Portuguese, Italian, Romanian, German, Dutch, Swedish, Norwegian, Danish, Russian, Finnish,
 *                 Irish, Hungarian, Turkish, Armenian, Basque and Catalan. Matching is case-sensitive.
 * @return The default stopwords set of the matching Lucene analyzer, or {@code CharArraySet.EMPTY_SET}
 *         when the language is not one of the supported names.
 */
private static CharArraySet getDefaultStopwords(String language) {
    switch (language) {
        case "English":
            return EnglishAnalyzer.getDefaultStopSet();
        case "French":
            return FrenchAnalyzer.getDefaultStopSet();
        case "Spanish":
            return SpanishAnalyzer.getDefaultStopSet();
        case "Portuguese":
            return PortugueseAnalyzer.getDefaultStopSet();
        case "Italian":
            return ItalianAnalyzer.getDefaultStopSet();
        case "Romanian":
            return RomanianAnalyzer.getDefaultStopSet();
        case "German":
            return GermanAnalyzer.getDefaultStopSet();
        case "Dutch":
            return DutchAnalyzer.getDefaultStopSet();
        case "Swedish":
            return SwedishAnalyzer.getDefaultStopSet();
        case "Norwegian":
            return NorwegianAnalyzer.getDefaultStopSet();
        case "Danish":
            return DanishAnalyzer.getDefaultStopSet();
        case "Russian":
            return RussianAnalyzer.getDefaultStopSet();
        case "Finnish":
            return FinnishAnalyzer.getDefaultStopSet();
        case "Irish":
            return IrishAnalyzer.getDefaultStopSet();
        case "Hungarian":
            return HungarianAnalyzer.getDefaultStopSet();
        case "Turkish":
            // Previously returned the Spanish set (copy-paste slip). Fully qualified so the
            // fix does not depend on an import that may be absent from this file.
            return org.apache.lucene.analysis.tr.TurkishAnalyzer.getDefaultStopSet();
        case "Armenian":
            // Previously returned the Spanish set (copy-paste slip); see the note above.
            return org.apache.lucene.analysis.hy.ArmenianAnalyzer.getDefaultStopSet();
        case "Basque":
            return BasqueAnalyzer.getDefaultStopSet();
        case "Catalan":
            return CatalanAnalyzer.getDefaultStopSet();
        default:
            return CharArraySet.EMPTY_SET;
    }
}