public static List<Cell> removeStopwords_list (List<Cell> l){ String temp = new String(); Cell c; //int prev_size = l.size(); //int new_size; Stopwords checker = new Stopwords(); for(int i=0; i< l.size(); i++){ c = l.get(i); temp = c.getText(); if(checker.is(temp)){ l.remove(i); i--; ////tln("Removed stopword: " + temp); }else{ ////tln("## Didnt Remove stopword: " + temp); } } //new_size = l.size(); return l; }
public static List<Cell> readFile (String path, String classe, String original_file) { Stopwords checker = new Stopwords(); List<Cell> wBag = new ArrayList<Cell>(); Scanner sc2 = null; Cell s; int counter = 0; File f = new File(path); if(f.isDirectory())return null; try { sc2 = new Scanner(f); } catch (FileNotFoundException e) { //tln("erro"); e.printStackTrace(); } while (sc2.hasNextLine()) { Scanner s2 = new Scanner(sc2.nextLine()); while (s2.hasNext()) { s = new Cell(); /* tratar espaços */ s.setText((s2.next().toLowerCase()).replaceAll("[^A-Za-z]", "")); s.setClasse(classe); s.setOriginal_file(original_file); if((s.getText() != "") && !checker.is(s.getText()) ) wBag.add(s); else counter++; } s2.close(); } sc2.close(); return wBag; }
/**
 * Collects the entries of a freshly constructed {@code Stopwords} object
 * into a list.
 *
 * @return a new list holding every element enumerated by
 *         {@code Stopwords.elements()}, in enumeration order
 */
public static List<String> defaultStopWords() {
    Stopwords stopwords = new Stopwords();
    List<String> words = new ArrayList<>(100);
    for (Enumeration<?> remaining = stopwords.elements(); remaining.hasMoreElements();) {
        String word = (String) remaining.nextElement();
        words.add(word);
    }
    return words;
}
/**
 * Tells whether the given info is a stopword, judged by its lemma.
 *
 * <p>The lemma is obtained through {@code InfoGetFields.getLemma(info)} and
 * passed to {@code Stopwords.isStopword}.
 *
 * @param info the item whose lemma is checked
 * @return the result of {@code Stopwords.isStopword} for that lemma
 */
protected boolean isStopword(T info) { return Stopwords.isStopword(InfoGetFields.getLemma(info)); }