/**
 * Reads the file at {@code filePath} line by line, stems each line with a
 * {@link SnowballStemmer}, and collects the stemmed lines in encounter order.
 *
 * @param filePath path of the text file to read
 * @return list of stemmed lines; empty if the file cannot be read or stemming fails
 */
public LinkedList<String> makeExtractList(String filePath) {
    LinkedList<String> extractList = new LinkedList<String>();
    File file = new File(filePath);
    // Hoisted out of the loop: one stemmer serves every line (was re-created per line).
    SnowballStemmer porter = new SnowballStemmer();
    // try-with-resources closes the Scanner (and underlying stream) even when
    // stemming throws; the original leaked it on any exception. Explicit UTF-8
    // avoids depending on the platform default charset.
    try (Scanner sc = new Scanner(new FileInputStream(file), "UTF-8")) {
        while (sc.hasNextLine()) {
            String content = porter.stem(sc.nextLine());
            System.out.println(content);
            extractList.add(content);
        }
    } catch (FileNotFoundException fnf) {
        fnf.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("\nProgram terminated Safely...");
    }
    return extractList;
}
@Test public void testConfigRotation() throws Exception { Map<String, String> failedConfigs = new HashMap<>(); tii = new RnnTextEmbeddingInstanceIterator(); tii.setWordVectorLocation(modelSlim); data = DatasetLoader.loadAnger(); // Reduce datasize RemovePercentage rp = new RemovePercentage(); rp.setPercentage(98); rp.setInputFormat(data); data = Filter.useFilter(data, rp); RnnOutputLayer out = new RnnOutputLayer(); out.setLossFn(new LossMSE()); out.setActivationFunction(new ActivationIdentity()); final Dl4jWordsFromFile wff = new Dl4jWordsFromFile(); wff.setStopwords(new File("src/test/resources/stopwords/english.txt")); // Iterate stopwords for (Dl4jAbstractStopwords sw : new Dl4jAbstractStopwords[] {new Dl4jRainbow(), new Dl4jNull(), wff}) { tii.setStopwords(sw); final StemmingPreprocessor spp = new StemmingPreprocessor(); spp.setStemmer(new SnowballStemmer()); // Iterate TokenPreProcess for (TokenPreProcess tpp : new TokenPreProcess[] { new CommonPreprocessor(), new EndingPreProcessor(), new LowCasePreProcessor(), spp }) { tii.setTokenPreProcess(tpp); // Iterate tokenizer faktory for (TokenizerFactory tf : new TokenizerFactory[] { new DefaultTokenizerFactory(), new CharacterNGramTokenizerFactory(), new TweetNLPTokenizerFactory(), }) { tii.setTokenizerFactory(tf); // Create clean classifier clf = new RnnSequenceClassifier(); clf.setNumEpochs(1); clf.setLayers(out); clf.setInstanceIterator(tii); clf.settBPTTforwardLength(3); clf.settBPTTbackwardLength(3); String conf = "\n - TokenPreProcess: " + tpp.getClass().getSimpleName() + "\n - TokenizerFactory: " + tf.getClass().getSimpleName() + "\n - StopWords: " + sw.getClass().getSimpleName(); log.info(conf); try { clf.buildClassifier(data); } catch (Exception e) { failedConfigs.put(conf, e.toString()); } } } } // Check if anything failed if (!failedConfigs.isEmpty()) { final String err = failedConfigs .keySet() .stream() .map(s -> "Config failed: " + s + "\nException: " + failedConfigs.get(s)) 
.collect(Collectors.joining("\n")); Assert.fail("Some of the configs failed:\n" + err); } }