/**
 * Checks that a {@code DelimitedPayloadTokenFilter} created through
 * {@code DelimitedPayloadTokenFilterFactory}, configured with a {@code FloatEncoder}
 * and {@code *} as delimiter, attaches the float payload {@code 0.1} to every token
 * of the input {@code "the*0.1 quick*0.1 red*0.1"}.
 *
 * @throws Exception if building or consuming the token stream fails
 */
public void testDelim() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
    args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");

    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);

    TokenStream input = new MockTokenizer(
            new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);

    tf.reset();
    // Counted so the test cannot pass vacuously when the stream yields no tokens.
    int tokenCount = 0;
    while (tf.incrementToken()) {
        tokenCount++;
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        // Fail with a readable message instead of a NullPointerException when no payload was set.
        assertTrue("payload is null and it shouldn't be", payAttr.getPayload() != null);
        byte[] payData = payAttr.getPayload().bytes;
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
    // Finish the consumer workflow (reset -> incrementToken* -> end -> close).
    // Done on the success path only, so a failed assertion above is not masked
    // by a state check inside the tokenizer's close().
    tf.end();
    tf.close();

    // The whitespace-separated input holds exactly three tokens.
    assertTrue("expected 3 tokens but got " + tokenCount, tokenCount == 3);
}
public void index() { try { Directory dir = FSDirectory.open(new File("D:/data/indices/sandbox")); Analyzer analyzer = new MyPayloadAnalyzer(new FloatEncoder()); IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer); iwconfig.setSimilarity(new MyPayloadSimilarity()); iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // load mappings and classifiers HashMap<String, String> mappings = this.loadDataMappings(); HashMap<String, HashMap> cMaps = this.loadData(); IndexWriter writer = new IndexWriter(dir, iwconfig); indexDocuments(writer, mappings, cMaps); writer.close(); } catch (IOException e) { System.out.println("Exception while indexing: " + e.getMessage()); } }
public void index() { try { logger.info("Index will be written to: " + configManager.getProperty(ConfigConstants.CONCEPT_INDEX_FOLDER)); Directory dir = FSDirectory.open(new File(configManager.getProperty(ConfigConstants.CONCEPT_INDEX_FOLDER))); Analyzer analyzer = new BilkentDemoPayloadAnalyzer(new FloatEncoder()); IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer); iwconfig.setSimilarity(new BilkentDemoPayloadSimilarity()); iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // load mappings and classifiers HashMap<String, String> mappings = this.loadDataMappings(); HashMap<String, HashMap> cMaps = this.loadData(); IndexWriter writer = new IndexWriter(dir, iwconfig); indexDocuments(writer, mappings, cMaps); writer.commit(); writer.close(); } catch (IOException e) { System.out.println("Exception while indexing: " + e.getMessage()); } }
/** * Indexes all concepts contained in a collection */ public void index() { // true creates a new index / false updates the existing index boolean create = false; // check if data directory exists //logger.debug("content Dir = " + this.contentFolder); final File contentFolderDir = new File(this.contentFolder); if (!contentFolderDir.exists() || !contentFolderDir.canRead()) { logger.error("Document directory '" + contentFolderDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } // to calculate indexing time as a performance measure Date start = new Date(); try { //logger.debug("Indexing concepts to directory '" + this.indexFolder + "'..."); Directory dir = FSDirectory.open(new File(this.indexFolder)); // defines analyzers based on field types; e.g. concept fields use the PayloadAnalyzer //Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); //analyzerPerField.put("concept", new PayloadAnalyzer(new FloatEncoder())); //PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( // new StandardAnalyzer(Version.LUCENE_4_10_1), analyzerPerField); Analyzer analyzer = new PayloadAnalyzer(new FloatEncoder()); IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer); iwconfig.setSimilarity(new PayloadSimilarity()); if (create) { // Create new index, remove previous index //logger.debug("Creating a new concept index in directory: '" + this.indexFolder + "'..."); iwconfig.setOpenMode(OpenMode.CREATE); } else { // Add new documents to existing index //logger.debug("Updating the concept index in directory: '" + this.indexFolder + "'..."); iwconfig.setOpenMode(OpenMode.CREATE_OR_APPEND); } // load mappings HashMap<String, String> mappings = this.loadMappings(); // load concept classifiers HashMap<String, HashMap> conceptMaps = this.loadConceptFiles(contentFolderDir); // test //logger.info("lookup test: blue->1: " + conceptMaps.get("blue").get("1")); // index IndexWriter writer 
= new IndexWriter(dir, iwconfig); indexDocuments(writer, mappings, conceptMaps); writer.close(); // time stamping Date end = new Date(); logger.debug("Indexing time: " + (end.getTime() - start.getTime()) + " total milliseconds"); } catch (IOException e) { logger.error("Exception: " + e.getMessage()); } }