/**
 * Test the WordScorer emitted by the smoothing model.
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);
    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir,
            MultiFields.getTerms(ir, "field"), "field", 0.9d, BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
/**
 * Constructs a new KrillIndex bound to a persistent index.
 *
 * @param directory
 *            A {@link Directory} pointing to an index
 * @throws IOException
 */
public KrillIndex (Directory directory) throws IOException {
    this.directory = directory;

    // Add analyzers
    // TODO: Should probably not be here - make configurable
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("textClass", new KeywordAnalyzer());
    analyzerPerField.put("keywords", new KeywordAnalyzer());
    analyzerPerField.put("foundries", new KeywordAnalyzer());
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
            new TextAnalyzer(), analyzerPerField);

    // Create configuration with base analyzer
    this.config = new IndexWriterConfig(analyzer);
}
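A minimal sketch of what this per-field routing buys, with StandardAnalyzer standing in for the project's TextAnalyzer and illustrative values (not KrillIndex code): KeywordAnalyzer keeps a whole metadata value as a single token, so fields like "textClass" can be matched exactly.

// Hedged sketch: inspect how the wrapper tokenizes a keyword-routed field.
Analyzer wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(),
        Collections.<String, Analyzer>singletonMap("textClass", new KeywordAnalyzer()));
try (TokenStream ts = wrapper.tokenStream("textClass", "politik wirtschaft")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term); // one token: "politik wirtschaft"
    }
    ts.end();
}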
@Test
public void failureToCreateAnIndexShouldNotLeaveConfigurationBehind() throws Exception {
    // WHEN
    try {
        // PerFieldAnalyzerWrapper is invalid since it has no public no-arg constructor
        nodeIndex(stringMap("analyzer", PerFieldAnalyzerWrapper.class.getName()));
        fail("Should have failed");
    } catch (RuntimeException e) {
        assertThat(e.getMessage(), CoreMatchers.containsString(PerFieldAnalyzerWrapper.class.getName()));
    }

    // THEN - assert that there's no index config about this index left behind
    assertFalse("There should be no index config for index '" + currentIndexName() + "' left behind",
            ((GraphDatabaseAPI) graphDb).getDependencyResolver()
                    .resolveDependency(IndexConfigStore.class)
                    .has(Node.class, currentIndexName()));
}
private void doAddOrUpdateDocument(final KBEnrichmentRequest request, final KBModifications mod) {
    final HashMap<String, String> hash = new HashMap<String, String>();
    final List<DocumentToProcess> docsToProcess = request.getDocList();
    final DocumentToProcess doc = docsToProcess.get(0);
    final List<EntryToProcess> list = doc.getEntryList();
    for (final EntryToProcess pro : list) {
        hash.put(pro.getFieldName(), pro.getValue());
    }
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("Mainlink", new DoserIDAnalyzer());
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new DoserStandardAnalyzer(), analyzerPerField);
    final NewDocumentOrUpdateOperator operator = new NewDocumentOrUpdateOperator(
            request.getKburi(), aWrapper, doc.getKey(), hash,
            request.getPrimaryKeyField(), mod);
    try {
        KnowledgebaseModification.getInstance().processNewKnowledgeOperation(operator);
    } catch (final ModifyKnowledgeBaseException e) {
        // Log the exception itself; error(e.getStackTrace()) would only print the array reference.
        Logger.getRootLogger().error("ModifyKnowledgeBaseException", e);
    }
}
public static Analyzer getKEAAnalyzer(String fieldName) {
    Map<String, Analyzer> amap = new HashMap<>();
    amap.put(Commons.getFieldName(fieldName, 1), new KEAAnalyzer(1));
    amap.put(Commons.getFieldName(fieldName, 2), new KEAAnalyzer(2));
    amap.put(Commons.getFieldName(fieldName, 3), new KEAAnalyzer(3));
    return new PerFieldAnalyzerWrapper(new StandardAnalyzer(), amap);
}
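A hedged usage sketch (the index path and content are illustrative; Commons.getFieldName is assumed to derive per-order field names such as "text_1"): the wrapper routes each derived field to the KEAAnalyzer of the matching n-gram order, so one document can carry unigram, bigram, and trigram views of the same text.

Analyzer analyzer = getKEAAnalyzer("text");
try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("kea-idx")),
        new IndexWriterConfig(analyzer))) {
    Document doc = new Document();
    String content = "latent semantic analysis";
    // Same content, analyzed three ways through the per-field mapping.
    doc.add(new TextField(Commons.getFieldName("text", 1), content, Field.Store.NO));
    doc.add(new TextField(Commons.getFieldName("text", 2), content, Field.Store.NO));
    doc.add(new TextField(Commons.getFieldName("text", 3), content, Field.Store.NO));
    writer.addDocument(doc);
}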
private void doAddDocument(final KBEnrichmentRequest request) {
    final List<HashMap<String, String>> list = new LinkedList<HashMap<String, String>>();
    final List<DocumentToProcess> process = request.getDocList();
    for (final DocumentToProcess doc : process) {
        final HashMap<String, String> hash = new HashMap<String, String>();
        final List<EntryToProcess> entrylist = doc.getEntryList();
        for (final EntryToProcess entry : entrylist) {
            hash.put(entry.getFieldName(), entry.getValue());
        }
        list.add(hash);
    }
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("Mainlink", new DoserIDAnalyzer());
    analyzerPerField.put("ID", new DoserIDAnalyzer());
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new DoserStandardAnalyzer(), analyzerPerField);
    final AddNewDocumentsOperator operator = new AddNewDocumentsOperator(
            request.getKburi(), aWrapper, list, request.getPrimaryKeyField());
    try {
        KnowledgebaseModification.getInstance().processNewKnowledgeOperation(operator);
    } catch (final ModifyKnowledgeBaseException e) {
        Logger.getRootLogger().error("ModifyKnowledgeBaseException", e);
    }
}
private void doUpdateDocument(final KBEnrichmentRequest request, final KBModifications mod) {
    final HashMap<String, HashMap<String, String>> hash = new HashMap<String, HashMap<String, String>>();
    final List<DocumentToProcess> docs = request.getDocList();
    for (final DocumentToProcess doc : docs) {
        final HashMap<String, String> map = new HashMap<String, String>();
        final List<EntryToProcess> entries = doc.getEntryList();
        for (final EntryToProcess entry : entries) {
            map.put(entry.getFieldName(), entry.getValue());
        }
        hash.put(doc.getKey(), map);
    }
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("Mainlink", new DoserIDAnalyzer());
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new DoserStandardAnalyzer(), analyzerPerField);
    final UpdateKnowledgeBaseEntryOperator operator = new UpdateKnowledgeBaseEntryOperator(
            request.getKburi(), aWrapper, hash, request.getPrimaryKeyField(), mod);
    try {
        KnowledgebaseModification.getInstance().processNewKnowledgeOperation(operator);
    } catch (final ModifyKnowledgeBaseException e) {
        // Log the exception itself; error(e.getStackTrace()) would only print the array reference.
        Logger.getRootLogger().error("ModifyKnowledgeBaseException", e);
    }
}
public CAnalyzer(Version version) {
    matchVersion = version;
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    // for option name
    analyzerPerField.put("op_name", new OptionNameAnalyzer(matchVersion));
    // for annotated option description
    analyzerPerField.put("op_desc", new EnglishAnalyzer(matchVersion));
    analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(matchVersion), analyzerPerField);
}
@SuppressWarnings("resource") private static Analyzer createAnalyzer() { final Analyzer colorAnnotatorAnalyzer = new ColorAnnotatorAnalyzer(); final Analyzer animalAnnotatorAnalyzer = new AnimalAnnotatorAnalyzer(); final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(); return new PerFieldAnalyzerWrapper(defaultAnalyzer, ImmutableMap.<String, Analyzer> of( COLOR_FIELD, colorAnnotatorAnalyzer, ANIMAL_FIELD, animalAnnotatorAnalyzer)); }
public VocabularyIndexAnalyzer() throws IOException, URISyntaxException {
    super(NO_REUSE_STRATEGY);
    Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
    fieldAnalyzers.put(NodeProperties.LABEL, new TermAnalyzer());
    fieldAnalyzers.put(NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers);
}
public VocabularyQueryAnalyzer() {
    Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
    fieldAnalyzers.put(NodeProperties.LABEL, new TermAnalyzer());
    fieldAnalyzers.put(NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers);
}
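Both the index-time and query-time analyzers above share one routing convention: each vocabulary property has a tokenized base field plus an exact twin. A hedged sketch of the indexing side this implies (label value and store flags are illustrative):

// The same value goes to the TermAnalyzer-tokenized base field and to its
// "<field> + EXACT_SUFFIX" twin kept intact by the ExactAnalyzer, so both
// analyzed and exact matching are possible at query time.
Document doc = new Document();
String label = "myocardial infarction";
doc.add(new TextField(NodeProperties.LABEL, label, Field.Store.YES));
doc.add(new TextField(NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX, label, Field.Store.NO));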
@BeforeClass
public static void beforeClass() {
    englishAnalyzer = new EnglishAnalyzer();
    spanishAnalyzer = new SpanishAnalyzer();
    Map<String, Analyzer> analyzers = new HashMap<>();
    analyzers.put("english", englishAnalyzer);
    analyzers.put("spanish", spanishAnalyzer);
    perFieldAnalyzer = new PerFieldAnalyzerWrapper(spanishAnalyzer, analyzers);
}
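A hedged sketch of a test this setup enables (the test method itself is hypothetical): the wrapper should delegate the "english" field to the English stemmer, so "running" stems to "run", while any other field falls back to the Spanish default.

@Test
public void delegatesToEnglishStemmerForEnglishField() throws IOException {
    try (TokenStream ts = perFieldAnalyzer.tokenStream("english", "running")) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertTrue(ts.incrementToken());
        assertEquals("run", term.toString()); // Porter stemmer: "running" -> "run"
        assertFalse(ts.incrementToken());
        ts.end();
    }
}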
@SuppressWarnings("resource") private Analyzer createAnalyzer() { Map<String, Analyzer> analyzerPerField = new HashMap<>(); Analyzer defaultAnalyzer = new KeywordAnalyzer(); analyzerPerField.put(FIELD_NAME, new MoveTextAnalyzer()); return new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField); }
public void createIndex(List<File> files, String idxDirectory, String baseURI) {
    try {
        urlAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
        literalAnalyzer = new LiteralAnalyzer(LUCENE_VERSION);
        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
        mapping.put(TripleIndex.FIELD_NAME_SUBJECT, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_PREDICATE, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_OBJECT_URI, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_OBJECT_LITERAL, literalAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(urlAnalyzer, mapping);

        File indexDirectory = new File(idxDirectory);
        indexDirectory.mkdir();
        directory = new MMapDirectory(indexDirectory);
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, perFieldAnalyzer);
        iwriter = new IndexWriter(directory, config);
        iwriter.commit();
        for (File file : files) {
            String type = FileUtil.getFileExtension(file.getName());
            if (type.equals(TTL))
                indexTTLFile(file, baseURI);
            if (type.equals(TSV))
                indexTSVFile(file);
            iwriter.commit();
        }
        iwriter.close();
        ireader = DirectoryReader.open(directory);
    } catch (Exception e) {
        log.error("Error while creating TripleIndex.", e);
    }
}
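A hedged sketch of the query side this mapping implies (the literal "Berlin" and result count are illustrative, and perFieldAnalyzer/ireader are assumed to be the ones built above): reusing the same PerFieldAnalyzerWrapper for parsing keeps query-time tokenization aligned with index-time tokenization per field.

// Parse against the literal field with the identical per-field analyzer, so
// the query is tokenized by the LiteralAnalyzer, just like the indexed data.
QueryParser parser = new QueryParser(LUCENE_VERSION,
        TripleIndex.FIELD_NAME_OBJECT_LITERAL, perFieldAnalyzer);
TopDocs hits = new IndexSearcher(ireader).search(parser.parse("Berlin"), 10);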
public void createIndex(List<File> files, String idxDirectory, String baseURI) {
    try {
        urlAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
        literalAnalyzer = new LiteralAnalyzer(LUCENE_VERSION);
        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
        mapping.put(FIELD_NAME_URI, urlAnalyzer);
        mapping.put(FIELD_NAME_SURFACE_FORM, literalAnalyzer);
        mapping.put(FIELD_NAME_URI_COUNT, literalAnalyzer);
        mapping.put(FIELD_NAME_CONTEXT, literalAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(urlAnalyzer, mapping);

        File indexDirectory = new File(idxDirectory);
        indexDirectory.mkdir();
        directory = new MMapDirectory(indexDirectory);
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, perFieldAnalyzer);
        iwriter = new IndexWriter(directory, config);
        iwriter.commit();
        for (File file : files) {
            String type = FileUtil.getFileExtension(file.getName());
            if (type.equals(TTL))
                indexTTLFile(file, baseURI);
            iwriter.commit();
        }
    } catch (Exception e) {
        log.error("Error while creating TripleIndex.", e);
    }
}
/**
 * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#getAnalyzer()}.
 */
@Test
public final void testGetAnalyzer() throws Exception {
    RAMDirectory indexDirectory = new RAMDirectory();
    RAMDirectory taxonomyDirectory = new RAMDirectory();
    IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory);
    target.initializeIndex();
    assertTrue(target.getAnalyzer() instanceof PerFieldAnalyzerWrapper);
    target.dispose();
}
public void addEvent(Event newEvent) throws IOException {
    Directory index = FSDirectory.open(Paths.get(indexDirectoryPath));
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzerLookup);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(index, indexWriterConfig);
    Document d = eventToDocument(newEvent);
    indexWriter.addDocument(d);
    indexWriter.commit();
    indexWriter.close();
    index.close();
}
public void updateEvent(Event event) throws IOException {
    Directory index = FSDirectory.open(Paths.get(indexDirectoryPath));
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzerLookup);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(index, indexWriterConfig);
    Document d = eventToDocument(event);
    indexWriter.updateDocument(new Term("id", event.getId()), d);
    indexWriter.commit();
    indexWriter.close();
    index.close();
}
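addEvent and updateEvent repeat the same open-configure-commit-close sequence. A hedged refactoring sketch (the helper name openWriter is hypothetical) that centralizes the writer setup; callers still commit, close the writer, and close its directory:

// Hypothetical helper consolidating the shared setup from addEvent/updateEvent.
// Callers remain responsible for commit() and close(), and for closing the
// underlying directory via writer.getDirectory().close().
private IndexWriter openWriter() throws IOException {
    Directory index = FSDirectory.open(Paths.get(indexDirectoryPath));
    PerFieldAnalyzerWrapper analyzer =
            new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzerLookup);
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(index, config);
}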
@NotNull
private static Analyzer indexAnalyzer() {
    final Map<String, Analyzer> fieldAnalyzers = Maps.newHashMap();
    fieldAnalyzers.put(DRUG_NAME_FIELD, concatenatingAnalyzer());
    return new PerFieldAnalyzerWrapper(wordDelimiterAnalyzer(), fieldAnalyzers);
}
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate fileList. However, further documents were added later and
 * now need a ranked list of reference objects. So we (i) fetch all new documents missing the
 * field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
        // debug: System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}
public PerFieldAnalyzerWrapper getPerFieldAnalyzer() throws Exception {
    return lumongoAnalyzerFactory.getPerFieldAnalyzer();
}
protected Analyzer getAnalyzer() {
    return new PerFieldAnalyzerWrapper(CaseInsensitiveKeywordAnalyzer.ANALYZER, analyzers);
}
/**
 * This test indexes a sample metadata record (= Lucene document) having a
 * "title", "description", and "subject" field, which is semantically enriched
 * by a URI pointing to a SKOS concept "weapons".
 *
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label (altLabel) for the concept "weapons".
 *
 * @throws IOException
 */
@Test
public void uriBasedTermExpansion() throws IOException {
    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859",
            TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    /* ExpansionType.URI -> the field to be analyzed (expanded) contains URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(matchVersion, skosFile, ExpansionType.URI);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(matchVersion), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(matchVersion, indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery query1 = new BooleanQuery();
    query1.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
    query1.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
    query1.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs results = searcher.search(query1, 10);

    /* the document matches because "arms" is among the expanded terms */
    Assert.assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query2 = new TermQuery(new Term("subject", "military equipment"));
    results = searcher.search(query2, 10);

    /* ... also returns the document as result */
    Assert.assertEquals(1, results.totalHits);
}
/**
 * This test indexes a sample metadata record (= Lucene document) having a
 * "title", "description", and "subject" field.
 *
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is contained
 * in the subject field.
 *
 * @throws IOException
 */
@Test
public void labelBasedTermExpansion() throws IOException {
    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    /* ExpansionType.LABEL -> the field to be analyzed (expanded) contains plain labels */
    Analyzer skosAnalyzer = new SKOSAnalyzer(matchVersion, skosFile, ExpansionType.LABEL);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(matchVersion), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(matchVersion, indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery query1 = new BooleanQuery();
    query1.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
    query1.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
    query1.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs results = searcher.search(query1, 10);

    /* the document matches because "arms" is among the expanded terms */
    Assert.assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query2 = new TermQuery(new Term("subject", "military equipment"));
    results = searcher.search(query2, 10);

    /* ... also returns the document as result */
    Assert.assertEquals(1, results.totalHits);
}
static Analyzer getAnalyzer(Language language) {
    Map<String, Analyzer> analyzerMap = new HashMap<>();
    analyzerMap.put(FIELD_NAME, new LanguageToolAnalyzer(new JLanguageTool(language), false));
    analyzerMap.put(FIELD_NAME_LOWERCASE, new LanguageToolAnalyzer(new JLanguageTool(language), true));
    return new PerFieldAnalyzerWrapper(new DoNotUseAnalyzer(), analyzerMap);
}
public void addOrUpdateNote(Note note, String noteHtmlContents) throws IOException {
    Directory index = FSDirectory.open(Paths.get(indexDirectoryPath));
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzerLookup);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(index, indexWriterConfig);
    Document d = noteToDocument(note, noteHtmlContents);
    if (!DirectoryReader.indexExists(index)) {
        indexWriter.addDocument(d);
    } else {
        IndexReader indexReader = DirectoryReader.open(index);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        TopDocs existingDocuments = indexSearcher.search(new TermQuery(new Term("id", note.getId())), 1);
        if (existingDocuments.totalHits == 0)
            indexWriter.addDocument(d);
        else
            indexWriter.updateDocument(new Term("id", note.getId()), d);
        indexReader.close(); // release the reader; it was leaked in the original
    }
    indexWriter.commit();
    indexWriter.close();
    index.close();
}
public List<NoteSearchResult> search(String query, boolean isCaseSensitive, boolean wholeWordsOnly,
        boolean isRegularExpression) throws IOException, ParseException {
    if (isRegularExpression) {
        query = "/" + query + "/";
    } else if (wholeWordsOnly) {
        query = "\"" + query + "\"";
    }

    Directory index = FSDirectory.open(Paths.get(indexDirectoryPath));
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzerLookup);
    Query parsedQuery = new QueryParser(isCaseSensitive ? "title_cs" : "title", analyzer).parse(query);
    IndexReader indexReader = DirectoryReader.open(index);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TopDocs documents = indexSearcher.search(parsedQuery, indexReader.numDocs() + 1);
    List<NoteSearchResult> searchResults = new ArrayList<NoteSearchResult>();
    for (ScoreDoc hit : documents.scoreDocs) {
        Document d = indexSearcher.doc(hit.doc);
        NoteSearchResult searchResult = documentToNote(d);
        searchResults.add(searchResult);
    }
    indexReader.close();
    index.close();
    return searchResults;
}
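A hedged aside on the query rewriting at the top of search() (field name and analyzer are placeholders, and the fragment assumes a context that declares ParseException): Lucene's classic QueryParser interprets /.../ as a regular expression and quoted text as a phrase, which is exactly what the two flags rely on.

QueryParser parser = new QueryParser("title", new StandardAnalyzer());
Query regex = parser.parse("/no.e/");            // parsed as a RegexpQuery
Query phrase = parser.parse("\"whole words\"");  // parsed as a PhraseQuery
System.out.println(regex.getClass().getSimpleName() + " / " + phrase.getClass().getSimpleName());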
/**
 * Factory method to get a new writer for accessing the Lucene index.
 *
 * @return The prepared index writer.
 * @throws IOException
 *             An error occurred while initializing index access.
 */
private IndexWriter createIndexWriter() throws IOException {
    PerFieldAnalyzerWrapper wrapper = createAnalzyerWrapper();
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, wrapper);
    IndexWriter indexWriter = new IndexWriter(directory, config);
    return indexWriter;
}
/**
 * Retrieves a wrapper that handles each index field with a specific {@link Analyzer}:
 * the content field uses a {@link LuceneCodeAnalyzer} (configured through the stopWords,
 * splitCamelCase, and stemming fields of this class), while the comment field uses a
 * {@link StandardAnalyzer}.
 *
 * @return A {@link PerFieldAnalyzerWrapper}.
 */
private PerFieldAnalyzerWrapper createAnalzyerWrapper() {
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put(Indexer.INDEX_CONTENT,
            new LuceneCodeAnalyzer(stopWords, splitCamelCase, stemming, featureTermSet, featuredTermsOnly));
    analyzerPerField.put(Indexer.INDEX_COMMENT, new StandardAnalyzer(LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new LuceneCodeAnalyzer(stopWords, splitCamelCase, stemming, featureTermSet, featuredTermsOnly),
            analyzerPerField);
    return aWrapper;
}
public List<EventSearchResult> search(String query, boolean isCaseSensitive, boolean wholeWordsOnly,
        boolean isRegularExpression) throws IOException, ParseException {
    if (isRegularExpression) {
        query = "/" + query + "/";
    } else if (wholeWordsOnly) {
        query = "\"" + query + "\"";
    }

    Directory index = FSDirectory.open(Paths.get(indexDirectoryPath));
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), fieldAnalyzerLookup);
    Query parsedQuery = new QueryParser(isCaseSensitive ? "text_cs" : "text", analyzer).parse(query);
    IndexReader indexReader = DirectoryReader.open(index);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TopDocs documents = indexSearcher.search(parsedQuery, indexReader.numDocs() + 1);
    List<EventSearchResult> searchResults = new ArrayList<EventSearchResult>();
    for (ScoreDoc hit : documents.scoreDocs) {
        Document d = indexSearcher.doc(hit.doc);
        EventSearchResult searchResult = new EventSearchResult();
        searchResult.setId(d.get("id"));
        searchResult.setText(d.get("text_cs"));
        searchResult.setStartDate(d.get("original_start_date"));
        searchResults.add(searchResult);
    }
    indexReader.close();
    index.close();
    return searchResults;
}
PerFieldAnalyzerWrapper getPerFieldAnalyzer() throws Exception;