public void testAnalyzerAlias() throws IOException {
    Settings settings = Settings.builder()
        .put("index.analysis.analyzer.foobar.alias", "default")
        .put("index.analysis.analyzer.foobar.type", "keyword")
        .put("index.analysis.analyzer.foobar_search.alias", "default_search")
        .put("index.analysis.analyzer.foobar_search.type", "english")
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        // analyzer aliases are only allowed in 2.x indices
        .put(IndexMetaData.SETTING_VERSION_CREATED,
            VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_5))
        .build();
    AnalysisRegistry newRegistry = getNewRegistry(settings);
    IndexAnalyzers indexAnalyzers = getIndexAnalyzers(newRegistry, settings);
    assertThat(indexAnalyzers.get("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
    assertThat(indexAnalyzers.get("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class)));
    assertWarnings("setting [index.analysis.analyzer.foobar.alias] is only allowed on index [test] because it was created before "
            + "5.x; analyzer aliases can no longer be created on new indices.",
        "setting [index.analysis.analyzer.foobar_search.alias] is only allowed on index [test] because it was created before "
            + "5.x; analyzer aliases can no longer be created on new indices.");
}
public void testTopLevel() throws Exception {
    Aggregation result;
    if (randomBoolean()) {
        result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
    } else {
        Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
        result = testCase(query, topHits("_name"));
    }
    SearchHits searchHits = ((TopHits) result).getHits();
    assertEquals(3L, searchHits.getTotalHits());
    assertEquals("3", searchHits.getAt(0).getId());
    assertEquals("type", searchHits.getAt(0).getType());
    assertEquals("2", searchHits.getAt(1).getId());
    assertEquals("type", searchHits.getAt(1).getType());
    assertEquals("1", searchHits.getAt(2).getId());
    assertEquals("type", searchHits.getAt(2).getType());
}
public FbEntitySearcher(String indexDir, int numOfDocs, String searchingStrategy) throws IOException {
    LogInfo.begin_track("Constructing Searcher");
    if (!searchingStrategy.equals("exact") && !searchingStrategy.equals("inexact"))
        throw new RuntimeException("Bad searching strategy: " + searchingStrategy);
    this.searchStrategy = searchingStrategy;

    queryParser = new QueryParser(
        Version.LUCENE_44,
        FbIndexField.TEXT.fieldName(),
        searchingStrategy.equals("exact") ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
    LogInfo.log("Opening index dir: " + indexDir);
    IndexReader indexReader = DirectoryReader.open(SimpleFSDirectory.open(new File(indexDir)));
    indexSearcher = new IndexSearcher(indexReader);
    LogInfo.log("Opened index with " + indexReader.numDocs() + " documents.");
    this.numOfDocs = numOfDocs;
    LogInfo.end_track();
}
/**
 * Parses the query. Using this instead of a QueryParser in order
 * to avoid thread-safety issues with Lucene's query parser.
 *
 * @param fieldName the name of the field
 * @param value the value of the field
 * @return the parsed query
 */
private Query parseTokens(String fieldName, String value) {
    BooleanQuery searchQuery = new BooleanQuery();
    if (value != null) {
        Analyzer analyzer = new KeywordAnalyzer();
        // try-with-resources so the token stream is always closed
        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(value))) {
            tokenStream.reset();
            CharTermAttribute attr = tokenStream.getAttribute(CharTermAttribute.class);
            while (tokenStream.incrementToken()) {
                String term = attr.toString();
                Query termQuery = new TermQuery(new Term(fieldName, term));
                searchQuery.add(termQuery, Occur.SHOULD);
            }
            tokenStream.end();
        } catch (IOException e) {
            throw new DukeException("Error parsing input string '" + value + "' in field " + fieldName);
        }
    }
    return searchQuery;
}
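// Hedged aside (not part of the original class): KeywordAnalyzer emits the entire
// input as a single token, so the SHOULD-loop in parseTokens above adds at most one
// TermQuery per value. A minimal sketch with an illustrative field name and value:
private static void demoKeywordAnalyzerSingleToken() throws IOException {
    Analyzer analyzer = new KeywordAnalyzer();
    try (TokenStream ts = analyzer.tokenStream("city", new StringReader("New York"))) {
        ts.reset();
        CharTermAttribute attr = ts.getAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
            System.out.println(attr.toString()); // prints "New York" exactly once, untokenized
        }
        ts.end();
    }
}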
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
    // extracts the entire input as a single token
    registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
    // Lucene's StandardAnalyzer
    registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "csv", "Comma separated value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
@Override
public String[] suggestEndpointOptions(Set<String> names, String unknownOption) {
    // each option must be on a separate line in a String
    StringBuilder sb = new StringBuilder();
    for (String name : names) {
        sb.append(name);
        sb.append("\n");
    }
    StringReader reader = new StringReader(sb.toString());
    try {
        PlainTextDictionary words = new PlainTextDictionary(reader);
        // use an in-memory Lucene spell checker to make the suggestions
        RAMDirectory dir = new RAMDirectory();
        SpellChecker checker = new SpellChecker(dir);
        checker.indexDictionary(words, new IndexWriterConfig(new KeywordAnalyzer()), false);
        return checker.suggestSimilar(unknownOption, maxSuggestions);
    } catch (Exception e) {
        // ignore
    }
    return null;
}
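// Hedged usage sketch (hypothetical method and option names): exercising
// suggestEndpointOptions above with a misspelled Camel endpoint option.
public void demoSuggestEndpointOptions() {
    Set<String> names = new HashSet<>(Arrays.asList("bridgeErrorHandler", "exchangePattern", "synchronous"));
    String[] suggestions = suggestEndpointOptions(names, "bridgeErrorHandlr");
    // expected to contain "bridgeErrorHandler" (up to maxSuggestions entries)
}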
public CodeSearcher(String indexDir, String field) {
    logger.info("index directory: " + indexDir);
    this.field = field;
    this.indexDir = indexDir;
    try {
        this.reader = DirectoryReader.open(FSDirectory.open(new File(this.indexDir)));
    } catch (IOException e) {
        logger.error("can't get the reader to index dir, exiting, " + indexDir);
        e.printStackTrace();
        System.exit(1);
    }
    this.searcher = new IndexSearcher(this.reader);
    // was: new WhitespaceAnalyzer(Version.LUCENE_46)
    // TODO: pass the analyzer as an argument to the constructor
    this.analyzer = new KeywordAnalyzer();
    new CloneHelper(); // i don't remember why we are making this object?
    this.queryParser = new QueryParser(Version.LUCENE_46, this.field, analyzer);
}
public void prepareIndex() throws IOException {
    File globalWFMDIr = new File(Util.GTPM_INDEX_DIR);
    if (!globalWFMDIr.exists()) {
        Util.createDirs(Util.GTPM_INDEX_DIR);
    }
    KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
    IndexWriterConfig wfmIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, keywordAnalyzer);
    wfmIndexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    wfmIndexWriterConfig.setRAMBufferSizeMB(1024);
    logger.info("PREPARE INDEX");
    try {
        wfmIndexWriter = new IndexWriter(FSDirectory.open(new File(Util.GTPM_INDEX_DIR)), wfmIndexWriterConfig);
        wfmIndexWriter.commit();
        wfmIndexer = new DocumentMaker(wfmIndexWriter);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public VectorCache(int dimension, int cacheSize) throws IOException {
    this.dimension = dimension;
    this.cacheSize = cacheSize;
    this.vectorCache = CacheBuilder.newBuilder()
        .maximumSize(this.cacheSize)
        .build(new CacheLoader<String, Vector>() {
            @Override
            public Vector load(String key) throws IOException {
                return getVectorFromIndex(key);
            }
        });
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, new KeywordAnalyzer());
    writer = new IndexWriter(FSDirectory.open(new File("./VC_" + ID)), iwc);
    dirReader = DirectoryReader.open(writer, true);
    searcher = new IndexSearcher(dirReader);
}
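// Hedged aside: dirReader above is a near-real-time reader opened from the writer,
// so it only sees documents indexed before it was opened. A class like this
// presumably refreshes after writes; a minimal sketch of that step (assumed, the
// original does not show it):
private void refreshReader() throws IOException {
    DirectoryReader newReader = DirectoryReader.openIfChanged(dirReader, writer, true);
    if (newReader != null) { // null means nothing has changed since the reader was opened
        dirReader.close();
        dirReader = newReader;
        searcher = new IndexSearcher(dirReader);
    }
}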
public void openIndexForSearching(boolean useDerivedIndex) {
    try {
        if (useDerivedIndex)
            reader = DirectoryReader.open(FSDirectory.open(new File(folder + "/" + DERIVED_INDEX_FOLDER)));
        else
            reader = DirectoryReader.open(FSDirectory.open(new File(folder + "/" + MAIN_INDEX_FOLDER)));
        searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new DefaultSimilarity());
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        QueryParser typeQueryParser = new QueryParser(Version.LUCENE_4_9, "TYPE", new KeywordAnalyzer());
        conceptQuery = typeQueryParser.parse(CONCEPT_TYPE_STRING);
        conceptIdQueryParser = new QueryParser(Version.LUCENE_4_9, "CONCEPT_ID", new KeywordAnalyzer());
        conceptClassQueryParser = new QueryParser(Version.LUCENE_4_9, "CONCEPT_CLASS_ID", new KeywordAnalyzer());
        vocabularyQueryParser = new QueryParser(Version.LUCENE_4_9, "VOCABULARY_ID", new KeywordAnalyzer());
        keywordsQueryParser = new QueryParser(Version.LUCENE_4_9, "TERM", analyzer);
        domainQueryParser = new QueryParser(Version.LUCENE_4_9, "DOMAIN_ID", new KeywordAnalyzer());
        standardConceptQueryParser = new QueryParser(Version.LUCENE_4_9, "STANDARD_CONCEPT", new KeywordAnalyzer());
        termTypeQueryParser = new QueryParser(Version.LUCENE_4_9, "TERM_TYPE", new KeywordAnalyzer());
        numDocs = reader.numDocs();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
        @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation,
        CurieUtil curieUtil, NodeTransformer transformer) throws IOException {
    this.graph = graph;
    this.curieUtil = curieUtil;
    this.transformer = transformer;
    if (null != neo4jLocation) {
        Directory indexDirectory = FSDirectory.open(
            new File(new File(neo4jLocation), "index/lucene/node/node_auto_index").toPath());
        Directory spellDirectory = FSDirectory.open(
            new File(new File(neo4jLocation), "index/lucene/spellchecker").toPath());
        spellChecker = new SpellChecker(spellDirectory);
        try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
            IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
            spellChecker.indexDictionary(
                new LuceneDictionary(reader, NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX), config, true);
        }
    } else {
        spellChecker = null;
    }
}
@Before
public void setup() throws IOException {
    TableContext.clear();
    _base = new File(TMPDIR, "MutatableActionTest");
    rmr(_base);
    File file = new File(_base, TABLE);
    file.mkdirs();
    TableContext.clear();
    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setName("test");
    tableDescriptor.setTableUri(file.toURI().toString());
    TableContext tableContext = TableContext.create(tableDescriptor);
    ShardContext shardContext = ShardContext.create(tableContext, "test");
    _action = new MutatableAction(shardContext);
    _conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
}
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
        throws IOException {
    HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);
    IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
    Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
    int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
    assertEquals(i, partition);
    Document doc = getDoc(i);
    indexWriter.addDocument(doc);
    indexWriter.close();
}
private IndexReader getReader() throws CorruptIndexException, LockObtainFailedException, IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(directory, conf);
    Document doc = new Document();
    doc.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
    doc.add(new StringField("a", "b", Store.YES));
    doc.add(new StringField("family", "f1", Store.YES));
    Document doc1 = new Document();
    doc1.add(new StringField("a", "b", Store.YES));
    writer.addDocument(doc);
    writer.addDocument(doc1);
    writer.close();
    return DirectoryReader.open(directory);
}
private IndexReader getReaderWithDocsHavingFamily() throws CorruptIndexException, LockObtainFailedException,
        IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(directory, conf);
    Document doc = new Document();
    doc.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
    doc.add(new StringField("a", "b", Store.YES));
    doc.add(new StringField("family", "f2", Store.YES));
    Document doc1 = new Document();
    doc1.add(new StringField("a", "b", Store.YES));
    doc1.add(new StringField("family", "f1", Store.YES));
    writer.addDocument(doc);
    writer.addDocument(doc1);
    writer.close();
    return DirectoryReader.open(directory);
}
@Test
public void testMultipleWritersOpenOnSameDirectory() throws IOException {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration,
        new Path(_path, "test_multiple"));
    IndexWriter writer1 = new IndexWriter(directory, config.clone());
    addDoc(writer1, getDoc(1));
    IndexWriter writer2 = new IndexWriter(directory, config.clone());
    addDoc(writer2, getDoc(2));
    writer1.close();
    writer2.close();

    DirectoryReader reader = DirectoryReader.open(directory);
    int maxDoc = reader.maxDoc();
    assertEquals(1, maxDoc);
    Document document = reader.document(0);
    assertEquals("2", document.get("id"));
    reader.close();
}
@Test
public void testSymlinkWithIndexes() throws IOException {
    HdfsDirectory dir1 = new HdfsDirectory(_configuration, new Path(_base, "dir1"));
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    IndexWriter writer1 = new IndexWriter(dir1, conf.clone());
    writer1.addDocument(getDoc());
    writer1.close();

    HdfsDirectory dir2 = new HdfsDirectory(_configuration, new Path(_base, "dir2"));
    IndexWriter writer2 = new IndexWriter(dir2, conf.clone());
    writer2.addIndexes(dir1);
    writer2.close();

    DirectoryReader reader1 = DirectoryReader.open(dir1);
    DirectoryReader reader2 = DirectoryReader.open(dir2);
    assertEquals(1, reader1.maxDoc());
    assertEquals(1, reader2.maxDoc());
    assertEquals(1, reader1.numDocs());
    assertEquals(1, reader2.numDocs());
    Document document1 = reader1.document(0);
    Document document2 = reader2.document(0);
    assertEquals(document1.get("id"), document2.get("id"));
}
protected FieldManager newFieldManager(boolean create) throws IOException {
    return new BaseFieldManager(_fieldLessField, new KeywordAnalyzer(), new Configuration()) {
        @Override
        protected boolean tryToStore(FieldTypeDefinition fieldTypeDefinition, String fieldName) {
            return true;
        }

        @Override
        protected void tryToLoad(String field) {
        }

        @Override
        protected List<String> getFieldNamesToLoad() throws IOException {
            return new ArrayList<String>();
        }
    };
}
@Override
public void setDAG(DirectedAcyclicGraph directedAcyclicGraph) {
    super.setDAG(directedAcyclicGraph);

    // Connect to the Lucene DB
    try {
        Analyzer analyser = new KeywordAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyser);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        Path path = DAGModule.moduleFile(directedAcyclicGraph.rootDir_, INDEX_FOLDER).toPath();
        Directory directory = FSDirectory.open(path);
        // Directory directory = new RAMDirectory();
        writer_ = new IndexWriter(directory, config);

        // Searching
        parser_ = new QueryParser(LOWERCASE_FIELD, analyser);
        manager_ = new SearcherManager(writer_, true, new SearcherFactory());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
@Test
public void testCascadeCombo() throws IOException {
    ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
        new ComboAnalyzer(TEST_VERSION_CURRENT,
            new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
            new KeywordAnalyzer()
        ),
        new StandardAnalyzer(TEST_VERSION_CURRENT),
        new KeywordAnalyzer()
    );
    for (int i = 0; i < 3; i++)
        assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test " + i)),
            new String[]{"just", "just", "just a little test " + i, "just a little test " + i,
                "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
            new int[]{0, 0, 0, 0, 5, 7, 7, 14, 14, 19, 19},
            new int[]{4, 4, 20, 20, 6, 13, 13, 18, 18, 20, 20},
            new int[]{1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0});
}
@Test
public void testCascadeComboTwiceSameInstanceSolvedByCaching() throws IOException {
    Analyzer analyzer = new KeywordAnalyzer();
    ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
        new ComboAnalyzer(TEST_VERSION_CURRENT,
            new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
            analyzer
        ).enableTokenStreamCaching(),
        new StandardAnalyzer(TEST_VERSION_CURRENT),
        analyzer
    ).enableTokenStreamCaching();
    for (int i = 0; i < 3; i++)
        assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test " + i)),
            new String[]{"just", "just", "just a little test " + i, "just a little test " + i,
                "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
            new int[]{0, 0, 0, 0, 5, 7, 7, 14, 14, 19, 19},
            new int[]{4, 4, 20, 20, 6, 13, 13, 18, 18, 20, 20},
            new int[]{1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0});
}
@Before
public void setupMapperParser() {
    Index index = new Index("test");
    Map<String, AnalyzerProviderFactory> analyzerFactoryFactories = Maps.newHashMap();
    analyzerFactoryFactories.put("keyword",
        new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()));
    AnalysisService analysisService = new AnalysisService(index, ImmutableSettings.Builder.EMPTY_SETTINGS,
        null, analyzerFactoryFactories, null, null, null);
    mapperParser = new DocumentMapperParser(index, analysisService, new PostingsFormatService(index),
        new SimilarityLookupService(index, ImmutableSettings.Builder.EMPTY_SETTINGS));

    Settings settings = settingsBuilder()
        .put("opennlp.models.name.file", "src/test/resources/models/en-ner-person.bin")
        .put("opennlp.models.date.file", "src/test/resources/models/en-ner-date.bin")
        .put("opennlp.models.location.file", "src/test/resources/models/en-ner-location.bin")
        .build();
    LogConfigurator.configure(settings);
    OpenNlpService openNlpService = new OpenNlpService(settings);
    openNlpService.start();
    mapperParser.putTypeParser(OpenNlpMapper.CONTENT_TYPE,
        new OpenNlpMapper.TypeParser(analysisService, openNlpService));
}
public static void main(String[] args) throws IOException {
    IndexWriterConfig conf = new IndexWriterConfig(new KeywordAnalyzer());
    try (IndexWriter iw1 = new IndexWriter(FSDirectory.open(new File("/tmp/1grams").toPath()), conf)) {
        addDoc(iw1, "the", 55);
        addDoc(iw1, "nice", 10);
        addDoc(iw1, "building", 1);
        Document document = new Document();
        document.add(new TextField("totalTokenCount", String.valueOf(3), Field.Store.YES));
        iw1.addDocument(document);
    }
    IndexWriterConfig conf2 = new IndexWriterConfig(new KeywordAnalyzer());
    try (IndexWriter iw2 = new IndexWriter(FSDirectory.open(new File("/tmp/2grams").toPath()), conf2)) {
        addDoc(iw2, "the nice", 3);
        addDoc(iw2, "nice building", 2);
    }
    IndexWriterConfig conf3 = new IndexWriterConfig(new KeywordAnalyzer());
    try (IndexWriter iw3 = new IndexWriter(FSDirectory.open(new File("/tmp/3grams").toPath()), conf3)) {
        addDoc(iw3, "the nice building", 1);
    }
}
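// The addDoc helper is not shown in the original snippet. A plausible minimal
// implementation, mirroring the TextField/Field.Store.YES pattern used for
// totalTokenCount above (the "ngram" and "count" field names are assumptions):
private static void addDoc(IndexWriter writer, String ngram, long count) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("ngram", ngram, Field.Store.YES));
    doc.add(new TextField("count", String.valueOf(count), Field.Store.YES));
    writer.addDocument(doc);
}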
public void testAnalyzerAliasDefault() throws IOException {
    Settings settings = Settings.builder()
        .put("index.analysis.analyzer.foobar.alias", "default")
        .put("index.analysis.analyzer.foobar.type", "keyword")
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        // analyzer aliases are only allowed in 2.x indices
        .put(IndexMetaData.SETTING_VERSION_CREATED,
            VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_5))
        .build();
    AnalysisRegistry newRegistry = getNewRegistry(settings);
    IndexAnalyzers indexAnalyzers = getIndexAnalyzers(newRegistry, settings);
    assertThat(indexAnalyzers.get("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
    assertThat(indexAnalyzers.get("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
    assertWarnings("setting [index.analysis.analyzer.foobar.alias] is only allowed on index [test] because it was created before "
        + "5.x; analyzer aliases can no longer be created on new indices.");
}
public FbEntityIndexer(String namefile, String outputDir, String indexingStrategy) throws IOException {
    if (!indexingStrategy.equals("exact") && !indexingStrategy.equals("inexact"))
        throw new RuntimeException("Bad indexing strategy: " + indexingStrategy);

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44,
        indexingStrategy.equals("exact") ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
    config.setOpenMode(OpenMode.CREATE);
    config.setRAMBufferSizeMB(256.0);
    indexer = new IndexWriter(new SimpleFSDirectory(new File(outputDir)), config);
    this.nameFile = namefile;
}
/**
 * Gets username.
 *
 * @return the username
 */
@Field(index = org.hibernate.search.annotations.Index.YES, analyze = Analyze.YES, store = Store.NO)
@Analyzer(impl = KeywordAnalyzer.class)
public String getUsername() {
    return username;
}
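// Hedged aside: with KeywordAnalyzer the whole username is indexed as one
// untokenized term, so an exact-match lookup is a single TermQuery (the "username"
// field name and the value below are illustrative assumptions):
Query byUsername = new TermQuery(new Term("username", "jane.doe@example.com"));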
private QueryExpression create(Request request, ResourceDefinition resourceDefinition)
        throws InvalidQueryException {
    String queryString;
    if (request.getCardinality() == Request.Cardinality.INSTANCE) {
        String idPropertyName = resourceDefinition.getIdPropertyName();
        queryString = String.format("%s:%s", idPropertyName, request.<String>getProperty(idPropertyName));
    } else {
        queryString = request.getQueryString();
    }

    QueryExpression queryExpression;
    if (queryString != null && !queryString.isEmpty()) {
        QueryParser queryParser = new QueryParser(Version.LUCENE_48, "name", new KeywordAnalyzer());
        queryParser.setLowercaseExpandedTerms(false);
        queryParser.setAllowLeadingWildcard(true);
        Query query;
        try {
            query = queryParser.parse((String) escape(queryString));
        } catch (ParseException e) {
            throw new InvalidQueryException(e.getMessage());
        }
        LOG.info("LuceneQuery: {}", query);
        queryExpression = create(query, resourceDefinition);
    } else {
        queryExpression = new AlwaysQueryExpression();
    }
    // add query properties to the request so that they are returned
    request.addAdditionalSelectProperties(queryExpression.getProperties());
    return queryExpression;
}
/**
 * Constructor.
 *
 * @param directory Lucene Directory
 * @param file index file path
 * @param isVolatile if true, the file is deleted automatically
 * @throws IOException IOException
 */
public LuceneKVSBase(Directory directory, File file, boolean isVolatile) throws IOException {
    this.directory = directory;
    this.file = file;
    Analyzer analyzer = new KeywordAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    this.writer = new IndexWriter(directory, config);
    // commit immediately so that LuceneObjectKVS avoids a "no segments* file found in RAMDirectory" exception
    this.writer.commit();
    this.manager = new SearcherManager(directory, new KVSSearcherFactory());
    this.isAutoCommit = true;
    this.isAsyncReflesh = true;
    this.numDocs = new AtomicInteger(writer.numDocs());
    this.isVolatile = isVolatile;

    @SuppressWarnings("rawtypes")
    final LuceneKVSBase own = this;
    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                close(own);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });
}
private static Query _matchKeywordQuery(final IndexDocumentFieldID fieldId, final String keyword) {
    if (Strings.isNullOrEmpty(keyword)) return null;

    QueryParser qp = new QueryParser(LuceneConstants.VERSION, fieldId.asString(), new KeywordAnalyzer());
    Query outQry = null;
    try {
        outQry = qp.parse(keyword);
    } catch (ParseException parseEx) {
        log.error("Error parsing the keyword search filter: {}", parseEx.getMessage(), parseEx);
    }
    return outQry;
}
@Override
public String init(NamedList config, SolrCore coreParam) {
    String myname = (String) config.get(DICTIONARY_NAME);
    this.core = coreParam;

    // Workaround for SOLR-6246 (lock exception on core reload): close
    // any suggester registered with the same name.
    if (registry.containsKey(myname)) {
        MultiSuggester suggesterToClose = null;
        for (Object o : registry.get(myname)) {
            MultiSuggester suggester = (MultiSuggester) o;
            if (suggester.core.getName().equals(coreParam.getName())) {
                suggesterToClose = suggester;
                break;
            }
        }
        if (suggesterToClose != null) {
            registry.remove(myname, suggesterToClose);
            try {
                suggesterToClose.close();
            } catch (IOException e) {
                LOG.error("An exception occurred while closing the spellchecker", e);
            }
        }
    }
    super.init(config, coreParam);

    // Effectively disable analysis *by the SpellChecker/Suggester component*,
    // because that leads to independent suggestions for each token; we want AIS
    // to perform analysis and consider the tokens together.
    analyzer = new KeywordAnalyzer();
    initWeights((NamedList) config.get("fields"), coreParam);
    Integer maxLengthConfig = (Integer) config.get("maxSuggestionLength");
    maxSuggestionLength = maxLengthConfig != null ? maxLengthConfig : DEFAULT_MAX_SUGGESTION_LENGTH;
    registry.put(myname, this);
    core.addCloseHook(new CloseHandler());
    return myname;
}
@Before
public void setUp() throws Exception {
    keywordAnalyzer = new KeywordAnalyzer();
    searchFields = new HashMap<>();
    searchFields.put("f1", 1.0f);
    searchFields.put("f11", 1.0f);
    searchFields.put("f12", 1.0f);
    searchFields.put("f13", 1.0f);
    searchFields.put("f14", 1.0f);
    searchFields.put("f15", 1.0f);
    searchFields.put("f2", 2.0f);
    searchFields.put("f21", 2.0f);
    searchFields.put("f22", 2.0f);
    searchFields.put("f23", 2.0f);
    searchFields.put("f24", 2.0f);
    searchFields.put("f25", 2.0f);
    searchFields.put("f3", 3.0f);
    searchFields.put("f31", 3.0f);
    searchFields.put("f32", 3.0f);
    searchFields.put("f33", 3.0f);
    searchFields.put("f34", 3.0f);
    searchFields.put("f35", 3.0f);
    stopWords = new HashSet<>(Arrays.asList("stopA", "stopB", "stopC"));
}
@Test
public void testThatTheTrueDFIsReturned() throws Exception {
    ConstantFieldBoost fieldBoost = new ConstantFieldBoost(1f);
    Analyzer analyzer = new KeywordAnalyzer();
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, analyzer);
    addNumDocs("f1", "v1", indexWriter, 1);
    addNumDocs("f1", "v5", indexWriter, 5);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader);
    StandardDocumentFrequencyAndTermContextProvider provider = new StandardDocumentFrequencyAndTermContextProvider();

    int idx1 = new DependentTermQuery(newTerm("f1", "v5", provider), provider, fieldBoost).tqIndex;
    int idx2 = new DependentTermQuery(newTerm("f1", "v1", provider), provider, fieldBoost).tqIndex;

    DocumentFrequencyAndTermContextProvider.DocumentFrequencyAndTermContext context1 =
        provider.getDocumentFrequencyAndTermContext(idx1, indexSearcher);
    DocumentFrequencyAndTermContextProvider.DocumentFrequencyAndTermContext context2 =
        provider.getDocumentFrequencyAndTermContext(idx2, indexSearcher);

    assertEquals(5, context1.df);
    assertEquals(1, context2.df);
    assertEquals(5, context1.termContext.docFreq());
    assertEquals(1, context2.termContext.docFreq());

    indexReader.close();
    directory.close();
    analyzer.close();
}
public VocabularyIndexAnalyzer() throws IOException, URISyntaxException {
    super(NO_REUSE_STRATEGY);
    Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
    fieldAnalyzers.put(NodeProperties.LABEL, new TermAnalyzer());
    fieldAnalyzers.put(NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers);
}
public VocabularyQueryAnalyzer() {
    Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
    fieldAnalyzers.put(NodeProperties.LABEL, new TermAnalyzer());
    fieldAnalyzers.put(NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.SYNONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ABREVIATION + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM, new TermAnalyzer());
    fieldAnalyzers.put(Concept.ACRONYM + LuceneUtils.EXACT_SUFFIX, new ExactAnalyzer());
    analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers);
}
@SuppressWarnings("resource")
private Analyzer createAnalyzer() {
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    Analyzer defaultAnalyzer = new KeywordAnalyzer();
    analyzerPerField.put(FIELD_NAME, new MoveTextAnalyzer());
    return new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField);
}
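// Hedged usage sketch: the wrapper built above routes FIELD_NAME through
// MoveTextAnalyzer and every other field through the KeywordAnalyzer default, so an
// arbitrary field keeps its value as a single token ("otherField" and the sample
// text are illustrative):
private void demoPerFieldRouting() throws IOException {
    Analyzer perField = createAnalyzer();
    try (TokenStream ts = perField.tokenStream("otherField", new StringReader("two words"))) {
        ts.reset();
        CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
            System.out.println(term.toString()); // prints "two words" once
        }
        ts.end();
    }
}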
private IndexReader getIndexReader() throws IOException {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, conf);
    writer.close();
    return DirectoryReader.open(dir);
}
private Directory addDir(String v) throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(directory, config);
    writer.addDocument(getDoc(v));
    writer.close();
    return directory;
}
@Test
public void test3() throws IOException, InterruptedException {
    // Thread.sleep(30000);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(_cacheDirectory, conf);
    int docs = 100000;
    for (int i = 0; i < docs; i++) {
        if (i % 500 == 0) {
            System.out.println(i);
        }
        writer.addDocument(newDoc());
        // Thread.sleep(1);
    }
    writer.close();
    System.out.println("done writing");

    DirectoryReader reader = DirectoryReader.open(_cacheDirectory);
    System.out.println("done opening");
    assertEquals(docs, reader.numDocs());
    Document document = reader.document(0);
    System.out.println("done fetching");
    System.out.println(document);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("test", "test")), 10);
    System.out.println("done searching");
    assertEquals(docs, topDocs.totalHits);
    reader.close();
}
private static Directory getDirectoryUpdateRow(String currentRowId) {
    try {
        RAMDirectory directoryUpdateRow = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directoryUpdateRow,
            new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer()));
        Document document = new Document();
        document.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
        document.add(new StringField(BlurConstants.UPDATE_ROW, currentRowId, Store.NO));
        writer.addDocument(document);
        writer.close();
        return directoryUpdateRow;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}