/**
 * Parses the same path-like query twice: once with a plain StandardAnalyzer
 * and once with a per-field wrapper that routes the {@code category} field
 * to a WhitespaceAnalyzer, asserting the string form of each parsed query.
 *
 * NOTE(review): this mixes Version.LUCENE_41 with the mutable
 * PerFieldAnalyzerWrapper.addAnalyzer(...) API - confirm which Lucene
 * release this is expected to compile against.
 */
public void testAnalyzer() throws Exception {
    final Analyzer standard = new StandardAnalyzer(Version.LUCENE_41);
    final String queryString = "category:/philosophy/eastern";

    // First pass: every field goes through the standard analyzer.
    QueryParser standardParser = new QueryParser(Version.LUCENE_41, "contents", standard);
    Query parsed = standardParser.parse(queryString);
    assertEquals("path got split, yikes!",
            "category:\"philosophy eastern\"",
            parsed.toString("contents"));

    // Second pass: the category field is analyzed on whitespace only.
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(standard);
    wrapper.addAnalyzer("category", new WhitespaceAnalyzer(Version.LUCENE_41));

    QueryParser perFieldParser = new QueryParser(Version.LUCENE_41, "contents", wrapper);
    parsed = perFieldParser.parse(queryString);
    assertEquals("leave category field alone",
            "category:/philosophy/eastern",
            parsed.toString("contents"));
}
/**
 * Creates the per-field analyzer: KeywordAnalyzer is the default, the two
 * identifier fields use a WhitespaceAnalyzer, and the case-insensitive
 * identifier field uses DocumentUtil.LCWhitespaceAnalyzer.
 *
 * @return the configured per-field analyzer
 */
public static Analyzer createAnalyzer() {
    final PerFieldAnalyzerWrapper perField = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());

    // Each whitespace-analyzed field gets its own analyzer instance.
    final String[] whitespaceFields = {
        DocumentUtil.FIELD_IDENTS,
        DocumentUtil.FIELD_FEATURE_IDENTS
    };
    for (String field : whitespaceFields) {
        perField.addAnalyzer(field, new WhitespaceAnalyzer());
    }

    perField.addAnalyzer(DocumentUtil.FIELD_CASE_INSENSITIVE_FEATURE_IDENTS,
            new DocumentUtil.LCWhitespaceAnalyzer());
    return perField;
}
/**
 * Builds the analyzer for server documents: an NGramAnalyzer (sized by
 * min_ngram / max_ngram) is the default, while the fields listed below are
 * handled by a KeywordAnalyzer.
 *
 * @return the per-field analyzer for server documents
 */
private Analyzer getServerAnalyzer() {
    final String[] keywordFields = {
        "checkin", "registered", "ram", "swap", "cpuMHz", "cpuNumberOfCpus"
    };

    PerFieldAnalyzerWrapper wrapper =
            new PerFieldAnalyzerWrapper(new NGramAnalyzer(min_ngram, max_ngram));
    for (String field : keywordFields) {
        wrapper.addAnalyzer(field, new KeywordAnalyzer());
    }
    return wrapper;
}
/**
 * Builds the analyzer for errata documents: an NGramAnalyzer (sized by
 * min_ngram / max_ngram) is the default, {@code advisoryName} uses a
 * KeywordAnalyzer, and the free-text fields use a StandardAnalyzer.
 *
 * @return the per-field analyzer for errata documents
 */
private Analyzer getErrataAnalyzer() {
    final String[] standardFields = {"synopsis", "description", "topic", "solution"};

    PerFieldAnalyzerWrapper wrapper =
            new PerFieldAnalyzerWrapper(new NGramAnalyzer(min_ngram, max_ngram));
    wrapper.addAnalyzer("advisoryName", new KeywordAnalyzer());
    for (String field : standardFields) {
        wrapper.addAnalyzer(field, new StandardAnalyzer());
    }
    return wrapper;
}
/**
 * Builds the analyzer for snapshot-tag documents: an NGramAnalyzer (sized
 * by min_ngram / max_ngram) is the default, while the fields listed below
 * are handled by a KeywordAnalyzer.
 *
 * @return the per-field analyzer for snapshot-tag documents
 */
private Analyzer getSnapshotTagAnalyzer() {
    final String[] keywordFields = {
        "id", "snapshotId", "orgId", "serverId", "tagNameId", "created", "modified"
    };

    PerFieldAnalyzerWrapper wrapper =
            new PerFieldAnalyzerWrapper(new NGramAnalyzer(min_ngram, max_ngram));
    for (String field : keywordFields) {
        wrapper.addAnalyzer(field, new KeywordAnalyzer());
    }
    return wrapper;
}
/**
 * Builds the analyzer for hardware-device documents: an NGramAnalyzer
 * (sized by min_ngram / max_ngram) is the default, while the fields listed
 * below are handled by a KeywordAnalyzer.
 *
 * @return the per-field analyzer for hardware-device documents
 */
private Analyzer getHardwareDeviceAnalyzer() {
    final String[] keywordFields = {"id", "serverId", "pciType"};

    PerFieldAnalyzerWrapper wrapper =
            new PerFieldAnalyzerWrapper(new NGramAnalyzer(min_ngram, max_ngram));
    for (String field : keywordFields) {
        wrapper.addAnalyzer(field, new KeywordAnalyzer());
    }
    return wrapper;
}
/**
 * Builds the analyzer for server custom-info documents: an NGramAnalyzer
 * (sized by min_ngram / max_ngram) is the default, while the fields listed
 * below are handled by a KeywordAnalyzer.
 *
 * @return the per-field analyzer for server custom-info documents
 */
private Analyzer getServerCustomInfoAnalyzer() {
    final String[] keywordFields = {
        "id", "serverId", "created", "modified", "createdBy", "lastModifiedBy"
    };

    PerFieldAnalyzerWrapper wrapper =
            new PerFieldAnalyzerWrapper(new NGramAnalyzer(min_ngram, max_ngram));
    for (String field : keywordFields) {
        wrapper.addAnalyzer(field, new KeywordAnalyzer());
    }
    return wrapper;
}
/**
 * Builds the default analyzer: an NGramAnalyzer (sized by min_ngram /
 * max_ngram) is the fallback, while the fields listed below are handled by
 * a KeywordAnalyzer.
 *
 * @return the default per-field analyzer
 */
private Analyzer getDefaultAnalyzer() {
    final String[] keywordFields = {
        "id", "arch", "epoch", "version", "release", "filename"
    };

    PerFieldAnalyzerWrapper wrapper =
            new PerFieldAnalyzerWrapper(new NGramAnalyzer(min_ngram, max_ngram));
    for (String field : keywordFields) {
        wrapper.addAnalyzer(field, new KeywordAnalyzer());
    }
    return wrapper;
}
/**
 * Rebuilds {@code analyzers} as a per-field wrapper: for every entry in
 * {@code languages}, the language's topic-content field is mapped to the
 * analyzer returned by {@code getAnalyzer(language)}. Languages for which
 * no analyzer is returned are left to the StandardAnalyzer default.
 */
public void createPerFieldAnalyzer() {
    final Map<String, Analyzer> perLanguage = new HashMap<String, Analyzer>();

    for (Language language : languages) {
        final String fieldName =
                MultiLingualAbstractOTDFLucDocCreator.Fields.getLanguageTopicContentField(language);
        final Analyzer languageAnalyzer = getAnalyzer(language);
        if (languageAnalyzer == null) {
            // No dedicated analyzer: the field falls back to the default.
            continue;
        }
        perLanguage.put(fieldName, languageAnalyzer);
    }

    analyzers = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_36), perLanguage);
}
/**
 * Rebuilds {@code analyzers} as a per-field wrapper restricted to the given
 * languages: each language's topic-content field is mapped to the analyzer
 * returned by {@code getAnalyzer(language)}. Languages for which no
 * analyzer is returned are left to the StandardAnalyzer default.
 *
 * @param languagesDone the languages whose fields should be registered
 */
public void createPerFieldAnalyzer(Set<Language> languagesDone) {
    final Map<String, Analyzer> perLanguage = new HashMap<String, Analyzer>();

    for (Language language : languagesDone) {
        final String fieldName =
                MultiLingualArticleOTDFLucDocCreator.Fields.getLanguageTopicContentField(language);
        final Analyzer languageAnalyzer = getAnalyzer(language);
        if (languageAnalyzer == null) {
            // No dedicated analyzer: the field falls back to the default.
            continue;
        }
        perLanguage.put(fieldName, languageAnalyzer);
    }

    analyzers = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_36), perLanguage);
}
/**
 * Parses a query through a per-field wrapper (SimpleAnalyzer by default,
 * KeywordAnalyzer for {@code partnum}) and asserts both the string form of
 * the parsed query and the hit count against {@code searcher}.
 *
 * NOTE(review): this mixes Version.LUCENE_41 with the mutable
 * PerFieldAnalyzerWrapper.addAnalyzer(...) API and a no-arg SimpleAnalyzer -
 * confirm which Lucene release this is expected to compile against.
 */
public void testPerFieldAnalyzer() throws Exception {
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    wrapper.addAnalyzer("partnum", new KeywordAnalyzer());

    QueryParser parser = new QueryParser(Version.LUCENE_41, "description", wrapper);
    Query parsed = parser.parse("partnum:Q36 AND SPACE");

    assertEquals("Q36 kept as-is", "+partnum:Q36 +space", parsed.toString("description"));
    assertEquals("doc found!", 1, TestUtil.hitCount(searcher, parsed));
}
private void convertCanonicOutput(CanonicOutput canonicOutput,Document document,LuceneOptions luceneOptions) { SimilarityForms sf = SimilarityFormConverterWrapper.getConverter().process(canonicOutput); document.add(newField("co.configuration.id", canonicOutput.getApplicationRun().getConfiguration().getId().toString(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36) ) ); document.add(newField("co.revision.id", canonicOutput.getApplicationRun().getRevision().getId().toString(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36) ) ); document.add(newField("co.applicationrun.id", canonicOutput.getApplicationRun().getId().toString(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36) ) ); if(canonicOutput.getAnnotations() != null && !canonicOutput.getAnnotations().isEmpty()) { for(Annotation a : canonicOutput.getAnnotations()) { document.add(newField("co.annotation", a.getAnnotationContent(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36))); } } // mathml is converted into Single String representation // which is stored in co.distanceForm document.add(newField("co.distanceForm",sf.getDistanceForm(),luceneOptions,null)); PerFieldAnalyzerWrapper keywordAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer()); for(String s : sf.getCountForm().keySet()) { document.add(newField("co.element", s+"="+sf.getCountForm().get(s), luceneOptions, keywordAnalyzer)); } logger.info("Canonic output ["+canonicOutput.getId()+"] indexed."); }
/** * Método para construir el índice con la colección por defecto * * @param operación a realizar: MAKE o ADD */ private long build(int operation) throws IndexException { long indexedFiles = 0; String message = "Lucene index will be created at [" + this.indexPath + "]"; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); //inicia la indexacion try { if (safeToBuildIndex(this.indexPath, operation)) { setStartTimeOfIndexation(new Date()); // this.analyzer = new NGramAnalyzer(); //TODO I changed theses lines 2012-11-12 // this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new NGramAnalyzer())); // this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_COMMENTS), new StopStemAnalyzer()); if (this.appendIndex) { //Adding: new docs this.writer = new IndexWriter(FSDirectory.open(this.indexPath), this.getFieldAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); if (applyLSI) { this.writerLSI = new IndexWriter(FSDirectory.open(this.indexLSIPath), new PerFieldAnalyzerWrapper(new StopStemAnalyzer()), false, IndexWriter.MaxFieldLength.UNLIMITED); } // ("number "+writer.getReader().maxDoc()); } else { //create or overwrite index this.writer = new IndexWriter(FSDirectory.open(this.indexPath), this.getFieldAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); if (applyLSI) { this.writerLSI = new IndexWriter(FSDirectory.open(this.indexLSIPath), new PerFieldAnalyzerWrapper(new StopStemAnalyzer()), true, IndexWriter.MaxFieldLength.UNLIMITED); } } indexedFiles = indexDocs(this.writer, this.writerLSI, this.collectionPath, operation); message = "Optimizing..."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); this.writer.optimize(); this.writer.close(); if (applyLSI) { this.writerLSI.optimize(); this.writerLSI.close(); } setEndTimeOfIndexation(new Date()); message = "Indexation Time " + this.getIndexationTime() + " 
milliseconds."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); } } catch (IOException e) { message = " caught a " + e.getClass() + "\n with message: " + e.getMessage() + "."; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } initLSIManager(); // inicializar la matriz de LSI return indexedFiles; }
/** * Método para construir el índice a partir de una colección de files * * @param operation ----- operación a realizar: MAKE o ADD * @param collectionPath ----- lista de ficheros que representan la * colección */ private long build(List<File> collectionPath, int operation) throws IndexException { long indexedFiles = 0; String message = "Indexing to directory '" + this.indexPath + "'..."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); //inicia la indexacion try { setStartTimeOfIndexation(new Date()); if (safeToBuildIndex(this.indexPath, operation)) { // this.analyzer = new NGramAnalyzer(); this.setFieldAnalyzer(new PerFieldAnalyzerWrapper(new NGramAnalyzer())); this.getFieldAnalyzer().addAnalyzer(getDocumentField(FIELD_CODE_ALL_COMMENTS), new StopStemAnalyzer()); if (this.appendIndex) { //añadir docs a un indice existente this.writer = new IndexWriter(FSDirectory.open(this.indexPath), this.getFieldAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); if (applyLSI) { this.writerLSI = new IndexWriter(FSDirectory.open(this.indexLSIPath), new PerFieldAnalyzerWrapper(new StopStemAnalyzer()), false, IndexWriter.MaxFieldLength.UNLIMITED); } } else { //crear o sobreescribir this.writer = new IndexWriter(FSDirectory.open(this.indexPath), this.getFieldAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); if (applyLSI) { this.writerLSI = new IndexWriter(FSDirectory.open(this.indexLSIPath), new PerFieldAnalyzerWrapper(new StopStemAnalyzer()), true, IndexWriter.MaxFieldLength.UNLIMITED); } } indexedFiles = indexDocs(this.writer, this.writerLSI, collectionPath, operation); message = "Optimizing..."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); this.writer.optimize(); this.writer.close(); if (applyLSI) { this.writerLSI.optimize(); this.writerLSI.close(); } setEndTimeOfIndexation(new Date()); message = "Indexation Time 
" + this.getIndexationTime() + " milliseconds."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); } } catch (IOException e) { message = " caught a " + e.getClass() + "\n with message: " + e.getMessage() + "."; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } initLSIManager(); return indexedFiles; }
/**
 * Returns the per-field analyzer currently held by this instance.
 *
 * @return the current value of {@code fieldAnalyzer}
 */
public PerFieldAnalyzerWrapper getFieldAnalyzer() {
    return this.fieldAnalyzer;
}
/**
 * Replaces the per-field analyzer held by this instance.
 *
 * @param fieldAnalyzer the new value for {@code fieldAnalyzer}
 */
public void setFieldAnalyzer(PerFieldAnalyzerWrapper fieldAnalyzer) {
    this.fieldAnalyzer = fieldAnalyzer;
}
/**
 * Returns the {@code fieldAnalyzerCS} per-field analyzer held by this
 * instance.
 *
 * @return the current value of {@code fieldAnalyzerCS}
 */
public PerFieldAnalyzerWrapper getFieldAnalyzerCS() {
    return this.fieldAnalyzerCS;
}
/**
 * Replaces the {@code fieldAnalyzerCS} per-field analyzer held by this
 * instance.
 *
 * @param fieldAnalyzerCS the new value for {@code fieldAnalyzerCS}
 */
public void setFieldAnalyzerCS(PerFieldAnalyzerWrapper fieldAnalyzerCS) {
    this.fieldAnalyzerCS = fieldAnalyzerCS;
}