Example source code for the Java class org.apache.lucene.analysis.core.SimpleAnalyzer

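The snippets below are collected from open-source projects and show how org.apache.lucene.analysis.core.SimpleAnalyzer is constructed and used across different Lucene versions. As a quick orientation, here is a minimal, self-contained sketch (an illustration only, assuming a recent Lucene release in which SimpleAnalyzer takes no Version argument): the analyzer splits text on non-letter characters and lowercases every token.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class SimpleAnalyzerDemo {
    public static void main(String[] args) throws IOException {
        // SimpleAnalyzer = letter tokenizer + lowercase filter:
        // it splits on non-letter characters and lowercases each token.
        SimpleAnalyzer analyzer = new SimpleAnalyzer();
        TokenStream ts = analyzer.tokenStream("field", "Hello, Lucene 4.7!");
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString()); // prints "hello", then "lucene"
        }
        ts.end();
        ts.close();
        analyzer.close();
    }
}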
Project: Twitter-Analyzer    File: IndexManager.java
/**
 * Construct an empty IndexManager
 */
public IndexManager(){
    SimpleAnalyzer analyzer = new SimpleAnalyzer(Version.LUCENE_47);  
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);

    try {
        System.out.println("Building the Index...");
        this.indexWriter = new IndexWriter(FSDirectory.open(new File(PATH)), indexWriterConfig);

        // first ask the database to give me all of the tweets
        OracleDAL db = new OracleDAL();
        ArrayList<Tweet> list = (ArrayList<Tweet>) db.getAllTweets();

        // now build the index
        int indexedDocumentCount = this.indexDocsFromList(indexWriter, list);

        indexWriter.close();

    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Project: hmftools    File: TreatmentCurator.java
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull final Directory index) throws IOException {
    final Directory spellCheckerDirectory = new RAMDirectory();
    final IndexReader indexReader = DirectoryReader.open(index);
    final Analyzer analyzer = new SimpleAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    final Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
    final SpellChecker spellChecker = new SpellChecker(spellCheckerDirectory);

    spellChecker.indexDictionary(dictionary, config, false);
    spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
    return spellChecker;
}
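A minimal, hypothetical follow-up (not part of hmftools) showing how the spell checker built above could be queried; the suggestion count of 5 is illustrative, and java.util.Arrays plus the SPELLCHECK_ACCURACY constant from the class above are assumed to be available.

// Hypothetical helper: suggest corrections for a possibly misspelled drug term,
// using the SpellChecker returned by createIndexSpellchecker().
@NotNull
private static List<String> suggestCorrections(@NotNull final SpellChecker spellChecker,
        @NotNull final String term) throws IOException {
    final String[] suggestions = spellChecker.suggestSimilar(term, 5, SPELLCHECK_ACCURACY);
    return Arrays.asList(suggestions);
}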
Project: crawl-eval    File: ExtractThis.java
public static void main(String[] args) throws Exception {
    Analyzer analyzer = new ShingleAnalyzerWrapper(new SimpleAnalyzer(), 9);
    String content = KeepEverythingExtractor.INSTANCE.getText(new InputStreamReader(System.in));
    TokenStream ts = analyzer.tokenStream("extracted_text", content);
    CharTermAttribute cattr = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(cattr.toString());
    }
    ts.close();

}
Project: Camel    File: LuceneIndexAndQueryProducerTest.java
@Override
protected JndiRegistry createRegistry() throws Exception {
    JndiRegistry registry = new JndiRegistry(createJndiContext());
    registry.bind("std", new File("target/stdindexDir"));
    registry.bind("load_dir", new File("src/test/resources/sources"));
    registry.bind("stdAnalyzer", new StandardAnalyzer());
    registry.bind("simple", new File("target/simpleindexDir"));
    registry.bind("simpleAnalyzer", new SimpleAnalyzer());
    registry.bind("whitespace", new File("target/whitespaceindexDir"));
    registry.bind("whitespaceAnalyzer", new WhitespaceAnalyzer());
    return registry;
}
Project: hope-tactical-equipment    File: LuceneService.java
public LuceneService(String directoryPath) {
    try {
        File indexFiles = new File(directoryPath);
        // where the index files are stored
        dir = FSDirectory.open(indexFiles);
        // the analyzer
        analyzer = new SimpleAnalyzer(DEFAULT_VERSION);
        // configuration class
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Project: lire    File: TestRerankTextSearch.java
public void testIndexing() throws IOException, ParserConfigurationException, SAXException {
    IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_42, new SimpleAnalyzer(Version.LUCENE_42));
    IndexWriter iw = new IndexWriter(FSDirectory.open(testIndex), iwConf);
    // if you want to append the index to a pre-existing one use the next line.
    // iwConf.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    // create a LIRE DocumentBuilder for extracting FCTH (just an example, every other feature will do).
    DocumentBuilder builder = DocumentBuilderFactory.getFCTHDocumentBuilder();
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata/ferrari"), true);
    // for handling the XML of the test data set
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    SAXParser saxParser = spf.newSAXParser();
    XMLReader xmlReader = saxParser.getXMLReader();
    for (Iterator<File> iterator = files.iterator(); iterator.hasNext(); ) {
        File img = iterator.next();
        String path = img.getCanonicalPath();
        // create the document with the LIRE DocumentBuilder; this adds the image features to the document.
        Document d = builder.createDocument(new FileInputStream(img), path);
        // handling the XML of the test data set
        path = path.substring(0,path.lastIndexOf('.')) + ".xml";
        TagHandler handler = new TagHandler();
        xmlReader.setContentHandler(handler);
        xmlReader.parse(new InputSource(new File(path).toURI().toString()));
        // add the text to the document ...
        d.add(new TextField("tags", handler.getTags(), Field.Store.YES));
        // don't forget to add the document to the index.
        iw.addDocument(d);
    }
    iw.close();
}
Project: lucenelab    File: ContextSuggestDemo.java
public ContextSuggestDemo() throws IOException {
    indexDir = new RAMDirectory();
    suggestDir = new RAMDirectory();
    analyzer = new SimpleAnalyzer();
    suggester = new AnalyzingInfixSuggester(suggestDir, analyzer, analyzer, 1, true);
    buildSearchIndex();
    buildSuggesterIndex();
}
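A hypothetical lookup method (not part of the original ContextSuggestDemo) showing how the suggester built in the constructor might be queried; the result count of 5 is illustrative, and imports for java.util.List and org.apache.lucene.search.suggest.Lookup are assumed.

// Hypothetical usage of the AnalyzingInfixSuggester field built above:
// return up to 5 infix suggestions for what the user has typed so far.
public List<Lookup.LookupResult> suggest(String userInput) throws IOException {
    return suggester.lookup(userInput, false, 5);
}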
Project: Android-Indexing-Service    File: FileSearcher.java
/**
 * Creates a query based on the given term, field, and type.
 * @param term the search term for the query
 * @param field the document field the term is matched against
 * @param type the type of query to be created, either QUERY_BOOLEAN or QUERY_STANDARD
 * @return a query for the given field and term, built either as a BooleanQuery of
 * WildcardQuery clauses or from a QueryParser with a SimpleAnalyzer
 */
private Query getQuery(String term, String field, int type) {
    Query qry = null;
    if(type == FileSearcher.QUERY_BOOLEAN) {
        qry = new BooleanQuery();
        String[] words = term.split(" ");
        ((BooleanQuery) qry).add(new WildcardQuery(new Term(field, "*" + words[0])),
                BooleanClause.Occur.MUST);
        if(words.length > 1) {
            for(int i = 1; i < words.length - 1; i++) {
                ((BooleanQuery) qry).add(new WildcardQuery(new Term(field, words[i])),
                        BooleanClause.Occur.MUST);
            }
            ((BooleanQuery) qry).add(new WildcardQuery(new Term(field,
                            words[words.length - 1] + "*")),
                    BooleanClause.Occur.MUST);
        }
    } else if(type == FileSearcher.QUERY_STANDARD) {
        try {
            qry = new QueryParser(Version.LUCENE_47, field,
                    new SimpleAnalyzer(Version.LUCENE_47)).parse(term);
        } catch(ParseException e) {
            e.printStackTrace();
        }
    }
    return qry;
}
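A hypothetical call site (not part of the original FileSearcher; the field name "file_name" and result count are illustrative) showing how the two query types above might be used:

// Hypothetical helper: build both query styles for the same term and search with
// whichever one was successfully created.
private TopDocs searchBoth(IndexSearcher searcher, String term) throws IOException {
    Query standardQuery = getQuery(term, "file_name", FileSearcher.QUERY_STANDARD);
    Query booleanQuery = getQuery(term, "file_name", FileSearcher.QUERY_BOOLEAN);
    // getQuery() returns null for QUERY_STANDARD if the QueryParser throws a ParseException,
    // so fall back to the wildcard-based BooleanQuery in that case.
    return searcher.search(standardQuery != null ? standardQuery : booleanQuery, 10);
}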
Project: NYBC    File: TestPerFieldAnalyzerWrapper.java
public void testPerField() throws Exception {
  String text = "Qwerty";

  Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
  analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));

  PerFieldAnalyzerWrapper analyzer =
            new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);

  TokenStream tokenStream = analyzer.tokenStream("field",
      new StringReader(text));
  CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
  tokenStream.reset();

  assertTrue(tokenStream.incrementToken());
  assertEquals("WhitespaceAnalyzer does not lowercase",
               "Qwerty",
               termAtt.toString());

  tokenStream = analyzer.tokenStream("special",
      new StringReader(text));
  termAtt = tokenStream.getAttribute(CharTermAttribute.class);
  tokenStream.reset();

  assertTrue(tokenStream.incrementToken());
  assertEquals("SimpleAnalyzer lowercases",
               "qwerty",
               termAtt.toString());
}
Project: AGDISTIS    File: TripleIndexCreator.java
public void createIndex(List<File> files, String idxDirectory, String baseURI) {
    try {
        urlAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
        literalAnalyzer = new LiteralAnalyzer(LUCENE_VERSION);
        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
        mapping.put(TripleIndex.FIELD_NAME_SUBJECT, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_PREDICATE, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_OBJECT_URI, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_OBJECT_LITERAL, literalAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(urlAnalyzer, mapping);

        File indexDirectory = new File(idxDirectory);
        indexDirectory.mkdir();
        directory = new MMapDirectory(indexDirectory);
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, perFieldAnalyzer);
        iwriter = new IndexWriter(directory, config);
        iwriter.commit();
        for (File file : files) {
            String type = FileUtil.getFileExtension(file.getName());
            if (type.equals(TTL))
                indexTTLFile(file, baseURI);
            if (type.equals(TSV))
                indexTSVFile(file);
            iwriter.commit();
        }
        iwriter.close();
        ireader = DirectoryReader.open(directory);
    } catch (Exception e) {
        log.error("Error while creating TripleIndex.", e);
    }
}
Project: AGDISTIS    File: TripleIndexCreatorContext.java
public void createIndex(List<File> files, String idxDirectory, String baseURI) {
    try {
        urlAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
        literalAnalyzer = new LiteralAnalyzer(LUCENE_VERSION);
        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
        mapping.put(FIELD_NAME_URI, urlAnalyzer);
        mapping.put(FIELD_NAME_SURFACE_FORM, literalAnalyzer);
        mapping.put(FIELD_NAME_URI_COUNT, literalAnalyzer);
        mapping.put(FIELD_NAME_CONTEXT, literalAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(urlAnalyzer, mapping);

        File indexDirectory = new File(idxDirectory);
        indexDirectory.mkdir();
        directory = new MMapDirectory(indexDirectory);
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, perFieldAnalyzer);
        iwriter = new IndexWriter(directory, config);
        iwriter.commit();
        for (File file : files) {
            String type = FileUtil.getFileExtension(file.getName());
            if (type.equals(TTL))
                indexTTLFile(file, baseURI);
            iwriter.commit();
        }
    } catch (Exception e) {
        log.error("Error while creating TripleIndex.", e);
    }
}
Project: t4f-data    File: AosFragments.java
public void addIndexes() throws Exception {
    Directory otherDir = null;
    Directory ramDir = null;
    IndexWriter writer = new IndexWriter(otherDir, new IndexWriterConfig(Version.LUCENE_41, new SimpleAnalyzer(
            Version.LUCENE_41)));
    writer.addIndexes(new Directory[] { ramDir });
}
Project: elasticsearch_my    File: SimpleAnalyzerProvider.java
public SimpleAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.simpleAnalyzer = new SimpleAnalyzer();
    this.simpleAnalyzer.setVersion(version);
}
Project: elasticsearch_my    File: SimpleAnalyzerProvider.java
@Override
public SimpleAnalyzer get() {
    return this.simpleAnalyzer;
}
Project: Elasticsearch    File: SimpleAnalyzerProvider.java
@Inject
public SimpleAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.simpleAnalyzer = new SimpleAnalyzer();
    this.simpleAnalyzer.setVersion(version);
}
Project: Elasticsearch    File: SimpleAnalyzerProvider.java
@Override
public SimpleAnalyzer get() {
    return this.simpleAnalyzer;
}
Project: lire    File: MetricSpacesInvertedListIndexing.java
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate fileList. However, further documents were added, and they
 * now need a ranked list of reference objects. So we (i) fetch all new documents that are
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
            new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)), new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) {  // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

        // debug:
        System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}
Project: lire    File: TestRerankTextSearch.java
public void testSearch() throws IOException, ParseException {
    // create a Lucene IndexReader and the according IndexSearcher:
    IndexReader reader = DirectoryReader.open(FSDirectory.open(testIndex));
    IndexSearcher searcher = new IndexSearcher(reader);
    // The QueryParser takes a String and creates a query out of it. Make sure you use the same field
    // as for indexing, in this case "tags"
    QueryParser q = new QueryParser(Version.LUCENE_42, "tags", new SimpleAnalyzer(Version.LUCENE_42));
    // let's just take the tags of the first document in the index:
    Query query = q.parse(reader.document(1).getValues("tags")[0]);
    // now that's the actual search:
    // NOTE: The number of results here is critical. The fewer documents are returned here, the
    // less the image re-ranking can mess things up. However, the recall (the absolute number of relevant
    // documents returned) is also influenced by this. It is best to try several values like 10, 100, 200, 500, ...
    TopDocs results = searcher.search(query, 10);
    // here we print the results of the text search, just for the win.
    System.out.println("-----------> SEARCH RESULTS ...");
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.print(scoreDoc.score + "\t: ");
        // reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] gets you the actual image file path.
        // LIRE manages all needed field names as static Strings in DocumentBuilder ...
        System.out.print(reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> ");
        System.out.println(reader.document(scoreDoc.doc).getValues("tags")[0]);
    }
    // just for a visual example ... this will pop up a browser window
    FileUtils.browseUri(FileUtils.saveImageResultsToHtml("text", results, reader, reader.document(1).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));

    // and now for the re-ranking:
    // make sure to use a low-level feature that has been indexed -- check the DocumentBuilder in the method above.
    RerankFilter rerank = new RerankFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH);
    // note that you need the document here; it contains the low-level feature ...
    // if you only have the image, you need to create a new document with the
    // appropriate DocumentBuilder -- check the DocumentBuilder in the method above.
    ImageSearchHits hitsReranked = rerank.filter(results, reader, reader.document(1));
    // and here we print the re-ranked hits:
    System.out.println("-----------> RERANKED ...");
    for (int i = 0; i < hitsReranked.length(); i++) {
        System.out.print(hitsReranked.score(i) + "\t: ");
        System.out.print(hitsReranked.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> ");
        System.out.println(hitsReranked.doc(i).getValues("tags")[0]);
    }
    // just for a visual example ... this will pop up a browser window.
    FileUtils.browseUri(FileUtils.saveImageResultsToHtml("reranked", hitsReranked, reader.document(1).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
}
Project: stratio-cassandra    File: PreBuiltAnalyzersTest.java
@Test
public void testGetSimple() {
    Analyzer analyzer = PreBuiltAnalyzers.SIMPLE.get();
    Assert.assertEquals(SimpleAnalyzer.class, analyzer.getClass());
}
Project: luceneappengine    File: GaeDirectoryTest.java
private static IndexWriterConfig config() {
    return GaeLuceneUtil.getIndexWriterConfig(new SimpleAnalyzer());
}
Project: t4f-data    File: AosIndexUtil.java
private static Analyzer getAnalyzer() {
    return new SimpleAnalyzer(Version.LUCENE_41);
}
Project: t4f-data    File: BasicIndexTest.java
private static Analyzer getAnalyzer() {
    return new SimpleAnalyzer(Version.LUCENE_41);
}
Project: lucene-skos-ehri    File: SKOSEngineImpl.java
/**
 * This constructor loads the SKOS model from a given InputStream using the
 * given serialization language parameter, which must be either N3, RDF/XML,
 * or TURTLE.
 * 
 * @param inputStream
 *          the input stream
 * @param lang
 *          the serialization language
 * @throws IOException
 *           if the model cannot be loaded
 */
public SKOSEngineImpl(final Version version, InputStream inputStream,
    String lang) throws IOException {

  if (!("N3".equals(lang) || "RDF/XML".equals(lang) || "TURTLE".equals(lang))) {
    throw new IOException("Invalid RDF serialization format");
  }

  matchVersion = version;

  analyzer = new SimpleAnalyzer(matchVersion);

  skosModel = ModelFactory.createDefaultModel();

  skosModel.read(inputStream, null, lang);

  indexDir = new RAMDirectory();

  entailSKOSModel();

  indexSKOSModel();

  searcher = new IndexSearcher(DirectoryReader.open(indexDir));
}
Project: lucene-skos-ehri    File: SKOSEngineImpl.java
/**
 * This constructor loads the SKOS model from a given filename or URI, starts
 * the indexing process and sets up the index searcher.
 * 
 * @param languages
 *          the languages to be considered
 * @param filenameOrURI
 *          the filename or URI of the SKOS model
 * @throws IOException
 */
public SKOSEngineImpl(final Version version, String filenameOrURI,
    String... languages) throws IOException {
  matchVersion = version;
  analyzer = new SimpleAnalyzer(matchVersion);

  String langSig = "";
  if (languages != null) {
    this.languages = new TreeSet<String>(Arrays.asList(languages));
    langSig = "-" + StringUtils.join(this.languages, ".");
  }

  String name = FilenameUtils.getName(filenameOrURI);
  //File dir = new File("skosdata/" + name + langSig);

  File dir = new File("/opt/webapps/solr4/ehri/portal/data/skosdata/" + name + langSig);

  indexDir = FSDirectory.open(dir);

  // TODO: Generate also if source file is modified
  if (!dir.isDirectory()) {
    // load the skos model from the given file
    FileManager fileManager = new FileManager();
    fileManager.addLocatorFile();
    fileManager.addLocatorURL();
    fileManager.addLocatorClassLoader(SKOSEngineImpl.class.getClassLoader());

    if (FilenameUtils.getExtension(filenameOrURI).equals("zip")) {
      fileManager.addLocatorZip(filenameOrURI);
      filenameOrURI = FilenameUtils.getBaseName(filenameOrURI);
    }

    skosModel = fileManager.loadModel(filenameOrURI);

    entailSKOSModel();

    indexSKOSModel();
  }

  searcher = new IndexSearcher(DirectoryReader.open(indexDir));
}
Project: lucene-skos-ehri    File: AbstractTermExpansionTest.java
/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field, which contains plain subject
 * terms.
 * 
 * A search for "arms" doesn't return that record because the term "arms" is
 * not explicitly contained in the record (document).
 * 
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 */
@Test
public void noExpansion() throws CorruptIndexException,
    LockObtainFailedException, IOException {

  /* defining the document to be indexed */
  Document doc = new Document();
  doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
  doc.add(new Field(
      "description",
      "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
          + "The spear was mainly a thrusting weapon, but could also be thrown. "
          + "It was the principal weapon of the auxiliary soldier... "
          + "(second - fourth century, Arbeia Roman Fort).",
      TextField.TYPE_NOT_STORED));
  doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

  /* setting up a writer with a default (simple) analyzer */
  writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(
      Version.LUCENE_45, new SimpleAnalyzer(Version.LUCENE_45)));

  /* adding the document to the index */
  writer.addDocument(doc);

  /* defining a query that searches over all fields */
  BooleanQuery query = new BooleanQuery();
  query.add(new TermQuery(new Term("title", "arms")),
      BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("description", "arms")),
      BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("subject", "arms")),
      BooleanClause.Occur.SHOULD);

  /* creating a new searcher */
  searcher = new IndexSearcher(DirectoryReader.open(writer, false));

  TopDocs results = searcher.search(query, 10);

  /* no results are returned since there is no term match */
  Assert.assertEquals(0, results.totalHits);

}
Project: lucene-skos-ehri    File: SKOSLabelFilterTest.java
@Test
public void testTermQuery() throws CorruptIndexException, IOException,
    QueryNodeException {

  Document doc = new Document();
  doc.add(new Field("content", "I work for the united nations",
      TextField.TYPE_STORED));

  writer.addDocument(doc);

  searcher = new IndexSearcher(DirectoryReader.open(writer, false));

  StandardQueryParser parser = new StandardQueryParser(new SimpleAnalyzer(
      matchVersion));

  Query query = parser.parse("united nations", "content");

  Assert.assertEquals(1, TestUtil.hitCount(searcher, query));

}
Project: lucene-skos-ehri    File: URIbasedTermExpansionTest.java
/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field, which is semantically enriched
 * by a URI pointing to a SKOS concept "weapons".
 * 
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label (altLabel) for the concept "weapons".
 * 
 * @throws IOException
 */
@Test
public void uriBasedTermExpansion() throws IOException {

  /* defining the document to be indexed */
  Document doc = new Document();
  doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
  doc.add(new Field(
      "description",
      "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
          + "The spear was mainly a thrusting weapon, but could also be thrown. "
          + "It was the principal weapon of the auxiliary soldier... "
          + "(second - fourth century, Arbeia Roman Fort).",
      TextField.TYPE_NOT_STORED));
  doc.add(new Field("subject",
      "http://www.ukat.org.uk/thesaurus/concept/859",
      TextField.TYPE_NOT_STORED));

  /* setting up the SKOS analyzer */
  String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";

  /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
  Analyzer skosAnalyzer = new SKOSAnalyzer(matchVersion, skosFile,
      ExpansionType.URI);

  /* Define different analyzers for different fields */
  Map<String,Analyzer> analyzerPerField = new HashMap<String,Analyzer>();
  analyzerPerField.put("subject", skosAnalyzer);
  PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(
      new SimpleAnalyzer(matchVersion), analyzerPerField);

  /* setting up a writer with a default (simple) analyzer */
  writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(
      matchVersion, indexAnalyzer));

  /* adding the document to the index */
  writer.addDocument(doc);

  /* defining a query that searches over all fields */
  BooleanQuery query1 = new BooleanQuery();
  query1.add(new TermQuery(new Term("title", "arms")),
      BooleanClause.Occur.SHOULD);
  query1.add(new TermQuery(new Term("description", "arms")),
      BooleanClause.Occur.SHOULD);
  query1.add(new TermQuery(new Term("subject", "arms")),
      BooleanClause.Occur.SHOULD);

  /* creating a new searcher */
  searcher = new IndexSearcher(DirectoryReader.open(writer, false));

  TopDocs results = searcher.search(query1, 10);

  /* the document matches because "arms" is among the expanded terms */
  Assert.assertEquals(1, results.totalHits);

  /* defining a query that searches for a broader concept */
  Query query2 = new TermQuery(new Term("subject", "military equipment"));

  results = searcher.search(query2, 10);

  /* ... also returns the document as result */
  Assert.assertEquals(1, results.totalHits);

}
Project: lucene-skos-ehri    File: LabelbasedTermExpansionTest.java
/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field.
 * 
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is contained
 * in the subject field.
 * 
 * @throws IOException
 */
@Test
public void labelBasedTermExpansion() throws IOException {

  /* defining the document to be indexed */
  Document doc = new Document();
  doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
  doc.add(new Field(
      "description",
      "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
          + "The spear was mainly a thrusting weapon, but could also be thrown. "
          + "It was the principal weapon of the auxiliary soldier... "
          + "(second - fourth century, Arbeia Roman Fort).",
      TextField.TYPE_NOT_STORED));
  doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

  /* setting up the SKOS analyzer */
  String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";

  /* ExpansionType.LABEL -> the field to be analyzed (expanded) contains plain subject terms */
  Analyzer skosAnalyzer = new SKOSAnalyzer(matchVersion, skosFile,
      ExpansionType.LABEL);

  /* Define different analyzers for different fields */
  Map<String,Analyzer> analyzerPerField = new HashMap<String,Analyzer>();
  analyzerPerField.put("subject", skosAnalyzer);
  PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(
      new SimpleAnalyzer(matchVersion), analyzerPerField);

  /* setting up a writer with a default (simple) analyzer */
  writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(
      matchVersion, indexAnalyzer));

  /* adding the document to the index */
  writer.addDocument(doc);

  /* defining a query that searches over all fields */
  BooleanQuery query1 = new BooleanQuery();
  query1.add(new TermQuery(new Term("title", "arms")),
      BooleanClause.Occur.SHOULD);
  query1.add(new TermQuery(new Term("description", "arms")),
      BooleanClause.Occur.SHOULD);
  query1.add(new TermQuery(new Term("subject", "arms")),
      BooleanClause.Occur.SHOULD);

  /* creating a new searcher */
  searcher = new IndexSearcher(DirectoryReader.open(writer, false));

  TopDocs results = searcher.search(query1, 10);

  /* the document matches because "arms" is among the expanded terms */
  Assert.assertEquals(1, results.totalHits);

  /* defining a query that searches for a broader concept */
  Query query2 = new TermQuery(new Term("subject", "military equipment"));

  results = searcher.search(query2, 10);

  /* ... also returns the document as result */
  Assert.assertEquals(1, results.totalHits);

}
Project: theSemProject    File: Tokenizer.java
/**
 * Returns the result of a tag cloud over a text. Since every term that
 * appears in the cloud represents a group of terms, the system makes a
 * double pass. The goal is to gather under a single representative term
 * all tokens that share the same tokenization. In practice, given the two
 * terms "tecnico" and "tecnica", the analyzer returns the same token
 * "tecnic". What we want is that the cloud does not show the word "tecnic"
 * but always the first of those terms that appeared in the text. The cloud
 * will therefore show the word "tecnico", which represents both "tecnico"
 * and "tecnica".
 *
 * @param ret result of the tag clouding
 * @param text text on which to run the tag cloud
 * @param id unique id of the document whose text is passed as a parameter
 * @param analyzer the analyzer
 * @throws Exception exception
 */
public static void getTagClasses(final TagCloudResults ret, String text, String id, Analyzer analyzer) throws Exception {
    TokenizerFilter tf = (String term) -> {
        try {
            String newTerm = tokenize(term.trim(), analyzer, -1);
            if (newTerm.length() > 0) {

                ret.add(newTerm, term, id);
            }
        } catch (Exception exception) {
        }

    };
    tokenize(text, new SimpleAnalyzer(), -1, tf);
}
Project: lire    File: LuceneUtils.java
/**
 * Creates an IndexWriter for given index path, with a SimpleAnalyzer.
 *
 * @param indexPath the path to the index directory
 * @param create    set to true if you want to create a new index
 * @return the IndexWriter
 * @throws IOException
 */
public static IndexWriter createIndexWriter(String indexPath, boolean create) throws IOException {
    return createIndexWriter(indexPath, create, AnalyzerType.SimpleAnalyzer);
}