/**
 * Checks whether the index holds a container document for the given node reference.
 *
 * @param nodeRef the node reference whose "ID" term is looked up
 * @param reader  the index reader to search
 * @return true if any matching document carries an "ISCONTAINER" field
 * @throws LuceneIndexException if the index cannot be read
 */
protected boolean locateContainer(String nodeRef, IndexReader reader)
{
    boolean found = false;
    try
    {
        TermDocs td = reader.termDocs(new Term("ID", nodeRef));
        try
        {
            while (td.next())
            {
                Document document = reader.document(td.doc());
                if (document.getField("ISCONTAINER") != null)
                {
                    found = true;
                    break;
                }
            }
        }
        finally
        {
            // Close even when document() throws, otherwise the enumerator leaks.
            td.close();
        }
    }
    catch (IOException e)
    {
        // Message corrected: this method only locates; it deletes nothing.
        throw new LuceneIndexException("Failed to locate container for " + nodeRef, e);
    }
    return found;
}
/**
 * Builds a doc-id set that starts with every document enabled and then clears
 * any document matching one of the configured field/value terms.
 *
 * @param reader the index reader to enumerate term matches from
 * @return an OpenBitSet with matching documents cleared (filtered out)
 * @throws IOException if the index cannot be read
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
    int max = reader.maxDoc();
    OpenBitSet good = new OpenBitSet(max);
    good.set(0, max);
    for (List<Field> values : terms)
    {
        for (Field nv : values)
        {
            TermDocs docs = reader.termDocs(new Term(nv.getField(), nv.getValue()));
            try
            {
                while (docs.next())
                {
                    good.clear(docs.doc());
                }
            }
            finally
            {
                // Close in finally so an exception mid-iteration cannot leak the enumerator.
                docs.close();
            }
        }
    }
    return good;
}
/**
 * Looks up the stored value for a key in the index, retrying on failure.
 *
 * @param key the key to look up in {@code KEY_FIELD}
 * @return the value stored in {@code VALUE_FIELD}, or null if the key is absent
 * @throws IOException if {@code indexError} decides the error is fatal
 */
private String indexGet(final String key) throws IOException
{
    final Term term = new Term(KEY_FIELD, key);
    synchronized (this.luceneWriter)
    {
        // Retry forever; indexError() is expected to either recover or rethrow.
        for (int attempt = 0;; ++attempt)
        {
            try
            {
                final TermDocs termDocs = this.luceneReader.termDocs(term);
                try
                {
                    if (termDocs.next())
                    {
                        final Document doc = this.luceneReader.document(termDocs.doc());
                        return doc.get(VALUE_FIELD);
                    }
                    return null;
                }
                finally
                {
                    // Original leaked the enumerator on every call; always close it.
                    termDocs.close();
                }
            }
            catch (final Throwable ex)
            {
                indexError(ex, attempt);
            }
        }
    }
}
public static void main(String[] args) { String indexPath = args[0]; String url = args[1]; try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)), false); Term s = new Term(LuceneDataStore.LuceneTransaction.KEY_NAME, url); TermDocs termDocs = reader.termDocs(s); if (termDocs.next()) { Document document = reader.document(termDocs.doc()); // Record r = LuceneDataStore.unserializeRecord(document.getBinaryValue(LuceneDataStore.LuceneTransaction.VALUE_NAME)); // System.out.println(r.toString(null, true)); } else { logger.info("URI {} not found", url); } reader.close(); } catch (Exception e) { logger.error(e.getMessage()); } }
/**
 * Accepts a term only if its total frequency across all documents is at
 * least {@code minFreq}.
 *
 * @param t the term to test
 * @return false when the summed frequency is below minFreq; true otherwise
 *         (including when the index read fails, matching the original
 *         best-effort behaviour)
 */
public boolean filter(Term t) {
    int freq = 0;
    try {
        TermDocs tDocs = indexReader.termDocs(t);
        try {
            while (tDocs.next()) {
                freq += tDocs.freq();
            }
        } finally {
            // Original never closed the enumerator.
            tDocs.close();
        }
        if (freq < minFreq) {
            return false;
        }
    } catch (Exception e) {
        // Deliberate best-effort: on failure, keep the term rather than crash.
        e.printStackTrace();
    }
    return true;
}
/**
 * Finds the largest transaction id among transactions committed at the given
 * commit time.
 *
 * @param reader the Solr index reader
 * @param lastTxCommitTimeBeforeHoles commit time to match, or -1 to skip the scan
 * @return the largest matching TXID, or -1 if none matched
 * @throws IOException if the index cannot be read
 */
private long getLargestTxIdByCommitTime(SolrIndexReader reader, Long lastTxCommitTimeBeforeHoles) throws IOException
{
    long txid = -1;
    if (lastTxCommitTimeBeforeHoles != -1)
    {
        TermDocs docs = reader.termDocs(new Term(QueryConstants.FIELD_TXCOMMITTIME,
                NumericEncoder.encode(lastTxCommitTimeBeforeHoles)));
        try
        {
            while (docs.next())
            {
                Document doc = reader.document(docs.doc());
                Fieldable field = doc.getFieldable(QueryConstants.FIELD_TXID);
                if (field != null)
                {
                    // parseLong avoids the needless Long boxing of Long.valueOf.
                    txid = Math.max(txid, Long.parseLong(field.stringValue()));
                }
            }
        }
        finally
        {
            // Original never closed the enumerator.
            docs.close();
        }
    }
    return txid;
}
/**
 * Finds the largest ACL change-set id among change sets committed at the
 * given commit time.
 *
 * @param reader the Solr index reader
 * @param lastChangeSetCommitTimeBeforeHoles commit time to match, or -1 to skip the scan
 * @return the largest matching ACLTXID, or -1 if none matched
 * @throws IOException if the index cannot be read
 */
private long getLargestChangeSetIdByCommitTime(SolrIndexReader reader, Long lastChangeSetCommitTimeBeforeHoles) throws IOException
{
    long txid = -1;
    if (lastChangeSetCommitTimeBeforeHoles != -1)
    {
        TermDocs docs = reader.termDocs(new Term(QueryConstants.FIELD_ACLTXCOMMITTIME,
                NumericEncoder.encode(lastChangeSetCommitTimeBeforeHoles)));
        try
        {
            while (docs.next())
            {
                Document doc = reader.document(docs.doc());
                Fieldable field = doc.getFieldable(QueryConstants.FIELD_ACLTXID);
                if (field != null)
                {
                    // parseLong avoids the needless Long boxing of Long.valueOf.
                    txid = Math.max(txid, Long.parseLong(field.stringValue()));
                }
            }
        }
        finally
        {
            // Original never closed the enumerator.
            docs.close();
        }
    }
    return txid;
}
/**
 * Returns the field values associated with a document.
 *
 * @param context the operation context
 * @param fieldName the field name
 * @param uuid the document uuid
 * @return the field values (null if not found)
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if an I/O exception occurs
 */
public String[] queryFieldByUuid(TocContext context, String fieldName, String uuid)
    throws CorruptIndexException, IOException {
  TermDocs termDocs = null;
  try {
    String docUuid = Val.chkStr(uuid);
    if (docUuid.length() == 0) {
      return null;
    }
    IndexSearcher searcher = this.getSearcher(context);
    IndexReader reader = searcher.getIndexReader();
    termDocs = reader.termDocs();
    termDocs.seek(new Term(Storeables.FIELD_UUID, docUuid));
    if (!termDocs.next()) {
      return null;
    }
    // Load only the requested field.
    MapFieldSelector selector = new MapFieldSelector(new String[]{fieldName});
    Document document = reader.document(termDocs.doc(), selector);
    return document.getValues(fieldName);
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
  }
}
/**
 * Queries for documents that match one or more of the supplied values.
 * @param reader the index reader
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValues(IndexReader reader, String field, String[] values) throws IOException {
  OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
  if ((values == null) || (values.length == 0)) {
    return bitSet;
  }
  TermDocs termDocs = null;
  try {
    Term baseTerm = new Term(field);
    termDocs = reader.termDocs();
    // Reuse one enumerator, re-seeking it for each normalized value.
    for (String value: values) {
      String normalized = value.trim().toLowerCase();
      termDocs.seek(baseTerm.createTerm(normalized));
      while (termDocs.next()) {
        bitSet.set(termDocs.doc());
      }
    }
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
  }
  return bitSet;
}
/**
 * Queries for documents that match the supplied value.
 * @param reader the index reader
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValue(IndexReader reader, String field, String value) throws IOException {
  OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
  if ((value == null) || (value.length() == 0)) {
    return bitSet;
  }
  TermDocs termDocs = null;
  try {
    termDocs = reader.termDocs();
    termDocs.seek(new Term(field, value));
    // Every matching document id is flipped on in the bit set.
    while (termDocs.next()) {
      bitSet.set(termDocs.doc());
    }
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
  }
  return bitSet;
}
/**
 * Queries for documents that match the supplied value.
 * @param reader the index reader
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValue(IndexReader reader) throws IOException {
  OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
  if ((this.value == null) || (this.value.length() == 0)) {
    return bitSet;
  }
  TermDocs termDocs = null;
  try {
    termDocs = reader.termDocs();
    termDocs.seek(new Term(this.fieldName, this.value));
    // Flip on every document id that carries the configured field/value term.
    while (termDocs.next()) {
      bitSet.set(termDocs.doc());
    }
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
  }
  return bitSet;
}
/**
 * Builds the scorer for this phrase query against the given reader.
 *
 * Dispatches on term count: an empty phrase matches nothing, a single-term
 * phrase degrades to a plain TermScorer, and multi-term phrases use an exact
 * or sloppy phrase scorer depending on {@code slop}.
 *
 * @param reader the index reader to build term/position enumerators from
 * @return a Scorer, or null when the phrase cannot match (no terms, or a
 *         term/position enumerator is unavailable)
 * @throws IOException if the index cannot be read
 */
final Scorer scorer(IndexReader reader) throws IOException {
    if (terms.size() == 0) // optimize zero-term case
        return null;
    if (terms.size() == 1) { // optimize one-term case
        Term term = (Term)terms.elementAt(0);
        TermDocs docs = reader.termDocs(term);
        if (docs == null)
            return null;
        return new TermScorer(docs, reader.norms(term.field()), weight);
    }
    // Multi-term phrase: collect one TermPositions enumerator per term, in
    // phrase order. Any missing enumerator means the phrase cannot match.
    TermPositions[] tps = new TermPositions[terms.size()];
    for (int i = 0; i < terms.size(); i++) {
        TermPositions p = reader.termPositions((Term)terms.elementAt(i));
        if (p == null)
            return null;
        tps[i] = p;
    }
    if (slop == 0) // optimize exact case
        return new ExactPhraseScorer(tps, reader.norms(field), weight);
    else
        return new SloppyPhraseScorer(tps, slop, reader.norms(field), weight);
}
/**
 * Collects (and optionally deletes) all documents whose PRIMARYPARENT is one
 * of the given node references.
 *
 * @param nodeRefs the parent node references to match
 * @param reader   the index reader (also used for deletion)
 * @param delete   when true, matching documents are deleted from the index
 * @return the last "ID" value of each matching document
 * @throws LuceneIndexException if the index cannot be read or modified
 */
protected static Set<String> deletePrimary(Collection<String> nodeRefs, IndexReader reader, boolean delete) throws LuceneIndexException
{
    Set<String> refs = new LinkedHashSet<String>();
    for (String nodeRef : nodeRefs)
    {
        try
        {
            TermDocs td = reader.termDocs(new Term("PRIMARYPARENT", nodeRef));
            try
            {
                while (td.next())
                {
                    int doc = td.doc();
                    Document document = reader.document(doc);
                    String[] ids = document.getValues("ID");
                    // The last ID value is the node's current reference.
                    refs.add(ids[ids.length - 1]);
                    if (delete)
                    {
                        reader.deleteDocument(doc);
                    }
                }
            }
            finally
            {
                // Close even when document()/deleteDocument() throws.
                td.close();
            }
        }
        catch (IOException e)
        {
            throw new LuceneIndexException("Failed to delete node by primary parent for " + nodeRef, e);
        }
    }
    return refs;
}
/**
 * Collects (and optionally deletes) all documents whose PARENT is one of the
 * given node references.
 *
 * @param nodeRefs the parent node references to match
 * @param reader   the index reader (also used for deletion)
 * @param delete   when true, matching documents are deleted from the index
 * @return the last "ID" value of each matching document
 * @throws LuceneIndexException if the index cannot be read or modified
 */
protected static Set<String> deleteReference(Collection<String> nodeRefs, IndexReader reader, boolean delete) throws LuceneIndexException
{
    Set<String> refs = new LinkedHashSet<String>();
    for (String nodeRef : nodeRefs)
    {
        try
        {
            TermDocs td = reader.termDocs(new Term("PARENT", nodeRef));
            try
            {
                while (td.next())
                {
                    int doc = td.doc();
                    Document document = reader.document(doc);
                    String[] ids = document.getValues("ID");
                    // The last ID value is the node's current reference.
                    refs.add(ids[ids.length - 1]);
                    if (delete)
                    {
                        reader.deleteDocument(doc);
                    }
                }
            }
            finally
            {
                // Close even when document()/deleteDocument() throws.
                td.close();
            }
        }
        catch (IOException e)
        {
            throw new LuceneIndexException("Failed to delete node by parent for " + nodeRef, e);
        }
    }
    return refs;
}
/**
 * Deletes (optionally) a container node and, when cascading, every document
 * that lists it as an ANCESTOR; returns the references touched.
 *
 * @param nodeRef the container node reference
 * @param reader  the index reader (also used for deletion)
 * @param delete  when true, matching documents are deleted from the index
 * @param cascade when true, descendant documents are processed too
 * @return nodeRef plus the last "ID" value of each cascaded document
 * @throws LuceneIndexException if the index cannot be read or modified
 */
protected static Set<String> deleteContainerAndBelow(String nodeRef, IndexReader reader, boolean delete, boolean cascade) throws LuceneIndexException
{
    Set<String> refs = new LinkedHashSet<String>();
    try
    {
        if (delete)
        {
            reader.deleteDocuments(new Term("ID", nodeRef));
        }
        refs.add(nodeRef);
        if (cascade)
        {
            TermDocs td = reader.termDocs(new Term("ANCESTOR", nodeRef));
            try
            {
                while (td.next())
                {
                    int doc = td.doc();
                    Document document = reader.document(doc);
                    String[] ids = document.getValues("ID");
                    // The last ID value is the node's current reference.
                    refs.add(ids[ids.length - 1]);
                    if (delete)
                    {
                        reader.deleteDocument(doc);
                    }
                }
            }
            finally
            {
                // Close even when document()/deleteDocument() throws.
                td.close();
            }
        }
    }
    catch (IOException e)
    {
        throw new LuceneIndexException("Failed to delete container and below for " + nodeRef, e);
    }
    return refs;
}
/**
 * Construct a <code>TermScorer</code>.
 *
 * @param weight The weight of the <code>Term</code> in the query.
 * @param td An iterator over the documents matching the <code>Term</code>.
 * @param similarity The <code>Similarity</code> implementation to be used for score computations.
 * @param norms The field norms of the document fields for the <code>Term</code>.
 */
TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) {
  super(similarity);
  this.weight = weight;
  this.termDocs = td;
  this.norms = norms;
  this.weightValue = weight.getValue();
  // Precompute tf(f) * weight for small term frequencies so scoring can
  // use a table lookup instead of recomputing tf() per hit.
  for (int tf = 0; tf < SCORE_CACHE_SIZE; tf++) {
    scoreCache[tf] = getSimilarity().tf(tf) * weightValue;
  }
}
/**
 * Builds a TermScorer over the documents matching this weight's term.
 *
 * @param reader the index reader supplying the term enumerator and norms
 * @return the scorer, or null when no enumerator is available for the term
 * @throws IOException if the index cannot be read
 */
public Scorer scorer(IndexReader reader) throws IOException {
  TermDocs docs = reader.termDocs(term);
  if (docs == null) {
    return null;
  }
  String fieldName = term.field();
  // Fields indexed without norms get a null norms array.
  byte[] fieldNorms = reader.hasNorms(fieldName) ? reader.norms(fieldName) : null;
  return new TermScorer(this, docs, similarity, fieldNorms);
}
/**
 * A simplified implementation of matrixSearch() that only works on a single
 * field, and currently only returns the count per term. It could easily be
 * extended to return a list of ItemIds per term, it simply wasn't necessary
 * when I was writing it!
 * <p>
 * This simplified implementation was written to overcome the memory
 * pressures that matrixSearch() creates when you have over half a million
 * terms for a field. MatrixSearch() creates *many* BitSets that it holds on
 * to to reuse as it recurse through a list of fields. Since we only care
 * about a single field in this implementation, we can avoid generating and
 * holding onto BitSets.
 */
public Multimap<String, Pair<String, Integer>> facetCount(@Nullable final Search searchreq, final Collection<String> fields)
{
    return search(new Searcher<Multimap<String, Pair<String, Integer>>>()
    {
        @Override
        public Multimap<String, Pair<String, Integer>> search(IndexSearcher searcher) throws IOException
        {
            final IndexReader reader = searcher.getIndexReader();
            // Documents admitted by the (optional) search request filter.
            final OpenBitSet filteredBits = searchRequestToBitSet(searchreq, searcher, reader);
            final Multimap<String, Pair<String, Integer>> rv = ArrayListMultimap.create();
            for( String field : fields )
            {
                for( Term term : new XPathFieldIterator(reader, field, "") )
                {
                    int count = 0;
                    TermDocs docs = reader.termDocs(term);
                    try
                    {
                        while( docs.next() )
                        {
                            if( filteredBits.get(docs.doc()) )
                            {
                                count++;
                            }
                        }
                    }
                    finally
                    {
                        // Close in finally so an exception mid-count cannot leak the enumerator.
                        docs.close();
                    }
                    if( count > 0 )
                    {
                        rv.put(field, new Pair<String, Integer>(term.text(), count));
                    }
                }
            }
            return rv;
        }
    });
}
/**
 * Builds a doc-id set of all documents belonging to the current institution.
 *
 * @param reader the index reader to enumerate term matches from
 * @return an OpenBitSet with the institution's documents set
 * @throws IOException if the index cannot be read
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
    int max = reader.maxDoc();
    OpenBitSet good = new OpenBitSet(max);
    Institution institution = CurrentInstitution.get();
    Term term = new Term(FreeTextQuery.FIELD_INSTITUTION, Long.toString(institution.getUniqueId()));
    TermDocs docs = reader.termDocs(term);
    try
    {
        while( docs.next() )
        {
            good.set(docs.doc());
        }
    }
    finally
    {
        // Close in finally so an exception mid-iteration cannot leak the enumerator.
        docs.close();
    }
    return good;
}
/**
 * Looks up the records for the given ids of a type, optionally retaining
 * only the requested properties.
 *
 * @param type       the record type (selects the backing reader)
 * @param ids        the record ids to fetch; missing ids are silently skipped
 * @param properties when non-null and non-empty, each record is trimmed to these properties
 * @return a stream over the records found, in id-iteration order
 * @throws IOException if an id is null or the index cannot be read
 */
@Override
public Stream<Record> lookup(URI type, Set<? extends URI> ids, @Nullable Set<? extends URI> properties)
        throws IOException, IllegalArgumentException, IllegalStateException {
    optimize(type);
    // Fetch the reader once instead of once per id.
    IndexReader reader = readers.get(type);
    List<Record> returns = new ArrayList<>();
    for (URI id : ids) {
        // Explicit check replaces the original catch(NullPointerException)
        // around id.toString(); the thrown type is preserved.
        if (id == null) {
            throw new IOException(new NullPointerException("null id in lookup"));
        }
        String uri = id.toString();
        logger.debug("Selecting {}", uri);
        TermDocs termDocs = reader.termDocs(new Term(KEY_NAME, uri));
        try {
            if (termDocs.next()) {
                Document doc = reader.document(termDocs.doc());
                Record r = unserializeRecord(doc.getBinaryValue(VALUE_NAME), serializer);
                if (properties != null && !properties.isEmpty()) {
                    r.retain(Iterables.toArray(properties, URI.class));
                }
                returns.add(r);
            }
        } finally {
            // Original never closed the enumerator.
            termDocs.close();
        }
    }
    return Stream.create(returns);
}
/** * Devuelve los postings de un término dado * * @param term Termino a buscar para devolver sus postings * @return lista de postings de un termino */ @Override public List<Posting> getTermPostings(String term) { ArrayList<Posting> postingList = new ArrayList<>(); try { TermDocs termDocs = ireader.termDocs(new Term("content", term)); TermPositions termPositions = ireader.termPositions(new Term("content", term)); //si se usa seek termDocs se borra //termDocs.seek(new Term(term)); while(termDocs.next()) { int docId = termDocs.doc(); int freq = termDocs.freq(); ArrayList<Long> positions = new ArrayList<>(); while (termPositions.next()) { positions.add((long)termPositions.nextPosition()); } Posting p = new Posting(docId + "", freq, positions); postingList.add(p); } return postingList; } catch (IOException ex) { Logger.getLogger(LuceneIndexing.class.getName()).log(Level.SEVERE, null, ex); } return postingList; }
/**
 * Returns a TermDocs view over all node documents (ISNODE:T), lazily
 * building and caching the underlying bit set on first use.
 *
 * @return a fresh TermDocSet backed by the cached node bit set
 * @throws IOException if the index cannot be read
 */
public synchronized TermDocs getNodeDocs() throws IOException
{
    if (nodes == null)
    {
        TermDocs nodeDocs = termDocs(new Term("ISNODE", "T"));
        try
        {
            nodes = new OpenBitSet();
            while (nodeDocs.next())
            {
                nodes.set(nodeDocs.doc());
            }
        }
        finally
        {
            // Close in finally so an exception mid-scan cannot leak the enumerator.
            nodeDocs.close();
        }
    }
    return new TermDocSet(nodes);
}
/**
 * Sums the term's frequency over every document containing it (total
 * occurrences, not the document count).
 *
 * @param reader the index reader
 * @param term the term to count
 * @return the total frequency of the term across all documents
 * @throws Exception if the index cannot be read
 */
public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
    long totalTermFreq = 0;
    TermDocs docs = reader.termDocs(term);
    try {
        while (docs.next()) {
            totalTermFreq += docs.freq();
        }
    } finally {
        // Original never closed the enumerator.
        docs.close();
    }
    return totalTermFreq;
}
/**
 * Queries the ACL values indexed for a document.
 * @param uuid the document UUID
 * @return the ACL values (can be null)
 * @throws CatalogIndexException if an exception occurs
 */
@Override
public String[] queryAcls(String uuid) throws CatalogIndexException {
  ArrayList<String> values = new ArrayList<String>();
  IndexSearcher searcher = null;
  TermDocs termDocs = null;
  try {
    uuid = Val.chkStr(uuid);
    if (uuid.length() > 0) {
      String[] aFields = new String[]{Storeables.FIELD_ACL};
      MapFieldSelector selector = new MapFieldSelector(aFields);
      // Original called newSearcher() twice, leaking the first searcher.
      searcher = newSearcher();
      IndexReader reader = searcher.getIndexReader();
      termDocs = reader.termDocs();
      termDocs.seek(new Term(Storeables.FIELD_UUID,uuid));
      if (termDocs.next()) {
        Document document = reader.document(termDocs.doc(),selector);
        Field[] fields = document.getFields(Storeables.FIELD_ACL);
        if ((fields != null) && (fields.length > 0)) {
          for (Field field: fields) {
            values.add(field.stringValue());
          }
        }
      }
    }
  } catch (IOException e) {
    String sMsg = "Error accessing index:\n "+Val.chkStr(e.getMessage());
    throw new CatalogIndexException(sMsg,e);
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
    closeSearcher(searcher);
  }
  return values.toArray(new String[0]);
}
/**
 * Queries the system modified date associated with an indexed document.
 * @param uuid the document UUID
 * @return the update date (null if none was found)
 * @throws CatalogIndexException if an exception occurs
 */
@Override
public Timestamp queryModifiedDate(String uuid) throws CatalogIndexException {
  Timestamp tsUpdate = null;
  IndexSearcher searcher = null;
  TermDocs termDocs = null;
  try {
    uuid = Val.chkStr(uuid);
    if (uuid.length() > 0) {
      String[] aFields = new String[]{Storeables.FIELD_DATEMODIFIED};
      MapFieldSelector selector = new MapFieldSelector(aFields);
      searcher = newSearcher();
      IndexReader reader = searcher.getIndexReader();
      termDocs = reader.termDocs();
      termDocs.seek(new Term(Storeables.FIELD_UUID,uuid));
      if (termDocs.next()) {
        Document document = reader.document(termDocs.doc(),selector);
        String sUpdate = document.get(Storeables.FIELD_DATEMODIFIED);
        // Guard against a document that has no modified-date field; the
        // original threw NumberFormatException from Long.valueOf(null).
        if (sUpdate != null) {
          tsUpdate = new Timestamp(Long.valueOf(sUpdate));
        }
      }
    }
  } catch (IOException e) {
    String sMsg = "Error accessing index:\n "+Val.chkStr(e.getMessage());
    throw new CatalogIndexException(sMsg,e);
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
    closeSearcher(searcher);
  }
  return tsUpdate;
}
/**
 * Loads an assertion based upon the subject id of the active operation.
 * @param context the assertion operation context
 * @param mustExist <code>true</code> true if the assertion must exist
 * @return the assertion (null if not found)
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if an I/O exception occurs
 * @throws AsnInvalidOperationException if mustExist and the assertion was not found
 */
public Assertion loadAssertionById(AsnContext context, boolean mustExist)
  throws CorruptIndexException, IOException, AsnInvalidOperationException {
  AsnOperation operation = context.getOperation();
  IndexReader reader = null;
  TermDocs termDocs = null;
  try {
    String assertionId = Val.chkStr(operation.getSubject().getValuePart());
    if (assertionId.length() > 0) {
      reader = this.makeIndexReader();
      termDocs = reader.termDocs();
      termDocs.seek(new Term(AsnConstants.FIELD_SYS_ASSERTIONID,assertionId));
      if (termDocs.next()) {
        // Found: hydrate a fresh assertion from the stored document.
        Document document = reader.document(termDocs.doc());
        Assertion assertion = operation.getAssertionSet().newAssertion(context,false);
        assertion.load(document);
        return assertion;
      }
    }
  } finally {
    try {if (termDocs != null) termDocs.close();} catch (Exception ef) {}
    this.closeReader(reader);
  }
  if (mustExist) {
    String msg = "This assertion id was not found - "+operation.getSubject().getURN();
    throw new AsnInvalidOperationException(msg);
  }
  return null;
}
/**
 * Creates an iterator over the int terms of a field.
 *
 * If obtaining the TermDocs enumerator fails, the already-open TermEnum is
 * closed (best-effort) before the failure is rethrown, so no enumerator
 * leaks to the caller.
 *
 * @param r the index reader
 * @param field the field whose terms are iterated
 * @return the iterator, owning both enumerators
 * @throws IOException if the index cannot be read
 */
static LuceneUnsortedIntTermDocIterator create(final IndexReader r, final String field) throws IOException {
    final TermEnum terms = r.terms(new Term(field, ""));
    final TermDocs termDocs;
    try {
        termDocs = r.termDocs();
    } catch (IOException e) {
        try {
            terms.close();
        } catch (IOException closeFailure) {
            log.error("error closing TermEnum", closeFailure);
        }
        throw e;
    }
    return new LuceneUnsortedIntTermDocIterator(field, terms, termDocs);
}