void processQuery(Query query, ParseContext context) {
    ParseContext.Document doc = context.doc();
    FieldType pft = (FieldType) this.fieldType();
    QueryAnalyzer.Result result;
    try {
        result = QueryAnalyzer.analyze(query);
    } catch (QueryAnalyzer.UnsupportedQueryException e) {
        doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
        return;
    }
    for (Term term : result.terms) {
        BytesRefBuilder builder = new BytesRefBuilder();
        builder.append(new BytesRef(term.field()));
        builder.append(FIELD_VALUE_SEPARATOR);
        builder.append(term.bytes());
        doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
    }
    if (result.verified) {
        doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
    } else {
        doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
    }
}

public void testWriteRead() throws IOException {
    try (BlobStore store = newBlobStore()) {
        final BlobContainer container = store.blobContainer(new BlobPath());
        byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
        writeBlob(container, "foobar", new BytesArray(data));
        try (InputStream stream = container.readBlob("foobar")) {
            BytesRefBuilder target = new BytesRefBuilder();
            while (target.length() < data.length) {
                byte[] buffer = new byte[scaledRandomIntBetween(1, data.length - target.length())];
                int offset = scaledRandomIntBetween(0, buffer.length - 1);
                int read = stream.read(buffer, offset, buffer.length - offset);
                target.append(new BytesRef(buffer, offset, read));
            }
            assertEquals(data.length, target.length());
            assertArrayEquals(data, Arrays.copyOfRange(target.bytes(), 0, target.length()));
        }
    }
}

public void testIteratorRandom() throws IOException {
    int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    BytesReference pbr = newBytesReference(length);

    if (randomBoolean()) {
        int sliceOffset = randomIntBetween(0, pbr.length());
        int sliceLength = randomIntBetween(0, pbr.length() - sliceOffset);
        pbr = pbr.slice(sliceOffset, sliceLength);
    }

    if (randomBoolean()) {
        pbr = new BytesArray(pbr.toBytesRef());
    }

    BytesRefIterator iterator = pbr.iterator();
    BytesRef ref = null;
    BytesRefBuilder builder = new BytesRefBuilder();
    while ((ref = iterator.next()) != null) {
        builder.append(ref);
    }
    assertArrayEquals(BytesReference.toBytes(pbr), BytesRef.deepCopyOf(builder.toBytesRef()).bytes);
}

public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) {
    BytesRef[] toJoin = new BytesRef[this.candidates.length];
    int len = separator.length * this.candidates.length - 1;
    for (int i = 0; i < toJoin.length; i++) {
        Candidate candidate = candidates[i];
        if (preTag == null || candidate.userInput) {
            toJoin[i] = candidate.term;
        } else {
            final int maxLen = preTag.length + postTag.length + candidate.term.length;
            final BytesRefBuilder highlighted = new BytesRefBuilder();
            highlighted.grow(maxLen); // grow once up front to avoid incremental resizing
            if (i == 0 || candidates[i - 1].userInput) {
                highlighted.append(preTag);
            }
            highlighted.append(candidate.term);
            if (toJoin.length == i + 1 || candidates[i + 1].userInput) {
                highlighted.append(postTag);
            }
            toJoin[i] = highlighted.get();
        }
        len += toJoin[i].length;
    }
    result.grow(len);
    return WordScorer.join(separator, result, toJoin);
}

private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field,
                                      Analyzer analyzer, String lowFreqMinimumShouldMatch,
                                      String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}

private static void sort(final BytesRefBuilder scratch, final BytesRefBuilder scratch1,
                         final BytesRefArray bytes, final int[] indices) {
    final int numValues = bytes.size();
    assert indices.length >= numValues;
    if (numValues > 1) {
        new InPlaceMergeSorter() {
            final Comparator<BytesRef> comparator = Comparator.naturalOrder();

            @Override
            protected int compare(int i, int j) {
                return comparator.compare(bytes.get(scratch, indices[i]), bytes.get(scratch1, indices[j]));
            }

            @Override
            protected void swap(int i, int j) {
                int value_i = indices[i];
                indices[i] = indices[j];
                indices[j] = value_i;
            }
        }.sort(0, numValues);
    }
}

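// A minimal usage sketch for the sort helper above (hypothetical caller; BytesRefArray
// and Counter are org.apache.lucene.util classes). Two scratch builders are required
// because BytesRefArray#get fills the builder it is given, so comparing two values
// through a single builder would overwrite the first value with the second.
static void sortUsageSketch() {
    BytesRefArray bytes = new BytesRefArray(Counter.newCounter());
    bytes.append(new BytesRef("banana"));
    bytes.append(new BytesRef("apple"));
    int[] indices = new int[] {0, 1};
    sort(new BytesRefBuilder(), new BytesRefBuilder(), bytes, indices);
    // indices is now {1, 0}: the backing array is untouched, only the index view is ordered
}
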
public void testSimpleNumericOps() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
    IndexableField f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));

    BytesRefBuilder bytes = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(2, 0, bytes);
    topDocs = searcher.search(new TermQuery(new Term("test", bytes.get())), 1);
    doc = searcher.doc(topDocs.scoreDocs[0].doc);
    f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));

    indexWriter.close();
}

/**
 * Test method for {@link org.apache.lucene.util.BytesRefHash#size()}.
 */
public void testSize() {
    BytesRefBuilder ref = new BytesRefBuilder();
    int num = scaledRandomIntBetween(2, 20);
    for (int j = 0; j < num; j++) {
        final int mod = 1 + randomInt(40);
        for (int i = 0; i < 797; i++) {
            String str;
            do {
                str = TestUtil.randomRealisticUnicodeString(random(), 1000);
            } while (str.length() == 0);
            ref.copyChars(str);
            long count = hash.size();
            long key = hash.add(ref.get());
            if (key < 0) {
                assertEquals(hash.size(), count);
            } else {
                assertEquals(hash.size(), count + 1);
            }
            if (i % mod == 0) {
                newHash();
            }
        }
    }
    hash.close();
}

/**
 * Binary search for the given term.
 *
 * @param term the term to locate.
 * @throws IOException If there is a low-level I/O error.
 */
int getIndexOffset(Term term) throws IOException {
    int lo = 0;
    int hi = indexSize - 1;
    PagedBytesDataInput input = dataInput.clone();
    BytesRefBuilder scratch = new BytesRefBuilder();
    while (hi >= lo) {
        int mid = (lo + hi) >>> 1;
        int delta = compareTo(term, mid, input, scratch);
        if (delta < 0) {
            hi = mid - 1;
        } else if (delta > 0) {
            lo = mid + 1;
        } else {
            return mid;
        }
    }
    // Not found: hi is the offset of the greatest indexed term that sorts before
    // the requested term (-1 if the term sorts before every indexed term).
    return hi;
}

/**
 * Returns the longest BytesRef that is a prefix of all accepted strings and
 * visits each state at most once. The automaton must be deterministic.
 *
 * @return common prefix
 */
public static BytesRef getCommonPrefixBytesRef(Automaton a) {
    BytesRefBuilder builder = new BytesRefBuilder();
    HashSet<Integer> visited = new HashSet<>();
    int s = 0;
    boolean done;
    Transition t = new Transition();
    do {
        done = true;
        visited.add(s);
        if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
            a.getTransition(s, 0, t);
            if (t.min == t.max && !visited.contains(t.dest)) {
                builder.append((byte) t.min);
                s = t.dest;
                done = false;
            }
        }
    } while (!done);
    return builder.get();
}

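// A minimal usage sketch (illustrative; Automata is a real Lucene class, but the call
// site is assumed). For a pure-ASCII string automaton the code-point labels coincide
// with the UTF-8 bytes, so the common prefix comes back as the string itself.
static void commonPrefixSketch() {
    Automaton a = Automata.makeString("foobar"); // deterministic, accepts exactly "foobar"
    BytesRef prefix = getCommonPrefixBytesRef(a);
    assert prefix.utf8ToString().equals("foobar");
}
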
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) {
    BytesRef[] toJoin = new BytesRef[this.candidates.length];
    int len = separator.length * this.candidates.length - 1;
    for (int i = 0; i < toJoin.length; i++) {
        Candidate candidate = candidates[i];
        if (preTag == null || candidate.userInput) {
            toJoin[i] = candidate.term;
        } else {
            final int maxLen = preTag.length + postTag.length + candidate.term.length;
            final BytesRefBuilder highlighted = new BytesRefBuilder();
            highlighted.grow(maxLen); // grow once up front to avoid incremental resizing
            if (i == 0 || candidates[i - 1].userInput) {
                highlighted.append(preTag);
            }
            highlighted.append(candidate.term);
            if (toJoin.length == i + 1 || candidates[i + 1].userInput) {
                highlighted.append(postTag);
            }
            toJoin[i] = highlighted.get();
        }
        len += toJoin[i].length;
    }
    result.grow(len);
    return SuggestUtils.join(separator, result, toJoin);
}

private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field,
                                     QueryParseContext parseContext, Analyzer analyzer,
                                     String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch)
        throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    int count = 0;
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
            count++;
        }
    }
    if (count == 0) {
        return null;
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}

@Override
public BytesRef normalizeQueryTarget(String val, boolean strict, String fieldName, boolean appendExtraDelim)
        throws IOException {
    TokenStream ts = getQueryAnalyzer().tokenStream(fieldName, val);
    try {
        ts.reset();
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = ts.getAttribute(TypeAttribute.class);
        String matchType = strict ? INDEXED_TOKEN_TYPE : NORMALIZED_TOKEN_TYPE;
        while (ts.incrementToken()) {
            if (matchType.equals(typeAtt.type())) {
                BytesRefBuilder ret = new BytesRefBuilder();
                ret.copyChars(termAtt.toString());
                if (!strict || appendExtraDelim) {
                    ret.append(delimBytes, 0, delimBytes.length);
                }
                return ret.get();
            }
        }
        return new BytesRef(BytesRef.EMPTY_BYTES);
    } finally {
        ts.close();
    }
}

private Bits getNumericDocsWithField(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);
    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    return new Bits() {
        @Override
        public boolean get(int index) {
            try {
                in.seek(field.dataStartFilePointer + (1 + field.pattern.length() + 2) * index);
                SimpleTextUtil.readLine(in, scratch); // data
                SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
                return scratch.byteAt(0) == (byte) 'T';
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public int length() {
            return maxDoc;
        }
    };
}

public static void readLine(DataInput in, BytesRefBuilder scratch) throws IOException {
    int upto = 0;
    while (true) {
        byte b = in.readByte();
        scratch.grow(1 + upto);
        if (b == ESCAPE) {
            scratch.setByteAt(upto++, in.readByte());
        } else {
            if (b == NEWLINE) {
                break;
            } else {
                scratch.setByteAt(upto++, b);
            }
        }
    }
    scratch.setLength(upto);
}

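// A hypothetical write-side mirror of readLine above (sketch, not the library's API):
// any ESCAPE or NEWLINE byte in the payload is prefixed with ESCAPE so that readLine
// only treats an unescaped NEWLINE as the record terminator.
static void writeLine(DataOutput out, BytesRef bytes) throws IOException {
    for (int i = 0; i < bytes.length; i++) {
        byte b = bytes.bytes[bytes.offset + i];
        if (b == ESCAPE || b == NEWLINE) {
            out.writeByte(ESCAPE);
        }
        out.writeByte(b);
    }
    out.writeByte(NEWLINE); // terminate the record
}
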
private TreeMap<String, Long> readFields(IndexInput in) throws IOException {
    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
    BytesRefBuilder scratch = new BytesRefBuilder();
    TreeMap<String, Long> fields = new TreeMap<>();
    while (true) {
        SimpleTextUtil.readLine(input, scratch);
        if (scratch.get().equals(END)) {
            SimpleTextUtil.checkFooter(input);
            return fields;
        } else if (StringHelper.startsWith(scratch.get(), FIELD)) {
            String fieldName = new String(scratch.bytes(), FIELD.length, scratch.length() - FIELD.length,
                    StandardCharsets.UTF_8);
            fields.put(fieldName, input.getFilePointer());
        }
    }
}

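// Assumed on-disk shape consumed by readFields above (illustrative, not the exact
// SimpleText grammar): one header line per field, arbitrary per-field lines, then END
// and the footer that checkFooter verifies.
//
//   field body
//     ... per-field entries ...
//   field title
//     ... per-field entries ...
//   END
//
// Each field name is mapped to the file pointer immediately after its header line,
// so later lookups can seek straight to that field's entries.
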
private GroupHead(int doc, BytesRef groupValue) throws IOException {
    super(groupValue, doc + readerContext.docBase);
    sortValues = new BytesRefBuilder[sortsIndex.length];
    sortOrds = new int[sortsIndex.length];
    scores = new float[sortsIndex.length];
    for (int i = 0; i < sortsIndex.length; i++) {
        if (fields[i].getType() == SortField.Type.SCORE) {
            scores[i] = scorer.score();
        } else {
            sortOrds[i] = sortsIndex[i].getOrd(doc);
            sortValues[i] = new BytesRefBuilder();
            if (sortOrds[i] != -1) {
                sortValues[i].copyBytes(sortsIndex[i].get(doc));
            }
        }
    }
}

/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
    // Build the automaton.
    final Outputs<Object> outputs = NoOutputs.getSingleton();
    final Object empty = outputs.getNoOutput();
    final Builder<Object> builder = new Builder<>(
        FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, outputs, false, PackedInts.DEFAULT, true, 15);

    BytesRefBuilder scratch = new BytesRefBuilder();
    BytesRef entry;
    final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
    int count = 0;
    BytesRefIterator iter = sorter.iterator();
    while ((entry = iter.next()) != null) {
        count++;
        // the sorter returns entries in order, so duplicates are adjacent; scratch
        // holds the previously added entry and lets us skip repeats
        if (scratch.get().compareTo(entry) != 0) {
            builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
            scratch.copyBytes(entry);
        }
    }

    return count == 0 ? null : builder.finish();
}

private static Transformer transformer(final FieldType ft) {
    return new Transformer() {
        BytesRefBuilder term = new BytesRefBuilder();

        @Override
        public BytesRef transform(Object joinId) {
            String joinStr = joinId.toString();
            // logic same as TermQParserPlugin
            if (ft != null) {
                ft.readableToIndexed(joinStr, term);
            } else {
                term.copyChars(joinStr);
            }
            return term.toBytesRef();
        }
    };
}

Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }
        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));
    return new BooleanQuery.Builder()
        .add(extractionSuccess, Occur.SHOULD)
        .add(extractionFailure, Occur.SHOULD)
        .build();
}

public void testRandomReads() throws IOException {
    int length = randomIntBetween(10, scaledRandomIntBetween(PAGE_SIZE * 2, PAGE_SIZE * 20));
    BytesReference pbr = newBytesReference(length);
    StreamInput streamInput = pbr.streamInput();
    BytesRefBuilder target = new BytesRefBuilder();
    while (target.length() < pbr.length()) {
        switch (randomIntBetween(0, 10)) {
            case 6:
            case 5:
                target.append(new BytesRef(new byte[]{streamInput.readByte()}));
                break;
            case 4:
            case 3:
                BytesRef bytesRef = streamInput.readBytesRef(scaledRandomIntBetween(1, pbr.length() - target.length()));
                target.append(bytesRef);
                break;
            default:
                byte[] buffer = new byte[scaledRandomIntBetween(1, pbr.length() - target.length())];
                int offset = scaledRandomIntBetween(0, buffer.length - 1);
                int read = streamInput.read(buffer, offset, buffer.length - offset);
                target.append(new BytesRef(buffer, offset, read));
                break;
        }
    }
    assertEquals(pbr.length(), target.length());
    BytesRef targetBytes = target.get();
    assertArrayEquals(BytesReference.toBytes(pbr), Arrays.copyOfRange(targetBytes.bytes, targetBytes.offset, targetBytes.length));
}

public void testIterator() throws IOException {
    int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    BytesReference pbr = newBytesReference(length);
    BytesRefIterator iterator = pbr.iterator();
    BytesRef ref;
    BytesRefBuilder builder = new BytesRefBuilder();
    while ((ref = iterator.next()) != null) {
        builder.append(ref);
    }
    assertArrayEquals(BytesReference.toBytes(pbr), BytesRef.deepCopyOf(builder.toBytesRef()).bytes);
}

public void testSliceIterator() throws IOException {
    int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    BytesReference pbr = newBytesReference(length);
    int sliceOffset = randomIntBetween(0, pbr.length());
    int sliceLength = randomIntBetween(0, pbr.length() - sliceOffset);
    BytesReference slice = pbr.slice(sliceOffset, sliceLength);
    BytesRefIterator iterator = slice.iterator();
    BytesRef ref = null;
    BytesRefBuilder builder = new BytesRefBuilder();
    while ((ref = iterator.next()) != null) {
        builder.append(ref);
    }
    assertArrayEquals(BytesReference.toBytes(slice), BytesRef.deepCopyOf(builder.toBytesRef()).bytes);
}

private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
    final List<Token> result = new ArrayList<>();
    final String field = suggestion.getField();
    DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
        new DirectCandidateGenerator.TokenConsumer() {
            @Override
            public void nextToken() {
                Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
                result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
            }
        }, spare);
    return result;
}

public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
    result.clear();
    for (int i = 0; i < toJoin.length - 1; i++) {
        result.append(toJoin[i]);
        result.append(separator);
    }
    result.append(toJoin[toJoin.length - 1]);
    return result.get();
}

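// A minimal usage sketch for join above (illustrative). The helper assumes at least one
// element in toJoin; an empty varargs call would fail on the final append. The returned
// BytesRef shares the builder's buffer, so deep-copy it before reusing the builder.
static void joinUsageSketch() {
    BytesRefBuilder scratch = new BytesRefBuilder();
    BytesRef joined = join(new BytesRef(" "), scratch, new BytesRef("foo"), new BytesRef("bar"));
    assert joined.utf8ToString().equals("foo bar");
}
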
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare)
        throws IOException {
    if (preFilter == null) {
        return term;
    }
    final BytesRefBuilder result = byteSpare;
    analyze(preFilter, term, field, new TokenConsumer() {
        @Override
        public void nextToken() throws IOException {
            this.fillBytesRef(result);
        }
    }, spare);
    return result.get();
}

/**
 * Make sure the {@link #values} array can store at least {@link #count} entries.
 */
protected final void grow() {
    if (values.length < count) {
        final int oldLen = values.length;
        final int newLen = ArrayUtil.oversize(count, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        values = Arrays.copyOf(values, newLen);
        for (int i = oldLen; i < newLen; ++i) {
            values[i] = new BytesRefBuilder();
        }
    }
}

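// A hypothetical caller pattern for grow above (sketch; the add method and count field
// are assumed, not from the original source): bump count, ensure capacity, then copy
// into the pre-allocated builder so no fresh allocation happens per value.
void add(BytesRef value) {
    count++;
    grow();
    values[count - 1].copyBytes(value);
}
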
public static BytesRef[] createUidsForTypesAndIds(Collection<String> types, Collection<?> ids) {
    BytesRef[] uids = new BytesRef[types.size() * ids.size()];
    BytesRefBuilder typeBytes = new BytesRefBuilder();
    BytesRefBuilder idBytes = new BytesRefBuilder();
    int index = 0;
    for (String type : types) {
        typeBytes.copyChars(type);
        for (Object id : ids) {
            uids[index++] = Uid.createUidAsBytes(typeBytes.get(), BytesRefs.toBytesRef(id, idBytes));
        }
    }
    return uids;
}

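// A minimal usage sketch (illustrative; assumes the legacy "type#id" uid encoding that
// Uid.createUidAsBytes produces): one uid per (type, id) pair, in type-major order.
static void uidSketch() {
    BytesRef[] uids = createUidsForTypesAndIds(Collections.singletonList("doc"), Arrays.asList("1", "2"));
    assert uids[0].utf8ToString().equals("doc#1");
    assert uids[1].utf8ToString().equals("doc#2");
}
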
private static void checksumFromLuceneFile(Directory directory, String file, Map<String, StoreFileMetaData> builder,
                                           Logger logger, Version version, boolean readFileAsHash) throws IOException {
    final String checksum;
    final BytesRefBuilder fileHash = new BytesRefBuilder();
    try (IndexInput in = directory.openInput(file, IOContext.READONCE)) {
        final long length;
        try {
            length = in.length();
            if (length < CodecUtil.footerLength()) {
                // truncated files trigger IAE if we seek negative... these files are really corrupted though
                throw new CorruptIndexException("Can't retrieve checksum from file: " + file
                    + " file length must be >= " + CodecUtil.footerLength() + " but was: " + in.length(), in);
            }
            if (readFileAsHash) {
                final VerifyingIndexInput verifyingIndexInput = new VerifyingIndexInput(in);
                // additional safety: we checksum the entire file we read the hash for...
                hashFile(fileHash, new InputStreamIndexInput(verifyingIndexInput, length), length);
                checksum = digestToString(verifyingIndexInput.verify());
            } else {
                checksum = digestToString(CodecUtil.retrieveChecksum(in));
            }
        } catch (Exception ex) {
            logger.debug((Supplier<?>) () -> new ParameterizedMessage("Can't retrieve checksum from file [{}]", file), ex);
            throw ex;
        }
        builder.put(file, new StoreFileMetaData(file, length, checksum, version, fileHash.get()));
    }
}

/**
 * Computes a strong hash value for small files. The "hash" is the file's own leading
 * bytes rather than a digest, so it identifies content exactly for files under the
 * limit. Note that this method should only be used for files &lt; 1MB.
 */
public static void hashFile(BytesRefBuilder fileHash, InputStream in, long size) throws IOException {
    final int len = (int) Math.min(1024 * 1024, size); // for safety we limit this to 1MB
    fileHash.grow(len);
    fileHash.setLength(len);
    final int readBytes = Streams.readFully(in, fileHash.bytes(), 0, len);
    assert readBytes == len : Integer.toString(readBytes) + " != " + Integer.toString(len);
    assert fileHash.length() == len : Integer.toString(fileHash.length()) + " != " + Integer.toString(len);
}

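// A minimal usage sketch for hashFile above (illustrative): read a small in-memory
// stream into the builder and observe that the resulting "hash" equals the input bytes.
static void hashFileSketch() throws IOException {
    byte[] data = "example file contents".getBytes(StandardCharsets.UTF_8);
    BytesRefBuilder fileHash = new BytesRefBuilder();
    hashFile(fileHash, new ByteArrayInputStream(data), data.length);
    assert fileHash.get().equals(new BytesRef(data));
}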