/**
 * Decodes a single doc values marker byte into a {@link DocValuesType}.
 *
 * @param input the input the byte came from; only used to build the error message
 * @param b the marker byte, expected to be between 0 and 5 inclusive
 * @return the decoded type, or {@code null} when {@code b} is 0
 * @throws CorruptIndexException if {@code b} is not one of the known markers
 */
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
  switch (b) {
    case 0:
      return null;
    case 1:
      return DocValuesType.NUMERIC;
    case 2:
      return DocValuesType.BINARY;
    case 3:
      return DocValuesType.SORTED;
    case 4:
      return DocValuesType.SORTED_SET;
    case 5:
      return DocValuesType.SORTED_NUMERIC;
    default:
      throw new CorruptIndexException("invalid docvalues byte: " + b + " (resource=" + input + ")");
  }
}
/**
 * Encodes a {@link DocValuesType} as its single-byte marker.
 *
 * @param type the type to encode, or {@code null} for "no doc values"
 * @return 0 for {@code null}, 1 for NUMERIC, 2 for BINARY, 3 for SORTED,
 *         4 for SORTED_SET and 5 for SORTED_NUMERIC
 * @throws AssertionError if {@code type} is any other value
 */
private static byte docValuesByte(DocValuesType type) {
  if (type == null) {
    return 0;
  }
  switch (type) {
    case NUMERIC:
      return 1;
    case BINARY:
      return 2;
    case SORTED:
      return 3;
    case SORTED_SET:
      return 4;
    case SORTED_NUMERIC:
      return 5;
    default:
      throw new AssertionError();
  }
}
private FieldInfo getDVField(String field, DocValuesType type) { FieldInfo fi = fieldInfos.fieldInfo(field); if (fi == null) { // Field does not exist return null; } if (fi.getDocValuesType() == null) { // Field was not indexed with doc values return null; } if (fi.getDocValuesType() != type) { // Field DocValues are different than requested type return null; } return fi; }
/**
 * Adds a stored {@link StringField} named {@code field} to {@code doc} and,
 * when {@code type} is non-null, a companion doc values field named
 * {@code field + "_dv"} holding the same value.
 *
 * @param doc the document to add the fields to
 * @param field name of the stored string field
 * @param value the field value; parsed with {@link Integer#parseInt(String)}
 *        when {@code type} is NUMERIC
 * @param type doc values type of the companion field, or {@code null} to add
 *        only the string field
 * @throws IllegalArgumentException if {@code type} is not NUMERIC, BINARY or SORTED
 */
private void addField(Document doc, String field, String value, DocValuesType type) {
  doc.add(new StringField(field, value, Field.Store.YES));
  if (type == null) {
    return;
  }
  final String dvField = field + "_dv";
  final Field valuesField;
  switch (type) {
    case NUMERIC:
      valuesField = new NumericDocValuesField(dvField, Integer.parseInt(value));
      break;
    case BINARY:
      valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
      break;
    case SORTED:
      valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
      break;
    default:
      // Fail fast instead of adding a null field to the document.
      throw new IllegalArgumentException("unsupported doc values type: " + type);
  }
  doc.add(valuesField);
}
/**
 * Randomly creates either a function-based or a term-based first pass
 * grouping collector for {@code groupField}.
 *
 * <p>{@code dvType} does not affect the result: the same two candidates are
 * used whether or not it is {@code null}.
 *
 * @param dvType doc values type of the group field (currently not consulted)
 * @param groupSort sort used to order the groups
 * @param groupField name of the field to group by
 * @param topNGroups number of top groups to keep
 * @return the randomly chosen collector
 */
@SuppressWarnings({"unchecked","rawtypes"})
private <T> AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector(DocValuesType dvType, Sort groupSort, String groupField, int topNGroups) throws IOException {
  // One coin flip decides between the two implementations.
  final boolean useFunctionCollector = random().nextBoolean();
  final AbstractFirstPassGroupingCollector collector;
  if (useFunctionCollector) {
    collector = new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new HashMap<>(), groupSort, topNGroups);
  } else {
    collector = new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
  }
  return (AbstractFirstPassGroupingCollector<T>) collector;
}
public void testExcIndexingDocBeforeDocValues() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setDocValueType(DocValuesType.SORTED); ft.freeze(); Field field = new Field("test", "value", ft); field.setTokenStream(new TokenStream() { @Override public boolean incrementToken() { throw new RuntimeException("no"); } }); doc.add(field); try { w.addDocument(doc); fail("did not hit exception"); } catch (RuntimeException re) { // expected } w.addDocument(new Document()); w.close(); dir.close(); }
/**
 * Indexes one document with multiple values per doc values field, then checks
 * that {@code stringdv}, {@code floatdv} and {@code intdv} are all reported as
 * SORTED_SET and that {@code stringdv} exposes ords 0 and 1 for document 0.
 */
public void testDocValues() throws IOException {
  assertU(adoc("id", "1", "floatdv", "4.5", "intdv", "-1", "intdv", "3", "stringdv", "value1", "stringdv", "value2"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> searcherHolder = core.openNewSearcher(true, true);
    final SolrIndexSearcher indexSearcher = searcherHolder.get();
    try {
      final AtomicReader atomicReader = indexSearcher.getAtomicReader();
      assertEquals(1, atomicReader.numDocs());

      final FieldInfos fieldInfos = atomicReader.getFieldInfos();
      for (String dvFieldName : new String[] {"stringdv", "floatdv", "intdv"}) {
        assertEquals(DocValuesType.SORTED_SET, fieldInfos.fieldInfo(dvFieldName).getDocValuesType());
      }

      final SortedSetDocValues stringValues = atomicReader.getSortedSetDocValues("stringdv");
      stringValues.setDocument(0);
      assertEquals(0, stringValues.nextOrd());
      assertEquals(1, stringValues.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, stringValues.nextOrd());
    } finally {
      // Release the reference obtained from openNewSearcher.
      searcherHolder.decref();
    }
  }
}
/**
 * Randomly creates either a function-based or a term-based first pass
 * grouping collector for {@code groupField}.
 *
 * <p>{@code dvType} does not affect the result: the same two candidates are
 * used whether or not it is {@code null}.
 *
 * @param dvType doc values type of the group field (currently not consulted)
 * @param groupSort sort used to order the groups
 * @param groupField name of the field to group by
 * @param topNGroups number of top groups to keep
 * @return the randomly chosen collector
 */
@SuppressWarnings({"unchecked","rawtypes"})
private <T> AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector(DocValuesType dvType, Sort groupSort, String groupField, int topNGroups) throws IOException {
  // One coin flip decides between the two implementations.
  final boolean useFunctionCollector = random().nextBoolean();
  final AbstractFirstPassGroupingCollector collector;
  if (useFunctionCollector) {
    collector = new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new HashMap<Object, Object>(), groupSort, topNGroups);
  } else {
    collector = new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
  }
  return (AbstractFirstPassGroupingCollector<T>) collector;
}
/**
 * Adds a stored {@link TextField} named {@code groupField} to {@code doc} and,
 * when {@code canUseIDV} is set, a BINARY or SORTED doc values field named
 * {@code groupField + "_dv"} with the same value.
 *
 * @param doc the document to add the fields to
 * @param groupField name of the group field
 * @param value the group value
 * @param canUseIDV whether the doc values companion field should be added
 * @param valueType doc values type of the companion field; any type other
 *        than BINARY or SORTED fails the test
 */
private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, DocValuesType valueType) {
  doc.add(new TextField(groupField, value, Field.Store.YES));
  if (!canUseIDV) {
    return;
  }
  final String dvName = groupField + "_dv";
  Field dvField = null;
  switch (valueType) {
    case BINARY:
      dvField = new BinaryDocValuesField(dvName, new BytesRef(value));
      break;
    case SORTED:
      dvField = new SortedDocValuesField(dvName, new BytesRef(value));
      break;
    default:
      fail("unhandled type");
  }
  doc.add(dvField);
}
void checkCanWrite(FieldInfo field) { if ((field.getDocValuesType() == DocValuesType.NUMERIC || field.getDocValuesType() == DocValuesType.BINARY) && field.getDocValuesGen() != -1) { // ok } else { throw new UnsupportedOperationException("this codec can only be used for reading"); } }
/**
 * Decodes a single doc values marker byte into a {@link DocValuesType}.
 *
 * @param input the input the byte came from; only used to build the error message
 * @param b the marker byte, expected to be between 0 and 4 inclusive
 * @return the decoded type, or {@code null} when {@code b} is 0
 * @throws CorruptIndexException if {@code b} is not one of the known markers
 */
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
  switch (b) {
    case 0:
      return null;
    case 1:
      return DocValuesType.NUMERIC;
    case 2:
      return DocValuesType.BINARY;
    case 3:
      return DocValuesType.SORTED;
    case 4:
      return DocValuesType.SORTED_SET;
    default:
      throw new CorruptIndexException("invalid docvalues byte: " + b + " (resource=" + input + ")");
  }
}
/**
 * Finishes the current document for this field. Unless the field omits norms,
 * records the norm computed by the similarity for the current document,
 * creating the norms writer (and marking the field's norm value type as
 * NUMERIC) on first use. Always finishes {@code termsHashPerField} afterwards.
 *
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public void finish() throws IOException {
  final boolean writesNorms = !fieldInfo.omitsNorms();
  if (writesNorms) {
    if (norms == null) {
      // First norm for this field: set up the writer lazily.
      fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC);
      norms = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed, false);
    }
    norms.addValue(docState.docID, similarity.computeNorm(invertState));
  }
  termsHashPerField.finish();
}
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen(); OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(in, field); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues)dv).mapping; if (map.owner == getCoreCacheKey()) { cachedOrdMaps.put(field, map); } } return dv; } } // cached ordinal map if (getFieldInfos().fieldInfo(field).getDocValuesType() != DocValuesType.SORTED) { return null; } int size = in.leaves().size(); final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { AtomicReaderContext context = in.leaves().get(i); SortedDocValues v = context.reader().getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); } values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); return new MultiSortedDocValues(values, starts, map); }
/**
 * Updates documents' DocValues fields to the given values. Each field update
 * is applied to the set of documents that are associated with the
 * {@link Term} to the same value. All updates are atomically applied and
 * flushed together.
 *
 * @param term
 *          the term identifying the documents to update
 * @param updates
 *          the updates to apply; each must be a NUMERIC or BINARY doc values
 *          field whose name and type are already known to this writer
 * @throws IllegalArgumentException
 *           if an update has no doc values type, targets a field that does
 *           not already exist with that type, or is neither NUMERIC nor BINARY
 * @throws CorruptIndexException
 *           if the index is corrupt
 * @throws IOException
 *           if there is a low-level IO error
 */
public void updateDocValues(Term term, Field... updates) throws IOException {
  ensureOpen();
  final DocValuesUpdate[] pending = new DocValuesUpdate[updates.length];
  int slot = 0;
  for (final Field update : updates) {
    final DocValuesType dvType = update.fieldType().docValueType();
    if (dvType == null) {
      throw new IllegalArgumentException("can only update NUMERIC or BINARY fields! field=" + update.name());
    }
    if (!globalFieldNumberMap.contains(update.name(), dvType)) {
      throw new IllegalArgumentException("can only update existing docvalues fields! field=" + update.name() + ", type=" + dvType);
    }
    if (dvType == DocValuesType.NUMERIC) {
      pending[slot] = new NumericDocValuesUpdate(term, update.name(), (Long) update.numericValue());
    } else if (dvType == DocValuesType.BINARY) {
      pending[slot] = new BinaryDocValuesUpdate(term, update.name(), update.binaryValue());
    } else {
      throw new IllegalArgumentException("can only update NUMERIC or BINARY fields: field=" + update.name() + ", type=" + dvType);
    }
    slot++;
  }
  try {
    final boolean hasEvents = docWriter.updateDocValues(pending);
    if (hasEvents) {
      processEvents(true, false);
    }
  } catch (OutOfMemoryError oom) {
    tragicEvent(oom, "updateDocValues");
  }
}
/** * Returns the global field number for the given field name. If the name * does not exist yet it tries to add it with the given preferred field * number assigned if possible otherwise the first unassigned field number * is used as the field number. */ synchronized int addOrGet(String fieldName, int preferredFieldNumber, DocValuesType dvType) { if (dvType != null) { DocValuesType currentDVType = docValuesType.get(fieldName); if (currentDVType == null) { docValuesType.put(fieldName, dvType); } else if (currentDVType != null && currentDVType != dvType) { throw new IllegalArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + fieldName + "\""); } } Integer fieldNumber = nameToNumber.get(fieldName); if (fieldNumber == null) { final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber); if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) { // cool - we can use this number globally fieldNumber = preferredBoxed; } else { // find a new FieldNumber while (numberToName.containsKey(++lowestUnassignedFieldNumber)) { // might not be up to date - lets do the work once needed } fieldNumber = lowestUnassignedFieldNumber; } numberToName.put(fieldNumber, fieldName); nameToNumber.put(fieldName, fieldNumber); } return fieldNumber.intValue(); }
/**
 * Checks that {@code number} and {@code name} are mapped to each other in
 * both directions and that {@code dvType}, when non-null, does not conflict
 * with a doc values type already recorded for {@code name}.
 *
 * @throws IllegalArgumentException if any of the three checks fails
 */
synchronized void verifyConsistent(Integer number, String name, DocValuesType dvType) {
  if (!name.equals(numberToName.get(number))) {
    throw new IllegalArgumentException("field number " + number + " is already mapped to field name \"" + numberToName.get(number) + "\", not \"" + name + "\"");
  }
  if (!number.equals(nameToNumber.get(name))) {
    throw new IllegalArgumentException("field name \"" + name + "\" is already mapped to field number \"" + nameToNumber.get(name) + "\", not \"" + number + "\"");
  }
  if (dvType != null) {
    final DocValuesType recordedType = docValuesType.get(name);
    if (recordedType != null && recordedType != dvType) {
      throw new IllegalArgumentException("cannot change DocValues type from " + recordedType + " to " + dvType + " for field \"" + name + "\"");
    }
  }
}
/** * Returns true if the {@code fieldName} exists in the map and is of the * same {@code dvType}. */ synchronized boolean contains(String fieldName, DocValuesType dvType) { // used by IndexWriter.updateNumericDocValue if (!nameToNumber.containsKey(fieldName)) { return false; } else { // only return true if the field has the same dvType as the requested one return dvType == docValuesType.get(fieldName); } }
private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType) { FieldInfo fi = fieldInfo(name); if (fi == null) { // This field wasn't yet added to this in-RAM // segment's FieldInfo, so now we get a global // number for this field. If the field was seen // before then we'll get the same name and number, // else we'll allocate a new one: final int fieldNumber = globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues); fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, -1, null); assert !byName.containsKey(fi.name); globalFieldNumbers.verifyConsistent(Integer.valueOf(fi.number), fi.name, fi.getDocValuesType()); byName.put(fi.name, fi); } else { fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions); if (docValues != null) { // Only pay the synchronization cost if fi does not already have a DVType boolean updateGlobal = !fi.hasDocValues(); if (updateGlobal) { // Must also update docValuesType map so it's // aware of this field's DocValueType. This will throw IllegalArgumentException if // an illegal type change was attempted. globalFieldNumbers.setDocValuesType(fi.number, name, docValues); } fi.setDocValuesType(docValues); // this will also perform the consistency check. } if (!fi.omitsNorms() && normType != null) { fi.setNormValueType(normType); } } return fi; }
/**
 * Parses a textual doc values type.
 *
 * @param dvType the literal {@code "false"} for "no doc values", otherwise the
 *        exact name of a {@link DocValuesType} constant
 * @return {@code null} for {@code "false"}, else the constant named by {@code dvType}
 */
public DocValuesType docValuesType(String dvType) {
  return "false".equals(dvType) ? null : DocValuesType.valueOf(dvType);
}
/**
 * Writes the header for this field: the {@code FIELD} marker followed by the
 * field name, then the {@code TYPE} marker followed by {@code type.toString()},
 * each on its own line.
 *
 * @param field the field whose name is written
 * @param type the doc values type whose string form is written
 */
private void writeFieldEntry(FieldInfo field, FieldInfo.DocValuesType type) throws IOException {
  // field line
  SimpleTextUtil.write(data, FIELD);
  SimpleTextUtil.write(data, field.name, scratch);
  SimpleTextUtil.writeNewline(data);

  // type line
  SimpleTextUtil.write(data, TYPE);
  final String typeName = type.toString();
  SimpleTextUtil.write(data, typeName, scratch);
  SimpleTextUtil.writeNewline(data);
}
public void test() throws IOException { DocValuesType type = DocValuesType.SORTED; //for (DocValuesType type : DocValuesType.values()) { if (type != DocValuesType.SORTED_SET && type != DocValuesType.SORTED_NUMERIC) { test(type); } //} }
/**
 * Creates the distinct values collector that matches the kind of the given
 * first pass collector: a function-based one when the first pass collector is
 * a {@code FunctionFirstPassGroupingCollector}, a term-based one otherwise.
 * The collector is seeded with the first pass collector's top groups.
 *
 * @param firstPassGroupingCollector the collector whose top groups are used
 * @param groupField name of the field to group by
 * @param countField name of the field whose distinct values are counted
 * @param dvType doc values type (not consulted here)
 */
@SuppressWarnings({"unchecked","rawtypes"})
private <T extends Comparable> AbstractDistinctValuesCollector<AbstractDistinctValuesCollector.GroupCount<T>> createDistinctCountCollector(AbstractFirstPassGroupingCollector<T> firstPassGroupingCollector, String groupField, String countField, DocValuesType dvType) {
  // The random source is obtained but not used below.
  final Random random = random();
  final Collection<SearchGroup<T>> topGroups = firstPassGroupingCollector.getTopGroups(0, false);
  final boolean functionBased = FunctionFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass());
  final AbstractDistinctValuesCollector collector;
  if (functionBased) {
    collector = new FunctionDistinctValuesCollector(new HashMap<>(), new BytesRefFieldSource(groupField), new BytesRefFieldSource(countField), (Collection) topGroups);
  } else {
    collector = new TermDistinctValuesCollector(groupField, countField, (Collection) topGroups);
  }
  return collector;
}
/**
 * Bundles an index with the data a test needs to check searches against it.
 * Every argument is stored as-is in the field of the same name.
 *
 * @param directory the directory holding the index
 * @param indexReader reader over the index
 * @param dvType doc values type associated with this context
 * @param searchTermToGroupCounts nested map keyed by search term
 * @param contentStrings the content strings associated with this context
 */
IndexContext(Directory directory, DirectoryReader indexReader, DocValuesType dvType, Map<String, Map<String, Set<String>>> searchTermToGroupCounts, String[] contentStrings) {
  // index handles
  this.indexReader = indexReader;
  this.directory = directory;
  // test data
  this.contentStrings = contentStrings;
  this.searchTermToGroupCounts = searchTermToGroupCounts;
  this.dvType = dvType;
}
/**
 * 4.0-style docvalues byte.
 *
 * @param type the field's doc values type, or {@code null} if it has none
 * @param legacyTypeAtt name of a {@code LegacyDocValuesType} constant; asserted
 *        to be {@code null} exactly when {@code type} is {@code null}
 * @return 0 when {@code type} is {@code null}, otherwise the ordinal of the
 *         named legacy type narrowed to a byte
 */
public byte docValuesByte(DocValuesType type, String legacyTypeAtt) {
  final boolean hasDocValues = type != null;
  if (hasDocValues) {
    assert legacyTypeAtt != null;
    final LegacyDocValuesType legacyType = LegacyDocValuesType.valueOf(legacyTypeAtt);
    return (byte) legacyType.ordinal();
  }
  assert legacyTypeAtt == null;
  return 0;
}
/**
 * Encodes a {@link DocValuesType} as its single-byte marker.
 *
 * @param type the type to encode, or {@code null} for "no doc values"
 * @return 0 for {@code null}, 1 for NUMERIC, 2 for BINARY, 3 for SORTED and
 *         4 for SORTED_SET
 * @throws AssertionError if {@code type} is any other value
 */
private static byte docValuesByte(DocValuesType type) {
  if (type == null) {
    return 0;
  }
  switch (type) {
    case NUMERIC:
      return 1;
    case BINARY:
      return 2;
    case SORTED:
      return 3;
    case SORTED_SET:
      return 4;
    default:
      throw new AssertionError();
  }
}