/** Average payload-based boost over all positions of the current document (computed lazily, cached per doc). */
float payloadBoost() throws IOException {
    if (doc != docID()) {
        final int freq = postings.freq();
        payloadBoost = 0;
        for (int i = 0; i < freq; ++i) {
            postings.nextPosition();
            final BytesRef payload = postings.getPayload();
            float boost;
            if (payload == null) {
                boost = 1;
            } else if (payload.length == 1) {
                boost = SmallFloat.byte315ToFloat(payload.bytes[payload.offset]);
            } else if (payload.length == 4) {
                // TODO: for bw compat only, remove this in 6.0
                boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            } else {
                throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: " + payload);
            }
            payloadBoost += boost;
        }
        payloadBoost /= freq;
        doc = docID();
    }
    return payloadBoost;
}
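/*
 * Hedged sketch (not from the snippet above): how the 1-byte and 4-byte payloads that
 * payloadBoost() decodes could be produced at index time. SmallFloat.floatToByte315 and
 * PayloadHelper.encodeFloat are real Lucene utilities; the method name and parameters
 * are illustrative assumptions only.
 */
static BytesRef encodeBoostPayload(float boost, boolean legacyFourByte) {
    if (legacyFourByte) {
        // 4-byte form, read back with PayloadHelper.decodeFloat(bytes, offset)
        return new BytesRef(PayloadHelper.encodeFloat(boost));
    }
    // compact 1-byte form, read back with SmallFloat.byte315ToFloat(bytes[offset])
    return new BytesRef(new byte[] { SmallFloat.floatToByte315(boost) });
}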
private int position(int doc, BytesRef term) {
    int maxPosition = 20;
    try {
        Terms terms = context.reader().getTermVector(doc, weight.field);
        TermsEnum termsEnum = terms.iterator();
        if (!termsEnum.seekExact(term)) {
            Loggers.getLogger(this.getClass()).error("seekExact failed, returning default position = " + maxPosition + " for field = " + weight.field);
            return maxPosition;
        }
        PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
        dpEnum.nextDoc();
        dpEnum.nextPosition();
        BytesRef payload = dpEnum.getPayload();
        if (payload == null) {
            Loggers.getLogger(this.getClass()).error("getPayload failed, returning default position = " + maxPosition + " for field = " + weight.field);
            return maxPosition;
        }
        return PayloadHelper.decodeInt(payload.bytes, payload.offset);
    } catch (Exception ex) {
        Loggers.getLogger(this.getClass()).error("Unexpected exception, returning default position = " + maxPosition + " for field = " + weight.field, ex);
        return maxPosition;
    }
}
public void testEncoder() throws Exception {
    Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.end();
    stream.close();
}
public void testDelim() throws Exception {
    Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.end();
    stream.close();
}
public void testEncoder() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);
    TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);
    tf.reset();
    while (tf.incrementToken()) {
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        byte[] payData = payAttr.getPayload().bytes;
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
}
public void testDelim() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
    args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);
    TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);
    tf.reset();
    while (tf.incrementToken()) {
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        byte[] payData = payAttr.getPayload().bytes;
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
}
public float payloadAsFloat(float defaultMissing) {
    if (payload != null && payload.length != 0) {
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    } else {
        return defaultMissing;
    }
}
public int payloadAsInt(int defaultMissing) {
    if (payload != null && payload.length != 0) {
        return PayloadHelper.decodeInt(payload.bytes, payload.offset);
    } else {
        return defaultMissing;
    }
}
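/*
 * Hedged round-trip sketch for the PayloadHelper calls used by payloadAsFloat and
 * payloadAsInt above. encodeFloat/decodeFloat and encodeInt/decodeInt are real Lucene
 * helpers; the wrapping method and the sample values are illustrative assumptions.
 */
static void payloadRoundTripExample() {
    BytesRef floatPayload = new BytesRef(PayloadHelper.encodeFloat(0.25f));
    BytesRef intPayload = new BytesRef(PayloadHelper.encodeInt(42));
    // always pass the offset: a BytesRef read back from the index may not start at byte 0
    float f = PayloadHelper.decodeFloat(floatPayload.bytes, floatPayload.offset); // 0.25f
    int i = PayloadHelper.decodeInt(intPayload.bytes, intPayload.offset);         // 42
    assert f == 0.25f && i == 42;
}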
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding, char delimiter) {
    String resultString = "";
    // tracks how many times each token has been seen so far, so the matching payload is picked per occurrence
    ObjectIntHashMap<String> payloadCounter = new ObjectIntHashMap<>();
    for (String token : tokens) {
        if (!payloadCounter.containsKey(token)) {
            payloadCounter.putIfAbsent(token, 0);
        } else {
            payloadCounter.put(token, payloadCounter.get(token) + 1);
        }
        resultString = resultString + token;
        BytesRef payload = payloads.get(token).get(payloadCounter.get(token));
        if (payload.length > 0) {
            resultString = resultString + delimiter;
            switch (encoding) {
            case 0: {
                resultString = resultString + Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset));
                break;
            }
            case 1: {
                resultString = resultString + Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset));
                break;
            }
            case 2: {
                resultString = resultString + payload.utf8ToString();
                break;
            }
            default: {
                throw new ElasticsearchException("unsupported encoding type");
            }
            }
        }
        resultString = resultString + " ";
    }
    return resultString;
}
private Map<String, List<BytesRef>> createPayloads(String[] tokens, int encoding) {
    Map<String, List<BytesRef>> payloads = new HashMap<>();
    for (String token : tokens) {
        if (payloads.get(token) == null) {
            payloads.put(token, new ArrayList<BytesRef>());
        }
        boolean createPayload = randomBoolean();
        if (createPayload) {
            switch (encoding) {
            case 0: {
                float theFloat = randomFloat();
                payloads.get(token).add(new BytesRef(PayloadHelper.encodeFloat(theFloat)));
                break;
            }
            case 1: {
                payloads.get(token).add(new BytesRef(PayloadHelper.encodeInt(randomInt())));
                break;
            }
            case 2: {
                String payload = randomUnicodeOfLengthBetween(50, 100);
                // replace any whitespace so the random payload cannot be split by the tokenizer
                for (int c = 0; c < payload.length(); c++) {
                    if (Character.isWhitespace(payload.charAt(c))) {
                        payload = payload.replace(payload.charAt(c), 'w');
                    }
                }
                payloads.get(token).add(new BytesRef(payload));
                break;
            }
            default: {
                throw new ElasticsearchException("unsupported encoding type");
            }
            }
        } else {
            payloads.get(token).add(new BytesRef());
        }
    }
    return payloads;
}
@Override
public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
        CharTermAttribute termAtt = this.getAttribute(CharTermAttribute.class);
        final String term = termAtt.toString();
        termAtt.setEmpty();
        PayloadAttribute payloadAtt = this.getAttribute(PayloadAttribute.class);
        final BytesRef payload = payloadAtt.getPayload();
        if (payload == null) {
            return true;
        }
        float payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
        if (payloadValue == 0.0f) {
            return true;
        }
        String weight = Float.toString(payloadValue);
        // leave the term empty if the weight prints in scientific notation, i.e. it is effectively zero
        if (weight.contains("E-")) {
            return true;
        }
        String boostedTerm = term + "^" + weight;
        termAtt.append(boostedTerm);
        return true;
    }
    return false;
}
protected void setAttributes(String token, float payload) {
    CharTermAttribute termAtt = this.getAttribute(CharTermAttribute.class);
    termAtt.setEmpty();
    termAtt.append(token);
    termAtt.setLength(token.length());
    PayloadAttribute payloadAtt = this.getAttribute(PayloadAttribute.class);
    byte[] bytes = PayloadHelper.encodeFloat(payload);
    payloadAtt.setPayload(new BytesRef(bytes));
}
@Override
public float scorePayload(int doc, int start, int end, BytesRef payload) {
    if (payload != null) {
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    return 1.0F;
}
@Override
public float scorePayload(int docID, int start, int end, BytesRef payload) {
    float pload = 1.0f;
    if (payload != null) {
        //pload = PayloadHelper.decodeFloat(payload.bytes);
        pload = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    System.out.println("===> docid: " + docID + " payload: " + pload);
    return pload;
}
@Override
public float scorePayload(int docID, int start, int end, BytesRef payload) {
    float pload = 1.0f;
    if (payload != null) {
        // pass the offset: the BytesRef returned from the index may not start at byte 0
        pload = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    logger.info("===> docid: " + docID + " payload: " + pload);
    return pload;
}
public float scorePayload(int docID, String fieldName, int start, int end, byte[] payload, int offset, int length) {
    if (payload != null) {
        return PayloadHelper.decodeFloat(payload, offset);
    } else {
        return 1.0F;
    }
}
/**
 * Adds term weights found by tokenizing text from reader into the Map words
 *
 * @param reader a source of text to be tokenized
 * @param termWeightMap a Map of terms and their weights
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException("To use RelevancyFeedback without "
            + "term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, reader);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsedPerField) {
                break;
            }
            if (word.trim().length() == 0) {
                continue;
            }
            if (isNoiseWord(word)) {
                continue;
            }
            BytesRef payload = payloadAttr.getPayload();
            float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field
            if (isPayloadField(fieldName) && payload != null) {
                tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            // increment frequency
            Flt termWeight = termWeightMap.get(word);
            if (termWeight == null) {
                termWeightMap.put(word, new Flt(tokenWeight));
            } else {
                termWeight.x += tokenWeight;
            }
        }
        ts.end();
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}
/**
 * Adds term weights found by tokenizing text from reader into the Map words
 *
 * @param reader a source of text to be tokenized
 * @param termWeightMap a Map of terms and their weights
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException("To use MoreLikeThis without "
            + "term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, reader);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsedPerField) {
                break;
            }
            if (word.trim().length() == 0) {
                continue;
            }
            if (isNoiseWord(word)) {
                continue;
            }
            BytesRef payload = payloadAttr.getPayload();
            float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field
            if (isPayloadField(fieldName) && payload != null) {
                tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            // increment frequency
            Flt termWeight = termWeightMap.get(word);
            if (termWeight == null) {
                termWeightMap.put(word, new Flt(tokenWeight));
            } else {
                termWeight.x += tokenWeight;
            }
        }
        ts.end();
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}
BulletinPayloadsFilter(TokenStream in, float warningBoost) {
    super(in);
    payloadAttr = addAttribute(PayloadAttribute.class);
    termAtt = addAttribute(TermAttribute.class);
    boostPayload = new Payload(PayloadHelper.encodeFloat(warningBoost));
}
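/*
 * Hedged sketch (not from the snippet above): on Lucene 4.x and later, Payload and
 * TermAttribute were replaced by BytesRef and CharTermAttribute, so a roughly equivalent
 * constructor might look like the following. Field names mirror the 3.x version above
 * and are otherwise assumptions.
 */
BulletinPayloadsFilter(TokenStream in, float warningBoost) {
    super(in);
    payloadAttr = addAttribute(PayloadAttribute.class);
    termAtt = addAttribute(CharTermAttribute.class);
    boostPayload = new BytesRef(PayloadHelper.encodeFloat(warningBoost));
}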
/**
 * Replaces the current term (attributes) with term (attributes) from the stack
 *
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();
    String term = expandedTerm.getTerm();
    SKOSType termType = expandedTerm.getTermType();
    String sTerm = "";
    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }
    /*
     * copies the values of all attribute implementations from this state into
     * the implementations of the target stream
     */
    restoreState(current);
    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);
    /*
     * set position increment to zero to put multiple terms into the same position
     */
    posIncrAtt.setPositionIncrement(0);
    /*
     * sets the type of the expanded term (pref, alt, broader, narrower, etc.)
     */
    skosAtt.setSkosType(termType);
    /*
     * converts the SKOS Attribute to a payload, which is propagated to the index
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}
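/*
 * Hedged sketch (not from the snippet above): the SKOS type ordinal stored in the payload
 * can be read back at search time with PayloadHelper.decodeInt. Indexing SKOSType.values()
 * by ordinal is standard enum behaviour; the method name is an illustrative assumption.
 */
static SKOSType decodeSkosType(BytesRef payload) {
    int ordinal = PayloadHelper.decodeInt(payload.bytes, payload.offset);
    return SKOSType.values()[ordinal];
}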