/** Average payload-based boost over all positions of the current document (computed lazily, cached per doc). */
float payloadBoost() throws IOException {
    if (doc != docID()) {
        final int freq = postings.freq();
        payloadBoost = 0;
        for (int i = 0; i < freq; ++i) {
            postings.nextPosition();
            final BytesRef payload = postings.getPayload();
            float boost;
            if (payload == null) {
                boost = 1;
            } else if (payload.length == 1) {
                boost = SmallFloat.byte315ToFloat(payload.bytes[payload.offset]);
            } else if (payload.length == 4) {
                // TODO: for bw compat only, remove this in 6.0
                boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            } else {
                throw new IllegalStateException("Payloads are expected to have a length of 1 or 4 but got: " + payload);
            }
            payloadBoost += boost;
        }
        payloadBoost /= freq;
        doc = docID();
    }
    return payloadBoost;
}
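/*
 * Hedged sketch (not from the snippet above): how the 1-byte and 4-byte payloads that
 * payloadBoost() decodes could be produced at index time. SmallFloat.floatToByte315 and
 * PayloadHelper.encodeFloat are real Lucene utilities; the method name and parameters
 * are illustrative assumptions only.
 */
static BytesRef encodeBoostPayload(float boost, boolean legacyFourByte) {
    if (legacyFourByte) {
        // 4-byte form, read back with PayloadHelper.decodeFloat(bytes, offset)
        return new BytesRef(PayloadHelper.encodeFloat(boost));
    }
    // compact 1-byte form, read back with SmallFloat.byte315ToFloat(bytes[offset])
    return new BytesRef(new byte[] { SmallFloat.floatToByte315(boost) });
}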
private int position(int doc, BytesRef term) {
    int maxPosition = 20;
    try {
        Terms terms = context.reader().getTermVector(doc, weight.field);
        TermsEnum termsEnum = terms.iterator();
        if (!termsEnum.seekExact(term)) {
            Loggers.getLogger(this.getClass()).error("seekExact failed, returning default position = " + maxPosition + " for field = " + weight.field);
            return maxPosition;
        }
        PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
        dpEnum.nextDoc();
        dpEnum.nextPosition();
        BytesRef payload = dpEnum.getPayload();
        if (payload == null) {
            Loggers.getLogger(this.getClass()).error("getPayload failed, returning default position = " + maxPosition + " for field = " + weight.field);
            return maxPosition;
        }
        return PayloadHelper.decodeInt(payload.bytes, payload.offset);
    } catch (Exception ex) {
        Loggers.getLogger(this.getClass()).error("Unexpected exception, returning default position = " + maxPosition + " for field = " + weight.field, ex);
        return maxPosition;
    }
}
public void testEncoder() throws Exception {
    Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.end();
    stream.close();
}
public void testDelim() throws Exception {
    Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.end();
    stream.close();
}
public void testEncoder() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);
    TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);
    tf.reset();
    while (tf.incrementToken()) {
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        byte[] payData = payAttr.getPayload().bytes;
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
}
public void testDelim() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
    args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);
    TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);
    tf.reset();
    while (tf.incrementToken()) {
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        byte[] payData = payAttr.getPayload().bytes;
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
}
public float payloadAsFloat(float defaultMissing) {
    if (payload != null && payload.length != 0) {
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    } else {
        return defaultMissing;
    }
}
public int payloadAsInt(int defaultMissing) {
    if (payload != null && payload.length != 0) {
        return PayloadHelper.decodeInt(payload.bytes, payload.offset);
    } else {
        return defaultMissing;
    }
}
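/*
 * Hedged round-trip sketch for the PayloadHelper calls used by payloadAsFloat and
 * payloadAsInt above. encodeFloat/decodeFloat and encodeInt/decodeInt are real Lucene
 * helpers; the wrapping method and the sample values are illustrative assumptions.
 */
static void payloadRoundTripExample() {
    BytesRef floatPayload = new BytesRef(PayloadHelper.encodeFloat(0.25f));
    BytesRef intPayload = new BytesRef(PayloadHelper.encodeInt(42));
    // always pass the offset: a BytesRef read back from the index may not start at byte 0
    float f = PayloadHelper.decodeFloat(floatPayload.bytes, floatPayload.offset); // 0.25f
    int i = PayloadHelper.decodeInt(intPayload.bytes, intPayload.offset);         // 42
    assert f == 0.25f && i == 42;
}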
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding, char delimiter) {
    String resultString = "";
    // tracks how many times each token has been seen so far, so the matching payload is picked per occurrence
    ObjectIntHashMap<String> payloadCounter = new ObjectIntHashMap<>();
    for (String token : tokens) {
        if (!payloadCounter.containsKey(token)) {
            payloadCounter.putIfAbsent(token, 0);
        } else {
            payloadCounter.put(token, payloadCounter.get(token) + 1);
        }
        resultString = resultString + token;
        BytesRef payload = payloads.get(token).get(payloadCounter.get(token));
        if (payload.length > 0) {
            resultString = resultString + delimiter;
            switch (encoding) {
            case 0: {
                resultString = resultString + Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset));
                break;
            }
            case 1: {
                resultString = resultString + Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset));
                break;
            }
            case 2: {
                resultString = resultString + payload.utf8ToString();
                break;
            }
            default: {
                throw new ElasticsearchException("unsupported encoding type");
            }
            }
        }
        resultString = resultString + " ";
    }
    return resultString;
}
private Map<String, List<BytesRef>> createPayloads(String[] tokens, int encoding) {
    Map<String, List<BytesRef>> payloads = new HashMap<>();
    for (String token : tokens) {
        if (payloads.get(token) == null) {
            payloads.put(token, new ArrayList<BytesRef>());
        }
        boolean createPayload = randomBoolean();
        if (createPayload) {
            switch (encoding) {
            case 0: {
                float theFloat = randomFloat();
                payloads.get(token).add(new BytesRef(PayloadHelper.encodeFloat(theFloat)));
                break;
            }
            case 1: {
                payloads.get(token).add(new BytesRef(PayloadHelper.encodeInt(randomInt())));
                break;
            }
            case 2: {
                String payload = randomUnicodeOfLengthBetween(50, 100);
                // replace any whitespace so the random payload cannot be split by the tokenizer
                for (int c = 0; c < payload.length(); c++) {
                    if (Character.isWhitespace(payload.charAt(c))) {
                        payload = payload.replace(payload.charAt(c), 'w');
                    }
                }
                payloads.get(token).add(new BytesRef(payload));
                break;
            }
            default: {
                throw new ElasticsearchException("unsupported encoding type");
            }
            }
        } else {
            payloads.get(token).add(new BytesRef());
        }
    }
    return payloads;
}
@Override
public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
        CharTermAttribute termAtt = this.getAttribute(CharTermAttribute.class);
        final String term = termAtt.toString();
        termAtt.setEmpty();
        PayloadAttribute payloadAtt = this.getAttribute(PayloadAttribute.class);
        final BytesRef payload = payloadAtt.getPayload();
        if (payload == null) {
            return true;
        }
        float payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
        if (payloadValue == 0.0f) {
            return true;
        }
        String weight = Float.toString(payloadValue);
        // leave the term empty if the weight prints in scientific notation, i.e. it is effectively zero
        if (weight.contains("E-")) {
            return true;
        }
        String boostedTerm = term + "^" + weight;
        termAtt.append(boostedTerm);
        return true;
    }
    return false;
}
protected void setAttributes(String token, float payload) {
    CharTermAttribute termAtt = this.getAttribute(CharTermAttribute.class);
    termAtt.setEmpty();
    termAtt.append(token);
    termAtt.setLength(token.length());
    PayloadAttribute payloadAtt = this.getAttribute(PayloadAttribute.class);
    byte[] bytes = PayloadHelper.encodeFloat(payload);
    payloadAtt.setPayload(new BytesRef(bytes));
}
@Override
public float scorePayload(int doc, int start, int end, BytesRef payload) {
    if (payload != null) {
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    return 1.0F;
}
@Override
public float scorePayload(int docID, int start, int end, BytesRef payload) {
    float pload = 1.0f;
    if (payload != null) {
        //pload = PayloadHelper.decodeFloat(payload.bytes);
        pload = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    System.out.println("===> docid: " + docID + " payload: " + pload);
    return pload;
}
@Override
public float scorePayload(int docID, int start, int end, BytesRef payload) {
    float pload = 1.0f;
    if (payload != null) {
        // pass the offset: the BytesRef returned from the index may not start at byte 0
        pload = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    logger.info("===> docid: " + docID + " payload: " + pload);
    return pload;
}
public float scorePayload(int docID, String fieldName, int start, int end, byte[] payload, int offset, int length) {
    if (payload != null) {
        return PayloadHelper.decodeFloat(payload, offset);
    } else {
        return 1.0F;
    }
}
/**
 * Adds term weights found by tokenizing text from reader into the Map words
 *
 * @param reader a source of text to be tokenized
 * @param termWeightMap a Map of terms and their weights
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException("To use RelevancyFeedback without "
            + "term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, reader);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsedPerField) {
                break;
            }
            if (word.trim().length() == 0) {
                continue;
            }
            if (isNoiseWord(word)) {
                continue;
            }
            BytesRef payload = payloadAttr.getPayload();
            float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field
            if (isPayloadField(fieldName) && payload != null) {
                tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            // increment frequency
            Flt termWeight = termWeightMap.get(word);
            if (termWeight == null) {
                termWeightMap.put(word, new Flt(tokenWeight));
            } else {
                termWeight.x += tokenWeight;
            }
        }
        ts.end();
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}
/**
 * Adds term weights found by tokenizing text from reader into the Map words
 *
 * @param reader a source of text to be tokenized
 * @param termWeightMap a Map of terms and their weights
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException("To use MoreLikeThis without "
            + "term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, reader);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsedPerField) {
                break;
            }
            if (word.trim().length() == 0) {
                continue;
            }
            if (isNoiseWord(word)) {
                continue;
            }
            BytesRef payload = payloadAttr.getPayload();
            float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field
            if (isPayloadField(fieldName) && payload != null) {
                tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            // increment frequency
            Flt termWeight = termWeightMap.get(word);
            if (termWeight == null) {
                termWeightMap.put(word, new Flt(tokenWeight));
            } else {
                termWeight.x += tokenWeight;
            }
        }
        ts.end();
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}
BulletinPayloadsFilter(TokenStream in, float warningBoost) {
    super(in);
    payloadAttr = addAttribute(PayloadAttribute.class);
    termAtt = addAttribute(TermAttribute.class);
    boostPayload = new Payload(PayloadHelper.encodeFloat(warningBoost));
}
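/*
 * Hedged sketch (not from the snippet above): on Lucene 4.x and later, Payload and
 * TermAttribute were replaced by BytesRef and CharTermAttribute, so a roughly equivalent
 * constructor might look like the following. Field names mirror the 3.x version above
 * and are otherwise assumptions.
 */
BulletinPayloadsFilter(TokenStream in, float warningBoost) {
    super(in);
    payloadAttr = addAttribute(PayloadAttribute.class);
    termAtt = addAttribute(CharTermAttribute.class);
    boostPayload = new BytesRef(PayloadHelper.encodeFloat(warningBoost));
}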
/**
 * Replaces the current term (attributes) with term (attributes) from the stack
 *
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();
    String term = expandedTerm.getTerm();
    SKOSType termType = expandedTerm.getTermType();
    String sTerm = "";
    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }
    /*
     * copies the values of all attribute implementations from this state into
     * the implementations of the target stream
     */
    restoreState(current);
    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);
    /*
     * set position increment to zero to put multiple terms into the same position
     */
    posIncrAtt.setPositionIncrement(0);
    /*
     * sets the type of the expanded term (pref, alt, broader, narrower, etc.)
     */
    skosAtt.setSkosType(termType);
    /*
     * converts the SKOS Attribute to a payload, which is propagated to the index
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}
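/*
 * Hedged sketch (not from the snippet above): the SKOS type ordinal stored in the payload
 * can be read back at search time with PayloadHelper.decodeInt. Indexing SKOSType.values()
 * by ordinal is standard enum behaviour; the method name is an illustrative assumption.
 */
static SKOSType decodeSkosType(BytesRef payload) {
    int ordinal = PayloadHelper.decodeInt(payload.bytes, payload.offset);
    return SKOSType.values()[ordinal];
}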