/**
 * Builds a filter over {@code suffix}, keeping a reference to {@code prefix} and
 * registering the same set of attributes on both streams so token state can be
 * read from either one.
 *
 * @param prefix stream whose attributes are mirrored into the p_* fields
 * @param suffix stream this filter delegates to (passed to super)
 */
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
    super(suffix);
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;

    // Attributes of this (suffix-backed) stream.
    termAtt = addAttribute(CharTermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    payloadAtt = addAttribute(PayloadAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);

    // Mirror attributes registered on the prefix stream.
    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
@Override public void copyTo(AttributeImpl target) { if (target instanceof Token) { final Token to = (Token) target; to.reinit(this); // reinit shares the payload, so clone it: if (payload !=null) { to.payload = payload.clone(); } } else { super.copyTo(target); ((OffsetAttribute) target).setOffset(startOffset, endOffset); ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement); ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone()); ((FlagsAttribute) target).setFlags(flags); ((TypeAttribute) target).setType(type); } }
/** verify that payload gets picked up for 1st group of tokens */
public void testTypeForPayload1() throws IOException {
    TokenTypeJoinFilter filter = new TokenTypeJoinFilter(
            new TokenArrayTokenizer(tokensWithPayloads),
            new String[] {"normalized", "filing", "prefix"},
            "joined", "normalized", "!", false, false);
    CharTermAttribute term = filter.getAttribute(CharTermAttribute.class);
    TypeAttribute type = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payload = filter.getAttribute(PayloadAttribute.class);
    filter.reset();

    // First joined token: payload1 is expected to be carried through.
    assertTrue(filter.incrementToken());
    assertEquals("unconsoled!Unconsoled!The ", term.toString());
    assertEquals("joined", type.type());
    assertEquals("payload1", payload.getPayload().utf8ToString());

    // Second joined token: no payload on this group.
    assertTrue(filter.incrementToken());
    assertEquals("room with a view!Room With A View!A ", term.toString());
    assertEquals("joined", type.type());
    assertNull(payload.getPayload());

    assertFalse(filter.incrementToken());
}
/** verify that payload gets picked up for 2nd group of tokens */
public void testTypeForPayload2() throws IOException {
    TokenTypeJoinFilter filter = new TokenTypeJoinFilter(
            new TokenArrayTokenizer(tokensWithPayloads),
            new String[] {"normalized", "filing", "prefix"},
            "joined", "filing", "!", false, false);
    CharTermAttribute term = filter.getAttribute(CharTermAttribute.class);
    TypeAttribute type = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payload = filter.getAttribute(PayloadAttribute.class);
    filter.reset();

    // First joined token: no payload expected for this group.
    assertTrue(filter.incrementToken());
    assertEquals("unconsoled!Unconsoled!The ", term.toString());
    assertEquals("joined", type.type());
    assertNull(payload.getPayload());

    // Second joined token: payload2 is expected to be carried through.
    assertTrue(filter.incrementToken());
    assertEquals("room with a view!Room With A View!A ", term.toString());
    assertEquals("joined", type.type());
    assertEquals("payload2", payload.getPayload().utf8ToString());

    assertFalse(filter.incrementToken());
}
/** Filing value followed by prefix: filing token first, then prefix at the same position. */
@Test
public void testShorthand2() throws IOException {
    JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
    tokenizer.setReader(new StringReader("{\"filing\": \"something\", \"prefix\": \"The \"}"));
    tokenizer.reset();

    // Filing token comes out first, advancing the position.
    assertTrue(tokenizer.incrementToken());
    assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING,
            tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    // Prefix token is stacked at the same position (increment 0).
    assertTrue(tokenizer.incrementToken());
    assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX,
            tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    assertFalse(tokenizer.incrementToken());
}
/** Prefix before filing in the JSON: token order is unchanged (filing first, then prefix). */
@Test
public void testShorthand3() throws IOException {
    JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
    tokenizer.setReader(new StringReader("{\"prefix\": \"The \", \"filing\": \"something\"}"));
    tokenizer.reset();

    // Filing token still comes out first even though "prefix" appears first in the input.
    assertTrue(tokenizer.incrementToken());
    assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING,
            tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    // Prefix token is stacked at the same position (increment 0).
    assertTrue(tokenizer.incrementToken());
    assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX,
            tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    assertFalse(tokenizer.incrementToken());
}
/** Each term tagged with "|XX" should surface XX as its payload; untagged terms get none. */
@Test
public void testDelimitedPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            mockTokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    filter.reset();
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    String[] tags  = {null,  "JJ",    "JJ",  "NN",  "VB",     null,   null,  "JJ",   "JJ",    "NN"};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (tags[i] == null) ? null : tags[i].getBytes("UTF-8");
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
}
/**
 * With keepPayloads=true, only tokens whose payload matches one of the given
 * byte arrays ("VB", "NN") survive the filter.
 */
@Test
public void testKeepPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
            mockTokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    byte[][] payloads = {
        "VB".getBytes(StandardCharsets.UTF_8),
        "NN".getBytes(StandardCharsets.UTF_8)
    };
    FilterPayloadsFilter filter = new FilterPayloadsFilter(baseFilter, payloads, true);
    filter.reset();
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    // Consistency fix: use StandardCharsets.UTF_8 throughout instead of mixing
    // it with charset-name lookups via getBytes("UTF-8").
    assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
    assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes(StandardCharsets.UTF_8));
    assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
    assertFalse(filter.incrementToken());
}
/**
 * With keepPayloads=false, tokens whose payload matches "VB" or "NN" are
 * removed; all other tokens pass through unchanged.
 */
@Test
public void testFilterPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
            mockTokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    // Consistency fix: sibling tests use StandardCharsets.UTF_8; avoid the
    // exception-declaring charset-name lookup getBytes("UTF-8").
    byte[][] payloads = {
        "VB".getBytes(StandardCharsets.UTF_8),
        "NN".getBytes(StandardCharsets.UTF_8)
    };
    FilterPayloadsFilter filter = new FilterPayloadsFilter(baseFilter, payloads, false);
    filter.reset();
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    assertTermEquals("The", filter, termAtt, payAtt, null);
    assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
    assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
    assertTermEquals("over", filter, termAtt, payAtt, null);
    assertTermEquals("the", filter, termAtt, payAtt, null);
    assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
    assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
    assertFalse(filter.incrementToken());
}
/** StripPayloadsTokenFilter should emit every term with its payload removed. */
@Test
public void testStripPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
            mockTokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    StripPayloadsTokenFilter filter = new StripPayloadsTokenFilter(baseFilter);
    filter.reset();
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    for (String term : terms) {
        assertTermPayload(term, filter, termAtt, payAtt);
    }
    assertFalse(filter.incrementToken());
}
void walkTerms(TokenStream ts, String op, String[] terms, String[] tags) throws IOException { int i = 0; while (ts.incrementToken()) { CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); String word = termAtt.toString(); if (terms != null) { assertEquals(terms[i], word); } if (tags != null) { if (tags[i] != null) { PayloadAttribute p = ts.getAttribute(PayloadAttribute.class); BytesRef payload = p.getPayload(); //Arrays.copyOfRange(payload.bytes, payload.offset, payload.offset + payload.length); byte[] data = payload.bytes; assertEquals(tags[i], (data != null) ? new String(data, "UTF-8") : null); } } i++; } if (terms != null) { assertEquals(terms.length, i); } }
/** The float encoder should turn each "|0.1" suffix into a decodable float payload. */
public void testEncoder() throws Exception {
    Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        // Every token in the input carries the same weight.
        assertEquals(0.1f, PayloadHelper.decodeFloat(payData), 0.0f);
    }
    stream.end();
    stream.close();
}
/** A custom '*' delimiter should work the same as the default one for float payloads. */
public void testDelim() throws Exception {
    Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        // Every token in the input carries the same weight.
        assertEquals(0.1f, PayloadHelper.decodeFloat(payData), 0.0f);
    }
    stream.end();
    stream.close();
}
/** Identity-encoded "|XX" tags should come through as raw byte payloads. */
public void testPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    filter.reset();

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    String[] tags  = {null,  "JJ",    "JJ",  "NN",  "VB",     null,   null,  "JJ",   "JJ",    "NN"};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (tags[i] == null) ? null : tags[i].getBytes(StandardCharsets.UTF_8);
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
    filter.end();
    filter.close();
}
/** Float-encoded "|n.n" tags should decode back to the expected float payloads. */
public void testFloatEncoding() throws Exception {
    String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
            '|', new FloatEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    filter.reset();

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    Float[] weights = {null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (weights[i] == null) ? null : PayloadHelper.encodeFloat(weights[i]);
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
    filter.end();
    filter.close();
}
/** Integer-encoded "|n" tags should decode back; untagged terms get no payload. */
public void testIntEncoding() throws Exception {
    String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
            '|', new IntegerEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    filter.reset();

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    Integer[] values = {null, 1, 2, 3, null, null, null, 5, 99, 83};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (values[i] == null) ? null : PayloadHelper.encodeInt(values[i]);
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
    filter.end();
    filter.close();
}
/**
 * Pulls the next token from {@code stream} and asserts its term text equals
 * {@code expected} and its payload bytes equal {@code expectPay} (null means
 * the token must carry no payload).
 */
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    assertTrue(stream.incrementToken());
    assertEquals(expected, termAtt.toString());

    BytesRef payload = payloadAtt.getPayload();
    if (payload == null) {
        assertTrue("expectPay is not null and it should be", expectPay == null);
        return;
    }
    assertTrue(payload.length + " does not equal: " + expectPay.length,
            payload.length == expectPay.length);
    // Compare byte-by-byte, honoring the payload's offset into its backing array.
    for (int i = 0; i < expectPay.length; i++) {
        byte actual = payload.bytes[i + payload.offset];
        assertTrue(expectPay[i] + " does not equal: " + actual, expectPay[i] == actual);
    }
}
/**
 * Verifies SnowballFilter stems the term while preserving every other
 * attribute (offsets, type, position increment, flags, payload) unchanged.
 */
public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);

    // Fix: assert the stream actually produced a token instead of silently
    // ignoring incrementToken()'s return value.
    assertTrue(filter.incrementToken());

    assertEquals("accent", termAtt.toString());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new BytesRef(new byte[]{0, 1, 2, 3}), payloadAtt.getPayload());
}
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset(); while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset()); token.setFlags(flagsAttValue); //overwriting any flags already set... token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } stream.end(); stream.close(); }
/**
 * Consumes {@code ts} and checks each token's term, absolute position
 * (accumulated from position increments), and the length encoded as a VInt in
 * its payload (info.len == -1 means "no payload expected").
 */
private static void assertTokenInfos(TokenStream ts, TokenInfo... infos) throws IOException {
    ts.reset();
    final CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    final PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    final ByteArrayDataInput in = new ByteArrayDataInput();
    int pos = -1;
    for (final TokenInfo info : infos) {
        assertThat(ts.incrementToken()).isTrue();
        pos += posIncrAtt.getPositionIncrement();
        int len = -1;
        final BytesRef payload = payloadAtt.getPayload();
        if (info.len != -1) {
            assertThat(payload).isNotNull();
            // Fix: honor the BytesRef's offset/length — bytes may be a shared
            // buffer, so reading from index 0 of the whole array is wrong.
            in.reset(payload.bytes, payload.offset, payload.length);
            len = in.readVInt();
        } else {
            assertThat(payload).isNull();
        }
        assertThat(new TokenInfo(term.toString(), pos, len)).isEqualTo(info);
    }
    assertThat(ts.incrementToken()).isFalse();
}
/**
 * Factory configured with the "float" encoder should produce decodable float
 * payloads for every "|0.1"-tagged token.
 */
public void testEncoder() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);

    TokenStream input = new MockTokenizer(
            new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);
    tf.reset();
    while (tf.incrementToken()) {
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        byte[] payData = payAttr.getPayload().bytes;
        // Fix: the original asserted payData non-null twice; once is enough.
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
}
/**
 * Factory configured with a FloatEncoder class name and a '*' delimiter should
 * behave like the default-delimiter float case.
 */
public void testDelim() throws Exception {
    Map<String, String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
    args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
    factory.inform(loader);

    TokenStream input = new MockTokenizer(
            new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter tf = factory.create(input);
    tf.reset();
    while (tf.incrementToken()) {
        PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
        assertTrue("payAttr is null and it shouldn't be", payAttr != null);
        byte[] payData = payAttr.getPayload().bytes;
        assertTrue("payData is null and it shouldn't be", payData != null);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
    }
}
/**
 * Identity-encoded "|XX" tags should come through as raw byte payloads.
 * NOTE(review): no explicit reset() here — presumably the assertTermEquals
 * helper in this file resets the stream itself; confirm before changing.
 */
public void testPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    String[] tags  = {null,  "JJ",    "JJ",  "NN",  "VB",     null,   null,  "JJ",   "JJ",    "NN"};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (tags[i] == null) ? null : tags[i].getBytes("UTF-8");
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
}
/** Float-encoded "|n.n" tags should decode back to the expected float payloads. */
public void testFloatEncoding() throws Exception {
    String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
            '|', new FloatEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    Float[] weights = {null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (weights[i] == null) ? null : PayloadHelper.encodeFloat(weights[i]);
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
}
/** Integer-encoded "|n" tags should decode back; untagged terms get no payload. */
public void testIntEncoding() throws Exception {
    String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
            new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
            '|', new IntegerEncoder());
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

    String[] terms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
    Integer[] values = {null, 1, 2, 3, null, null, null, 5, 99, 83};
    for (int i = 0; i < terms.length; i++) {
        byte[] expected = (values[i] == null) ? null : PayloadHelper.encodeInt(values[i]);
        assertTermEquals(terms[i], filter, termAtt, payAtt, expected);
    }
    assertFalse(filter.incrementToken());
}
/**
 * Resets {@code stream}, pulls the next token, and asserts its term equals
 * {@code expected} and its payload bytes equal {@code expectPay} (null means
 * no payload expected).
 * NOTE(review): reset() is called on every invocation, i.e. before each token
 * — preserved as-is since callers appear to rely on this behavior; confirm
 * against the stream implementations used.
 */
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    stream.reset();
    assertTrue(stream.incrementToken());
    assertEquals(expected, termAtt.toString());

    BytesRef payload = payloadAtt.getPayload();
    if (payload == null) {
        assertTrue("expectPay is not null and it should be", expectPay == null);
        return;
    }
    assertTrue(payload.length + " does not equal: " + expectPay.length,
            payload.length == expectPay.length);
    // Compare byte-by-byte, honoring the payload's offset into its backing array.
    for (int i = 0; i < expectPay.length; i++) {
        byte actual = payload.bytes[i + payload.offset];
        assertTrue(expectPay[i] + " does not equal: " + actual, expectPay[i] == actual);
    }
}
protected void analyze(Collection<Token> result, Reader text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset(); while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset()); token.setFlags(flagsAttValue); //overwriting any flags already set... token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } stream.end(); stream.close(); }
/**
 * A RestrictedField's token stream should emit the field value as the term and
 * the visibility string as the payload.
 */
@Test
public void testColumnVisibilityPayload() throws Exception {
    String visibility = "U";
    String value = "value";
    RestrictedField restrictedField = new RestrictedField(
            new StringField("field", value, Field.Store.NO),
            new FieldVisibility(visibility));
    try (TokenStream tokenStream = restrictedField.tokenStream(new WhitespaceAnalyzer(), null)) {
        CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = tokenStream.getAttribute(PayloadAttribute.class);
        tokenStream.reset();
        boolean sawToken = false;
        while (tokenStream.incrementToken()) {
            sawToken = true;
            assertEquals(value, new String(charTermAttribute.buffer(), 0, charTermAttribute.length()));
            // Fix: decode only the payload's valid region as UTF-8.
            // new String(bytes) used the platform charset over the whole
            // backing array, which breaks when the BytesRef has an offset.
            assertEquals(visibility, payloadAttribute.getPayload().utf8ToString());
        }
        // Fix: the assertions above are vacuous if the stream yields no
        // tokens — require that at least one token was produced.
        assertTrue(sawToken);
    }
}