/**
 * Runs a whitespace-tokenized sentence through a {@link DelimitedPayloadTokenFilter}
 * configured with the default delimiter and an {@link IdentityEncoder}, then checks
 * every emitted term together with its expected payload.
 *
 * <p>Tokens written as {@code term|TAG} are expected to carry the UTF-8 bytes of
 * {@code TAG}; tokens without a delimiter are expected to carry no payload.
 */
@Test
public void testDelimitedPayloads() throws Exception {
  String input = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";

  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(input));
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
      tokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
  filter.reset();

  CharTermAttribute term = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payload = filter.getAttribute(PayloadAttribute.class);

  // Each row is {expected term, expected payload text}; a null tag means "no payload".
  String[][] expected = {
      {"The", null},
      {"quick", "JJ"},
      {"red", "JJ"},
      {"fox", "NN"},
      {"jumped", "VB"},
      {"over", null},
      {"the", null},
      {"lazy", "JJ"},
      {"brown", "JJ"},
      {"dogs", "NN"},
  };
  for (String[] row : expected) {
    byte[] expectedPayload = (row[1] == null) ? null : row[1].getBytes("UTF-8");
    assertTermEquals(row[0], filter, term, payload, expectedPayload);
  }

  // The stream must be exhausted once all expected tokens were consumed.
  assertFalse(filter.incrementToken());
}
/**
 * Checks a {@link FilterPayloadsFilter} constructed with the payloads {@code VB} and
 * {@code NN} and {@code true} as its third argument: the only tokens expected from the
 * stream are the ones whose payload is one of those two values
 * ({@code fox}, {@code jumped}, {@code dogs}).
 *
 * <p>All byte conversions use {@link StandardCharsets#UTF_8} so the test specifies the
 * charset the same way throughout instead of mixing it with the {@code "UTF-8"} name lookup.
 */
@Test
public void testKeepPayloads() throws Exception {
  String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";

  MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  mockTokenizer.setReader(new StringReader(test));
  DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
      mockTokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());

  byte[][] payloads = {
      "VB".getBytes(StandardCharsets.UTF_8),
      "NN".getBytes(StandardCharsets.UTF_8)
  };
  FilterPayloadsFilter filter = new FilterPayloadsFilter(baseFilter, payloads, true);
  filter.reset();

  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

  assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
  assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes(StandardCharsets.UTF_8));
  assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));

  // Nothing else may come out of the stream.
  assertFalse(filter.incrementToken());
}
/**
 * Checks a {@link FilterPayloadsFilter} constructed with the payloads {@code VB} and
 * {@code NN} and {@code false} as its third argument: the tokens carrying one of those
 * payloads ({@code fox}, {@code jumped}, {@code dogs}) are expected to be absent, while
 * all remaining tokens are expected in their original order with their payloads intact.
 */
@Test
public void testFilterPayloads() throws Exception {
  String input = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";

  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(input));
  DelimitedPayloadTokenFilter delimited = new DelimitedPayloadTokenFilter(
      tokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());

  byte[][] rejected = { "VB".getBytes("UTF-8"), "NN".getBytes("UTF-8") };
  FilterPayloadsFilter filter = new FilterPayloadsFilter(delimited, rejected, false);
  filter.reset();

  CharTermAttribute term = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payload = filter.getAttribute(PayloadAttribute.class);

  // Each row is {expected term, expected payload text}; a null tag means "no payload".
  String[][] expected = {
      {"The", null},
      {"quick", "JJ"},
      {"red", "JJ"},
      {"over", null},
      {"the", null},
      {"lazy", "JJ"},
      {"brown", "JJ"},
  };
  for (String[] row : expected) {
    byte[] expectedPayload = (row[1] == null) ? null : row[1].getBytes("UTF-8");
    assertTermEquals(row[0], filter, term, payload, expectedPayload);
  }

  // The stream must be exhausted once all expected tokens were consumed.
  assertFalse(filter.incrementToken());
}
/**
 * Feeds the payload-annotated sentence through a {@link DelimitedPayloadTokenFilter}
 * wrapped in a {@link StripPayloadsTokenFilter} and verifies, via
 * {@code assertTermPayload}, each of the ten terms in input order.
 *
 * <p>NOTE(review): {@code assertTermPayload} is defined outside this block; presumably it
 * checks the term text and that no payload is attached - confirm against the helper.
 */
@Test
public void testStripPayloads() throws Exception {
  String input = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";

  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(input));
  DelimitedPayloadTokenFilter delimited = new DelimitedPayloadTokenFilter(
      tokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
  StripPayloadsTokenFilter filter = new StripPayloadsTokenFilter(delimited);
  filter.reset();

  CharTermAttribute term = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payload = filter.getAttribute(PayloadAttribute.class);

  // Terms are expected in exactly the order they appear in the input sentence.
  String[] expectedTerms = {
      "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"
  };
  for (String expectedTerm : expectedTerms) {
    assertTermPayload(expectedTerm, filter, term, payload);
  }

  // The stream must be exhausted once all expected tokens were consumed.
  assertFalse(filter.incrementToken());
}
/**
 * Configures a {@link DelimitedPayloadTokenFilterFactory} with the {@code "float"} encoder
 * and checks that every token of {@code "the|0.1 quick|0.1 red|0.1"} carries a payload
 * that decodes to {@code 0.1f}.
 *
 * <p>The payload reference is checked for {@code null} before its bytes are read, so a
 * missing payload fails with an assertion message instead of a NullPointerException, and
 * the number of tokens seen is asserted so the loop cannot pass without running.
 */
public void testEncoder() throws Exception {
  Map<String,String> args = new HashMap<String, String>();
  args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(args);
  ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
  factory.inform(loader);

  TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter tf = factory.create(input);
  tf.reset();

  int tokenCount = 0;
  while (tf.incrementToken()) {
    tokenCount++;
    PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payAttr != null);
    // Guard the payload reference itself before dereferencing it.
    assertTrue("payload is null and it shouldn't be", payAttr.getPayload() != null);
    byte[] payData = payAttr.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", payData != null);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
  }

  // The input holds three whitespace-separated tokens; make sure all were examined.
  assertTrue("expected 3 tokens but saw " + tokenCount, tokenCount == 3);
}
/**
 * Configures a {@link DelimitedPayloadTokenFilterFactory} with the fully qualified
 * {@link FloatEncoder} class name and {@code "*"} as delimiter, then checks that every
 * token of {@code "the*0.1 quick*0.1 red*0.1"} carries a payload that decodes to
 * {@code 0.1f}.
 *
 * <p>The payload reference is checked for {@code null} before its bytes are read, so a
 * missing payload fails with an assertion message instead of a NullPointerException, and
 * the number of tokens seen is asserted so the loop cannot pass without running.
 */
public void testDelim() throws Exception {
  Map<String,String> args = new HashMap<String, String>();
  args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
  args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(args);
  ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
  factory.inform(loader);

  TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter tf = factory.create(input);
  tf.reset();

  int tokenCount = 0;
  while (tf.incrementToken()) {
    tokenCount++;
    PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payAttr != null);
    // Guard the payload reference itself before dereferencing it.
    assertTrue("payload is null and it shouldn't be", payAttr.getPayload() != null);
    byte[] payData = payAttr.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", payData != null);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
  }

  // The input holds three whitespace-separated tokens; make sure all were examined.
  assertTrue("expected 3 tokens but saw " + tokenCount, tokenCount == 3);
}
/**
 * Wraps the given stream in a new {@link DelimitedPayloadTokenFilter} built with the
 * {@code delimiter} and {@code encoder} values visible to this method.
 *
 * @param tokenStream the stream to wrap
 * @return the newly created filter
 */
@Override
public TokenStream create(TokenStream tokenStream) {
  return new DelimitedPayloadTokenFilter(tokenStream, delimiter, encoder);
}
/**
 * Wraps the given stream in a new {@link DelimitedPayloadTokenFilter} built with the
 * {@code delimiter} and {@code encoder} values visible to this method.
 *
 * @param input the stream to wrap
 * @return the newly created filter
 */
@Override
public DelimitedPayloadTokenFilter create(TokenStream input) {
  final DelimitedPayloadTokenFilter payloadFilter =
      new DelimitedPayloadTokenFilter(input, delimiter, encoder);
  return payloadFilter;
}