/**
 * Creates a hash function constrained by a highest value and a fixed number of hashed values.
 *
 * @param maxValue The maximum highest returned value; must be strictly positive.
 * @param nbHash The number of resulting hashed values; must be strictly positive.
 * @param hashType The type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is not positive, or if
 *         {@link Hash} yields no instance for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
    // Reject non-positive bounds before touching any state.
    if (maxValue < 1) {
        throw new IllegalArgumentException("maxValue must be > 0");
    }
    if (nbHash < 1) {
        throw new IllegalArgumentException("nbHash must be > 0");
    }

    this.maxValue = maxValue;
    this.nbHash = nbHash;

    // A null instance means the requested hash type could not be resolved.
    this.hashFunction = Hash.getInstance(hashType);
    if (null == this.hashFunction) {
        throw new IllegalArgumentException("hashType must be known");
    }
}
/**
 * Runs the key, add-keys, exception-check, write/read and odd/even-absent strategies
 * against a {@link DynamicBloomFilter} built with the Jenkins hash, then checks that the
 * filter's string form is not null.
 */
@Test
public void testDynamicBloomFilter() {
    final int hashId = Hash.JENKINS_HASH;
    final Filter dynamicFilter = new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

    // Strategies exercised for the dynamic filter.
    final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
        BloomFilterTestStrategy.KEY_TEST_STRATEGY,
        BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
        BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
        BloomFilterTestStrategy.WRITE_READ_STRATEGY,
        BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY);

    BloomFilterCommonTester.of(hashId, numInsertions)
        .withFilterInstance(dynamicFilter)
        .withTestCases(strategies)
        .test();

    assertNotNull("testDynamicBloomFilter error ", dynamicFilter.toString());
}
/**
 * Runs the full set of common strategies (key, add-keys, exception-check, odd/even-absent,
 * write/read, and the OR/AND/XOR combinations) against a {@link BloomFilter} and a
 * {@link RetouchedBloomFilter}, both built with the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
    final int hashId = Hash.JENKINS_HASH;

    // Strategies applied to every registered filter instance.
    final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
        BloomFilterTestStrategy.KEY_TEST_STRATEGY,
        BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
        BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
        BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
        BloomFilterTestStrategy.WRITE_READ_STRATEGY,
        BloomFilterTestStrategy.FILTER_OR_STRATEGY,
        BloomFilterTestStrategy.FILTER_AND_STRATEGY,
        BloomFilterTestStrategy.FILTER_XOR_STRATEGY);

    final BloomFilter plainFilter = new BloomFilter(bitSize, hashFunctionNumber, hashId);
    final RetouchedBloomFilter retouchedFilter =
        new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId);

    BloomFilterCommonTester.of(hashId, numInsertions)
        .withFilterInstance(plainFilter)
        .withFilterInstance(retouchedFilter)
        .withTestCases(strategies)
        .test();
}
/**
 * Runs the full set of common strategies (key, add-keys, exception-check, odd/even-absent,
 * write/read, and the OR/AND/XOR combinations) against a {@link BloomFilter} and a
 * {@link RetouchedBloomFilter}, both built with the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
    final int hashId = Hash.MURMUR_HASH;

    // Strategies applied to every registered filter instance.
    final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
        BloomFilterTestStrategy.KEY_TEST_STRATEGY,
        BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
        BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
        BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
        BloomFilterTestStrategy.WRITE_READ_STRATEGY,
        BloomFilterTestStrategy.FILTER_OR_STRATEGY,
        BloomFilterTestStrategy.FILTER_AND_STRATEGY,
        BloomFilterTestStrategy.FILTER_XOR_STRATEGY);

    final BloomFilter plainFilter = new BloomFilter(bitSize, hashFunctionNumber, hashId);
    final RetouchedBloomFilter retouchedFilter =
        new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId);

    BloomFilterCommonTester.of(hashId, numInsertions)
        .withFilterInstance(plainFilter)
        .withFilterInstance(retouchedFilter)
        .withTestCases(strategies)
        .test();
}
/**
 * Creates a tester for the given hash type and insertion count, and installs the helper
 * that supplies the expected false positives for each supported hash.
 *
 * @param hashId identifier of the hash function, stored as this tester's hash type
 * @param numInsertions number of insertions, stored on this tester
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
    this.hashType = hashId;
    this.numInsertions = numInsertions;
    this.preAssertionHelper = new PreAssertionHelper() {

        /**
         * Returns the false positives for odd and even values under 1000 for the given
         * hash; fails the test when the hash is not one of the known ones.
         */
        @Override
        public ImmutableSet<Integer> falsePositives(int hashId) {
            if (hashId == Hash.JENKINS_HASH) {
                // false positives for odd and even under 1000
                return ImmutableSet.of(99, 963);
            }
            if (hashId == Hash.MURMUR_HASH) {
                // false positives for odd and even under 1000
                return ImmutableSet.of(769, 772, 810, 874);
            }
            // Fail fast on an unknown hash; the return only satisfies the compiler.
            Assert.assertFalse("unknown hash error", true);
            return ImmutableSet.of();
        }
    };
}
@Test public void shouldWriteAndReadFilter() throws IOException { // Given final BloomFilter filter = new BloomFilter(100, 5, Hash.MURMUR_HASH); filter.add(new Key("ABC".getBytes())); filter.add(new Key("DEF".getBytes())); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final DataOutputStream out = new DataOutputStream(baos); filter.write(out); String x = new String(baos.toByteArray(), AccumuloStoreConstants.BLOOM_FILTER_CHARSET); final ByteArrayInputStream bais = new ByteArrayInputStream(x.getBytes(AccumuloStoreConstants.BLOOM_FILTER_CHARSET)); // When final DataInputStream in = new DataInputStream(bais); final BloomFilter read = new BloomFilter(); read.readFields(in); // Then assertTrue(read.membershipTest(new Key("ABC".getBytes()))); assertTrue(read.membershipTest(new Key("DEF".getBytes()))); assertFalse(read.membershipTest(new Key("lkjhgfdsa".getBytes()))); }
/**
 * Repeatedly fetches encoded Bloom filter payloads and ORs each successfully decoded one
 * into this worker's filter.
 * <p>
 * The loop runs while {@code isDone} is false or {@code bloomfilters} is non-empty, and
 * sets {@code isFinished} once it exits. A payload that cannot be decoded is dropped
 * rather than merged, so a partially populated filter never reaches {@code this.bf}.
 */
@Override
public void run() {
    while (!isDone || !bloomfilters.isEmpty()) {
        String encoded = getBloomfilter();
        if (encoded == null) {
            // Nothing was handed out on this pass, so there is nothing to merge.
            // NOTE(review): the previous code OR-ed a freshly built, unpopulated filter
            // here; this assumes that merge had no effect - confirm against
            // KR2RMLBloomFilter.or.
            continue;
        }

        // Named differently from the field so the merge target below is unambiguous.
        KR2RMLBloomFilter incoming = new KR2RMLBloomFilter(
                KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash,
                Hash.JENKINS_HASH);
        try {
            incoming.populateFromCompressedAndBase64EncodedString(encoded);
        } catch (IOException e) {
            // Best effort: skip this payload instead of merging a filter whose state is
            // unknown after a failed decode.
            // TODO(review): no logger is visible in this scope; report the failure once
            // one is available.
            continue;
        }

        this.bf.or(incoming);
    }
    isFinished = true;
}