/**
 * initBloomfilters
 *
 * @param falsePositiveProbability the desired false positive probability
 * @param expectedNumberOfElements the expected number of elements to insert
 */
private void initBloomfilters(double falsePositiveProbability, long expectedNumberOfElements) {
    double singleBfMaxAddElements = BloomFilterUtils.caculateNumberAddElements(Integer.MAX_VALUE,
            falsePositiveProbability);
    double singleBfElements = expectedNumberOfElements;
    int bflen = 1;
    // If the expected number of elements exceeds Integer.MAX_VALUE, split across multiple filters (bucket design).
    if (expectedNumberOfElements > Integer.MAX_VALUE) {
        bflen = (int) Math.ceil(expectedNumberOfElements / singleBfMaxAddElements);
        singleBfElements = Math.round((double) expectedNumberOfElements / bflen) + 1;
    }
    this.bloomfilters = new ArrayList<>(bflen);
    checkArgument(bflen >= 1, "the length of bloomfilters cannot be smaller than one.");
    for (int i = 0; i < bflen; i++) {
        BloomFilter<CharSequence> bf = BloomFilter.create(Funnels.stringFunnel(this.charset),
                (long) singleBfElements, falsePositiveProbability);
        this.bloomfilters.add(bf);
    }
}
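// A self-contained sketch of the same sharding idea, assuming only Guava's BloomFilter API.
// The BloomFilterUtils helper used above is project-specific and not reproduced; the per-filter
// capacity below is an assumed placeholder, and the class/variable names and numbers are
// illustrative, not taken from the snippet above.
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

public class ShardedBloomFilterSketch {
    public static void main(String[] args) {
        double fpp = 0.01;
        long expected = 5_000_000_000L;             // more elements than Integer.MAX_VALUE
        long perFilterCapacity = Integer.MAX_VALUE; // assumed per-filter limit

        // Number of shards so that no single filter exceeds the per-filter capacity,
        // and a ceiling-divided per-shard capacity.
        int shards = (int) Math.ceil((double) expected / perFilterCapacity);
        long perShard = (expected + shards - 1) / shards;

        List<BloomFilter<CharSequence>> filters = new ArrayList<>(shards);
        for (int i = 0; i < shards; i++) {
            // note: filters of this size need gigabytes of memory; the figures are purely illustrative
            filters.add(BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), perShard, fpp));
        }

        // Route each element to a shard by its hash so lookups hit the same shard as inserts.
        String key = "https://example.com/page";
        int shard = Math.floorMod(key.hashCode(), shards);
        filters.get(shard).put(key);
        System.out.println(filters.get(shard).mightContain(key)); // prints true
    }
}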
@Test
public void knownZeroTags32() {
    // manual instantiation to force salts to be static
    SerializableSaltedHasher<Integer> hasher = new SerializableSaltedHasher<>(0, 0, Funnels.integerFunnel(),
            Algorithm.Murmur3_32);
    IndexTagCalc<Integer> indexer = new IndexTagCalc<>(hasher, 128, 4);
    // find some 0 value tags
    int zeroTags = 0;
    int i = 0;
    ArrayList<Integer> zeroTagInputs = new ArrayList<>();
    while (zeroTags < 20) {
        if (indexer.getTagValue32(hasher.hashObj(i).asInt()) == 0) {
            zeroTagInputs.add(i);
            zeroTags++;
        }
        i++;
    }
    for (Integer tag : zeroTagInputs) {
        // none of the zero value tags should be zero from indexer if
        // rehashing is working properly
        assertFalse(indexer.generate(tag).tag == 0);
    }
}
@Test
public void sanityTagIndexBitsUsed64() {
    // manual instantiation to force salts to be static
    SerializableSaltedHasher<Integer> hasher = new SerializableSaltedHasher<>(0, 0, Funnels.integerFunnel(),
            Algorithm.sipHash24);
    IndexTagCalc<Integer> indexer = new IndexTagCalc<>(hasher, (long) Math.pow(2, 31), 32);
    long setBitsIndex = 0;
    long setBitsTag = 0;
    // should be enough to set all bits being used...
    for (int i = 0; i < 1234567; i++) {
        BucketAndTag bt = indexer.generate(i);
        setBitsIndex |= bt.index;
        setBitsTag |= bt.tag;
    }
    // will be true if we're using the right number of bits for tag and
    // index for this calculator
    assertTrue(Long.bitCount(setBitsIndex) == 31);
    assertTrue(Long.bitCount(setBitsTag) == 32);
    // check where the set bits are
    long bitMask32 = -1L >>> 32; // mask for the lower 32 bits set
    long bitMask31 = bitMask32 >>> 1; // mask for the lower 31 bits set
    assertTrue(bitMask32 == setBitsTag);
    assertTrue(bitMask31 == setBitsIndex);
}
@Test
public void sanityTagIndexBitsUsed128() {
    // manual instantiation to force salts to be static
    SerializableSaltedHasher<Integer> hasher = new SerializableSaltedHasher<>(0, 0, Funnels.integerFunnel(),
            Algorithm.sha256);
    IndexTagCalc<Integer> indexer = new IndexTagCalc<>(hasher, (long) Math.pow(2, 62), 64);
    long setBitsIndex = 0;
    long setBitsTag = 0;
    // should be enough to set all bits being used...
    for (int i = 0; i < 1234567; i++) {
        BucketAndTag bt = indexer.generate(i);
        setBitsIndex |= bt.index;
        setBitsTag |= bt.tag;
    }
    // will be true if we're using the right number of bits for tag and
    // index for this calculator
    assertTrue(Long.bitCount(setBitsIndex) == 64);
    assertTrue(Long.bitCount(setBitsTag) == 64);
    // check where the set bits are
    long bitMask = -1L; // mask for all 64 bits set
    assertTrue(bitMask == setBitsIndex);
    assertTrue(bitMask == setBitsTag);
}
@Test
public void sanityTagIndexNotSame() {
    // manual instantiation to force salts to be static
    SerializableSaltedHasher<Integer> hasher = new SerializableSaltedHasher<>(0, 0, Funnels.integerFunnel(),
            Algorithm.sha256);
    IndexTagCalc<Integer> indexer = new IndexTagCalc<>(hasher, (long) Math.pow(2, 62), 64);
    // should be enough to set all bits being used...
    for (int i = 0; i < 1234567; i += 4) {
        BucketAndTag bt = indexer.generate(i);
        BucketAndTag bt2 = indexer.generate(i + 1);
        // we use two equalities to make collisions super-rare since we
        // otherwise only have 32 bits of hash to compare
        // we're checking for 2 collisions in 2 pairs of 32 bit hash. Should
        // be as hard as getting a single 64 bit collision aka... never
        // happen
        assertTrue(bt.index != bt.tag || bt2.index != bt2.tag);
    }
}
@Test
public void sanityFalseNegative() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    // add them to filter
    for (int i = 0; i < 100000; i++) {
        // will return false if filter is full...should NOT be
        assertTrue(filter.put(i));
    }
    // check for false negatives
    int falseNegatives = 0;
    for (int i = 0; i < 100000; i++) {
        if (!filter.mightContain(i)) {
            falseNegatives++;
        }
    }
    assertTrue(falseNegatives + " false negatives detected", falseNegatives == 0);
}
@Test
public void sanityFailedDelete() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    // make a list of test values (all unique)
    int maxInsertedVal = 100000;
    for (int i = 0; i < maxInsertedVal; i++) {
        // will return false if filter is full...should NOT be
        assertTrue(filter.put(i));
    }
    // check for false deletes (if I can't delete something that's definitely
    // there)
    int falseDeletes = 0;
    for (int i = 0; i < maxInsertedVal; i++) {
        if (!filter.delete(i)) {
            falseDeletes++;
        }
    }
    assertTrue(falseDeletes + " false deletions detected", falseDeletes == 0);
}
@Test
public void sanityFalseDeleteRate() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    int maxInsertedVal = 100000;
    for (int i = 0; i < maxInsertedVal; i++) {
        // will return false if filter is full...should NOT be
        assertTrue(filter.put(i));
    }
    // check for false delete rate (deleted something I didn't add
    // successfully)
    int falseDeletes = 0;
    // false delete rate should roughly match false positive rate
    int totalAttempts = 10000;
    maxInsertedVal += 1;
    for (int i = maxInsertedVal; i < totalAttempts + maxInsertedVal; i++) {
        if (filter.delete(i))
            falseDeletes++;
    }
    assertTrue(
            falseDeletes + " false deletions detected. False delete rate is above 0.02 on filter with 0.01 false positive rate",
            (double) falseDeletes / totalAttempts < 0.02);
}
@Test
public void sanityFalsePositiveRate() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    int maxInsertedVal = 100000;
    // make a list of test values (all unique)
    for (int i = 0; i < maxInsertedVal; i++) {
        // will return false if filter is full...should NOT be
        assertTrue(filter.put(i));
    }
    // check for false positive rate (contains something I didn't add)
    int falsePositives = 0;
    maxInsertedVal += 1;
    int totalAttempts = 100000;
    for (int i = maxInsertedVal; i < totalAttempts + maxInsertedVal; i++) {
        if (filter.mightContain(i))
            falsePositives++;
    }
    assertTrue((double) falsePositives / totalAttempts + " false positive rate is above limit",
            (double) falsePositives / totalAttempts < 0.02);
}
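// For comparison, the same kind of empirical false-positive check can be written against Guava's
// BloomFilter. This is a sketch assuming JUnit 4 plus Guava on the classpath; it is not part of
// the cuckoo-filter test suite above, and the class and method names are made up.
import static org.junit.Assert.assertTrue;

import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import org.junit.Test;

public class BloomFalsePositiveSanitySketch {

    @Test
    public void empiricalFppStaysNearConfiguredRate() {
        BloomFilter<Integer> bloom = BloomFilter.create(Funnels.integerFunnel(), 100000, 0.01);
        for (int i = 0; i < 100000; i++) {
            bloom.put(i);
        }
        int falsePositives = 0;
        int attempts = 100000;
        // query only values that were never inserted
        for (int i = 100001; i < 100001 + attempts; i++) {
            if (bloom.mightContain(i)) {
                falsePositives++;
            }
        }
        double observed = (double) falsePositives / attempts;
        // expectedFpp() is Guava's own estimate for the current fill level
        assertTrue("observed false positive rate " + observed + " should stay near the configured 0.01",
                observed < 0.02 && bloom.expectedFpp() < 0.02);
    }
}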
@Test
public void sanityTestVictimCache() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    for (int i = 0; i < 9; i++) {
        assertTrue(filter.put(42));
    }
    assertTrue(filter.getCount() == 9);
    // at this point victim cache is in use since both buckets for 42 are full
    for (int i = 0; i < 9; i++) {
        assertTrue(filter.mightContain(42));
        assertTrue(filter.delete(42));
    }
    assertFalse(filter.delete(42));
    assertFalse(filter.mightContain(42));
    assertTrue(filter.getCount() == 0);
}
@Test
public void testEquals() {
    CuckooFilter<Integer> partFull = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    CuckooFilter<Integer> full = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    for (int i = 0; i < 1000000; i++) {
        assertTrue(partFull.put(i));
    }
    for (int i = 0;; i++) {
        if (!full.put(i))
            break;
    }
    new EqualsTester().addEqualityGroup(partFull).addEqualityGroup(full)
            .addEqualityGroup(new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
                    .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build())
            .addEqualityGroup(new CuckooFilter.Builder<>(Funnels.longFunnel(), 2000000).withFalsePositiveRate(0.01)
                    .withHashAlgorithm(Algorithm.Murmur3_32).build())
            .addEqualityGroup(new CuckooFilter.Builder<>(Funnels.integerFunnel(), 1000000)
                    .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build())
            .addEqualityGroup(new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
                    .withFalsePositiveRate(0.03).withHashAlgorithm(Algorithm.Murmur3_32).build())
            .addEqualityGroup(new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
                    .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_128).build())
            .testEquals();
}
@Test
public void equals2() {
    // numInsertions param undersized purposely to force underlying storage saturation
    CuckooFilter<String> cf1 = CuckooFilter.create(Funnels.unencodedCharsFunnel(), 2);
    cf1.add("1");
    cf1.add("2");
    cf1.add("3");
    cf1.add("4");
    CuckooFilter<String> cf2 = CuckooFilter.create(Funnels.unencodedCharsFunnel(), 2);
    cf2.add("4");
    cf2.add("3");
    cf2.add("2");
    cf2.add("1");
    assertTrue("equals should be true when tables are equivalent but ordered differently", cf1.equals(cf2));
    new EqualsTester()
            .addEqualityGroup(cf1, cf2)
            .testEquals();
}
@Test
public void addAll() {
    int element1 = 1;
    int element2 = 2;
    CuckooFilter<Integer> cf1 = CuckooFilter.create(Funnels.integerFunnel(), 100);
    cf1.add(element1);
    assertTrue(cf1.contains(element1));
    assertFalse(cf1.contains(element2));
    CuckooFilter<Integer> cf2 = CuckooFilter.create(Funnels.integerFunnel(), 100);
    cf2.add(element2);
    assertFalse(cf2.contains(element1));
    assertTrue(cf2.contains(element2));
    assertTrue(cf1.isCompatible(cf2));
    cf1.addAll(cf2);
    assertTrue(cf1.contains(element1));
    assertTrue(cf1.contains(element2));
    assertFalse(cf2.contains(element1));
    assertTrue(cf2.contains(element2));
}
@Test
public void addAllFails() {
    int element = 1;
    CuckooFilter<Integer> cf1 = CuckooFilter.create(Funnels.integerFunnel(), 100);
    // purposely fill buckets that contain entries for element
    while (cf1.add(element)) {
        assertTrue(cf1.contains(element));
    }
    CuckooFilter<Integer> cf2 = CuckooFilter.create(Funnels.integerFunnel(), 100);
    cf2.add(element);
    assertTrue(cf2.contains(element));
    assertTrue(cf1.isCompatible(cf2));
    assertFalse("addAll should return false when buckets at index & altIndex are already full", cf1.addAll(cf2));
}
GrowthTracker(
        final SerializableFunction<OutputT, KeyT> keyFn,
        final Coder<KeyT> outputKeyCoder,
        GrowthState<OutputT, KeyT, TerminationStateT> state,
        Growth.TerminationCondition<?, TerminationStateT> terminationCondition) {
    this.coderFunnel = new Funnel<OutputT>() {
        @Override
        public void funnel(OutputT from, PrimitiveSink into) {
            try {
                // Rather than hashing the output itself, hash the output key.
                KeyT outputKey = keyFn.apply(from);
                outputKeyCoder.encode(outputKey, Funnels.asOutputStream(into));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
    this.terminationCondition = terminationCondition;
    this.state = state;
    this.isOutputComplete = state.isOutputComplete;
    this.pollWatermark = state.pollWatermark;
    this.terminationState = state.terminationState;
    this.pending = Lists.newLinkedList(state.pending);
}
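// The funnel above adapts a Beam Coder to Guava's PrimitiveSink via Funnels.asOutputStream.
// As a point of comparison, a minimal hand-written Funnel for a simple value type can push its
// fields into the sink directly. The UserKey type and funnel below are illustrative inventions,
// not part of the Beam code above.
import com.google.common.hash.Funnel;
import com.google.common.hash.PrimitiveSink;
import java.nio.charset.StandardCharsets;

class UserKey {
    final long id;
    final String region;

    UserKey(long id, String region) {
        this.id = id;
        this.region = region;
    }
}

// The single-element-enum pattern keeps the funnel serializable with well-defined equals semantics,
// which matters when the enclosing filter is itself serialized.
enum UserKeyFunnel implements Funnel<UserKey> {
    INSTANCE;

    @Override
    public void funnel(UserKey from, PrimitiveSink into) {
        into.putLong(from.id).putString(from.region, StandardCharsets.UTF_8);
    }
}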
@Override
public Node getNextNode(ImmutableList<Node> pool, Map<UUID, Node> okNodes, String sessionID) {
    List<String> idStrings = new ArrayList<>();
    okNodes.keySet().stream().forEach(xs -> idStrings.add(xs.toString()));
    RendezvousHash rendezvousHash = new RendezvousHash(
            Funnels.stringFunnel(Charset.defaultCharset()), idStrings, idStrings.size());
    for (Object nodeID : rendezvousHash.get(sessionID.getBytes())) {
        if (nodeID instanceof String) {
            UUID id = UUID.fromString((String) nodeID);
            Node nextNode = okNodes.get(id);
            if (okToPick(nextNode)) {
                return nextNode;
            }
        } else {
            return null;
        }
    }
    return null;
}
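// RendezvousHash above is a project-specific class. The sketch below only illustrates the
// underlying technique (highest-random-weight hashing) using plain Guava hashing; the class
// and method names are made up and the node list is purely an example.
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;

public final class HrwSketch {
    private static final HashFunction HASH = Hashing.murmur3_128();

    // Each node is scored by hashing (node, key) together; the key is routed to the
    // highest-scoring node, so removing one node only remaps that node's keys.
    static Optional<String> pick(List<String> nodes, String key) {
        return nodes.stream().max(Comparator.comparingLong(
                node -> HASH.newHasher()
                        .putString(node, StandardCharsets.UTF_8)
                        .putString(key, StandardCharsets.UTF_8)
                        .hash()
                        .asLong()));
    }

    public static void main(String[] args) {
        List<String> nodes = List.of("node-a", "node-b", "node-c");
        System.out.println(pick(nodes, "session-1234").orElse("none"));
    }
}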
/**
 * Makes all changes made since the previous commit/rollback permanent and
 * releases any database locks currently held by this <code>Connection</code>
 * object. This method should be used only when auto-commit mode has been
 * disabled.
 *
 * @exception java.sql.SQLException if a database access error occurs,
 *            this method is called while participating in a distributed transaction,
 *            if this method is called on a closed connection or this
 *            <code>Connection</code> object is in auto-commit mode
 * @see #setAutoCommit
 */
public synchronized void commit() throws SQLException {
    numberOfCommits++;
    RetryExecution execution = new RetryExecution("COMMIT");
    execution.execute(connection, new RetryCommand<Void>() {
        @Override
        public Void run() throws SQLException {
            if (tripleBatch != null && tripleBatch.size() > 0) {
                flushBatch();
            }
            deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
            if (connection != null) {
                connection.commit();
            }
            return null;
        }
    });
    this.transactionId = getNextSequence();
}
/**
 * Undoes all changes made in the current transaction and releases any
 * database locks currently held by this <code>Connection</code> object.
 * This method should be used only when auto-commit mode has been disabled.
 *
 * @exception java.sql.SQLException if a database access error occurs,
 *            this method is called while participating in a distributed transaction,
 *            this method is called on a closed connection or this
 *            <code>Connection</code> object is in auto-commit mode
 * @see #setAutoCommit
 */
public void rollback() throws SQLException {
    if (tripleBatch != null && tripleBatch.size() > 0) {
        synchronized (tripleBatch) {
            for (KiWiTriple triple : tripleBatch) {
                triple.setId(-1L);
            }
            tripleBatch.clear();
        }
    }
    deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
    if (connection != null && !connection.isClosed()) {
        connection.rollback();
    }
    this.transactionId = getNextSequence();
}
@Override
public HashCode hash(File file) {
    try {
        Hasher hasher = createFileHasher();
        Files.copy(file, Funnels.asOutputStream(hasher));
        return hasher.hash();
    } catch (IOException e) {
        throw new UncheckedIOException(String.format("Failed to create MD5 hash for file '%s'.", file), e);
    }
}
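// createFileHasher() above is project-specific. A self-contained version of the same
// Funnels.asOutputStream pattern using only Guava might look like the sketch below; MD5 is
// chosen only to match the error message above, and the class/method names are made up.
import com.google.common.hash.Funnels;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;

public final class FileHashSketch {

    // Streams the file's bytes into the Hasher through the OutputStream adapter.
    static HashCode md5Of(File file) throws IOException {
        Hasher hasher = Hashing.md5().newHasher();
        Files.copy(file, Funnels.asOutputStream(hasher));
        return hasher.hash();
    }
    // Files.asByteSource(file).hash(Hashing.md5()) is an equivalent one-liner when no
    // pre-configured Hasher needs to be reused.
}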
private RendezvousHash buildHasher(Map<String, RateLimiter> limiterMap, int poolSize, double rate) {
    List<String> _tempPool = new ArrayList<>();
    for (int i = 0; i < poolSize; i++) {
        String id = UUID.randomUUID().toString();
        _tempPool.add(id);
        limiterMap.put(id, RateLimiter.create(rate));
    }
    return new RendezvousHash(Funnels.stringFunnel(XrpcConstants.DEFAULT_CHARSET), _tempPool);
}
@Test
public void get() throws Exception {
    List<String> nodeList = new ArrayList<>();
    Map<String, List<String>> mm = PlatformDependent.newConcurrentHashMap();
    for (int i = 0; i < 100; i++) {
        nodeList.add("Host" + i);
        mm.put("Host" + i, new ArrayList<>());
    }
    RendezvousHash rendezvousHash = new RendezvousHash(Funnels.stringFunnel(Charset.defaultCharset()), nodeList);
    Random r = new Random();
    for (int i = 0; i < 100000; i++) {
        String thing = Integer.toString(r.nextInt(123456789));
        List<String> hosts = rendezvousHash.get(thing.getBytes(), 3);
        hosts.forEach(xs -> mm.get(xs).add(thing));
    }
    List<Integer> xx = new ArrayList<>();
    mm.keySet().forEach(xs -> xx.add(mm.get(xs).size()));
    Double xd = xx.stream().mapToInt(x -> x).average().orElse(-1);
    assertEquals(3000, xd.intValue());
}
@Test
void simpleGet() {
    Map<String, String> map = new ImmutableMap.Builder<String, String>()
            .put("a", "1")
            .put("b", "2")
            .put("c", "3")
            .put("d", "4")
            .put("e", "5")
            .build();
    RendezvousHash hasher = new RendezvousHash(Funnels.stringFunnel(XrpcConstants.DEFAULT_CHARSET), map.keySet());
    String k1 = "foo";
    String k2 = "bar";
    String k3 = "baz";
    String k4 = "biz";
    assertEquals(hasher.getOne(k1.getBytes()), hasher.getOne(k1.getBytes()));
    assertEquals(hasher.getOne(k2.getBytes()), hasher.getOne(k2.getBytes()));
    assertEquals(hasher.getOne(k3.getBytes()), hasher.getOne(k3.getBytes()));
    assertEquals(hasher.getOne(k4.getBytes()), hasher.getOne(k4.getBytes()));
    System.out.println(hasher.getOne(k1.getBytes()));
    System.out.println(hasher.getOne(k2.getBytes()));
    System.out.println(hasher.getOne(k3.getBytes()));
    System.out.println(hasher.getOne(k4.getBytes()));
    System.out.println(hasher.getOne(k1.getBytes()));
    System.out.println(hasher.getOne(k2.getBytes()));
    System.out.println(hasher.getOne(k3.getBytes()));
    System.out.println(hasher.getOne(k4.getBytes()));
    assertNotEquals(hasher.getOne(k1.getBytes()), hasher.getOne(k4.getBytes()));
}
/**
 * Initializes an in-memory Bloom filter.
 *
 * @param expectedInsertions the estimated maximum number of elements
 * @param fpp                the acceptable false positive probability
 */
public URIBloomFilter(long expectedInsertions, double fpp) {
    urlCounter = new AtomicLong(0);
    this.expectedInsertions = expectedInsertions;
    this.fpp = fpp;
    bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charset.defaultCharset()), expectedInsertions, fpp);
}
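// A usage sketch for this kind of URL filter, written against Guava's BloomFilter directly since
// the rest of URIBloomFilter's API is not shown above; the numbers, URL, and crawl comment are
// illustrative only.
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import java.nio.charset.StandardCharsets;

public class UrlDedupSketch {
    public static void main(String[] args) {
        // 1,000,000 expected URLs with a 0.1% false positive probability
        BloomFilter<CharSequence> seen =
                BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 1_000_000, 0.001);

        String url = "https://example.com/page";
        if (!seen.mightContain(url)) { // a negative answer is always correct (no false negatives)
            seen.put(url);
            // ... schedule the URL for crawling ...
        }
        System.out.println(seen.mightContain(url)); // true from now on
    }
}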
@Test
public void testSeeds() {
    // test a seeded hash alg
    SerializableSaltedHasher<Integer> hasher1 = new SerializableSaltedHasher<>(1, 0, Funnels.integerFunnel(),
            Algorithm.Murmur3_32);
    SerializableSaltedHasher<Integer> hasher2 = new SerializableSaltedHasher<>(2, 0, Funnels.integerFunnel(),
            Algorithm.Murmur3_32);
    assertFalse(hasher2.hashObj(42).equals(hasher1.hashObj(42)));
    assertFalse(hasher2.hashObjWithSalt(42, 1).equals(hasher1.hashObjWithSalt(42, 1)));
    assertFalse(hasher2.hashObjWithSalt(42, 1).equals(hasher1.hashObj(42)));
    // test salted alg
    hasher1 = new SerializableSaltedHasher<>(1, 0, Funnels.integerFunnel(), Algorithm.sha256);
    hasher2 = new SerializableSaltedHasher<>(2, 0, Funnels.integerFunnel(), Algorithm.sha256);
    assertFalse(hasher2.hashObj(42).equals(hasher1.hashObj(42)));
    assertFalse(hasher2.hashObjWithSalt(42, 1).equals(hasher1.hashObjWithSalt(42, 1)));
    assertFalse(hasher2.hashObjWithSalt(42, 1).equals(hasher1.hashObj(42)));
    // test seeded-salted algs like SIPHash
    ArrayList<SerializableSaltedHasher<Integer>> hashAry = new ArrayList<SerializableSaltedHasher<Integer>>();
    hashAry.add(new SerializableSaltedHasher<Integer>(1, 1, Funnels.integerFunnel(), Algorithm.sipHash24));
    hashAry.add(new SerializableSaltedHasher<Integer>(2, 1, Funnels.integerFunnel(), Algorithm.sipHash24));
    hashAry.add(new SerializableSaltedHasher<Integer>(1, 2, Funnels.integerFunnel(), Algorithm.sipHash24));
    hashAry.add(new SerializableSaltedHasher<Integer>(2, 2, Funnels.integerFunnel(), Algorithm.sipHash24));
    HashSet<Long> results = new HashSet<>();
    for (SerializableSaltedHasher<Integer> hashVal : hashAry) {
        long unsalted = hashVal.hashObj(42).asLong();
        assertFalse(results.contains(unsalted));
        results.add(unsalted);
        long salty = hashVal.hashObjWithSalt(42, 1).asLong();
        assertFalse(results.contains(salty));
        results.add(salty);
    }
}
@Test
public void testAutoAlgorithm() {
    SerializableSaltedHasher<Integer> hasher = SerializableSaltedHasher.create(100, Funnels.integerFunnel());
    assertTrue(hasher.codeBitSize() == 128);
    hasher = SerializableSaltedHasher.create(30, Funnels.integerFunnel());
    assertTrue(hasher.codeBitSize() < 128);
}
@Test
public void testEquals() {
    new EqualsTester()
            .addEqualityGroup(
                    new SerializableSaltedHasher<byte[]>(0, 0, Funnels.byteArrayFunnel(), Algorithm.Murmur3_32))
            .addEqualityGroup(
                    new SerializableSaltedHasher<byte[]>(1, 0, Funnels.byteArrayFunnel(), Algorithm.Murmur3_32))
            .addEqualityGroup(
                    new SerializableSaltedHasher<byte[]>(0, 1, Funnels.byteArrayFunnel(), Algorithm.Murmur3_32))
            .addEqualityGroup(
                    new SerializableSaltedHasher<Integer>(0, 0, Funnels.integerFunnel(), Algorithm.Murmur3_32))
            .addEqualityGroup(
                    new SerializableSaltedHasher<byte[]>(0, 0, Funnels.byteArrayFunnel(), Algorithm.sha256))
            .testEquals();
}
@Test
public void testCopy() {
    SerializableSaltedHasher<Integer> hasher = new SerializableSaltedHasher<>(0, 0, Funnels.integerFunnel(),
            Algorithm.Murmur3_32);
    SerializableSaltedHasher<Integer> hasherCopy = hasher.copy();
    assertTrue(hasherCopy.equals(hasher));
    assertNotSame(hasher, hasherCopy);
}
@Test
public void testCopy() {
    IndexTagCalc<Integer> calc = IndexTagCalc.create(Funnels.integerFunnel(), 128, 4);
    IndexTagCalc<Integer> calcCopy = calc.copy();
    assertTrue(calcCopy.equals(calc));
    assertNotSame(calc, calcCopy);
}
@Test
public void brokenAltIndex32() {
    Random rando = new Random();
    IndexTagCalc<Integer> calc = IndexTagCalc.create(Funnels.integerFunnel(), 2048, 14);
    for (int i = 0; i < 10000; i++) {
        BucketAndTag pos = calc.generate(rando.nextInt());
        long altIndex = calc.altIndex(pos.index, pos.tag);
        assertTrue(pos.index == calc.altIndex(altIndex, pos.tag));
    }
}
@Test
public void brokenAltIndex64() {
    Random rando = new Random();
    IndexTagCalc<Integer> calc = IndexTagCalc.create(Funnels.integerFunnel(), (long) Math.pow(2, 32), 5);
    for (int i = 0; i < 10000; i++) {
        BucketAndTag pos = calc.generate(rando.nextInt());
        long altIndex = calc.altIndex(pos.index, pos.tag);
        assertTrue(pos.index == calc.altIndex(altIndex, pos.tag));
    }
}
@Test
public void testCreateDifferentHashLengths() {
    new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000).withHashAlgorithm(Algorithm.Murmur3_32).build();
    new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000).withHashAlgorithm(Algorithm.sipHash24).build();
    new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000).withHashAlgorithm(Algorithm.Murmur3_128).build();
    new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000).withHashAlgorithm(Algorithm.sha256).build();
}
@Test
public void sanityApproximateCount() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    // fill buckets with duplicates, count along the way
    for (int i = 0; i < 8; i++) {
        assertTrue(filter.put(42));
        assertTrue(filter.approximateCount(42) == i + 1);
    }
    // should fill victim
    assertTrue(filter.put(42));
    assertTrue(filter.approximateCount(42) == 9);
    // should fail
    assertFalse(filter.put(42));
    // count should be the same
    assertTrue(filter.approximateCount(42) == 9);
    // should delete victim and another position
    assertTrue(filter.delete(42) && filter.delete(42));
    // should be 7 copies now
    assertTrue(filter.approximateCount(42) == 7);
    // loop delete rest
    for (int i = 7; i > 0; i--) {
        assertTrue(filter.delete(42));
        assertTrue(filter.approximateCount(42) == i - 1);
    }
    // should be empty
    assertFalse(filter.mightContain(42));
}
@Test
public void sanityOverFillFilter() {
    // make test set bigger than any size filter we're running
    for (int i = 1; i < 10; i++) {
        int filterKeys = 100000 * i;
        CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), filterKeys)
                .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
        // make a list of test values (all unique)
        // makes sure that filter can handle a 0.8 load factor before
        // insertion failure
        int countFailedAt = 0;
        while (true) {
            if (!filter.put(countFailedAt))
                break;
            countFailedAt++;
        }
        // make sure the filter reports as many as we actually put in
        assertTrue("Filter reports " + filter.getCount() + " when we actually added " + countFailedAt
                + " items before failing", filter.getCount() == countFailedAt);
        // it's okay if filter is a bit bigger than we asked for, it should
        // never be more than twice as big plus 1 (due to numBuckets power
        // of 2 rounding)
        assertTrue("We were able to add " + countFailedAt + " keys to a filter that was only made to hold "
                + filterKeys, countFailedAt <= (filterKeys * 2) + 1);
        // keep some tight error bounds to detect small anomalies...just
        // change if errors out too much
        assertTrue("Load Factor only " + filter.getLoadFactor() + " for filter with " + filterKeys
                + " capacity at first insertion failure. Expected more than 0.95",
                filter.getLoadFactor() > .95);
        assertTrue("Load Factor " + filter.getLoadFactor() + " for filter with " + filterKeys
                + " capacity at first insertion failure. Expected less than .995",
                filter.getLoadFactor() < 0.995);
    }
}
@Test
public void sanityOverFillBucketMoreThan2B() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 100000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    int maxTries = 30;
    int failedAt = maxTries;
    for (int i = 0; i < maxTries; i++) {
        if (!filter.put(2)) {
            failedAt = i;
            break;
        }
    }
    assertTrue("Duplicate insert failed at " + failedAt + " Expected value is (2*BUCKET_SIZE)+victim cache = 9",
            failedAt == 9);
}
@Test
public void testVictimCacheTagComparison() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    filter.hasVictim = true;
    filter.victim = new Victim(1, 2, 42);
    BucketAndTag test1 = new BucketAndTag(filter.victim.getI1(), 42);
    BucketAndTag test2 = new BucketAndTag(filter.victim.getI2(), 42);
    assertTrue(filter.checkIsVictim(test1));
    assertTrue(filter.checkIsVictim(test2));
}
@Test
public void sanityFillDeleteAllAndCheckABunchOfStuff() {
    // test with different filter sizes
    for (int k = 1; k < 20; k++) {
        int filterKeys = 20000 * k;
        CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), filterKeys)
                .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
        // repeatedly fill and drain filter
        for (int j = 0; j < 3; j++) {
            stressFillDrainCheck(filter);
        }
    }
}
@Test
public void testCopyEmpty() {
    CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
            .withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
    CuckooFilter<Integer> filterCopy = filter.copy();
    assertTrue(filterCopy.equals(filter));
    assertNotSame(filter, filterCopy);
}