@VisibleForTesting static <T> BloomFilter<T> create( Funnel<? super T> funnel, long expectedInsertions, double fpp, Strategy strategy) { checkNotNull(funnel); checkArgument( expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions); checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp); checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp); checkNotNull(strategy); if (expectedInsertions == 0) { expectedInsertions = 1; } /* * TODO(user): Put a warning in the javadoc about tiny fpp values, since the resulting size * is proportional to -log(p), but there is not much of a point after all, e.g. * optimalM(1000, 0.0000000000000001) = 76680 which is less than 10kb. Who cares! */ long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); try { return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, strategy); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e); } }
@VisibleForTesting static <T> BloomFilter<T> create(Funnel<? super T> funnel, long expectedInsertions, double fpp, Strategy strategy) { checkNotNull(funnel); checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions); checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp); checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp); checkNotNull(strategy); if (expectedInsertions == 0) { expectedInsertions = 1; } /* * TODO(user): Put a warning in the javadoc about tiny fpp values, since the resulting size * is proportional to -log(p), but there is not much of a point after all, e.g. * optimalM(1000, 0.0000000000000001) = 76680 which is less than 10kb. Who cares! */ long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); try { return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, strategy); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e); } }
/** * Creates a {@link BloomFilter BloomFilter<T>} with the expected number of * insertions and expected false positive probability. * * <p>Note that overflowing a {@code BloomFilter} with significantly more elements * than specified, will result in its saturation, and a sharp deterioration of its * false positive probability. * * <p>The constructed {@code BloomFilter<T>} will be serializable if the provided * {@code Funnel<T>} is. * * <p>It is recommended that the funnel be implemented as a Java enum. This has the * benefit of ensuring proper serialization and deserialization, which is important * since {@link #equals} also relies on object identity of funnels. * * @param funnel the funnel of T's that the constructed {@code BloomFilter<T>} will use * @param expectedInsertions the number of expected insertions to the constructed * {@code BloomFilter<T>}; must be positive * @param fpp the desired false positive probability (must be positive and less than 1.0) * @return a {@code BloomFilter} */ public static <T> BloomFilter<T> create( Funnel<T> funnel, int expectedInsertions /* n */, double fpp) { checkNotNull(funnel); checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions); checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp); checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp); if (expectedInsertions == 0) { expectedInsertions = 1; } /* * TODO(user): Put a warning in the javadoc about tiny fpp values, * since the resulting size is proportional to -log(p), but there is not * much of a point after all, e.g. optimalM(1000, 0.0000000000000001) = 76680 * which is less than 10kb. Who cares! */ long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); try { return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, BloomFilterStrategies.MURMUR128_MITZ_32); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e); } }
/** * Creates a {@code Builder} of a {@link BloomFilter BloomFilter<T>}, with the expected number * of insertions and expected false positive probability. * * <p>Note that overflowing a {@code BloomFilter} with significantly more elements * than specified, will result in its saturation, and a sharp deterioration of its * false positive probability. * * <p>The constructed {@code BloomFilter<T>} will be serializable if the provided * {@code Funnel<T>} is. * * <p>It is recommended the funnel is implemented as a Java enum. This has the benefit of ensuring * proper serialization and deserialization, which is important since {@link #equals} also relies * on object identity of funnels. * * @param funnel the funnel of T's that the constructed {@code BloomFilter<T>} will use * @param expectedInsertions the number of expected insertions to the constructed * {@code BloomFilter<T>}; must be positive * @param fpp the desired false positive probability (must be positive and less than 1.0) * @return a {@code BloomFilter} */ public static <T> BloomFilter<T> create( Funnel<T> funnel, int expectedInsertions /* n */, double fpp) { checkNotNull(funnel); checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions); checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp); checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp); if (expectedInsertions == 0) { expectedInsertions = 1; } /* * TODO(user): Put a warning in the javadoc about tiny fpp values, * since the resulting size is proportional to -log(p), but there is not * much of a point after all, e.g. optimalM(1000, 0.0000000000000001) = 76680 * which is less that 10kb. Who cares! */ long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); try { return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, BloomFilterStrategies.MURMUR128_MITZ_32); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e); } }
/** * Creates a BloomFilter. */ private BloomFilter( BitArray bits, int numHashFunctions, Funnel<? super T> funnel, Strategy strategy) { checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions); checkArgument( numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions); this.bits = checkNotNull(bits); this.numHashFunctions = numHashFunctions; this.funnel = checkNotNull(funnel); this.strategy = checkNotNull(strategy); }
/** * Reads a byte stream, which was written by {@linkplain #writeTo(OutputStream)}, into a * {@code BloomFilter<T>}. * * The {@code Funnel} to be used is not encoded in the stream, so it must be provided here. * <b>Warning:</b> the funnel provided <b>must</b> behave identically to the one used to populate * the original Bloom filter! * * @throws IOException if the InputStream throws an {@code IOException}, or if its data does not * appear to be a BloomFilter serialized using the {@linkplain #writeTo(OutputStream)} method. */ public static <T> BloomFilter<T> readFrom(InputStream in, Funnel<? super T> funnel) throws IOException { checkNotNull(in, "InputStream"); checkNotNull(funnel, "Funnel"); int strategyOrdinal = -1; int numHashFunctions = -1; int dataLength = -1; try { DataInputStream din = new DataInputStream(in); // currently this assumes there is no negative ordinal; will have to be updated if we // add non-stateless strategies (for which we've reserved negative ordinals; see // Strategy.ordinal()). strategyOrdinal = din.readByte(); numHashFunctions = UnsignedBytes.toInt(din.readByte()); dataLength = din.readInt(); Strategy strategy = BloomFilterStrategies.values()[strategyOrdinal]; long[] data = new long[dataLength]; for (int i = 0; i < data.length; i++) { data[i] = din.readLong(); } return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy); } catch (RuntimeException e) { String message = "Unable to deserialize BloomFilter from InputStream." + " strategyOrdinal: " + strategyOrdinal + " numHashFunctions: " + numHashFunctions + " dataLength: " + dataLength; throw new IOException(message, e); } }
/** * Reads a byte stream, which was written by {@linkplain #writeTo(OutputStream)}, into a * {@code BloomFilter<T>}. * * The {@code Funnel} to be used is not encoded in the stream, so it must be provided here. * <b>Warning:</b> the funnel provided <b>must</b> behave identically to the one used to populate * the original Bloom filter! * * @throws IOException if the InputStream throws an {@code IOException}, or if its data does not * appear to be a BloomFilter serialized using the {@linkplain #writeTo(OutputStream)} method. */ public static <T> BloomFilter<T> readFrom(InputStream in, Funnel<T> funnel) throws IOException { checkNotNull(in, "InputStream"); checkNotNull(funnel, "Funnel"); int strategyOrdinal = -1; int numHashFunctions = -1; int dataLength = -1; try { DataInputStream din = new DataInputStream(in); // currently this assumes there is no negative ordinal; will have to be updated if we // add non-stateless strategies (for which we've reserved negative ordinals; see // Strategy.ordinal()). strategyOrdinal = din.readByte(); numHashFunctions = UnsignedBytes.toInt(din.readByte()); dataLength = din.readInt(); Strategy strategy = BloomFilterStrategies.values()[strategyOrdinal]; long[] data = new long[dataLength]; for (int i = 0; i < data.length; i++) { data[i] = din.readLong(); } return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy); } catch (RuntimeException e) { String message = "Unable to deserialize BloomFilter from InputStream." + " strategyOrdinal: " + strategyOrdinal + " numHashFunctions: " + numHashFunctions + " dataLength: " + dataLength; throw new IOException(message, e); } }
/** * Creates a BloomFilter. */ private BloomFilter(BitArray bits, int numHashFunctions, Funnel<? super T> funnel, Strategy strategy) { checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions); checkArgument(numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions); this.bits = checkNotNull(bits); this.numHashFunctions = numHashFunctions; this.funnel = checkNotNull(funnel); this.strategy = checkNotNull(strategy); }
/** * Reads a byte stream, which was written by {@linkplain #writeTo(OutputStream)}, into a * {@code BloomFilter<T>}. * * The {@code Funnel} to be used is not encoded in the stream, so it must be provided here. * <b>Warning:</b> the funnel provided <b>must</b> behave identically to the one used to populate * the original Bloom filter! * * @throws IOException if the InputStream throws an {@code IOException}, or if its data does not * appear to be a BloomFilter serialized using the {@linkplain #writeTo(OutputStream)} method. */ public static <T> BloomFilter<T> readFrom(InputStream in, Funnel<T> funnel) throws IOException { checkNotNull(in, "InputStream"); checkNotNull(funnel, "Funnel"); int strategyOrdinal = -1; int numHashFunctions = -1; int dataLength = -1; try { DataInputStream din = new DataInputStream(in); // currently this assumes there is no negative ordinal; will have to be updated if we // add non-stateless strategies (for which we've reserved negative ordinals; see // Strategy.ordinal()). strategyOrdinal = din.readByte(); numHashFunctions = UnsignedBytes.toInt(din.readByte()); dataLength = din.readInt(); Strategy strategy = BloomFilterStrategies.values() [strategyOrdinal]; long[] data = new long[dataLength]; for (int i = 0; i < data.length; i++) { data[i] = din.readLong(); } return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy); } catch (RuntimeException e) { IOException ioException = new IOException("Unable to deserialize BloomFilter from InputStream." + " strategyOrdinal: " + strategyOrdinal + " numHashFunctions: " + numHashFunctions + " dataLength: " + dataLength); ioException.initCause(e); throw ioException; } }
/** * Reads a byte stream, which was written by {@linkplain #writeTo(OutputStream)}, into a * {@code BloomFilter<T>}. * * The {@code Funnel} to be used is not encoded in the stream, so it must be provided here. * <b>Warning:</b> the funnel provided <b>must</b> behave identically to the one used to populate * the original Bloom filter! * * @throws IOException if the InputStream throws an {@code IOException}, or if its data does not * appear to be a BloomFilter serialized using the {@linkplain #writeTo(OutputStream)} method. */ public static <T> BloomFilter<T> readFrom(InputStream in, Funnel<T> funnel) throws IOException { checkNotNull(in, "InputStream"); checkNotNull(funnel, "Funnel"); int strategyOrdinal = -1; int numHashFunctions = -1; int dataLength = -1; try { DataInputStream din = new DataInputStream(in); // currently this assumes there is no negative ordinal; will have to be updated if we // add non-stateless strategies (for which we've reserved negative ordinals; see // Strategy.ordinal()). strategyOrdinal = din.readByte(); numHashFunctions = UnsignedBytes.toInt(din.readByte()); dataLength = din.readInt(); Strategy strategy = BloomFilterStrategies.values()[strategyOrdinal]; long[] data = new long[dataLength]; for (int i = 0; i < data.length; i++) { data[i] = din.readLong(); } return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy); } catch (RuntimeException e) { IOException ioException = new IOException( "Unable to deserialize BloomFilter from InputStream." + " strategyOrdinal: " + strategyOrdinal + " numHashFunctions: " + numHashFunctions + " dataLength: " + dataLength); ioException.initCause(e); throw ioException; } }
/** * Creates a BloomFilter. */ private BloomFilter(BitArray bits, int numHashFunctions, Funnel<T> funnel, Strategy strategy) { checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions); checkArgument(numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions); this.bits = checkNotNull(bits); this.numHashFunctions = numHashFunctions; this.funnel = checkNotNull(funnel); this.strategy = checkNotNull(strategy); }
@VisibleForTesting static <T> BloomFilter<T> create( Funnel<? super T> funnel, int expectedInsertions /* n */, double fpp, Strategy strategy) { checkNotNull(funnel); checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions); checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp); checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp); checkNotNull(strategy); if (expectedInsertions == 0) { expectedInsertions = 1; } /* * TODO(user): Put a warning in the javadoc about tiny fpp values, * since the resulting size is proportional to -log(p), but there is not * much of a point after all, e.g. optimalM(1000, 0.0000000000000001) = 76680 * which is less than 10kb. Who cares! */ long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); try { return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, strategy); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e); } }
/** * Reads a byte stream, which was written by {@linkplain #writeTo(OutputStream)}, into * a {@code BloomFilter<T>}. * * The {@code Funnel} to be used is not encoded in the stream, so it must be provided here. * <b>Warning:</b> the funnel provided <b>must</b> behave identically to the one used to * populate the original Bloom filter! * * @throws IOException if the InputStream throws an {@code IOException}, or if its data does * not appear to be a BloomFilter serialized using the * {@linkplain #writeTo(OutputStream)} method. */ public static <T> BloomFilter<T> readFrom(InputStream in, Funnel<T> funnel) throws IOException { checkNotNull(in, "InputStream"); checkNotNull(funnel, "Funnel"); int strategyOrdinal = -1; int numHashFunctions = -1; int dataLength = -1; try { DataInputStream din = new DataInputStream(in); // currently this assumes there is no negative ordinal; will have to be updated if we // add non-stateless strategies (for which we've reserved negative ordinals; see // Strategy.ordinal()). strategyOrdinal = din.readByte(); numHashFunctions = UnsignedBytes.toInt(din.readByte()); dataLength = din.readInt(); Strategy strategy = BloomFilterStrategies.values()[strategyOrdinal]; long[] data = new long[dataLength]; for (int i = 0; i < data.length; i++) { data[i] = din.readLong(); } return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy); } catch (RuntimeException e) { IOException ioException = new IOException( "Unable to deserialize BloomFilter from InputStream." + " strategyOrdinal: " + strategyOrdinal + " numHashFunctions: " + numHashFunctions + " dataLength: " + dataLength); ioException.initCause(e); throw ioException; } }
@VisibleForTesting static <T> BloomFilter<T> create( Funnel<T> funnel, int expectedInsertions /* n */, double fpp, Strategy strategy) { checkNotNull(funnel); checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions); checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp); checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp); checkNotNull(strategy); if (expectedInsertions == 0) { expectedInsertions = 1; } /* * TODO(user): Put a warning in the javadoc about tiny fpp values, * since the resulting size is proportional to -log(p), but there is not * much of a point after all, e.g. optimalM(1000, 0.0000000000000001) = 76680 * which is less than 10kb. Who cares! */ long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); try { return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, strategy); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e); } }
Object readResolve() { return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy); }
public PackageSanityTests() { setDefault(BitArray.class, new BitArray(1)); setDefault(HashCode.class, HashCode.fromInt(1)); setDefault(String.class, "MD5"); setDefault(int.class, 32); }