static BloomFilter<String> readBloomFilterFromfile(String bloomFilterFilePath) throws IOException {
    Funnel<String> memberFunnel = new Funnel<String>() {
        public void funnel(String memberId, PrimitiveSink sink) {
            sink.putString(memberId, Charsets.UTF_8);
        }
    };
    // try-with-resources closes the stream; callers must handle a null result if deserialization fails.
    try (FileInputStream fis = new FileInputStream(new File(bloomFilterFilePath))) {
        return BloomFilter.readFrom(fis, memberFunnel);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
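// A minimal counterpart sketch (not from the original source; the method name is an assumption)
// for persisting a filter that readBloomFilterFromfile can later load. BloomFilter.writeTo emits
// the compact binary form that BloomFilter.readFrom understands, provided the same funnel is used
// on the reading side.
static void writeBloomFilterToFile(BloomFilter<String> bloomFilter, String bloomFilterFilePath) throws IOException {
    try (FileOutputStream fos = new FileOutputStream(bloomFilterFilePath)) {
        bloomFilter.writeTo(fos);
    }
}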
/**
 * initBloomfilters
 *
 * @param falsePositiveProbability
 * @param expectedNumberOfElements
 */
private void initBloomfilters(double falsePositiveProbability, long expectedNumberOfElements) {
    double singleBfMaxAddElements = BloomFilterUtils.caculateNumberAddElements(Integer.MAX_VALUE, falsePositiveProbability);
    double singleBfElements = expectedNumberOfElements;
    int bflen = 1;
    // If the expected number of elements exceeds Integer.MAX_VALUE, split them across several buckets.
    if (expectedNumberOfElements > Integer.MAX_VALUE) {
        bflen = (int) Math.ceil(expectedNumberOfElements / singleBfMaxAddElements);
        singleBfElements = Math.round(expectedNumberOfElements / (double) bflen) + 1;
    }
    this.bloomfilters = new ArrayList<>(bflen);
    checkArgument(bflen >= 1, "the length of bloomfilters cannot be smaller than one.");
    for (int i = 0; i < bflen; i++) {
        BloomFilter<CharSequence> bf = BloomFilter.create(Funnels.stringFunnel(this.charset),
                (long) singleBfElements, falsePositiveProbability);
        this.bloomfilters.add(bf);
    }
}
/**
 * Adds an element to the Bloom filter bucket selected by its hash.
 *
 * @param element element to add
 * @return true if the element was added, false if it was null
 */
public boolean add(CharSequence element) {
    if (element == null) {
        return false;
    }
    initCharset();
    BloomFilter<CharSequence> bf = this.bloomfilters.get(0);
    if (getBloomfilterBucketLength() > 1) {
        byte[] datas = element.toString().getBytes(this.charset);
        int bfIndex = Math.abs(Hashing.murmur3_128().hashBytes(datas).asInt()) % getBloomfilterBucketLength();
        bf = this.bloomfilters.get(bfIndex);
    }
    synchronized (bf) {
        bf.put(element);
        numberOfAddedElements++;
        return true;
    }
}
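// A standalone sketch (bucket count and key are up to the caller; the method name is an assumption)
// of the bucket-routing idea used by add() and contains() above: hash the element's bytes with
// murmur3_128 and reduce the hash modulo the bucket count to pick one of several smaller filters.
// Math.floorMod is used here because Math.abs(Integer.MIN_VALUE) is still negative.
static int bucketIndexSketch(String element, int bucketCount) {
    byte[] bytes = element.getBytes(StandardCharsets.UTF_8);
    int hash = Hashing.murmur3_128().hashBytes(bytes).asInt();
    return Math.floorMod(hash, bucketCount);
}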
private BloomFilter<Seed> getOrCreate(String segment) {
    BloomFilter<Seed> seedBloomFilter = bloomFilters.get(segment);
    if (seedBloomFilter != null) {
        return seedBloomFilter;
    }
    synchronized (segment.intern()) {
        seedBloomFilter = bloomFilters.get(segment);
        if (seedBloomFilter != null) {
            return seedBloomFilter;
        }
        long expectedNumber = NumberUtils.toLong(VSCrawlerContext.vsCrawlerConfigFileWatcher.loadedProperties()
                .getProperty(VSCrawlerConstant.VSCRAWLER_SEED_MANAGER_EXPECTED_SEED_NUMBER), 1000000L);
        // In any case, build a filter instance if one does not exist yet.
        seedBloomFilter = BloomFilter.create(new Funnel<Seed>() {
            @Override
            public void funnel(Seed from, PrimitiveSink into) {
                into.putString(seedKeyResolver.resolveSeedKey(from), Charset.defaultCharset());
            }
        }, expectedNumber);
        bloomFilters.put(segment, seedBloomFilter);
    }
    return seedBloomFilter;
}
public CollisionHandler(int numFilters, int size) {
    filters = new ArrayList<>();
    for (int i = 0; i < numFilters; ++i) {
        BloomFilter<Long> collisionFilter = BloomFilter.create(new Funnel<Long>() {
            /**
             * Sends a stream of data from the {@code from} object into the sink {@code into}. There
             * is no requirement that this data be complete enough to fully reconstitute the object
             * later.
             */
            @Override
            public void funnel(Long from, PrimitiveSink into) {
                into.putLong(from);
            }
        }, size);
        filters.add(collisionFilter);
    }
}
/** Returns a new PremiumListRevision for the given key and premium list map. */
@VisibleForTesting
public static PremiumListRevision create(PremiumList parent, Set<String> premiumLabels) {
    PremiumListRevision revision = new PremiumListRevision();
    revision.parent = Key.create(parent);
    revision.revisionId = allocateId();
    // All premium list labels are already punycoded, so don't perform any further character
    // encoding on them.
    revision.probablePremiumLabels =
        BloomFilter.create(unencodedCharsFunnel(), premiumLabels.size());
    premiumLabels.forEach(revision.probablePremiumLabels::put);
    try {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        revision.probablePremiumLabels.writeTo(bos);
        checkArgument(
            bos.size() <= MAX_BLOOM_FILTER_BYTES,
            "Too many premium labels were specified; Bloom filter exceeds max entity size");
    } catch (IOException e) {
        throw new IllegalStateException("Could not serialize premium labels Bloom filter", e);
    }
    return revision;
}
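// A small sketch (the method name and standalone structure are assumptions, not part of the
// original class) showing how the serialized size of a Guava Bloom filter can be measured the
// same way as above, before deciding whether it fits a storage limit such as an entity size cap.
static int serializedBloomFilterSizeBytes(Set<String> labels) throws IOException {
    BloomFilter<CharSequence> filter =
        BloomFilter.create(Funnels.unencodedCharsFunnel(), labels.size());
    labels.forEach(filter::put);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    filter.writeTo(bos);
    return bos.size();
}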
private BloomFilter<MapTuple> getFilter(Object batch) {
    if (!_filters.containsKey(batch)) {
        Funnel<MapTuple> funnel = new Funnel<MapTuple>() {
            private static final long serialVersionUID = 3504134639163725164L;

            @Override
            public void funnel(MapTuple from, PrimitiveSink into) {
                if (_uniqueFields == null) {
                    into.putString(from.values().toString(), Charset.defaultCharset());
                } else {
                    for (String f : _uniqueFields) {
                        into.putString(from.get(f).toString(), Charset.defaultCharset());
                    }
                }
            }
        };
        logger().info("Creating unique filter with max expected capacity of: " + _expectedSize);
        _filters.put(batch, BloomFilter.create(funnel, _expectedSize));
    }
    return _filters.get(batch);
}
/**
 * Makes all changes made since the previous
 * commit/rollback permanent and releases any database locks
 * currently held by this <code>Connection</code> object.
 * This method should be
 * used only when auto-commit mode has been disabled.
 *
 * @exception java.sql.SQLException if a database access error occurs,
 * this method is called while participating in a distributed transaction,
 * if this method is called on a closed connection or this
 * <code>Connection</code> object is in auto-commit mode
 * @see #setAutoCommit
 */
public synchronized void commit() throws SQLException {
    numberOfCommits++;
    RetryExecution execution = new RetryExecution("COMMIT");
    execution.execute(connection, new RetryCommand<Void>() {
        @Override
        public Void run() throws SQLException {
            if (tripleBatch != null && tripleBatch.size() > 0) {
                flushBatch();
            }
            // Reset the log of deleted statements once the transaction is committed.
            deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
            if (connection != null) {
                connection.commit();
            }
            return null;
        }
    });
    this.transactionId = getNextSequence();
}
/**
 * Undoes all changes made in the current transaction
 * and releases any database locks currently held
 * by this <code>Connection</code> object. This method should be
 * used only when auto-commit mode has been disabled.
 *
 * @exception java.sql.SQLException if a database access error occurs,
 * this method is called while participating in a distributed transaction,
 * this method is called on a closed connection or this
 * <code>Connection</code> object is in auto-commit mode
 * @see #setAutoCommit
 */
public void rollback() throws SQLException {
    if (tripleBatch != null && tripleBatch.size() > 0) {
        synchronized (tripleBatch) {
            for (KiWiTriple triple : tripleBatch) {
                triple.setId(-1L);
            }
            tripleBatch.clear();
        }
    }
    deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
    if (connection != null && !connection.isClosed()) {
        connection.rollback();
    }
    this.transactionId = getNextSequence();
}
/**
 * Retrieves a filter based on a data hash obtained from {@link #getDataHashCode}.
 *
 * @return the Bloom filter for this data hash
 * @throws java.io.IOException
 */
protected BloomFilter getFilter() throws IOException {
    if (this.filter != null) {
        return this.filter;
    }
    if (filterProvider.hasFilter(this.filterFileName)) {
        this.filter = (BloomFilter) filterProvider.loadFilter(this.filterFileName);
        return this.filter;
    }
    double falsePositiveProbability = config.getFalsePositiveProbability();
    int expectedNumberOfElements = config.getExpectedNumberOfElements();
    this.filter = BloomFilter.create(StringHashFunnel.INSTANCE, expectedNumberOfElements, falsePositiveProbability);
    return this.filter;
}
public void setup(ProcessContext context) {
    try {
        // If positive, use that particular field number in the input CSV message as input for count
        String useMsgList = "taxi_identifier";
        bloomFilterFilePaths = context.getProperty(BLOOMFILTER_FILEPATH).evaluateAttributeExpressions().getValue();
        int expectedInsertions = 20000000;
        useMsgFieldList = useMsgList.split(",");
        String bloomFilterPathList[] = bloomFilterFilePaths.split(",");
        // Check that the bloom filter model files match the fields provided in the property file.
        if (useMsgFieldList.length != bloomFilterPathList.length) {
            return;
        }
        bloomFilterMap = new HashMap<String, BloomFilter<String>>();
        testingRange = expectedInsertions;
        // Populate a bloom filter for each model.
        for (int i = 0; i < useMsgFieldList.length; i++) {
            // Load the BloomFilter from its serialized file.
            bloomFilter = readBloomFilterFromfile(bloomFilterPathList[i]);
            if (bloomFilter == null) {
                return;
            }
            bloomFilterMap.put(useMsgFieldList[i], bloomFilter);
        }
    } catch (Exception e) {
        // Don't swallow setup failures silently; at minimum record them.
        e.printStackTrace();
    }
}
public boolean mightContain(Task task) {
    for (BloomFilter<Task> filter : bloomFilters) {
        if (filter.mightContain(task)) {
            return true;
        }
    }
    return false;
}
public void readFromFile(String dir) throws IOException {
    File file;
    int p = 0;
    // Clear once before loading, so every persisted segment is kept.
    this.bloomFilters.clear();
    while ((file = new File(dir, this.getClass().getTypeName() + "$" + p + ".tmp")).exists()) {
        logger.info("Reading bloom remover data ${} from file {}...", p, file.getPath());
        try (FileInputStream inputStream = new FileInputStream(file)) {
            bloomFilters.add(activateBloomFilter = BloomFilter.readFrom(inputStream, Task.DIGEST));
        }
        p++;
    }
    logger.info("Bloom remover data [$0-${}] is successfully loaded.", groupSize() - 1);
}
/**
 * Initializes a bloom filter in memory.
 *
 * @param expectedInsertions the estimated maximum number of elements
 * @param fpp the acceptable false positive probability
 */
public URIBloomFilter(long expectedInsertions, double fpp) {
    urlCounter = new AtomicLong(0);
    this.expectedInsertions = expectedInsertions;
    this.fpp = fpp;
    bloomFilter = BloomFilter.create(
            Funnels.stringFunnel(Charset.defaultCharset()),
            expectedInsertions,
            fpp);
}
/**
 * Returns true if the element could have been inserted into the Bloom filter.
 * Use getFalsePositiveProbability() to calculate the probability of this
 * being correct.
 *
 * @param element element to check.
 * @return true if the element could have been inserted into the Bloom filter.
 */
public boolean contains(CharSequence element) {
    if (element == null) {
        return false;
    }
    initCharset();
    BloomFilter<CharSequence> bf = this.bloomfilters.get(0);
    if (getBloomfilterBucketLength() > 1) {
        byte[] datas = element.toString().getBytes(this.charset);
        int bfIndex = Math.abs(Hashing.murmur3_128().hashBytes(datas).asInt()) % getBloomfilterBucketLength();
        bf = this.bloomfilters.get(bfIndex);
    }
    return bf.mightContain(element);
}
public static void main(String[] args) {
    BloomFilter<String> strsBloom = BloomFilter.create(stringFunnel, Integer.MAX_VALUE, 0.5);
    int cnt = 0;
    for (long i = 0; i < 4000001; i++) {
        cnt = cnt + 1;
        strsBloom.put(Long.toString(i));
    }
    System.out.println(cnt);
    System.out.println(strsBloom.mightContain("5000000"));
}
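// A follow-up sketch (the funnel, sizing, and method name are assumptions) showing how
// BloomFilter.expectedFpp() reports the filter's current false positive probability, which grows
// as more elements are inserted relative to the capacity the filter was created with.
public static void expectedFppDemo() {
    BloomFilter<String> filter =
            BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 1_000_000, 0.01);
    for (long i = 0; i < 1_000_000; i++) {
        filter.put(Long.toString(i));
    }
    // Should be close to the configured 0.01 once the filter holds its expected insertions.
    System.out.println("expected fpp: " + filter.expectedFpp());
}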
public ChunkySpread(final P2PClient client, final OverlayModule<?> _swaplinks, final int _maxLoad,
        final double ULT, final double LLT, final int _startDelay, final int _churnDelay,
        final int _noParentTimeout, final int _pendingTimeout, final LatencyMeasureFactory factory,
        final Random r) {
    super(client, r);
    network.addListener(this, ChunkMessage.class);
    treesNum = Common.currentConfiguration.descriptions;
    expectedNodesNum = Common.currentConfiguration.nodes;
    maxLoad = _maxLoad;
    upperThreshold = (int) (ULT * maxLoad);
    lowerThreshold = (int) (LLT * maxLoad);
    startDelay = _startDelay;
    churnDelay = _churnDelay;
    noParentTimeout = _noParentTimeout;
    currNoParentTimeout = noParentTimeout;
    pendingTimeout = _pendingTimeout;
    swaplinks = _swaplinks;
    potentialFloodParents = new TreeMap<Integer, Queue<NodeAddress>>();
    for (int treeID = 0; treeID < treesNum; treeID++) {
        potentialFloodParents.put(treeID, new LinkedList<NodeAddress>());
    }
    parents = new TreeMap<Integer, NodeAddress>();
    bloomFilter = new TreeMap<Integer, BloomFilter<NodeAddress>>();
    underLoadedNeighbors = new ArrayList<NodeAddress>();
    currPendingTimeout = new ArrayList<Integer>();
    for (int treeID = 0; treeID < treesNum; treeID++) {
        currPendingTimeout.add(-1);
    }
    latencyMeasurer = factory.create();
    fastNeighbors = new TreeMap<Integer, Set<NodeAddress>>();
    for (int i = 0; i < treesNum; i++) {
        fastNeighbors.put(i, new TreeSet<NodeAddress>());
    }
    slowParents = new TreeMap<Integer, NodeAddress>();
}
private void handleMessage(final ConnectionRequestApprovedMessage<?> msg) {
    final BloomFilter<NodeAddress> bf = ((FloodInfo) (msg.payload)).getBloomFilter();
    final int treeID = ((FloodInfo) (msg.payload)).getTreeID();
    final boolean isChurn = ((FloodInfo) (msg.payload)).isChurn();
    // Handle simultaneous approves: if we already have a parent in this tree, renounce this one.
    if (parents.get(treeID) != null) {
        network.send(new RenounceParent(getMessageTag(), network.getAddress(), msg.sourceId, treeID, null, true));
        return;
    }
    setNotPending(treeID);
    potentialFloodParents.get(treeID).clear();
    currNoParentTimeout = noParentTimeout;
    debugPrint(network.getAddress() + " became son of " + msg.sourceId + " in tree " + treeID);
    // Set the bloom filter to be the same as the parent's and add ourselves.
    inheritBloomFilterAndAddSelf(treeID, bf);
    parents.put(treeID, msg.sourceId);
    if (isChurn) {
        return;
    }
    // Continue the flood to all swaplinks neighbors.
    final Collection<NodeAddress> swapLinksNeighbors = swaplinks.getNeighbors();
    for (final NodeAddress neighbor : swapLinksNeighbors) {
        // Avoid flooding the root (server).
        if (network.getServerNode().equals(neighbor)) {
            continue;
        }
        // Don't send back to the source of the message.
        if (neighbor.equals(msg.sourceId)) {
            continue;
        }
        network.send(new FloodTree(getMessageTag(), network.getAddress(), neighbor, treeID,
                bloomFilter.get(treeID).copy()));
    }
}
boolean mightContain(K key) {
    BloomFilter<K> b = bloomFilter;
    if (b == null) {
        synchronized (this) {
            b = bloomFilter;
            if (b == null) {
                b = buildBloomFilter();
                bloomFilter = b;
            }
        }
    }
    return b == null || b.mightContain(key);
}
void put(K key, ValueHolder<V> holder) {
    if (holder.clean) {
        return;
    }
    BloomFilter<K> b = bloomFilter;
    if (b != null) {
        b.put(key);
        bloomFilter = b;
    }
    SqlHandle c = null;
    try {
        c = acquire();
        if (c.put == null) {
            c.put = c.conn.prepareStatement("MERGE INTO data (k, v, created, accessed) VALUES(?,?,?,?)");
        }
        try {
            keyType.set(c.put, 1, key);
            c.put.setObject(2, holder.value, Types.JAVA_OBJECT);
            c.put.setTimestamp(3, new Timestamp(holder.created));
            c.put.setTimestamp(4, TimeUtil.nowTs());
            c.put.executeUpdate();
            holder.clean = true;
        } finally {
            c.put.clearParameters();
        }
    } catch (SQLException e) {
        log.warn("Cannot put into cache " + url, e);
        c = close(c);
    } finally {
        release(c);
    }
}
public boolean create(int initialSize, double fpp) {
    if (fpp > 1) {
        fpp = 0.000000001;
    }
    if (filter == null) {
        filter = BloomFilter.create(funnel, initialSize, fpp);
    }
    this.fpp = fpp;
    this.initialSize = initialSize;
    return true;
}
@Override
protected void process(MapTuple t, OutputCollector c) throws OperationException, InterruptedException {
    BloomFilter<MapTuple> filter = getFilter(c.getCurrentBatch());
    if (!filter.mightContain(t)) {
        filter.put(t);
        c.emit(t);
    }
}
@SuppressWarnings("unchecked") private void reloadSavedBloomFilter() throws IOException, ClassNotFoundException { File file = new File(bloomFilterFile); Preconditions.checkArgument(file.exists() && file.length() > 0); InputStream fis = null; ObjectInputStream ois = null; try { fis = new FileInputStream(file); ois = new ObjectInputStream(fis); bloomFilter = (BloomFilter<byte[]>)ois.readObject(); } finally { ois.close(); fis.close(); } }
@Override
public void writeBloomFilter(BloomFilter<Integer> bloomFilter, int joinAttributeIndex) {
    try {
        writer.write("bloomfilter " + joinAttributeIndex);
        writer.newLine();
        writer.flush();
        objectWriter.writeObject(bloomFilter);
    } catch (Exception e) {
        e.printStackTrace();
        closeSilently();
    }
}
@Override
public Queue<UrlCtxHolder<Ctx>> load() {
    if (file.exists()) {
        try {
            Pair<BloomFilter<String>, Queue<UrlCtxHolder<Ctx>>> data = FilesUtil.decompressFile(file);
            urls = data.getL();
            return data.getR();
        } catch (IOException e) {
            throw new RuntimeException("load file error.", e);
        }
    } else {
        urls = BloomFilter.create(Funnels.stringFunnel(Encodings.UTF_8), expectedInsertions, fpp);
        return null;
    }
}
@Override
public void save(Queue<UrlCtxHolder<Ctx>> tasks) {
    Pair<BloomFilter<String>, Queue<UrlCtxHolder<Ctx>>> data = Pair.of(urls, tasks);
    try {
        FilesUtil.compress2File(file, data);
    } catch (IOException e) {
        throw new RuntimeException("save file error.", e);
    }
}
private void load(Collection col, Receiver<TypeValue> receiver, BloomFilter<CharSequence> filter) {
    if (col != null && receiver != null) {
        for (ASObject obj : col.items()) {
            // filter.put returns true only when the id was (probably) not seen before,
            // so it doubles as a deduplication check here.
            if (obj.id() != null && (filter == null || filter.put(obj.id()))) {
                try {
                    receiver.receive(obj);
                } catch (Throwable t) {}
            }
        }
    }
}
public static void main(String[] args) {
    LongBloomFilter bloomFilter1 = new LongBloomFilter(NUM_OF_VALUES, 0.001);
    BloomFilter<Long> bloomFilter2 =
            BloomFilter.create((from, into) -> into.putLong(from), NUM_OF_VALUES, 0.001);
    LongCountsBloomFilter bloomFilter3 = new LongCountsBloomFilter(NUM_OF_VALUES, 0.001);

    Log.i("Writing values for filter 1 took " + putValues(bloomFilter1));
    Log.i("Writing values for filter 2 took " + putValues(bloomFilter2));
    Log.i("Writing values for filter 3 took " + putValues(bloomFilter3));

    Log.i("Reading values for filter 1 took " + readValues(bloomFilter1));
    Log.i("Reading values for filter 2 took " + readValues(bloomFilter2));
    Log.i("Reading values for filter 3 took " + readValues(bloomFilter3));
}
private static long readValues(BloomFilter<Long> bloomFilter2) {
    long start = System.currentTimeMillis();
    for (int i = 0; i < NUM_OF_VALUES; i++) {
        if (i % 3 == 0) {
            bloomFilter2.mightContain((long) i);
        }
    }
    return System.currentTimeMillis() - start;
}
private static long putValues(BloomFilter<Long> bloomFilter2) {
    long start = System.currentTimeMillis();
    for (int i = 0; i < NUM_OF_VALUES; i++) {
        if (i % 3 == 0) {
            bloomFilter2.put((long) i);
        }
    }
    return System.currentTimeMillis() - start;
}
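// A companion sketch for the benchmark above (NUM_OF_VALUES comes from the same class; the
// method name and probe strategy are assumptions): besides timing puts and reads, the observed
// false positive rate can be estimated by probing values that were deliberately never inserted.
private static double measureFalsePositiveRate(BloomFilter<Long> bloomFilter, long probes) {
    long falsePositives = 0;
    for (long i = 0; i < probes; i++) {
        // Probe values above the inserted range [0, NUM_OF_VALUES).
        if (bloomFilter.mightContain(NUM_OF_VALUES + i)) {
            falsePositives++;
        }
    }
    return (double) falsePositives / probes;
}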
/**
 * Adds the element to the BloomFilter.
 *
 * @return true if this is definitely the first time the element was put (the filter's bits changed);
 *         false if the element might have been put before
 */
public boolean put(T t) {
    final BloomFilter<T> temp = filter;
    boolean result = temp.put(t);
    filter = temp;
    return result;
}
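// A brief self-contained sketch (funnel, sizing, and sample values are assumptions) of the same
// put() return semantics on a plain Guava filter: the first insertion of a value reports true
// because the filter's bits changed, and a repeated insertion of the same value reports false.
static void putSemanticsDemo() {
    BloomFilter<String> demo =
            BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 1000, 0.01);
    System.out.println(demo.put("https://example.com/a")); // true: definitely new
    System.out.println(demo.put("https://example.com/a")); // false: might already be present
}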
public KiWiConnection(KiWiPersistence persistence, KiWiDialect dialect, CacheManager cacheManager) throws SQLException {
    this.cacheManager = cacheManager;
    this.dialect = dialect;
    this.persistence = persistence;
    this.commitLock = new ReentrantLock();
    this.literalLock = new ReentrantLock();
    this.uriLock = new ReentrantLock();
    this.bnodeLock = new ReentrantLock();
    this.batchCommit = dialect.isBatchSupported();
    this.deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
    this.transactionId = getNextSequence();
    initCachePool();
    initStatementCache();
}
/**
 * Public constructor
 * @param expectedNumberOfElements parameter to create bloom filter, must be
 * positive
 * @param falsePositiveProbability parameter to create bloom filter, must be
 * strictly between 0 and 1
 */
public EnhancedBloomFilter(final int expectedNumberOfElements, final double falsePositiveProbability) {
    // Check parameters
    if (expectedNumberOfElements <= 0) {
        throw new IllegalArgumentException(
            "Parameter 'expectedNumberOfElements' to create bloom filter invalid " + expectedNumberOfElements);
    }
    if (falsePositiveProbability <= 0 || falsePositiveProbability >= 1.0) {
        throw new IllegalArgumentException(
            "Parameter 'falsePositiveProbability' to create bloom filter invalid " + falsePositiveProbability);
    }
    this.addedNumberOfElements = 0;
    this.expectedNumberOfElements = expectedNumberOfElements;
    this.falsePositiveProbability = falsePositiveProbability;
    this.bf = BloomFilter.create(new Funnel<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void funnel(final String from, final PrimitiveSink into) {
            into.putString(from, StandardCharsets.UTF_8);
        }
    }, expectedNumberOfElements, falsePositiveProbability);
}
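// A helper sketch (not part of EnhancedBloomFilter; the method names are assumptions) of the
// standard sizing formulas behind the two constructor parameters: for n expected elements and
// target false positive probability p, the optimal bit count is m = -n * ln(p) / (ln 2)^2 and the
// optimal number of hash functions is k = (m / n) * ln 2. Guava derives its filter size from the
// same formulas internally when BloomFilter.create is called.
static long optimalNumOfBits(long n, double p) {
    return (long) Math.ceil(-n * Math.log(p) / (Math.log(2) * Math.log(2)));
}

static int optimalNumOfHashFunctions(long n, long m) {
    return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
}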
/**
 * Public constructor
 * @param bf the Bloom filter
 * @param addedNumberOfElements added number of elements
 * @param expectedNumberOfElements parameter to create bloom filter, must be
 * positive
 * @param falsePositiveProbability parameter to create bloom filter, must be
 * strictly between 0 and 1
 */
public EnhancedBloomFilter(final BloomFilter<String> bf, final int addedNumberOfElements,
        final int expectedNumberOfElements, final double falsePositiveProbability) {
    if (bf == null) {
        throw new NullPointerException("bf argument cannot be null");
    }
    this.bf = bf.copy();
    this.addedNumberOfElements = addedNumberOfElements;
    this.expectedNumberOfElements = expectedNumberOfElements;
    this.falsePositiveProbability = falsePositiveProbability;
}
/**
 * {@inheritDoc}
 */
protected FilterAdapter createFilterAdapter() {
    double falsePositiveProbability = config.getFalsePositiveProbability();
    int expectedNumberOfElements = config.getExpectedNumberOfElements();
    BloomFilter<String> filter = BloomFilter.create(StringHashFunnel.INSTANCE,
            expectedNumberOfElements, falsePositiveProbability);
    return new BloomFilterAdapter(filter);
}
protected BloomFilter createBloomFilter(int expectedInsertions, double fpp) {
    return BloomFilter.create(Funnels.stringFunnel(Charset.forName(Config.DEFAULT_CHARSET)),
            expectedInsertions, fpp);
}
public SinkDuplicateProcessor(Config config) {
    super(config);
    // TODO: the capacity and error rate should come from configuration. For now we assume a
    // worst-case volume with an acceptable loss; the losses must be handled in the near future.
    bloomFilter = BloomFilter.create(Funnels.byteArrayFunnel(), 100000000, 0.02);
}
public EntryPointDeduplicator(IBurpExtenderCallbacks callbacks) {
    this.callbacks = callbacks;
    this.helpers = callbacks.getHelpers();
    this.dubBloomFilter = BloomFilter.create(Funnels.stringFunnel(Charset.defaultCharset()), 1000);
    this.dubTree = new BKTree<>(new HammingDistance());
}