public boolean isFullDuplicate(IHttpRequestResponse messageInfo) {
    PrintWriter stdout = new PrintWriter(callbacks.getStdout(), true);
    IResponseInfo respInfo = helpers.analyzeResponse(messageInfo.getResponse());

    if (dubBloomFilter == null) {
        return false;
    }

    HashFunction m_hash = Hashing.murmur3_32();
    if (helpers.bytesToString(messageInfo.getResponse()).length() > respInfo.getBodyOffset()) {
        String body = helpers.bytesToString(messageInfo.getResponse())
                .substring(respInfo.getBodyOffset());

        /* full-dup detection: hash the body and probe the Bloom filter */
        String dedupHashValue = m_hash.hashBytes(helpers.stringToBytes(body)).toString();
        if (dubBloomFilter.mightContain(dedupHashValue)) {
            return true;
        }
        dubBloomFilter.put(dedupHashValue);
    }
    return false;
}
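The same dedup idea can be sketched standalone with Guava's BloomFilter, independent of the Burp API; the class name, capacity, and error rate below are illustrative assumptions, not part of the snippet above:

import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import java.nio.charset.StandardCharsets;

class BodyDeduplicator {
    // 100k expected bodies at a 0.1% false-positive rate -- illustrative numbers only
    private final BloomFilter<CharSequence> seen =
            BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 100_000, 0.001);

    /** Returns true if an identical body was (probably) seen before. */
    boolean isDuplicate(String body) {
        if (seen.mightContain(body)) {
            return true; // may be a false positive; never a false negative
        }
        seen.put(body);
        return false;
    }
}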
/**
 * Files exported from jars are exported into a certain folder so that we can rebuild them
 * when the related jar file changes.
 */
@NonNull
private static String getJarFilePrefix(@NonNull File inputFile) {
    // get the filename
    String name = inputFile.getName();
    // remove the extension
    int pos = name.lastIndexOf('.');
    if (pos != -1) {
        name = name.substring(0, pos);
    }

    // add a hash of the original file path.
    String input = inputFile.getAbsolutePath();
    HashFunction hashFunction = Hashing.sha1();
    HashCode hashCode = hashFunction.hashString(input, Charsets.UTF_16LE);

    return name + "-" + hashCode.toString();
}
private static HashFunction configureHash(Algorithm alg, long seedNSalt, long addlSipSeed) {
    switch (alg) {
        case xxHash64:
            return new xxHashFunction(seedNSalt);
        case Murmur3_128:
            return Hashing.murmur3_128((int) seedNSalt);
        case Murmur3_32:
            return Hashing.murmur3_32((int) seedNSalt);
        case sha256:
            return Hashing.sha256();
        case sipHash24:
            return Hashing.sipHash24(seedNSalt, addlSipSeed);
        default:
            throw new IllegalArgumentException("Unsupported hashing algorithm: " + alg);
    }
}
/**
 * Returns a unique File for the converted library, even if there are 2 libraries with the same
 * file names (but different paths).
 *
 * @param outFolder the output folder.
 * @param inputFile the library
 */
@NonNull
public static File getJackFileName(@NonNull File outFolder, @NonNull File inputFile) {
    // get the filename
    String name = inputFile.getName();
    // remove the extension
    int pos = name.lastIndexOf('.');
    if (pos != -1) {
        name = name.substring(0, pos);
    }

    // add a hash of the original file path.
    String input = inputFile.getAbsolutePath();
    HashFunction hashFunction = Hashing.sha1();
    HashCode hashCode = hashFunction.hashString(input, Charsets.UTF_16LE);

    return new File(outFolder, name + "-" + hashCode.toString() + SdkConstants.DOT_JAR);
}
@Test
public void HashElement_GivenTestString_ReturnHash() throws Exception {
    // Arrange
    byte[] elementToHash = "test".getBytes();
    HashFunction hashFunction = mock(HashFunction.class, RETURNS_DEEP_STUBS);
    when(hashFunction.hashBytes(eq(elementToHash))).thenReturn(HashCode.fromLong(10L));
    Guava64BitHasher hasher = new Guava64BitHasher(hashFunction);

    // Act
    Hash64Bits hash = hasher.hash(elementToHash);

    // Assert
    Hash64Bits expectedHash = new Hash64Bits(10L);
    assertEquals(expectedHash, hash);
}
@Test
public void Hash_GivenTestString_ReturnHash() throws Exception {
    // Arrange
    byte[] elementToHash = "test".getBytes();
    HashFunction hashFunction = mock(HashFunction.class, RETURNS_DEEP_STUBS);
    when(hashFunction.hashBytes(eq(elementToHash))).thenReturn(HashCode.fromInt(10));
    Guava32BitHasher hasher = new Guava32BitHasher(hashFunction);

    // Act
    Hash32Bits hash = hasher.hash(elementToHash);

    // Assert
    Hash32Bits expectedHash = new Hash32Bits(10);
    assertEquals(expectedHash, hash);
}
static double getHash(String newString) {
    double hashOfString = 0;
    try {
        // Murmur hashing is an effective way to uniformly hash different strings.
        HashFunction hf = Hashing.murmur3_128();
        // putUnencodedChars is the current name of the old putString(CharSequence) overload.
        HashCode hc = hf.newHasher().putUnencodedChars(newString).hash();
        byte[] byteArray = hc.asBytes();
        ByteBuffer buffer = ByteBuffer.wrap(byteArray);
        // Only the first two bytes of the 128-bit hash are used.
        hashOfString = buffer.getShort();
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Map the short value into [0, 0.99).
    return Math.abs(hashOfString) % .99;
}
static double getKeyHash(String newString) {
    double hashOfString = 0;
    try {
        // Murmur hashing is an effective way to uniformly hash different strings.
        HashFunction hf = Hashing.murmur3_128();
        HashCode hc = hf.newHasher().putUnencodedChars(newString).hash();
        byte[] byteArray = hc.asBytes();
        ByteBuffer buffer = ByteBuffer.wrap(byteArray);
        // Only the first two bytes of the 128-bit hash are used.
        hashOfString = buffer.getShort();
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Map the short value into [0, 0.99).
    return Math.abs(hashOfString) % .99;
}
/**
 * Construct a new hash engine, using the default configuration values
 * present in the hrfs site configuration file.
 */
public HashEngine(HashFunction hfn) {
    this.conf = new HrfsConfiguration();
    this.hashfn = hfn;
    this.nworkers = conf.getInt(HrfsKeys.HRFS_HENGINE_WORKERS, 5);
    this.biqueue = new ConcurrentLinkedQueue<Block>();
    this.boqueue = new ConcurrentLinkedQueue<DataBlock>();
    this.ahcnt = new AtomicLong(0);

    /* Build up our worker pool using the configuration tunable. */
    this.executor = new ThreadPoolExecutor(nworkers, nworkers,
            1000L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>());
}
/**
 * Distribute triggers on nodes using a consistent hashing strategy.
 * This strategy scales well and minimizes re-distribution of entries when the cluster changes.
 *
 * @param entries a list of entries to distribute
 * @param buckets a table of nodes
 * @return a map of entries distributed across nodes
 */
public Map<PartitionEntry, Integer> calculatePartition(List<PartitionEntry> entries,
                                                       Map<Integer, Integer> buckets) {
    if (entries == null) {
        throw new IllegalArgumentException("entries must be not null");
    }
    if (isEmpty(buckets)) {
        throw new IllegalArgumentException("buckets must be not empty");
    }
    HashFunction md5 = Hashing.md5();
    int numBuckets = buckets.size();
    Map<PartitionEntry, Integer> newPartition = new HashMap<>();
    for (PartitionEntry entry : entries) {
        newPartition.put(entry,
                buckets.get(Hashing.consistentHash(md5.hashInt(entry.hashCode()), numBuckets)));
    }
    return newPartition;
}
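A minimal sketch of why Hashing.consistentHash is used here: growing the bucket count only remaps about 1/n of the keys, while a plain (hash % n) scheme would move almost all of them. The key value is arbitrary:

import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;

class ConsistentHashDemo {
    public static void main(String[] args) {
        HashCode key = Hashing.md5().hashInt(42);
        // Most keys keep their bucket when the count grows from 4 to 5.
        System.out.println(Hashing.consistentHash(key, 4));
        System.out.println(Hashing.consistentHash(key, 5));
    }
}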
public static HashFunction getHasher(HashType hashType) {
    switch (hashType) {
        case MURMUR3_128: return Hashing.murmur3_128();
        case MURMUR3_32: return Hashing.murmur3_32();
        case SIPHASH24: return Hashing.sipHash24();
        case MD5: return Hashing.md5();
        case SHA1: return Hashing.sha1();
        case SHA256: return Hashing.sha256();
        case SHA512: return Hashing.sha512();
        case ADLER32: return Hashing.adler32();
        case CRC32: return Hashing.crc32();
        case CRC32C: return Hashing.crc32c();
        default:
            throw new IllegalArgumentException(
                    Utils.format("Unsupported Hashing Algorithm: {}", hashType.name()));
    }
}
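A usage sketch for the factory above, assuming the HashType enum and getHasher from the snippet are in scope:

// hashString encodes the chars as UTF-8 before hashing
HashFunction hf = getHasher(HashType.SHA256);
String hex = hf.hashString("record-payload", StandardCharsets.UTF_8).toString(); // 64 hex chars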
private String generateHash(
    Record record,
    HashType hashType,
    Collection<String> fieldsToHash,
    boolean includeRecordHeader,
    boolean useSeparator
) throws StageException {
    try {
        HashFunction hasher = HashingUtil.getHasher(hashType.getHashType());
        HashingUtil.RecordFunnel recordFunnel = HashingUtil.getRecordFunnel(
            fieldsToHash,
            includeRecordHeader,
            useSeparator
        );
        return hasher.hashObject(record, recordFunnel).toString();
    } catch (IllegalArgumentException e) {
        throw new OnRecordErrorException(Errors.HASH_00, hashType.getDigest(), e.toString(), e);
    }
}
private String hashForRecordsWithFieldsAndHeaderAttr(
    Record record,
    Collection<String> fieldsToHash,
    HashType hashType,
    boolean includeRecordHeaderForHashing,
    boolean useSeparator
) {
    HashFunction hasher = HashingUtil.getHasher(hashType.getHashType());
    Set<String> validFieldsToHash = new HashSet<>();
    for (String fieldPath : fieldsToHash) {
        Field field = record.get(fieldPath);
        // guard against missing fields before dereferencing
        if (field == null) {
            continue;
        }
        Field.Type type = field.getType();
        if (!(FieldHasherProcessor.UNSUPPORTED_FIELD_TYPES.contains(type) || field.getValue() == null)) {
            validFieldsToHash.add(fieldPath);
        }
    }
    HashingUtil.RecordFunnel recordFunnel = HashingUtil.getRecordFunnel(
        validFieldsToHash,
        includeRecordHeaderForHashing,
        useSeparator
    );
    return hasher.hashObject(record, recordFunnel).toString();
}
private void putRowKeyToHLLNew(List<String> row, long[] hashValuesLong,
        HLLCounter[] cuboidCounters, HashFunction hashFunction) {
    int x = 0;
    for (String field : row) {
        Hasher hc = hashFunction.newHasher();
        // prefix the column index so equal values in different columns hash differently
        byte[] bytes = hc.putUnencodedChars(x + field).hash().asBytes();
        hashValuesLong[x++] = Bytes.toLong(bytes);
    }
    for (int i = 0, n = allCuboidsBitSet.length; i < n; i++) {
        long value = 0;
        for (int position = 0; position < allCuboidsBitSet[i].length; position++) {
            value += hashValuesLong[allCuboidsBitSet[i][position]];
        }
        cuboidCounters[i].addHashDirectly(value);
    }
}
@Nonnull
public static HashFunction of(@Nonnull String name) {
    checkNotNull(name);
    switch (name) {
        case "adler32": return Hashing.adler32();
        case "crc32": return Hashing.crc32();
        case "md5": return LegacyHashing.md5();
        case "sha1": return LegacyHashing.sha1();
        case "sha256": return Hashing.sha256();
        case "sha512": return Hashing.sha512();
        case "sipHash24": return Hashing.sipHash24();
        case "murmur3_32": return Hashing.murmur3_32();
        case "murmur3_128": return Hashing.murmur3_128();
        default:
            throw new RuntimeException("unknown hash: " + name);
    }
}
/**
 * Create an analyzer to calculate a minhash.
 *
 * @param tokenizer a tokenizer to parse a text
 * @param hashBit the number of hash bits
 * @param seed a base seed for the hash functions
 * @param num the number of hash functions
 * @return analyzer used by {@link MinHash#calculate(Analyzer, String)}
 */
public static Analyzer createAnalyzer(final Tokenizer tokenizer, final int hashBit,
        final int seed, final int num) {
    final HashFunction[] hashFunctions = MinHash.createHashFunctions(seed, num);
    final Analyzer minhashAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final TokenStream stream = new MinHashTokenFilter(tokenizer, hashFunctions, hashBit);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    return minhashAnalyzer;
}
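A hypothetical invocation, following the {@link} reference in the Javadoc above; the tokenizer variable and the signature type are assumptions about the surrounding MinHash library:

// 128 hash functions of 1 bit each yields a 128-bit (16-byte) signature
Analyzer analyzer = createAnalyzer(tokenizer, 1, 0, 128);
byte[] signature = MinHash.calculate(analyzer, "Fast algorithms for mining association rules");
// similar texts yield signatures with small Hamming distance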
public static void main(String[] args) throws Exception {
    installDefaultUncaughtExceptionHandler(log);

    final CommandLineParser parser = new PosixParser();
    final Options options = new Options()
            .addOption("j", "threads", true, "number of parallel threads to use for analyzing")
            .addOption("hash", true, "hash function to use, possible values: " + Arrays.toString(Hashes.values()))
            .addOption("dir", true, "add directory to search");
    final CommandLine cmdline = parser.parse(options, args);

    final int threads = Integer.valueOf(cmdline.getOptionValue("threads",
            String.valueOf(Runtime.getRuntime().availableProcessors())));
    final HashFunction hash = Hashes.valueOf(cmdline.getOptionValue("hash", "adler32")).hashfunc;
    final File[] dirs = Collections2.transform(Arrays.asList(cmdline.getOptionValues("dir")),
            new Function<String, File>() {
                @Override
                public File apply(String from) {
                    return new File(from);
                }
            }).toArray(new File[]{});

    log.info("hash: {}, threads: {}, dirs: {} in total", hash, threads, dirs.length);
    try {
        new Dupes(threads, hash, dirs).run();
    } finally {
        Utils.shutdownLogger();
    }
}
/**
 * Chooses a directory name, based on a JAR file name, considering exploded-aar and classes.jar.
 */
@NonNull
public static String getDirectoryNameForJar(@NonNull File inputFile) {
    // add a hash of the original file path.
    HashFunction hashFunction = Hashing.sha1();
    HashCode hashCode = hashFunction.hashString(inputFile.getAbsolutePath(), Charsets.UTF_16LE);

    String name = Files.getNameWithoutExtension(inputFile.getName());
    if (name.equals("classes") && inputFile.getAbsolutePath().contains("exploded-aar")) {
        // This naming scheme is coming from DependencyManager#computeArtifactPath.
        File versionDir = inputFile.getParentFile().getParentFile();
        File artifactDir = versionDir.getParentFile();
        File groupDir = artifactDir.getParentFile();
        name = Joiner.on('-').join(groupDir.getName(), artifactDir.getName(), versionDir.getName());
    }
    name = name + "_" + hashCode.toString();
    return name;
}
/**
 * @param textualDocuments
 * @return an array of {@link SimHashKNNDB}, one per weighting model, to which all the
 *         documents have been added
 */
public SimHashKNNDB[] processDocuments(final List<String> textualDocuments,
        final HashFunction hashFunction, final WeightingModel[] models) {
    // Initialize simhash DBs (indexed loop: a for-each loop cannot assign into the array)
    final SimHashKNNDB[] simhashDBs = new SimHashKNNDB[models.length];
    for (int i = 0; i < simhashDBs.length; i++) {
        simhashDBs[i] = new SimHashKNNDB(hashFunction);
    }

    // Clear terrier index
    clearIndex();
    // Add all the documents to the index
    addDocumentToIndex(textualDocuments);

    // For each model, populate the DB
    for (final WeightingModel model : models) {
        // For each document, get the terms and query the weighting model
        for (int i = 0; i < textualDocuments.size(); i++) {
            final String doc = textualDocuments.get(i);
        }
    }
    return simhashDBs;
}
public static long[] computeHashes(String item, int numWords, int seed) {
    long[] hashes = new long[numWords];
    for (int word = 0; word < numWords; word += 2) {
        HashFunction hashFunc = Hashing.murmur3_128(seed + word);
        Hasher hasher = hashFunc.newHasher();
        hasher.putUnencodedChars(item);

        // get the two longs out
        HashCode hc = hasher.hash();
        ByteBuffer bb = ByteBuffer.wrap(hc.asBytes());
        hashes[word] = bb.getLong(0);
        if (word + 1 < numWords) {
            hashes[word + 1] = bb.getLong(8);
        }
    }
    return hashes;
}
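Usage sketch: each seeded murmur3_128 invocation yields 16 bytes, so the helper above fills two longs per call; the inputs here are arbitrary:

long[] h = computeHashes("ACGTACGT", 4, 1234);
// h[0], h[1] come from murmur3_128(1234); h[2], h[3] from murmur3_128(1236)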
public static final long[][] computeNGramHashesExact(final String seq, final int nGramSize,
        final int numWords, final int seed) {
    HashFunction hf = Hashing.murmur3_128(seed);

    long[][] hashes = new long[seq.length() - nGramSize + 1][numWords];
    for (int iter = 0; iter < hashes.length; iter++) {
        String subStr = seq.substring(iter, iter + nGramSize);
        for (int word = 0; word < numWords; word++) {
            HashCode hc = hf.newHasher().putUnencodedChars(subStr).putInt(word).hash();
            hashes[iter][word] = hc.asLong();
        }
    }
    return hashes;
}
public static final int[] computeSequenceHashes(final String seq, final int nGramSize,
        boolean doReverseCompliment) {
    HashFunction hf = Hashing.murmur3_32(0);

    int[] hashes = new int[seq.length() - nGramSize + 1];
    for (int iter = 0; iter < hashes.length; iter++) {
        String str = seq.substring(iter, iter + nGramSize);
        // canonicalize: hash the lexicographically smaller of the k-mer and its reverse complement
        if (doReverseCompliment) {
            String strReverse = Utils.rc(str);
            if (strReverse.compareTo(str) < 0) {
                str = strReverse;
            }
        }
        HashCode hc = hf.newHasher().putUnencodedChars(str).hash();
        hashes[iter] = hc.asInt();
    }
    return hashes;
}
public static final long[] computeSequenceHashesLong(final String seq, final int nGramSize,
        final int seed, final boolean doReverseCompliment) {
    HashFunction hf = Hashing.murmur3_128(seed);

    long[] hashes = new long[seq.length() - nGramSize + 1];
    for (int iter = 0; iter < hashes.length; iter++) {
        String str = seq.substring(iter, iter + nGramSize);
        // canonicalize: hash the lexicographically smaller of the k-mer and its reverse complement
        if (doReverseCompliment) {
            String strReverse = Utils.rc(str);
            if (strReverse.compareTo(str) < 0) {
                str = strReverse;
            }
        }
        HashCode hc = hf.newHasher().putUnencodedChars(str).hash();
        hashes[iter] = hc.asLong();
    }
    return hashes;
}
@Test
public void testHashCodesM3_128_ints() {
    int seed = 123;
    Random rand = new Random(seed);
    HashFunction hf = Hashing.murmur3_128(seed);
    for (int i = 0; i < 1000; i++) {
        int val = rand.nextInt();
        byte[] data = ByteBuffer.allocate(4).putInt(val).array();
        // guava stores the hashcodes in little endian order
        ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        buf.put(hf.hashBytes(data).asBytes());
        buf.flip();
        long gl1 = buf.getLong();
        long gl2 = buf.getLong(8);
        long[] hc = Murmur3.hash128(data, data.length, seed);
        long m1 = hc[0];
        long m2 = hc[1];
        assertEquals(gl1, m1);
        assertEquals(gl2, m2);
    }
}
@Test
public void testHashCodesM3_128_longs() {
    int seed = 123;
    Random rand = new Random(seed);
    HashFunction hf = Hashing.murmur3_128(seed);
    for (int i = 0; i < 1000; i++) {
        long val = rand.nextLong();
        byte[] data = ByteBuffer.allocate(8).putLong(val).array();
        // guava stores the hashcodes in little endian order
        ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        buf.put(hf.hashBytes(data).asBytes());
        buf.flip();
        long gl1 = buf.getLong();
        long gl2 = buf.getLong(8);
        long[] hc = Murmur3.hash128(data, data.length, seed);
        long m1 = hc[0];
        long m2 = hc[1];
        assertEquals(gl1, m1);
        assertEquals(gl2, m2);
    }
}
@Test
public void testHashCodesM3_128_double() {
    int seed = 123;
    Random rand = new Random(seed);
    HashFunction hf = Hashing.murmur3_128(seed);
    for (int i = 0; i < 1000; i++) {
        double val = rand.nextDouble();
        byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
        // guava stores the hashcodes in little endian order
        ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        buf.put(hf.hashBytes(data).asBytes());
        buf.flip();
        long gl1 = buf.getLong();
        long gl2 = buf.getLong(8);
        long[] hc = Murmur3.hash128(data, data.length, seed);
        long m1 = hc[0];
        long m2 = hc[1];
        assertEquals(gl1, m1);
        assertEquals(gl2, m2);
    }
}
/**
 * Build a stringified MAC address using the ClusterMetadata hash for uniqueness.
 * Form of MAC is "02:eb" followed by four bytes of the clusterMetadata hash.
 *
 * @param cm cluster metadata
 * @return stringified MAC address
 */
static String fingerprintMac(ClusterMetadata cm) {
    if (cm == null) {
        return DEFAULT_MAC;
    }
    HashFunction hf = Hashing.murmur3_32();
    HashCode hc = hf.newHasher().putObject(cm, ClusterMetadata.HASH_FUNNEL).hash();
    int unqf = hc.asInt();

    StringBuilder sb = new StringBuilder();
    sb.append("02:eb");
    for (int i = 0; i < 4; i++) {
        byte b = (byte) (unqf >> (i * 8));
        sb.append(String.format(":%02X", b));
    }
    return sb.toString();
}
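A quick illustration of the byte order the loop above produces: the low-order byte of the hash appears first. The hash value is arbitrary:

int unqf = 0xAABBCCDD;
StringBuilder sb = new StringBuilder("02:eb");
for (int i = 0; i < 4; i++) {
    sb.append(String.format(":%02X", (byte) (unqf >> (i * 8))));
}
System.out.println(sb); // prints 02:eb:DD:CC:BB:AA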
private int hash() {
    // Guava documentation recommends using putUnencodedChars to hash raw character bytes,
    // without any encoding, unless cross-language compatibility is needed. See the
    // Hasher.putString documentation for more info.
    Funnel<TrafficSelector> selectorFunnel = (from, into) -> from.criteria()
            .forEach(c -> into.putUnencodedChars(c.toString()));

    HashFunction hashFunction = Hashing.murmur3_32();
    HashCode hashCode = hashFunction.newHasher()
            .putUnencodedChars(deviceId.toString())
            .putObject(selector, selectorFunnel)
            .putInt(priority)
            .putUnencodedChars(tableId.toString())
            .hash();

    return hashCode.asInt();
}
private static String computeHash(ByteSource source, HashFunction hashFunction) throws IOException {
    // Stream the source through a HashingOutputStream that discards the bytes themselves.
    try (InputStream inputStream = source.openStream();
            HashingOutputStream outputStream = new HashingOutputStream(hashFunction,
                    new OutputStream() {
                        @Override
                        public void write(int b) throws IOException {
                            // Do nothing.
                        }
                    })) {
        ByteStreams.copy(inputStream, outputStream);
        return outputStream.hash().toString();
    }
}
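Note that Guava's ByteSource.hash does the same in one call, also streaming the content rather than buffering it; the file path below is an illustrative assumption:

import com.google.common.hash.Hashing;
import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;

class HashFileDemo {
    public static void main(String[] args) throws IOException {
        // ByteSource.hash streams the content through the hash function
        String sha256 = Files.asByteSource(new File("data.bin")).hash(Hashing.sha256()).toString();
        System.out.println(sha256);
    }
}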
@Override
public int hashCode() {
    final HashFunction hf = Hashing.md5();
    return hf.newHasher()
            // putUnencodedChars is the current name of the old putString(CharSequence) overload
            .putUnencodedChars(extractedPath)
            .putObject(archive, new Funnel<JarFile>() {
                @Override
                public void funnel(JarFile from, PrimitiveSink into) {
                    into.putUnencodedChars(from.getName())
                        .putUnencodedChars(Optional.fromNullable(from.getComment()).or(""));
                }

                private static final long serialVersionUID = 3109141395123855989L;
            })
            .hash().asInt();
}
public static void verifyRequestSign(String cookieId, String cookieSecret, String url,
        String userId, String hexKey, String sign0) throws PermissionDeniedException {
    StringBuilder plain = new StringBuilder();
    plain.append(md5(hexKey));
    plain.append("|").append(userId)
         .append("|").append(url)
         .append("|").append(cookieSecret)
         .append("|").append(cookieId);
    //logger.debug("plain: {}", plain.toString());

    byte[] data = plain.toString().getBytes(Charsets.US_ASCII);
    HashFunction hf = Hashing.sha256();
    HashCode hc = hf.newHasher().putBytes(data).hash();
    String sign1 = hc.toString();
    if (!sign0.equals(sign1)) {
        logger.error("Error Sign. cookieId={}, cookieSecret={}, url={}, key={}, userId={}, server sign={}, client sign={}",
                cookieId, cookieSecret, url, hexKey, userId, sign1, sign0);
        throw new PermissionDeniedException("Request Denied. " + url);
    }
}
/**
 * Constructor. Initializes the RLBS Bloom Filter.
 *
 * @param k number of simple Bloom filters composing this instance
 * @param m total memory size in bits
 * @param thresFPR target false-positive rate
 */
public RLBSBloomFilter(int k, int m, double thresFPR) {
    // Initialize parameters and calculate derived ones
    this.thresFPR = thresFPR;
    this.m = m;
    this.k = k;
    this.bitSetSize = (int) (m / k);

    // Prepare the hash functions to map items to positions in the bit array
    this.arrHashFunctions = new HashFunction[k];
    this.arrBitSets = new BitSet[k];
    for (int i = 0; i < k; i++) {
        // Murmur3 hash functions with different seeds are independent from each other
        this.arrHashFunctions[i] = Hashing.murmur3_128(i);
        // Each bit array implements a memory of m/k bit positions
        this.arrBitSets[i] = new BitSet(this.bitSetSize);
    }

    // Compute suggested k, according to Bera et al. (pg. 24)
    double computedK = Math.log(this.thresFPR) / Math.log(1 - (1 / Math.E));
    logger.info("RLBSBF initialized. Memory size (m): {}, Hash-functions: {}, Suggested Hash-functions: {}",
            this.m, this.k, computedK);
}
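The snippet only shows construction; hypothetical membership operations over the fields it initializes would follow the standard partitioned Bloom filter pattern. The method names below are invented for illustration and are not part of the original class:

// hypothetical -- one bit set per hash function, each indexed independently
public void add(byte[] item) {
    for (int i = 0; i < k; i++) {
        long h = arrHashFunctions[i].hashBytes(item).asLong();
        arrBitSets[i].set((int) Math.floorMod(h, (long) bitSetSize));
    }
}

public boolean mightContain(byte[] item) {
    for (int i = 0; i < k; i++) {
        long h = arrHashFunctions[i].hashBytes(item).asLong();
        if (!arrBitSets[i].get((int) Math.floorMod(h, (long) bitSetSize))) {
            return false;
        }
    }
    return true;
}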
public DefaultCacheKeyBuilder(HashFunction hashFunction, FileHasher fileHasher,
        ClassPathSnapshotter snapshotter, ClassLoaderHierarchyHasher classLoaderHierarchyHasher) {
    this.hashFunction = hashFunction;
    this.fileHasher = fileHasher;
    this.snapshotter = snapshotter;
    this.classLoaderHierarchyHasher = classLoaderHierarchyHasher;
}
public CacheEntry build() {
    Path baseDir = cacheDir.resolve(name);

    // If keys are present, append an MD5 of the concatenated keys to the directory name.
    if (!keys.isEmpty()) {
        HashFunction hf = Hashing.md5();
        Hasher h = hf.newHasher();
        for (String key : keys) {
            h.putString(key, Charsets.UTF_8);
        }
        HashCode hc = h.hash();
        baseDir = baseDir.resolve(hc.toString());
    }

    cacheEntriesLock.lock();
    try {
        CacheEntry cacheEntry = cacheEntries.get(baseDir.toString());
        if (cacheEntry != null) {
            if (!cacheEntry.getKeys().equals(keys)) {
                throw new PowsyblException("Inconsistent hash");
            }
        } else {
            cacheEntry = new CacheEntry(baseDir, keys);
            cacheEntries.put(baseDir.toString(), cacheEntry);
        }
        return cacheEntry;
    } finally {
        cacheEntriesLock.unlock();
    }
}
public FastBloomFilter(int bits, int numberHashFunctions) {
    bs = new BitSet(bits);
    Random r = new Random(System.currentTimeMillis());
    hashFunctions = new HashFunction[numberHashFunctions];
    for (int i = 0; i < numberHashFunctions; ++i) {
        hashFunctions[i] = Hashing.murmur3_128(r.nextInt());
    }
    capacity = bits;
}