/**
 * Builds a ScorerContext from a bag of words: packs the embedding vector of
 * each distinct, in-vocabulary word into one flat float array, alongside a
 * parallel array of occurrence counts.
 */
public ScorerContext context( List<String> words ) {
    // TreeMultiset collapses duplicates into counts and keeps words sorted.
    Multiset<String> counter = TreeMultiset.create();
    counter.addAll( words );
    int word_dim = word_model.dimensions();
    // NOTE(review): counter.size() is the TOTAL occurrence count (duplicates
    // included), so word_vecs may be larger than the n_words * word_dim slots
    // actually filled below. Presumably create_context tolerates trailing
    // unused capacity — confirm before shrinking this allocation.
    float[] word_vecs = new float[ counter.size() * word_dim ];
    IntArrayList word_counts = new IntArrayList();
    int n_words = 0;
    for( Multiset.Entry<String> entry : counter.entrySet() ) {
        // get() writes the word's vector in-place at the given offset and
        // reports whether the word is known; unknown words are skipped.
        if( word_model.get( entry.getElement(), word_vecs, n_words * word_dim ) ) {
            word_counts.add( entry.getCount() );
            n_words += 1;
        }
    }
    // Shrink the backing array so elements() has no trailing garbage.
    word_counts.trim();
    return create_context( word_vecs, word_counts.elements() );
}
/**
 * Records a human-readable "USE" line for every AST node that references one
 * or more known definitions, appending it to {@code found}.
 *
 * Each line lists the node's token type and qualified name followed by the
 * token types of all referenced definitions' r-values (sorted, via
 * TreeMultiset), with {@code <null>} standing in for definitions without an
 * r-value.
 */
public void visit(NodeTraversal traversal, Node node, Node parent) {
    Collection<Definition> defs = passUnderTest.getDefinitionsReferencedAt(node);
    if (defs != null) {
        // FIX: StringBuilder instead of StringBuffer — no synchronization is
        // needed for this local, and the sibling visitors already use
        // StringBuilder.
        StringBuilder sb = new StringBuilder();
        sb.append("USE ");
        sb.append(Token.name(node.getType()));
        sb.append(" ");
        sb.append(node.getQualifiedName());
        sb.append(" -> ");
        // Sorted multiset makes the output deterministic regardless of the
        // order definitions are returned in.
        Multiset<String> defstrs = TreeMultiset.create();
        for (Definition def : defs) {
            Node rValue = def.getRValue();
            if (rValue != null) {
                defstrs.add(Token.name(rValue.getType()));
            } else {
                defstrs.add("<null>");
            }
        }
        sb.append(defstrs.toString());
        found.add(sb.toString());
    }
}
/**
 * Tracks the input watermark of an applied PTransform: holds the upstream
 * watermarks plus the pending (not yet processed) bundles and timers that
 * can hold the watermark back.
 */
public AppliedPTransformInputWatermark(Collection<? extends Watermark> inputWatermarks) {
    this.inputWatermarks = inputWatermarks;
    // The ordering must order elements by timestamp, and must not compare two distinct elements
    // as equal. This is built on the assumption that any element added as a pending element will
    // be consumed without modifications.
    //
    // The same logic is applied for pending timers
    Ordering<CommittedBundle<?>> pendingBundleComparator =
        // compound with an arbitrary ordering so distinct bundles with equal
        // timestamps are never treated as duplicates by the sorted multiset
        new BundleByElementTimestampComparator().compound(Ordering.arbitrary());
    this.pendingElements = TreeMultiset.create(pendingBundleComparator);
    // Timers use natural (timestamp) ordering.
    this.pendingTimers = TreeMultiset.create();
    this.objectTimers = new HashMap<>();
    this.existingTimers = new HashMap<>();
    // Start at the minimum watermark; advances as pending work drains.
    currentWatermark = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
}
@Test void testPerf() { Map<String, Integer> map = IntStream.range(1, 10).boxed() .collect(toMap(it -> "s" + it, identity())); WeightFailover<String> failover = WeightFailover.<String> newGenericBuilder() // .checker(it -> 1.0) // .build(map); long s = currentTimeMillis(); Multiset<String> counter = TreeMultiset.create(); for (int i = 0; i < 100000; i++) { List<String> available = failover.getAvailable(2); counter.addAll(available); } // old 260~270 System.out.println(counter + ", cost:" + (currentTimeMillis() - s)); s = currentTimeMillis(); counter = TreeMultiset.create(); for (int i = 0; i < 100000; i++) { counter.add(failover.getOneAvailable()); } // old 60~70 System.out.println(counter + ", cost:" + (currentTimeMillis() - s)); }
public static void main(String[] args) { // Parse text to separate words String INPUT_TEXT = "Hello World! Hello All! Hi World!"; // Create Multiset Multiset<String> multiset = TreeMultiset.create(Arrays.asList(INPUT_TEXT.split(" "))); // Print count words System.out.println(multiset); // print [All!, Hello x 2, Hi, World! x 2]- in natural (alphabet) order // Print all unique words System.out.println(multiset.elementSet()); // print [All!, Hello, Hi, World!]- in natural (alphabet) order // Print count occurrences of words System.out.println("Hello = " + multiset.count("Hello")); // print 2 System.out.println("World = " + multiset.count("World!")); // print 2 System.out.println("All = " + multiset.count("All!")); // print 1 System.out.println("Hi = " + multiset.count("Hi")); // print 1 System.out.println("Empty = " + multiset.count("Empty")); // print 0 // Print count all words System.out.println(multiset.size()); //print 6 // Print count unique words System.out.println(multiset.elementSet().size()); //print 4 }
public static void main(String[] args) {
    // Parse the text into words
    String INPUT_TEXT = "Hello World! Hello All! Hi World!";
    // Create a Multiset
    Multiset<String> multiset = TreeMultiset.create(Arrays.asList(INPUT_TEXT.split(" ")));

    // Print words with their occurrence counts
    System.out.println(multiset); // prints [All!, Hello x 2, Hi, World! x 2] - in alphabetical order

    // Print all unique words
    System.out.println(multiset.elementSet()); // prints [All!, Hello, Hi, World!] - in alphabetical order

    // Print the count for each word
    System.out.println("Hello = " + multiset.count("Hello")); // prints 2
    System.out.println("World = " + multiset.count("World!")); // prints 2
    System.out.println("All = " + multiset.count("All!")); // prints 1
    System.out.println("Hi = " + multiset.count("Hi")); // prints 1
    System.out.println("Empty = " + multiset.count("Empty")); // prints 0

    // Print the total number of words in the text
    System.out.println(multiset.size()); // prints 6

    // Print the total number of unique words
    System.out.println(multiset.elementSet().size()); // prints 4
}
/**
 * Suggests alternative names for an identifier given its surrounding n-grams.
 *
 * @param relevantNgrams the n-gram contexts in which the identifier appears
 * @param currentName the identifier's current name (always kept as a candidate)
 * @return a pruned multiset of candidate names, including the UNK symbol
 */
public Multiset<String> getAlternativeNames(
        final Multiset<NGram<String>> relevantNgrams,
        final String currentName) {
    // Get all alternative namings
    final Multiset<String> nameAlternatives = ngramLM
            .getAlternativeNamings(relevantNgrams, WILDCARD_TOKEN);
    nameAlternatives.add(currentName); // Give the current identifier a
                                       // chance...

    // Prune naming alternatives: keep only the highest-count entries.
    // NOTE(review): the `seen > 1000` guard admits up to 1001 entries before
    // breaking — confirm whether the intended cap is 1000 or 1001.
    final Multiset<String> toKeep = TreeMultiset.create();
    int seen = 0;
    for (final Entry<String> ent : Multisets.copyHighestCountFirst(
            nameAlternatives).entrySet()) {
        if (seen > 1000) {
            break;
        }
        toKeep.add(ent.getElement(), ent.getCount());
        seen++;
    }
    // Always include the unknown-token symbol as a fallback candidate.
    toKeep.add(AbstractNGramLM.UNK_SYMBOL);
    return toKeep;
}
@Test
public void testPartitionSpread() throws Exception {
    // Tally how many messages land on each partition.
    Multiset<Integer> results = TreeMultiset.create();
    Cluster c = Cluster.empty();
    try (Partitioner p = new DefaultPartitioner()) {
        PartitionKeyGenerator pkg = new PartitionKeyGenerator();
        mockPartitions(c);
        int remaining = messages;
        while (remaining-- > 0) {
            results.add(p.partition("test", null, pkg.next(), null, null, c));
        }
        // A perfectly even spread puts messages/partitions on each; allow 5%.
        int expected = messages / partitions;
        double threshold = expected * 0.05;
        for (Multiset.Entry<Integer> e : results.entrySet()) {
            int offBy = Math.abs(e.getCount() - expected);
            assertTrue("Partition " + e.getElement() + " had " + e.getCount() +
                    " elements, expected " + expected + ", threshold is " + threshold,
                    offBy < threshold);
        }
    }
}
/**
 * Serializes a bean holding every supported Multiset flavor and checks the
 * exact JSON emitted for each. Nulls are kept by the hash/linked variants
 * but the sorted/tree/immutable/enum fixtures use the null-free list, since
 * those implementations reject null elements.
 */
public void testSerialization() {
    BeanWithMultisetTypes bean = new BeanWithMultisetTypes();

    List<String> list = Arrays.asList( "foo", "abc", null, "abc" );
    List<String> listWithNonNull = Arrays.asList( "foo", "abc", "bar", "abc" );

    bean.multiset = LinkedHashMultiset.create( list );
    bean.hashMultiset = HashMultiset.create( Arrays.asList( "abc", "abc" ) );
    bean.linkedHashMultiset = LinkedHashMultiset.create( list );
    bean.sortedMultiset = TreeMultiset.create( listWithNonNull );
    bean.treeMultiset = TreeMultiset.create( listWithNonNull );
    bean.immutableMultiset = ImmutableMultiset.copyOf( listWithNonNull );
    bean.enumMultiset = EnumMultiset.create( Arrays.asList( AlphaEnum.B, AlphaEnum.A, AlphaEnum.D, AlphaEnum.A ) );

    // Expected ordering mirrors each implementation's iteration order:
    // insertion order for linked variants, natural order for sorted ones.
    String expected = "{" +
            "\"multiset\":[\"foo\",\"abc\",\"abc\",null]," +
            "\"hashMultiset\":[\"abc\",\"abc\"]," +
            "\"linkedHashMultiset\":[\"foo\",\"abc\",\"abc\",null]," +
            "\"sortedMultiset\":[\"abc\",\"abc\",\"bar\",\"foo\"]," +
            "\"treeMultiset\":[\"abc\",\"abc\",\"bar\",\"foo\"]," +
            "\"immutableMultiset\":[\"foo\",\"abc\",\"abc\",\"bar\"]," +
            "\"enumMultiset\":[\"A\",\"A\",\"B\",\"D\"]" +
            "}";

    assertEquals( expected, BeanWithMultisetTypesMapper.INSTANCE.write( bean ) );
}
/**
 * Deserializes JSON containing every supported Multiset flavor, including
 * null entries, and checks the reconstructed collections. The input
 * deliberately includes nulls for the sorted/tree/immutable/enum fields while
 * the expectations omit them — the mapper apparently drops nulls for
 * implementations that cannot hold them (NOTE(review): confirm this is the
 * mapper's documented behavior).
 */
public void testDeserialization() {

    String input = "{" +
            "\"multiset\":[\"foo\",\"abc\",\"abc\",null]," +
            "\"hashMultiset\":[\"abc\",\"abc\"]," +
            "\"linkedHashMultiset\":[\"foo\",\"abc\",\"abc\",null]," +
            "\"sortedMultiset\":[\"foo\",\"abc\",\"bar\",\"abc\",null]," +
            "\"treeMultiset\":[\"bar\",\"abc\",\"abc\",\"foo\",null]," +
            "\"immutableMultiset\":[\"foo\",\"abc\",\"abc\",\"bar\",null]," +
            "\"enumMultiset\":[\"B\",\"A\",\"A\",\"D\",null]" +
            "}";

    BeanWithMultisetTypes result = BeanWithMultisetTypesMapper.INSTANCE.read( input );
    assertNotNull( result );

    List<String> expectedList = Arrays.asList( "foo", "abc", null, "abc" );
    List<String> expectedListWithNonNull = Arrays.asList( "foo", "abc", "bar", "abc" );

    assertEquals( LinkedHashMultiset.create( expectedList ), result.multiset );
    assertEquals( HashMultiset.create( Arrays.asList( "abc", "abc" ) ), result.hashMultiset );
    assertEquals( LinkedHashMultiset.create( expectedList ), result.linkedHashMultiset );
    assertEquals( TreeMultiset.create( expectedListWithNonNull ), result.sortedMultiset );
    assertEquals( TreeMultiset.create( expectedListWithNonNull ), result.treeMultiset );
    assertEquals( ImmutableMultiset.copyOf( expectedListWithNonNull ), result.immutableMultiset );
    assertEquals( EnumMultiset.create( Arrays.asList( AlphaEnum.B, AlphaEnum.A, AlphaEnum.D, AlphaEnum.A ) ), result.enumMultiset );
}
/**
 * Samples 100 records from schema014.json and verifies VIN-style fields:
 * v1 must mirror the nested v2.VIN value, and the two-character prefixes of
 * v1 and v3 must come from the expected manufacturer prefix sets.
 */
@Test
public void testSchema() throws IOException {
    SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema014.json"), Charsets.UTF_8).read());
    // Sorted multisets so elementSet() prints in a deterministic order below.
    Multiset<String> prefixCounts = TreeMultiset.create();
    Multiset<String> otherCounts = TreeMultiset.create();
    for (int i = 0; i < 100; i++) {
        JsonNode r = s.sample();
        // v1 is expected to duplicate the VIN nested inside v2.
        assertEquals(r.get("v1").asText(), r.get("v2").get("VIN").asText());
        prefixCounts.add(r.get("v1").asText().substring(0, 2));
        otherCounts.add(r.get("v3").asText().substring(0, 2));
        System.out.printf("%s\n", r);
    }
    assertEquals("[1F, 2F, 3F]", prefixCounts.elementSet().toString());
    assertEquals("[2F, 3F]", otherCounts.elementSet().toString());
}
/**
 * Draws {@code size} samples from the selector and returns them tallied in a
 * sorted multiset.
 */
private <T extends Comparable<?>> Multiset<T> selectNext(RandomSelector<T> selector, Random random, int size) {
    final Multiset<T> draws = TreeMultiset.create();
    int remaining = size;
    while (remaining > 0) {
        draws.add(selector.next(random));
        remaining--;
    }
    return draws;
}
/**
 * Appends a "USE" record to {@code found} for each node that references
 * known definitions: the node's token type and qualified name, followed by
 * the sorted token types of the definitions' r-values ("<null>" when a
 * definition has no r-value, prefixed with "EXTERN " for extern definitions).
 */
public void visit(NodeTraversal traversal, Node node, Node parent) {
    Collection<Definition> defs = passUnderTest.getDefinitionsReferencedAt(node);
    if (defs == null) {
        return;
    }
    // Sorted multiset keeps the rendered definition list deterministic.
    Multiset<String> defstrs = TreeMultiset.create();
    for (Definition def : defs) {
        Node rValue = def.getRValue();
        String defstr = (rValue != null) ? Token.name(rValue.getType()) : "<null>";
        if (def.isExtern()) {
            defstr = "EXTERN " + defstr;
        }
        defstrs.add(defstr);
    }
    StringBuilder sb = new StringBuilder()
            .append("USE ")
            .append(Token.name(node.getType()))
            .append(" ")
            .append(node.getQualifiedName())
            .append(" -> ")
            .append(defstrs.toString());
    found.add(sb.toString());
}
/**
 * Computes the distribution of tree sizes across all rules in the grammar:
 * each rule's tree size is added with multiplicity equal to the rule's count.
 *
 * @return a sorted multiset mapping tree size to frequency
 */
@Override
public SortedMultiset<Integer> computeGrammarTreeSizeStats() {
    // Get tree size distribution.
    final SortedMultiset<Integer> treeSizes = TreeMultiset.create();
    for (final Entry<T, ConcurrentHashMultiset<TreeNode<T>>> entry : grammar
            .entrySet()) {
        for (final Multiset.Entry<TreeNode<T>> rule : entry.getValue()
                .entrySet()) {
            // Weight each size by how often the rule occurs.
            treeSizes.add(rule.getElement().getTreeSize(), rule.getCount());
        }
    }
    return treeSizes;
}
/**
 * Initializes one empty sorted score multiset per rank cutoff, for both all
 * suggestions and correct suggestions.
 */
public PrecisionRecallStats() {
    for (final int rank : RANK_K_VALUES) {
        suggestionsValuesAtRank.put(rank, TreeMultiset.<Double> create());
        correctSuggestionsValuesAtRank.put(rank, TreeMultiset.<Double> create());
    }
}
/**
 * Prints pattern statistics as CSV for every (minCount, minSize) threshold
 * combination. Both threshold arrays are sorted ascending so each pruning
 * step can reuse the previous (already pruned) multiset.
 *
 * @param minPatternSizes minimum pattern sizes to evaluate
 * @param minPatternCounts minimum pattern counts to evaluate
 */
public void printStatisticsFor(final int[] minPatternSizes,
        final int[] minPatternCounts) {
    Arrays.sort(minPatternCounts);
    Arrays.sort(minPatternSizes);
    loadPatternsForFiles();

    // Re-key patterns by their dictionary id, preserving counts.
    Multiset<Integer> prunedByCount = TreeMultiset.create();
    for (final Entry<TreeNode<Integer>> pattern : patterns.entrySet()) {
        prunedByCount.add(patternDictionary.get(pattern.getElement()),
                pattern.getCount());
    }

    System.out
            .println("minCount,minSize,nSeenPatterns,patternRecall,avgPatternSizeSeen,avgCoverage,avgFileRecall,avgSitesMatched,avgPatternSizePerFile");
    for (int i = 0; i < minPatternCounts.length; i++) {
        // Thresholds are ascending, so pruning is cumulative: each iteration
        // starts from the already-pruned multiset of the previous one.
        prunedByCount = getRulesWithMinCount(prunedByCount,
                minPatternCounts[i]);
        Multiset<Integer> prunedByCountBySize = prunedByCount;
        for (int j = 0; j < minPatternSizes.length; j++) {
            prunedByCountBySize = getRulesWithMinSize(prunedByCountBySize,
                    minPatternSizes[j]);
            // Great now our patterns are in prunedByCountBySize
            printPatternStatistics(prunedByCountBySize,
                    minPatternCounts[i], minPatternSizes[j]);
        }
    }
}
/**
 * Collects all alternative substitutions for {@code tokenToSubstitute}
 * across the given n-gram contexts, using the global trie.
 *
 * @param ngrams the n-gram contexts to query
 * @param tokenToSubstitute the token being replaced in each n-gram
 * @return a sorted multiset of candidate names (one occurrence per n-gram
 *         context that proposed the candidate)
 */
public Multiset<String> getAlternativeNamings(
        final Multiset<NGram<String>> ngrams, final String tokenToSubstitute) {
    final Multiset<String> namings = TreeMultiset.create();

    final LongTrie<String> globalTrie = getTrie();

    for (final Multiset.Entry<NGram<String>> ngramEntry : ngrams.entrySet()) {
        final NGram<String> ngram = ngramEntry.getElement();
        // getAlternativesForNGram is contractually non-null; fail fast if not.
        final Set<String> alternatives = checkNotNull(getAlternativesForNGram(
                globalTrie, ngram, tokenToSubstitute));
        namings.addAll(alternatives);
    }

    return namings;
}
/**
 * Computes the residual (left-over) probability mass for the given prefix
 * under Katz back-off: one minus the sum of discounted (Katz) counts of the
 * prefix's productions, normalized by the prefix's continuation count.
 *
 * @param prefix the n-gram prefix whose residual mass is needed
 * @return a value in [0, 1]
 */
private double getResidualProbability(final NGram<String> prefix) {
    final TrieNode<Long> prefixU = trie.getNGramNodeForInput(prefix, true);
    // now for all these ngrams get their counts and sum their katz
    final TreeMultiset<Long> counts = TreeMultiset.create();
    final Long unkSymbolId = trie.getUnkSymbolId();
    for (final Entry<Long, TrieNode<Long>> child : prefixU.prods.entrySet()) {
        // The UNK symbol is excluded: its mass is exactly what the residual
        // is reserved for.
        if (child.getKey().equals(unkSymbolId)) {
            continue;
        }
        counts.add(child.getValue().count);
    }
    // get the Katz counts and sum them up
    double katzCountSum = 0;
    for (final com.google.common.collect.Multiset.Entry<Long> entry : counts
            .entrySet()) {
        // Identical raw counts share one Katz lookup, weighted by frequency.
        katzCountSum += getKatzCount(entry.getElement(), prefix.size() + 1)
                * (entry.getCount());
    }
    // Normalize by continuations of the prefix (total minus terminations).
    final double residual = 1. - katzCountSum
            / (prefixU.count - prefixU.terminateHere);
    // There are cases where no probability mass is left
    checkArgument(residual >= 0);
    checkArgument(residual <= 1);
    return residual;
}
/**
 * Appends a "USE" record to {@code found} for each node that references
 * known definitions. The record lists the node's token type and qualified
 * name followed by the sorted token types of the referenced definitions'
 * r-values; "<null>" stands in for definitions without an r-value, and
 * extern definitions are prefixed with "EXTERN ".
 */
@Override
public void visit(NodeTraversal traversal, Node node, Node parent) {
    Collection<Definition> defs = passUnderTest.getDefinitionsReferencedAt(node);
    if (defs != null) {
        StringBuilder sb = new StringBuilder();
        sb.append("USE ");
        sb.append(Token.name(node.getType()));
        sb.append(" ");
        sb.append(node.getQualifiedName());
        sb.append(" -> ");
        // Sorted multiset makes the rendered list order-independent of how
        // definitions were returned.
        Multiset<String> defstrs = TreeMultiset.create();
        for (Definition def : defs) {
            String defstr;
            Node rValue = def.getRValue();
            if (rValue != null) {
                defstr = Token.name(rValue.getType());
            } else {
                defstr = "<null>";
            }
            if (def.isExtern()) {
                defstr = "EXTERN " + defstr;
            }
            defstrs.add(defstr);
        }
        sb.append(defstrs.toString());
        found.add(sb.toString());
    }
}
/**
 * Collecting from a parallel stream into a TreeMultiset must yield the same
 * multiset as sequential bulk insertion.
 */
@Test
public void testMultiSet_3() {
    // Reference multiset built sequentially.
    final Multiset<String> expected = TreeMultiset.create();
    expected.addAll( TestHelper.STRINGS );

    // Same data, collected via the Guava collector from a parallel stream.
    final Multiset<String> actual = TestHelper.STRINGS
            .parallelStream()
            .collect( GuavaCollectors.toMultiSet( TreeMultiset::create ) );

    assertEquals( expected, actual );
}
/**
 * Creates a running-percentile tracker over a sliding window.
 *
 * @param windowSize number of samples kept in the window
 * @param percentile the percentile to track
 */
public RunningPercentile(int windowSize, int percentile) {
    // Raw samples in arrival order (the sliding window).
    this.times = new long[windowSize];
    // Sorted halves of the window, split around the percentile boundary.
    this.lowerTimes = TreeMultiset.create();
    this.upperTimes = TreeMultiset.create();
    this.windowSize = windowSize;
    this.percentile = percentile;
}
/**
 * Creates a completed-time tracker backed by a sorted TreeMultiset.
 *
 * The removal function deletes every completed time strictly below the given
 * time and returns the highest removed value, or -1 when nothing qualified.
 * Because the multiset is sorted ascending, iteration can stop at the first
 * element that is not below the threshold.
 */
static CompletedTimeTrackerImpl createUsingTreeMultiSet() {
    Function2<TreeMultiset<Long>,Long,Long,RuntimeException> removeTimesLowerThanAndReturnHighestRemovedFun =
            new Function2<TreeMultiset<Long>,Long,Long,RuntimeException>() {
        @Override
        public Long apply( TreeMultiset<Long> completedTimesAsMilli, Long timeAsMilli ) {
            long highestRemovedAsMilli = -1;
            // BUG FIX: the previous version called completedTimesAsMilli.remove()
            // inside a for-each loop over the same multiset, which throws
            // ConcurrentModificationException on a fail-fast iterator. Use the
            // iterator's own remove() instead (removes a single occurrence,
            // matching the old intent).
            java.util.Iterator<Long> times = completedTimesAsMilli.iterator();
            while ( times.hasNext() ) {
                long completedTimeAsMilli = times.next();
                if ( completedTimeAsMilli < timeAsMilli ) {
                    times.remove();
                    highestRemovedAsMilli = completedTimeAsMilli;
                } else {
                    // Sorted ascending: no later element can be below the threshold.
                    break;
                }
            }
            return highestRemovedAsMilli;
        }
    };
    return new CompletedTimeTrackerImpl(
            TreeMultiset.<Long>create(),
            removeTimesLowerThanAndReturnHighestRemovedFun );
}
/**
 * Creates an initiated-time tracker backed by a sorted TreeMultiset.
 *
 * The lookup function returns the lowest initiated time, i.e. the first
 * element of the ascending-sorted multiset.
 * NOTE(review): firstEntry() returns null on an empty multiset, so this
 * throws NPE if called before any time is tracked — confirm callers
 * guarantee non-emptiness.
 */
static InitiatedTimeTrackerImpl createUsingTreeMultiSet() {
    Function1<TreeMultiset<Long>,Long,RuntimeException> getLastKnownLowestInitiatedTimeFun =
            new Function1<TreeMultiset<Long>,Long,RuntimeException>() {
        @Override
        public Long apply( TreeMultiset<Long> initiatedTimesAsMilli ) {
            return initiatedTimesAsMilli.firstEntry().getElement();
        }
    };
    return new InitiatedTimeTrackerImpl(
            TreeMultiset.<Long>create(),
            getLastKnownLowestInitiatedTimeFun );
}
/**
 * Asserts that the pail contains exactly the given objects, compared as
 * multisets (order-insensitive, duplicate-sensitive).
 *
 * NOTE(review): the raw TreeMultiset types look deliberate — T is unbounded
 * here while TreeMultiset.create() requires Comparable elements, so
 * generifying would not compile without changing this method's signature.
 */
public static <T> void assertPailContents(Pail<T> pail, T... objects) {
    TreeMultiset contains = getPailContents(pail);
    TreeMultiset other = TreeMultiset.create();
    for(T obj: objects) {
        other.add(obj);
    }
    Assert.assertEquals(failureString(other, contains), other, contains);
}
/**
 * Asserts that the pail contains exactly the given objects, compared as
 * multisets (order-insensitive, duplicate-sensitive).
 */
public static void assertPailContents(Pail pail, List objects) {
    TreeMultiset contains = getPailContents(pail);
    TreeMultiset other = TreeMultiset.create();
    for(Object obj: objects) {
        other.add(obj);
    }
    // FIX: removed a dead, empty for-each loop over `contains` that had no
    // effect (likely a leftover from debugging).
    Assert.assertEquals(failureString(other, contains), other, contains);
}
/**
 * Reads every object out of the pail into a sorted multiset.
 *
 * NOTE(review): built as a raw TreeMultiset and returned as TreeMultiset<T>
 * via an unchecked conversion — T is unbounded while TreeMultiset.create()
 * requires Comparable elements, so elements must be mutually comparable at
 * runtime or add() will throw ClassCastException.
 */
public static <T> TreeMultiset<T> getPailContents(Pail<T> pail) {
    TreeMultiset contains = TreeMultiset.create();
    for(T obj: pail) {
        contains.add(obj);
    }
    return contains;
}
/**
 * Produces a fresh one-element TreeMultiset seeded with the generated value.
 */
@Generates
private static <E extends Comparable<E>> TreeMultiset<E> generateTreeMultiset(
        E freshElement) {
    final TreeMultiset<E> result = TreeMultiset.create();
    result.add(freshElement);
    return result;
}
// Verifies that a fresh, distinct TreeMultiset<String> instance can be
// generated for the given type token.
public void testTreeMultiset() {
    assertFreshInstance(new TypeToken<TreeMultiset<String>>() {});
}
/**
 * Returns all teachers and students merged into one immutable list, ordered
 * by first name.
 */
public List<Person> getPersons() {
    final TreeMultiset<Person> byFirstName =
            TreeMultiset.create(new PersonFirstNameComparator());
    byFirstName.addAll(teacherManager.getAllTeachers());
    byFirstName.addAll(studentManager.getAllStudents());
    // Snapshot so callers cannot mutate the sorted view.
    return ImmutableList.copyOf(byFirstName);
}
@Override protected void execute() throws Exception { // If we have raw data rules, parse the input just for validation if (rules.values().stream().anyMatch(r -> r instanceof RawEntryRule || r instanceof RawNodeRule)) { Stopwatch readTimer = Stopwatch.createStarted(); for (Map.Entry<URI, ByteSource> input : graphTool().getInputs().entrySet()) { BdioOptions.Builder options = new BdioOptions.Builder(); GraphTool.setContentType(input.getKey(), options::forContentType); RxJavaBdioDocument doc = new RxJavaBdioDocument(options.build()); doc.read(input.getValue().openStream()) .doOnNext(this::executeWithRawEntry) .flatMapIterable(BdioDocument::toGraphNodes) .doOnNext(this::executeWithRawNode) .subscribe(); } printDebugMessage("Time to read BDIO input: %s%n", readTimer.stop()); } // If we have graph rules, delegate to the super to load the graph if (rules.values().stream().anyMatch(r -> r instanceof LoadedGraphRule || r instanceof CompletedGraphRule)) { super.execute(); } // If we have any violations, report them Multiset<Severity> severityCounts = TreeMultiset.create(); Multiset<Class<?>> ruleCounts = HashMultiset.create(); for (Violation violation : violations) { severityCounts.add(violation.severity()); int ruleCount = ruleCounts.add(violation.rule().getClass(), 1); if (ruleCount < maxViolations) { printOutput("%s: %s: [%s] %s%n", violation.target(), violation.severity(), violation.rule().getClass().getSimpleName(), violation.message()); } else if (ruleCount == maxViolations) { printOutput("[%s] limit reached, further occurances of this volation will be suppressed%n", violation.rule().getClass().getSimpleName()); } } if (!severityCounts.isEmpty()) { printOutput(severityCounts.entrySet().stream() .map(e -> MessageFormat.format("{0,choice,1#1 {1}|1<{0,number,integer} {1}s}", e.getCount(), e.getElement())) .collect(Collectors.joining("%n", "", "%n"))); } printDebugMessage("%d rules executed%n", rules.size()); }
// Factory hook that supplies an empty TreeMultiset for the given type —
// presumably used by a deserialization framework's InstanceCreator; the
// `type` argument is intentionally ignored (raw multiset). TODO confirm.
@Override
public TreeMultiset createInstance(Type type) {
    return TreeMultiset.create();
}
// Returns the internal reminder multiset directly.
// NOTE(review): this exposes the mutable internal collection to callers —
// consider whether an unmodifiable view would be safer.
public TreeMultiset<Reminder> getReminderSet() {
    return reminderSet;
}
/**
 * Push results.
 *
 * Walks the ranked suggestions for one file, recording at each configured
 * rank cutoff (RANK_K_VALUES) the score of the best correct suggestion found
 * so far (or the current score if none found yet), and accumulating
 * per-file precision statistics.
 *
 * @param realPatternIds ids of the suggestions considered correct
 * @param suggestions ranked suggestions, best first
 */
public synchronized void pushResults(final Set<Integer> realPatternIds,
        final SortedSet<Suggestion> suggestions) {
    int currentK = 1;          // 1-based rank of the suggestion being visited
    int currentRankIdx = 0;    // next cutoff in RANK_K_VALUES to record
    boolean foundPattern = false;
    double scoreFound = Double.NEGATIVE_INFINITY;
    // first = all suggestion scores for this file, second = correct ones.
    final Pair<SortedMultiset<Double>, SortedMultiset<Double>> fileSuggestions = Pair
            .<SortedMultiset<Double>, SortedMultiset<Double>> create(
                    TreeMultiset.<Double> create(),
                    TreeMultiset.<Double> create());
    for (final Suggestion suggestion : suggestions) {
        if (realPatternIds.contains(suggestion.id) && !foundPattern) {
            // Remember the score at which the first correct hit appeared.
            foundPattern = true;
            scoreFound = suggestion.score;
        }
        checkArgument(currentK <= RANK_K_VALUES[currentRankIdx],
                "CurrentK is %s but we still haven't evaluated idx %s",
                currentK, currentRankIdx);
        if (RANK_K_VALUES[currentRankIdx] == currentK) {
            // Push the results so far.
            if (foundPattern) {
                suggestionsValuesAtRank.get(currentK).add(scoreFound);
                correctSuggestionsValuesAtRank.get(currentK).add(
                        scoreFound);
                // Suggestions are ranked best-first, so the current score can
                // never exceed the score of the earlier correct hit.
                checkArgument(suggestion.score <= scoreFound,
                        "Score is %s but best is %s", suggestion.score,
                        scoreFound);
            } else {
                suggestionsValuesAtRank.get(currentK).add(
                        suggestion.score);
            }
            currentRankIdx++;
        }
        // Precision Stats
        allSuggestionsScores.add(suggestion.score);
        fileSuggestions.first.add(suggestion.score);
        if (realPatternIds.contains(suggestion.id)) {
            fileSuggestions.second.add(suggestion.score);
        }
        currentK++;
        // Stop once every rank cutoff has been recorded.
        if (currentRankIdx >= RANK_K_VALUES.length) {
            break;
        }
    }
    suggestionPrecision.add(fileSuggestions);
}
/**
 * Returns the highest log index acknowledged by a numerical majority of
 * peers. A peer with no recorded ack counts as index 0.
 */
private static long getGreatestIndexCommittedByMajority(Set<Long> peers,
        Map<Long, Long> peersLastAckedIndex) {
    // Gather each peer's last acked index in sorted order.
    SortedMultiset<Long> ackedIndexes = TreeMultiset.create();
    for (Long peerId : peers) {
        ackedIndexes.add(peersLastAckedIndex.getOrDefault(peerId, 0L));
    }
    // The majority-th highest acked index is replicated on a quorum.
    return Iterables.get(ackedIndexes.descendingMultiset(),
            calculateNumericalMajority(peers.size()) - 1);
}