/** * transforms paragraph vectors into hierarchical vectors * @param iterator iterator over docs * @param lookupTable the paragraph vector table * @param labels the labels * @param k the no. of centroids * @return a map doc->hierarchical vector */ static Map<String, INDArray> getPar2Hier(LabelAwareIterator iterator, WeightLookupTable<VocabWord> lookupTable, List<String> labels, int k, Method method) { Collections.sort(labels); LabelsSource labelsSource = iterator.getLabelsSource(); PatriciaTrie<String> trie = new PatriciaTrie<>(); for (String label : labels) { trie.put(label, label); } Map<String, INDArray> hvs = new TreeMap<>(); // for each doc for (String node : labelsSource.getLabels()) { Par2HierUtils.getPar2HierVector(lookupTable, trie, node, k, hvs, method); } return hvs; }
private PartOfSpeechTagging() { wordTagTrie = new PatriciaTrie<PartOfSpeech>(); try { int count = load(DEFAULT_TAG_FILE, wordTagTrie); System.out.println(String.format(Locale.getDefault(), "load default word tags from %s complete, size %d", DEFAULT_TAG_FILE, count)); if (new File(CUSTOM_TAG_FILE).exists()) { count = load(CUSTOM_TAG_FILE, wordTagTrie); System.out.println(String.format(Locale.getDefault(), "load custom word tags from %s complete, size %d", CUSTOM_TAG_FILE, count)); } else { System.out.println(String.format(Locale.getDefault(), "custom word tags file not exists %s", CUSTOM_TAG_FILE)); } } catch (Exception e) { throw new RuntimeException(e); } }
/**
 * Attaches content-proposal support to {@code tagEntry} and fills it with the given names.
 *
 * @param proposals   trie of proposals backing the proposal provider
 * @param sortedNames items to show in {@code tagEntry}
 */
public void bindToViewModel(PatriciaTrie<ContentProposal> proposals, String[] sortedNames) {
    final TagListContentProposalProvider provider = new TagListContentProposalProvider(proposals);

    IContentProposalListener acceptanceListener = new IContentProposalListener() {
        @Override
        public void proposalAccepted(IContentProposal userSelectedProposal) {
            // The provider's "make new" proposal is recognised by identity, not by content.
            boolean isMakeNew = userSelectedProposal == provider.getMakeNewProposal();
            if (isMakeNew) {
                for (TagListSelectedEventHandler handler : eventHandlers) {
                    handler.requestNewTag(userSelectedProposal.getContent());
                    clearText();
                }
            }
            // Content is re-read here on purpose: it is queried again after any clearText() above.
            addTagInternal(userSelectedProposal.getContent());
        }
    };

    ContentProposalAdapter adapter =
            new ContentProposalAdapter(tagEntry, new ComboContentAdapter(), provider, null, null);
    adapter.setPropagateKeys(true);
    adapter.setProposalAcceptanceStyle(ContentProposalAdapter.PROPOSAL_REPLACE);
    adapter.addContentProposalListener(acceptanceListener);

    tagEntry.setItems(sortedNames);
}
/**
 * Initialises the iterator and rebuilds the age-off table from the options.
 *
 * <p>Every option whose key starts with {@code AGE_OFF_PREFIX} is parsed as a {@code long}
 * and stored in {@code ageoffs} under the key with the prefix removed; the smallest and
 * largest values seen are folded into {@code minAgeOff} and {@code maxAgeOff}. Afterwards the
 * default age-off is read from the entry named {@code DEFAULT_AGEOFF_KEY} and the current
 * wall-clock time is captured.
 *
 * @throws IOException           as declared by the overridden method
 * @throws NumberFormatException if a prefixed option value is not a valid long
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env)
        throws IOException {
    super.init(source, options, env);
    validateOptions(options);

    ageoffs = new PatriciaTrie<>();
    for (Map.Entry<String, String> option : options.entrySet()) {
        String optionKey = option.getKey();
        if (!optionKey.startsWith(AGE_OFF_PREFIX)) {
            continue;
        }
        String name = optionKey.substring(AGE_OFF_PREFIX.length());
        long ageoff = Long.parseLong(option.getValue());
        LOG.trace("Adding {} to Trie with value {}", name, ageoff);
        minAgeOff = Math.min(minAgeOff, ageoff);
        maxAgeOff = Math.max(maxAgeOff, ageoff);
        ageoffs.put(name, ageoff);
    }

    defaultAgeOff = ageoffs.get(DEFAULT_AGEOFF_KEY);
    currentTime = System.currentTimeMillis();
}
/**
 * Initialises the iterator and rebuilds the age-off table from the options.
 *
 * <p>Every option whose key starts with {@code AGE_OFF_PREFIX} is parsed as a {@code long}
 * and stored in {@code ageoffs} under the key with the prefix removed; the smallest and
 * largest values seen are folded into {@code minAgeOff} and {@code maxAgeOff}. Afterwards the
 * default age-off is read from the entry named {@code DEFAULT_AGEOFF_KEY} and the current
 * wall-clock time is captured.
 *
 * @throws IOException           as declared by the overridden method
 * @throws NumberFormatException if a prefixed option value is not a valid long
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env)
        throws IOException {
    super.init(source, options, env);
    validateOptions(options);
    ageoffs = new PatriciaTrie<>();
    options.forEach((k, v) -> {
        if (k.startsWith(AGE_OFF_PREFIX)) {
            String name = k.substring(AGE_OFF_PREFIX.length());
            // Parse once and reuse the value for logging and storage.
            long ageoff = Long.parseLong(v);
            // The message needs one placeholder per argument; without the second "{}"
            // the age-off value was silently dropped from the log line.
            LOG.trace("Adding {} to Trie with value {}", name, ageoff);
            this.minAgeOff = Math.min(this.minAgeOff, ageoff);
            this.maxAgeOff = Math.max(this.maxAgeOff, ageoff);
            ageoffs.put(name, ageoff);
        }
    });
    defaultAgeOff = ageoffs.get(DEFAULT_AGEOFF_KEY);
    currentTime = System.currentTimeMillis();
}
/**
 * Builds a normalizer whose area-code table is the union of the bundled spreadsheets.
 *
 * <p>The files are merged in the order listed, so for a key present in more than one
 * file the entry from the later file is the one that remains.
 *
 * @return a normalizer backed by the merged table
 * @throws IllegalStateException if any spreadsheet cannot be read or has an invalid format
 */
private TelNoNormalizer create() {
    final String[] spreadsheets = {
        "soumu/000124070.xls",
        "soumu/000124071.xls",
        "soumu/000124072.xls",
        "soumu/000124073.xls",
        "soumu/000124074.xls",
        "soumu/000124075.xls",
        "soumu/000124076.xls",
        "soumu/000124077.xls",
    };
    try {
        Trie<String, Integer> areaCodes = new PatriciaTrie<>();
        for (String spreadsheet : spreadsheets) {
            areaCodes.putAll(parse(spreadsheet));
        }
        TelNoNormalizerImpl normalizer = new TelNoNormalizerImpl();
        normalizer.setAreaCodeTable(areaCodes);
        return normalizer;
    } catch (InvalidFormatException | IOException ex) {
        throw new IllegalStateException(ex);
    }
}
public static void load() { InputStream is = Checker.class.getResourceAsStream("/bg_BG.dic"); List<String> lines = null; try { lines = IOUtils.readLines(is, "utf-8"); } catch (IOException ex) { throw new IllegalStateException(ex); } finally { IOUtils.closeQuietly(is); } dictionary = new PatriciaTrie<Set<String>>(); for (String line : lines) { int paradigmIdx = line.indexOf("/"); if (paradigmIdx != -1) { String inflectionClasses = line.substring(paradigmIdx + 1); // /AK is possible, i.e. multiple infl. classes per word dictionary.put(line.substring(0, paradigmIdx).toLowerCase(), Sets.newHashSet(charToStringArray(inflectionClasses.toCharArray()))); } else { dictionary.put(line.toLowerCase(), Collections.<String>emptySet()); } } }
public void addChildren(SpringConfigurationMetadataGroup group, String[] pathSegments, String belongsTo) { int startIndex = computeStartIndexAndAddSourcePath(); addSourcePathTillRoot(belongsTo); // Case where alpha.childNode11.charlie is already added via source 1 & source2 tries to add a group for alpha.childNode11 if (startIndex >= pathSegments.length) { if (this.group == null) { this.group = group; } } else { if (children == null) { children = new HashSet<>(); sanitisedChildTrie = new PatriciaTrie<>(); } String pathSegment = pathSegments[startIndex]; String sanitizedPathSegment = sanitize(pathSegment); MetadataNode childNode = MetadataNode.newInstance(pathSegment, this, belongsTo); // If this is the last segment, lets set group boolean noMoreSegmentsLeft = startIndex == pathSegments.length - 1; if (noMoreSegmentsLeft) { childNode.setGroup(group); } children.add(childNode); childNode.setParent(this); sanitisedChildTrie.put(sanitizedPathSegment, childNode); childNode.addChildren(group, pathSegments, belongsTo); } }
public void addChildren(SpringConfigurationMetadataProperty property, String[] pathSegments, String belongsTo) { int startIndex = computeStartIndexAndAddSourcePath(); addSourcePathTillRoot(belongsTo); // Case where alpha.childNode11.charlie is already added via source 1 & source2 tries to add a group for alpha.childNode11 if (startIndex >= pathSegments.length) { if (this.property == null) { this.property = property; } } else { if (children == null) { children = new HashSet<>(); sanitisedChildTrie = new PatriciaTrie<>(); } String pathSegment = pathSegments[startIndex]; String sanitizedPathSegment = sanitize(pathSegment); MetadataNode childNode = MetadataNode.newInstance(pathSegment, this, belongsTo); // If this is the last segment, lets set path boolean noMoreSegmentsLeft = startIndex == pathSegments.length - 1; if (noMoreSegmentsLeft) { childNode.setProperty(property); } children.add(childNode); childNode.setParent(this); sanitisedChildTrie.put(sanitizedPathSegment, childNode); childNode.addChildren(property, pathSegments, belongsTo); } }
/**
 * Creates the service with empty project-level and module-level bookkeeping:
 * one seen-container map and one search index for the project, plus per-module
 * maps (keyed by module name) that are filled in later.
 */
SuggestionIndexServiceImpl() {
    // project-wide state
    projectSeenContainerPathToContainerInfo = new HashMap<>();
    projectSanitisedRootSearchIndex = new PatriciaTrie<>();

    // per-module state, keyed by module name
    moduleNameToSeenContainerPathToContainerInfo = new HashMap<>();
    moduleNameToSanitisedRootSearchIndex = new HashMap<>();
}
/**
 * Re-indexes one module: works out which containers are new and which have disappeared,
 * adds the project-level changes to both lists, and processes the result against the
 * module's own seen-container map and search index (both created on first use).
 *
 * @param newProjectSourcesToProcess project-level containers to add for this module as well
 * @param projectContainersToRemove  project-level containers to remove for this module as well
 * @param module                     the module being re-indexed
 */
private void reindexModule(List<ContainerInfo> newProjectSourcesToProcess,
                           List<ContainerInfo> projectContainersToRemove, Module module) {
    final String moduleName = module.getName();

    Map<String, ContainerInfo> seenContainers =
            moduleNameToSeenContainerPathToContainerInfo.computeIfAbsent(moduleName, k -> new HashMap<>());
    Trie<String, MetadataNode> searchIndex =
            moduleNameToSanitisedRootSearchIndex.computeIfAbsent(moduleName, k -> new PatriciaTrie<>());

    OrderEnumerator orderEntries = OrderEnumerator.orderEntries(module);

    List<ContainerInfo> toProcess = computeNewContainersToProcess(orderEntries, seenContainers);
    toProcess.addAll(newProjectSourcesToProcess);

    List<ContainerInfo> toRemove = computeContainersToRemove(orderEntries, seenContainers);
    toRemove.addAll(projectContainersToRemove);

    processContainers(toProcess, toRemove, seenContainers, searchIndex);
}
/**
 * Creates the navigation model and registers it with its parent.
 *
 * <p>Registration happens straight after the parent reference is stored, i.e. before the
 * remaining fields are initialised.
 *
 * @param parentModel the owning tasks model
 */
public TasksNavigationModel(TasksModel parentModel) {
    this.parentModel = parentModel;
    parentModel.setTasksNavigationModel(this);

    availableTags = new PatriciaTrie<String>();
    paginationModel = new TasksPaginationModel();
    tagSuggestionListeners = new ArrayList<TagSuggestionListener>();
    tasksUpdatedListeners = new ArrayList<TasksUpdatedListener>();
}
/**
 * Builds the auto-complete index, containing the comparison operators
 * {@code ==}, {@code >=} and {@code <=}, each registered as a token.
 *
 * @return a new trie holding those three entries
 */
private PatriciaTrie<AutoCompleteType> initializeIndex() {
    PatriciaTrie<AutoCompleteType> trie = new PatriciaTrie<>();
    for (String operator : new String[] {"==", ">=", "<="}) {
        trie.put(operator, AutoCompleteType.TOKEN);
    }
    return trie;
}
/**
 * Returns the exchange-number allocation data: a map whose key is the exchange number
 * (6 digits) and whose value is the length of the area code.
 *
 * <p>Every configured resource is parsed in turn; for each parsed entry the value stored is
 * the length of the left element of the entry's pair.
 *
 * @return the allocation data (key: 6-digit exchange number, value: area-code length)
 * @throws InvalidFormatException if an allocation file has an invalid format
 * @throws IOException            if an allocation file cannot be read
 */
@Override
public Trie<String, Integer> getObject() throws InvalidFormatException, IOException {
    Trie<String, Integer> allocation = new PatriciaTrie<>();
    for (Resource resource : resources) {
        try (InputStream stream = resource.getInputStream()) {
            Map<String, Pair<String, String>> parsed = soumuExcelParser.parse(stream);
            for (Map.Entry<String, Pair<String, String>> row : parsed.entrySet()) {
                String left = row.getValue().getLeft();
                allocation.put(row.getKey(), left.length());
            }
        }
    }
    return allocation;
}
@PostConstruct public void init() { reverse = new PatriciaTrie<>(); for (Entry<String, Checker.InflectedFormType> entry: Checker.formsDictionary.entrySet()) { //using a StringBuilder so that no entry is placed in the jvm string pool String key = new StringBuilder(entry.getKey()).reverse().substring(0, Math.min(5, entry.getKey().length())); List<String> list = reverse.get(key); if (list == null) { list = new ArrayList<String>(); reverse.put(key, list); } list.add(entry.getKey()); } }
/** * Constructor which uses the toString() method of some values to get a string representation. * @param values A collection containing all possible values. */ @SafeVarargs public AbstractMapParameterTransformation(T... values) { this.map = new PatriciaTrie<>(); for (T value : values) { T old = map.put(value.toString().toLowerCase(), value); if (old != null) throw new IllegalArgumentException("Duplicate values for " + value.toString() + " and " + old.toString()); } }
/** * base case: on a leaf hv = pv * on a non-leaf node with n children: hv = pv + k centroids of the n hv */ private static INDArray getPar2HierVector(WeightLookupTable<VocabWord> lookupTable, PatriciaTrie<String> trie, String node, int k, Map<String, INDArray> hvs, Method method) { if (hvs.containsKey(node)) { return hvs.get(node); } INDArray hv = lookupTable.vector(node); String[] split = node.split(REGEX); Collection<String> descendants = new HashSet<>(); if (split.length == 2) { String separator = "."; String prefix = node.substring(0, node.indexOf(split[1])) + separator; SortedMap<String, String> sortedMap = trie.prefixMap(prefix); for (Map.Entry<String, String> entry : sortedMap.entrySet()) { if (prefix.lastIndexOf(separator) == entry.getKey().lastIndexOf(separator)) { descendants.add(entry.getValue()); } } } else { descendants = Collections.emptyList(); } if (descendants.size() == 0) { // just the pv hvs.put(node, hv); return hv; } else { INDArray chvs = Nd4j.zeros(descendants.size(), hv.columns()); int i = 0; for (String desc : descendants) { // child hierarchical vector INDArray chv = getPar2HierVector(lookupTable, trie, desc, k, hvs, method); chvs.putRow(i, chv); i++; } double[][] centroids; if (chvs.rows() > k) { centroids = Par2HierUtils.getTruncatedVT(chvs, k); } else if (chvs.rows() == 1) { centroids = Par2HierUtils.getDoubles(chvs.getRow(0)); } else { centroids = Par2HierUtils.getTruncatedVT(chvs, 1); } switch (method) { case CLUSTER: INDArray matrix = Nd4j.zeros(centroids.length + 1, hv.columns()); matrix.putRow(0, hv); for (int c = 0; c < centroids.length; c++) { matrix.putRow(c + 1, Nd4j.create(centroids[c])); } hv = Nd4j.create(Par2HierUtils.getTruncatedVT(matrix, 1)); break; case SUM: for (double[] centroid : centroids) { hv.addi(Nd4j.create(centroid)); } break; } hvs.put(node, hv); return hv; } }
/**
 * Creates an empty prefix map backed by a {@link PatriciaTrie}.
 */
public InterledgerPrefixMap() {
    prefixMap = new PatriciaTrie<>();
}
/**
 * Returns the trie of content proposals held by this object.
 *
 * @return the stored trie itself (not a copy)
 */
public PatriciaTrie<ContentProposal> getContentProposals() {
    return this.contentProposals;
}
/**
 * Creates a provider that serves proposals from the given trie.
 *
 * @param contentProposals trie of proposals; stored by reference, not copied
 */
public TagListContentProposalProvider(PatriciaTrie<ContentProposal> contentProposals) {
    this.contentProposals = contentProposals;
}
/**
 * Resets {@code typeTrie} to a fresh, empty trie.
 *
 * <p>NOTE(review): the signature matches the JAXB class-level unmarshal callback, so this is
 * presumably invoked by the unmarshaller before the object is populated — confirm.
 *
 * @param unmarshaller unused
 * @param parent       unused
 */
void beforeUnmarshal(Unmarshaller unmarshaller, Object parent) {
    this.typeTrie = new PatriciaTrie<>();
}
/**
 * Resets the constructor, method and property tries to fresh, empty tries.
 *
 * <p>NOTE(review): the signature matches the JAXB class-level unmarshal callback, so this is
 * presumably invoked by the unmarshaller before the object is populated — confirm.
 *
 * @param unmarshaller unused
 * @param parent       unused
 */
void beforeUnmarshal(Unmarshaller unmarshaller, Object parent) {
    this.constructorTrie = new PatriciaTrie<>();
    this.methodTrie = new PatriciaTrie<>();
    this.propertyTrie = new PatriciaTrie<>();
}
/**
 * Resets {@code namespaceTrie} to a fresh, empty trie.
 *
 * <p>NOTE(review): the signature matches the JAXB class-level unmarshal callback, so this is
 * presumably invoked by the unmarshaller before the object is populated — confirm.
 *
 * @param unmarshaller unused
 * @param parent       unused
 */
void beforeUnmarshal(Unmarshaller unmarshaller, Object parent) {
    this.namespaceTrie = new PatriciaTrie<>();
}
public static void main(String[] args) throws Exception { Checker c = new Checker(); c.initialize(); Trie<String, InflectedFormType> reverse = new PatriciaTrie<>(); for (Entry<String, Checker.InflectedFormType> entry: Checker.formsDictionary.entrySet()) { //using a StringBuilder so that no entry is placed in the jvm string pool String key = new StringBuilder(entry.getKey()).reverse().toString(); reverse.put(key, entry.getValue()); } FileOutputStream fos = new FileOutputStream("c:/var/echos.txt"); OutputStreamWriter out = new OutputStreamWriter(fos, "utf-8"); for (String form : c.formsDictionary.keySet()) { if (form.length() > 2) { String reversedForm = StringUtils.reverse(form); Set<String> echoesReversed = reverse.prefixMap(reversedForm).keySet(); StringBuilder sb = new StringBuilder(); String delim = ""; for (String echoReversed : echoesReversed) { String echo = StringUtils.reverse(echoReversed); // exclude the same word and any word that is formed directly from it and another word or common prefix String diff = echo.replace(form, ""); if (diff.length() == 1) { diff = ""; // ignore 1-letter diffs } if (form.equals("античен")) { diff = ""; } if (!echo.equals(form) && !c.formsDictionary.containsKey(diff) && !commonPrefixes.contains(diff)) { sb.append(delim + echo); delim = ", "; } } if (sb.length() > 0) { sb.insert(0, form + ": "); sb.append("\r\n"); } out.write(sb.toString()); } } out.close(); }
private static void checkAndInsertHomonym(String word, PatriciaTrie<InflectedFormType> formsDictionary, Set<String> homonyms, InflectedFormType type, String baseForm) { if (formsDictionary.containsKey(word)) { String originalBase = mapping.get(word); InflectedFormType originalType = formsDictionary.get(word); if (baseForm != null && originalBase != null) { if (sameRoot(baseForm, originalBase) // heuristic based on length || ignore(baseForm, originalBase, "н", "м", 1, 1, type, originalType, false, true) // "шлифовам" и "шлифован", напр. || ignore(baseForm, originalBase, "я", "ен", 1, 2, type, originalType, true, false) // червя и червен || ignore(baseForm, originalBase, "ващ", "вам", 1, 1, type, originalType, false, true) || ignore(baseForm, originalBase, "вяне", "вям", 2, 1, type, originalType, false, true) || ignore(baseForm, originalBase, "ан", "а", 1, 0, type, originalType, false, true) || ignore(baseForm, originalBase, "ян", "а", 1, 0, type, originalType, false, true) || ignore(baseForm, originalBase, "ение", "а", 4, 1, type, originalType, false, true) || ignore(baseForm, originalBase, "я", "ение", 1, 4, type, originalType, true, false) || ignore(baseForm, originalBase, "ат", "а", 1, 0, type, originalType, false, true) || ignore(baseForm, originalBase, "ая", "ан", 1, 1, type, originalType, true, false) || ignore(baseForm, originalBase, "я", "ене", 1, 3, type, originalType, true, false) || ignore(baseForm, originalBase, "я", "ан", 1, 2, type, originalType, true, false) || ignore(baseForm, originalBase, "ен", "а", 2, 1, type, originalType, false, true) || ignore(baseForm, originalBase, "я", "ея", 1, 2, type, originalType, true, true) || ignore(baseForm, originalBase, "я", "ещ", 1, 2, type, originalType, true, false) || ignore(baseForm, originalBase, "ящ", "я", 2, 1, type, originalType, false, true) || ignore(baseForm, originalBase, "ещ", "а", 2, 1, type, originalType, false, true) || ignore(baseForm, originalBase, "ин", "", 2, 0, type, originalType, false, false) 
|| ignore(baseForm, originalBase, "ия", "ил", 2, 2, type, originalType, true, false) || ignore(baseForm, originalBase, "ия", "ит", 2, 2, type, originalType, true, false) || ignore(baseForm, originalBase, "ял", "я", 1, 0, type, originalType, false, true) || ignore(baseForm, originalBase, "ям", "я", 1, 0, type, originalType, false, true)) { return; // омоними в основна форма - няма нужда от всичките им форми } else if (baseForm.equals(originalBase) && !word.equals(baseForm)) { return; } } System.out.println(word + " (" + type + "): " + originalType + " (base: " + baseForm + "), original base: " + mapping.get(word) + ")"); homonyms.add(word); } else { mapping.put(word, baseForm); } }
public static PatriciaTrie<String> getFormsDictionaryReferencingBaseForm() { PatriciaTrie<String> trie = new PatriciaTrie<>(); load(); for (Map.Entry<String, Set<String>> word : dictionary.entrySet()) { String baseForm = word.getKey(); if (word.getValue().isEmpty()) { trie.put(baseForm, baseForm); continue; } for (String inflectionClass : word.getValue()) { Multimap<String, String> inflections = inflectionClasses.get(inflectionClass); if (inflections == null) { trie.put(baseForm, baseForm); continue; } for (String ending : inflections.keySet()) { int endingIdx = baseForm.lastIndexOf(ending); if (!baseForm.endsWith(ending) || endingIdx == -1) { continue; } trie.put(baseForm, baseForm); for (String suffix : inflections.get(ending)) { String inflectedWord = baseForm.substring(0, endingIdx) + suffix; trie.put(inflectedWord, baseForm); } } } } // override the forms of the verb "to be" for (String sgForm : toBeFormsSg) { trie.put(sgForm, "съм"); } for (String plForm : toBeFormsPl) { trie.put(plForm, "съм"); } dictionary = null; // eligible for GC. TODO can merge these two load methods, but it's easier not to, for now return trie; }
public static void loadFormsDictionary() { formsDictionary = new PatriciaTrie<InflectedFormType>(); for (Map.Entry<String, Set<String>> word : dictionary.entrySet()) { String baseForm = word.getKey(); if (word.getValue().isEmpty()) { formsDictionary.put(baseForm, InflectedFormType.NOT_INFLECTABLE); continue; } for (String inflectionClass : word.getValue()) { Multimap<String, String> inflections = inflectionClasses.get(inflectionClass); if (inflections == null) { formsDictionary.put(baseForm, InflectedFormType.NOT_INFLECTABLE); continue; } boolean specialCaseNoun = false; if (baseForm.endsWith("й") && (inflectionClass.equals("O") || inflectionClass.equals("M"))) { specialCaseNoun = true; } boolean verb = verbClasses.contains(inflectionClass); for (String ending : inflections.keySet()) { int endingIdx = baseForm.lastIndexOf(ending); if (!baseForm.endsWith(ending) || endingIdx == -1) { continue; } formsDictionary.put(baseForm, getInflectedFormType(specialCaseNoun, verb, false)); Collection<String> pluralSuffixes = pluralInflectionClasses.get(inflectionClass).get(ending); for (String suffix : inflections.get(ending)) { String inflectedWord = baseForm.substring(0, endingIdx) + suffix; boolean isPlural = pluralSuffixes.contains(suffix); formsDictionary.put(inflectedWord, getInflectedFormType(specialCaseNoun, verb, isPlural)); } } } } // override the forms of the verb "to be" for (String sgForm : toBeFormsSg) { formsDictionary.put(sgForm, InflectedFormType.REGULAR_FORM_VERB); } for (String plForm : toBeFormsPl) { formsDictionary.put(plForm, InflectedFormType.PLURAL_FORM_VERB); } }
/**
 * Creates a transformation from a ready-made mapping. The entries are copied into a new
 * {@link PatriciaTrie}; keys are used exactly as given.
 *
 * @param values a map containing the values understood by this transformation
 */
public AbstractMapParameterTransformation(Map<String, T> values) {
    map = new PatriciaTrie<>(values);
}