Example source code for the Java class org.apache.lucene.analysis.util.CharArrayMap
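
Before the project snippets, here is a minimal, self-contained sketch of the CharArrayMap operations the examples below rely on (construction, String and char[] keys, slice lookups, copy/unmodifiable views). It assumes a Lucene release whose constructors and copy/unmodifiableMap overloads no longer take a Version argument; older releases add a leading Version parameter, as several snippets below show.

import org.apache.lucene.analysis.util.CharArrayMap;

public class CharArrayMapBasics {
  public static void main(String[] args) {
    // startSize 16, ignoreCase = false (older Lucene: new CharArrayMap<>(version, 16, false))
    CharArrayMap<Integer> map = new CharArrayMap<>(16, false);

    map.put("lucene", 1);                   // String key
    map.put("search".toCharArray(), 2);     // char[] key, no String allocation

    char[] buffer = "lucene analysis".toCharArray();
    Integer hit = map.get(buffer, 0, 6);    // probe a slice of a char[] buffer -> 1

    // defensive copy plus read-only view, as the analyzers below do for their stem dictionaries
    CharArrayMap<Integer> frozen = CharArrayMap.unmodifiableMap(CharArrayMap.copy(map));

    System.out.println(hit + " / " + frozen.size() + " entries");
  }
}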

Project: auto-phrase-tokenfilter    File: AutoPhrasingTokenFilter.java
private CharArrayMap<CharArraySet> convertPhraseSet( CharArraySet phraseSet ) {
  CharArrayMap<CharArraySet> phraseMap = new CharArrayMap<CharArraySet>( 100, false );
  Iterator<Object> phraseIt = phraseSet.iterator( );
  while (phraseIt != null && phraseIt.hasNext() ) {
    char[] phrase = (char[])phraseIt.next();

    Log.debug( "'" + new String( phrase ) + "'" );

    char[] firstTerm = getFirstTerm( phrase );
    Log.debug( "'" + new String( firstTerm ) + "'" );

    CharArraySet itsPhrases = phraseMap.get( firstTerm, 0, firstTerm.length );
    if (itsPhrases == null) {
      itsPhrases = new CharArraySet( 5, false );
      phraseMap.put( new String( firstTerm ), itsPhrases );
    }

    itsPhrases.add( phrase );
  }

  return phraseMap;
}
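
The map built above can then be probed per token with the raw char[] buffer, avoiding String allocation on the hot path. A hypothetical helper sketch (the names phraseMap, termBuffer, and termLength are illustrative, standing in for a filter's CharTermAttribute state, and are not code from the project):

import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;

final class PhraseLookup {
  // Returns the phrases (if any) whose first word equals the current token,
  // or null when the token does not start any known phrase.
  static CharArraySet phrasesStartingWith(CharArrayMap<CharArraySet> phraseMap,
                                          char[] termBuffer, int termLength) {
    // probe with the raw char[] slice; a real filter would then buffer and
    // compare the following tokens against each candidate phrase (a char[])
    return phraseMap.get(termBuffer, 0, termLength);
  }
}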
Project: NYBC    File: HunspellDictionary.java
/**
 * Reads the affix file through the provided InputStream, building up the prefix and suffix maps
 *
 * @param affixStream InputStream to read the content of the affix file from
 * @param decoder CharsetDecoder to decode the content of the file
 * @throws IOException Can be thrown while reading from the InputStream
 */
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, boolean strict) throws IOException, ParseException {
  prefixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);
  suffixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);

  LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
  String line = null;
  while ((line = reader.readLine()) != null) {
    if (line.startsWith(ALIAS_KEY)) {
      parseAlias(line);
    } else if (line.startsWith(PREFIX_KEY)) {
      parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, strict);
    } else if (line.startsWith(SUFFIX_KEY)) {
      parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, strict);
    } else if (line.startsWith(FLAG_KEY)) {
      // Assume that the FLAG line comes before any prefix or suffixes
      // Store the strategy so it can be used when parsing the dic file
      flagParsingStrategy = getFlagParsingStrategy(line);
    }
  }
}
Project: read-open-source-code    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  this.matchVersion = matchVersion;
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_31)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRef spare = new CharsRef();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare, iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
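
The CharArrayMap<String>.EntryIterator used above is the map's allocation-conscious way to walk entries: nextKey() hands back the raw char[] key and currentValue() returns the value for that key. A standalone sketch of the same pattern, with illustrative sample data and assuming the Version-less constructor:

import org.apache.lucene.analysis.util.CharArrayMap;

public class EntryIteratorSketch {
  public static void main(String[] args) {
    CharArrayMap<String> overrides = new CharArrayMap<>(8, false);
    overrides.put("fietsen", "fiets");        // sample stem overrides (illustrative data)
    overrides.put("bromfietsen", "bromfiets");

    CharArrayMap<String>.EntryIterator iter = overrides.entrySet().iterator();
    while (iter.hasNext()) {
      char[] key = iter.nextKey();            // advances and returns the raw key
      String value = iter.currentValue();     // value belonging to that key
      System.out.println(new String(key) + " -> " + value);
    }
  }
}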
Project: read-open-source-code    File: HunspellDictionary.java
/**
 * Reads the affix file through the provided InputStream, building up the prefix and suffix maps
 *
 * @param affixStream InputStream to read the content of the affix file from
 * @param decoder CharsetDecoder to decode the content of the file
 * @throws IOException Can be thrown while reading from the InputStream
 */
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, boolean strict) throws IOException, ParseException {
  prefixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);
  suffixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);

  LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
  String line = null;
  while ((line = reader.readLine()) != null) {
    if (line.startsWith(ALIAS_KEY)) {
      parseAlias(line);
    } else if (line.startsWith(PREFIX_KEY)) {
      parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, strict);
    } else if (line.startsWith(SUFFIX_KEY)) {
      parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, strict);
    } else if (line.startsWith(FLAG_KEY)) {
      // Assume that the FLAG line comes before any prefix or suffixes
      // Store the strategy so it can be used when parsing the dic file
      flagParsingStrategy = getFlagParsingStrategy(line);
    }
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  this.matchVersion = matchVersion;
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_31)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRef spare = new CharsRef();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare, iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: HunspellDictionary.java
/**
 * Reads the affix file through the provided InputStream, building up the prefix and suffix maps
 *
 * @param affixStream InputStream to read the content of the affix file from
 * @param decoder CharsetDecoder to decode the content of the file
 * @throws IOException Can be thrown while reading from the InputStream
 */
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, boolean strict) throws IOException, ParseException {
  prefixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);
  suffixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);

  LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
  String line = null;
  while ((line = reader.readLine()) != null) {
    if (line.startsWith(ALIAS_KEY)) {
      parseAlias(line);
    } else if (line.startsWith(PREFIX_KEY)) {
      parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, strict);
    } else if (line.startsWith(SUFFIX_KEY)) {
      parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, strict);
    } else if (line.startsWith(FLAG_KEY)) {
      // Assume that the FLAG line comes before any prefix or suffixes
      // Store the strategy so it can be used when parsing the dic file
      flagParsingStrategy = getFlagParsingStrategy(line);
    }
  }
}
Project: lams    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
      matchVersion.onOrAfter(Version.LUCENE_3_6) 
      ? DefaultSetHolder.DEFAULT_STEM_DICT 
      : CharArrayMap.<String>emptyMap());
}
Project: lams    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, stemExclusionTable,
      matchVersion.onOrAfter(Version.LUCENE_3_6)
      ? DefaultSetHolder.DEFAULT_STEM_DICT
      : CharArrayMap.<String>emptyMap());
}
Project: lams    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  setVersion(matchVersion);
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRefBuilder spare = new CharsRefBuilder();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare.get(), iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
Project: lams    File: SlowSynonymMap.java
/**
 * @param singleMatch  List<String>, the sequence of strings to match
 * @param replacement  List<Token> the list of tokens to use on a match
 * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
 * @param mergeExisting merge the replacement tokens with any other mappings that exist
 */
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
  SlowSynonymMap currMap = this;
  for (String str : singleMatch) {
    if (currMap.submap==null) {
      // for now hardcode at 4.0, as it's what the old code did.
      // would be nice to fix, but shouldn't store a version in each submap!!!
      currMap.submap = new CharArrayMap<>(Version.LUCENE_CURRENT, 1, ignoreCase());
    }

    SlowSynonymMap map = currMap.submap.get(str);
    if (map==null) {
      map = new SlowSynonymMap();
      map.flags |= flags & IGNORE_CASE;
      currMap.submap.put(str, map);
    }

    currMap = map;
  }

  if (currMap.synonyms != null && !mergeExisting) {
    throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
  }
  List<Token> superset = currMap.synonyms==null ? replacement :
        mergeTokens(Arrays.asList(currMap.synonyms), replacement);
  currMap.synonyms = superset.toArray(new Token[superset.size()]);
  if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
Project: theSemProject    File: MyAnalyzer.java
/**
 * Initializes the language-specific analyzer.
 *
 * @param language the language
 * @param stopwords stop words
 * @param stemExclusionSet the set of terms that must not be stemmed
 * @param stemOverrideDict dictionary of stemming override terms
 */
public MyAnalyzer(String language, CharArraySet stopwords, CharArraySet stemExclusionSet, CharArrayMap<String> stemOverrideDict) {
    super(stopwords);
    this.language = language;
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
    this.stemTable = DefaultSetHolder.DEFAULT_TABLE;
    if (stemOverrideDict.isEmpty()) {
        this.stemdict = null;
    } else {
        Builder builder = new Builder(false);
        EntryIterator iter = stemOverrideDict.entrySet().iterator();
        CharsRefBuilder spare = new CharsRefBuilder();

        while (iter.hasNext()) {
            char[] ex = iter.nextKey();
            spare.copyChars(ex, 0, ex.length);
            builder.add(spare.get(), (CharSequence) iter.currentValue());
        }

        try {
            this.stemdict = builder.build();
        } catch (IOException var8) {
            throw new RuntimeException("can not build stem dict", var8);
        }
    }

}
Project: search    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
      matchVersion.onOrAfter(Version.LUCENE_3_6) 
      ? DefaultSetHolder.DEFAULT_STEM_DICT 
      : CharArrayMap.<String>emptyMap());
}
Project: search    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, stemExclusionTable,
      matchVersion.onOrAfter(Version.LUCENE_3_6)
      ? DefaultSetHolder.DEFAULT_STEM_DICT
      : CharArrayMap.<String>emptyMap());
}
Project: search    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  setVersion(matchVersion);
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRefBuilder spare = new CharsRefBuilder();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare.get(), iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
Project: search    File: SlowSynonymMap.java
/**
 * @param singleMatch  List<String>, the sequence of strings to match
 * @param replacement  List<Token> the list of tokens to use on a match
 * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
 * @param mergeExisting merge the replacement tokens with any other mappings that exist
 */
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
  SlowSynonymMap currMap = this;
  for (String str : singleMatch) {
    if (currMap.submap==null) {
      // for now hardcode at 4.0, as it's what the old code did.
      // would be nice to fix, but shouldn't store a version in each submap!!!
      currMap.submap = new CharArrayMap<>(Version.LUCENE_CURRENT, 1, ignoreCase());
    }

    SlowSynonymMap map = currMap.submap.get(str);
    if (map==null) {
      map = new SlowSynonymMap();
      map.flags |= flags & IGNORE_CASE;
      currMap.submap.put(str, map);
    }

    currMap = map;
  }

  if (currMap.synonyms != null && !mergeExisting) {
    throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
  }
  List<Token> superset = currMap.synonyms==null ? replacement :
        mergeTokens(Arrays.asList(currMap.synonyms), replacement);
  currMap.synonyms = superset.toArray(new Token[superset.size()]);
  if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
Project: search    File: TestCharArrayMap.java
public void doRandom(int iter, boolean ignoreCase) {
  CharArrayMap<Integer> map = new CharArrayMap<>(1, ignoreCase);
  HashMap<String,Integer> hmap = new HashMap<>();

  char[] key;
  for (int i=0; i<iter; i++) {
    int len = random().nextInt(5);
    key = new char[len];
    for (int j=0; j<key.length; j++) {
      key[j] = (char)random().nextInt(127);
    }
    String keyStr = new String(key);
    String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr; 

    int val = random().nextInt();

    Object o1 = map.put(key, val);
    Object o2 = hmap.put(hmapKey,val);
    assertEquals(o1,o2);

    // add it again with the string method
    assertEquals(val, map.put(keyStr,val).intValue());

    assertEquals(val, map.get(key,0,key.length).intValue());
    assertEquals(val, map.get(key).intValue());
    assertEquals(val, map.get(keyStr).intValue());

    assertEquals(hmap.size(), map.size());
  }
}
Project: search    File: TestCharArrayMap.java
public void testToString() {
  CharArrayMap<Integer> cm = new CharArrayMap<>(Collections.singletonMap("test",1), false);
  assertEquals("[test]",cm.keySet().toString());
  assertEquals("[1]",cm.values().toString());
  assertEquals("[test=1]",cm.entrySet().toString());
  assertEquals("{test=1}",cm.toString());
  cm.put("test2", 2);
  assertTrue(cm.keySet().toString().contains(", "));
  assertTrue(cm.values().toString().contains(", "));
  assertTrue(cm.entrySet().toString().contains(", "));
  assertTrue(cm.toString().contains(", "));
}
Project: search    File: TestRandomChains.java
@Override public Object create(Random random) {
  int num = random.nextInt(10);
  CharArrayMap<String> map = new CharArrayMap<>(num, random.nextBoolean());
  for (int i = 0; i < num; i++) {
    // TODO: make nastier
    map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random));
  }
  return map;
}
Project: NYBC    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
      matchVersion.onOrAfter(Version.LUCENE_36) 
      ? DefaultSetHolder.DEFAULT_STEM_DICT 
      : CharArrayMap.<String>emptyMap());
}
Project: NYBC    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, stemExclusionTable,
      matchVersion.onOrAfter(Version.LUCENE_36)
      ? DefaultSetHolder.DEFAULT_STEM_DICT
      : CharArrayMap.<String>emptyMap());
}
Project: NYBC    File: SlowSynonymMap.java
/**
 * @param singleMatch  List<String>, the sequence of strings to match
 * @param replacement  List<Token> the list of tokens to use on a match
 * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
 * @param mergeExisting merge the replacement tokens with any other mappings that exist
 */
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
  SlowSynonymMap currMap = this;
  for (String str : singleMatch) {
    if (currMap.submap==null) {
      // for now hardcode at 4.0, as it's what the old code did.
      // would be nice to fix, but shouldn't store a version in each submap!!!
      currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_40, 1, ignoreCase());
    }

    SlowSynonymMap map = currMap.submap.get(str);
    if (map==null) {
      map = new SlowSynonymMap();
      map.flags |= flags & IGNORE_CASE;
      currMap.submap.put(str, map);
    }

    currMap = map;
  }

  if (currMap.synonyms != null && !mergeExisting) {
    throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
  }
  List<Token> superset = currMap.synonyms==null ? replacement :
        mergeTokens(Arrays.asList(currMap.synonyms), replacement);
  currMap.synonyms = superset.toArray(new Token[superset.size()]);
  if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
Project: NYBC    File: TestCharArrayMap.java
public void doRandom(int iter, boolean ignoreCase) {
  CharArrayMap<Integer> map = new CharArrayMap<Integer>(TEST_VERSION_CURRENT, 1, ignoreCase);
  HashMap<String,Integer> hmap = new HashMap<String,Integer>();

  char[] key;
  for (int i=0; i<iter; i++) {
    int len = random().nextInt(5);
    key = new char[len];
    for (int j=0; j<key.length; j++) {
      key[j] = (char)random().nextInt(127);
    }
    String keyStr = new String(key);
    String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr; 

    int val = random().nextInt();

    Object o1 = map.put(key, val);
    Object o2 = hmap.put(hmapKey,val);
    assertEquals(o1,o2);

    // add it again with the string method
    assertEquals(val, map.put(keyStr,val).intValue());

    assertEquals(val, map.get(key,0,key.length).intValue());
    assertEquals(val, map.get(key).intValue());
    assertEquals(val, map.get(keyStr).intValue());

    assertEquals(hmap.size(), map.size());
  }
}
Project: NYBC    File: TestCharArrayMap.java
public void testToString() {
  CharArrayMap<Integer> cm = new CharArrayMap<Integer>(TEST_VERSION_CURRENT, Collections.singletonMap("test",1), false);
  assertEquals("[test]",cm.keySet().toString());
  assertEquals("[1]",cm.values().toString());
  assertEquals("[test=1]",cm.entrySet().toString());
  assertEquals("{test=1}",cm.toString());
  cm.put("test2", 2);
  assertTrue(cm.keySet().toString().contains(", "));
  assertTrue(cm.values().toString().contains(", "));
  assertTrue(cm.entrySet().toString().contains(", "));
  assertTrue(cm.toString().contains(", "));
}
Project: NYBC    File: TestStemmerOverrideFilter.java
public void testOverride() throws IOException {
  // lets make booked stem to books
  // the override filter will convert "booked" to "books",
  // but also mark it with KeywordAttribute so Porter will not change it.
  CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
  dictionary.put("booked", "books");
  Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
  TokenStream stream = new PorterStemFilter(
      new StemmerOverrideFilter(tokenizer, dictionary));
  assertTokenStreamContents(stream, new String[] { "books" });
}
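
Passing a CharArrayMap straight to StemmerOverrideFilter, as in this test, is the older constructor; the DutchAnalyzer snippets on this page use the later API, where a StemmerOverrideFilter.Builder compiles the overrides into a StemmerOverrideMap. A hedged sketch of that newer wiring (class and method names here are illustrative):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;

public class StemmerOverrideSketch {
  static TokenStream booksOverride(Tokenizer tokenizer) throws IOException {
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false); // ignoreCase = false
    builder.add("booked", "books");                       // same override as the test above
    TokenStream stream = new StemmerOverrideFilter(tokenizer, builder.build());
    return new PorterStemFilter(stream);                  // Porter leaves the keyword-marked token alone
  }
}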
Project: NYBC    File: TestRandomChains.java
@Override public Object create(Random random) {
  int num = random.nextInt(10);
  CharArrayMap<String> map = new CharArrayMap<String>(TEST_VERSION_CURRENT, num, random.nextBoolean());
  for (int i = 0; i < num; i++) {
    // TODO: make nastier
    map.put(_TestUtil.randomSimpleString(random), _TestUtil.randomSimpleString(random));
  }
  return map;
}
Project: read-open-source-code    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
      matchVersion.onOrAfter(Version.LUCENE_36) 
      ? DefaultSetHolder.DEFAULT_STEM_DICT 
      : CharArrayMap.<String>emptyMap());
}
Project: read-open-source-code    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, stemExclusionTable,
      matchVersion.onOrAfter(Version.LUCENE_36)
      ? DefaultSetHolder.DEFAULT_STEM_DICT
      : CharArrayMap.<String>emptyMap());
}
Project: read-open-source-code    File: SlowSynonymMap.java
/**
 * @param singleMatch  List<String>, the sequence of strings to match
 * @param replacement  List<Token> the list of tokens to use on a match
 * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
 * @param mergeExisting merge the replacement tokens with any other mappings that exist
 */
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
  SlowSynonymMap currMap = this;
  for (String str : singleMatch) {
    if (currMap.submap==null) {
      // for now hardcode at 4.0, as it's what the old code did.
      // would be nice to fix, but shouldn't store a version in each submap!!!
      currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_CURRENT, 1, ignoreCase());
    }

    SlowSynonymMap map = currMap.submap.get(str);
    if (map==null) {
      map = new SlowSynonymMap();
      map.flags |= flags & IGNORE_CASE;
      currMap.submap.put(str, map);
    }

    currMap = map;
  }

  if (currMap.synonyms != null && !mergeExisting) {
    throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
  }
  List<Token> superset = currMap.synonyms==null ? replacement :
        mergeTokens(Arrays.asList(currMap.synonyms), replacement);
  currMap.synonyms = superset.toArray(new Token[superset.size()]);
  if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
Project: read-open-source-code    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
      matchVersion.onOrAfter(Version.LUCENE_3_6) 
      ? DefaultSetHolder.DEFAULT_STEM_DICT 
      : CharArrayMap.<String>emptyMap());
}
Project: read-open-source-code    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, stemExclusionTable,
      matchVersion.onOrAfter(Version.LUCENE_3_6)
      ? DefaultSetHolder.DEFAULT_STEM_DICT
      : CharArrayMap.<String>emptyMap());
}
Project: read-open-source-code    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  setVersion(matchVersion);
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRefBuilder spare = new CharsRefBuilder();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare.get(), iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
Project: read-open-source-code    File: SlowSynonymMap.java
/**
 * @param singleMatch  List<String>, the sequence of strings to match
 * @param replacement  List<Token> the list of tokens to use on a match
 * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
 * @param mergeExisting merge the replacement tokens with any other mappings that exist
 */
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
  SlowSynonymMap currMap = this;
  for (String str : singleMatch) {
    if (currMap.submap==null) {
      // for now hardcode at 4.0, as it's what the old code did.
      // would be nice to fix, but shouldn't store a version in each submap!!!
      currMap.submap = new CharArrayMap<>(Version.LUCENE_CURRENT, 1, ignoreCase());
    }

    SlowSynonymMap map = currMap.submap.get(str);
    if (map==null) {
      map = new SlowSynonymMap();
      map.flags |= flags & IGNORE_CASE;
      currMap.submap.put(str, map);
    }

    currMap = map;
  }

  if (currMap.synonyms != null && !mergeExisting) {
    throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
  }
  List<Token> superset = currMap.synonyms==null ? replacement :
        mergeTokens(Arrays.asList(currMap.synonyms), replacement);
  currMap.synonyms = superset.toArray(new Token[superset.size()]);
  if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
Project: Maskana-Gestor-de-Conocimiento    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
      matchVersion.onOrAfter(Version.LUCENE_36) 
      ? DefaultSetHolder.DEFAULT_STEM_DICT 
      : CharArrayMap.<String>emptyMap());
}
Project: Maskana-Gestor-de-Conocimiento    File: DutchAnalyzer.java
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
  // historically, this ctor never populated the stem dict!!!!!
  // so we populate it only for >= 3.6
  this(matchVersion, stopwords, stemExclusionTable,
      matchVersion.onOrAfter(Version.LUCENE_36)
      ? DefaultSetHolder.DEFAULT_STEM_DICT
      : CharArrayMap.<String>emptyMap());
}
Project: Maskana-Gestor-de-Conocimiento    File: SlowSynonymMap.java
/**
 * @param singleMatch  List<String>, the sequence of strings to match
 * @param replacement  List<Token> the list of tokens to use on a match
 * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
 * @param mergeExisting merge the replacement tokens with any other mappings that exist
 */
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
  SlowSynonymMap currMap = this;
  for (String str : singleMatch) {
    if (currMap.submap==null) {
      // for now hardcode at 4.0, as it's what the old code did.
      // would be nice to fix, but shouldn't store a version in each submap!!!
      currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_40, 1, ignoreCase());
    }

    SlowSynonymMap map = currMap.submap.get(str);
    if (map==null) {
      map = new SlowSynonymMap();
      map.flags |= flags & IGNORE_CASE;
      currMap.submap.put(str, map);
    }

    currMap = map;
  }

  if (currMap.synonyms != null && !mergeExisting) {
    throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
  }
  List<Token> superset = currMap.synonyms==null ? replacement :
        mergeTokens(Arrays.asList(currMap.synonyms), replacement);
  currMap.synonyms = superset.toArray(new Token[superset.size()]);
  if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
Project: Maskana-Gestor-de-Conocimiento    File: TestCharArrayMap.java
public void doRandom(int iter, boolean ignoreCase) {
  CharArrayMap<Integer> map = new CharArrayMap<Integer>(TEST_VERSION_CURRENT, 1, ignoreCase);
  HashMap<String,Integer> hmap = new HashMap<String,Integer>();

  char[] key;
  for (int i=0; i<iter; i++) {
    int len = random().nextInt(5);
    key = new char[len];
    for (int j=0; j<key.length; j++) {
      key[j] = (char)random().nextInt(127);
    }
    String keyStr = new String(key);
    String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr; 

    int val = random().nextInt();

    Object o1 = map.put(key, val);
    Object o2 = hmap.put(hmapKey,val);
    assertEquals(o1,o2);

    // add it again with the string method
    assertEquals(val, map.put(keyStr,val).intValue());

    assertEquals(val, map.get(key,0,key.length).intValue());
    assertEquals(val, map.get(key).intValue());
    assertEquals(val, map.get(keyStr).intValue());

    assertEquals(hmap.size(), map.size());
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestCharArrayMap.java
public void testToString() {
  CharArrayMap<Integer> cm = new CharArrayMap<Integer>(TEST_VERSION_CURRENT, Collections.singletonMap("test",1), false);
  assertEquals("[test]",cm.keySet().toString());
  assertEquals("[1]",cm.values().toString());
  assertEquals("[test=1]",cm.entrySet().toString());
  assertEquals("{test=1}",cm.toString());
  cm.put("test2", 2);
  assertTrue(cm.keySet().toString().contains(", "));
  assertTrue(cm.values().toString().contains(", "));
  assertTrue(cm.entrySet().toString().contains(", "));
  assertTrue(cm.toString().contains(", "));
}
Project: Maskana-Gestor-de-Conocimiento    File: TestRandomChains.java
@Override public Object create(Random random) {
  int num = random.nextInt(10);
  CharArrayMap<String> map = new CharArrayMap<String>(TEST_VERSION_CURRENT, num, random.nextBoolean());
  for (int i = 0; i < num; i++) {
    // TODO: make nastier
    map.put(_TestUtil.randomSimpleString(random), _TestUtil.randomSimpleString(random));
  }
  return map;
}