Java 类org.apache.lucene.analysis.ja.util.ToStringUtil 实例源码

项目:search    文件:JapaneseReadingFormFilter.java   
/**
 * Advances the wrapped stream by one token and rewrites the term text from the token's reading.
 *
 * <p>With {@code useRomaji} set, the term becomes the romanization of the reading, or of the
 * current term text when the token carries no reading. Without it, the term is replaced by the
 * reading when one exists and is otherwise left untouched.
 *
 * @return {@code true} if a token was produced, {@code false} once the input is exhausted
 * @throws IOException propagated from the wrapped stream or from the romanization call
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final String tokenReading = readingAttr.getReading();
  final boolean hasReading = tokenReading != null;

  if (useRomaji && hasReading) {
    // romanize the reading straight into the emptied term attribute
    ToStringUtil.getRomanization(termAttr.setEmpty(), tokenReading);
  } else if (useRomaji) {
    // no reading (e.g. an OOV term): fall back to romanizing the term text via the scratch buffer
    buffer.setLength(0);
    ToStringUtil.getRomanization(buffer, termAttr);
    termAttr.setEmpty().append(buffer);
  } else if (hasReading) {
    // plain mode: the reading simply replaces the term text
    termAttr.setEmpty().append(tokenReading);
  }
  return true;
}
项目:NYBC    文件:JapaneseReadingFormFilter.java   
/**
 * Pulls the next token from the input and substitutes its term text with the reading form.
 *
 * <p>In romaji mode the reading is romanized into the term; if the token has no reading the
 * existing term text is romanized instead. Outside romaji mode the reading, when present,
 * replaces the term text verbatim.
 *
 * @return {@code true} while tokens remain, {@code false} at end of input
 * @throws IOException propagated from the wrapped stream or from the romanization call
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final String tokenReading = readingAttr.getReading();

  if (!useRomaji) {
    // only overwrite the term when there actually is a reading
    if (tokenReading != null) {
      termAttr.setEmpty().append(tokenReading);
    }
    return true;
  }

  if (tokenReading != null) {
    ToStringUtil.getRomanization(termAttr.setEmpty(), tokenReading);
    return true;
  }

  // no reading (e.g. an OOV term): romanize the term text through the scratch buffer
  buffer.setLength(0);
  ToStringUtil.getRomanization(buffer, termAttr);
  termAttr.setEmpty().append(buffer);
  return true;
}
项目:read-open-source-code    文件:JapaneseReadingFormFilter.java   
/**
 * Advances the wrapped stream by one token and rewrites the term text from the token's reading.
 *
 * <p>With {@code useRomaji} set, the term becomes the romanization of the reading, or of the
 * current term text when the token carries no reading. Without it, the term is replaced by the
 * reading when one exists and is otherwise left untouched.
 *
 * @return {@code true} if a token was produced, {@code false} once the input is exhausted
 * @throws IOException propagated from the wrapped stream or from the romanization call
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final String tokenReading = readingAttr.getReading();
  final boolean hasReading = tokenReading != null;

  if (useRomaji && hasReading) {
    // romanize the reading straight into the emptied term attribute
    ToStringUtil.getRomanization(termAttr.setEmpty(), tokenReading);
  } else if (useRomaji) {
    // no reading (e.g. an OOV term): fall back to romanizing the term text via the scratch buffer
    buffer.setLength(0);
    ToStringUtil.getRomanization(buffer, termAttr);
    termAttr.setEmpty().append(buffer);
  } else if (hasReading) {
    // plain mode: the reading simply replaces the term text
    termAttr.setEmpty().append(tokenReading);
  }
  return true;
}
项目:read-open-source-code    文件:JapaneseReadingFormFilter.java   
/**
 * Pulls the next token from the input and substitutes its term text with the reading form.
 *
 * <p>In romaji mode the reading is romanized into the term; if the token has no reading the
 * existing term text is romanized instead. Outside romaji mode the reading, when present,
 * replaces the term text verbatim.
 *
 * @return {@code true} while tokens remain, {@code false} at end of input
 * @throws IOException propagated from the wrapped stream or from the romanization call
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final String tokenReading = readingAttr.getReading();

  if (!useRomaji) {
    // only overwrite the term when there actually is a reading
    if (tokenReading != null) {
      termAttr.setEmpty().append(tokenReading);
    }
    return true;
  }

  if (tokenReading != null) {
    ToStringUtil.getRomanization(termAttr.setEmpty(), tokenReading);
    return true;
  }

  // no reading (e.g. an OOV term): romanize the term text through the scratch buffer
  buffer.setLength(0);
  ToStringUtil.getRomanization(buffer, termAttr);
  termAttr.setEmpty().append(buffer);
  return true;
}
项目:Maskana-Gestor-de-Conocimiento    文件:JapaneseReadingFormFilter.java   
/**
 * Advances the wrapped stream by one token and rewrites the term text from the token's reading.
 *
 * <p>With {@code useRomaji} set, the term becomes the romanization of the reading, or of the
 * current term text when the token carries no reading. Without it, the term is replaced by the
 * reading when one exists and is otherwise left untouched.
 *
 * @return {@code true} if a token was produced, {@code false} once the input is exhausted
 * @throws IOException propagated from the wrapped stream or from the romanization call
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final String tokenReading = readingAttr.getReading();
  final boolean hasReading = tokenReading != null;

  if (useRomaji && hasReading) {
    // romanize the reading straight into the emptied term attribute
    ToStringUtil.getRomanization(termAttr.setEmpty(), tokenReading);
  } else if (useRomaji) {
    // no reading (e.g. an OOV term): fall back to romanizing the term text via the scratch buffer
    buffer.setLength(0);
    ToStringUtil.getRomanization(buffer, termAttr);
    termAttr.setEmpty().append(buffer);
  } else if (hasReading) {
    // plain mode: the reading simply replaces the term text
    termAttr.setEmpty().append(tokenReading);
  }
  return true;
}
项目:search    文件:ReadingAttributeImpl.java   
/**
 * Reports the reading and pronunciation, each alongside its romanized form, to the reflector.
 *
 * <p>A {@code null} reading or pronunciation is reported as {@code null} for both the raw and
 * the "(en)" key; no romanization is attempted for it.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String readingValue = getReading();
  String readingRomanized = null;
  if (readingValue != null) {
    readingRomanized = ToStringUtil.getRomanization(readingValue);
  }

  final String pronunciationValue = getPronunciation();
  String pronunciationRomanized = null;
  if (pronunciationValue != null) {
    pronunciationRomanized = ToStringUtil.getRomanization(pronunciationValue);
  }

  reflector.reflect(ReadingAttribute.class, "reading", readingValue);
  reflector.reflect(ReadingAttribute.class, "reading (en)", readingRomanized);
  reflector.reflect(ReadingAttribute.class, "pronunciation", pronunciationValue);
  reflector.reflect(ReadingAttribute.class, "pronunciation (en)", pronunciationRomanized);
}
项目:search    文件:PartOfSpeechAttributeImpl.java   
/**
 * Reports the part-of-speech tag and its translation to the reflector.
 *
 * <p>When no tag is set, both the raw and the "(en)" value are reported as {@code null}.
 *
 * @param reflector receiver of the two key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String posTag = getPartOfSpeech();
  String posTagTranslated = null;
  if (posTag != null) {
    posTagTranslated = ToStringUtil.getPOSTranslation(posTag);
  }

  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech", posTag);
  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech (en)", posTagTranslated);
}
项目:search    文件:InflectionAttributeImpl.java   
/**
 * Reports the inflection type and inflection form, each with its translation, to the reflector.
 *
 * <p>The type pair is reflected before the form is even looked up. A {@code null} type or form
 * yields {@code null} for its "(en)" counterpart as well.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String inflType = getInflectionType();
  String inflTypeTranslated = null;
  if (inflType != null) {
    inflTypeTranslated = ToStringUtil.getInflectionTypeTranslation(inflType);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionType", inflType);
  reflector.reflect(InflectionAttribute.class, "inflectionType (en)", inflTypeTranslated);

  final String inflForm = getInflectionForm();
  String inflFormTranslated = null;
  if (inflForm != null) {
    inflFormTranslated = ToStringUtil.getInflectedFormTranslation(inflForm);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionForm", inflForm);
  reflector.reflect(InflectionAttribute.class, "inflectionForm (en)", inflFormTranslated);
}
项目:NYBC    文件:ReadingAttributeImpl.java   
/**
 * Exposes reading and pronunciation to the reflector, together with romanized variants.
 *
 * <p>Romanization is skipped for a {@code null} value, in which case the matching "(en)" entry
 * is {@code null} too.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String readingValue = getReading();
  String readingRomanized = null;
  if (readingValue != null) {
    readingRomanized = ToStringUtil.getRomanization(readingValue);
  }

  final String pronunciationValue = getPronunciation();
  String pronunciationRomanized = null;
  if (pronunciationValue != null) {
    pronunciationRomanized = ToStringUtil.getRomanization(pronunciationValue);
  }

  reflector.reflect(ReadingAttribute.class, "reading", readingValue);
  reflector.reflect(ReadingAttribute.class, "reading (en)", readingRomanized);
  reflector.reflect(ReadingAttribute.class, "pronunciation", pronunciationValue);
  reflector.reflect(ReadingAttribute.class, "pronunciation (en)", pronunciationRomanized);
}
项目:NYBC    文件:PartOfSpeechAttributeImpl.java   
/**
 * Exposes the part-of-speech tag and its translation to the reflector.
 *
 * <p>A missing tag is reported as {@code null} under both keys.
 *
 * @param reflector receiver of the two key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String posTag = getPartOfSpeech();
  String posTagTranslated = null;
  if (posTag != null) {
    posTagTranslated = ToStringUtil.getPOSTranslation(posTag);
  }

  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech", posTag);
  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech (en)", posTagTranslated);
}
项目:NYBC    文件:InflectionAttributeImpl.java   
/**
 * Exposes inflection type and inflection form, with their translations, to the reflector.
 *
 * <p>The type entries are emitted first, then the form is fetched and emitted. Translation is
 * skipped for {@code null} values, which are reported as {@code null} under both keys.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String inflType = getInflectionType();
  String inflTypeTranslated = null;
  if (inflType != null) {
    inflTypeTranslated = ToStringUtil.getInflectionTypeTranslation(inflType);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionType", inflType);
  reflector.reflect(InflectionAttribute.class, "inflectionType (en)", inflTypeTranslated);

  final String inflForm = getInflectionForm();
  String inflFormTranslated = null;
  if (inflForm != null) {
    inflFormTranslated = ToStringUtil.getInflectedFormTranslation(inflForm);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionForm", inflForm);
  reflector.reflect(InflectionAttribute.class, "inflectionForm (en)", inflFormTranslated);
}
项目:read-open-source-code    文件:ReadingAttributeImpl.java   
/**
 * Reports the reading and pronunciation, each alongside its romanized form, to the reflector.
 *
 * <p>A {@code null} reading or pronunciation is reported as {@code null} for both the raw and
 * the "(en)" key; no romanization is attempted for it.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String readingValue = getReading();
  String readingRomanized = null;
  if (readingValue != null) {
    readingRomanized = ToStringUtil.getRomanization(readingValue);
  }

  final String pronunciationValue = getPronunciation();
  String pronunciationRomanized = null;
  if (pronunciationValue != null) {
    pronunciationRomanized = ToStringUtil.getRomanization(pronunciationValue);
  }

  reflector.reflect(ReadingAttribute.class, "reading", readingValue);
  reflector.reflect(ReadingAttribute.class, "reading (en)", readingRomanized);
  reflector.reflect(ReadingAttribute.class, "pronunciation", pronunciationValue);
  reflector.reflect(ReadingAttribute.class, "pronunciation (en)", pronunciationRomanized);
}
项目:read-open-source-code    文件:PartOfSpeechAttributeImpl.java   
/**
 * Reports the part-of-speech tag and its translation to the reflector.
 *
 * <p>When no tag is set, both the raw and the "(en)" value are reported as {@code null}.
 *
 * @param reflector receiver of the two key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String posTag = getPartOfSpeech();
  String posTagTranslated = null;
  if (posTag != null) {
    posTagTranslated = ToStringUtil.getPOSTranslation(posTag);
  }

  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech", posTag);
  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech (en)", posTagTranslated);
}
项目:read-open-source-code    文件:InflectionAttributeImpl.java   
/**
 * Reports the inflection type and inflection form, each with its translation, to the reflector.
 *
 * <p>The type pair is reflected before the form is even looked up. A {@code null} type or form
 * yields {@code null} for its "(en)" counterpart as well.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String inflType = getInflectionType();
  String inflTypeTranslated = null;
  if (inflType != null) {
    inflTypeTranslated = ToStringUtil.getInflectionTypeTranslation(inflType);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionType", inflType);
  reflector.reflect(InflectionAttribute.class, "inflectionType (en)", inflTypeTranslated);

  final String inflForm = getInflectionForm();
  String inflFormTranslated = null;
  if (inflForm != null) {
    inflFormTranslated = ToStringUtil.getInflectedFormTranslation(inflForm);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionForm", inflForm);
  reflector.reflect(InflectionAttribute.class, "inflectionForm (en)", inflFormTranslated);
}
项目:read-open-source-code    文件:ReadingAttributeImpl.java   
/**
 * Exposes reading and pronunciation to the reflector, together with romanized variants.
 *
 * <p>Romanization is skipped for a {@code null} value, in which case the matching "(en)" entry
 * is {@code null} too.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String readingValue = getReading();
  String readingRomanized = null;
  if (readingValue != null) {
    readingRomanized = ToStringUtil.getRomanization(readingValue);
  }

  final String pronunciationValue = getPronunciation();
  String pronunciationRomanized = null;
  if (pronunciationValue != null) {
    pronunciationRomanized = ToStringUtil.getRomanization(pronunciationValue);
  }

  reflector.reflect(ReadingAttribute.class, "reading", readingValue);
  reflector.reflect(ReadingAttribute.class, "reading (en)", readingRomanized);
  reflector.reflect(ReadingAttribute.class, "pronunciation", pronunciationValue);
  reflector.reflect(ReadingAttribute.class, "pronunciation (en)", pronunciationRomanized);
}
项目:read-open-source-code    文件:PartOfSpeechAttributeImpl.java   
/**
 * Exposes the part-of-speech tag and its translation to the reflector.
 *
 * <p>A missing tag is reported as {@code null} under both keys.
 *
 * @param reflector receiver of the two key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String posTag = getPartOfSpeech();
  String posTagTranslated = null;
  if (posTag != null) {
    posTagTranslated = ToStringUtil.getPOSTranslation(posTag);
  }

  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech", posTag);
  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech (en)", posTagTranslated);
}
项目:read-open-source-code    文件:InflectionAttributeImpl.java   
/**
 * Exposes inflection type and inflection form, with their translations, to the reflector.
 *
 * <p>The type entries are emitted first, then the form is fetched and emitted. Translation is
 * skipped for {@code null} values, which are reported as {@code null} under both keys.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String inflType = getInflectionType();
  String inflTypeTranslated = null;
  if (inflType != null) {
    inflTypeTranslated = ToStringUtil.getInflectionTypeTranslation(inflType);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionType", inflType);
  reflector.reflect(InflectionAttribute.class, "inflectionType (en)", inflTypeTranslated);

  final String inflForm = getInflectionForm();
  String inflFormTranslated = null;
  if (inflForm != null) {
    inflFormTranslated = ToStringUtil.getInflectedFormTranslation(inflForm);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionForm", inflForm);
  reflector.reflect(InflectionAttribute.class, "inflectionForm (en)", inflFormTranslated);
}
项目:Maskana-Gestor-de-Conocimiento    文件:ReadingAttributeImpl.java   
/**
 * Reports the reading and pronunciation, each alongside its romanized form, to the reflector.
 *
 * <p>A {@code null} reading or pronunciation is reported as {@code null} for both the raw and
 * the "(en)" key; no romanization is attempted for it.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String readingValue = getReading();
  String readingRomanized = null;
  if (readingValue != null) {
    readingRomanized = ToStringUtil.getRomanization(readingValue);
  }

  final String pronunciationValue = getPronunciation();
  String pronunciationRomanized = null;
  if (pronunciationValue != null) {
    pronunciationRomanized = ToStringUtil.getRomanization(pronunciationValue);
  }

  reflector.reflect(ReadingAttribute.class, "reading", readingValue);
  reflector.reflect(ReadingAttribute.class, "reading (en)", readingRomanized);
  reflector.reflect(ReadingAttribute.class, "pronunciation", pronunciationValue);
  reflector.reflect(ReadingAttribute.class, "pronunciation (en)", pronunciationRomanized);
}
项目:Maskana-Gestor-de-Conocimiento    文件:PartOfSpeechAttributeImpl.java   
/**
 * Reports the part-of-speech tag and its translation to the reflector.
 *
 * <p>When no tag is set, both the raw and the "(en)" value are reported as {@code null}.
 *
 * @param reflector receiver of the two key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String posTag = getPartOfSpeech();
  String posTagTranslated = null;
  if (posTag != null) {
    posTagTranslated = ToStringUtil.getPOSTranslation(posTag);
  }

  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech", posTag);
  reflector.reflect(PartOfSpeechAttribute.class, "partOfSpeech (en)", posTagTranslated);
}
项目:Maskana-Gestor-de-Conocimiento    文件:InflectionAttributeImpl.java   
/**
 * Reports the inflection type and inflection form, each with its translation, to the reflector.
 *
 * <p>The type pair is reflected before the form is even looked up. A {@code null} type or form
 * yields {@code null} for its "(en)" counterpart as well.
 *
 * @param reflector receiver of the four key/value pairs
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  final String inflType = getInflectionType();
  String inflTypeTranslated = null;
  if (inflType != null) {
    inflTypeTranslated = ToStringUtil.getInflectionTypeTranslation(inflType);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionType", inflType);
  reflector.reflect(InflectionAttribute.class, "inflectionType (en)", inflTypeTranslated);

  final String inflForm = getInflectionForm();
  String inflFormTranslated = null;
  if (inflForm != null) {
    inflFormTranslated = ToStringUtil.getInflectedFormTranslation(inflForm);
  }
  reflector.reflect(InflectionAttribute.class, "inflectionForm", inflForm);
  reflector.reflect(InflectionAttribute.class, "inflectionForm (en)", inflFormTranslated);
}
项目:search    文件:TestTokenInfoDictionary.java   
/**
 * Enumerates every entry of the token-info dictionary's FST and runs basic sanity checks on
 * each term and on every word id registered under it.
 *
 * <p>Checks performed: term text and all string features are valid UTF-16; source ids and word
 * ids are strictly increasing in enumeration order; part of speech, pronunciation and reading
 * are non-null; and part-of-speech / inflection values have a translation in
 * {@code ToStringUtil}.
 *
 * @throws Exception if the dictionary lookup or FST enumeration fails
 */
public void testEnumerateAll() throws Exception {
  // counters are only reported in the VERBOSE summary at the end
  int numTerms = 0;
  int numWords = 0;
  // previously seen ids, used to assert strictly increasing order
  int lastWordId = -1;
  int lastSourceId = -1;
  TokenInfoDictionary tid = TokenInfoDictionary.getInstance();
  ConnectionCosts matrix = ConnectionCosts.getInstance();
  FST<Long> fst = tid.getFST().getInternalFST();
  IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<>(fst);
  InputOutput<Long> mapping;
  // reused holder that lookupWordIds fills for each source id
  IntsRef scratch = new IntsRef();
  while ((mapping = fstEnum.next()) != null) {
    numTerms++;
    IntsRef input = mapping.input;
    // rebuild the term text by narrowing each int label of the FST input to a char
    char chars[] = new char[input.length];
    for (int i = 0; i < chars.length; i++) {
      chars[i] = (char)input.ints[input.offset+i];
    }
    assertTrue(UnicodeUtil.validUTF16String(new String(chars)));

    Long output = mapping.output;
    int sourceId = output.intValue();
    // we walk in order: terms, sourceIds, and wordIds should always be increasing
    assertTrue(sourceId > lastSourceId);
    lastSourceId = sourceId;
    tid.lookupWordIds(sourceId, scratch);
    for (int i = 0; i < scratch.length; i++) {
      numWords++;
      int wordId = scratch.ints[scratch.offset+i];
      assertTrue(wordId > lastWordId);
      lastWordId = wordId;

      // base form is optional; when present it must be well-formed
      String baseForm = tid.getBaseForm(wordId, chars, 0, chars.length);
      assertTrue(baseForm == null || UnicodeUtil.validUTF16String(baseForm));

      String inflectionForm = tid.getInflectionForm(wordId);
      assertTrue(inflectionForm == null || UnicodeUtil.validUTF16String(inflectionForm));
      if (inflectionForm != null) {
        // check that it's actually an ipadic inflection form
        assertNotNull(ToStringUtil.getInflectedFormTranslation(inflectionForm));          
      }

      String inflectionType = tid.getInflectionType(wordId);
      assertTrue(inflectionType == null || UnicodeUtil.validUTF16String(inflectionType));
      if (inflectionType != null) {
        // check that it's actually an ipadic inflection type
        assertNotNull(ToStringUtil.getInflectionTypeTranslation(inflectionType));
      }

      int leftId = tid.getLeftId(wordId);
      int rightId = tid.getRightId(wordId);

      // results are discarded: these calls only need to complete without throwing
      // (presumably they would fail for out-of-range ids -- TODO confirm)
      matrix.get(rightId, leftId);

      tid.getWordCost(wordId);

      String pos = tid.getPartOfSpeech(wordId);
      assertNotNull(pos);
      assertTrue(UnicodeUtil.validUTF16String(pos));
      // check that it's actually an ipadic pos tag
      assertNotNull(ToStringUtil.getPOSTranslation(pos));

      String pronunciation = tid.getPronunciation(wordId, chars, 0, chars.length);
      assertNotNull(pronunciation);
      assertTrue(UnicodeUtil.validUTF16String(pronunciation));

      String reading = tid.getReading(wordId, chars, 0, chars.length);
      assertNotNull(reading);
      assertTrue(UnicodeUtil.validUTF16String(reading));
    }
  }
  if (VERBOSE) {
    System.out.println("checked " + numTerms + " terms, " + numWords + " words.");
  }
}
项目:easyjasub    文件:LuceneUtil.java   
/**
 * Romanizes katakana using modified Hepburn, delegating to {@code ToStringUtil}.
 *
 * @param text the text to romanize
 * @return the romanized text produced by {@code ToStringUtil.getRomanization}
 */
public static String katakanaToRomaji(String text) {
    final String romanized = ToStringUtil.getRomanization(text);
    return romanized;
}
项目:easyjasub    文件:LuceneUtil.java   
/**
 * Translates a part-of-speech tag, falling back to the tag itself when no translation exists.
 *
 * @param partOfSpeech the tag to translate
 * @return the translation, or {@code partOfSpeech} unchanged if the lookup returned {@code null}
 */
public static String translatePartOfSpeech(String partOfSpeech) {
    final String english = ToStringUtil.getPOSTranslation(partOfSpeech);
    if (english == null) {
        return partOfSpeech;
    }
    return english;
}
项目:easyjasub    文件:LuceneUtil.java   
/**
 * Translates an inflected form, falling back to the input when no translation exists.
 *
 * @param inflectedForm the inflected form to translate
 * @return the translation, or {@code inflectedForm} unchanged if the lookup returned {@code null}
 */
public static String translateInflectedForm(String inflectedForm) {
    final String english = ToStringUtil.getInflectedFormTranslation(inflectedForm);
    if (english == null) {
        return inflectedForm;
    }
    return english;
}
项目:easyjasub    文件:LuceneUtil.java   
/**
 * Translates an inflection type, falling back to the input when no translation exists.
 *
 * @param inflectionType the inflection type to translate
 * @return the translation, or {@code inflectionType} unchanged if the lookup returned {@code null}
 */
public static String translateInflectionType(String inflectionType) {
    final String english = ToStringUtil.getInflectionTypeTranslation(inflectionType);
    if (english == null) {
        return inflectionType;
    }
    return english;
}
项目:NYBC    文件:TestTokenInfoDictionary.java   
/**
 * Enumerates every entry of the token-info dictionary's FST and runs basic sanity checks on
 * each term and on every word id registered under it.
 *
 * <p>Checks performed: term text and all string features are valid UTF-16; source ids and word
 * ids are strictly increasing in enumeration order; part of speech, pronunciation and reading
 * are non-null; and part-of-speech / inflection values have a translation in
 * {@code ToStringUtil}.
 *
 * @throws Exception if the dictionary lookup or FST enumeration fails
 */
public void testEnumerateAll() throws Exception {
  // counters are only reported in the VERBOSE summary at the end
  int numTerms = 0;
  int numWords = 0;
  // previously seen ids, used to assert strictly increasing order
  int lastWordId = -1;
  int lastSourceId = -1;
  TokenInfoDictionary tid = TokenInfoDictionary.getInstance();
  ConnectionCosts matrix = ConnectionCosts.getInstance();
  FST<Long> fst = tid.getFST().getInternalFST();
  IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
  InputOutput<Long> mapping;
  // reused holder that lookupWordIds fills for each source id
  IntsRef scratch = new IntsRef();
  while ((mapping = fstEnum.next()) != null) {
    numTerms++;
    IntsRef input = mapping.input;
    // rebuild the term text by narrowing each int label of the FST input to a char
    char chars[] = new char[input.length];
    for (int i = 0; i < chars.length; i++) {
      chars[i] = (char)input.ints[input.offset+i];
    }
    assertTrue(UnicodeUtil.validUTF16String(new String(chars)));

    Long output = mapping.output;
    int sourceId = output.intValue();
    // we walk in order: terms, sourceIds, and wordIds should always be increasing
    assertTrue(sourceId > lastSourceId);
    lastSourceId = sourceId;
    tid.lookupWordIds(sourceId, scratch);
    for (int i = 0; i < scratch.length; i++) {
      numWords++;
      int wordId = scratch.ints[scratch.offset+i];
      assertTrue(wordId > lastWordId);
      lastWordId = wordId;

      // base form is optional; when present it must be well-formed
      String baseForm = tid.getBaseForm(wordId, chars, 0, chars.length);
      assertTrue(baseForm == null || UnicodeUtil.validUTF16String(baseForm));

      String inflectionForm = tid.getInflectionForm(wordId);
      assertTrue(inflectionForm == null || UnicodeUtil.validUTF16String(inflectionForm));
      if (inflectionForm != null) {
        // check that it's actually an ipadic inflection form
        assertNotNull(ToStringUtil.getInflectedFormTranslation(inflectionForm));          
      }

      String inflectionType = tid.getInflectionType(wordId);
      assertTrue(inflectionType == null || UnicodeUtil.validUTF16String(inflectionType));
      if (inflectionType != null) {
        // check that it's actually an ipadic inflection type
        assertNotNull(ToStringUtil.getInflectionTypeTranslation(inflectionType));
      }

      int leftId = tid.getLeftId(wordId);
      int rightId = tid.getRightId(wordId);

      // results are discarded: these calls only need to complete without throwing
      // (presumably they would fail for out-of-range ids -- TODO confirm)
      matrix.get(rightId, leftId);

      tid.getWordCost(wordId);

      String pos = tid.getPartOfSpeech(wordId);
      assertNotNull(pos);
      assertTrue(UnicodeUtil.validUTF16String(pos));
      // check that it's actually an ipadic pos tag
      assertNotNull(ToStringUtil.getPOSTranslation(pos));

      String pronunciation = tid.getPronunciation(wordId, chars, 0, chars.length);
      assertNotNull(pronunciation);
      assertTrue(UnicodeUtil.validUTF16String(pronunciation));

      String reading = tid.getReading(wordId, chars, 0, chars.length);
      assertNotNull(reading);
      assertTrue(UnicodeUtil.validUTF16String(reading));
    }
  }
  if (VERBOSE) {
    System.out.println("checked " + numTerms + " terms, " + numWords + " words.");
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestTokenInfoDictionary.java   
/**
 * Enumerates every entry of the token-info dictionary's FST and runs basic sanity checks on
 * each term and on every word id registered under it.
 *
 * <p>Checks performed: term text and all string features are valid UTF-16; source ids and word
 * ids are strictly increasing in enumeration order; part of speech, pronunciation and reading
 * are non-null; and part-of-speech / inflection values have a translation in
 * {@code ToStringUtil}.
 *
 * @throws Exception if the dictionary lookup or FST enumeration fails
 */
public void testEnumerateAll() throws Exception {
  // counters are only reported in the VERBOSE summary at the end
  int numTerms = 0;
  int numWords = 0;
  // previously seen ids, used to assert strictly increasing order
  int lastWordId = -1;
  int lastSourceId = -1;
  TokenInfoDictionary tid = TokenInfoDictionary.getInstance();
  ConnectionCosts matrix = ConnectionCosts.getInstance();
  FST<Long> fst = tid.getFST().getInternalFST();
  IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
  InputOutput<Long> mapping;
  // reused holder that lookupWordIds fills for each source id
  IntsRef scratch = new IntsRef();
  while ((mapping = fstEnum.next()) != null) {
    numTerms++;
    IntsRef input = mapping.input;
    // rebuild the term text by narrowing each int label of the FST input to a char
    char chars[] = new char[input.length];
    for (int i = 0; i < chars.length; i++) {
      chars[i] = (char)input.ints[input.offset+i];
    }
    assertTrue(UnicodeUtil.validUTF16String(new String(chars)));

    Long output = mapping.output;
    int sourceId = output.intValue();
    // we walk in order: terms, sourceIds, and wordIds should always be increasing
    assertTrue(sourceId > lastSourceId);
    lastSourceId = sourceId;
    tid.lookupWordIds(sourceId, scratch);
    for (int i = 0; i < scratch.length; i++) {
      numWords++;
      int wordId = scratch.ints[scratch.offset+i];
      assertTrue(wordId > lastWordId);
      lastWordId = wordId;

      // base form is optional; when present it must be well-formed
      String baseForm = tid.getBaseForm(wordId, chars, 0, chars.length);
      assertTrue(baseForm == null || UnicodeUtil.validUTF16String(baseForm));

      String inflectionForm = tid.getInflectionForm(wordId);
      assertTrue(inflectionForm == null || UnicodeUtil.validUTF16String(inflectionForm));
      if (inflectionForm != null) {
        // check that it's actually an ipadic inflection form
        assertNotNull(ToStringUtil.getInflectedFormTranslation(inflectionForm));          
      }

      String inflectionType = tid.getInflectionType(wordId);
      assertTrue(inflectionType == null || UnicodeUtil.validUTF16String(inflectionType));
      if (inflectionType != null) {
        // check that it's actually an ipadic inflection type
        assertNotNull(ToStringUtil.getInflectionTypeTranslation(inflectionType));
      }

      int leftId = tid.getLeftId(wordId);
      int rightId = tid.getRightId(wordId);

      // results are discarded: these calls only need to complete without throwing
      // (presumably they would fail for out-of-range ids -- TODO confirm)
      matrix.get(rightId, leftId);

      tid.getWordCost(wordId);

      String pos = tid.getPartOfSpeech(wordId);
      assertNotNull(pos);
      assertTrue(UnicodeUtil.validUTF16String(pos));
      // check that it's actually an ipadic pos tag
      assertNotNull(ToStringUtil.getPOSTranslation(pos));

      String pronunciation = tid.getPronunciation(wordId, chars, 0, chars.length);
      assertNotNull(pronunciation);
      assertTrue(UnicodeUtil.validUTF16String(pronunciation));

      String reading = tid.getReading(wordId, chars, 0, chars.length);
      assertNotNull(reading);
      assertTrue(UnicodeUtil.validUTF16String(reading));
    }
  }
  if (VERBOSE) {
    System.out.println("checked " + numTerms + " terms, " + numWords + " words.");
  }
}
项目:easyjasub    文件:LuceneUtil.java   
/**
 * Romanizes katakana using modified Hepburn, writing the result to the given appendable.
 *
 * @param builder destination passed through to {@code ToStringUtil.getRomanization}
 * @param s the text to romanize
 * @throws IOException propagated from the underlying romanization call
 */
public static void katakanaToRomaji(Appendable builder, CharSequence s) throws IOException {
    // pure delegation: ToStringUtil does both the conversion and the appending
    ToStringUtil.getRomanization(builder, s);
}