Java 类org.apache.lucene.analysis.cn.smart.Utility 实例源码

项目:search    文件:WordDictionary.java   
private void sortEachItems() {
  char[] tmpArray;
  int tmpFreq;
  for (int i = 0; i < wordItem_charArrayTable.length; i++) {
    if (wordItem_charArrayTable[i] != null
        && wordItem_charArrayTable[i].length > 1) {
      for (int j = 0; j < wordItem_charArrayTable[i].length - 1; j++) {
        for (int j2 = j + 1; j2 < wordItem_charArrayTable[i].length; j2++) {
          if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
              wordItem_charArrayTable[i][j2], 0) > 0) {
            tmpArray = wordItem_charArrayTable[i][j];
            tmpFreq = wordItem_frequencyTable[i][j];
            wordItem_charArrayTable[i][j] = wordItem_charArrayTable[i][j2];
            wordItem_frequencyTable[i][j] = wordItem_frequencyTable[i][j2];
            wordItem_charArrayTable[i][j2] = tmpArray;
            wordItem_frequencyTable[i][j2] = tmpFreq;
          }
        }
      }
    }
  }
}
项目:search    文件:WordDictionary.java   
/**
 * Look up the text string corresponding with the word char array, 
 * and return the position of the word list.
 * 
 * @param knownHashIndex already figure out position of the first word 
 *   symbol charArray[0] in hash table. If not calculated yet, can be 
 *   replaced with function int findInTable(char[] charArray).
 * @param charArray look up the char array corresponding with the word.
 * @return word location in word array.  If not found, then return -1.
 */
private int findInTable(short knownHashIndex, char[] charArray) {
  if (charArray == null || charArray.length == 0)
    return -1;

  char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
  int start = 0, end = items.length - 1;
  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArray(items[mid], 0, charArray, 1);

    if (cmpResult == 0)
      return mid;// find it
    else if (cmpResult < 0)
      start = mid + 1;
    else if (cmpResult > 0)
      end = mid - 1;

    mid = (start + end) / 2;
  }
  return -1;
}
项目:NYBC    文件:WordDictionary.java   
private void sortEachItems() {
  char[] tmpArray;
  int tmpFreq;
  for (int i = 0; i < wordItem_charArrayTable.length; i++) {
    if (wordItem_charArrayTable[i] != null
        && wordItem_charArrayTable[i].length > 1) {
      for (int j = 0; j < wordItem_charArrayTable[i].length - 1; j++) {
        for (int j2 = j + 1; j2 < wordItem_charArrayTable[i].length; j2++) {
          if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
              wordItem_charArrayTable[i][j2], 0) > 0) {
            tmpArray = wordItem_charArrayTable[i][j];
            tmpFreq = wordItem_frequencyTable[i][j];
            wordItem_charArrayTable[i][j] = wordItem_charArrayTable[i][j2];
            wordItem_frequencyTable[i][j] = wordItem_frequencyTable[i][j2];
            wordItem_charArrayTable[i][j2] = tmpArray;
            wordItem_frequencyTable[i][j2] = tmpFreq;
          }
        }
      }
    }
  }
}
项目:NYBC    文件:WordDictionary.java   
/**
 * Look up the text string corresponding with the word char array, 
 * and return the position of the word list.
 * 
 * @param knownHashIndex already figure out position of the first word 
 *   symbol charArray[0] in hash table. If not calculated yet, can be 
 *   replaced with function int findInTable(char[] charArray).
 * @param charArray look up the char array corresponding with the word.
 * @return word location in word array.  If not found, then return -1.
 */
private int findInTable(short knownHashIndex, char[] charArray) {
  if (charArray == null || charArray.length == 0)
    return -1;

  char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
  int start = 0, end = items.length - 1;
  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArray(items[mid], 0, charArray, 1);

    if (cmpResult == 0)
      return mid;// find it
    else if (cmpResult < 0)
      start = mid + 1;
    else if (cmpResult > 0)
      end = mid - 1;

    mid = (start + end) / 2;
  }
  return -1;
}
项目:read-open-source-code    文件:WordDictionary.java   
private void sortEachItems() {
  char[] tmpArray;
  int tmpFreq;
  for (int i = 0; i < wordItem_charArrayTable.length; i++) {
    if (wordItem_charArrayTable[i] != null
        && wordItem_charArrayTable[i].length > 1) {
      for (int j = 0; j < wordItem_charArrayTable[i].length - 1; j++) {
        for (int j2 = j + 1; j2 < wordItem_charArrayTable[i].length; j2++) {
          if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
              wordItem_charArrayTable[i][j2], 0) > 0) {
            tmpArray = wordItem_charArrayTable[i][j];
            tmpFreq = wordItem_frequencyTable[i][j];
            wordItem_charArrayTable[i][j] = wordItem_charArrayTable[i][j2];
            wordItem_frequencyTable[i][j] = wordItem_frequencyTable[i][j2];
            wordItem_charArrayTable[i][j2] = tmpArray;
            wordItem_frequencyTable[i][j2] = tmpFreq;
          }
        }
      }
    }
  }
}
项目:read-open-source-code    文件:WordDictionary.java   
/**
 * Look up the text string corresponding with the word char array, 
 * and return the position of the word list.
 * 
 * @param knownHashIndex already figure out position of the first word 
 *   symbol charArray[0] in hash table. If not calculated yet, can be 
 *   replaced with function int findInTable(char[] charArray).
 * @param charArray look up the char array corresponding with the word.
 * @return word location in word array.  If not found, then return -1.
 */
private int findInTable(short knownHashIndex, char[] charArray) {
  if (charArray == null || charArray.length == 0)
    return -1;

  char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
  int start = 0, end = items.length - 1;
  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArray(items[mid], 0, charArray, 1);

    if (cmpResult == 0)
      return mid;// find it
    else if (cmpResult < 0)
      start = mid + 1;
    else if (cmpResult > 0)
      end = mid - 1;

    mid = (start + end) / 2;
  }
  return -1;
}
项目:read-open-source-code    文件:WordDictionary.java   
private void sortEachItems() {
  char[] tmpArray;
  int tmpFreq;
  for (int i = 0; i < wordItem_charArrayTable.length; i++) {
    if (wordItem_charArrayTable[i] != null
        && wordItem_charArrayTable[i].length > 1) {
      for (int j = 0; j < wordItem_charArrayTable[i].length - 1; j++) {
        for (int j2 = j + 1; j2 < wordItem_charArrayTable[i].length; j2++) {
          if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
              wordItem_charArrayTable[i][j2], 0) > 0) {
            tmpArray = wordItem_charArrayTable[i][j];
            tmpFreq = wordItem_frequencyTable[i][j];
            wordItem_charArrayTable[i][j] = wordItem_charArrayTable[i][j2];
            wordItem_frequencyTable[i][j] = wordItem_frequencyTable[i][j2];
            wordItem_charArrayTable[i][j2] = tmpArray;
            wordItem_frequencyTable[i][j2] = tmpFreq;
          }
        }
      }
    }
  }
}
项目:read-open-source-code    文件:WordDictionary.java   
/**
 * Look up the text string corresponding with the word char array, 
 * and return the position of the word list.
 * 
 * @param knownHashIndex already figure out position of the first word 
 *   symbol charArray[0] in hash table. If not calculated yet, can be 
 *   replaced with function int findInTable(char[] charArray).
 * @param charArray look up the char array corresponding with the word.
 * @return word location in word array.  If not found, then return -1.
 */
private int findInTable(short knownHashIndex, char[] charArray) {
  if (charArray == null || charArray.length == 0)
    return -1;

  char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
  int start = 0, end = items.length - 1;
  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArray(items[mid], 0, charArray, 1);

    if (cmpResult == 0)
      return mid;// find it
    else if (cmpResult < 0)
      start = mid + 1;
    else if (cmpResult > 0)
      end = mid - 1;

    mid = (start + end) / 2;
  }
  return -1;
}
项目:Maskana-Gestor-de-Conocimiento    文件:WordDictionary.java   
private void sortEachItems() {
  char[] tmpArray;
  int tmpFreq;
  for (int i = 0; i < wordItem_charArrayTable.length; i++) {
    if (wordItem_charArrayTable[i] != null
        && wordItem_charArrayTable[i].length > 1) {
      for (int j = 0; j < wordItem_charArrayTable[i].length - 1; j++) {
        for (int j2 = j + 1; j2 < wordItem_charArrayTable[i].length; j2++) {
          if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
              wordItem_charArrayTable[i][j2], 0) > 0) {
            tmpArray = wordItem_charArrayTable[i][j];
            tmpFreq = wordItem_frequencyTable[i][j];
            wordItem_charArrayTable[i][j] = wordItem_charArrayTable[i][j2];
            wordItem_frequencyTable[i][j] = wordItem_frequencyTable[i][j2];
            wordItem_charArrayTable[i][j2] = tmpArray;
            wordItem_frequencyTable[i][j2] = tmpFreq;
          }
        }
      }
    }
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:WordDictionary.java   
/**
 * Look up the text string corresponding with the word char array, 
 * and return the position of the word list.
 * 
 * @param knownHashIndex already figure out position of the first word 
 *   symbol charArray[0] in hash table. If not calculated yet, can be 
 *   replaced with function int findInTable(char[] charArray).
 * @param charArray look up the char array corresponding with the word.
 * @return word location in word array.  If not found, then return -1.
 */
private int findInTable(short knownHashIndex, char[] charArray) {
  if (charArray == null || charArray.length == 0)
    return -1;

  char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
  int start = 0, end = items.length - 1;
  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArray(items[mid], 0, charArray, 1);

    if (cmpResult == 0)
      return mid;// find it
    else if (cmpResult < 0)
      start = mid + 1;
    else if (cmpResult > 0)
      end = mid - 1;

    mid = (start + end) / 2;
  }
  return -1;
}
项目:search    文件:SegTokenFilter.java   
/**
 * Filter an input {@link SegToken}
 * <p>
 * Full-width latin will be converted to half-width, then all latin will be lowercased.
 * All punctuation is converted into {@link Utility#COMMON_DELIMITER}
 * </p>
 * 
 * @param token input {@link SegToken}
 * @return normalized {@link SegToken}
 */
public SegToken filter(SegToken token) {
  switch (token.wordType) {
    case WordType.FULLWIDTH_NUMBER:
    case WordType.FULLWIDTH_STRING: /* first convert full-width -> half-width */
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0xFF10)
          token.charArray[i] -= 0xFEE0;

        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.STRING:
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.DELIMITER: /* convert all punctuation to Utility.COMMON_DELIMITER */
      token.charArray = Utility.COMMON_DELIMITER;
      break;
    default:
      break;
  }
  return token;
}
项目:search    文件:HHMMSegmenter.java   
/**
 * Get the character types for every character in a sentence.
 * 
 * @see Utility#getCharType(char)
 * @param sentence input sentence
 * @return array of character types corresponding to character positions in the sentence
 */
private static int[] getCharTypes(String sentence) {
  int length = sentence.length();
  int[] charTypeArray = new int[length];
  // the type of each character by position
  for (int i = 0; i < length; i++) {
    charTypeArray[i] = Utility.getCharType(sentence.charAt(i));
  }

  return charTypeArray;
}
项目:search    文件:WordDictionary.java   
private void mergeSameWords() {
  int i;
  for (i = 0; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
    if (wordItem_charArrayTable[i] == null)
      continue;
    int len = 1;
    for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
      if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
          wordItem_charArrayTable[i][j - 1], 0) != 0)
        len++;

    }
    if (len < wordItem_charArrayTable[i].length) {
      char[][] tempArray = new char[len][];
      int[] tempFreq = new int[len];
      int k = 0;
      tempArray[0] = wordItem_charArrayTable[i][0];
      tempFreq[0] = wordItem_frequencyTable[i][0];
      for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
        if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
            tempArray[k], 0) != 0) {
          k++;
          // temp[k] = wordItemTable[i][j];
          tempArray[k] = wordItem_charArrayTable[i][j];
          tempFreq[k] = wordItem_frequencyTable[i][j];
        } else {
          // temp[k].frequency += wordItemTable[i][j].frequency;
          tempFreq[k] += wordItem_frequencyTable[i][j];
        }
      }
      // wordItemTable[i] = temp;
      wordItem_charArrayTable[i] = tempArray;
      wordItem_frequencyTable[i] = tempFreq;
    }
  }
}
项目:search    文件:WordDictionary.java   
/**
 * Find the nth word in the dictionary that starts with the supplied prefix
 * 
 * @see #getPrefixMatch(char[])
 * @param charArray input prefix
 * @param knownStart relative position in the dictionary to start
 * @return index of word, or -1 if not found
 */
public int getPrefixMatch(char[] charArray, int knownStart) {
  short index = getWordItemTableIndex(charArray[0]);
  if (index == -1)
    return -1;
  char[][] items = wordItem_charArrayTable[wordIndexTable[index]];
  int start = knownStart, end = items.length - 1;

  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArrayByPrefix(charArray, 1, items[mid], 0);
    if (cmpResult == 0) {
      // Get the first item which match the current word
      while (mid >= 0
          && Utility.compareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
        mid--;
      mid++;
      return mid;// Find the first word that uses charArray as prefix.
    } else if (cmpResult < 0)
      end = mid - 1;
    else
      start = mid + 1;
    mid = (start + end) / 2;
  }
  return -1;
}
项目:NYBC    文件:SegTokenFilter.java   
/**
 * Filter an input {@link SegToken}
 * <p>
 * Full-width latin will be converted to half-width, then all latin will be lowercased.
 * All punctuation is converted into {@link Utility#COMMON_DELIMITER}
 * </p>
 * 
 * @param token input {@link SegToken}
 * @return normalized {@link SegToken}
 */
public SegToken filter(SegToken token) {
  switch (token.wordType) {
    case WordType.FULLWIDTH_NUMBER:
    case WordType.FULLWIDTH_STRING: /* first convert full-width -> half-width */
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0xFF10)
          token.charArray[i] -= 0xFEE0;

        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.STRING:
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.DELIMITER: /* convert all punctuation to Utility.COMMON_DELIMITER */
      token.charArray = Utility.COMMON_DELIMITER;
      break;
    default:
      break;
  }
  return token;
}
项目:NYBC    文件:HHMMSegmenter.java   
/**
 * Get the character types for every character in a sentence.
 * 
 * @see Utility#getCharType(char)
 * @param sentence input sentence
 * @return array of character types corresponding to character positions in the sentence
 */
private static int[] getCharTypes(String sentence) {
  int length = sentence.length();
  int[] charTypeArray = new int[length];
  // the type of each character by position
  for (int i = 0; i < length; i++) {
    charTypeArray[i] = Utility.getCharType(sentence.charAt(i));
  }

  return charTypeArray;
}
项目:NYBC    文件:WordDictionary.java   
private void mergeSameWords() {
  int i;
  for (i = 0; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
    if (wordItem_charArrayTable[i] == null)
      continue;
    int len = 1;
    for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
      if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
          wordItem_charArrayTable[i][j - 1], 0) != 0)
        len++;

    }
    if (len < wordItem_charArrayTable[i].length) {
      char[][] tempArray = new char[len][];
      int[] tempFreq = new int[len];
      int k = 0;
      tempArray[0] = wordItem_charArrayTable[i][0];
      tempFreq[0] = wordItem_frequencyTable[i][0];
      for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
        if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
            tempArray[k], 0) != 0) {
          k++;
          // temp[k] = wordItemTable[i][j];
          tempArray[k] = wordItem_charArrayTable[i][j];
          tempFreq[k] = wordItem_frequencyTable[i][j];
        } else {
          // temp[k].frequency += wordItemTable[i][j].frequency;
          tempFreq[k] += wordItem_frequencyTable[i][j];
        }
      }
      // wordItemTable[i] = temp;
      wordItem_charArrayTable[i] = tempArray;
      wordItem_frequencyTable[i] = tempFreq;
    }
  }
}
项目:NYBC    文件:WordDictionary.java   
/**
 * Find the nth word in the dictionary that starts with the supplied prefix
 * 
 * @see #getPrefixMatch(char[])
 * @param charArray input prefix
 * @param knownStart relative position in the dictionary to start
 * @return index of word, or -1 if not found
 */
public int getPrefixMatch(char[] charArray, int knownStart) {
  short index = getWordItemTableIndex(charArray[0]);
  if (index == -1)
    return -1;
  char[][] items = wordItem_charArrayTable[wordIndexTable[index]];
  int start = knownStart, end = items.length - 1;

  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArrayByPrefix(charArray, 1, items[mid], 0);
    if (cmpResult == 0) {
      // Get the first item which match the current word
      while (mid >= 0
          && Utility.compareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
        mid--;
      mid++;
      return mid;// Find the first word that uses charArray as prefix.
    } else if (cmpResult < 0)
      end = mid - 1;
    else
      start = mid + 1;
    mid = (start + end) / 2;
  }
  return -1;
}
项目:read-open-source-code    文件:SegTokenFilter.java   
/**
 * Filter an input {@link SegToken}
 * <p>
 * Full-width latin will be converted to half-width, then all latin will be lowercased.
 * All punctuation is converted into {@link Utility#COMMON_DELIMITER}
 * </p>
 * 
 * @param token input {@link SegToken}
 * @return normalized {@link SegToken}
 */
public SegToken filter(SegToken token) {
  switch (token.wordType) {
    case WordType.FULLWIDTH_NUMBER:
    case WordType.FULLWIDTH_STRING: /* first convert full-width -> half-width */
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0xFF10)
          token.charArray[i] -= 0xFEE0;

        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.STRING:
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.DELIMITER: /* convert all punctuation to Utility.COMMON_DELIMITER */
      token.charArray = Utility.COMMON_DELIMITER;
      break;
    default:
      break;
  }
  return token;
}
项目:read-open-source-code    文件:HHMMSegmenter.java   
/**
 * Get the character types for every character in a sentence.
 * 
 * @see Utility#getCharType(char)
 * @param sentence input sentence
 * @return array of character types corresponding to character positions in the sentence
 */
private static int[] getCharTypes(String sentence) {
  int length = sentence.length();
  int[] charTypeArray = new int[length];
  // the type of each character by position
  for (int i = 0; i < length; i++) {
    charTypeArray[i] = Utility.getCharType(sentence.charAt(i));
  }

  return charTypeArray;
}
项目:read-open-source-code    文件:WordDictionary.java   
private void mergeSameWords() {
  int i;
  for (i = 0; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
    if (wordItem_charArrayTable[i] == null)
      continue;
    int len = 1;
    for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
      if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
          wordItem_charArrayTable[i][j - 1], 0) != 0)
        len++;

    }
    if (len < wordItem_charArrayTable[i].length) {
      char[][] tempArray = new char[len][];
      int[] tempFreq = new int[len];
      int k = 0;
      tempArray[0] = wordItem_charArrayTable[i][0];
      tempFreq[0] = wordItem_frequencyTable[i][0];
      for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
        if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
            tempArray[k], 0) != 0) {
          k++;
          // temp[k] = wordItemTable[i][j];
          tempArray[k] = wordItem_charArrayTable[i][j];
          tempFreq[k] = wordItem_frequencyTable[i][j];
        } else {
          // temp[k].frequency += wordItemTable[i][j].frequency;
          tempFreq[k] += wordItem_frequencyTable[i][j];
        }
      }
      // wordItemTable[i] = temp;
      wordItem_charArrayTable[i] = tempArray;
      wordItem_frequencyTable[i] = tempFreq;
    }
  }
}
项目:read-open-source-code    文件:WordDictionary.java   
/**
 * Find the nth word in the dictionary that starts with the supplied prefix
 * 
 * @see #getPrefixMatch(char[])
 * @param charArray input prefix
 * @param knownStart relative position in the dictionary to start
 * @return index of word, or -1 if not found
 */
public int getPrefixMatch(char[] charArray, int knownStart) {
  short index = getWordItemTableIndex(charArray[0]);
  if (index == -1)
    return -1;
  char[][] items = wordItem_charArrayTable[wordIndexTable[index]];
  int start = knownStart, end = items.length - 1;

  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArrayByPrefix(charArray, 1, items[mid], 0);
    if (cmpResult == 0) {
      // Get the first item which match the current word
      while (mid >= 0
          && Utility.compareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
        mid--;
      mid++;
      return mid;// Find the first word that uses charArray as prefix.
    } else if (cmpResult < 0)
      end = mid - 1;
    else
      start = mid + 1;
    mid = (start + end) / 2;
  }
  return -1;
}
项目:read-open-source-code    文件:SegTokenFilter.java   
/**
 * Filter an input {@link SegToken}
 * <p>
 * Full-width latin will be converted to half-width, then all latin will be lowercased.
 * All punctuation is converted into {@link Utility#COMMON_DELIMITER}
 * </p>
 * 
 * @param token input {@link SegToken}
 * @return normalized {@link SegToken}
 */
public SegToken filter(SegToken token) {
  switch (token.wordType) {
    case WordType.FULLWIDTH_NUMBER:
    case WordType.FULLWIDTH_STRING: /* first convert full-width -> half-width */
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0xFF10)
          token.charArray[i] -= 0xFEE0;

        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.STRING:
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.DELIMITER: /* convert all punctuation to Utility.COMMON_DELIMITER */
      token.charArray = Utility.COMMON_DELIMITER;
      break;
    default:
      break;
  }
  return token;
}
项目:read-open-source-code    文件:HHMMSegmenter.java   
/**
 * Get the character types for every character in a sentence.
 * 
 * @see Utility#getCharType(char)
 * @param sentence input sentence
 * @return array of character types corresponding to character positions in the sentence
 */
private static int[] getCharTypes(String sentence) {
  int length = sentence.length();
  int[] charTypeArray = new int[length];
  // the type of each character by position
  for (int i = 0; i < length; i++) {
    charTypeArray[i] = Utility.getCharType(sentence.charAt(i));
  }

  return charTypeArray;
}
项目:read-open-source-code    文件:WordDictionary.java   
private void mergeSameWords() {
  int i;
  for (i = 0; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
    if (wordItem_charArrayTable[i] == null)
      continue;
    int len = 1;
    for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
      if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
          wordItem_charArrayTable[i][j - 1], 0) != 0)
        len++;

    }
    if (len < wordItem_charArrayTable[i].length) {
      char[][] tempArray = new char[len][];
      int[] tempFreq = new int[len];
      int k = 0;
      tempArray[0] = wordItem_charArrayTable[i][0];
      tempFreq[0] = wordItem_frequencyTable[i][0];
      for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
        if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
            tempArray[k], 0) != 0) {
          k++;
          // temp[k] = wordItemTable[i][j];
          tempArray[k] = wordItem_charArrayTable[i][j];
          tempFreq[k] = wordItem_frequencyTable[i][j];
        } else {
          // temp[k].frequency += wordItemTable[i][j].frequency;
          tempFreq[k] += wordItem_frequencyTable[i][j];
        }
      }
      // wordItemTable[i] = temp;
      wordItem_charArrayTable[i] = tempArray;
      wordItem_frequencyTable[i] = tempFreq;
    }
  }
}
项目:read-open-source-code    文件:WordDictionary.java   
/**
 * Find the nth word in the dictionary that starts with the supplied prefix
 * 
 * @see #getPrefixMatch(char[])
 * @param charArray input prefix
 * @param knownStart relative position in the dictionary to start
 * @return index of word, or -1 if not found
 */
public int getPrefixMatch(char[] charArray, int knownStart) {
  short index = getWordItemTableIndex(charArray[0]);
  if (index == -1)
    return -1;
  char[][] items = wordItem_charArrayTable[wordIndexTable[index]];
  int start = knownStart, end = items.length - 1;

  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArrayByPrefix(charArray, 1, items[mid], 0);
    if (cmpResult == 0) {
      // Get the first item which match the current word
      while (mid >= 0
          && Utility.compareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
        mid--;
      mid++;
      return mid;// Find the first word that uses charArray as prefix.
    } else if (cmpResult < 0)
      end = mid - 1;
    else
      start = mid + 1;
    mid = (start + end) / 2;
  }
  return -1;
}
项目:Maskana-Gestor-de-Conocimiento    文件:SegTokenFilter.java   
/**
 * Filter an input {@link SegToken}
 * <p>
 * Full-width latin will be converted to half-width, then all latin will be lowercased.
 * All punctuation is converted into {@link Utility#COMMON_DELIMITER}
 * </p>
 * 
 * @param token input {@link SegToken}
 * @return normalized {@link SegToken}
 */
public SegToken filter(SegToken token) {
  switch (token.wordType) {
    case WordType.FULLWIDTH_NUMBER:
    case WordType.FULLWIDTH_STRING: /* first convert full-width -> half-width */
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0xFF10)
          token.charArray[i] -= 0xFEE0;

        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.STRING:
      for (int i = 0; i < token.charArray.length; i++) {
        if (token.charArray[i] >= 0x0041 && token.charArray[i] <= 0x005A) /* lowercase latin */
          token.charArray[i] += 0x0020;
      }
      break;
    case WordType.DELIMITER: /* convert all punctuation to Utility.COMMON_DELIMITER */
      token.charArray = Utility.COMMON_DELIMITER;
      break;
    default:
      break;
  }
  return token;
}
项目:Maskana-Gestor-de-Conocimiento    文件:HHMMSegmenter.java   
/**
 * Get the character types for every character in a sentence.
 * 
 * @see Utility#getCharType(char)
 * @param sentence input sentence
 * @return array of character types corresponding to character positions in the sentence
 */
private static int[] getCharTypes(String sentence) {
  int length = sentence.length();
  int[] charTypeArray = new int[length];
  // the type of each character by position
  for (int i = 0; i < length; i++) {
    charTypeArray[i] = Utility.getCharType(sentence.charAt(i));
  }

  return charTypeArray;
}
项目:Maskana-Gestor-de-Conocimiento    文件:WordDictionary.java   
private void mergeSameWords() {
  int i;
  for (i = 0; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
    if (wordItem_charArrayTable[i] == null)
      continue;
    int len = 1;
    for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
      if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
          wordItem_charArrayTable[i][j - 1], 0) != 0)
        len++;

    }
    if (len < wordItem_charArrayTable[i].length) {
      char[][] tempArray = new char[len][];
      int[] tempFreq = new int[len];
      int k = 0;
      tempArray[0] = wordItem_charArrayTable[i][0];
      tempFreq[0] = wordItem_frequencyTable[i][0];
      for (int j = 1; j < wordItem_charArrayTable[i].length; j++) {
        if (Utility.compareArray(wordItem_charArrayTable[i][j], 0,
            tempArray[k], 0) != 0) {
          k++;
          // temp[k] = wordItemTable[i][j];
          tempArray[k] = wordItem_charArrayTable[i][j];
          tempFreq[k] = wordItem_frequencyTable[i][j];
        } else {
          // temp[k].frequency += wordItemTable[i][j].frequency;
          tempFreq[k] += wordItem_frequencyTable[i][j];
        }
      }
      // wordItemTable[i] = temp;
      wordItem_charArrayTable[i] = tempArray;
      wordItem_frequencyTable[i] = tempFreq;
    }
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:WordDictionary.java   
/**
 * Find the nth word in the dictionary that starts with the supplied prefix
 * 
 * @see #getPrefixMatch(char[])
 * @param charArray input prefix
 * @param knownStart relative position in the dictionary to start
 * @return index of word, or -1 if not found
 */
public int getPrefixMatch(char[] charArray, int knownStart) {
  short index = getWordItemTableIndex(charArray[0]);
  if (index == -1)
    return -1;
  char[][] items = wordItem_charArrayTable[wordIndexTable[index]];
  int start = knownStart, end = items.length - 1;

  int mid = (start + end) / 2, cmpResult;

  // Binary search for the index of idArray
  while (start <= end) {
    cmpResult = Utility.compareArrayByPrefix(charArray, 1, items[mid], 0);
    if (cmpResult == 0) {
      // Get the first item which match the current word
      while (mid >= 0
          && Utility.compareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
        mid--;
      mid++;
      return mid;// Find the first word that uses charArray as prefix.
    } else if (cmpResult < 0)
      end = mid - 1;
    else
      start = mid + 1;
    mid = (start + end) / 2;
  }
  return -1;
}
项目:search    文件:BiSegGraph.java   
private void generateBiSegGraph(SegGraph segGraph) {
  double smooth = 0.1;
  int wordPairFreq = 0;
  int maxStart = segGraph.getMaxStart();
  double oneWordFreq, weight, tinyDouble = 1.0 / Utility.MAX_FREQUENCE;

  int next;
  char[] idBuffer;
  // get the list of tokens ordered and indexed
  segTokenList = segGraph.makeIndex();
  // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
  int key = -1;
  List<SegToken> nextTokens = null;
  while (key < maxStart) {
    if (segGraph.isStartExist(key)) {

      List<SegToken> tokenList = segGraph.getStartList(key);

      // Calculate all tokens for a given key.
      for (SegToken t1 : tokenList) {
        oneWordFreq = t1.weight;
        next = t1.endOffset;
        nextTokens = null;
        // Find the next corresponding Token.
        // For example: "Sunny seashore", the present Token is "sunny", next one should be "sea" or "seashore".
        // If we cannot find the next Token, then go to the end and repeat the same cycle.
        while (next <= maxStart) {
          // Because the beginning position of endToken is sentenceLen, so equal to sentenceLen can find endToken.
          if (segGraph.isStartExist(next)) {
            nextTokens = segGraph.getStartList(next);
            break;
          }
          next++;
        }
        if (nextTokens == null) {
          break;
        }
        for (SegToken t2 : nextTokens) {
          idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
          System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
          idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
          System.arraycopy(t2.charArray, 0, idBuffer,
              t1.charArray.length + 1, t2.charArray.length);

          // Two linked Words frequency
          wordPairFreq = bigramDict.getFrequency(idBuffer);

          // Smoothing

          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          weight = -Math
              .log(smooth
                  * (1.0 + oneWordFreq)
                  / (Utility.MAX_FREQUENCE + 0.0)
                  + (1.0 - smooth)
                  * ((1.0 - tinyDouble) * wordPairFreq / (1.0 + oneWordFreq) + tinyDouble));

          SegTokenPair tokenPair = new SegTokenPair(idBuffer, t1.index,
              t2.index, weight);
          this.addSegTokenPair(tokenPair);
        }
      }
    }
    key++;
  }

}
项目:NYBC    文件:BiSegGraph.java   
private void generateBiSegGraph(SegGraph segGraph) {
  double smooth = 0.1;
  int wordPairFreq = 0;
  int maxStart = segGraph.getMaxStart();
  double oneWordFreq, weight, tinyDouble = 1.0 / Utility.MAX_FREQUENCE;

  int next;
  char[] idBuffer;
  // get the list of tokens ordered and indexed
  segTokenList = segGraph.makeIndex();
  // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
  int key = -1;
  List<SegToken> nextTokens = null;
  while (key < maxStart) {
    if (segGraph.isStartExist(key)) {

      List<SegToken> tokenList = segGraph.getStartList(key);

      // Calculate all tokens for a given key.
      for (SegToken t1 : tokenList) {
        oneWordFreq = t1.weight;
        next = t1.endOffset;
        nextTokens = null;
        // Find the next corresponding Token.
        // For example: "Sunny seashore", the present Token is "sunny", next one should be "sea" or "seashore".
        // If we cannot find the next Token, then go to the end and repeat the same cycle.
        while (next <= maxStart) {
          // Because the beginning position of endToken is sentenceLen, so equal to sentenceLen can find endToken.
          if (segGraph.isStartExist(next)) {
            nextTokens = segGraph.getStartList(next);
            break;
          }
          next++;
        }
        if (nextTokens == null) {
          break;
        }
        for (SegToken t2 : nextTokens) {
          idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
          System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
          idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
          System.arraycopy(t2.charArray, 0, idBuffer,
              t1.charArray.length + 1, t2.charArray.length);

          // Two linked Words frequency
          wordPairFreq = bigramDict.getFrequency(idBuffer);

          // Smoothing

          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          weight = -Math
              .log(smooth
                  * (1.0 + oneWordFreq)
                  / (Utility.MAX_FREQUENCE + 0.0)
                  + (1.0 - smooth)
                  * ((1.0 - tinyDouble) * wordPairFreq / (1.0 + oneWordFreq) + tinyDouble));

          SegTokenPair tokenPair = new SegTokenPair(idBuffer, t1.index,
              t2.index, weight);
          this.addSegTokenPair(tokenPair);
        }
      }
    }
    key++;
  }

}
项目:read-open-source-code    文件:BiSegGraph.java   
private void generateBiSegGraph(SegGraph segGraph) {
  double smooth = 0.1;
  int wordPairFreq = 0;
  int maxStart = segGraph.getMaxStart();
  double oneWordFreq, weight, tinyDouble = 1.0 / Utility.MAX_FREQUENCE;

  int next;
  char[] idBuffer;
  // get the list of tokens ordered and indexed
  segTokenList = segGraph.makeIndex();
  // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
  int key = -1;
  List<SegToken> nextTokens = null;
  while (key < maxStart) {
    if (segGraph.isStartExist(key)) {

      List<SegToken> tokenList = segGraph.getStartList(key);

      // Calculate all tokens for a given key.
      for (SegToken t1 : tokenList) {
        oneWordFreq = t1.weight;
        next = t1.endOffset;
        nextTokens = null;
        // Find the next corresponding Token.
        // For example: "Sunny seashore", the present Token is "sunny", next one should be "sea" or "seashore".
        // If we cannot find the next Token, then go to the end and repeat the same cycle.
        while (next <= maxStart) {
          // Because the beginning position of endToken is sentenceLen, so equal to sentenceLen can find endToken.
          if (segGraph.isStartExist(next)) {
            nextTokens = segGraph.getStartList(next);
            break;
          }
          next++;
        }
        if (nextTokens == null) {
          break;
        }
        for (SegToken t2 : nextTokens) {
          idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
          System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
          idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
          System.arraycopy(t2.charArray, 0, idBuffer,
              t1.charArray.length + 1, t2.charArray.length);

          // Two linked Words frequency
          wordPairFreq = bigramDict.getFrequency(idBuffer);

          // Smoothing

          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          weight = -Math
              .log(smooth
                  * (1.0 + oneWordFreq)
                  / (Utility.MAX_FREQUENCE + 0.0)
                  + (1.0 - smooth)
                  * ((1.0 - tinyDouble) * wordPairFreq / (1.0 + oneWordFreq) + tinyDouble));

          SegTokenPair tokenPair = new SegTokenPair(idBuffer, t1.index,
              t2.index, weight);
          this.addSegTokenPair(tokenPair);
        }
      }
    }
    key++;
  }

}
项目:read-open-source-code    文件:BiSegGraph.java   
private void generateBiSegGraph(SegGraph segGraph) {
  double smooth = 0.1;
  int wordPairFreq = 0;
  int maxStart = segGraph.getMaxStart();
  double oneWordFreq, weight, tinyDouble = 1.0 / Utility.MAX_FREQUENCE;

  int next;
  char[] idBuffer;
  // get the list of tokens ordered and indexed
  segTokenList = segGraph.makeIndex();
  // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
  int key = -1;
  List<SegToken> nextTokens = null;
  while (key < maxStart) {
    if (segGraph.isStartExist(key)) {

      List<SegToken> tokenList = segGraph.getStartList(key);

      // Calculate all tokens for a given key.
      for (SegToken t1 : tokenList) {
        oneWordFreq = t1.weight;
        next = t1.endOffset;
        nextTokens = null;
        // Find the next corresponding Token.
        // For example: "Sunny seashore", the present Token is "sunny", next one should be "sea" or "seashore".
        // If we cannot find the next Token, then go to the end and repeat the same cycle.
        while (next <= maxStart) {
          // Because the beginning position of endToken is sentenceLen, so equal to sentenceLen can find endToken.
          if (segGraph.isStartExist(next)) {
            nextTokens = segGraph.getStartList(next);
            break;
          }
          next++;
        }
        if (nextTokens == null) {
          break;
        }
        for (SegToken t2 : nextTokens) {
          idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
          System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
          idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
          System.arraycopy(t2.charArray, 0, idBuffer,
              t1.charArray.length + 1, t2.charArray.length);

          // Two linked Words frequency
          wordPairFreq = bigramDict.getFrequency(idBuffer);

          // Smoothing

          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          weight = -Math
              .log(smooth
                  * (1.0 + oneWordFreq)
                  / (Utility.MAX_FREQUENCE + 0.0)
                  + (1.0 - smooth)
                  * ((1.0 - tinyDouble) * wordPairFreq / (1.0 + oneWordFreq) + tinyDouble));

          SegTokenPair tokenPair = new SegTokenPair(idBuffer, t1.index,
              t2.index, weight);
          this.addSegTokenPair(tokenPair);
        }
      }
    }
    key++;
  }

}
项目:Maskana-Gestor-de-Conocimiento    文件:BiSegGraph.java   
private void generateBiSegGraph(SegGraph segGraph) {
  double smooth = 0.1;
  int wordPairFreq = 0;
  int maxStart = segGraph.getMaxStart();
  double oneWordFreq, weight, tinyDouble = 1.0 / Utility.MAX_FREQUENCE;

  int next;
  char[] idBuffer;
  // get the list of tokens ordered and indexed
  segTokenList = segGraph.makeIndex();
  // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
  int key = -1;
  List<SegToken> nextTokens = null;
  while (key < maxStart) {
    if (segGraph.isStartExist(key)) {

      List<SegToken> tokenList = segGraph.getStartList(key);

      // Calculate all tokens for a given key.
      for (SegToken t1 : tokenList) {
        oneWordFreq = t1.weight;
        next = t1.endOffset;
        nextTokens = null;
        // Find the next corresponding Token.
        // For example: "Sunny seashore", the present Token is "sunny", next one should be "sea" or "seashore".
        // If we cannot find the next Token, then go to the end and repeat the same cycle.
        while (next <= maxStart) {
          // Because the beginning position of endToken is sentenceLen, so equal to sentenceLen can find endToken.
          if (segGraph.isStartExist(next)) {
            nextTokens = segGraph.getStartList(next);
            break;
          }
          next++;
        }
        if (nextTokens == null) {
          break;
        }
        for (SegToken t2 : nextTokens) {
          idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
          System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
          idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
          System.arraycopy(t2.charArray, 0, idBuffer,
              t1.charArray.length + 1, t2.charArray.length);

          // Two linked Words frequency
          wordPairFreq = bigramDict.getFrequency(idBuffer);

          // Smoothing

          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          weight = -Math
              .log(smooth
                  * (1.0 + oneWordFreq)
                  / (Utility.MAX_FREQUENCE + 0.0)
                  + (1.0 - smooth)
                  * ((1.0 - tinyDouble) * wordPairFreq / (1.0 + oneWordFreq) + tinyDouble));

          SegTokenPair tokenPair = new SegTokenPair(idBuffer, t1.index,
              t2.index, weight);
          this.addSegTokenPair(tokenPair);
        }
      }
    }
    key++;
  }

}
项目:search    文件:WordDictionary.java   
/**
 * Return true if the dictionary entry at itemIndex for table charArray[0] is charArray
 * 
 * @param charArray input word
 * @param itemIndex item index for table charArray[0]
 * @return true if the entry exists
 */
public boolean isEqual(char[] charArray, int itemIndex) {
  short hashIndex = getWordItemTableIndex(charArray[0]);
  return Utility.compareArray(charArray, 1,
      wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
}
项目:NYBC    文件:WordDictionary.java   
/**
 * Return true if the dictionary entry at itemIndex for table charArray[0] is charArray
 * 
 * @param charArray input word
 * @param itemIndex item index for table charArray[0]
 * @return true if the entry exists
 */
public boolean isEqual(char[] charArray, int itemIndex) {
  short hashIndex = getWordItemTableIndex(charArray[0]);
  return Utility.compareArray(charArray, 1,
      wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
}
项目:read-open-source-code    文件:WordDictionary.java   
/**
 * Return true if the dictionary entry at itemIndex for table charArray[0] is charArray
 * 
 * @param charArray input word
 * @param itemIndex item index for table charArray[0]
 * @return true if the entry exists
 */
public boolean isEqual(char[] charArray, int itemIndex) {
  short hashIndex = getWordItemTableIndex(charArray[0]);
  return Utility.compareArray(charArray, 1,
      wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
}
项目:read-open-source-code    文件:WordDictionary.java   
/**
 * Return true if the dictionary entry at itemIndex for table charArray[0] is charArray
 * 
 * @param charArray input word
 * @param itemIndex item index for table charArray[0]
 * @return true if the entry exists
 */
public boolean isEqual(char[] charArray, int itemIndex) {
  short hashIndex = getWordItemTableIndex(charArray[0]);
  return Utility.compareArray(charArray, 1,
      wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
}
项目:Maskana-Gestor-de-Conocimiento    文件:WordDictionary.java   
/**
 * Return true if the dictionary entry at itemIndex for table charArray[0] is charArray
 * 
 * @param charArray input word
 * @param itemIndex item index for table charArray[0]
 * @return true if the entry exists
 */
public boolean isEqual(char[] charArray, int itemIndex) {
  short hashIndex = getWordItemTableIndex(charArray[0]);
  return Utility.compareArray(charArray, 1,
      wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
}