private void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException { // Need to find numKeyFields separators int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator); for(int k=1; k<numKeyFields && pos!=-1; k++) { pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator); } try { if (pos == -1) { key.set(line, 0, length); val.set(""); } else { StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length); } } catch (CharacterCodingException e) { throw new IOException(StringUtils.stringifyException(e)); } }
/** * Split a line into key and value. * @param line: a byte array of line containing UTF-8 bytes * @param key: key of a record * @param val: value of a record * @throws IOException */ void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException { int numKeyFields = getNumOfKeyFields(); byte[] separator = getFieldSeparator(); // Need to find numKeyFields separators int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator); for(int k=1; k<numKeyFields && pos!=-1; k++) { pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator); } try { if (pos == -1) { key.set(line, 0, length); val.set(""); } else { StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length); } } catch (CharacterCodingException e) { LOG.warn(StringUtils.stringifyException(e)); } }
void splitKeyVal(byte[] line, int length, BytesWritable key, BytesWritable val) throws IOException { int numKeyFields = getNumOfKeyFields(); byte[] separator = getFieldSeparator(); // Need to find numKeyFields separators int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator); for(int k=1; k<numKeyFields && pos!=-1; k++) { pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator); } try { if (pos == -1) { key.set(line, 0, length); val.set(new byte[0], 0, 0); } else { StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length); } } catch (CharacterCodingException e) { LOG.warn(StringUtils.stringifyException(e)); } }
public int[] getWordLengths(byte []b, int start, int end) { //Given a string like "hello how are you", it returns an array //like [4 5, 3, 3, 3], where the first element is the number of //fields if (!keySpecSeen) { //if there were no key specs, then the whole key is one word return new int[] {1}; } int[] lengths = new int[10]; int currLenLengths = lengths.length; int idx = 1; int pos; while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end, keyFieldSeparator)) != -1) { if (++idx == currLenLengths) { int[] temp = lengths; lengths = new int[(currLenLengths = currLenLengths*2)]; System.arraycopy(temp, 0, lengths, 0, temp.length); } lengths[idx - 1] = pos - start; start = pos + 1; } if (start != end) { lengths[idx] = end - start; } lengths[0] = idx; //number of words is the first element return lengths; }