@Test public void testTsvParser() throws BadTsvLineException { TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t"); assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0)); assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1)); assertNull(parser.getFamily(2)); assertNull(parser.getQualifier(2)); assertEquals(2, parser.getRowKeyColumnIndex()); assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex()); byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d"); ParsedLine parsed = parser.parse(line, line.length); checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line))); }
@Test public void testTsvParserWithTimestamp() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertNull(parser.getFamily(1)); assertNull(parser.getQualifier(1)); assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2)); assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2)); assertEquals(0, parser.getRowKeyColumnIndex()); assertEquals(1, parser.getTimestampKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\t1234\tval_a"); ParsedLine parsed = parser.parse(line, line.length); assertEquals(1234l, parsed.getTimestamp(-1)); checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line))); }
@Test public void testTsvParser() throws BadTsvLineException { TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t"); assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0)); assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1)); assertNull(parser.getFamily(2)); assertNull(parser.getQualifier(2)); assertEquals(2, parser.getRowKeyColumnIndex()); assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser .getTimestampKeyColumnIndex()); byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d"); ParsedLine parsed = parser.parse(line, line.length); checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line))); }
@Override protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { KeyValue kv; kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (parsed.getIndividualAttributes() != null) { String[] attributes = parsed.getIndividualAttributes(); for (String attr : attributes) { String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR); if (split == null || split.length <= 1) { throw new BadTsvLineException("Invalid attributes seperator specified" + attributes); } else { if (split[0].length() <= 0 || split[1].length() <= 0) { throw new BadTsvLineException("Invalid attributes seperator specified" + attributes); } put.setAttribute(split[0], Bytes.toBytes(split[1])); } } } put.add(kv); }
@Override protected KeyValue createPuts(byte[] lineBytes, ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { KeyValue kv; kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (parsed.getIndividualAttributes() != null) { String[] attributes = parsed.getIndividualAttributes(); for (String attr : attributes) { String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR); if (split == null || split.length <= 1) { throw new BadTsvLineException("Invalid attributes seperator specified" + attributes); } else { if (split[0].length() <= 0 || split[1].length() <= 0) { throw new BadTsvLineException("Invalid attributes seperator specified" + attributes); } put.setAttribute(split[0], Bytes.toBytes(split[1])); } } } put.add(kv); return kv; }
@Test public void testTsvParser() throws BadTsvLineException { TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t"); assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0)); assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1)); assertNull(parser.getFamily(2)); assertNull(parser.getQualifier(2)); assertEquals(2, parser.getRowKeyColumnIndex()); assertEquals(ImportTsv.TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex()); byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d"); ParsedLine parsed = parser.parse(line, line.length); checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line))); }
@Override protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { KeyValue kv; kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (parsed.getIndividualAttributes() != null) { String[] attributes = parsed.getIndividualAttributes(); for (String attr : attributes) { String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR); if (split == null || split.length <= 1) { throw new BadTsvLineException(msg(attributes)); } else { if (split[0].length() <= 0 || split[1].length() <= 0) { throw new BadTsvLineException(msg(attributes)); } put.setAttribute(split[0], Bytes.toBytes(split[1])); } } } put.add(kv); }
private void checkParsing(ParsedLine parsed, Iterable<String> expected) { ArrayList<String> parsedCols = new ArrayList<String>(); for (int i = 0; i < parsed.getColumnCount(); i++) { parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i), parsed.getColumnLength(i))); } if (!Iterables.elementsEqual(parsedCols, expected)) { fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:" + Joiner.on(",").join(parsedCols)); } }
@Test(expected = BadTsvLineException.class) public void testTsvParserInvalidTimestamp() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t"); assertEquals(1, parser.getTimestampKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a"); ParsedLine parsed = parser.parse(line, line.length); assertEquals(-1, parsed.getTimestamp(-1)); checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line))); }
@Test public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException { TsvParser parser = new TsvParser( "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); assertEquals(4, parser.getCellVisibilityColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET"); ParsedLine parse = parser.parse(line, line.length); assertEquals(18, parse.getAttributeKeyOffset()); assertEquals(3, parser.getAttributesKeyColumnIndex()); String attributes[] = parse.getIndividualAttributes(); assertEquals(attributes[0], "key=>value"); assertEquals(29, parse.getCellVisibilityColumnOffset()); }
private void checkParsing(ParsedLine parsed, Iterable<String> expected) { ArrayList<String> parsedCols = new ArrayList<String>(); for (int i = 0; i < parsed.getColumnCount(); i++) { parsedCols.add(Bytes.toString( parsed.getLineBytes(), parsed.getColumnOffset(i), parsed.getColumnLength(i))); } if (!Iterables.elementsEqual(parsedCols, expected)) { fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:" + Joiner.on(",").join(parsedCols)); } }