/**
 * Checks how a four-column spec containing {@code HBASE_ROW_KEY} is interpreted: the
 * family/qualifier reported for each of the first three columns, the row key column index,
 * and the timestamp column index when no timestamp column is declared. Finally parses one
 * tab-separated line and passes the result to {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  final String columnSpec = "col_a,col_b:qual,HBASE_ROW_KEY,col_d";
  TsvParser tsvParser = new TsvParser(columnSpec, "\t");

  // Column 0: family only, so the qualifier is the empty byte array.
  assertBytesEquals(Bytes.toBytes("col_a"), tsvParser.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, tsvParser.getQualifier(0));

  // Column 1: "family:qualifier" form.
  assertBytesEquals(Bytes.toBytes("col_b"), tsvParser.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), tsvParser.getQualifier(1));

  // Column 2 is the row key; it has neither family nor qualifier.
  assertNull(tsvParser.getFamily(2));
  assertNull(tsvParser.getQualifier(2));
  assertEquals(2, tsvParser.getRowKeyColumnIndex());

  // No HBASE_TS_KEY in the spec, so the default index is expected.
  assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
      tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine parsedLine = tsvParser.parse(rawLine, rawLine.length);
  checkParsing(parsedLine, Splitter.on("\t").split(Bytes.toString(rawLine)));
}
/**
 * Checks a column spec that declares both {@code HBASE_ROW_KEY} and {@code HBASE_TS_KEY}:
 * neither special column reports a family or qualifier, their indexes are 0 and 1, and the
 * timestamp read from a parsed line is the numeric value found in the timestamp column.
 */
@Test
public void testTsvParserWithTimestamp() throws BadTsvLineException {
  final String columnSpec = "HBASE_ROW_KEY,HBASE_TS_KEY,col_a,";
  TsvParser tsvParser = new TsvParser(columnSpec, "\t");

  // Row key column.
  assertNull(tsvParser.getFamily(0));
  assertNull(tsvParser.getQualifier(0));

  // Timestamp column.
  assertNull(tsvParser.getFamily(1));
  assertNull(tsvParser.getQualifier(1));

  // Regular data column without a qualifier.
  assertBytesEquals(Bytes.toBytes("col_a"), tsvParser.getFamily(2));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, tsvParser.getQualifier(2));

  assertEquals(0, tsvParser.getRowKeyColumnIndex());
  assertEquals(1, tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("rowkey\t1234\tval_a");
  ParsedLine parsedLine = tsvParser.parse(rawLine, rawLine.length);
  assertEquals(1234L, parsedLine.getTimestamp(-1));
  checkParsing(parsedLine, Splitter.on("\t").split(Bytes.toString(rawLine)));
}
/**
 * Exercises {@code TsvParser} with the spec {@code col_a,col_b:qual,HBASE_ROW_KEY,col_d}
 * and a tab separator, asserting the per-column family/qualifier values, the row key
 * index, and the default timestamp index, then parses a sample line and delegates the
 * field-by-field comparison to {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  final String separator = "\t";
  TsvParser underTest = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", separator);

  assertBytesEquals(Bytes.toBytes("col_a"), underTest.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, underTest.getQualifier(0));

  assertBytesEquals(Bytes.toBytes("col_b"), underTest.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), underTest.getQualifier(1));

  // The row key column carries no family/qualifier.
  assertNull(underTest.getFamily(2));
  assertNull(underTest.getQualifier(2));

  assertEquals(2, underTest.getRowKeyColumnIndex());
  assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
      underTest.getTimestampKeyColumnIndex());

  byte[] input = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine result = underTest.parse(input, input.length);
  checkParsing(result, Splitter.on("\t").split(Bytes.toString(input)));
}
/**
 * Builds a {@code Put}-type {@link KeyValue} for column {@code i} of the parsed line,
 * copies the line's individual attributes (if any) onto {@code put}, and adds the
 * KeyValue to {@code put}.
 *
 * <p>Each attribute string is split on {@code ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR};
 * the first piece is used as the attribute name and the second as its value.
 *
 * @param lineBytes buffer holding the raw line; used as backing array for both the row
 *          key and the cell value
 * @param parsed parsed view of {@code lineBytes} supplying offsets, lengths and attributes
 * @param put the Put that receives the attributes and the new KeyValue
 * @param i index of the column to turn into a KeyValue
 * @throws BadTsvLineException if an attribute does not split into at least two pieces,
 *           or if its name or value piece is empty
 * @throws IOException declared by the overridden method
 */
@Override
protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i)
    throws BadTsvLineException, IOException {
  KeyValue kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
      parser.getFamily(i), 0, parser.getFamily(i).length,
      parser.getQualifier(i), 0, parser.getQualifier(i).length,
      ts, KeyValue.Type.Put,
      lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));

  String[] attributes = parsed.getIndividualAttributes();
  if (attributes != null) {
    for (String attr : attributes) {
      // String.split never returns null, so only the piece count and contents are checked.
      String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
      if (split.length <= 1 || split[0].isEmpty() || split[1].isEmpty()) {
        // Render the array contents; concatenating the array itself would only print
        // its type and identity hash.
        throw new BadTsvLineException("Invalid attributes seperator specified"
            + java.util.Arrays.toString(attributes));
      }
      put.setAttribute(split[0], Bytes.toBytes(split[1]));
    }
  }
  put.add(kv);
}
/** * Creates a kv from the cell visibility expr specified in the ImportTSV and uses it as the * visibility tag in the kv * @param rowKeyOffset * @param rowKeyLength * @param family * @param familyOffset * @param familyLength * @param qualifier * @param qualifierOffset * @param qualifierLength * @param ts * @param put * @param lineBytes * @param columnOffset * @param columnLength * @param cellVisibilityExpr * @return KeyValue from the cell visibility expr * @throws IOException * @throws BadTsvLineException */ public KeyValue createKVFromCellVisibilityExpr(int rowKeyOffset, int rowKeyLength, byte[] family, int familyOffset, int familyLength, byte[] qualifier, int qualifierOffset, int qualifierLength, long ts, Type put, byte[] lineBytes, int columnOffset, int columnLength, String cellVisibilityExpr) throws IOException, BadTsvLineException { if(this.labels == null && cellVisibilityExpr != null) { createLabels(); } KeyValue kv = null; if (cellVisibilityExpr != null) { // Apply the expansion and parsing here List<Tag> visibilityTags = createVisibilityTags(cellVisibilityExpr); kv = new KeyValue(lineBytes, rowKeyOffset, rowKeyLength, family, familyOffset, familyLength, qualifier, qualifierOffset, qualifierLength, ts, KeyValue.Type.Put, lineBytes, columnOffset, columnLength, visibilityTags); } else { kv = new KeyValue(lineBytes, rowKeyOffset, rowKeyLength, family, familyOffset, familyLength, qualifier, qualifierOffset, qualifierLength, ts, KeyValue.Type.Put, lineBytes, columnOffset, columnLength); } return kv; }
protected KeyValue createPuts(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { KeyValue kv = null; if (hfileOutPath == null) { kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (cellVisibilityExpr != null) { // We won't be validating the expression here. The Visibility CP will do // the validation put.setCellVisibility(new CellVisibility(cellVisibilityExpr)); } } else { kv = labelExpander.createKVFromCellVisibilityExpr( parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), cellVisibilityExpr); } put.add(kv); return kv; }
/**
 * Builds a {@code Put}-type {@link KeyValue} for column {@code i} of the parsed line,
 * copies the line's individual attributes (if any) onto {@code put}, adds the KeyValue
 * to {@code put}, and returns it.
 *
 * <p>Each attribute string is split on {@code ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR};
 * the first piece is used as the attribute name and the second as its value.
 *
 * @param lineBytes buffer holding the raw line; used as backing array for both the row
 *          key and the cell value
 * @param parsed parsed view of {@code lineBytes} supplying offsets, lengths and attributes
 * @param put the Put that receives the attributes and the new KeyValue
 * @param i index of the column to turn into a KeyValue
 * @return the KeyValue that was added to {@code put}
 * @throws BadTsvLineException if an attribute does not split into at least two pieces,
 *           or if its name or value piece is empty
 * @throws IOException declared by the overridden method
 */
@Override
protected KeyValue createPuts(byte[] lineBytes, ParsedLine parsed, Put put, int i)
    throws BadTsvLineException, IOException {
  KeyValue kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
      parser.getFamily(i), 0, parser.getFamily(i).length,
      parser.getQualifier(i), 0, parser.getQualifier(i).length,
      ts, KeyValue.Type.Put,
      lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));

  String[] attributes = parsed.getIndividualAttributes();
  if (attributes != null) {
    for (String attr : attributes) {
      // String.split never returns null, so only the piece count and contents are checked.
      String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
      if (split.length <= 1 || split[0].isEmpty() || split[1].isEmpty()) {
        // Render the array contents; concatenating the array itself would only print
        // its type and identity hash.
        throw new BadTsvLineException("Invalid attributes seperator specified"
            + java.util.Arrays.toString(attributes));
      }
      put.setAttribute(split[0], Bytes.toBytes(split[1]));
    }
  }
  put.add(kv);
  return kv;
}
/**
 * Parses the column spec {@code col_a,col_b:qual,HBASE_ROW_KEY,col_d} and asserts the
 * family and qualifier of the first three columns, that column 2 is the row key, and that
 * the timestamp column index equals
 * {@code ImportTsv.TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX}. A sample tab-separated line
 * is then parsed and compared against its split fields via {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  TsvParser columnParser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");

  // Plain family, empty qualifier.
  assertBytesEquals(Bytes.toBytes("col_a"), columnParser.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, columnParser.getQualifier(0));

  // Family with explicit qualifier.
  assertBytesEquals(Bytes.toBytes("col_b"), columnParser.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), columnParser.getQualifier(1));

  // Row key column.
  assertNull(columnParser.getFamily(2));
  assertNull(columnParser.getQualifier(2));
  assertEquals(2, columnParser.getRowKeyColumnIndex());

  assertEquals(ImportTsv.TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
      columnParser.getTimestampKeyColumnIndex());

  byte[] sampleLine = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine sampleParsed = columnParser.parse(sampleLine, sampleLine.length);
  checkParsing(sampleParsed, Splitter.on("\t").split(Bytes.toString(sampleLine)));
}
protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { Cell cell = null; if (hfileOutPath == null) { cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (cellVisibilityExpr != null) { // We won't be validating the expression here. The Visibility CP will do // the validation put.setCellVisibility(new CellVisibility(cellVisibilityExpr)); } if (ttl > 0) { put.setTTL(ttl); } } else { // Creating the KV which needs to be directly written to HFiles. Using the Facade // KVCreator for creation of kvs. cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); } put.add(cell); }
/**
 * Builds a {@code Put}-type {@link KeyValue} for column {@code i} of the parsed line,
 * transfers the line's individual attributes (when present) onto {@code put}, and adds
 * the KeyValue to {@code put}.
 *
 * <p>Every attribute is split on {@code ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR}; the first
 * piece becomes the attribute name and the second its value.
 *
 * @param lineBytes buffer holding the raw line; backs both the row key and the value
 * @param parsed parsed view of {@code lineBytes} supplying offsets, lengths and attributes
 * @param put the Put that receives the attributes and the new KeyValue
 * @param i index of the column to turn into a KeyValue
 * @throws BadTsvLineException if an attribute yields fewer than two pieces, or an empty
 *           name or value piece; the message comes from {@code msg(attributes)}
 * @throws IOException declared by the overridden method
 */
@Override
protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i)
    throws BadTsvLineException, IOException {
  KeyValue cell = new KeyValue(lineBytes,
      parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
      parser.getFamily(i), 0, parser.getFamily(i).length,
      parser.getQualifier(i), 0, parser.getQualifier(i).length,
      ts, KeyValue.Type.Put,
      lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));

  if (parsed.getIndividualAttributes() != null) {
    String[] attributes = parsed.getIndividualAttributes();
    for (String attribute : attributes) {
      String[] nameAndValue = attribute.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
      // Short-circuit order matters: the length check guards the index accesses.
      boolean malformed = nameAndValue == null
          || nameAndValue.length <= 1
          || nameAndValue[0].length() <= 0
          || nameAndValue[1].length() <= 0;
      if (malformed) {
        throw new BadTsvLineException(msg(attributes));
      }
      put.setAttribute(nameAndValue[0], Bytes.toBytes(nameAndValue[1]));
    }
  }
  put.add(cell);
}
/**
 * Test cases that throw BadTsvLineException.
 *
 * <p>This one feeds a three-field line to a parser configured with only two columns and
 * expects the parse to fail.
 */
@Test(expected = BadTsvLineException.class)
public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
  final String columnSpec = "HBASE_ROW_KEY,col_a";
  TsvParser twoColumnParser = new TsvParser(columnSpec, "\t");

  // One field more than the spec declares.
  byte[] tooManyFields = Bytes.toBytes("val_a\tval_b\tval_c");
  twoColumnParser.parse(tooManyFields, tooManyFields.length);
}
/**
 * Expects a {@code BadTsvLineException} when the timestamp column of the line holds the
 * non-numeric text {@code "timestamp"}. The spec places {@code HBASE_TS_KEY} at index 1.
 */
@Test(expected = BadTsvLineException.class)
public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
  final String columnSpec = "HBASE_ROW_KEY,HBASE_TS_KEY,col_a,";
  TsvParser tsvParser = new TsvParser(columnSpec, "\t");
  assertEquals(1, tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("rowkey\ttimestamp\tval_a");
  ParsedLine parsedLine = tsvParser.parse(rawLine, rawLine.length);
  assertEquals(-1, parsedLine.getTimestamp(-1));
  checkParsing(parsedLine, Splitter.on("\t").split(Bytes.toString(rawLine)));
}
/**
 * Expects a {@code BadTsvLineException} when the spec declares {@code HBASE_TS_KEY} as the
 * third column but the parsed line only contains two fields.
 */
@Test(expected = BadTsvLineException.class)
public void testTsvParserNoTimestampValue() throws BadTsvLineException {
  final String columnSpec = "HBASE_ROW_KEY,col_a,HBASE_TS_KEY";
  TsvParser tsvParser = new TsvParser(columnSpec, "\t");
  assertEquals(2, tsvParser.getTimestampKeyColumnIndex());

  // The line stops before the timestamp column.
  byte[] lineWithoutTimestamp = Bytes.toBytes("rowkey\tval_a");
  tsvParser.parse(lineWithoutTimestamp, lineWithoutTimestamp.length);
}