private void processRecordField( CommonRecord record, GenericRecord deltaRecord, String fieldName) { CommonRecord nextRecord = null; CommonValue nextValue = record.getField(fieldName); if (nextValue != null && nextValue.isRecord() && nextValue.getRecord().getSchema().getFullName() .equals(deltaRecord.getSchema().getFullName())) { nextRecord = nextValue.getRecord(); GenericFixed uuidFixed = (GenericFixed) deltaRecord.get(UUID); if (uuidFixed != null) { UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed); // Checking if the uuid was changed if (!uuid.equals(nextRecord.getUuid())) { records.remove(nextRecord.getUuid()); records.put(uuid, nextRecord); nextRecord.setUuid(uuid); } } } else { nextRecord = createCommonRecord(deltaRecord); record.setField(fieldName, commonFactory.createCommonValue(nextRecord)); } updateRecord(nextRecord, deltaRecord); }
/**
 * Copies every field of {@code delta} into {@code record}, dispatching on the kind of
 * the delta value: records, arrays, enums and fixed values go to their dedicated
 * handlers, anything else is wrapped by {@code commonFactory} and stored directly.
 *
 * @param record the record to update
 * @param delta  the Avro delta whose schema fields are iterated
 */
private void updateRecord(CommonRecord record, GenericRecord delta) {
    for (Field schemaField : delta.getSchema().getFields()) {
        final String fieldName = schemaField.name();
        final Object deltaValue = delta.get(fieldName);

        if (LOG.isDebugEnabled()) {
            LOG.debug("Processing field \"{}\", current value: {}", fieldName,
                    record.getField(fieldName) != null ? record.getField(fieldName).toString() : null);
        }

        if (AvroGenericUtils.isRecord(deltaValue)) {
            processRecordField(record, (GenericRecord) deltaValue, fieldName);
            continue;
        }
        if (AvroGenericUtils.isArray(deltaValue)) {
            processArrayField(record, (GenericArray) deltaValue, fieldName);
            continue;
        }
        if (AvroGenericUtils.isEnum(deltaValue)) {
            processEnumField(record, (GenericEnumSymbol) deltaValue, fieldName);
            continue;
        }
        if (AvroGenericUtils.isFixed(deltaValue)) {
            processFixedField(record, (GenericFixed) deltaValue, fieldName);
            continue;
        }
        // Any other kind of value is stored as-is.
        record.setField(fieldName, commonFactory.createCommonValue(deltaValue));
    }
}
/**
 * Handles an incoming delta.
 *
 * <p>When this is not a full resync and a record with the delta's UUID is already
 * known, that record is updated in place. Otherwise all known records are dropped,
 * a new root record is created from the delta and then updated.
 *
 * @param index      not used by this method
 * @param data       the delta record; its UUID field is read unconditionally
 * @param fullResync whether the delta replaces the whole configuration
 */
@Override
public synchronized void onDeltaReceived(int index, GenericRecord data, boolean fullResync) {
    final GenericFixed rawUuid = (GenericFixed) data.get(UUID);
    final UUID deltaUuid = AvroGenericUtils.createUuidFromFixed(rawUuid);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing delta with uuid {}", rawUuid.toString());
    }

    final boolean updatesKnownRecord = !fullResync && records.containsKey(deltaUuid);
    final CommonRecord target;
    if (updatesKnownRecord) {
        target = records.get(deltaUuid);
    } else {
        // Full resync or unknown UUID: start over with a new root.
        records.clear();
        target = createCommonRecord(data);
        rootRecord = target;
    }
    updateRecord(target, data);
}
/**
 * Recursively removes UUIDs from the record.
 *
 * <p>Each non-null fixed field is replaced by the result of {@code clearUuid}; nested
 * records are processed recursively, as are record elements of array fields. Other
 * field values are left untouched.
 *
 * @param baseRecord The record containing UUID fields.
 */
public static void removeUuid(GenericRecord baseRecord) {
    for (Schema.Field field : baseRecord.getSchema().getFields()) {
        final String name = field.name();
        final Object value = baseRecord.get(name);
        if (value == null) {
            continue;
        }

        if (value instanceof GenericFixed) {
            baseRecord.put(name, clearUuid((GenericFixed) value, field));
        } else if (value instanceof GenericRecord) {
            removeUuid((GenericRecord) value);
        } else if (value instanceof GenericArray) {
            for (Object element : (GenericArray<?>) value) {
                if (element instanceof GenericRecord) {
                    removeUuid((GenericRecord) element);
                }
            }
        }
    }
}
@Test public void testGeneration() throws Exception { // Read Configuration Schema Path schemaPath = Paths.get(Thread.currentThread().getContextClassLoader().getResource("generation/simpleSchema.json").toURI()); BaseSchema configuraionSchema = new BaseSchema(new String(Files.readAllBytes(schemaPath))); Path configurationPath = Paths.get(Thread.currentThread().getContextClassLoader().getResource("generation/simpleConfiguration.json").toURI()); String configuraion = new String(Files.readAllBytes(configurationPath)); GenericAvroConverter<GenericRecord> converter = new GenericAvroConverter<>(configuraionSchema.getRawSchema()); // generated default configuration DefaultUuidValidator uuidGenerator = new DefaultUuidValidator(configuraionSchema, new BaseDataFactory()); KaaData processedConfigurationBody = uuidGenerator.validateUuidFields(new BaseData(configuraionSchema, configuraion), null); GenericRecord processedConfiguration = converter.decodeJson(processedConfigurationBody.getRawData()); Object uuid = processedConfiguration.get(CommonConstants.UUID_FIELD); Assert.assertNotNull(uuid); Assert.assertTrue(uuid instanceof GenericFixed); Assert.assertEquals(CommonConstants.KAA_NAMESPACE + "." + CommonConstants.UUID_TYPE, ((GenericFixed) uuid).getSchema().getFullName()); }
@Test public void testValidationWithoutOldConfiguration() throws Exception { // Read Configuration Schema Path schemaPath = Paths.get(Thread.currentThread().getContextClassLoader().getResource("generation/simpleSchema.json").toURI()); BaseSchema configuraionSchema = new BaseSchema(new String(Files.readAllBytes(schemaPath))); Schema avroSchema = new Schema.Parser().parse(configuraionSchema.getRawSchema()); GenericRecord record = new GenericData.Record(avroSchema); record.put("intField", 5); GenericFixed uuid = AvroUtils.generateUuidObject(); record.put(CommonConstants.UUID_FIELD, uuid); GenericAvroConverter<GenericRecord> converter = new GenericAvroConverter<>(avroSchema); String configurationBody = converter.encodeToJson(record); DefaultUuidValidator uuidGenerator = new DefaultUuidValidator(configuraionSchema, new BaseDataFactory()); KaaData processedConfigurationBody = uuidGenerator.validateUuidFields(new BaseData(configuraionSchema, configurationBody), null); GenericRecord processedConfiguration = converter.decodeJson(processedConfigurationBody.getRawData()); Assert.assertNotEquals(processedConfiguration.get(CommonConstants.UUID_FIELD), uuid); }
@Test public void testValidationWithOldConfiguration() throws Exception { // Read Configuration Schema Path schemaPath = Paths.get(Thread.currentThread().getContextClassLoader().getResource("generation/simpleSchema.json").toURI()); BaseSchema configuraionSchema = new BaseSchema(new String(Files.readAllBytes(schemaPath))); Schema avroSchema = new Schema.Parser().parse(configuraionSchema.getRawSchema()); GenericRecord recordNew = new GenericData.Record(avroSchema); recordNew.put("intField", 4); GenericFixed uuidNew = AvroUtils.generateUuidObject(); recordNew.put(CommonConstants.UUID_FIELD, uuidNew); GenericRecord recordOld = new GenericData.Record(avroSchema); recordOld.put("intField", 5); GenericFixed uuidOld = AvroUtils.generateUuidObject(); recordOld.put(CommonConstants.UUID_FIELD, uuidOld); GenericAvroConverter<GenericRecord> converter = new GenericAvroConverter<>(avroSchema); String configurationBodyNew = converter.encodeToJson(recordNew); String configurationBodyOld = converter.encodeToJson(recordOld); DefaultUuidValidator uuidGenerator = new DefaultUuidValidator(configuraionSchema, new BaseDataFactory()); KaaData processedConfigurationBody = uuidGenerator.validateUuidFields(recordNew, recordOld); GenericRecord processedConfiguration = converter.decodeJson(processedConfigurationBody.getRawData()); Assert.assertEquals(processedConfiguration.get(CommonConstants.UUID_FIELD), uuidOld); }
/**
 * Builds the expected binary delta for the complex-field scenario: {@code testField1}
 * is marked "unchanged", {@code testField2} is a nested record with
 * {@code testField3 = 456}, and {@code __uuid} is the byte sequence 1..16.
 *
 * @param schema the delta protocol schema
 * @return an {@code AvroBinaryDelta} containing the single constructed delta record
 */
public static AvroBinaryDelta getComplexFieldDelta(Schema schema) {
    final GenericRecord root =
            new GenericData.Record(getDeltaSchemaByFullName(schema, "org.kaa.config.testT"));

    final Schema unchangedSchema = getSchemaByFullName(
            root.getSchema().getField("testField1").schema().getTypes(),
            "org.kaaproject.configuration.unchangedT");
    final GenericEnumSymbol unchangedMarker = new GenericData.EnumSymbol(unchangedSchema, "unchanged");

    final Schema nestedSchema = getSchemaByFullName(
            root.getSchema().getField("testField2").schema().getTypes(),
            "org.kaa.config.testRecordT");
    final GenericRecord nested = new GenericData.Record(nestedSchema);
    nested.put("testField3", 456);

    final byte[] uuidBytes = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    final GenericFixed rootUuid =
            new GenericData.Fixed(root.getSchema().getField("__uuid").schema(), uuidBytes);

    root.put("testField1", unchangedMarker);
    root.put("testField2", nested);
    root.put("__uuid", rootUuid);

    final AvroBinaryDelta expected = new AvroBinaryDelta(schema);
    expected.addDelta(root);
    return expected;
}
/**
 * Converts the JSON string "zzzz" against a 4-byte fixed schema and checks that the
 * result is a {@code GenericFixed} equal to one built from the same bytes.
 */
@Test
public void schemaOfTypeFixed() throws JsonProcessingException, IOException {
    final Schema ipv4Schema = SchemaBuilder.builder().fixed("com.foo.IPv4").size(4);
    final JsonNode json = mapper.readTree("\"zzzz\"");

    final Object converted = JasvornoConverter.convertToAvro(json, ipv4Schema);

    final byte[] expectedBytes = "zzzz".getBytes();
    final GenericFixed expected = new Fixed(ipv4Schema, expectedBytes);
    assertTrue(converted instanceof GenericFixed);
    assertThat(converted, is(expected));
}
/**
 * Produces a fixed value of {@code size} random bytes drawn from the context's random
 * source. The schema is parsed from {@code jsonSchema} on first use and cached in the
 * {@code schema} field; no synchronization is visible in this method.
 *
 * @param input the generator context supplying the random source
 * @return a new fixed value bound to the (lazily parsed) schema
 */
@Override
public GenericFixed apply(GeneratorContext input) {
    if (null == schema) {
        final Schema.Parser parser = new Schema.Parser();
        schema = parser.parse(jsonSchema);
    }

    final byte[] randomBytes = new byte[size];
    input.getRandom().nextBytes(randomBytes);
    return new GenericData.Fixed(schema, randomBytes);
}
/**
 * Reads a fixed value via the superclass and re-wraps its bytes as a Pig
 * {@code DataByteArray}.
 *
 * @param old      passed through to the superclass
 * @param expected passed through to the superclass
 * @param in       the decoder to read from
 * @return a {@code DataByteArray} built from the fixed value's bytes
 */
@Override
protected Object readFixed(Object old, Schema expected, Decoder in) throws IOException {
    final Object decoded = super.readFixed(old, expected, in);
    return new DataByteArray(((GenericFixed) decoded).bytes());
}
/**
 * Creates a common record for the given Avro record's schema.
 *
 * <p>When the Avro record carries a UUID, the new record is created with that UUID
 * and registered in {@code records}; otherwise it is created without a UUID and is
 * not registered.
 *
 * @param avroRecord the source Avro record
 * @return the newly created common record
 */
private CommonRecord createCommonRecord(GenericRecord avroRecord) {
    final GenericFixed rawUuid = (GenericFixed) avroRecord.get(UUID);
    if (rawUuid == null) {
        return commonFactory.createCommonRecord(avroRecord.getSchema());
    }

    final UUID recordUuid = AvroGenericUtils.createUuidFromFixed(rawUuid);
    final CommonRecord created = commonFactory.createCommonRecord(recordUuid, avroRecord.getSchema());
    records.put(recordUuid, created);
    return created;
}
/**
 * Creates a UUID from the given GenericFixed object.
 *
 * <p>The first eight bytes become the most significant bits and the next eight the
 * least significant bits, read in the buffer's default (big-endian) byte order.
 *
 * @param fixed the fixed value; must hold at least 16 bytes, otherwise
 *              {@link ByteBuffer#getLong()} throws a buffer-underflow exception
 * @return uuid object.
 */
public static UUID createUuidFromFixed(GenericFixed fixed) {
    final ByteBuffer buffer = ByteBuffer.wrap(fixed.bytes());
    final long mostSignificant = buffer.getLong();
    final long leastSignificant = buffer.getLong();
    return new UUID(mostSignificant, leastSignificant);
}
/**
 * Checks whether the given value is a UUID, i.e. a fixed value whose schema full name
 * equals the {@code UUIDT} constant.
 *
 * @param field the object to verify.
 * @return true if the value is a UUID, false otherwise.
 */
public static boolean isUuid(Object field) {
    return isFixed(field)
            && ((GenericFixed) field).getSchema().getFullName().equals(UUIDT);
}
/**
 * Populates {@code delta} for the complex full-resync scenario: {@code testField1} is
 * "abc", {@code testField2} is a nested record with {@code testField3 = 456}, and
 * {@code __uuid} is the byte sequence 1..16.
 *
 * @param delta the delta record to fill
 */
public static void fillComplexFullResyncDelta(GenericRecord delta) {
    final Schema nestedSchema = getSchemaByFullName(
            delta.getSchema().getField("testField2").schema().getTypes(),
            "org.kaa.config.testRecordT");
    final GenericRecord nested = new GenericData.Record(nestedSchema);
    nested.put("testField3", 456);

    final byte[] uuidBytes = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    final GenericFixed rootUuid =
            new GenericData.Fixed(delta.getSchema().getField("__uuid").schema(), uuidBytes);

    delta.put("testField1", "abc");
    delta.put("testField2", nested);
    delta.put("__uuid", rootUuid);
}
/**
 * Populates {@code delta} for the array full-resync scenario: {@code __uuid} is the
 * byte sequence 1..16 and {@code testField1} is an array of three item records. Item
 * {@code k} (1..3) has {@code testField2 = k} and a 16-byte UUID that is all zeros
 * except for a last byte of {@code k}.
 *
 * @param delta the delta record to fill
 */
public static void fillArrayFullResyncDelta(GenericRecord delta) {
    final byte[] rootUuidBytes = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    final GenericFixed rootUuid =
            new GenericData.Fixed(delta.getSchema().getField("__uuid").schema(), rootUuidBytes);
    delta.put("__uuid", rootUuid);

    final GenericArray items = new GenericData.Array(3, getArraySchema(delta, "testField1"));
    delta.put("testField1", items);

    // Build all items first and append them afterwards, as the hand-written version did.
    final GenericRecord[] built = new GenericRecord[3];
    for (int ordinal = 1; ordinal <= built.length; ordinal++) {
        final GenericRecord item = new GenericData.Record(getSchemaByFullName(
                items.getSchema().getElementType().getTypes(),
                "org.kaa.config.testRecordItemT"));
        item.put("testField2", ordinal);

        final byte[] itemUuidBytes = new byte[16];
        itemUuidBytes[15] = (byte) ordinal;
        final GenericFixed itemUuid =
                new GenericData.Fixed(item.getSchema().getField("__uuid").schema(), itemUuidBytes);
        item.put("__uuid", itemUuid);

        built[ordinal - 1] = item;
    }

    for (GenericRecord item : built) {
        items.add(item);
    }
}
/**
 * Populates an array-item update delta: the item's {@code __uuid} is 15 zero bytes
 * followed by 2, and {@code testField2} is set to 22.
 *
 * @param item the item record to fill
 */
public static void fillArrayItemUpdateDelta(GenericRecord item) {
    final byte[] uuidBytes = new byte[16];
    uuidBytes[15] = 2;

    final Schema uuidSchema = item.getSchema().getField("__uuid").schema();
    item.put("__uuid", new GenericData.Fixed(uuidSchema, uuidBytes));
    item.put("testField2", 22);
}
/**
 * Populates an array-item removal delta: {@code testField1} becomes a one-element
 * array holding a UUID fixed value (15 zero bytes followed by 1).
 *
 * @param delta the delta record to fill
 */
public static void fillArrayItemRemoveDelta(GenericRecord delta) {
    final GenericArray removals = new GenericData.Array(1, getArraySchema(delta, "testField1"));

    final byte[] uuidBytes = new byte[16];
    uuidBytes[15] = 1;
    final Schema uuidSchema = getSchemaByFullName(
            removals.getSchema().getElementType().getTypes(),
            "org.kaaproject.configuration.uuidT");

    removals.add(new GenericData.Fixed(uuidSchema, uuidBytes));
    delta.put("testField1", removals);
}
/**
 * Converts a common array holding one 2-byte fixed value and checks that the first
 * element of the resulting Avro array carries the same bytes.
 */
@Test
public void testArrayOfFixed() {
    final Schema twoByteFixed = Schema.createFixed("someFixed", "", "", 2);
    final byte[] payload = {1, 2};

    final GenericArray converted = CommonToGeneric.createArray(
            createCommonArray(twoByteFixed, new DefaultCommonFixed(twoByteFixed, payload)));

    final GenericFixed first = (GenericFixed) converted.get(0);
    assertArrayEquals(new byte[]{1, 2}, first.bytes());
}
/**
 * Checks whether the given fixed value is a UUID and returns the value that should
 * take its place.
 *
 * <p>The decision is based on the schema of the value itself (the same criterion
 * {@code isUuid} uses). Previously, for union-typed fields, any fixed value was
 * cleared as soon as the union merely listed the UUID type among its branches, which
 * also wiped non-UUID fixed values stored in such a union.
 *
 * @param uuidField   GenericFixed field which is supposed to contain UUID.
 * @param fieldSchema Schema of given field; retained for signature compatibility, the
 *                    value's own schema is what gets inspected.
 * @return null if the value is a UUID, or the value without changes otherwise
 */
private static GenericFixed clearUuid(GenericFixed uuidField, Schema.Field fieldSchema) {
    final Schema valueSchema = uuidField.getSchema();
    // Case-insensitive comparison is kept from the previous implementation.
    if (valueSchema != null && UUIDT.equalsIgnoreCase(valueSchema.getFullName())) {
        return null;
    }
    return uuidField;
}
/**
 * Generates a value for a fixed type: a fixed of the schema's declared size with
 * every byte set to zero.
 *
 * @param schemaNode schema for current type.
 * @return the generated zero-filled fixed value.
 */
private Object processFixed(Schema schemaNode) {
    // A newly allocated byte[] is zero-initialized by the JVM, so no explicit fill is needed.
    final byte[] zeros = new byte[schemaNode.getFixedSize()];
    return new GenericData.Fixed(schemaNode, zeros);
}
@Test public void testValidationOfComplexTypes() throws Exception { // Read Configuration Schema Path schemaPath = Paths.get(Thread.currentThread().getContextClassLoader().getResource("generation/simpleComplexSchema.json").toURI()); BaseSchema configuraionSchema = new BaseSchema(new String(Files.readAllBytes(schemaPath))); Schema.Parser schemaParser = new Schema.Parser(); Schema avroSchema = schemaParser.parse(configuraionSchema.getRawSchema()); GenericRecord recordNew1 = new GenericData.Record(schemaParser.getTypes().get("org.kaaproject.recordT")); recordNew1.put("intField", 4); GenericFixed uuidNew1 = AvroUtils.generateUuidObject(); recordNew1.put(CommonConstants.UUID_FIELD, uuidNew1); GenericRecord rootNew = new GenericData.Record(avroSchema); rootNew.put("recordField", recordNew1); GenericRecord recordOld1 = new GenericData.Record(schemaParser.getTypes().get("org.kaaproject.recordT")); recordOld1.put("intField", 6); recordOld1.put(CommonConstants.UUID_FIELD, uuidNew1); GenericRecord rootOld = new GenericData.Record(avroSchema); rootOld.put("recordField", recordOld1); rootOld.put(CommonConstants.UUID_FIELD, AvroUtils.generateUuidObject()); GenericAvroConverter<GenericRecord> converter = new GenericAvroConverter<>(avroSchema); String configurationBodyNew = converter.encodeToJson(rootNew); String configurationBodyOld = converter.encodeToJson(rootOld); DefaultUuidValidator uuidGenerator = new DefaultUuidValidator(configuraionSchema, new BaseDataFactory()); KaaData processedConfigurationBody = uuidGenerator.validateUuidFields(rootNew, rootOld); GenericRecord processedConfiguration = converter.decodeJson(processedConfigurationBody.getRawData()); GenericRecord processedRecord = (GenericRecord) processedConfiguration.get("recordField"); Assert.assertEquals(uuidNew1, processedRecord.get(CommonConstants.UUID_FIELD)); Assert.assertNotNull(processedConfiguration.get(CommonConstants.UUID_FIELD)); }
@Test public void testValidationOfComplexWithoutOldConfiguration() throws Exception { // Read Configuration Schema Path schemaPath = Paths.get(Thread.currentThread().getContextClassLoader().getResource("generation/simpleComplexSchema.json").toURI()); BaseSchema configuraionSchema = new BaseSchema(new String(Files.readAllBytes(schemaPath))); Schema.Parser schemaParser = new Schema.Parser(); Schema avroSchema = schemaParser.parse(configuraionSchema.getRawSchema()); GenericRecord recordNew1 = new GenericData.Record(schemaParser.getTypes().get("org.kaaproject.recordT")); recordNew1.put("intField", 4); GenericFixed uuidNew1 = AvroUtils.generateUuidObject(); recordNew1.put(CommonConstants.UUID_FIELD, uuidNew1); GenericRecord rootNew = new GenericData.Record(avroSchema); rootNew.put("recordField", recordNew1); GenericAvroConverter<GenericRecord> converter = new GenericAvroConverter<>(avroSchema); String configurationBodyNew = converter.encodeToJson(rootNew); DefaultUuidValidator uuidGenerator = new DefaultUuidValidator(configuraionSchema, new BaseDataFactory()); KaaData processedConfigurationBody = uuidGenerator.validateUuidFields(rootNew, null); GenericRecord processedConfiguration = converter.decodeJson(processedConfigurationBody.getRawData()); GenericRecord processedRecord = (GenericRecord) processedConfiguration.get("recordField"); Assert.assertNotEquals(uuidNew1, processedRecord.get(CommonConstants.UUID_FIELD)); Assert.assertNotNull(processedConfiguration.get(CommonConstants.UUID_FIELD)); }
/**
 * Calculates the delta between the "current" and "new" primitive-field configurations
 * and compares its binary form with a hand-built expected delta in which
 * {@code testField1} is "unchanged", {@code testField2} is 456 and {@code __uuid} is
 * the byte sequence 1..16.
 *
 * <p>The configuration files are read through try-with-resources so the file streams
 * are closed even if copying fails; they were previously left open.
 */
@Test
public void testPrimitiveFieldsDelta() throws IOException, DeltaCalculatorException {
    URL protocolSchemaUrl = Thread.currentThread().getContextClassLoader()
            .getResource("delta/primitiveFieldsDeltaProtocolSchema.json");
    Schema protocolSchema = new Schema.Parser().parse(new File(protocolSchemaUrl.getPath()));
    URL schemaUrl = Thread.currentThread().getContextClassLoader()
            .getResource("delta/primitiveFieldsDeltaSchema.json");
    Schema schema = new Schema.Parser().parse(new File(schemaUrl.getPath()));

    DeltaCalculationAlgorithm calculator = new DefaultDeltaCalculationAlgorithm(protocolSchema, schema);

    ByteArrayOutputStream baosOld = new ByteArrayOutputStream();
    URL oldConfigUrl = Thread.currentThread().getContextClassLoader()
            .getResource("delta/primitiveFieldsDeltaCurrent.json");
    try (FileInputStream oldConfigStream = new FileInputStream(oldConfigUrl.getPath())) {
        IOUtils.copy(oldConfigStream, baosOld, 1024);
    }
    String oldStr = new String(baosOld.toByteArray(), "UTF-8");

    ByteArrayOutputStream baosNew = new ByteArrayOutputStream();
    URL newConfigUrl = Thread.currentThread().getContextClassLoader()
            .getResource("delta/primitiveFieldsDeltaNew.json");
    try (FileInputStream newConfigStream = new FileInputStream(newConfigUrl.getPath())) {
        IOUtils.copy(newConfigStream, baosNew, 1024);
    }
    String newStr = new String(baosNew.toByteArray(), "UTF-8");

    BaseData oldData = new BaseData(new BaseSchema(schema.toString()), oldStr);
    BaseData newData = new BaseData(new BaseSchema(schema.toString()), newStr);
    RawBinaryDelta deltaResult = calculator.calculate(oldData, newData);

    // Build the delta the calculator is expected to produce.
    GenericRecord delta = new GenericData.Record(
            getDeltaSchemaByFullName(protocolSchema, "org.kaa.config.testT"));
    GenericEnumSymbol unchanged = new GenericData.EnumSymbol(
            getSchemaByFullName(delta.getSchema().getField("testField1").schema().getTypes(),
                    "org.kaaproject.configuration.unchangedT"),
            "unchanged");
    byte[] rawUuid = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    GenericFixed uuid = new GenericData.Fixed(delta.getSchema().getField("__uuid").schema(), rawUuid);
    delta.put("testField1", unchanged);
    delta.put("testField2", 456);
    delta.put("__uuid", uuid);

    AvroBinaryDelta deltaExpected = new AvroBinaryDelta(protocolSchema);
    deltaExpected.addDelta(delta);

    assertArrayEquals(deltaExpected.getData(), deltaResult.getData());
}
/** * Convert from Avro type to Sqoop's java representation of the SQL type * see SqlManager#toJavaType */ public static Object fromAvro(Object avroObject, Schema schema, String type) { if (avroObject == null) { return null; } switch (schema.getType()) { case NULL: return null; case BOOLEAN: case INT: case FLOAT: case DOUBLE: return avroObject; case LONG: if (type.equals(DATE_TYPE)) { return new Date((Long) avroObject); } else if (type.equals(TIME_TYPE)) { return new Time((Long) avroObject); } else if (type.equals(TIMESTAMP_TYPE)) { return new Timestamp((Long) avroObject); } return avroObject; case BYTES: ByteBuffer bb = (ByteBuffer) avroObject; BytesWritable bw = new BytesWritable(); bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()); if (type.equals(BLOB_REF_TYPE)) { // TODO: Should convert BytesWritable to BlobRef properly. (SQOOP-991) throw new UnsupportedOperationException("BlobRef not supported"); } return bw; case STRING: if (type.equals(BIG_DECIMAL_TYPE)) { return new BigDecimal(avroObject.toString()); } else if (type.equals(DATE_TYPE)) { return Date.valueOf(avroObject.toString()); } else if (type.equals(TIME_TYPE)) { return Time.valueOf(avroObject.toString()); } else if (type.equals(TIMESTAMP_TYPE)) { return Timestamp.valueOf(avroObject.toString()); } return avroObject.toString(); case ENUM: return avroObject.toString(); case UNION: List<Schema> types = schema.getTypes(); if (types.size() != 2) { throw new IllegalArgumentException("Only support union with null"); } Schema s1 = types.get(0); Schema s2 = types.get(1); if (s1.getType() == Schema.Type.NULL) { return fromAvro(avroObject, s2, type); } else if (s2.getType() == Schema.Type.NULL) { return fromAvro(avroObject, s1, type); } else { throw new IllegalArgumentException("Only support union with null"); } case FIXED: return new BytesWritable(((GenericFixed) avroObject).bytes()); case RECORD: case ARRAY: case MAP: default: throw new IllegalArgumentException("Cannot convert Avro type " + 
schema.getType()); } }
/**
 * Wraps the UTF-8 encoding of {@code value} in a fixed value bound to
 * {@code avroSchema}.
 *
 * <p>The charset is pinned explicitly: the no-argument {@code String.getBytes()} uses
 * the JVM's default charset, which made the produced bytes (and their length) depend
 * on the platform for non-ASCII input.
 *
 * @param value the string to encode
 * @return a new fixed value holding the encoded bytes
 */
@Override
public GenericFixed stringColumn(String value) {
    return new GenericData.Fixed(avroSchema, value.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}
/**
 * Generates a fixed value of the schema's declared size filled with bytes from the
 * {@code random} field.
 *
 * @param schema the fixed schema
 * @return a new randomly filled fixed value
 */
private GenericFixed generateFixed(Schema schema) {
    final int length = schema.getFixedSize();
    final byte[] payload = new byte[length];
    random.nextBytes(payload);
    return new GenericData.Fixed(schema, payload);
}
/**
 * Determines the Java property type for an Avro field schema.
 *
 * <p>For unions the non-null member types are resolved recursively: no resolvable
 * member yields {@code null}; a single distinct type yields that type (boxed when the
 * union also contains null); several types yield {@code Number} when all are numeric
 * and {@code Object} otherwise. Records, arrays, maps, fixed values and enums map to
 * fixed classes; strings map to {@code String} or {@code CharSequence} depending on
 * the schema's string property; all remaining types are looked up in
 * {@code TYPES_PER_AVRO_ORD}.
 *
 * @param fieldSchema the Avro schema of the field
 * @return the Java type, or {@code null} when none can be determined
 */
public static Class propertyType(Schema fieldSchema) {
    final Schema.Type avroType = fieldSchema.getType();
    switch (avroType) {
        case UNION: {
            boolean nullable = false;
            final Set<Class> memberTypes = new HashSet<>();
            for (Schema member : fieldSchema.getTypes()) {
                if (member.getType() == Schema.Type.NULL) {
                    nullable = true;
                    continue;
                }
                final Class memberType = propertyType(member);
                if (memberType != null) {
                    memberTypes.add(memberType);
                }
            }

            if (memberTypes.isEmpty()) {
                return null;
            }
            if (memberTypes.size() == 1) {
                if (nullable) {
                    return JavaClassHelper.getBoxedType(memberTypes.iterator().next());
                }
                return memberTypes.iterator().next();
            }

            boolean sawNonNumeric = false;
            for (Class candidate : memberTypes) {
                if (!JavaClassHelper.isNumeric(candidate)) {
                    sawNonNumeric = true;
                }
            }
            if (sawNonNumeric) {
                return Object.class;
            }
            return Number.class;
        }

        case RECORD:
            return GenericData.Record.class;
        case ARRAY:
            return Collection.class;
        case MAP:
            return Map.class;
        case FIXED:
            return GenericFixed.class;
        case ENUM:
            return GenericEnumSymbol.class;

        case STRING: {
            final String javaStringProp = fieldSchema.getProp(PROP_JAVA_STRING_KEY);
            if (javaStringProp != null && javaStringProp.equals(PROP_JAVA_STRING_VALUE)) {
                return String.class;
            }
            return CharSequence.class;
        }

        default: {
            final AvroTypeDesc descriptor = TYPES_PER_AVRO_ORD[avroType.ordinal()];
            if (descriptor == null) {
                return null;
            }
            return descriptor.getType();
        }
    }
}
/** * Reads the next Tuple from the Avro file. * * @return The next Tuple from the Avro file or null if end of file is * reached. */ @Override public Tuple next() throws IOException { if (!dataFileReader.hasNext()) { return null; } GenericRecord record = dataFileReader.next(); for (int i = 0; i < projectionMap.length; ++i) { int columnIndex = projectionMap[i]; Object value = record.get(columnIndex); if (value == null) { outTuple.put(i, NullDatum.get()); continue; } // Get Avro type. Schema.Field avroField = avroFields.get(columnIndex); Schema nonNullAvroSchema = getNonNull(avroField.schema()); Schema.Type avroType = nonNullAvroSchema.getType(); // Get Tajo type. Column column = schema.getColumn(columnIndex); DataType dataType = column.getDataType(); TajoDataTypes.Type tajoType = dataType.getType(); switch (avroType) { case NULL: outTuple.put(i, NullDatum.get()); break; case BOOLEAN: outTuple.put(i, DatumFactory.createBool((Boolean) value)); break; case INT: outTuple.put(i, convertInt(value, tajoType)); break; case LONG: outTuple.put(i, DatumFactory.createInt8((Long) value)); break; case FLOAT: outTuple.put(i, DatumFactory.createFloat4((Float) value)); break; case DOUBLE: outTuple.put(i, DatumFactory.createFloat8((Double) value)); break; case BYTES: outTuple.put(i, convertBytes(value, tajoType, dataType)); break; case STRING: outTuple.put(i, convertString(value, tajoType)); break; case RECORD: throw new RuntimeException("Avro RECORD not supported."); case ENUM: throw new RuntimeException("Avro ENUM not supported."); case MAP: throw new RuntimeException("Avro MAP not supported."); case UNION: throw new RuntimeException("Avro UNION not supported."); case FIXED: outTuple.put(i, new BlobDatum(((GenericFixed) value).bytes())); break; default: throw new RuntimeException("Unknown type."); } } return outTuple; }
/** * Reads the next Tuple from the Avro file. * * @return The next Tuple from the Avro file or null if end of file is * reached. */ @Override public Tuple next() throws IOException { if (!dataFileReader.hasNext()) { return null; } Tuple tuple = new VTuple(schema.size()); GenericRecord record = dataFileReader.next(); for (int i = 0; i < projectionMap.length; ++i) { int columnIndex = projectionMap[i]; Object value = record.get(columnIndex); if (value == null) { tuple.put(columnIndex, NullDatum.get()); continue; } // Get Avro type. Schema.Field avroField = avroFields.get(columnIndex); Schema nonNullAvroSchema = getNonNull(avroField.schema()); Schema.Type avroType = nonNullAvroSchema.getType(); // Get Tajo type. Column column = schema.getColumn(columnIndex); DataType dataType = column.getDataType(); TajoDataTypes.Type tajoType = dataType.getType(); switch (avroType) { case NULL: tuple.put(columnIndex, NullDatum.get()); break; case BOOLEAN: tuple.put(columnIndex, DatumFactory.createBool((Boolean)value)); break; case INT: tuple.put(columnIndex, convertInt(value, tajoType)); break; case LONG: tuple.put(columnIndex, DatumFactory.createInt8((Long)value)); break; case FLOAT: tuple.put(columnIndex, DatumFactory.createFloat4((Float)value)); break; case DOUBLE: tuple.put(columnIndex, DatumFactory.createFloat8((Double)value)); break; case BYTES: tuple.put(columnIndex, convertBytes(value, tajoType, dataType)); break; case STRING: tuple.put(columnIndex, convertString(value, tajoType)); break; case RECORD: throw new RuntimeException("Avro RECORD not supported."); case ENUM: throw new RuntimeException("Avro ENUM not supported."); case MAP: throw new RuntimeException("Avro MAP not supported."); case UNION: throw new RuntimeException("Avro UNION not supported."); case FIXED: tuple.put(columnIndex, new BlobDatum(((GenericFixed)value).bytes())); break; default: throw new RuntimeException("Unknown type."); } } return tuple; }
private Object fromAvro(Object avroObject, Schema fieldSchema, String columnType) { // map from Avro type to Sqoop's Java representation of the SQL type // see SqlManager#toJavaType if (avroObject == null) { return null; } switch (fieldSchema.getType()) { case NULL: return null; case BOOLEAN: case INT: case FLOAT: case DOUBLE: return avroObject; case LONG: if (columnType.equals(DATE_TYPE)) { return new Date((Long) avroObject); } else if (columnType.equals(TIME_TYPE)) { return new Time((Long) avroObject); } else if (columnType.equals(TIMESTAMP_TYPE)) { return new Timestamp((Long) avroObject); } return avroObject; case BYTES: ByteBuffer bb = (ByteBuffer) avroObject; BytesWritable bw = new BytesWritable(); bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()); return bw; case STRING: if (columnType.equals(BIG_DECIMAL_TYPE)) { return new BigDecimal(avroObject.toString()); } else if (columnType.equals(DATE_TYPE)) { return Date.valueOf(avroObject.toString()); } else if (columnType.equals(TIME_TYPE)) { return Time.valueOf(avroObject.toString()); } else if (columnType.equals(TIMESTAMP_TYPE)) { return Timestamp.valueOf(avroObject.toString()); } return avroObject.toString(); case ENUM: return ((GenericEnumSymbol) avroObject).toString(); case UNION: List<Schema> types = fieldSchema.getTypes(); if (types.size() != 2) { throw new IllegalArgumentException("Only support union with null"); } Schema s1 = types.get(0); Schema s2 = types.get(1); if (s1.getType() == Schema.Type.NULL) { return fromAvro(avroObject, s2, columnType); } else if (s2.getType() == Schema.Type.NULL) { return fromAvro(avroObject, s1, columnType); } else { throw new IllegalArgumentException("Only support union with null"); } case FIXED: return new BytesWritable(((GenericFixed) avroObject).bytes()); case RECORD: case ARRAY: case MAP: default: throw new IllegalArgumentException("Cannot convert Avro type " + fieldSchema.getType()); } }
/**
 * Dispatches to the write method matching the Avro schema type of the value.
 * The value must not be null and the schema must not be nullable. Schema types
 * without a case below (there is no default branch) write nothing.
 *
 * @param type a Parquet type
 * @param avroSchema a non-nullable Avro schema
 * @param value a non-null value to write
 */
@SuppressWarnings("unchecked")
private void writeValueWithoutConversion(Type type, Schema avroSchema, Object value) {
    switch (avroSchema.getType()) {
        // Nested and composite types are delegated to their dedicated writers.
        case RECORD:
            writeRecord(type.asGroupType(), avroSchema, value);
            break;
        case ARRAY:
            listWriter.writeList(type.asGroupType(), avroSchema, value);
            break;
        case MAP:
            writeMap(type.asGroupType(), avroSchema, (Map<CharSequence, ?>) value);
            break;
        case UNION:
            writeUnion(type.asGroupType(), avroSchema, value);
            break;

        // Primitive types.
        case BOOLEAN:
            recordConsumer.addBoolean((Boolean) value);
            break;
        case INT: {
            // Characters are written as their integer code.
            final int asInt = value instanceof Character
                    ? (Character) value
                    : ((Number) value).intValue();
            recordConsumer.addInteger(asInt);
            break;
        }
        case LONG:
            recordConsumer.addLong(((Number) value).longValue());
            break;
        case FLOAT:
            recordConsumer.addFloat(((Number) value).floatValue());
            break;
        case DOUBLE:
            recordConsumer.addDouble(((Number) value).doubleValue());
            break;

        // Binary-backed types.
        case FIXED:
            recordConsumer.addBinary(Binary.fromReusedByteArray(((GenericFixed) value).bytes()));
            break;
        case BYTES: {
            final Binary binary = value instanceof byte[]
                    ? Binary.fromReusedByteArray((byte[]) value)
                    : Binary.fromReusedByteBuffer((ByteBuffer) value);
            recordConsumer.addBinary(binary);
            break;
        }
        case STRING:
            recordConsumer.addBinary(fromAvroString(value));
            break;
        case ENUM:
            recordConsumer.addBinary(Binary.fromString(value.toString()));
            break;
    }
}
/**
 * Writes one record that populates every field of the {@code all.avsc}
 * schema, reads it back and checks each field against the value that was
 * written.
 *
 * <p>The writer and the reader are both closed in {@code finally} blocks so
 * that a failing write or read does not leave file handles open.
 *
 * @throws Exception if the schema cannot be loaded or the file cannot be
 *         written or read
 */
@Test
public void testAll() throws Exception {
  Schema schema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());

  // Only a unique path is needed: the temp file itself is removed again
  // before the writer is created (presumably so the writer can create it).
  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  GenericData.Record nestedRecord = new GenericRecordBuilder(
      schema.getField("mynestedrecord").schema())
      .set("mynestedint", 1).build();

  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

  GenericFixed genericFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

  List<Integer> emptyArray = new ArrayList<Integer>();
  ImmutableMap<String, Integer> emptyMap =
      new ImmutableMap.Builder<String, Integer>().build();

  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("myarrayofoptional", genericIntegerArray)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

  // The writer is created only once the record is ready, so nothing can
  // throw between opening it and entering the try block.
  AvroParquetWriter<GenericRecord> writer =
      new AvroParquetWriter<GenericRecord>(file, schema);
  try {
    writer.write(record);
  } finally {
    writer.close();
  }

  AvroParquetReader<GenericRecord> reader =
      new AvroParquetReader<GenericRecord>(testConf, file);
  GenericRecord nextRecord;
  try {
    nextRecord = reader.read();
  } finally {
    reader.close();
  }

  // The expected enum value is a plain string when compat is set, and an
  // EnumSymbol otherwise.
  Object expectedEnumSymbol = compat
      ? "a"
      : new GenericData.EnumSymbol(schema.getField("myenum").schema(), "a");

  assertNotNull(nextRecord);
  assertEquals(null, nextRecord.get("mynull"));
  assertEquals(true, nextRecord.get("myboolean"));
  assertEquals(1, nextRecord.get("myint"));
  assertEquals(2L, nextRecord.get("mylong"));
  assertEquals(3.1f, nextRecord.get("myfloat"));
  assertEquals(4.1, nextRecord.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
  assertEquals(str("hello"), nextRecord.get("mystring"));
  assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
  assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
  assertEquals(integerArray, nextRecord.get("myarray"));
  assertEquals(emptyArray, nextRecord.get("myemptyarray"));
  assertEquals(integerArray, nextRecord.get("myoptionalarray"));
  assertEquals(integerArray, nextRecord.get("myarrayofoptional"));
  assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
  assertEquals(emptyMap, nextRecord.get("myemptymap"));
  assertEquals(genericFixed, nextRecord.get("myfixed"));
}
@Test public void testArrayWithNullValues() throws Exception { Schema schema = new Schema.Parser().parse( Resources.getResource("all.avsc").openStream()); File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); tmp.deleteOnExit(); tmp.delete(); Path file = new Path(tmp.getPath()); GenericData.Record nestedRecord = new GenericRecordBuilder( schema.getField("mynestedrecord").schema()) .set("mynestedint", 1).build(); List<Integer> integerArray = Arrays.asList(1, 2, 3); GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>( Schema.createArray(Schema.create(Schema.Type.INT)), integerArray); GenericFixed genericFixed = new GenericData.Fixed( Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 }); List<Integer> emptyArray = new ArrayList<Integer>(); ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build(); Schema arrayOfOptionalIntegers = Schema.createArray( optional(Schema.create(Schema.Type.INT))); GenericData.Array<Integer> genericIntegerArrayWithNulls = new GenericData.Array<Integer>( arrayOfOptionalIntegers, Arrays.asList(1, null, 2, null, 3)); GenericData.Record record = new GenericRecordBuilder(schema) .set("mynull", null) .set("myboolean", true) .set("myint", 1) .set("mylong", 2L) .set("myfloat", 3.1f) .set("mydouble", 4.1) .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))) .set("mystring", "hello") .set("mynestedrecord", nestedRecord) .set("myenum", "a") .set("myarray", genericIntegerArray) .set("myemptyarray", emptyArray) .set("myoptionalarray", genericIntegerArray) .set("myarrayofoptional", genericIntegerArrayWithNulls) .set("mymap", ImmutableMap.of("a", 1, "b", 2)) .set("myemptymap", emptyMap) .set("myfixed", genericFixed) .build(); final AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema); try { writer.write(record); fail("Should not succeed writing an array with null values"); } catch (Exception e) { Assert.assertTrue("Error 
message should provide context and help", e.getMessage().contains("parquet.avro.write-old-list-structure")); } finally { writer.close(); } }