public static Schema getSchema(SeekableInput input) throws IOException { DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(input, datumReader); Schema schema = dataFileReader.getSchema(); if (PadDefaultNullsToSchema) { // a list of "cloned" fields, with optional default value set to null ArrayList<Field> paddedFields = new ArrayList<Field>(); for (Field field: schema.getFields()) { // should this field be padded? boolean needsNullPadding = (field.schema() != null) // the field has nested schema && (field.schema().getType().equals(Type.UNION)) // the nested schema is UNION && (field.schema().getTypes().get(0).getType().equals(Type.NULL)); // the first element of union is NULL type JsonNode defValue = needsNullPadding ? NullNode.getInstance() : field.defaultValue(); Field f = new Field(field.name(), field.schema(), field.doc(), defValue); paddedFields.add(f); } schema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); schema.setFields(paddedFields); } return schema; }
/**
 * Dispatches a JSON node to the {@code deSchema} overload that matches its kind.
 *
 * <p>Handled kinds, checked in this order: object, array, boolean, int, floating point, null.
 *
 * @param name name passed through to the selected overload
 * @param n    node to convert
 * @return the component built by the matching overload, or {@code null} when the node is none
 *     of the handled kinds
 */
public static SchemaComponent deSchema(String name, JsonNode n) {
    SchemaComponent component = null;
    if (n.isObject()) {
        component = deSchema(name, (ObjectNode) n);
    } else if (n.isArray()) {
        component = deSchema(name, (ArrayNode) n);
    } else if (n.isBoolean()) {
        component = deSchema(name, (BooleanNode) n);
    } else if (n.isInt()) {
        component = deSchema(name, (IntNode) n);
    } else if (n.isFloatingPointNumber()) {
        // NOTE(review): isFloatingPointNumber() may also hold for node classes other than
        // DoubleNode, in which case this cast would fail — confirm against the Jackson version in use.
        component = deSchema(name, (DoubleNode) n);
    } else if (n.isNull()) {
        component = deSchema(name, (NullNode) n);
    }
    return component;
}
/** * It takes a record structure in <String, HiveTypeInfo> format. * Generate a schema and return in String. * @param record : record structure * @return String representation of Avro schema. * @throws StageException: If record contains unsupported type */ @Override public String inferSchema(Map<String, HiveTypeInfo> record) throws StageException { Map<String, Schema> fields = new LinkedHashMap<>(); for(Map.Entry<String, HiveTypeInfo> pair: record.entrySet()) { if(!HiveMetastoreUtil.validateObjectName(pair.getKey())) { throw new HiveStageCheckedException(Errors.HIVE_30, pair.getKey()); } Schema columnSchema = Schema.createUnion(ImmutableList.of(Schema.create(Schema.Type.NULL), traverse(pair))); // We always set default value to null columnSchema.addProp("default", NullNode.getInstance()); fields.put(pair.getKey(), columnSchema); } Schema schema = buildSchema(fields); return schema.toString(); }
private Schema convertFields(String name, List<Type> parquetFields) { List<Schema.Field> fields = new ArrayList<Schema.Field>(); for (Type parquetType : parquetFields) { Schema fieldSchema = convertField(parquetType); if (parquetType.isRepetition(REPEATED)) { throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType); } else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) { fields.add(new Schema.Field( parquetType.getName(), optional(fieldSchema), null, NullNode.getInstance())); } else { // REQUIRED fields.add(new Schema.Field(parquetType.getName(), fieldSchema, null, null)); } } Schema schema = Schema.createRecord(name, null, null, false); schema.setFields(fields); return schema; }
@Test public void testUnionOfTwoTypes() throws Exception { Schema schema = Schema.createRecord("record2", null, null, false); Schema multipleTypes = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type .NULL), Schema.create(INT), Schema.create(Schema.Type.FLOAT))); schema.setFields(Arrays.asList( new Schema.Field("myunion", multipleTypes, null, NullNode.getInstance()))); // Avro union is modelled using optional data members of the different // types. This does not translate back into an Avro union testAvroToParquetConversion( schema, "message record2 {\n" + " optional group myunion {\n" + " optional int32 member0;\n" + " optional float member1;\n" + " }\n" + "}\n"); }
/**
 * Round-trips, under {@code NEW_BEHAVIOR}, a record whose only field is an array of optional
 * records with two optional string fields each.
 */
@Test
public void testArrayOfOptionalRecords() throws Exception {
    Schema element = Schema.createRecord("element", null, null, false);
    Schema nullableString = optional(Schema.create(Schema.Type.STRING));
    Schema.Field s1 = new Schema.Field("s1", nullableString, null, NullNode.getInstance());
    Schema.Field s2 = new Schema.Field("s2", nullableString, null, NullNode.getInstance());
    element.setFields(Lists.newArrayList(s1, s2));

    Schema hasArray = Schema.createRecord("HasArray", null, null, false);
    Schema.Field arrayField =
            new Schema.Field("myarray", Schema.createArray(optional(element)), null, null);
    hasArray.setFields(Lists.newArrayList(arrayField));

    System.err.println("Avro schema: " + hasArray.toString(true));

    String expectedParquet =
            "message HasArray {\n" +
            " required group myarray (LIST) {\n" +
            " repeated group list {\n" +
            " optional group element {\n" +
            " optional binary s1 (UTF8);\n" +
            " optional binary s2 (UTF8);\n" +
            " }\n" +
            " }\n" +
            " }\n" +
            "}\n";
    testRoundTripConversion(NEW_BEHAVIOR, hasArray, expectedParquet);
}
@Test public void testArrayOfOptionalRecordsOldBehavior() throws Exception { Schema innerRecord = Schema.createRecord("InnerRecord", null, null, false); Schema optionalString = optional(Schema.create(Schema.Type.STRING)); innerRecord.setFields(Lists.newArrayList( new Schema.Field("s1", optionalString, null, NullNode.getInstance()), new Schema.Field("s2", optionalString, null, NullNode.getInstance()) )); Schema schema = Schema.createRecord("HasArray", null, null, false); schema.setFields(Lists.newArrayList( new Schema.Field("myarray", Schema.createArray(optional(innerRecord)), null, null) )); System.err.println("Avro schema: " + schema.toString(true)); // Cannot use round-trip assertion because InnerRecord optional is removed testAvroToParquetConversion(schema, "message HasArray {\n" + " required group myarray (LIST) {\n" + " repeated group array {\n" + " optional binary s1 (UTF8);\n" + " optional binary s2 (UTF8);\n" + " }\n" + " }\n" + "}\n"); }
/**
 * Generates the Avro record schema describing the columns reported by {@code ClassWriter}.
 *
 * <p>One field is created per column. Its name is the column name passed through
 * {@code ClassWriter.toJavaIdentifier} and {@code AvroUtil.toAvroIdentifier}, its default is
 * JSON null, and it carries the original column name and SQL type as the "columnName" and
 * "sqlType" properties. The record itself carries a "tableName" property.
 *
 * @param schemaNameOverride record name to use; when {@code null} the short class name for the
 *     table is used, falling back to the table name (or {@code TableClassName.QUERY_RESULT}
 *     when there is no table name)
 * @return the generated record schema
 * @throws IOException declared by this method; presumably raised while reading column metadata
 */
public Schema generate(String schemaNameOverride) throws IOException {
    ClassWriter classWriter = new ClassWriter(options, connManager, tableName, null);
    Map<String, Integer> typesByColumn = classWriter.getColumnTypes();
    String[] columns = classWriter.getColumnNames(typesByColumn);

    List<Field> avroFields = new ArrayList<Field>();
    for (String column : columns) {
        String avroFieldName = AvroUtil.toAvroIdentifier(ClassWriter.toJavaIdentifier(column));
        int sqlType = typesByColumn.get(column);
        Schema columnSchema = toAvroSchema(sqlType, column);

        Field avroField = new Field(avroFieldName, columnSchema, null, NullNode.getInstance());
        avroField.addProp("columnName", column);
        avroField.addProp("sqlType", Integer.toString(sqlType));
        avroFields.add(avroField);
    }

    TableClassName tableClassName = new TableClassName(options);
    String shortClassName = tableClassName.getShortClassForTable(tableName);

    String avroTableName;
    if (tableName == null) {
        avroTableName = TableClassName.QUERY_RESULT;
    } else {
        avroTableName = tableName;
    }

    // Name precedence: explicit override, then short class name, then table name.
    String avroName;
    if (schemaNameOverride != null) {
        avroName = schemaNameOverride;
    } else if (shortClassName != null) {
        avroName = shortClassName;
    } else {
        avroName = avroTableName;
    }

    String avroNamespace = tableClassName.getPackageForTable();
    String doc = "Sqoop import of " + avroTableName;

    Schema record = Schema.createRecord(avroName, doc, avroNamespace, false);
    record.setFields(avroFields);
    record.addProp("tableName", avroTableName);
    return record;
}
private Schema updateUnionFields(Schema schema) { Schema updatedSchema = schema; List<Schema.Field> fields = schema.getFields(); boolean hasUnionType = false; List<Schema.Field> updatedFields = new ArrayList<>(fields.size()); for (Schema.Field field : fields) { Schema fieldSchema = field.schema(); Schema.Field updatedField = field; // if it is union and first type is null then set default value as null if (fieldSchema.getType() == Schema.Type.UNION && fieldSchema.getTypes().get(0).getType() == Schema.Type.NULL) { updatedField = new Schema.Field(field.name(), fieldSchema, field.doc(), NullNode.getInstance(), field.order()); hasUnionType = true; } updatedFields.add(updatedField); } if (hasUnionType) { updatedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); updatedSchema.setFields(updatedFields); for (String alias : schema.getAliases()) { updatedSchema.addAlias(alias); } for (Map.Entry<String, org.codehaus.jackson.JsonNode> nodeEntry : schema.getJsonProps().entrySet()) { updatedSchema.addProp(nodeEntry.getKey(), nodeEntry.getValue()); } } return updatedSchema; }
/**
 * Returns the default value to use for a field of the given schema, or {@code null} when no
 * default value should be used.
 *
 * <p>An entry in {@code defaultValuesForTypes} for the schema's type takes precedence. Without
 * such an entry, JSON null is used when both {@code avroNullableFields} and
 * {@code avroDefaultNullable} are enabled in the configuration.
 */
private JsonNode getDefaultValue(Schema schema) {
    if (defaultValuesForTypes.containsKey(schema.getType())) {
        return defaultValuesForTypes.get(schema.getType());
    }

    boolean nullIsDefault = getConfig().avroNullableFields && getConfig().avroDefaultNullable;
    return nullIsDefault ? NullNode.getInstance() : null;
}
/**
 * Binds the content the parser is positioned at as a JSON tree.
 *
 * <p>When the initial token is {@code VALUE_NULL}, {@code END_ARRAY} or {@code END_OBJECT} the
 * result is {@code NullNode.instance}; otherwise the content is deserialized with the root
 * deserializer for {@code JSON_NODE_TYPE}. In both cases the parser's current token is cleared
 * before returning.
 *
 * @param paramJsonParser parser to read from
 * @return the tree that was read, or {@code NullNode.instance} as described above
 */
protected JsonNode _bindAsTree(JsonParser paramJsonParser)
        throws IOException, JsonParseException, JsonMappingException {
    JsonToken firstToken = _initForReading(paramJsonParser);

    JsonNode result;
    if (firstToken == JsonToken.VALUE_NULL
            || firstToken == JsonToken.END_ARRAY
            || firstToken == JsonToken.END_OBJECT) {
        result = NullNode.instance;
    } else {
        // The context is only needed (and only created) when there is something to deserialize.
        DeserializationContext context = _createDeserializationContext(paramJsonParser, this._config);
        result = (JsonNode) _findRootDeserializer(this._config, JSON_NODE_TYPE)
                .deserialize(paramJsonParser, context);
    }

    // The token that was just bound must be consumed too.
    paramJsonParser.clearCurrentToken();
    return result;
}
/**
 * Reads the content of the given stream as a JSON tree.
 *
 * @param paramInputStream stream to read from
 * @return the tree that was read; {@code NullNode.instance} when {@code readValue} yields
 *     {@code null}
 */
public JsonNode readTree(InputStream paramInputStream)
        throws IOException, JsonProcessingException {
    JsonNode tree = (JsonNode) readValue(paramInputStream, JSON_NODE_TYPE);
    return (tree != null) ? tree : NullNode.instance;
}
/**
 * Reads the content supplied by the given reader as a JSON tree.
 *
 * @param paramReader reader to read from
 * @return the tree that was read; {@code NullNode.instance} when {@code readValue} yields
 *     {@code null}
 */
public JsonNode readTree(Reader paramReader)
        throws IOException, JsonProcessingException {
    JsonNode tree = (JsonNode) readValue(paramReader, JSON_NODE_TYPE);
    if (tree != null) {
        return tree;
    }
    return NullNode.instance;
}
/**
 * Reads the given String content as a JSON tree.
 *
 * @param paramString content to read
 * @return the tree that was read; {@code NullNode.instance} when {@code readValue} yields
 *     {@code null}
 */
public JsonNode readTree(String paramString)
        throws IOException, JsonProcessingException {
    JsonNode tree = (JsonNode) readValue(paramString, JSON_NODE_TYPE);
    return (tree == null) ? NullNode.instance : tree;
}
/**
 * Reads a JSON tree from the given parser using the supplied deserialization configuration.
 *
 * @param paramJsonParser              parser to read from
 * @param paramDeserializationConfig   configuration handed to {@code _readValue}
 * @return the tree that was read; {@code NullNode.instance} when {@code _readValue} yields
 *     {@code null}
 */
public JsonNode readTree(JsonParser paramJsonParser, DeserializationConfig paramDeserializationConfig)
        throws IOException, JsonProcessingException {
    JsonNode tree = (JsonNode) _readValue(paramDeserializationConfig, paramJsonParser, JSON_NODE_TYPE);
    if (tree == null) {
        tree = NullNode.instance;
    }
    return tree;
}
/**
 * Reads the given file as a JSON tree through {@code objectMapper()}.
 *
 * @param paramFile file to read
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(File paramFile)
        throws IOException, JsonParseException {
    JsonNode tree = (JsonNode) objectMapper().readValue(paramFile, JsonNode.class);
    return (tree != null) ? tree : NullNode.instance;
}
/**
 * Reads the content of the given stream as a JSON tree through {@code objectMapper()}.
 *
 * @param paramInputStream stream to read from
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(InputStream paramInputStream)
        throws IOException, JsonParseException {
    JsonNode tree = (JsonNode) objectMapper().readValue(paramInputStream, JsonNode.class);
    if (tree != null) {
        return tree;
    }
    return NullNode.instance;
}
/**
 * Reads the content supplied by the given reader as a JSON tree through {@code objectMapper()}.
 *
 * @param paramReader reader to read from
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(Reader paramReader)
        throws IOException, JsonParseException {
    JsonNode tree = (JsonNode) objectMapper().readValue(paramReader, JsonNode.class);
    return (tree == null) ? NullNode.instance : tree;
}
/**
 * Reads the given String content as a JSON tree through {@code objectMapper()}.
 *
 * @param paramString content to read
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(String paramString)
        throws IOException, JsonParseException {
    JsonNode tree = (JsonNode) objectMapper().readValue(paramString, JsonNode.class);
    if (tree == null) {
        tree = NullNode.instance;
    }
    return tree;
}
/**
 * Reads the resource behind the given URL as a JSON tree through {@code objectMapper()}.
 *
 * @param paramURL location of the content to read
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(URL paramURL)
        throws IOException, JsonParseException {
    JsonNode tree = (JsonNode) objectMapper().readValue(paramURL, JsonNode.class);
    return (tree != null) ? tree : NullNode.instance;
}
/**
 * Reads the whole given byte array as a JSON tree through {@code objectMapper()}.
 *
 * @param paramArrayOfByte content to read; bytes from offset 0 up to the array length are used
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(byte[] paramArrayOfByte)
        throws IOException, JsonParseException {
    int length = paramArrayOfByte.length;
    JsonNode tree = (JsonNode) objectMapper().readValue(paramArrayOfByte, 0, length, JsonNode.class);
    if (tree != null) {
        return tree;
    }
    return NullNode.instance;
}
/**
 * Round-trips a record with a single optional INT field that has a JSON null default.
 */
@Test
public void testOptionalFields() throws Exception {
    Schema nullableInt = optional(Schema.create(INT));
    Schema.Field intField = new Schema.Field("myint", nullableInt, null, NullNode.getInstance());

    Schema record = Schema.createRecord("record1", null, null, false);
    record.setFields(Arrays.asList(intField));

    String expectedParquet =
            "message record1 {\n" +
            " optional int32 myint;\n" +
            "}\n";
    testRoundTripConversion(record, expectedParquet);
}
@Test public void testOldAvroListOfLists() throws Exception { Schema listOfLists = optional(Schema.createArray(Schema.createArray( Schema.create(INT)))); Schema schema = Schema.createRecord("AvroCompatListInList", null, null, false); schema.setFields(Lists.newArrayList( new Schema.Field("listOfLists", listOfLists, null, NullNode.getInstance()) )); System.err.println("Avro schema: " + schema.toString(true)); testRoundTripConversion(schema, "message AvroCompatListInList {\n" + " optional group listOfLists (LIST) {\n" + " repeated group array (LIST) {\n" + " repeated int32 array;\n" + " }\n" + " }\n" + "}"); // Cannot use round-trip assertion because 3-level representation is used testParquetToAvroConversion(NEW_BEHAVIOR, schema, "message AvroCompatListInList {\n" + " optional group listOfLists (LIST) {\n" + " repeated group array (LIST) {\n" + " repeated int32 array;\n" + " }\n" + " }\n" + "}"); }
@Test public void testOldThriftListOfLists() throws Exception { Schema listOfLists = optional(Schema.createArray(Schema.createArray( Schema.create(INT)))); Schema schema = Schema.createRecord("ThriftCompatListInList", null, null, false); schema.setFields(Lists.newArrayList( new Schema.Field("listOfLists", listOfLists, null, NullNode.getInstance()) )); System.err.println("Avro schema: " + schema.toString(true)); // Cannot use round-trip assertion because repeated group names differ testParquetToAvroConversion(schema, "message ThriftCompatListInList {\n" + " optional group listOfLists (LIST) {\n" + " repeated group listOfLists_tuple (LIST) {\n" + " repeated int32 listOfLists_tuple_tuple;\n" + " }\n" + " }\n" + "}"); // Cannot use round-trip assertion because 3-level representation is used testParquetToAvroConversion(NEW_BEHAVIOR, schema, "message ThriftCompatListInList {\n" + " optional group listOfLists (LIST) {\n" + " repeated group listOfLists_tuple (LIST) {\n" + " repeated int32 listOfLists_tuple_tuple;\n" + " }\n" + " }\n" + "}"); }
@Test public void testUnknownTwoLevelListOfLists() throws Exception { // This tests the case where we don't detect a 2-level list by the repeated // group's name, but it must be 2-level because the repeated group doesn't // contain an optional or repeated element as required for 3-level lists Schema listOfLists = optional(Schema.createArray(Schema.createArray( Schema.create(INT)))); Schema schema = Schema.createRecord("UnknownTwoLevelListInList", null, null, false); schema.setFields(Lists.newArrayList( new Schema.Field("listOfLists", listOfLists, null, NullNode.getInstance()) )); System.err.println("Avro schema: " + schema.toString(true)); // Cannot use round-trip assertion because repeated group names differ testParquetToAvroConversion(schema, "message UnknownTwoLevelListInList {\n" + " optional group listOfLists (LIST) {\n" + " repeated group mylist (LIST) {\n" + " repeated int32 innerlist;\n" + " }\n" + " }\n" + "}"); // Cannot use round-trip assertion because 3-level representation is used testParquetToAvroConversion(NEW_BEHAVIOR, schema, "message UnknownTwoLevelListInList {\n" + " optional group listOfLists (LIST) {\n" + " repeated group mylist (LIST) {\n" + " repeated int32 innerlist;\n" + " }\n" + " }\n" + "}"); }
/**
 * Reads the given file as a JSON tree through {@code objectMapper()}.
 *
 * @param src file to read
 * @return the tree that was read; {@code NullNode.instance} when the mapper yields {@code null}
 */
public JsonNode readTree(File src)
        throws IOException, JsonParseException {
    JsonNode tree = objectMapper().readValue(src, JsonNode.class);
    if (tree == null) {
        return NullNode.instance;
    }
    return tree;
}
private Object processElement(final String key, final JsonNode element) { Object rval = null; if (element instanceof ObjectNode) { Map<String, Object> r2 = processObject((ObjectNode) element); // convert TreeMap entries into Link instances. if (key.equals(Entity.LINKS_KEY)) { String refName = (String) r2.get(Link.LINK_RESOURCE_KEY); String hrefString = (String) r2.get(Link.LINK_HREF_KEY); try { rval = new BasicLink(refName, new URL(hrefString)); } catch (MalformedURLException e) { rval = r2; } } else { rval = r2; } } else if (element instanceof NullNode) { rval = null; } else if (element instanceof ArrayNode) { rval = processArray(key, (ArrayNode) element); } else { rval = processPrimitive(element); } return rval; }
public Schema handleUnionFieldsWithNull(Schema schema, Set<String> visitingTypes) { if (visitingTypes.contains(schema.getFullName())) { return schema; } visitingTypes.add(schema.getFullName()); Schema updatedRootSchema = schema; if (schema.getType() == RECORD) { List<Schema.Field> fields = updatedRootSchema.getFields(); List<Schema.Field> updatedFields = new ArrayList<>(fields.size()); boolean hasUnionType = false; for (Schema.Field field : fields) { Schema fieldSchema = field.schema(); // check for union boolean currentFieldTypeIsUnion = fieldSchema.getType() == Schema.Type.UNION; if (currentFieldTypeIsUnion) { // check for the fields with in union // if it is union and first type is null then set default value as null if (fieldSchema.getTypes().get(0).getType() == Schema.Type.NULL) { hasUnionType = true; } } else { // go through non-union fields, which may be records Schema updatedFieldSchema = handleUnionFieldsWithNull(fieldSchema, visitingTypes); if (fieldSchema != updatedFieldSchema) { hasUnionType = true; } } updatedFields.add(new Schema.Field(field.name(), fieldSchema, field.doc(), currentFieldTypeIsUnion ? NullNode.getInstance() : field.defaultValue(), field.order())); } if (hasUnionType) { updatedRootSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); updatedRootSchema.setFields(updatedFields); for (String alias : schema.getAliases()) { updatedRootSchema.addAlias(alias); } for (Map.Entry<String, org.codehaus.jackson.JsonNode> nodeEntry : schema.getJsonProps().entrySet()) { updatedRootSchema.addProp(nodeEntry.getKey(), nodeEntry.getValue()); } } } return updatedRootSchema; }
/**
 * Builds the schema component for a JSON null node.
 *
 * @param name not used by this overload
 * @param n    not used by this overload
 * @return a new {@code NullSC}
 */
private static SchemaComponent deSchema(String name, NullNode n) {
    return new NullSC();
}