The only solution I have found to work so far is to reload the data with a modified schema. The new schema loads the `attributes` column as a map instead of a struct.
// First pass: read the JSON once so that a schema is available to rewrite.
Dataset<Row> newData = sql.read().json(path);
// Build a copy of that schema in which the "attributes" column is a map.
DataType remapped = toMapType(newData.schema(), null, "attributes");
StructType newSchema = (StructType) remapped;
// Second pass: reload the same path, this time with the rewritten schema applied.
newData = sql.read().schema(newSchema).json(path);
/**
 * Returns a copy of {@code dataType} in which the column addressed by {@code col}
 * has been rewritten by the {@code StructField} overload of this method.
 *
 * <p>Only {@code StructType} values are traversed; any other type is returned as-is.
 *
 * @param dataType    the type to rewrite
 * @param fullColName dot-separated path of the field that owns {@code dataType},
 *                    or {@code null} when {@code dataType} is the root schema
 * @param col         dot-separated path of the column to rewrite
 * @return a new {@code StructType} built from the rewritten fields, or
 *         {@code dataType} itself when it is not a {@code StructType}
 */
private DataType toMapType(DataType dataType, String fullColName, String col) {
    if (!(dataType instanceof StructType)) {
        return dataType;
    }
    StructField[] original = ((StructType) dataType).fields();
    StructField[] rewritten = new StructField[original.length];
    for (int i = 0; i < original.length; i++) {
        StructField field = original[i];
        // Root-level fields are addressed by their bare name; nested ones by parent.child.
        String path = (fullColName == null) ? field.name() : fullColName + "." + field.name();
        rewritten[i] = toMapType(field, path, col);
    }
    return new StructType(rewritten);
}
/**
 * Rewrites a single field while walking the schema towards the target column.
 *
 * <ul>
 *   <li>If {@code fullColName} equals {@code col}, the field is replaced by a nullable
 *       map field (string keys, long values) that keeps the field's own name and
 *       carries empty metadata.</li>
 *   <li>If {@code fullColName} is a proper ancestor path of {@code col}, the field is
 *       rebuilt with its data type rewritten recursively; name, nullability and
 *       metadata are preserved.</li>
 *   <li>Otherwise the field is returned unchanged.</li>
 * </ul>
 *
 * @param structField the field to inspect
 * @param fullColName dot-separated path of {@code structField} from the schema root;
 *                    must not be {@code null}
 * @param col         dot-separated path of the column to turn into a map
 * @return the replacement field, or {@code structField} itself when it is not on the
 *         path to {@code col}
 */
private StructField toMapType(StructField structField, String fullColName, String col) {
    if (fullColName.equals(col)) {
        // Use the field's own name: col is the full dotted path, which is only the
        // right name for a top-level column. For a nested target it would produce a
        // child literally named "parent.child".
        return new StructField(structField.name(), new MapType(DataTypes.StringType, DataTypes.LongType, true), true, Metadata.empty());
    }
    // Require the path separator so that a sibling such as "attr" is not mistaken
    // for an ancestor of "attributes" and needlessly rebuilt.
    if (col.startsWith(fullColName + ".")) {
        return new StructField(structField.name(), toMapType(structField.dataType(), fullColName, col), structField.nullable(), structField.metadata());
    }
    return structField;
}