I have nested JSON and would like to produce output in a tabular structure. I am able to parse the JSON values individually, but I am having some problems tabularizing them.
Hi, below is the "DataFrame only" solution that I developed. I am looking for a complete "RDD only" solution.
/**
 * Reads `product.json` into a DataFrame, flattens the nested `level` structure
 * into a tabular layout, and prints the result to stdout.
 *
 * Both `level.states` and `level._link` are exploded, so the output contains one
 * row per (state, link) pair for each input record.
 * NOTE(review): assumes `level.states` and `level._link` are JSON arrays — confirm
 * against the actual schema of `product.json`.
 *
 * Relies on `SparkConf`, `SparkContext`, `SQLContext` and
 * `org.apache.spark.sql.functions.explode` being imported at file level.
 *
 * @param Args command-line arguments (unused)
 */
def main(Args: Array[String]): Unit = {
  val conf = new SparkConf()
    .setAppName("JSON Read and Write using Spark DataFrame few more options")
    .setMaster("local[1]")
  val sc = new SparkContext(conf)

  try {
    val sqlContext = new SQLContext(sc)
    // Required for the $"column" interpolator; must be imported from the
    // concrete SQLContext instance, hence the local import.
    import sqlContext.implicits._

    val sourceJsonDF = sqlContext.read.json("product.json")

    val jsonFlatDF_level = sourceJsonDF
      // Each explode yields one row per array element; two explodes give the
      // cross product of states and links per record.
      .withColumn("explode_states", explode($"level.states"))
      .withColumn("explode_link", explode($"level._link"))
      .select(
        $"level.productReference.TPNB".as("TPNB"),
        $"level.productReference.unitOfMeasure".as("level_unitOfMeasure"),
        $"level.locationReference.location".as("level_location"),
        $"level.locationReference.type".as("level_type"),
        $"explode_states.state".as("level_state"),
        $"explode_states.effectiveDateTime".as("level_effectiveDateTime"),
        $"explode_states.stockQuantity.quantity".as("level_quantity"),
        $"explode_states.stockQuantity.stockKeepingLevel".as("level_stockKeepingLevel"),
        $"explode_link.rel".as("level_rel"),
        $"explode_link.href".as("level_href"),
        $"explode_link.method".as("level_method")
      )

    // Fixed: the original referenced the undefined name `jsonFlatDF_oldLevel`.
    jsonFlatDF_level.show()
  } finally {
    // Release the Spark context even if reading or flattening fails.
    sc.stop()
  }
}