How to create a Spark Dataset or DataFrame from case classes that contain Enums

前端 未结 1 369
天命终不由人
天命终不由人 2021-01-02 07:18

I have been trying to create a Spark Dataset using case classes that contain Enums, but I'm not able to. I'm using Spark version 1.6.0. The exception is complaining about th…

1条回答
  •  时光说笑
    2021-01-02 08:07

    You can create your own encoder:

    import org.apache.spark.sql.SQLContext
    import org.apache.spark.{SparkConf, SparkContext}
    
    /** Sample enumeration with two values, used as a field type in the example. */
    object MyEnum extends Enumeration {
      /** Alias so the value type can be written as `MyEnum.MyEnum`. */
      type MyEnum = Value

      // One declaration per line; ids follow declaration order (Hello = 0, World = 1).
      val Hello: Value = Value
      val World: Value = Value
    }
    
    /**
     * Record pairing a string with a `MyEnum` value.
     *
     * @param field string payload
     * @param other enumeration value carried alongside `field`
     */
    case class MyData(
      field: String,
      other: MyEnum.Value
    )
    
    /**
     * Implicit Spark encoders for `MyData`.
     *
     * Import `MyDataEncoders._` to bring the encoder into implicit scope where a
     * `Dataset[MyData]` is created.
     * NOTE(review): presumably needed because Spark cannot derive an encoder for the
     * `Enumeration`-typed field on its own — confirm against the Spark version in use.
     */
    object MyDataEncoders {
      import org.apache.spark.sql.{Encoder, Encoders}

      /** Encoder for `MyData` obtained from `Encoders.kryo`. */
      implicit def myDataEncoder: Encoder[MyData] = Encoders.kryo[MyData]
    }
    
    /**
     * Small driver for the example: starts a local Spark context, converts a one-element
     * collection of `MyData` into a Dataset using the implicit encoder imported from
     * `MyDataEncoders`, and prints the collected contents.
     */
    object EnumTest {
      import MyDataEncoders._

      /**
       * Entry point.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val sparkConf = new SparkConf().setAppName("test").setMaster("local[*]")
        val sc = new SparkContext(sparkConf)

        try {
          val sqlCtx = new SQLContext(sc)

          // Brings the RDD-to-Dataset conversion (`toDS()`) into scope.
          import sqlCtx.implicits._

          val ds = sc.parallelize(Array(MyData("hello", MyEnum.World))).toDS()

          // The original literal ended with an extra `}` that was printed verbatim.
          println(s"df: ${ds.collect().mkString(",")}")
        } finally {
          // Release the local Spark context even if the job above throws.
          sc.stop()
        }
      }
    }
    

    0 讨论(0)
提交回复
热议问题