I have been trying to create Spark Dataset using case classes that contain Enums but I\'m not able to. I\'m using Spark version 1.6.0. The exceptions is complaining about th
You can create your own encoder:
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
object MyEnum extends Enumeration {
type MyEnum = Value
val Hello, World = Value
}
case class MyData(field: String, other: MyEnum.Value)
object MyDataEncoders {
implicit def myDataEncoder: org.apache.spark.sql.Encoder[MyData] =
org.apache.spark.sql.Encoders.kryo[MyData]
}
object EnumTest {
import MyDataEncoders._
def main(args: Array[String]): Unit = {
val sparkConf = new SparkConf().setAppName("test").setMaster("local[*]")
val sc = new SparkContext(sparkConf)
val sqlCtx = new SQLContext(sc)
import sqlCtx.implicits._
val df = sc.parallelize(Array(MyData("hello", MyEnum.World))).toDS()
println(s"df: ${df.collect().mkString(",")}}")
}
}