Spark Streaming - consuming a message from a socket and processing it: NullPointerException

Submitted by 大城市里の小女人 on 2021-02-11 14:21:48

Question


I need to consume a message from a socket using Spark Structured Streaming, read the file at the filePath specified in the message, and write it to the fileDst.

Message from socket : {"fileName" : "sampleFile.dat","filePath":"/Users/Desktop/test/abc1.dat","fileDst":"/Users/Desktop/git/spark-streaming-poc/src/main/resourcs/samplefile2"}
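For reference, getSchema() is called in the code below to parse this JSON message, but its body is not included in the question. A minimal sketch of what such a schema could look like, assuming only the three fields present in the sample message, is:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

// Hypothetical sketch of getSchema(); not the original implementation.
// It declares one string column per field of the socket message.
public static StructType getSchema() {
    return new StructType()
            .add("fileName", DataTypes.StringType)
            .add("filePath", DataTypes.StringType)
            .add("fileDst", DataTypes.StringType);
}

With a schema like this, from_json(col("value").cast("string"), getSchema()) produces a struct column whose fields can be selected as data.fileName, data.filePath and data.fileDst, as done in main() below.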

Error:

java.lang.NullPointerException
    at org.apache.spark.sql.execution.SparkPlan.sparkContext(SparkPlan.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.metrics$lzycompute(ShuffleExchangeExec.scala:51)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.metrics(ShuffleExchangeExec.scala:50)
    at org.apache.spark.sql.execution.SparkPlan.longMetric(SparkPlan.scala:91)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.<init>(ShuffleExchangeExec.scala:67)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.apply(ShuffleExchangeExec.scala:138)
    at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:598)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:63)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:63)
    at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.Iterator$class.foreach(Iterator.scala:891)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
    at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
    at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1334)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
    at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.Iterator$class.foreach(Iterator.scala:891)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
    at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
    at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1334)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
    at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$toString$3.apply(QueryExecution.scala:207)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$toString$3.apply(QueryExecution.scala:207)
    at org.apache.spark.sql.execution.QueryExecution.stringOrError(QueryExecution.scala:99)
    at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:207)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:75)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228)
    at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:656)
    at StreamDriver.processRecord(StreamDriver.java:101)
    at StreamDriver$1.process(StreamDriver.java:157)
    at StreamDriver$1.process(StreamDriver.java:148)
    at org.apache.spark.sql.execution.streaming.sources.ForeachDataWriter.write(ForeachWriterProvider.scala:120)
    at org.apache.spark.sql.execution.streaming.sources.ForeachDataWriter.write(ForeachWriterProvider.scala:106)
    at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$anonfun$run$3.apply(WriteToDataSourceV2Exec.scala:118)
    at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$anonfun$run$3.apply(WriteToDataSourceV2Exec.scala:116)
    at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
    at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:146)
    at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:67)
    at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec$$anonfun$doExecute$2.apply(WriteToDataSourceV2Exec.scala:66)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Code:

public static void processRecord(Row value){

        System.out.println(value.getString(0));
        System.out.println(value.getString(1));
        System.out.println(value.getString(2));

        Dataset<Row> ds1 = null;

        try {
            ds1 = spark.read().schema(createSchema(convertJsonToMap()))
                    //.option("inferschema", "true")
                    .option("header", "false")
                    .option("delimiter", "|")
                    .csv(value.getString(1));
        } catch (IOException e) {
            e.printStackTrace();
        }
        ds1.write().mode(SaveMode.Overwrite).option("header", "true").csv(value.getString(2));

//        try {
//            ds1 = spark.readStream()
//                    .format("csv")
//                    .schema(createSchema(convertJsonToMap()))
//                    .option("inferSchema", "true")
//                    .option("header", "true")
//                    .option("delimiter", "|")
//                    .load(value.getString(1));
//        } catch (IOException e) {
//            e.printStackTrace();
//        }
//
//
//        StreamingQuery  query1 =
//                ds1.writeStream()
//                        .format("csv")
//                        //   .mode(SaveMode.Append)
//                        .outputMode(OutputMode.Append())
//                        .option("checkpointLocation", "/Users/kotireddy/git/spark-streaming-poc/src/main/resources")
//                        .option("path",value.getString(2))
//                        .start();
//        try {
//            query1.awaitTermination();
//        } catch (StreamingQueryException e) {
//            e.printStackTrace();
//        }
                
    }

    public static void main(String[] args) throws InterruptedException, StreamingQueryException {

        StreamingQuery query = null;
        spark.sparkContext().setCheckpointDir("/Users/Desktop/git/spark-streaming-poc/src/main/resources");

        Dataset<Row> ds = spark.readStream()
                .format("socket")
                .option("host", "localhost")
                .option("port", "12345")
                .load();


        Dataset<Row>  ds2 = ds.select(from_json(col("value").cast("string"), getSchema()).as("data"))
                .select(col("data.fileName"), col("data.filePath"), col("data.fileDst"));


         query = ds2.writeStream().foreach(new ForeachWriter<Row>() {

             @Override
             public boolean open(long partitionId, long version) {
                 return true;
             }

            @Override
            public void process(Row value) {
                processRecord(value);
            }

            @Override
            public void close(Throwable errorOrNull) {
               //errorOrNull.getSuppressed();
            }
         }).start();

        query.awaitTermination();

    }
}

Source: https://stackoverflow.com/questions/64863847/spark-streaming-consuming-message-from-socket-and-processing-null-pointer-exc
