Question
When I use the log4j2 Kafka appender with my Spark Streaming code, it throws the error below when a spark.sql task is executed:
Caused by: org.apache.kafka.common.KafkaException: org.apache.kafka.common.serialization.ByteArraySerializer is not an instance of org.apache.kafka.common.serialization.Serializer
at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstance(AbstractConfig.java:372)
The Spark Streaming code is written in Scala, on Spark 2.4.3 and Scala 2.11. My build.gradle.kts has the following dependencies:
implementation("org.apache.kafka:kafka_2.11:2.3.0")
implementation("org.scala-lang:scala-library:$scalaVersion")
implementation("org.apache.spark:spark-streaming_$sparkScalaVersion")
implementation("org.apache.spark:spark-streaming-kafka-$sparkKafkaVersion")
implementation("org.apache.spark:spark-core_$sparkScalaVersion")
implementation("org.apache.spark:spark-sql_$sparkScalaVersion")
implementation("org.apache.spark:spark-hive_$sparkScalaVersion")
implementation("com.google.inject:guice:$guiceVersion")
implementation("org.apache.logging.log4j:log4j-slf4j-impl:2.10.0")
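Note that kafka_2.11:2.3.0 ships its own copy of the kafka-clients classes, while the Spark Kafka integration pulls in another copy transitively, so the application classpath can easily contain the same Kafka classes twice. The "X is not an instance of Y" error usually means exactly that: the class and the interface were loaded from different jars or by different classloaders, so the JVM treats them as unrelated types. A minimal Scala sketch to check this at runtime (the object name is my own, and it only inspects the loaders visible to application code):

import org.apache.kafka.common.serialization.{ByteArraySerializer, Serializer}

object KafkaClassLoaderCheck {
  def main(args: Array[String]): Unit = {
    // If these two loaders differ, instanceof checks between the
    // serializer and its interface fail with the exact error above.
    val interfaceLoader  = classOf[Serializer[_]].getClassLoader
    val serializerLoader = classOf[ByteArraySerializer].getClassLoader
    println(s"Serializer interface loaded by:  $interfaceLoader")
    println(s"ByteArraySerializer loaded by:   $serializerLoader")
    println(s"Same loader: ${interfaceLoader eq serializerLoader}")
    // The jar each class came from, which helps spot duplicates:
    println(classOf[Serializer[_]].getProtectionDomain.getCodeSource.getLocation)
    println(classOf[ByteArraySerializer].getProtectionDomain.getCodeSource.getLocation)
  }
}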
The application streams logs from the Spark driver, but not from the executors, and it fails during initialization:
Exception in thread "main" org.apache.spark.sql.AnalysisException: java.lang.ExceptionInInitializerError: null;
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:214)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.org$apache$spark$sql$hive$HiveSessionStateBuilder$$externalCatalog(HiveSessionStateBuilder.scala:39)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog$lzycompute(SessionCatalog.scala:90)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog(SessionCatalog.scala:90)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.databaseExists(SessionCatalog.scala:243)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.org$apache$spark$sql$catalyst$catalog$SessionCatalog$$requireDbExists(SessionCatalog.scala:177)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:316)
at org.apache.spark.sql.execution.command.CreateTableCommand.run(tables.scala:128)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
Caused by: java.lang.ExceptionInInitializerError
at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:152)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:117)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:271)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:384)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:286)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
... 32 more
Caused by: org.apache.kafka.common.KafkaException: Failed to construct kafka producer
at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:433)
at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:298)
at org.apache.logging.log4j.core.appender.mom.kafka.DefaultKafkaProducerFactory.newKafkaProducer(DefaultKafkaProducerFactory.java:40)
at org.apache.logging.log4j.core.appender.mom.kafka.KafkaManager.startup(KafkaManager.java:131)
at org.apache.logging.log4j.core.appender.mom.kafka.KafkaAppender.start(KafkaAppender.java:175)
at org.apache.logging.log4j.core.config.AbstractConfiguration.start(AbstractConfiguration.java:265)
at org.apache.logging.log4j.core.LoggerContext.setConfiguration(LoggerContext.java:545)
at org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:617)
at org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:634)
at org.apache.logging.log4j.core.LoggerContext.start(LoggerContext.java:229)
at org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:153)
at org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:45)
at org.apache.logging.log4j.LogManager.getContext(LogManager.java:194)
at org.apache.logging.log4j.spi.AbstractLoggerAdapter.getContext(AbstractLoggerAdapter.java:122)
at org.apache.logging.slf4j.Log4jLoggerFactory.getContext(Log4jLoggerFactory.java:43)
at org.apache.logging.log4j.spi.AbstractLoggerAdapter.getLogger(AbstractLoggerAdapter.java:46)
at org.apache.logging.slf4j.Log4jLoggerFactory.getLogger(Log4jLoggerFactory.java:29)
at org.slf4j.LoggerFactory.getLogger(LoggerFactory.java:355)
at org.apache.commons.logging.impl.SLF4JLogFactory.getInstance(SLF4JLogFactory.java:155)
at org.apache.commons.logging.impl.SLF4JLogFactory.getInstance(SLF4JLogFactory.java:132)
at org.apache.commons.logging.LogFactory.getLog(LogFactory.java:273)
at org.apache.hadoop.hive.conf.HiveConf.<clinit>(HiveConf.java:66)
... 47 more
Caused by: org.apache.kafka.common.KafkaException: org.apache.kafka.common.serialization.ByteArraySerializer is not an instance of org.apache.kafka.common.serialization.Serializer
at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstance(AbstractConfig.java:372)
at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:360)
... 68 more
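Reading the trace bottom-up: the log4j2 KafkaAppender starts its producer while the static initializer of org.apache.hadoop.hive.conf.HiveConf is running, and Spark runs that inside its isolated Hive-metastore classloader (IsolatedClientLoader). That loader holds its own copy of the Kafka classes, which would explain why ByteArraySerializer there "is not an instance of" the Serializer interface. One workaround I am considering (a sketch only, not verified for this exact setup) is to force the clashing packages onto the shared classloader via spark.sql.hive.metastore.sharedPrefixes:

import org.apache.spark.sql.SparkSession

// Sketch: sharedPrefixes must be set before the first Hive catalog
// access, i.e. when the session is built. Setting it replaces the
// default JDBC-driver prefixes, so append to the defaults if your
// metastore connection needs them. The prefix list is an assumption.
val spark = SparkSession.builder()
  .appName("sign-offer-streaming")
  .enableHiveSupport()
  .config("spark.sql.hive.metastore.sharedPrefixes",
    "org.apache.kafka,org.apache.logging.log4j,org.slf4j")
  .getOrCreate()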
My log4j2 XML is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="INFO">
  <Appenders>
    <Console name="console-log" target="SYSTEM_OUT">
      <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n%ex"/>
    </Console>
    <Kafka name="Kafka" topic="topic_name">
      <Property name="bootstrap.servers">kafka_cluster_endpoint</Property>
      <JSONLayout compact="true" eventEol="true">
        <KeyValuePair key="tags" value="$${sys:app.config.env}"/>
        <KeyValuePair key="clusterEnv" value="$${sys:app.config.env}"/>
        <KeyValuePair key="name" value="sign-offer-streaming-$${sys:app.config.env}"/>
      </JSONLayout>
    </Kafka>
  </Appenders>
  <Scripts>
    <Script name="kafkaAppenderEnabled" language="nashorn"><![CDATA[
      var System = Java.type('java.lang.System'),
          Boolean = Java.type('java.lang.Boolean');
      Boolean.parseBoolean(System.getProperty('app.config.env', '').length()>0);
    ]]>
    </Script>
  </Scripts>
  <Loggers>
    <Root level="info">
      <AppenderRef ref="Kafka">
        <ScriptFilter onMatch="ACCEPT" onMismatch="DENY">
          <ScriptRef ref="kafkaAppenderEnabled"/>
        </ScriptFilter>
      </AppenderRef>
      <AppenderRef ref="console-log"/>
    </Root>
    <Logger name="org.apache.spark" level="INFO" additivity="false">
      <AppenderRef ref="Kafka">
        <ScriptFilter onMatch="ACCEPT" onMismatch="DENY">
          <ScriptRef ref="kafkaAppenderEnabled"/>
        </ScriptFilter>
      </AppenderRef>
      <AppenderRef ref="console-log"/>
    </Logger>
  </Loggers>
</Configuration>
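On the driver-only logging: the executor JVMs configure log4j2 independently, so unless this XML file and the app.config.env system property are also delivered to them, the ScriptFilter evaluates to false on the executors and the Kafka appender never starts there. A sketch of the wiring that would be needed (all values are placeholders):

import org.apache.spark.SparkConf

// Placeholder values. In practice these are passed at submit time
// (spark.driver.extraJavaOptions cannot change a JVM that is already
// running), together with --files log4j2.xml so the file lands in the
// executors' working directory:
//   spark-submit --files log4j2.xml \
//     --conf spark.driver.extraJavaOptions="-Dlog4j.configurationFile=log4j2.xml -Dapp.config.env=dev" \
//     --conf spark.executor.extraJavaOptions="-Dlog4j.configurationFile=log4j2.xml -Dapp.config.env=dev" ...
val conf = new SparkConf()
  .set("spark.executor.extraJavaOptions",
    "-Dlog4j.configurationFile=log4j2.xml -Dapp.config.env=dev")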
Source: https://stackoverflow.com/questions/57734367/log4j2-kafka-appender-does-not-work-with-spark-streaming-kafka-consumer