Log4j2 Kafka appender does not work with Spark Streaming Kafka Consumer


Question


When I use the log4j2 Kafka appender with my Spark Streaming code, it throws the error below when a spark.sql task is executed.

Caused by: org.apache.kafka.common.KafkaException: org.apache.kafka.common.serialization.ByteArraySerializer is not an instance of org.apache.kafka.common.serialization.Serializer
    at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstance(AbstractConfig.java:372)

The Spark Streaming code is written in Scala. My build.gradle.kts has the dependencies below. I am using Spark 2.4.3 and Scala 2.11.

    implementation("org.apache.kafka:kafka_2.11:2.3.0")
    implementation("org.scala-lang:scala-library:$scalaVersion")
    implementation("org.apache.spark:spark-streaming_$sparkScalaVersion")
    implementation("org.apache.spark:spark-streaming-kafka-$sparkKafkaVersion")
    implementation("org.apache.spark:spark-core_$sparkScalaVersion")
    implementation("org.apache.spark:spark-sql_$sparkScalaVersion")
    implementation("org.apache.spark:spark-hive_$sparkScalaVersion")
    implementation("com.google.inject:guice:$guiceVersion")
    implementation("org.apache.logging.log4j:log4j-slf4j-impl:2.10.0")

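One thing worth ruling out first is having two different copies of kafka-clients on the classpath: kafka_2.11 (the full broker artifact) transitively brings in kafka-clients 2.3.0, while the Spark Kafka integration may pull in a different version. A minimal sketch of forcing a single version in build.gradle.kts (assuming 2.3.0 is the version wanted everywhere; not verified to fix this issue):

    configurations.all {
        resolutionStrategy {
            // Sketch: force one kafka-clients version so the log4j2 Kafka
            // appender and the Spark Kafka integration resolve the same jar.
            force("org.apache.kafka:kafka-clients:2.3.0")
        }
    }

Note that even with a single jar on the classpath, the same class loaded by two different classloaders will still fail an instanceof check, which the second trace below suggests may be happening here.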
The application streams logs from the Spark driver, but not from the executors; it fails during initialization:

Exception in thread "main" org.apache.spark.sql.AnalysisException: java.lang.ExceptionInInitializerError: null;
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)
    at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:214)
    at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
    at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
    at org.apache.spark.sql.hive.HiveSessionStateBuilder.org$apache$spark$sql$hive$HiveSessionStateBuilder$$externalCatalog(HiveSessionStateBuilder.scala:39)
    at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
    at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog$lzycompute(SessionCatalog.scala:90)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog(SessionCatalog.scala:90)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.databaseExists(SessionCatalog.scala:243)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.org$apache$spark$sql$catalyst$catalog$SessionCatalog$$requireDbExists(SessionCatalog.scala:177)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:316)
    at org.apache.spark.sql.execution.command.CreateTableCommand.run(tables.scala:128)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
    at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
    at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
    at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
    at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
    at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)

Caused by: java.lang.ExceptionInInitializerError
    at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:152)
    at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:117)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
    at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:271)
    at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:384)
    at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:286)
    at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
    at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
    at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:215)
    at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
    at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
    ... 32 more
Caused by: org.apache.kafka.common.KafkaException: Failed to construct kafka producer
    at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:433)
    at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:298)
    at org.apache.logging.log4j.core.appender.mom.kafka.DefaultKafkaProducerFactory.newKafkaProducer(DefaultKafkaProducerFactory.java:40)
    at org.apache.logging.log4j.core.appender.mom.kafka.KafkaManager.startup(KafkaManager.java:131)
    at org.apache.logging.log4j.core.appender.mom.kafka.KafkaAppender.start(KafkaAppender.java:175)
    at org.apache.logging.log4j.core.config.AbstractConfiguration.start(AbstractConfiguration.java:265)
    at org.apache.logging.log4j.core.LoggerContext.setConfiguration(LoggerContext.java:545)
    at org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:617)
    at org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:634)
    at org.apache.logging.log4j.core.LoggerContext.start(LoggerContext.java:229)
    at org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:153)
    at org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:45)
    at org.apache.logging.log4j.LogManager.getContext(LogManager.java:194)
    at org.apache.logging.log4j.spi.AbstractLoggerAdapter.getContext(AbstractLoggerAdapter.java:122)
    at org.apache.logging.slf4j.Log4jLoggerFactory.getContext(Log4jLoggerFactory.java:43)
    at org.apache.logging.log4j.spi.AbstractLoggerAdapter.getLogger(AbstractLoggerAdapter.java:46)
    at org.apache.logging.slf4j.Log4jLoggerFactory.getLogger(Log4jLoggerFactory.java:29)
    at org.slf4j.LoggerFactory.getLogger(LoggerFactory.java:355)
    at org.apache.commons.logging.impl.SLF4JLogFactory.getInstance(SLF4JLogFactory.java:155)
    at org.apache.commons.logging.impl.SLF4JLogFactory.getInstance(SLF4JLogFactory.java:132)
    at org.apache.commons.logging.LogFactory.getLog(LogFactory.java:273)
    at org.apache.hadoop.hive.conf.HiveConf.<clinit>(HiveConf.java:66)
    ... 47 more
Caused by: org.apache.kafka.common.KafkaException: org.apache.kafka.common.serialization.ByteArraySerializer is not an instance of org.apache.kafka.common.serialization.Serializer
    at org.apache.kafka.common.config.AbstractConfig.getConfiguredInstance(AbstractConfig.java:372)
    at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:360)
    ... 68 more
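Reading this trace bottom-up: HiveConf's static initializer triggers commons-logging, then SLF4J, then a fresh Log4j2 context start, and this happens while HiveClientImpl is being constructed inside Spark's IsolatedClientLoader. The Kafka appender then builds its KafkaProducer under that isolated classloader, which can hold its own copy of the kafka-clients classes, so a ByteArraySerializer from one loader is "not an instance of" the Serializer interface from the other. If that reading is right, one untested mitigation is to tell Spark to load the Kafka packages through the classloader shared with the Hive client, via spark.sql.hive.metastore.sharedPrefixes. The sketch below keeps Spark's documented default JDBC prefixes and appends org.apache.kafka (an assumption to verify, not a confirmed fix):

    # spark-defaults.conf (or pass with --conf on spark-submit).
    # Default JDBC prefixes kept; org.apache.kafka appended as an experiment.
    spark.sql.hive.metastore.sharedPrefixes com.mysql.jdbc,org.postgresql,com.microsoft.sqlserver,oracle.jdbc,org.apache.kafka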

My log4j2 XML is as follows:

<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="INFO">
    <Appenders>
        <Console name="console-log" target="SYSTEM_OUT">
            <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n%ex"/>
        </Console>
        <Kafka name="Kafka" topic="topic_name">
            <Property name="bootstrap.servers">kafka_cluster_endpoint</Property>
            <JSONLayout compact="true" eventEol="true">
                <KeyValuePair key="tags" value="$${sys:app.config.env}"/>
                <KeyValuePair key="clusterEnv" value="$${sys:app.config.env}"/>
                <KeyValuePair key="name" value="sign-offer-streaming-$${sys:app.config.env}"/>
            </JSONLayout>
        </Kafka>
    </Appenders>

    <Scripts>
        <Script name="kafkaAppenderEnabled" language="nashorn"><![CDATA[
        var System = Java.type('java.lang.System'),
        Boolean = Java.type('java.lang.Boolean');
        Boolean.parseBoolean(System.getProperty('app.config.env', '').length()>0);
        ]]>
        </Script>
    </Scripts>
    <Loggers>
        <Root level="info">
            <AppenderRef ref="Kafka">
                <ScriptFilter onMatch="ACCEPT" onMisMatch="DENY">
                    <ScriptRef ref="kafkaAppenderEnabled" />
                </ScriptFilter>
            </AppenderRef>
            <AppenderRef ref="console-log" />
        </Root>
        <Logger name="org.apache.spark" level="INFO" additivity="false">
            <AppenderRef ref="Kafka">
                <ScriptFilter onMatch="ACCEPT" onMisMatch="DENY">
                    <ScriptRef ref="kafkaAppenderEnabled" />
                </ScriptFilter>
            </AppenderRef>
            <AppenderRef ref="console-log" />
        </Logger>
    </Loggers>
</Configuration>
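To check whether two classloaders are actually involved, a small hypothetical Scala diagnostic (the class names come straight from the trace; nothing here is from the original post) can log which loader serves each class. Run from the driver's main thread it will normally print the same loader twice; executed from code running under the isolated Hive client loader, a differing loader would confirm the mismatch:

    // Hypothetical diagnostic: report which classloader serves the two
    // classes from the failing check, using the thread context loader that
    // the Kafka client itself uses for class lookup.
    object KafkaClassLoaderCheck {
      def main(args: Array[String]): Unit = {
        val ccl   = Thread.currentThread().getContextClassLoader
        val ser   = ccl.loadClass("org.apache.kafka.common.serialization.ByteArraySerializer")
        val iface = ccl.loadClass("org.apache.kafka.common.serialization.Serializer")
        println(s"ByteArraySerializer <- ${ser.getClassLoader}")
        println(s"Serializer          <- ${iface.getClassLoader}")
        // false here would reproduce the "not an instance of" failure.
        println(s"assignable: ${iface.isAssignableFrom(ser)}")
      }
    }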

Source: https://stackoverflow.com/questions/57734367/log4j2-kafka-appender-does-not-work-with-spark-streaming-kafka-consumer
