【Storm】- Storm集成kafka

做~自己de王妃 提交于 2020-01-07 15:27:22

【推荐】2019 Java 开发者跳槽指南.pdf(吐血整理) >>>

Storm 流式处理Kafka数据


tips

老版本:官方文档

新版本:官方文档

Storm可集成组件:


测试代码

需求:给kafka数据添加日期

实际用途:可根据业务需求自定义处理逻辑,例如解析Nginx日志、按ip限制访问等

pom

<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven build descriptor for the storm_experience module: a Storm topology that reads from and writes to Kafka. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Inherits groupId and version from the parent project. -->
    <parent>
        <groupId>com.zhiwei</groupId>
        <artifactId>data_process_experience</artifactId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <artifactId>storm_experience</artifactId>
    <packaging>jar</packaging>

    <!-- Dependency versions, kept in one place and referenced below via ${...}. -->
    <properties>
        <version.storm>1.2.2</version.storm>
        <version.slf4j>1.7.2</version.slf4j>
        <version.lombok>1.18.4</version.lombok>
        <version.kafka>2.2.0</version.kafka>
        <version.storm-kafka>1.2.2</version.storm-kafka>
    </properties>

    <dependencies>
        <!-- Storm runtime and topology API (TopologyBuilder, LocalCluster, StormSubmitter, ...). -->
        <!-- NOTE(review): declared with default (compile) scope, which local-mode runs need; -->
        <!-- a jar submitted to a real cluster usually wants this as "provided" - confirm. -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>${version.storm}</version>
            <exclusions>
                <!-- NOTE(review): reason for excluding ring-cors is not stated here - confirm it is still required. -->
                <exclusion>
                    <groupId>ring-cors</groupId>
                    <artifactId>ring-cors</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- SLF4J binding that routes logging to log4j 1.2. -->
        <!-- NOTE(review): may clash with the logging binding shipped by storm-core - verify there is no "multiple bindings" warning. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${version.slf4j}</version>
        </dependency>
        <!-- Lombok: supplies the @Slf4j annotation used by TimeProcessBolt. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>${version.lombok}</version>
        </dependency>
        <!-- Kafka (Scala 2.12 build); the topology uses its consumer configuration constants. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.12</artifactId>
            <version>${version.kafka}</version>
        </dependency>
        <!-- Storm/Kafka integration: KafkaSpout (consumer side) and KafkaBolt (producer side). -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka-client</artifactId>
            <version>${version.storm-kafka}</version>
        </dependency>
    </dependencies>
</project>

时间格式化Bolt

package com.zhiwei.kafka;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.time.LocalDate;
import java.util.Map;


/**
 * Bolt that prefixes every non-empty incoming message with the current date.
 *
 * <p>Input tuples are read by position: index 0 is the message value and
 * index 1 is the Kafka topic it came from. For each non-empty message the bolt
 * emits a tuple with the fields {@code ("key", "message")}, where {@code key}
 * is today's date (ISO-8601, e.g. {@code 2020-01-07}) and {@code message} is
 * the original value prefixed with that same date.
 *
 * <p>Every input tuple is acked, and emitted tuples are anchored to their
 * input, so the bolt also behaves correctly when the topology runs with
 * acking enabled.
 */
@Slf4j
public class TimeProcessBolt extends BaseRichBolt {

    /** Collector used to pass processed tuples on to the next bolt. */
    private OutputCollector collector;

    /**
     * Stores the collector handed over by Storm; no other initialisation is needed.
     *
     * @param arg0      topology configuration (unused)
     * @param arg1      topology context (unused)
     * @param collector collector used to emit and ack tuples
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Logs the incoming tuple, forwards it with a date prefix when the message
     * is non-empty, and acks it in every case.
     *
     * @param tuple input tuple with the message at index 0 and the topic at index 1
     */
    @Override
    public void execute(Tuple tuple) {
        log.info("kafka主题:{}, 消息:{}", tuple.getValue(1), tuple.getValue(0));
        String message = (String) tuple.getValue(0);
        if (StringUtils.isNotEmpty(message)) {
            // Anchor the output to its input so a downstream failure can be traced back to the spout tuple.
            collector.emit(tuple, getTuple(tuple));
        }
        // BaseRichBolt does not ack automatically; without this every tuple would
        // eventually time out and be replayed once acking is enabled.
        collector.ack(tuple);
    }

    /**
     * Declares the output fields {@code "key"} and {@code "message"}.
     *
     * @param declare declarer supplied by Storm
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declare) {
        declare.declare(new Fields("key", "message"));
    }

    /**
     * Builds the output values for one input tuple.
     *
     * @param tuple input tuple whose value at index 0 is the message
     * @return values {@code (date, "【 date 】message")}
     */
    private Values getTuple(Tuple tuple) {
        // Read the clock once so key and prefix can never disagree around midnight.
        String nowTime = LocalDate.now().toString();
        return new Values(nowTime, String.format("【 %s 】%s", nowTime, String.valueOf(tuple.getValue(0))));
    }
}

Topology

package com.zhiwei.kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.bolt.KafkaBolt;
import org.apache.storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import org.apache.storm.kafka.bolt.selector.DefaultTopicSelector;
import org.apache.storm.kafka.spout.ByTopicRecordTranslator;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.Properties;

/**
 * Example topology: reads messages from one Kafka topic, prefixes each message
 * with the current date and writes the result to a second Kafka topic.
 *
 * <p>Pipeline: {@code kafkaSpout -> timeProcessBolt -> forwardToKafka}.
 *
 * <p>When started with at least one argument the topology is submitted to a
 * cluster under the name {@code args[0]}; without arguments it runs in an
 * in-process {@link LocalCluster} for 100 seconds and is then torn down.
 */
public class StormKafkaTopology {

    /** Topology name used in local mode. */
    private static final String TOPOLOGY_NAME = "storm-storm-case";
    /** Kafka topic the spout consumes from. */
    private static final String SPOUT_TOPIC = "kafka-spout-topic";
    /** Kafka topic the final bolt produces to. */
    private static final String BOLT_TOPIC = "kafka-bolt-topic";
    /** Kafka bootstrap address shared by the consumer and the producer. */
    private static final String BOOTSTRAP_SERVERS = "centos:9092";
    /** How long the local-mode topology runs before it is killed, in milliseconds. */
    private static final int LOCAL_RUN_MILLIS = 100000;

    /**
     * Builds the topology and either submits it to a cluster or runs it locally.
     *
     * @param args optional; {@code args[0]} is the topology name for cluster submission
     * @throws InvalidTopologyException if the topology definition is rejected
     * @throws AuthorizationException   if the submitter is not authorised
     * @throws AlreadyAliveException    if a topology with the same name is already running
     */
    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {

        TopologyBuilder builder = new TopologyBuilder();

        // Converts a Kafka record into a (value, topic) tuple; this is the default for any topic.
        Fields spoutFields = new Fields("value", "topic");
        ByTopicRecordTranslator<String, String> byTopicRecordTranslator =
                new ByTopicRecordTranslator<>((record) -> new Values(record.value(), record.topic()), spoutFields);

        // Explicit mapping for the consumed topic. It mirrors the default above; the topic
        // subscription itself is configured by KafkaSpoutConfig.builder below.
        byTopicRecordTranslator.forTopic(SPOUT_TOPIC, (record) -> new Values(record.value(), record.topic()), spoutFields);

        KafkaSpoutConfig<String, String> kafkaSpoutConfig = KafkaSpoutConfig
                .builder(BOOTSTRAP_SERVERS, SPOUT_TOPIC)
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "myConsumerGroup")
                // Start from the newest offset on the first poll; messages already in the topic are skipped.
                .setFirstPollOffsetStrategy(KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST)
                .setRecordTranslator(byTopicRecordTranslator)
                .build();

        builder.setSpout("kafkaSpout", new KafkaSpout<>(kafkaSpoutConfig));
        builder.setBolt("timeProcessBolt", new TimeProcessBolt()).shuffleGrouping("kafkaSpout");

        // Producer settings for the bolt that writes results back to Kafka.
        Properties props = new Properties();
        props.put("bootstrap.servers", BOOTSTRAP_SERVERS);
        props.put("acks", "1");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // The mapper's no-arg constructor reads the "key" and "message" tuple fields,
        // which are exactly the fields TimeProcessBolt declares.
        KafkaBolt<String, String> bolt = new KafkaBolt<String, String>()
                .withProducerProperties(props)
                .withTopicSelector(new DefaultTopicSelector(BOLT_TOPIC))
                .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper<>());
        builder.setBolt("forwardToKafka", bolt, 8).shuffleGrouping("timeProcessBolt");

        Config config = new Config();
        config.setNumWorkers(2);
        // Zero ackers disables Storm's tuple tracking, so failed tuples are not replayed.
        config.setNumAckers(0);

        if (args != null && args.length > 0) {
            // Submit to the cluster.
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());

        } else {
            // Run in local mode.
            LocalCluster cluster = new LocalCluster();
            try {
                cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
                Utils.sleep(LOCAL_RUN_MILLIS);
                cluster.killTopology(TOPOLOGY_NAME);
            } finally {
                // Always release the in-process cluster, even if submit or kill fails.
                cluster.shutdown();
            }
        }
    }
}

效果

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!