一、flume限速拦截
flume架构图
ExecSource exec数据源--实时收集
限速拦截器 代码实现 拦截器 Interceptor 接口
原理:根据上一个事件的字节数和配置的速率(字节/秒),计算出发送该事件本应花费的时间,再与距上一次发送实际经过的时间做对比。如果实际经过的时间 < 本应花费的时间,说明超速了,需要休眠补足差值后再放行当前事件。
1 package com.oldboy.umeng.flume.interceptor;
2
3 import java.util.List;
4
5 import org.apache.flume.Context;
6 import org.apache.flume.Event;
7 import org.apache.flume.interceptor.Interceptor;
8 import org.slf4j.Logger;
9 import org.slf4j.LoggerFactory;
10
11 /**
12 * 限速拦截器
13 */
14 public class SpeedInterceptor implements Interceptor {
15 private static final Logger logger = LoggerFactory.getLogger(org.apache.flume.interceptor.HostInterceptor.class);
16 //每秒发送的字节数
17 private int speed = 1024;
18
19 /**
20 */
21 private SpeedInterceptor(int speed) {
22 this.speed = speed ;
23 }
24
25 public void initialize() {
26 }
27
28 /**
29 * 拦截事件
30 */
31 //上次发送记录的毫秒数
32 long prevTimeMs = 0 ;
33 int prevDataLen = 0 ;
34 public Event intercept(Event event) {
35 System.out.println("开始拦截!");
36 byte[] data = event.getBody();
37 //第一次发送
38 if(prevTimeMs == 0){
39 prevTimeMs = System.currentTimeMillis() ;
40 prevDataLen = data.length ;
41 System.out.println("首次发送! : " + prevDataLen);
42 return event ;
43 }
44 else{
45 long now = System.currentTimeMillis() ;
46
47 //实际消耗的时间
48 long duration = now - prevTimeMs;
49
50 //本应该花费的时间
51 long shouldBeTimeMs = (long)((double)prevDataLen / speed * 1000) ;
52
53 if(duration < shouldBeTimeMs ){
54 try {
55 System.out.println("超速了 : 休眠 " + (shouldBeTimeMs - duration));
56 Thread.sleep(shouldBeTimeMs - duration);
57 } catch (InterruptedException e) {
58 e.printStackTrace();
59 }
60 }
61 prevTimeMs = System.currentTimeMillis() ;
62 prevDataLen = data.length ;
63 return event;
64 }
65 }
66
67 /**
68 * 拦截事件集合
69 */
70 public List<Event> intercept(List<Event> events) {
71 for (Event event : events) {
72 intercept(event);
73 }
74 return events;
75 }
76
77 public void close() {
78 }
79
80 /**
81 *
82 */
83 public static class Builder implements Interceptor.Builder {
84 private int speed ;
85
86 public Interceptor build() {
87 System.out.println("拦截器已创建!!");
88 return new SpeedInterceptor(speed);
89 }
90
91 //从配置文件得参数
92 public void configure(Context context) {
93 speed = context.getInteger("speed", 1024);
94 }
95 }
96 }
导成jar包 ---> 加到 /soft/flume/lib下
/soft/flume/lib
配置
/soft/flume/conf
umeng_nginx_to_kafka.conf
配置拦截器集合---flume配置文件(这个是完整版的---包括去重拦截器、自定义防丢失源、限速拦截器)
a1.sources = r1
a1.channels = c1
a1.sinks = k1
a1.sources.r1.type = com.oldboy.umeng.flume.UmengExecSource
a1.sources.r1.command = tail -F /usr/local/openresty/nginx/logs/access.log
a1.sources.r1.spooldir = /usr/local/openresty/nginx/logs
a1.sources.r1.prefix = access.log.
a1.sources.r1.suffix = COMPLETED
a1.sources.r1.redisHost = s101
a1.sources.r1.redisPort = 6379
a1.sources.r1.interceptors = i1 i2
a1.sources.r1.interceptors.i1.type = com.oldboy.umeng.flume.interceptor.DuplicateInterceptor$Builder
a1.sources.r1.interceptors.i1.redisHost = s101
a1.sources.r1.interceptors.i1.redisPort = 6379
a1.sources.r1.interceptors.i1.expire = 3600
a1.sources.r1.interceptors.i2.type = com.oldboy.umeng.flume.interceptor.SpeedInterceptor$Builder
# 限速:每秒最多发送512个字节(properties文件不支持行尾注释,注释必须单独成行,否则会被当作参数值的一部分)
a1.sources.r1.interceptors.i2.speed = 512
a1.channels.c1.type = memory
a1.channels.c1.capacity = 10000
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = big12-umeng-raw-logs
a1.sinks.k1.kafka.bootstrap.servers = s102:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
a1.sinks.k1.kafka.producer.linger.ms = 0
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
启动flume
启动kafka
。。。。。。
实现拦截限速
二、flume自定义源防丢失---解决flume还未收集完日志便已经滚动,数据丢失问题(检查 access.log.* 滚动后的文件 依次比对key是否在redis中)
防重、防丢失
改造exec源 :监控目录、收集新文件---增加个守护线程不断监控目录
redis 3号库维护一个key ,防止重复收集。key期限:3天
改造ExecSource 防丢失
UmengExecSource---监控目录、收集新文件---增加个守护线程不断监控目录
自定义数据源UmengExecSource代码太多 请看链接详细分析--UmengExecSource
去重拦截器---DuplicateInterceptor 防重复
配置redis中 key的有效期:默认三天(通过expire参数配置,单位为秒;上面的配置文件中设置为3600秒)---
redis.expire(key , expire) ;
expire = context.getInteger("expire", 3 * 24 * 60 * 60);
1 package com.oldboy.umeng.flume.interceptor;
2
3 import org.apache.flume.Context;
4 import org.apache.flume.Event;
5 import org.apache.flume.interceptor.Interceptor;
6 import org.slf4j.Logger;
7 import org.slf4j.LoggerFactory;
8 import redis.clients.jedis.Jedis;
9
10 import java.util.List;
11
12 /**
13 * 去重拦截器
14 */
15 public class DuplicateInterceptor implements Interceptor {
16 private static final Logger logger = LoggerFactory.getLogger(org.apache.flume.interceptor.HostInterceptor.class);
17 //过期秒数
18 private int expire ;
19 private String redisHost ;
20 private int redisPort ;
21 private int redisDB ;
22
23 private Jedis redis ;
24
25 /**
26 * 构造
27 */
28 private DuplicateInterceptor(String redisHost ,int redisPort ,int redisDB, int expire) {
29 this.redisHost = redisHost ;
30 this.redisPort = redisPort ;
31 this.redisDB = redisDB ;
32 this.expire = expire ;
33 }
34
35 public void initialize() {
36 System.out.println("去重拦截器初始化!!");
37 redis = new Jedis(redisHost ,redisPort) ;
38 redis.select(redisDB) ;
39 }
40
41 /**
42 * 拦截事件
43 */
44 public Event intercept(Event event) {
45 String line = new String(event.getBody()) ;
46 String key = line.substring(0,line.lastIndexOf("#"));
47 System.out.println("去重的key : " + key);
48 if(redis.exists(key)){
49 System.out.println(key + "存在了!");
50 return null ;
51 }
52 else{
53 redis.set(key , "x") ;
54 redis.expire(key , expire) ;
55 System.out.println(key + " : 不存在!!");
56 return event ;
57 }
58 }
59
60 /**
61 * 拦截事件集合
62 */
63 public List<Event> intercept(List<Event> events) {
64 for (Event event : events) {
65 intercept(event);
66 }
67 return events;
68 }
69
70 public void close() {
71 }
72
73 /**
74 *
75 */
76 public static class Builder implements Interceptor.Builder {
77 private String redisHost ;
78 private int redisPort ;
79 private int redisDB ;
80 private int expire ;
81
82 public Interceptor build() {
83 System.out.println("拦截器已创建!!");
84 return new DuplicateInterceptor(redisHost , redisPort ,redisDB,expire);
85 }
86
87 //从配置文件得参数
88 public void configure(Context context) {
89 redisHost = context.getString("redisHost", "s101");
90 redisPort = context.getInteger("redisPort", 6379);
91 expire = context.getInteger("expire", 3 * 24 * 60 * 60);
92 redisDB = context.getInteger("redisDB", 3);
93 }
94 }
95 }
导出jar包
jar包 内容
jar包放 /soft/flume/lib
配置flume配置文件
/soft/flume/conf
umeng_nginx_to_kafka.conf
配置拦截器集合---flume配置文件
见上面flume配置文件
来源:oschina
链接:https://my.oschina.net/u/4418085/blog/3740887