Flink 窗口之滑动窗口(Scala 版本)
代码
package windows
import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
/**
 * Sliding-window word count.
 *
 * Reads comma-separated words from a text socket, keys them by word and sums
 * the per-word counts over a sliding time window (size 10 seconds, slide 5 seconds).
 */
object SlidingWindows {

  /**
   * Builds the streaming pipeline and submits it for execution.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Window length is 10 seconds and a new window starts every 5 seconds,
    // so consecutive windows overlap.
    val windowSize = Time.seconds(10)
    val slideInterval = Time.seconds(5)

    val counts: DataStream[(String, Int)] = env
      .socketTextStream("t1", 9999)
      .flatMap(_.split(","))
      .map(word => (word, 1))
      .keyBy(0) // key by tuple field 0, i.e. the word
      .timeWindow(windowSize, slideInterval)
      .sum(1) // sum tuple field 1, i.e. the count

    counts.print()
    env.execute("SlidingWindows")
  }
}
输入数据
[root@t1 ~]# nc -lk 9999
a,a,a
b,b,b
c,c,c
d,d,d
e,e,e
f,f,f
运行结果
6> (a,3)
6> (a,3)
2> (b,3)
4> (c,3)
1> (e,3)
4> (c,3)
5> (d,3)
2> (b,3)
5> (d,3)
1> (e,3)
2> (f,3)
2> (f,3)
总结
- 滑动窗口的窗口大小(10秒)大于滑动步长(5秒),相邻窗口互相重叠,因此同一条数据会被计入多个窗口,结果中会重复出现(如上面每个单词都输出了两次)
- 应用场景: 每隔30分钟统计最近1个小时的数据(窗口大小1小时,滑动步长30分钟)
来源:CSDN
作者:IT_BULL
链接:https://blog.csdn.net/IT_BULL/article/details/104200893