MapReduce算法(计算每个相同IMSI(国际移动用户标识)、TAC(跟踪区域码)的上行流量和,下行流量和,总流量和)

一笑奈何 提交于 2019-12-20 12:05:44

需求:

计算每个相同IMSI(国际移动用户标识)、TAC(跟踪区域码)的上行流量和,下行流量和,总流量和。

需求说明:从S1U数据中提取VOLUME字段(数据流量,即上行流量和下行流量)以及IMSI、TAC两个字段,按相同的IMSI、TAC分组对VOLUME求和(上行流量和、下行流量和、总流量和),并将结果保存为新的文件。

数据:

2604|731|11|fe58db672c0fdf509b00000000010000|6|460028593519735|3520220675936518|15859328363|1|100.78.245.86|100.78.46.134|2152|2152|162597888|1802797180|58211|121570817|cmnet.mnc002.mcc460.gprs|103|1480723076856|1480723079334|2|1|568|255|2|10.40.123.144|FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF|58874|255|183.230.77.151|FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF|80|1668|21348|21|23|0|0|0|0|39|29|0|0|10|103|4096|1360|1|0|1|3|5|200|103|160|205|vweixinthumb.tc.qq.com|http://vweixinthumb.tc.qq.com/150/20250/snsvideodownload?filekey=30270201010420301e0202009604025348041046476a6bb3aabfe3ab7e8f9d6289379d02024d7b0400&bizid=1023&hy=SH&fileparam=302c020101042530230204a14b67d8020457c0f37d02024f1a02031e8d7f02030f424002040670370a0201000400?tp=wxpc&length=1136&width=640||WeChat/6.3.24.17 CFNetwork/758.5.3 Darwin/15.6.0|image/jpg|http://weixin.qq.com/?version=369301521&uin=2993776409&nettype=0&scene=album_friend||19835|1|255|255|||3|0|2478|0|1

数据说明:

数据列的分隔符为“|”,

每条记录中(从1开始计数)第6个字段是IMSI,第16个字段是TAC,第34个字段是上行流量,第35个字段是下行流量;按“|”切分后对应的数组下标分别为5、15、33、34。

代码实现

package GN.demo02;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * MapReduce job that aggregates S1U traffic volume per (IMSI, TAC) pair.
 *
 * <p>Each input line is a {@code |}-delimited S1U record. The mapper extracts the IMSI
 * (6th field), the TAC (16th field), the uplink volume (34th field) and the downlink
 * volume (35th field). The reducer sums the uplink, downlink and total volume for every
 * distinct IMSI/TAC key and writes one line per key.
 *
 * <p>Created by 一个蔡狗 on 2019/12/2.
 */
public class S1u {

    /** Zero-based index of the IMSI field in a split record. */
    private static final int IMSI_INDEX = 5;
    /** Zero-based index of the TAC field in a split record. */
    private static final int TAC_INDEX = 15;
    /** Zero-based index of the uplink volume field in a split record. */
    private static final int UPLINK_INDEX = 33;
    /** Zero-based index of the downlink volume field in a split record. */
    private static final int DOWNLINK_INDEX = 34;
    /** Smallest number of fields a record needs so that every index above is readable. */
    private static final int MIN_FIELD_COUNT = DOWNLINK_INDEX + 1;

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "S1u";

    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT =
            "E:\\2019-传智项目\\企业需求实战\\01_湖南移动项目需求\\数据\\硬采数据\\data\\s1u";
    /** Output path used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT =
            "E:\\2019-传智项目\\企业需求实战\\01_湖南移动项目需求\\数据\\硬采数据\\data\\S1U_OutPut";

    /**
     * Tells whether {@code text} can be parsed by {@link Long#parseLong(String)}.
     *
     * @param text candidate numeric field
     * @return {@code true} when the text is a valid {@code long} literal
     */
    private static boolean isLong(String text) {
        try {
            Long.parseLong(text);
            return true;
        } catch (NumberFormatException ignored) {
            // Not a number: the caller decides how to treat the record.
            return false;
        }
    }

    /**
     * Emits {@code (IMSI + TAC, "uplink,downlink")} for every well-formed S1U record.
     * Records that are too short or whose volume fields are not numeric are skipped and
     * counted instead of failing the whole job later in the reducer.
     */
    static class S1uMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused across calls; context.write serializes the content immediately.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Parses one input line and emits its key and volume pair.
         *
         * @param key     offset of the line in the input (unused)
         * @param value   the raw {@code |}-delimited record
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            if (value == null) {
                return;
            }

            // Columns are separated by "|".
            String[] fields = value.toString().split("\\|");

            // The 35th field (index 34) is the last one read, so 35 fields are enough.
            if (fields.length < MIN_FIELD_COUNT) {
                context.getCounter(COUNTER_GROUP, "SHORT_RECORDS").increment(1);
                return;
            }

            String uplink = fields[UPLINK_INDEX].trim();
            String downlink = fields[DOWNLINK_INDEX].trim();

            // Validate here so a single bad record cannot crash the reducer's parseLong.
            if (!isLong(uplink) || !isLong(downlink)) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_VOLUME_RECORDS").increment(1);
                return;
            }

            outKey.set("IMSI\r\t" + fields[IMSI_INDEX] + "\r\tTAC--->" + fields[TAC_INDEX]);
            outValue.set(uplink + "," + downlink);
            context.write(outKey, outValue);
        }
    }

    /**
     * Sums the uplink and downlink volumes of all records sharing one IMSI/TAC key and
     * writes the uplink sum, the downlink sum and their total.
     */
    static class S1uReduce extends Reducer<Text, Text, Text, Text> {

        // Reused across calls; context.write serializes the content immediately.
        private final Text outValue = new Text();

        /**
         * Aggregates the volumes for one key.
         *
         * @param key     the IMSI + TAC key produced by the mapper
         * @param values  {@code "uplink,downlink"} pairs produced by the mapper
         * @param context task context used to emit the aggregated line
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            long uplinkSum = 0;    // sum of uplink volume
            long downlinkSum = 0;  // sum of downlink volume

            for (Text value : values) {
                String[] volumes = value.toString().split(",");
                uplinkSum += Long.parseLong(volumes[0]);
                downlinkSum += Long.parseLong(volumes[1]);
            }

            long totalSum = uplinkSum + downlinkSum;  // sum of total volume

            outValue.set("\r\t上行流量和-->" + uplinkSum + "\r\t" + "下行流量和-->" + downlinkSum + "\r\t" + "总流量的和-->" + totalSum);
            context.write(key, outValue);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; whichever is missing falls back to the built-in default
     * @throws IOException            if the job cannot be submitted or finishes unsuccessfully
     * @throws ClassNotFoundException propagated from job submission
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(S1u.class);
        job.setJobName("S1uMapperOutput");

        job.setMapperClass(S1uMapper.class);
        job.setReducerClass(S1uReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Surface a failed job to the caller instead of returning as if it had succeeded.
        if (!job.waitForCompletion(true)) {
            throw new IOException("MapReduce job '" + job.getJobName() + "' failed");
        }
    }

}

 

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!