MapReduce outputs lines from input file besides the expected result

落爺英雄遲暮 提交于 2020-01-06 11:04:11

问题


I managed to implement a MapReduce job in Java. It works for my case, but for some reason the output contains, besides the desired result, some data from the input file, and I can't figure out why.

Here is the class; I left a comment in the code at the line that causes the problem. If I delete that line, the job doesn't work anymore, but with that line in place I get this awkward output (containing data from my input plus the desired output).

The problem is in the "reduce" method near the bottom - I left a comment there.

public class SensorMapReducer
{
    public static class SensorMapper extends Mapper<Object, Text, Text, Text>{

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException
        {
            String line = value.toString();

            if(line.startsWith("s"))
                processSensorLine(line,context);
            else
                processSimulatorLine(line,context);
        }

        private void processSensorLine(String line, Mapper<Object, Text, Text, Text>.Context context) throws IOException, InterruptedException
        {
            String[] values = line.split(",");

            Calendar gc = DatatypeConverter.parseDateTime(values[1]);

            Text bucket = new Text(String.format("%4d-%02d-%02dT%02d:%02d:00Z",
                    gc.get(Calendar.YEAR),
                    gc.get(Calendar.MONTH) + 1,
                    gc.get(Calendar.DAY_OF_MONTH),
                    gc.get(Calendar.HOUR),
                    gc.get(Calendar.MINUTE)));

            context.write(bucket, new Text("ACC," + values[2] + "," + values[3] + "," + values[4]));
            context.write(bucket, new Text("GYRO," + values[5] + "," + values[6] + "," + values[7]));
            context.write(bucket, new Text("MAG," + values[8] + "," + values[9] + "," + values[10]));
            context.write(bucket, new Text("REST," + values[11] + "," + values[12] + "," + values[13] + "," + values[14]));
        }

        private void processSimulatorLine(String line, Mapper<Object, Text, Text, Text>.Context context) throws IOException, InterruptedException
        {
            String[] values = line.split(",");

            Calendar gc = DatatypeConverter.parseDateTime(values[1]);

            Text bucket = new Text(String.format("%4d-%02d-%02dT%02d:%02d:00Z",
                    gc.get(Calendar.YEAR),
                    gc.get(Calendar.MONTH) + 1,
                    gc.get(Calendar.DAY_OF_MONTH),
                    gc.get(Calendar.HOUR),
                    gc.get(Calendar.MINUTE)));

            context.write(bucket, new Text("PF" + values[6]));
        }
    }

    public static class SensorReducer extends Reducer<Text, Text, Text, Text>
    {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            XYZ acc = new XYZ(), gyro = new XYZ();
            Single mlux = new Single(), temp = new Single(), pressure = new Single(), rh = new Single();
            int pass = 0, fail = 0;

            for(Text value : values)
            {
                String val = value.toString();
                if(val.startsWith("ACC"))
                    acc.process(val);
                else if(val.startsWith("GYRO"))
                    gyro.process(val);
                else if(val.startsWith("REST"))
                {
                    String[] vals = val.split(",");
                    mlux.process(vals[1]);
                    temp.process(vals[2]);
                    pressure.process(vals[3]);
                    rh.process(vals[4]);
                }
                else if(val.startsWith("PF"))
                {
                    String pf = val.substring(2);

                    if(pf.equalsIgnoreCase("1"))
                        pass++;
                    else
                        fail++;
                }

                // On my environment, if I don't do this it fails for no reason that I can see, but
                // I DO NOT WANT THIS LINE TO BE WRITTEN!!!!
                context.write(key,new Text(val));
            }

            StringBuffer sb = new StringBuffer();
            acc.append(sb);
            sb.append('\t');

            gyro.append(sb);
            sb.append('\t');

            mlux.append(sb);
            sb.append('\t');

            temp.append(sb);
            sb.append('\t');

            pressure.append(sb);
            sb.append('\t');

            rh.append(sb);
            sb.append('\t');

            sb.append(pass);
            sb.append('\t');

            sb.append(fail);

            context.write(key, new Text(sb.toString()));
        }
    }

    private static class Single {

        private int val, count, min, max;

        private void process(String val)
        {
            int v = Double.valueOf(val).intValue();

            this.val += v;
            this.count++;

            max = max > v ? max : v;
            min = min > v ? min : v;
        }

        public void append(StringBuffer sb)
        {
            if(count > 0)
                sb.append(val/count);
            else
                sb.append("");

            sb.append('\t');
            sb.append(min);
            sb.append('\t');
            sb.append(max);
        }
    }

    private static class XYZ {
        double x, y, z;
        double xMax = 0.0, yMax = 0.0, zMax = 0.0;
        double xMin = 0.0, yMin = 0.0, zMin = 0.0;
        int count = 0;

        public void process(String val)
        {
            String[] vals = val.split(",");

            double x = Double.valueOf(vals[1]);
            double y = Double.valueOf(vals[2]);
            double z = Double.valueOf(vals[3]);

            xMax = xMax < x ? x : xMax;
            yMax = yMax < y ? y : yMax;
            zMax = zMax < z ? z : zMax;

            xMin = xMin < x ? x : xMin;
            yMin = yMin < y ? y : yMin;
            zMin = zMin < z ? z : zMin;

            this.x += x;
            this.y += y;
            this.z += z;

            count++;
        }

        public void append(StringBuffer sb)
        {
            sb.append(x/count);
            sb.append('\t');
            sb.append(xMin);
            sb.append('\t');
            sb.append(xMax);
            sb.append('\t');
            sb.append(y/count);
            sb.append('\t');
            sb.append(yMin);
            sb.append('\t');
            sb.append(yMax);
            sb.append('\t');
            sb.append(z/count);
            sb.append('\t');
            sb.append(zMin);
            sb.append('\t');
            sb.append(zMax);
        }
    }

    public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");

        job.setJarByClass(SensorMapper.class);

        job.setMapperClass(SensorMapper.class);
        job.setCombinerClass(SensorReducer.class);
        job.setReducerClass(SensorReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

And here is the output file:

2018-12-06T08:36:00Z    ACC,19.000000,-18.000000,1035.000000
2018-12-06T08:36:00Z    MAG,-69.000000,-28.000000,28.000000
2018-12-06T08:36:00Z    GYRO,-1708.000000,4638.000000,4882.000000
2018-12-06T08:36:00Z    MAG,-69.000000,-28.000000,27.000000
2018-12-06T08:36:00Z    ACC,21.000000,-14.000000,1030.000000
2018-12-06T08:36:00Z    REST,63360.000000,27610,98384,34
2018-12-06T08:36:00Z    MAG,-67.000000,-28.000000,27.000000
2018-12-06T08:36:00Z    GYRO,2563.000000,4638.000000,-2197.000000
2018-12-06T08:36:00Z    GYRO,-2563.000000,3662.000000,-3906.000000
2018-12-06T08:36:00Z    REST,63360.000000,27730,98391,34
2018-12-06T08:36:00Z    ACC,17.000000,-13.000000,1034.000000
2018-12-06T08:36:00Z    ACC,16.000000,-15.000000,1030.000000
2018-12-06T08:36:00Z    ACC,23.000000,-15.000000,1030.000000
2018-12-06T08:36:00Z    REST,60480.000000,27570,98391,34
2018-12-06T08:36:00Z    MAG,-66.000000,-29.000000,27.000000
2018-12-06T08:36:00Z    GYRO,1098.000000,1220.000000,8422.000000
2018-12-06T08:36:00Z    REST,63360.000000,27650,98381,34
2018-12-06T08:36:00Z    MAG,-69.000000,-29.000000,27.000000
2018-12-06T08:36:00Z    GYRO,-976.000000,1098.000000,5493.000000
2018-12-06T08:36:00Z    REST,63360.000000,27700,98391,34
2018-12-06T08:36:00Z    19.2    23.0    23.0    -15.0   0.0 0.0 1031.8  1035.0  1035.0  -317.2  2563.0  2563.0  3051.2  4638.0  4638.0  2538.8  8422.0  8422.0  62784   63360   63360   27652   27730   27730   98387   98391   98391   34  34  34  0   0
2018-12-06T08:36:00Z    19.2    23.0    23.0    -15.0   0.0 0.0 1031.8  1035.0  1035.0  -317.2  2563.0  2563.0  3051.2  4638.0  4638.0  2538.8  8422.0  8422.0  62784   63360   63360   27652   27730   27730   98387   98391   98391   34  34  34  0   0

Basically, I only need the last two rows. That is the output I am looking for, but I don't know why I have that "input" data in front of it.

PS: If I remove the line "context.write(key,new Text(val));" from the reduce method, all the computed values written to the output are 0.

Thank you!

来源:https://stackoverflow.com/questions/54223303/mapreduce-outputs-lines-from-input-file-besides-the-expected-result

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!