问题
I managed to implement a MapReduce job in Java. It works for my case, but for some reason the output contains, besides the desired result, some data from the input file, and I can't figure out why.
Here is the class; I left a comment in the code on the line that causes the problem. If I delete that line, the job no longer works, but with the line in place I get this awkward output (containing data from my input plus the desired output).
The problem is in "reduce" method at the bottom - I left a comment there
public class SensorMapReducer
{
public static class SensorMapper extends Mapper<Object, Text, Text, Text>{
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
{
String line = value.toString();
if(line.startsWith("s"))
processSensorLine(line,context);
else
processSimulatorLine(line,context);
}
private void processSensorLine(String line, Mapper<Object, Text, Text, Text>.Context context) throws IOException, InterruptedException
{
String[] values = line.split(",");
Calendar gc = DatatypeConverter.parseDateTime(values[1]);
Text bucket = new Text(String.format("%4d-%02d-%02dT%02d:%02d:00Z",
gc.get(Calendar.YEAR),
gc.get(Calendar.MONTH) + 1,
gc.get(Calendar.DAY_OF_MONTH),
gc.get(Calendar.HOUR),
gc.get(Calendar.MINUTE)));
context.write(bucket, new Text("ACC," + values[2] + "," + values[3] + "," + values[4]));
context.write(bucket, new Text("GYRO," + values[5] + "," + values[6] + "," + values[7]));
context.write(bucket, new Text("MAG," + values[8] + "," + values[9] + "," + values[10]));
context.write(bucket, new Text("REST," + values[11] + "," + values[12] + "," + values[13] + "," + values[14]));
}
private void processSimulatorLine(String line, Mapper<Object, Text, Text, Text>.Context context) throws IOException, InterruptedException
{
String[] values = line.split(",");
Calendar gc = DatatypeConverter.parseDateTime(values[1]);
Text bucket = new Text(String.format("%4d-%02d-%02dT%02d:%02d:00Z",
gc.get(Calendar.YEAR),
gc.get(Calendar.MONTH) + 1,
gc.get(Calendar.DAY_OF_MONTH),
gc.get(Calendar.HOUR),
gc.get(Calendar.MINUTE)));
context.write(bucket, new Text("PF" + values[6]));
}
}
public static class SensorReducer extends Reducer<Text, Text, Text, Text>
{
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
XYZ acc = new XYZ(), gyro = new XYZ();
Single mlux = new Single(), temp = new Single(), pressure = new Single(), rh = new Single();
int pass = 0, fail = 0;
for(Text value : values)
{
String val = value.toString();
if(val.startsWith("ACC"))
acc.process(val);
else if(val.startsWith("GYRO"))
gyro.process(val);
else if(val.startsWith("REST"))
{
String[] vals = val.split(",");
mlux.process(vals[1]);
temp.process(vals[2]);
pressure.process(vals[3]);
rh.process(vals[4]);
}
else if(val.startsWith("PF"))
{
String pf = val.substring(2);
if(pf.equalsIgnoreCase("1"))
pass++;
else
fail++;
}
// On my environment, if I don't do this it fails for no reason that I can see, but
// I DO NOT WANT THIS LINE TO BE WRITTEN!!!!
context.write(key,new Text(val));
}
StringBuffer sb = new StringBuffer();
acc.append(sb);
sb.append('\t');
gyro.append(sb);
sb.append('\t');
mlux.append(sb);
sb.append('\t');
temp.append(sb);
sb.append('\t');
pressure.append(sb);
sb.append('\t');
rh.append(sb);
sb.append('\t');
sb.append(pass);
sb.append('\t');
sb.append(fail);
context.write(key, new Text(sb.toString()));
}
}
private static class Single {
private int val, count, min, max;
private void process(String val)
{
int v = Double.valueOf(val).intValue();
this.val += v;
this.count++;
max = max > v ? max : v;
min = min > v ? min : v;
}
public void append(StringBuffer sb)
{
if(count > 0)
sb.append(val/count);
else
sb.append("");
sb.append('\t');
sb.append(min);
sb.append('\t');
sb.append(max);
}
}
private static class XYZ {
double x, y, z;
double xMax = 0.0, yMax = 0.0, zMax = 0.0;
double xMin = 0.0, yMin = 0.0, zMin = 0.0;
int count = 0;
public void process(String val)
{
String[] vals = val.split(",");
double x = Double.valueOf(vals[1]);
double y = Double.valueOf(vals[2]);
double z = Double.valueOf(vals[3]);
xMax = xMax < x ? x : xMax;
yMax = yMax < y ? y : yMax;
zMax = zMax < z ? z : zMax;
xMin = xMin < x ? x : xMin;
yMin = yMin < y ? y : yMin;
zMin = zMin < z ? z : zMin;
this.x += x;
this.y += y;
this.z += z;
count++;
}
public void append(StringBuffer sb)
{
sb.append(x/count);
sb.append('\t');
sb.append(xMin);
sb.append('\t');
sb.append(xMax);
sb.append('\t');
sb.append(y/count);
sb.append('\t');
sb.append(yMin);
sb.append('\t');
sb.append(yMax);
sb.append('\t');
sb.append(z/count);
sb.append('\t');
sb.append(zMin);
sb.append('\t');
sb.append(zMax);
}
}
public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(SensorMapper.class);
job.setMapperClass(SensorMapper.class);
job.setCombinerClass(SensorReducer.class);
job.setReducerClass(SensorReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
And here is the output file:
2018-12-06T08:36:00Z ACC,19.000000,-18.000000,1035.000000
2018-12-06T08:36:00Z MAG,-69.000000,-28.000000,28.000000
2018-12-06T08:36:00Z GYRO,-1708.000000,4638.000000,4882.000000
2018-12-06T08:36:00Z MAG,-69.000000,-28.000000,27.000000
2018-12-06T08:36:00Z ACC,21.000000,-14.000000,1030.000000
2018-12-06T08:36:00Z REST,63360.000000,27610,98384,34
2018-12-06T08:36:00Z MAG,-67.000000,-28.000000,27.000000
2018-12-06T08:36:00Z GYRO,2563.000000,4638.000000,-2197.000000
2018-12-06T08:36:00Z GYRO,-2563.000000,3662.000000,-3906.000000
2018-12-06T08:36:00Z REST,63360.000000,27730,98391,34
2018-12-06T08:36:00Z ACC,17.000000,-13.000000,1034.000000
2018-12-06T08:36:00Z ACC,16.000000,-15.000000,1030.000000
2018-12-06T08:36:00Z ACC,23.000000,-15.000000,1030.000000
2018-12-06T08:36:00Z REST,60480.000000,27570,98391,34
2018-12-06T08:36:00Z MAG,-66.000000,-29.000000,27.000000
2018-12-06T08:36:00Z GYRO,1098.000000,1220.000000,8422.000000
2018-12-06T08:36:00Z REST,63360.000000,27650,98381,34
2018-12-06T08:36:00Z MAG,-69.000000,-29.000000,27.000000
2018-12-06T08:36:00Z GYRO,-976.000000,1098.000000,5493.000000
2018-12-06T08:36:00Z REST,63360.000000,27700,98391,34
2018-12-06T08:36:00Z 19.2 23.0 23.0 -15.0 0.0 0.0 1031.8 1035.0 1035.0 -317.2 2563.0 2563.0 3051.2 4638.0 4638.0 2538.8 8422.0 8422.0 62784 63360 63360 27652 27730 27730 98387 98391 98391 34 34 34 0 0
2018-12-06T08:36:00Z 19.2 23.0 23.0 -15.0 0.0 0.0 1031.8 1035.0 1035.0 -317.2 2563.0 2563.0 3051.2 4638.0 4638.0 2538.8 8422.0 8422.0 62784 63360 63360 27652 27730 27730 98387 98391 98391 34 34 34 0 0
Basically, I only need the last two rows. That is the output I am looking for, but I don't know why the "input" data appears in front of it.
PS: If I don't write this line "context.write(key,new Text(val));" in the reduce method, all the computed values written in the output are 0.
Thank you!
来源:https://stackoverflow.com/questions/54223303/mapreduce-outputs-lines-from-input-file-besides-the-expected-result