|
@@ -32,7 +32,7 @@ public class WordCount {
|
|
|
}
|
|
}
|
|
|
if (str.length() > 0) {
|
|
if (str.length() > 0) {
|
|
|
word.set(str);
|
|
word.set(str);
|
|
|
- context.write(word, one);
|
|
|
|
|
|
|
+ context.write(word, one); // output pair
|
|
|
str = "";
|
|
str = "";
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -42,7 +42,7 @@ public class WordCount {
|
|
|
}
|
|
}
|
|
|
if (str.length() > 0) {
|
|
if (str.length() > 0) {
|
|
|
word.set(str);
|
|
word.set(str);
|
|
|
- context.write(word, one);
|
|
|
|
|
|
|
+ context.write(word, one); // output pair
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -58,7 +58,7 @@ public class WordCount {
|
|
|
for (IntWritable val : values) {
|
|
for (IntWritable val : values) {
|
|
|
sum += val.get();
|
|
sum += val.get();
|
|
|
}
|
|
}
|
|
|
- record.put(key.toString(), record.getOrDefault(key.toString(), 0) + sum); // update map, don't write context here
|
|
|
|
|
|
|
+ record.put(key.toString(), record.getOrDefault(key.toString(), 0) + sum); // update map, don't output here
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/* cleanup function executed after reducing */
|
|
/* cleanup function executed after reducing */
|
|
@@ -75,7 +75,7 @@ public class WordCount {
|
|
|
for (HashMap.Entry<String, Integer> itr : list) {
|
|
for (HashMap.Entry<String, Integer> itr : list) {
|
|
|
word.set(itr.getKey());
|
|
word.set(itr.getKey());
|
|
|
result.set(itr.getValue());
|
|
result.set(itr.getValue());
|
|
|
- context.write(word, result); // write context here
|
|
|
|
|
|
|
+ context.write(word, result); // output here
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -85,6 +85,7 @@ public class WordCount {
|
|
|
conf.set("mapreduce.output.textoutputformat.separator", " ");
|
|
conf.set("mapreduce.output.textoutputformat.separator", " ");
|
|
|
Job job = Job.getInstance(conf, "word count");
|
|
Job job = Job.getInstance(conf, "word count");
|
|
|
job.setJarByClass(WordCount.class);
|
|
job.setJarByClass(WordCount.class);
|
|
|
|
|
+ job.setNumReduceTasks(5); // number of reducers
|
|
|
job.setMapperClass(TokenizerMapper.class); // map class
|
|
job.setMapperClass(TokenizerMapper.class); // map class
|
|
|
job.setCombinerClass(IntSumReducer.class); // combine class
|
|
job.setCombinerClass(IntSumReducer.class); // combine class
|
|
|
job.setReducerClass(IntSumReducer.class); // reduce class
|
|
job.setReducerClass(IntSumReducer.class); // reduce class
|