|
@@ -19,12 +19,13 @@ public class WordCount {
|
|
|
private final static IntWritable one = new IntWritable(1);
|
|
private final static IntWritable one = new IntWritable(1);
|
|
|
private Text word = new Text();
|
|
private Text word = new Text();
|
|
|
|
|
|
|
|
|
|
+ /* map function */
|
|
|
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
|
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
|
|
String line = value.toString();
|
|
String line = value.toString();
|
|
|
String str = new String("");
|
|
String str = new String("");
|
|
|
for (int i = 0; i < line.length(); i++) {
|
|
for (int i = 0; i < line.length(); i++) {
|
|
|
if (Character.isLetter(line.charAt(i)) || str.length() > 0 && line.charAt(i) == '\'') {
|
|
if (Character.isLetter(line.charAt(i)) || str.length() > 0 && line.charAt(i) == '\'') {
|
|
|
- str += Character.toLowerCase(line.charAt(i));
|
|
|
|
|
|
|
+ str += Character.toLowerCase(line.charAt(i)); // extend word
|
|
|
} else if (str.length() > 0) {
|
|
} else if (str.length() > 0) {
|
|
|
word.set(str);
|
|
word.set(str);
|
|
|
context.write(word, one);
|
|
context.write(word, one);
|
|
@@ -39,22 +40,24 @@ public class WordCount {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
|
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
|
|
- private HashMap<String, Integer> record = new HashMap<String, Integer>();
|
|
|
|
|
|
|
+ private HashMap<String, Integer> record = new HashMap<String, Integer>(); // store count in map, used for final sort
|
|
|
private Text word = new Text();
|
|
private Text word = new Text();
|
|
|
private IntWritable result = new IntWritable();
|
|
private IntWritable result = new IntWritable();
|
|
|
|
|
|
|
|
|
|
+ /* reduce function */
|
|
|
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
|
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
|
|
int sum = 0;
|
|
int sum = 0;
|
|
|
for (IntWritable val : values) {
|
|
for (IntWritable val : values) {
|
|
|
sum += val.get();
|
|
sum += val.get();
|
|
|
}
|
|
}
|
|
|
- record.put(key.toString(), record.getOrDefault(key.toString(), 0) + sum);
|
|
|
|
|
|
|
+ record.put(key.toString(), record.getOrDefault(key.toString(), 0) + sum); // update map, don't write context here
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ /* cleanup function executed after reducing */
|
|
|
public void cleanup(Context context) throws IOException, InterruptedException {
|
|
public void cleanup(Context context) throws IOException, InterruptedException {
|
|
|
- ArrayList<HashMap.Entry<String, Integer>> list = new ArrayList<HashMap.Entry<String, Integer>>(record.entrySet());
|
|
|
|
|
|
|
+ ArrayList<HashMap.Entry<String, Integer>> list = new ArrayList<HashMap.Entry<String, Integer>>(record.entrySet()); // get answer from map
|
|
|
Collections.sort(list, new Comparator<HashMap.Entry<String, Integer>>() {
|
|
Collections.sort(list, new Comparator<HashMap.Entry<String, Integer>>() {
|
|
|
- public int compare(HashMap.Entry<String, Integer> o1, HashMap.Entry<String, Integer> o2) {
|
|
|
|
|
|
|
+ public int compare(HashMap.Entry<String, Integer> o1, HashMap.Entry<String, Integer> o2) { // compare function
|
|
|
if (o1.getValue().equals(o2.getValue())) {
|
|
if (o1.getValue().equals(o2.getValue())) {
|
|
|
return o1.getKey().compareTo(o2.getKey());
|
|
return o1.getKey().compareTo(o2.getKey());
|
|
|
}
|
|
}
|
|
@@ -64,7 +67,7 @@ public class WordCount {
|
|
|
for (HashMap.Entry<String, Integer> itr : list) {
|
|
for (HashMap.Entry<String, Integer> itr : list) {
|
|
|
word.set(itr.getKey());
|
|
word.set(itr.getKey());
|
|
|
result.set(itr.getValue());
|
|
result.set(itr.getValue());
|
|
|
- context.write(word, result);
|
|
|
|
|
|
|
+ context.write(word, result); // write context here
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -73,14 +76,14 @@ public class WordCount {
|
|
|
Configuration conf = new Configuration();
|
|
Configuration conf = new Configuration();
|
|
|
Job job = Job.getInstance(conf, "word count");
|
|
Job job = Job.getInstance(conf, "word count");
|
|
|
job.setJarByClass(WordCount.class);
|
|
job.setJarByClass(WordCount.class);
|
|
|
- job.setMapperClass(TokenizerMapper.class);
|
|
|
|
|
- job.setCombinerClass(IntSumReducer.class);
|
|
|
|
|
- job.setReducerClass(IntSumReducer.class);
|
|
|
|
|
|
|
+ job.setMapperClass(TokenizerMapper.class); // map class
|
|
|
|
|
+ job.setCombinerClass(IntSumReducer.class); // combine class
|
|
|
|
|
+ job.setReducerClass(IntSumReducer.class); // reduce class
|
|
|
job.setOutputKeyClass(Text.class);
|
|
job.setOutputKeyClass(Text.class);
|
|
|
job.setOutputValueClass(IntWritable.class);
|
|
job.setOutputValueClass(IntWritable.class);
|
|
|
- FileOutputFormat.setOutputPath(job, new Path(args[0]));
|
|
|
|
|
|
|
+ FileOutputFormat.setOutputPath(job, new Path(args[0])); // output directory
|
|
|
for (int i = 1; i < args.length; i++) {
|
|
for (int i = 1; i < args.length; i++) {
|
|
|
- FileInputFormat.addInputPath(job, new Path(args[i]));
|
|
|
|
|
|
|
+ FileInputFormat.addInputPath(job, new Path(args[i])); // input files
|
|
|
}
|
|
}
|
|
|
System.exit(job.waitForCompletion(true) ? 0 : 1);
|
|
System.exit(job.waitForCompletion(true) ? 0 : 1);
|
|
|
}
|
|
}
|