WordCount.java 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
  9. public class WordCount {
  10. public static void main(String[] args) throws Exception {
  11. HashMap<String, Integer> record = new HashMap<String, Integer>(); // store count in map
  12. String path = "../testdata";
  13. Path dataDir = Paths.get(path); // get directory info
  14. DirectoryStream<Path> files = Files.newDirectoryStream(dataDir);
  15. for (Path file : files) {
  16. if (Files.isDirectory(file)) {
  17. continue;
  18. }
  19. String text = new String(Files.readAllBytes(dataDir.resolve(file.getFileName()))); // read file content
  20. String word = new String("");
  21. for (int i = 0; i < text.length(); i++) {
  22. if (Character.isLetter(text.charAt(i)) || word.length() > 0 && text.charAt(i) == '\'') {
  23. word += Character.toLowerCase(text.charAt(i)); // extend word
  24. } else if (word.length() > 0) {
  25. record.put(word, record.getOrDefault(word, 0) + 1); // update map
  26. word = "";
  27. }
  28. }
  29. if (word.length() > 0) {
  30. record.put(word, record.getOrDefault(word, 0) + 1); // update map
  31. }
  32. }
  33. ArrayList<HashMap.Entry<String, Integer>> list = new ArrayList<HashMap.Entry<String, Integer>>(record.entrySet()); // get answer from map
  34. Collections.sort(list, new Comparator<HashMap.Entry<String, Integer>>() {
  35. public int compare(HashMap.Entry<String, Integer> o1, HashMap.Entry<String, Integer> o2) { // compare function
  36. if (o1.getValue().equals(o2.getValue())) {
  37. return o1.getKey().compareTo(o2.getKey());
  38. }
  39. return o1.getValue() < o2.getValue() ? 1 : -1;
  40. }
  41. });
  42. for (HashMap.Entry<String, Integer> itr : list) {
  43. System.out.printf("%s\t%d\n", itr.getKey(), itr.getValue());
  44. }
  45. }
  46. }