WordCount.java 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  import java.nio.charset.StandardCharsets;
  import java.nio.file.DirectoryStream;
  import java.nio.file.Files;
  import java.nio.file.Path;
  import java.nio.file.Paths;
  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.Comparator;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  /**
   * Counts how often each word occurs across the regular files that sit directly
   * inside a data directory and prints the totals to standard output.
   *
   * <p>A word starts with a letter (per {@link Character#isLetter(char)}) and
   * continues through letters and apostrophes; apostrophes left at the end of a
   * word are dropped, so {@code dogs'} is counted as {@code dogs} while
   * {@code don't} is kept intact. Words are lower-cased before counting.
   *
   * <p>Output is one {@code "word count"} line per distinct word, ordered by
   * descending count and, for equal counts, by ascending word
   * ({@link String#compareTo(String)}).
   */
  public class WordCount {

      /** Directory scanned when no command-line argument is supplied. */
      private static final String DEFAULT_DATA_DIR = "../testdata";

      /**
       * Program entry point.
       *
       * @param args optional; when present, {@code args[0]} is the directory to
       *             scan, otherwise {@code ../testdata} is used
       * @throws Exception if the directory cannot be listed or a file cannot be read
       */
      public static void main(String[] args) throws Exception {
          String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_DIR;
          Path dataDir = Paths.get(path);

          Map<String, Integer> record = new HashMap<>();

          // try-with-resources: a DirectoryStream holds an open directory handle
          // and must be closed once iteration is finished.
          try (DirectoryStream<Path> files = Files.newDirectoryStream(dataDir)) {
              for (Path file : files) {
                  if (Files.isDirectory(file)) {
                      continue; // only files directly inside the directory are read
                  }
                  // Decode explicitly as UTF-8 instead of relying on the platform default.
                  String text = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
                  countWords(text, record);
              }
          }

          List<Map.Entry<String, Integer>> list = new ArrayList<>(record.entrySet());
          list.sort((left, right) -> {
              // Higher counts first; ties are broken alphabetically by word.
              int byCount = Integer.compare(right.getValue(), left.getValue());
              return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
          });

          for (Map.Entry<String, Integer> entry : list) {
              System.out.printf("%s %d\n", entry.getKey(), entry.getValue());
          }
      }

      /** Splits {@code text} into lower-cased words and adds each occurrence to {@code record}. */
      private static void countWords(String text, Map<String, Integer> record) {
          StringBuilder word = new StringBuilder();
          for (int i = 0; i < text.length(); i++) {
              char c = text.charAt(i);
              // An apostrophe only extends a word that has already started, so a
              // word can never begin with one.
              if (Character.isLetter(c) || (c == '\'' && word.length() > 0)) {
                  word.append(Character.toLowerCase(c));
              } else {
                  flushWord(word, record);
              }
          }
          flushWord(word, record); // the text may end in the middle of a word
      }

      /** Records the pending word, minus trailing apostrophes, and clears the buffer. */
      private static void flushWord(StringBuilder word, Map<String, Integer> record) {
          int end = word.length();
          while (end > 0 && word.charAt(end - 1) == '\'') {
              end--;
          }
          if (end > 0) {
              record.merge(word.substring(0, end), 1, Integer::sum);
          }
          word.setLength(0);
      }
  }