You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by gr...@apache.org on 2015/09/21 19:20:33 UTC

svn commit: r1704369 - /tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java

Author: grossws
Date: Mon Sep 21 17:20:26 2015
New Revision: 1704369

URL: http://svn.apache.org/viewvc?rev=1704369&view=rev
Log:
Migrate phone numbers example to file walk API

Modified:
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java?rev=1704369&r1=1704368&r2=1704369&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java Mon Sep 21 17:20:26 2015
@@ -17,9 +17,16 @@
 
 package org.apache.tika.example;
 
-import java.io.File;
-import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.Collections;
 import java.util.HashSet;
 
 import org.apache.tika.metadata.Metadata;
@@ -40,7 +47,7 @@ import org.apache.tika.sax.PhoneExtracti
  * from the tika-example directory.
  */
 public class GrabPhoneNumbersExample {
-    private static HashSet<String> phoneNumbers = new HashSet<String>();
+    private static HashSet<String> phoneNumbers = new HashSet<>();
     private static int failedFiles, successfulFiles = 0;
 
     public static void main(String[] args) {
@@ -48,41 +55,49 @@ public class GrabPhoneNumbersExample {
             System.err.println("Usage `java GrabPhoneNumbers [corpus]");
             return;
         }
-        final File folder = new File(args[0]);
-        System.out.println("Searching " + folder.getAbsolutePath() + "...");
+        Path folder = Paths.get(args[0]);
+        System.out.println("Searching " + folder.toAbsolutePath() + "...");
         processFolder(folder);
         System.out.println(phoneNumbers.toString());
         System.out.println("Parsed " + successfulFiles + "/" + (successfulFiles + failedFiles));
     }
 
-    public static void processFolder(final File folder) {
-        for (final File fileEntry : folder.listFiles()) {
-            if (fileEntry.isDirectory()) {
-                processFolder(fileEntry);
-            } else {
-                try {
-                    process(fileEntry);
-                    successfulFiles++;
-                } catch (Exception e) {
+    public static void processFolder(Path folder) {
+        try {
+            Files.walkFileTree(folder, new SimpleFileVisitor<Path>() {
+                @Override
+                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                    try {
+                        process(file);
+                        successfulFiles++;
+                    } catch (Exception e) {
+                        failedFiles++;
+                        // ignore this file
+                    }
+                    return FileVisitResult.CONTINUE;
+                }
+
+                @Override
+                public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
                     failedFiles++;
-                    // Ignore this file...
+                    return FileVisitResult.CONTINUE;
                 }
-            }
+            });
+        } catch (IOException e) {
+            // ignore failure
         }
     }
 
-    public static void process(File file) throws Exception {
+    public static void process(Path path) throws Exception {
         Parser parser = new AutoDetectParser();
         Metadata metadata = new Metadata();
         // The PhoneExtractingContentHandler will examine any characters for phone numbers before passing them
         // to the underlying Handler.
         PhoneExtractingContentHandler handler = new PhoneExtractingContentHandler(new BodyContentHandler(), metadata);
-        try (InputStream stream = new FileInputStream(file)) {
+        try (InputStream stream = new BufferedInputStream(Files.newInputStream(path))) {
             parser.parse(stream, handler, metadata, new ParseContext());
         }
         String[] numbers = metadata.getValues("phonenumbers");
-        for (String number : numbers) {
-            phoneNumbers.add(number);
-        }
+        Collections.addAll(phoneNumbers, numbers);
     }
 }