You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by gr...@apache.org on 2015/09/21 19:20:33 UTC
svn commit: r1704369 -
/tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
Author: grossws
Date: Mon Sep 21 17:20:26 2015
New Revision: 1704369
URL: http://svn.apache.org/viewvc?rev=1704369&view=rev
Log:
Migrate the phone numbers example from recursive java.io.File listing to the java.nio.file file-walk API (Files.walkFileTree)
Modified:
tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java?rev=1704369&r1=1704368&r2=1704369&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java Mon Sep 21 17:20:26 2015
@@ -17,9 +17,16 @@
package org.apache.tika.example;
-import java.io.File;
-import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
import java.io.InputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.Collections;
import java.util.HashSet;
import org.apache.tika.metadata.Metadata;
@@ -40,7 +47,7 @@ import org.apache.tika.sax.PhoneExtracti
* from the tika-example directory.
*/
public class GrabPhoneNumbersExample {
- private static HashSet<String> phoneNumbers = new HashSet<String>();
+ private static HashSet<String> phoneNumbers = new HashSet<>();
private static int failedFiles, successfulFiles = 0;
public static void main(String[] args) {
@@ -48,41 +55,49 @@ public class GrabPhoneNumbersExample {
System.err.println("Usage `java GrabPhoneNumbers [corpus]");
return;
}
- final File folder = new File(args[0]);
- System.out.println("Searching " + folder.getAbsolutePath() + "...");
+ Path folder = Paths.get(args[0]);
+ System.out.println("Searching " + folder.toAbsolutePath() + "...");
processFolder(folder);
System.out.println(phoneNumbers.toString());
System.out.println("Parsed " + successfulFiles + "/" + (successfulFiles + failedFiles));
}
- public static void processFolder(final File folder) {
- for (final File fileEntry : folder.listFiles()) {
- if (fileEntry.isDirectory()) {
- processFolder(fileEntry);
- } else {
- try {
- process(fileEntry);
- successfulFiles++;
- } catch (Exception e) {
+ public static void processFolder(Path folder) {
+ try {
+ Files.walkFileTree(folder, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+ try {
+ process(file);
+ successfulFiles++;
+ } catch (Exception e) {
+ failedFiles++;
+ // ignore this file
+ }
+ return FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
failedFiles++;
- // Ignore this file...
+ return FileVisitResult.CONTINUE;
}
- }
+ });
+ } catch (IOException e) {
+ // ignore failure
}
}
- public static void process(File file) throws Exception {
+ public static void process(Path path) throws Exception {
Parser parser = new AutoDetectParser();
Metadata metadata = new Metadata();
// The PhoneExtractingContentHandler will examine any characters for phone numbers before passing them
// to the underlying Handler.
PhoneExtractingContentHandler handler = new PhoneExtractingContentHandler(new BodyContentHandler(), metadata);
- try (InputStream stream = new FileInputStream(file)) {
+ try (InputStream stream = new BufferedInputStream(Files.newInputStream(path))) {
parser.parse(stream, handler, metadata, new ParseContext());
}
String[] numbers = metadata.getValues("phonenumbers");
- for (String number : numbers) {
- phoneNumbers.add(number);
- }
+ Collections.addAll(phoneNumbers, numbers);
}
}