You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/15 15:00:32 UTC

svn commit: r1300986 - /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java

Author: joern
Date: Thu Mar 15 14:00:32 2012
New Revision: 1300986

URL: http://svn.apache.org/viewvc?rev=1300986&view=rev
Log:
OPENNLP-473 Initial check in

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java   (with props)

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java?rev=1300986&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java Thu Mar 15 14:00:32 2012
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Stack;
+
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * The directory sample stream scans a directory (recursively) for plain text
+ * files and outputs each file as a String object.
+ */
+public class DirectorySampleStream implements ObjectStream<String> {
+
+  private final Charset encoding;
+  
+  private final List<File> inputDirectories;
+  
+  private final boolean isRecursiveScan;
+  
+  private final FileFilter fileFilter;
+  
+  private Stack<File> directories = new Stack<File>();
+  
+  private Stack<File> textFiles = new Stack<File>();
+  
+  public DirectorySampleStream(File dirs[], Charset encoding, FileFilter fileFilter, boolean recursive) {
+
+    this.encoding = encoding;
+    this.fileFilter= fileFilter; 
+    isRecursiveScan = recursive;
+    
+    List<File> inputDirectoryList = new ArrayList<File>(dirs.length);
+    
+    for (File dir : dirs) {
+      if (!dir.isDirectory()) {
+        throw new IllegalArgumentException(
+            "All passed in directories must be directories, but \""
+            + dir.toString() + "\" is not!");
+      }
+      
+      inputDirectoryList.add(dir);
+    }
+    
+    inputDirectories = Collections.unmodifiableList(inputDirectoryList);
+    
+    directories.addAll(inputDirectories);
+  }
+  
+  public DirectorySampleStream(File dir, Charset encoding, FileFilter fileFilter, boolean recursive) {
+    this(new File[]{dir}, encoding, fileFilter, recursive);
+  }
+  
+  static String readFile(File textFile, Charset encoding) throws IOException {
+    
+    Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(textFile), encoding));
+
+    StringBuilder text = new StringBuilder();
+    
+    try {
+      char buffer[] = new char[1024];
+      int length;
+      while ((length = in.read(buffer, 0, buffer.length)) > 0) {
+        text.append(buffer, 0, length);
+      }
+    }
+    finally {
+      try {
+        in.close();
+      }
+      catch (IOException e) {
+        // sorry that this can fail!
+      }
+    }
+    
+    return text.toString();
+  }
+  
+  public String read() throws IOException {
+
+    while(textFiles.isEmpty() && !directories.isEmpty()) {
+      File dir = directories.pop();
+      
+      File files[];
+      
+      if (fileFilter != null) {
+        files = dir.listFiles(fileFilter);
+      }
+      else {
+        files = dir.listFiles();
+      }
+      
+      for (File file : files) {
+        if (file.isFile()) {
+          textFiles.push(file);
+        }
+        else if (isRecursiveScan && file.isDirectory()) {
+          directories.push(file);
+        }
+      }
+    }
+    
+    if (!textFiles.isEmpty()) {
+      return readFile(textFiles.pop(), encoding);
+    }
+    else {
+      return null;
+    }
+  }
+
+  public void reset() {
+    directories.clear();
+    textFiles.clear();
+    
+    directories.addAll(inputDirectories);
+  }
+
+  public void close() throws IOException {
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain