You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2016/02/05 12:39:26 UTC

svn commit: r1728642 - in /jackrabbit/oak/trunk/oak-run: pom.xml src/main/assembly/oak-run.xml src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java

Author: chetanm
Date: Fri Feb  5 11:39:26 2016
New Revision: 1728642

URL: http://svn.apache.org/viewvc?rev=1728642&view=rev
Log:
OAK-3989 - Add S3 datastore support for Text Pre Extraction

-- Added optional dependency on jackrabbit-aws-ext
-- Excluded jackrabbit-aws-ext from getting packaged in the final oak-run

Modified:
    jackrabbit/oak/trunk/oak-run/pom.xml
    jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java

Modified: jackrabbit/oak/trunk/oak-run/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/pom.xml?rev=1728642&r1=1728641&r2=1728642&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-run/pom.xml Fri Feb  5 11:39:26 2016
@@ -400,6 +400,13 @@
       <version>3.1.0</version>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.jackrabbit</groupId>
+      <artifactId>jackrabbit-aws-ext</artifactId>
+      <version>${jackrabbit.version}</version>
+      <optional>true</optional>
+    </dependency>
+
     <!-- Findbugs annotations -->
     <dependency>
       <groupId>com.google.code.findbugs</groupId>

Modified: jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml?rev=1728642&r1=1728641&r2=1728642&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml Fri Feb  5 11:39:26 2016
@@ -35,6 +35,7 @@
         <exclude>org.apache.derby</exclude>
         <exclude>org.apache.tika:tika-core:*</exclude>
         <exclude>org.apache.tika:tika-parsers:*</exclude>
+        <exclude>org.apache.jackrabbit:jackrabbit-aws-ext:*</exclude>
       </excludes>
       <useStrictFiltering>true</useStrictFiltering>
       <useProjectArtifact>true</useProjectArtifact>

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java?rev=1728642&r1=1728641&r2=1728642&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java Fri Feb  5 11:39:26 2016
@@ -22,15 +22,26 @@ package org.apache.jackrabbit.oak.plugin
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.UUID;
 
+import com.google.common.collect.Maps;
 import com.google.common.io.Closer;
 import com.mongodb.MongoClientURI;
 import com.mongodb.MongoURI;
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.aws.ext.ds.S3DataStore;
+import org.apache.jackrabbit.core.data.DataStore;
+import org.apache.jackrabbit.core.data.DataStoreException;
 import org.apache.jackrabbit.core.data.FileDataStore;
+import org.apache.jackrabbit.oak.commons.PropertiesUtil;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreTextWriter;
 import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
@@ -88,6 +99,11 @@ public class TextExtractorMain {
                     .withRequiredArg()
                     .ofType(File.class);
 
+            OptionSpec<File> s3ConfigSpec = parser
+                    .accepts("s3-config-path", "Path of properties file containing config for S3DataStore")
+                    .withRequiredArg()
+                    .ofType(File.class);
+
             OptionSpec<File> storeDirSpec = parser
                     .accepts("store-path", "Path of directory used to store extracted text content")
                     .withRequiredArg()
@@ -118,7 +134,6 @@ public class TextExtractorMain {
             boolean extract = nonOptions.contains("extract");
             boolean generate = nonOptions.contains("generate");
             File dataFile = null;
-            File fdsDir;
             File storeDir = null;
             File tikaConfigFile = null;
             BlobStore blobStore = null;
@@ -141,7 +156,7 @@ public class TextExtractorMain {
             }
 
             if (options.has(fdsDirSpec)) {
-                fdsDir = fdsDirSpec.value(options);
+                File fdsDir = fdsDirSpec.value(options);
                 checkArgument(fdsDir.exists(), "FileDataStore %s does not exist", fdsDir.getAbsolutePath());
                 FileDataStore fds = new FileDataStore();
                 fds.setPath(fdsDir.getAbsolutePath());
@@ -149,6 +164,32 @@ public class TextExtractorMain {
                 blobStore = new DataStoreBlobStore(fds);
             }
 
+            if (options.has(s3ConfigSpec)){
+                File s3Config = s3ConfigSpec.value(options);
+                checkArgument(s3Config.exists() && s3Config.canRead(), "S3DataStore config cannot be read from [%s]",
+                        s3Config.getAbsolutePath());
+                Properties props = loadProperties(s3Config);
+                log.info("Loaded properties for S3DataStore from {}", s3Config.getAbsolutePath());
+                String pathProp = "path";
+                String repoPath = props.getProperty(pathProp);
+                checkNotNull(repoPath, "Missing required property [%s] from S3DataStore config loaded from [%s]", pathProp, s3Config);
+
+                //Check if 'secret' key is defined. It should be non null for references
+                //to be generated. As the ref are transient we can just use any random value
+                //if not specified
+                String secretConfig = "secret";
+                if (props.getProperty(secretConfig) == null){
+                    props.setProperty(secretConfig, UUID.randomUUID().toString());
+                }
+
+                log.info("Using {} for S3DataStore ", repoPath);
+                DataStore ds = createS3DataStore(props);
+                PropertiesUtil.populate(ds, toMap(props), false);
+                ds.init(pathProp);
+                blobStore = new DataStoreBlobStore(ds);
+                closer.register(asCloseable(ds));
+            }
+
             if (options.has(dataFileSpec)) {
                 dataFile = dataFileSpec.value(options);
             }
@@ -219,6 +260,31 @@ public class TextExtractorMain {
         }
     }
 
+    private static Map<String, ?> toMap(Properties properties) { // Copies the given Properties into a freshly created map and returns it.
+        Map<String, String> map = Maps.newHashMap();
+        for (final String name: properties.stringPropertyNames()) { // only names reported by stringPropertyNames() are copied
+            map.put(name, properties.getProperty(name)); // value is looked up via getProperty(name)
+        }
+        return map;
+    }
+
+    private static DataStore createS3DataStore(Properties props) throws IOException { // Creates an S3DataStore and hands it 'props'; init() is not called here.
+        S3DataStore s3ds = new S3DataStore();
+        s3ds.setProperties(props); // NOTE(review): nothing visible in this body throws IOException - confirm whether the throws clause is still needed
+        return s3ds;
+    }
+
+    private static Properties loadProperties(File s3Config) throws IOException { // Reads the file 's3Config' into a new Properties object and returns it.
+        Properties props = new Properties();
+        InputStream is = FileUtils.openInputStream(s3Config); // opened outside the try, so a failure to open propagates as-is
+        try{
+            props.load(is);
+        } finally {
+            IOUtils.closeQuietly(is); // stream is always closed, whether or not load() succeeded
+        }
+        return props;
+    }
+
     private static NodeStore bootStrapNodeStore(String src, BlobStore blobStore,
                                                 Closer closer) throws IOException {
         if (src.startsWith(MongoURI.MONGODB_PREFIX)) {
@@ -249,6 +315,19 @@ public class TextExtractorMain {
             }
         };
     }
+
+    private static Closeable asCloseable(final DataStore ds) { // Wraps 'ds' in a Closeable whose close() delegates to ds.close().
+        return new Closeable() {
+            @Override
+            public void close() throws IOException {
+                try {
+                    ds.close();
+                } catch (DataStoreException e) { // Closeable.close() may only throw IOException, so the DataStoreException is wrapped
+                    throw new IOException(e); // original exception is kept as the cause
+                }
+            }
+        };
+    }
 
     private static Closeable asCloseable(final DocumentNodeStore dns) {
         return new Closeable() {