You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2016/02/05 12:39:26 UTC
svn commit: r1728642 - in /jackrabbit/oak/trunk/oak-run: pom.xml
src/main/assembly/oak-run.xml
src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
Author: chetanm
Date: Fri Feb 5 11:39:26 2016
New Revision: 1728642
URL: http://svn.apache.org/viewvc?rev=1728642&view=rev
Log:
OAK-3989 - Add S3 datastore support for Text Pre Extraction
-- Added optional dependency on jackrabbit-aws-ext
-- Excluded jackrabbit-aws-ext from getting packaged in final oak-run
Modified:
jackrabbit/oak/trunk/oak-run/pom.xml
jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
Modified: jackrabbit/oak/trunk/oak-run/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/pom.xml?rev=1728642&r1=1728641&r2=1728642&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-run/pom.xml Fri Feb 5 11:39:26 2016
@@ -400,6 +400,13 @@
<version>3.1.0</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.jackrabbit</groupId>
+ <artifactId>jackrabbit-aws-ext</artifactId>
+ <version>${jackrabbit.version}</version>
+ <optional>true</optional>
+ </dependency>
+
<!-- Findbugs annotations -->
<dependency>
<groupId>com.google.code.findbugs</groupId>
Modified: jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml?rev=1728642&r1=1728641&r2=1728642&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/assembly/oak-run.xml Fri Feb 5 11:39:26 2016
@@ -35,6 +35,7 @@
<exclude>org.apache.derby</exclude>
<exclude>org.apache.tika:tika-core:*</exclude>
<exclude>org.apache.tika:tika-parsers:*</exclude>
+ <exclude>org.apache.jackrabbit:jackrabbit-aws-ext:*</exclude>
</excludes>
<useStrictFiltering>true</useStrictFiltering>
<useProjectArtifact>true</useProjectArtifact>
Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java?rev=1728642&r1=1728641&r2=1728642&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java Fri Feb 5 11:39:26 2016
@@ -22,15 +22,26 @@ package org.apache.jackrabbit.oak.plugin
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
+import java.io.InputStream;
import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.UUID;
+import com.google.common.collect.Maps;
import com.google.common.io.Closer;
import com.mongodb.MongoClientURI;
import com.mongodb.MongoURI;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.aws.ext.ds.S3DataStore;
+import org.apache.jackrabbit.core.data.DataStore;
+import org.apache.jackrabbit.core.data.DataStoreException;
import org.apache.jackrabbit.core.data.FileDataStore;
+import org.apache.jackrabbit.oak.commons.PropertiesUtil;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreTextWriter;
import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
@@ -88,6 +99,11 @@ public class TextExtractorMain {
.withRequiredArg()
.ofType(File.class);
+ OptionSpec<File> s3ConfigSpec = parser
+ .accepts("s3-config-path", "Path of properties file containing config for S3DataStore")
+ .withRequiredArg()
+ .ofType(File.class);
+
OptionSpec<File> storeDirSpec = parser
.accepts("store-path", "Path of directory used to store extracted text content")
.withRequiredArg()
@@ -118,7 +134,6 @@ public class TextExtractorMain {
boolean extract = nonOptions.contains("extract");
boolean generate = nonOptions.contains("generate");
File dataFile = null;
- File fdsDir;
File storeDir = null;
File tikaConfigFile = null;
BlobStore blobStore = null;
@@ -141,7 +156,7 @@ public class TextExtractorMain {
}
if (options.has(fdsDirSpec)) {
- fdsDir = fdsDirSpec.value(options);
+ File fdsDir = fdsDirSpec.value(options);
checkArgument(fdsDir.exists(), "FileDataStore %s does not exist", fdsDir.getAbsolutePath());
FileDataStore fds = new FileDataStore();
fds.setPath(fdsDir.getAbsolutePath());
@@ -149,6 +164,32 @@ public class TextExtractorMain {
blobStore = new DataStoreBlobStore(fds);
}
+ if (options.has(s3ConfigSpec)){
+ File s3Config = s3ConfigSpec.value(options);
+ checkArgument(s3Config.exists() && s3Config.canRead(), "S3DataStore config cannot be read from [%s]",
+ s3Config.getAbsolutePath());
+ Properties props = loadProperties(s3Config);
+ log.info("Loaded properties for S3DataStore from {}", s3Config.getAbsolutePath());
+ String pathProp = "path";
+ String repoPath = props.getProperty(pathProp);
+ checkNotNull(repoPath, "Missing required property [%s] from S3DataStore config loaded from [%s]", pathProp, s3Config);
+
+ //Check if 'secret' key is defined. It should be non null for references
+ //to be generated. As the ref are transient we can just use any random value
+ //if not specified
+ String secretConfig = "secret";
+ if (props.getProperty(secretConfig) == null){
+ props.setProperty(secretConfig, UUID.randomUUID().toString());
+ }
+
+ log.info("Using {} for S3DataStore ", repoPath);
+ DataStore ds = createS3DataStore(props);
+ PropertiesUtil.populate(ds, toMap(props), false);
+ ds.init(pathProp);
+ blobStore = new DataStoreBlobStore(ds);
+ closer.register(asCloseable(ds));
+ }
+
if (options.has(dataFileSpec)) {
dataFile = dataFileSpec.value(options);
}
@@ -219,6 +260,31 @@ public class TextExtractorMain {
}
}
+ private static Map<String, ?> toMap(Properties properties) {
+ Map<String, String> map = Maps.newHashMap();
+ for (final String name: properties.stringPropertyNames()) {
+ map.put(name, properties.getProperty(name));
+ }
+ return map;
+ }
+
+ private static DataStore createS3DataStore(Properties props) throws IOException {
+ S3DataStore s3ds = new S3DataStore();
+ s3ds.setProperties(props);
+ return s3ds;
+ }
+
+ private static Properties loadProperties(File s3Config) throws IOException {
+ Properties props = new Properties();
+ InputStream is = FileUtils.openInputStream(s3Config);
+ try{
+ props.load(is);
+ } finally {
+ IOUtils.closeQuietly(is);
+ }
+ return props;
+ }
+
private static NodeStore bootStrapNodeStore(String src, BlobStore blobStore,
Closer closer) throws IOException {
if (src.startsWith(MongoURI.MONGODB_PREFIX)) {
@@ -249,6 +315,19 @@ public class TextExtractorMain {
}
};
}
+
+ private static Closeable asCloseable(final DataStore ds) {
+ return new Closeable() {
+ @Override
+ public void close() throws IOException {
+ try {
+ ds.close();
+ } catch (DataStoreException e) {
+ throw new IOException(e);
+ }
+ }
+ };
+ }
private static Closeable asCloseable(final DocumentNodeStore dns) {
return new Closeable() {